ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/Search.cpp
Revision: 1
Committed: 2002-12-04T20:22:59-08:00 (22 years, 6 months ago) by douglas
File size: 13491 byte(s)
Log Message:
Initial revision

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4     * Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine Main
46     //
47     // Douglas Thrift
48     //
49     // Search.cpp
50    
51     #include "Search.h"
52     #include "Indexer.h"
53     #include "Searcher.h"
54     #include "Outputer.h"
55    
56     #ifndef _WIN32
57     #include <sys/utsname.h>
58     #else
59     #include <windows.h>
60     #endif // _WIN32
61    
// Name this executable was invoked as; main() assigns argv[0] to it.
string program;

// Product name printed by version().
string programName("Douglas Thrift's Search Engine");

// Release identifier printed by version().
string programVersion("1.1alpha");

// Switched on by the -D command-line option in main().
bool debug(false);
66    
67     int main(int argc, char* argv[])
68     {
69     program = argv[0];
70    
71     bool indexMode = false;
72     string indexURL;
73     set<string> indexDomains;
74     set<string> indexRestrictions;
75    
76     unsigned page = 1;
77     string query;
78    
79     vector<string> indices;
80    
81     string header = "header.html";
82     string body = "body.html";
83     string footer = "footer.html";
84     string notfound = "notfound.html";
85     string pages = "pages.html";
86    
87     for (int index = 1; index < argc; index++)
88     {
89     string arg(argv[index]);
90    
91     if (arg == "-help")
92     {
93     usage();
94     return 0;
95     }
96     else if (arg == "-version")
97     {
98     version();
99     return 0;
100     }
101     else if (arg == "-license")
102     {
103     license();
104     return 0;
105     }
106     else if (arg == "-P")
107     {
108     if (++index < argc)
109     {
110     page = strtoul(argv[index],0,0);
111     }
112     else
113     {
114     cerr << program << ": Bad arguments\n";
115     return 1;
116     }
117     }
118     else if (arg == "-i")
119     {
120     indexMode = true;
121    
122     if (++index < argc)
123     {
124     indexURL = argv[index];
125     }
126     else
127     {
128     cerr << program << ": Bad arguments\n";
129     return 1;
130     }
131     }
132     else if (arg == "-d")
133     {
134     if (++index < argc)
135     {
136     indexDomains.insert(argv[index]);
137     }
138     else
139     {
140     cerr << program << ": Bad arguments\n";
141     return 1;
142     }
143     }
144     else if (arg == "-r")
145     {
146     if (++index < argc)
147     {
148     indexRestrictions.insert(argv[index]);
149     }
150     else
151     {
152     cerr << program << ": Bad arguments\n";
153     return 1;
154     }
155     }
156     else if (arg == "-h")
157     {
158     if (++index < argc)
159     {
160     header = argv[index];
161     }
162     else
163     {
164     cerr << program << ": Bad arguments\n";
165     return 1;
166     }
167     }
168     else if (arg == "-b")
169     {
170     if (++index < argc)
171     {
172     body = argv[index];
173     }
174     else
175     {
176     cerr << program << ": Bad arguments\n";
177     return 1;
178     }
179     }
180     else if (arg == "-f")
181     {
182     if (++index < argc)
183     {
184     footer = argv[index];
185     }
186     else
187     {
188     cerr << program << ": Bad arguments\n";
189     return 1;
190     }
191     }
192     else if (arg == "-n")
193     {
194     if (++index < argc)
195     {
196     notfound = argv[index];
197     }
198     else
199     {
200     cerr << program << ": Bad arguments\n";
201     return 1;
202     }
203     }
204     else if (arg == "-p")
205     {
206     if (++index < argc)
207     {
208     pages = argv[index];
209     }
210     else
211     {
212     cerr << program << ": Bad arguments\n";
213     return 1;
214     }
215     }
216     else if (arg == "-D")
217     {
218     debug = true;
219     }
220     else
221     {
222     indices.push_back(arg);
223     }
224     }
225    
226     if (indices.size() < 1)
227     {
228     usage();
229     return 0;
230     }
231    
232     if (indexMode)
233     {
234     if (indices.size() > 1)
235     {
236     cerr << program << ": Too many indices, can only build one index"
237     << " at a time\n";
238     return 1;
239     }
240    
241     if (indexDomains.size() < 1)
242     {
243     cerr << program << ": Must specify at least one domain\n";
244     return 1;
245     }
246    
247     Indexer indexer(indices[0], indexDomains, indexRestrictions);
248    
249     indexer.index(indexURL);
250     }
251     else
252     {
253     string line;
254     getline(cin, line);
255     query = line;
256    
257     Searcher searcher(query);
258    
259     searcher.search(indices);
260    
261     Outputer outputer(header, body, footer, notfound,
262     pages);
263    
264     outputer.output(searcher, page < 1 ? page : --page);
265     }
266    
267     return 0;
268     }
269    
// Describes the host system as "(<os> <version>[ marketing] <architecture>)".
//
// On Windows the OS name and version come from GetVersionEx, the architecture
// from GetSystemInfo, and a bracketed marketing name (for example
// " [Windows XP]") is appended to the version when the version numbers match a
// known release. Elsewhere the fields come from uname(); if uname() fails all
// three fields read "unknown" instead of being taken from an unfilled struct.
//
// The system structures live on the stack, so nothing has to be released and
// nothing can leak if a string assignment throws.
string platform()
{
    string os;
    string version;
    string architecture;
    string marketing;

#ifdef _WIN32
    // Value-initialized so the fields are defined even if the call fails.
    OSVERSIONINFO osInfo = OSVERSIONINFO();
    osInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
    GetVersionEx(&osInfo);

    os = osInfo.dwPlatformId == VER_PLATFORM_WIN32_NT ? "Windows NT" :
        "Windows";
    const unsigned major = osInfo.dwMajorVersion;
    const unsigned minor = osInfo.dwMinorVersion;

    SYSTEM_INFO systemInfo = SYSTEM_INFO();
    GetSystemInfo(&systemInfo);

    switch (systemInfo.wProcessorArchitecture)
    {
    case PROCESSOR_ARCHITECTURE_INTEL:
        architecture = "ix86";
        break;
    case PROCESSOR_ARCHITECTURE_MIPS:
        architecture = "mips";
        break;
    case PROCESSOR_ARCHITECTURE_ALPHA:
        architecture = "alpha";
        break;
    case PROCESSOR_ARCHITECTURE_PPC:
        architecture = "ppc";
        break;
    case PROCESSOR_ARCHITECTURE_IA64:
        architecture = "ia64";
        break;
    case PROCESSOR_ARCHITECTURE_IA32_ON_WIN64:
        architecture = "ix86_on_win64";
        break;
    case PROCESSOR_ARCHITECTURE_AMD64:
        architecture = "amd64";
        break;
    default:
        architecture = "unknown";
        break;
    }

    // Two unsigned values of at most ten digits each, a dot and the
    // terminator need 22 bytes.
    char cversion[32];
    sprintf(cversion, "%u.%u", major, minor);
    version = cversion;

    const bool nt = os == "Windows NT";

    if (major == 4 && minor <= 3 && !nt)
    {
        marketing = " [Windows 95]";
    }
    else if (major == 4 && minor == 10 && !nt)
    {
        marketing = " [Windows 98]";
    }
    else if (major == 5 && minor == 0 && nt)
    {
        marketing = " [Windows 2000]";
    }
    else if (major == 4 && minor == 90 && !nt)
    {
        marketing = " [Windows ME]";
    }
    else if (major == 5 && minor == 1 && nt)
    {
        marketing = " [Windows XP]";
    }
    else if (major == 5 && minor == 2 && nt)
    {
        marketing = " [Windows .NET Server]";
    }
#else // _WIN32
    struct utsname computer;

    if (uname(&computer) < 0)
    {
        // The structure contents are unspecified after a failed call.
        os = "unknown";
        version = "unknown";
        architecture = "unknown";
    }
    else
    {
        os = computer.sysname;
        version = computer.release;
        architecture = computer.machine;
    }
#endif // _WIN32

    return "(" + os + " " + version + marketing + " " + architecture + ")";
}
366    
367     void usage()
368     {
369     string tab(8 + program.length(), ' ');
370    
371     cout << "Usage: " << program << " [index ...] [-P page] [-h header] [-b bo"
372     << "dy]\n"
373     << tab << "[-f footer] [-n notfound] [-p pages]\n"
374     << tab << "[-i begin] [-d domain ...] [-r restriction ...]\n"
375     << tab << "[-D] [-version] [-help]\n"
376     << "Options:\n"
377     << " index Index file to use (can only use one file for i"
378     << "ndexing)\n"
379     << " -P page Page of search to display (defaults to 1)\n"
380     << " -h header Header template to use (defaults to header.htm"
381     << "l)\n"
382     << " -b body Body template to use (defaults to body.html)\n"
383     << " -f footer Footer template to use (defaults to footer.htm"
384     << "l)\n"
385     << " -n notfound Not found template to use (defaults to notfoun"
386     << "d.html)\n"
387     << " -p pages Pages template to use (defaults to pages.html)"
388     << "\n"
389     << " -i begin URL to begin indexing (causes indexing rather "
390     << "than search)\n"
391     << " -d domain Domain to include in indexing\n"
392     << " -r restriction URL to restrict from indexing\n"
393     << " -D Display debug information\n"
394     << " -version Display version information and exit\n"
395     << " -license Display license information and exit\n"
396     << " -help Display this message and exit\n";
397     }
398    
399     void version()
400     {
401     cout << programName << " " << programVersion << " "<< platform() << "\n\n"
402     << " Copyright (C) 2002, Douglas Thrift. All Rights Reserved.\n\n"
403     << " This product includes software developed by Douglas Thrift\n"
404     << " (http://computers.douglasthrift.net/searchengine/).\n";
405     }
406    
// Writes the full license text to standard output.
void license()
{
    // One literal per output line; the compiler joins adjacent literals, so
    // the whole notice is written with a single insertion.
    static const char notice[] =
        "License:\n"
        " Douglas Thrift's Search Engine License\n\n"
        " Copyright (C) 2002, Douglas Thrift. All Rights Reserved.\n\n"
        " Redistribution and use in source and binary forms, with or without\n"
        " modification, are permitted provided that the following conditions are met:\n\n"
        " 1. Redistributions of source code must retain the above copyright notice,\n"
        " this list of conditions and the following disclaimer.\n\n"
        " 2. Redistributions in binary form must reproduce the above copyright notice,\n"
        " this list of conditions and the following disclaimer in the documentation\n"
        " and/or other materials provided with the distribution.\n\n"
        " 3. The end-user documentation included with the redistribution, if any, must\n"
        " include the following acknowledgment:\n\n"
        " \"This product includes software developed by Douglas Thrift\n"
        " (http://computers.douglasthrift.net/searchengine/).\"\n\n"
        " Alternately, this acknowledgment may appear in the software itself, if\n"
        " and wherever such third-party acknowledgments normally appear.\n\n"
        " 4. The names \"Douglas Thrift\" and \"Douglas Thrift's Search Engine\" must not\n"
        " be used to endorse or promote products derived from this software without\n"
        " specific prior written permission. For written permission, please visit\n"
        " http://www.douglasthrift.net/contact.cgi for contact information.\n\n"
        " 5. Products derived from this software may not be called \"Douglas Thrift's\n"
        " Search Engine\", nor may \"Douglas Thrift's Search Engine\" appear in their\n"
        " name, without prior written permission.\n\n"
        " THIS SOFTWARE IS PROVIDED \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES,\n"
        " INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\n"
        " FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE\n"
        " COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,\n"
        " INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n"
        " LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,\n"
        " OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\n"
        " LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n"
        " NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,\n"
        " EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n";

    cout << notice;
}
467    
// Replaces every occurrence of `character` in `line` with the text `entity`
// (for example '&' with "&amp;"), modifying `line` in place.
//
// line      - string to rewrite
// character - character to look for
// entity    - NUL-terminated replacement text; must not be a null pointer
//
// Scanning resumes after the inserted text, so an entity that itself contains
// `character` is never re-expanded, and an empty entity simply removes every
// occurrence. Positions are kept as string::size_type so long strings are not
// truncated through int.
void entities(string& line, char character, char* entity)
{
    const string replacement(entity);
    string::size_type begin = 0;

    while (begin < line.length())
    {
        const string::size_type spot = line.find(character, begin);

        if (spot == string::npos)
        {
            break;
        }

        line.replace(spot, 1, replacement);

        // Continue behind what was just written, not inside it.
        begin = spot + replacement.length();
    }
}
490    
// Replaces every occurrence of the text `entity` in `line` with the single
// character `character` (for example "&lt;" with '<'), modifying `line` in
// place.
//
// line      - string to rewrite
// entity    - NUL-terminated text to look for; must not be a null pointer
// character - character written in place of each occurrence
//
// Scanning resumes right after the character just written, so text is decoded
// once only ("&amp;amp;" becomes "&amp;"). An empty entity leaves `line`
// untouched; previously it matched at every position and the loop never
// ended. Positions are kept as string::size_type so long strings are not
// truncated through int.
void entities(string& line, char* entity, char character)
{
    const string::size_type entityLength = strlen(entity);

    if (entityLength == 0)
    {
        return;
    }

    string::size_type begin = 0;

    while (begin < line.length())
    {
        const string::size_type spot = line.find(entity, begin);

        if (spot == string::npos)
        {
            break;
        }

        line.replace(spot, entityLength, 1, character);

        begin = spot + 1;
    }
}
513    
// Normalizes whitespace in `abbynormal` in place: every run of whitespace is
// reduced to its first character, then one leading and one trailing
// whitespace character (if present) are removed.
//
// Empty and all-whitespace strings are handled: the trailing check no longer
// indexes position length() - 1 of an empty string. Characters are passed to
// isspace as unsigned char, because a negative char value is not a valid
// argument for it.
void normalize(string& abbynormal)
{
    string collapsed;
    collapsed.reserve(abbynormal.length());

    bool inRun = false;

    for (string::size_type index = 0; index < abbynormal.length(); ++index)
    {
        const char current = abbynormal[index];
        const bool blank = isspace(static_cast<unsigned char>(current)) != 0;

        // Keep everything except the second and later characters of a run.
        if (!blank || !inRun)
        {
            collapsed += current;
        }

        inRun = blank;
    }

    if (!collapsed.empty() &&
        isspace(static_cast<unsigned char>(collapsed[0])))
    {
        collapsed.erase(0, 1);
    }

    if (!collapsed.empty() &&
        isspace(static_cast<unsigned char>(collapsed[collapsed.length() - 1])))
    {
        collapsed.erase(collapsed.length() - 1, 1);
    }

    abbynormal.swap(collapsed);
}