ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/Search.cpp
Revision: 28
Committed: 2003-01-02T19:42:33-08:00 (22 years, 5 months ago) by douglas
File size: 13642 byte(s)
Log Message:
Changed Copyright notices to state 2002-2003.

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4 douglas 28 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 douglas 1 * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine Main
46     //
47     // Douglas Thrift
48     //
49     // Search.cpp
50    
51     #include "Search.h"
52     #include "Indexer.h"
53     #include "Searcher.h"
54     #include "Outputer.h"
55    
56     #ifndef _WIN32
57     #include <sys/utsname.h>
58     #else
59     #include <windows.h>
60     #endif // _WIN32
61    
// Name the executable was invoked with; main() assigns it from argv[0] and
// the diagnostics and usage text print it.
string program;

// Product name shown by version() and used to build the agent string.
string programName("Douglas Thrift's Search Engine");

// Release identifier appended to the product name.
string programVersion("1.1alpha1");

// Debug flag; main() switches it on when -D is given.
bool debug(false);
66    
67     int main(int argc, char* argv[])
68     {
69     program = argv[0];
70    
71     bool indexMode = false;
72     string indexURL;
73     set<string> indexDomains;
74     set<string> indexRestrictions;
75    
76     unsigned page = 1;
77     string query;
78    
79     vector<string> indices;
80    
81     string header = "header.html";
82     string body = "body.html";
83     string footer = "footer.html";
84     string notfound = "notfound.html";
85     string pages = "pages.html";
86    
87     for (int index = 1; index < argc; index++)
88     {
89     string arg(argv[index]);
90    
91     if (arg == "-help")
92     {
93     usage();
94     return 0;
95     }
96     else if (arg == "-version")
97     {
98     version();
99     return 0;
100     }
101     else if (arg == "-license")
102     {
103     license();
104     return 0;
105     }
106     else if (arg == "-P")
107     {
108     if (++index < argc)
109     {
110     page = strtoul(argv[index],0,0);
111     }
112     else
113     {
114     cerr << program << ": Bad arguments\n";
115     return 1;
116     }
117     }
118     else if (arg == "-i")
119     {
120     indexMode = true;
121    
122     if (++index < argc)
123     {
124     indexURL = argv[index];
125     }
126     else
127     {
128     cerr << program << ": Bad arguments\n";
129     return 1;
130     }
131     }
132     else if (arg == "-d")
133     {
134     if (++index < argc)
135     {
136     indexDomains.insert(argv[index]);
137     }
138     else
139     {
140     cerr << program << ": Bad arguments\n";
141     return 1;
142     }
143     }
144     else if (arg == "-r")
145     {
146     if (++index < argc)
147     {
148     indexRestrictions.insert(argv[index]);
149     }
150     else
151     {
152     cerr << program << ": Bad arguments\n";
153     return 1;
154     }
155     }
156     else if (arg == "-h")
157     {
158     if (++index < argc)
159     {
160     header = argv[index];
161     }
162     else
163     {
164     cerr << program << ": Bad arguments\n";
165     return 1;
166     }
167     }
168     else if (arg == "-b")
169     {
170     if (++index < argc)
171     {
172     body = argv[index];
173     }
174     else
175     {
176     cerr << program << ": Bad arguments\n";
177     return 1;
178     }
179     }
180     else if (arg == "-f")
181     {
182     if (++index < argc)
183     {
184     footer = argv[index];
185     }
186     else
187     {
188     cerr << program << ": Bad arguments\n";
189     return 1;
190     }
191     }
192     else if (arg == "-n")
193     {
194     if (++index < argc)
195     {
196     notfound = argv[index];
197     }
198     else
199     {
200     cerr << program << ": Bad arguments\n";
201     return 1;
202     }
203     }
204     else if (arg == "-p")
205     {
206     if (++index < argc)
207     {
208     pages = argv[index];
209     }
210     else
211     {
212     cerr << program << ": Bad arguments\n";
213     return 1;
214     }
215     }
216     else if (arg == "-D")
217     {
218     debug = true;
219     }
220     else
221     {
222     indices.push_back(arg);
223     }
224     }
225    
226     if (indices.size() < 1)
227     {
228     usage();
229     return 0;
230     }
231    
232     if (indexMode)
233     {
234     if (indices.size() > 1)
235     {
236     cerr << program << ": Too many indices, can only build one index"
237     << " at a time\n";
238     return 1;
239     }
240    
241     if (indexDomains.size() < 1)
242     {
243     cerr << program << ": Must specify at least one domain\n";
244     return 1;
245     }
246    
247     Indexer indexer(indices[0], indexDomains, indexRestrictions);
248    
249     indexer.index(indexURL);
250     }
251     else
252     {
253     string line;
254     getline(cin, line);
255     query = line;
256    
257     Searcher searcher(query);
258    
259     searcher.search(indices);
260    
261     Outputer outputer(header, body, footer, notfound,
262     pages);
263    
264     outputer.output(searcher, page < 1 ? page : --page);
265     }
266    
267     return 0;
268     }
269    
270 douglas 12 string agent(bool version)
271     {
272     string agent = programName + (version ? ('/' + programVersion) : "");
273    
274     return agent;
275     }
276    
// Describes the machine the program is running on.
//
// Returns "(<os> <version>[ <marketing name>] <architecture>)". On Windows
// the values come from GetVersionEx() and GetSystemInfo(), and a bracketed
// marketing name follows the version for the releases recognised below.
// Elsewhere the values are the sysname, release and machine fields reported
// by uname(); if uname() fails all three read "unknown".
string platform()
{
	string os;
	string version;
	string architecture;
	string marketing;

#ifdef _WIN32
	// Plain stack objects: nothing to release, nothing to leak.
	OSVERSIONINFO computer;
	computer.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
	GetVersionEx(&computer);

	const bool nt = computer.dwPlatformId == VER_PLATFORM_WIN32_NT;
	const unsigned major = computer.dwMajorVersion;
	const unsigned minor = computer.dwMinorVersion;

	os = nt ? "Windows NT" : "Windows";

	SYSTEM_INFO info;
	GetSystemInfo(&info);

	switch (info.wProcessorArchitecture)
	{
	case PROCESSOR_ARCHITECTURE_INTEL:
		architecture = "ix86";
		break;
	case PROCESSOR_ARCHITECTURE_MIPS:
		architecture = "mips";
		break;
	case PROCESSOR_ARCHITECTURE_ALPHA:
		architecture = "alpha";
		break;
	case PROCESSOR_ARCHITECTURE_PPC:
		architecture = "ppc";
		break;
	case PROCESSOR_ARCHITECTURE_IA64:
		architecture = "ia64";
		break;
	case PROCESSOR_ARCHITECTURE_IA32_ON_WIN64:
		architecture = "ix86_on_win64";
		break;
	case PROCESSOR_ARCHITECTURE_AMD64:
		architecture = "amd64";
		break;
	default:
		architecture = "unknown";
		break;
	}

	// Two unsigned values and a dot need at most 22 bytes with the
	// terminator.
	char cversion[32];
	sprintf(cversion, "%u.%u", major, minor);
	version = cversion;

	// Marketing names: the 4.x numbers belong to the non-NT line, the 5.x
	// numbers to the NT line.
	if (!nt && major == 4)
	{
		if (minor <= 3)
		{
			marketing = " [Windows 95]";
		}
		else if (minor == 10)
		{
			marketing = " [Windows 98]";
		}
		else if (minor == 90)
		{
			marketing = " [Windows ME]";
		}
	}
	else if (nt && major == 5)
	{
		if (minor == 0)
		{
			marketing = " [Windows 2000]";
		}
		else if (minor == 1)
		{
			marketing = " [Windows XP]";
		}
		else if (minor == 2)
		{
			marketing = " [Windows .NET Server]";
		}
	}
#else // _WIN32
	struct utsname computer;

	// uname() reports failure with a negative value; the structure must not
	// be read in that case.
	if (uname(&computer) < 0)
	{
		os = "unknown";
		version = "unknown";
		architecture = "unknown";
	}
	else
	{
		os = computer.sysname;
		version = computer.release;
		architecture = computer.machine;
	}
#endif // _WIN32

	return "(" + os + " " + version + marketing + " " + architecture + ")";
}
373    
374     void usage()
375     {
376     string tab(8 + program.length(), ' ');
377    
378     cout << "Usage: " << program << " [index ...] [-P page] [-h header] [-b bo"
379     << "dy]\n"
380     << tab << "[-f footer] [-n notfound] [-p pages]\n"
381     << tab << "[-i begin] [-d domain ...] [-r restriction ...]\n"
382     << tab << "[-D] [-version] [-help]\n"
383     << "Options:\n"
384     << " index Index file to use (can only use one file for i"
385     << "ndexing)\n"
386     << " -P page Page of search to display (defaults to 1)\n"
387     << " -h header Header template to use (defaults to header.htm"
388     << "l)\n"
389     << " -b body Body template to use (defaults to body.html)\n"
390     << " -f footer Footer template to use (defaults to footer.htm"
391     << "l)\n"
392     << " -n notfound Not found template to use (defaults to notfoun"
393     << "d.html)\n"
394     << " -p pages Pages template to use (defaults to pages.html)"
395     << "\n"
396     << " -i begin URL to begin indexing (causes indexing rather "
397     << "than search)\n"
398     << " -d domain Domain to include in indexing\n"
399     << " -r restriction URL to restrict from indexing\n"
400     << " -D Display debug information\n"
401     << " -version Display version information and exit\n"
402     << " -license Display license information and exit\n"
403     << " -help Display this message and exit\n";
404     }
405    
406     void version()
407     {
408     cout << programName << " " << programVersion << " "<< platform() << "\n\n"
409 douglas 28 << " Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.\n"
410     << "\n"
411 douglas 1 << " This product includes software developed by Douglas Thrift\n"
412     << " (http://computers.douglasthrift.net/searchengine/).\n";
413     }
414    
// Prints the full license text to standard output.
void license()
{
	// One entry per printed line; an entry ending in "\n\n" also supplies the
	// blank line that follows it.
	static const char* const text[] =
	{
		"License:\n",
		" Douglas Thrift's Search Engine License\n\n",
		" Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.\n",
		"\n",
		" Redistribution and use in source and binary forms, with or without\n",
		" modification, are permitted provided that the following conditions are met:\n\n",
		" 1. Redistributions of source code must retain the above copyright notice,\n",
		" this list of conditions and the following disclaimer.\n\n",
		" 2. Redistributions in binary form must reproduce the above copyright notice,\n",
		" this list of conditions and the following disclaimer in the documentation\n",
		" and/or other materials provided with the distribution.\n\n",
		" 3. The end-user documentation included with the redistribution, if any, must\n",
		" include the following acknowledgment:\n\n",
		" \"This product includes software developed by Douglas Thrift\n",
		" (http://computers.douglasthrift.net/searchengine/).\"\n\n",
		" Alternately, this acknowledgment may appear in the software itself, if\n",
		" and wherever such third-party acknowledgments normally appear.\n\n",
		" 4. The names \"Douglas Thrift\" and \"Douglas Thrift's Search Engine\" must not\n",
		" be used to endorse or promote products derived from this software without\n",
		" specific prior written permission. For written permission, please visit\n",
		" http://www.douglasthrift.net/contact.cgi for contact information.\n\n",
		" 5. Products derived from this software may not be called \"Douglas Thrift's\n",
		" Search Engine\", nor may \"Douglas Thrift's Search Engine\" appear in their\n",
		" name, without prior written permission.\n\n",
		" THIS SOFTWARE IS PROVIDED \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES,\n",
		" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\n",
		" FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE\n",
		" COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,\n",
		" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n",
		" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,\n",
		" OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\n",
		" LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n",
		" NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,\n",
		" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
	};

	const unsigned count = sizeof(text) / sizeof(text[0]);

	for (unsigned row = 0; row < count; ++row)
	{
		cout << text[row];
	}
}
476    
// Replaces every occurrence of character in line with the text of entity.
//
// line      - string edited in place
// character - character to look for
// entity    - NUL-terminated replacement text; must not be null
//
// The search resumes after the text just inserted, so a replacement that
// itself contains character (';' -> "&#59;") is never scanned again. An
// empty entity removes every occurrence of character.
void entities(string& line, char character, char* entity)
{
	const string replacement(entity);

	// string::size_type keeps positions and npos in their own type.
	string::size_type spot = line.find(character);

	while (spot != string::npos)
	{
		line.replace(spot, 1, replacement);

		// Skip the whole inserted text, not just its first character.
		spot = line.find(character, spot + replacement.length());
	}
}
499    
// Replaces every occurrence of the text of entity in line with character.
//
// line      - string edited in place
// entity    - NUL-terminated text to look for; must not be null
// character - character substituted for each occurrence
//
// The search resumes just after the substituted character, so a character
// produced by one substitution never becomes part of a later match
// ("&amp;amp;" becomes "&amp;"). An empty entity leaves line unchanged.
void entities(string& line, char* entity, char character)
{
	const string pattern(entity);

	// An empty pattern matches at every position and would insert forever.
	if (pattern.empty())
	{
		return;
	}

	// string::size_type keeps positions and npos in their own type.
	string::size_type spot = line.find(pattern);

	while (spot != string::npos)
	{
		line.replace(spot, pattern.length(), 1, character);
		spot = line.find(pattern, spot + 1);
	}
}
522    
// Collapses whitespace in abbynormal, in place.
//
// Every run of whitespace characters is reduced to its first character,
// then one leading and one trailing whitespace character (all that can be
// left after collapsing) are removed. Empty and all-whitespace strings end
// up empty.
void normalize(string& abbynormal)
{
	// isspace() is only defined for unsigned char values and EOF, so every
	// character is cast before it is classified.
	for (string::size_type index = 0; index < abbynormal.length(); ++index)
	{
		if (!isspace(static_cast<unsigned char>(abbynormal[index])))
		{
			continue;
		}

		// Find the end of the run without reading past the string.
		string::size_type next = index + 1;

		while (next < abbynormal.length() &&
			isspace(static_cast<unsigned char>(abbynormal[next])))
		{
			++next;
		}

		// Keep the first whitespace character of the run, drop the rest.
		abbynormal.erase(index + 1, next - index - 1);
	}

	if (!abbynormal.empty() &&
		isspace(static_cast<unsigned char>(abbynormal[0])))
	{
		abbynormal.erase(0, 1);
	}

	// Checked again: the string may have just become empty.
	if (!abbynormal.empty() &&
		isspace(static_cast<unsigned char>(abbynormal[abbynormal.length() - 1])))
	{
		abbynormal.erase(abbynormal.length() - 1, 1);
	}
}