ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/Search.cpp
Revision: 234
Committed: 2003-08-07T20:38:49-07:00 (21 years, 10 months ago) by douglas
File size: 14327 byte(s)
Log Message:
Changed version to 1.2beta.

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4 douglas 28 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 douglas 1 * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine Main
46     //
47     // Douglas Thrift
48     //
49 douglas 234 // $Id: Search.cpp,v 1.23 2003/08/08 03:38:49 douglas Exp $
50 douglas 1
51     #include "Search.h"
52     #include "Indexer.h"
53     #include "Searcher.h"
54     #include "Outputer.h"
55    
56     #ifndef _WIN32
57     #include <sys/utsname.h>
58     #else
59     #include <windows.h>
60 douglas 199 #endif
61 douglas 1
// Name this executable was started as; main() fills it in from argv[0].
string program;

// Product name and release string, reported by agent() and version().
string programName("Douglas Thrift's Search Engine");
string programVersion("1.2beta");

// Turned on by the -D command line switch in main().
bool debug(false);
66    
67     int main(int argc, char* argv[])
68     {
69     program = argv[0];
70    
71     bool indexMode = false;
72     string indexURL;
73     set<string> indexDomains;
74     set<string> indexRestrictions;
75    
76     unsigned page = 1;
77     string query;
78    
79     vector<string> indices;
80    
81     string header = "header.html";
82     string body = "body.html";
83     string footer = "footer.html";
84     string notfound = "notfound.html";
85     string pages = "pages.html";
86    
87 douglas 37 string email;
88    
89 douglas 1 for (int index = 1; index < argc; index++)
90     {
91     string arg(argv[index]);
92    
93     if (arg == "-help")
94     {
95     usage();
96     return 0;
97     }
98     else if (arg == "-version")
99     {
100     version();
101     return 0;
102     }
103     else if (arg == "-license")
104     {
105     license();
106     return 0;
107     }
108     else if (arg == "-P")
109     {
110     if (++index < argc)
111     {
112 douglas 212 istringstream number(argv[index]);
113    
114     number >> page;
115 douglas 1 }
116     else
117     {
118     cerr << program << ": Bad arguments\n";
119 douglas 35 usage();
120 douglas 1 return 1;
121     }
122     }
123     else if (arg == "-i")
124     {
125     indexMode = true;
126    
127     if (++index < argc)
128     {
129     indexURL = argv[index];
130     }
131     else
132     {
133     cerr << program << ": Bad arguments\n";
134 douglas 35 usage();
135 douglas 1 return 1;
136     }
137     }
138     else if (arg == "-d")
139     {
140     if (++index < argc)
141     {
142     indexDomains.insert(argv[index]);
143     }
144     else
145     {
146     cerr << program << ": Bad arguments\n";
147 douglas 35 usage();
148 douglas 1 return 1;
149     }
150     }
151     else if (arg == "-r")
152     {
153     if (++index < argc)
154     {
155     indexRestrictions.insert(argv[index]);
156     }
157     else
158     {
159     cerr << program << ": Bad arguments\n";
160 douglas 35 usage();
161 douglas 1 return 1;
162     }
163     }
164     else if (arg == "-h")
165     {
166     if (++index < argc)
167     {
168     header = argv[index];
169     }
170     else
171     {
172     cerr << program << ": Bad arguments\n";
173 douglas 35 usage();
174 douglas 1 return 1;
175     }
176     }
177     else if (arg == "-b")
178     {
179     if (++index < argc)
180     {
181     body = argv[index];
182     }
183     else
184     {
185     cerr << program << ": Bad arguments\n";
186 douglas 35 usage();
187 douglas 1 return 1;
188     }
189     }
190     else if (arg == "-f")
191     {
192     if (++index < argc)
193     {
194     footer = argv[index];
195     }
196     else
197     {
198     cerr << program << ": Bad arguments\n";
199 douglas 35 usage();
200 douglas 1 return 1;
201     }
202     }
203     else if (arg == "-n")
204     {
205     if (++index < argc)
206     {
207     notfound = argv[index];
208     }
209     else
210     {
211     cerr << program << ": Bad arguments\n";
212 douglas 35 usage();
213 douglas 1 return 1;
214     }
215     }
216     else if (arg == "-p")
217     {
218     if (++index < argc)
219     {
220     pages = argv[index];
221     }
222     else
223     {
224     cerr << program << ": Bad arguments\n";
225 douglas 35 usage();
226 douglas 1 return 1;
227     }
228     }
229     else if (arg == "-D")
230     {
231     debug = true;
232 douglas 183 cerr.setf(ios_base::boolalpha);
233 douglas 1 }
234     else
235     {
236     indices.push_back(arg);
237     }
238     }
239    
240     if (indices.size() < 1)
241     {
242     usage();
243     return 0;
244     }
245    
246     if (indexMode)
247     {
248     if (indices.size() > 1)
249     {
250     cerr << program << ": Too many indices, can only build one index"
251     << " at a time\n";
252 douglas 37 usage();
253 douglas 1 return 1;
254     }
255    
256     if (indexDomains.size() < 1)
257     {
258     cerr << program << ": Must specify at least one domain\n";
259 douglas 37 usage();
260 douglas 1 return 1;
261     }
262    
263     Indexer indexer(indices[0], indexDomains, indexRestrictions);
264    
265     indexer.index(indexURL);
266     }
267     else
268     {
269     string line;
270     getline(cin, line);
271     query = line;
272    
273     Searcher searcher(query);
274    
275     searcher.search(indices);
276    
277     Outputer outputer(header, body, footer, notfound,
278     pages);
279    
280     outputer.output(searcher, page < 1 ? page : --page);
281     }
282    
283     return 0;
284     }
285    
286 douglas 12 string agent(bool version)
287     {
288     string agent = programName + (version ? ('/' + programVersion) : "");
289    
290     return agent;
291     }
292    
// Describes the host this program is running on.
//
// Returns a string of the form "(<os> <version><marketing> <architecture>)".
// On Windows the fields come from GetVersionEx() and GetSystemInfo(), and
// <marketing> is a bracketed product name for the versions recognised below
// (empty otherwise). Elsewhere the fields are the sysname, release and machine
// members reported by uname(), and <marketing> is always empty.
string platform()
{
    string os;
    string version;
    string architecture;
    string marketing;

#ifdef _WIN32
    // Zeroed up front so that a failed GetVersionEx() leaves well-defined
    // values behind instead of indeterminate ones.
    OSVERSIONINFO computer;
    ZeroMemory(&computer, sizeof(computer));
    computer.dwOSVersionInfoSize = sizeof(computer);
    GetVersionEx(&computer);

    os = computer.dwPlatformId == VER_PLATFORM_WIN32_NT ? "Windows NT" :
        "Windows";
    const unsigned major = computer.dwMajorVersion;
    const unsigned minor = computer.dwMinorVersion;

    SYSTEM_INFO info;
    GetSystemInfo(&info);

    switch (info.wProcessorArchitecture)
    {
    case PROCESSOR_ARCHITECTURE_INTEL:
        architecture = "ix86";
        break;
    case PROCESSOR_ARCHITECTURE_MIPS:
        architecture = "mips";
        break;
    case PROCESSOR_ARCHITECTURE_ALPHA:
        architecture = "alpha";
        break;
    case PROCESSOR_ARCHITECTURE_PPC:
        architecture = "ppc";
        break;
    case PROCESSOR_ARCHITECTURE_IA64:
        architecture = "ia64";
        break;
    case PROCESSOR_ARCHITECTURE_IA32_ON_WIN64:
        architecture = "ix86_on_win64";
        break;
    case PROCESSOR_ARCHITECTURE_AMD64:
        architecture = "amd64";
        break;
    default:
        architecture = "unknown";
        break;
    }

    // "<major>.<minor>", formatted through a stream so that no fixed-size
    // scratch buffer is needed.
    ostringstream number;
    number << major << '.' << minor;
    version = number.str();

    if (major == 4 && minor <= 3 && os != "Windows NT")
    {
        marketing = " [Windows 95]";
    }
    else if (major == 4 && minor == 10 && os != "Windows NT")
    {
        marketing = " [Windows 98]";
    }
    else if (major == 5 && minor == 0 && os == "Windows NT")
    {
        marketing = " [Windows 2000]";
    }
    else if (major == 4 && minor == 90 && os != "Windows NT")
    {
        marketing = " [Windows ME]";
    }
    else if (major == 5 && minor == 1 && os == "Windows NT")
    {
        marketing = " [Windows XP]";
    }
    else if (major == 5 && minor == 2 && os == "Windows NT")
    {
        marketing = " [Windows .NET Server]";
    }
#else // _WIN32
    struct utsname computer;

    if (uname(&computer) < 0)
    {
        // The structure contents are not usable after a failure.
        os = "unknown";
        version = "unknown";
        architecture = "unknown";
    }
    else
    {
        os = computer.sysname;
        version = computer.release;
        architecture = computer.machine;
    }
#endif // _WIN32

    return "(" + os + " " + version + marketing + " " + architecture + ")";
}
389    
390     void usage()
391     {
392 douglas 47 #ifdef _WIN32
393     OSVERSIONINFO* computer = new OSVERSIONINFO;
394     computer->dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
395     GetVersionEx(computer);
396    
397     string program = ::program;
398     if (computer->dwPlatformId != VER_PLATFORM_WIN32_NT)
399     {
400     program = "Search";
401     }
402    
403     delete computer;
404     #endif // _WIN32
405    
406 douglas 1 string tab(8 + program.length(), ' ');
407    
408     cout << "Usage: " << program << " [index ...] [-P page] [-h header] [-b bo"
409     << "dy]\n"
410     << tab << "[-f footer] [-n notfound] [-p pages]\n"
411     << tab << "[-i begin] [-d domain ...] [-r restriction ...]\n"
412     << tab << "[-D] [-version] [-help]\n"
413     << "Options:\n"
414     << " index Index file to use (can only use one file for i"
415     << "ndexing)\n"
416     << " -P page Page of search to display (defaults to 1)\n"
417     << " -h header Header template to use (defaults to header.htm"
418     << "l)\n"
419     << " -b body Body template to use (defaults to body.html)\n"
420     << " -f footer Footer template to use (defaults to footer.htm"
421     << "l)\n"
422     << " -n notfound Not found template to use (defaults to notfoun"
423     << "d.html)\n"
424     << " -p pages Pages template to use (defaults to pages.html)"
425     << "\n"
426     << " -i begin URL to begin indexing (causes indexing rather "
427     << "than search)\n"
428     << " -d domain Domain to include in indexing\n"
429     << " -r restriction URL to restrict from indexing\n"
430     << " -D Display debug information\n"
431     << " -version Display version information and exit\n"
432     << " -license Display license information and exit\n"
433     << " -help Display this message and exit\n";
434     }
435    
436     void version()
437     {
438     cout << programName << " " << programVersion << " "<< platform() << "\n\n"
439 douglas 28 << " Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.\n"
440     << "\n"
441 douglas 1 << " This product includes software developed by Douglas Thrift\n"
442     << " (http://computers.douglasthrift.net/searchengine/).\n";
443 douglas 199 #ifdef _OpenSSL_
444 douglas 207 cout << "\n" << openssl() << " " << SSLeay_version(SSLEAY_BUILT_ON) << " "
445     << SSLeay_version(SSLEAY_PLATFORM) << "\n";
446 douglas 199 #endif
447 douglas 1 }
448    
// Prints the full license text to standard output.
void license()
{
    // One literal per printed line; adjacent literals are concatenated by the
    // compiler into a single string.
    static const char* const text =
        "License:\n"
        " Douglas Thrift's Search Engine License\n\n"
        " Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.\n"
        "\n"
        " Redistribution and use in source and binary forms, with or without\n"
        " modification, are permitted provided that the following conditions are met:\n\n"
        " 1. Redistributions of source code must retain the above copyright notice,\n"
        " this list of conditions and the following disclaimer.\n\n"
        " 2. Redistributions in binary form must reproduce the above copyright notice,\n"
        " this list of conditions and the following disclaimer in the documentation\n"
        " and/or other materials provided with the distribution.\n\n"
        " 3. The end-user documentation included with the redistribution, if any, must\n"
        " include the following acknowledgment:\n\n"
        " \"This product includes software developed by Douglas Thrift\n"
        " (http://computers.douglasthrift.net/searchengine/).\"\n\n"
        " Alternately, this acknowledgment may appear in the software itself, if\n"
        " and wherever such third-party acknowledgments normally appear.\n\n"
        " 4. The names \"Douglas Thrift\" and \"Douglas Thrift's Search Engine\" must not\n"
        " be used to endorse or promote products derived from this software without\n"
        " specific prior written permission. For written permission, please visit\n"
        " http://www.douglasthrift.net/contact.cgi for contact information.\n\n"
        " 5. Products derived from this software may not be called \"Douglas Thrift's\n"
        " Search Engine\", nor may \"Douglas Thrift's Search Engine\" appear in their\n"
        " name, without prior written permission.\n\n"
        " THIS SOFTWARE IS PROVIDED \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES,\n"
        " INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\n"
        " FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE\n"
        " COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,\n"
        " INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n"
        " LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,\n"
        " OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\n"
        " LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n"
        " NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,\n"
        " EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n";

    cout << text;
}
510    
// Replaces every occurrence of character in line with the text of entity,
// modifying line in place.
//
// The search resumes after the text that was just inserted, so inserted text
// is never rescanned and the loop terminates even when entity itself contains
// character.
void entities(string& line, char character, char* entity)
{
    const string replacement(entity);

    string::size_type spot = line.find(character);

    while (spot != string::npos)
    {
        line.replace(spot, 1, replacement);

        spot = line.find(character, spot + replacement.length());
    }
}
533    
// Replaces every occurrence of the text entity in line with the single
// character given, modifying line in place.
//
// The search resumes just after each inserted character, so a replacement is
// never itself treated as the start of a new match. An empty entity matches
// at every position and would insert without end, so it leaves line untouched.
void entities(string& line, char* entity, char character)
{
    const string::size_type width = strlen(entity);

    if (width == 0)
    {
        return;
    }

    string::size_type spot = line.find(entity);

    while (spot != string::npos)
    {
        line.replace(spot, width, 1, character);

        spot = line.find(entity, spot + 1);
    }
}
556    
// Normalizes the whitespace of abbynormal in place: every run of consecutive
// whitespace characters is reduced to the first character of that run, and
// then one leading and one trailing whitespace character, if present, are
// removed. Empty and whitespace-only strings are handled and end up empty.
void normalize(string& abbynormal)
{
    string collapsed;
    collapsed.reserve(abbynormal.length());

    bool inRun = false;

    for (string::size_type index = 0; index < abbynormal.length(); index++)
    {
        const char current = abbynormal[index];
        // isspace() requires a value representable as unsigned char.
        const bool space = isspace(static_cast<unsigned char>(current)) != 0;

        // Keep every non-space character and only the first space of a run.
        if (!space || !inRun)
        {
            collapsed += current;
        }

        inRun = space;
    }

    if (!collapsed.empty()
        && isspace(static_cast<unsigned char>(collapsed[0])))
    {
        collapsed.erase(0, 1);
    }

    // Checked again because the leading trim may have emptied the string.
    if (!collapsed.empty()
        && isspace(static_cast<unsigned char>(
            collapsed[collapsed.length() - 1])))
    {
        collapsed.erase(collapsed.length() - 1, 1);
    }

    abbynormal.swap(collapsed);
}