ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/Search.cpp
Revision: 234
Committed: 2003-08-07T20:38:49-07:00 (21 years, 10 months ago) by douglas
File size: 14327 byte(s)
Log Message:
Changed version to 1.2beta.

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine Main
46 //
47 // Douglas Thrift
48 //
49 // $Id: Search.cpp,v 1.23 2003/08/08 03:38:49 douglas Exp $
50
51 #include "Search.h"
52 #include "Indexer.h"
53 #include "Searcher.h"
54 #include "Outputer.h"
55
56 #ifndef _WIN32
57 #include <sys/utsname.h>
58 #else
59 #include <windows.h>
60 #endif
61
// Name this executable was started as; main() assigns it from argv[0].
std::string program;
// Product name printed by version() and returned by agent().
std::string programName("Douglas Thrift's Search Engine");
// Release string printed by version() and appended by agent(true).
std::string programVersion("1.2beta");
// Debug switch; main() turns it on when -D is given.
bool debug(false);
66
67 int main(int argc, char* argv[])
68 {
69 program = argv[0];
70
71 bool indexMode = false;
72 string indexURL;
73 set<string> indexDomains;
74 set<string> indexRestrictions;
75
76 unsigned page = 1;
77 string query;
78
79 vector<string> indices;
80
81 string header = "header.html";
82 string body = "body.html";
83 string footer = "footer.html";
84 string notfound = "notfound.html";
85 string pages = "pages.html";
86
87 string email;
88
89 for (int index = 1; index < argc; index++)
90 {
91 string arg(argv[index]);
92
93 if (arg == "-help")
94 {
95 usage();
96 return 0;
97 }
98 else if (arg == "-version")
99 {
100 version();
101 return 0;
102 }
103 else if (arg == "-license")
104 {
105 license();
106 return 0;
107 }
108 else if (arg == "-P")
109 {
110 if (++index < argc)
111 {
112 istringstream number(argv[index]);
113
114 number >> page;
115 }
116 else
117 {
118 cerr << program << ": Bad arguments\n";
119 usage();
120 return 1;
121 }
122 }
123 else if (arg == "-i")
124 {
125 indexMode = true;
126
127 if (++index < argc)
128 {
129 indexURL = argv[index];
130 }
131 else
132 {
133 cerr << program << ": Bad arguments\n";
134 usage();
135 return 1;
136 }
137 }
138 else if (arg == "-d")
139 {
140 if (++index < argc)
141 {
142 indexDomains.insert(argv[index]);
143 }
144 else
145 {
146 cerr << program << ": Bad arguments\n";
147 usage();
148 return 1;
149 }
150 }
151 else if (arg == "-r")
152 {
153 if (++index < argc)
154 {
155 indexRestrictions.insert(argv[index]);
156 }
157 else
158 {
159 cerr << program << ": Bad arguments\n";
160 usage();
161 return 1;
162 }
163 }
164 else if (arg == "-h")
165 {
166 if (++index < argc)
167 {
168 header = argv[index];
169 }
170 else
171 {
172 cerr << program << ": Bad arguments\n";
173 usage();
174 return 1;
175 }
176 }
177 else if (arg == "-b")
178 {
179 if (++index < argc)
180 {
181 body = argv[index];
182 }
183 else
184 {
185 cerr << program << ": Bad arguments\n";
186 usage();
187 return 1;
188 }
189 }
190 else if (arg == "-f")
191 {
192 if (++index < argc)
193 {
194 footer = argv[index];
195 }
196 else
197 {
198 cerr << program << ": Bad arguments\n";
199 usage();
200 return 1;
201 }
202 }
203 else if (arg == "-n")
204 {
205 if (++index < argc)
206 {
207 notfound = argv[index];
208 }
209 else
210 {
211 cerr << program << ": Bad arguments\n";
212 usage();
213 return 1;
214 }
215 }
216 else if (arg == "-p")
217 {
218 if (++index < argc)
219 {
220 pages = argv[index];
221 }
222 else
223 {
224 cerr << program << ": Bad arguments\n";
225 usage();
226 return 1;
227 }
228 }
229 else if (arg == "-D")
230 {
231 debug = true;
232 cerr.setf(ios_base::boolalpha);
233 }
234 else
235 {
236 indices.push_back(arg);
237 }
238 }
239
240 if (indices.size() < 1)
241 {
242 usage();
243 return 0;
244 }
245
246 if (indexMode)
247 {
248 if (indices.size() > 1)
249 {
250 cerr << program << ": Too many indices, can only build one index"
251 << " at a time\n";
252 usage();
253 return 1;
254 }
255
256 if (indexDomains.size() < 1)
257 {
258 cerr << program << ": Must specify at least one domain\n";
259 usage();
260 return 1;
261 }
262
263 Indexer indexer(indices[0], indexDomains, indexRestrictions);
264
265 indexer.index(indexURL);
266 }
267 else
268 {
269 string line;
270 getline(cin, line);
271 query = line;
272
273 Searcher searcher(query);
274
275 searcher.search(indices);
276
277 Outputer outputer(header, body, footer, notfound,
278 pages);
279
280 outputer.output(searcher, page < 1 ? page : --page);
281 }
282
283 return 0;
284 }
285
286 string agent(bool version)
287 {
288 string agent = programName + (version ? ('/' + programVersion) : "");
289
290 return agent;
291 }
292
// Describes the host system as "(<os> <version>[ marketing] <architecture>)".
//
// On Windows the fields come from GetVersionEx() and GetSystemInfo(), and a
// bracketed marketing name is added for the versions recognised below. On
// other systems the fields come from uname(); if uname() fails each field is
// reported as "unknown" instead of reading an unfilled structure.
std::string platform()
{
	std::string os;
	std::string version;
	std::string architecture;
	std::string marketing;

#ifdef _WIN32
	// The API structures live on the stack, so nothing has to be released.
	OSVERSIONINFO computer = { 0 };
	computer.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
	GetVersionEx(&computer);

	const bool nt = computer.dwPlatformId == VER_PLATFORM_WIN32_NT;
	const unsigned major = computer.dwMajorVersion;
	const unsigned minor = computer.dwMinorVersion;

	os = nt ? "Windows NT" : "Windows";

	SYSTEM_INFO processor;
	GetSystemInfo(&processor);

	switch (processor.wProcessorArchitecture)
	{
	case PROCESSOR_ARCHITECTURE_INTEL:
		architecture = "ix86";
		break;
	case PROCESSOR_ARCHITECTURE_MIPS:
		architecture = "mips";
		break;
	case PROCESSOR_ARCHITECTURE_ALPHA:
		architecture = "alpha";
		break;
	case PROCESSOR_ARCHITECTURE_PPC:
		architecture = "ppc";
		break;
	case PROCESSOR_ARCHITECTURE_IA64:
		architecture = "ia64";
		break;
	case PROCESSOR_ARCHITECTURE_IA32_ON_WIN64:
		architecture = "ix86_on_win64";
		break;
	case PROCESSOR_ARCHITECTURE_AMD64:
		architecture = "amd64";
		break;
	default:
		architecture = "unknown";
		break;
	}

	// Two unsigned decimal numbers and a dot need at most 22 bytes.
	char cversion[32];
	sprintf(cversion, "%u.%u", major, minor);
	version = cversion;

	if (major == 4 && minor <= 3 && !nt)
	{
		marketing = " [Windows 95]";
	}
	else if (major == 4 && minor == 10 && !nt)
	{
		marketing = " [Windows 98]";
	}
	else if (major == 5 && minor == 0 && nt)
	{
		marketing = " [Windows 2000]";
	}
	else if (major == 4 && minor == 90 && !nt)
	{
		marketing = " [Windows ME]";
	}
	else if (major == 5 && minor == 1 && nt)
	{
		marketing = " [Windows XP]";
	}
	else if (major == 5 && minor == 2 && nt)
	{
		marketing = " [Windows .NET Server]";
	}
#else // _WIN32
	struct utsname computer;

	if (uname(&computer) < 0)
	{
		// The structure was not filled in; do not read it.
		os = "unknown";
		version = "unknown";
		architecture = "unknown";
	}
	else
	{
		os = computer.sysname;
		version = computer.release;
		architecture = computer.machine;
	}
#endif // _WIN32

	return "(" + os + " " + version + marketing + " " + architecture + ")";
}
389
390 void usage()
391 {
392 #ifdef _WIN32
393 OSVERSIONINFO* computer = new OSVERSIONINFO;
394 computer->dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
395 GetVersionEx(computer);
396
397 string program = ::program;
398 if (computer->dwPlatformId != VER_PLATFORM_WIN32_NT)
399 {
400 program = "Search";
401 }
402
403 delete computer;
404 #endif // _WIN32
405
406 string tab(8 + program.length(), ' ');
407
408 cout << "Usage: " << program << " [index ...] [-P page] [-h header] [-b bo"
409 << "dy]\n"
410 << tab << "[-f footer] [-n notfound] [-p pages]\n"
411 << tab << "[-i begin] [-d domain ...] [-r restriction ...]\n"
412 << tab << "[-D] [-version] [-help]\n"
413 << "Options:\n"
414 << " index Index file to use (can only use one file for i"
415 << "ndexing)\n"
416 << " -P page Page of search to display (defaults to 1)\n"
417 << " -h header Header template to use (defaults to header.htm"
418 << "l)\n"
419 << " -b body Body template to use (defaults to body.html)\n"
420 << " -f footer Footer template to use (defaults to footer.htm"
421 << "l)\n"
422 << " -n notfound Not found template to use (defaults to notfoun"
423 << "d.html)\n"
424 << " -p pages Pages template to use (defaults to pages.html)"
425 << "\n"
426 << " -i begin URL to begin indexing (causes indexing rather "
427 << "than search)\n"
428 << " -d domain Domain to include in indexing\n"
429 << " -r restriction URL to restrict from indexing\n"
430 << " -D Display debug information\n"
431 << " -version Display version information and exit\n"
432 << " -license Display license information and exit\n"
433 << " -help Display this message and exit\n";
434 }
435
436 void version()
437 {
438 cout << programName << " " << programVersion << " "<< platform() << "\n\n"
439 << " Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.\n"
440 << "\n"
441 << " This product includes software developed by Douglas Thrift\n"
442 << " (http://computers.douglasthrift.net/searchengine/).\n";
443 #ifdef _OpenSSL_
444 cout << "\n" << openssl() << " " << SSLeay_version(SSLEAY_BUILT_ON) << " "
445 << SSLeay_version(SSLEAY_PLATFORM) << "\n";
446 #endif
447 }
448
// Prints the full license text to standard output.
void license()
{
	// One entry per output line; adjacent literals are joined by the compiler.
	static const char* const text =
		"License:\n"
		" Douglas Thrift's Search Engine License\n"
		"\n"
		" Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.\n"
		"\n"
		" Redistribution and use in source and binary forms, with or without\n"
		" modification, are permitted provided that the following conditions are met:\n"
		"\n"
		" 1. Redistributions of source code must retain the above copyright notice,\n"
		" this list of conditions and the following disclaimer.\n"
		"\n"
		" 2. Redistributions in binary form must reproduce the above copyright notice,\n"
		" this list of conditions and the following disclaimer in the documentation\n"
		" and/or other materials provided with the distribution.\n"
		"\n"
		" 3. The end-user documentation included with the redistribution, if any, must\n"
		" include the following acknowledgment:\n"
		"\n"
		" \"This product includes software developed by Douglas Thrift\n"
		" (http://computers.douglasthrift.net/searchengine/).\"\n"
		"\n"
		" Alternately, this acknowledgment may appear in the software itself, if\n"
		" and wherever such third-party acknowledgments normally appear.\n"
		"\n"
		" 4. The names \"Douglas Thrift\" and \"Douglas Thrift's Search Engine\" must not\n"
		" be used to endorse or promote products derived from this software without\n"
		" specific prior written permission. For written permission, please visit\n"
		" http://www.douglasthrift.net/contact.cgi for contact information.\n"
		"\n"
		" 5. Products derived from this software may not be called \"Douglas Thrift's\n"
		" Search Engine\", nor may \"Douglas Thrift's Search Engine\" appear in their\n"
		" name, without prior written permission.\n"
		"\n"
		" THIS SOFTWARE IS PROVIDED \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES,\n"
		" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\n"
		" FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE\n"
		" COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,\n"
		" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n"
		" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,\n"
		" OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\n"
		" LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n"
		" NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,\n"
		" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n";

	std::cout << text;
}
510
// Replaces every occurrence of character in line with the text of entity.
//
// line      - string edited in place
// character - character to look for
// entity    - null-terminated replacement text; a null pointer leaves line
//             untouched
//
// The search resumes after the text just inserted, so a replacement that
// itself contains character is never expanded again.
void entities(std::string& line, char character, char* entity)
{
	if (entity == 0)
	{
		return;
	}

	const std::string replacement(entity);
	std::string::size_type begin = 0;

	while (begin < line.length())
	{
		const std::string::size_type spot = line.find(character, begin);

		if (spot == std::string::npos)
		{
			break;
		}

		line.replace(spot, 1, replacement);

		// Skip the inserted text rather than rescanning it.
		begin = spot + replacement.length();
	}
}
533
// Replaces every occurrence of the text entity in line with character.
//
// line      - string edited in place
// entity    - null-terminated text to look for; a null pointer or an empty
//             string leaves line untouched (an empty pattern matches at
//             every position and would never finish)
// character - single character written in place of each match
//
// The search resumes just after the inserted character, so the result is not
// decoded a second time.
void entities(std::string& line, char* entity, char character)
{
	if (entity == 0 || *entity == '\0')
	{
		return;
	}

	const std::string pattern(entity);
	std::string::size_type begin = 0;

	while (begin < line.length())
	{
		const std::string::size_type spot = line.find(pattern, begin);

		if (spot == std::string::npos)
		{
			break;
		}

		line.replace(spot, pattern.length(), 1, character);

		begin = spot + 1;
	}
}
556
// Normalizes the whitespace of abbynormal in place.
//
// Each run of whitespace characters is reduced to the first character of the
// run, then one leading and one trailing whitespace character are removed.
// Empty and all-whitespace strings end up empty.
void normalize(std::string& abbynormal)
{
	std::string collapsed;
	collapsed.reserve(abbynormal.length());

	bool inRun = false;

	for (std::string::size_type index = 0; index < abbynormal.length(); ++index)
	{
		const char current = abbynormal[index];

		// isspace() is only defined for unsigned char values and EOF.
		const bool space = isspace(static_cast<unsigned char>(current)) != 0;

		// Keep every non-space character and the first space of each run.
		if (!space || !inRun)
		{
			collapsed += current;
		}

		inRun = space;
	}

	if (!collapsed.empty()
		&& isspace(static_cast<unsigned char>(collapsed[0])))
	{
		collapsed.erase(0, 1);
	}

	// Checked again: removing the leading space may have emptied the string.
	if (!collapsed.empty()
		&& isspace(static_cast<unsigned char>(collapsed[collapsed.length() - 1])))
	{
		collapsed.erase(collapsed.length() - 1, 1);
	}

	abbynormal.swap(collapsed);
}