ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/Search.cpp
Revision: 199
Committed: 2003-07-15T00:22:06-07:00 (21 years, 11 months ago) by douglas
File size: 14953 byte(s)
Log Message:
Did more OpenSSL stuff, like the version output.

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine Main
46 //
47 // Douglas Thrift
48 //
49 // $Id: Search.cpp,v 1.17 2003/07/15 07:22:06 douglas Exp $
50
51 #include "Search.h"
52 #include "Indexer.h"
53 #include "Searcher.h"
54 #include "Outputer.h"
55
56 #ifndef _WIN32
57 #include <sys/utsname.h>
58 #else
59 #include <windows.h>
60 #endif
61
62 #ifdef _OpenSSL_
63 #include <openssl/crypto.h>
64
65 inline string openssl(bool agent = false)
66 {
67 ostringstream openssl;
68
69 openssl << "OpenSSL" << (agent ? "/" : " ");
70
71 long version = SSLeay();
72 long major = version / 0x10000000;
73 long minor = (version % 0x10000000) / 0x100000;
74 long fix = (version % 0x100000) / 0x1000;
75 long patch = (version % 0x1000);
76
77 openssl << major << "." << minor << "." << fix;
78
79 if (patch == 0x0)
80 {
81 openssl << "-dev";
82 }
83 else if (patch >= 0x1 && patch <= 0xe)
84 {
85 openssl << "-beta" << patch;
86 }
87 else if (patch == 0xf)
88 {
89 // release
90 }
91 else if (patch > 0xf)
92 {
93 openssl << char('a' - 1 + (patch / 0x10) % 26);
94 }
95
96 openssl << " " << SSLeay_version(SSLEAY_PLATFORM);
97
98 return openssl.str();
99 }
100 #endif
101
// Name the program was invoked as; main() assigns argv[0] to it.
std::string program;

// Product name and version used in the banner and the agent string.
std::string programName("Douglas Thrift's Search Engine");
std::string programVersion("1.2alpha");

// Set by the -D command line option.
bool debug(false);
106
107 int main(int argc, char* argv[])
108 {
109 program = argv[0];
110
111 bool indexMode = false;
112 string indexURL;
113 set<string> indexDomains;
114 set<string> indexRestrictions;
115
116 unsigned page = 1;
117 string query;
118
119 vector<string> indices;
120
121 string header = "header.html";
122 string body = "body.html";
123 string footer = "footer.html";
124 string notfound = "notfound.html";
125 string pages = "pages.html";
126
127 string email;
128
129 for (int index = 1; index < argc; index++)
130 {
131 string arg(argv[index]);
132
133 if (arg == "-help")
134 {
135 usage();
136 return 0;
137 }
138 else if (arg == "-version")
139 {
140 version();
141 return 0;
142 }
143 else if (arg == "-license")
144 {
145 license();
146 return 0;
147 }
148 else if (arg == "-P")
149 {
150 if (++index < argc)
151 {
152 page = strtoul(argv[index],0,0);
153 }
154 else
155 {
156 cerr << program << ": Bad arguments\n";
157 usage();
158 return 1;
159 }
160 }
161 else if (arg == "-i")
162 {
163 indexMode = true;
164
165 if (++index < argc)
166 {
167 indexURL = argv[index];
168 }
169 else
170 {
171 cerr << program << ": Bad arguments\n";
172 usage();
173 return 1;
174 }
175 }
176 else if (arg == "-d")
177 {
178 if (++index < argc)
179 {
180 indexDomains.insert(argv[index]);
181 }
182 else
183 {
184 cerr << program << ": Bad arguments\n";
185 usage();
186 return 1;
187 }
188 }
189 else if (arg == "-r")
190 {
191 if (++index < argc)
192 {
193 indexRestrictions.insert(argv[index]);
194 }
195 else
196 {
197 cerr << program << ": Bad arguments\n";
198 usage();
199 return 1;
200 }
201 }
202 else if (arg == "-h")
203 {
204 if (++index < argc)
205 {
206 header = argv[index];
207 }
208 else
209 {
210 cerr << program << ": Bad arguments\n";
211 usage();
212 return 1;
213 }
214 }
215 else if (arg == "-b")
216 {
217 if (++index < argc)
218 {
219 body = argv[index];
220 }
221 else
222 {
223 cerr << program << ": Bad arguments\n";
224 usage();
225 return 1;
226 }
227 }
228 else if (arg == "-f")
229 {
230 if (++index < argc)
231 {
232 footer = argv[index];
233 }
234 else
235 {
236 cerr << program << ": Bad arguments\n";
237 usage();
238 return 1;
239 }
240 }
241 else if (arg == "-n")
242 {
243 if (++index < argc)
244 {
245 notfound = argv[index];
246 }
247 else
248 {
249 cerr << program << ": Bad arguments\n";
250 usage();
251 return 1;
252 }
253 }
254 else if (arg == "-p")
255 {
256 if (++index < argc)
257 {
258 pages = argv[index];
259 }
260 else
261 {
262 cerr << program << ": Bad arguments\n";
263 usage();
264 return 1;
265 }
266 }
267 else if (arg == "-D")
268 {
269 debug = true;
270 cerr.setf(ios_base::boolalpha);
271 }
272 else
273 {
274 indices.push_back(arg);
275 }
276 }
277
278 if (indices.size() < 1)
279 {
280 usage();
281 return 0;
282 }
283
284 if (indexMode)
285 {
286 if (indices.size() > 1)
287 {
288 cerr << program << ": Too many indices, can only build one index"
289 << " at a time\n";
290 usage();
291 return 1;
292 }
293
294 if (indexDomains.size() < 1)
295 {
296 cerr << program << ": Must specify at least one domain\n";
297 usage();
298 return 1;
299 }
300
301 Indexer indexer(indices[0], indexDomains, indexRestrictions);
302
303 indexer.index(indexURL);
304 }
305 else
306 {
307 string line;
308 getline(cin, line);
309 query = line;
310
311 Searcher searcher(query);
312
313 searcher.search(indices);
314
315 Outputer outputer(header, body, footer, notfound,
316 pages);
317
318 outputer.output(searcher, page < 1 ? page : --page);
319 }
320
321 return 0;
322 }
323
324 string agent(bool version)
325 {
326 string agent = programName + (version ? ('/' + programVersion) : "");
327
328 return agent;
329 }
330
// Describes the machine the program is running on.
//
// Returns "(<os> <version><marketing> <architecture>)". On Windows the values
// come from GetVersionEx/GetSystemInfo and <marketing> is a bracketed product
// name for the recognised versions; elsewhere they come from uname() and
// <marketing> is empty. If uname() fails every field reads "unknown".
std::string platform()
{
	std::string os;
	std::string version;
	std::string architecture;
	std::string marketing;

#ifdef _WIN32
	// The system structures live on the stack, so nothing has to be freed.
	OSVERSIONINFO computer;
	computer.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
	GetVersionEx(&computer);

	const bool nt = computer.dwPlatformId == VER_PLATFORM_WIN32_NT;
	const unsigned major = computer.dwMajorVersion;
	const unsigned minor = computer.dwMinorVersion;

	os = nt ? "Windows NT" : "Windows";

	SYSTEM_INFO system;
	GetSystemInfo(&system);

	switch (system.wProcessorArchitecture)
	{
	case PROCESSOR_ARCHITECTURE_INTEL:
		architecture = "ix86";
		break;
	case PROCESSOR_ARCHITECTURE_MIPS:
		architecture = "mips";
		break;
	case PROCESSOR_ARCHITECTURE_ALPHA:
		architecture = "alpha";
		break;
	case PROCESSOR_ARCHITECTURE_PPC:
		architecture = "ppc";
		break;
	case PROCESSOR_ARCHITECTURE_IA64:
		architecture = "ia64";
		break;
	case PROCESSOR_ARCHITECTURE_IA32_ON_WIN64:
		architecture = "ix86_on_win64";
		break;
	case PROCESSOR_ARCHITECTURE_AMD64:
		architecture = "amd64";
		break;
	default:
		architecture = "unknown";
		break;
	}

	// Two unsigned values and a dot always fit in 32 characters.
	char cversion[32];
	sprintf(cversion, "%u.%u", major, minor);
	version = cversion;

	if (nt)
	{
		if (major == 5 && minor == 0)
		{
			marketing = " [Windows 2000]";
		}
		else if (major == 5 && minor == 1)
		{
			marketing = " [Windows XP]";
		}
		else if (major == 5 && minor == 2)
		{
			marketing = " [Windows .NET Server]";
		}
	}
	else if (major == 4)
	{
		if (minor <= 3)
		{
			marketing = " [Windows 95]";
		}
		else if (minor == 10)
		{
			marketing = " [Windows 98]";
		}
		else if (minor == 90)
		{
			marketing = " [Windows ME]";
		}
	}
#else // _WIN32
	struct utsname computer;

	if (uname(&computer) < 0)
	{
		// The structure contents are not usable after a failed call.
		os = "unknown";
		version = "unknown";
		architecture = "unknown";
	}
	else
	{
		os = computer.sysname;
		version = computer.release;
		architecture = computer.machine;
	}
#endif // _WIN32

	return "(" + os + " " + version + marketing + " " + architecture + ")";
}
427
428 void usage()
429 {
430 #ifdef _WIN32
431 OSVERSIONINFO* computer = new OSVERSIONINFO;
432 computer->dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
433 GetVersionEx(computer);
434
435 string program = ::program;
436 if (computer->dwPlatformId != VER_PLATFORM_WIN32_NT)
437 {
438 program = "Search";
439 }
440
441 delete computer;
442 #endif // _WIN32
443
444 string tab(8 + program.length(), ' ');
445
446 cout << "Usage: " << program << " [index ...] [-P page] [-h header] [-b bo"
447 << "dy]\n"
448 << tab << "[-f footer] [-n notfound] [-p pages]\n"
449 << tab << "[-i begin] [-d domain ...] [-r restriction ...]\n"
450 << tab << "[-D] [-version] [-help]\n"
451 << "Options:\n"
452 << " index Index file to use (can only use one file for i"
453 << "ndexing)\n"
454 << " -P page Page of search to display (defaults to 1)\n"
455 << " -h header Header template to use (defaults to header.htm"
456 << "l)\n"
457 << " -b body Body template to use (defaults to body.html)\n"
458 << " -f footer Footer template to use (defaults to footer.htm"
459 << "l)\n"
460 << " -n notfound Not found template to use (defaults to notfoun"
461 << "d.html)\n"
462 << " -p pages Pages template to use (defaults to pages.html)"
463 << "\n"
464 << " -i begin URL to begin indexing (causes indexing rather "
465 << "than search)\n"
466 << " -d domain Domain to include in indexing\n"
467 << " -r restriction URL to restrict from indexing\n"
468 << " -D Display debug information\n"
469 << " -version Display version information and exit\n"
470 << " -license Display license information and exit\n"
471 << " -help Display this message and exit\n";
472 }
473
474 void version()
475 {
476 cout << programName << " " << programVersion << " "<< platform() << "\n\n"
477 << " Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.\n"
478 << "\n"
479 << " This product includes software developed by Douglas Thrift\n"
480 << " (http://computers.douglasthrift.net/searchengine/).\n";
481 #ifdef _OpenSSL_
482 cout << "\n" << openssl() << "\n";
483 #endif
484 }
485
// Prints the full text of the license to standard output.
void license()
{
	// Adjacent literals are joined by the compiler, so the whole text is
	// written with a single insertion.
	static const char text[] =
		"License:\n"
		" Douglas Thrift's Search Engine License\n\n"
		" Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.\n"
		"\n"
		" Redistribution and use in source and binary forms, with or with"
		"out\n"
		" modification, are permitted provided that the following conditi"
		"ons are met:\n\n"
		" 1. Redistributions of source code must retain the above copyrig"
		"ht notice,\n"
		" this list of conditions and the following disclaimer.\n\n"
		" 2. Redistributions in binary form must reproduce the above copy"
		"right notice,\n"
		" this list of conditions and the following disclaimer in the "
		"documentation\n"
		" and/or other materials provided with the distribution.\n\n"
		" 3. The end-user documentation included with the redistribution,"
		" if any, must\n"
		" include the following acknowledgment:\n\n"
		" \"This product includes software developed by Douglas Thr"
		"ift\n"
		" (http://computers.douglasthrift.net/searchengine/).\"\n\n"
		" Alternately, this acknowledgment may appear in the software "
		"itself, if\n"
		" and wherever such third-party acknowledgments normally appea"
		"r.\n\n"
		" 4. The names \"Douglas Thrift\" and \"Douglas Thrift\'s Search "
		"Engine\" must not\n"
		" be used to endorse or promote products derived from this sof"
		"tware without\n"
		" specific prior written permission. For written permission, p"
		"lease visit\n"
		" http://www.douglasthrift.net/contact.cgi for contact inform"
		"ation.\n\n"
		" 5. Products derived from this software may not be called \"Doug"
		"las Thrift\'s\n"
		" Search Engine\", nor may \"Douglas Thrift\'s Search Engine\""
		" appear in their\n"
		" name, without prior written permission.\n\n"
		" THIS SOFTWARE IS PROVIDED \"AS IS\" AND ANY EXPRESS OR IMPLIED "
		"WARRANTIES,\n"
		" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHA"
		"NTABILITY AND\n"
		" FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SH"
		"ALL THE\n"
		" COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIR"
		"ECT,\n"
		" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU"
		"DING, BUT NOT\n"
		" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS O"
		"F USE, DATA,\n"
		" OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY"
		" THEORY OF\n"
		" LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCL"
		"UDING\n"
		" NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF T"
		"HIS SOFTWARE,\n"
		" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n";

	std::cout << text;
}
547
// Replaces every occurrence of character in line with the text of entity.
//
// line      - string edited in place
// character - character to look for
// entity    - NUL-terminated replacement text; a null pointer leaves line
//             untouched
//
// The search resumes after the inserted text, so an entity that itself
// contains character is not expanded again.
void entities(std::string& line, char character, char* entity)
{
	if (entity == 0)
	{
		return;
	}

	const std::string replacement(entity);
	std::string::size_type spot = line.find(character);

	while (spot != std::string::npos)
	{
		line.replace(spot, 1, replacement);

		// Skip what was just written rather than only the first character.
		spot = line.find(character, spot + replacement.length());
	}
}
570
// Replaces every occurrence of the text entity in line with the single
// character given.
//
// line      - string edited in place
// entity    - NUL-terminated text to look for; a null pointer or an empty
//             string leaves line untouched (an empty pattern would match at
//             every position)
// character - character written in place of each match
void entities(std::string& line, char* entity, char character)
{
	if (entity == 0 || *entity == '\0')
	{
		return;
	}

	const std::string::size_type length = strlen(entity);
	std::string::size_type spot = line.find(entity);

	while (spot != std::string::npos)
	{
		line.replace(spot, length, 1, character);

		// Continue just past the character that was written.
		spot = line.find(entity, spot + 1);
	}
}
593
// Normalizes the whitespace of a string in place: every run of whitespace is
// reduced to its first character, then one leading and one trailing
// whitespace character are removed. Empty and all-whitespace strings end up
// empty.
void normalize(std::string& abbynormal)
{
	for (std::string::size_type index = 0; index < abbynormal.length(); index++)
	{
		// isspace is only defined for unsigned char values (and EOF).
		if (isspace(static_cast<unsigned char>(abbynormal[index])))
		{
			std::string::size_type next = index + 1;

			// The bounds check keeps the scan inside the string.
			while (next < abbynormal.length()
				&& isspace(static_cast<unsigned char>(abbynormal[next])))
			{
				next++;
			}

			// Keep the first character of the run and drop the rest.
			abbynormal.erase(index + 1, next - index - 1);
		}
	}

	if (!abbynormal.empty()
		&& isspace(static_cast<unsigned char>(abbynormal[0])))
	{
		abbynormal.erase(0, 1);
	}

	if (!abbynormal.empty()
		&& isspace(static_cast<unsigned char>(abbynormal[abbynormal.length() - 1])))
	{
		abbynormal.erase(abbynormal.length() - 1, 1);
	}
}