ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/Search/trunk/Search.cpp
Revision: 369
Committed: 2008-08-23T03:06:21-07:00 (16 years, 9 months ago) by douglas
File size: 14184 byte(s)
Log Message:
Update Windows versions.

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002-2004, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine Main
46 //
47 // Douglas Thrift
48 //
49 // $Id$
50
51 #include "Search.hpp"
52 #include "Indexer.hpp"
53 #include "Searcher.hpp"
54 #include "Outputter.hpp"
55
56 #ifndef _WIN32
57 #include <sys/utsname.h>
58 #else
59 #include <windows.h>
60
61 // Stupid MinGW
62 #ifndef PROCESSOR_ARCHITECTURE_AMD64
63 #define PROCESSOR_ARCHITECTURE_AMD64 9
64 #endif
65 #ifndef PROCESSOR_ARCHITECTURE_IA32_ON_WIN64
66 #define PROCESSOR_ARCHITECTURE_IA32_ON_WIN64 10
67 #endif
68
69 #endif
70
// Name this program was invoked as; main() assigns argv[0] to it.
std::string program;

// Product name and version string printed by version().
std::string programName("Douglas Thrift's Search Engine");
std::string programVersion("1.3beta");

// Debug flag; main() sets it to true when -D is given.
bool debug(false);
74
75 int main(int argc, char* argv[])
76 {
77 program = argv[0];
78
79 bool indexMode(false);
80 set<string> indexDomains, indexRestrictions;
81 size_t page(1);
82 vector<string> indices;
83 string indexURL, query, header("header.html"), body("body.html"),
84 footer("footer.html"), notfound("notfound.html"), pages("pages.html");
85
86 for (int index(1); index < argc; index++)
87 {
88 string arg(argv[index]);
89
90 if (arg == "-help")
91 {
92 usage();
93 return 0;
94 }
95 else if (arg == "-version")
96 {
97 version();
98 return 0;
99 }
100 else if (arg == "-license")
101 {
102 license();
103 return 0;
104 }
105 else if (arg == "-P")
106 {
107 if (++index < argc)
108 {
109 istringstream number(argv[index]);
110
111 number >> page;
112 }
113 else
114 {
115 cerr << program << ": Bad arguments\n";
116 usage();
117 return 1;
118 }
119 }
120 else if (arg == "-i")
121 {
122 indexMode = true;
123
124 if (++index < argc)
125 {
126 indexURL = argv[index];
127 }
128 else
129 {
130 cerr << program << ": Bad arguments\n";
131 usage();
132 return 1;
133 }
134 }
135 else if (arg == "-d")
136 {
137 if (++index < argc)
138 {
139 indexDomains.insert(argv[index]);
140 }
141 else
142 {
143 cerr << program << ": Bad arguments\n";
144 usage();
145 return 1;
146 }
147 }
148 else if (arg == "-r")
149 {
150 if (++index < argc)
151 {
152 indexRestrictions.insert(argv[index]);
153 }
154 else
155 {
156 cerr << program << ": Bad arguments\n";
157 usage();
158 return 1;
159 }
160 }
161 else if (arg == "-h")
162 {
163 if (++index < argc)
164 {
165 header = argv[index];
166 }
167 else
168 {
169 cerr << program << ": Bad arguments\n";
170 usage();
171 return 1;
172 }
173 }
174 else if (arg == "-b")
175 {
176 if (++index < argc)
177 {
178 body = argv[index];
179 }
180 else
181 {
182 cerr << program << ": Bad arguments\n";
183 usage();
184 return 1;
185 }
186 }
187 else if (arg == "-f")
188 {
189 if (++index < argc)
190 {
191 footer = argv[index];
192 }
193 else
194 {
195 cerr << program << ": Bad arguments\n";
196 usage();
197 return 1;
198 }
199 }
200 else if (arg == "-n")
201 {
202 if (++index < argc)
203 {
204 notfound = argv[index];
205 }
206 else
207 {
208 cerr << program << ": Bad arguments\n";
209 usage();
210 return 1;
211 }
212 }
213 else if (arg == "-p")
214 {
215 if (++index < argc)
216 {
217 pages = argv[index];
218 }
219 else
220 {
221 cerr << program << ": Bad arguments\n";
222 usage();
223 return 1;
224 }
225 }
226 else if (arg == "-D")
227 {
228 debug = true;
229 cerr.setf(ios_base::boolalpha);
230 }
231 else
232 {
233 indices.push_back(arg);
234 }
235 }
236
237 if (indices.size() < 1)
238 {
239 usage();
240 return 0;
241 }
242
243 if (indexMode)
244 {
245 if (indices.size() > 1)
246 {
247 cerr << program << ": Too many indices, can only build one index"
248 << " at a time\n";
249 usage();
250 return 1;
251 }
252
253 if (indexDomains.size() < 1)
254 {
255 cerr << program << ": Must specify at least one domain\n";
256 usage();
257 return 1;
258 }
259
260 Indexer indexer(indices[0], indexDomains, indexRestrictions);
261
262 indexer.index(indexURL);
263 }
264 else
265 {
266 string line;
267
268 getline(cin, line);
269
270 query = line;
271
272 Searcher searcher(query);
273 Outputter outputter(header, body, footer, notfound, pages);
274
275 searcher.search(indices);
276 outputter.output(searcher, page < 1 ? page : --page);
277 }
278
279 return 0;
280 }
281
// Builds a parenthesised description of the system the program runs on.
//
// On Windows the result is "(<os> <major>.<minor>[ [<marketing name>]] <cpu>)",
// taken from GetVersionEx() and GetSystemInfo(). Elsewhere it is
// "(<sysname> <release> <machine>)" as reported by uname(), or "(unknown)"
// when uname() fails.
//
// All system structures live on the stack, so nothing is leaked.
std::string platform()
{
    std::ostringstream description;

    description << '(';

#ifdef _WIN32
    OSVERSIONINFO osInfo = OSVERSIONINFO();

    osInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);

    GetVersionEx(&osInfo);

    const bool nt(osInfo.dwPlatformId == VER_PLATFORM_WIN32_NT);
    const size_t majorVersion(osInfo.dwMajorVersion);
    const size_t minorVersion(osInfo.dwMinorVersion);

    description << (nt ? "Windows NT" : "Windows") << ' ' << majorVersion
        << '.' << minorVersion;

    // Append the marketing name for the version numbers that are known.
    if (!nt && majorVersion == 4)
    {
        if (minorVersion <= 3)
        {
            description << " [Windows 95]";
        }
        else if (minorVersion == 10)
        {
            description << " [Windows 98]";
        }
        else if (minorVersion == 90)
        {
            description << " [Windows ME]";
        }
    }
    else if (nt && majorVersion == 5)
    {
        if (minorVersion == 0)
        {
            description << " [Windows 2000]";
        }
        else if (minorVersion == 1)
        {
            description << " [Windows XP]";
        }
        else if (minorVersion == 2)
        {
            description << " [Windows Server 2003 or Windows XP x64]";
        }
    }
    else if (nt && majorVersion == 6 && minorVersion == 0)
    {
        description << " [Windows Vista or Windows Server 2008]";
    }

    description << ' ';

    SYSTEM_INFO systemInfo;

    GetSystemInfo(&systemInfo);

    switch (systemInfo.wProcessorArchitecture)
    {
    case PROCESSOR_ARCHITECTURE_INTEL:
        description << "ix86";
        break;
    case PROCESSOR_ARCHITECTURE_MIPS:
        description << "mips";
        break;
    case PROCESSOR_ARCHITECTURE_ALPHA:
        description << "alpha";
        break;
    case PROCESSOR_ARCHITECTURE_PPC:
        description << "ppc";
        break;
    case PROCESSOR_ARCHITECTURE_IA64:
        description << "ia64";
        break;
    case PROCESSOR_ARCHITECTURE_IA32_ON_WIN64:
        description << "ix86_on_win64";
        break;
    case PROCESSOR_ARCHITECTURE_AMD64:
        description << "amd64";
        break;
    default:
        description << "unknown";
        break;
    }

#else // _WIN32
    struct utsname computer;

    // uname() fills nothing in on failure, so never stream the buffers then.
    if (uname(&computer) < 0)
    {
        description << "unknown";
    }
    else
    {
        description << computer.sysname << ' ' << computer.release << ' '
            << computer.machine;
    }
#endif // _WIN32

    description << ')';

    return description.str();
}
381
382 void usage()
383 {
384 #ifdef _WIN32
385 string program(program);
386 OSVERSIONINFO* computer = new OSVERSIONINFO;
387
388 computer->dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
389
390 GetVersionEx(computer);
391
392 if (computer->dwPlatformId != VER_PLATFORM_WIN32_NT) program = "Search";
393
394 delete computer;
395 #endif // _WIN32
396
397 string tab(8 + program.length(), ' ');
398
399 cout << "Usage: " << program << " [index ...] [-P page] [-h header] [-b bo"
400 << "dy]\n"
401 << tab << "[-f footer] [-n notfound] [-p pages]\n"
402 << tab << "[-i begin] [-d domain ...] [-r restriction ...]\n"
403 << tab << "[-D] [-version] [-help]\n"
404 << "Options:\n"
405 << " index Index file to use (can only use one file for i"
406 << "ndexing)\n"
407 << " -P page Page of search to display (defaults to 1)\n"
408 << " -h header Header template to use (defaults to header.htm"
409 << "l)\n"
410 << " -b body Body template to use (defaults to body.html)\n"
411 << " -f footer Footer template to use (defaults to footer.htm"
412 << "l)\n"
413 << " -n notfound Not found template to use (defaults to notfoun"
414 << "d.html)\n"
415 << " -p pages Pages template to use (defaults to pages.html)"
416 << "\n"
417 << " -i begin URL to begin indexing (causes indexing rather "
418 << "than search)\n"
419 << " -d domain Domain to include in indexing\n"
420 << " -r restriction URL to restrict from indexing\n"
421 << " -D Display debug information\n"
422 << " -version Display version information and exit\n"
423 << " -license Display license information and exit\n"
424 << " -help Display this message and exit\n";
425 }
426
427 void version()
428 {
429 cout << programName << " " << programVersion << " "<< platform() << "\n\n"
430 << " Copyright (C) 2002-2004, Douglas Thrift. All Rights Reserved.\n"
431 << "\n"
432 << " This product includes software developed by Douglas Thrift\n"
433 << " (http://computers.douglasthrift.net/searchengine/).\n";
434 #ifdef _OpenSSL_
435 cout << "\n" << openssl() << " " << SSLeay_version(SSLEAY_BUILT_ON) << " "
436 << SSLeay_version(SSLEAY_PLATFORM) << "\n";
437 #endif
438 }
439
// Prints the full text of the license to standard output.
void license()
{
    // One literal per output line; adjacent literals are joined by the
    // compiler, so the whole text is written with a single insertion.
    static const char text[] =
        "License:\n"
        " Douglas Thrift's Search Engine License\n\n"
        " Copyright (C) 2002-2004, Douglas Thrift. All Rights Reserved.\n"
        "\n"
        " Redistribution and use in source and binary forms, with or without\n"
        " modification, are permitted provided that the following conditions are met:\n\n"
        " 1. Redistributions of source code must retain the above copyright notice,\n"
        " this list of conditions and the following disclaimer.\n\n"
        " 2. Redistributions in binary form must reproduce the above copyright notice,\n"
        " this list of conditions and the following disclaimer in the documentation\n"
        " and/or other materials provided with the distribution.\n\n"
        " 3. The end-user documentation included with the redistribution, if any, must\n"
        " include the following acknowledgment:\n\n"
        " \"This product includes software developed by Douglas Thrift\n"
        " (http://computers.douglasthrift.net/searchengine/).\"\n\n"
        " Alternately, this acknowledgment may appear in the software itself, if\n"
        " and wherever such third-party acknowledgments normally appear.\n\n"
        " 4. The names \"Douglas Thrift\" and \"Douglas Thrift's Search Engine\" must not\n"
        " be used to endorse or promote products derived from this software without\n"
        " specific prior written permission. For written permission, please visit\n"
        " http://www.douglasthrift.net/contact.cgi for contact information.\n\n"
        " 5. Products derived from this software may not be called \"Douglas Thrift's\n"
        " Search Engine\", nor may \"Douglas Thrift's Search Engine\" appear in their\n"
        " name, without prior written permission.\n\n"
        " THIS SOFTWARE IS PROVIDED \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES,\n"
        " INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\n"
        " FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE\n"
        " COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,\n"
        " INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n"
        " LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,\n"
        " OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\n"
        " LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n"
        " NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,\n"
        " EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n";

    std::cout << text;
}
501
// Replaces every occurrence of `character` in `line` with `entity`, in place.
//
// line:      text to modify.
// character: the single character to look for.
// entity:    replacement text; may be empty (the character is then removed)
//            and may itself contain `character`.
//
// Scanning resumes just past each inserted entity, so the replacement text is
// never rescanned. This keeps the loop finite when the entity contains the
// character and keeps every original character visited when the entity is
// empty.
void entities(std::string& line, char character, const std::string& entity)
{
    std::string::size_type spot(line.find(character));

    while (spot != std::string::npos)
    {
        line.replace(spot, 1, entity);

        spot = line.find(character, spot + entity.length());
    }
}
519
// Replaces every occurrence of the text `entity` in `line` with the single
// character `character`, in place.
//
// line:      text to modify.
// entity:    text to look for; an empty entity leaves `line` untouched.
// character: the replacement character.
//
// Scanning resumes just past each inserted character, so a replacement never
// combines with following text to form a new match ("&amp;amp;" becomes
// "&amp;", not "&").
void entities(std::string& line, const std::string& entity, char character)
{
    // An empty entity matches at every position; replacing it would insert
    // characters without end.
    if (entity.empty()) return;

    std::string::size_type spot(line.find(entity));

    while (spot != std::string::npos)
    {
        line.replace(spot, entity.length(), 1, character);

        spot = line.find(entity, spot + 1);
    }
}
537
// Normalizes the whitespace of `abbynormal` in place.
//
// Every run of whitespace characters is collapsed to the first character of
// that run, then one leading and one trailing whitespace character (if
// present) are removed. Empty and whitespace-only input yields an empty
// string.
//
// Characters are passed to isspace() as unsigned char, because isspace() is
// undefined for negative values other than EOF.
void normalize(std::string& abbynormal)
{
    std::string::size_type kept(0);
    bool inRun(false);

    // Single pass: compact the kept characters toward the front.
    for (std::string::size_type index(0); index < abbynormal.length(); ++index)
    {
        const char current(abbynormal[index]);
        const bool space(isspace(static_cast<unsigned char>(current)) != 0);

        // Keep every non-space, and only the first space of each run.
        if (!space || !inRun) abbynormal[kept++] = current;

        inRun = space;
    }

    abbynormal.erase(kept);

    // The emptiness checks keep the subscripts in range.
    if (!abbynormal.empty() &&
        isspace(static_cast<unsigned char>(abbynormal[0])))
    {
        abbynormal.erase(0, 1);
    }

    if (!abbynormal.empty() &&
        isspace(static_cast<unsigned char>(abbynormal[abbynormal.length() - 1])))
    {
        abbynormal.erase(abbynormal.length() - 1, 1);
    }
}

Properties

Name Value
svn:eol-style native
svn:keywords Id