1 |
/* ============================================================================ |
2 |
* Douglas Thrift's Search Engine License |
3 |
* |
4 |
* Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved. |
5 |
* Redistribution and use in source and binary forms, with or without |
6 |
* modification, are permitted provided that the following conditions are met: |
7 |
* |
8 |
* 1. Redistributions of source code must retain the above copyright notice, |
9 |
* this list of conditions and the following disclaimer. |
10 |
* |
11 |
* 2. Redistributions in binary form must reproduce the above copyright notice, |
12 |
* this list of conditions and the following disclaimer in the documentation |
13 |
* and/or other materials provided with the distribution. |
14 |
* |
15 |
* 3. The end-user documentation included with the redistribution, if any, must |
16 |
* include the following acknowledgment: |
17 |
* |
18 |
* "This product includes software developed by Douglas Thrift |
19 |
* (http://computers.douglasthrift.net/searchengine/)." |
20 |
* |
21 |
* Alternately, this acknowledgment may appear in the software itself, if |
22 |
* and wherever such third-party acknowledgments normally appear. |
23 |
* |
24 |
* 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not |
25 |
* be used to endorse or promote products derived from this software without |
26 |
* specific prior written permission. For written permission, please visit |
27 |
* http://www.douglasthrift.net/contact.cgi for contact information. |
28 |
* |
29 |
* 5. Products derived from this software may not be called "Douglas Thrift's |
30 |
* Search Engine", nor may "Douglas Thrift's Search Engine" appear in their |
31 |
* name, without prior written permission. |
32 |
* |
33 |
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
34 |
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
35 |
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
36 |
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
37 |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
38 |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, |
39 |
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
40 |
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
41 |
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
42 |
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
43 |
* ============================================================================ |
44 |
*/ |
45 |
// Douglas Thrift's Search Engine Outputer |
46 |
// |
47 |
// Douglas Thrift |
48 |
// |
49 |
// $Id: Outputer.cpp,v 1.9 2003/07/17 07:13:45 douglas Exp $ |
50 |
|
51 |
#include "Outputer.h" |
52 |
|
53 |
// Creates an Outputer that remembers the paths of the five template files it
// renders from: the header, the per-result body, the footer, the "not found"
// message and the page-navigation fragment. No file is opened here.
Outputer::Outputer(const string& headerFile, const string& bodyFile, const
	string& footerFile, const string& notfoundFile, const string& pagesFile) :
	headerFile(headerFile),
	bodyFile(bodyFile),
	footerFile(footerFile),
	notfoundFile(notfoundFile),
	pagesFile(pagesFile)
{
}
62 |
|
63 |
// Renders one page of search results to standard output.
//
// searcher - source of the ranked pages, the query string, the common words,
//            the ignored words, the and/or flags and the elapsed time
// page     - zero-based index of the ten-result page to show
//
// The header template is always written first and the footer template last.
// In between, the body template is written once per result, or, when there
// was a query but nothing matched, the not-found template is written instead.
void Outputer::output(Searcher& searcher, unsigned page)
{
	MultiSet pagesSet = searcher.getPages();
	numWebpages = pagesSet.size();
	numPages = (numWebpages + 9) / 10;
	string query = searcher.getQueryString();
	vector<string> common = searcher.getCommonUsed();

	// Start from an empty list so that calling output() again does not append
	// to the results collected by an earlier call.
	webpages.clear();

	MultiSetIterator itor = pagesSet.begin();

	// Step over the results that belong to earlier pages.
	for (unsigned count = 0; count < page * 10 && itor != pagesSet.end();
		++count)
	{
		++itor;
	}

	// Keep at most ten results for the requested page.
	for (unsigned index = 0; index < 10 && itor != pagesSet.end();
		++index, ++itor)
	{
		webpages.push_back(*itor);
	}

	this->query = searcher.getQuery().size() > 0;
	results = webpages.size() > 0;
	time = searcher.time();

	if (debug)
	{
		cerr << "query = " << this->query << "\n"
			<< "results = " << results << "\n"
			<< "time = " << duration() << "\n";
	}

	// Escape the query for HTML. The ampersand has to go first, otherwise the
	// ampersands introduced by the other entities would be escaped again.
	entities(query, '&', "&amp;");
	entities(query, '\"', "&quot;");
	entities(query, '<', "&lt;");
	entities(query, '>', "&gt;");

	string ignore = searcher.getIgnore();

	header(query, page, common, searcher.getAnd(), searcher.getOr(),
		ignore);

	if (results)
	{
		body();
	}
	else if (this->query)
	{
		notfound(query, searcher.getQuery().size());
	}

	footer(query, page, common, searcher.getAnd(), searcher.getOr(),
		ignore);
}
116 |
|
117 |
void Outputer::header(const string& query, unsigned page, vector<string> |
118 |
common, bool and_, bool or_, const string& ignore) |
119 |
{ |
120 |
ifstream fin(headerFile.c_str()); |
121 |
|
122 |
string line; |
123 |
while (fin.good()) |
124 |
{ |
125 |
getline(fin, line); |
126 |
|
127 |
conditional(line, fin, "<?ifquery?>", this->query); |
128 |
conditional(line, fin, "<?ifresults?>", results); |
129 |
conditional(line, fin, "<?ifor?>", or_); |
130 |
conditional(line, fin, "<?ifand?>", and_); |
131 |
conditional(line, fin, "<?ifignore?>", ignore != ""); |
132 |
conditional(line, fin, "<?ifcommon?>", common.size() == 1); |
133 |
conditional(line, fin, "<?ifmanycommon?>", common.size() > 1); |
134 |
|
135 |
#ifndef _OpenSSL_ |
136 |
tag(line, "<?version?>", programName + ' ' + programVersion + ' ' + |
137 |
platform()); |
138 |
#else |
139 |
tag(line, "<?version?>", programName + ' ' + programVersion + ' ' + |
140 |
platform() + ' ' + openssl(true)); |
141 |
#endif |
142 |
tag(line, "<?query?>", query); |
143 |
tag(line, "<?range?>", range(page)); |
144 |
tag(line, "<?total?>", total()); |
145 |
tag(line, "<?time?>", duration()); |
146 |
tag(line, "<?pages?>", pages(query, page)); |
147 |
tag(line, "<?ignore?>", ignore); |
148 |
tag(line, "<?common?>", common[0]); |
149 |
tag(line, "<?manycommon?>", manycommon(common)); |
150 |
|
151 |
cout << line << (fin.good() ? "\n" : ""); |
152 |
} |
153 |
|
154 |
fin.close(); |
155 |
} |
156 |
|
157 |
void Outputer::body() |
158 |
{ |
159 |
for (int index = 0; index < webpages.size(); index++) |
160 |
{ |
161 |
Ranker webpage = webpages[index]; |
162 |
string title = webpage.getTitle(); |
163 |
if (title == "") |
164 |
{ |
165 |
title = webpage.getURL(); |
166 |
entities(title, '&', "&"); |
167 |
entities(title, '\"', """); |
168 |
entities(title, '<', "<"); |
169 |
entities(title, '>', ">"); |
170 |
} |
171 |
string address = webpage.getURL(); |
172 |
string sample = webpage.getSample(); |
173 |
string description = webpage.getDescription(); |
174 |
|
175 |
char* csize = new char[1024]; |
176 |
sprintf(csize, "%.0fk", (double(webpage.getSize()) / double(1024))); |
177 |
|
178 |
string size = csize; |
179 |
|
180 |
delete [] csize; |
181 |
|
182 |
entities(address, '&', "&"); |
183 |
entities(address, '\"', """); |
184 |
entities(address, '<', "<"); |
185 |
entities(address, '>', ">"); |
186 |
|
187 |
ifstream fin(bodyFile.c_str()); |
188 |
|
189 |
string line; |
190 |
while (fin.good()) |
191 |
{ |
192 |
getline(fin, line); |
193 |
|
194 |
conditional(line, fin, "<?ifdescription?>", description != ""); |
195 |
|
196 |
tag(line, "<?address?>", address); |
197 |
tag(line, "<?title?>", title); |
198 |
tag(line, "<?sample?>", sample); |
199 |
tag(line, "<?description?>", description); |
200 |
tag(line, "<?size?>", size); |
201 |
|
202 |
cout << line << (fin.good() ? "\n" : ""); |
203 |
} |
204 |
|
205 |
fin.close(); |
206 |
} |
207 |
} |
208 |
|
209 |
void Outputer::footer(const string& query, unsigned page, vector<string> |
210 |
common, bool and_, bool or_, const string& ignore) |
211 |
{ |
212 |
ifstream fin(footerFile.c_str()); |
213 |
|
214 |
string line; |
215 |
while (fin.good()) |
216 |
{ |
217 |
getline(fin, line); |
218 |
|
219 |
conditional(line, fin, "<?ifquery?>", this->query); |
220 |
conditional(line, fin, "<?ifresults?>", results); |
221 |
conditional(line, fin, "<?ifor?>", or_); |
222 |
conditional(line, fin, "<?ifand?>", and_); |
223 |
conditional(line, fin, "<?ifignore?>", ignore != ""); |
224 |
conditional(line, fin, "<?ifcommon?>", common.size() == 1); |
225 |
conditional(line, fin, "<?ifmanycommon?>", common.size() > 1); |
226 |
|
227 |
#ifndef _OpenSSL_ |
228 |
tag(line, "<?version?>", programName + ' ' + programVersion + ' ' + |
229 |
platform()); |
230 |
#else |
231 |
tag(line, "<?version?>", programName + ' ' + programVersion + ' ' + |
232 |
platform() + ' ' + openssl(true)); |
233 |
#endif |
234 |
tag(line, "<?query?>", query); |
235 |
tag(line, "<?range?>", range(page)); |
236 |
tag(line, "<?total?>", total()); |
237 |
tag(line, "<?time?>", duration()); |
238 |
tag(line, "<?pages?>", pages(query, page)); |
239 |
tag(line, "<?ignore?>", ignore); |
240 |
tag(line, "<?common?>", common[0]); |
241 |
tag(line, "<?manycommon?>", manycommon(common)); |
242 |
|
243 |
cout << line << (fin.good() ? "\n" : ""); |
244 |
} |
245 |
|
246 |
fin.close(); |
247 |
} |
248 |
|
249 |
void Outputer::notfound(const string& query, unsigned keywords) |
250 |
{ |
251 |
ifstream fin(notfoundFile.c_str()); |
252 |
|
253 |
string line; |
254 |
while (fin.good()) |
255 |
{ |
256 |
getline(fin, line); |
257 |
|
258 |
conditional(line, fin, "<?ifmany?>", keywords > 1); |
259 |
|
260 |
tag(line, "<?query?>", query); |
261 |
|
262 |
cout << line << (fin.good() ? "\n" : ""); |
263 |
} |
264 |
|
265 |
fin.close(); |
266 |
} |
267 |
|
268 |
// Builds the page-navigation markup from the pages template (pagesFile).
//
// The template is expanded numPages + 2 times and the results are
// concatenated:
//   1. once with only <?ifprevious?> enabled (when page >= 1), replacing
//      <?previous?> with the one-based number of the previous page;
//   2. once per page: for the current page only <?ifpage?> is enabled and
//      <?page?> is replaced, for every other page only <?ifnum?> is enabled
//      and <?query?> and <?num?> are replaced;
//   3. once with only <?ifnext?> enabled (when a next page exists),
//      replacing <?next?> with the one-based number of the next page.
//
// query - HTML-escaped query; it is unescaped and percent-encoded here
//         before being substituted for <?query?>
// page  - zero-based index of the page currently shown
//
// Returns the concatenated markup.
string Outputer::pages(string query, unsigned page)
{
	// Turn HTML entities back into the characters they stand for. The
	// ampersand comes last so that "&amp;lt;" ends up as "&lt;", not "<".
	entities(query, "&lt;", '<');
	entities(query, "&gt;", '>');
	entities(query, "&quot;", '\"');
	entities(query, "&amp;", '&');

	// Percent-encode for use inside a URL. '%' goes first so the escapes
	// produced below are not encoded a second time.
	entities(query, '%', "%25");
	entities(query, '\t', "%09");
	entities(query, ' ', "%20");
	entities(query, '\"', "%22");
	entities(query, '#', "%23");
	entities(query, '$', "%24");
	entities(query, '&', "%26");
	entities(query, '\'', "%27");
	entities(query, '+', "%2B");
	entities(query, ',', "%2C");
	entities(query, '/', "%2F");
	entities(query, ':', "%3A");
	entities(query, ';', "%3B");
	entities(query, '<', "%3C");
	entities(query, '=', "%3D");
	entities(query, '>', "%3E");
	entities(query, '?', "%3F");
	entities(query, '@', "%40");
	entities(query, '[', "%5B");
	entities(query, ']', "%5D");
	entities(query, '\\', "%5C");
	entities(query, '^', "%5E");
	entities(query, '`', "%60");
	entities(query, '{', "%7B");
	entities(query, '|', "%7C");
	entities(query, '}', "%7D");
	entities(query, '~', "%7E");

	// One stack buffer serves every number formatted below; an unsigned
	// printed with %u is far shorter than this.
	char number[32];

	// Pages are shown one-based, so the previous page's number equals the
	// zero-based index of the current one and the next page's is index + 2.
	sprintf(number, "%u", page);
	const string previous = number;

	sprintf(number, "%u", page + 2);
	const string next = number;

	string lines;
	string line;

	// Pass 1: the "previous" link.
	ifstream fin(pagesFile.c_str());

	while (fin.good())
	{
		getline(fin, line);

		conditional(line, fin, "<?ifprevious?>", page >= 1);
		conditional(line, fin, "<?ifpage?>", false);
		conditional(line, fin, "<?ifnum?>", false);
		conditional(line, fin, "<?ifnext?>", false);

		tag(line, "<?query?>", query);
		tag(line, "<?previous?>", previous);

		lines += line + (fin.good() ? "\n" : "");
	}

	// clear() resets the end-of-file state so the stream can be reopened.
	fin.close();
	fin.clear();

	// Pass 2: one entry per result page.
	for (unsigned index = 0; index < numPages; ++index)
	{
		const bool current = index == page;

		sprintf(number, "%u", index + 1);
		const string label = number;

		fin.open(pagesFile.c_str());

		while (fin.good())
		{
			getline(fin, line);

			conditional(line, fin, "<?ifprevious?>", false);
			conditional(line, fin, "<?ifpage?>", current);
			conditional(line, fin, "<?ifnum?>", !current);
			conditional(line, fin, "<?ifnext?>", false);

			if (current)
			{
				// The current page only has its <?page?> tag replaced.
				tag(line, "<?page?>", label);
			}
			else
			{
				tag(line, "<?query?>", query);
				tag(line, "<?num?>", label);
			}

			lines += line + (fin.good() ? "\n" : "");
		}

		fin.close();
		fin.clear();
	}

	// Pass 3: the "next" link.
	fin.open(pagesFile.c_str());

	while (fin.good())
	{
		getline(fin, line);

		conditional(line, fin, "<?ifprevious?>", false);
		conditional(line, fin, "<?ifpage?>", false);
		conditional(line, fin, "<?ifnum?>", false);
		conditional(line, fin, "<?ifnext?>", page + 2 <= numPages);

		tag(line, "<?query?>", query);
		tag(line, "<?next?>", next);

		lines += line + (fin.good() ? "\n" : "");
	}

	fin.close();

	return lines;
}
411 |
|
412 |
// Returns the one-based range of results shown on a page as "first - last",
// for example "11 - 20".
//
// page - zero-based page index; each page holds ten results
//
// The upper bound is capped at numWebpages, so the last page may report fewer
// than ten results.
string Outputer::range(unsigned page)
{
	const unsigned bottom = page * 10 + 1;
	const unsigned top = numWebpages > page * 10 + 10 ? page * 10 + 10 :
		numWebpages;

	// Two unsigned values plus the separator fit easily; a stack buffer
	// cannot leak if building the string throws.
	char text[64];
	sprintf(text, "%u - %u", bottom, top);

	return string(text);
}
430 |
|
431 |
// Returns numWebpages, the total number of matching pages, as decimal text.
string Outputer::total()
{
	// Stack buffer: nothing to free, and nothing to leak if building the
	// string throws.
	char text[32];
	sprintf(text, "%u", numWebpages);

	return string(text);
}
443 |
|
444 |
// Returns the stored search time (the time member) formatted with two digits
// after the decimal point.
string Outputer::duration()
{
	// Stack buffer: nothing to free, and nothing to leak if building the
	// string throws. The size is kept generous because %.2f prints every
	// integral digit of the value.
	char text[1024];
	sprintf(text, "%.2f", time);

	return string(text);
}
455 |
|
456 |
// Joins the given words into one string, separated by single spaces.
// An empty vector yields an empty string.
string Outputer::manycommon(vector<string> common)
{
	string joined;

	for (vector<string>::const_iterator word = common.begin();
		word != common.end(); ++word)
	{
		// The separator goes between words, never before the first one.
		if (word != common.begin())
		{
			joined += ' ';
		}

		joined += *word;
	}

	return joined;
}
469 |
|
470 |
// Replaces every occurrence of tag in line with replacement.
//
// line        - text to modify in place
// tag         - NUL-terminated marker to look for, e.g. "<?query?>"
// replacement - text put in place of each marker
//
// The search resumes after the inserted text, so a replacement that itself
// contains the tag is not expanded again.
void Outputer::tag(string& line, char* tag, const string& replacement)
{
	const string::size_type length = strlen(tag);

	// An empty tag matches at every position and would never let the search
	// advance.
	if (length == 0)
	{
		return;
	}

	// Positions are kept as string::size_type so the comparison against
	// string::npos is exact on every platform.
	string::size_type spot = line.find(tag);

	while (spot != string::npos)
	{
		line.replace(spot, length, replacement);

		spot = line.find(tag, spot + replacement.length());
	}
}
489 |
|
490 |
// Resolves the conditional sections "tag ... <?endif?>" in line.
//
// line      - text to modify in place; it grows when a section spans lines
// fin       - template stream from which further lines are read while a
//             section's <?endif?> has not been seen yet
// tag       - NUL-terminated opening marker, e.g. "<?ifquery?>"
// condition - true: drop only the two markers and keep the text between
//             them; false: drop the markers and everything between them
//
// A section ends at the first <?endif?> after its opening marker, so
// sections cannot be nested. If the stream runs out before a section is
// closed, that section is left untouched and processing stops.
void Outputer::conditional(string& line, ifstream& fin, char* tag, bool
	condition)
{
	static const char endif[] = "<?endif?>";

	const string::size_type tagLength = strlen(tag);
	const string::size_type endifLength = sizeof(endif) - 1;

	// Positions are kept as string::size_type: stored in a narrower unsigned
	// they would never compare equal to string::npos where size_t is 64 bits.
	string::size_type begin = 0;

	while (begin < line.length())
	{
		const string::size_type start = line.find(tag, begin);

		if (start == string::npos)
		{
			break;
		}

		// Look for the closing marker behind the opening one.
		string::size_type finish = line.find(endif, start + tagLength);

		// The section may continue on later template lines: append them
		// until the closing marker shows up or the stream has no more data.
		string next;
		while (finish == string::npos && getline(fin, next))
		{
			line += '\n' + next;
			finish = line.find(endif, start + tagLength);
		}

		if (finish == string::npos)
		{
			// Unterminated section: leave it alone rather than loop forever.
			break;
		}

		if (condition)
		{
			// Remove the closing marker first so start stays valid.
			line.erase(finish, endifLength);
			line.erase(start, tagLength);

			// Continue right behind the kept text; it moved left by the
			// length of the removed opening marker.
			begin = finish - tagLength;
		}
		else
		{
			line.erase(start, finish - start + endifLength);

			begin = start;
		}
	}
}