ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/Outputer.cpp
Revision: 1
Committed: 2002-12-04T20:22:59-08:00 (22 years, 6 months ago) by douglas
File size: 12640 byte(s)
Log Message:
Initial revision

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine Outputer
46 //
47 // Douglas Thrift
48 //
49 // Outputer.cpp
50
51 #include "Outputer.h"
52
53 Outputer::Outputer(string& headerFile, string& bodyFile, string& footerFile,
54 string& notfoundFile, string& pagesFile)
55 {
56 this->headerFile = headerFile;
57 this->bodyFile = bodyFile;
58 this->footerFile = footerFile;
59 this->notfoundFile = notfoundFile;
60 this->pagesFile = pagesFile;
61 }
62
63 void Outputer::output(Searcher& searcher, unsigned page)
64 {
65 MultiSet pagesSet = searcher.getPages();
66 numWebpages = pagesSet.size();
67 numPages = (numWebpages + 9) / 10;
68 string query = searcher.getQueryString();
69 vector<string> common = searcher.getCommonUsed();
70
71 MultiSetIterator itor = pagesSet.begin();
72
73 for (int count = 0; count < page * 10 && itor != pagesSet.end(); count++)
74 {
75 itor++;
76 }
77
78 for (int index = 0; index < 10 && itor != pagesSet.end(); index++, itor++)
79 {
80 webpages.push_back(*itor);
81 }
82
83 this->query = searcher.getQuery().size() > 0;
84 results = webpages.size() > 0;
85 time = searcher.time();
86
87 if (debug)
88 {
89 cerr << "query = " << (this->query ? "true" : "false") << "\n"
90 << "results = " << (results ? "true" : "false") << "\n"
91 << "time = " << duration() << "\n";
92 }
93
94 entities(query, '&', "&amp;");
95 entities(query, '\"', "&quot;");
96 entities(query, '<', "&lt;");
97 entities(query, '>', "&gt;");
98
99 string ignore = searcher.getIgnore();
100
101 header(query, page, common, searcher.getAnd(), searcher.getOr(),
102 ignore);
103
104 if (results)
105 {
106 body();
107 }
108 else if (this->query)
109 {
110 notfound(query, searcher.getQuery().size());
111 }
112
113 footer(query, page, common, searcher.getAnd(), searcher.getOr(),
114 ignore);
115 }
116
117 void Outputer::header(string& query, unsigned page, vector<string> common,
118 bool and_, bool or_, string& ignore)
119 {
120 ifstream fin(headerFile.c_str());
121
122 string line;
123 while (fin.good())
124 {
125 getline(fin, line);
126
127 conditional(line, fin, "<?ifquery?>", this->query);
128 conditional(line, fin, "<?ifresults?>", results);
129 conditional(line, fin, "<?ifor?>", or_);
130 conditional(line, fin, "<?ifand?>", and_);
131 conditional(line, fin, "<?ifignore?>", ignore != "");
132 conditional(line, fin, "<?ifcommon?>", common.size() == 1);
133 conditional(line, fin, "<?ifmanycommon?>", common.size() > 1);
134
135 tag(line, "<?query?>", query);
136
137 string fred = range(page);
138
139 tag(line, "<?range?>", fred);
140
141 string martha = total();
142
143 tag(line, "<?total?>", martha);
144
145 string george = duration();
146
147 tag(line, "<?time?>", george);
148
149 string derf = pages(query, page);
150
151 tag(line, "<?pages?>", derf);
152 tag(line, "<?ignore?>", ignore);
153 tag(line, "<?common?>", common[0]);
154
155 string elaine = manycommon(common);
156
157 tag(line, "<?manycommon?>", elaine);
158
159 cout << line << "\n";
160 }
161
162 fin.close();
163 }
164
165 void Outputer::body()
166 {
167 for (int index = 0; index < webpages.size(); index++)
168 {
169 Ranker webpage = webpages[index];
170 string title = webpage.getTitle();
171 if (title == "")
172 {
173 title = webpage.getURL();
174 entities(title, '&', "&amp;");
175 entities(title, '\"', "&quot;");
176 entities(title, '<', "&lt;");
177 entities(title, '>', "&gt;");
178 }
179 string address = webpage.getURL();
180 string sample = webpage.getSample();
181 string description = webpage.getDescription();
182
183 char* csize = new char[1024];
184 sprintf(csize, "%.0fk", (double(webpage.getSize()) / double(1024)));
185
186 string size = csize;
187
188 delete [] csize;
189
190 entities(address, '&', "&amp;");
191 entities(address, '\"', "&quot;");
192 entities(address, '<', "&lt;");
193 entities(address, '>', "&gt;");
194
195 ifstream fin(bodyFile.c_str());
196
197 string line;
198 while (fin.good())
199 {
200 getline(fin, line);
201
202 conditional(line, fin, "<?ifdescription?>", description != "");
203
204 tag(line, "<?address?>", address);
205 tag(line, "<?title?>", title);
206 tag(line, "<?sample?>", sample);
207 tag(line, "<?description?>", description);
208 tag(line, "<?size?>", size);
209
210 cout << line << "\n";
211 }
212
213 fin.close();
214 }
215 }
216
217 void Outputer::footer(string& query, unsigned page, vector<string> common,
218 bool and_, bool or_, string& ignore)
219 {
220 ifstream fin(footerFile.c_str());
221
222 string line;
223 while (fin.good())
224 {
225 getline(fin, line);
226
227 conditional(line, fin, "<?ifquery?>", this->query);
228 conditional(line, fin, "<?ifresults?>", results);
229 conditional(line, fin, "<?ifor?>", or_);
230 conditional(line, fin, "<?ifand?>", and_);
231 conditional(line, fin, "<?ifignore?>", ignore != "");
232 conditional(line, fin, "<?ifcommon?>", common.size() == 1);
233 conditional(line, fin, "<?ifmanycommon?>", common.size() > 1);
234
235 tag(line, "<?query?>", query);
236
237 string fred = range(page);
238
239 tag(line, "<?range?>", fred);
240
241 string martha = total();
242
243 tag(line, "<?total?>", martha);
244
245 string george = duration();
246
247 tag(line, "<?time?>", george);
248
249 string derf = pages(query, page);
250
251 tag(line, "<?pages?>", derf);
252 tag(line, "<?ignore?>", ignore);
253 tag(line, "<?common?>", common[0]);
254
255 string elaine = manycommon(common);
256
257 tag(line, "<?manycommon?>", elaine);
258
259 cout << line << "\n";
260 }
261
262 fin.close();
263 }
264
265 void Outputer::notfound(string& query, unsigned keywords)
266 {
267 ifstream fin(notfoundFile.c_str());
268
269 string line;
270 while (fin.good())
271 {
272 getline(fin, line);
273
274 conditional(line, fin, "<?ifmany?>", keywords > 1);
275
276 tag(line, "<?query?>", query);
277
278 cout << line << "\n";
279 }
280
281 fin.close();
282 }
283
284 string Outputer::pages(string query, unsigned page)
285 {
286 entities(query, "&lt;", '<');
287 entities(query, "&gt;", '>');
288 entities(query, "&quot;", '\"');
289 entities(query, "&amp;", '&');
290
291 entities(query, '%', "%25");
292 entities(query, '\t', "%09");
293 entities(query, ' ', "%20");
294 entities(query, '\"', "%22");
295 entities(query, '#', "%23");
296 entities(query, '$', "%24");
297 entities(query, '&', "%26");
298 entities(query, '\'', "%27");
299 entities(query, '+', "%2B");
300 entities(query, ',', "%2C");
301 entities(query, '/', "%2F");
302 entities(query, ':', "%3A");
303 entities(query, ';', "%3B");
304 entities(query, '<', "%3C");
305 entities(query, '=', "%3D");
306 entities(query, '>', "%3E");
307 entities(query, '?', "%3F");
308 entities(query, '@', "%40");
309 entities(query, '[', "%5B");
310 entities(query, ']', "%5D");
311 entities(query, '\\', "%5C");
312 entities(query, '^', "%5E");
313 entities(query, '`', "%60");
314 entities(query, '{', "%7B");
315 entities(query, '|', "%7C");
316 entities(query, '}', "%7D");
317 entities(query, '~', "%7E");
318
319 string lines;
320
321 ifstream fin(pagesFile.c_str());
322
323 string line;
324 while (fin.good())
325 {
326 getline(fin, line);
327 conditional(line, fin, "<?ifprevious?>", page >= 1);
328 conditional(line, fin, "<?ifpage?>", false);
329 conditional(line, fin, "<?ifnum?>", false);
330 conditional(line, fin, "<?ifnext?>", false);
331
332 char* cprevious = new char[1024];
333
334 sprintf(cprevious, "%u", page);
335
336 string previous = cprevious;
337
338 delete [] cprevious;
339
340 tag(line, "<?query?>", query);
341 tag(line, "<?previous?>", previous);
342
343 lines += line + "\n";
344 }
345
346 fin.close();
347 fin.clear();
348
349 for (int index = 0; index < numPages; index++)
350 {
351 fin.open(pagesFile.c_str());
352
353 while (fin.good())
354 {
355 getline(fin, line);
356 if (index == page)
357 {
358 conditional(line, fin, "<?ifprevious?>", false);
359 conditional(line, fin, "<?ifpage?>", true);
360 conditional(line, fin, "<?ifnum?>", false);
361 conditional(line, fin, "<?ifnext?>", false);
362
363 char* cpage = new char[1024];
364
365 sprintf(cpage, "%u", (index + 1));
366
367 string spage = cpage;
368
369 delete [] cpage;
370
371 tag(line, "<?page?>", spage);
372 }
373 else
374 {
375 conditional(line, fin, "<?ifprevious?>", false);
376 conditional(line, fin, "<?ifpage?>", false);
377 conditional(line, fin, "<?ifnum?>", true);
378 conditional(line, fin, "<?ifnext?>", false);
379
380 char* cnum = new char[1024];
381
382 sprintf(cnum, "%u", (index + 1));
383
384 string num = cnum;
385
386 delete [] cnum;
387
388 tag(line, "<?query?>", query);
389 tag(line, "<?num?>", num);
390 }
391
392 lines += line + "\n";
393 }
394
395 fin.close();
396 fin.clear();
397 }
398
399 fin.open(pagesFile.c_str());
400
401 while (fin.good())
402 {
403 getline(fin, line);
404 conditional(line, fin, "<?ifprevious?>", false);
405 conditional(line, fin, "<?ifpage?>", false);
406 conditional(line, fin, "<?ifnum?>", false);
407 conditional(line, fin, "<?ifnext?>", page + 2 <= numPages);
408
409 char* cnext = new char[1024];
410
411 sprintf(cnext, "%u", (page + 2));
412
413 string next = cnext;
414
415 delete [] cnext;
416
417 tag(line, "<?query?>", query);
418 tag(line, "<?next?>", next);
419
420 lines += line + "\n";
421 }
422
423 fin.close();
424
425 return lines;
426 }
427
428 string Outputer::range(unsigned page)
429 {
430 unsigned bottom = page * 10 + 1;
431 unsigned top = numWebpages > page * 10 + 10 ? page * 10 + 10 : numWebpages;
432
433 char* cbottom = new char[1024];
434 char* ctop = new char[1024];
435
436 sprintf(cbottom, "%u", bottom);
437 sprintf(ctop, "%u", top);
438
439 string range = string(cbottom) + " - " + ctop;
440
441 delete [] cbottom;
442 delete [] ctop;
443
444 return range;
445 }
446
447 string Outputer::total()
448 {
449 char* ctotal = new char[1024];
450
451 sprintf(ctotal, "%u", numWebpages);
452
453 string total = ctotal;
454
455 delete [] ctotal;
456
457 return total;
458 }
459
460 string Outputer::duration()
461 {
462 char* ctime = new char[1024];
463 sprintf(ctime, "%.2f", time);
464
465 string duration = ctime;
466
467 delete [] ctime;
468
469 return duration;
470 }
471
472 string Outputer::manycommon(vector<string> common)
473 {
474 string line;
475
476 for (int index = 0; index < common.size(); index++)
477 {
478 line += common[index];
479
480 if (index != common.size() - 1) line += ' ';
481 }
482
483 return line;
484 }
485
486 void Outputer::tag(string& line, char* tag, string& replacement)
487 {
488 int begin = 0;
489 while (begin < line.length())
490 {
491 int spot = line.find(tag, begin);
492
493 if (spot != string::npos)
494 {
495 line.replace(spot, strlen(tag), replacement);
496 }
497 else
498 {
499 break;
500 }
501
502 begin = spot + replacement.length();
503 }
504 }
505
506 void Outputer::conditional(string& line, ifstream& fin, char* tag, bool
507 condition)
508 {
509 unsigned begin = 0;
510 while (begin < line.length())
511 {
512 unsigned start = line.find(tag, begin);
513 unsigned finish = line.find("<?endif?>", start);
514
515 if (start == string::npos) break;
516
517 string next;
518 while (finish == string::npos)
519 {
520 getline(fin, next);
521 line += '\n' + next;
522 finish = line.find("<?endif?>", start);
523 }
524
525 if (condition)
526 {
527 line.erase(start, strlen(tag));
528 line.erase(finish - strlen(tag), 9);
529
530 begin = finish - strlen(tag) - 9;
531 }
532 else
533 {
534 line.erase(start, finish - start + 9);
535
536 begin = start;
537 }
538 }
539 }