ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/Outputer.cpp
Revision: 1
Committed: 2002-12-04T20:22:59-08:00 (22 years, 6 months ago) by douglas
File size: 12640 byte(s)
Log Message:
Initial revision

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4     * Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine Outputer
46     //
47     // Douglas Thrift
48     //
49     // Outputer.cpp
50    
51     #include "Outputer.h"
52    
53     Outputer::Outputer(string& headerFile, string& bodyFile, string& footerFile,
54     string& notfoundFile, string& pagesFile)
55     {
56     this->headerFile = headerFile;
57     this->bodyFile = bodyFile;
58     this->footerFile = footerFile;
59     this->notfoundFile = notfoundFile;
60     this->pagesFile = pagesFile;
61     }
62    
63     void Outputer::output(Searcher& searcher, unsigned page)
64     {
65     MultiSet pagesSet = searcher.getPages();
66     numWebpages = pagesSet.size();
67     numPages = (numWebpages + 9) / 10;
68     string query = searcher.getQueryString();
69     vector<string> common = searcher.getCommonUsed();
70    
71     MultiSetIterator itor = pagesSet.begin();
72    
73     for (int count = 0; count < page * 10 && itor != pagesSet.end(); count++)
74     {
75     itor++;
76     }
77    
78     for (int index = 0; index < 10 && itor != pagesSet.end(); index++, itor++)
79     {
80     webpages.push_back(*itor);
81     }
82    
83     this->query = searcher.getQuery().size() > 0;
84     results = webpages.size() > 0;
85     time = searcher.time();
86    
87     if (debug)
88     {
89     cerr << "query = " << (this->query ? "true" : "false") << "\n"
90     << "results = " << (results ? "true" : "false") << "\n"
91     << "time = " << duration() << "\n";
92     }
93    
94     entities(query, '&', "&amp;");
95     entities(query, '\"', "&quot;");
96     entities(query, '<', "&lt;");
97     entities(query, '>', "&gt;");
98    
99     string ignore = searcher.getIgnore();
100    
101     header(query, page, common, searcher.getAnd(), searcher.getOr(),
102     ignore);
103    
104     if (results)
105     {
106     body();
107     }
108     else if (this->query)
109     {
110     notfound(query, searcher.getQuery().size());
111     }
112    
113     footer(query, page, common, searcher.getAnd(), searcher.getOr(),
114     ignore);
115     }
116    
117     void Outputer::header(string& query, unsigned page, vector<string> common,
118     bool and_, bool or_, string& ignore)
119     {
120     ifstream fin(headerFile.c_str());
121    
122     string line;
123     while (fin.good())
124     {
125     getline(fin, line);
126    
127     conditional(line, fin, "<?ifquery?>", this->query);
128     conditional(line, fin, "<?ifresults?>", results);
129     conditional(line, fin, "<?ifor?>", or_);
130     conditional(line, fin, "<?ifand?>", and_);
131     conditional(line, fin, "<?ifignore?>", ignore != "");
132     conditional(line, fin, "<?ifcommon?>", common.size() == 1);
133     conditional(line, fin, "<?ifmanycommon?>", common.size() > 1);
134    
135     tag(line, "<?query?>", query);
136    
137     string fred = range(page);
138    
139     tag(line, "<?range?>", fred);
140    
141     string martha = total();
142    
143     tag(line, "<?total?>", martha);
144    
145     string george = duration();
146    
147     tag(line, "<?time?>", george);
148    
149     string derf = pages(query, page);
150    
151     tag(line, "<?pages?>", derf);
152     tag(line, "<?ignore?>", ignore);
153     tag(line, "<?common?>", common[0]);
154    
155     string elaine = manycommon(common);
156    
157     tag(line, "<?manycommon?>", elaine);
158    
159     cout << line << "\n";
160     }
161    
162     fin.close();
163     }
164    
165     void Outputer::body()
166     {
167     for (int index = 0; index < webpages.size(); index++)
168     {
169     Ranker webpage = webpages[index];
170     string title = webpage.getTitle();
171     if (title == "")
172     {
173     title = webpage.getURL();
174     entities(title, '&', "&amp;");
175     entities(title, '\"', "&quot;");
176     entities(title, '<', "&lt;");
177     entities(title, '>', "&gt;");
178     }
179     string address = webpage.getURL();
180     string sample = webpage.getSample();
181     string description = webpage.getDescription();
182    
183     char* csize = new char[1024];
184     sprintf(csize, "%.0fk", (double(webpage.getSize()) / double(1024)));
185    
186     string size = csize;
187    
188     delete [] csize;
189    
190     entities(address, '&', "&amp;");
191     entities(address, '\"', "&quot;");
192     entities(address, '<', "&lt;");
193     entities(address, '>', "&gt;");
194    
195     ifstream fin(bodyFile.c_str());
196    
197     string line;
198     while (fin.good())
199     {
200     getline(fin, line);
201    
202     conditional(line, fin, "<?ifdescription?>", description != "");
203    
204     tag(line, "<?address?>", address);
205     tag(line, "<?title?>", title);
206     tag(line, "<?sample?>", sample);
207     tag(line, "<?description?>", description);
208     tag(line, "<?size?>", size);
209    
210     cout << line << "\n";
211     }
212    
213     fin.close();
214     }
215     }
216    
217     void Outputer::footer(string& query, unsigned page, vector<string> common,
218     bool and_, bool or_, string& ignore)
219     {
220     ifstream fin(footerFile.c_str());
221    
222     string line;
223     while (fin.good())
224     {
225     getline(fin, line);
226    
227     conditional(line, fin, "<?ifquery?>", this->query);
228     conditional(line, fin, "<?ifresults?>", results);
229     conditional(line, fin, "<?ifor?>", or_);
230     conditional(line, fin, "<?ifand?>", and_);
231     conditional(line, fin, "<?ifignore?>", ignore != "");
232     conditional(line, fin, "<?ifcommon?>", common.size() == 1);
233     conditional(line, fin, "<?ifmanycommon?>", common.size() > 1);
234    
235     tag(line, "<?query?>", query);
236    
237     string fred = range(page);
238    
239     tag(line, "<?range?>", fred);
240    
241     string martha = total();
242    
243     tag(line, "<?total?>", martha);
244    
245     string george = duration();
246    
247     tag(line, "<?time?>", george);
248    
249     string derf = pages(query, page);
250    
251     tag(line, "<?pages?>", derf);
252     tag(line, "<?ignore?>", ignore);
253     tag(line, "<?common?>", common[0]);
254    
255     string elaine = manycommon(common);
256    
257     tag(line, "<?manycommon?>", elaine);
258    
259     cout << line << "\n";
260     }
261    
262     fin.close();
263     }
264    
265     void Outputer::notfound(string& query, unsigned keywords)
266     {
267     ifstream fin(notfoundFile.c_str());
268    
269     string line;
270     while (fin.good())
271     {
272     getline(fin, line);
273    
274     conditional(line, fin, "<?ifmany?>", keywords > 1);
275    
276     tag(line, "<?query?>", query);
277    
278     cout << line << "\n";
279     }
280    
281     fin.close();
282     }
283    
284     string Outputer::pages(string query, unsigned page)
285     {
286     entities(query, "&lt;", '<');
287     entities(query, "&gt;", '>');
288     entities(query, "&quot;", '\"');
289     entities(query, "&amp;", '&');
290    
291     entities(query, '%', "%25");
292     entities(query, '\t', "%09");
293     entities(query, ' ', "%20");
294     entities(query, '\"', "%22");
295     entities(query, '#', "%23");
296     entities(query, '$', "%24");
297     entities(query, '&', "%26");
298     entities(query, '\'', "%27");
299     entities(query, '+', "%2B");
300     entities(query, ',', "%2C");
301     entities(query, '/', "%2F");
302     entities(query, ':', "%3A");
303     entities(query, ';', "%3B");
304     entities(query, '<', "%3C");
305     entities(query, '=', "%3D");
306     entities(query, '>', "%3E");
307     entities(query, '?', "%3F");
308     entities(query, '@', "%40");
309     entities(query, '[', "%5B");
310     entities(query, ']', "%5D");
311     entities(query, '\\', "%5C");
312     entities(query, '^', "%5E");
313     entities(query, '`', "%60");
314     entities(query, '{', "%7B");
315     entities(query, '|', "%7C");
316     entities(query, '}', "%7D");
317     entities(query, '~', "%7E");
318    
319     string lines;
320    
321     ifstream fin(pagesFile.c_str());
322    
323     string line;
324     while (fin.good())
325     {
326     getline(fin, line);
327     conditional(line, fin, "<?ifprevious?>", page >= 1);
328     conditional(line, fin, "<?ifpage?>", false);
329     conditional(line, fin, "<?ifnum?>", false);
330     conditional(line, fin, "<?ifnext?>", false);
331    
332     char* cprevious = new char[1024];
333    
334     sprintf(cprevious, "%u", page);
335    
336     string previous = cprevious;
337    
338     delete [] cprevious;
339    
340     tag(line, "<?query?>", query);
341     tag(line, "<?previous?>", previous);
342    
343     lines += line + "\n";
344     }
345    
346     fin.close();
347     fin.clear();
348    
349     for (int index = 0; index < numPages; index++)
350     {
351     fin.open(pagesFile.c_str());
352    
353     while (fin.good())
354     {
355     getline(fin, line);
356     if (index == page)
357     {
358     conditional(line, fin, "<?ifprevious?>", false);
359     conditional(line, fin, "<?ifpage?>", true);
360     conditional(line, fin, "<?ifnum?>", false);
361     conditional(line, fin, "<?ifnext?>", false);
362    
363     char* cpage = new char[1024];
364    
365     sprintf(cpage, "%u", (index + 1));
366    
367     string spage = cpage;
368    
369     delete [] cpage;
370    
371     tag(line, "<?page?>", spage);
372     }
373     else
374     {
375     conditional(line, fin, "<?ifprevious?>", false);
376     conditional(line, fin, "<?ifpage?>", false);
377     conditional(line, fin, "<?ifnum?>", true);
378     conditional(line, fin, "<?ifnext?>", false);
379    
380     char* cnum = new char[1024];
381    
382     sprintf(cnum, "%u", (index + 1));
383    
384     string num = cnum;
385    
386     delete [] cnum;
387    
388     tag(line, "<?query?>", query);
389     tag(line, "<?num?>", num);
390     }
391    
392     lines += line + "\n";
393     }
394    
395     fin.close();
396     fin.clear();
397     }
398    
399     fin.open(pagesFile.c_str());
400    
401     while (fin.good())
402     {
403     getline(fin, line);
404     conditional(line, fin, "<?ifprevious?>", false);
405     conditional(line, fin, "<?ifpage?>", false);
406     conditional(line, fin, "<?ifnum?>", false);
407     conditional(line, fin, "<?ifnext?>", page + 2 <= numPages);
408    
409     char* cnext = new char[1024];
410    
411     sprintf(cnext, "%u", (page + 2));
412    
413     string next = cnext;
414    
415     delete [] cnext;
416    
417     tag(line, "<?query?>", query);
418     tag(line, "<?next?>", next);
419    
420     lines += line + "\n";
421     }
422    
423     fin.close();
424    
425     return lines;
426     }
427    
428     string Outputer::range(unsigned page)
429     {
430     unsigned bottom = page * 10 + 1;
431     unsigned top = numWebpages > page * 10 + 10 ? page * 10 + 10 : numWebpages;
432    
433     char* cbottom = new char[1024];
434     char* ctop = new char[1024];
435    
436     sprintf(cbottom, "%u", bottom);
437     sprintf(ctop, "%u", top);
438    
439     string range = string(cbottom) + " - " + ctop;
440    
441     delete [] cbottom;
442     delete [] ctop;
443    
444     return range;
445     }
446    
447     string Outputer::total()
448     {
449     char* ctotal = new char[1024];
450    
451     sprintf(ctotal, "%u", numWebpages);
452    
453     string total = ctotal;
454    
455     delete [] ctotal;
456    
457     return total;
458     }
459    
460     string Outputer::duration()
461     {
462     char* ctime = new char[1024];
463     sprintf(ctime, "%.2f", time);
464    
465     string duration = ctime;
466    
467     delete [] ctime;
468    
469     return duration;
470     }
471    
472     string Outputer::manycommon(vector<string> common)
473     {
474     string line;
475    
476     for (int index = 0; index < common.size(); index++)
477     {
478     line += common[index];
479    
480     if (index != common.size() - 1) line += ' ';
481     }
482    
483     return line;
484     }
485    
486     void Outputer::tag(string& line, char* tag, string& replacement)
487     {
488     int begin = 0;
489     while (begin < line.length())
490     {
491     int spot = line.find(tag, begin);
492    
493     if (spot != string::npos)
494     {
495     line.replace(spot, strlen(tag), replacement);
496     }
497     else
498     {
499     break;
500     }
501    
502     begin = spot + replacement.length();
503     }
504     }
505    
506     void Outputer::conditional(string& line, ifstream& fin, char* tag, bool
507     condition)
508     {
509     unsigned begin = 0;
510     while (begin < line.length())
511     {
512     unsigned start = line.find(tag, begin);
513     unsigned finish = line.find("<?endif?>", start);
514    
515     if (start == string::npos) break;
516    
517     string next;
518     while (finish == string::npos)
519     {
520     getline(fin, next);
521     line += '\n' + next;
522     finish = line.find("<?endif?>", start);
523     }
524    
525     if (condition)
526     {
527     line.erase(start, strlen(tag));
528     line.erase(finish - strlen(tag), 9);
529    
530     begin = finish - strlen(tag) - 9;
531     }
532     else
533     {
534     line.erase(start, finish - start + 9);
535    
536     begin = start;
537     }
538     }
539     }