/* ============================================================================ * Douglas Thrift's Search Engine License * * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved. * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. The end-user documentation included with the redistribution, if any, must * include the following acknowledgment: * * "This product includes software developed by Douglas Thrift * (http://computers.douglasthrift.net/searchengine/)." * * Alternately, this acknowledgment may appear in the software itself, if * and wherever such third-party acknowledgments normally appear. * * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not * be used to endorse or promote products derived from this software without * specific prior written permission. For written permission, please visit * http://www.douglasthrift.net/contact.cgi for contact information. * * 5. Products derived from this software may not be called "Douglas Thrift's * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their * name, without prior written permission. * * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ============================================================================ */ // Douglas Thrift's Search Engine Outputer // // Douglas Thrift // // $Id: Outputer.cpp,v 1.11 2003/07/20 01:46:12 douglas Exp $ #include "Outputer.h" Outputer::Outputer(const string& headerFile, const string& bodyFile, const string& footerFile, const string& notfoundFile, const string& pagesFile) { this->headerFile = headerFile; this->bodyFile = bodyFile; this->footerFile = footerFile; this->notfoundFile = notfoundFile; this->pagesFile = pagesFile; } void Outputer::output(Searcher& searcher, unsigned page) { MultiSet pagesSet = searcher.getPages(); numWebpages = pagesSet.size(); numPages = (numWebpages + 9) / 10; string query = searcher.getQueryString(); vector common = searcher.getCommonUsed(); MultiSetIterator itor = pagesSet.begin(); for (int count = 0; count < page * 10 && itor != pagesSet.end(); count++) { itor++; } for (int index = 0; index < 10 && itor != pagesSet.end(); index++, itor++) { webpages.push_back(*itor); } this->query = searcher.getQuery().size() > 0; results = webpages.size() > 0; time = searcher.time(); if (debug) { cerr << "query = " << this->query << "\n" << "results = " << results << "\n" << "time = " << duration() << "\n"; } entities(query, '&', "&"); entities(query, '\"', """); entities(query, '<', "<"); entities(query, '>', ">"); string ignore = searcher.getIgnore(); header(query, page, common, searcher.getAnd(), searcher.getOr(), ignore); if (results) { body(); } else if (this->query) { notfound(query, searcher.getQuery().size()); } footer(query, page, common, searcher.getAnd(), searcher.getOr(), ignore); } void Outputer::header(const string& query, unsigned page, vector common, bool and_, bool or_, const string& ignore) { ifstream fin(headerFile.c_str()); string line; while (fin.good()) { getline(fin, line); conditional(line, fin, "", this->query); conditional(line, fin, "", results); conditional(line, fin, "", or_); conditional(line, fin, "", and_); conditional(line, fin, "", ignore != ""); conditional(line, fin, "", common.size() == 1); conditional(line, fin, "", common.size() > 1); #ifndef _OpenSSL_ tag(line, "", programName + ' ' + programVersion + ' ' + platform()); #else tag(line, "", programName + ' ' + programVersion + ' ' + platform() + ' ' + openssl()); #endif tag(line, "", query); tag(line, "", range(page)); tag(line, "", total()); tag(line, "", duration()); tag(line, "", pages(query, page)); tag(line, "", ignore); tag(line, "", common[0]); tag(line, "", manycommon(common)); cout << line << (fin.good() ? "\n" : ""); } fin.close(); } void Outputer::body() { for (int index = 0; index < webpages.size(); index++) { Ranker webpage = webpages[index]; string title = webpage.getTitle(); if (title == "") { title = webpage.getURL(); entities(title, '&', "&"); entities(title, '\"', """); entities(title, '<', "<"); entities(title, '>', ">"); } string address = webpage.getURL(); string sample = webpage.getSample(); string description = webpage.getDescription(); ostringstream size; size.precision(0); size.setf(ios_base::fixed, ios_base::floatfield); size << double(webpage.getSize()) / double(1024) << "k"; entities(address, '&', "&"); entities(address, '\"', """); entities(address, '<', "<"); entities(address, '>', ">"); ifstream fin(bodyFile.c_str()); string line; while (fin.good()) { getline(fin, line); conditional(line, fin, "", description != ""); tag(line, "", address); tag(line, "", title); tag(line, "", sample); tag(line, "", description); tag(line, "", size.str()); cout << line << (fin.good() ? "\n" : ""); } fin.close(); } } void Outputer::footer(const string& query, unsigned page, vector common, bool and_, bool or_, const string& ignore) { ifstream fin(footerFile.c_str()); string line; while (fin.good()) { getline(fin, line); conditional(line, fin, "", this->query); conditional(line, fin, "", results); conditional(line, fin, "", or_); conditional(line, fin, "", and_); conditional(line, fin, "", ignore != ""); conditional(line, fin, "", common.size() == 1); conditional(line, fin, "", common.size() > 1); #ifndef _OpenSSL_ tag(line, "", programName + ' ' + programVersion + ' ' + platform()); #else tag(line, "", programName + ' ' + programVersion + ' ' + platform() + ' ' + openssl()); #endif tag(line, "", query); tag(line, "", range(page)); tag(line, "", total()); tag(line, "", duration()); tag(line, "", pages(query, page)); tag(line, "", ignore); tag(line, "", common[0]); tag(line, "", manycommon(common)); cout << line << (fin.good() ? "\n" : ""); } fin.close(); } void Outputer::notfound(const string& query, unsigned keywords) { ifstream fin(notfoundFile.c_str()); string line; while (fin.good()) { getline(fin, line); conditional(line, fin, "", keywords > 1); tag(line, "", query); cout << line << (fin.good() ? "\n" : ""); } fin.close(); } string Outputer::pages(string query, unsigned page) { entities(query, "<", '<'); entities(query, ">", '>'); entities(query, """, '\"'); entities(query, "&", '&'); entities(query, '%', "%25"); entities(query, '\t', "%09"); entities(query, ' ', "%20"); entities(query, '\"', "%22"); entities(query, '#', "%23"); entities(query, '$', "%24"); entities(query, '&', "%26"); entities(query, '\'', "%27"); entities(query, '+', "%2B"); entities(query, ',', "%2C"); entities(query, '/', "%2F"); entities(query, ':', "%3A"); entities(query, ';', "%3B"); entities(query, '<', "%3C"); entities(query, '=', "%3D"); entities(query, '>', "%3E"); entities(query, '?', "%3F"); entities(query, '@', "%40"); entities(query, '[', "%5B"); entities(query, ']', "%5D"); entities(query, '\\', "%5C"); entities(query, '^', "%5E"); entities(query, '`', "%60"); entities(query, '{', "%7B"); entities(query, '|', "%7C"); entities(query, '}', "%7D"); entities(query, '~', "%7E"); string lines; ifstream fin(pagesFile.c_str()); string line; while (fin.good()) { getline(fin, line); conditional(line, fin, "", page >= 1); conditional(line, fin, "", false); conditional(line, fin, "", false); conditional(line, fin, "", false); ostringstream previous; previous << page; tag(line, "", query); tag(line, "", previous.str()); lines += line + (fin.good() ? "\n" : ""); } fin.close(); fin.clear(); for (int index = 0; index < numPages; index++) { fin.open(pagesFile.c_str()); while (fin.good()) { getline(fin, line); if (index == page) { conditional(line, fin, "", false); conditional(line, fin, "", true); conditional(line, fin, "", false); conditional(line, fin, "", false); ostringstream current; current << index + 1; tag(line, "", current.str()); } else { conditional(line, fin, "", false); conditional(line, fin, "", false); conditional(line, fin, "", true); conditional(line, fin, "", false); ostringstream num; num << index + 1; tag(line, "", query); tag(line, "", num.str()); } lines += line + (fin.good() ? "\n" : ""); } fin.close(); fin.clear(); } fin.open(pagesFile.c_str()); while (fin.good()) { getline(fin, line); conditional(line, fin, "", false); conditional(line, fin, "", false); conditional(line, fin, "", false); conditional(line, fin, "", page + 2 <= numPages); ostringstream next; next << page + 2; tag(line, "", query); tag(line, "", next.str()); lines += line + (fin.good() ? "\n" : ""); } fin.close(); return lines; } string Outputer::range(unsigned page) { unsigned bottom = page * 10 + 1; unsigned top = numWebpages > page * 10 + 10 ? page * 10 + 10 : numWebpages; ostringstream range; range << bottom << " - " << top; return range.str(); } string Outputer::total() { ostringstream total; total << numWebpages; return total.str(); } string Outputer::duration() { ostringstream duration; duration.precision(2); duration << time; return duration.str(); } string Outputer::manycommon(vector common) { string line; for (int index = 0; index < common.size(); index++) { line += common[index]; if (index != common.size() - 1) line += ' '; } return line; } void Outputer::tag(string& line, char* tag, const string& replacement) { int begin = 0; while (begin < line.length()) { int spot = line.find(tag, begin); if (spot != string::npos) { line.replace(spot, strlen(tag), replacement); } else { break; } begin = spot + replacement.length(); } } void Outputer::conditional(string& line, ifstream& fin, char* tag, bool condition) { unsigned begin = 0; while (begin < line.length()) { unsigned start = line.find(tag, begin); unsigned finish = line.find("", start); if (start == string::npos) break; string next; while (finish == string::npos) { getline(fin, next); line += '\n' + next; finish = line.find("", start); } if (condition) { line.erase(start, strlen(tag)); line.erase(finish - strlen(tag), 9); begin = finish - strlen(tag) - 9; } else { line.erase(start, finish - start + 9); begin = start; } } }