// Site Mapper // // Douglas Thrift // // $Id$ #include "SiteMapper.hpp" #include "Matcher.hpp" #include "Page.hpp" string program; bool debug = false; int main(int argc, char* argv[]) { program = argv[0]; string siteIndex, siteMap; for (int index = 1; index < argc; index++) { string arg(argv[index]); Matcher matcher; if (arg == matcher("^-index=(.*)$")) { siteIndex = matcher[1]; } else if (arg == matcher("^-map=(.*)$")) { siteMap = matcher[1]; } else if (arg == "-D") { if (!debug) debug = true; } } if (siteIndex != "" && siteMap != "") { XMLPlatformUtils::Initialize(); XPathEvaluator::initialize(); SiteMapper mapper(siteIndex, siteMap); XPathEvaluator::terminate(); XMLPlatformUtils::Terminate(); } else { cout << "Usage: " << program << " -index=index -map=map [-D]\n"; } return 0; } SiteMapper::SiteMapper(const string& siteIndex, const string& siteMap) { oldMap(siteMap); index(siteIndex); newMap(siteMap); } void SiteMapper::oldMap(const string& siteMap) { support.setParserLiaison(&liaison); XalanDOMString file(siteMap.c_str()); LocalFileInputSource source(file.c_str()); XalanDocument* document = liaison.parseXMLStream(source); if (document == 0) return; XalanNode* list = evaluator.selectSingleNode(support, document, XalanDOMString("/page/section/list").c_str()); if (list == 0) return; item = evaluator.createXPath(XalanDOMString("item").c_str()); address = evaluator.createXPath(XalanDOMString("link/@address").c_str()); link = evaluator.createXPath(XalanDOMString("link").c_str()); list_ = evaluator.createXPath(XalanDOMString("list").c_str()); oldMap(pages, list); evaluator.destroyXPath(item); evaluator.destroyXPath(address); evaluator.destroyXPath(link); evaluator.destroyXPath(list_); } void SiteMapper::oldMap(list& pages, XalanNode* list) { NodeRefList nodes = evaluator.selectNodeList(support, list, *item); for (int index = 0; index < nodes.getLength(); index++) { XalanNode* node = nodes.item(index); ostringstream url, title; url << evaluator.evaluate(support, node, *address)->str(); title << evaluator.evaluate(support, node, *link)->str(); Page page(url.str(), title.str()); XalanNode* list = evaluator.selectSingleNode(support, node, *list_); if (list != 0) oldMap(page.getChildren(), list); pages.push_back(page); } } void SiteMapper::index(const string& siteIndex) { XalanDOMString file(siteIndex.c_str()); LocalFileInputSource source(file.c_str()); XalanDocument* document = liaison.parseXMLStream(source); if (document == 0) return; address = evaluator.createXPath(XalanDOMString("address").c_str()); port = evaluator.createXPath(XalanDOMString("port").c_str()); path = evaluator.createXPath(XalanDOMString("path").c_str()); title = evaluator.createXPath(XalanDOMString("title").c_str()); NodeRefList nodes = evaluator.selectNodeList(support, document, XalanDOMString("/index/page").c_str()); for (int index = 0; index < nodes.getLength(); index++) { XalanNode* node = nodes.item(index); ostringstream address; address << evaluator.evaluate(support, node, *(this->address))->str(); double port = evaluator.evaluate(support, node, *(this->port))->num(); if (port >= 0 && port <= 65535) { address << ':' << int(port); } ostringstream path, title; path << evaluator.evaluate(support, node, *(this->path))->str(); title << evaluator.evaluate(support, node, *(this->title))->str(); Page page(address.str(), path.str(), title.str()); Matcher matcher("^Douglas Thrift's.+Website \\| (.+)$"); if (page == matcher) { page.setTitle(matcher[1]); newPages.insert(page); } } evaluator.destroyXPath(address); evaluator.destroyXPath(port); evaluator.destroyXPath(path); evaluator.destroyXPath(title); } void SiteMapper::newMap(const string& siteMap) { // for (list::iterator itor = pages.begin(); itor != pages.end(); itor++) { cout << *itor << '\n'; } }