// ---------------------------------------------------------------------------
// Reader's guide to this translation unit (added in review; code below is
// unchanged).
//
// NOTE(review): this text looks flattened/damaged by extraction -- confirm
// against the original before building:
//   * the header comment, the #includes and the first definitions share one
//     physical line that starts with "//";
//   * template argument lists appear to be missing (e.g. "vector& pages",
//     "multimap items", "pair >(...)", "ext::Handle document");
//   * the string literals streamed by newMap(const string&) appear to have
//     lost part of their content.
//
// main(argc, argv)
//   Stores argv[0] in the global `program`. Scans the remaining arguments:
//   "-index=<value>" sets siteIndex, "-map=<value>" sets siteMap (both taken
//   from matcher[1]), and "-D" sets the global `debug` to true. If both
//   siteIndex and siteMap are non-empty it constructs a SiteMapper, whose
//   constructor does all the work; otherwise it prints the usage line to
//   cout. Returns 0 in either case.
//
// SiteMapper::SiteMapper(siteIndex, siteMap)
//   Runs, in this order: oldMap(siteMap), newIndex(siteIndex),
//   newMap(siteMap).
//
// SiteMapper::oldMap(const string& siteMap)
//   Parses siteMap with xml::Parse, selects "page"/"section"/"list" from the
//   document, stores the document's "comment()" selection in the member
//   `comment`, then hands the member `pages` and the list to the overload
//   below.
//
// SiteMapper::oldMap(vector& pages, xml::Node* list)
//   For each "item" under `list`: builds a Page from "link"/"@address" and
//   "link", recurses into the item's own "list" (when that handle is not
//   empty) to fill page.getChildren(), then appends the page to `pages`.
//
// SiteMapper::newIndex(const string& siteIndex)
//   Parses siteIndex and walks "index"/"page". For each node it reads
//   "address" (with ':' + port appended when "port" is non-empty), "path" and
//   "title", and builds Page(address, path, title). The page is then filtered
//   through two patterns (Page's operator== against a Matcher is defined
//   elsewhere; presumably it matches the page title -- TODO confirm):
//     - blog pattern ("...Website | Douglas Thrift's Blog: (.+)"): the page
//       is kept only if matcher[1] matches "^\w+\s\d\d\d\d\sArchives$";
//       otherwise it is skipped;
//     - website pattern ("Douglas Thrift's.+Website | (.+)"): the page is
//       kept;
//     - anything else is skipped.
//   A kept page gets matcher[1] as its title. If newIndex(pages, page)
//   returns true nothing more is done for it; otherwise it is inserted into
//   the member `newPages` under page.getAddress(), and inside that entry
//   under page.getChildOf().
//
// SiteMapper::newIndex(vector& pages, Page& page) -> bool
//   Looks through `pages` for entries that compare equal to
//   page.getAddress(). For such an entry:
//     - if the entry also compares equal to `page`, `page` takes over the
//       entry's children, replaces the entry, and the function returns true;
//     - else if a Matcher built from '^' + the entry's path compares equal to
//       `page`, the page's childOf is set to matcher[0], the title is
//       replaced by matcher[1] when it matches
//       '^' + <entry title> + "\s\|\s(.+)$", and the result of recursing into
//       the entry's children is returned.
//   Returns false when no entry took either branch.
//
// SiteMapper::newMap(const string& siteMap)
//   Opens siteMap for writing with an ofstream (no check that the open
//   succeeded), streams the leading literals, then for every entry of the
//   member `pages`: if `newPages` has an entry for that page's address, calls
//   the overload below with the page's children, the page's path and that
//   entry; then writes pages[index](3) followed by '\n'. Finally streams the
//   trailing literals and closes the file.
//
// SiteMapper::newMap(vector& pages, const string& childOf, multimap& newPages)
//   First recurses into the children of every page already in `pages`, using
//   that page's path as childOf. Then appends every `newPages` value stored
//   under `childOf` to `pages` and erases that key. The recursion runs before
//   the append, so pages added by this call are not visited by it.
// ---------------------------------------------------------------------------
// Site Mapper // // Douglas Thrift // // $Id$ #include "SiteMapper.hpp" #include "Matcher.hpp" #include "Page.hpp" string program; bool debug(false); int main(int argc, char* argv[]) { program = argv[0]; string siteIndex, siteMap; for (int index(1); index < argc; index++) { string arg(argv[index]); Matcher matcher; if (arg == matcher("^-index=(.*)$")) { siteIndex = matcher[1]; } else if (arg == matcher("^-map=(.*)$")) { siteMap = matcher[1]; } else if (arg == "-D") { if (!debug) debug = true; } } if (siteIndex != "" && siteMap != "") { SiteMapper mapper(siteIndex, siteMap); } else { cout << "Usage: " << program << " -index=index -map=map [-D]\n"; } return 0; } SiteMapper::SiteMapper(const string& siteIndex, const string& siteMap) { oldMap(siteMap); newIndex(siteIndex); newMap(siteMap); } void SiteMapper::oldMap(const string& siteMap) { ext::Handle document(xml::Parse(siteMap)); ext::Handle list(*document/"page"/"section"/"list"); comment = *document/"comment()"; oldMap(pages, list); } void SiteMapper::oldMap(vector& pages, xml::Node* list) { xml::NodeSet nodes(*list/"item"); for (xml::NodeSet::Iterator node(nodes.Begin()); node != nodes.End(); ++node) { string url(**node/"link"/"@address"), title(**node/"link"); Page page(url, title); ext::Handle list(**node/"list"); if (!list.IsEmpty()) oldMap(page.getChildren(), list); pages.push_back(page); } } void SiteMapper::newIndex(const string& siteIndex) { ext::Handle document(xml::Parse(siteIndex)); xml::NodeSet nodes(*document/"index"/"page"); for (xml::NodeSet::Iterator node(nodes.Begin()); node != nodes.End(); ++node) { string address(**node/"address"); string port(**node/"port"); if (!port.empty()) { address += ':' + port; } string path(**node/"path"), title(**node/"title"); Page page(address, path, title); Matcher matcher; if (page == matcher(string("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDou") + "glas\\sThrift's\\sBlog:\\s(.+)$")) { if (Matcher("^\\w+\\s\\d\\d\\d\\d\\sArchives$") == matcher[1]) { 
page.setTitle(matcher[1]); if (newIndex(pages, page)) continue; } else continue; } else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$")) { page.setTitle(matcher[1]); if (newIndex(pages, page)) continue; } else continue; multimap items; newPages.insert(pair >(page.getAddress(), items)).first->second.insert(pair(page.getChildOf(), page)); } } bool SiteMapper::newIndex(vector& pages, Page& page) { for (unsigned index(0); index < pages.size(); ++index) { if (pages[index] == page.getAddress()) { Matcher matcher; if (pages[index] == page) { page.setChildren(pages[index].getChildren()); pages[index] = page; return true; } else if (matcher('^' + pages[index].getPath()) == page) { page.setChildOf(matcher[0]); if (matcher('^' + pages[index].getTitle() + "\\s\\|\\s(.+)$") == page) { page.setTitle(matcher[1]); } return newIndex(pages[index].getChildren(), page); } } } return false; } void SiteMapper::newMap(const string& siteMap) { ofstream fout(siteMap.c_str()); fout << "\n" << "\n" << "\n" << "\n" << "\n" << "\tSitemap\n" << "\t
\n" << "\t\t\n"; for (unsigned index(0); index < pages.size(); ++index) { if (newPages.find(pages[index].getAddress()) != newPages.end()) { newMap(pages[index].getChildren(), pages[index].getPath(), newPages.find(pages[index].getAddress())->second); } fout << pages[index](3) << '\n'; } fout << "\t\t\n" << "\t
\n" << "
\n"; fout.close(); } void SiteMapper::newMap(vector& pages, const string& childOf, multimap& newPages) { for (unsigned index(0); index < pages.size(); ++index) { newMap(pages[index].getChildren(), pages[index].getPath(), newPages); } for (multimap::iterator itor(newPages.lower_bound(childOf)); itor != newPages.upper_bound(childOf); itor++) { pages.push_back(itor->second); } newPages.erase(childOf); }