// Site Mapper
//
// Douglas Thrift
//
// $Id$

// NOTE(review): this copy of the file appears to have lost everything that
// was written between angle brackets: the two bare `#include` lines below
// have no target, and ext::Handle, ext::Vector, std::multimap and std::pair
// are used without template arguments. Confirm the intended types against
// SiteMapper.hpp / version control before building.

#include "SiteMapper.hpp"
#include "Matcher.hpp"
#include "Page.hpp"

#include
#include

// Name of the running executable; filled in by SiteMapperCommand::Run and
// used in the usage message.
ext::String program;

// Set to true by the -D command-line switch; enables diagnostic output on
// api::Cerr (see SiteMapper::oldMap).
bool debug(false);

// Command-line front end.
//
// Recognized arguments:
//   -index=<value>  stored in siteIndex
//   -map=<value>    stored in siteMap
//   -D              turns on the global `debug` flag
// Any other argument is silently ignored.
//
// The single global instance `mapper` is defined together with the struct;
// presumably the app::Application base is what makes the framework call
// Run() -- confirm against the app library.
struct SiteMapperCommand : public app::Application
{
	// Parses `args`, then either constructs a SiteMapper (which does all of
	// its work in its constructor) or prints a usage line to api::Cout.
	// Returns 0 in both cases.
	virtual int Run(const app::ArgumentList& args)
	{
		program = api::GetExecutablePath().GetName();

		ext::String siteIndex, siteMap;

		// _foreach is a project iteration macro (not defined in this file);
		// `arg` is dereferenced like an iterator over `args`.
		_foreach (app::ArgumentList, arg, args)
		{
			Matcher matcher;

			if (*arg == matcher("^-index=(.*)$"))
			{
				// Capture group 1 is the text after "-index=".
				siteIndex = matcher[1];
			}
			else if (*arg == matcher("^-map=(.*)$"))
			{
				// Capture group 1 is the text after "-map=".
				siteMap = matcher[1];
			}
			else if (*arg == "-D")
			{
				if (!debug) debug = true;
			}
		}

		// Both -index and -map must have been given a non-empty value.
		if (!siteIndex.IsEmpty() && !siteMap.IsEmpty())
		{
			// This local shadows the global `mapper` instance; constructing
			// it runs the whole read/merge/write sequence.
			SiteMapper mapper(siteIndex, siteMap);
		}
		else
		{
			api::Cout << "Usage: " << program << " -index=index -map=map [-D]\n";
		}

		return 0;
	}
} mapper;

// Runs the three phases in order: read the existing map from `siteMap`,
// merge in the pages listed in `siteIndex`, then write the result back to
// `siteMap`.
SiteMapper::SiteMapper(const ext::String& siteIndex, const ext::String& siteMap)
{
	oldMap(siteMap);
	newIndex(siteIndex);
	newMap(siteMap);
}

// Parses the existing site map named by `siteMap` and loads its page tree.
//
// Stores the result of the document's "comment()" query in `comment` (it is
// passed to xml.OutputComment when the map is rewritten), then walks the
// page/section/list element into `pages`.
// `comment` and `pages` are not declared in this file -- presumably
// SiteMapper members from SiteMapper.hpp.
void SiteMapper::oldMap(const ext::String& siteMap)
{
	ext::Handle document(xml::Parse(siteMap));
	ext::Handle list(*document/"page"/"section"/"list");

	comment = *document/"comment()";

	if (debug) api::Cerr << "comment = " << comment << ios::NewLine;

	oldMap(pages, list);
}

// Recursively converts the "item" children of `list` into Page objects and
// appends them to `pages`.
//
// For each item, the URL is read from link/@address and the title from the
// link node itself. If the item has a nested "list", that list is loaded
// into the new page's children before the page is appended.
void SiteMapper::oldMap(ext::Vector& pages, xml::Node* list)
{
	xml::NodeSet nodes(*list/"item");

	_foreach (xml::NodeSet, node, nodes)
	{
		ext::String url(**node/"link"/"@address"), title(**node/"link");
		Page page(url, title);
		// Note: this local `list` shadows the `list` parameter for the rest
		// of the loop body.
		ext::Handle list(**node/"list");

		if (!list.IsEmpty()) oldMap(page.getChildren(), list);

		pages.InsertLast(page);
	}
}

// Parses the site index named by `siteIndex` and merges each index/page
// entry into the data loaded by oldMap.
//
// For each entry a Page is built from its address (with ":port" appended
// when a port is present), path and title. The page is then tested against
// two patterns; what part of the page the Matcher is compared with is
// decided by operators not visible here, but since capture group 1 is used
// as the new title it is presumably the title -- confirm in Page/Matcher.
//
//   * Blog pattern: kept only when the captured remainder matches
//     "^\w+\s\d{4}\sArchives$"; every other blog page is skipped.
//   * General website pattern: always kept.
//   * Anything matching neither pattern is skipped.
//
// A kept page gets the captured text as its title and is offered to
// newIndex(pages, page). If that returns true, nothing more is done for the
// entry. Otherwise the page is recorded in `newPages`, keyed first by its
// address and then by its getChildOf() value, for insertion by newMap.
void SiteMapper::newIndex(const ext::String& siteIndex)
{
	ext::Handle document(xml::Parse(siteIndex));
	xml::NodeSet nodes(*document/"index"/"page");

	_foreach (xml::NodeSet, node, nodes)
	{
		ios::String address(**node/"address");
		ext::String port(**node/"port");

		if (!port.IsEmpty()) address << ":" << port;

		ext::String path(**node/"path"), title(**node/"title");
		Page page(address, path, title);
		Matcher matcher;

		if (page == matcher("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDouglas\\sThrift's\\sBlog:\\s(.+)$"))
		{
			if (Matcher("^\\w+\\s\\d{4}\\sArchives$") == matcher[1])
			{
				page.setTitle(matcher[1]);

				if (newIndex(pages, page)) continue;
			}
			else continue;
		}
		else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"))
		{
			page.setTitle(matcher[1]);

			if (newIndex(pages, page)) continue;
		}
		else continue;

		// Only reached for kept pages that newIndex(pages, page) did not
		// place. map::insert leaves an existing entry for this address
		// untouched and returns an iterator to it, so `items` is used only
		// when the address is new.
		// NOTE(review): the std::multimap / std::pair template arguments are
		// missing in this copy (`std::pair >(` does not parse as written).
		std::multimap items;

		newPages.insert(std::pair >(page.getAddress(), items)).first->second.insert(std::pair(page.getChildOf(), page));
	}
}

// Tries to place `page` in the existing tree rooted at `pages`.
//
// Only entries for which `*page_ == page.getAddress()` holds are considered
// (an address comparison, judging by the argument; the operator itself is
// not visible here). For the first such entry that also satisfies one of
// the two tests below, the function returns immediately:
//
//   * `*page_ == page`: the entry is replaced by `page`, which first takes
//     over the entry's children; "Updated: <url>" is printed; returns true.
//   * the pattern "^" + entry path matches `page`: `page` is marked as a
//     child of the matched text (matcher[0]); if its title has the form
//     "<entry title> | <rest>" the title is shortened to <rest>; the search
//     then continues in the entry's children and that result is returned.
//     Later siblings are not examined in this case.
//
// Returns false when no entry qualifies. `page` may still have been
// modified (childOf / title) on the way down even when false is returned.
//
// NOTE(review): paths and titles are concatenated into the patterns
// unescaped, so regex metacharacters in them would be interpreted --
// confirm that this is acceptable for the site's data.
bool SiteMapper::newIndex(ext::Vector& pages, Page& page)
{
	// _mforeach is a project macro (not defined in this file); page_ is
	// written through below, so it presumably yields a mutable iterator.
	_mforeach (ext::Vector, page_, pages)
	{
		if (*page_ == page.getAddress())
		{
			Matcher matcher;

			if (*page_ == page)
			{
				// Keep the existing subtree when overwriting the entry.
				page.setChildren(page_->getChildren());

				*page_ = page;

				api::Cout << "Updated: " << page.getUrl() << ios::NewLine;

				return true;
			}
			else if (matcher("^" + page_->getPath()) == page)
			{
				page.setChildOf(matcher[0]);

				if (matcher("^" + page_->getTitle() + "\\s\\|\\s(.+)$") == page) page.setTitle(matcher[1]);

				return newIndex(page_->getChildren(), page);
			}
		}
	}

	return false;
}

// Writes the merged site map to the file named by `siteMap`.
//
// Output order: two line breaks interleaved with two string literals, the
// saved `comment`, then a "page" element containing a "title" element with
// the text "Sitemap" and a "section"/"list" pair holding every top-level
// page. Before a top-level page is written, any pending new pages recorded
// for its address are merged into its subtree.
void SiteMapper::newMap(const ext::String& siteMap)
{
	// Both writers are constructed on the same underlying file object.
	api::FileWriter file(siteMap);
	ios::FormatWriter fout(file);
	xml::TextWriter xml(file);

	// NOTE(review): both string literals are empty in this copy; if they
	// once held markup (e.g. an XML prolog) it is not visible here --
	// confirm against version control.
	fout << ios::NewLine << "" << ios::NewLine << "";

	xml.OutputComment(comment);

	// xml::ScopeElement presumably closes its element when it goes out of
	// scope (no explicit close appears in this function) -- confirm.
	xml::ScopeElement page(xml, "page");

	xml.OpenElement("title");
	xml.OutputText("Sitemap");
	xml.CloseElement();

	xml::ScopeElement section(xml, "section"), list(xml, "list");

	// The loop variable `page` shadows the ScopeElement named `page` above.
	_mforeach (ext::Vector, page, pages)
	{
		// Merge pending pages for this address, if any, before emitting the
		// page via operator<<.
		if (newPages.find(page->getAddress()) != newPages.end()) newMap(page->getChildren(), page->getPath(), newPages.find(page->getAddress())->second);

		xml << *page;
	}
}

// Recursively inserts pending pages into the tree.
//
// `pages`    - the children of the page whose path is `childOf`.
// `childOf`  - key selecting which pending pages belong at this level.
// `newPages` - pending pages for one address, keyed by their childOf
//              value; this parameter shadows the identifier `newPages`
//              used in the other functions of this file.
//
// Descendants are processed first: each existing page's children are
// visited with that page's path as the key. Afterwards every pending page
// keyed by `childOf` is appended to `pages` ("Added: <url>" is printed for
// each) and the key is erased. Pages appended here are not themselves
// descended into during this call.
void SiteMapper::newMap(ext::Vector& pages, const ext::String& childOf, std::multimap& newPages)
{
	_mforeach (ext::Vector, page, pages) newMap(page->getChildren(), page->getPath(), newPages);

	// NOTE(review): template arguments of std::multimap are missing in this
	// copy of the file.
	typedef std::multimap MultiMap;

	// _for is a project macro (not defined in this file); presumably it
	// iterates `itor` over [lower_bound(childOf), upper_bound(childOf)).
	_for (MultiMap::const_iterator, itor, newPages.lower_bound(childOf), newPages.upper_bound(childOf))
	{
		api::Cout << "Added: " << itor->second.getUrl() << ios::NewLine;

		pages.InsertLast(itor->second);
	}

	newPages.erase(childOf);
}