ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapper/SiteMapper.cpp
Revision: 328
Committed: 2004-12-13T20:13:49-08:00 (20 years, 6 months ago) by douglas
File size: 4767 byte(s)
Log Message:
Use Matcher the new way.

File Contents

# Content
1 // Site Mapper
2 //
3 // Douglas Thrift
4 //
5 // $Id$
6
7 #include "SiteMapper.hpp"
8 #include "Matcher/Matcher.hpp"
9
10 #include <menes-api/exename.hpp>
11 #include <menes-app/application.hpp>
12
13 ext::String program;
14 bool debug(false);
15
16 struct SiteMapperCommand : public app::Application
17 {
18 virtual int Run(const app::ArgumentList& args)
19 {
20 program = api::GetExecutablePath().GetName();
21
22 ext::String siteIndex, siteMap;
23
24 _foreach (app::ArgumentList, arg, args)
25 {
26 Matcher matcher;
27
28 if (*arg == matcher("^-index=(.*)$"))
29 {
30 siteIndex = matcher[1];
31 }
32 else if (*arg == matcher("^-map=(.*)$"))
33 {
34 siteMap = matcher[1];
35 }
36 else if (*arg == "-D")
37 {
38 if (!debug) debug = true;
39 }
40 }
41
42 if (!siteIndex.IsEmpty() && !siteMap.IsEmpty())
43 {
44 SiteMapper mapper(siteIndex, siteMap);
45 }
46 else
47 {
48 api::Cout << "Usage: " << program << " -index=index -map=map [-D]\n";
49 }
50
51 return 0;
52 }
53 } mapper;
54
55 SiteMapper::SiteMapper(const ext::String& siteIndex, const ext::String& siteMap)
56 {
57 oldMap(siteMap);
58 newIndex(siteIndex);
59 newMap(siteMap);
60 }
61
62 void SiteMapper::oldMap(const ext::String& siteMap)
63 {
64 ext::Handle<xml::Document> document(xml::Parse(siteMap));
65 ext::Handle<xml::Node> list(*document/"page"/"section"/"list");
66
67 comment = *document/"comment()";
68
69 if (debug) api::Cerr << "comment = " << comment << ios::NewLine;
70
71 oldMap(pages, list);
72 }
73
74 void SiteMapper::oldMap(ext::Vector<Page>& pages, xml::Node* list)
75 {
76 xml::NodeSet nodes(*list/"item");
77
78 _foreach (xml::NodeSet, node, nodes)
79 {
80 ext::String url(**node/"link"/"@address"), title(**node/"link");
81 Page page(url, title);
82 ext::Handle<xml::Node> list(**node/"list");
83
84 if (!list.IsEmpty()) oldMap(page.getChildren(), list);
85
86 pages.InsertLast(page);
87 }
88 }
89
90 void SiteMapper::newIndex(const ext::String& siteIndex)
91 {
92 ext::Handle<xml::Document> document(xml::Parse(siteIndex));
93 xml::NodeSet nodes(*document/"index"/"page");
94
95 _foreach (xml::NodeSet, node, nodes)
96 {
97 ios::String address(**node/"address");
98 ext::String port(**node/"port");
99
100 if (!port.IsEmpty()) address << ":" << port;
101
102 ext::String path(**node/"path"), title(**node/"title");
103 Page page(address, path, title);
104 Matcher matcher(PCRE_UNGREEDY | PCRE_DOTALL);
105
106 if (page == matcher("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDouglas\\sThrift's\\sBlog:\\s(.+)$"))
107 {
108 if (Matcher("^\\w+\\s\\d{4}\\sArchives$") == matcher[1])
109 {
110 page.setTitle(matcher[1]);
111
112 if (newIndex(pages, page)) continue;
113 }
114 else continue;
115 }
116 else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"))
117 {
118 page.setTitle(matcher[1]);
119
120 if (newIndex(pages, page)) continue;
121 }
122 else continue;
123
124 std::multimap<std::string, Page> items;
125
126 newPages.insert(std::pair<std::string, std::multimap<std::string, Page> >(page.getAddress(), items)).first->second.insert(std::pair<std::string, Page>(page.getChildOf(), page));
127 }
128 }
129
130 bool SiteMapper::newIndex(ext::Vector<Page>& pages, Page& page)
131 {
132 _mforeach (ext::Vector<Page>, page_, pages)
133 {
134 if (*page_ == page.getAddress())
135 {
136 Matcher matcher;
137
138 if (*page_ == page)
139 {
140 page.setChildren(page_->getChildren());
141
142 *page_ = page;
143
144 api::Cout << "Updated: " << page.getUrl() << ios::NewLine;
145
146 return true;
147 }
148 else if (matcher("^" + page_->getPath()) == page)
149 {
150 page.setChildOf(matcher[0]);
151
152 if (matcher("^" + page_->getTitle() + "\\s\\|\\s(.+)$") == page) page.setTitle(matcher[1]);
153
154 return newIndex(page_->getChildren(), page);
155 }
156 }
157 }
158
159 return false;
160 }
161
162 void SiteMapper::newMap(const ext::String& siteMap)
163 {
164 api::FileWriter file(siteMap);
165 ios::FormatWriter fout(file);
166 xml::TextWriter xml(file);
167
168 fout << ios::NewLine << "<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl\"?>" << ios::NewLine << "<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">";
169
170 xml.OutputComment(comment);
171
172 xml::ScopeElement page(xml, "page");
173
174 xml.OpenElement("title");
175 xml.OutputText("Sitemap");
176 xml.CloseElement();
177
178 xml::ScopeElement section(xml, "section"), list(xml, "list");
179
180 _mforeach (ext::Vector<Page>, page, pages)
181 {
182 if (newPages.find(page->getAddress()) != newPages.end()) newMap(page->getChildren(), page->getPath(), newPages.find(page->getAddress())->second);
183
184 xml << *page;
185 }
186 }
187
188 void SiteMapper::newMap(ext::Vector<Page>& pages, const ext::String& childOf, std::multimap<std::string, Page>& newPages)
189 {
190 _mforeach (ext::Vector<Page>, page, pages) newMap(page->getChildren(), page->getPath(), newPages);
191
192 typedef std::multimap<std::string, Page> MultiMap;
193
194 _for (MultiMap::const_iterator, itor, newPages.lower_bound(childOf), newPages.upper_bound(childOf))
195 {
196 api::Cout << "Added: " << itor->second.getUrl() << ios::NewLine;
197
198 pages.InsertLast(itor->second);
199 }
200
201 newPages.erase(childOf);
202 }

Properties

Name Value
svn:eol-style native
svn:keywords Id