ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapper/SiteMapper.cpp
Revision: 250
Committed: 2004-09-11T23:21:58-07:00 (20 years, 9 months ago) by Douglas Thrift
File size: 4939 byte(s)
Log Message:
That assert would be bad.

File Contents

# Content
1 // Site Mapper
2 //
3 // Douglas Thrift
4 //
5 // $Id$
6
7 #include "SiteMapper.hpp"
8 #include "Matcher.hpp"
9 #include "Page.hpp"
10
11 #include <menes-api/exename.hpp>
12 #include <menes-app/application.hpp>
13
14 string program;
15 bool debug(false);
16
17 struct SiteMapperCommand : public app::Application
18 {
19 virtual int Run(const app::ArgumentList& args)
20 {
21 program = api::GetExecutableName();
22
23 string siteIndex, siteMap;
24
25 for (size_t index(0); index < args.GetSize(); index++)
26 {
27 string arg(args[index]);
28 Matcher matcher;
29
30 if (arg == matcher("^-index=(.*)$"))
31 {
32 siteIndex = matcher[1];
33 }
34 else if (arg == matcher("^-map=(.*)$"))
35 {
36 siteMap = matcher[1];
37 }
38 else if (arg == "-D")
39 {
40 if (!debug) debug = true;
41 }
42 }
43
44 if (!siteIndex.empty() && !siteMap.empty())
45 {
46 SiteMapper mapper(siteIndex, siteMap);
47 }
48 else
49 {
50 cout << "Usage: " << program << " -index=index -map=map [-D]\n";
51 }
52
53 return 0;
54 }
55 } mapper;
56
57 SiteMapper::SiteMapper(const string& siteIndex, const string& siteMap)
58 {
59 oldMap(siteMap);
60 newIndex(siteIndex);
61 newMap(siteMap);
62 }
63
64 void SiteMapper::oldMap(const string& siteMap)
65 {
66 ext::Handle<xml::Document> document(xml::Parse(siteMap));
67 ext::Handle<xml::Node> list(*document/"page"/"section"/"list");
68
69 comment = ext::String(*document/"comment()");
70
71 if (debug) cerr << "comment = " << comment << '\n';
72
73 oldMap(pages, list);
74 }
75
76 void SiteMapper::oldMap(vector<Page>& pages, xml::Node* list)
77 {
78 xml::NodeSet nodes(*list/"item");
79
80 for (xml::NodeSet::Iterator node(nodes.Begin()); node != nodes.End();
81 ++node)
82 {
83 string url(ext::String(**node/"link"/"@address")),
84 title(ext::String(**node/"link"));
85 Page page(url, title);
86 ext::Handle<xml::Node> list(**node/"list");
87
88 if (!list.IsEmpty()) oldMap(page.getChildren(), list);
89
90 pages.push_back(page);
91 }
92 }
93
94 void SiteMapper::newIndex(const string& siteIndex)
95 {
96 ext::Handle<xml::Document> document(xml::Parse(siteIndex));
97 xml::NodeSet nodes(*document/"index"/"page");
98
99 for (xml::NodeSet::Iterator node(nodes.Begin()); node != nodes.End();
100 ++node)
101 {
102 string address(ext::String(**node/"address")),
103 port(ext::String(**node/"port"));
104
105 if (!port.empty())
106 {
107 address += ':' + port;
108 }
109
110 string path(ext::String(**node/"path")),
111 title(ext::String(**node/"title"));
112 Page page(address, path, title);
113 Matcher matcher;
114
115 if (page == matcher(string("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDou")
116 + "glas\\sThrift's\\sBlog:\\s(.+)$"))
117 {
118 if (Matcher("^\\w+\\s\\d{4}\\sArchives$") == matcher[1])
119 {
120 page.setTitle(matcher[1]);
121
122 if (newIndex(pages, page)) continue;
123 }
124 else continue;
125 }
126 else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"))
127 {
128 page.setTitle(matcher[1]);
129
130 if (newIndex(pages, page)) continue;
131 }
132 else continue;
133
134 multimap<string, Page> items;
135
136 newPages.insert(pair<string, multimap<string, Page>
137 >(page.getAddress(), items)).first->second.insert(pair<string,
138 Page>(page.getChildOf(), page));
139 }
140 }
141
142 bool SiteMapper::newIndex(vector<Page>& pages, Page& page)
143 {
144 for (unsigned index(0); index < pages.size(); ++index)
145 {
146 if (pages[index] == page.getAddress())
147 {
148 Matcher matcher;
149
150 if (pages[index] == page)
151 {
152 page.setChildren(pages[index].getChildren());
153
154 pages[index] = page;
155
156 cout << "Updated: " << page.getUrl() << '\n';
157
158 return true;
159 }
160 else if (matcher('^' + pages[index].getPath()) == page)
161 {
162 page.setChildOf(matcher[0]);
163
164 if (matcher('^' + pages[index].getTitle() + "\\s\\|\\s(.+)$")
165 == page)
166 {
167 page.setTitle(matcher[1]);
168 }
169
170 return newIndex(pages[index].getChildren(), page);
171 }
172 }
173 }
174
175 return false;
176 }
177
178 void SiteMapper::newMap(const string& siteMap)
179 {
180 ofstream fout(siteMap.c_str());
181
182 fout << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
183 << "<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl"
184 << "\"?>\n"
185 << "<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">\n"
186 << "<!--" << comment << "-->\n"
187 << "<page>\n"
188 << "\t<title>Sitemap</title>\n"
189 << "\t<section>\n"
190 << "\t\t<list>\n";
191
192 for (unsigned index(0); index < pages.size(); ++index)
193 {
194 if (newPages.find(pages[index].getAddress()) != newPages.end())
195 {
196 newMap(pages[index].getChildren(), pages[index].getPath(),
197 newPages.find(pages[index].getAddress())->second);
198 }
199
200 fout << pages[index](3) << '\n';
201 }
202
203 fout << "\t\t</list>\n"
204 << "\t</section>\n"
205 << "</page>\n";
206
207 fout.close();
208 }
209
210 void SiteMapper::newMap(vector<Page>& pages, const string& childOf,
211 multimap<string, Page>& newPages)
212 {
213 for (unsigned index(0); index < pages.size(); ++index)
214 {
215 newMap(pages[index].getChildren(), pages[index].getPath(), newPages);
216 }
217
218 for (multimap<string, Page>::iterator itor(newPages.lower_bound(childOf));
219 itor != newPages.upper_bound(childOf); itor++)
220 {
221 cout << "Added: " << itor->second.getUrl() << '\n';
222
223 pages.push_back(itor->second);
224 }
225
226 newPages.erase(childOf);
227 }

Properties

Name Value
svn:eol-style native
svn:keywords Id