ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapper/SiteMapper.cpp
Revision: 249
Committed: 2004-09-11T23:21:11-07:00 (20 years, 9 months ago) by Douglas Thrift
File size: 4993 byte(s)
Log Message:
Worky, worky!

File Contents

# Content
1 // Site Mapper
2 //
3 // Douglas Thrift
4 //
5 // $Id$
6
7 #include "SiteMapper.hpp"
8 #include "Matcher.hpp"
9 #include "Page.hpp"
10
11 #include <menes-api/exename.hpp>
12 #include <menes-app/application.hpp>
13
14 #include <cassert>
15
16 string program;
17 bool debug(false);
18
19 struct SiteMapperCommand : public app::Application
20 {
21 virtual int Run(const app::ArgumentList& args)
22 {
23 program = api::GetExecutableName();
24
25 string siteIndex, siteMap;
26
27 for (size_t index(0); index < args.GetSize(); index++)
28 {
29 string arg(args[index]);
30 Matcher matcher;
31
32 if (arg == matcher("^-index=(.*)$"))
33 {
34 siteIndex = matcher[1];
35 }
36 else if (arg == matcher("^-map=(.*)$"))
37 {
38 siteMap = matcher[1];
39 }
40 else if (arg == "-D")
41 {
42 if (!debug) debug = true;
43 }
44 }
45
46 if (!siteIndex.empty() && !siteMap.empty())
47 {
48 SiteMapper mapper(siteIndex, siteMap);
49 }
50 else
51 {
52 cout << "Usage: " << program << " -index=index -map=map [-D]\n";
53 }
54
55 return 0;
56 }
57 } mapper;
58
59 SiteMapper::SiteMapper(const string& siteIndex, const string& siteMap)
60 {
61 oldMap(siteMap);
62 newIndex(siteIndex);
63 newMap(siteMap);
64 }
65
66 void SiteMapper::oldMap(const string& siteMap)
67 {
68 ext::Handle<xml::Document> document(xml::Parse(siteMap));
69 ext::Handle<xml::Node> list(*document/"page"/"section"/"list");
70
71 comment = ext::String(*document/"comment()");
72
73 if (debug) cerr << "comment = " << comment << '\n';
74
75 assert(comment == " Cheese! ");
76
77 oldMap(pages, list);
78 }
79
80 void SiteMapper::oldMap(vector<Page>& pages, xml::Node* list)
81 {
82 xml::NodeSet nodes(*list/"item");
83
84 for (xml::NodeSet::Iterator node(nodes.Begin()); node != nodes.End();
85 ++node)
86 {
87 string url(ext::String(**node/"link"/"@address")),
88 title(ext::String(**node/"link"));
89 Page page(url, title);
90 ext::Handle<xml::Node> list(**node/"list");
91
92 if (!list.IsEmpty()) oldMap(page.getChildren(), list);
93
94 pages.push_back(page);
95 }
96 }
97
98 void SiteMapper::newIndex(const string& siteIndex)
99 {
100 ext::Handle<xml::Document> document(xml::Parse(siteIndex));
101 xml::NodeSet nodes(*document/"index"/"page");
102
103 for (xml::NodeSet::Iterator node(nodes.Begin()); node != nodes.End();
104 ++node)
105 {
106 string address(ext::String(**node/"address")),
107 port(ext::String(**node/"port"));
108
109 if (!port.empty())
110 {
111 address += ':' + port;
112 }
113
114 string path(ext::String(**node/"path")),
115 title(ext::String(**node/"title"));
116 Page page(address, path, title);
117 Matcher matcher;
118
119 if (page == matcher(string("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDou")
120 + "glas\\sThrift's\\sBlog:\\s(.+)$"))
121 {
122 if (Matcher("^\\w+\\s\\d{4}\\sArchives$") == matcher[1])
123 {
124 page.setTitle(matcher[1]);
125
126 if (newIndex(pages, page)) continue;
127 }
128 else continue;
129 }
130 else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"))
131 {
132 page.setTitle(matcher[1]);
133
134 if (newIndex(pages, page)) continue;
135 }
136 else continue;
137
138 multimap<string, Page> items;
139
140 newPages.insert(pair<string, multimap<string, Page>
141 >(page.getAddress(), items)).first->second.insert(pair<string,
142 Page>(page.getChildOf(), page));
143 }
144 }
145
146 bool SiteMapper::newIndex(vector<Page>& pages, Page& page)
147 {
148 for (unsigned index(0); index < pages.size(); ++index)
149 {
150 if (pages[index] == page.getAddress())
151 {
152 Matcher matcher;
153
154 if (pages[index] == page)
155 {
156 page.setChildren(pages[index].getChildren());
157
158 pages[index] = page;
159
160 cout << "Updated: " << page.getUrl() << '\n';
161
162 return true;
163 }
164 else if (matcher('^' + pages[index].getPath()) == page)
165 {
166 page.setChildOf(matcher[0]);
167
168 if (matcher('^' + pages[index].getTitle() + "\\s\\|\\s(.+)$")
169 == page)
170 {
171 page.setTitle(matcher[1]);
172 }
173
174 return newIndex(pages[index].getChildren(), page);
175 }
176 }
177 }
178
179 return false;
180 }
181
182 void SiteMapper::newMap(const string& siteMap)
183 {
184 ofstream fout(siteMap.c_str());
185
186 fout << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
187 << "<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl"
188 << "\"?>\n"
189 << "<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">\n"
190 << "<!--" << comment << "-->\n"
191 << "<page>\n"
192 << "\t<title>Sitemap</title>\n"
193 << "\t<section>\n"
194 << "\t\t<list>\n";
195
196 for (unsigned index(0); index < pages.size(); ++index)
197 {
198 if (newPages.find(pages[index].getAddress()) != newPages.end())
199 {
200 newMap(pages[index].getChildren(), pages[index].getPath(),
201 newPages.find(pages[index].getAddress())->second);
202 }
203
204 fout << pages[index](3) << '\n';
205 }
206
207 fout << "\t\t</list>\n"
208 << "\t</section>\n"
209 << "</page>\n";
210
211 fout.close();
212 }
213
214 void SiteMapper::newMap(vector<Page>& pages, const string& childOf,
215 multimap<string, Page>& newPages)
216 {
217 for (unsigned index(0); index < pages.size(); ++index)
218 {
219 newMap(pages[index].getChildren(), pages[index].getPath(), newPages);
220 }
221
222 for (multimap<string, Page>::iterator itor(newPages.lower_bound(childOf));
223 itor != newPages.upper_bound(childOf); itor++)
224 {
225 cout << "Added: " << itor->second.getUrl() << '\n';
226
227 pages.push_back(itor->second);
228 }
229
230 newPages.erase(childOf);
231 }

Properties

Name Value
svn:eol-style native
svn:keywords Id