ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapperOld/SiteMapper.cpp
Revision: 425
Committed: 2005-03-10T20:06:50-08:00 (20 years, 3 months ago) by douglas
File size: 6698 byte(s)
Log Message:
Wow, not that I even use this one anymore!

File Contents

# Content
1 // Site Mapper
2 //
3 // Douglas Thrift
4 //
5 // $Id$
6
7 #ifdef _WIN32
8 #pragma warning(disable:4503)
9 #endif
10
11 #include "SiteMapper.hpp"
12 #include "Matcher/Matcher.hpp"
13 #include "Page.hpp"
14
// Name this executable was invoked as; filled in by main() and echoed in the
// usage message.
std::string program;

// Set by the -D command line switch; enables diagnostic output on std::cerr.
bool debug = false;
17
18 int main(int argc, char* argv[])
19 {
20 program = argv[0];
21
22 std::string siteIndex, siteMap;
23
24 for (int index(1); index < argc; ++index)
25 {
26 std::string arg(argv[index]);
27 Matcher matcher;
28
29 if (arg == matcher("^-index=(.*)$"))
30 {
31 siteIndex = matcher[1];
32 }
33 else if (arg == matcher("^-map=(.*)$"))
34 {
35 siteMap = matcher[1];
36 }
37 else if (arg == "-D")
38 {
39 if (!debug) debug = true;
40 }
41 }
42
43 if (!siteIndex.empty() && !siteMap.empty())
44 {
45 XMLPlatformUtils::Initialize();
46 XPathEvaluator::initialize();
47
48 SiteMapper mapper(siteIndex, siteMap);
49
50 XPathEvaluator::terminate();
51 XMLPlatformUtils::Terminate();
52 }
53 else
54 {
55 std::cout << "Usage: " << program << " -index=index -map=map [-D]\n";
56 }
57
58 return 0;
59 }
60
61 SiteMapper::SiteMapper(const std::string& siteIndex, const std::string& siteMap)
62 {
63 oldMap(siteMap);
64 newIndex(siteIndex);
65 newMap(siteMap);
66 }
67
68 void SiteMapper::oldMap(const std::string& siteMap)
69 {
70 support.setParserLiaison(&liaison);
71
72 XalanDOMString file(siteMap.c_str());
73 LocalFileInputSource source(file.c_str());
74
75 XalanDocument* document = liaison.parseXMLStream(source);
76
77 if (document == 0) return;
78
79 XalanNode* list = evaluator.selectSingleNode(support, document,
80 XalanDOMString("/page/section/list").c_str());
81
82 if (list == 0) return;
83
84 comment << evaluator.evaluate(support, document,
85 XalanDOMString("comment()").c_str())->str();
86
87 if (debug) std::cerr << "comment = " << comment.str() << '\n';
88
89 item = evaluator.createXPath(XalanDOMString("item").c_str());
90 address = evaluator.createXPath(XalanDOMString("link/@address").c_str());
91 link = evaluator.createXPath(XalanDOMString("link").c_str());
92 this->list = evaluator.createXPath(XalanDOMString("list").c_str());
93
94 oldMap(pages, list);
95
96 evaluator.destroyXPath(item);
97 evaluator.destroyXPath(address);
98 evaluator.destroyXPath(link);
99 evaluator.destroyXPath(this->list);
100 }
101
102 void SiteMapper::oldMap(std::vector<Page>& pages, XalanNode* list)
103 {
104 NodeRefList nodes;
105
106 evaluator.selectNodeList(nodes, support, list, *item);
107
108 for (int index(0); index < nodes.getLength(); ++index)
109 {
110 XalanNode* node = nodes.item(index);
111 std::ostringstream url, title;
112
113 url << evaluator.evaluate(support, node, *address)->str();
114 title << evaluator.evaluate(support, node, *link)->str();
115
116 Page page(url.str(), title.str());
117 XalanNode* list = evaluator.selectSingleNode(support, node, *(this->list));
118
119 if (list != 0) oldMap(page.getChildren(), list);
120
121 pages.push_back(page);
122 }
123 }
124
125 void SiteMapper::newIndex(const std::string& siteIndex)
126 {
127 XalanDOMString file(siteIndex.c_str());
128 LocalFileInputSource source(file.c_str());
129
130 XalanDocument* document = liaison.parseXMLStream(source);
131
132 if (document == 0) return;
133
134 address = evaluator.createXPath(XalanDOMString("address").c_str());
135 port = evaluator.createXPath(XalanDOMString("port").c_str());
136 path = evaluator.createXPath(XalanDOMString("path").c_str());
137 title = evaluator.createXPath(XalanDOMString("title").c_str());
138
139 NodeRefList nodes;
140
141 evaluator.selectNodeList(nodes, support, document, XalanDOMString("/index/page").c_str());
142
143 for (int index(0); index < nodes.getLength(); ++index)
144 {
145 XalanNode* node = nodes.item(index);
146 std::ostringstream address;
147
148 address << evaluator.evaluate(support, node, *(this->address))->str();
149
150 double port = evaluator.evaluate(support, node, *(this->port))->num();
151
152 if (port >= 0 && port <= 65535)
153 {
154 address << ':' << int(port);
155 }
156
157 std::ostringstream path, title;
158
159 path << evaluator.evaluate(support, node, *(this->path))->str();
160 title << evaluator.evaluate(support, node, *(this->title))->str();
161
162 Page page(address.str(), path.str(), title.str());
163 Matcher matcher;
164
165 if (page == matcher("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDouglas\\sThrift's\\sBlog:\\s(.+)$"))
166 {
167 if (Matcher("^\\w+\\s\\d{4}\\sArchives$") == matcher[1])
168 {
169 page.setTitle(matcher[1]);
170
171 if (newIndex(pages, page)) continue;
172 }
173 else continue;
174 }
175 else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"))
176 {
177 page.setTitle(matcher[1]);
178
179 if (newIndex(pages, page)) continue;
180 }
181 else continue;
182
183 std::multimap<std::string, Page> items;
184
185 newPages.insert(std::pair<std::string, std::multimap<std::string, Page> >(page.getAddress(), items)).first->second.insert(std::pair<std::string, Page>(page.getChildOf(), page));
186 }
187
188 evaluator.destroyXPath(address);
189 evaluator.destroyXPath(port);
190 evaluator.destroyXPath(path);
191 evaluator.destroyXPath(title);
192 }
193
194 bool SiteMapper::newIndex(std::vector<Page>& pages, Page& page)
195 {
196 for (unsigned index(0); index < pages.size(); ++index)
197 {
198 if (pages[index] == page.getAddress())
199 {
200 Matcher matcher;
201
202 if (pages[index] == page)
203 {
204 page.setChildren(pages[index].getChildren());
205
206 pages[index] = page;
207
208 std::cout << "Updated: " << page.getUrl() << '\n';
209
210 return true;
211 }
212 else if (matcher('^' + pages[index].getPath()) == page)
213 {
214 page.setChildOf(matcher[0]);
215
216 if (matcher('^' + pages[index].getTitle() + "\\s\\|\\s(.+)$")
217 == page)
218 {
219 page.setTitle(matcher[1]);
220 }
221
222 return newIndex(pages[index].getChildren(), page);
223 }
224 }
225 }
226
227 return false;
228 }
229
230 void SiteMapper::newMap(const std::string& siteMap)
231 {
232 std::ofstream fout(siteMap.c_str());
233
234 fout << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
235 << "<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl"
236 << "\"?>\n"
237 << "<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">\n"
238 << "<!--" << comment.str() << "-->\n"
239 << "<page>\n"
240 << "\t<title>Sitemap</title>\n"
241 << "\t<section>\n"
242 << "\t\t<list>\n";
243
244 for (unsigned index(0); index < pages.size(); ++index)
245 {
246 if (newPages.find(pages[index].getAddress()) != newPages.end())
247 {
248 newMap(pages[index].getChildren(), pages[index].getPath(),
249 newPages.find(pages[index].getAddress())->second);
250 }
251
252 fout << pages[index](3) << '\n';
253 }
254
255 fout << "\t\t</list>\n"
256 << "\t</section>\n"
257 << "</page>\n";
258
259 fout.close();
260 }
261
262 void SiteMapper::newMap(std::vector<Page>& pages, const std::string& childOf, std::multimap<std::string, Page>& newPages)
263 {
264 for (unsigned index(0); index < pages.size(); ++index)
265 {
266 newMap(pages[index].getChildren(), pages[index].getPath(), newPages);
267 }
268
269 for (std::multimap<std::string, Page>::iterator itor(newPages.lower_bound(childOf)); itor != newPages.upper_bound(childOf); itor++)
270 {
271 std::cout << "Added: " << itor->second.getUrl() << '\n';
272
273 pages.push_back(itor->second);
274 }
275
276 newPages.erase(childOf);
277 }

Properties

Name Value
svn:eol-style native
svn:keywords Id