ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapper/SiteMapper.cpp
Revision: 152
Committed: 2004-05-29T20:00:41-07:00 (21 years ago) by Douglas Thrift
File size: 6268 byte(s)
Log Message:
Meep!

File Contents

# Content
1 // Site Mapper
2 //
3 // Douglas Thrift
4 //
5 // $Id$
6
7 #ifdef _WIN32
8 #pragma warning(disable:4503)
9 #endif
10
11 #include "SiteMapper.hpp"
12 #include "Matcher.hpp"
13 #include "Page.hpp"
14
15 string program;
16 bool debug(false);
17
18 int main(int argc, char* argv[])
19 {
20 program = argv[0];
21
22 string siteIndex, siteMap;
23
24 for (int index(1); index < argc; index++)
25 {
26 string arg(argv[index]);
27 Matcher matcher;
28
29 if (arg == matcher("^-index=(.*)$"))
30 {
31 siteIndex = matcher[1];
32 }
33 else if (arg == matcher("^-map=(.*)$"))
34 {
35 siteMap = matcher[1];
36 }
37 else if (arg == "-D")
38 {
39 if (!debug) debug = true;
40 }
41 }
42
43 if (siteIndex != "" && siteMap != "")
44 {
45 XMLPlatformUtils::Initialize();
46 XPathEvaluator::initialize();
47
48 SiteMapper mapper(siteIndex, siteMap);
49
50 XPathEvaluator::terminate();
51 XMLPlatformUtils::Terminate();
52 }
53 else
54 {
55 cout << "Usage: " << program << " -index=index -map=map [-D]\n";
56 }
57
58 return 0;
59 }
60
61 SiteMapper::SiteMapper(const string& siteIndex, const string& siteMap)
62 {
63 oldMap(siteMap);
64 newIndex(siteIndex);
65 newMap(siteMap);
66 }
67
68 void SiteMapper::oldMap(const string& siteMap)
69 {
70 support.setParserLiaison(&liaison);
71
72 XalanDOMString file(siteMap.c_str());
73 LocalFileInputSource source(file.c_str());
74
75 XalanDocument* document = liaison.parseXMLStream(source);
76
77 if (document == 0) return;
78
79 XalanNode* list = evaluator.selectSingleNode(support, document,
80 XalanDOMString("/page/section/list").c_str());
81
82 if (list == 0) return;
83
84 item = evaluator.createXPath(XalanDOMString("item").c_str());
85 address = evaluator.createXPath(XalanDOMString("link/@address").c_str());
86 link = evaluator.createXPath(XalanDOMString("link").c_str());
87 this->list = evaluator.createXPath(XalanDOMString("list").c_str());
88
89 oldMap(pages, list);
90
91 evaluator.destroyXPath(item);
92 evaluator.destroyXPath(address);
93 evaluator.destroyXPath(link);
94 evaluator.destroyXPath(this->list);
95 }
96
97 void SiteMapper::oldMap(vector<Page>& pages, XalanNode* list)
98 {
99 NodeRefList nodes = evaluator.selectNodeList(support, list, *item);
100
101 for (int index = 0; index < nodes.getLength(); index++)
102 {
103 XalanNode* node = nodes.item(index);
104 ostringstream url, title;
105
106 url << evaluator.evaluate(support, node, *address)->str();
107 title << evaluator.evaluate(support, node, *link)->str();
108
109 Page page(url.str(), title.str());
110 XalanNode* list = evaluator.selectSingleNode(support, node,
111 *(this->list));
112
113 if (list != 0) oldMap(page.getChildren(), list);
114
115 pages.push_back(page);
116 }
117 }
118
119 void SiteMapper::newIndex(const string& siteIndex)
120 {
121 XalanDOMString file(siteIndex.c_str());
122 LocalFileInputSource source(file.c_str());
123
124 XalanDocument* document = liaison.parseXMLStream(source);
125
126 if (document == 0) return;
127
128 address = evaluator.createXPath(XalanDOMString("address").c_str());
129 port = evaluator.createXPath(XalanDOMString("port").c_str());
130 path = evaluator.createXPath(XalanDOMString("path").c_str());
131 title = evaluator.createXPath(XalanDOMString("title").c_str());
132
133 NodeRefList nodes = evaluator.selectNodeList(support, document,
134 XalanDOMString("/index/page").c_str());
135
136 for (int index = 0; index < nodes.getLength(); index++)
137 {
138 XalanNode* node = nodes.item(index);
139 ostringstream address;
140
141 address << evaluator.evaluate(support, node, *(this->address))->str();
142
143 double port = evaluator.evaluate(support, node, *(this->port))->num();
144
145 if (port >= 0 && port <= 65535)
146 {
147 address << ':' << int(port);
148 }
149
150 ostringstream path, title;
151
152 path << evaluator.evaluate(support, node, *(this->path))->str();
153 title << evaluator.evaluate(support, node, *(this->title))->str();
154
155 Page page(address.str(), path.str(), title.str());
156 Matcher matcher;
157
158 if (page == matcher(string("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDou")
159 + "glas\\sThrift's\\sBlog:\\s(.+)$"))
160 {
161 if (Matcher("^\\w+\\s\\d\\d\\d\\d\\sArchives$") == matcher[1])
162 {
163 page.setTitle(matcher[1]);
164
165 if (newIndex(pages, page)) continue;
166 }
167 else continue;
168 }
169 else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"))
170 {
171 page.setTitle(matcher[1]);
172
173 if (newIndex(pages, page)) continue;
174 }
175 else continue;
176
177 multimap<string, Page> items;
178
179 newPages.insert(pair<string, multimap<string, Page>
180 >(page.getAddress(), items)).first->second.insert(pair<string,
181 Page>(page.getChildOf(), page));
182 }
183
184 evaluator.destroyXPath(address);
185 evaluator.destroyXPath(port);
186 evaluator.destroyXPath(path);
187 evaluator.destroyXPath(title);
188 }
189
190 bool SiteMapper::newIndex(vector<Page>& pages, Page& page)
191 {
192 for (unsigned index = 0; index < pages.size(); index++)
193 {
194 if (pages[index] == page.getAddress())
195 {
196 Matcher matcher;
197
198 if (pages[index] == page)
199 {
200 page.setChildren(pages[index].getChildren());
201
202 pages[index] = page;
203
204 return true;
205 }
206 else if (matcher('^' + pages[index].getPath()) == page)
207 {
208 page.setChildOf(matcher[0]);
209
210 if (matcher('^' + pages[index].getTitle() + "\\s\\|\\s(.+)$")
211 == page)
212 {
213 page.setTitle(matcher[1]);
214 }
215
216 return newIndex(pages[index].getChildren(), page);
217 }
218 }
219 }
220
221 return false;
222 }
223
224 void SiteMapper::newMap(const string& siteMap)
225 {
226 ofstream fout(siteMap.c_str());
227
228 fout << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
229 << "<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl"
230 << "\"?>\n"
231 << "<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">\n"
232 << "<!-- $Id$ -->\n"
233 << "<page>\n"
234 << "\t<title>Sitemap</title>\n"
235 << "\t<section>\n"
236 << "\t\t<list>\n";
237
238 for (unsigned index = 0; index < pages.size(); index++)
239 {
240 if (newPages.find(pages[index].getAddress()) != newPages.end())
241 {
242 newMap(pages[index].getChildren(), pages[index].getPath(),
243 newPages.find(pages[index].getAddress())->second);
244 }
245
246 fout << pages[index](3) << '\n';
247 }
248
249 fout << "\t\t</list>\n"
250 << "\t</section>\n"
251 << "</page>\n";
252
253 fout.close();
254 }
255
256 void SiteMapper::newMap(vector<Page>& pages, const string& childOf,
257 multimap<string, Page>& newPages)
258 {
259 for (unsigned index = 0; index < pages.size(); index++)
260 {
261 newMap(pages[index].getChildren(), pages[index].getPath(), newPages);
262 }
263
264 for (multimap<string, Page>::iterator itor = newPages.lower_bound(childOf);
265 itor != newPages.upper_bound(childOf); itor++)
266 {
267 pages.push_back(itor->second);
268 }
269
270 newPages.erase(childOf);
271 }

Properties

Name Value
svn:eol-style native
svn:keywords Id