ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapperOld/SiteMapper.cpp
Revision: 425
Committed: 2005-03-10T20:06:50-08:00 (20 years, 3 months ago) by douglas
File size: 6698 byte(s)
Log Message:
Wow, not that I even use this one anymore!

File Contents

# User Rev Content
1 Douglas Thrift 126 // Site Mapper
2     //
3     // Douglas Thrift
4     //
5     // $Id$
6    
7 Douglas Thrift 143 #ifdef _WIN32
8     #pragma warning(disable:4503)
9     #endif
10    
11 Douglas Thrift 126 #include "SiteMapper.hpp"
12 douglas 425 #include "Matcher/Matcher.hpp"
13 Douglas Thrift 128 #include "Page.hpp"
14 Douglas Thrift 126
// Name used to refer to this executable in messages; main() assigns it from
// argv[0] and echoes it in the usage line.
std::string program;

// Debug switch, off by default. main() turns it on when "-D" is passed, and
// SiteMapper::oldMap() checks it before writing diagnostics to std::cerr.
bool debug = false;
17 Douglas Thrift 128
18 Douglas Thrift 126 int main(int argc, char* argv[])
19     {
20 Douglas Thrift 128 program = argv[0];
21 Douglas Thrift 132
22 douglas 425 std::string siteIndex, siteMap;
23 Douglas Thrift 126
24 douglas 425 for (int index(1); index < argc; ++index)
25 Douglas Thrift 126 {
26 douglas 425 std::string arg(argv[index]);
27 Douglas Thrift 128 Matcher matcher;
28 Douglas Thrift 126
29 Douglas Thrift 128 if (arg == matcher("^-index=(.*)$"))
30     {
31     siteIndex = matcher[1];
32     }
33     else if (arg == matcher("^-map=(.*)$"))
34     {
35     siteMap = matcher[1];
36     }
37 Douglas Thrift 133 else if (arg == "-D")
38     {
39     if (!debug) debug = true;
40     }
41 Douglas Thrift 126 }
42    
43 Douglas Thrift 189 if (!siteIndex.empty() && !siteMap.empty())
44 Douglas Thrift 132 {
45 Douglas Thrift 128 XMLPlatformUtils::Initialize();
46     XPathEvaluator::initialize();
47 Douglas Thrift 126
48 Douglas Thrift 128 SiteMapper mapper(siteIndex, siteMap);
49    
50     XPathEvaluator::terminate();
51     XMLPlatformUtils::Terminate();
52     }
53     else
54     {
55 douglas 425 std::cout << "Usage: " << program << " -index=index -map=map [-D]\n";
56 Douglas Thrift 128 }
57 Douglas Thrift 129
58 Douglas Thrift 126 return 0;
59     }
60    
61 douglas 425 SiteMapper::SiteMapper(const std::string& siteIndex, const std::string& siteMap)
62 Douglas Thrift 126 {
63 Douglas Thrift 133 oldMap(siteMap);
64 Douglas Thrift 142 newIndex(siteIndex);
65 Douglas Thrift 134 newMap(siteMap);
66 Douglas Thrift 133 }
67    
68 douglas 425 void SiteMapper::oldMap(const std::string& siteMap)
69 Douglas Thrift 133 {
70     support.setParserLiaison(&liaison);
71    
72 Douglas Thrift 134 XalanDOMString file(siteMap.c_str());
73     LocalFileInputSource source(file.c_str());
74 Douglas Thrift 133
75 Douglas Thrift 134 XalanDocument* document = liaison.parseXMLStream(source);
76 Douglas Thrift 133
77 Douglas Thrift 134 if (document == 0) return;
78 Douglas Thrift 133
79 Douglas Thrift 134 XalanNode* list = evaluator.selectSingleNode(support, document,
80     XalanDOMString("/page/section/list").c_str());
81 Douglas Thrift 133
82 Douglas Thrift 134 if (list == 0) return;
83 Douglas Thrift 135
84 Douglas Thrift 153 comment << evaluator.evaluate(support, document,
85     XalanDOMString("comment()").c_str())->str();
86    
87 douglas 425 if (debug) std::cerr << "comment = " << comment.str() << '\n';
88 Douglas Thrift 248
89 Douglas Thrift 137 item = evaluator.createXPath(XalanDOMString("item").c_str());
90     address = evaluator.createXPath(XalanDOMString("link/@address").c_str());
91     link = evaluator.createXPath(XalanDOMString("link").c_str());
92 Douglas Thrift 143 this->list = evaluator.createXPath(XalanDOMString("list").c_str());
93 Douglas Thrift 137
94     oldMap(pages, list);
95 Douglas Thrift 138
96     evaluator.destroyXPath(item);
97     evaluator.destroyXPath(address);
98     evaluator.destroyXPath(link);
99 Douglas Thrift 143 evaluator.destroyXPath(this->list);
100 Douglas Thrift 133 }
101    
102 douglas 425 void SiteMapper::oldMap(std::vector<Page>& pages, XalanNode* list)
103 Douglas Thrift 135 {
104 douglas 425 NodeRefList nodes;
105    
106     evaluator.selectNodeList(nodes, support, list, *item);
107 Douglas Thrift 135
108 Douglas Thrift 153 for (int index(0); index < nodes.getLength(); ++index)
109 Douglas Thrift 135 {
110     XalanNode* node = nodes.item(index);
111 douglas 425 std::ostringstream url, title;
112 Douglas Thrift 141
113 Douglas Thrift 138 url << evaluator.evaluate(support, node, *address)->str();
114     title << evaluator.evaluate(support, node, *link)->str();
115 Douglas Thrift 135
116     Page page(url.str(), title.str());
117 douglas 425 XalanNode* list = evaluator.selectSingleNode(support, node, *(this->list));
118 Douglas Thrift 135
119 Douglas Thrift 137 if (list != 0) oldMap(page.getChildren(), list);
120 Douglas Thrift 135
121     pages.push_back(page);
122     }
123     }
124    
125 douglas 425 void SiteMapper::newIndex(const std::string& siteIndex)
126 Douglas Thrift 133 {
127 Douglas Thrift 138 XalanDOMString file(siteIndex.c_str());
128     LocalFileInputSource source(file.c_str());
129    
130     XalanDocument* document = liaison.parseXMLStream(source);
131    
132     if (document == 0) return;
133    
134     address = evaluator.createXPath(XalanDOMString("address").c_str());
135 Douglas Thrift 139 port = evaluator.createXPath(XalanDOMString("port").c_str());
136 Douglas Thrift 138 path = evaluator.createXPath(XalanDOMString("path").c_str());
137     title = evaluator.createXPath(XalanDOMString("title").c_str());
138 Douglas Thrift 141
139 douglas 425 NodeRefList nodes;
140 Douglas Thrift 138
141 douglas 425 evaluator.selectNodeList(nodes, support, document, XalanDOMString("/index/page").c_str());
142    
143 Douglas Thrift 153 for (int index(0); index < nodes.getLength(); ++index)
144 Douglas Thrift 138 {
145     XalanNode* node = nodes.item(index);
146 douglas 425 std::ostringstream address;
147 Douglas Thrift 139
148     address << evaluator.evaluate(support, node, *(this->address))->str();
149    
150     double port = evaluator.evaluate(support, node, *(this->port))->num();
151    
152     if (port >= 0 && port <= 65535)
153     {
154     address << ':' << int(port);
155     }
156    
157 douglas 425 std::ostringstream path, title;
158 Douglas Thrift 140
159     path << evaluator.evaluate(support, node, *(this->path))->str();
160     title << evaluator.evaluate(support, node, *(this->title))->str();
161    
162     Page page(address.str(), path.str(), title.str());
163 Douglas Thrift 142 Matcher matcher;
164 Douglas Thrift 140
165 douglas 425 if (page == matcher("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDouglas\\sThrift's\\sBlog:\\s(.+)$"))
166 Douglas Thrift 140 {
167 Douglas Thrift 189 if (Matcher("^\\w+\\s\\d{4}\\sArchives$") == matcher[1])
168 Douglas Thrift 142 {
169     page.setTitle(matcher[1]);
170    
171     if (newIndex(pages, page)) continue;
172     }
173     else continue;
174     }
175     else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"))
176     {
177 Douglas Thrift 140 page.setTitle(matcher[1]);
178    
179 Douglas Thrift 142 if (newIndex(pages, page)) continue;
180 Douglas Thrift 140 }
181 Douglas Thrift 142 else continue;
182    
183 douglas 425 std::multimap<std::string, Page> items;
184 Douglas Thrift 143
185 douglas 425 newPages.insert(std::pair<std::string, std::multimap<std::string, Page> >(page.getAddress(), items)).first->second.insert(std::pair<std::string, Page>(page.getChildOf(), page));
186 Douglas Thrift 138 }
187    
188     evaluator.destroyXPath(address);
189 Douglas Thrift 139 evaluator.destroyXPath(port);
190 Douglas Thrift 138 evaluator.destroyXPath(path);
191     evaluator.destroyXPath(title);
192 Douglas Thrift 126 }
193 Douglas Thrift 133
194 douglas 425 bool SiteMapper::newIndex(std::vector<Page>& pages, Page& page)
195 Douglas Thrift 142 {
196 Douglas Thrift 153 for (unsigned index(0); index < pages.size(); ++index)
197 Douglas Thrift 142 {
198     if (pages[index] == page.getAddress())
199     {
200     Matcher matcher;
201    
202     if (pages[index] == page)
203     {
204     page.setChildren(pages[index].getChildren());
205    
206     pages[index] = page;
207    
208 douglas 425 std::cout << "Updated: " << page.getUrl() << '\n';
209 Douglas Thrift 189
210 Douglas Thrift 142 return true;
211     }
212     else if (matcher('^' + pages[index].getPath()) == page)
213     {
214     page.setChildOf(matcher[0]);
215    
216     if (matcher('^' + pages[index].getTitle() + "\\s\\|\\s(.+)$")
217     == page)
218     {
219     page.setTitle(matcher[1]);
220     }
221    
222     return newIndex(pages[index].getChildren(), page);
223     }
224     }
225     }
226    
227     return false;
228     }
229    
230 douglas 425 void SiteMapper::newMap(const std::string& siteMap)
231 Douglas Thrift 133 {
232 douglas 425 std::ofstream fout(siteMap.c_str());
233 Douglas Thrift 144
234     fout << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
235     << "<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl"
236     << "\"?>\n"
237     << "<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">\n"
238 Douglas Thrift 153 << "<!--" << comment.str() << "-->\n"
239 Douglas Thrift 144 << "<page>\n"
240     << "\t<title>Sitemap</title>\n"
241     << "\t<section>\n"
242     << "\t\t<list>\n";
243    
244 Douglas Thrift 153 for (unsigned index(0); index < pages.size(); ++index)
245 Douglas Thrift 140 {
246 Douglas Thrift 143 if (newPages.find(pages[index].getAddress()) != newPages.end())
247     {
248     newMap(pages[index].getChildren(), pages[index].getPath(),
249     newPages.find(pages[index].getAddress())->second);
250     }
251    
252 Douglas Thrift 144 fout << pages[index](3) << '\n';
253 Douglas Thrift 140 }
254 Douglas Thrift 144
255     fout << "\t\t</list>\n"
256     << "\t</section>\n"
257     << "</page>\n";
258    
259     fout.close();
260 Douglas Thrift 133 }
261 Douglas Thrift 143
262 douglas 425 void SiteMapper::newMap(std::vector<Page>& pages, const std::string& childOf, std::multimap<std::string, Page>& newPages)
263 Douglas Thrift 143 {
264 Douglas Thrift 153 for (unsigned index(0); index < pages.size(); ++index)
265 Douglas Thrift 143 {
266     newMap(pages[index].getChildren(), pages[index].getPath(), newPages);
267     }
268    
269 douglas 425 for (std::multimap<std::string, Page>::iterator itor(newPages.lower_bound(childOf)); itor != newPages.upper_bound(childOf); itor++)
270 Douglas Thrift 143 {
271 douglas 425 std::cout << "Added: " << itor->second.getUrl() << '\n';
272 Douglas Thrift 189
273 Douglas Thrift 143 pages.push_back(itor->second);
274     }
275    
276     newPages.erase(childOf);
277     }

Properties

Name Value
svn:eol-style native
svn:keywords Id