ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapperOld/SiteMapper.cpp
Revision: 188
Committed: 2004-08-15T01:09:47-07:00 (20 years, 10 months ago) by Douglas Thrift
File size: 6377 byte(s)
Log Message:
Branch old Xerces/Xalan SiteMapper, so we can use it when menes fails.

File Contents

# User Rev Content
1 Douglas Thrift 126 // Site Mapper
2     //
3     // Douglas Thrift
4     //
5     // $Id$
6    
7 Douglas Thrift 143 #ifdef _WIN32
8     #pragma warning(disable:4503)
9     #endif
10    
11 Douglas Thrift 126 #include "SiteMapper.hpp"
12     #include "Matcher.hpp"
13 Douglas Thrift 128 #include "Page.hpp"
14 Douglas Thrift 126
// Name this program was invoked with; main() assigns argv[0] to it and the
// usage message prints it.
string program;

// Debug flag; main() switches it on when "-D" is given on the command line.
bool debug = false;
17 Douglas Thrift 128
18 Douglas Thrift 126 int main(int argc, char* argv[])
19     {
20 Douglas Thrift 128 program = argv[0];
21 Douglas Thrift 132
22 Douglas Thrift 126 string siteIndex, siteMap;
23    
24 Douglas Thrift 152 for (int index(1); index < argc; index++)
25 Douglas Thrift 126 {
26 Douglas Thrift 128 string arg(argv[index]);
27     Matcher matcher;
28 Douglas Thrift 126
29 Douglas Thrift 128 if (arg == matcher("^-index=(.*)$"))
30     {
31     siteIndex = matcher[1];
32     }
33     else if (arg == matcher("^-map=(.*)$"))
34     {
35     siteMap = matcher[1];
36     }
37 Douglas Thrift 133 else if (arg == "-D")
38     {
39     if (!debug) debug = true;
40     }
41 Douglas Thrift 126 }
42    
43 Douglas Thrift 128 if (siteIndex != "" && siteMap != "")
44 Douglas Thrift 132 {
45 Douglas Thrift 128 XMLPlatformUtils::Initialize();
46     XPathEvaluator::initialize();
47 Douglas Thrift 126
48 Douglas Thrift 128 SiteMapper mapper(siteIndex, siteMap);
49    
50     XPathEvaluator::terminate();
51     XMLPlatformUtils::Terminate();
52     }
53     else
54     {
55 Douglas Thrift 133 cout << "Usage: " << program << " -index=index -map=map [-D]\n";
56 Douglas Thrift 128 }
57 Douglas Thrift 129
58 Douglas Thrift 126 return 0;
59     }
60    
61     SiteMapper::SiteMapper(const string& siteIndex, const string& siteMap)
62     {
63 Douglas Thrift 133 oldMap(siteMap);
64 Douglas Thrift 142 newIndex(siteIndex);
65 Douglas Thrift 134 newMap(siteMap);
66 Douglas Thrift 133 }
67    
68     void SiteMapper::oldMap(const string& siteMap)
69     {
70     support.setParserLiaison(&liaison);
71    
72 Douglas Thrift 134 XalanDOMString file(siteMap.c_str());
73     LocalFileInputSource source(file.c_str());
74 Douglas Thrift 133
75 Douglas Thrift 134 XalanDocument* document = liaison.parseXMLStream(source);
76 Douglas Thrift 133
77 Douglas Thrift 134 if (document == 0) return;
78 Douglas Thrift 133
79 Douglas Thrift 134 XalanNode* list = evaluator.selectSingleNode(support, document,
80     XalanDOMString("/page/section/list").c_str());
81 Douglas Thrift 133
82 Douglas Thrift 134 if (list == 0) return;
83 Douglas Thrift 135
84 Douglas Thrift 153 comment << evaluator.evaluate(support, document,
85     XalanDOMString("comment()").c_str())->str();
86    
87 Douglas Thrift 137 item = evaluator.createXPath(XalanDOMString("item").c_str());
88     address = evaluator.createXPath(XalanDOMString("link/@address").c_str());
89     link = evaluator.createXPath(XalanDOMString("link").c_str());
90 Douglas Thrift 143 this->list = evaluator.createXPath(XalanDOMString("list").c_str());
91 Douglas Thrift 137
92     oldMap(pages, list);
93 Douglas Thrift 138
94     evaluator.destroyXPath(item);
95     evaluator.destroyXPath(address);
96     evaluator.destroyXPath(link);
97 Douglas Thrift 143 evaluator.destroyXPath(this->list);
98 Douglas Thrift 133 }
99    
100 Douglas Thrift 143 void SiteMapper::oldMap(vector<Page>& pages, XalanNode* list)
101 Douglas Thrift 135 {
102 Douglas Thrift 143 NodeRefList nodes = evaluator.selectNodeList(support, list, *item);
103 Douglas Thrift 135
104 Douglas Thrift 153 for (int index(0); index < nodes.getLength(); ++index)
105 Douglas Thrift 135 {
106     XalanNode* node = nodes.item(index);
107 Douglas Thrift 138 ostringstream url, title;
108 Douglas Thrift 141
109 Douglas Thrift 138 url << evaluator.evaluate(support, node, *address)->str();
110     title << evaluator.evaluate(support, node, *link)->str();
111 Douglas Thrift 135
112     Page page(url.str(), title.str());
113 Douglas Thrift 141 XalanNode* list = evaluator.selectSingleNode(support, node,
114 Douglas Thrift 143 *(this->list));
115 Douglas Thrift 135
116 Douglas Thrift 137 if (list != 0) oldMap(page.getChildren(), list);
117 Douglas Thrift 135
118     pages.push_back(page);
119     }
120     }
121    
122 Douglas Thrift 142 void SiteMapper::newIndex(const string& siteIndex)
123 Douglas Thrift 133 {
124 Douglas Thrift 138 XalanDOMString file(siteIndex.c_str());
125     LocalFileInputSource source(file.c_str());
126    
127     XalanDocument* document = liaison.parseXMLStream(source);
128    
129     if (document == 0) return;
130    
131     address = evaluator.createXPath(XalanDOMString("address").c_str());
132 Douglas Thrift 139 port = evaluator.createXPath(XalanDOMString("port").c_str());
133 Douglas Thrift 138 path = evaluator.createXPath(XalanDOMString("path").c_str());
134     title = evaluator.createXPath(XalanDOMString("title").c_str());
135 Douglas Thrift 141
136 Douglas Thrift 138 NodeRefList nodes = evaluator.selectNodeList(support, document,
137     XalanDOMString("/index/page").c_str());
138    
139 Douglas Thrift 153 for (int index(0); index < nodes.getLength(); ++index)
140 Douglas Thrift 138 {
141     XalanNode* node = nodes.item(index);
142 Douglas Thrift 139 ostringstream address;
143    
144     address << evaluator.evaluate(support, node, *(this->address))->str();
145    
146     double port = evaluator.evaluate(support, node, *(this->port))->num();
147    
148     if (port >= 0 && port <= 65535)
149     {
150     address << ':' << int(port);
151     }
152    
153 Douglas Thrift 140 ostringstream path, title;
154    
155     path << evaluator.evaluate(support, node, *(this->path))->str();
156     title << evaluator.evaluate(support, node, *(this->title))->str();
157    
158     Page page(address.str(), path.str(), title.str());
159 Douglas Thrift 142 Matcher matcher;
160 Douglas Thrift 140
161 Douglas Thrift 142 if (page == matcher(string("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDou")
162     + "glas\\sThrift's\\sBlog:\\s(.+)$"))
163 Douglas Thrift 140 {
164 Douglas Thrift 142 if (Matcher("^\\w+\\s\\d\\d\\d\\d\\sArchives$") == matcher[1])
165     {
166     page.setTitle(matcher[1]);
167    
168     if (newIndex(pages, page)) continue;
169     }
170     else continue;
171     }
172     else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"))
173     {
174 Douglas Thrift 140 page.setTitle(matcher[1]);
175    
176 Douglas Thrift 142 if (newIndex(pages, page)) continue;
177 Douglas Thrift 140 }
178 Douglas Thrift 142 else continue;
179    
180 Douglas Thrift 143 multimap<string, Page> items;
181    
182     newPages.insert(pair<string, multimap<string, Page>
183     >(page.getAddress(), items)).first->second.insert(pair<string,
184     Page>(page.getChildOf(), page));
185 Douglas Thrift 138 }
186    
187     evaluator.destroyXPath(address);
188 Douglas Thrift 139 evaluator.destroyXPath(port);
189 Douglas Thrift 138 evaluator.destroyXPath(path);
190     evaluator.destroyXPath(title);
191 Douglas Thrift 126 }
192 Douglas Thrift 133
193 Douglas Thrift 142 bool SiteMapper::newIndex(vector<Page>& pages, Page& page)
194     {
195 Douglas Thrift 153 for (unsigned index(0); index < pages.size(); ++index)
196 Douglas Thrift 142 {
197     if (pages[index] == page.getAddress())
198     {
199     Matcher matcher;
200    
201     if (pages[index] == page)
202     {
203     page.setChildren(pages[index].getChildren());
204    
205     pages[index] = page;
206    
207     return true;
208     }
209     else if (matcher('^' + pages[index].getPath()) == page)
210     {
211     page.setChildOf(matcher[0]);
212    
213     if (matcher('^' + pages[index].getTitle() + "\\s\\|\\s(.+)$")
214     == page)
215     {
216     page.setTitle(matcher[1]);
217     }
218    
219     return newIndex(pages[index].getChildren(), page);
220     }
221     }
222     }
223    
224     return false;
225     }
226    
227 Douglas Thrift 133 void SiteMapper::newMap(const string& siteMap)
228     {
229 Douglas Thrift 144 ofstream fout(siteMap.c_str());
230    
231     fout << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
232     << "<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl"
233     << "\"?>\n"
234     << "<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">\n"
235 Douglas Thrift 153 << "<!--" << comment.str() << "-->\n"
236 Douglas Thrift 144 << "<page>\n"
237     << "\t<title>Sitemap</title>\n"
238     << "\t<section>\n"
239     << "\t\t<list>\n";
240    
241 Douglas Thrift 153 for (unsigned index(0); index < pages.size(); ++index)
242 Douglas Thrift 140 {
243 Douglas Thrift 143 if (newPages.find(pages[index].getAddress()) != newPages.end())
244     {
245     newMap(pages[index].getChildren(), pages[index].getPath(),
246     newPages.find(pages[index].getAddress())->second);
247     }
248    
249 Douglas Thrift 144 fout << pages[index](3) << '\n';
250 Douglas Thrift 140 }
251 Douglas Thrift 144
252     fout << "\t\t</list>\n"
253     << "\t</section>\n"
254     << "</page>\n";
255    
256     fout.close();
257 Douglas Thrift 133 }
258 Douglas Thrift 143
259     void SiteMapper::newMap(vector<Page>& pages, const string& childOf,
260     multimap<string, Page>& newPages)
261     {
262 Douglas Thrift 153 for (unsigned index(0); index < pages.size(); ++index)
263 Douglas Thrift 143 {
264     newMap(pages[index].getChildren(), pages[index].getPath(), newPages);
265     }
266    
267 Douglas Thrift 153 for (multimap<string, Page>::iterator itor(newPages.lower_bound(childOf));
268 Douglas Thrift 143 itor != newPages.upper_bound(childOf); itor++)
269     {
270     pages.push_back(itor->second);
271     }
272    
273     newPages.erase(childOf);
274     }

Properties

Name Value
svn:eol-style native
svn:keywords Id