ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapper/SiteMapper.cpp
Revision: 142
Committed: 2004-03-31T20:53:28-08:00 (21 years, 2 months ago) by Douglas Thrift
File size: 5075 byte(s)
Log Message:
Almost working right!

File Contents

# Content
1 // Site Mapper
2 //
3 // Douglas Thrift
4 //
5 // $Id$
6
7 #include "SiteMapper.hpp"
8 #include "Matcher.hpp"
9 #include "Page.hpp"
10
11 string program;
12 bool debug = false;
13
14 int main(int argc, char* argv[])
15 {
16 program = argv[0];
17
18 string siteIndex, siteMap;
19
20 for (int index = 1; index < argc; index++)
21 {
22 string arg(argv[index]);
23 Matcher matcher;
24
25 if (arg == matcher("^-index=(.*)$"))
26 {
27 siteIndex = matcher[1];
28 }
29 else if (arg == matcher("^-map=(.*)$"))
30 {
31 siteMap = matcher[1];
32 }
33 else if (arg == "-D")
34 {
35 if (!debug) debug = true;
36 }
37 }
38
39 if (siteIndex != "" && siteMap != "")
40 {
41 XMLPlatformUtils::Initialize();
42 XPathEvaluator::initialize();
43
44 SiteMapper mapper(siteIndex, siteMap);
45
46 XPathEvaluator::terminate();
47 XMLPlatformUtils::Terminate();
48 }
49 else
50 {
51 cout << "Usage: " << program << " -index=index -map=map [-D]\n";
52 }
53
54 return 0;
55 }
56
57 SiteMapper::SiteMapper(const string& siteIndex, const string& siteMap)
58 {
59 oldMap(siteMap);
60 newIndex(siteIndex);
61 newMap(siteMap);
62 }
63
64 void SiteMapper::oldMap(const string& siteMap)
65 {
66 support.setParserLiaison(&liaison);
67
68 XalanDOMString file(siteMap.c_str());
69 LocalFileInputSource source(file.c_str());
70
71 XalanDocument* document = liaison.parseXMLStream(source);
72
73 if (document == 0) return;
74
75 XalanNode* list = evaluator.selectSingleNode(support, document,
76 XalanDOMString("/page/section/list").c_str());
77
78 if (list == 0) return;
79
80 item = evaluator.createXPath(XalanDOMString("item").c_str());
81 address = evaluator.createXPath(XalanDOMString("link/@address").c_str());
82 link = evaluator.createXPath(XalanDOMString("link").c_str());
83 list_ = evaluator.createXPath(XalanDOMString("list").c_str());
84
85 oldMap(pages, list);
86
87 evaluator.destroyXPath(item);
88 evaluator.destroyXPath(address);
89 evaluator.destroyXPath(link);
90 evaluator.destroyXPath(list_);
91 }
92
93 void SiteMapper::oldMap(vector<Page>& pages, XalanNode* list_)
94 {
95 NodeRefList nodes = evaluator.selectNodeList(support, list_, *item);
96
97 for (int index = 0; index < nodes.getLength(); index++)
98 {
99 XalanNode* node = nodes.item(index);
100 ostringstream url, title;
101
102 url << evaluator.evaluate(support, node, *address)->str();
103 title << evaluator.evaluate(support, node, *link)->str();
104
105 Page page(url.str(), title.str());
106 XalanNode* list = evaluator.selectSingleNode(support, node,
107 *(this->list_));
108
109 if (list != 0) oldMap(page.getChildren(), list);
110
111 pages.push_back(page);
112 }
113 }
114
115 void SiteMapper::newIndex(const string& siteIndex)
116 {
117 XalanDOMString file(siteIndex.c_str());
118 LocalFileInputSource source(file.c_str());
119
120 XalanDocument* document = liaison.parseXMLStream(source);
121
122 if (document == 0) return;
123
124 address = evaluator.createXPath(XalanDOMString("address").c_str());
125 port = evaluator.createXPath(XalanDOMString("port").c_str());
126 path = evaluator.createXPath(XalanDOMString("path").c_str());
127 title = evaluator.createXPath(XalanDOMString("title").c_str());
128
129 NodeRefList nodes = evaluator.selectNodeList(support, document,
130 XalanDOMString("/index/page").c_str());
131
132 for (int index = 0; index < nodes.getLength(); index++)
133 {
134 XalanNode* node = nodes.item(index);
135 ostringstream address;
136
137 address << evaluator.evaluate(support, node, *(this->address))->str();
138
139 double port = evaluator.evaluate(support, node, *(this->port))->num();
140
141 if (port >= 0 && port <= 65535)
142 {
143 address << ':' << int(port);
144 }
145
146 ostringstream path, title;
147
148 path << evaluator.evaluate(support, node, *(this->path))->str();
149 title << evaluator.evaluate(support, node, *(this->title))->str();
150
151 Page page(address.str(), path.str(), title.str());
152 Matcher matcher;
153
154 if (page == matcher(string("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDou")
155 + "glas\\sThrift's\\sBlog:\\s(.+)$"))
156 {
157 if (Matcher("^\\w+\\s\\d\\d\\d\\d\\sArchives$") == matcher[1])
158 {
159 page.setTitle(matcher[1]);
160
161 if (newIndex(pages, page)) continue;
162 }
163 else continue;
164 }
165 else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"))
166 {
167 page.setTitle(matcher[1]);
168
169 if (newIndex(pages, page)) continue;
170 }
171 else continue;
172
173 cerr << page.getTitle() << ' ' << page.getChildOf() << '\n';
174 // newPages.insert(page);
175 }
176
177 evaluator.destroyXPath(address);
178 evaluator.destroyXPath(port);
179 evaluator.destroyXPath(path);
180 evaluator.destroyXPath(title);
181 }
182
183 bool SiteMapper::newIndex(vector<Page>& pages, Page& page)
184 {
185 for (unsigned index = 0; index < pages.size(); index++)
186 {
187 if (pages[index] == page.getAddress())
188 {
189 Matcher matcher;
190
191 if (pages[index] == page)
192 {
193 page.setChildren(pages[index].getChildren());
194
195 pages[index] = page;
196
197 return true;
198 }
199 else if (matcher('^' + pages[index].getPath()) == page)
200 {
201 page.setChildOf(matcher[0]);
202
203 if (matcher('^' + pages[index].getTitle() + "\\s\\|\\s(.+)$")
204 == page)
205 {
206 page.setTitle(matcher[1]);
207 }
208
209 return newIndex(pages[index].getChildren(), page);
210 }
211 }
212 }
213
214 cerr << "Here!\n";
215
216 return false;
217 }
218
219 void SiteMapper::newMap(const string& siteMap)
220 {
221 //
222
223 for (unsigned index = 0; index < pages.size(); index++)
224 {
225 cout << pages[index] << '\n';
226 }
227 }

Properties

Name Value
svn:eol-style native
svn:keywords Id