ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapper/SiteMapper.cpp
Revision: 142
Committed: 2004-03-31T20:53:28-08:00 (21 years, 2 months ago) by Douglas Thrift
File size: 5075 byte(s)
Log Message:
Almost working right!

File Contents

# User Rev Content
1 Douglas Thrift 126 // Site Mapper
2     //
3     // Douglas Thrift
4     //
5     // $Id$
6    
7     #include "SiteMapper.hpp"
8     #include "Matcher.hpp"
9 Douglas Thrift 128 #include "Page.hpp"
10 Douglas Thrift 126
// Name this program was invoked under; main() assigns argv[0] to it and
// prints it in the usage message.
string program;

// Debug flag; main() switches it on when -D is given on the command line.
bool debug(false);
13    
14 Douglas Thrift 126 int main(int argc, char* argv[])
15     {
16 Douglas Thrift 128 program = argv[0];
17 Douglas Thrift 132
18 Douglas Thrift 126 string siteIndex, siteMap;
19    
20 Douglas Thrift 132 for (int index = 1; index < argc; index++)
21 Douglas Thrift 126 {
22 Douglas Thrift 128 string arg(argv[index]);
23     Matcher matcher;
24 Douglas Thrift 126
25 Douglas Thrift 128 if (arg == matcher("^-index=(.*)$"))
26     {
27     siteIndex = matcher[1];
28     }
29     else if (arg == matcher("^-map=(.*)$"))
30     {
31     siteMap = matcher[1];
32     }
33 Douglas Thrift 133 else if (arg == "-D")
34     {
35     if (!debug) debug = true;
36     }
37 Douglas Thrift 126 }
38    
39 Douglas Thrift 128 if (siteIndex != "" && siteMap != "")
40 Douglas Thrift 132 {
41 Douglas Thrift 128 XMLPlatformUtils::Initialize();
42     XPathEvaluator::initialize();
43 Douglas Thrift 126
44 Douglas Thrift 128 SiteMapper mapper(siteIndex, siteMap);
45    
46     XPathEvaluator::terminate();
47     XMLPlatformUtils::Terminate();
48     }
49     else
50     {
51 Douglas Thrift 133 cout << "Usage: " << program << " -index=index -map=map [-D]\n";
52 Douglas Thrift 128 }
53 Douglas Thrift 129
54 Douglas Thrift 126 return 0;
55     }
56    
57     SiteMapper::SiteMapper(const string& siteIndex, const string& siteMap)
58     {
59 Douglas Thrift 133 oldMap(siteMap);
60 Douglas Thrift 142 newIndex(siteIndex);
61 Douglas Thrift 134 newMap(siteMap);
62 Douglas Thrift 133 }
63    
64     void SiteMapper::oldMap(const string& siteMap)
65     {
66     support.setParserLiaison(&liaison);
67    
68 Douglas Thrift 134 XalanDOMString file(siteMap.c_str());
69     LocalFileInputSource source(file.c_str());
70 Douglas Thrift 133
71 Douglas Thrift 134 XalanDocument* document = liaison.parseXMLStream(source);
72 Douglas Thrift 133
73 Douglas Thrift 134 if (document == 0) return;
74 Douglas Thrift 133
75 Douglas Thrift 134 XalanNode* list = evaluator.selectSingleNode(support, document,
76     XalanDOMString("/page/section/list").c_str());
77 Douglas Thrift 133
78 Douglas Thrift 134 if (list == 0) return;
79 Douglas Thrift 135
80 Douglas Thrift 137 item = evaluator.createXPath(XalanDOMString("item").c_str());
81     address = evaluator.createXPath(XalanDOMString("link/@address").c_str());
82     link = evaluator.createXPath(XalanDOMString("link").c_str());
83 Douglas Thrift 140 list_ = evaluator.createXPath(XalanDOMString("list").c_str());
84 Douglas Thrift 137
85     oldMap(pages, list);
86 Douglas Thrift 138
87     evaluator.destroyXPath(item);
88     evaluator.destroyXPath(address);
89     evaluator.destroyXPath(link);
90 Douglas Thrift 140 evaluator.destroyXPath(list_);
91 Douglas Thrift 133 }
92    
93 Douglas Thrift 141 void SiteMapper::oldMap(vector<Page>& pages, XalanNode* list_)
94 Douglas Thrift 135 {
95 Douglas Thrift 141 NodeRefList nodes = evaluator.selectNodeList(support, list_, *item);
96 Douglas Thrift 135
97     for (int index = 0; index < nodes.getLength(); index++)
98     {
99     XalanNode* node = nodes.item(index);
100 Douglas Thrift 138 ostringstream url, title;
101 Douglas Thrift 141
102 Douglas Thrift 138 url << evaluator.evaluate(support, node, *address)->str();
103     title << evaluator.evaluate(support, node, *link)->str();
104 Douglas Thrift 135
105     Page page(url.str(), title.str());
106 Douglas Thrift 141 XalanNode* list = evaluator.selectSingleNode(support, node,
107     *(this->list_));
108 Douglas Thrift 135
109 Douglas Thrift 137 if (list != 0) oldMap(page.getChildren(), list);
110 Douglas Thrift 135
111     pages.push_back(page);
112     }
113     }
114    
115 Douglas Thrift 142 void SiteMapper::newIndex(const string& siteIndex)
116 Douglas Thrift 133 {
117 Douglas Thrift 138 XalanDOMString file(siteIndex.c_str());
118     LocalFileInputSource source(file.c_str());
119    
120     XalanDocument* document = liaison.parseXMLStream(source);
121    
122     if (document == 0) return;
123    
124     address = evaluator.createXPath(XalanDOMString("address").c_str());
125 Douglas Thrift 139 port = evaluator.createXPath(XalanDOMString("port").c_str());
126 Douglas Thrift 138 path = evaluator.createXPath(XalanDOMString("path").c_str());
127     title = evaluator.createXPath(XalanDOMString("title").c_str());
128 Douglas Thrift 141
129 Douglas Thrift 138 NodeRefList nodes = evaluator.selectNodeList(support, document,
130     XalanDOMString("/index/page").c_str());
131    
132     for (int index = 0; index < nodes.getLength(); index++)
133     {
134     XalanNode* node = nodes.item(index);
135 Douglas Thrift 139 ostringstream address;
136    
137     address << evaluator.evaluate(support, node, *(this->address))->str();
138    
139     double port = evaluator.evaluate(support, node, *(this->port))->num();
140    
141     if (port >= 0 && port <= 65535)
142     {
143     address << ':' << int(port);
144     }
145    
146 Douglas Thrift 140 ostringstream path, title;
147    
148     path << evaluator.evaluate(support, node, *(this->path))->str();
149     title << evaluator.evaluate(support, node, *(this->title))->str();
150    
151     Page page(address.str(), path.str(), title.str());
152 Douglas Thrift 142 Matcher matcher;
153 Douglas Thrift 140
154 Douglas Thrift 142 if (page == matcher(string("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDou")
155     + "glas\\sThrift's\\sBlog:\\s(.+)$"))
156 Douglas Thrift 140 {
157 Douglas Thrift 142 if (Matcher("^\\w+\\s\\d\\d\\d\\d\\sArchives$") == matcher[1])
158     {
159     page.setTitle(matcher[1]);
160    
161     if (newIndex(pages, page)) continue;
162     }
163     else continue;
164     }
165     else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"))
166     {
167 Douglas Thrift 140 page.setTitle(matcher[1]);
168    
169 Douglas Thrift 142 if (newIndex(pages, page)) continue;
170 Douglas Thrift 140 }
171 Douglas Thrift 142 else continue;
172    
173     cerr << page.getTitle() << ' ' << page.getChildOf() << '\n';
174     // newPages.insert(page);
175 Douglas Thrift 138 }
176    
177     evaluator.destroyXPath(address);
178 Douglas Thrift 139 evaluator.destroyXPath(port);
179 Douglas Thrift 138 evaluator.destroyXPath(path);
180     evaluator.destroyXPath(title);
181 Douglas Thrift 126 }
182 Douglas Thrift 133
183 Douglas Thrift 142 bool SiteMapper::newIndex(vector<Page>& pages, Page& page)
184     {
185     for (unsigned index = 0; index < pages.size(); index++)
186     {
187     if (pages[index] == page.getAddress())
188     {
189     Matcher matcher;
190    
191     if (pages[index] == page)
192     {
193     page.setChildren(pages[index].getChildren());
194    
195     pages[index] = page;
196    
197     return true;
198     }
199     else if (matcher('^' + pages[index].getPath()) == page)
200     {
201     page.setChildOf(matcher[0]);
202    
203     if (matcher('^' + pages[index].getTitle() + "\\s\\|\\s(.+)$")
204     == page)
205     {
206     page.setTitle(matcher[1]);
207     }
208    
209     return newIndex(pages[index].getChildren(), page);
210     }
211     }
212     }
213    
214     cerr << "Here!\n";
215    
216     return false;
217     }
218    
219 Douglas Thrift 133 void SiteMapper::newMap(const string& siteMap)
220     {
221     //
222 Douglas Thrift 136
223 Douglas Thrift 141 for (unsigned index = 0; index < pages.size(); index++)
224 Douglas Thrift 140 {
225 Douglas Thrift 141 cout << pages[index] << '\n';
226 Douglas Thrift 140 }
227 Douglas Thrift 133 }

Properties

Name Value
svn:eol-style native
svn:keywords Id