ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapper/SiteMapper.cpp
Revision: 249
Committed: 2004-09-11T23:21:11-07:00 (20 years, 9 months ago) by Douglas Thrift
File size: 4993 byte(s)
Log Message:
Worky, worky!

File Contents

# User Rev Content
1 Douglas Thrift 126 // Site Mapper
2     //
3     // Douglas Thrift
4     //
5     // $Id$
6    
7     #include "SiteMapper.hpp"
8     #include "Matcher.hpp"
9 Douglas Thrift 128 #include "Page.hpp"
10 Douglas Thrift 126
11 Douglas Thrift 249 #include <menes-api/exename.hpp>
12     #include <menes-app/application.hpp>
13    
14     #include <cassert>
15    
// Executable name; assigned in SiteMapperCommand::Run and printed in the
// usage line.
string program;

// Turned on by the -D argument; when set, diagnostics are written to cerr.
bool debug = false;
18 Douglas Thrift 128
19 Douglas Thrift 249 struct SiteMapperCommand : public app::Application
20 Douglas Thrift 126 {
21 Douglas Thrift 249 virtual int Run(const app::ArgumentList& args)
22     {
23     program = api::GetExecutableName();
24 Douglas Thrift 132
25 Douglas Thrift 249 string siteIndex, siteMap;
26 Douglas Thrift 126
27 Douglas Thrift 249 for (size_t index(0); index < args.GetSize(); index++)
28     {
29     string arg(args[index]);
30     Matcher matcher;
31 Douglas Thrift 126
32 Douglas Thrift 249 if (arg == matcher("^-index=(.*)$"))
33     {
34     siteIndex = matcher[1];
35     }
36     else if (arg == matcher("^-map=(.*)$"))
37     {
38     siteMap = matcher[1];
39     }
40     else if (arg == "-D")
41     {
42     if (!debug) debug = true;
43     }
44 Douglas Thrift 128 }
45 Douglas Thrift 249
46     if (!siteIndex.empty() && !siteMap.empty())
47 Douglas Thrift 128 {
48 Douglas Thrift 249 SiteMapper mapper(siteIndex, siteMap);
49 Douglas Thrift 128 }
50 Douglas Thrift 249 else
51 Douglas Thrift 133 {
52 Douglas Thrift 249 cout << "Usage: " << program << " -index=index -map=map [-D]\n";
53 Douglas Thrift 133 }
54 Douglas Thrift 126
55 Douglas Thrift 249 return 0;
56 Douglas Thrift 128 }
57 Douglas Thrift 249 } mapper;
58 Douglas Thrift 129
59 Douglas Thrift 126 SiteMapper::SiteMapper(const string& siteIndex, const string& siteMap)
60     {
61 Douglas Thrift 133 oldMap(siteMap);
62 Douglas Thrift 142 newIndex(siteIndex);
63 Douglas Thrift 134 newMap(siteMap);
64 Douglas Thrift 133 }
65    
66     void SiteMapper::oldMap(const string& siteMap)
67     {
68 Douglas Thrift 154 ext::Handle<xml::Document> document(xml::Parse(siteMap));
69     ext::Handle<xml::Node> list(*document/"page"/"section"/"list");
70 Douglas Thrift 133
71 Douglas Thrift 237 comment = ext::String(*document/"comment()");
72 Douglas Thrift 133
73 Douglas Thrift 249 if (debug) cerr << "comment = " << comment << '\n';
74    
75     assert(comment == " Cheese! ");
76    
77 Douglas Thrift 137 oldMap(pages, list);
78 Douglas Thrift 133 }
79    
80 Douglas Thrift 154 void SiteMapper::oldMap(vector<Page>& pages, xml::Node* list)
81 Douglas Thrift 135 {
82 Douglas Thrift 154 xml::NodeSet nodes(*list/"item");
83 Douglas Thrift 135
84 Douglas Thrift 154 for (xml::NodeSet::Iterator node(nodes.Begin()); node != nodes.End();
85     ++node)
86 Douglas Thrift 135 {
87 Douglas Thrift 237 string url(ext::String(**node/"link"/"@address")),
88     title(ext::String(**node/"link"));
89 Douglas Thrift 154 Page page(url, title);
90     ext::Handle<xml::Node> list(**node/"list");
91 Douglas Thrift 141
92 Douglas Thrift 154 if (!list.IsEmpty()) oldMap(page.getChildren(), list);
93 Douglas Thrift 135
94     pages.push_back(page);
95     }
96     }
97    
98 Douglas Thrift 142 void SiteMapper::newIndex(const string& siteIndex)
99 Douglas Thrift 133 {
100 Douglas Thrift 154 ext::Handle<xml::Document> document(xml::Parse(siteIndex));
101     xml::NodeSet nodes(*document/"index"/"page");
102 Douglas Thrift 138
103 Douglas Thrift 154 for (xml::NodeSet::Iterator node(nodes.Begin()); node != nodes.End();
104     ++node)
105 Douglas Thrift 138 {
106 Douglas Thrift 249 string address(ext::String(**node/"address")),
107     port(ext::String(**node/"port"));
108 Douglas Thrift 139
109 Douglas Thrift 154 if (!port.empty())
110 Douglas Thrift 139 {
111 Douglas Thrift 154 address += ':' + port;
112 Douglas Thrift 139 }
113    
114 Douglas Thrift 237 string path(ext::String(**node/"path")),
115     title(ext::String(**node/"title"));
116 Douglas Thrift 154 Page page(address, path, title);
117 Douglas Thrift 142 Matcher matcher;
118 Douglas Thrift 140
119 Douglas Thrift 142 if (page == matcher(string("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDou")
120     + "glas\\sThrift's\\sBlog:\\s(.+)$"))
121 Douglas Thrift 140 {
122 Douglas Thrift 156 if (Matcher("^\\w+\\s\\d{4}\\sArchives$") == matcher[1])
123 Douglas Thrift 142 {
124     page.setTitle(matcher[1]);
125    
126     if (newIndex(pages, page)) continue;
127     }
128     else continue;
129     }
130     else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"))
131     {
132 Douglas Thrift 140 page.setTitle(matcher[1]);
133    
134 Douglas Thrift 142 if (newIndex(pages, page)) continue;
135 Douglas Thrift 140 }
136 Douglas Thrift 142 else continue;
137    
138 Douglas Thrift 143 multimap<string, Page> items;
139    
140     newPages.insert(pair<string, multimap<string, Page>
141     >(page.getAddress(), items)).first->second.insert(pair<string,
142     Page>(page.getChildOf(), page));
143 Douglas Thrift 138 }
144 Douglas Thrift 126 }
145 Douglas Thrift 133
146 Douglas Thrift 142 bool SiteMapper::newIndex(vector<Page>& pages, Page& page)
147     {
148 Douglas Thrift 153 for (unsigned index(0); index < pages.size(); ++index)
149 Douglas Thrift 142 {
150     if (pages[index] == page.getAddress())
151     {
152     Matcher matcher;
153    
154     if (pages[index] == page)
155     {
156     page.setChildren(pages[index].getChildren());
157    
158     pages[index] = page;
159    
160 Douglas Thrift 156 cout << "Updated: " << page.getUrl() << '\n';
161    
162 Douglas Thrift 142 return true;
163     }
164     else if (matcher('^' + pages[index].getPath()) == page)
165     {
166     page.setChildOf(matcher[0]);
167    
168     if (matcher('^' + pages[index].getTitle() + "\\s\\|\\s(.+)$")
169     == page)
170     {
171     page.setTitle(matcher[1]);
172     }
173    
174     return newIndex(pages[index].getChildren(), page);
175     }
176     }
177     }
178    
179     return false;
180     }
181    
182 Douglas Thrift 133 void SiteMapper::newMap(const string& siteMap)
183     {
184 Douglas Thrift 144 ofstream fout(siteMap.c_str());
185    
186     fout << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
187     << "<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl"
188     << "\"?>\n"
189     << "<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">\n"
190 Douglas Thrift 154 << "<!--" << comment << "-->\n"
191 Douglas Thrift 144 << "<page>\n"
192     << "\t<title>Sitemap</title>\n"
193     << "\t<section>\n"
194     << "\t\t<list>\n";
195    
196 Douglas Thrift 153 for (unsigned index(0); index < pages.size(); ++index)
197 Douglas Thrift 140 {
198 Douglas Thrift 143 if (newPages.find(pages[index].getAddress()) != newPages.end())
199     {
200     newMap(pages[index].getChildren(), pages[index].getPath(),
201     newPages.find(pages[index].getAddress())->second);
202     }
203    
204 Douglas Thrift 144 fout << pages[index](3) << '\n';
205 Douglas Thrift 140 }
206 Douglas Thrift 144
207     fout << "\t\t</list>\n"
208     << "\t</section>\n"
209     << "</page>\n";
210    
211     fout.close();
212 Douglas Thrift 133 }
213 Douglas Thrift 143
214     void SiteMapper::newMap(vector<Page>& pages, const string& childOf,
215     multimap<string, Page>& newPages)
216     {
217 Douglas Thrift 153 for (unsigned index(0); index < pages.size(); ++index)
218 Douglas Thrift 143 {
219     newMap(pages[index].getChildren(), pages[index].getPath(), newPages);
220     }
221    
222 Douglas Thrift 153 for (multimap<string, Page>::iterator itor(newPages.lower_bound(childOf));
223 Douglas Thrift 143 itor != newPages.upper_bound(childOf); itor++)
224     {
225 Douglas Thrift 156 cout << "Added: " << itor->second.getUrl() << '\n';
226    
227 Douglas Thrift 143 pages.push_back(itor->second);
228     }
229    
230     newPages.erase(childOf);
231     }

Properties

Name Value
svn:eol-style native
svn:keywords Id