ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapper/SiteMapper.cpp
Revision: 259
Committed: 2004-10-01T17:19:10-07:00 (20 years, 8 months ago) by Douglas Thrift
File size: 4949 byte(s)
Log Message:
Worky, worky!

File Contents

# User Rev Content
1 Douglas Thrift 126 // Site Mapper
2     //
3     // Douglas Thrift
4     //
5     // $Id$
6    
7     #include "SiteMapper.hpp"
8     #include "Matcher.hpp"
9 Douglas Thrift 128 #include "Page.hpp"
10 Douglas Thrift 126
11 Douglas Thrift 249 #include <menes-api/exename.hpp>
12     #include <menes-app/application.hpp>
13    
14 Douglas Thrift 128 string program;
15 Douglas Thrift 152 bool debug(false);
16 Douglas Thrift 128
17 Douglas Thrift 249 struct SiteMapperCommand : public app::Application
18 Douglas Thrift 126 {
19 Douglas Thrift 249 virtual int Run(const app::ArgumentList& args)
20     {
21 Douglas Thrift 259 program = api::GetExecutablePath().GetName();
22 Douglas Thrift 132
23 Douglas Thrift 249 string siteIndex, siteMap;
24 Douglas Thrift 126
25 Douglas Thrift 249 for (size_t index(0); index < args.GetSize(); index++)
26     {
27     string arg(args[index]);
28     Matcher matcher;
29 Douglas Thrift 126
30 Douglas Thrift 249 if (arg == matcher("^-index=(.*)$"))
31     {
32     siteIndex = matcher[1];
33     }
34     else if (arg == matcher("^-map=(.*)$"))
35     {
36     siteMap = matcher[1];
37     }
38     else if (arg == "-D")
39     {
40     if (!debug) debug = true;
41     }
42 Douglas Thrift 128 }
43 Douglas Thrift 249
44     if (!siteIndex.empty() && !siteMap.empty())
45 Douglas Thrift 128 {
46 Douglas Thrift 249 SiteMapper mapper(siteIndex, siteMap);
47 Douglas Thrift 128 }
48 Douglas Thrift 249 else
49 Douglas Thrift 133 {
50 Douglas Thrift 249 cout << "Usage: " << program << " -index=index -map=map [-D]\n";
51 Douglas Thrift 133 }
52 Douglas Thrift 126
53 Douglas Thrift 249 return 0;
54 Douglas Thrift 128 }
55 Douglas Thrift 249 } mapper;
56 Douglas Thrift 129
57 Douglas Thrift 126 SiteMapper::SiteMapper(const string& siteIndex, const string& siteMap)
58     {
59 Douglas Thrift 133 oldMap(siteMap);
60 Douglas Thrift 142 newIndex(siteIndex);
61 Douglas Thrift 134 newMap(siteMap);
62 Douglas Thrift 133 }
63    
64     void SiteMapper::oldMap(const string& siteMap)
65     {
66 Douglas Thrift 154 ext::Handle<xml::Document> document(xml::Parse(siteMap));
67     ext::Handle<xml::Node> list(*document/"page"/"section"/"list");
68 Douglas Thrift 133
69 Douglas Thrift 237 comment = ext::String(*document/"comment()");
70 Douglas Thrift 133
71 Douglas Thrift 249 if (debug) cerr << "comment = " << comment << '\n';
72    
73 Douglas Thrift 137 oldMap(pages, list);
74 Douglas Thrift 133 }
75    
76 Douglas Thrift 154 void SiteMapper::oldMap(vector<Page>& pages, xml::Node* list)
77 Douglas Thrift 135 {
78 Douglas Thrift 154 xml::NodeSet nodes(*list/"item");
79 Douglas Thrift 135
80 Douglas Thrift 154 for (xml::NodeSet::Iterator node(nodes.Begin()); node != nodes.End();
81     ++node)
82 Douglas Thrift 135 {
83 Douglas Thrift 237 string url(ext::String(**node/"link"/"@address")),
84     title(ext::String(**node/"link"));
85 Douglas Thrift 154 Page page(url, title);
86     ext::Handle<xml::Node> list(**node/"list");
87 Douglas Thrift 141
88 Douglas Thrift 154 if (!list.IsEmpty()) oldMap(page.getChildren(), list);
89 Douglas Thrift 135
90     pages.push_back(page);
91     }
92     }
93    
94 Douglas Thrift 142 void SiteMapper::newIndex(const string& siteIndex)
95 Douglas Thrift 133 {
96 Douglas Thrift 154 ext::Handle<xml::Document> document(xml::Parse(siteIndex));
97     xml::NodeSet nodes(*document/"index"/"page");
98 Douglas Thrift 138
99 Douglas Thrift 154 for (xml::NodeSet::Iterator node(nodes.Begin()); node != nodes.End();
100     ++node)
101 Douglas Thrift 138 {
102 Douglas Thrift 249 string address(ext::String(**node/"address")),
103     port(ext::String(**node/"port"));
104 Douglas Thrift 139
105 Douglas Thrift 154 if (!port.empty())
106 Douglas Thrift 139 {
107 Douglas Thrift 154 address += ':' + port;
108 Douglas Thrift 139 }
109    
110 Douglas Thrift 237 string path(ext::String(**node/"path")),
111     title(ext::String(**node/"title"));
112 Douglas Thrift 154 Page page(address, path, title);
113 Douglas Thrift 142 Matcher matcher;
114 Douglas Thrift 140
115 Douglas Thrift 142 if (page == matcher(string("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDou")
116     + "glas\\sThrift's\\sBlog:\\s(.+)$"))
117 Douglas Thrift 140 {
118 Douglas Thrift 156 if (Matcher("^\\w+\\s\\d{4}\\sArchives$") == matcher[1])
119 Douglas Thrift 142 {
120     page.setTitle(matcher[1]);
121    
122     if (newIndex(pages, page)) continue;
123     }
124     else continue;
125     }
126     else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"))
127     {
128 Douglas Thrift 140 page.setTitle(matcher[1]);
129    
130 Douglas Thrift 142 if (newIndex(pages, page)) continue;
131 Douglas Thrift 140 }
132 Douglas Thrift 142 else continue;
133    
134 Douglas Thrift 143 multimap<string, Page> items;
135    
136     newPages.insert(pair<string, multimap<string, Page>
137     >(page.getAddress(), items)).first->second.insert(pair<string,
138     Page>(page.getChildOf(), page));
139 Douglas Thrift 138 }
140 Douglas Thrift 126 }
141 Douglas Thrift 133
142 Douglas Thrift 142 bool SiteMapper::newIndex(vector<Page>& pages, Page& page)
143     {
144 Douglas Thrift 153 for (unsigned index(0); index < pages.size(); ++index)
145 Douglas Thrift 142 {
146     if (pages[index] == page.getAddress())
147     {
148     Matcher matcher;
149    
150     if (pages[index] == page)
151     {
152     page.setChildren(pages[index].getChildren());
153    
154     pages[index] = page;
155    
156 Douglas Thrift 156 cout << "Updated: " << page.getUrl() << '\n';
157    
158 Douglas Thrift 142 return true;
159     }
160     else if (matcher('^' + pages[index].getPath()) == page)
161     {
162     page.setChildOf(matcher[0]);
163    
164     if (matcher('^' + pages[index].getTitle() + "\\s\\|\\s(.+)$")
165     == page)
166     {
167     page.setTitle(matcher[1]);
168     }
169    
170     return newIndex(pages[index].getChildren(), page);
171     }
172     }
173     }
174    
175     return false;
176     }
177    
178 Douglas Thrift 133 void SiteMapper::newMap(const string& siteMap)
179     {
180 Douglas Thrift 144 ofstream fout(siteMap.c_str());
181    
182     fout << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
183     << "<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl"
184     << "\"?>\n"
185     << "<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">\n"
186 Douglas Thrift 154 << "<!--" << comment << "-->\n"
187 Douglas Thrift 144 << "<page>\n"
188     << "\t<title>Sitemap</title>\n"
189     << "\t<section>\n"
190     << "\t\t<list>\n";
191    
192 Douglas Thrift 153 for (unsigned index(0); index < pages.size(); ++index)
193 Douglas Thrift 140 {
194 Douglas Thrift 143 if (newPages.find(pages[index].getAddress()) != newPages.end())
195     {
196     newMap(pages[index].getChildren(), pages[index].getPath(),
197     newPages.find(pages[index].getAddress())->second);
198     }
199    
200 Douglas Thrift 144 fout << pages[index](3) << '\n';
201 Douglas Thrift 140 }
202 Douglas Thrift 144
203     fout << "\t\t</list>\n"
204     << "\t</section>\n"
205     << "</page>\n";
206    
207     fout.close();
208 Douglas Thrift 133 }
209 Douglas Thrift 143
210     void SiteMapper::newMap(vector<Page>& pages, const string& childOf,
211     multimap<string, Page>& newPages)
212     {
213 Douglas Thrift 153 for (unsigned index(0); index < pages.size(); ++index)
214 Douglas Thrift 143 {
215     newMap(pages[index].getChildren(), pages[index].getPath(), newPages);
216     }
217    
218 Douglas Thrift 153 for (multimap<string, Page>::iterator itor(newPages.lower_bound(childOf));
219 Douglas Thrift 143 itor != newPages.upper_bound(childOf); itor++)
220     {
221 Douglas Thrift 156 cout << "Added: " << itor->second.getUrl() << '\n';
222    
223 Douglas Thrift 143 pages.push_back(itor->second);
224     }
225    
226     newPages.erase(childOf);
227     }

Properties

Name Value
svn:eol-style native
svn:keywords Id