ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapper/SiteMapper.cpp
Revision: 303
Committed: 2004-12-12T17:58:03-08:00 (20 years, 6 months ago) by douglas
File size: 4750 byte(s)
Log Message:
Seems to almost work.

File Contents

# User Rev Content
1 Douglas Thrift 126 // Site Mapper
2     //
3     // Douglas Thrift
4     //
5     // $Id$
6    
7     #include "SiteMapper.hpp"
8     #include "Matcher.hpp"
9 Douglas Thrift 128 #include "Page.hpp"
10 Douglas Thrift 126
11 Douglas Thrift 249 #include <menes-api/exename.hpp>
12     #include <menes-app/application.hpp>
13    
14 douglas 285 ext::String program;
15 Douglas Thrift 152 bool debug(false);
16 Douglas Thrift 128
17 Douglas Thrift 249 struct SiteMapperCommand : public app::Application
18 Douglas Thrift 126 {
19 Douglas Thrift 249 virtual int Run(const app::ArgumentList& args)
20     {
21 Douglas Thrift 259 program = api::GetExecutablePath().GetName();
22 Douglas Thrift 132
23 douglas 285 ext::String siteIndex, siteMap;
24 Douglas Thrift 126
25 douglas 285 _foreach (app::ArgumentList, arg, args)
26 Douglas Thrift 249 {
27     Matcher matcher;
28 Douglas Thrift 126
29 douglas 285 if (*arg == matcher("^-index=(.*)$"))
30 Douglas Thrift 249 {
31     siteIndex = matcher[1];
32     }
33 douglas 285 else if (*arg == matcher("^-map=(.*)$"))
34 Douglas Thrift 249 {
35     siteMap = matcher[1];
36     }
37 douglas 285 else if (*arg == "-D")
38 Douglas Thrift 249 {
39     if (!debug) debug = true;
40     }
41 Douglas Thrift 128 }
42 Douglas Thrift 249
43 douglas 285 if (!siteIndex.IsEmpty() && !siteMap.IsEmpty())
44 Douglas Thrift 128 {
45 Douglas Thrift 249 SiteMapper mapper(siteIndex, siteMap);
46 Douglas Thrift 128 }
47 Douglas Thrift 249 else
48 Douglas Thrift 133 {
49 douglas 285 api::Cout << "Usage: " << program << " -index=index -map=map [-D]\n";
50 Douglas Thrift 133 }
51 Douglas Thrift 126
52 Douglas Thrift 249 return 0;
53 Douglas Thrift 128 }
54 Douglas Thrift 249 } mapper;
55 Douglas Thrift 129
56 douglas 285 SiteMapper::SiteMapper(const ext::String& siteIndex, const ext::String& siteMap)
57 Douglas Thrift 126 {
58 Douglas Thrift 133 oldMap(siteMap);
59 Douglas Thrift 142 newIndex(siteIndex);
60 Douglas Thrift 134 newMap(siteMap);
61 Douglas Thrift 133 }
62    
63 douglas 285 void SiteMapper::oldMap(const ext::String& siteMap)
64 Douglas Thrift 133 {
65 Douglas Thrift 154 ext::Handle<xml::Document> document(xml::Parse(siteMap));
66     ext::Handle<xml::Node> list(*document/"page"/"section"/"list");
67 Douglas Thrift 133
68 douglas 285 comment = *document/"comment()";
69 Douglas Thrift 133
70 douglas 303 if (debug) api::Cerr << "comment = " << comment << ios::NewLine;
71 Douglas Thrift 249
72 Douglas Thrift 137 oldMap(pages, list);
73 Douglas Thrift 133 }
74    
75 douglas 285 void SiteMapper::oldMap(ext::Vector<Page>& pages, xml::Node* list)
76 Douglas Thrift 135 {
77 Douglas Thrift 154 xml::NodeSet nodes(*list/"item");
78 Douglas Thrift 135
79 douglas 285 _foreach (xml::NodeSet, node, nodes)
80 Douglas Thrift 135 {
81 douglas 285 ext::String url(**node/"link"/"@address"), title(**node/"link");
82 Douglas Thrift 154 Page page(url, title);
83     ext::Handle<xml::Node> list(**node/"list");
84 Douglas Thrift 141
85 Douglas Thrift 154 if (!list.IsEmpty()) oldMap(page.getChildren(), list);
86 Douglas Thrift 135
87 douglas 285 pages.InsertLast(page);
88 Douglas Thrift 135 }
89     }
90    
91 douglas 285 void SiteMapper::newIndex(const ext::String& siteIndex)
92 Douglas Thrift 133 {
93 Douglas Thrift 154 ext::Handle<xml::Document> document(xml::Parse(siteIndex));
94     xml::NodeSet nodes(*document/"index"/"page");
95 Douglas Thrift 138
96 douglas 285 _foreach (xml::NodeSet, node, nodes)
97 Douglas Thrift 138 {
98 douglas 285 ios::String address(**node/"address");
99     ext::String port(**node/"port");
100 Douglas Thrift 139
101 douglas 285 if (!port.IsEmpty()) address << ":" << port;
102 Douglas Thrift 139
103 douglas 285 ext::String path(**node/"path"), title(**node/"title");
104 Douglas Thrift 154 Page page(address, path, title);
105 Douglas Thrift 142 Matcher matcher;
106 Douglas Thrift 140
107 douglas 285 if (page == matcher("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDouglas\\sThrift's\\sBlog:\\s(.+)$"))
108 Douglas Thrift 140 {
109 Douglas Thrift 156 if (Matcher("^\\w+\\s\\d{4}\\sArchives$") == matcher[1])
110 Douglas Thrift 142 {
111     page.setTitle(matcher[1]);
112    
113     if (newIndex(pages, page)) continue;
114     }
115     else continue;
116     }
117     else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"))
118     {
119 Douglas Thrift 140 page.setTitle(matcher[1]);
120    
121 Douglas Thrift 142 if (newIndex(pages, page)) continue;
122 Douglas Thrift 140 }
123 Douglas Thrift 142 else continue;
124    
125 douglas 285 std::multimap<std::string, Page> items;
126 Douglas Thrift 143
127 douglas 285 newPages.insert(std::pair<std::string, std::multimap<std::string, Page> >(page.getAddress(), items)).first->second.insert(std::pair<std::string, Page>(page.getChildOf(), page));
128 Douglas Thrift 138 }
129 Douglas Thrift 126 }
130 Douglas Thrift 133
131 douglas 285 bool SiteMapper::newIndex(ext::Vector<Page>& pages, Page& page)
132 Douglas Thrift 142 {
133 douglas 285 _mforeach (ext::Vector<Page>, page_, pages)
134 Douglas Thrift 142 {
135 douglas 285 if (*page_ == page.getAddress())
136 Douglas Thrift 142 {
137     Matcher matcher;
138    
139 douglas 285 if (*page_ == page)
140 Douglas Thrift 142 {
141 douglas 285 page.setChildren(page_->getChildren());
142 Douglas Thrift 142
143 douglas 285 *page_ = page;
144 Douglas Thrift 142
145 douglas 303 api::Cout << "Updated: " << page.getUrl() << ios::NewLine;
146 Douglas Thrift 156
147 Douglas Thrift 142 return true;
148     }
149 douglas 285 else if (matcher("^" + page_->getPath()) == page)
150 Douglas Thrift 142 {
151     page.setChildOf(matcher[0]);
152    
153 douglas 285 if (matcher("^" + page_->getTitle() + "\\s\\|\\s(.+)$") == page) page.setTitle(matcher[1]);
154 Douglas Thrift 142
155 douglas 285 return newIndex(page_->getChildren(), page);
156 Douglas Thrift 142 }
157     }
158     }
159    
160     return false;
161     }
162    
163 douglas 285 void SiteMapper::newMap(const ext::String& siteMap)
164 Douglas Thrift 133 {
165 douglas 285 api::FileWriter file(siteMap);
166     ios::FormatWriter fout(file);
167     xml::TextWriter xml(file);
168 Douglas Thrift 144
169 douglas 285 fout << ios::NewLine << "<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl\"?>" << ios::NewLine << "<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">";
170 Douglas Thrift 144
171 douglas 285 xml.OutputComment(comment);
172 Douglas Thrift 143
173 douglas 285 xml::ScopeElement page(xml, "page");
174 Douglas Thrift 144
175 douglas 285 xml.OpenElement("title");
176     xml.OutputText("Sitemap");
177     xml.CloseElement();
178 Douglas Thrift 144
179 douglas 285 xml::ScopeElement section(xml, "section"), list(xml, "list");
180 Douglas Thrift 143
181 douglas 285 _mforeach (ext::Vector<Page>, page, pages)
182 Douglas Thrift 143 {
183 douglas 285 if (newPages.find(page->getAddress()) != newPages.end()) newMap(page->getChildren(), page->getPath(), newPages.find(page->getAddress())->second);
184    
185     xml << *page;
186 Douglas Thrift 143 }
187 douglas 285 }
188 Douglas Thrift 143
189 douglas 285 void SiteMapper::newMap(ext::Vector<Page>& pages, const ext::String& childOf, std::multimap<std::string, Page>& newPages)
190     {
191     _mforeach (ext::Vector<Page>, page, pages) newMap(page->getChildren(), page->getPath(), newPages);
192    
193     typedef std::multimap<std::string, Page> MultiMap;
194    
195     _for (MultiMap::const_iterator, itor, newPages.lower_bound(childOf), newPages.upper_bound(childOf))
196 Douglas Thrift 143 {
197 douglas 303 api::Cout << "Added: " << itor->second.getUrl() << ios::NewLine;
198 Douglas Thrift 156
199 douglas 285 pages.InsertLast(itor->second);
200 Douglas Thrift 143 }
201    
202     newPages.erase(childOf);
203     }

Properties

Name Value
svn:eol-style native
svn:keywords Id