ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapper/SiteMapper.cpp
Revision: 636
Committed: 2005-12-31T16:54:19-08:00 (19 years, 5 months ago) by douglas
File size: 5234 byte(s)
Log Message:
Hmm!

File Contents

# User Rev Content
1 Douglas Thrift 126 // Site Mapper
2     //
3     // Douglas Thrift
4     //
5     // $Id$
6    
7 douglas 635 #include <cxx/standard.hh>
8 douglas 559
9 douglas 635 #include <api/pcre/regex.hpp>
10     #include <api/exename.hpp>
11     #include <app/simple.hpp>
12 douglas 559
13 Douglas Thrift 126 #include "SiteMapper.hpp"
14    
15 douglas 559 int Main(const app::Options& options)
16     {
17     SiteMapper::program = api::GetExecutablePath().GetName();
18 Douglas Thrift 249
19 douglas 559 cse::String siteIndex, siteMap;
20     api::Pcre::RegEx index(_B("^-index=(.+)$")), map(_B("^-map=(.+)$"));
21    
22     _foreach (const app::ArgumentList, arg, app::GetArguments())
23 Douglas Thrift 249 {
24 douglas 559 api::Pcre::RegEx::Match match;
25 Douglas Thrift 132
26 douglas 559 if (match = index(*arg))
27 Douglas Thrift 249 {
28 douglas 559 siteIndex = match[1];
29 Douglas Thrift 128 }
30 douglas 559 else if (match = map(*arg))
31 Douglas Thrift 128 {
32 douglas 559 siteMap = match[1];
33 Douglas Thrift 128 }
34 douglas 559 else if (*arg == _B("-D"))
35 Douglas Thrift 133 {
36 douglas 559 if (!SiteMapper::debug) SiteMapper::debug = true;
37 Douglas Thrift 133 }
38 Douglas Thrift 128 }
39 Douglas Thrift 129
40 douglas 559 if (!siteIndex.IsEmpty() && !siteMap.IsEmpty())
41     SiteMapper mapper(siteIndex, siteMap);
42     else
43     api::Cout << _B("Usage: ") << SiteMapper::program << _B(" -index=index -map=map [-D]") << ios::NewLine;
44    
45     return 0;
46     }
47    
48     SiteMapper::SiteMapper(const cse::String& siteIndex, const cse::String& siteMap)
49 Douglas Thrift 126 {
50 Douglas Thrift 133 oldMap(siteMap);
51 Douglas Thrift 142 newIndex(siteIndex);
52 Douglas Thrift 134 newMap(siteMap);
53 Douglas Thrift 133 }
54    
55 douglas 559 cse::String SiteMapper::program;
56 douglas 348 bool SiteMapper::debug(false);
57    
58 douglas 559 void SiteMapper::oldMap(const cse::String& siteMap)
59 Douglas Thrift 133 {
60 Douglas Thrift 154 ext::Handle<xml::Document> document(xml::Parse(siteMap));
61 douglas 559 ext::Handle<xml::Node> list(*document/_B("page")/_B("section")/_B("list"));
62 Douglas Thrift 133
63 douglas 559 comment = *document/_B("comment()");
64 Douglas Thrift 133
65 douglas 559 if (debug) api::Cerr << _B("comment = ") << comment << ios::NewLine;
66 Douglas Thrift 249
67 Douglas Thrift 137 oldMap(pages, list);
68 Douglas Thrift 133 }
69    
70 douglas 285 void SiteMapper::oldMap(ext::Vector<Page>& pages, xml::Node* list)
71 Douglas Thrift 135 {
72 douglas 559 xml::NodeSet nodes(*list/_B("item"));
73 Douglas Thrift 135
74 douglas 285 _foreach (xml::NodeSet, node, nodes)
75 Douglas Thrift 135 {
76 douglas 559 cse::String url(**node/_B("link")/_B("@address")), title(**node/_B("link"));
77 Douglas Thrift 154 Page page(url, title);
78 douglas 559 ext::Handle<xml::Node> list(**node/_B("list"));
79 Douglas Thrift 141
80 douglas 559 if (!list.IsEmpty()) oldMap(page.GetChildren(), list);
81 Douglas Thrift 135
82 douglas 285 pages.InsertLast(page);
83 Douglas Thrift 135 }
84     }
85    
86 douglas 559 void SiteMapper::newIndex(const cse::String& siteIndex)
87 Douglas Thrift 133 {
88 Douglas Thrift 154 ext::Handle<xml::Document> document(xml::Parse(siteIndex));
89 douglas 559 xml::NodeSet nodes(*document/_B("index")/_B("page"));
90 Douglas Thrift 138
91 douglas 285 _foreach (xml::NodeSet, node, nodes)
92 Douglas Thrift 138 {
93 douglas 559 _S<ios::String> address(**node/_B("address"));
94     cse::String port(**node/_B("port"));
95 Douglas Thrift 139
96 douglas 559 if (!port.IsEmpty()) address << _B(":") << port;
97 Douglas Thrift 139
98 douglas 559 cse::String path(**node/_B("path")), title(**node/_B("title"));
99 Douglas Thrift 154 Page page(address, path, title);
100 douglas 559 static api::Pcre::RegEx blog(_B("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDouglas\\sThrift's\\sBlog:\\s(.+)$")), page_(_B("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"));
101 Douglas Thrift 140
102 douglas 559 if (api::Pcre::RegEx::Match match = blog(page.GetTitle()))
103 Douglas Thrift 140 {
104 douglas 559 static api::Pcre::RegEx archives(_B("^\\w+\\s\\d{4}\\sArchives$"));
105    
106     if (archives(match[1]))
107 Douglas Thrift 142 {
108 douglas 559 page.SetTitle(match[1]);
109 Douglas Thrift 142
110 douglas 559 if (newIndex(pages, page))
111     continue;
112 Douglas Thrift 142 }
113 douglas 559 else
114     continue;
115 Douglas Thrift 142 }
116 douglas 559 else if (api::Pcre::RegEx::Match match = page_(page.GetTitle()))
117 Douglas Thrift 142 {
118 douglas 559 page.SetTitle(match[1]);
119 Douglas Thrift 140
120 douglas 559 if (newIndex(pages, page))
121     continue;
122 Douglas Thrift 140 }
123 douglas 559 else
124     continue;
125 Douglas Thrift 142
126 douglas 285 std::multimap<std::string, Page> items;
127 Douglas Thrift 143
128 douglas 559 newPages.insert(std::pair<std::string, std::multimap<std::string, Page> >(page.GetAddress(), items)).first->second.insert(std::pair<std::string, Page>(page.GetChildOf(), page));
129 Douglas Thrift 138 }
130 Douglas Thrift 126 }
131 Douglas Thrift 133
132 douglas 285 bool SiteMapper::newIndex(ext::Vector<Page>& pages, Page& page)
133 Douglas Thrift 142 {
134 douglas 423 _foreach (ext::Vector<Page>, page_, pages)
135 Douglas Thrift 142 {
136 douglas 559 if (*page_ == page.GetAddress())
137 Douglas Thrift 142 {
138 douglas 285 if (*page_ == page)
139 Douglas Thrift 142 {
140 douglas 559 page.SetChildren(page_->GetChildren());
141 Douglas Thrift 142
142 douglas 285 *page_ = page;
143 Douglas Thrift 142
144 douglas 559 api::Cout << _B("Updated: ") << page.GetUrl() << ios::NewLine;
145 Douglas Thrift 156
146 Douglas Thrift 142 return true;
147     }
148 douglas 559 else if (page.GetPath().StartsWithAll(page_->GetPath()))
149 Douglas Thrift 142 {
150 douglas 559 page.SetChildOf(page_->GetPath());
151 Douglas Thrift 142
152 douglas 559 api::Pcre::RegEx title(_S<ios::String>() << _B("^") << page_->GetTitle() << "\\s\\|\\s(.+)$");
153 Douglas Thrift 142
154 douglas 559 if (api::Pcre::RegEx::Match match = title(page.GetTitle()))
155     page.SetTitle(match[1]);
156    
157     return newIndex(page_->GetChildren(), page);
158 Douglas Thrift 142 }
159     }
160     }
161    
162     return false;
163     }
164    
165 douglas 559 void SiteMapper::newMap(const cse::String& siteMap)
166 Douglas Thrift 133 {
167 douglas 559 _S<api::FileWriter> file(siteMap);
168     _S<ios::FormatWriter> fout(file);
169     _S<xml::TextWriter> xml(file);
170 Douglas Thrift 144
171 douglas 636 // XXX: xml::TextWriter should have this kind of stuff, no?
172 douglas 559 fout << ios::NewLine << _B("<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl\"?>") << ios::NewLine << _B("<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">");
173 Douglas Thrift 144
174 douglas 285 xml.OutputComment(comment);
175 Douglas Thrift 143
176 douglas 559 xml::ScopeElement page(xml, _B("page"));
177 Douglas Thrift 144
178 douglas 559 xml.OpenElement(_B("title"));
179     xml.OutputText(_B("Sitemap"));
180 douglas 285 xml.CloseElement();
181 Douglas Thrift 144
182 douglas 559 xml::ScopeElement section(xml, _B("section")), list(xml, _B("list"));
183 Douglas Thrift 143
184 douglas 423 _foreach (ext::Vector<Page>, page, pages)
185 Douglas Thrift 143 {
186 douglas 559 if (newPages.find(page->GetAddress()) != newPages.end()) newMap(page->GetChildren(), page->GetPath(), newPages.find(page->GetAddress())->second);
187 douglas 285
188     xml << *page;
189 Douglas Thrift 143 }
190 douglas 285 }
191 Douglas Thrift 143
192 douglas 559 void SiteMapper::newMap(ext::Vector<Page>& pages, const cse::String& childOf, std::multimap<std::string, Page>& newPages)
193 douglas 285 {
194 douglas 559 _foreach (ext::Vector<Page>, page, pages) newMap(page->GetChildren(), page->GetPath(), newPages);
195 douglas 285
196     typedef std::multimap<std::string, Page> MultiMap;
197    
198 douglas 559 _forall (MultiMap::const_iterator, itor, newPages.lower_bound(childOf), newPages.upper_bound(childOf))
199 Douglas Thrift 143 {
200 douglas 559 api::Cout << _B("Added: ") << itor->second.GetUrl() << ios::NewLine;
201 Douglas Thrift 156
202 douglas 285 pages.InsertLast(itor->second);
203 Douglas Thrift 143 }
204    
205     newPages.erase(childOf);
206     }

Properties

Name Value
svn:eol-style native
svn:keywords Id