ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapper/SiteMapper.cpp
Revision: 635
Committed: 2005-12-31T16:12:15-08:00 (19 years, 5 months ago) by douglas
File size: 5173 byte(s)
Log Message:
Moo!

File Contents

# Content
1 // Site Mapper
2 //
3 // Douglas Thrift
4 //
5 // $Id$
6
7 #include <cxx/standard.hh>
8
9 #include <api/pcre/regex.hpp>
10 #include <api/exename.hpp>
11 #include <app/simple.hpp>
12
13 #include "SiteMapper.hpp"
14
15 int Main(const app::Options& options)
16 {
17 SiteMapper::program = api::GetExecutablePath().GetName();
18
19 cse::String siteIndex, siteMap;
20 api::Pcre::RegEx index(_B("^-index=(.+)$")), map(_B("^-map=(.+)$"));
21
22 _foreach (const app::ArgumentList, arg, app::GetArguments())
23 {
24 api::Pcre::RegEx::Match match;
25
26 if (match = index(*arg))
27 {
28 siteIndex = match[1];
29 }
30 else if (match = map(*arg))
31 {
32 siteMap = match[1];
33 }
34 else if (*arg == _B("-D"))
35 {
36 if (!SiteMapper::debug) SiteMapper::debug = true;
37 }
38 }
39
40 if (!siteIndex.IsEmpty() && !siteMap.IsEmpty())
41 SiteMapper mapper(siteIndex, siteMap);
42 else
43 api::Cout << _B("Usage: ") << SiteMapper::program << _B(" -index=index -map=map [-D]") << ios::NewLine;
44
45 return 0;
46 }
47
48 SiteMapper::SiteMapper(const cse::String& siteIndex, const cse::String& siteMap)
49 {
50 oldMap(siteMap);
51 newIndex(siteIndex);
52 newMap(siteMap);
53 }
54
55 cse::String SiteMapper::program;
56 bool SiteMapper::debug(false);
57
58 void SiteMapper::oldMap(const cse::String& siteMap)
59 {
60 ext::Handle<xml::Document> document(xml::Parse(siteMap));
61 ext::Handle<xml::Node> list(*document/_B("page")/_B("section")/_B("list"));
62
63 comment = *document/_B("comment()");
64
65 if (debug) api::Cerr << _B("comment = ") << comment << ios::NewLine;
66
67 oldMap(pages, list);
68 }
69
70 void SiteMapper::oldMap(ext::Vector<Page>& pages, xml::Node* list)
71 {
72 xml::NodeSet nodes(*list/_B("item"));
73
74 _foreach (xml::NodeSet, node, nodes)
75 {
76 cse::String url(**node/_B("link")/_B("@address")), title(**node/_B("link"));
77 Page page(url, title);
78 ext::Handle<xml::Node> list(**node/_B("list"));
79
80 if (!list.IsEmpty()) oldMap(page.GetChildren(), list);
81
82 pages.InsertLast(page);
83 }
84 }
85
86 void SiteMapper::newIndex(const cse::String& siteIndex)
87 {
88 ext::Handle<xml::Document> document(xml::Parse(siteIndex));
89 xml::NodeSet nodes(*document/_B("index")/_B("page"));
90
91 _foreach (xml::NodeSet, node, nodes)
92 {
93 _S<ios::String> address(**node/_B("address"));
94 cse::String port(**node/_B("port"));
95
96 if (!port.IsEmpty()) address << _B(":") << port;
97
98 cse::String path(**node/_B("path")), title(**node/_B("title"));
99 Page page(address, path, title);
100 static api::Pcre::RegEx blog(_B("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDouglas\\sThrift's\\sBlog:\\s(.+)$")), page_(_B("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"));
101
102 if (api::Pcre::RegEx::Match match = blog(page.GetTitle()))
103 {
104 static api::Pcre::RegEx archives(_B("^\\w+\\s\\d{4}\\sArchives$"));
105
106 if (archives(match[1]))
107 {
108 page.SetTitle(match[1]);
109
110 if (newIndex(pages, page))
111 continue;
112 }
113 else
114 continue;
115 }
116 else if (api::Pcre::RegEx::Match match = page_(page.GetTitle()))
117 {
118 page.SetTitle(match[1]);
119
120 if (newIndex(pages, page))
121 continue;
122 }
123 else
124 continue;
125
126 std::multimap<std::string, Page> items;
127
128 newPages.insert(std::pair<std::string, std::multimap<std::string, Page> >(page.GetAddress(), items)).first->second.insert(std::pair<std::string, Page>(page.GetChildOf(), page));
129 }
130 }
131
132 bool SiteMapper::newIndex(ext::Vector<Page>& pages, Page& page)
133 {
134 _foreach (ext::Vector<Page>, page_, pages)
135 {
136 if (*page_ == page.GetAddress())
137 {
138 if (*page_ == page)
139 {
140 page.SetChildren(page_->GetChildren());
141
142 *page_ = page;
143
144 api::Cout << _B("Updated: ") << page.GetUrl() << ios::NewLine;
145
146 return true;
147 }
148 else if (page.GetPath().StartsWithAll(page_->GetPath()))
149 {
150 page.SetChildOf(page_->GetPath());
151
152 api::Pcre::RegEx title(_S<ios::String>() << _B("^") << page_->GetTitle() << "\\s\\|\\s(.+)$");
153
154 if (api::Pcre::RegEx::Match match = title(page.GetTitle()))
155 page.SetTitle(match[1]);
156
157 return newIndex(page_->GetChildren(), page);
158 }
159 }
160 }
161
162 return false;
163 }
164
165 void SiteMapper::newMap(const cse::String& siteMap)
166 {
167 _S<api::FileWriter> file(siteMap);
168 _S<ios::FormatWriter> fout(file);
169 _S<xml::TextWriter> xml(file);
170
171 fout << ios::NewLine << _B("<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl\"?>") << ios::NewLine << _B("<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">");
172
173 xml.OutputComment(comment);
174
175 xml::ScopeElement page(xml, _B("page"));
176
177 xml.OpenElement(_B("title"));
178 xml.OutputText(_B("Sitemap"));
179 xml.CloseElement();
180
181 xml::ScopeElement section(xml, _B("section")), list(xml, _B("list"));
182
183 _foreach (ext::Vector<Page>, page, pages)
184 {
185 if (newPages.find(page->GetAddress()) != newPages.end()) newMap(page->GetChildren(), page->GetPath(), newPages.find(page->GetAddress())->second);
186
187 xml << *page;
188 }
189 }
190
191 void SiteMapper::newMap(ext::Vector<Page>& pages, const cse::String& childOf, std::multimap<std::string, Page>& newPages)
192 {
193 _foreach (ext::Vector<Page>, page, pages) newMap(page->GetChildren(), page->GetPath(), newPages);
194
195 typedef std::multimap<std::string, Page> MultiMap;
196
197 _forall (MultiMap::const_iterator, itor, newPages.lower_bound(childOf), newPages.upper_bound(childOf))
198 {
199 api::Cout << _B("Added: ") << itor->second.GetUrl() << ios::NewLine;
200
201 pages.InsertLast(itor->second);
202 }
203
204 newPages.erase(childOf);
205 }

Properties

Name Value
svn:eol-style native
svn:keywords Id