ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapper/SiteMapper.cpp
Revision: 637
Committed: 2005-12-31T21:32:21-08:00 (19 years, 5 months ago) by douglas
File size: 4982 byte(s)
Log Message:
Use ext::RedBlackMap and ext::Vector instead of std::multimap.

File Contents

# Content
1 // Site Mapper
2 //
3 // Douglas Thrift
4 //
5 // $Id$
6
7 #include <cxx/standard.hh>
8
9 #include <api/pcre/regex.hpp>
10 #include <api/exename.hpp>
11 #include <app/simple.hpp>
12
13 #include "SiteMapper.hpp"
14
15 int Main(const app::Options &options)
16 {
17 SiteMapper::program = api::GetExecutablePath().GetName();
18
19 cse::String siteIndex, siteMap;
20 api::Pcre::RegEx index(_B("^-index=(.+)$")), map(_B("^-map=(.+)$"));
21
22 _foreach (const app::ArgumentList, arg, app::GetArguments())
23 {
24 api::Pcre::RegEx::Match match;
25
26 if (match = index(*arg))
27 {
28 siteIndex = match[1];
29 }
30 else if (match = map(*arg))
31 {
32 siteMap = match[1];
33 }
34 else if (*arg == _B("-D"))
35 {
36 if (!SiteMapper::debug) SiteMapper::debug = true;
37 }
38 }
39
40 if (!siteIndex.IsEmpty() && !siteMap.IsEmpty())
41 SiteMapper mapper(siteIndex, siteMap);
42 else
43 api::Cout << _B("Usage: ") << SiteMapper::program << _B(" -index=index -map=map [-D]") << ios::NewLine;
44
45 return 0;
46 }
47
48 SiteMapper::SiteMapper(const cse::String &siteIndex, const cse::String &siteMap)
49 {
50 oldMap(siteMap);
51 newIndex(siteIndex);
52 newMap(siteMap);
53 }
54
55 cse::String SiteMapper::program;
56 bool SiteMapper::debug(false);
57
58 void SiteMapper::oldMap(const cse::String &siteMap)
59 {
60 ext::Handle<xml::Document> document(xml::Parse(siteMap));
61 ext::Handle<xml::Node> list(*document/_B("page")/_B("section")/_B("list"));
62
63 comment = *document/_B("comment()");
64
65 if (debug) api::Cerr << _B("comment = ") << comment << ios::NewLine;
66
67 oldMap(pages, list);
68 }
69
70 void SiteMapper::oldMap(ext::Vector<Page> &pages, xml::Node* list)
71 {
72 xml::NodeSet nodes(*list/_B("item"));
73
74 _foreach (xml::NodeSet, node, nodes)
75 {
76 cse::String url(**node/_B("link")/_B("@address")), title(**node/_B("link"));
77 Page page(url, title);
78 ext::Handle<xml::Node> list(**node/_B("list"));
79
80 if (!list.IsEmpty()) oldMap(page.GetChildren(), list);
81
82 pages.InsertLast(page);
83 }
84 }
85
86 void SiteMapper::newIndex(const cse::String &siteIndex)
87 {
88 ext::Handle<xml::Document> document(xml::Parse(siteIndex));
89 xml::NodeSet nodes(*document/_B("index")/_B("page"));
90
91 _foreach (xml::NodeSet, node, nodes)
92 {
93 _S<ios::String> address(**node/_B("address"));
94 cse::String port(**node/_B("port"));
95
96 if (!port.IsEmpty()) address << _B(":") << port;
97
98 cse::String path(**node/_B("path")), title(**node/_B("title"));
99 Page page(address, path, title);
100 static api::Pcre::RegEx blog(_B("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDouglas\\sThrift's\\sBlog:\\s(.+)$")), page_(_B("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"));
101
102 if (api::Pcre::RegEx::Match match = blog(page.GetTitle()))
103 {
104 static api::Pcre::RegEx archives(_B("^\\w+\\s\\d{4}\\sArchives$"));
105
106 if (archives(match[1]))
107 {
108 page.SetTitle(match[1]);
109
110 if (newIndex(pages, page))
111 continue;
112 }
113 else
114 continue;
115 }
116 else if (api::Pcre::RegEx::Match match = page_(page.GetTitle()))
117 {
118 page.SetTitle(match[1]);
119
120 if (newIndex(pages, page))
121 continue;
122 }
123 else
124 continue;
125
126 newPages[page.GetAddress()][page.GetChildOf()].InsertLast(page);
127 }
128 }
129
130 bool SiteMapper::newIndex(ext::Vector<Page> &pages, Page &page)
131 {
132 _foreach (ext::Vector<Page>, page_, pages)
133 {
134 if (*page_ == page.GetAddress())
135 {
136 if (*page_ == page)
137 {
138 page.SetChildren(page_->GetChildren());
139
140 *page_ = page;
141
142 api::Cout << _B("Updated: ") << page.GetUrl() << ios::NewLine;
143
144 return true;
145 }
146 else if (page.GetPath().StartsWithAll(page_->GetPath()))
147 {
148 page.SetChildOf(page_->GetPath());
149
150 api::Pcre::RegEx title(_S<ios::String>() << _B("^") << page_->GetTitle() << "\\s\\|\\s(.+)$");
151
152 if (api::Pcre::RegEx::Match match = title(page.GetTitle()))
153 page.SetTitle(match[1]);
154
155 return newIndex(page_->GetChildren(), page);
156 }
157 }
158 }
159
160 return false;
161 }
162
163 void SiteMapper::newMap(const cse::String &siteMap)
164 {
165 _S<api::FileWriter> file(siteMap);
166 _S<ios::FormatWriter> fout(file);
167 _S<xml::TextWriter> xml(file);
168
169 // XXX: xml::TextWriter should have this kind of stuff, no?
170 fout << ios::NewLine << _B("<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl\"?>") << ios::NewLine << _B("<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">");
171
172 xml.OutputComment(comment);
173
174 xml::ScopeElement page(xml, _B("page"));
175
176 xml.OpenElement(_B("title"));
177 xml.OutputText(_B("Sitemap"));
178 xml.CloseElement();
179
180 xml::ScopeElement section(xml, _B("section")), list(xml, _B("list"));
181
182 _foreach (ext::Vector<Page>, page, pages)
183 {
184 if (newPages.Contains(page->GetAddress()))
185 newMap(page->GetChildren(), page->GetPath(), newPages.Find(page->GetAddress())->Second());
186
187 xml << *page;
188 }
189 }
190
191 void SiteMapper::newMap(ext::Vector<Page> &pages, const cse::String &childOf, ext::RedBlackMap<cse::String, ext::Vector<Page>, LessThan> &newPages)
192 {
193 _foreach (ext::Vector<Page>, page, pages)
194 newMap(page->GetChildren(), page->GetPath(), newPages);
195
196 _foreach (ext::Vector<Page>, page, newPages[childOf])
197 {
198 api::Cout << _B("Added: ") << page->GetUrl() << ios::NewLine;
199
200 pages.InsertLast(*page);
201 }
202
203 newPages.Remove(childOf);
204 }

Properties

Name Value
svn:eol-style native
svn:keywords Id