ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapper/SiteMapper.cpp
Revision: 637
Committed: 2005-12-31T21:32:21-08:00 (19 years, 5 months ago) by douglas
File size: 4982 byte(s)
Log Message:
Use ext::RedBlackMap and ext::Vector instead of std::multimap.

File Contents

# User Rev Content
1 Douglas Thrift 126 // Site Mapper
2     //
3     // Douglas Thrift
4     //
5     // $Id$
6    
7 douglas 635 #include <cxx/standard.hh>
8 douglas 559
9 douglas 635 #include <api/pcre/regex.hpp>
10     #include <api/exename.hpp>
11     #include <app/simple.hpp>
12 douglas 559
13 Douglas Thrift 126 #include "SiteMapper.hpp"
14    
15 douglas 637 int Main(const app::Options &options)
16 douglas 559 {
17     SiteMapper::program = api::GetExecutablePath().GetName();
18 Douglas Thrift 249
19 douglas 559 cse::String siteIndex, siteMap;
20     api::Pcre::RegEx index(_B("^-index=(.+)$")), map(_B("^-map=(.+)$"));
21    
22     _foreach (const app::ArgumentList, arg, app::GetArguments())
23 Douglas Thrift 249 {
24 douglas 559 api::Pcre::RegEx::Match match;
25 Douglas Thrift 132
26 douglas 559 if (match = index(*arg))
27 Douglas Thrift 249 {
28 douglas 559 siteIndex = match[1];
29 Douglas Thrift 128 }
30 douglas 559 else if (match = map(*arg))
31 Douglas Thrift 128 {
32 douglas 559 siteMap = match[1];
33 Douglas Thrift 128 }
34 douglas 559 else if (*arg == _B("-D"))
35 Douglas Thrift 133 {
36 douglas 559 if (!SiteMapper::debug) SiteMapper::debug = true;
37 Douglas Thrift 133 }
38 Douglas Thrift 128 }
39 Douglas Thrift 129
40 douglas 559 if (!siteIndex.IsEmpty() && !siteMap.IsEmpty())
41     SiteMapper mapper(siteIndex, siteMap);
42     else
43     api::Cout << _B("Usage: ") << SiteMapper::program << _B(" -index=index -map=map [-D]") << ios::NewLine;
44    
45     return 0;
46     }
47    
48 douglas 637 SiteMapper::SiteMapper(const cse::String &siteIndex, const cse::String &siteMap)
49 Douglas Thrift 126 {
50 Douglas Thrift 133 oldMap(siteMap);
51 Douglas Thrift 142 newIndex(siteIndex);
52 Douglas Thrift 134 newMap(siteMap);
53 Douglas Thrift 133 }
54    
55 douglas 559 cse::String SiteMapper::program;
56 douglas 348 bool SiteMapper::debug(false);
57    
58 douglas 637 void SiteMapper::oldMap(const cse::String &siteMap)
59 Douglas Thrift 133 {
60 Douglas Thrift 154 ext::Handle<xml::Document> document(xml::Parse(siteMap));
61 douglas 559 ext::Handle<xml::Node> list(*document/_B("page")/_B("section")/_B("list"));
62 Douglas Thrift 133
63 douglas 559 comment = *document/_B("comment()");
64 Douglas Thrift 133
65 douglas 559 if (debug) api::Cerr << _B("comment = ") << comment << ios::NewLine;
66 Douglas Thrift 249
67 Douglas Thrift 137 oldMap(pages, list);
68 Douglas Thrift 133 }
69    
70 douglas 637 void SiteMapper::oldMap(ext::Vector<Page> &pages, xml::Node* list)
71 Douglas Thrift 135 {
72 douglas 559 xml::NodeSet nodes(*list/_B("item"));
73 Douglas Thrift 135
74 douglas 285 _foreach (xml::NodeSet, node, nodes)
75 Douglas Thrift 135 {
76 douglas 559 cse::String url(**node/_B("link")/_B("@address")), title(**node/_B("link"));
77 Douglas Thrift 154 Page page(url, title);
78 douglas 559 ext::Handle<xml::Node> list(**node/_B("list"));
79 Douglas Thrift 141
80 douglas 559 if (!list.IsEmpty()) oldMap(page.GetChildren(), list);
81 Douglas Thrift 135
82 douglas 285 pages.InsertLast(page);
83 Douglas Thrift 135 }
84     }
85    
86 douglas 637 void SiteMapper::newIndex(const cse::String &siteIndex)
87 Douglas Thrift 133 {
88 Douglas Thrift 154 ext::Handle<xml::Document> document(xml::Parse(siteIndex));
89 douglas 559 xml::NodeSet nodes(*document/_B("index")/_B("page"));
90 Douglas Thrift 138
91 douglas 285 _foreach (xml::NodeSet, node, nodes)
92 Douglas Thrift 138 {
93 douglas 559 _S<ios::String> address(**node/_B("address"));
94     cse::String port(**node/_B("port"));
95 Douglas Thrift 139
96 douglas 559 if (!port.IsEmpty()) address << _B(":") << port;
97 Douglas Thrift 139
98 douglas 559 cse::String path(**node/_B("path")), title(**node/_B("title"));
99 Douglas Thrift 154 Page page(address, path, title);
100 douglas 559 static api::Pcre::RegEx blog(_B("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDouglas\\sThrift's\\sBlog:\\s(.+)$")), page_(_B("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"));
101 Douglas Thrift 140
102 douglas 559 if (api::Pcre::RegEx::Match match = blog(page.GetTitle()))
103 Douglas Thrift 140 {
104 douglas 559 static api::Pcre::RegEx archives(_B("^\\w+\\s\\d{4}\\sArchives$"));
105    
106     if (archives(match[1]))
107 Douglas Thrift 142 {
108 douglas 559 page.SetTitle(match[1]);
109 Douglas Thrift 142
110 douglas 559 if (newIndex(pages, page))
111     continue;
112 Douglas Thrift 142 }
113 douglas 559 else
114     continue;
115 Douglas Thrift 142 }
116 douglas 559 else if (api::Pcre::RegEx::Match match = page_(page.GetTitle()))
117 Douglas Thrift 142 {
118 douglas 559 page.SetTitle(match[1]);
119 Douglas Thrift 140
120 douglas 559 if (newIndex(pages, page))
121     continue;
122 Douglas Thrift 140 }
123 douglas 559 else
124     continue;
125 Douglas Thrift 142
126 douglas 637 newPages[page.GetAddress()][page.GetChildOf()].InsertLast(page);
127 Douglas Thrift 138 }
128 Douglas Thrift 126 }
129 Douglas Thrift 133
130 douglas 637 bool SiteMapper::newIndex(ext::Vector<Page> &pages, Page &page)
131 Douglas Thrift 142 {
132 douglas 423 _foreach (ext::Vector<Page>, page_, pages)
133 Douglas Thrift 142 {
134 douglas 559 if (*page_ == page.GetAddress())
135 Douglas Thrift 142 {
136 douglas 285 if (*page_ == page)
137 Douglas Thrift 142 {
138 douglas 559 page.SetChildren(page_->GetChildren());
139 Douglas Thrift 142
140 douglas 285 *page_ = page;
141 Douglas Thrift 142
142 douglas 559 api::Cout << _B("Updated: ") << page.GetUrl() << ios::NewLine;
143 Douglas Thrift 156
144 Douglas Thrift 142 return true;
145     }
146 douglas 559 else if (page.GetPath().StartsWithAll(page_->GetPath()))
147 Douglas Thrift 142 {
148 douglas 559 page.SetChildOf(page_->GetPath());
149 Douglas Thrift 142
150 douglas 559 api::Pcre::RegEx title(_S<ios::String>() << _B("^") << page_->GetTitle() << "\\s\\|\\s(.+)$");
151 Douglas Thrift 142
152 douglas 559 if (api::Pcre::RegEx::Match match = title(page.GetTitle()))
153     page.SetTitle(match[1]);
154    
155     return newIndex(page_->GetChildren(), page);
156 Douglas Thrift 142 }
157     }
158     }
159    
160     return false;
161     }
162    
163 douglas 637 void SiteMapper::newMap(const cse::String &siteMap)
164 Douglas Thrift 133 {
165 douglas 559 _S<api::FileWriter> file(siteMap);
166     _S<ios::FormatWriter> fout(file);
167     _S<xml::TextWriter> xml(file);
168 Douglas Thrift 144
169 douglas 636 // XXX: xml::TextWriter should have this kind of stuff, no?
170 douglas 559 fout << ios::NewLine << _B("<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl\"?>") << ios::NewLine << _B("<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">");
171 Douglas Thrift 144
172 douglas 285 xml.OutputComment(comment);
173 Douglas Thrift 143
174 douglas 559 xml::ScopeElement page(xml, _B("page"));
175 Douglas Thrift 144
176 douglas 559 xml.OpenElement(_B("title"));
177     xml.OutputText(_B("Sitemap"));
178 douglas 285 xml.CloseElement();
179 Douglas Thrift 144
180 douglas 559 xml::ScopeElement section(xml, _B("section")), list(xml, _B("list"));
181 Douglas Thrift 143
182 douglas 423 _foreach (ext::Vector<Page>, page, pages)
183 Douglas Thrift 143 {
184 douglas 637 if (newPages.Contains(page->GetAddress()))
185     newMap(page->GetChildren(), page->GetPath(), newPages.Find(page->GetAddress())->Second());
186 douglas 285
187     xml << *page;
188 Douglas Thrift 143 }
189 douglas 285 }
190 Douglas Thrift 143
191 douglas 637 void SiteMapper::newMap(ext::Vector<Page> &pages, const cse::String &childOf, ext::RedBlackMap<cse::String, ext::Vector<Page>, LessThan> &newPages)
192 douglas 285 {
193 douglas 637 _foreach (ext::Vector<Page>, page, pages)
194     newMap(page->GetChildren(), page->GetPath(), newPages);
195 douglas 285
196 douglas 637 _foreach (ext::Vector<Page>, page, newPages[childOf])
197 Douglas Thrift 143 {
198 douglas 637 api::Cout << _B("Added: ") << page->GetUrl() << ios::NewLine;
199 Douglas Thrift 156
200 douglas 637 pages.InsertLast(*page);
201 Douglas Thrift 143 }
202    
203 douglas 637 newPages.Remove(childOf);
204 Douglas Thrift 143 }

Properties

Name Value
svn:eol-style native
svn:keywords Id