ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/SiteMapper/SiteMapper.cpp
Revision: 328
Committed: 2004-12-13T20:13:49-08:00 (20 years, 6 months ago) by douglas
File size: 4767 byte(s)
Log Message:
Use Matcher the new way.

File Contents

# User Rev Content
1 Douglas Thrift 126 // Site Mapper
2     //
3     // Douglas Thrift
4     //
5     // $Id$
6    
7     #include "SiteMapper.hpp"
8 douglas 328 #include "Matcher/Matcher.hpp"
9 Douglas Thrift 126
10 Douglas Thrift 249 #include <menes-api/exename.hpp>
11     #include <menes-app/application.hpp>
12    
13 douglas 285 ext::String program;
14 Douglas Thrift 152 bool debug(false);
15 Douglas Thrift 128
16 Douglas Thrift 249 struct SiteMapperCommand : public app::Application
17 Douglas Thrift 126 {
18 Douglas Thrift 249 virtual int Run(const app::ArgumentList& args)
19     {
20 Douglas Thrift 259 program = api::GetExecutablePath().GetName();
21 Douglas Thrift 132
22 douglas 285 ext::String siteIndex, siteMap;
23 Douglas Thrift 126
24 douglas 285 _foreach (app::ArgumentList, arg, args)
25 Douglas Thrift 249 {
26     Matcher matcher;
27 Douglas Thrift 126
28 douglas 285 if (*arg == matcher("^-index=(.*)$"))
29 Douglas Thrift 249 {
30     siteIndex = matcher[1];
31     }
32 douglas 285 else if (*arg == matcher("^-map=(.*)$"))
33 Douglas Thrift 249 {
34     siteMap = matcher[1];
35     }
36 douglas 285 else if (*arg == "-D")
37 Douglas Thrift 249 {
38     if (!debug) debug = true;
39     }
40 Douglas Thrift 128 }
41 Douglas Thrift 249
42 douglas 285 if (!siteIndex.IsEmpty() && !siteMap.IsEmpty())
43 Douglas Thrift 128 {
44 Douglas Thrift 249 SiteMapper mapper(siteIndex, siteMap);
45 Douglas Thrift 128 }
46 Douglas Thrift 249 else
47 Douglas Thrift 133 {
48 douglas 285 api::Cout << "Usage: " << program << " -index=index -map=map [-D]\n";
49 Douglas Thrift 133 }
50 Douglas Thrift 126
51 Douglas Thrift 249 return 0;
52 Douglas Thrift 128 }
53 Douglas Thrift 249 } mapper;
54 Douglas Thrift 129
55 douglas 285 SiteMapper::SiteMapper(const ext::String& siteIndex, const ext::String& siteMap)
56 Douglas Thrift 126 {
57 Douglas Thrift 133 oldMap(siteMap);
58 Douglas Thrift 142 newIndex(siteIndex);
59 Douglas Thrift 134 newMap(siteMap);
60 Douglas Thrift 133 }
61    
62 douglas 285 void SiteMapper::oldMap(const ext::String& siteMap)
63 Douglas Thrift 133 {
64 Douglas Thrift 154 ext::Handle<xml::Document> document(xml::Parse(siteMap));
65     ext::Handle<xml::Node> list(*document/"page"/"section"/"list");
66 Douglas Thrift 133
67 douglas 285 comment = *document/"comment()";
68 Douglas Thrift 133
69 douglas 303 if (debug) api::Cerr << "comment = " << comment << ios::NewLine;
70 Douglas Thrift 249
71 Douglas Thrift 137 oldMap(pages, list);
72 Douglas Thrift 133 }
73    
74 douglas 285 void SiteMapper::oldMap(ext::Vector<Page>& pages, xml::Node* list)
75 Douglas Thrift 135 {
76 Douglas Thrift 154 xml::NodeSet nodes(*list/"item");
77 Douglas Thrift 135
78 douglas 285 _foreach (xml::NodeSet, node, nodes)
79 Douglas Thrift 135 {
80 douglas 285 ext::String url(**node/"link"/"@address"), title(**node/"link");
81 Douglas Thrift 154 Page page(url, title);
82     ext::Handle<xml::Node> list(**node/"list");
83 Douglas Thrift 141
84 Douglas Thrift 154 if (!list.IsEmpty()) oldMap(page.getChildren(), list);
85 Douglas Thrift 135
86 douglas 285 pages.InsertLast(page);
87 Douglas Thrift 135 }
88     }
89    
90 douglas 285 void SiteMapper::newIndex(const ext::String& siteIndex)
91 Douglas Thrift 133 {
92 Douglas Thrift 154 ext::Handle<xml::Document> document(xml::Parse(siteIndex));
93     xml::NodeSet nodes(*document/"index"/"page");
94 Douglas Thrift 138
95 douglas 285 _foreach (xml::NodeSet, node, nodes)
96 Douglas Thrift 138 {
97 douglas 285 ios::String address(**node/"address");
98     ext::String port(**node/"port");
99 Douglas Thrift 139
100 douglas 285 if (!port.IsEmpty()) address << ":" << port;
101 Douglas Thrift 139
102 douglas 285 ext::String path(**node/"path"), title(**node/"title");
103 Douglas Thrift 154 Page page(address, path, title);
104 douglas 328 Matcher matcher(PCRE_UNGREEDY | PCRE_DOTALL);
105 Douglas Thrift 140
106 douglas 285 if (page == matcher("^Douglas\\sThrift's\\sWebsite\\s\\|\\sDouglas\\sThrift's\\sBlog:\\s(.+)$"))
107 Douglas Thrift 140 {
108 Douglas Thrift 156 if (Matcher("^\\w+\\s\\d{4}\\sArchives$") == matcher[1])
109 Douglas Thrift 142 {
110     page.setTitle(matcher[1]);
111    
112     if (newIndex(pages, page)) continue;
113     }
114     else continue;
115     }
116     else if (page == matcher("^Douglas\\sThrift's.+Website\\s\\|\\s(.+)$"))
117     {
118 Douglas Thrift 140 page.setTitle(matcher[1]);
119    
120 Douglas Thrift 142 if (newIndex(pages, page)) continue;
121 Douglas Thrift 140 }
122 Douglas Thrift 142 else continue;
123    
124 douglas 285 std::multimap<std::string, Page> items;
125 Douglas Thrift 143
126 douglas 285 newPages.insert(std::pair<std::string, std::multimap<std::string, Page> >(page.getAddress(), items)).first->second.insert(std::pair<std::string, Page>(page.getChildOf(), page));
127 Douglas Thrift 138 }
128 Douglas Thrift 126 }
129 Douglas Thrift 133
130 douglas 285 bool SiteMapper::newIndex(ext::Vector<Page>& pages, Page& page)
131 Douglas Thrift 142 {
132 douglas 285 _mforeach (ext::Vector<Page>, page_, pages)
133 Douglas Thrift 142 {
134 douglas 285 if (*page_ == page.getAddress())
135 Douglas Thrift 142 {
136     Matcher matcher;
137    
138 douglas 285 if (*page_ == page)
139 Douglas Thrift 142 {
140 douglas 285 page.setChildren(page_->getChildren());
141 Douglas Thrift 142
142 douglas 285 *page_ = page;
143 Douglas Thrift 142
144 douglas 303 api::Cout << "Updated: " << page.getUrl() << ios::NewLine;
145 Douglas Thrift 156
146 Douglas Thrift 142 return true;
147     }
148 douglas 285 else if (matcher("^" + page_->getPath()) == page)
149 Douglas Thrift 142 {
150     page.setChildOf(matcher[0]);
151    
152 douglas 285 if (matcher("^" + page_->getTitle() + "\\s\\|\\s(.+)$") == page) page.setTitle(matcher[1]);
153 Douglas Thrift 142
154 douglas 285 return newIndex(page_->getChildren(), page);
155 Douglas Thrift 142 }
156     }
157     }
158    
159     return false;
160     }
161    
162 douglas 285 void SiteMapper::newMap(const ext::String& siteMap)
163 Douglas Thrift 133 {
164 douglas 285 api::FileWriter file(siteMap);
165     ios::FormatWriter fout(file);
166     xml::TextWriter xml(file);
167 Douglas Thrift 144
168 douglas 285 fout << ios::NewLine << "<?xml-stylesheet type=\"text/xsl\" href=\"stylesheets/sitemap.xsl\"?>" << ios::NewLine << "<!DOCTYPE page SYSTEM \"stylesheets/page.dtd\">";
169 Douglas Thrift 144
170 douglas 285 xml.OutputComment(comment);
171 Douglas Thrift 143
172 douglas 285 xml::ScopeElement page(xml, "page");
173 Douglas Thrift 144
174 douglas 285 xml.OpenElement("title");
175     xml.OutputText("Sitemap");
176     xml.CloseElement();
177 Douglas Thrift 144
178 douglas 285 xml::ScopeElement section(xml, "section"), list(xml, "list");
179 Douglas Thrift 143
180 douglas 285 _mforeach (ext::Vector<Page>, page, pages)
181 Douglas Thrift 143 {
182 douglas 285 if (newPages.find(page->getAddress()) != newPages.end()) newMap(page->getChildren(), page->getPath(), newPages.find(page->getAddress())->second);
183    
184     xml << *page;
185 Douglas Thrift 143 }
186 douglas 285 }
187 Douglas Thrift 143
188 douglas 285 void SiteMapper::newMap(ext::Vector<Page>& pages, const ext::String& childOf, std::multimap<std::string, Page>& newPages)
189     {
190     _mforeach (ext::Vector<Page>, page, pages) newMap(page->getChildren(), page->getPath(), newPages);
191    
192     typedef std::multimap<std::string, Page> MultiMap;
193    
194     _for (MultiMap::const_iterator, itor, newPages.lower_bound(childOf), newPages.upper_bound(childOf))
195 Douglas Thrift 143 {
196 douglas 303 api::Cout << "Added: " << itor->second.getUrl() << ios::NewLine;
197 Douglas Thrift 156
198 douglas 285 pages.InsertLast(itor->second);
199 Douglas Thrift 143 }
200    
201     newPages.erase(childOf);
202     }

Properties

Name Value
svn:eol-style native
svn:keywords Id