1 |
douglas |
1 |
/* ============================================================================ |
2 |
|
|
* Douglas Thrift's Search Engine License |
3 |
|
|
* |
4 |
douglas |
312 |
* Copyright (C) 2002-2004, Douglas Thrift. All Rights Reserved. |
5 |
douglas |
1 |
* Redistribution and use in source and binary forms, with or without |
6 |
|
|
* modification, are permitted provided that the following conditions are met: |
7 |
|
|
* |
8 |
|
|
* 1. Redistributions of source code must retain the above copyright notice, |
9 |
|
|
* this list of conditions and the following disclaimer. |
10 |
|
|
* |
11 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice, |
12 |
|
|
* this list of conditions and the following disclaimer in the documentation |
13 |
|
|
* and/or other materials provided with the distribution. |
14 |
|
|
* |
15 |
|
|
* 3. The end-user documentation included with the redistribution, if any, must |
16 |
|
|
* include the following acknowledgment: |
17 |
|
|
* |
18 |
|
|
* "This product includes software developed by Douglas Thrift |
19 |
|
|
* (http://computers.douglasthrift.net/searchengine/)." |
20 |
|
|
* |
21 |
|
|
* Alternately, this acknowledgment may appear in the software itself, if |
22 |
|
|
* and wherever such third-party acknowledgments normally appear. |
23 |
|
|
* |
24 |
|
|
* 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not |
25 |
|
|
* be used to endorse or promote products derived from this software without |
26 |
|
|
* specific prior written permission. For written permission, please visit |
27 |
|
|
* http://www.douglasthrift.net/contact.cgi for contact information. |
28 |
|
|
* |
29 |
|
|
* 5. Products derived from this software may not be called "Douglas Thrift's |
30 |
|
|
* Search Engine", nor may "Douglas Thrift's Search Engine" appear in their |
31 |
|
|
* name, without prior written permission. |
32 |
|
|
* |
33 |
|
|
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
34 |
|
|
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
35 |
|
|
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
36 |
|
|
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
37 |
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
38 |
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, |
39 |
|
|
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
40 |
|
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
41 |
|
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
42 |
|
|
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
43 |
|
|
* ============================================================================ |
44 |
|
|
*/ |
45 |
|
|
// Douglas Thrift's Search Engine Page |
46 |
|
|
// |
47 |
|
|
// Douglas Thrift |
48 |
|
|
// |
49 |
Douglas Thrift |
331 |
// $Id$ |
50 |
douglas |
1 |
|
51 |
Douglas Thrift |
334 |
#include "Page.hpp" |
52 |
douglas |
1 |
|
53 |
|
|
istream& operator>>(istream& is, Page& data) |
54 |
|
|
{ |
55 |
|
|
string line; |
56 |
douglas |
310 |
#ifdef _OpenSSL_ |
57 |
|
|
bool set = false; |
58 |
|
|
#endif |
59 |
|
|
|
60 |
douglas |
1 |
getline(is, line); |
61 |
douglas |
310 |
|
62 |
douglas |
1 |
if (line == " <page>") |
63 |
|
|
{ |
64 |
|
|
do |
65 |
|
|
{ |
66 |
|
|
getline(is, line); |
67 |
|
|
|
68 |
|
|
if (line.find(" <address>") == 0) |
69 |
|
|
{ |
70 |
|
|
unsigned start = line.find("<address>") + 9; |
71 |
|
|
unsigned finish = line.find("</address>", start); |
72 |
|
|
|
73 |
|
|
string address = line.substr(start, finish - start); |
74 |
|
|
|
75 |
|
|
entities(address, "<", '<'); |
76 |
|
|
entities(address, ">", '>'); |
77 |
|
|
entities(address, "&", '&'); |
78 |
|
|
|
79 |
|
|
data.setAddress(address); |
80 |
|
|
} |
81 |
|
|
else if (line.find(" <port>") == 0) |
82 |
|
|
{ |
83 |
|
|
unsigned start = line.find("<port>") + 6; |
84 |
|
|
unsigned finish = line.find("</port>", start); |
85 |
|
|
|
86 |
douglas |
199 |
istringstream buffer(line.substr(start, finish - start)); |
87 |
|
|
unsigned port; |
88 |
|
|
|
89 |
|
|
buffer >> port; |
90 |
|
|
|
91 |
|
|
data.setPort(port); |
92 |
douglas |
310 |
|
93 |
|
|
#ifdef _OpenSSL_ |
94 |
|
|
set = true; |
95 |
|
|
#endif |
96 |
douglas |
1 |
} |
97 |
douglas |
199 |
#ifdef _OpenSSL_ |
98 |
|
|
else if (line.find(" <tls>") == 0) |
99 |
|
|
{ |
100 |
|
|
unsigned start = line.find("<tls>") + 5; |
101 |
|
|
unsigned finish = line.find("</tls>", start); |
102 |
|
|
|
103 |
|
|
istringstream buffer(line.substr(start, finish - start)); |
104 |
|
|
bool tls; |
105 |
|
|
|
106 |
|
|
buffer.setf(ios_base::boolalpha); |
107 |
|
|
buffer >> tls; |
108 |
|
|
|
109 |
|
|
data.setTls(tls); |
110 |
douglas |
310 |
|
111 |
|
|
if (!set && tls) data.setPort(443); |
112 |
douglas |
199 |
} |
113 |
|
|
#endif |
114 |
douglas |
1 |
else if (line.find(" <path>") == 0) |
115 |
|
|
{ |
116 |
|
|
unsigned start = line.find("<path>") + 6; |
117 |
|
|
unsigned finish = line.find("</path>", start); |
118 |
|
|
|
119 |
|
|
string path = line.substr(start, finish - start); |
120 |
|
|
|
121 |
|
|
entities(path, "<", '<'); |
122 |
|
|
entities(path, ">", '>'); |
123 |
|
|
entities(path, "&", '&'); |
124 |
|
|
|
125 |
|
|
data.setPath(path); |
126 |
|
|
} |
127 |
|
|
else if (line.find(" <size>") == 0) |
128 |
|
|
{ |
129 |
|
|
unsigned start = line.find("<size>") + 6; |
130 |
|
|
unsigned finish = line.find("</size>", start); |
131 |
|
|
|
132 |
douglas |
199 |
istringstream buffer(line.substr(start, finish - start)); |
133 |
|
|
unsigned size; |
134 |
|
|
|
135 |
|
|
buffer >> size; |
136 |
|
|
|
137 |
|
|
data.setSize(size); |
138 |
douglas |
1 |
} |
139 |
|
|
else if (line.find(" <title>") == 0) |
140 |
|
|
{ |
141 |
|
|
unsigned start = line.find("<title>") + 7; |
142 |
|
|
unsigned finish = line.find("</title>", start); |
143 |
|
|
|
144 |
|
|
string title = line.substr(start, finish - start); |
145 |
|
|
|
146 |
|
|
while (finish == string::npos) |
147 |
|
|
{ |
148 |
|
|
getline(is, line); |
149 |
|
|
finish = line.find("</title>"); |
150 |
|
|
title += '\n' + line.substr(0, finish - 0); |
151 |
|
|
} |
152 |
|
|
|
153 |
|
|
entities(title, "<", '<'); |
154 |
|
|
entities(title, ">", '>'); |
155 |
|
|
entities(title, "&", '&'); |
156 |
|
|
|
157 |
|
|
data.setTitle(title); |
158 |
|
|
} |
159 |
|
|
else if (line.find(" <description>") == 0) |
160 |
|
|
{ |
161 |
|
|
unsigned start = line.find("<description>") + 13; |
162 |
|
|
unsigned finish = line.find("</description>", start); |
163 |
|
|
|
164 |
|
|
string description = line.substr(start, finish - start); |
165 |
|
|
|
166 |
|
|
entities(description, "<", '<'); |
167 |
|
|
entities(description, ">", '>'); |
168 |
|
|
entities(description, "&", '&'); |
169 |
|
|
|
170 |
|
|
data.setDescription(description); |
171 |
|
|
} |
172 |
|
|
else if (line.find(" <text>") == 0) |
173 |
|
|
{ |
174 |
|
|
unsigned start = line.find("<text>") + 6; |
175 |
|
|
unsigned finish = line.find("</text>", start); |
176 |
|
|
|
177 |
|
|
string text = line.substr(start, finish - start); |
178 |
|
|
|
179 |
|
|
while (finish == string::npos) |
180 |
|
|
{ |
181 |
|
|
getline(is, line); |
182 |
|
|
finish = line.find("</text>"); |
183 |
|
|
text += '\n' + line.substr(0, finish - 0); |
184 |
|
|
} |
185 |
|
|
|
186 |
|
|
entities(text, "<", '<'); |
187 |
|
|
entities(text, ">", '>'); |
188 |
|
|
entities(text, "&", '&'); |
189 |
|
|
|
190 |
|
|
data.setText(text); |
191 |
|
|
} |
192 |
|
|
else if (line.find(" <heading>") == 0) |
193 |
|
|
{ |
194 |
|
|
unsigned start = line.find("<heading>") + 9; |
195 |
|
|
unsigned finish = line.find("</heading>", start); |
196 |
|
|
|
197 |
|
|
string heading = line.substr(start, finish - start); |
198 |
|
|
|
199 |
|
|
while (finish == string::npos) |
200 |
|
|
{ |
201 |
|
|
getline(is, line); |
202 |
|
|
finish = line.find("</heading>"); |
203 |
|
|
heading += line.substr(0, finish - 0); |
204 |
|
|
} |
205 |
|
|
|
206 |
|
|
entities(heading, "<", '<'); |
207 |
|
|
entities(heading, ">", '>'); |
208 |
|
|
entities(heading, "&", '&'); |
209 |
|
|
|
210 |
|
|
data.headings.push_back(heading); |
211 |
|
|
} |
212 |
|
|
} |
213 |
|
|
while (line != " </page>"); |
214 |
|
|
} |
215 |
|
|
|
216 |
|
|
return is; |
217 |
|
|
} |
218 |
|
|
|
219 |
|
|
ostream& operator<<(ostream& os, Page& data) |
220 |
|
|
{ |
221 |
Douglas Thrift |
355 |
string address(data.getAddress()); |
222 |
douglas |
1 |
|
223 |
|
|
entities(address, '&', "&"); |
224 |
|
|
entities(address, '<', "<"); |
225 |
|
|
entities(address, '>', ">"); |
226 |
|
|
|
227 |
Douglas Thrift |
355 |
os << "\t<page>\n" << "\t\t<address>" << address << "</address>\n"; |
228 |
douglas |
1 |
|
229 |
douglas |
199 |
#ifndef _OpenSSL_ |
230 |
douglas |
1 |
if (data.getPort() != 80) |
231 |
douglas |
199 |
#else |
232 |
douglas |
311 |
if ((data.getPort() != 80 && !data.getTls()) || (data.getPort() != 443 && |
233 |
|
|
data.getTls())) |
234 |
douglas |
199 |
#endif |
235 |
douglas |
1 |
{ |
236 |
Douglas Thrift |
355 |
os << "\t\t<port>" << data.getPort() << "</port>\n"; |
237 |
douglas |
1 |
} |
238 |
|
|
|
239 |
douglas |
199 |
#ifdef _OpenSSL_ |
240 |
|
|
os.setf(ios::boolalpha); |
241 |
Douglas Thrift |
355 |
|
242 |
|
|
os << "\t\t<tls>" << data.getTls() << "</tls>\n"; |
243 |
douglas |
199 |
#endif |
244 |
|
|
|
245 |
Douglas Thrift |
355 |
string path(data.getPath()); |
246 |
douglas |
1 |
|
247 |
|
|
entities(path, '&', "&"); |
248 |
|
|
entities(path, '<', "<"); |
249 |
|
|
entities(path, '>', ">"); |
250 |
|
|
|
251 |
Douglas Thrift |
355 |
os << "\t\t<path>" << path << "</path>\n\t\t<size>" << data.getSize() |
252 |
|
|
<< "</size>\n"; |
253 |
douglas |
1 |
|
254 |
Douglas Thrift |
355 |
if(!data.getTitle().empty()) |
255 |
douglas |
1 |
{ |
256 |
Douglas Thrift |
355 |
string title(data.getTitle()); |
257 |
douglas |
1 |
|
258 |
|
|
entities(title, '&', "&"); |
259 |
|
|
entities(title, '<', "<"); |
260 |
|
|
entities(title, '>', ">"); |
261 |
|
|
|
262 |
Douglas Thrift |
355 |
os << "\t\t<title>" << title << "</title>\n"; |
263 |
douglas |
1 |
} |
264 |
|
|
|
265 |
Douglas Thrift |
355 |
if(!data.getDescription().empty()) |
266 |
douglas |
1 |
{ |
267 |
Douglas Thrift |
355 |
string description(data.getDescription()); |
268 |
douglas |
1 |
|
269 |
|
|
entities(description, '&', "&"); |
270 |
|
|
entities(description, '<', "<"); |
271 |
|
|
entities(description, '>', ">"); |
272 |
|
|
|
273 |
Douglas Thrift |
355 |
os << "\t\t<description>" << description << "</description>\n"; |
274 |
douglas |
1 |
} |
275 |
|
|
|
276 |
Douglas Thrift |
355 |
string text(data.getText()); |
277 |
douglas |
1 |
|
278 |
|
|
entities(text, '&', "&"); |
279 |
|
|
entities(text, '<', "<"); |
280 |
|
|
entities(text, '>', ">"); |
281 |
|
|
|
282 |
Douglas Thrift |
355 |
os << "\t\t<text>" << text << "</text>\n"; |
283 |
douglas |
1 |
|
284 |
Douglas Thrift |
348 |
for (size_t index(0); index < data.getHeadings().size(); index++) |
285 |
douglas |
1 |
{ |
286 |
Douglas Thrift |
355 |
string heading(data.getHeadings()[index]); |
287 |
douglas |
1 |
|
288 |
|
|
entities(heading, '&', "&"); |
289 |
|
|
entities(heading, '<', "<"); |
290 |
|
|
entities(heading, '>', ">"); |
291 |
|
|
|
292 |
Douglas Thrift |
355 |
os << "\t\t<heading>" << heading << "</heading>\n"; |
293 |
douglas |
1 |
} |
294 |
|
|
|
295 |
Douglas Thrift |
355 |
os << "\t</page>"; |
296 |
douglas |
1 |
|
297 |
|
|
return os; |
298 |
|
|
} |