1 |
/* ============================================================================ |
2 |
* Douglas Thrift's Search Engine License |
3 |
* |
4 |
* Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved. |
5 |
* Redistribution and use in source and binary forms, with or without |
6 |
* modification, are permitted provided that the following conditions are met: |
7 |
* |
8 |
* 1. Redistributions of source code must retain the above copyright notice, |
9 |
* this list of conditions and the following disclaimer. |
10 |
* |
11 |
* 2. Redistributions in binary form must reproduce the above copyright notice, |
12 |
* this list of conditions and the following disclaimer in the documentation |
13 |
* and/or other materials provided with the distribution. |
14 |
* |
15 |
* 3. The end-user documentation included with the redistribution, if any, must |
16 |
* include the following acknowledgment: |
17 |
* |
18 |
* "This product includes software developed by Douglas Thrift |
19 |
* (http://computers.douglasthrift.net/searchengine/)." |
20 |
* |
21 |
* Alternately, this acknowledgment may appear in the software itself, if |
22 |
* and wherever such third-party acknowledgments normally appear. |
23 |
* |
24 |
* 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not |
25 |
* be used to endorse or promote products derived from this software without |
26 |
* specific prior written permission. For written permission, please visit |
27 |
* http://www.douglasthrift.net/contact.cgi for contact information. |
28 |
* |
29 |
* 5. Products derived from this software may not be called "Douglas Thrift's |
30 |
* Search Engine", nor may "Douglas Thrift's Search Engine" appear in their |
31 |
* name, without prior written permission. |
32 |
* |
33 |
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
34 |
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
35 |
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
36 |
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
37 |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
38 |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, |
39 |
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
40 |
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
41 |
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
42 |
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
43 |
* ============================================================================ |
44 |
*/ |
45 |
// Douglas Thrift's Search Engine Page |
46 |
// |
47 |
// Douglas Thrift |
48 |
// |
49 |
// $Id: Page.cpp,v 1.3 2003/07/11 07:54:47 douglas Exp $ |
50 |
|
51 |
#include "Page.h" |
52 |
|
53 |
// Constructs a page from a URL string. The string is handed to the URL
// initializer (presumably the base class -- confirm in Page.h) and the
// page's size starts out as zero.
Page::Page(string& url) : URL(url)
{
	this->size = 0;
}
57 |
|
58 |
// Stores the given value as this page's size.
void Page::setSize(unsigned size)
{
	// The qualified name selects the member; the parameter shadows it.
	Page::size = size;
}
62 |
|
63 |
// Stores a copy of the given string as this page's title.
void Page::setTitle(string& title)
{
	// The qualified name selects the member; the parameter shadows it.
	Page::title = title;
}
67 |
|
68 |
// Stores a copy of the given string as this page's description.
void Page::setDescription(string& description)
{
	// The qualified name selects the member; the parameter shadows it.
	Page::description = description;
}
72 |
|
73 |
// Stores a copy of the given string as this page's text.
void Page::setText(string& text)
{
	// The qualified name selects the member; the parameter shadows it.
	Page::text = text;
}
77 |
|
78 |
// Replaces this page's list of headings with a copy of the given vector.
void Page::setHeadings(vector<string>& headings)
{
	// The qualified name selects the member; the parameter shadows it.
	Page::headings = headings;
}
82 |
|
83 |
istream& operator>>(istream& is, Page& data) |
84 |
{ |
85 |
string line; |
86 |
getline(is, line); |
87 |
if (line == " <page>") |
88 |
{ |
89 |
do |
90 |
{ |
91 |
getline(is, line); |
92 |
|
93 |
if (line.find(" <address>") == 0) |
94 |
{ |
95 |
unsigned start = line.find("<address>") + 9; |
96 |
unsigned finish = line.find("</address>", start); |
97 |
|
98 |
string address = line.substr(start, finish - start); |
99 |
|
100 |
entities(address, "<", '<'); |
101 |
entities(address, ">", '>'); |
102 |
entities(address, "&", '&'); |
103 |
|
104 |
data.setAddress(address); |
105 |
} |
106 |
else if (line.find(" <port>") == 0) |
107 |
{ |
108 |
unsigned start = line.find("<port>") + 6; |
109 |
unsigned finish = line.find("</port>", start); |
110 |
|
111 |
data.setPort(strtoul(line.substr(start, finish - |
112 |
start).c_str(), 0, 0)); |
113 |
} |
114 |
else if (line.find(" <path>") == 0) |
115 |
{ |
116 |
unsigned start = line.find("<path>") + 6; |
117 |
unsigned finish = line.find("</path>", start); |
118 |
|
119 |
string path = line.substr(start, finish - start); |
120 |
|
121 |
entities(path, "<", '<'); |
122 |
entities(path, ">", '>'); |
123 |
entities(path, "&", '&'); |
124 |
|
125 |
data.setPath(path); |
126 |
} |
127 |
else if (line.find(" <size>") == 0) |
128 |
{ |
129 |
unsigned start = line.find("<size>") + 6; |
130 |
unsigned finish = line.find("</size>", start); |
131 |
|
132 |
data.setSize(strtoul(line.substr(start, finish - |
133 |
start).c_str(), 0, 0)); |
134 |
} |
135 |
else if (line.find(" <title>") == 0) |
136 |
{ |
137 |
unsigned start = line.find("<title>") + 7; |
138 |
unsigned finish = line.find("</title>", start); |
139 |
|
140 |
string title = line.substr(start, finish - start); |
141 |
|
142 |
while (finish == string::npos) |
143 |
{ |
144 |
getline(is, line); |
145 |
finish = line.find("</title>"); |
146 |
title += '\n' + line.substr(0, finish - 0); |
147 |
} |
148 |
|
149 |
entities(title, "<", '<'); |
150 |
entities(title, ">", '>'); |
151 |
entities(title, "&", '&'); |
152 |
|
153 |
data.setTitle(title); |
154 |
} |
155 |
else if (line.find(" <description>") == 0) |
156 |
{ |
157 |
unsigned start = line.find("<description>") + 13; |
158 |
unsigned finish = line.find("</description>", start); |
159 |
|
160 |
string description = line.substr(start, finish - start); |
161 |
|
162 |
entities(description, "<", '<'); |
163 |
entities(description, ">", '>'); |
164 |
entities(description, "&", '&'); |
165 |
|
166 |
data.setDescription(description); |
167 |
} |
168 |
else if (line.find(" <text>") == 0) |
169 |
{ |
170 |
unsigned start = line.find("<text>") + 6; |
171 |
unsigned finish = line.find("</text>", start); |
172 |
|
173 |
string text = line.substr(start, finish - start); |
174 |
|
175 |
while (finish == string::npos) |
176 |
{ |
177 |
getline(is, line); |
178 |
finish = line.find("</text>"); |
179 |
text += '\n' + line.substr(0, finish - 0); |
180 |
} |
181 |
|
182 |
entities(text, "<", '<'); |
183 |
entities(text, ">", '>'); |
184 |
entities(text, "&", '&'); |
185 |
|
186 |
data.setText(text); |
187 |
} |
188 |
else if (line.find(" <heading>") == 0) |
189 |
{ |
190 |
unsigned start = line.find("<heading>") + 9; |
191 |
unsigned finish = line.find("</heading>", start); |
192 |
|
193 |
string heading = line.substr(start, finish - start); |
194 |
|
195 |
while (finish == string::npos) |
196 |
{ |
197 |
getline(is, line); |
198 |
finish = line.find("</heading>"); |
199 |
heading += line.substr(0, finish - 0); |
200 |
} |
201 |
|
202 |
entities(heading, "<", '<'); |
203 |
entities(heading, ">", '>'); |
204 |
entities(heading, "&", '&'); |
205 |
|
206 |
data.headings.push_back(heading); |
207 |
} |
208 |
} |
209 |
while (line != " </page>"); |
210 |
} |
211 |
|
212 |
return is; |
213 |
} |
214 |
|
215 |
ostream& operator<<(ostream& os, Page& data) |
216 |
{ |
217 |
string address = data.getAddress(); |
218 |
|
219 |
entities(address, '&', "&"); |
220 |
entities(address, '<', "<"); |
221 |
entities(address, '>', ">"); |
222 |
|
223 |
os << " <page>\n" << " <address>" << address << "</address>\n"; |
224 |
|
225 |
if (data.getPort() != 80) |
226 |
{ |
227 |
os << " <port>" << data.getPort() << "</port>\n"; |
228 |
} |
229 |
|
230 |
string path = data.getPath(); |
231 |
|
232 |
entities(path, '&', "&"); |
233 |
entities(path, '<', "<"); |
234 |
entities(path, '>', ">"); |
235 |
|
236 |
os << " <path>" << path << "</path>\n"; |
237 |
|
238 |
os << " <size>" << data.getSize() << "</size>\n"; |
239 |
|
240 |
if(data.getTitle() != "") |
241 |
{ |
242 |
string title = data.getTitle(); |
243 |
|
244 |
entities(title, '&', "&"); |
245 |
entities(title, '<', "<"); |
246 |
entities(title, '>', ">"); |
247 |
|
248 |
os << " <title>" << title << "</title>\n"; |
249 |
} |
250 |
|
251 |
if(data.getDescription() != "") |
252 |
{ |
253 |
string description = data.getDescription(); |
254 |
|
255 |
entities(description, '&', "&"); |
256 |
entities(description, '<', "<"); |
257 |
entities(description, '>', ">"); |
258 |
|
259 |
os << " <description>" << description << "</description>\n"; |
260 |
} |
261 |
|
262 |
string text = data.getText(); |
263 |
|
264 |
entities(text, '&', "&"); |
265 |
entities(text, '<', "<"); |
266 |
entities(text, '>', ">"); |
267 |
|
268 |
os << " <text>" << text << "</text>\n"; |
269 |
|
270 |
for (int index = 0; index < data.getHeadings().size(); index++) |
271 |
{ |
272 |
string heading = data.getHeadings()[index]; |
273 |
|
274 |
entities(heading, '&', "&"); |
275 |
entities(heading, '<', "<"); |
276 |
entities(heading, '>', ">"); |
277 |
|
278 |
os << " <heading>" << heading << "</heading>\n"; |
279 |
} |
280 |
|
281 |
os << " </page>"; |
282 |
|
283 |
return os; |
284 |
} |