ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/Page.cpp
Revision: 1
Committed: 2002-12-04T20:22:59-08:00 (22 years, 6 months ago) by douglas
File size: 7686 byte(s)
Log Message:
Initial revision

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4     * Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine Page
46     //
47     // Douglas Thrift
48     //
49     // Page.cpp
50    
51     #include "Page.h"
52    
53     Page::Page(string& url) : URL(url)
54     {
55     size = 0;
56     }
57    
58     void Page::setSize(unsigned size)
59     {
60     this->size = size;
61     }
62    
63     void Page::setTitle(string& title)
64     {
65     this->title = title;
66     }
67    
68     void Page::setDescription(string& description)
69     {
70     this->description = description;
71     }
72    
73     void Page::setText(string& text)
74     {
75     this->text = text;
76     }
77    
78     void Page::setHeadings(vector<string>& headings)
79     {
80     this->headings = headings;
81     }
82    
83     istream& operator>>(istream& is, Page& data)
84     {
85     string line;
86     getline(is, line);
87     if (line == " <page>")
88     {
89     do
90     {
91     getline(is, line);
92    
93     if (line.find(" <address>") == 0)
94     {
95     unsigned start = line.find("<address>") + 9;
96     unsigned finish = line.find("</address>", start);
97    
98     string address = line.substr(start, finish - start);
99    
100     entities(address, "&lt;", '<');
101     entities(address, "&gt;", '>');
102     entities(address, "&amp;", '&');
103    
104     data.setAddress(address);
105     }
106     else if (line.find(" <port>") == 0)
107     {
108     unsigned start = line.find("<port>") + 6;
109     unsigned finish = line.find("</port>", start);
110    
111     data.setPort(strtoul(line.substr(start, finish -
112     start).c_str(), 0, 0));
113     }
114     else if (line.find(" <path>") == 0)
115     {
116     unsigned start = line.find("<path>") + 6;
117     unsigned finish = line.find("</path>", start);
118    
119     string path = line.substr(start, finish - start);
120    
121     entities(path, "&lt;", '<');
122     entities(path, "&gt;", '>');
123     entities(path, "&amp;", '&');
124    
125     data.setPath(path);
126     }
127     else if (line.find(" <size>") == 0)
128     {
129     unsigned start = line.find("<size>") + 6;
130     unsigned finish = line.find("</size>", start);
131    
132     data.setSize(strtoul(line.substr(start, finish -
133     start).c_str(), 0, 0));
134     }
135     else if (line.find(" <title>") == 0)
136     {
137     unsigned start = line.find("<title>") + 7;
138     unsigned finish = line.find("</title>", start);
139    
140     string title = line.substr(start, finish - start);
141    
142     while (finish == string::npos)
143     {
144     getline(is, line);
145     finish = line.find("</title>");
146     title += '\n' + line.substr(0, finish - 0);
147     }
148    
149     entities(title, "&lt;", '<');
150     entities(title, "&gt;", '>');
151     entities(title, "&amp;", '&');
152    
153     data.setTitle(title);
154     }
155     else if (line.find(" <description>") == 0)
156     {
157     unsigned start = line.find("<description>") + 13;
158     unsigned finish = line.find("</description>", start);
159    
160     string description = line.substr(start, finish - start);
161    
162     entities(description, "&lt;", '<');
163     entities(description, "&gt;", '>');
164     entities(description, "&amp;", '&');
165    
166     data.setDescription(description);
167     }
168     else if (line.find(" <text>") == 0)
169     {
170     unsigned start = line.find("<text>") + 6;
171     unsigned finish = line.find("</text>", start);
172    
173     string text = line.substr(start, finish - start);
174    
175     while (finish == string::npos)
176     {
177     getline(is, line);
178     finish = line.find("</text>");
179     text += '\n' + line.substr(0, finish - 0);
180     }
181    
182     entities(text, "&lt;", '<');
183     entities(text, "&gt;", '>');
184     entities(text, "&amp;", '&');
185    
186     data.setText(text);
187     }
188     else if (line.find(" <heading>") == 0)
189     {
190     unsigned start = line.find("<heading>") + 9;
191     unsigned finish = line.find("</heading>", start);
192    
193     string heading = line.substr(start, finish - start);
194    
195     while (finish == string::npos)
196     {
197     getline(is, line);
198     finish = line.find("</heading>");
199     heading += line.substr(0, finish - 0);
200     }
201    
202     entities(heading, "&lt;", '<');
203     entities(heading, "&gt;", '>');
204     entities(heading, "&amp;", '&');
205    
206     data.headings.push_back(heading);
207     }
208     }
209     while (line != " </page>");
210     }
211    
212     return is;
213     }
214    
215     ostream& operator<<(ostream& os, Page& data)
216     {
217     string address = data.getAddress();
218    
219     entities(address, '&', "&amp;");
220     entities(address, '<', "&lt;");
221     entities(address, '>', "&gt;");
222    
223     os << " <page>\n" << " <address>" << address << "</address>\n";
224    
225     if (data.getPort() != 80)
226     {
227     os << " <port>" << data.getPort() << "</port>\n";
228     }
229    
230     string path = data.getPath();
231    
232     entities(path, '&', "&amp;");
233     entities(path, '<', "&lt;");
234     entities(path, '>', "&gt;");
235    
236     os << " <path>" << path << "</path>\n";
237    
238     os << " <size>" << data.getSize() << "</size>\n";
239    
240     if(data.getTitle() != "")
241     {
242     string title = data.getTitle();
243    
244     entities(title, '&', "&amp;");
245     entities(title, '<', "&lt;");
246     entities(title, '>', "&gt;");
247    
248     os << " <title>" << title << "</title>\n";
249     }
250    
251     if(data.getDescription() != "")
252     {
253     string description = data.getDescription();
254    
255     entities(description, '&', "&amp;");
256     entities(description, '<', "&lt;");
257     entities(description, '>', "&gt;");
258    
259     os << " <description>" << description << "</description>\n";
260     }
261    
262     string text = data.getText();
263    
264     entities(text, '&', "&amp;");
265     entities(text, '<', "&lt;");
266     entities(text, '>', "&gt;");
267    
268     os << " <text>" << text << "</text>\n";
269    
270     for (int index = 0; index < data.getHeadings().size(); index++)
271     {
272     string heading = data.getHeadings()[index];
273    
274     entities(heading, '&', "&amp;");
275     entities(heading, '<', "&lt;");
276     entities(heading, '>', "&gt;");
277    
278     os << " <heading>" << heading << "</heading>\n";
279     }
280    
281     os << " </page>";
282    
283     return os;
284     }