ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/URL.cpp
Revision: 195
Committed: 2003-07-11T22:50:04-07:00 (21 years, 11 months ago) by douglas
File size: 6148 byte(s)
Log Message:
Added OpenSSL stuff to URL.

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine URL
46 //
47 // Douglas Thrift
48 //
49 // $Id: URL.cpp,v 1.7 2003/07/12 05:50:04 douglas Exp $
50
51 #include "URL.h"
52
53 URL::URL(const string& url)
54 {
55 setURL(url);
56 }
57
58 URL::URL(const string& address, unsigned port, const string& path)
59 {
60 this->address = address;
61 this->port = port;
62 this->path = path;
63 #ifdef _OpenSSL_
64 tls = false;
65 #endif
66 }
67
#ifdef _OpenSSL_
// Builds a URL from already-separated parts, with an explicit TLS flag.
//
// Only available when built with _OpenSSL_. All four values are stored as
// given; no validation or normalisation is applied.
URL::URL(const string& address, unsigned port, const string& path, bool tls) :
	address(address),
	port(port),
	path(path),
	tls(tls)
{
}
#endif
77
78 string URL::getURL()
79 {
80 ostringstream url;
81 #ifndef _OpenSSL_
82 url << "http://" << address;
83
84 if (port != 80)
85 #else
86 url << (tls ? "https://" : "http://") << address;
87
88 if (port != 80 && !tls || port != 443 && tls)
89 #endif
90 {
91 url << ":" << port;
92 }
93
94 url << path;
95
96 return url.str();
97 }
98
99 void URL::setURL(const URL& url)
100 {
101 this->address = url.address;
102 this->port = url.port;
103 this->path = url.path;
104 #ifdef _OpenSSL_
105 this->tls = url.tls;
106 #endif
107 }
108
109 void URL::setURL(const string& url)
110 {
111 #ifndef _OpenSSL_
112 if (url.find("http://") != 0 || url.length() <= 7)
113 {
114 cerr << program << ": Malformed URL: " << url << "\n";
115 exit(1);
116 }
117
118 unsigned begin = 7;
119 #else
120 tls = false;
121
122 if (url.find("https://") == 0 && url.length() > 8)
123 {
124 tls = true;
125 }
126 else if (url.find("http://") != 0 || url.length() <= 7)
127 {
128 cerr << program << ": Malformed URL: " << url << "\n";
129 exit(1);
130 }
131
132 unsigned begin = tls ? 8 : 7;
133 #endif
134 unsigned colon = url.find(':', begin);
135 unsigned end = url.find('/', begin);
136
137 if (colon != string::npos && colon < end)
138 {
139 address = url.substr(begin, colon - begin);
140 port = strtoul(url.substr(colon + 1, end - colon - 1).c_str(), 0, 0);
141 }
142 else
143 {
144 address = url.substr(begin, end - begin);
145 #ifndef _OpenSSL_
146 port = 80;
147 #else
148 port = tls ? 443 : 80;
149 #endif
150 }
151
152 if (end == string::npos)
153 {
154 path = "/";
155 }
156 else
157 {
158 path = url.substr(end);
159 }
160 }
161
162 void URL::setPath(const string& path)
163 {
164 if (path.find('/') != 0)
165 {
166 this->path = "/" + path;
167 }
168 else
169 {
170 this->path = path;
171 }
172 }
173
174 ostream& operator<<(ostream& os, URL& data)
175 {
176 os << data.getURL();
177
178 return os;
179 }
180
181 string getLink(string link, URL& url)
182 {
183 string hyperlink = "";
184
185 if (link.find('#') != string::npos)
186 {
187 unsigned pound = link.find('#');
188 link.erase(pound);
189 }
190
191 if (link.find("://") != string::npos)
192 {
193 #ifndef _OpenSSL_
194 if (link.find("http://") == 0 && link.length() > 7) hyperlink = link;
195 #else
196 if (link.find("http://") == 0 && link.length() > 7 ||
197 link.find("https://") == 0 && link.length() > 8) hyperlink = link;
198 #endif
199 }
200 else if (link.find("mailto:") == 0)
201 {
202 // do nothing we are not evil spammers!
203 }
204 else if (link.find("news:") == 0)
205 {
206 // do nothing this isn't Google Groups
207 }
208 else if (link.find("//") == 0)
209 {
210 #ifndef _OpenSSL_
211 hyperlink = "http:" + link;
212 #else
213 hyperlink = (url.getTls() ? "https:" : "http:") + link;
214 #endif
215 }
216 else if (link.find('/') == 0)
217 {
218 hyperlink = url.getURL();
219
220 #ifndef _OpenSSL_
221 unsigned path = hyperlink.find('/', 7);
222 #else
223 unsigned path = hyperlink.find('/', url.getTls() ? 8 : 7);
224 #endif
225 hyperlink.erase(path);
226
227 hyperlink += link;
228 }
229 else if (link == "")
230 {
231 // a blank link is useless
232 }
233 else
234 {
235 hyperlink = url.getURL();
236 string path = url.getPath();
237
238 unsigned cutoff = hyperlink.rfind(path);
239 hyperlink.erase(cutoff);
240
241 unsigned dir = path.rfind('/') + 1;
242 path.erase(dir);
243
244 while (link.find("../") == 0)
245 {
246 unsigned dot = path.rfind('/') - 1;
247 unsigned up = path.rfind('/', dot) + 1;
248
249 path.erase(up);
250 link.erase(0, 3);
251 }
252 while (link.find("./") == 0)
253 {
254 link.erase(0, 2);
255 }
256
257 hyperlink += path + link;
258 }
259
260 return hyperlink;
261 }