ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/URL.cpp
Revision: 334
Committed: 2004-04-05T16:37:41-07:00 (21 years, 2 months ago) by Douglas Thrift
File size: 6116 byte(s)
Log Message:
Ah, I just love Subversion!

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4 douglas 312 * Copyright (C) 2002-2004, Douglas Thrift. All Rights Reserved.
5 douglas 1 * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine URL
46     //
47     // Douglas Thrift
48     //
49 Douglas Thrift 331 // $Id$
50 douglas 1
51 Douglas Thrift 334 #include "URL.hpp"
52 douglas 1
53     URL::URL(const string& url)
54     {
55     setURL(url);
56     }
57    
58     URL::URL(const string& address, unsigned port, const string& path)
59     {
60     this->address = address;
61     this->port = port;
62     this->path = path;
63 douglas 195 #ifdef _OpenSSL_
64     tls = false;
65     #endif
66 douglas 1 }
67    
#ifdef _OpenSSL_
// Construct a URL directly from its parts, including whether it uses TLS;
// nothing is parsed or validated. Only available in _OpenSSL_ builds.
URL::URL(const string& address, unsigned port, const string& path, bool tls) :
	address(address),
	port(port),
	path(path),
	tls(tls)
{
}
#endif
77    
78 douglas 1 string URL::getURL()
79     {
80 douglas 195 ostringstream url;
81     #ifndef _OpenSSL_
82     url << "http://" << address;
83 douglas 1
84     if (port != 80)
85 douglas 195 #else
86     url << (tls ? "https://" : "http://") << address;
87    
88     if (port != 80 && !tls || port != 443 && tls)
89     #endif
90 douglas 1 {
91 douglas 195 url << ":" << port;
92 douglas 1 }
93    
94 douglas 195 url << path;
95 douglas 1
96 douglas 195 return url.str();
97 douglas 1 }
98    
99     void URL::setURL(const URL& url)
100     {
101     this->address = url.address;
102     this->port = url.port;
103     this->path = url.path;
104 douglas 195 #ifdef _OpenSSL_
105     this->tls = url.tls;
106     #endif
107 douglas 1 }
108    
109     void URL::setURL(const string& url)
110     {
111 douglas 195 #ifndef _OpenSSL_
112     if (url.find("http://") != 0 || url.length() <= 7)
113 douglas 1 {
114     cerr << program << ": Malformed URL: " << url << "\n";
115     exit(1);
116     }
117    
118 douglas 195 unsigned begin = 7;
119     #else
120     tls = false;
121 douglas 1
122 douglas 195 if (url.find("https://") == 0 && url.length() > 8)
123     {
124     tls = true;
125     }
126     else if (url.find("http://") != 0 || url.length() <= 7)
127     {
128     cerr << program << ": Malformed URL: " << url << "\n";
129     exit(1);
130     }
131    
132     unsigned begin = tls ? 8 : 7;
133     #endif
134     unsigned colon = url.find(':', begin);
135     unsigned end = url.find('/', begin);
136    
137 douglas 1 if (colon != string::npos && colon < end)
138     {
139     address = url.substr(begin, colon - begin);
140 douglas 212
141     istringstream number((url.substr(colon + 1, end - colon - 1)));
142    
143     number >> port;
144 douglas 1 }
145     else
146     {
147     address = url.substr(begin, end - begin);
148 douglas 195 #ifndef _OpenSSL_
149 douglas 1 port = 80;
150 douglas 195 #else
151     port = tls ? 443 : 80;
152     #endif
153 douglas 1 }
154    
155     if (end == string::npos)
156     {
157     path = "/";
158     }
159     else
160     {
161     path = url.substr(end);
162     }
163     }
164    
165     void URL::setPath(const string& path)
166     {
167     if (path.find('/') != 0)
168     {
169     this->path = "/" + path;
170     }
171     else
172     {
173     this->path = path;
174     }
175     }
176    
177     ostream& operator<<(ostream& os, URL& data)
178     {
179     os << data.getURL();
180    
181     return os;
182     }
183 douglas 17
184     string getLink(string link, URL& url)
185     {
186     string hyperlink = "";
187    
188     if (link.find('#') != string::npos)
189     {
190     unsigned pound = link.find('#');
191     link.erase(pound);
192     }
193    
194     if (link.find("://") != string::npos)
195     {
196 douglas 195 #ifndef _OpenSSL_
197 douglas 22 if (link.find("http://") == 0 && link.length() > 7) hyperlink = link;
198 douglas 195 #else
199     if (link.find("http://") == 0 && link.length() > 7 ||
200     link.find("https://") == 0 && link.length() > 8) hyperlink = link;
201     #endif
202 douglas 17 }
203     else if (link.find("mailto:") == 0)
204     {
205     // do nothing we are not evil spammers!
206     }
207 douglas 18 else if (link.find("news:") == 0)
208     {
209     // do nothing this isn't Google Groups
210     }
211 douglas 17 else if (link.find("//") == 0)
212     {
213 douglas 195 #ifndef _OpenSSL_
214 douglas 17 hyperlink = "http:" + link;
215 douglas 195 #else
216     hyperlink = (url.getTls() ? "https:" : "http:") + link;
217     #endif
218 douglas 17 }
219     else if (link.find('/') == 0)
220     {
221     hyperlink = url.getURL();
222    
223 douglas 195 #ifndef _OpenSSL_
224 douglas 17 unsigned path = hyperlink.find('/', 7);
225 douglas 195 #else
226     unsigned path = hyperlink.find('/', url.getTls() ? 8 : 7);
227     #endif
228 douglas 17 hyperlink.erase(path);
229    
230     hyperlink += link;
231     }
232     else if (link == "")
233     {
234     // a blank link is useless
235     }
236     else
237     {
238     hyperlink = url.getURL();
239     string path = url.getPath();
240    
241     unsigned cutoff = hyperlink.rfind(path);
242     hyperlink.erase(cutoff);
243    
244     unsigned dir = path.rfind('/') + 1;
245     path.erase(dir);
246    
247     while (link.find("../") == 0)
248     {
249     unsigned dot = path.rfind('/') - 1;
250     unsigned up = path.rfind('/', dot) + 1;
251    
252     path.erase(up);
253     link.erase(0, 3);
254     }
255     while (link.find("./") == 0)
256     {
257     link.erase(0, 2);
258     }
259    
260     hyperlink += path + link;
261     }
262    
263     return hyperlink;
264     }

Properties

Name Value
svn:eol-style native
svn:keywords Id