/* ============================================================================ * Douglas Thrift's Search Engine License * * Copyright (C) 2002-2004, Douglas Thrift. All Rights Reserved. * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. The end-user documentation included with the redistribution, if any, must * include the following acknowledgment: * * "This product includes software developed by Douglas Thrift * (http://computers.douglasthrift.net/searchengine/)." * * Alternately, this acknowledgment may appear in the software itself, if * and wherever such third-party acknowledgments normally appear. * * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not * be used to endorse or promote products derived from this software without * specific prior written permission. For written permission, please visit * http://www.douglasthrift.net/contact.cgi for contact information. * * 5. Products derived from this software may not be called "Douglas Thrift's * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their * name, without prior written permission. * * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ============================================================================ */ // Douglas Thrift's Search Engine URL // // Douglas Thrift // // $Id: URL.cpp,v 1.9 2004/01/01 23:00:34 douglas Exp $ #include "URL.h" URL::URL(const string& url) { setURL(url); } URL::URL(const string& address, unsigned port, const string& path) { this->address = address; this->port = port; this->path = path; #ifdef _OpenSSL_ tls = false; #endif } #ifdef _OpenSSL_ URL::URL(const string& address, unsigned port, const string& path, bool tls) { this->address = address; this->port = port; this->path = path; this->tls = tls; } #endif string URL::getURL() { ostringstream url; #ifndef _OpenSSL_ url << "http://" << address; if (port != 80) #else url << (tls ? "https://" : "http://") << address; if (port != 80 && !tls || port != 443 && tls) #endif { url << ":" << port; } url << path; return url.str(); } void URL::setURL(const URL& url) { this->address = url.address; this->port = url.port; this->path = url.path; #ifdef _OpenSSL_ this->tls = url.tls; #endif } void URL::setURL(const string& url) { #ifndef _OpenSSL_ if (url.find("http://") != 0 || url.length() <= 7) { cerr << program << ": Malformed URL: " << url << "\n"; exit(1); } unsigned begin = 7; #else tls = false; if (url.find("https://") == 0 && url.length() > 8) { tls = true; } else if (url.find("http://") != 0 || url.length() <= 7) { cerr << program << ": Malformed URL: " << url << "\n"; exit(1); } unsigned begin = tls ? 8 : 7; #endif unsigned colon = url.find(':', begin); unsigned end = url.find('/', begin); if (colon != string::npos && colon < end) { address = url.substr(begin, colon - begin); istringstream number((url.substr(colon + 1, end - colon - 1))); number >> port; } else { address = url.substr(begin, end - begin); #ifndef _OpenSSL_ port = 80; #else port = tls ? 443 : 80; #endif } if (end == string::npos) { path = "/"; } else { path = url.substr(end); } } void URL::setPath(const string& path) { if (path.find('/') != 0) { this->path = "/" + path; } else { this->path = path; } } ostream& operator<<(ostream& os, URL& data) { os << data.getURL(); return os; } string getLink(string link, URL& url) { string hyperlink = ""; if (link.find('#') != string::npos) { unsigned pound = link.find('#'); link.erase(pound); } if (link.find("://") != string::npos) { #ifndef _OpenSSL_ if (link.find("http://") == 0 && link.length() > 7) hyperlink = link; #else if (link.find("http://") == 0 && link.length() > 7 || link.find("https://") == 0 && link.length() > 8) hyperlink = link; #endif } else if (link.find("mailto:") == 0) { // do nothing we are not evil spammers! } else if (link.find("news:") == 0) { // do nothing this isn't Google Groups } else if (link.find("//") == 0) { #ifndef _OpenSSL_ hyperlink = "http:" + link; #else hyperlink = (url.getTls() ? "https:" : "http:") + link; #endif } else if (link.find('/') == 0) { hyperlink = url.getURL(); #ifndef _OpenSSL_ unsigned path = hyperlink.find('/', 7); #else unsigned path = hyperlink.find('/', url.getTls() ? 8 : 7); #endif hyperlink.erase(path); hyperlink += link; } else if (link == "") { // a blank link is useless } else { hyperlink = url.getURL(); string path = url.getPath(); unsigned cutoff = hyperlink.rfind(path); hyperlink.erase(cutoff); unsigned dir = path.rfind('/') + 1; path.erase(dir); while (link.find("../") == 0) { unsigned dot = path.rfind('/') - 1; unsigned up = path.rfind('/', dot) + 1; path.erase(up); link.erase(0, 3); } while (link.find("./") == 0) { link.erase(0, 2); } hyperlink += path + link; } return hyperlink; }