/* ============================================================================
 * Douglas Thrift's Search Engine License
 *
 * Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. The end-user documentation included with the redistribution, if any, must
 *    include the following acknowledgment:
 *
 *       "This product includes software developed by Douglas Thrift
 *       (http://computers.douglasthrift.net/searchengine/)."
 *
 *    Alternately, this acknowledgment may appear in the software itself, if
 *    and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
 *    be used to endorse or promote products derived from this software without
 *    specific prior written permission.  For written permission, please visit
 *    http://www.douglasthrift.net/contact.cgi for contact information.
 *
 * 5. Products derived from this software may not be called "Douglas Thrift's
 *    Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
 *    name, without prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ============================================================================
 */
// Douglas Thrift's Search Engine URL
//
// Douglas Thrift
//
// URL.cpp

#include "URL.h"

// Constructs a URL from its textual form. All parsing is delegated to
// setURL(const string&), so the same validation applies here.
URL::URL(const string& url)
{
	this->setURL(url);
}

// Constructs a URL directly from its three components. The values are stored
// exactly as given; no parsing or normalisation is performed on them.
URL::URL(const string& address, unsigned port, const string& path) :
	address(address),
	port(port),
	path(path)
{
}

// Returns the textual form of this URL: "http://" followed by the address,
// a ":port" suffix when the port is not 80, and finally the path.
string URL::getURL()
{
	string url = "http://" + address;

	if (port != 80)
	{
		// A stack buffer needs no cleanup, so nothing can leak if one of the
		// string operations below throws. 32 bytes is ample for the decimal
		// digits of an unsigned value plus the terminating NUL.
		char cport[32];

		sprintf(cport, "%u", port);

		url += ':';
		url += cport;
	}

	url += path;

	return url;
}

void URL::setURL(const URL& url)
{
	this->address = url.address;
	this->port = url.port;
	this->path = url.path;
}

void URL::setURL(const string& url)
{
	if (url.find("http://") || url.length() <= 7)
	{
		cerr << program << ": Malformed URL: " << url << "\n";
		exit(1);
	}

	int begin = 7;
	int colon = url.find(':', begin);
	int end = url.find('/', begin);

	if (colon != string::npos && colon < end)
	{
		address = url.substr(begin, colon - begin);
		port = strtoul(url.substr(colon + 1, end - colon - 1).c_str(), 0, 0);
	}
	else
	{
		address = url.substr(begin, end - begin);
		port = 80;
	}

	if (end == string::npos)
	{
		path = "/";
	}
	else
	{
		path = url.substr(end);
	}
}

// Replaces the address component of this URL; the value is stored as given.
void URL::setAddress(const string& address)
{
	URL::address = address;
}

// Replaces the port component of this URL; the value is stored as given.
void URL::setPort(unsigned port)
{
	URL::port = port;
}

void URL::setPath(const string& path)
{
	if (path.find('/') != 0)
	{
		this->path = "/" + path;
	}
	else
	{
		this->path = path;
	}
}

// Writes the textual form of a URL, as produced by getURL(), to a stream and
// returns that stream so insertions can be chained.
ostream& operator<<(ostream& os, URL& data)
{
	const string text = data.getURL();

	return os << text;
}

// Resolves a link found on the page at `url` into an absolute "http://" URL.
//
// link: the raw link text; any "#fragment" suffix is discarded first.
// url:  the URL of the page the link was found on, used as the base for
//       scheme-relative, absolute-path and relative links.
//
// Returns the absolute URL, or an empty string when the link is not followed:
// links containing "://" that are not "http://", "mailto:" and "news:" links,
// and links that are empty once the fragment has been removed.
string getLink(string link, URL& url)
{
	string hyperlink = "";

	// A fragment only names a position inside a page, so it is dropped.
	const string::size_type pound = link.find('#');

	if (pound != string::npos)
	{
		link.erase(pound);
	}

	if (link.find("://") != string::npos)
	{
		// Already absolute; only http links with something after the scheme
		// are kept.
		if (link.find("http://") == 0 && link.length() > 7) hyperlink = link;
	}
	else if (link.find("mailto:") == 0)
	{
		// do nothing we are not evil spammers!
	}
	else if (link.find("news:") == 0)
	{
		// do nothing this isn't Google Groups
	}
	else if (link.find("//") == 0)
	{
		// Scheme-relative link: only the scheme is missing.
		hyperlink = "http:" + link;
	}
	else if (link.find('/') == 0)
	{
		// Absolute path: keep "http://address[:port]" from the base URL and
		// replace everything from its first path slash onwards.
		hyperlink = url.getURL();

		const string::size_type root = hyperlink.find('/', 7);

		// The base may have no slash after the host (a URL built from
		// components with an empty path); erasing at npos would throw.
		if (root != string::npos)
		{
			hyperlink.erase(root);
		}

		hyperlink += link;
	}
	else if (link == "")
	{
		// a blank link is useless
	}
	else
	{
		// Relative link: resolve it against the directory of the base path.
		hyperlink = url.getURL();
		string path = url.getPath();

		// Strip the path from the base so only "http://address[:port]" is
		// left.
		const string::size_type cutoff = hyperlink.rfind(path);

		if (cutoff != string::npos)
		{
			hyperlink.erase(cutoff);
		}

		// Keep the directory part of the path, including its trailing '/'.
		// With no slash at all the directory is empty.
		const string::size_type dir = path.rfind('/');

		path.erase(dir == string::npos ? 0 : dir + 1);

		while (link.find("../") == 0)
		{
			// path is empty or ends with '/' here. Remove the last directory
			// component; a bare "/" or an empty path is left alone, so the
			// link never climbs above the root.
			if (path.length() > 1)
			{
				const string::size_type parent = path.rfind('/',
					path.length() - 2);

				path.erase(parent == string::npos ? 0 : parent + 1);
			}

			link.erase(0, 3);
		}
		while (link.find("./") == 0)
		{
			link.erase(0, 2);
		}

		hyperlink += path + link;
	}

	return hyperlink;
}