ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/Indexer.h
Revision: 28
Committed: 2003-01-02T19:42:33-08:00 (22 years, 6 months ago) by douglas
Content type: text/x-c
File size: 3261 byte(s)
Log Message:
Changed Copyright notices to state 2002-2003.

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4 douglas 28 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 douglas 1 * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine Indexer
46     //
47     // Douglas Thrift
48     //
49     // Indexer.h
50    
51     #ifndef _Indexer_h_
52     #define _Indexer_h_
53    
54     #include "Search.h"
55     #include "URL.h"
56     #include "Page.h"
57     #include "HttpHandler.h"
58     #include "Processor.h"
59    
// Shorthand names for a set of strings and for that set's iterator type.
// NOTE(review): set and string are written unqualified and this header pulls in
// no standard headers itself, so both names must be made visible by one of the
// project headers included before this point -- confirm which one.
60     typedef set<string> Set;
61     typedef set<string>::iterator SetIterator;
62    
// Indexer: declaration of the indexer component (the file header names this
// program "Douglas Thrift's Search Engine Indexer"). Only declarations appear
// here; apart from the empty inline destructor, every member function is
// defined outside this header.
63     class Indexer
64     {
65     private:
// NOTE(review): presumably classifies how a robots.txt record applies to this
// crawler (no match / version match / name match / everyone) -- confirm
// against the definition of robots().
66     enum robot { none, version, name, all };
// Helper objects held by value; their types come from the project headers
// included by this file (presumably HttpHandler.h and Processor.h).
67     HttpHandler http;
68     Processor processor;
// NOTE(review): presumably the set of pages already indexed -- confirm.
69     Set pages;
// Queue of URL objects, and a queue of strings declared next to it.
// NOTE(review): referers presumably runs in step with links (one referer per
// queued link) -- TODO confirm in the implementation.
70     queue<URL> links;
71 douglas 25 queue<string> referers;
// NOTE(review): presumably the path of the index file to write, taken from
// the constructor argument of the same name -- confirm.
72 douglas 1 string indexFile;
// String sets; domains and restrictions share their names with the
// constructor's set<string>& parameters.
// NOTE(review): checked presumably records hosts whose robots rules were
// already fetched -- confirm.
73     Set domains;
74     Set restrictions;
75     Set checked;
// Private overload of index(): takes the URL by non-const reference, an output
// file stream by reference, and a referer string BY VALUE (copied on each
// call) that defaults to the empty string when omitted.
76 douglas 25 void index(URL& url, ofstream& fout, const string referer = "");
// Returns a bool for the given URL.
// NOTE(review): presumably true when the URL falls outside the allowed
// domains or inside a restriction -- confirm.
77 douglas 1 bool restricted(URL& url);
// NOTE(review): presumably fetches and applies robots exclusion rules for the
// URL's host -- confirm.
78     void robots(URL& url);
79     public:
// Constructor. All three parameters are non-const references, so callers must
// pass lvalues (named objects); temporaries will not bind.
80     Indexer(string& indexFile, set<string>& domains,
81     set<string>& restrictions);
// Empty inline destructor: nothing is released by hand here; the members are
// destroyed by their own destructors.
82     ~Indexer() {}
// Public overload of index(): takes a string by non-const reference.
// NOTE(review): begin is presumably the starting URL of the crawl -- confirm.
83     void index(string& begin);
84     };
85    
86     #endif // _Indexer_h_