trunk/Search/Processor.cpp

/* ============================================================================
 * Douglas Thrift's Search Engine License
 *
 * Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. The end-user documentation included with the redistribution, if any, must
 *    include the following acknowledgment:
 *
 *       "This product includes software developed by Douglas Thrift
 *       (http://computers.douglasthrift.net/searchengine/)."
 *
 *    Alternately, this acknowledgment may appear in the software itself, if
 *    and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
 *    be used to endorse or promote products derived from this software without
 *    specific prior written permission.  For written permission, please visit
 *    http://www.douglasthrift.net/contact.cgi for contact information.
 *
 * 5. Products derived from this software may not be called "Douglas Thrift's
 *    Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
 *    name, without prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ============================================================================
 */
// Douglas Thrift's Search Engine Processor
//
// Douglas Thrift
//
// Processor.cpp

#include "Processor.h"

// Creates a processor that starts out owning a default-constructed Page.
Processor::Processor()
{
        this->page = new Page();
}

// Releases the Page this processor currently holds.
Processor::~Processor()
{
        delete this->page;
}

// Reads the document delivered by http and stores the result in page.
//
// A document whose content type starts with "text/html" is handed to the
// six-argument process() overload; the title, description, text and headings
// it extracts are then entity-decoded and normalized.  Any other content type
// is read line by line and stored as plain text.
//
// http  the response to read the document from
// url   the address of the document, recorded in page
//
// Returns false, leaving page untouched, when the HTML overload returns false;
// returns true otherwise.
bool Processor::process(HttpHandler& http, URL& url)
{
        string title, description, text;
        vector<string> headings;

        if (http.contentType().find("text/html") == 0)
        {
                if (!process(http, url, title, description, text, headings))
                {
                        return false;
                }

                // Every extracted field receives exactly the same clean-up, so
                // gather them once (title, description, text, then the headings)
                // instead of repeating the calls for each of them.
                vector<string*> fields;
                fields.reserve(headings.size() + 3);
                fields.push_back(&title);
                fields.push_back(&description);
                fields.push_back(&text);
                for (vector<string>::size_type index = 0; index < headings.size();
                        ++index)
                {
                        fields.push_back(&headings[index]);
                }

                for (vector<string*>::size_type index = 0; index < fields.size();
                        ++index)
                {
                        string& field = *fields[index];

                        entities(field, "&nbsp;", ' ');
                        entities(field, "&lt;", '<');
                        entities(field, "&gt;", '>');
                        entities(field, "&quot;", '\"');
                        // "&amp;" stays last, as it always was; presumably so that
                        // text such as "&amp;lt;" ends up as "&lt;" and not "<".
                        entities(field, "&amp;", '&');
                }

                for (vector<string*>::size_type index = 0; index < fields.size();
                        ++index)
                {
                        normalize(*fields[index]);
                }
        }
        else
        {
                string line;
                while (http.good())
                {
                        http.getline(line);

                        text += line + "\n";
                }

                normalize(text);
        }

        page->setSize(http.contentLength());
        page->setURL(url);
        page->setTitle(title);
        page->setDescription(description);
        page->setText(text);
        page->setHeadings(headings);

        return true;
}

void Processor::reset()
{
        links.clear();
        delete page;
        page = new Page();
}

bool Processor::process(HttpHandler& http, URL& url, string& title, string&
        description, string& text, vector<string>& headings)
{
        bool inHtml = false, inHead = false, inTitle = false, inBody = false,
                inHeading = false, inComment = false, /*knowSize = page->getSize() > 0,
*/              follow = true, answer = true;
        unsigned startComment = 0, finishComment = 0;
        string line;
        while (http.good())
        {
                http.getline(line);
                string heading;

                unsigned begin = 0;
                while (begin < line.length())
                {
                        unsigned open = line.find('<', begin);
                        unsigned close = line.find('>', begin);

                        string next;
                        while (close == string::npos)
                        {
                                http.getline(next);
                                line += '\n' + next;
                                close = line.find('>', begin);
                        }

                        // strangely this is necessary sometimes
                        if (open == string::npos) open = line.find('<', begin);

                        string between = line.substr(begin, open - begin);
                        string tag = getTag(line, open, close);
                        string lowerTag(tag.length(), ' ');

                        for (unsigned index = 0; index < tag.length(); index++)
                        {
                                lowerTag[index] = tolower(tag[index]);
                        }

                        if (inHtml && !inComment)
                        {
                                if (inHead && inTitle)
                                {
                                        title += between + "\n";
                                }

                                if (inBody)
                                {
                                        text += between + "\n";
                                }

                                if (inBody && inHeading)
                                {
                                        heading += between + "\n";
                                }
                                if (((lowerTag.find("meta ") == 0) || (lowerTag.find("meta\n")
                                        == 0) || (lowerTag.find("meta   ") == 0)) && inHead)
                                {
                                        if (lowerTag.find("name=robots") != string::npos ||
                                                lowerTag.find("name=\"robots\"") != string::npos)
                                        {
                                                unsigned start = lowerTag.find("content=\"") + 9;
                                                unsigned finish = lowerTag.find('\"', start);

                                                string robots = lowerTag.substr(start, finish - start);

                                                if ((robots.find("noindex") != string::npos &&
                                                        robots.find("nofollow") != string::npos) ||
                                                        robots.find("none") != string::npos)
                                                {
                                                        answer = false;
                                                        follow = false;
                                                        links.clear();

                                                        return answer;
                                                }
                                                else if (robots.find("noindex") != string::npos)
                                                {
                                                        answer = false;
                                                }
                                                else if (robots.find("nofollow") != string::npos)
                                                {
                                                        follow = false;
                                                        links.clear();
                                                }
                                        }
                                        else if (lowerTag.find("name=description") != string::npos
                                                || lowerTag.find("name=\"description\"") !=
                                                string::npos)
                                        {
                                                unsigned start = lowerTag.find("content=\"") + 9;
                                                unsigned finish = lowerTag.find('\"', start);

                                                description = tag.substr(start, finish - start);
                                        }
                                }

                                if (((lowerTag.find("a ") == 0) || (lowerTag.find("a\n") == 0)
                                        || (lowerTag.find("a    ") == 0)) && inBody && follow)
                                {
                                        if (lowerTag.find("href=\"") != string::npos)
                                        {
                                                unsigned start = lowerTag.find("href=\"") + 6;
                                                unsigned finish = lowerTag.find('\"', start);

                                                string link = getLink(tag.substr(start, finish -
                                                        start), url);

                                                if (link != "") links.insert(link);
                                        }
                                        else if (lowerTag.find("href=") != string::npos)
                                        {
                                                unsigned start = lowerTag.find("href=") + 5;
                                                unsigned finish = lowerTag.find(' ', start);

                                                if (finish < close)
                                                {
                                                        string link = getLink(tag.substr(start, finish -
                                                                start), url);

                                                        if (link != "") links.insert(link);
                                                }
                                                else
                                                {
                                                        string link = getLink(tag.substr(start, close -
                                                                start), url);

                                                        if (link != "") links.insert(link);
                                                }
                                        }
                                }

                                if ((lowerTag.find("img ") == 0) || (lowerTag.find("img\n") ==
                                        0) || (lowerTag.find("img       ")) && inBody)
                                {
                                        if (lowerTag.find("alt=\"") != string::npos)
                                        {
                                                unsigned start = lowerTag.find("alt=\"") + 5;
                                                unsigned finish = lowerTag.find('\"', start);

                                                text += tag.substr(start, finish - start) + ' ';
                                                if (inHeading) heading += tag.substr(start, finish -
                                                        start) + ' ';
                                        }
                                        else if (lowerTag.find("alt=") != string::npos)
                                        {
                                                unsigned start = lowerTag.find("alt=") + 4;
                                                unsigned finish = lowerTag.find(' ', start);

                                                if (finish < close)
                                                {
                                                        text += tag.substr(start, finish - start) + ' ';
                                                        if (inHeading) heading += tag.substr(start, finish
                                                                - start) + ' ';
                                                }
                                                else
                                                {
                                                        text += tag.substr(start, close - start) + ' ';
                                                        if (inHeading) heading += tag.substr(start, close -
                                                                start) + ' ';
                                                }
                                        }
                                }
                        }

                        if (lowerTag.find("html") == 0) inHtml = true;
                        if (lowerTag.find("/html") == 0) inHtml = false;

                        if (lowerTag.find("head") == 0) inHead = true;
                        if (lowerTag.find("/head") == 0) inHead = false;

                        if (lowerTag.find("title") == 0) inTitle = true;
                        if (lowerTag.find("/title") == 0) inTitle = false;

                        if (lowerTag.find("body") == 0 || lowerTag.find("noframes") == 0)
                                inBody = true;
                        if (lowerTag.find("/body") == 0 || lowerTag.find("/noframes") == 0)
                                inBody = false;

                        if (lowerTag.find("h1") == 0 || lowerTag.find("h2") == 0 ||
                                lowerTag.find("h3") == 0 || lowerTag.find("h4") == 0 ||
                                lowerTag.find("h5") == 0 || lowerTag.find("h6") == 0)
                                inHeading = true;
                        if (lowerTag.find("/h1") == 0 || lowerTag.find("/h2") == 0 ||
                                lowerTag.find("/h3") == 0 || lowerTag.find("/h4") == 0 ||
                                lowerTag.find("/h5") == 0 || lowerTag.find("/h6") == 0)
                        {
                                if (heading != "") headings.push_back(heading);
                                inHeading = false;
                        }

                        if (lowerTag.find("!--") == 0)
                        {
                                startComment = open;
                                inComment = true;
                        }
                        if (line.find("-->", begin) >= startComment && line.find("-->",
                                begin) != string::npos)
                        {
                                finishComment = line.find("-->", begin) + 3;
                                inComment = false;
                        }

                        if (close == string::npos)
                        {
                                begin = close;
                        }
                        else
                        {
                                begin = close + 1;
                        }
                }

                startComment = 0;
                finishComment = 0;

//              if (!knowSize) page->setSize(page->getSize() + line.length() + 1);
        }

        return answer;
}

/*bool Processor::html(HttpHandler& http)
{
        bool answer = false;

        string line;
        http.getline(line);

        while (http.good())
        {
                string field;
                http.getline(field, ' ');
                if (field == "") break;
                http.getline(line);

                if (field == "Content-Type:" || field == "Content-type:")
                {
                        if (line.find("text/html") != string::npos)
                        {
                                answer = true;
                        }
                }

                if (field == "Content-Length:" || field == "Content-length:")
                {
                        page->setSize(strtoul(line.c_str(), 0, 0));
                }
        }

        return answer;
}*/

// Returns the text of a tag, without its angle brackets.
//
// line   the line containing the tag
// open   index of the tag's '<' in line
// close  index of the tag's '>' in line
//
// Returns an empty string when open does not lie inside line or close does
// not come after open.
string Processor::getTag(const string& line, unsigned open, unsigned close)
{
        // Without this check the unsigned arithmetic below wraps around -- for
        // example when a string::npos was narrowed to unsigned by the caller --
        // and substr() would return unrelated text or throw.
        if (open >= line.length() || close <= open) return string();

        return line.substr(open + 1, close - open - 1);
}
Revision:	17
Committed:	2002-12-09T18:31:11-08:00 (22 years, 6 months ago) by douglas
File size:	10914 byte(s)
Log Message:	Started reimplementing HttpHandler, moved Processor.getLink() to be global and included in URL.h and implemented in URL.cpp.
#	Content
1	/* ============================================================================
2	* Douglas Thrift's Search Engine License
3	*
4	* Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions are met:
7	*
8	* 1. Redistributions of source code must retain the above copyright notice,
9	* this list of conditions and the following disclaimer.
10	*
11	* 2. Redistributions in binary form must reproduce the above copyright notice,
12	* this list of conditions and the following disclaimer in the documentation
13	* and/or other materials provided with the distribution.
14	*
15	* 3. The end-user documentation included with the redistribution, if any, must
16	* include the following acknowledgment:
17	*
18	* "This product includes software developed by Douglas Thrift
19	* (http://computers.douglasthrift.net/searchengine/)."
20	*
21	* Alternately, this acknowledgment may appear in the software itself, if
22	* and wherever such third-party acknowledgments normally appear.
23	*
24	* 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25	* be used to endorse or promote products derived from this software without
26	* specific prior written permission. For written permission, please visit
27	* http://www.douglasthrift.net/contact.cgi for contact information.
28	*
29	* 5. Products derived from this software may not be called "Douglas Thrift's
30	* Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31	* name, without prior written permission.
32	*
33	* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34	* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35	* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36	* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39	* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40	* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41	* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42	* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43	* ============================================================================
44	*/
45	// Douglas Thrift's Search Engine Processor
46	//
47	// Douglas Thrift
48	//
49	// Processor.cpp
50
51	#include "Processor.h"
52
53	Processor::Processor()
54	{
55	page = new Page();
56	}
57
58	Processor::~Processor()
59	{
60	delete page;
61	}
62
63	bool Processor::process(HttpHandler& http, URL& url)
64	{
65	string title, description, text;
66	vector<string> headings;
67
68	if (http.contentType().find("text/html") == 0)
69	{
70	if (!process(http, url, title, description, text, headings)) return
71	false;
72
73	entities(title, " ", ' ');
74	entities(title, "<", '<');
75	entities(title, ">", '>');
76	entities(title, """, '\"');
77	entities(title, "&", '&');
78
79	entities(description, " ", ' ');
80	entities(description, "<", '<');
81	entities(description, ">", '>');
82	entities(description, """, '\"');
83	entities(description, "&", '&');
84
85	entities(text, " ", ' ');
86	entities(text, "<", '<');
87	entities(text, ">", '>');
88	entities(text, """, '\"');
89	entities(text, "&", '&');
90
91	for (int index = 0; index < headings.size(); index++)
92	{
93	entities(headings[index], " ", ' ');
94	entities(headings[index], "<", '<');
95	entities(headings[index], ">", '>');
96	entities(headings[index], """, '\"');
97	entities(headings[index], "&", '&');
98	}
99
100	normalize(title);
101	normalize(description);
102	normalize(text);
103	for (int index0 = 0; index0 < headings.size(); index0++)
104	{
105	normalize(headings[index0]);
106	}
107	}
108	else
109	{
110	// bool knowSize = page->getSize() > 0;
111
112	string line;
113	while (http.good())
114	{
115	http.getline(line);
116
117	text += line + "\n";
118
119	// if (!knowSize) page->setSize(page->getSize() + line.length() + 1);
120	}
121
122	normalize(text);
123	}
124
125	page->setSize(http.contentLength());
126	page->setURL(url);
127	page->setTitle(title);
128	page->setDescription(description);
129	page->setText(text);
130	page->setHeadings(headings);
131
132	return true;
133	}
134
135	void Processor::reset()
136	{
137	links.clear();
138	delete page;
139	page = new Page();
140	}
141
142	bool Processor::process(HttpHandler& http, URL& url, string& title, string&
143	description, string& text, vector<string>& headings)
144	{
145	bool inHtml = false, inHead = false, inTitle = false, inBody = false,
146	inHeading = false, inComment = false, /*knowSize = page->getSize() > 0,
147	*/ follow = true, answer = true;
148	unsigned startComment = 0, finishComment = 0;
149	string line;
150	while (http.good())
151	{
152	http.getline(line);
153	string heading;
154
155	unsigned begin = 0;
156	while (begin < line.length())
157	{
158	unsigned open = line.find('<', begin);
159	unsigned close = line.find('>', begin);
160
161	string next;
162	while (close == string::npos)
163	{
164	http.getline(next);
165	line += '\n' + next;
166	close = line.find('>', begin);
167	}
168
169	// strangely this is necessary sometimes
170	if (open == string::npos) open = line.find('<', begin);
171
172	string between = line.substr(begin, open - begin);
173	string tag = getTag(line, open, close);
174	string lowerTag(tag.length(), ' ');
175
176	for (unsigned index = 0; index < tag.length(); index++)
177	{
178	lowerTag[index] = tolower(tag[index]);
179	}
180
181	if (inHtml && !inComment)
182	{
183	if (inHead && inTitle)
184	{
185	title += between + "\n";
186	}
187
188	if (inBody)
189	{
190	text += between + "\n";
191	}
192
193	if (inBody && inHeading)
194	{
195	heading += between + "\n";
196	}
197	if (((lowerTag.find("meta ") == 0) \|\| (lowerTag.find("meta\n")
198	== 0) \|\| (lowerTag.find("meta ") == 0)) && inHead)
199	{
200	if (lowerTag.find("name=robots") != string::npos \|\|
201	lowerTag.find("name=\"robots\"") != string::npos)
202	{
203	unsigned start = lowerTag.find("content=\"") + 9;
204	unsigned finish = lowerTag.find('\"', start);
205
206	string robots = lowerTag.substr(start, finish - start);
207
208	if ((robots.find("noindex") != string::npos &&
209	robots.find("nofollow") != string::npos) \|\|
210	robots.find("none") != string::npos)
211	{
212	answer = false;
213	follow = false;
214	links.clear();
215
216	return answer;
217	}
218	else if (robots.find("noindex") != string::npos)
219	{
220	answer = false;
221	}
222	else if (robots.find("nofollow") != string::npos)
223	{
224	follow = false;
225	links.clear();
226	}
227	}
228	else if (lowerTag.find("name=description") != string::npos
229	\|\| lowerTag.find("name=\"description\"") !=
230	string::npos)
231	{
232	unsigned start = lowerTag.find("content=\"") + 9;
233	unsigned finish = lowerTag.find('\"', start);
234
235	description = tag.substr(start, finish - start);
236	}
237	}
238
239	if (((lowerTag.find("a ") == 0) \|\| (lowerTag.find("a\n") == 0)
240	\|\| (lowerTag.find("a ") == 0)) && inBody && follow)
241	{
242	if (lowerTag.find("href=\"") != string::npos)
243	{
244	unsigned start = lowerTag.find("href=\"") + 6;
245	unsigned finish = lowerTag.find('\"', start);
246
247	string link = getLink(tag.substr(start, finish -
248	start), url);
249
250	if (link != "") links.insert(link);
251	}
252	else if (lowerTag.find("href=") != string::npos)
253	{
254	unsigned start = lowerTag.find("href=") + 5;
255	unsigned finish = lowerTag.find(' ', start);
256
257	if (finish < close)
258	{
259	string link = getLink(tag.substr(start, finish -
260	start), url);
261
262	if (link != "") links.insert(link);
263	}
264	else
265	{
266	string link = getLink(tag.substr(start, close -
267	start), url);
268
269	if (link != "") links.insert(link);
270	}
271	}
272	}
273
274	if ((lowerTag.find("img ") == 0) \|\| (lowerTag.find("img\n") ==
275	0) \|\| (lowerTag.find("img ")) && inBody)
276	{
277	if (lowerTag.find("alt=\"") != string::npos)
278	{
279	unsigned start = lowerTag.find("alt=\"") + 5;
280	unsigned finish = lowerTag.find('\"', start);
281
282	text += tag.substr(start, finish - start) + ' ';
283	if (inHeading) heading += tag.substr(start, finish -
284	start) + ' ';
285	}
286	else if (lowerTag.find("alt=") != string::npos)
287	{
288	unsigned start = lowerTag.find("alt=") + 4;
289	unsigned finish = lowerTag.find(' ', start);
290
291	if (finish < close)
292	{
293	text += tag.substr(start, finish - start) + ' ';
294	if (inHeading) heading += tag.substr(start, finish
295	- start) + ' ';
296	}
297	else
298	{
299	text += tag.substr(start, close - start) + ' ';
300	if (inHeading) heading += tag.substr(start, close -
301	start) + ' ';
302	}
303	}
304	}
305	}
306
307	if (lowerTag.find("html") == 0) inHtml = true;
308	if (lowerTag.find("/html") == 0) inHtml = false;
309
310	if (lowerTag.find("head") == 0) inHead = true;
311	if (lowerTag.find("/head") == 0) inHead = false;
312
313	if (lowerTag.find("title") == 0) inTitle = true;
314	if (lowerTag.find("/title") == 0) inTitle = false;
315
316	if (lowerTag.find("body") == 0 \|\| lowerTag.find("noframes") == 0)
317	inBody = true;
318	if (lowerTag.find("/body") == 0 \|\| lowerTag.find("/noframes") == 0)
319	inBody = false;
320
321	if (lowerTag.find("h1") == 0 \|\| lowerTag.find("h2") == 0 \|\|
322	lowerTag.find("h3") == 0 \|\| lowerTag.find("h4") == 0 \|\|
323	lowerTag.find("h5") == 0 \|\| lowerTag.find("h6") == 0)
324	inHeading = true;
325	if (lowerTag.find("/h1") == 0 \|\| lowerTag.find("/h2") == 0 \|\|
326	lowerTag.find("/h3") == 0 \|\| lowerTag.find("/h4") == 0 \|\|
327	lowerTag.find("/h5") == 0 \|\| lowerTag.find("/h6") == 0)
328	{
329	if (heading != "") headings.push_back(heading);
330	inHeading = false;
331	}
332
333	if (lowerTag.find("!--") == 0)
334	{
335	startComment = open;
336	inComment = true;
337	}
338	if (line.find("-->", begin) >= startComment && line.find("-->",
339	begin) != string::npos)
340	{
341	finishComment = line.find("-->", begin) + 3;
342	inComment = false;
343	}
344
345	if (close == string::npos)
346	{
347	begin = close;
348	}
349	else
350	{
351	begin = close + 1;
352	}
353	}
354
355	startComment = 0;
356	finishComment = 0;
357
358	// if (!knowSize) page->setSize(page->getSize() + line.length() + 1);
359	}
360
361	return answer;
362	}
363
364	/*bool Processor::html(HttpHandler& http)
365	{
366	bool answer = false;
367
368	string line;
369	http.getline(line);
370
371	while (http.good())
372	{
373	string field;
374	http.getline(field, ' ');
375	if (field == "") break;
376	http.getline(line);
377
378	if (field == "Content-Type:" \|\| field == "Content-type:")
379	{
380	if (line.find("text/html") != string::npos)
381	{
382	answer = true;
383	}
384	}
385
386	if (field == "Content-Length:" \|\| field == "Content-length:")
387	{
388	page->setSize(strtoul(line.c_str(), 0, 0));
389	}
390	}
391
392	return answer;
393	}*/
394
395	string Processor::getTag(const string& line, unsigned open, unsigned close)
396	{
397	string tag = line.substr(open + 1, close - open - 1);
398
399	return tag;
400	}