ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/repos/FreeBSDAdmin/Reminder/website.py
Revision: 1145
Committed: 2009-03-14T22:48:45-07:00 (16 years, 3 months ago) by douglas
Content type: text/x-python
File size: 1020 byte(s)
Log Message:
Improved handling!

File Contents

# User Rev Content
1 douglas 1131 # Website
2     #
3     # Douglas Thrift
4     #
5     # $Id$
6    
7 douglas 1142 from BeautifulSoup import MinimalSoup
8     import copy
9 douglas 1131 import mechanize
10 douglas 1142 import re
11 douglas 1131
# XXX: hack to make Wells Fargo http-equiv redirects actually work: 'body' is
# appended to the element names listed in AbstractHeadParser.head_elems.
mechanize._http.AbstractHeadParser.head_elems = (
    tuple(mechanize._http.AbstractHeadParser.head_elems) + ('body',)
)
# Set the redirect handler's max_redirections to 20 for every handler instance
# (presumably the default was too low for the sites being visited -- confirm).
mechanize._http.HTTPRedirectHandler.max_redirections = 20
15 douglas 1131
class DuckSoup(MinimalSoup):
    """MinimalSoup with one extra markup-massage rule.

    The added rule deletes fragments that look like a tag name split by a
    quoted '+', e.g. ``<scr'+'ipt`` or ``</scr"+"ipt`` (presumably emitted by
    JavaScript string concatenation), before the markup is parsed.
    """

    # Build on a copy so MinimalSoup's own MARKUP_MASSAGE list is not mutated.
    MARKUP_MASSAGE = copy.copy(MinimalSoup.MARKUP_MASSAGE) + [
        (re.compile(r'''</?[a-z]+['"]\+['"][a-z]+'''), lambda _fragment: ''),
    ]
20    
class Website(object):
    """Holds a ``mechanize.Browser`` configured with a desktop User-agent."""

    # Parser class made available as an attribute of this class.
    Soup = DuckSoup

    def __init__(self, debug):
        """Create and configure ``self.browser``.

        debug -- when true, HTTP debugging is switched on for the browser.
        """
        user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.4.154.25 Safari/525.19'

        browser = mechanize.Browser()
        browser.addheaders = [('User-agent', user_agent)]

        if debug:
            browser.set_debug_http(True)

        # Enable http-equiv, redirect and refresh handling; disable robots.txt
        # handling.
        browser.set_handle_equiv(True)
        browser.set_handle_redirect(True)
        browser.set_handle_refresh(True)
        browser.set_handle_robots(False)

        self.browser = browser

Properties

Name Value
svn:keywords Id