4 |
|
# |
5 |
|
# $Id$ |
6 |
|
|
7 |
< |
from BeautifulSoup import BeautifulSoup |
7 |
> |
from BeautifulSoup import MinimalSoup |
8 |
> |
import copy |
9 |
|
import mechanize |
10 |
+ |
import re |
11 |
|
|
12 |
|
# XXX: hack to make Wells Fargo http-equiv redirects actually work.
# Re-binds AbstractHeadParser.head_elems to a tuple holding its previous
# entries followed by 'body'.
# NOTE(review): presumably this lets mechanize's head parser keep scanning
# for http-equiv meta tags inside <body> -- confirm against mechanize.
mechanize._http.AbstractHeadParser.head_elems = (
    tuple(mechanize._http.AbstractHeadParser.head_elems) + ('body',)
)
14 |
|
|
15 |
+ |
class DuckSoup(MinimalSoup):
    """MinimalSoup with one extra markup-massage rule.

    The extra rule deletes tag openers whose name is split by a quoted
    string concatenation, e.g. ``<scr'+'ipt`` or ``</scr"+"ipt``.
    NOTE(review): presumably such fragments come from inline JavaScript
    that assembles tags piecewise -- confirm against the scraped pages.
    """

    # Work on a shallow copy so MinimalSoup's own rule list is not mutated.
    MARKUP_MASSAGE = copy.copy(MinimalSoup.MARKUP_MASSAGE)
    MARKUP_MASSAGE.extend([
        # (pattern, replacement callable): every match becomes ''.
        (re.compile(r'''</?[a-z]+['"]\+['"][a-z]+'''), lambda _match: ''),
    ])
19 |
+ |
|
20 |
|
class Website(object): |
21 |
< |
Soup = BeautifulSoup |
21 |
> |
Soup = DuckSoup |
22 |
|
|
23 |
|
def __init__(self, debug): |
24 |
|
self.browser = mechanize.Browser() |