Fix PHD with an ugly hack...
This commit is contained in:
parent
b57945efd1
commit
9895014655
1 changed file with 7 additions and 0 deletions
|
@@ -105,6 +105,8 @@ class PeppermintSaga(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
class PHDComics(_ParserScraper):
|
class PHDComics(_ParserScraper):
|
||||||
|
BROKEN_COMMENT_END = compile(r'--!>')
|
||||||
|
|
||||||
baseUrl = 'http://phdcomics.com/'
|
baseUrl = 'http://phdcomics.com/'
|
||||||
url = baseUrl + 'comics.php'
|
url = baseUrl + 'comics.php'
|
||||||
stripUrl = baseUrl + 'comics/archive.php?comicid=%s'
|
stripUrl = baseUrl + 'comics/archive.php?comicid=%s'
|
||||||
|
@@ -114,6 +116,11 @@ class PHDComics(_ParserScraper):
|
||||||
nextSearch = '//a[img[contains(@src, "next_button")]]'
|
nextSearch = '//a[img[contains(@src, "next_button")]]'
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
# Ugly hack :( -- rewrite the broken comment terminator '--!>' to '-->' before parsing
|
||||||
|
def _parse_page(self, data):
|
||||||
|
data = self.BROKEN_COMMENT_END.sub('-->', data)
|
||||||
|
return super(PHDComics, self)._parse_page(data)
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
"""Skip pages without images."""
|
"""Skip pages without images."""
|
||||||
return url in (
|
return url in (
|
||||||
|
|
Loading…
Reference in a new issue