Fix PHD with an ugly hack...

This commit is contained in:
Tobias Gruetzmacher 2017-02-12 16:21:36 +01:00
parent b57945efd1
commit 9895014655

View file

@ -105,6 +105,8 @@ class PeppermintSaga(_BasicScraper):
class PHDComics(_ParserScraper): class PHDComics(_ParserScraper):
BROKEN_COMMENT_END = compile(r'--!>')
baseUrl = 'http://phdcomics.com/' baseUrl = 'http://phdcomics.com/'
url = baseUrl + 'comics.php' url = baseUrl + 'comics.php'
stripUrl = baseUrl + 'comics/archive.php?comicid=%s' stripUrl = baseUrl + 'comics/archive.php?comicid=%s'
@ -114,6 +116,11 @@ class PHDComics(_ParserScraper):
nextSearch = '//a[img[contains(@src, "next_button")]]' nextSearch = '//a[img[contains(@src, "next_button")]]'
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
# Ugly hack :( -- rewrite broken '--!>' comment terminators to '-->' before the page is parsed.
def _parse_page(self, data):
    """Repair malformed comment terminators, then delegate to the parent parser.

    Every occurrence of ``--!>`` in *data* (matched by
    ``BROKEN_COMMENT_END``) is replaced with ``-->`` and the result is
    passed to the parent class's ``_parse_page``, whose return value is
    returned unchanged.

    NOTE(review): presumably the site's markup ends some HTML comments
    with ``--!>`` and that trips up the parser -- confirm against a live
    page before removing this override.
    """
    repaired = self.BROKEN_COMMENT_END.sub('-->', data)
    parent = super(PHDComics, self)
    return parent._parse_page(repaired)
def shouldSkipUrl(self, url, data): def shouldSkipUrl(self, url, data):
"""Skip pages without images.""" """Skip pages without images."""
return url in ( return url in (