From 989501465575b76aaa12d4e534dd25faa30932a8 Mon Sep 17 00:00:00 2001
From: Tobias Gruetzmacher
Date: Sun, 12 Feb 2017 16:21:36 +0100
Subject: [PATCH] Fix PHD with an ugly hack...

---
 dosagelib/plugins/p.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/dosagelib/plugins/p.py b/dosagelib/plugins/p.py
index 14245ee0f..38366940e 100644
--- a/dosagelib/plugins/p.py
+++ b/dosagelib/plugins/p.py
@@ -105,6 +105,8 @@ class PeppermintSaga(_BasicScraper):
 
 
 class PHDComics(_ParserScraper):
+    BROKEN_COMMENT_END = compile(r'--!>')
+
     baseUrl = 'http://phdcomics.com/'
     url = baseUrl + 'comics.php'
     stripUrl = baseUrl + 'comics/archive.php?comicid=%s'
@@ -114,6 +116,11 @@ class PHDComics(_ParserScraper):
     nextSearch = '//a[img[contains(@src, "next_button")]]'
     help = 'Index format: n (unpadded)'
 
+    # Ugly hack :(
+    def _parse_page(self, data):
+        data = self.BROKEN_COMMENT_END.sub('-->', data)
+        return super(PHDComics, self)._parse_page(data)
+
     def shouldSkipUrl(self, url, data):
         """Skip pages without images."""
         return url in (