From ed3acd2d2fa2e32d15b425b6c94fed9c99f0666f Mon Sep 17 00:00:00 2001 From: Techwolf Date: Wed, 12 Jun 2019 23:42:12 -0700 Subject: [PATCH] Fix TheWhiteboard --- dosagelib/plugins/t.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dosagelib/plugins/t.py b/dosagelib/plugins/t.py index 2b7f3a318..e5e85193c 100644 --- a/dosagelib/plugins/t.py +++ b/dosagelib/plugins/t.py @@ -109,13 +109,15 @@ class TheThinHLine(_TumblrScraper): class TheWhiteboard(_ParserScraper): - BROKEN_PAGE_MIDDLE = compile(r'<') + BROKEN_PAGE_MIDDLE = compile(r'\n<') url = 'http://www.the-whiteboard.com/' - imageSearch = '//center/img' - prevSearch = '//a[text()="previous"]' + stripUrl = url + 'auto%s.html' + firstStripUrl = stripUrl % 'wb001' + imageSearch = '//img[contains(@src, "auto")]' + prevSearch = '//a[.//img[contains(@src, "previous")]]' - # Another ugly hack :( def _parse_page(self, data): + # Ugly hack to fix broken HTML data = self.BROKEN_PAGE_MIDDLE.sub('<', data) return super(TheWhiteboard, self)._parse_page(data)