From 42f66c07b02328fa35ccc74f22f915305f423e2d Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Mon, 22 May 2017 00:30:31 +0200 Subject: [PATCH] Random module fixes. --- dosagelib/plugins/old.py | 5 +++++ dosagelib/plugins/t.py | 22 ++++++++++++---------- dosagelib/plugins/v.py | 9 --------- dosagelib/plugins/w.py | 22 +++++++--------------- dosagelib/plugins/z.py | 4 ++-- 5 files changed, 26 insertions(+), 36 deletions(-) diff --git a/dosagelib/plugins/old.py b/dosagelib/plugins/old.py index 6490a9034..a8e16609f 100644 --- a/dosagelib/plugins/old.py +++ b/dosagelib/plugins/old.py @@ -271,6 +271,7 @@ class Removed(Scraper): cls('GoComics/HanktheSock'), cls('GoComics/HarambeeHills'), cls('GoComics/Hbenson7'), + cls('GoComics/HeadComics'), cls('GoComics/HIP'), cls('GoComics/HolidayDoodles'), cls('GoComics/HolySchnark'), @@ -289,6 +290,7 @@ class Removed(Scraper): cls('GoComics/LarryvilleBlue'), cls('GoComics/Leadbellies'), cls('GoComics/LeGooseyLu'), + cls('GoComics/LeighLunaComics'), cls('GoComics/LIGHTERSIDE'), cls('GoComics/LostInTranslation'), cls('GoComics/Lucan'), @@ -306,6 +308,7 @@ class Removed(Scraper): cls('GoComics/Mortimer'), cls('GoComics/MrGigiAndTheSquid'), cls('GoComics/MrMorris'), + cls('GoComics/Mulligan'), cls('GoComics/MyGuardianGrandpa'), cls('GoComics/NeatStep'), cls('GoComics/NedAndLarry'), @@ -345,6 +348,7 @@ class Removed(Scraper): cls('GoComics/SuburbanWilderness'), cls('GoComics/SuckerHeadSmack'), cls('GoComics/TeacherInk'), + cls('GoComics/ThatMonkeyTune'), cls('GoComics/TheAcerbicCaf'), cls('GoComics/TheAdventuresofTeetyBallerina'), cls('GoComics/TheEdperiment'), @@ -492,6 +496,7 @@ class Removed(Scraper): cls('ThunderAndLightning'), cls('TinyKittenTeeth'), cls('TwoTwoOneFour'), + cls('VampireCheerleaders'), cls('WayfarersMoon'), cls('WebcomicsNation/AgnesQuill'), cls('WebcomicsNation/MyMuse'), diff --git a/dosagelib/plugins/t.py b/dosagelib/plugins/t.py index 8bec7c4ae..1cdde7087 100644 --- a/dosagelib/plugins/t.py +++ b/dosagelib/plugins/t.py @@ -5,7 +5,7 @@ from __future__ import absolute_import, division, print_function -from re import compile, escape, IGNORECASE +from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter, xpath_class @@ -20,13 +20,11 @@ class TheBrads(_ParserScraper): multipleImagesPerStrip = True -class TheDevilsPanties(_BasicScraper): +class TheDevilsPanties(_WordPressScraper): url = 'http://thedevilspanties.com/' stripUrl = url + 'archives/%s' firstStripUrl = stripUrl % '300' - imageSearch = compile(tagre("img", "src", r'(http://origin\.thedevilspanties\.com/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(/archives/\d+)', - after="Previous")) + prevSearch = '//a[%s]' % xpath_class('navi-prev') help = 'Index format: number' @@ -101,12 +99,16 @@ class TheThinHLine(_TumblrScraper): return super(TheThinHLine, self).getComicStrip(subPage, pageData) -class TheWhiteboard(_BasicScraper): +class TheWhiteboard(_ParserScraper): + BROKEN_PAGE_MIDDLE = compile(r'<') url = 'http://www.the-whiteboard.com/' - stripUrl = url + 'auto%s.html' - imageSearch = compile(r'', IGNORECASE) - prevSearch = compile(r' previous', IGNORECASE) - help = 'Index format: twb or wb + n wg. twb1000' + imageSearch = '//center/img' + prevSearch = '//a[text()="previous"]' + + # Another ugly hack :( + def _parse_page(self, data): + data = self.BROKEN_PAGE_MIDDLE.sub('<', data) + return super(TheWhiteboard, self)._parse_page(data) class TheWotch(_WordPressScraper): diff --git a/dosagelib/plugins/v.py b/dosagelib/plugins/v.py index 7cb0e9dcf..422f0a7ac 100644 --- a/dosagelib/plugins/v.py +++ b/dosagelib/plugins/v.py @@ -11,15 +11,6 @@ from ..helpers import indirectStarter, xpath_class from ..util import tagre -class VampireCheerleaders(_BasicScraper): - url = 'http://www.vampirecheerleaders.net/' - stripUrl = url + 'strips-vc/%s' - firstStripUrl = stripUrl % 'fang_service' - imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://www\.vampirecheerleaders\.net/strips-vc/[^"]+)', before="cndprev")) - help = 'Index format: name' - - class VGCats(_BasicScraper): url = 'http://www.vgcats.com/comics/' stripUrl = url + '?strip_id=%s' diff --git a/dosagelib/plugins/w.py b/dosagelib/plugins/w.py index 7e2189823..778117d2c 100644 --- a/dosagelib/plugins/w.py +++ b/dosagelib/plugins/w.py @@ -28,23 +28,15 @@ class WastedTalent(_BasicScraper): help = 'Index format: stripname' -class WebDesignerCOTW(_BasicScraper): - url = 'http://www.webdesignerdepot.com/' - rurl = escape(url) +class WebDesignerCOTW(_ParserScraper): + baseUrl = 'https://www.webdesignerdepot.com/' + url = baseUrl + 'category/comics/' starter = indirectStarter - stripUrl = url + '%s/' - firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1' - imageSearch = ( - compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/\d+/\d+/\d+s?\.[^"]+)')), - compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/\d+/\d+/Christmas\d+\.[^"]+)')), - compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics\d+[a-z0-9]*/\d+a?\.[^"]+)')), - compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics/\d+\.[^"]+)')), - ) + firstStripUrl = baseUrl + '2009/11/comics-of-the-week-1/' + imageSearch = '//article[%s]//img' % xpath_class('article-content') multipleImagesPerStrip = True - prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl, - before='prev', quote="'")) - latestSearch = compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+/)' % rurl)) - help = 'Index format: yyyy/mm/stripname' + prevSearch = '//a[span[%s]]' % xpath_class('icon-right-small') + latestSearch = '//a[%s]' % xpath_class('anim-link') def shouldSkipUrl(self, url, data): """Skip non-comic URLs.""" diff --git a/dosagelib/plugins/z.py b/dosagelib/plugins/z.py index b63f68168..52c308152 100644 --- a/dosagelib/plugins/z.py +++ b/dosagelib/plugins/z.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2016 Tobias Gruetzmacher +# Copyright (C) 2015-2017 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function @@ -23,7 +23,7 @@ class ZapComic(_ParserScraper): class Zapiro(_ParserScraper): url = 'http://mg.co.za/zapiro/' starter = bounceStarter - imageSearch = '//img[%s]' % xpath_class('img-fluid') + imageSearch = '//div[@id="cartoon"]/img' prevSearch = '//a[%s]' % xpath_class('left') nextSearch = '//a[%s]' % xpath_class('right')