From 47e2502ec7de7ebf1cde459a4578c46241fd9a54 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Mon, 31 Oct 2016 06:57:47 +0100 Subject: [PATCH] Fix a bunch of comic modules. --- dosagelib/plugins/d.py | 21 +++++++---------- dosagelib/plugins/e.py | 20 +++++----------- dosagelib/plugins/j.py | 4 +++- dosagelib/plugins/old.py | 2 ++ dosagelib/plugins/p.py | 10 ++------ dosagelib/plugins/r.py | 3 --- dosagelib/plugins/s.py | 50 ++++++++++++++-------------------------- dosagelib/plugins/t.py | 29 +++++------------------ dosagelib/plugins/v.py | 12 ++++------ dosagelib/plugins/z.py | 6 ++--- 10 files changed, 52 insertions(+), 105 deletions(-) diff --git a/dosagelib/plugins/d.py b/dosagelib/plugins/d.py index 730d53c2a..6233ef24c 100644 --- a/dosagelib/plugins/d.py +++ b/dosagelib/plugins/d.py @@ -167,11 +167,12 @@ class DMFA(_BasicScraper): help = 'Index format: nnn (normally, some specials)' -class DoemainOfOurOwn(_BasicScraper): +class DoemainOfOurOwn(_ParserScraper): url = 'http://www.doemain.com/' stripUrl = url + 'index.cgi/%s' - imageSearch = compile(r"Previous Strip\s*" + - tagre("img", "src", r'(%sfiles/[0-9]+/[0-9]+/[^"]*Dork[^"]+\.(?:gif|jpg))' % rurl, - after=' alt')) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + "Previous") - help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy' + firstStripUrl = url + '1997/01/01/shadis-magazine-strip-1/' + imageSearch = '//div[%s]//a/img' % xpath_class('entry-content') + prevSearch = '//a[%s][text()="Previous"]' % xpath_class('btn') class Dracula(_BasicScraper): diff --git a/dosagelib/plugins/e.py b/dosagelib/plugins/e.py index a0884dd14..028682558 100644 --- a/dosagelib/plugins/e.py +++ b/dosagelib/plugins/e.py @@ -56,17 +56,10 @@ class EatLiver(_ParserScraper): latestSearch = '//a[@rel="bookmark"]' -class EatThatToast(_BasicScraper): +class EatThatToast(_WordPressScraper): url = 'http://eatthattoast.com/' - rurl = escape(url) - stripUrl = url + 'comic/%s' - firstStripUrl = stripUrl % 'thewizard/' - imageSearch = compile(tagre("div", "id", r'comic') + "\s*.*\s*" + tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, - after='comic-nav-base comic-nav-previous')) - textSearch = compile(tagre("div", "id", r'comic') + "\s*.*\s*" + - tagre("img", "alt", r'([^"]+)')) - help = 'Index Format: name' + firstStripUrl = url + 'comic/thewizard/' + textSearch = _WordPressScraper.imageSearch + '/@alt' class EdibleDirt(_BasicScraper): @@ -225,13 +218,12 @@ class ExtraLife(_BasicScraper): help = 'Index format: stripname' -class ExtraOrdinary(_BasicScraper): +class ExtraOrdinary(_ParserScraper): url = 'http://www.exocomics.com/' - rurl = escape(url) stripUrl = url + '%s' firstStripUrl = stripUrl % '01' - prevSearch = compile(tagre("a", "href", r'(%s\d+)' % rurl, before="prev")) - imageSearch = compile(tagre("img", "src", r'(%scomics/comics/\d+\.[^"]+)' % rurl)) + prevSearch = '//a[%s]' % xpath_class('prev') + imageSearch = '//img[%s]' % xpath_class('image-style-main-comic') help = 'Index format: number' diff --git a/dosagelib/plugins/j.py b/dosagelib/plugins/j.py index 4fef564a8..4346f6da7 100644 --- a/dosagelib/plugins/j.py +++ b/dosagelib/plugins/j.py @@ -10,7 +10,7 @@ from re import compile, escape from ..scraper import _BasicScraper from ..util import tagre from ..helpers import indirectStarter -from .common import _ComicControlScraper +from .common import _ComicControlScraper, xpath_class class JackCannon(_BasicScraper): @@ -53,6 +53,8 @@ class JoeAndMonkey(_BasicScraper): class JohnnyWander(_ComicControlScraper): + imageSearch = ('//ul[%s]/li/@data-src' % xpath_class('cc-showbig'), + _ComicControlScraper.imageSearch) url = 'http://www.johnnywander.com/' diff --git a/dosagelib/plugins/old.py b/dosagelib/plugins/old.py index 4b32e01cb..26eb63e2c 100644 --- a/dosagelib/plugins/old.py +++ b/dosagelib/plugins/old.py @@ -257,6 +257,7 @@ class Removed(Scraper): cls('PensAndTales/FireflyCross'), cls('PetiteSymphony/Djandora'), cls('PetiteSymphony/Generation17'), + cls('PunksAndNerds', 'mis'), cls('PunksAndNerdsOld'), cls('RedsPlanet'), cls('SmackJeeves/Aarrevaara'), @@ -329,6 +330,7 @@ class Removed(Scraper): cls('Stubble'), cls('SuburbanTribe'), cls('TheOuterQuarter'), + cls('TheParkingLotIsFull'), cls('ThunderAndLightning'), cls('TinyKittenTeeth'), cls('TwoTwoOneFour'), diff --git a/dosagelib/plugins/p.py b/dosagelib/plugins/p.py index 6361bc28c..c5053d4ba 100644 --- a/dosagelib/plugins/p.py +++ b/dosagelib/plugins/p.py @@ -203,6 +203,7 @@ class Precocious(_ParserScraper): prevSearch = '//a[img[contains(@src, "/back_arrow")]]' help = 'Index format: yyyy/mm/dd' + class PrinceOfSartar(_WordPressScraper): url = 'http://www.princeofsartar.com/' stripUrl = url + 'comic/%s/' @@ -219,6 +220,7 @@ class PrinceOfSartar(_WordPressScraper): image_ext = image_url.rsplit('.', 1)[1] return '%s.%s' % (title, image_ext) + class PS238(_ParserScraper): url = 'http://ps238.nodwick.com/' stripUrl = url + 'comic/%s/' @@ -227,14 +229,6 @@ class PS238(_ParserScraper): help = 'Index format: yyyy-mm-dd' -class PunksAndNerds(_WordPressScraper): - url = 'http://www.punksandnerds.com/' - stripUrl = url + '?p=%s' - firstStripUrl = stripUrl % '15' - prevSearch = '//a[%s]' % xpath_class('navi-prev') - help = 'Index format: nnn' - - class PvPonline(_BasicScraper): url = 'http://pvponline.com/comic' stripUrl = url + '%s' diff --git a/dosagelib/plugins/r.py b/dosagelib/plugins/r.py index d82a27f47..b52e730a0 100644 --- a/dosagelib/plugins/r.py +++ b/dosagelib/plugins/r.py @@ -9,7 +9,6 @@ from re import compile from six.moves.urllib.parse import urljoin from ..scraper import _BasicScraper, _ParserScraper -from ..helpers import indirectStarter from ..util import tagre from .common import _WordPressScraper, xpath_class @@ -80,8 +79,6 @@ class RomanticallyApocalyptic(_ParserScraper): firstStripUrl = stripUrl % '0' imageSearch = '//div[%s]/center//img' % xpath_class('comicpanel') prevSearch = '//a[@accesskey="p"]' - latestSearch = '//a[span[%s]]' % xpath_class('glyphicon-fast-forward') - starter = indirectStarter help = 'Index format: n' adult = True diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index 19a57ced8..062cf3bed 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -298,33 +298,17 @@ class SluggyFreelance(_BasicScraper): help = 'Index format: yymmdd' -class SMBC(_ParserScraper): +class SMBC(_ComicControlScraper): url = 'http://www.smbc-comics.com/' - stripUrl = url + 'index.php?id=%s' - firstStripUrl = stripUrl % '1' + firstStripUrl = url + 'comic/2002-09-05' multipleImagesPerStrip = True imageSearch = ['//img[@id="cc-comic"]', '//div[@id="aftercomic"]/img'] - prevSearch = '//a[@class="prev"]' - help = 'Index format: nnnn' textSearch = '//img[@id="cc-comic"]/@title' def namer(self, image_url, page_url): """Remove random noise from name.""" return image_url.rsplit('-', 1)[-1] - def shouldSkipUrl(self, url, data): - """Skip promo or missing update pages.""" - return url in ( - self.stripUrl % '2865', - self.stripUrl % '2653', - self.stripUrl % '2424', - self.stripUrl % '2226', - self.stripUrl % '2069', - self.stripUrl % '1895', - self.stripUrl % '1896', - self.stripUrl % '1589', - ) - class SnowFlame(_WordPressScraper): url = 'http://www.snowflamecomic.com/' @@ -375,23 +359,22 @@ class Sorcery101(_ParserScraper): help = 'Index format: stripname' -class SpaceTrawler(_WordPressScraper): - base_url = 'http://spacetrawler.com/' - url = base_url + '2013/12/24/spacetrawler-379/' - firstStripUrl = base_url + '2010/01/01/spacetrawler-4/' - prevSearch = '//a[%s]' % xpath_class('navi-prev') - endOfLife = True - - -class SpaceJunkArlia(_BasicScraper): - url = 'http://spacejunkarlia.com' - stripUrl = url + '/index.php?strip_id=%s' +class SpaceJunkArlia(_ParserScraper): + url = 'http://spacejunkarlia.com/' + stripUrl = url + '?strip_id=%s' firstStripUrl = stripUrl % '0' - imageSearch = compile(tagre('img', 'src', r'(comics/[^"]+)')) - prevSearch = compile(tagre('a', 'href', r'(\?strip_id=\d+)') + '<<') + imageSearch = '//div[%s]/img' % xpath_class('content') + prevSearch = '//a[text()="<"]' help = 'Index format: number' +class SpaceTrawler(_ParserScraper): + url = 'https://www.baldwinpage.com/spacetrawler/' + firstStripUrl = url + '2010/01/01/spacetrawler-4/' + imageSearch = '//img[%s]' % xpath_class('size-full') + prevSearch = '//a[@rel="prev"]' + + class Spamusement(_BasicScraper): url = 'http://spamusement.com/' rurl = escape(url) @@ -487,7 +470,7 @@ class StrongFemaleProtagonist(_ParserScraper): stripUrl = url + '%s/' css = True imageSearch = 'article p img' - prevSearch = 'div.nav-previous > a' + prevSearch = 'a.page-nav__item--left' help = 'Index format: issue-?/page-??' def shouldSkipUrl(self, url, data): @@ -499,7 +482,7 @@ class StrongFemaleProtagonist(_ParserScraper): self.stripUrl % 'issue-5/newspaper', self.stripUrl % 'issue-5/hiatus-1', self.stripUrl % 'issue-5/hiatus-2', - self.stripUrl % 'ssue-1/no-page', + self.stripUrl % 'issue-1/no-page', ) @@ -532,6 +515,7 @@ class StuffNoOneToldMe(_BasicScraper): def shouldSkipUrl(self, url, data): """Skip pages without images.""" return url in ( + self.stripUrl % '2016/05/so-you-would-like-to-share-my-comics', # no comic self.stripUrl % '2012/08/self-rant', # no comic self.stripUrl % '2012/06/if-you-wonder-where-ive-been', # video self.stripUrl % '2011/10/i-didnt-make-this-nor-have-anything-to', # video diff --git a/dosagelib/plugins/t.py b/dosagelib/plugins/t.py index 2aa626b83..10f65e091 100644 --- a/dosagelib/plugins/t.py +++ b/dosagelib/plugins/t.py @@ -14,14 +14,11 @@ from .common import (_ComicControlScraper, _TumblrScraper, _WordPressScraper, xpath_class) -class TheBrads(_BasicScraper): - url = 'http://bradcolbow.com/archive/C4/' - stripUrl = url + '%s/' - firstStripUrl = stripUrl % 'P125' - imageSearch = compile(tagre("img", "src", r'(http://s3\.amazonaws\.com/the_brads/the-?brads[-_][^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://bradcolbow\.com/archive/C4/[^"]+)', before="prev")) +class TheBrads(_ParserScraper): + url = 'http://bradcolbow.com/archive/' + imageSearch = '//div[%s]//img' % xpath_class('entry') + prevSearch = '//a[%s]' % xpath_class('prev') multipleImagesPerStrip = True - help = 'Index format: a letter and a number' class TheDevilsPanties(_BasicScraper): @@ -88,17 +85,6 @@ class TheOrderOfTheStick(_BasicScraper): return page_url.rsplit('/', 1)[-1][:-5] -class TheParkingLotIsFull(_BasicScraper): - baseUrl = 'http://plif.courageunfettered.com/' - url = baseUrl + 'archive/arch2002.htm' - stripUrl = baseUrl + 'archive/arch%s.htm' - firstStripUrl = stripUrl % '1998' - imageSearch = compile(r'') - multipleImagesPerStrip = True - prevSearch = compile(r'\d{4} -\s+\d{4}') - help = 'Index format: nnn' - - class TheThinHLine(_TumblrScraper): url = 'http://thinhline.tumblr.com/' firstStripUrl = url + 'post/4177372348/thl-1-a-cats-got-his-tongue-click-on-the' @@ -147,13 +133,10 @@ class ThreePanelSoul(_ComicControlScraper): class ToonHole(_WordPressScraper): url = 'http://toonhole.com/' - stripUrl = url + '%s/' - firstStripUrl = stripUrl % '2009/12/toon-hole-coming-soon-2010' - prevSearch = '//a[@rel="prev"]' - help = 'Index format: yyyy/mm/stripname' + firstStripUrl = url + 'comic/toon-hole-coming-soon-2010/' def shouldSkipUrl(self, url, data): - return url in (self.stripUrl % "2013/03/if-game-of-thrones-was-animated",) + return url in (self.url + "comic/if-game-of-thrones-was-animated/",) class TracyAndTristan(_BasicScraper): diff --git a/dosagelib/plugins/v.py b/dosagelib/plugins/v.py index ce7faa6dc..e5eedba66 100644 --- a/dosagelib/plugins/v.py +++ b/dosagelib/plugins/v.py @@ -6,8 +6,9 @@ from __future__ import absolute_import, division, print_function from re import compile -from ..scraper import _BasicScraper +from ..scraper import _BasicScraper, _ParserScraper from ..util import tagre +from .common import xpath_class class VampireCheerleaders(_BasicScraper): @@ -51,13 +52,10 @@ class VictimsOfTheSystem(_BasicScraper): help = 'Index format: nnn-nnn' -class ViiviJaWagner(_BasicScraper): +class ViiviJaWagner(_ParserScraper): url = 'http://www.hs.fi/viivijawagner/' - stripUrl = None - imageSearch = compile(tagre("img", "src", r'(http://hs\d+\.snstatic\.fi/webkuva/sarjis/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(/viivijawagner/[^"]+)', - before="prev-cm")) - help = 'Index format: none' + imageSearch = '//div[@id="full-comic"]//img' + prevSearch = '//a[%s]' % xpath_class('prev-cm') lang = 'fi' def namer(self, image_url, page_url): diff --git a/dosagelib/plugins/z.py b/dosagelib/plugins/z.py index ca31553aa..d8ab6e7e5 100644 --- a/dosagelib/plugins/z.py +++ b/dosagelib/plugins/z.py @@ -23,9 +23,9 @@ class ZapComic(_ParserScraper): class Zapiro(_ParserScraper): url = 'http://mg.co.za/zapiro/' starter = bounceStarter - imageSearch = '//div[@id="cartoon_full_size"]//img' - prevSearch = '//li[@class="nav_older"]/a' - nextSearch = '//li[@class="nav_newer"]/a' + imageSearch = '//img[%s]' % xpath_class('img-fluid') + prevSearch = '//a[%s]' % xpath_class('left') + nextSearch = '//a[%s]' % xpath_class('right') def namer(self, image_url, page_url): parts = page_url.rsplit('/', 1)