diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index 48de3b56b..41356d103 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -231,6 +231,10 @@ class Annyseed(_ParserScraper): imageSearch = '//div/img[contains(@src, "Annyseed")]' prevSearch = '//a[img[@name="Previousbtn"]]' help = 'Index format: nnn' + FIX_RE = compile(r'Annyseed/Finished%20For%20Print/') + + def imageUrlModifier(self, image_url, data): + return self.FIX_RE.sub('', image_url) class AoiHouse(_ParserScraper): diff --git a/dosagelib/plugins/comicfury.py b/dosagelib/plugins/comicfury.py index 8f072b605..36e2ee194 100644 --- a/dosagelib/plugins/comicfury.py +++ b/dosagelib/plugins/comicfury.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2016 Tobias Gruetzmacher +# Copyright (C) 2015-2017 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function @@ -206,9 +206,7 @@ class ComicFury(_ParserScraper): # BeyondTheOrdinary has a duplicate in SmackJeeves/BeyondTheOrdinary cls('BibleBelt', 'biblebelt'), cls('BicycleBoy', 'bicycleboy'), - cls('BigBookOfLameJokes', 'bigbook'), cls('BilateralComics', 'bilateralcomics'), - cls('BiMorphon', 'bimorphon'), cls('BionicleTales', 'bionicletales'), cls('BioSyte', 'biosyte'), cls('Birdman', 'birdman'), @@ -728,7 +726,6 @@ class ComicFury(_ParserScraper): cls('MushroomGo', 'mushroomgo'), cls('MutantElf', 'mutantelf'), cls('Mutigenx', 'mutigenx'), - cls('MuttInTheMiddle', 'muttinthemiddle'), cls('MVPL', 'mvpl'), cls('MyForgottenPast', 'myforgottenpast'), cls('MyGirlfriendTheSecretAgent', 'mygfthesecagent'), @@ -849,7 +846,6 @@ class ComicFury(_ParserScraper): cls('RED', 'redthecomic'), # RedVelvetRequiem has a duplicate in SmackJeeves/RedVelvetRequiem cls('RegardingDandelions', 'regardingdandelions'), - cls('ReiketsuouNoKimi', 'rnk'), cls('Remedy', 'remedy'), cls('RememberBedlam', 'bedlam'), cls('RequiemsGate', 'requiemsgate'), @@ -943,7 +939,6 @@ class ComicFury(_ParserScraper): cls('StarSovereignSeriesMuladhara', 'muladhara'), cls('STARWARSXWingAlliance', 'x-wingalliance'), cls('STASonicTheAdventure', 'sta'), - cls('SteamSword', 'steamsword'), cls('StereotyPixs', 'stereotypixs'), cls('StevenAndTheCrystalGMs', 'crystalgms'), cls('StickLife', 'sticklife'), @@ -1118,7 +1113,6 @@ class ComicFury(_ParserScraper): cls('TigerWrestling', 'anybodythere'), cls('Timezone', 'timezone'), cls('Tinytown', 'tinytown'), - cls('Tiziana', 'tiziana'), cls('TM47', 'tm47'), cls('TohvelinTuhinoita', 'tuhinaloota'), cls('TOLVA', 'tolva'), diff --git a/dosagelib/plugins/comicsherpa.py b/dosagelib/plugins/comicsherpa.py index 5358a0a3f..ab605cde7 100644 --- a/dosagelib/plugins/comicsherpa.py +++ b/dosagelib/plugins/comicsherpa.py @@ -69,9 +69,9 @@ class ComicSherpa(_ParserScraper): cls('DBCartoons', 'csnvt'), cls('DevinCraneComicStripGhostwriter', 'csadf'), cls('DoghouseInYourSoul', 'cstwx'), + cls('DoingTime', 'csvuk'), cls('DontPickTheFlowers', 'cswfs'), cls('Dragin', 'cswgz'), - cls('DrWhiskers', 'cswvl'), cls('DumbQuestionBadAnswer', 'cskro'), cls('DungeonHordes', 'csnlo'), cls('DustSpecks', 'csqgq'), diff --git a/dosagelib/plugins/e.py b/dosagelib/plugins/e.py index 028682558..efc167fe3 100644 --- a/dosagelib/plugins/e.py +++ b/dosagelib/plugins/e.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2016 Tobias Gruetzmacher +# Copyright (C) 2015-2017 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function @@ -27,17 +27,10 @@ class EarthsongSaga(_ParserScraper): 'earthsongsaga.com/') for x in urls] def namer(self, image_url, page_url): - imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(\d+)\.\w+$', + imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(.*)\.\w+$', IGNORECASE).search(image_url) - if not imgmatch: - imgmatch = compile(r'images/vol(\d+)/ch(\d+)/ch(\d+)cover\.\w+$', - IGNORECASE).search(image_url) - suffix = "cover" - else: - suffix = "" - return 'vol%02d_ch%02d_%02d%s' % ( - int(imgmatch.group(1)), int(imgmatch.group(2)), - int(imgmatch.group(3)), suffix) + return 'vol%02d_ch%02d_%s' % ( + int(imgmatch.group(1)), int(imgmatch.group(2)), imgmatch.group(3)) class EasilyAmused(_WordPressScraper): @@ -209,6 +202,9 @@ class ExtraFabulousComics(_WordPressScraper): pagepart = compile(r'/comic/([^/]+)/$').search(page_url).group(1) return '_'.join((pagepart, imagename)) + def shouldSkipUrl(self, url, data): + return data.xpath('//div[@id="comic"]//iframe') + class ExtraLife(_BasicScraper): url = 'http://www.myextralife.com/' diff --git a/scripts/comicfury.py b/scripts/comicfury.py index 6289eb670..ae236db7f 100755 --- a/scripts/comicfury.py +++ b/scripts/comicfury.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2016 Tobias Gruetzmacher +# Copyright (C) 2015-2017 Tobias Gruetzmacher """ Script to get ComicFury comics and save the info in a JSON file for further processing. @@ -107,6 +107,7 @@ class ComicFuryUpdater(ComicListUpdater): # images gone "BaseballCapsAndTiaras", + "BiMorphon", "CROSSWORLDSNEXUS", "Fathead", "GOODBYEREPTILIANS",