From 98c98ddfabed09c0a28c4409f8540ac2d61db377 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Fri, 30 Sep 2016 00:15:45 +0200 Subject: [PATCH] Fix some more comic modules (c-f). --- dosagelib/plugins/c.py | 21 ++------------------- dosagelib/plugins/d.py | 29 ++++++++--------------------- dosagelib/plugins/e.py | 7 +------ dosagelib/plugins/f.py | 21 ++++++++------------- dosagelib/plugins/old.py | 5 +++++ 5 files changed, 24 insertions(+), 59 deletions(-) mode change 100755 => 100644 dosagelib/plugins/c.py diff --git a/dosagelib/plugins/c.py b/dosagelib/plugins/c.py old mode 100755 new mode 100644 index 79029d2e3..d2337db62 --- a/dosagelib/plugins/c.py +++ b/dosagelib/plugins/c.py @@ -156,15 +156,9 @@ class Champ2010(_BasicScraper): help = 'Index format: yy-dd-mm' -class ChannelAte(_BasicScraper): +class ChannelAte(_WordPressScraper): url = 'http://www.channelate.com/' - rurl = escape(url) - stripUrl = url + '%s/' - imageSearch = compile(tagre("img", "src", - r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", - r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev")) - help = 'Index format: yyyy/mm/dd/name' + prevSearch = '//a[%s]' % xpath_class('navi-prev') class ChasingTheSunset(_BasicScraper): @@ -274,17 +268,6 @@ class CourtingDisaster(_WordPressScraper): firstStripUrl = 'http://www.courting-disaster.com/comic/courting-disaster-17/' -class CowboyJedi(_WordPressScraper): - url = 'http://www.cowboyjedi.com/' - nextSearch = '//a[%s]' % xpath_class('comic-nav-next') - starter = bounceStarter - - def shouldSkipUrl(self, url, data): - return url in ( - self.url + 'comic/darth-bart-wont-stop/', - ) - - class CraftedFables(_WordPressScraper): url = 'http://www.caf-fiends.net/comicpress/' prevSearch = '//a[@rel="prev"]' diff --git a/dosagelib/plugins/d.py b/dosagelib/plugins/d.py index b0a4f0e54..111a29f3b 100644 --- a/dosagelib/plugins/d.py +++ b/dosagelib/plugins/d.py @@ -13,19 +13,12 @@ from ..util import tagre from .common import _WordPressScraper, xpath_class -class DailyDose(_ParserScraper): - url = 'http://dailydoseofcomics.com/' - starter = indirectStarter - imageSearch = '//p/a/img' - prevSearch = '//a[@rel="prev"]' - latestSearch = '//a[@rel="bookmark"]' - - class DamnLol(_ParserScraper): url = 'http://www.damnlol.com/' - prevSearch = '//a[@id="prev"]' - nextSearch = '//a[@id="next"]' - imageSearch = '//div[@id="hideFooter"]/img' + # Classes for next and previous seem to be swapped... + prevSearch = '//a[%s]' % xpath_class("next") + nextSearch = '//a[%s]' % xpath_class("previous") + imageSearch = '//img[@id="post-image"]' starter = bounceStarter def namer(self, image_url, page_url): @@ -122,7 +115,7 @@ class DieFruehreifen(_BasicScraper): class DieselSweeties(_ParserScraper): url = 'http://dieselsweeties.com/' - stripUrl = url + 'ics/%s/' + stripUrl = url + 'ics/%s' firstStripUrl = stripUrl % '1' imageSearch = '//img[@class="xomic"]' prevSearch = '//div[@id="prev"]//a[contains(text(), "previous")]' @@ -174,12 +167,6 @@ class DMFA(_BasicScraper): help = 'Index format: nnn (normally, some specials)' -class DoctorCat(_WordPressScraper): - url = 'http://doctorcatmd.com/' - firstStripUrl = url + 'comic/doctor-cat' - prevSearch = '//a[%s]' % xpath_class('navi-prev') - - class DoemainOfOurOwn(_BasicScraper): url = 'http://www.doemain.com/' stripUrl = url + 'index.cgi/%s' @@ -229,11 +216,11 @@ class Dracula(_BasicScraper): help = 'Index format: nnn' -class DreamKeepersPrelude(_BasicScraper): +class DreamKeepersPrelude(_ParserScraper): url = 'http://www.dreamkeeperscomic.com/Prelude.php' stripUrl = url + '?pg=%s' - imageSearch = compile(r'(images/PreludeNew/.+?)"') - prevSearch = compile(r'(Prelude.php\?pg=.+?)"') + imageSearch = '//div[@class="Preludecomic"]/table//a/img' + prevSearch = '//a[@id="prev"]' help = 'Index format: n' diff --git a/dosagelib/plugins/e.py b/dosagelib/plugins/e.py index 6f8bd0cec..ebd790fbe 100644 --- a/dosagelib/plugins/e.py +++ b/dosagelib/plugins/e.py @@ -163,12 +163,7 @@ class EverybodyLovesEricRaymond(_BasicScraper): help = 'Index format: name-of-old-comic' -class EverydayBlues(_WordPressScraper): - url = 'http://everydayblues.everydayblues.net/' - firstStripUrl = url + '2010/02/11/sometimes/' - prevSearch = '//a[%s]' % xpath_class('navi-prev') - - +# Seems to be GeoBlocked from Europe? class EvilDiva(_BasicScraper): url = 'http://www.evildivacomics.com/' stripUrl = url + '?p=%s' diff --git a/dosagelib/plugins/f.py b/dosagelib/plugins/f.py index e5d47d9ca..a505ce1ac 100644 --- a/dosagelib/plugins/f.py +++ b/dosagelib/plugins/f.py @@ -9,7 +9,7 @@ from re import compile, escape, IGNORECASE from ..util import tagre from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter -from .common import _WPNaviIn, _WordPressScraper +from .common import _WPNaviIn, _WordPressScraper, xpath_class class FalconTwin(_BasicScraper): @@ -37,12 +37,12 @@ class FantasyRealms(_BasicScraper): starter = indirectStarter -class FauxPas(_BasicScraper): +class FauxPas(_ParserScraper): url = 'http://www.ozfoxes.net/cgi/pl-fp1.cgi' stripUrl = url + '?%s' firstStripUrl = stripUrl % '1' - imageSearch = compile(r'Previous Strip') + imageSearch = '//img[@name]' + prevSearch = '//a[img[@alt="Previous"]]' help = 'Index format: nnn' @@ -126,19 +126,14 @@ class Fragile(_ParserScraper): firstStripUrl = url + 'strips/chapter_01' -class FredoAndPidjin(_BasicScraper): +class FredoAndPidjin(_ParserScraper): url = 'http://www.pidjin.net/' stripUrl = url + '%s/' firstStripUrl = stripUrl % '2006/02/19/goofy-monday' - help = 'Index format: yyyy/mm/dd/number-index' - imageSearch = ( - compile(tagre('img', 'src', '(http://cdn\.pidjin\.net/wp-content/uploads/\d+/\d+/\d+[^"]+\.[a-z]+)')), - compile(tagre('img', 'src', '(http://cdn\.pidjin\.net/wp-content/uploads/old/[^"]+\.[a-z]+)')), - ) + imageSearch = '//div[%s]//img' % xpath_class("episode") multipleImagesPerStrip = True - prevSearch = compile(tagre('a', 'href', '([^"]+)') + "Prev") - latestSearch = compile(tagre('a', 'href', "(" + url + - r'\d\d\d\d/\d\d/\d\d/[^"]+/)')) + prevSearch = '//span[%s]/a' % xpath_class("prev") + latestSearch = '//section[%s]//a' % xpath_class("latest") starter = indirectStarter diff --git a/dosagelib/plugins/old.py b/dosagelib/plugins/old.py index 63c58d82b..3ccdff776 100644 --- a/dosagelib/plugins/old.py +++ b/dosagelib/plugins/old.py @@ -15,6 +15,7 @@ class Removed(Scraper): 'del': 'Comic was removed from the web.', 'block': 'The comic site is blocking us.', 'unk': 'Comic was removed for an unknown reason.', + 'brk': 'Comic navigation is broken.', } def __init__(self, name, reason='del'): @@ -173,14 +174,18 @@ class Removed(Scraper): cls('ComicGenesis/CryHavoc'), cls('ComicGenesis/SueosdelSur'), cls('Commissioned'), + cls('CowboyJedi', 'brk'), cls('Creators/BCinSpanish'), cls('Creators/GirlsandSportsinSpanish'), cls('Creators/RugratsinSpanish'), cls('CtrlAltDel', 'block'), cls('CtrlAltDel/Sillies', 'block'), + cls('DailyDose'), cls('DeathToTheExtremist'), + cls('DoctorCat', 'brk'), cls('Ellerbisms'), cls('Eriadan'), + cls('EverydayBlues'), cls('FeyWinds'), cls('FilibusterCartoons'), cls('FowlLanguage', 'block'),