From bb5b6ffcec75e9fd1c229bd26f229c657b935cf9 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Thu, 7 Apr 2016 23:21:31 +0200 Subject: [PATCH] Fix comics in module a.py. --- dosagelib/plugins/a.py | 87 ++++++++++++------------------------------ 1 file changed, 25 insertions(+), 62 deletions(-) diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index 855e97a64..bf35b0ec6 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -28,9 +28,9 @@ class AbstruseGoose(_BasicScraper): textSearch = compile(tagre("img", "title", r'([^"]+)')) @classmethod - def namer(cls, imageUrl, pageUrl): - index = int(pageUrl.rstrip('/').split('/')[-1]) - name = imageUrl.split('/')[-1].split('.')[0] + def namer(cls, image_url, page_url): + index = int(page_url.rstrip('/').split('/')[-1]) + name = image_url.split('/')[-1].split('.')[0] return 'c%03d-%s' % (index, name) @@ -89,7 +89,7 @@ class AGirlAndHerFed(_BasicScraper): class AhoiPolloi(_ParserScraper): - url = 'http://ahoipolloi.blogger.de/' + url = 'https://ahoipolloi.blogger.de/' stripUrl = url + '?day=%s' firstStripUrl = stripUrl % '20060306' multipleImagesPerStrip = True @@ -108,13 +108,9 @@ class AhoyEarth(_ParserScraper): help = 'Index format: ddmmyyyy' -class AirForceBlues(_BasicScraper): - url = 'http://www.afblues.com/' - stripUrl = url + 'wordpress/%s/' - firstStripUrl = stripUrl % '1997/09/07/need-a-clue-do-ya' - imageSearch = compile(tagre("img", "src", r'(http://www\.afblues\.com/wordpress/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'([^"]+)', after='Previous')) - help = 'Index format: yyyy/mm/dd/stripname' +class AirForceBlues(_WordPressScraper): + url = 'http://farvatoons.com/' + firstStripUrl = url + 'comic/in-texas-there-are-texans/' class ALessonIsLearned(_BasicScraper): @@ -144,14 +140,9 @@ class AlienLovesPredator(_BasicScraper): help = 'Index format: yyyy/mm/dd/name' -class AlienShores(_BasicScraper): - baseUrl = 'http://alienshores.com/' - rurl = escape(baseUrl) - url = baseUrl + 'alienshores_band/' - stripUrl = url + '%s' - imageSearch = compile(tagre("img", "src", r'(%salienshores_band/wp-content/uploads/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) - help = 'Index format: yyyy/mm/dd/p/' +class AlienShores(_WordPressScraper): + url = 'http://alienshores.com/alienshores_band/' + firstStripUrl = url + 'AScomic/updated-cover/' class AllTheGrowingThings(_BasicScraper): @@ -181,15 +172,6 @@ class AlphaLunaSpanish(AlphaLuna): firstStripUrl = stripUrl % '1/portada' -class AlsoBagels(_BasicScraper): - url = 'http://alsobagels.com/' - rurl = escape(url) - stripUrl = url + 'index.php/comic/%s/' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%sindex\.php/comic/[^"]+)' % rurl, after="Previous")) - help = 'Index format: strip-name' - - class Altermeta(_BasicScraper): url = 'http://altermeta.net/' rurl = escape(url) @@ -246,28 +228,15 @@ class Angels2200(_BasicScraper): help = 'Index format: yyyy/mm/dd/part--comic-' -class Annyseed(_BasicScraper): - baseUrl = 'http://www.colourofivy.com/' - rurl = escape(baseUrl) - url = baseUrl + 'annyseed_webcomic_latest.htm' - stripUrl = baseUrl + 'annyseed_webcomic%s.htm' - imageSearch = compile(tagre("img", "src", r'(Annyseed[^"]+)')) - prevSearch = compile(r'")) + starter = bounceStarter(url, '//a[text()=">"]') stripUrl = baseUrl + 'ap%s.html' firstStripUrl = stripUrl % '061012' - imageSearch = compile(tagre("a", "href", r'(image/\d{4}/[^"]+)')) + imageSearch = '//a/img' multipleImagesPerStrip = True - prevSearch = compile(tagre("a", "href", r'(ap\d{6}\.html)') + "<") + prevSearch = '//a[text()="<"]' help = 'Index format: yymmdd' def shouldSkipUrl(self, url, data): """Skip pages without images.""" - return url in ( - self.stripUrl % '130217', # video - self.stripUrl % '130218', # video - self.stripUrl % '130226', # video - self.stripUrl % '130424', # video - ) + return data.xpath('//iframe') # videos @classmethod - def namer(cls, imageUrl, pageUrl): - return '%s-%s' % (pageUrl.split('/')[-1].split('.')[0][2:], - imageUrl.split('/')[-1].split('.')[0]) + def namer(cls, image_url, page_url): + return '%s-%s' % (page_url.split('/')[-1].split('.')[0][2:], + image_url.split('/')[-1].split('.')[0]) class AxeCop(_WordPressScraper):