From b1d2650615e122184311d5fb48f680835a17ddc9 Mon Sep 17 00:00:00 2001
From: Tobias Gruetzmacher
Date: Thu, 29 Sep 2016 01:29:01 +0200
Subject: [PATCH] Fix some modules (a&b).

---
 dosagelib/plugins/a.py   | 21 +++++++--------------
 dosagelib/plugins/b.py   |  6 +++++-
 dosagelib/plugins/old.py |  1 +
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py
index 903275038..15c488c5f 100644
--- a/dosagelib/plugins/a.py
+++ b/dosagelib/plugins/a.py
@@ -10,7 +10,7 @@ from re import compile, escape, MULTILINE
 from ..util import tagre
 from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import regexNamer, bounceStarter, indirectStarter
-from .common import _WordPressScraper, _WPNaviIn, xpath_class, WP_LATEST_SEARCH
+from .common import _WordPressScraper, xpath_class, WP_LATEST_SEARCH
 
 
 class AbstruseGoose(_BasicScraper):
@@ -117,7 +117,7 @@ class ALessonIsLearned(_BasicScraper):
     help = 'Index format: nnn'
 
 
-class Alice(_WPNaviIn):
+class Alice(_WordPressScraper):
     url = 'http://www.alicecomics.com/'
     latestSearch = '//a[text()="Latest Alice!"]'
     starter = indirectStarter
@@ -152,8 +152,11 @@ class AlphaLuna(_BasicScraper):
     url = 'http://www.alphaluna.net/'
     stripUrl = url + 'issue-%s/'
     firstStripUrl = stripUrl % '1/cover'
-    imageSearch = compile(tagre("a", "href", r'[^"]*/(?:issue-|support/upcoming)[^"]+') + tagre("img", "src", r'([^"]*/PAGINAS/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "alt", "Prev"))
+    imageSearch = compile(tagre("a", "href",
+                                r'[^"]*/(?:issue-|support/upcoming)[^"]+') +
+                          tagre("img", "src", r'([^"]*/PAGINAS/[^"]+)'))
+    prevSearch = compile(tagre("a", "href", r'([^"]+)') +
+                         tagre("img", "alt", "Prev"))
     help = 'Index format: issue/page (e.g. 4/05)'
 
 
@@ -263,16 +266,6 @@ class Ashes(_WordPressScraper):
     starter = indirectStarter
 
 
-class ASkeweredParadise(_BasicScraper):
-    url = 'http://aspcomics.net/'
-    stripUrl = url + 'comic/%s'
-    firstStripUrl = stripUrl % '001'
-    imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+'))
-    prevSearch = compile(tagre("a", "href", "(/comic/\d+)") +
-                         r"[^>]+Previous")
-    help = 'Index format: nnn'
-
-
 class ASofterWorld(_ParserScraper):
     url = 'http://www.asofterworld.com/'
     stripUrl = url + 'index.php?id=%s'
diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py
index ccbe8c831..f7b5539fc 100644
--- a/dosagelib/plugins/b.py
+++ b/dosagelib/plugins/b.py
@@ -57,7 +57,8 @@ class Baroquen(_BasicScraper):
 class Bearmageddon(_WordPressScraper):
     url = 'http://bearmageddon.com/'
     firstStripUrl = url + '2011/08/01/page-1/'
-    prevSearch = '//a[%s]' % xpath_class('navi-prev')
+    latestSearch = '//a[div[%s]]' % xpath_class('latest-page')
+    starter = indirectStarter
 
 
 class Beetlebum(_BasicScraper):
@@ -210,6 +211,9 @@ class BroodHollow(_WordPressScraper):
     url = 'http://broodhollow.chainsawsuit.com/'
     firstStripUrl = 'http://broodhollow.chainsawsuit.com/page/2012/10/06/book-1-curious-little-thing'
 
+    def shouldSkipUrl(self, url, data):
+        return data.xpath('//div[@id="comic"]//iframe')
+
 
 class Buni(_WordPressScraper):
     url = 'http://www.bunicomic.com/'
diff --git a/dosagelib/plugins/old.py b/dosagelib/plugins/old.py
index 3e85f41c1..63c58d82b 100644
--- a/dosagelib/plugins/old.py
+++ b/dosagelib/plugins/old.py
@@ -32,6 +32,7 @@ class Removed(Scraper):
             cls('AlsoBagels'),
             cls('Antics'),
             cls('Arcamax/BleekerTheRechargeableDog'),
+            cls('ASkeweredParadise'),
             cls('BackwaterPlanet'),
             cls('BigFatWhale'),
             cls('Blip'),