From 3d05e59c3658bd5d4c58b0f3e58c0c6ba572849d Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Mon, 18 Jan 2021 01:25:03 +0100 Subject: [PATCH] Fix some modules (a-c) --- dosagelib/plugins/a.py | 29 ++++++++++++++++++----------- dosagelib/plugins/b.py | 8 ++++---- dosagelib/plugins/c.py | 37 +++++++++++++++---------------------- dosagelib/plugins/common.py | 7 ++++++- 4 files changed, 43 insertions(+), 38 deletions(-) diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index 4327814b6..f79138e9a 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -8,10 +8,10 @@ from re import compile, escape, MULTILINE from ..util import tagre from ..scraper import _BasicScraper, _ParserScraper from ..helpers import regexNamer, bounceStarter, indirectStarter -from .common import _WordPressScraper, _WPNavi, _WPNaviIn, _WPWebcomic +from .common import _WordPressScraper, _WordPressSpliced, _WPNavi, _WPNaviIn, _WPWebcomic -class AbbysAgency(_WordPressScraper): +class AbbysAgency(_WordPressSpliced): url = 'https://abbysagency.us/' stripUrl = url + 'blog/comic/%s/' firstStripUrl = stripUrl % 'a' @@ -169,7 +169,7 @@ class Alice(_WordPressScraper): starter = indirectStarter -class AlienDice(_WordPressScraper): +class AlienDice(_WordPressSpliced): url = 'https://aliendice.com/' stripUrl = url + 'comic/%s/' firstStripUrl = stripUrl % '05162001' @@ -185,7 +185,7 @@ class AlienDice(_WordPressScraper): return imageUrl.rsplit('/', 1)[-1].replace('20010831', '2001-08-31') -class AlienDiceLegacy(_WordPressScraper): +class AlienDiceLegacy(_WordPressSpliced): name = 'AlienDice/Legacy' stripUrl = 'https://aliendice.com/comic/%s/' url = stripUrl % 'legacy-2-15' @@ -304,14 +304,21 @@ class Anaria(_ParserScraper): return filename.replace('00.jpg', 'new00.jpg').replace('new', '1') -class Angband(_BasicScraper): +class Angband(_ParserScraper): url = 'http://angband.calamarain.net/' - stripUrl = url + 'view.php?date=%s' - firstStripUrl = stripUrl % '2005-12-30' - imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)') + - "Previous") - help = 'Index format: yyyy-mm-dd' + stripUrl = url + '%s' + imageSearch = '//img' + multipleImagesPerStrip = True + endOfLife = True + + def starter(self): + page = self.getPage(self.url) + self.pages = page.xpath('//p/a[not(contains(@href, "cast"))]/@href') + self.firstStripUrl = self.pages[0] + return self.pages[-1] + + def getPrevUrl(self, url, data): + return self.pages[self.pages.index(url) - 1] class Angels2200(_BasicScraper): diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py index 4045d170b..ea932d5a8 100644 --- a/dosagelib/plugins/b.py +++ b/dosagelib/plugins/b.py @@ -42,7 +42,7 @@ class BalderDash(_ComicControlScraper): class BallerinaMafia(_ParserScraper): - url = 'http://www.ballerinamafia.net/' + url = 'https://web.archive.org/web/20200115230012/http://ballerinamafia.net/' stripUrl = url + 'index.php?pid=%s' firstStripUrl = stripUrl % '20100906' imageSearch = ('//img[contains(@alt, "Comic")]', @@ -135,7 +135,7 @@ class BetweenFailures(_WPWebcomic): class BeyondTheVeil(_WordPressScraper): - url = 'http://beyondtheveilcomic.com/' + url = 'https://web.archive.org/web/20201009235642/http://beyondtheveilcomic.com/' stripUrl = url + '?comic=%s' firstStripUrl = stripUrl % '01252010' endOfLife = True @@ -178,9 +178,9 @@ class BirdBoy(_WordPressScraper): try: pageNr = int(strip) except ValueError: - pageNr = None # Use the string to fetch a cover page + pageNr = None # Use the string to fetch a cover page if volume == 'synopsis': - strip = '{0}{1}'.format(pageNr, '-02' if strip in [1,3] else '') + strip = '{0}{1}'.format(pageNr, '-02' if strip in [1, 3] else '') else: volume = 'volume-' + volume if pageNr is not None: diff --git a/dosagelib/plugins/c.py b/dosagelib/plugins/c.py index a94baece8..a75686487 100644 --- a/dosagelib/plugins/c.py +++ b/dosagelib/plugins/c.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2020 Tobias Gruetzmacher +# Copyright (C) 2015-2021 Tobias Gruetzmacher # Copyright (C) 2019-2020 Daniel Ring from re import compile, escape @@ -11,13 +11,12 @@ from ..util import tagre from .common import _WordPressScraper, _WPNavi, _WPWebcomic -class CampComic(_BasicScraper): +class CampComic(_ParserScraper): url = 'http://campcomic.com/comic/' - rurl = escape(url) stripUrl = url + '%s' firstStripUrl = stripUrl % '6' - imageSearch = compile(tagre("img", "src", r'(http://hw1\.pa-cdn\.com/camp/assets/img/katie/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btn btnPrev")) + imageSearch = '//div[@id="comic"]/img' + prevSearch = '//a[d:class("btnPrev")]' help = 'Index Format: number' @@ -112,9 +111,9 @@ class Catalyst(_BasicScraper): class CatAndGirl(_ParserScraper): - url = 'http://catandgirl.com/' + url = 'https://catandgirl.com/' imageSearch = '//div[@id="comic"]//img' - prevSearch = '//a[@rel="prev"]' + prevSearch = '//a[d:class("pager--prev")]' class CatenaCafe(_WordPressScraper): @@ -287,7 +286,7 @@ class CollegeCatastrophe(_ParserScraper): stripUrl = url + '/%s' firstStripUrl = stripUrl % '2000-11-10' imageSearch = '//img[@class="comic-image"]' - prevSearch = '//a[@class="prev"]' + prevSearch = '//a[span[contains(text(),"Previous")]]' endOfLife = True multipleImagesPerStrip = True @@ -365,13 +364,15 @@ class CorydonCafe(_ParserScraper): class CourtingDisaster(_WordPressScraper): - url = 'http://www.courting-disaster.com/' + url = 'https://web.archive.org/web/20201127150157/http://www.courting-disaster.com/' firstStripUrl = 'http://www.courting-disaster.com/comic/courting-disaster-17/' + endOfLife = True class CraftedFables(_WordPressScraper): - url = 'http://www.caf-fiends.net/comicpress/' + url = 'https://web.archive.org/web/20191126025641/http://www.caf-fiends.net/comicpress/' prevSearch = '//a[@rel="prev"]' + endOfLife = True class CrimsonDark(_BasicScraper): @@ -407,20 +408,12 @@ class CrossTimeCafe(_ParserScraper): endOfLife = True -class CucumberQuest(_BasicScraper): - url = 'http://cucumber.gigidigi.com/' - rurl = escape(url) - stripUrl = url + 'cq/%s/' +class CucumberQuest(_WPWebcomic): + baseUrl = 'http://cucumber.gigidigi.com/' + stripUrl = baseUrl + 'cq/%s/' firstStripUrl = stripUrl % 'page-1' - startUrl = url + 'recent.html' + url = firstStripUrl starter = indirectStarter - imageSearch = ( - compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)), - compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)), - compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/bonus[^"]+)' % rurl)), - ) - prevSearch = compile(tagre("a", "href", r'(%scq/[^"]+/)' % rurl, after="previous")) - latestSearch = compile(r'window\.location="(/cq/[^"]+/)"') help = 'Index format: stripname' diff --git a/dosagelib/plugins/common.py b/dosagelib/plugins/common.py index f56063851..88892cb7c 100644 --- a/dosagelib/plugins/common.py +++ b/dosagelib/plugins/common.py @@ -18,6 +18,11 @@ class _WordPressScraper(_ParserScraper): latestSearch = '//a[d:class("comic-nav-last")]' +class _WordPressSpliced(_ParserScraper): + imageSearch = '//div[@id="spliced-comic"]//img' + prevSearch = '//a[d:class("previous-comic")]' + + class _WPNavi(_WordPressScraper): prevSearch = '//a[d:class("navi-prev")]' @@ -26,7 +31,7 @@ class _WPNaviIn(_WordPressScraper): prevSearch = '//a[d:class("navi-prev-in")]' -class _WPWebcomic(_WordPressScraper): +class _WPWebcomic(_ParserScraper): imageSearch = '//div[d:class("webcomic-image")]//img' prevSearch = '//a[d:class("previous-webcomic-link")]' nextSearch = '///a[d:class("next-webcomic-link")]'