From b3da06b2708590b031b77a54023f95eeaefb7507 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Tue, 13 Feb 2024 23:37:08 +0100 Subject: [PATCH] Fix some modules --- dosagelib/plugins/g.py | 63 +++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 35 deletions(-) diff --git a/dosagelib/plugins/g.py b/dosagelib/plugins/g.py index 0d5c1a5ce..605f96e9c 100644 --- a/dosagelib/plugins/g.py +++ b/dosagelib/plugins/g.py @@ -3,11 +3,11 @@ # SPDX-FileCopyrightText: © 2012 Bastian Kleineidam # SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher # SPDX-FileCopyrightText: © 2019 Daniel Ring -from re import compile, escape +from re import compile -from ..scraper import _BasicScraper, _ParserScraper +from ..scraper import _BasicScraper, _ParserScraper, ParserScraper from ..helpers import indirectStarter -from ..util import tagre +from ..util import tagre, getQueryParams from .common import ComicControlScraper, WordPressScraper, WordPressNavi @@ -27,13 +27,9 @@ class Garanos(WordPressScraper): endOfLife = True -class GastroPhobia(_ParserScraper): - url = 'http://www.gastrophobia.com/' - stripUrl = url + 'index.php?date=%s' - firstStripUrl = stripUrl % '2008-07-30' - imageSearch = '//div[@id="comic"]//img' - prevSearch = '//div[@id="prev"]/a' - help = 'Index format: yyyy-mm-dd' +class GastroPhobia(ComicControlScraper): + url = 'https://gastrophobia.com/' + firstStripUrl = url + 'comix/the-mane-event' class Geeks(_ParserScraper): @@ -51,7 +47,7 @@ class GeeksNextDoor(_ParserScraper): url = 'http://www.geeksnextcomic.com/' stripUrl = url + '%s.html' firstStripUrl = stripUrl % '2007-03-27' # '2010-10-04' - imageSearch = '//p/img' + imageSearch = ('//p/img', '//p/span/img') prevSearch = ( '//a[img[contains(@src, "/nav_prev")]]', '//a[contains(text(), "< prev")]', # start page is different @@ -59,19 +55,19 @@ class GeeksNextDoor(_ParserScraper): help = 'Index format: yyyy-mm-dd' -class GirlGenius(_BasicScraper): - baseUrl = 'http://www.girlgeniusonline.com/' - rurl = escape(baseUrl) - url = baseUrl + 'comic.php' +class GirlGenius(ParserScraper): + url = 'https://www.girlgeniusonline.com/comic.php' stripUrl = url + '?date=%s' firstStripUrl = stripUrl % '20021104' - imageSearch = compile( - tagre("img", "src", r"(%sggmain/strips/[^']*)" % rurl, quote="'")) - prevSearch = compile(tagre("a", "id", "topprev", quote="\"", - before=r"(%s[^\"']+)" % rurl)) + imageSearch = '//img[@alt="Comic"]' + prevSearch = '//a[@id="topprev"]' multipleImagesPerStrip = True help = 'Index format: yyyymmdd' + def shouldSkipUrl(self, url, data): + """Skip pages without images.""" + return not data.xpath('//div[@id="comicbody"]//img[contains(@src, "comic")]') + class GirlsWithSlingshots(ComicControlScraper): url = 'https://girlswithslingshots.com/' @@ -99,20 +95,18 @@ class GoGetARoomie(ComicControlScraper): url = 'http://www.gogetaroomie.com' -class GoneWithTheBlastwave(_BasicScraper): - url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1' - starter = indirectStarter - stripUrl = url[:-1] + '%s' +class GoneWithTheBlastwave(ParserScraper): + stripUrl = 'http://www.blastwave-comic.com/index.php?p=comic&nro=%s' firstStripUrl = stripUrl % '1' - imageSearch = compile(r'' + - r'' + - r'