Fix some modules

This commit is contained in:
Tobias Gruetzmacher 2024-02-13 23:37:08 +01:00
parent 17f7c53e53
commit b3da06b270
No known key found for this signature in database

View file

@ -3,11 +3,11 @@
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam # SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher # SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
# SPDX-FileCopyrightText: © 2019 Daniel Ring # SPDX-FileCopyrightText: © 2019 Daniel Ring
from re import compile, escape from re import compile
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
from ..helpers import indirectStarter from ..helpers import indirectStarter
from ..util import tagre from ..util import tagre, getQueryParams
from .common import ComicControlScraper, WordPressScraper, WordPressNavi from .common import ComicControlScraper, WordPressScraper, WordPressNavi
@ -27,13 +27,9 @@ class Garanos(WordPressScraper):
endOfLife = True endOfLife = True
class GastroPhobia(_ParserScraper): class GastroPhobia(ComicControlScraper):
url = 'http://www.gastrophobia.com/' url = 'https://gastrophobia.com/'
stripUrl = url + 'index.php?date=%s' firstStripUrl = url + 'comix/the-mane-event'
firstStripUrl = stripUrl % '2008-07-30'
imageSearch = '//div[@id="comic"]//img'
prevSearch = '//div[@id="prev"]/a'
help = 'Index format: yyyy-mm-dd'
class Geeks(_ParserScraper): class Geeks(_ParserScraper):
@ -51,7 +47,7 @@ class GeeksNextDoor(_ParserScraper):
url = 'http://www.geeksnextcomic.com/' url = 'http://www.geeksnextcomic.com/'
stripUrl = url + '%s.html' stripUrl = url + '%s.html'
firstStripUrl = stripUrl % '2007-03-27' # '2010-10-04' firstStripUrl = stripUrl % '2007-03-27' # '2010-10-04'
imageSearch = '//p/img' imageSearch = ('//p/img', '//p/span/img')
prevSearch = ( prevSearch = (
'//a[img[contains(@src, "/nav_prev")]]', '//a[img[contains(@src, "/nav_prev")]]',
'//a[contains(text(), "< prev")]', # start page is different '//a[contains(text(), "< prev")]', # start page is different
@ -59,19 +55,19 @@ class GeeksNextDoor(_ParserScraper):
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
class GirlGenius(_BasicScraper): class GirlGenius(ParserScraper):
baseUrl = 'http://www.girlgeniusonline.com/' url = 'https://www.girlgeniusonline.com/comic.php'
rurl = escape(baseUrl)
url = baseUrl + 'comic.php'
stripUrl = url + '?date=%s' stripUrl = url + '?date=%s'
firstStripUrl = stripUrl % '20021104' firstStripUrl = stripUrl % '20021104'
imageSearch = compile( imageSearch = '//img[@alt="Comic"]'
tagre("img", "src", r"(%sggmain/strips/[^']*)" % rurl, quote="'")) prevSearch = '//a[@id="topprev"]'
prevSearch = compile(tagre("a", "id", "topprev", quote="\"",
before=r"(%s[^\"']+)" % rurl))
multipleImagesPerStrip = True multipleImagesPerStrip = True
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
def shouldSkipUrl(self, url, data):
"""Skip pages without images."""
return not data.xpath('//div[@id="comicbody"]//img[contains(@src, "comic")]')
class GirlsWithSlingshots(ComicControlScraper): class GirlsWithSlingshots(ComicControlScraper):
url = 'https://girlswithslingshots.com/' url = 'https://girlswithslingshots.com/'
@ -99,20 +95,18 @@ class GoGetARoomie(ComicControlScraper):
url = 'http://www.gogetaroomie.com' url = 'http://www.gogetaroomie.com'
class GoneWithTheBlastwave(_BasicScraper): class GoneWithTheBlastwave(ParserScraper):
url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1' stripUrl = 'http://www.blastwave-comic.com/index.php?p=comic&nro=%s'
starter = indirectStarter
stripUrl = url[:-1] + '%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(r'<img.+src=".+(/comics/.+?)"') url = firstStripUrl
prevSearch = compile(r'href="(index.php\?p=comic&amp;nro=\d+)">' + starter = indirectStarter
r'<img src="images/page/default/previous') imageSearch = '//*[@id="comic_ruutu"]/center/img'
latestSearch = compile(r'href="(index.php\?p=comic&amp;nro=\d+)">' + prevSearch = '//a[img[contains(@src, "previous")]]'
r'<img src="images/page/default/latest') latestSearch = '//a[img[contains(@src, "latest")]]'
help = 'Index format: n' help = 'Index format: n'
def namer(self, image_url, page_url): def namer(self, image_url, page_url):
return '%02d' % int(compile(r'nro=(\d+)').search(page_url).group(1)) return '%02d' % int(getQueryParams(page_url)['nro'][0])
class GrrlPower(WordPressScraper): class GrrlPower(WordPressScraper):
@ -130,13 +124,12 @@ class GuildedAge(WordPressScraper):
firstStripUrl = url + 'comic/chapter-1-cover/' firstStripUrl = url + 'comic/chapter-1-cover/'
class GUComics(_BasicScraper): class GUComics(ParserScraper):
url = 'http://www.gucomics.com/' stripUrl = 'https://www.gucomics.com/%s'
stripUrl = url + '%s' url = stripUrl % 'comic/'
firstStripUrl = stripUrl % '20000710' firstStripUrl = stripUrl % '20000710'
imageSearch = compile(tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)')) imageSearch = '//img[contains(@src, "/comics/2")]'
prevSearch = compile(tagre("a", "href", r'(/\d+)') + prevSearch = '//a[img[contains(@alt, "previous")]]'
tagre("img", "src", r'/images/nav/prev\.png'))
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'