Fix a bunch of comics (fixes #277)

This commit is contained in:
Tobias Gruetzmacher 2023-08-27 02:23:16 +02:00
parent 6d20de8b2a
commit 9e05fae304
No known key found for this signature in database
4 changed files with 43 additions and 65 deletions

View file

@ -1,8 +1,8 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2022 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
# SPDX-FileCopyrightText: © 2019 Daniel Ring
from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
@ -73,19 +73,9 @@ class GirlGenius(_BasicScraper):
help = 'Index format: yyyymmdd'
class GirlsWithSlingshots(_BasicScraper):
class GirlsWithSlingshots(ComicControlScraper):
url = 'https://girlswithslingshots.com/'
rurl = escape(url)
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'gws1'
imageSearch = (
compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)),
compile(tagre("img", "src",
r'(http://cdn\.girlswithslingshots\.com/comics/[^"]+)')),
)
prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl,
before='rel="prev"'))
help = 'Index format: stripname'
firstStripUrl = url + 'comic/gws1'
class GleefulNihilism(WordPressScraper):

View file

@ -3,11 +3,8 @@
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
# SPDX-FileCopyrightText: © 2019 Daniel Ring
from re import compile
from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
from ..scraper import ParserScraper, _ParserScraper
from ..helpers import bounceStarter, indirectStarter
from ..util import tagre
from .common import ComicControlScraper, WordPressScraper, WordPressNaviIn
@ -152,13 +149,12 @@ class LilithsWord(ComicControlScraper):
return imageUrl.rsplit('/', 1)[-1].split('-', 1)[1]
class LittleGamers(_BasicScraper):
url = 'http://www.little-gamers.com/'
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2000/12/01/99'
imageSearch = compile(tagre("img", "src", r'(http://little-gamers\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.little-gamers\.com/[^"]+)', before="comic-nav-prev-link"))
help = 'Index format: yyyy/mm/dd/name'
class LittleGamers(ParserScraper):
url = 'https://www.little-gamers.com/'
firstStripUrl = url + '2000/12/01/99'
imageSearch = '//div[d:class("comic")]//img'
prevSearch = ('//a[@id="previous"]',
'//div[d:class("comic-navigation")]//a[text()="previous"]')
class LittleTales(_ParserScraper):
@ -197,7 +193,7 @@ class LoadingArtist(_ParserScraper):
starter = indirectStarter
class LoFiJinks(WordPressNaviIn):
class LoFiJinks(WordPressScraper):
baseUrl = 'https://hijinksensue.com/comic/'
url = baseUrl + 'learning-to-love-again/'
firstStripUrl = baseUrl + 'lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'

View file

@ -1,8 +1,8 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2022 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
# SPDX-FileCopyrightText: © 2019 Daniel Ring
from ..scraper import Scraper
@ -1695,4 +1695,5 @@ class Renamed(Scraper):
# Renamed in 3.1
cls('Exiern', 'ComicFury/Exiern'),
cls('SafelyEndangered', 'WebToons/SafelyEndangered'),
)

View file

@ -9,7 +9,8 @@ from os.path import splitext
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer
from ..util import tagre
from .common import ComicControlScraper, WordPressScraper, WordPressNavi, WordPressWebcomic
from .common import (ComicControlScraper, WordPressScraper, WordPressSpliced,
WordPressNavi, WordPressWebcomic)
class SabrinaOnline(_BasicScraper):
@ -33,11 +34,6 @@ class SabrinaOnline(_BasicScraper):
return archivepages[-1]
class SafelyEndangered(WordPressNavi):
url = 'http://www.safelyendangered.com/'
firstStripUrl = url + 'comic/ignored/'
class SaffronAndSage(WordPressScraper):
url = 'https://www.saffroncomic.com/'
firstStripUrl = url + 'comic/p0001/'
@ -74,19 +70,18 @@ class Savestate(WordPressNavi):
class ScandinaviaAndTheWorld(_ParserScraper):
url = 'https://satwcomic.com/'
stripUrl = url + '%s'
firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
url = 'https://satwcomic.com/sweden-denmark-and-norway'
firstStripUrl = url
starter = indirectStarter
imageSearch = '//img[@itemprop="image"]'
prevSearch = '//a[@accesskey="p"]'
latestSearch = '//a[text()="View latest comic"]'
latestSearch = '//a[contains(@title, "Latest")]'
textSearch = '//span[@itemprop="articleBody"]'
help = 'Index format: stripname'
class ScaryGoRound(_ParserScraper):
url = 'http://www.scarygoround.com/sgr/ar.php'
url = ('https://web.archive.org/web/20190327203330/'
'https://www.scarygoround.com/sgr/ar.php')
stripUrl = url + '?date=%s'
firstStripUrl = stripUrl % '20020604'
imageSearch = '//img[contains(@src, "/strips/")]'
@ -198,15 +193,11 @@ class Sharksplode(WordPressScraper):
allow_errors = (403,)
class Sheldon(_BasicScraper):
url = 'http://www.sheldoncomics.com/'
rurl = escape(url)
stripUrl = url + 'archive/%s.html'
firstStripUrl = stripUrl % '011130'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl,
after="sidenav-prev"))
help = 'Index format: yymmdd'
class Sheldon(ParserScraper):
url = 'https://www.sheldoncomics.com/'
firstStripUrl = url + 'comic/well-who-is-this/'
imageSearch = '//div[@id="comic"]//img'
prevSearch = '//a[img[d:class("left")]]'
class Shifters(ParserScraper):
@ -278,17 +269,15 @@ class ShipInABottle(WordPressScraper):
help = 'Index format: number'
class Shortpacked(_ParserScraper):
url = 'http://www.shortpacked.com/index.php'
stripUrl = url + '?id=%s'
css = True
imageSearch = 'img#comic'
prevSearch = 'a.prev'
help = 'Index format: nnn'
class Shortpacked(ComicControlScraper):
url = 'https://www.shortpacked.com/comic/'
firstStripUrl = url + 'just-a-toy-store'
class ShotgunShuffle(WordPressScraper):
url = 'http://shotgunshuffle.com/'
class ShotgunShuffle(WordPressSpliced):
# Currently down, use archive.org in the meantime (08-2023)
url = ('https://web.archive.org/web/20230131163842/'
'https://shotgunshuffle.com/')
firstStripUrl = url + 'comic/pilot/'
@ -452,12 +441,14 @@ class SpaceFurries(ParserScraper):
return imageurls
class SpaceJunkArlia(_ParserScraper):
url = 'http://spacejunkarlia.com/'
class SpaceJunkArlia(ParserScraper):
url = ('https://web.archive.org/web/20220121133701/'
'http://spacejunkarlia.com/')
stripUrl = url + '?strip_id=%s'
firstStripUrl = stripUrl % '0'
imageSearch = '//div[d:class("content")]/img'
prevSearch = '//a[text()="<"]'
endOfLife = True
help = 'Index format: number'