Fix a bunch of comics (fixes #277)

This commit is contained in:
Tobias Gruetzmacher 2023-08-27 02:23:16 +02:00
parent 6d20de8b2a
commit 9e05fae304
No known key found for this signature in database
4 changed files with 43 additions and 65 deletions

View file

@ -1,8 +1,8 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
# Copyright (C) 2015-2022 Tobias Gruetzmacher # SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring # SPDX-FileCopyrightText: © 2019 Daniel Ring
from re import compile, escape from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
@ -73,19 +73,9 @@ class GirlGenius(_BasicScraper):
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
class GirlsWithSlingshots(_BasicScraper): class GirlsWithSlingshots(ComicControlScraper):
url = 'https://girlswithslingshots.com/' url = 'https://girlswithslingshots.com/'
rurl = escape(url) firstStripUrl = url + 'comic/gws1'
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'gws1'
imageSearch = (
compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)),
compile(tagre("img", "src",
r'(http://cdn\.girlswithslingshots\.com/comics/[^"]+)')),
)
prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl,
before='rel="prev"'))
help = 'Index format: stripname'
class GleefulNihilism(WordPressScraper): class GleefulNihilism(WordPressScraper):

View file

@ -3,11 +3,8 @@
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam # SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher # SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
# SPDX-FileCopyrightText: © 2019 Daniel Ring # SPDX-FileCopyrightText: © 2019 Daniel Ring
from re import compile from ..scraper import ParserScraper, _ParserScraper
from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
from ..helpers import bounceStarter, indirectStarter from ..helpers import bounceStarter, indirectStarter
from ..util import tagre
from .common import ComicControlScraper, WordPressScraper, WordPressNaviIn from .common import ComicControlScraper, WordPressScraper, WordPressNaviIn
@ -152,13 +149,12 @@ class LilithsWord(ComicControlScraper):
return imageUrl.rsplit('/', 1)[-1].split('-', 1)[1] return imageUrl.rsplit('/', 1)[-1].split('-', 1)[1]
class LittleGamers(_BasicScraper): class LittleGamers(ParserScraper):
url = 'http://www.little-gamers.com/' url = 'https://www.little-gamers.com/'
stripUrl = url + '%s/' firstStripUrl = url + '2000/12/01/99'
firstStripUrl = stripUrl % '2000/12/01/99' imageSearch = '//div[d:class("comic")]//img'
imageSearch = compile(tagre("img", "src", r'(http://little-gamers\.com/comics/[^"]+)')) prevSearch = ('//a[@id="previous"]',
prevSearch = compile(tagre("a", "href", r'(http://www\.little-gamers\.com/[^"]+)', before="comic-nav-prev-link")) '//div[d:class("comic-navigation")]//a[text()="previous"]')
help = 'Index format: yyyy/mm/dd/name'
class LittleTales(_ParserScraper): class LittleTales(_ParserScraper):
@ -197,7 +193,7 @@ class LoadingArtist(_ParserScraper):
starter = indirectStarter starter = indirectStarter
class LoFiJinks(WordPressNaviIn): class LoFiJinks(WordPressScraper):
baseUrl = 'https://hijinksensue.com/comic/' baseUrl = 'https://hijinksensue.com/comic/'
url = baseUrl + 'learning-to-love-again/' url = baseUrl + 'learning-to-love-again/'
firstStripUrl = baseUrl + 'lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/' firstStripUrl = baseUrl + 'lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'

View file

@ -1,8 +1,8 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
# Copyright (C) 2015-2022 Tobias Gruetzmacher # SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring # SPDX-FileCopyrightText: © 2019 Daniel Ring
from ..scraper import Scraper from ..scraper import Scraper
@ -1695,4 +1695,5 @@ class Renamed(Scraper):
# Renamed in 3.1 # Renamed in 3.1
cls('Exiern', 'ComicFury/Exiern'), cls('Exiern', 'ComicFury/Exiern'),
cls('SafelyEndangered', 'WebToons/SafelyEndangered'),
) )

View file

@ -9,7 +9,8 @@ from os.path import splitext
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer
from ..util import tagre from ..util import tagre
from .common import ComicControlScraper, WordPressScraper, WordPressNavi, WordPressWebcomic from .common import (ComicControlScraper, WordPressScraper, WordPressSpliced,
WordPressNavi, WordPressWebcomic)
class SabrinaOnline(_BasicScraper): class SabrinaOnline(_BasicScraper):
@ -33,11 +34,6 @@ class SabrinaOnline(_BasicScraper):
return archivepages[-1] return archivepages[-1]
class SafelyEndangered(WordPressNavi):
url = 'http://www.safelyendangered.com/'
firstStripUrl = url + 'comic/ignored/'
class SaffronAndSage(WordPressScraper): class SaffronAndSage(WordPressScraper):
url = 'https://www.saffroncomic.com/' url = 'https://www.saffroncomic.com/'
firstStripUrl = url + 'comic/p0001/' firstStripUrl = url + 'comic/p0001/'
@ -74,19 +70,18 @@ class Savestate(WordPressNavi):
class ScandinaviaAndTheWorld(_ParserScraper): class ScandinaviaAndTheWorld(_ParserScraper):
url = 'https://satwcomic.com/' url = 'https://satwcomic.com/sweden-denmark-and-norway'
stripUrl = url + '%s' firstStripUrl = url
firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
starter = indirectStarter starter = indirectStarter
imageSearch = '//img[@itemprop="image"]' imageSearch = '//img[@itemprop="image"]'
prevSearch = '//a[@accesskey="p"]' prevSearch = '//a[@accesskey="p"]'
latestSearch = '//a[text()="View latest comic"]' latestSearch = '//a[contains(@title, "Latest")]'
textSearch = '//span[@itemprop="articleBody"]' textSearch = '//span[@itemprop="articleBody"]'
help = 'Index format: stripname'
class ScaryGoRound(_ParserScraper): class ScaryGoRound(_ParserScraper):
url = 'http://www.scarygoround.com/sgr/ar.php' url = ('https://web.archive.org/web/20190327203330/'
'https://www.scarygoround.com/sgr/ar.php')
stripUrl = url + '?date=%s' stripUrl = url + '?date=%s'
firstStripUrl = stripUrl % '20020604' firstStripUrl = stripUrl % '20020604'
imageSearch = '//img[contains(@src, "/strips/")]' imageSearch = '//img[contains(@src, "/strips/")]'
@ -198,15 +193,11 @@ class Sharksplode(WordPressScraper):
allow_errors = (403,) allow_errors = (403,)
class Sheldon(_BasicScraper): class Sheldon(ParserScraper):
url = 'http://www.sheldoncomics.com/' url = 'https://www.sheldoncomics.com/'
rurl = escape(url) firstStripUrl = url + 'comic/well-who-is-this/'
stripUrl = url + 'archive/%s.html' imageSearch = '//div[@id="comic"]//img'
firstStripUrl = stripUrl % '011130' prevSearch = '//a[img[d:class("left")]]'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl,
after="sidenav-prev"))
help = 'Index format: yymmdd'
class Shifters(ParserScraper): class Shifters(ParserScraper):
@ -278,17 +269,15 @@ class ShipInABottle(WordPressScraper):
help = 'Index format: number' help = 'Index format: number'
class Shortpacked(_ParserScraper): class Shortpacked(ComicControlScraper):
url = 'http://www.shortpacked.com/index.php' url = 'https://www.shortpacked.com/comic/'
stripUrl = url + '?id=%s' firstStripUrl = url + 'just-a-toy-store'
css = True
imageSearch = 'img#comic'
prevSearch = 'a.prev'
help = 'Index format: nnn'
class ShotgunShuffle(WordPressScraper): class ShotgunShuffle(WordPressSpliced):
url = 'http://shotgunshuffle.com/' # Currently down, use archive.org in the meantime (08-2023)
url = ('https://web.archive.org/web/20230131163842/'
'https://shotgunshuffle.com/')
firstStripUrl = url + 'comic/pilot/' firstStripUrl = url + 'comic/pilot/'
@ -452,12 +441,14 @@ class SpaceFurries(ParserScraper):
return imageurls return imageurls
class SpaceJunkArlia(_ParserScraper): class SpaceJunkArlia(ParserScraper):
url = 'http://spacejunkarlia.com/' url = ('https://web.archive.org/web/20220121133701/'
'http://spacejunkarlia.com/')
stripUrl = url + '?strip_id=%s' stripUrl = url + '?strip_id=%s'
firstStripUrl = stripUrl % '0' firstStripUrl = stripUrl % '0'
imageSearch = '//div[d:class("content")]/img' imageSearch = '//div[d:class("content")]/img'
prevSearch = '//a[text()="<"]' prevSearch = '//a[text()="<"]'
endOfLife = True
help = 'Index format: number' help = 'Index format: number'