From 9e05fae3041f0b20260d253b9964aed53fb3932e Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Sun, 27 Aug 2023 02:23:16 +0200 Subject: [PATCH] Fix a bunch of comics (fixes #277) --- dosagelib/plugins/g.py | 22 +++++----------- dosagelib/plugins/l.py | 20 ++++++-------- dosagelib/plugins/old.py | 9 ++++--- dosagelib/plugins/s.py | 57 +++++++++++++++++----------------------- 4 files changed, 43 insertions(+), 65 deletions(-) diff --git a/dosagelib/plugins/g.py b/dosagelib/plugins/g.py index 4c266f339..0d5c1a5ce 100644 --- a/dosagelib/plugins/g.py +++ b/dosagelib/plugins/g.py @@ -1,8 +1,8 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2022 Tobias Gruetzmacher -# Copyright (C) 2019-2020 Daniel Ring +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper @@ -73,19 +73,9 @@ class GirlGenius(_BasicScraper): help = 'Index format: yyyymmdd' -class GirlsWithSlingshots(_BasicScraper): +class GirlsWithSlingshots(ComicControlScraper): url = 'https://girlswithslingshots.com/' - rurl = escape(url) - stripUrl = url + 'comic/%s' - firstStripUrl = stripUrl % 'gws1' - imageSearch = ( - compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)), - compile(tagre("img", "src", - r'(http://cdn\.girlswithslingshots\.com/comics/[^"]+)')), - ) - prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl, - before='rel="prev"')) - help = 'Index format: stripname' + firstStripUrl = url + 'comic/gws1' class GleefulNihilism(WordPressScraper): diff --git a/dosagelib/plugins/l.py b/dosagelib/plugins/l.py index d75126782..28f432187 100644 --- a/dosagelib/plugins/l.py +++ b/dosagelib/plugins/l.py @@ -3,11 +3,8 @@ # SPDX-FileCopyrightText: © 2012 Bastian Kleineidam # SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher # SPDX-FileCopyrightText: © 2019 Daniel Ring -from re import compile - -from ..scraper import ParserScraper, _BasicScraper, _ParserScraper +from ..scraper import ParserScraper, _ParserScraper from ..helpers import bounceStarter, indirectStarter -from ..util import tagre from .common import ComicControlScraper, WordPressScraper, WordPressNaviIn @@ -152,13 +149,12 @@ class LilithsWord(ComicControlScraper): return imageUrl.rsplit('/', 1)[-1].split('-', 1)[1] -class LittleGamers(_BasicScraper): - url = 'http://www.little-gamers.com/' - stripUrl = url + '%s/' - firstStripUrl = stripUrl % '2000/12/01/99' - imageSearch = compile(tagre("img", "src", r'(http://little-gamers\.com/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://www\.little-gamers\.com/[^"]+)', before="comic-nav-prev-link")) - help = 'Index format: yyyy/mm/dd/name' +class LittleGamers(ParserScraper): + url = 'https://www.little-gamers.com/' + firstStripUrl = url + '2000/12/01/99' + imageSearch = '//div[d:class("comic")]//img' + prevSearch = ('//a[@id="previous"]', + '//div[d:class("comic-navigation")]//a[text()="previous"]') class LittleTales(_ParserScraper): @@ -197,7 +193,7 @@ class LoadingArtist(_ParserScraper): starter = indirectStarter -class LoFiJinks(WordPressNaviIn): +class LoFiJinks(WordPressScraper): baseUrl = 'https://hijinksensue.com/comic/' url = baseUrl + 'learning-to-love-again/' firstStripUrl = baseUrl + 'lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/' diff --git a/dosagelib/plugins/old.py b/dosagelib/plugins/old.py index b5989665b..11ee39045 100644 --- a/dosagelib/plugins/old.py +++ b/dosagelib/plugins/old.py @@ -1,8 +1,8 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2022 Tobias Gruetzmacher -# Copyright (C) 2019-2020 Daniel Ring +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring from ..scraper import Scraper @@ -1695,4 +1695,5 @@ class Renamed(Scraper): # Renamed in 3.1 cls('Exiern', 'ComicFury/Exiern'), + cls('SafelyEndangered', 'WebToons/SafelyEndangered'), ) diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index 0f8e3f305..fb115b943 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -9,7 +9,8 @@ from os.path import splitext from ..scraper import _BasicScraper, _ParserScraper, ParserScraper from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer from ..util import tagre -from .common import ComicControlScraper, WordPressScraper, WordPressNavi, WordPressWebcomic +from .common import (ComicControlScraper, WordPressScraper, WordPressSpliced, + WordPressNavi, WordPressWebcomic) class SabrinaOnline(_BasicScraper): @@ -33,11 +34,6 @@ class SabrinaOnline(_BasicScraper): return archivepages[-1] -class SafelyEndangered(WordPressNavi): - url = 'http://www.safelyendangered.com/' - firstStripUrl = url + 'comic/ignored/' - - class SaffronAndSage(WordPressScraper): url = 'https://www.saffroncomic.com/' firstStripUrl = url + 'comic/p0001/' @@ -74,19 +70,18 @@ class Savestate(WordPressNavi): class ScandinaviaAndTheWorld(_ParserScraper): - url = 'https://satwcomic.com/' - stripUrl = url + '%s' - firstStripUrl = stripUrl % 'sweden-denmark-and-norway' + url = 'https://satwcomic.com/sweden-denmark-and-norway' + firstStripUrl = url starter = indirectStarter imageSearch = '//img[@itemprop="image"]' prevSearch = '//a[@accesskey="p"]' - latestSearch = '//a[text()="View latest comic"]' + latestSearch = '//a[contains(@title, "Latest")]' textSearch = '//span[@itemprop="articleBody"]' - help = 'Index format: stripname' class ScaryGoRound(_ParserScraper): - url = 'http://www.scarygoround.com/sgr/ar.php' + url = ('https://web.archive.org/web/20190327203330/' + 'https://www.scarygoround.com/sgr/ar.php') stripUrl = url + '?date=%s' firstStripUrl = stripUrl % '20020604' imageSearch = '//img[contains(@src, "/strips/")]' @@ -198,15 +193,11 @@ class Sharksplode(WordPressScraper): allow_errors = (403,) -class Sheldon(_BasicScraper): - url = 'http://www.sheldoncomics.com/' - rurl = escape(url) - stripUrl = url + 'archive/%s.html' - firstStripUrl = stripUrl % '011130' - imageSearch = compile(tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl, - after="sidenav-prev")) - help = 'Index format: yymmdd' +class Sheldon(ParserScraper): + url = 'https://www.sheldoncomics.com/' + firstStripUrl = url + 'comic/well-who-is-this/' + imageSearch = '//div[@id="comic"]//img' + prevSearch = '//a[img[d:class("left")]]' class Shifters(ParserScraper): @@ -278,17 +269,15 @@ class ShipInABottle(WordPressScraper): help = 'Index format: number' -class Shortpacked(_ParserScraper): - url = 'http://www.shortpacked.com/index.php' - stripUrl = url + '?id=%s' - css = True - imageSearch = 'img#comic' - prevSearch = 'a.prev' - help = 'Index format: nnn' +class Shortpacked(ComicControlScraper): + url = 'https://www.shortpacked.com/comic/' + firstStripUrl = url + 'just-a-toy-store' -class ShotgunShuffle(WordPressScraper): - url = 'http://shotgunshuffle.com/' +class ShotgunShuffle(WordPressSpliced): + # Currently down, use archive.org in the meantime (08-2023) + url = ('https://web.archive.org/web/20230131163842/' + 'https://shotgunshuffle.com/') firstStripUrl = url + 'comic/pilot/' @@ -452,12 +441,14 @@ class SpaceFurries(ParserScraper): return imageurls -class SpaceJunkArlia(_ParserScraper): - url = 'http://spacejunkarlia.com/' +class SpaceJunkArlia(ParserScraper): + url = ('https://web.archive.org/web/20220121133701/' + 'http://spacejunkarlia.com/') stripUrl = url + '?strip_id=%s' firstStripUrl = stripUrl % '0' imageSearch = '//div[d:class("content")]/img' prevSearch = '//a[text()="<"]' + endOfLife = True help = 'Index format: number' @@ -717,4 +708,4 @@ class SwordsComic(ParserScraper): firstStripUrl = stripUrl % 'cover' imageSearch = '//div[@class="page-image-wrapper"]//img' prevSearch = '//a[@class="navigation-button navigation-previous"]' - help = 'Index format: Swordsnnn (unpadded)' \ No newline at end of file + help = 'Index format: Swordsnnn (unpadded)'