Fix SinFest with an ugly hack

This commit is contained in:
Tobias Gruetzmacher 2022-06-06 14:52:40 +02:00
parent e5c505e643
commit 1d632904e4

View file

@ -6,7 +6,7 @@
from re import compile, escape, IGNORECASE, sub from re import compile, escape, IGNORECASE, sub
from os.path import splitext from os.path import splitext
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer
from ..util import tagre from ..util import tagre
from .common import ComicControlScraper, WordPressScraper, WordPressNavi, WordPressWebcomic from .common import ComicControlScraper, WordPressScraper, WordPressNavi, WordPressWebcomic
@ -237,15 +237,22 @@ class ShotgunShuffle(WordPressScraper):
firstStripUrl = url + 'comic/pilot/' firstStripUrl = url + 'comic/pilot/'
class SinFest(_ParserScraper): class SinFest(ParserScraper):
url = 'https://www.sinfest.net/' END_HTML_TAG = compile(r'</html>')
url = 'https://sinfest.xyz/'
stripUrl = url + 'view.php?date=%s' stripUrl = url + 'view.php?date=%s'
firstStripUrl = stripUrl % '2000-01-17' firstStripUrl = stripUrl % '2000-01-17'
imageSearch = '//img[contains(@src, "btphp/comics/")]' imageSearch = '//img[contains(@src, "btphp/comics/")]'
textSearch = imageSearch + '/@alt' textSearch = imageSearch + '/@alt'
prevSearch = '//a[./img[contains(@src, "images/prev")]]' prevSearch = '//a[d:class("prev")]'
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
# The page text contains an HTML end tag that confuses our parser, so every
# END_HTML_TAG match is deleted before the regular parsing step runs.
def _parse_page(self, data):
    """Parse ``data`` via the parent class after removing END_HTML_TAG matches."""
    without_end_tag = self.END_HTML_TAG.sub('', data)
    return super()._parse_page(without_end_tag)
class SisterClaire(ComicControlScraper): class SisterClaire(ComicControlScraper):
url = 'https://www.sisterclaire.com/comic/' url = 'https://www.sisterclaire.com/comic/'