From 1d632904e4005a06a93dad603719d106441ac006 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Mon, 6 Jun 2022 14:52:40 +0200 Subject: [PATCH] Fix SinFest with an ugly hack --- dosagelib/plugins/s.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index cb9774852..c536c1ea2 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -6,7 +6,7 @@ from re import compile, escape, IGNORECASE, sub from os.path import splitext -from ..scraper import _BasicScraper, _ParserScraper +from ..scraper import _BasicScraper, _ParserScraper, ParserScraper from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer from ..util import tagre from .common import ComicControlScraper, WordPressScraper, WordPressNavi, WordPressWebcomic @@ -237,15 +237,22 @@ class ShotgunShuffle(WordPressScraper): firstStripUrl = url + 'comic/pilot/' -class SinFest(_ParserScraper): - url = 'https://www.sinfest.net/' +class SinFest(ParserScraper): + END_HTML_TAG = compile(r'</html>') + + url = 'https://sinfest.xyz/' stripUrl = url + 'view.php?date=%s' firstStripUrl = stripUrl % '2000-01-17' imageSearch = '//img[contains(@src, "btphp/comics/")]' textSearch = imageSearch + '/@alt' - prevSearch = '//a[./img[contains(@src, "images/prev")]]' + prevSearch = '//a[d:class("prev")]' help = 'Index format: yyyy-mm-dd' + # Remove HTML end tag confusing our parser + def _parse_page(self, data): + data = self.END_HTML_TAG.sub('', data) + return super()._parse_page(data) + class SisterClaire(ComicControlScraper): url = 'https://www.sisterclaire.com/comic/'