Fix SinFest with an ugly hack
This commit is contained in:
parent
e5c505e643
commit
1d632904e4
1 changed file with 11 additions and 4 deletions
|
@@ -6,7 +6,7 @@
|
||||||
from re import compile, escape, IGNORECASE, sub
|
from re import compile, escape, IGNORECASE, sub
|
||||||
from os.path import splitext
|
from os.path import splitext
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
|
||||||
from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer
|
from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from .common import ComicControlScraper, WordPressScraper, WordPressNavi, WordPressWebcomic
|
from .common import ComicControlScraper, WordPressScraper, WordPressNavi, WordPressWebcomic
|
||||||
|
@@ -237,15 +237,22 @@ class ShotgunShuffle(WordPressScraper):
|
||||||
firstStripUrl = url + 'comic/pilot/'
|
firstStripUrl = url + 'comic/pilot/'
|
||||||
|
|
||||||
|
|
||||||
class SinFest(_ParserScraper):
|
class SinFest(ParserScraper):
|
||||||
url = 'https://www.sinfest.net/'
|
END_HTML_TAG = compile(r'</html>')
|
||||||
|
|
||||||
|
url = 'https://sinfest.xyz/'
|
||||||
stripUrl = url + 'view.php?date=%s'
|
stripUrl = url + 'view.php?date=%s'
|
||||||
firstStripUrl = stripUrl % '2000-01-17'
|
firstStripUrl = stripUrl % '2000-01-17'
|
||||||
imageSearch = '//img[contains(@src, "btphp/comics/")]'
|
imageSearch = '//img[contains(@src, "btphp/comics/")]'
|
||||||
textSearch = imageSearch + '/@alt'
|
textSearch = imageSearch + '/@alt'
|
||||||
prevSearch = '//a[./img[contains(@src, "images/prev")]]'
|
prevSearch = '//a[d:class("prev")]'
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
# Remove HTML end tag confusing our parser
|
||||||
|
def _parse_page(self, data):
|
||||||
|
data = self.END_HTML_TAG.sub('', data)
|
||||||
|
return super()._parse_page(data)
|
||||||
|
|
||||||
|
|
||||||
class SisterClaire(ComicControlScraper):
|
class SisterClaire(ComicControlScraper):
|
||||||
url = 'https://www.sisterclaire.com/comic/'
|
url = 'https://www.sisterclaire.com/comic/'
|
||||||
|
|
Loading…
Reference in a new issue