dosage/dosagelib/plugins/tapas.py

# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
# SPDX-FileCopyrightText: © 2019 Daniel Ring
from ..output import out
from ..scraper import ParserScraper


class Tapas(ParserScraper):
    """Scraper for comic series hosted on tapas.io.

    One instance is created per series (see ``getmodules``). Episode
    navigation goes through JSON endpoints of the site, while the images
    themselves are located in the episode page via ``imageSearch``.
    """

    baseUrl = 'https://tapas.io/'
    imageSearch = '//article[contains(@class, "js-episode-article")]//img/@data-src'
    prevSearch = '//a[contains(@class, "js-prev-ep-btn")]'
    latestSearch = '//ul[contains(@class, "js-episode-list")]//a'
    # An episode page may contain several images; namer() numbers them.
    multipleImagesPerStrip = True

    def __init__(self, name, url):
        """Set up the scraper for a single series.

        name -- comic name; the scraper is registered as ``Tapas/<name>``.
        url -- series identifier as used in ``<baseUrl>series/<url>/info``.
        """
        super().__init__('Tapas/' + name)
        self.url = self.baseUrl + 'series/' + url + '/info'
        self.stripUrl = self.baseUrl + 'episode/%s'

    def starter(self):
        """Return the URL of the episode to start from.

        The series id is read from the info page and used to request the
        episode list sorted with ``sort=NEWEST``; the first ``data-id``
        found in the returned markup is turned into an episode URL.
        """
        # Retrieve comic metadata from info page
        info = self.getPage(self.url)
        series = self.match(info, '//@data-series-id')[0]
        # Retrieve comic metadata from API
        response = self.session.get(self.baseUrl + 'series/' + series + '/episodes?sort=NEWEST')
        response.raise_for_status()
        # 'body' is handled as a string containing data-id="..." attributes
        # (presumably an HTML fragment of the episode list -- TODO confirm).
        episodes = response.json()['data']['body']
        return self.stripUrl % episodes.split('data-id="')[1].split('"')[0]

    def getPrevUrl(self, url, data):
        """Return the URL of the episode preceding ``url``.

        The previous episode id is read from the JSON served at
        ``<url>/info``; the parsed page ``data`` is not used.
        """
        # Retrieve comic metadata from API
        response = self.session.get(url + '/info')
        response.raise_for_status()
        apiData = response.json()['data']
        prevUrl = self.stripUrl % apiData['prev_ep_id']
        # NOTE(review): 'scene' looks like the episode's position within the
        # series, so the predecessor of scene 2 is remembered as the first
        # strip -- confirm against the API.
        if apiData['scene'] == 2:
            self.firstStripUrl = prevUrl
        return prevUrl

    def extract_image_urls(self, url, data):
        """Return the image URLs of a page, remembering them for namer()."""
        # Save link order for position-based filenames
        self._cached_image_urls = super().extract_image_urls(url, data)
        return self._cached_image_urls

    def shouldSkipUrl(self, url, data):
        """Return True (after printing a warning) if the page requires a login."""
        if self.match(data, '//button[d:class("js-have-to-sign")]'):
            out.warn(f'Nothing to download on "{url}", because a login is required.')
            return True
        return False

    def namer(self, imageUrl, pageUrl):
        """Build a file name from the episode id and the image position.

        imageUrl -- URL of the image; must be one of the URLs returned by
            the preceding extract_image_urls() call.
        pageUrl -- URL of the episode page; its last path segment is used
            as the episode identifier.

        Returns ``<episode>-<index>.<ext>`` when the page has more than one
        image and ``<episode>.<ext>`` otherwise. The extension is omitted
        if the image URL does not have one.
        """
        # Construct filename from episode number and image position on page
        episodeNum = pageUrl.rsplit('/', 1)[-1]
        imageNum = self._cached_image_urls.index(imageUrl)
        # The extension has to come from the image URL: episode page URLs
        # built from stripUrl end in the episode id and carry no extension.
        # Query string and fragment are dropped before looking for the dot.
        imageName = imageUrl.split('?', 1)[0].split('#', 1)[0].rsplit('/', 1)[-1]
        _, dot, imageExt = imageName.rpartition('.')
        if len(self._cached_image_urls) > 1:
            filename = "%s-%d" % (episodeNum, imageNum)
        else:
            filename = episodeNum
        if dot:
            filename = "%s.%s" % (filename, imageExt)
        return filename

    @classmethod
    def getmodules(cls):
        """Return one scraper instance per supported Tapas series."""
        return (
            # Manually-added comics
            cls('AmpleTime', 'Ample-Time'),
            cls('FANGS', 'fangscomic'),
            cls('FishNuggets', 'Fish-Nuggets'),
            cls('Ginpu', 'Ginpu-Studios-Comics'),
            cls('HoneyAndTheMoon', 'Honey-and-the-Moon'),
            cls('InsignificantOtters', 'IOtters'),
            cls('MagicalBoy', 'magicalboy'),
            cls('NoFuture', 'NoFuture'),
            cls('OrensForge', 'OrensForge'),
            cls('RadioactivePanda', 'Radioactive-Panda'),
            cls('RavenWolf', 'RavenWolf'),
            cls('SyntheticInstinct', 'Synthetic-Instinct'),
            cls('TheCatTheVineAndTheVictory', 'The-Cat-The-Vine-and-The-Victory'),
            cls('TheInkApprentice', 'The-Ink-Apprentice'),
            cls('TheSeaInYou', 'theseainyou'),
            cls('TheSelkiesSkin', 'theselkiesskincomic'),
            cls('TheWitchsThrone', 'thewitchsthrone'),
            cls('VenturaCityDrifters', 'Ventura-City-Drifters'),

            # START AUTOUPDATE
            # END AUTOUPDATE
        )
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`# SPDX-License-Identifier: MIT`
Extend scraper API with a extract_image_urls method This is just a light wrapper around fetchUrls, but frees comic modules from second-guessing for what purpose fetchUrls was called when they are overriding that API - And yes, some comic modules already got this wrong, they are now all fixed. 2023-06-10 13:05:57 +00:00			`# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher`
			`# SPDX-FileCopyrightText: © 2019 Daniel Ring`
Print a warning if a Tapas page needs a login 2022-06-06 00:27:22 +00:00			`from ..output import out`
Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`from ..scraper import ParserScraper`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00

Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class Tapas(ParserScraper):`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`baseUrl = 'https://tapas.io/'`
Fix Tapastic scraper 2020-04-07 08:18:20 +00:00			`imageSearch = '//article[contains(@class, "js-episode-article")]//img/@data-src'`
			`prevSearch = '//a[contains(@class, "js-prev-ep-btn")]'`
			`latestSearch = '//ul[contains(@class, "js-episode-list")]//a'`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`multipleImagesPerStrip = True`

			`def __init__(self, name, url):`
Rename Tapastic to Tapas The site is know just as "Tapas" since longer then Dosage has support for it. Since the module was merged just recently, this rename shouldn't affect many users... 2022-06-05 23:54:22 +00:00			`super().__init__('Tapas/' + name)`
Fix Tapastic scraper 2020-06-20 06:12:53 +00:00			`self.url = self.baseUrl + 'series/' + url + '/info'`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`self.stripUrl = self.baseUrl + 'episode/%s'`

Fix Tapastic scraper for comics with episode list in ascending order 2021-01-21 09:41:26 +00:00			`def starter(self):`
			`# Retrieve comic metadata from info page`
			`info = self.getPage(self.url)`
Unify XPath NS config over modules 2024-03-17 20:44:46 +00:00			`series = self.match(info, '//@data-series-id')[0]`
Fix Tapastic scraper for comics with episode list in ascending order 2021-01-21 09:41:26 +00:00			`# Retrieve comic metadata from API`
			`data = self.session.get(self.baseUrl + 'series/' + series + '/episodes?sort=NEWEST')`
			`data.raise_for_status()`
			`episodes = data.json()['data']['body']`
			`return self.stripUrl % episodes.split('data-id="')[1].split('"')[0]`

Fix Tapastic scraper 2020-06-20 06:12:53 +00:00			`def getPrevUrl(self, url, data):`
			`# Retrieve comic metadata from API`
			`data = self.session.get(url + '/info')`
			`data.raise_for_status()`
			`apiData = data.json()['data']`
			`if apiData['scene'] == 2:`
			`self.firstStripUrl = self.stripUrl % apiData['prev_ep_id']`
			`return self.stripUrl % apiData['prev_ep_id']`

Extend scraper API with a extract_image_urls method This is just a light wrapper around fetchUrls, but frees comic modules from second-guessing for what purpose fetchUrls was called when they are overriding that API - And yes, some comic modules already got this wrong, they are now all fixed. 2023-06-10 13:05:57 +00:00			`def extract_image_urls(self, url, data):`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`# Save link order for position-based filenames`
Extend scraper API with a extract_image_urls method This is just a light wrapper around fetchUrls, but frees comic modules from second-guessing for what purpose fetchUrls was called when they are overriding that API - And yes, some comic modules already got this wrong, they are now all fixed. 2023-06-10 13:05:57 +00:00			`self._cached_image_urls = super().extract_image_urls(url, data)`
			`return self._cached_image_urls`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00
Print a warning if a Tapas page needs a login 2022-06-06 00:27:22 +00:00			`def shouldSkipUrl(self, url, data):`
Unify XPath NS config over modules 2024-03-17 20:44:46 +00:00			`if self.match(data, '//button[d:class("js-have-to-sign")]'):`
Print a warning if a Tapas page needs a login 2022-06-06 00:27:22 +00:00			`out.warn(f'Nothing to download on "{url}", because a login is required.')`
			`return True`
			`return False`

Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`def namer(self, imageUrl, pageUrl):`
			`# Construct filename from episode number and image position on page`
			`episodeNum = pageUrl.rsplit('/', 1)[-1]`
Extend scraper API with a extract_image_urls method This is just a light wrapper around fetchUrls, but frees comic modules from second-guessing for what purpose fetchUrls was called when they are overriding that API - And yes, some comic modules already got this wrong, they are now all fixed. 2023-06-10 13:05:57 +00:00			`imageNum = self._cached_image_urls.index(imageUrl)`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`imageExt = pageUrl.rsplit('.', 1)[-1]`
Extend scraper API with a extract_image_urls method This is just a light wrapper around fetchUrls, but frees comic modules from second-guessing for what purpose fetchUrls was called when they are overriding that API - And yes, some comic modules already got this wrong, they are now all fixed. 2023-06-10 13:05:57 +00:00			`if len(self._cached_image_urls) > 1:`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`filename = "%s-%d.%s" % (episodeNum, imageNum, imageExt)`
			`else:`
			`filename = "%s.%s" % (episodeNum, imageExt)`
			`return filename`

			`@classmethod`
			`def getmodules(cls):`
			`return (`
			`# Manually-added comics`
Add Tapastic/AmpleTime 2020-04-25 02:48:54 +00:00			`cls('AmpleTime', 'Ample-Time'),`
Add some more comics to the Tapas module 2022-06-06 00:31:08 +00:00			`cls('FANGS', 'fangscomic'),`
			`cls('FishNuggets', 'Fish-Nuggets'),`
Add Tapastic/Ginpu 2023-06-07 04:06:24 +00:00			`cls('Ginpu', 'Ginpu-Studios-Comics'),`
Add some more comics to the Tapas module 2022-06-06 00:31:08 +00:00			`cls('HoneyAndTheMoon', 'Honey-and-the-Moon'),`
Fix InsignificantOtters 2021-01-21 09:43:43 +00:00			`cls('InsignificantOtters', 'IOtters'),`
Add some more comics to the Tapas module 2022-06-06 00:31:08 +00:00			`cls('MagicalBoy', 'magicalboy'),`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`cls('NoFuture', 'NoFuture'),`
			`cls('OrensForge', 'OrensForge'),`
Add some more comics to the Tapas module 2022-06-06 00:31:08 +00:00			`cls('RadioactivePanda', 'Radioactive-Panda'),`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`cls('RavenWolf', 'RavenWolf'),`
Add Tapastic/SyntheticInstinct 2021-03-16 07:55:44 +00:00			`cls('SyntheticInstinct', 'Synthetic-Instinct'),`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`cls('TheCatTheVineAndTheVictory', 'The-Cat-The-Vine-and-The-Victory'),`
Add some more comics to the Tapas module 2022-06-06 00:31:08 +00:00			`cls('TheInkApprentice', 'The-Ink-Apprentice'),`
			`cls('TheSeaInYou', 'theseainyou'),`
			`cls('TheSelkiesSkin', 'theselkiesskincomic'),`
			`cls('TheWitchsThrone', 'thewitchsthrone'),`
Add Tapastic/VenturaCityDrifters 2021-11-28 06:01:50 +00:00			`cls('VenturaCityDrifters', 'Ventura-City-Drifters'),`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00
			`# START AUTOUPDATE`
			`# END AUTOUPDATE`
			`)`