dosage/scripts/comicskingdom.py

#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
# SPDX-FileCopyrightText: © 2019 Thomas W. Littauer
"""
Script to get a list of comicskingdom.com comics and save the info in a JSON
file for further processing.
"""

from scriptutil import ComicListUpdater
from dosagelib.xml import NS


class ComicsKingdomUpdater(ComicListUpdater):
    """Build the comic list from the comicskingdom.com features page."""

    # "<Module>/%s" name templates of other modules.
    # NOTE(review): presumably used by ComicListUpdater to spot comics that
    # are already provided elsewhere - confirm in scriptutil.
    dup_templates = (
        "Creators/%s",
        "GoComics/%s",
        "KeenSpot/%s",
        "ComicGenesis/%s",
    )

    def handle_listing(self, page):
        """Register every comic linked from the index list of ``page``.

        Each anchor inside a ``<ul>`` matching ``d:class("index")`` is added
        via ``add_comic`` with its link text (minus a leading ``'The '``) as
        the name and a ``(href, lang)`` tuple as data. ``lang`` is ``'es'``
        when the name contains ``' (Spanish)'`` and ``None`` otherwise.
        """
        anchors = page.xpath('//ul[d:class("index")]//a', namespaces=NS)
        for anchor in anchors:
            title = anchor.text_content().removeprefix('The ')
            target = anchor.attrib['href']
            if ' (Spanish)' in title:
                language = 'es'
            else:
                language = None

            self.add_comic(title, (target, language))

    def collect_results(self):
        """Fetch the features overview page and process its listing."""
        features_page = self.get_url('https://comicskingdom.com/features')
        self.handle_listing(features_page)

    def get_entry(self, name: str, data: tuple[str, str]):
        """Return the ``cls(...)`` source line for one comic.

        ``data`` is the ``(url, lang)`` tuple stored by ``handle_listing``;
        note that ``lang`` may be ``None`` there, in which case the ``lang``
        argument is left out of the generated line.
        """
        lang_arg = f", lang='{data[1]}'" if data[1] else ''
        # Fourth '/'-separated piece of the URL, i.e. the first path segment
        # of an absolute 'scheme://host/segment' URL.
        # NOTE(review): assumes the stored hrefs are absolute - confirm.
        slug = data[0].split('/')[3]
        return f"cls('{name}', '{slug}'{lang_arg}),"


if __name__ == '__main__':
    # Only start the update when executed as a script; the updater is given
    # this script's own path.
    updater = ComicsKingdomUpdater(__file__)
    updater.run()
Update file headers The default encoding for source files is UTF-8 since Python 3, so we can drop all encoding headers. While we are at it, just replace them with SPDX headers. 2020-04-18 11:45:44 +00:00			`#!/usr/bin/env python3`
			`# SPDX-License-Identifier: MIT`
Adapt to new ComicsKingdom layout (fixes #307) 2024-03-17 18:18:35 +00:00			`# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher`
			`# SPDX-FileCopyrightText: © 2019 Thomas W. Littauer`
Add ComicsKingdom.com (aka King Features) (#134) Note that going back more than about seven days is a paid feature and is untested. 2019-12-16 22:18:04 +00:00			`"""`
Simplify ComicsKingdom extraction a bit 2019-12-16 22:47:14 +00:00			`Script to get a list of comicskingdom.com comics and save the info in a JSON`
			`file for further processing.`
Add ComicsKingdom.com (aka King Features) (#134) Note that going back more than about seven days is a paid feature and is untested. 2019-12-16 22:18:04 +00:00			`"""`

			`from scriptutil import ComicListUpdater`
Search for spanish comics on ComicsKingdom (fixes #165) 2022-06-05 22:20:12 +00:00			`from dosagelib.xml import NS`
Add ComicsKingdom.com (aka King Features) (#134) Note that going back more than about seven days is a paid feature and is untested. 2019-12-16 22:18:04 +00:00
Simplify ComicsKingdom extraction a bit 2019-12-16 22:47:14 +00:00
Add ComicsKingdom.com (aka King Features) (#134) Note that going back more than about seven days is a paid feature and is untested. 2019-12-16 22:18:04 +00:00			`class ComicsKingdomUpdater(ComicListUpdater):`
Search for spanish comics on ComicsKingdom (fixes #165) 2022-06-05 22:20:12 +00:00			`dup_templates = (`
			`"Creators/%s",`
			`"GoComics/%s",`
			`"KeenSpot/%s",`
			`"ComicGenesis/%s",`
Add ComicsKingdom.com (aka King Features) (#134) Note that going back more than about seven days is a paid feature and is untested. 2019-12-16 22:18:04 +00:00			`)`

Adapt to new ComicsKingdom layout (fixes #307) 2024-03-17 18:18:35 +00:00			`def handle_listing(self, page):`
			`for link in page.xpath('//ul[d:class("index")]//a', namespaces=NS):`
			`name = link.text_content().removeprefix('The ')`
Search for spanish comics on ComicsKingdom (fixes #165) 2022-06-05 22:20:12 +00:00			`url = link.attrib['href']`
Adapt to new ComicsKingdom layout (fixes #307) 2024-03-17 18:18:35 +00:00			`lang = 'es' if ' (Spanish)' in name else None`
Search for spanish comics on ComicsKingdom (fixes #165) 2022-06-05 22:20:12 +00:00
Adapt to new ComicsKingdom layout (fixes #307) 2024-03-17 18:18:35 +00:00			`self.add_comic(name, (url, lang))`
Add ComicsKingdom.com (aka King Features) (#134) Note that going back more than about seven days is a paid feature and is untested. 2019-12-16 22:18:04 +00:00
			`def collect_results(self):`
			`"""Parse all search result pages."""`
Adapt to new ComicsKingdom layout (fixes #307) 2024-03-17 18:18:35 +00:00			`self.handle_listing(self.get_url('https://comicskingdom.com/features'))`
Search for spanish comics on ComicsKingdom (fixes #165) 2022-06-05 22:20:12 +00:00
			`def get_entry(self, name: str, data: tuple[str, str]):`
			`opt = f", lang='{data[1]}'" if data[1] else ''`
			`return f"cls('{name}', '{data[0].split('/')[3]}'{opt}),"`
Add ComicsKingdom.com (aka King Features) (#134) Note that going back more than about seven days is a paid feature and is untested. 2019-12-16 22:18:04 +00:00

			`if __name__ == '__main__':`
			`ComicsKingdomUpdater(__file__).run()`