dosage/scripts/comicskingdom.py

#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
# Copyright (C) 2019-2021 Tobias Gruetzmacher
# Copyright (C) 2019 Thomas W. Littauer
"""
Script to get a list of comicskingdom.com comics and save the info in a JSON
file for further processing.
"""

from scriptutil import ComicListUpdater


class ComicsKingdomUpdater(ComicListUpdater):
    """Collect the comics linked from the comicskingdom.com front page.

    Relies on ``get_url``, ``add_comic`` and ``run`` inherited from
    ``ComicListUpdater``; this subclass only supplies the site-specific
    scraping and the text of one generated entry.
    """

    # Name templates that point at other comic modules.
    # NOTE(review): presumably consumed by ComicListUpdater to spot comics
    # that are already provided elsewhere -- confirm in scriptutil.
    dup_templates = (
        "Creators/%s",
        "DrunkDuck/%s",
        "GoComics/%s",
        "KeenSpot/%s",
        "ComicGenesis/%s",
    )

    # names of comics to exclude (currently none)
    excluded_comics = ()

    def handle_url(self, url):
        """Parse one listing page and register every comic linked on it.

        Every ``<li>`` below a ``ul.comic-link-group`` element is inspected.
        The text of its first link becomes the comic name and the last path
        segment of the link's ``href`` becomes the comic path; both are
        handed to ``self.add_comic``. List items that carry no usable link
        are skipped instead of aborting the whole run.
        """
        data = self.get_url(url)

        for comicdiv in data.cssselect('ul.comic-link-group li'):
            links = comicdiv.cssselect('a')
            if not links:
                # No anchor in this list item, so there is nothing to add.
                continue

            # Query the anchor once and reuse it for both href and text.
            comiclink = links[0]
            comicurl = comiclink.attrib.get('href')
            if not comicurl:
                # An anchor without a target cannot yield a comic path.
                continue

            # [-1] equals [1] whenever the href contains a '/', but does not
            # raise IndexError for an href without any slash.
            self.add_comic(comiclink.text, comicurl.rsplit('/', 1)[-1])

    def collect_results(self):
        """Collect comics from the site's front page (the only page parsed)."""
        self.handle_url('https://www.comicskingdom.com/')

    def get_entry(self, name, path):
        """Return the source line for one comic: ``cls('<name>', '<path>'),``."""
        return "cls('%s', '%s')," % (name, path)


# Only scrape when executed as a script, never on import.
if __name__ == '__main__':
    # The script's own path is handed to the updater's constructor.
    updater = ComicsKingdomUpdater(__file__)
    updater.run()
Update file headers The default encoding for source files is UTF-8 since Python 3, so we can drop all encoding headers. While we are at it, just replace them with SPDX headers. 2020-04-18 11:45:44 +00:00			`#!/usr/bin/env python3`
			`# SPDX-License-Identifier: MIT`
Remove SmackJeeves It was closed at the end of 2020... 2021-01-10 18:18:45 +00:00			`# Copyright (C) 2019-2021 Tobias Gruetzmacher`
Add ComicsKingdom.com (aka King Features) (#134) Note that going back more than about seven days is a paid feature and is untested. 2019-12-16 22:18:04 +00:00			`# Copyright (C) 2019 Thomas W. Littauer`
			`"""`
Simplify ComicsKingdom extraction a bit 2019-12-16 22:47:14 +00:00			`Script to get a list of comicskingdom.com comics and save the info in a JSON`
			`file for further processing.`
Add ComicsKingdom.com (aka King Features) (#134) Note that going back more than about seven days is a paid feature and is untested. 2019-12-16 22:18:04 +00:00			`"""`

			`from scriptutil import ComicListUpdater`

Simplify ComicsKingdom extraction a bit 2019-12-16 22:47:14 +00:00
Add ComicsKingdom.com (aka King Features) (#134) Note that going back more than about seven days is a paid feature and is untested. 2019-12-16 22:18:04 +00:00			`class ComicsKingdomUpdater(ComicListUpdater):`
			`dup_templates = ("Creators/%s", "DrunkDuck/%s", "GoComics/%s",`
Remove SmackJeeves It was closed at the end of 2020... 2021-01-10 18:18:45 +00:00			`"KeenSpot/%s", "ComicGenesis/%s")`
Add ComicsKingdom.com (aka King Features) (#134) Note that going back more than about seven days is a paid feature and is untested. 2019-12-16 22:18:04 +00:00
			`# names of comics to exclude`
			`excluded_comics = (`
			`)`

			`def handle_url(self, url):`
			`"""Parse one listing page."""`
			`data = self.get_url(url)`

			`for comicdiv in data.cssselect('ul.comic-link-group li'):`
			`comiclink = comicdiv.cssselect('a')[0]`
			`comicurl = comiclink.attrib['href']`
			`name = comicdiv.cssselect('a')[0].text`
Simplify ComicsKingdom extraction a bit 2019-12-16 22:47:14 +00:00
Add ComicsKingdom.com (aka King Features) (#134) Note that going back more than about seven days is a paid feature and is untested. 2019-12-16 22:18:04 +00:00			`self.add_comic(name, comicurl.rsplit('/', 1)[1])`

			`def collect_results(self):`
			`"""Parse all search result pages."""`
			`self.handle_url('https://www.comicskingdom.com/')`

			`def get_entry(self, name, path):`
Simplify ComicsKingdom extraction a bit 2019-12-16 22:47:14 +00:00			`return u"cls('%s', '%s')," % (name, path)`
Add ComicsKingdom.com (aka King Features) (#134) Note that going back more than about seven days is a paid feature and is untested. 2019-12-16 22:18:04 +00:00

			`if __name__ == '__main__':`
			`ComicsKingdomUpdater(__file__).run()`