# dosage/dosagelib/plugins/h.py

# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2022 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring
from re import compile, escape

from ..scraper import BasicScraper, ParserScraper
from ..util import tagre
from ..helpers import bounceStarter, indirectStarter
from .common import ComicControlScraper, WordPressScraper, WordPressNaviIn


class Hackles(ParserScraper):
    endOfLife = True

    # Strip pages are selected through the "request" query parameter;
    # request=1 is configured as the first strip.
    url = 'http://hackles.org/'
    stripUrl = url + 'cgi-bin/archives.pl?request=%s'
    firstStripUrl = stripUrl % '1'

    # XPath expressions: the back link is matched by its literal link text,
    # the strip image by a "strips/" component in its src attribute.
    prevSearch = '//a[text()="< previous"]'
    imageSearch = '//img[contains(@src, "strips/")]'


class HagarTheHorrible(BasicScraper):
    # `url` names hagarthehorrible.net, but every page actually requested
    # below (strip pages and the starter overview) is on hagardunor.net.
    url = 'http://www.hagarthehorrible.net/'
    help = 'Index format: number'
    multipleImagesPerStrip = True

    stripUrl = 'http://www.hagardunor.net/comicstrips_us.php?serietype=9&colortype=1&serieno=%s'
    firstStripUrl = stripUrl % '1'

    # Relative gallery link; used by prevSearch and again by starter().
    prevUrl = r'(comicstrips_us\.php\?serietype\=9\&colortype\=1\&serieno\=\d+)'
    prevSearch = compile(tagre("a", "href", prevUrl, after="Previous"))
    imageSearch = compile(tagre(
        "img", "src",
        r'(stripus\d+/(?:Hagar_The_Horrible_?|h)\d+[^ >]+)',
        quote=""))

    def starter(self):
        """Return the last gallery link found on the comics overview page."""
        overviewUrl = 'http://www.hagardunor.net/comics.php'
        page = self.getPage(overviewUrl)
        galleryLink = compile(tagre("a", "href", self.prevUrl))
        galleries = self.fetchUrls(overviewUrl, page, galleryLink)
        # Raises IndexError if the overview page yields no gallery links.
        return galleries[-1]


class HarkAVagrant(BasicScraper):
    url = 'http://www.harkavagrant.com/'
    stripUrl = url + 'index.php?id=%s'
    firstStripUrl = stripUrl % '1'
    help = 'Index format: number'
    starter = bounceStarter

    # Regex-escaped base URL, embedded in the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%s[^"]+)' % rurl, after='BORDER')
    )
    # Navigation links are <a> tags directly followed by the matching
    # button image.
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl)
        + tagre("img", "src", "buttonprevious.png")
    )
    nextSearch = compile(
        tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl)
        + tagre("img", "src", "buttonnext.png")
    )

    def namer(self, image_url, page_url):
        """Return '<id>-<image file name>' for a strip image.

        The id is the text after the last '=' in page_url and the file name
        is the text after the last '/' in image_url. An IndexError is raised
        if either separator is missing.
        """
        basename = image_url.rsplit('/', 1)[1]
        strip_id = page_url.rsplit('=', 1)[1]
        return f'{strip_id}-{basename}'


class HavocInc(WordPressScraper):
    url = 'http://www.radiocomix.com/havoc-inc/'
    # Strip pages live at comic/<index>/ below the base URL.
    stripUrl = url + 'comic/%s/'
    # 'havoc-cover' is the index configured for the first strip.
    firstStripUrl = stripUrl % 'havoc-cover'


class HeadlessBliss(ComicControlScraper):
    # Only the base URL is set here; all other scraper settings are
    # inherited from ComicControlScraper.
    url = 'http://headlessbliss.com/'


class Hellkats(ParserScraper):
    adult = True

    url = 'https://poecatcomix.com/hellkatscomic/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'hellkats-issue-1-cover'

    # NOTE(review): presumably indirectStarter follows the latestSearch link
    # from `url` to reach the newest strip - confirm in ..helpers.
    starter = indirectStarter
    latestSearch = '//div[@class="post-title"]//a'

    imageSearch = '//img[@class="scale-with-grid wp-post-image"]'
    prevSearch = '//a[d:class("fixed-nav-prev")]'

    def namer(self, imageUrl, pageUrl):
        """Return '<page slug>.<image extension>'.

        The slug is the second-to-last of the pieces obtained by splitting
        pageUrl on its last two '/' characters, i.e. the final path segment
        when the URL ends with a slash. The extension is whatever follows
        the last '.' in imageUrl.
        """
        slug = pageUrl.rsplit('/', 2)[1]
        extension = imageUrl.rsplit('.', 1)[-1]
        return f'{slug}.{extension}'


class HeyFox(WordPressScraper):
    adult = True

    url = 'http://www.steamclaw.com/heyfox/'
    # Strip pages live at archives/comic/<index> below the base URL; the
    # first strip uses the index '11092004'.
    stripUrl = url + 'archives/comic/%s'
    firstStripUrl = stripUrl % '11092004'


class HeyKitty(WordPressScraper):
    url = 'http://heykittycomic.com/'
    # Strips are addressed through the "comic" query parameter.
    stripUrl = url + '?comic=%s'
    # 'it-begins' is the index configured for the first strip.
    firstStripUrl = stripUrl % 'it-begins'


class Hipsters(WordPressScraper):
    url = 'http://www.hipsters-comic.com/'
    # Built from `url` so the host name is spelled out only once.
    firstStripUrl = url + 'comic/hip01/'


class HijinksEnsue(WordPressNaviIn):
    url = 'http://hijinksensue.com/'
    # Built from `url` so the host name is spelled out only once.
    firstStripUrl = url + 'comic/who-is-your-daddy-and-what-does-he-do/'

    # NOTE(review): presumably indirectStarter follows the link matched by
    # latestSearch to reach the newest strip - confirm in ..helpers.
    starter = indirectStarter
    latestSearch = '//a[text()="Latest HijiNKS ENSUE"]'


class HijinksEnsueClassic(WordPressNaviIn):
    endOfLife = True
    # Both URLs are fixed strip pages on hijinksensue.com.
    # NOTE(review): presumably `url` is the final strip of this series and
    # firstStripUrl its first - confirm against the site.
    firstStripUrl = 'http://hijinksensue.com/comic/a-soul-as-black-as-eyeliner/'
    url = 'http://hijinksensue.com/comic/open-your-eyes/'


class HijinksEnsueConvention(WordPressNaviIn):
    endOfLife = True
    # Both URLs are fixed strip pages on hijinksensue.com.
    # NOTE(review): presumably `url` is the final strip of this series and
    # firstStripUrl its first - confirm against the site.
    firstStripUrl = 'http://hijinksensue.com/comic/whatever-dad-im-outta-here/'
    url = 'http://hijinksensue.com/comic/emerald-city-comicon-2015-fancy-sketches-part-4/'


class HijinksEnsuePhoto(WordPressNaviIn):
    endOfLife = True
    # Both URLs are fixed strip pages on hijinksensue.com.
    # NOTE(review): presumably `url` is the final strip of this series and
    # firstStripUrl its first - confirm against the site.
    firstStripUrl = 'http://hijinksensue.com/comic/san-diego-comic-con-fancy-picto-comic-pt-1/'
    url = 'http://hijinksensue.com/comic/emerald-city-comicon-2015-fancy-photo-comic-part-2/'


class HowToBeAWerewolf(ComicControlScraper):
    url = 'http://howtobeawerewolf.com/'
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % 'coming-february-3rd'

    def namer(self, imageUrl, pageUrl):
        """Return the image file name without its leading numeric prefix.

        The file name is the text after the last '/' in imageUrl. When it
        starts with a digit, everything up to and including the first '-'
        is dropped (e.g. '1549237000-page1.png' -> 'page1.png').

        A name that starts with a digit but has nothing after a '-' (no '-'
        at all, or a trailing one) is returned unchanged, and an empty name
        (imageUrl ending in '/') is returned as-is, rather than raising
        IndexError or producing an empty name.
        """
        filename = imageUrl.rsplit('/', 1)[-1]
        # Slice instead of index so an empty file name does not raise.
        if filename[:1].isdigit():
            # partition() never raises; `remainder` is '' when there is no
            # '-' or nothing follows it.
            _, _, remainder = filename.partition('-')
            if remainder:
                filename = remainder
        return filename
Update file headers The default encoding for source files is UTF-8 since Python 3, so we can drop all encoding headers. While we are at it, just replace them with SPDX headers. 2020-04-18 11:45:44 +00:00			`# SPDX-License-Identifier: MIT`
Fixup copyright years. 2016-10-28 22:21:41 +00:00			`# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs`
Updated copyright. 2014-01-05 15:50:57 +00:00			`# Copyright (C) 2012-2014 Bastian Kleineidam`
Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`# Copyright (C) 2015-2022 Tobias Gruetzmacher`
Add self to authors list, update copyright headers 2020-01-13 06:34:05 +00:00			`# Copyright (C) 2019-2020 Daniel Ring`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`from re import compile, escape`
Move all HijinksEnsue comics into alphabetic files. 2016-05-01 23:25:34 +00:00
Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`from ..scraper import BasicScraper, ParserScraper`
Convert starters and other helpers to new interface. This allows those starters to work with future scrapers. 2014-07-23 18:53:59 +00:00			`from ..util import tagre`
Move all HijinksEnsue comics into alphabetic files. 2016-05-01 23:25:34 +00:00			`from ..helpers import bounceStarter, indirectStarter`
Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`from .common import ComicControlScraper, WordPressScraper, WordPressNaviIn`
Added some comic strips and cleanup the scraper code. 2013-03-06 19:00:30 +00:00

Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class Hackles(ParserScraper):`
Add Hackles 2021-05-20 03:26:34 +00:00			`url = 'http://hackles.org/'`
			`stripUrl = url + 'cgi-bin/archives.pl?request=%s'`
			`firstStripUrl = stripUrl % '1'`
			`imageSearch = '//img[contains(@src, "strips/")]'`
			`prevSearch = '//a[text()="< previous"]'`
			`endOfLife = True`


Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class HagarTheHorrible(BasicScraper):`
Add HagarTheHorrible 2013-03-26 16:35:10 +00:00			`url = 'http://www.hagarthehorrible.net/'`
			`stripUrl = 'http://www.hagardunor.net/comicstrips_us.php?serietype=9&colortype=1&serieno=%s'`
			`firstStripUrl = stripUrl % '1'`
			`multipleImagesPerStrip = True`
Fix hagar. 2013-03-26 19:12:26 +00:00			`imageSearch = compile(tagre("img", "src", r'(stripus\d+/(?:Hagar_The_Horrible_?\|h)\d+[^ >]+)', quote=""))`
Add HagarTheHorrible 2013-03-26 16:35:10 +00:00			`prevUrl = r'(comicstrips_us\.php\?serietype\=9\&colortype\=1\&serieno\=\d+)'`
			`prevSearch = compile(tagre("a", "href", prevUrl, after="Previous"))`
			`help = 'Index format: number'`

Refactor: Convert starter to simple method. 2016-04-13 18:01:51 +00:00			`def starter(self):`
Add HagarTheHorrible 2013-03-26 16:35:10 +00:00			`"""Return last gallery link."""`
			`url = 'http://www.hagardunor.net/comics.php'`
Refactor: Convert starter to simple method. 2016-04-13 18:01:51 +00:00			`data = self.getPage(url)`
			`pattern = compile(tagre("a", "href", self.prevUrl))`
Stricter style checking & related style fixes 2020-10-11 18:15:27 +00:00			`return self.fetchUrls(url, data, pattern)[-1]`
Add HagarTheHorrible 2013-03-26 16:35:10 +00:00

Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class HarkAVagrant(BasicScraper):`
Added some comic strips and cleanup the scraper code. 2013-03-06 19:00:30 +00:00			`url = 'http://www.harkavagrant.com/'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`rurl = escape(url)`
Refactor: Convert starter to simple method. 2016-04-13 18:01:51 +00:00			`starter = bounceStarter`
Added some comic strips and cleanup the scraper code. 2013-03-06 19:00:30 +00:00			`stripUrl = url + 'index.php?id=%s'`
			`firstStripUrl = stripUrl % '1'`
Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-01 22:14:31 +00:00			`imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl,`
			`after='BORDER'))`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +`
Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-01 22:14:31 +00:00			`tagre("img", "src", "buttonprevious.png"))`
Read starter parameters from class. This allows to specify starters in a more declarative and dynamic way. 2016-04-12 21:11:39 +00:00			`nextSearch = compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +`
			`tagre("img", "src", "buttonnext.png"))`
Added some comic strips and cleanup the scraper code. 2013-03-06 19:00:30 +00:00			`help = 'Index format: number'`

Refactor: Make namer a method. When #42 is realized, the naming of files might differ between comic modules, so the namer's logical location is the instance, not the class. 2016-04-21 06:20:49 +00:00			`def namer(self, image_url, page_url):`
			`filename = image_url.rsplit('/', 1)[1]`
			`num = page_url.rsplit('=', 1)[1]`
Added some comic strips and cleanup the scraper code. 2013-03-06 19:00:30 +00:00			`return '%s-%s' % (num, filename)`
Initial commit to Github. 2012-06-20 19:58:13 +00:00

Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class HavocInc(WordPressScraper):`
Add HavocInc 2019-06-22 05:44:56 +00:00			`url = 'http://www.radiocomix.com/havoc-inc/'`
			`stripUrl = url + 'comic/%s/'`
			`firstStripUrl = stripUrl % 'havoc-cover'`


Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class HeadlessBliss(ComicControlScraper):`
Added Headless Bliss (#146) 2020-01-01 19:53:34 +00:00			`url = 'http://headlessbliss.com/'`


Add Hellkats 2023-06-07 07:05:18 +00:00			`class Hellkats(ParserScraper):`
			`url = 'https://poecatcomix.com/hellkatscomic/'`
			`stripUrl = url + '%s/'`
			`firstStripUrl = stripUrl % 'hellkats-issue-1-cover'`
			`imageSearch = '//img[@class="scale-with-grid wp-post-image"]'`
			`prevSearch = '//a[d:class("fixed-nav-prev")]'`
			`latestSearch = '//div[@class="post-title"]//a'`
			`starter = indirectStarter`
			`adult = True`

			`def namer(self, imageUrl, pageUrl):`
			`return pageUrl.rsplit('/', 2)[1] + '.' + imageUrl.rsplit('.', 1)[-1]`


Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class HeyFox(WordPressScraper):`
Add HeyFox 2019-07-13 03:47:26 +00:00			`url = 'http://www.steamclaw.com/heyfox/'`
			`stripUrl = url + 'archives/comic/%s'`
			`firstStripUrl = stripUrl % '11092004'`
			`adult = True`


Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class HeyKitty(WordPressScraper):`
Add HeyKitty 2019-07-13 04:50:09 +00:00			`url = 'http://heykittycomic.com/'`
			`stripUrl = url + '?comic=%s'`
			`firstStripUrl = stripUrl % 'it-begins'`


Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class Hipsters(WordPressScraper):`
Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-01 22:14:31 +00:00			`url = 'http://www.hipsters-comic.com/'`
			`firstStripUrl = 'http://www.hipsters-comic.com/comic/hip01/'`
Move all HijinksEnsue comics into alphabetic files. 2016-05-01 23:25:34 +00:00

Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class HijinksEnsue(WordPressNaviIn):`
Move all HijinksEnsue comics into alphabetic files. 2016-05-01 23:25:34 +00:00			`url = 'http://hijinksensue.com/'`
			`latestSearch = '//a[text()="Latest HijiNKS ENSUE"]'`
			`firstStripUrl = 'http://hijinksensue.com/comic/who-is-your-daddy-and-what-does-he-do/'`
			`starter = indirectStarter`


Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class HijinksEnsueClassic(WordPressNaviIn):`
Move all HijinksEnsue comics into alphabetic files. 2016-05-01 23:25:34 +00:00			`url = 'http://hijinksensue.com/comic/open-your-eyes/'`
			`firstStripUrl = 'http://hijinksensue.com/comic/a-soul-as-black-as-eyeliner/'`
			`endOfLife = True`


Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class HijinksEnsueConvention(WordPressNaviIn):`
Move all HijinksEnsue comics into alphabetic files. 2016-05-01 23:25:34 +00:00			`url = 'http://hijinksensue.com/comic/emerald-city-comicon-2015-fancy-sketches-part-4/'`
			`firstStripUrl = 'http://hijinksensue.com/comic/whatever-dad-im-outta-here/'`
			`endOfLife = True`


Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class HijinksEnsuePhoto(WordPressNaviIn):`
Move all HijinksEnsue comics into alphabetic files. 2016-05-01 23:25:34 +00:00			`url = 'http://hijinksensue.com/comic/emerald-city-comicon-2015-fancy-photo-comic-part-2/'`
			`firstStripUrl = 'http://hijinksensue.com/comic/san-diego-comic-con-fancy-picto-comic-pt-1/'`
			`endOfLife = True`
Add HowToBeAWerewolf 2019-06-19 03:50:58 +00:00

Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class HowToBeAWerewolf(ComicControlScraper):`
Add HowToBeAWerewolf 2019-06-19 03:50:58 +00:00			`url = 'http://howtobeawerewolf.com/'`
			`stripUrl = url + 'comic/%s'`
			`firstStripUrl = stripUrl % 'coming-february-3rd'`

			`def namer(self, imageUrl, pageUrl):`
			`filename = imageUrl.rsplit('/', 1)[-1]`
			`if filename[0].isdigit():`
			`filename = filename.split('-', 1)[1]`
			`return filename`