2020-04-18 11:45:44 +00:00
|
|
|
# SPDX-License-Identifier: MIT
|
2016-10-28 22:21:41 +00:00
|
|
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
2014-01-05 15:50:57 +00:00
|
|
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
2020-01-13 06:34:05 +00:00
|
|
|
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
|
|
|
# Copyright (C) 2019-2020 Daniel Ring
|
2013-04-10 16:19:11 +00:00
|
|
|
from re import compile, escape
|
2016-05-01 23:25:34 +00:00
|
|
|
|
2021-05-20 03:26:34 +00:00
|
|
|
from ..scraper import _BasicScraper, _ParserScraper
|
2014-07-23 18:53:59 +00:00
|
|
|
from ..util import tagre
|
2016-05-01 23:25:34 +00:00
|
|
|
from ..helpers import bounceStarter, indirectStarter
|
2019-06-19 03:50:58 +00:00
|
|
|
from .common import _ComicControlScraper, _WordPressScraper, _WPNaviIn
|
2013-03-06 19:00:30 +00:00
|
|
|
|
|
|
|
|
2021-05-20 03:26:34 +00:00
|
|
|
class Hackles(_ParserScraper):
    """Scraper for the comic hosted at hackles.org (flagged end-of-life)."""

    endOfLife = True

    url = 'http://hackles.org/'
    # Individual strips are requested through the archive CGI script.
    stripUrl = url + 'cgi-bin/archives.pl?request=%s'
    firstStripUrl = stripUrl % '1'

    # XPath expressions for the "previous" link and the strip image.
    prevSearch = '//a[text()="< previous"]'
    imageSearch = '//img[contains(@src, "strips/")]'
|
|
|
|
|
|
|
|
|
2013-03-26 16:35:10 +00:00
|
|
|
class HagarTheHorrible(_BasicScraper):
    """Scraper whose strip pages are served from www.hagardunor.net."""

    url = 'http://www.hagarthehorrible.net/'
    stripUrl = 'http://www.hagardunor.net/comicstrips_us.php?serietype=9&colortype=1&serieno=%s'
    firstStripUrl = stripUrl % '1'
    multipleImagesPerStrip = True
    # The image pattern is built with quote="" (see tagre for its meaning).
    imageSearch = compile(tagre(
        "img", "src",
        r'(stripus\d+/(?:Hagar_The_Horrible_?|h)\d+[^ >]+)',
        quote=""))
    # Relative strip URL pattern, shared by prevSearch and starter().
    prevUrl = r'(comicstrips_us\.php\?serietype\=9\&colortype\=1\&serieno\=\d+)'
    prevSearch = compile(tagre("a", "href", prevUrl, after="Previous"))
    help = 'Index format: number'

    def starter(self):
        """Return the last strip link found on the comics overview page."""
        overview = 'http://www.hagardunor.net/comics.php'
        page = self.getPage(overview)
        linkPattern = compile(tagre("a", "href", self.prevUrl))
        links = self.fetchUrls(overview, page, linkPattern)
        # The final match on the overview page is used as the start URL.
        return links[-1]
|
2013-03-26 16:35:10 +00:00
|
|
|
|
|
|
|
|
2016-04-12 06:21:06 +00:00
|
|
|
# "Hiatus", navigation missing
|
|
|
|
class _HappyJar(_WordPressScraper):
    """WordPress-based scraper for www.happyjar.com.

    Only the start URL is set here; everything else comes from the base
    class.
    """

    url = 'http://www.happyjar.com/'
|
|
|
|
|
|
|
|
|
2013-03-06 19:00:30 +00:00
|
|
|
class HarkAVagrant(_BasicScraper):
    """Scraper for www.harkavagrant.com, indexed by numeric strip id."""

    url = 'http://www.harkavagrant.com/'
    # Regex-escaped base URL, embedded into the search patterns below.
    rurl = escape(url)
    starter = bounceStarter
    stripUrl = url + 'index.php?id=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(
        tagre("img", "src", r'(%s[^"]+)' % rurl, after='BORDER'))
    # Each navigation pattern is an anchor tag followed by its button image.
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
        tagre("img", "src", "buttonprevious.png"))
    nextSearch = compile(
        tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
        tagre("img", "src", "buttonnext.png"))
    help = 'Index format: number'

    def namer(self, image_url, page_url):
        """Return "<strip id>-<image file name>" for a downloaded image.

        The strip id is the text after the last ``=`` in ``page_url``; the
        file name is the last path component of ``image_url``.
        """
        basename = image_url.rsplit('/', 1)[1]
        strip_id = page_url.rsplit('=', 1)[1]
        return '-'.join((strip_id, basename))
|
2012-06-20 19:58:13 +00:00
|
|
|
|
|
|
|
|
2019-06-22 05:44:56 +00:00
|
|
|
class HavocInc(_WordPressScraper):
    """WordPress-based scraper for the havoc-inc section of radiocomix.com."""

    url = 'http://www.radiocomix.com/havoc-inc/'
    # Strip pages live below "comic/" and are addressed by their slug.
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'havoc-cover'
|
|
|
|
|
|
|
|
|
2020-01-01 19:53:34 +00:00
|
|
|
class HeadlessBliss(_ComicControlScraper):
    """Scraper for headlessbliss.com; only the start URL is set here."""

    url = 'http://headlessbliss.com/'
|
|
|
|
|
|
|
|
|
2019-07-13 03:47:26 +00:00
|
|
|
class HeyFox(_WordPressScraper):
    """WordPress-based scraper for steamclaw.com/heyfox (flagged adult)."""

    adult = True

    url = 'http://www.steamclaw.com/heyfox/'
    # Strip pages are addressed below "archives/comic/".
    stripUrl = url + 'archives/comic/%s'
    firstStripUrl = stripUrl % '11092004'
|
|
|
|
|
|
|
|
|
2019-07-13 04:50:09 +00:00
|
|
|
class HeyKitty(_WordPressScraper):
    """WordPress-based scraper for heykittycomic.com."""

    url = 'http://heykittycomic.com/'
    # Strips are selected through the "comic" query parameter.
    stripUrl = url + '?comic=%s'
    firstStripUrl = stripUrl % 'it-begins'
|
|
|
|
|
|
|
|
|
2016-04-01 22:14:31 +00:00
|
|
|
class Hipsters(_WordPressScraper):
    """WordPress-based scraper for www.hipsters-comic.com."""

    # The first strip is given as a full URL; no stripUrl template is set.
    firstStripUrl = 'http://www.hipsters-comic.com/comic/hip01/'
    url = 'http://www.hipsters-comic.com/'
|
2016-05-01 23:25:34 +00:00
|
|
|
|
|
|
|
|
|
|
|
class HijinksEnsue(_WPNaviIn):
    """Scraper for hijinksensue.com, started via indirectStarter."""

    url = 'http://hijinksensue.com/'
    firstStripUrl = 'http://hijinksensue.com/comic/who-is-your-daddy-and-what-does-he-do/'

    # XPath of the link with the text "Latest HijiNKS ENSUE".
    starter = indirectStarter
    latestSearch = '//a[text()="Latest HijiNKS ENSUE"]'
|
|
|
|
|
|
|
|
|
|
|
|
class HijinksEnsueClassic(_WPNaviIn):
    """Scraper for a hijinksensue.com series flagged end-of-life."""

    endOfLife = True

    # Both the start page and the first strip are fixed comic pages.
    url = 'http://hijinksensue.com/comic/open-your-eyes/'
    firstStripUrl = 'http://hijinksensue.com/comic/a-soul-as-black-as-eyeliner/'
|
|
|
|
|
|
|
|
|
|
|
|
class HijinksEnsueConvention(_WPNaviIn):
    """Scraper for a hijinksensue.com series flagged end-of-life."""

    endOfLife = True

    # Both the start page and the first strip are fixed comic pages.
    url = 'http://hijinksensue.com/comic/emerald-city-comicon-2015-fancy-sketches-part-4/'
    firstStripUrl = 'http://hijinksensue.com/comic/whatever-dad-im-outta-here/'
|
|
|
|
|
|
|
|
|
|
|
|
class HijinksEnsuePhoto(_WPNaviIn):
    """Scraper for a hijinksensue.com series flagged end-of-life."""

    endOfLife = True

    # Both the start page and the first strip are fixed comic pages.
    url = 'http://hijinksensue.com/comic/emerald-city-comicon-2015-fancy-photo-comic-part-2/'
    firstStripUrl = 'http://hijinksensue.com/comic/san-diego-comic-con-fancy-picto-comic-pt-1/'
|
2019-06-19 03:50:58 +00:00
|
|
|
|
|
|
|
|
2019-06-19 03:55:18 +00:00
|
|
|
class Housepets(_WordPressScraper):
    """WordPress-based scraper for www.housepetscomic.com."""

    url = 'http://www.housepetscomic.com/'
    # Strip pages live below "comic/" and are addressed by a dated slug.
    stripUrl = url + 'comic/%s/'
    # Expand the slug through stripUrl so the first strip is an absolute
    # URL; the bare path fragment on its own is not a usable address.
    firstStripUrl = stripUrl % '2008/06/02/when-boredom-strikes'
|
|
|
|
|
|
|
|
|
2019-06-19 03:50:58 +00:00
|
|
|
class HowToBeAWerewolf(_ComicControlScraper):
    """Scraper for howtobeawerewolf.com with custom image naming."""

    url = 'http://howtobeawerewolf.com/'
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % 'coming-february-3rd'

    def namer(self, imageUrl, pageUrl):
        """Return the image file name without its leading numeric prefix.

        The last path component of ``imageUrl`` is taken as the file name.
        If it starts with a digit, everything up to and including the first
        ``-`` is dropped. A name that has no ``-``, or an empty last path
        component, is returned unchanged instead of raising ``IndexError``.
        ``pageUrl`` is not used.
        """
        filename = imageUrl.rsplit('/', 1)[-1]
        # Slicing (not indexing) keeps this safe for an empty file name.
        if filename[:1].isdigit():
            _prefix, dash, remainder = filename.partition('-')
            # Only strip when a separator actually exists.
            if dash:
                filename = remainder
        return filename
|