TheThinHLine: fetch bigger images and name image files from sequence number.
This commit is contained in:
parent
4845a4ccc1
commit
f194e430bc
2 changed files with 22 additions and 6 deletions
|
@ -1,8 +1,8 @@
|
||||||
Dosage 2.15 (released xx.xx.2014)
|
Dosage 2.15 (released xx.xx.2014)
|
||||||
|
|
||||||
Features:
|
Features:
|
||||||
- comics: Added Whomp.
|
- comics: Added TheThinHLine, Whomp.
|
||||||
Closes: GH bug #64
|
Closes: GH bug #64, #67
|
||||||
|
|
||||||
Fixes:
|
Fixes:
|
||||||
- comics: Fixed DungeonsAndDenizens, GirlGenius, GirlsWithSlingshots,
|
- comics: Fixed DungeonsAndDenizens, GirlGenius, GirlsWithSlingshots,
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
from re import compile, escape, IGNORECASE
|
from re import compile, escape, IGNORECASE
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
from ..util import tagre
|
from ..util import tagre, fetchUrl, getPageContent
|
||||||
|
|
||||||
|
|
||||||
class TheBrads(_BasicScraper):
|
class TheBrads(_BasicScraper):
|
||||||
|
@ -213,13 +213,29 @@ class TheOuterQuarter(_BasicScraper):
|
||||||
class TheThinHLine(_BasicScraper):
|
class TheThinHLine(_BasicScraper):
|
||||||
description = u'the thin H line. Proudly mediocre. NSFW.'
|
description = u'the thin H line. Proudly mediocre. NSFW.'
|
||||||
url = 'http://thinhline.tumblr.com/'
|
url = 'http://thinhline.tumblr.com/'
|
||||||
|
rurl = escape(url)
|
||||||
stripUrl = url + 'post/%s'
|
stripUrl = url + 'post/%s'
|
||||||
firstStripUrl = stripUrl % '3517345105'
|
firstStripUrl = stripUrl % '3517345105'
|
||||||
imageSearch = compile(tagre('a', 'href', '%simage[^"]+' % url) + tagre('img', 'src', '([^"]+media.tumblr.com/[^"]+)'))
|
imageSearch = compile(tagre('img', 'data-src', r'([^"]+media.tumblr.com/[^"]+)', before='content-image'))
|
||||||
prevSearch = compile(r'<a href="([^"]+)">></a>')
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') + '></a>')
|
||||||
starter = indirectStarter(url, compile(r'<a href="([^"]+)" class="timestamp"'))
|
starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after='class="timestamp"')))
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
|
indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))
|
||||||
|
|
||||||
|
def getComicStrip(self, url, data, baseUrl):
|
||||||
|
"""The comic strip image is on a separate page linked from the post page."""
|
||||||
|
pageUrl = fetchUrl(url, data, baseUrl, self.indirectImageSearch)
|
||||||
|
pageData, pageBaseUrl = getPageContent(pageUrl, self.session)
|
||||||
|
return super(TheThinHLine, self).getComicStrip(pageUrl, pageData, pageBaseUrl)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
"""Name the image file after the number at the end of the page URL."""
|
||||||
|
num = pageUrl.split('/')[-1]
|
||||||
|
ext = imageUrl.rsplit('.', 1)[1]
|
||||||
|
return "thethinhline-%s.%s" % (num, ext)
|
||||||
|
|
||||||
|
|
||||||
class ThreePanelSoul(_BasicScraper):
|
class ThreePanelSoul(_BasicScraper):
|
||||||
url = 'http://threepanelsoul.com/'
|
url = 'http://threepanelsoul.com/'
|
||||||
|
|
Loading…
Reference in a new issue