From f194e430bcd1a5b2c9d10a522b45d620de055250 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Thu, 3 Jul 2014 18:41:25 +0200 Subject: [PATCH] TheThinHLine: fetch bigger images and name image files from sequence number. --- doc/changelog.txt | 4 ++-- dosagelib/plugins/t.py | 24 ++++++++++++++++++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/doc/changelog.txt b/doc/changelog.txt index 758f322dc..66d118f85 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -1,8 +1,8 @@ Dosage 2.15 (released xx.xx.2014) Features: -- comics: Added Whomp. - Closes: GH bug #64 +- comics: Added TheThinHLine, Whomp. + Closes: GH bug #64, #67 Fixes: - comics: Fixed DungeonsAndDenizens, GirlGenius, GirlsWithSlingshots, diff --git a/dosagelib/plugins/t.py b/dosagelib/plugins/t.py index a23bc454f..71d0368ad 100755 --- a/dosagelib/plugins/t.py +++ b/dosagelib/plugins/t.py @@ -5,7 +5,7 @@ from re import compile, escape, IGNORECASE from ..scraper import _BasicScraper from ..helpers import indirectStarter -from ..util import tagre +from ..util import tagre, fetchUrl, getPageContent class TheBrads(_BasicScraper): @@ -213,13 +213,29 @@ class TheOuterQuarter(_BasicScraper): class TheThinHLine(_BasicScraper): description = u'the thin H line. Proudly mediocre. NSFW.' 
url = 'http://thinhline.tumblr.com/' + rurl = escape(url) stripUrl = url + 'post/%s' firstStripUrl = stripUrl % '3517345105' - imageSearch = compile(tagre('a', 'href', '%simage[^"]+' % url) + tagre('img', 'src', '([^"]+media.tumblr.com/[^"]+)')) - prevSearch = compile(r'>') - starter = indirectStarter(url, compile(r'') + starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after='class="timestamp"'))) adult = True + indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl)) + + def getComicStrip(self, url, data, baseUrl): + """The comic strip image is in a separate page.""" + pageUrl = fetchUrl(url, data, baseUrl, self.indirectImageSearch) + pageData, pageBaseUrl = getPageContent(pageUrl, self.session) + return super(TheThinHLine, self).getComicStrip(pageUrl, pageData, pageBaseUrl) + + @classmethod + def namer(cls, imageUrl, pageUrl): + """Name the image file after the last path segment of the page URL, keeping the image's extension.""" + num = pageUrl.split('/')[-1] + ext = imageUrl.rsplit('.', 1)[1] + return "thethinhline-%s.%s" % (num, ext) + class ThreePanelSoul(_BasicScraper): url = 'http://threepanelsoul.com/'