Fix PennyArcade

This commit is contained in:
Bastian Kleineidam 2014-03-26 19:59:42 +01:00
parent 323a9f1959
commit 4bb31953ad
2 changed files with 30 additions and 5 deletions

View file

@ -1,3 +1,10 @@
Dosage 2.14 (released xx.xx.2014)
Fixes:
- comics: Fixed PennyArcade
Closes: GH bug #62
Dosage 2.13 (released 3.3.2014)
Features:

View file

@ -5,7 +5,7 @@
from re import compile, escape from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import bounceStarter, queryNamer, indirectStarter from ..helpers import bounceStarter, queryNamer, indirectStarter
from ..util import tagre from ..util import tagre, fetchUrl, getPageContent
class PandyLand(_BasicScraper): class PandyLand(_BasicScraper):
@ -83,14 +83,32 @@ class PennyAndAggie(_BasicScraper):
class PennyArcade(_BasicScraper): class PennyArcade(_BasicScraper):
url = 'http://penny-arcade.com/comic/' url = 'http://penny-arcade.com/comic/'
rurl = escape(url) rurl = escape(url)
starter = bounceStarter(url,
compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btnNext"))
)
stripUrl = url + '%s' stripUrl = url + '%s'
firstStripUrl = stripUrl % '1998/11/18' firstStripUrl = stripUrl % '1998/11/18'
imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btnPrev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btnPrev"))
help = 'Index format: yyyy/mm/dd' nextSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btnNext"))
help = 'Index format: yyyy/mm/dd/'
@classmethod
def prevUrlModifier(cls, prevUrl):
if prevUrl:
dummy, yyyy, mm, dd = prevUrl.rsplit('/', 3)
try:
int(dd)
except ValueError:
# URL has form yyyy/mm/dd/stripname
prevUrl = "%s/%s/%s" % (dummy, yyyy, mm)
return prevUrl
@classmethod
def starter(cls):
    """Get bounced start URL.

    Fetches cls.url, looks up the link matching cls.prevSearch on that
    page, fetches that page, and then looks up the link matching
    cls.nextSearch there. The resulting URL is passed through
    cls.prevUrlModifier before being returned.

    NOTE(review): presumably the bounce exists to obtain the dated URL of
    the newest strip instead of the bare landing page -- confirm.
    """
    # Step 1: from the landing page, find the link to the previous strip.
    landingData, landingBase = getPageContent(cls.url, cls.session)
    olderUrl = fetchUrl(cls.url, landingData, landingBase, cls.prevSearch)
    # Step 2: from that page, follow the "next" link back.
    olderData, olderBase = getPageContent(olderUrl, cls.session)
    bouncedUrl = fetchUrl(olderUrl, olderData, olderBase, cls.nextSearch)
    return cls.prevUrlModifier(bouncedUrl)
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):