Add GeneralProtectionFault (disallowed by robots.txt)

Bastian Kleineidam 2013-04-25 20:54:48 +02:00
parent f20df8b692
commit 96fc129fea

@@ -64,6 +64,25 @@ class GeeksNextDoor(_BasicScraper):
    help = 'Index format: yyyy-mm-dd'


# disallowed by robots.txt
class _GeneralProtectionFault(_BasicScraper):
    description = u'General Protection Fault'
    url = 'http://www.gpf-comics.com/'
    rurl = escape(url)
    stripUrl = url + 'archive/%s'
    firstStripUrl = stripUrl % '1998/11/02'
    imageSearch = compile(tagre("img", "src", r'(/comics/[^"]*)'))
    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) +
                         tagre("img", "alt", "Previous Comic"))
    help = 'Index format: yyyy/mm/dd'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Remove random stuff from filename."""
        imageName = imageUrl.split('/')[-1]
        return imageName[:11] + imageName[-4:]


class GirlsWithSlingshots(_BasicScraper):
    url = 'http://www.girlswithslingshots.com/'
    rurl = escape(url)
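
For context, the added namer keeps only the first 11 characters of the image basename plus the 4-character extension, so variable suffixes do not produce duplicate files. A minimal standalone sketch of that slicing, using a hypothetical URL (real GPF filenames may differ):

    # Sketch of the namer's slicing outside the scraper framework.
    # The URL below is a made-up example, not a real GPF image path.
    def namer(imageUrl, pageUrl=None):
        """Keep the first 11 characters of the basename (assumed to be a
        date-like prefix) plus the 4-character extension."""
        imageName = imageUrl.split('/')[-1]
        return imageName[:11] + imageName[-4:]

    # Hypothetical trailing junk is stripped, leaving a stable name:
    print(namer('http://www.gpf-comics.com/comics/gpf20130424abc123.gif'))
    # -> 'gpf20130424.gif'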