Add GeneralProtectionFault (disallowed by robots.txt)
This commit is contained in:
parent
f20df8b692
commit
96fc129fea
1 changed file with 19 additions and 0 deletions
|
@ -64,6 +64,25 @@ class GeeksNextDoor(_BasicScraper):
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
# disallowed by robots.txt
|
||||||
|
class _GeneralProtectionFault(_BasicScraper):
    # Scraper definition for the "General Protection Fault" web comic.
    # NOTE(review): the leading underscore presumably keeps this class out of
    # the set of active scrapers because the site's robots.txt disallows
    # crawling -- confirm against the code that collects scraper classes.
    description = u'General Protection Fault'

    # Base address of the comic site; all other URLs are derived from it.
    url = 'http://www.gpf-comics.com/'
    # Escaped form of the base URL so it can be embedded in a regex literally
    # (assumes `escape` is a regex-escaping helper such as re.escape).
    rurl = escape(url)
    # Template for a single strip page; '%s' takes a date index (see `help`).
    stripUrl = url + 'archive/%s'
    # Page of the earliest strip, built from the template above.
    firstStripUrl = stripUrl % '1998/11/02'

    # Captures the `src` of <img> tags whose path starts with /comics/.
    imageSearch = compile(tagre("img", "src", r'(/comics/[^"]*)'))
    # Captures the `href` of a link into this site that is directly followed
    # by an <img> whose alt text is "Previous Comic".
    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) +
                         tagre("img", "alt", "Previous Comic"))
    help = 'Index format: yyyy/mm/dd'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Remove random stuff from filename.

        The result is the first 11 characters of the image file name
        followed by its last 4 characters; everything in between is dropped.
        File names of 15 characters or fewer are returned unchanged, because
        the two slices would otherwise overlap and duplicate characters
        (e.g. 'abc.gif' would become 'abc.gif.gif').

        imageUrl -- URL of the image; only its last '/'-separated component
                    is used.
        pageUrl  -- not used by this namer.
        """
        prefix_len = 11
        suffix_len = 4
        imageName = imageUrl.split('/')[-1]
        if len(imageName) <= prefix_len + suffix_len:
            # Nothing to cut out between prefix and suffix.
            return imageName
        return imageName[:prefix_len] + imageName[-suffix_len:]
|
||||||
|
|
||||||
|
|
||||||
class GirlsWithSlingshots(_BasicScraper):
|
class GirlsWithSlingshots(_BasicScraper):
|
||||||
url = 'http://www.girlswithslingshots.com/'
|
url = 'http://www.girlswithslingshots.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
|
|
Loading…
Reference in a new issue