Provide page data in shouldSkipUrl() function

This commit is contained in:
Bastian Kleineidam 2014-02-10 21:58:09 +01:00
parent 73e1af7aba
commit 875e431edc
9 changed files with 13 additions and 13 deletions

View file

@@ -223,7 +223,7 @@ class AmazingSuperPowers(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/name'
def shouldSkipUrl(self, url):
def shouldSkipUrl(self, url, data):
"""Skip pages without images."""
return url in (
# video
@@ -326,7 +326,7 @@ class AstronomyPOTD(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'(ap\d{6}\.html)') + "&lt;</a>")
help = 'Index format: yymmdd'
def shouldSkipUrl(self, url):
def shouldSkipUrl(self, url, data):
"""Skip pages without images."""
return url in (
self.stripUrl % '130217', # video

View file

@@ -94,7 +94,7 @@ class Eriadan(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/nnn (unpadded)'
def shouldSkipUrl(self, url):
def shouldSkipUrl(self, url, data):
return url in (
self.stripUrl % "2013/04/02/istruzioni-per-il-non-uso", # video
)

View file

@@ -113,7 +113,7 @@ class FonFlatter(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/number-stripname'
def shouldSkipUrl(self, url):
def shouldSkipUrl(self, url, data):
return url in (
self.stripUrl % "2006/11/30/adventskalender",
self.stripUrl % "2006/09/21/danke",

View file

@@ -120,7 +120,7 @@ class PHDComics(_BasicScraper):
tagre("img", "src", r'(?:comics/)?images/prev_button\.gif', quote=""))
help = 'Index format: number'
def shouldSkipUrl(self, url):
def shouldSkipUrl(self, url, data):
"""Skip pages without images."""
return url in (
self.stripUrl % '1669', # video

View file

@@ -305,7 +305,7 @@ class SMBC(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'([^"]+)#comic', after="backRollover"))
help = 'Index format: nnnn'
def shouldSkipUrl(self, url):
def shouldSkipUrl(self, url, data):
"""Skip promo or missing update pages."""
return url in (
self.stripUrl % '2865',
@@ -347,7 +347,7 @@ class SnowFlakes(_BasicScraper):
ext = imageUrl.rsplit('.', 1)[1]
return "SnowFlakes-%d.%s" % (index, ext)
def shouldSkipUrl(self, url):
def shouldSkipUrl(self, url, data):
"""Skip pages without images."""
return url in (
self.stripUrl % ('279', '2'), # no comic
@@ -526,7 +526,7 @@ class StuffNoOneToldMe(_BasicScraper):
parts, imagename = imageUrl.rsplit('/', 1)
return '%s-%s-%s-%s' % (year, month, stripname, imagename)
def shouldSkipUrl(self, url):
def shouldSkipUrl(self, url, data):
"""Skip pages without images."""
return url in (
self.stripUrl % '2012/08/self-rant', # no comic

View file

@@ -158,7 +158,7 @@ class ToonHole(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/stripname'
def shouldSkipUrl(self, url):
def shouldSkipUrl(self, url, data):
return url in (self.stripUrl % "2013/03/if-game-of-thrones-was-animated",)

View file

@@ -54,7 +54,7 @@ class WebDesignerCOTW(_BasicScraper):
help = 'Index format: yyyy/mm/stripname'
description = u"The content revolves around web design, blogging and funny situations that we encounter in our daily lives as designers and this week we focus on Christmas. These great cartoons are created by Jerry King, an award-winning cartoonist whos one of the most published, prolific and versatile cartoonists in the world today."
def shouldSkipUrl(self, url):
def shouldSkipUrl(self, url, data):
"""Skip non-comic URLs."""
return 'comics-of-the-week' not in url

View file

@@ -80,7 +80,7 @@ class Zwarwald(_BasicScraper):
help = 'Index format: number'
waitSeconds = 1
def shouldSkipUrl(self, url):
def shouldSkipUrl(self, url, data):
"""Some pages have flash content."""
return url in (
self.stripUrl % "112",

View file

@@ -102,7 +102,7 @@ class _BasicScraper(object):
"""Get hash value from name and index list."""
return hash((self.getName(), self.indexes))
def shouldSkipUrl(self, url):
def shouldSkipUrl(self, url, data):
"""Determine if search for images in given URL should be skipped."""
return False
@@ -163,7 +163,7 @@ class _BasicScraper(object):
while url:
out.info(u'Get strip URL %s' % url, level=1)
data, baseUrl = getPageContent(url, self.session)
if self.shouldSkipUrl(url):
if self.shouldSkipUrl(url, data):
out.info(u'Skipping URL %s' % url)
self.skippedUrls.add(url)
else: