Fix tests which hit the first URL.
This commit is contained in:
parent
d0c3492cc7
commit
f36ed46d6a
2 changed files with 4 additions and 1 deletions
|
@ -59,6 +59,7 @@ class _BasicScraper(object):
|
||||||
else:
|
else:
|
||||||
self.indexes = tuple()
|
self.indexes = tuple()
|
||||||
self.skippedUrls = set()
|
self.skippedUrls = set()
|
||||||
|
self.hitFirstStripUrl = False
|
||||||
|
|
||||||
def __cmp__(self, other):
|
def __cmp__(self, other):
|
||||||
"""Compare scraper by name and index list."""
|
"""Compare scraper by name and index list."""
|
||||||
|
@ -137,6 +138,7 @@ class _BasicScraper(object):
|
||||||
def getStripsFor(self, url, maxstrips):
|
def getStripsFor(self, url, maxstrips):
|
||||||
"""Get comic strips for an URL. If maxstrips is a positive number, stop after
|
"""Get comic strips for an URL. If maxstrips is a positive number, stop after
|
||||||
retrieving the given number of strips."""
|
retrieving the given number of strips."""
|
||||||
|
self.hitFirstStripUrl = False
|
||||||
seen_urls = set()
|
seen_urls = set()
|
||||||
while url:
|
while url:
|
||||||
data, baseUrl = getPageContent(url, self.session)
|
data, baseUrl = getPageContent(url, self.session)
|
||||||
|
@ -147,6 +149,7 @@ class _BasicScraper(object):
|
||||||
yield self.getComicStrip(url, imageUrls)
|
yield self.getComicStrip(url, imageUrls)
|
||||||
if self.firstStripUrl == url:
|
if self.firstStripUrl == url:
|
||||||
out.debug("Stop at first URL %s" % url)
|
out.debug("Stop at first URL %s" % url)
|
||||||
|
self.hitFirstStripUrl = True
|
||||||
break
|
break
|
||||||
prevUrl = None
|
prevUrl = None
|
||||||
if self.prevSearch:
|
if self.prevSearch:
|
||||||
|
|
|
@ -46,7 +46,7 @@ class _ComicTester(TestCase):
|
||||||
if num > 0 and self.scraperclass.prevUrlMatchesStripUrl:
|
if num > 0 and self.scraperclass.prevUrlMatchesStripUrl:
|
||||||
self.check_stripurl(strip)
|
self.check_stripurl(strip)
|
||||||
num += 1
|
num += 1
|
||||||
if self.scraperclass.prevSearch:
|
if self.scraperclass.prevSearch and not scraperobj.hitFirstStripUrl:
|
||||||
self.check(num >= 4, 'traversal failed after %d strips, check the prevSearch pattern at %s.' % (num, strip.stripUrl))
|
self.check(num >= 4, 'traversal failed after %d strips, check the prevSearch pattern at %s.' % (num, strip.stripUrl))
|
||||||
# Check that exactly or for multiple pages at least 5 images are saved.
|
# Check that exactly or for multiple pages at least 5 images are saved.
|
||||||
# This is different than the image number check above since it checks saved files,
|
# This is different than the image number check above since it checks saved files,
|
||||||
|
|
Loading…
Reference in a new issue