diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py index 0e124ef77..50e3ecf39 100644 --- a/dosagelib/scraper.py +++ b/dosagelib/scraper.py @@ -59,6 +59,7 @@ class _BasicScraper(object): else: self.indexes = tuple() self.skippedUrls = set() + self.hitFirstStripUrl = False def __cmp__(self, other): """Compare scraper by name and index list.""" @@ -137,6 +138,7 @@ class _BasicScraper(object): def getStripsFor(self, url, maxstrips): """Get comic strips for an URL. If maxstrips is a positive number, stop after retrieving the given number of strips.""" + self.hitFirstStripUrl = False seen_urls = set() while url: data, baseUrl = getPageContent(url, self.session) @@ -147,6 +149,7 @@ class _BasicScraper(object): yield self.getComicStrip(url, imageUrls) if self.firstStripUrl == url: out.debug("Stop at first URL %s" % url) + self.hitFirstStripUrl = True break prevUrl = None if self.prevSearch: diff --git a/tests/test_comics.py b/tests/test_comics.py index bef6a9e3a..0c2ced96d 100644 --- a/tests/test_comics.py +++ b/tests/test_comics.py @@ -46,7 +46,7 @@ class _ComicTester(TestCase): if num > 0 and self.scraperclass.prevUrlMatchesStripUrl: self.check_stripurl(strip) num += 1 - if self.scraperclass.prevSearch: + if self.scraperclass.prevSearch and not scraperobj.hitFirstStripUrl: self.check(num >= 4, 'traversal failed after %d strips, check the prevSearch pattern at %s.' % (num, strip.stripUrl)) # Check that exactly or for multiple pages at least 5 images are saved. # This is different than the image number check above since it checks saved files,