Fix indexed retrieval.

2012-10-11 19:53:37 +02:00 · 2012-10-11 19:53:37 +02:00 · 06008d4266
commit 06008d4266
parent 78f44e9d9c
3 changed files with 48 additions and 15 deletions
--- a/27
+++ b/27
@@ -64,13 +64,16 @@ def setOutputInfo(options):
 def saveComicStrip(strip, basepath):
    """Save a comic strip which can consist of multiple images."""
    errors = 0
+    allskipped = True
    for image in strip.getImages():
        try:
-            image.save(basepath)
+            filename, saved = image.save(basepath)
+            if saved:
+                allskipped = False
        except IOError, msg:
            out.write('Error saving %s: %s' % (image.filename, msg))
            errors += 1
-    return errors
+    return errors, allskipped


 def displayHelp(comics, basepath):
@@ -88,13 +91,19 @@ def getComics(options, comics):
    for scraperobj in getScrapers(comics, options.basepath):
        out.context = scraperobj.get_name()
        if options.all:
-            out.write('Retrieving all strips...')
            strips = scraperobj.getAllStrips()
        else:
-            out.write('Retrieving the current strip...')
-            strips = [scraperobj.getCurrentStrip()]
+            strips = scraperobj.getCurrentStrips()
+        first = True
        for strip in strips:
-            errors += saveComicStrip(strip, options.basepath)
+            _errors, skipped = saveComicStrip(strip, options.basepath)
+            errors += _errors
+            if not first and scraperobj.indexes:
+                # stop when indexed retrieval skipped all images for one
+                # comic strip (except the first one)
+                out.write("Stop retrieval because image file already exists")
+                break
+            first = False
    events.handler.end()
    return errors

@@ -161,11 +170,11 @@ def getScrapers(comics, basepath=None):
        for comic in comics:
            if ':' in comic:
                name, index = comic.split(':', 1)
-                indices = index.split(',')
+                indexes = index.split(',')
            else:
                name = comic
-                indices = None
-            yield scraper.get_scraper(name)(indices=indices)
+                indexes = None
+            yield scraper.get_scraper(name)(indexes=indexes)

 def main():
    """Parse options and execute commands."""
--- a/dosagelib/comic.py
+++ b/dosagelib/comic.py
@@ -51,7 +51,6 @@ class ComicImage(object):

    def connect(self):
        """Connect to host and get meta information."""
-        out.write('Getting headers for %s...' % (self.url,), 2)
        try:
            self.urlobj = urlopen(self.url, referrer=self.referrer)
        except urllib2.HTTPError, he:
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -5,6 +5,7 @@ import os
 from . import loader
 from .util import fetchUrls
 from .comic import ComicStrip
+from .output import out

 disabled = []
 def init_disabled():
@@ -37,14 +38,23 @@ class _BasicScraper(object):
    '''
    help = 'Sorry, no help for this comic yet.'

-    def __init__(self, indices=None):
+    def __init__(self, indexes=None):
        """Initialize internal variables."""
        self.urls = set()
-        self.indices = indices
+        self.indexes = indexes

-    def getCurrentStrip(self):
+    def getCurrentStrips(self):
        """Get current comic strip."""
-        return self.getStrip(self.getLatestUrl())
+        msg = 'Retrieving the current strip'
+        if self.indexes:
+            msg += " for indexes %s" % self.indexes
+        out.write(msg+"...")
+        if self.indexes:
+            for index in self.indexes:
+                url = self.imageUrl % index
+                yield self.getStrip(url)
+        else:
+            yield self.getStrip(self.getLatestUrl())

    def getStrip(self, url):
        """Get comic strip for given URL."""
@@ -57,8 +67,23 @@ class _BasicScraper(object):

    def getAllStrips(self):
        """Get all comic strips."""
+        msg = 'Retrieving all strips'
+        if self.indexes:
+            msg += " for indexes %s" % self.indexes
+        out.write(msg+"...")
+        if self.indexes:
+            for index in self.indexes:
+                url = self.imageUrl % index
+                for strip in self.getAllStripsFor(url):
+                    yield strip
+        else:
+            url = self.getLatestUrl()
+            for strip in self.getAllStripsFor(url):
+                yield strip
+
+    def getAllStripsFor(self, url):
+        """Get all comic strips for a URL."""
        seen_urls = set()
-        url = self.getLatestUrl()
        while url:
            imageUrls, prevUrl = fetchUrls(url, self.imageSearch, self.prevSearch)
            seen_urls.add(url)