From 06008d4266ede9c7c17356c94e7f23745e823520 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Thu, 11 Oct 2012 19:53:37 +0200 Subject: [PATCH] Fix indexed retrieval. --- dosage | 27 ++++++++++++++++++--------- dosagelib/comic.py | 1 - dosagelib/scraper.py | 35 ++++++++++++++++++++++++++++++----- 3 files changed, 48 insertions(+), 15 deletions(-) diff --git a/dosage b/dosage index 5c1c1d96f..483ca53db 100755 --- a/dosage +++ b/dosage @@ -64,13 +64,16 @@ def setOutputInfo(options): def saveComicStrip(strip, basepath): """Save a comic strip which can consist of multiple images.""" errors = 0 + allskipped = True for image in strip.getImages(): try: - image.save(basepath) + filename, saved = image.save(basepath) + if saved: + allskipped = False except IOError, msg: out.write('Error saving %s: %s' % (image.filename, msg)) errors += 1 - return errors + return errors, allskipped def displayHelp(comics, basepath): @@ -88,13 +91,19 @@ def getComics(options, comics): for scraperobj in getScrapers(comics, options.basepath): out.context = scraperobj.get_name() if options.all: - out.write('Retrieving all strips...') strips = scraperobj.getAllStrips() else: - out.write('Retrieving the current strip...') - strips = [scraperobj.getCurrentStrip()] + strips = scraperobj.getCurrentStrips() + first = True for strip in strips: - errors += saveComicStrip(strip, options.basepath) + _errors, skipped = saveComicStrip(strip, options.basepath) + errors += _errors + if not first and scraperobj.indexes: + # stop when indexed retrieval skipped all images for one + # comic strip (except the first one) + out.write("Stop retrieval because image file already exists") + break + first = False events.handler.end() return errors @@ -161,11 +170,11 @@ def getScrapers(comics, basepath=None): for comic in comics: if ':' in comic: name, index = comic.split(':', 1) - indices = index.split(',') + indexes = index.split(',') else: name = comic - indices = None - yield 
scraper.get_scraper(name)(indices=indices) + indexes = None + yield scraper.get_scraper(name)(indexes=indexes) def main(): """Parse options and execute commands.""" diff --git a/dosagelib/comic.py b/dosagelib/comic.py index c2a050b93..0dbc62447 100644 --- a/dosagelib/comic.py +++ b/dosagelib/comic.py @@ -51,7 +51,6 @@ class ComicImage(object): def connect(self): """Connect to host and get meta information.""" - out.write('Getting headers for %s...' % (self.url,), 2) try: self.urlobj = urlopen(self.url, referrer=self.referrer) except urllib2.HTTPError, he: diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py index a94590927..5b39ef346 100644 --- a/dosagelib/scraper.py +++ b/dosagelib/scraper.py @@ -5,6 +5,7 @@ import os from . import loader from .util import fetchUrls from .comic import ComicStrip +from .output import out disabled = [] def init_disabled(): @@ -37,14 +38,23 @@ class _BasicScraper(object): ''' help = 'Sorry, no help for this comic yet.' - def __init__(self, indices=None): + def __init__(self, indexes=None): """Initialize internal variables.""" self.urls = set() - self.indices = indices + self.indexes = indexes - def getCurrentStrip(self): + def getCurrentStrips(self): """Get current comic strip.""" - return self.getStrip(self.getLatestUrl()) + msg = 'Retrieving the current strip' + if self.indexes: + msg += " for indexes %s" % self.indexes + out.write(msg+"...") + if self.indexes: + for index in self.indexes: + url = self.imageUrl % index + yield self.getStrip(url) + else: + yield self.getStrip(self.getLatestUrl()) def getStrip(self, url): """Get comic strip for given URL.""" @@ -57,8 +67,23 @@ class _BasicScraper(object): def getAllStrips(self): """Get all comic strips.""" + msg = 'Retrieving all strips' + if self.indexes: + msg += " for indexes %s" % self.indexes + out.write(msg+"...") + if self.indexes: + for index in self.indexes: + url = self.imageUrl % index + for strip in self.getAllStripsFor(url): + yield strip + else: + url = 
self.getLatestUrl() + for strip in self.getAllStripsFor(url): + yield strip + + def getAllStripsFor(self, url): + """Get all comic strips for a URL.""" seen_urls = set() - url = self.getLatestUrl() while url: imageUrls, prevUrl = fetchUrls(url, self.imageSearch, self.prevSearch) seen_urls.add(url)