Fix indexed retrieval.

This commit is contained in:
Bastian Kleineidam 2012-10-11 19:53:37 +02:00
parent 78f44e9d9c
commit 06008d4266
3 changed files with 48 additions and 15 deletions

27
dosage
View file

@ -64,13 +64,16 @@ def setOutputInfo(options):
def saveComicStrip(strip, basepath):
"""Save a comic strip which can consist of multiple images."""
errors = 0
allskipped = True
for image in strip.getImages():
try:
image.save(basepath)
filename, saved = image.save(basepath)
if saved:
allskipped = False
except IOError, msg:
out.write('Error saving %s: %s' % (image.filename, msg))
errors += 1
return errors
return errors, allskipped
def displayHelp(comics, basepath):
@ -88,13 +91,19 @@ def getComics(options, comics):
for scraperobj in getScrapers(comics, options.basepath):
out.context = scraperobj.get_name()
if options.all:
out.write('Retrieving all strips...')
strips = scraperobj.getAllStrips()
else:
out.write('Retrieving the current strip...')
strips = [scraperobj.getCurrentStrip()]
strips = scraperobj.getCurrentStrips()
first = True
for strip in strips:
errors += saveComicStrip(strip, options.basepath)
_errors, skipped = saveComicStrip(strip, options.basepath)
errors += _errors
if not first and scraperobj.indexes:
# stop when indexed retrieval skipped all images for one
# comic strip (except the first one)
out.write("Stop retrieval because image file already exists")
break
first = False
events.handler.end()
return errors
@ -161,11 +170,11 @@ def getScrapers(comics, basepath=None):
for comic in comics:
if ':' in comic:
name, index = comic.split(':', 1)
indices = index.split(',')
indexes = index.split(',')
else:
name = comic
indices = None
yield scraper.get_scraper(name)(indices=indices)
indexes = None
yield scraper.get_scraper(name)(indexes=indexes)
def main():
"""Parse options and execute commands."""

View file

@ -51,7 +51,6 @@ class ComicImage(object):
def connect(self):
"""Connect to host and get meta information."""
out.write('Getting headers for %s...' % (self.url,), 2)
try:
self.urlobj = urlopen(self.url, referrer=self.referrer)
except urllib2.HTTPError, he:

View file

@ -5,6 +5,7 @@ import os
from . import loader
from .util import fetchUrls
from .comic import ComicStrip
from .output import out
disabled = []
def init_disabled():
@ -37,14 +38,23 @@ class _BasicScraper(object):
'''
help = 'Sorry, no help for this comic yet.'
def __init__(self, indices=None):
def __init__(self, indexes=None):
"""Initialize internal variables."""
self.urls = set()
self.indices = indices
self.indexes = indexes
def getCurrentStrip(self):
def getCurrentStrips(self):
"""Get current comic strip."""
return self.getStrip(self.getLatestUrl())
msg = 'Retrieving the current strip'
if self.indexes:
msg += " for indexes %s" % self.indexes
out.write(msg+"...")
if self.indexes:
for index in self.indexes:
url = self.imageUrl % index
yield self.getStrip(url)
else:
yield self.getStrip(self.getLatestUrl())
def getStrip(self, url):
"""Get comic strip for given URL."""
@ -57,8 +67,23 @@ class _BasicScraper(object):
def getAllStrips(self):
"""Get all comic strips."""
msg = 'Retrieving all strips'
if self.indexes:
msg += " for indexes %s" % self.indexes
out.write(msg+"...")
if self.indexes:
for index in self.indexes:
url = self.imageUrl % index
for strip in self.getAllStripsFor(url):
yield strip
else:
url = self.getLatestUrl()
for strip in self.getAllStripsFor(url):
yield strip
def getAllStripsFor(self, url):
"""Get all comic strips for a URL."""
seen_urls = set()
url = self.getLatestUrl()
while url:
imageUrls, prevUrl = fetchUrls(url, self.imageSearch, self.prevSearch)
seen_urls.add(url)