Fix indexed retrieval.

This commit is contained in:
Bastian Kleineidam 2012-10-11 19:53:37 +02:00
parent 78f44e9d9c
commit 06008d4266
3 changed files with 48 additions and 15 deletions

27
dosage
View file

@ -64,13 +64,16 @@ def setOutputInfo(options):
def saveComicStrip(strip, basepath): def saveComicStrip(strip, basepath):
"""Save a comic strip which can consist of multiple images.""" """Save a comic strip which can consist of multiple images."""
errors = 0 errors = 0
allskipped = True
for image in strip.getImages(): for image in strip.getImages():
try: try:
image.save(basepath) filename, saved = image.save(basepath)
if saved:
allskipped = False
except IOError, msg: except IOError, msg:
out.write('Error saving %s: %s' % (image.filename, msg)) out.write('Error saving %s: %s' % (image.filename, msg))
errors += 1 errors += 1
return errors return errors, allskipped
def displayHelp(comics, basepath): def displayHelp(comics, basepath):
@ -88,13 +91,19 @@ def getComics(options, comics):
for scraperobj in getScrapers(comics, options.basepath): for scraperobj in getScrapers(comics, options.basepath):
out.context = scraperobj.get_name() out.context = scraperobj.get_name()
if options.all: if options.all:
out.write('Retrieving all strips...')
strips = scraperobj.getAllStrips() strips = scraperobj.getAllStrips()
else: else:
out.write('Retrieving the current strip...') strips = scraperobj.getCurrentStrips()
strips = [scraperobj.getCurrentStrip()] first = True
for strip in strips: for strip in strips:
errors += saveComicStrip(strip, options.basepath) _errors, skipped = saveComicStrip(strip, options.basepath)
errors += _errors
if not first and scraperobj.indexes:
# stop when indexed retrieval skipped all images for one
# comic strip (except the first one)
out.write("Stop retrieval because image file already exists")
break
first = False
events.handler.end() events.handler.end()
return errors return errors
@ -161,11 +170,11 @@ def getScrapers(comics, basepath=None):
for comic in comics: for comic in comics:
if ':' in comic: if ':' in comic:
name, index = comic.split(':', 1) name, index = comic.split(':', 1)
indices = index.split(',') indexes = index.split(',')
else: else:
name = comic name = comic
indices = None indexes = None
yield scraper.get_scraper(name)(indices=indices) yield scraper.get_scraper(name)(indexes=indexes)
def main(): def main():
"""Parse options and execute commands.""" """Parse options and execute commands."""

View file

@ -51,7 +51,6 @@ class ComicImage(object):
def connect(self): def connect(self):
"""Connect to host and get meta information.""" """Connect to host and get meta information."""
out.write('Getting headers for %s...' % (self.url,), 2)
try: try:
self.urlobj = urlopen(self.url, referrer=self.referrer) self.urlobj = urlopen(self.url, referrer=self.referrer)
except urllib2.HTTPError, he: except urllib2.HTTPError, he:

View file

@ -5,6 +5,7 @@ import os
from . import loader from . import loader
from .util import fetchUrls from .util import fetchUrls
from .comic import ComicStrip from .comic import ComicStrip
from .output import out
disabled = [] disabled = []
def init_disabled(): def init_disabled():
@ -37,14 +38,23 @@ class _BasicScraper(object):
''' '''
help = 'Sorry, no help for this comic yet.' help = 'Sorry, no help for this comic yet.'
def __init__(self, indices=None): def __init__(self, indexes=None):
"""Initialize internal variables.""" """Initialize internal variables."""
self.urls = set() self.urls = set()
self.indices = indices self.indexes = indexes
def getCurrentStrip(self): def getCurrentStrips(self):
"""Get current comic strip.""" """Get current comic strip."""
return self.getStrip(self.getLatestUrl()) msg = 'Retrieving the current strip'
if self.indexes:
msg += " for indexes %s" % self.indexes
out.write(msg+"...")
if self.indexes:
for index in self.indexes:
url = self.imageUrl % index
yield self.getStrip(url)
else:
yield self.getStrip(self.getLatestUrl())
def getStrip(self, url): def getStrip(self, url):
"""Get comic strip for given URL.""" """Get comic strip for given URL."""
@ -57,8 +67,23 @@ class _BasicScraper(object):
def getAllStrips(self): def getAllStrips(self):
"""Get all comic strips.""" """Get all comic strips."""
seen_urls = set() msg = 'Retrieving all strips'
if self.indexes:
msg += " for indexes %s" % self.indexes
out.write(msg+"...")
if self.indexes:
for index in self.indexes:
url = self.imageUrl % index
for strip in self.getAllStripsFor(url):
yield strip
else:
url = self.getLatestUrl() url = self.getLatestUrl()
for strip in self.getAllStripsFor(url):
yield strip
def getAllStripsFor(self, url):
"""Get all comic strips for a URL."""
seen_urls = set()
while url: while url:
imageUrls, prevUrl = fetchUrls(url, self.imageSearch, self.prevSearch) imageUrls, prevUrl = fetchUrls(url, self.imageSearch, self.prevSearch)
seen_urls.add(url) seen_urls.add(url)