Fix indexed retrieval.
This commit is contained in:
parent
78f44e9d9c
commit
06008d4266
3 changed files with 48 additions and 15 deletions
27
dosage
27
dosage
|
@ -64,13 +64,16 @@ def setOutputInfo(options):
|
|||
def saveComicStrip(strip, basepath):
|
||||
"""Save a comic strip which can consist of multiple images."""
|
||||
errors = 0
|
||||
allskipped = True
|
||||
for image in strip.getImages():
|
||||
try:
|
||||
image.save(basepath)
|
||||
filename, saved = image.save(basepath)
|
||||
if saved:
|
||||
allskipped = False
|
||||
except IOError, msg:
|
||||
out.write('Error saving %s: %s' % (image.filename, msg))
|
||||
errors += 1
|
||||
return errors
|
||||
return errors, allskipped
|
||||
|
||||
|
||||
def displayHelp(comics, basepath):
|
||||
|
@ -88,13 +91,19 @@ def getComics(options, comics):
|
|||
for scraperobj in getScrapers(comics, options.basepath):
|
||||
out.context = scraperobj.get_name()
|
||||
if options.all:
|
||||
out.write('Retrieving all strips...')
|
||||
strips = scraperobj.getAllStrips()
|
||||
else:
|
||||
out.write('Retrieving the current strip...')
|
||||
strips = [scraperobj.getCurrentStrip()]
|
||||
strips = scraperobj.getCurrentStrips()
|
||||
first = True
|
||||
for strip in strips:
|
||||
errors += saveComicStrip(strip, options.basepath)
|
||||
_errors, skipped = saveComicStrip(strip, options.basepath)
|
||||
errors += _errors
|
||||
if not first and scraperobj.indexes:
|
||||
# stop when indexed retrieval skipped all images for one
|
||||
# comic strip (except the first one)
|
||||
out.write("Stop retrieval because image file already exists")
|
||||
break
|
||||
first = False
|
||||
events.handler.end()
|
||||
return errors
|
||||
|
||||
|
@ -161,11 +170,11 @@ def getScrapers(comics, basepath=None):
|
|||
for comic in comics:
|
||||
if ':' in comic:
|
||||
name, index = comic.split(':', 1)
|
||||
indices = index.split(',')
|
||||
indexes = index.split(',')
|
||||
else:
|
||||
name = comic
|
||||
indices = None
|
||||
yield scraper.get_scraper(name)(indices=indices)
|
||||
indexes = None
|
||||
yield scraper.get_scraper(name)(indexes=indexes)
|
||||
|
||||
def main():
|
||||
"""Parse options and execute commands."""
|
||||
|
|
|
@ -51,7 +51,6 @@ class ComicImage(object):
|
|||
|
||||
def connect(self):
|
||||
"""Connect to host and get meta information."""
|
||||
out.write('Getting headers for %s...' % (self.url,), 2)
|
||||
try:
|
||||
self.urlobj = urlopen(self.url, referrer=self.referrer)
|
||||
except urllib2.HTTPError, he:
|
||||
|
|
|
@ -5,6 +5,7 @@ import os
|
|||
from . import loader
|
||||
from .util import fetchUrls
|
||||
from .comic import ComicStrip
|
||||
from .output import out
|
||||
|
||||
disabled = []
|
||||
def init_disabled():
|
||||
|
@ -37,14 +38,23 @@ class _BasicScraper(object):
|
|||
'''
|
||||
help = 'Sorry, no help for this comic yet.'
|
||||
|
||||
def __init__(self, indices=None):
|
||||
def __init__(self, indexes=None):
|
||||
"""Initialize internal variables."""
|
||||
self.urls = set()
|
||||
self.indices = indices
|
||||
self.indexes = indexes
|
||||
|
||||
def getCurrentStrip(self):
|
||||
def getCurrentStrips(self):
|
||||
"""Get current comic strip."""
|
||||
return self.getStrip(self.getLatestUrl())
|
||||
msg = 'Retrieving the current strip'
|
||||
if self.indexes:
|
||||
msg += " for indexes %s" % self.indexes
|
||||
out.write(msg+"...")
|
||||
if self.indexes:
|
||||
for index in self.indexes:
|
||||
url = self.imageUrl % index
|
||||
yield self.getStrip(url)
|
||||
else:
|
||||
yield self.getStrip(self.getLatestUrl())
|
||||
|
||||
def getStrip(self, url):
|
||||
"""Get comic strip for given URL."""
|
||||
|
@ -57,8 +67,23 @@ class _BasicScraper(object):
|
|||
|
||||
def getAllStrips(self):
|
||||
"""Get all comic strips."""
|
||||
msg = 'Retrieving all strips'
|
||||
if self.indexes:
|
||||
msg += " for indexes %s" % self.indexes
|
||||
out.write(msg+"...")
|
||||
if self.indexes:
|
||||
for index in self.indexes:
|
||||
url = self.imageUrl % index
|
||||
for strip in self.getAllStripsFor(url):
|
||||
yield strip
|
||||
else:
|
||||
url = self.getLatestUrl()
|
||||
for strip in self.getAllStripsFor(url):
|
||||
yield strip
|
||||
|
||||
def getAllStripsFor(self, url):
|
||||
"""Get all comic strips for a URL."""
|
||||
seen_urls = set()
|
||||
url = self.getLatestUrl()
|
||||
while url:
|
||||
imageUrls, prevUrl = fetchUrls(url, self.imageSearch, self.prevSearch)
|
||||
seen_urls.add(url)
|
||||
|
|
Loading…
Reference in a new issue