Fix indexed retrieval.
This commit is contained in:
parent
78f44e9d9c
commit
06008d4266
3 changed files with 48 additions and 15 deletions
27
dosage
27
dosage
|
@ -64,13 +64,16 @@ def setOutputInfo(options):
|
||||||
def saveComicStrip(strip, basepath):
|
def saveComicStrip(strip, basepath):
|
||||||
"""Save a comic strip which can consist of multiple images."""
|
"""Save a comic strip which can consist of multiple images."""
|
||||||
errors = 0
|
errors = 0
|
||||||
|
allskipped = True
|
||||||
for image in strip.getImages():
|
for image in strip.getImages():
|
||||||
try:
|
try:
|
||||||
image.save(basepath)
|
filename, saved = image.save(basepath)
|
||||||
|
if saved:
|
||||||
|
allskipped = False
|
||||||
except IOError, msg:
|
except IOError, msg:
|
||||||
out.write('Error saving %s: %s' % (image.filename, msg))
|
out.write('Error saving %s: %s' % (image.filename, msg))
|
||||||
errors += 1
|
errors += 1
|
||||||
return errors
|
return errors, allskipped
|
||||||
|
|
||||||
|
|
||||||
def displayHelp(comics, basepath):
|
def displayHelp(comics, basepath):
|
||||||
|
@ -88,13 +91,19 @@ def getComics(options, comics):
|
||||||
for scraperobj in getScrapers(comics, options.basepath):
|
for scraperobj in getScrapers(comics, options.basepath):
|
||||||
out.context = scraperobj.get_name()
|
out.context = scraperobj.get_name()
|
||||||
if options.all:
|
if options.all:
|
||||||
out.write('Retrieving all strips...')
|
|
||||||
strips = scraperobj.getAllStrips()
|
strips = scraperobj.getAllStrips()
|
||||||
else:
|
else:
|
||||||
out.write('Retrieving the current strip...')
|
strips = scraperobj.getCurrentStrips()
|
||||||
strips = [scraperobj.getCurrentStrip()]
|
first = True
|
||||||
for strip in strips:
|
for strip in strips:
|
||||||
errors += saveComicStrip(strip, options.basepath)
|
_errors, skipped = saveComicStrip(strip, options.basepath)
|
||||||
|
errors += _errors
|
||||||
|
if not first and scraperobj.indexes:
|
||||||
|
# stop when indexed retrieval skipped all images for one
|
||||||
|
# comic strip (except the first one)
|
||||||
|
out.write("Stop retrieval because image file already exists")
|
||||||
|
break
|
||||||
|
first = False
|
||||||
events.handler.end()
|
events.handler.end()
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
|
@ -161,11 +170,11 @@ def getScrapers(comics, basepath=None):
|
||||||
for comic in comics:
|
for comic in comics:
|
||||||
if ':' in comic:
|
if ':' in comic:
|
||||||
name, index = comic.split(':', 1)
|
name, index = comic.split(':', 1)
|
||||||
indices = index.split(',')
|
indexes = index.split(',')
|
||||||
else:
|
else:
|
||||||
name = comic
|
name = comic
|
||||||
indices = None
|
indexes = None
|
||||||
yield scraper.get_scraper(name)(indices=indices)
|
yield scraper.get_scraper(name)(indexes=indexes)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Parse options and execute commands."""
|
"""Parse options and execute commands."""
|
||||||
|
|
|
@ -51,7 +51,6 @@ class ComicImage(object):
|
||||||
|
|
||||||
def connect(self):
|
def connect(self):
|
||||||
"""Connect to host and get meta information."""
|
"""Connect to host and get meta information."""
|
||||||
out.write('Getting headers for %s...' % (self.url,), 2)
|
|
||||||
try:
|
try:
|
||||||
self.urlobj = urlopen(self.url, referrer=self.referrer)
|
self.urlobj = urlopen(self.url, referrer=self.referrer)
|
||||||
except urllib2.HTTPError, he:
|
except urllib2.HTTPError, he:
|
||||||
|
|
|
@ -5,6 +5,7 @@ import os
|
||||||
from . import loader
|
from . import loader
|
||||||
from .util import fetchUrls
|
from .util import fetchUrls
|
||||||
from .comic import ComicStrip
|
from .comic import ComicStrip
|
||||||
|
from .output import out
|
||||||
|
|
||||||
disabled = []
|
disabled = []
|
||||||
def init_disabled():
|
def init_disabled():
|
||||||
|
@ -37,14 +38,23 @@ class _BasicScraper(object):
|
||||||
'''
|
'''
|
||||||
help = 'Sorry, no help for this comic yet.'
|
help = 'Sorry, no help for this comic yet.'
|
||||||
|
|
||||||
def __init__(self, indices=None):
|
def __init__(self, indexes=None):
|
||||||
"""Initialize internal variables."""
|
"""Initialize internal variables."""
|
||||||
self.urls = set()
|
self.urls = set()
|
||||||
self.indices = indices
|
self.indexes = indexes
|
||||||
|
|
||||||
def getCurrentStrip(self):
|
def getCurrentStrips(self):
|
||||||
"""Get current comic strip."""
|
"""Get current comic strip."""
|
||||||
return self.getStrip(self.getLatestUrl())
|
msg = 'Retrieving the current strip'
|
||||||
|
if self.indexes:
|
||||||
|
msg += " for indexes %s" % self.indexes
|
||||||
|
out.write(msg+"...")
|
||||||
|
if self.indexes:
|
||||||
|
for index in self.indexes:
|
||||||
|
url = self.imageUrl % index
|
||||||
|
yield self.getStrip(url)
|
||||||
|
else:
|
||||||
|
yield self.getStrip(self.getLatestUrl())
|
||||||
|
|
||||||
def getStrip(self, url):
|
def getStrip(self, url):
|
||||||
"""Get comic strip for given URL."""
|
"""Get comic strip for given URL."""
|
||||||
|
@ -57,8 +67,23 @@ class _BasicScraper(object):
|
||||||
|
|
||||||
def getAllStrips(self):
|
def getAllStrips(self):
|
||||||
"""Get all comic strips."""
|
"""Get all comic strips."""
|
||||||
seen_urls = set()
|
msg = 'Retrieving all strips'
|
||||||
|
if self.indexes:
|
||||||
|
msg += " for indexes %s" % self.indexes
|
||||||
|
out.write(msg+"...")
|
||||||
|
if self.indexes:
|
||||||
|
for index in self.indexes:
|
||||||
|
url = self.imageUrl % index
|
||||||
|
for strip in self.getAllStripsFor(url):
|
||||||
|
yield strip
|
||||||
|
else:
|
||||||
url = self.getLatestUrl()
|
url = self.getLatestUrl()
|
||||||
|
for strip in self.getAllStripsFor(url):
|
||||||
|
yield strip
|
||||||
|
|
||||||
|
def getAllStripsFor(self, url):
|
||||||
|
"""Get all comic strips for an URL."""
|
||||||
|
seen_urls = set()
|
||||||
while url:
|
while url:
|
||||||
imageUrls, prevUrl = fetchUrls(url, self.imageSearch, self.prevSearch)
|
imageUrls, prevUrl = fetchUrls(url, self.imageSearch, self.prevSearch)
|
||||||
seen_urls.add(url)
|
seen_urls.add(url)
|
||||||
|
|
Loading…
Reference in a new issue