diff --git a/doc/changelog.txt b/doc/changelog.txt index 3286db979..67d4080f3 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -9,6 +9,7 @@ Fixes: - cmdline: Catch error when piping output to another program or file under Windows. Closes: GH bug #13 +- comics: Retry download on empty content to reduce empty file errors. Dosage 1.12 (released 4.3.2013) diff --git a/dosagelib/comic.py b/dosagelib/comic.py index 6c5bf28b0..d69957ebb 100644 --- a/dosagelib/comic.py +++ b/dosagelib/comic.py @@ -1,19 +1,12 @@ # -*- coding: iso-8859-1 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2013 Bastian Kleineidam - import os -import rfc822 -import time from .output import out from .util import getImageObject, normaliseURL, unquote, strsize, getDirname, getFilename from .events import getHandler -class FetchComicError(IOError): - """Exception for comic fetching errors.""" - pass - class ComicStrip(object): """A list of comic image URLs.""" @@ -59,8 +52,7 @@ class ComicImage(object): try: self.urlobj = getImageObject(self.url, self.referrer, self.session) except IOError as msg: - raise FetchComicError('Unable to retrieve URL.', self.url, msg) - + raise IOError('error retrieving URL %s: %s' % (self.url, msg)) content_type = unquote(self.urlobj.headers.get('content-type')) content_type = content_type.split(';', 1)[0] if '/' in content_type: @@ -69,48 +61,37 @@ class ComicImage(object): maintype = content_type subtype = None if maintype != 'image' and content_type not in ('application/octet-stream', 'application/x-shockwave-flash'): - raise FetchComicError('Content type %r is not an image.' % content_type, self.url) - + raise IOError('content type %r is not an image at %s' % (content_type, self.url)) # Always use mime type for file extension if it is sane. if maintype == 'image': self.ext = '.' + subtype.replace('jpeg', 'jpg') self.contentLength = int(self.urlobj.headers.get('content-length', 0)) - self.lastModified = self.urlobj.headers.get('last-modified') out.debug('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength)) - def touch(self, filename): - """Set last modified date on filename.""" - if self.lastModified: - tt = rfc822.parsedate(self.lastModified) - if tt: - mtime = time.mktime(tt) - os.utime(filename, (mtime, mtime)) - def save(self, basepath): """Save comic URL to filename on disk.""" out.info("Get image URL %s" % self.url, level=1) self.connect() filename = "%s%s" % (self.filename, self.ext) - comicSize = self.contentLength comicDir = os.path.join(basepath, self.dirname) if not os.path.isdir(comicDir): os.makedirs(comicDir) - fn = os.path.join(comicDir, filename) - # compare with >= since comicSize could be the compressed size - if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize: - self.touch(fn) + # compare with >= since content length could be the compressed size + if os.path.isfile(fn) and os.path.getsize(fn) >= self.contentLength: out.info('Skipping existing file "%s".' % fn) return fn, False - + content = self.urlobj.content + if not content: + out.warn("Empty content from %s, try again..." % self.url) + self.connect() + content = self.urlobj.content try: out.debug('Writing comic to file %s...' % fn) with open(fn, 'wb') as comicOut: - for chunk in self.urlobj.iter_content(chunk_size=self.ChunkBytes): - comicOut.write(chunk) + comicOut.write(content) comicOut.flush() os.fsync(comicOut.fileno()) - self.touch(fn) size = os.path.getsize(fn) if size == 0: raise OSError("empty file %s" % fn) @@ -121,5 +102,4 @@ class ComicImage(object): else: out.info("Saved %s (%s)." % (fn, strsize(size))) getHandler().comicDownloaded(self.name, fn) - return fn, True