Retry empty downloads and don't set a manual modification time.

2013-03-07 18:20:38 +01:00 · 2013-03-07 18:20:38 +01:00 · 9f13af7750
commit 9f13af7750
parent 6f2aebe8c0
2 changed files with 11 additions and 30 deletions
--- a/doc/changelog.txt
+++ b/doc/changelog.txt
@@ -9,6 +9,7 @@ Fixes:
 - cmdline: Catch error when piping output to another
  program or file under Windows.
  Closes: GH bug #13
 - comics: Retry download on empty content to reduce empty file errors.
 Dosage 1.12 (released 4.3.2013)
--- a/dosagelib/comic.py
+++ b/dosagelib/comic.py
@@ -1,19 +1,12 @@
 # -*- coding: iso-8859-1 -*-
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2013 Bastian Kleineidam
 import os
 import rfc822
 import time
 from .output import out
 from .util import getImageObject, normaliseURL, unquote, strsize, getDirname, getFilename
 from .events import getHandler
 class FetchComicError(IOError):
    """Exception for comic fetching errors."""
    pass
 class ComicStrip(object):
    """A list of comic image URLs."""
@@ -59,8 +52,7 @@ class ComicImage(object):
        try:
            self.urlobj = getImageObject(self.url, self.referrer, self.session)
        except IOError as msg:
-            raise FetchComicError('Unable to retrieve URL.', self.url, msg)
+            raise IOError('error retrieving URL %s: %s' % (self.url, msg))
        content_type = unquote(self.urlobj.headers.get('content-type'))
        content_type = content_type.split(';', 1)[0]
        if '/' in content_type:
@@ -69,48 +61,37 @@ class ComicImage(object):
            maintype = content_type
            subtype = None
        if maintype != 'image' and content_type not in ('application/octet-stream', 'application/x-shockwave-flash'):
-            raise FetchComicError('Content type %r is not an image.' % content_type, self.url)
+            raise IOError('content type %r is not an image at %s' % (content_type, self.url))
        # Always use mime type for file extension if it is sane.
        if maintype == 'image':
            self.ext = '.' + subtype.replace('jpeg', 'jpg')
        self.contentLength = int(self.urlobj.headers.get('content-length', 0))
        self.lastModified = self.urlobj.headers.get('last-modified')
        out.debug('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))
    def touch(self, filename):
        """Set last modified date on filename."""
        if self.lastModified:
            tt = rfc822.parsedate(self.lastModified)
            if tt:
                mtime = time.mktime(tt)
                os.utime(filename, (mtime, mtime))
    def save(self, basepath):
        """Save comic URL to filename on disk."""
        out.info("Get image URL %s" % self.url, level=1)
        self.connect()
        filename = "%s%s" % (self.filename, self.ext)
        comicSize = self.contentLength
        comicDir = os.path.join(basepath, self.dirname)
        if not os.path.isdir(comicDir):
            os.makedirs(comicDir)
        fn = os.path.join(comicDir, filename)
-        # compare with >= since comicSize could be the compressed size
+        # compare with >= since content length could be the compressed size
-        if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize:
+        if os.path.isfile(fn) and os.path.getsize(fn) >= self.contentLength:
            self.touch(fn)
            out.info('Skipping existing file "%s".' % fn)
            return fn, False
-
+        content = self.urlobj.content
        if not content:
            out.warn("Empty content from %s, try again..." % self.url)
            self.connect()
            content = self.urlobj.content
        try:
            out.debug('Writing comic to file %s...' % fn)
            with open(fn, 'wb') as comicOut:
-                for chunk in self.urlobj.iter_content(chunk_size=self.ChunkBytes):
+                comicOut.write(content)
-                    comicOut.write(chunk)
                comicOut.flush()
                os.fsync(comicOut.fileno())
            self.touch(fn)
            size = os.path.getsize(fn)
            if size == 0:
                raise OSError("empty file %s" % fn)
@@ -121,5 +102,4 @@ class ComicImage(object):
        else:
            out.info("Saved %s (%s)." % (fn, strsize(size)))
            getHandler().comicDownloaded(self.name, fn)
        return fn, True