Retry downloads that return empty content, and stop manually setting the file modification time from the Last-Modified header.

This commit is contained in:
Bastian Kleineidam 2013-03-07 18:20:38 +01:00
parent 6f2aebe8c0
commit 9f13af7750
2 changed files with 11 additions and 30 deletions

View file

@ -9,6 +9,7 @@ Fixes:
- cmdline: Catch error when piping output to another - cmdline: Catch error when piping output to another
program or file under Windows. program or file under Windows.
Closes: GH bug #13 Closes: GH bug #13
- comics: Retry download on empty content to reduce empty file errors.
Dosage 1.12 (released 4.3.2013) Dosage 1.12 (released 4.3.2013)

View file

@ -1,19 +1,12 @@
# -*- coding: iso-8859-1 -*- # -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
import os import os
import rfc822
import time
from .output import out from .output import out
from .util import getImageObject, normaliseURL, unquote, strsize, getDirname, getFilename from .util import getImageObject, normaliseURL, unquote, strsize, getDirname, getFilename
from .events import getHandler from .events import getHandler
class FetchComicError(IOError):
"""Exception for comic fetching errors."""
pass
class ComicStrip(object): class ComicStrip(object):
"""A list of comic image URLs.""" """A list of comic image URLs."""
@ -59,8 +52,7 @@ class ComicImage(object):
try: try:
self.urlobj = getImageObject(self.url, self.referrer, self.session) self.urlobj = getImageObject(self.url, self.referrer, self.session)
except IOError as msg: except IOError as msg:
raise FetchComicError('Unable to retrieve URL.', self.url, msg) raise IOError('error retrieving URL %s: %s' % (self.url, msg))
content_type = unquote(self.urlobj.headers.get('content-type')) content_type = unquote(self.urlobj.headers.get('content-type'))
content_type = content_type.split(';', 1)[0] content_type = content_type.split(';', 1)[0]
if '/' in content_type: if '/' in content_type:
@ -69,48 +61,37 @@ class ComicImage(object):
maintype = content_type maintype = content_type
subtype = None subtype = None
if maintype != 'image' and content_type not in ('application/octet-stream', 'application/x-shockwave-flash'): if maintype != 'image' and content_type not in ('application/octet-stream', 'application/x-shockwave-flash'):
raise FetchComicError('Content type %r is not an image.' % content_type, self.url) raise IOError('content type %r is not an image at %s' % (content_type, self.url))
# Always use mime type for file extension if it is sane. # Always use mime type for file extension if it is sane.
if maintype == 'image': if maintype == 'image':
self.ext = '.' + subtype.replace('jpeg', 'jpg') self.ext = '.' + subtype.replace('jpeg', 'jpg')
self.contentLength = int(self.urlobj.headers.get('content-length', 0)) self.contentLength = int(self.urlobj.headers.get('content-length', 0))
self.lastModified = self.urlobj.headers.get('last-modified')
out.debug('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength)) out.debug('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))
def touch(self, filename):
"""Set last modified date on filename."""
if self.lastModified:
tt = rfc822.parsedate(self.lastModified)
if tt:
mtime = time.mktime(tt)
os.utime(filename, (mtime, mtime))
def save(self, basepath): def save(self, basepath):
"""Save comic URL to filename on disk.""" """Save comic URL to filename on disk."""
out.info("Get image URL %s" % self.url, level=1) out.info("Get image URL %s" % self.url, level=1)
self.connect() self.connect()
filename = "%s%s" % (self.filename, self.ext) filename = "%s%s" % (self.filename, self.ext)
comicSize = self.contentLength
comicDir = os.path.join(basepath, self.dirname) comicDir = os.path.join(basepath, self.dirname)
if not os.path.isdir(comicDir): if not os.path.isdir(comicDir):
os.makedirs(comicDir) os.makedirs(comicDir)
fn = os.path.join(comicDir, filename) fn = os.path.join(comicDir, filename)
# compare with >= since comicSize could be the compressed size # compare with >= since content length could be the compressed size
if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize: if os.path.isfile(fn) and os.path.getsize(fn) >= self.contentLength:
self.touch(fn)
out.info('Skipping existing file "%s".' % fn) out.info('Skipping existing file "%s".' % fn)
return fn, False return fn, False
content = self.urlobj.content
if not content:
out.warn("Empty content from %s, try again..." % self.url)
self.connect()
content = self.urlobj.content
try: try:
out.debug('Writing comic to file %s...' % fn) out.debug('Writing comic to file %s...' % fn)
with open(fn, 'wb') as comicOut: with open(fn, 'wb') as comicOut:
for chunk in self.urlobj.iter_content(chunk_size=self.ChunkBytes): comicOut.write(content)
comicOut.write(chunk)
comicOut.flush() comicOut.flush()
os.fsync(comicOut.fileno()) os.fsync(comicOut.fileno())
self.touch(fn)
size = os.path.getsize(fn) size = os.path.getsize(fn)
if size == 0: if size == 0:
raise OSError("empty file %s" % fn) raise OSError("empty file %s" % fn)
@ -121,5 +102,4 @@ class ComicImage(object):
else: else:
out.info("Saved %s (%s)." % (fn, strsize(size))) out.info("Saved %s (%s)." % (fn, strsize(size)))
getHandler().comicDownloaded(self.name, fn) getHandler().comicDownloaded(self.name, fn)
return fn, True return fn, True