Retry empty downloads and don't set a manual modification time.
This commit is contained in:
parent
6f2aebe8c0
commit
9f13af7750
2 changed files with 11 additions and 30 deletions
|
@ -9,6 +9,7 @@ Fixes:
|
||||||
- cmdline: Catch error when piping output to another
|
- cmdline: Catch error when piping output to another
|
||||||
program or file under Windows.
|
program or file under Windows.
|
||||||
Closes: GH bug #13
|
Closes: GH bug #13
|
||||||
|
- comics: Retry download on empty content to reduce empty file errors.
|
||||||
|
|
||||||
|
|
||||||
Dosage 1.12 (released 4.3.2013)
|
Dosage 1.12 (released 4.3.2013)
|
||||||
|
|
|
@ -1,19 +1,12 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: iso-8859-1 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2013 Bastian Kleineidam
|
# Copyright (C) 2012-2013 Bastian Kleineidam
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import rfc822
|
|
||||||
import time
|
|
||||||
|
|
||||||
from .output import out
|
from .output import out
|
||||||
from .util import getImageObject, normaliseURL, unquote, strsize, getDirname, getFilename
|
from .util import getImageObject, normaliseURL, unquote, strsize, getDirname, getFilename
|
||||||
from .events import getHandler
|
from .events import getHandler
|
||||||
|
|
||||||
class FetchComicError(IOError):
|
|
||||||
"""Exception for comic fetching errors."""
|
|
||||||
pass
|
|
||||||
|
|
||||||
class ComicStrip(object):
|
class ComicStrip(object):
|
||||||
"""A list of comic image URLs."""
|
"""A list of comic image URLs."""
|
||||||
|
|
||||||
|
@ -59,8 +52,7 @@ class ComicImage(object):
|
||||||
try:
|
try:
|
||||||
self.urlobj = getImageObject(self.url, self.referrer, self.session)
|
self.urlobj = getImageObject(self.url, self.referrer, self.session)
|
||||||
except IOError as msg:
|
except IOError as msg:
|
||||||
raise FetchComicError('Unable to retrieve URL.', self.url, msg)
|
raise IOError('error retrieving URL %s: %s' % (self.url, msg))
|
||||||
|
|
||||||
content_type = unquote(self.urlobj.headers.get('content-type'))
|
content_type = unquote(self.urlobj.headers.get('content-type'))
|
||||||
content_type = content_type.split(';', 1)[0]
|
content_type = content_type.split(';', 1)[0]
|
||||||
if '/' in content_type:
|
if '/' in content_type:
|
||||||
|
@ -69,48 +61,37 @@ class ComicImage(object):
|
||||||
maintype = content_type
|
maintype = content_type
|
||||||
subtype = None
|
subtype = None
|
||||||
if maintype != 'image' and content_type not in ('application/octet-stream', 'application/x-shockwave-flash'):
|
if maintype != 'image' and content_type not in ('application/octet-stream', 'application/x-shockwave-flash'):
|
||||||
raise FetchComicError('Content type %r is not an image.' % content_type, self.url)
|
raise IOError('content type %r is not an image at %s' % (content_type, self.url))
|
||||||
|
|
||||||
# Always use mime type for file extension if it is sane.
|
# Always use mime type for file extension if it is sane.
|
||||||
if maintype == 'image':
|
if maintype == 'image':
|
||||||
self.ext = '.' + subtype.replace('jpeg', 'jpg')
|
self.ext = '.' + subtype.replace('jpeg', 'jpg')
|
||||||
self.contentLength = int(self.urlobj.headers.get('content-length', 0))
|
self.contentLength = int(self.urlobj.headers.get('content-length', 0))
|
||||||
self.lastModified = self.urlobj.headers.get('last-modified')
|
|
||||||
out.debug('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))
|
out.debug('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))
|
||||||
|
|
||||||
def touch(self, filename):
|
|
||||||
"""Set last modified date on filename."""
|
|
||||||
if self.lastModified:
|
|
||||||
tt = rfc822.parsedate(self.lastModified)
|
|
||||||
if tt:
|
|
||||||
mtime = time.mktime(tt)
|
|
||||||
os.utime(filename, (mtime, mtime))
|
|
||||||
|
|
||||||
def save(self, basepath):
|
def save(self, basepath):
|
||||||
"""Save comic URL to filename on disk."""
|
"""Save comic URL to filename on disk."""
|
||||||
out.info("Get image URL %s" % self.url, level=1)
|
out.info("Get image URL %s" % self.url, level=1)
|
||||||
self.connect()
|
self.connect()
|
||||||
filename = "%s%s" % (self.filename, self.ext)
|
filename = "%s%s" % (self.filename, self.ext)
|
||||||
comicSize = self.contentLength
|
|
||||||
comicDir = os.path.join(basepath, self.dirname)
|
comicDir = os.path.join(basepath, self.dirname)
|
||||||
if not os.path.isdir(comicDir):
|
if not os.path.isdir(comicDir):
|
||||||
os.makedirs(comicDir)
|
os.makedirs(comicDir)
|
||||||
|
|
||||||
fn = os.path.join(comicDir, filename)
|
fn = os.path.join(comicDir, filename)
|
||||||
# compare with >= since comicSize could be the compressed size
|
# compare with >= since content length could be the compressed size
|
||||||
if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize:
|
if os.path.isfile(fn) and os.path.getsize(fn) >= self.contentLength:
|
||||||
self.touch(fn)
|
|
||||||
out.info('Skipping existing file "%s".' % fn)
|
out.info('Skipping existing file "%s".' % fn)
|
||||||
return fn, False
|
return fn, False
|
||||||
|
content = self.urlobj.content
|
||||||
|
if not content:
|
||||||
|
out.warn("Empty content from %s, try again..." % self.url)
|
||||||
|
self.connect()
|
||||||
|
content = self.urlobj.content
|
||||||
try:
|
try:
|
||||||
out.debug('Writing comic to file %s...' % fn)
|
out.debug('Writing comic to file %s...' % fn)
|
||||||
with open(fn, 'wb') as comicOut:
|
with open(fn, 'wb') as comicOut:
|
||||||
for chunk in self.urlobj.iter_content(chunk_size=self.ChunkBytes):
|
comicOut.write(content)
|
||||||
comicOut.write(chunk)
|
|
||||||
comicOut.flush()
|
comicOut.flush()
|
||||||
os.fsync(comicOut.fileno())
|
os.fsync(comicOut.fileno())
|
||||||
self.touch(fn)
|
|
||||||
size = os.path.getsize(fn)
|
size = os.path.getsize(fn)
|
||||||
if size == 0:
|
if size == 0:
|
||||||
raise OSError("empty file %s" % fn)
|
raise OSError("empty file %s" % fn)
|
||||||
|
@ -121,5 +102,4 @@ class ComicImage(object):
|
||||||
else:
|
else:
|
||||||
out.info("Saved %s (%s)." % (fn, strsize(size)))
|
out.info("Saved %s (%s)." % (fn, strsize(size)))
|
||||||
getHandler().comicDownloaded(self.name, fn)
|
getHandler().comicDownloaded(self.name, fn)
|
||||||
|
|
||||||
return fn, True
|
return fn, True
|
||||||
|
|
Loading…
Reference in a new issue