dosage/dosagelib/comic.py
2012-11-26 18:44:31 +01:00

123 lines
4.3 KiB
Python

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
import os
import locale
import rfc822
import time
from .output import out
from .util import urlopen, saneDataSize, normaliseURL
from .events import getHandler
class FetchComicError(IOError):
"""Exception for comic fetching errors."""
pass
class ComicStrip(object):
"""A list of comic image URLs."""
def __init__(self, name, stripUrl, imageUrls, namer):
"""Store the image URL list."""
self.name = name
self.stripUrl = stripUrl
self.imageUrls = imageUrls
self.namer = namer
def getImages(self):
"""Get a list of image downloaders."""
for imageUrl in self.imageUrls:
yield self.getDownloader(normaliseURL(imageUrl))
def getDownloader(self, url):
"""Get an image downloader."""
filename = self.namer(url, self.stripUrl)
if filename is None:
filename = url.rsplit('/', 1)[1]
return ComicImage(self.name, url, self.stripUrl, filename)
class ComicImage(object):
"""A comic image downloader."""
def __init__(self, name, url, referrer, filename):
"""Set URL and filename."""
self.name = name
self.referrer = referrer
self.url = url
self.filename, self.ext = os.path.splitext(filename)
self.filename = self.filename.replace(os.sep, '_')
self.ext = self.ext.replace(os.sep, '_')
def connect(self):
"""Connect to host and get meta information."""
try:
self.urlobj = urlopen(self.url, referrer=self.referrer)
except IOError as he:
raise FetchComicError('Unable to retrieve URL.', self.url, he.code)
content_type = self.urlobj.headers.get('content-type')
content_type = content_type.split(';', 1)[0]
if '/' in content_type:
maintype, subtype = content_type.split('/', 1)
else:
maintype = content_type
subtype = None
if maintype != 'image' and content_type not in ('application/octet-stream', 'application/x-shockwave-flash'):
raise FetchComicError('No suitable image found to retrieve.', self.url)
# Always use mime type for file extension if it is sane.
if maintype == 'image':
self.ext = '.' + subtype.replace('jpeg', 'jpg')
self.contentLength = int(self.urlobj.headers.get('content-length', 0))
self.lastModified = self.urlobj.headers.get('last-modified')
out.write('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength), 2)
def touch(self, filename):
"""Set last modified date on filename."""
if self.lastModified:
tt = rfc822.parsedate(self.lastModified)
if tt:
mtime = time.mktime(tt)
os.utime(filename, (mtime, mtime))
def save(self, basepath):
"""Save comic URL to filename on disk."""
self.connect()
filename = "%s%s" % (self.filename, self.ext)
comicSize = self.contentLength
comicDir = os.path.join(basepath, self.name.replace('/', os.sep))
if not os.path.isdir(comicDir):
os.makedirs(comicDir)
fn = os.path.join(comicDir, filename)
if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize:
self.touch(fn)
out.write('Skipping existing file "%s".' % (fn,), 1)
return fn, False
try:
out.write('Writing comic to file %s...' % (fn,), 3)
with open(fn, 'wb') as comicOut:
startTime = time.time()
comicOut.write(self.urlobj.content)
endTime = time.time()
self.touch(fn)
except:
if os.path.isfile(fn):
os.remove(fn)
raise
else:
size = os.path.getsize(fn)
bytes = locale.format('%d', size, True)
if endTime != startTime:
speed = saneDataSize(size / (endTime - startTime))
else:
speed = '???'
attrs = dict(fn=fn, bytes=bytes, speed=speed)
out.write('Saved "%(fn)s" (%(bytes)s bytes, %(speed)s/sec).' % attrs, 1)
getHandler().comicDownloaded(self.name, fn)
return fn, True