dosage/dosagelib/comic.py

120 lines
4.2 KiB
Python
Raw Normal View History

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
2013-02-13 05:28:35 +00:00
# Copyright (C) 2012-2013 Bastian Kleineidam
2012-11-21 20:57:26 +00:00
2012-06-20 19:58:13 +00:00
import os
import rfc822
import time
from .output import out
2012-12-07 23:45:18 +00:00
from .util import getImageObject, normaliseURL, unquote, strsize, getDirname, getFilename
2012-10-12 20:07:50 +00:00
from .events import getHandler
2012-06-20 19:58:13 +00:00
2012-09-26 14:47:39 +00:00
class FetchComicError(IOError):
    """Raised when a comic image cannot be fetched or is not usable.

    Derives from IOError, so callers that already handle IOError also
    catch this exception.
    """
2012-06-20 19:58:13 +00:00
2012-10-11 10:03:12 +00:00
class ComicStrip(object):
    """The image URLs found on one comic strip page."""

    def __init__(self, name, stripUrl, imageUrls, namer, session):
        """Remember the strip data needed to build image downloaders.

        @param name: comic name; also used to derive the target directory
        @param stripUrl: URL of the page the images were found on
        @param imageUrls: iterable of image URLs
        @param namer: callable(imageUrl, stripUrl) returning a filename,
            or None to fall back to the last URL path segment
        @param session: object handed through unchanged to ComicImage
        """
        self.session = session
        self.namer = namer
        self.imageUrls = imageUrls
        self.stripUrl = stripUrl
        self.name = name

    def getImages(self):
        """Yield a ComicImage downloader for every stored image URL."""
        for rawUrl in self.imageUrls:
            cleanUrl = normaliseURL(rawUrl)
            yield self.getDownloader(cleanUrl)

    def getDownloader(self, url):
        """Build the ComicImage downloader for a single image URL."""
        targetName = self.namer(url, self.stripUrl)
        if targetName is None:
            # The namer gave no name: use the text after the last slash.
            targetName = url.rsplit('/', 1)[1]
        targetDir = getDirname(self.name)
        return ComicImage(
            self.name,
            url,
            self.stripUrl,
            targetDir,
            targetName,
            self.session,
        )
2012-10-11 10:03:12 +00:00
class ComicImage(object):
    """A comic image downloader."""

    # Size of the chunks requested from the response while saving.
    ChunkBytes = 1024 * 100  # 100KB

    def __init__(self, name, url, referrer, dirname, filename, session):
        """Set URL and filename.

        @param name: comic name, reported to the event handler after saving
        @param url: URL of the image to download
        @param referrer: referrer URL handed to getImageObject()
        @param dirname: directory (relative to the base path) to save into
        @param filename: desired file name; it is passed through
            getFilename() and then split into base name and extension
        @param session: object handed through to getImageObject()
        """
        self.name = name
        self.referrer = referrer
        self.url = url
        self.dirname = dirname
        filename = getFilename(filename)
        self.filename, self.ext = os.path.splitext(filename)
        self.session = session

    def connect(self):
        """Connect to host and get meta information.

        Sets self.urlobj, self.contentLength and self.lastModified, and
        replaces self.ext with one derived from the MIME subtype when the
        content type is image/*.

        @raise FetchComicError: if the URL cannot be retrieved or the
            content type is neither an image nor one of the accepted
            binary types.
        """
        try:
            self.urlobj = getImageObject(self.url, self.referrer, self.session)
        except IOError as msg:
            raise FetchComicError('Unable to retrieve URL.', self.url, msg)
        # Servers may omit Content-Type entirely; treat that as generic
        # binary data instead of handing None to unquote().
        content_type = unquote(
            self.urlobj.headers.get('content-type', 'application/octet-stream'))
        # Strip parameters such as "; charset=...".
        content_type = content_type.split(';', 1)[0]
        if '/' in content_type:
            maintype, subtype = content_type.split('/', 1)
        else:
            maintype = content_type
            subtype = None
        if maintype != 'image' and content_type not in ('application/octet-stream', 'application/x-shockwave-flash'):
            raise FetchComicError('Content type %r is not an image.' % content_type, self.url)
        # Always use mime type for file extension if it is sane.
        if maintype == 'image':
            self.ext = '.' + subtype.replace('jpeg', 'jpg')
        self.contentLength = int(self.urlobj.headers.get('content-length', 0))
        self.lastModified = self.urlobj.headers.get('last-modified')
        out.debug('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))

    def touch(self, filename):
        """Set last modified date on filename.

        Does nothing when no Last-Modified value is stored or when it
        cannot be parsed.
        """
        if not self.lastModified:
            return
        # Imported locally so this method does not depend on the
        # deprecated rfc822 module; email.utils offers the same parser.
        from email.utils import mktime_tz, parsedate_tz
        parsed = parsedate_tz(self.lastModified)
        if parsed:
            # mktime_tz() honours the zone given in the header (HTTP dates
            # are GMT); time.mktime() would wrongly assume local time.
            mtime = mktime_tz(parsed)
            os.utime(filename, (mtime, mtime))

    def save(self, basepath):
        """Save comic URL to filename on disk.

        @param basepath: base directory; the image is stored below
            basepath/self.dirname
        @return: tuple (path, saved) where saved is False if an existing
            file of sufficient size was kept and True after a download
        """
        self.connect()
        filename = "%s%s" % (self.filename, self.ext)
        comicSize = self.contentLength
        comicDir = os.path.join(basepath, self.dirname)
        if not os.path.isdir(comicDir):
            os.makedirs(comicDir)
        fn = os.path.join(comicDir, filename)
        # An existing file at least as large as the announced content
        # length counts as already downloaded.
        if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize:
            self.touch(fn)
            out.info('Skipping existing file "%s".' % fn)
            return fn, False
        try:
            out.debug('Writing comic to file %s...' % fn)
            with open(fn, 'wb') as comicOut:
                for chunk in self.urlobj.iter_content(chunk_size=self.ChunkBytes):
                    comicOut.write(chunk)
            self.touch(fn)
        except Exception:
            # Do not leave a partially written file behind.
            if os.path.isfile(fn):
                os.remove(fn)
            raise
        else:
            size = strsize(os.path.getsize(fn))
            out.info("Saved %s (%s)." % (fn, size))
            getHandler().comicDownloaded(self.name, fn)
        return fn, True