dosage/dosagelib/comic.py

130 lines
4.7 KiB
Python
Raw Normal View History

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
2012-06-20 19:58:13 +00:00
import urllib2
import os
import locale
import rfc822
import time
from .output import out
from .util import urlopen, saneDataSize, normaliseURL
from .progress import progressBar, OperationComplete
from .events import handler
2012-09-26 14:47:39 +00:00
class FetchComicError(IOError):
    """Signals that fetching a comic failed."""
2012-06-20 19:58:13 +00:00
2012-10-11 10:03:12 +00:00
class ComicStrip(object):
    """The image URLs of one comic strip, plus what is needed to download them."""

    def __init__(self, name, parentUrl, imageUrls, namer):
        """Store the strip data.

        name: comic name, handed on to every image downloader.
        parentUrl: URL passed to the namer and used as the referrer of
            each image downloader.
        imageUrls: iterable of image URLs belonging to this strip.
        namer: callable ``namer(imageUrl, parentUrl)`` returning the file
            name to use, or None to fall back to the last URL path segment.
        """
        self.name = name
        self.parentUrl = parentUrl
        self.imageUrls = imageUrls
        self.namer = namer

    def getImages(self):
        """Yield an image downloader for every (normalised) image URL."""
        for imageUrl in self.imageUrls:
            yield self.getDownloader(normaliseURL(imageUrl))

    def getDownloader(self, url):
        """Return a ComicImage downloader for the given image URL."""
        filename = self.namer(url, self.parentUrl)
        if filename is None:
            # Fall back to the last path segment. Index -1 (instead of 1)
            # keeps this from raising IndexError on a URL without any '/'.
            filename = url.rsplit('/', 1)[-1]
        return ComicImage(self.name, url, self.parentUrl, filename)
2012-10-11 10:03:12 +00:00
class ComicImage(object):
    """Downloader for a single comic image.

    connect() opens the URL and reads the response headers; save() calls
    connect() and writes the image below a base directory.
    """

    def __init__(self, name, url, referrer, filename):
        """Set URL and filename.

        name: comic name; save() uses it as the target subdirectory.
        url: image URL to download.
        referrer: passed as ``referrer`` to urlopen().
        filename: target file name; it is split into stem and extension.
        """
        self.name = name
        self.referrer = referrer
        self.url = url
        stem, ext = os.path.splitext(filename)
        # Neutralise path separators so the name stays a single path component.
        self.filename = stem.replace(os.sep, '_')
        self.ext = ext.replace(os.sep, '_')

    def connect(self):
        """Connect to host and get meta information.

        Sets self.urlobj, self.contentLength and self.lastModified, and
        replaces self.ext with the MIME subtype for image/* responses.

        Raises FetchComicError when urlopen() raises urllib2.HTTPError, or
        when the content type is neither image/* nor one of the accepted
        application types.
        """
        out.write('Getting headers for %s...' % (self.url,), 2)
        try:
            self.urlobj = urlopen(self.url, referrer=self.referrer)
        except urllib2.HTTPError as he:
            raise FetchComicError('Unable to retrieve URL.', self.url, he.code)
        headers = self.urlobj.info()
        isImage = headers.getmaintype() == 'image'
        acceptedTypes = ('application/octet-stream',
                         'application/x-shockwave-flash')
        if not isImage and headers.gettype() not in acceptedTypes:
            # Do not leave the rejected response open.
            self.urlobj.close()
            raise FetchComicError('No suitable image found to retrieve.',
                                  self.url)
        # Always use mime type for file extension if it is sane.
        if isImage:
            self.ext = '.' + headers.getsubtype().replace('jpeg', 'jpg')
        self.contentLength = int(headers.get('content-length', 0))
        self.lastModified = headers.get('last-modified')
        out.write('... filename = %r, ext = %r, contentLength = %d' % (
            self.filename, self.ext, self.contentLength), 2)

    def touch(self, filename):
        """Set last modified date on filename.

        Uses the Last-Modified header stored by connect(). Does nothing
        when the header is missing or cannot be parsed.
        """
        # Function-scope import keeps this method self-contained.
        from email.utils import mktime_tz, parsedate_tz
        if not self.lastModified:
            return
        parsed = parsedate_tz(self.lastModified)
        if parsed:
            # mktime_tz() honours the timezone given in the header;
            # time.mktime() would misread the GMT value as local time.
            mtime = mktime_tz(parsed)
            os.utime(filename, (mtime, mtime))

    def save(self, basepath, showProgress=False):
        """Save comic URL to filename on disk.

        basepath: directory below which the comic directory is created.
        showProgress: if true, download in 8192-byte chunks driven by
            progressBar(); otherwise read the whole response at once.

        Returns (path, False) when a file of at least the announced size
        already exists, else (path, True) after a successful download.
        Exceptions from connect() and from the download propagate; a
        partially written file is removed first.
        """
        self.connect()
        try:
            filename = "%s%s" % (self.filename, self.ext)
            comicDir = os.path.join(basepath, self.name.replace('/', os.sep))
            if not os.path.isdir(comicDir):
                os.makedirs(comicDir)
            fn = os.path.join(comicDir, filename)
            if os.path.isfile(fn) and os.path.getsize(fn) >= self.contentLength:
                self.touch(fn)
                out.write('Skipping existing file "%s".' % (fn,), 1)
                return fn, False
            try:
                out.write('Writing comic to file %s...' % (fn,), 3)
                with open(fn, 'wb') as comicOut:
                    startTime = time.time()
                    if showProgress:
                        def pollData():
                            # Copy one chunk and report (bytes read, total);
                            # OperationComplete signals end of data.
                            data = self.urlobj.read(8192)
                            if not data:
                                raise OperationComplete
                            comicOut.write(data)
                            return len(data), self.contentLength
                        progressBar(pollData)
                    else:
                        comicOut.write(self.urlobj.read())
                    endTime = time.time()
                self.touch(fn)
            except BaseException:
                # Never leave a truncated file behind, even on interrupt.
                if os.path.isfile(fn):
                    os.remove(fn)
                raise
            size = os.path.getsize(fn)
            sizeText = locale.format('%d', size, True)
            if endTime != startTime:
                speed = saneDataSize(size / (endTime - startTime))
            else:
                speed = '???'
            attrs = dict(fn=fn, bytes=sizeText, speed=speed)
            out.write('Saved "%(fn)s" (%(bytes)s bytes, %(speed)s/sec).' % attrs, 1)
            handler.comicDownloaded(self.name, fn)
        finally:
            # Covers the skip path and every error path after connect().
            self.urlobj.close()
        return fn, True