dosage/dosagelib/comic.py

128 lines
4.6 KiB
Python
Raw Normal View History

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
2012-06-20 19:58:13 +00:00
import urllib2
import os
import locale
import rfc822
import time
import shutil
from .output import out
from .util import urlopen, saneDataSize, normaliseURL
from .progress import progressBar, OperationComplete
from .events import handler
2012-09-26 14:47:39 +00:00
class FetchComicError(IOError):
    """Raised when a comic image cannot be fetched.

    Behaves exactly like IOError; the dedicated subclass only lets callers
    tell comic retrieval failures apart from other I/O errors.
    """
2012-06-20 19:58:13 +00:00
2012-10-11 10:03:12 +00:00
class ComicStrip(object):
    """One comic strip: the image URLs found on a single parent page."""

    def __init__(self, name, parentUrl, imageUrls, namer):
        """Remember the strip's name, its page URL, its images and the namer.

        namer is called as namer(imageUrl, parentUrl) and its result is
        passed to ComicImage as the target filename.
        """
        self.name, self.parentUrl = name, parentUrl
        self.imageUrls, self.namer = imageUrls, namer

    def getImages(self):
        """Lazily yield one image downloader per stored image URL."""
        for rawUrl in self.imageUrls:
            cleanUrl = normaliseURL(rawUrl)
            yield self.getDownloader(cleanUrl)

    def getDownloader(self, url):
        """Build the ComicImage downloader for a single image URL."""
        target = self.namer(url, self.parentUrl)
        return ComicImage(self.name, self.parentUrl, url, target)
class ComicImage(object):
    """A single comic image that can be downloaded and stored on disk."""

    def __init__(self, name, referrer, url, filename):
        """Set URL and filename.

        name     -- comic name; save() uses it as the target sub-directory
        referrer -- referrer URL passed to urlopen() when fetching
        url      -- URL of the image itself
        filename -- target file name, or None to derive it from the URL
        """
        self.name = name
        self.referrer = referrer
        self.url = url
        if filename is None:
            # Use only the last path component of the URL.  The maxsplit
            # argument is required: a plain rsplit('/') splits on every
            # slash, so index 1 would be the empty string after "http:/".
            filename = url.rsplit('/', 1)[-1]
        self.filename, self.ext = os.path.splitext(filename)
        # Path separators inside the name would escape the comic directory.
        self.filename = self.filename.replace(os.sep, '_')
        self.ext = self.ext.replace(os.sep, '_')

    def connect(self):
        """Connect to host and get meta information.

        Sets self.urlobj, self.contentLength and self.lastModified, and
        replaces self.ext with the MIME subtype for image/* responses.

        Raises FetchComicError if the URL cannot be retrieved or the
        response is neither an image nor one of the accepted binary types.
        """
        out.write('Getting headers for %s...' % (self.url,), 2)
        try:
            self.urlobj = urlopen(self.url, referrer=self.referrer)
        except urllib2.HTTPError as he:
            raise FetchComicError('Unable to retrieve URL.', self.url, he.code)
        info = self.urlobj.info()
        maintype = info.getmaintype()
        acceptedTypes = ('application/octet-stream',
                         'application/x-shockwave-flash')
        if maintype != 'image' and info.gettype() not in acceptedTypes:
            # Do not leave the connection open when rejecting the response.
            self.urlobj.close()
            raise FetchComicError('No suitable image found to retrieve.',
                                  self.url)
        # Always use mime type for file extension if it is sane.
        if maintype == 'image':
            self.ext = '.' + info.getsubtype()
        self.contentLength = int(info.get('content-length', 0))
        self.lastModified = info.get('last-modified')
        out.write('... filename = "%s", ext = "%s", contentLength = %d' %
                  (self.filename, self.ext, self.contentLength), 2)

    def touch(self, filename):
        """Set last modified date on filename.

        Does nothing when the server sent no Last-Modified header or the
        header value cannot be parsed.
        """
        if not self.lastModified:
            return
        # HTTP dates are expressed in GMT.  Parse with the timezone and let
        # mktime_tz() convert to a UTC timestamp instead of interpreting
        # the value as local time.
        tt = rfc822.parsedate_tz(self.lastModified)
        if tt:
            mtime = rfc822.mktime_tz(tt)
            os.utime(filename, (mtime, mtime))

    def save(self, basepath, showProgress=False):
        """Save comic URL to filename on disk.

        basepath     -- root directory; the image goes into a sub-directory
                        named after the comic
        showProgress -- if true, download in 8 KiB chunks through
                        progressBar() instead of one single read

        Returns a tuple (path, saved); saved is False when an existing file
        of at least the announced size was kept, True when the image was
        written.  A partially written file is removed on any error and the
        error is re-raised.
        """
        self.connect()
        filename = "%s%s" % (self.filename, self.ext)
        comicSize = self.contentLength
        comicDir = os.path.join(basepath, self.name.replace('/', os.sep))
        if not os.path.isdir(comicDir):
            os.makedirs(comicDir)

        fn = os.path.join(comicDir, filename)
        # contentLength is 0 when the header is missing (see connect()), in
        # which case any existing file counts as complete.
        if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize:
            self.urlobj.close()
            self.touch(fn)
            out.write('Skipping existing file "%s".' % (fn,), 1)
            return fn, False

        try:
            out.write('Writing comic to file %s...' % (fn,), 3)
            with open(fn, 'wb') as comicOut:
                startTime = time.time()
                if showProgress:
                    def pollData():
                        """Copy one chunk; report (chunk size, total size)."""
                        data = self.urlobj.read(8192)
                        if not data:
                            raise OperationComplete
                        comicOut.write(data)
                        return len(data), self.contentLength
                    progressBar(pollData)
                else:
                    comicOut.write(self.urlobj.read())
                endTime = time.time()
            # Set the mtime only after the file has been closed.
            self.touch(fn)
        except:
            # Deliberately bare: the partial file must also be removed on
            # KeyboardInterrupt and similar; the error is always re-raised.
            if os.path.isfile(fn):
                os.remove(fn)
            raise
        else:
            size = os.path.getsize(fn)
            sizeText = locale.format('%d', size, True)
            if endTime != startTime:
                speed = saneDataSize(size / (endTime - startTime))
            else:
                speed = '???'
            attrs = dict(fn=fn, bytes=sizeText, speed=speed)
            out.write('Saved "%(fn)s" (%(bytes)s bytes, %(speed)s/sec).' %
                      attrs, 1)
            handler.comicDownloaded(self.name, fn)
        finally:
            self.urlobj.close()

        return fn, True