2012-06-20 20:41:04 +00:00
|
|
|
# -*- coding: iso-8859-1 -*-
|
|
|
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
2012-06-20 19:58:13 +00:00
|
|
|
import urllib2
|
|
|
|
import os
|
|
|
|
import locale
|
|
|
|
import rfc822
|
|
|
|
import time
|
|
|
|
|
|
|
|
from .output import out
|
|
|
|
from .util import urlopen, saneDataSize, normaliseURL
|
|
|
|
from .progress import progressBar, OperationComplete
|
|
|
|
from .events import handler
|
|
|
|
|
2012-09-26 14:47:39 +00:00
|
|
|
class FetchComicError(IOError):
    """Raised when a comic image cannot be fetched.

    Subclasses IOError, so handlers written for IOError also catch it.
    """
|
2012-06-20 19:58:13 +00:00
|
|
|
|
2012-10-11 10:03:12 +00:00
|
|
|
class ComicStrip(object):
    """One comic strip: a named collection of image URLs found on a page."""

    def __init__(self, name, parentUrl, imageUrls, namer):
        """Remember the strip name, its page URL, the image URLs and the namer.

        @param name: stored as-is and handed to every ComicImage created
        @param parentUrl: URL passed to the namer and to ComicImage
        @param imageUrls: iterable of image URLs belonging to this strip
        @param namer: callable(imageUrl, parentUrl) producing a filename
        """
        self.name, self.parentUrl = name, parentUrl
        self.imageUrls, self.namer = imageUrls, namer

    def getImages(self):
        """Lazily yield one downloader per image URL, after normalising it."""
        for rawUrl in self.imageUrls:
            cleanUrl = normaliseURL(rawUrl)
            yield self.getDownloader(cleanUrl)

    def getDownloader(self, url):
        """Build the ComicImage for url, asking the namer for its filename."""
        return ComicImage(
            self.name,
            self.parentUrl,
            url,
            self.namer(url, self.parentUrl),
        )
|
|
|
|
|
|
|
|
|
|
|
|
class ComicImage(object):
    """Downloader for a single comic image URL."""

    def __init__(self, name, referrer, url, filename):
        """Set URL and filename.

        @param name: comic name; save() uses it to build the target directory
        @param referrer: URL passed as referrer when opening the image URL
        @param url: URL of the image
        @param filename: target file name including extension, or None to
            derive it from the last path component of url
        """
        self.name = name
        self.referrer = referrer
        self.url = url
        if filename is None:
            # Split once from the right: we want the last path component,
            # not the second component of the whole URL.
            filename = url.rsplit('/', 1)[-1]
        self.filename, self.ext = os.path.splitext(filename)
        # Path separators in either part would escape the comic directory.
        self.filename = self.filename.replace(os.sep, '_')
        self.ext = self.ext.replace(os.sep, '_')

    def connect(self):
        """Connect to host and get meta information.

        Sets self.urlobj, self.contentLength and self.lastModified, and
        replaces self.ext with the MIME subtype when the response is an image.

        @raise FetchComicError: on an HTTP error, or when the response is
            neither an image nor one of the accepted binary content types
        """
        out.write('Getting headers for %s...' % (self.url,), 2)
        try:
            self.urlobj = urlopen(self.url, referrer=self.referrer)
        except urllib2.HTTPError as he:
            raise FetchComicError('Unable to retrieve URL.', self.url, he.code)

        info = self.urlobj.info()
        isImage = info.getmaintype() == 'image'
        acceptedTypes = ('application/octet-stream',
                         'application/x-shockwave-flash')
        if not isImage and info.gettype() not in acceptedTypes:
            # Do not leak the open connection when rejecting the response.
            self.urlobj.close()
            raise FetchComicError('No suitable image found to retrieve.', self.url)

        # Always use mime type for file extension if it is sane.
        if isImage:
            self.ext = '.' + info.getsubtype()
        self.contentLength = int(info.get('content-length', 0))
        self.lastModified = info.get('last-modified')
        out.write('... filename = "%s", ext = "%s", contentLength = %d' % (self.filename, self.ext, self.contentLength), 2)

    def touch(self, filename):
        """Set last modified date on filename.

        Uses the Last-Modified header stored by connect(). Nothing happens
        when the header is missing, unparseable or out of range.
        """
        if not self.lastModified:
            return
        parsed = rfc822.parsedate_tz(self.lastModified)
        if not parsed:
            return
        try:
            # mktime_tz honours the zone given in the header; time.mktime
            # would misread the (normally GMT) tuple as local time.
            mtime = rfc822.mktime_tz(parsed)
        except (ValueError, OverflowError):
            # A bogus date must not fail an otherwise successful download.
            return
        os.utime(filename, (mtime, mtime))

    def save(self, basepath, showProgress=False):
        """Save comic URL to filename on disk.

        @param basepath: directory under which the comic directory is created
        @param showProgress: if True, download in 8192 byte chunks driven
            by progressBar; otherwise read the whole response at once
        @return: tuple (path, saved); saved is False when an existing file
            was kept and True when the file was written
        @raise FetchComicError: propagated from connect()
        """
        self.connect()
        filename = "%s%s" % (self.filename, self.ext)
        comicSize = self.contentLength
        comicDir = os.path.join(basepath, self.name.replace('/', os.sep))
        if not os.path.isdir(comicDir):
            os.makedirs(comicDir)

        fn = os.path.join(comicDir, filename)
        # NOTE: without a content-length header comicSize is 0, so any
        # existing file is treated as complete.
        if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize:
            self.urlobj.close()
            self.touch(fn)
            out.write('Skipping existing file "%s".' % (fn,), 1)
            return fn, False

        try:
            out.write('Writing comic to file %s...' % (fn,), 3)
            with open(fn, 'wb') as comicOut:
                startTime = time.time()
                if showProgress:
                    def pollData():
                        data = self.urlobj.read(8192)
                        if not data:
                            raise OperationComplete
                        comicOut.write(data)
                        return len(data), self.contentLength
                    progressBar(pollData)
                else:
                    comicOut.write(self.urlobj.read())
                endTime = time.time()
            # Touch only after the file is closed, otherwise the final
            # flush would overwrite the modification time again.
            self.touch(fn)
        except:
            # Deliberately bare: remove the partial file on any failure,
            # including KeyboardInterrupt, then re-raise unchanged.
            if os.path.isfile(fn):
                os.remove(fn)
            raise
        else:
            size = os.path.getsize(fn)
            sizeText = locale.format_string('%d', size, True)
            if endTime != startTime:
                speed = saneDataSize(size / (endTime - startTime))
            else:
                speed = '???'
            attrs = dict(fn=fn, bytes=sizeText, speed=speed)
            out.write('Saved "%(fn)s" (%(bytes)s bytes, %(speed)s/sec).' % attrs, 1)
            handler.comicDownloaded(self.name, fn)
        finally:
            self.urlobj.close()

        return fn, True
|