2012-06-20 20:41:04 +00:00
|
|
|
# -*- coding: iso-8859-1 -*-
|
|
|
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
2013-02-13 05:28:35 +00:00
|
|
|
# Copyright (C) 2012-2013 Bastian Kleineidam
|
2012-06-20 19:58:13 +00:00
|
|
|
import os
|
|
|
|
|
|
|
|
from .output import out
|
2013-12-05 17:29:15 +00:00
|
|
|
from .util import getImageObject, normaliseURL, unquote, getDirname, getFilename, writeFile
|
2012-10-12 20:07:50 +00:00
|
|
|
from .events import getHandler
|
2012-06-20 19:58:13 +00:00
|
|
|
|
2012-10-11 10:03:12 +00:00
|
|
|
class ComicStrip(object):
    """One comic strip page together with the image URLs found on it."""

    def __init__(self, name, stripUrl, imageUrls, namer, session, text=None):
        """Remember the strip data needed to build image downloaders later.

        name      -- comic name; also used to derive the target directory
        stripUrl  -- URL of the strip page; handed to each ComicImage as
                     its referrer and to the namer
        imageUrls -- iterable of image URLs belonging to this strip
        namer     -- callable(imageUrl, stripUrl) returning a file name, or
                     None to fall back to the last path segment of the URL
        session   -- session object passed through to every ComicImage
        text      -- optional text passed through to every ComicImage
        """
        self.name, self.stripUrl = name, stripUrl
        self.imageUrls, self.namer = imageUrls, namer
        self.session, self.text = session, text

    def getImages(self):
        """Lazily yield one image downloader per stored image URL."""
        for rawUrl in self.imageUrls:
            cleanUrl = normaliseURL(rawUrl)
            yield self.getDownloader(cleanUrl)

    def getDownloader(self, url):
        """Build the ComicImage downloader for a single image URL."""
        named = self.namer(url, self.stripUrl)
        # The namer may decline by returning None; then the file name is
        # whatever follows the last slash of the URL.
        filename = url.rsplit('/', 1)[1] if named is None else named
        return ComicImage(
            self.name,
            url,
            self.stripUrl,
            getDirname(self.name),
            filename,
            self.session,
            text=self.text,
        )
|
2012-10-11 10:03:12 +00:00
|
|
|
|
|
|
|
|
|
|
|
class ComicImage(object):
    """A comic image downloader.

    Stores where an image comes from (URL and referrer) and where it should
    go (directory and file name). connect() fetches the image object and
    its meta information; save() writes the image to disk.
    """

    ChunkBytes = 1024 * 100 # 100KB

    def __init__(self, name, url, referrer, dirname, filename, session, text=None):
        """Set URL and filename.

        name     -- comic name
        url      -- image URL to download
        referrer -- referring URL handed to getImageObject()
        dirname  -- directory (relative to the base path given to save())
        filename -- proposed file name; it is passed through getFilename()
                    and then split into base name and extension
        session  -- session object handed to getImageObject()
        text     -- optional text saved next to the image as a .txt file
        """
        self.name = name
        self.referrer = referrer
        self.url = url
        self.dirname = dirname
        filename = getFilename(filename)
        self.filename, self.ext = os.path.splitext(filename)
        self.session = session
        self.text = text

    @staticmethod
    def _parseContentType(value):
        """Split a Content-Type header value into its media type parts.

        Returns a (content_type, maintype, subtype) tuple. Parameters after
        ';' are dropped, surrounding whitespace is removed and the media
        type is lower-cased, since media types compare case-insensitively.
        subtype is None when the value contains no '/'.
        """
        content_type = value.split(';', 1)[0].strip().lower()
        if '/' in content_type:
            maintype, subtype = content_type.split('/', 1)
        else:
            maintype, subtype = content_type, None
        return content_type, maintype, subtype

    @staticmethod
    def _parseContentLength(value):
        """Return a Content-Length header value as int, or 0 if unusable."""
        try:
            return int(value)
        except (TypeError, ValueError):
            # A malformed header is treated like a missing one.
            return 0

    def connect(self):
        """Connect to host and get meta information.

        Stores the fetched image object in self.urlobj, overrides self.ext
        with an extension derived from the Content-Type header when that
        names an image subtype, and stores the Content-Length header value
        (0 if missing or malformed) in self.contentLength.

        Raises IOError if the content type is neither an image type nor
        application/octet-stream nor application/x-shockwave-flash.
        """
        self.urlobj = getImageObject(self.url, self.referrer, self.session)
        header = unquote(self.urlobj.headers.get('content-type', 'application/octet-stream'))
        content_type, maintype, subtype = self._parseContentType(header)
        if maintype != 'image' and content_type not in ('application/octet-stream', 'application/x-shockwave-flash'):
            raise IOError('content type %r is not an image at %s' % (content_type, self.url))
        # Always use mime type for file extension if it is sane.
        # A bare "image" or "image/" has no usable subtype; in that case the
        # extension taken from the file name is kept.
        if maintype == 'image' and subtype:
            self.ext = '.' + subtype.replace('jpeg', 'jpg')
        self.contentLength = self._parseContentLength(self.urlobj.headers.get('content-length', 0))
        out.debug(u'... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))

    def save(self, basepath):
        """Save comic URL to filename on disk.

        The image is stored below os.path.join(basepath, self.dirname),
        which is created if it does not exist. If self.text is set it is
        written to a .txt file with the same base name, and the event
        handler returned by getHandler() is notified of the download.

        Returns a (path, saved) tuple; saved is False when an existing file
        of sufficient size was found and the download was skipped.
        """
        out.info(u"Get image URL %s" % self.url, level=1)
        self.connect()
        filename = "%s%s" % (self.filename, self.ext)
        comicDir = os.path.join(basepath, self.dirname)
        if not os.path.isdir(comicDir):
            os.makedirs(comicDir)
        fn = os.path.join(comicDir, filename)
        # compare with >= since content length could be the compressed size
        if os.path.isfile(fn) and os.path.getsize(fn) >= self.contentLength:
            out.info(u'Skipping existing file "%s".' % fn)
            return fn, False
        content = self.urlobj.content
        if not content:
            # Retry once on an empty body.
            out.warn(u"Empty content from %s, try again..." % self.url)
            self.connect()
            content = self.urlobj.content
        out.debug(u'Writing comic to file %s...' % fn)
        writeFile(fn, content)
        if self.text:
            fntext = os.path.join(comicDir, "%s.txt" % self.filename)
            out.debug(u'Writing comic text to file %s...' % fntext)
            writeFile(fntext, self.text, encoding='utf-8')
        getHandler().comicDownloaded(self, fn, text=self.text)
        return fn, True
|