More unicode output fixes.

Bastian Kleineidam 2013-04-30 06:40:20 +02:00
parent 110d070c4f
commit ebdc1e6359
4 changed files with 42 additions and 42 deletions
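
This commit continues converting the format strings passed to the out logger to u'' unicode literals. Under Python 2, a byte-string format combined with byte-string arguments keeps the whole message as bytes; encoding that message for the output stream then performs an implicit ASCII decode, which raises UnicodeDecodeError as soon as a comic name, filename, or URL contains non-ASCII characters. A minimal sketch of the difference (illustrative only, not code from this repository; the path value and the 'ascii' target encoding are assumptions):

    # -*- coding: utf-8 -*-
    # Python 2 sketch: byte-string formatting keeps the message as bytes.
    path = 'comics/KäptnBlaubär/strip.png'        # byte string containing non-ASCII bytes
    msg = "Saved %s." % path                      # result is still a byte string
    # msg.encode('ascii', 'replace')              # implicit ASCII decode -> UnicodeDecodeError

    # With u'' literals the message stays unicode and encodes cleanly.
    umsg = u"Saved %s." % path.decode('utf-8')    # unicode format string and argument
    print(umsg.encode('ascii', 'replace'))        # unmappable characters become '?'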

dosage
View file

@@ -218,13 +218,13 @@ def vote(scraperobj):
         out.debug('Vote answer %r' % answer)
         if answer == 'counted':
             url = configuration.Url + 'comics/%s.html' % name.replace('/', '_')
-            out.info('Vote submitted. Votes are updated regularly at %s.' % url)
+            out.info(u'Vote submitted. Votes are updated regularly at %s.' % url)
         elif answer == 'no':
-            out.info('Vote not submitted - your vote has already been submitted before.')
+            out.info(u'Vote not submitted - your vote has already been submitted before.')
         elif answer == 'noname':
-            out.warn('The comic %s cannot be voted.' % name)
+            out.warn(u'The comic %s cannot be voted.' % name)
         else:
-            out.warn('Error submitting vote parameters: %r' % answer)
+            out.warn(u'Error submitting vote parameters: %r' % answer)
     except Exception as msg:
         out.exception(msg)
         errors += 1
@@ -246,14 +246,14 @@ def getStrips(scraperobj, options):
     out.context = scraperobj.getName()
     try:
         if scraperobj.isComplete(options.basepath):
-            out.info("All comics are already downloaded.")
+            out.info(u"All comics are already downloaded.")
             return 0
         for strip in scraperobj.getStrips(numstrips):
             _errors, skipped = saveComicStrip(strip, options.basepath, options.dry_run)
             errors += _errors
             if skipped and options.cont:
                 # stop when retrieval skipped an image for one comic strip
-                out.info("Stop retrieval because image file already exists")
+                out.info(u"Stop retrieval because image file already exists")
                 break
         if options.all and not (errors or options.dry_run or
                                 options.cont or scraperobj.indexes):
@@ -292,15 +292,15 @@ def doList(columnList=True, verbose=False):
     else:
         fd = sys.stdout
     out.setStream(fd)
-    out.info('Available comic scrapers:')
-    out.info('Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
-    out.info('Non-english comics are tagged with [%s].' % TAG_LANG)
+    out.info(u'Available comic scrapers:')
+    out.info(u'Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
+    out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
     scrapers = sorted(getScrapers(['@@']), key=lambda s: s.getName())
     if columnList:
         num = doColumnList(scrapers)
     else:
         num = doSingleList(scrapers, verbose=verbose)
-    out.info('%d supported comics.' % num)
+    out.info(u'%d supported comics.' % num)
     if page:
         pydoc.pager(fd.getvalue())
     return 0
@@ -326,7 +326,7 @@ def doColumnList(scrapers):
     maxlen = max(len(name) for name in names)
     namesPerLine = max(screenWidth // (maxlen + 1), 1)
     while names:
-        out.info(''.join(name.ljust(maxlen) for name in names[:namesPerLine]))
+        out.info(u''.join(name.ljust(maxlen) for name in names[:namesPerLine]))
         del names[:namesPerLine]
     return num

View file

@@ -63,11 +63,11 @@ class ComicImage(object):
         if maintype == 'image':
             self.ext = '.' + subtype.replace('jpeg', 'jpg')
         self.contentLength = int(self.urlobj.headers.get('content-length', 0))
-        out.debug('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))
+        out.debug(u'... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))

     def save(self, basepath):
         """Save comic URL to filename on disk."""
-        out.info("Get image URL %s" % self.url, level=1)
+        out.info(u"Get image URL %s" % self.url, level=1)
         self.connect()
         filename = "%s%s" % (self.filename, self.ext)
         comicDir = os.path.join(basepath, self.dirname)
@@ -76,15 +76,15 @@ class ComicImage(object):
         fn = os.path.join(comicDir, filename)
         # compare with >= since content length could be the compressed size
         if os.path.isfile(fn) and os.path.getsize(fn) >= self.contentLength:
-            out.info('Skipping existing file "%s".' % fn)
+            out.info(u'Skipping existing file "%s".' % fn)
             return fn, False
         content = self.urlobj.content
         if not content:
-            out.warn("Empty content from %s, try again..." % self.url)
+            out.warn(u"Empty content from %s, try again..." % self.url)
             self.connect()
             content = self.urlobj.content
         try:
-            out.debug('Writing comic to file %s...' % fn)
+            out.debug(u'Writing comic to file %s...' % fn)
             with open(fn, 'wb') as comicOut:
                 comicOut.write(content)
                 comicOut.flush()
@@ -97,6 +97,6 @@ class ComicImage(object):
                 os.remove(fn)
             raise
         else:
-            out.info("Saved %s (%s)." % (fn, strsize(size)))
+            out.info(u"Saved %s (%s)." % (fn, strsize(size)))
             getHandler().comicDownloaded(self, fn)
         return fn, True

View file

@@ -110,32 +110,32 @@ class _BasicScraper(object):
         imageUrls = set(map(self.imageUrlModifier, imageUrls))
         if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
             patterns = [x.pattern for x in makeSequence(self.imageSearch)]
-            out.warn("found %d images instead of 1 at %s with patterns %s" % (len(imageUrls), url, patterns))
+            out.warn(u"found %d images instead of 1 at %s with patterns %s" % (len(imageUrls), url, patterns))
             image = sorted(imageUrls)[0]
-            out.warn("choosing image %s" % image)
+            out.warn(u"choosing image %s" % image)
             imageUrls = (image,)
         elif not imageUrls:
             patterns = [x.pattern for x in makeSequence(self.imageSearch)]
-            out.warn("found no images at %s with patterns %s" % (url, patterns))
+            out.warn(u"found no images at %s with patterns %s" % (url, patterns))
         return ComicStrip(self.getName(), url, imageUrls, self.namer, self.session)

     def getStrips(self, maxstrips=None):
         """Get comic strips."""
         if maxstrips:
-            word = "strip" if maxstrips == 1 else "strips"
-            msg = 'Retrieving %d %s' % (maxstrips, word)
+            word = u"strip" if maxstrips == 1 else "strips"
+            msg = u'Retrieving %d %s' % (maxstrips, word)
         else:
-            msg = 'Retrieving all strips'
+            msg = u'Retrieving all strips'
         if self.indexes:
             if len(self.indexes) == 1:
-                msg += " for index %s" % self.indexes[0]
+                msg += u" for index %s" % self.indexes[0]
             else:
-                msg += " for indexes %s" % self.indexes
+                msg += u" for indexes %s" % self.indexes
             urls = [self.getIndexStripUrl(index) for index in self.indexes]
         else:
             urls = [self.getLatestUrl()]
         if self.adult:
-            msg += " (including adult content)"
+            msg += u" (including adult content)"
         out.info(msg)
         for url in urls:
             for strip in self.getStripsFor(url, maxstrips):
@@ -147,10 +147,10 @@ class _BasicScraper(object):
         self.hitFirstStripUrl = False
         seen_urls = set()
         while url:
-            out.info('Get strip URL %s' % url, level=1)
+            out.info(u'Get strip URL %s' % url, level=1)
             data, baseUrl = getPageContent(url, self.session)
             if self.shouldSkipUrl(url):
-                out.info('Skipping URL %s' % url)
+                out.info(u'Skipping URL %s' % url)
                 self.skippedUrls.add(url)
             else:
                 try:
@@ -159,7 +159,7 @@ class _BasicScraper(object):
                     # image not found
                     out.exception(msg)
             if self.firstStripUrl == url:
-                out.debug("Stop at first URL %s" % url)
+                out.debug(u"Stop at first URL %s" % url)
                 self.hitFirstStripUrl = True
                 break
             if maxstrips is not None:
@@ -170,7 +170,7 @@ class _BasicScraper(object):
             seen_urls.add(url)
             if prevUrl in seen_urls:
                 # avoid recursive URL loops
-                out.warn("Already seen previous URL %r" % prevUrl)
+                out.warn(u"Already seen previous URL %r" % prevUrl)
                 break
             url = prevUrl
             if url and self.waitSeconds:
@@ -184,10 +184,10 @@ class _BasicScraper(object):
                 prevUrl = fetchUrl(url, data, baseUrl, self.prevSearch)
             except ValueError as msg:
                 # assume there is no previous URL, but print a warning
-                out.warn("%s Assuming no previous comic strips exist." % msg)
+                out.warn(u"%s Assuming no previous comic strips exist." % msg)
             else:
                 prevUrl = self.prevUrlModifier(prevUrl)
-                out.debug("Matched previous URL %s" % prevUrl)
+                out.debug(u"Matched previous URL %s" % prevUrl)
                 getHandler().comicPageLink(self.getName(), url, prevUrl)
         return prevUrl
@@ -294,12 +294,12 @@ def get_scraperclasses():
     """
     global _scraperclasses
     if _scraperclasses is None:
-        out.debug("Loading comic modules...")
+        out.debug(u"Loading comic modules...")
         modules = loader.get_modules()
         plugins = loader.get_plugins(modules, _BasicScraper)
         _scraperclasses = list(plugins)
         check_scrapers()
-        out.debug("... %d modules loaded." % len(_scraperclasses))
+        out.debug(u"... %d modules loaded." % len(_scraperclasses))
     return _scraperclasses

View file

@@ -201,7 +201,7 @@ def getPageContent(url, session, max_content_bytes=MaxContentBytes):
         tries -= 1
     if not isValidPageContent(data):
         raise ValueError("Got invalid page content from %s: %r" % (url, data))
-    out.debug("Got page content %r" % data, level=3)
+    out.debug(u"Got page content %r" % data, level=3)
     # determine base URL
     baseUrl = None
     match = baseSearch.search(data)
@@ -234,7 +234,7 @@ def fetchUrls(url, data, baseUrl, urlSearch):
             searchUrl = match.group(1)
             if not searchUrl:
                 raise ValueError("Pattern %s matched empty URL at %s." % (search.pattern, url))
-            out.debug('matched URL %r with pattern %s' % (searchUrl, search.pattern))
+            out.debug(u'matched URL %r with pattern %s' % (searchUrl, search.pattern))
             searchUrls.append(normaliseURL(urljoin(baseUrl, searchUrl)))
         if searchUrls:
             # do not search other links if one pattern matched
@@ -318,12 +318,12 @@ def urlopen(url, session, referrer=None, max_content_bytes=None,
             timeout=ConnectionTimeoutSecs, raise_for_status=True,
             stream=False, data=None):
     """Open an URL and return the response object."""
-    out.debug('Open URL %s' % url)
+    out.debug(u'Open URL %s' % url)
     headers = {'User-Agent': UserAgent}
     if referrer:
         headers['Referer'] = referrer
-    out.debug('Sending headers %s' % headers, level=3)
-    out.debug('Sending cookies %s' % session.cookies)
+    out.debug(u'Sending headers %s' % headers, level=3)
+    out.debug(u'Sending cookies %s' % session.cookies)
     kwargs = {
         "headers": headers,
         "timeout": timeout,
@@ -340,10 +340,10 @@ def urlopen(url, session, referrer=None, max_content_bytes=None,
     else:
         kwargs['data'] = data
         func = session.post
-        out.debug('Sending POST data %s' % data, level=3)
+        out.debug(u'Sending POST data %s' % data, level=3)
     try:
         req = func(url, **kwargs)
-        out.debug('Response cookies: %s' % req.cookies)
+        out.debug(u'Response cookies: %s' % req.cookies)
         check_content_size(url, req.headers, max_content_bytes)
         if raise_for_status:
             req.raise_for_status()
@@ -393,7 +393,7 @@ def getRelativePath(basepath, path):
 def getQueryParams(url):
     """Get URL query parameters."""
     query = urlsplit(url)[3]
-    out.debug('Extracting query parameters from %r (%r)...' % (url, query))
+    out.debug(u'Extracting query parameters from %r (%r)...' % (url, query))
     return cgi.parse_qs(query)