More unicode output fixes.

Bastian Kleineidam 2013-04-30 06:40:20 +02:00
parent 110d070c4f
commit ebdc1e6359
4 changed files with 42 additions and 42 deletions
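Context for the change: at this point dosage still ran on Python 2, where '...' literals are byte strings and u'...' literals are unicode. When a byte-string format meets a unicode argument, Python 2 silently decodes the format with the ASCII codec, which fails as soon as non-ASCII text is involved. Prefixing every log-message format with u keeps messages unicode end to end, so the output layer can encode them once for the terminal. A minimal Python 2 sketch of the failure mode (illustrative, not from this repository):

    # Python 2
    >>> 'Saved %s.' % u'caf\xe9'            # ASCII byte format: implicit decode works
    u'Saved caf\xe9.'
    >>> 'Saved \xbb%s\xab.' % u'caf\xe9'    # non-ASCII byte format + unicode arg
    UnicodeDecodeError: 'ascii' codec can't decode byte 0xbb in position 6: ordinal not in range(128)
    >>> u'Saved \xbb%s\xab.' % u'caf\xe9'   # unicode literal: no implicit decode
    u'Saved \xbbcaf\xe9\xab.'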

dosage

@@ -218,13 +218,13 @@ def vote(scraperobj):
         out.debug('Vote answer %r' % answer)
         if answer == 'counted':
             url = configuration.Url + 'comics/%s.html' % name.replace('/', '_')
-            out.info('Vote submitted. Votes are updated regularly at %s.' % url)
+            out.info(u'Vote submitted. Votes are updated regularly at %s.' % url)
         elif answer == 'no':
-            out.info('Vote not submitted - your vote has already been submitted before.')
+            out.info(u'Vote not submitted - your vote has already been submitted before.')
         elif answer == 'noname':
-            out.warn('The comic %s cannot be voted.' % name)
+            out.warn(u'The comic %s cannot be voted.' % name)
         else:
-            out.warn('Error submitting vote parameters: %r' % answer)
+            out.warn(u'Error submitting vote parameters: %r' % answer)
     except Exception as msg:
         out.exception(msg)
         errors += 1
@@ -246,14 +246,14 @@ def getStrips(scraperobj, options):
     out.context = scraperobj.getName()
     try:
         if scraperobj.isComplete(options.basepath):
-            out.info("All comics are already downloaded.")
+            out.info(u"All comics are already downloaded.")
             return 0
         for strip in scraperobj.getStrips(numstrips):
             _errors, skipped = saveComicStrip(strip, options.basepath, options.dry_run)
             errors += _errors
             if skipped and options.cont:
                 # stop when retrieval skipped an image for one comic strip
-                out.info("Stop retrieval because image file already exists")
+                out.info(u"Stop retrieval because image file already exists")
                 break
         if options.all and not (errors or options.dry_run or
                                 options.cont or scraperobj.indexes):
@@ -292,15 +292,15 @@ def doList(columnList=True, verbose=False):
     else:
         fd = sys.stdout
     out.setStream(fd)
-    out.info('Available comic scrapers:')
-    out.info('Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
-    out.info('Non-english comics are tagged with [%s].' % TAG_LANG)
+    out.info(u'Available comic scrapers:')
+    out.info(u'Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
+    out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
     scrapers = sorted(getScrapers(['@@']), key=lambda s: s.getName())
     if columnList:
         num = doColumnList(scrapers)
     else:
         num = doSingleList(scrapers, verbose=verbose)
-    out.info('%d supported comics.' % num)
+    out.info(u'%d supported comics.' % num)
     if page:
         pydoc.pager(fd.getvalue())
     return 0
@@ -326,7 +326,7 @@ def doColumnList(scrapers):
     maxlen = max(len(name) for name in names)
     namesPerLine = max(screenWidth // (maxlen + 1), 1)
     while names:
-        out.info(''.join(name.ljust(maxlen) for name in names[:namesPerLine]))
+        out.info(u''.join(name.ljust(maxlen) for name in names[:namesPerLine]))
         del names[:namesPerLine]
     return num

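Note that only the format strings change; arguments like url and name stay as they are. Once the format is unicode, Python 2's % operator returns a unicode result, decoding any byte-string arguments with ASCII, which is safe here because URLs and scraper names are ASCII. An illustrative one-liner (not from the commit):

    # Python 2: unicode format + ASCII byte-string argument -> unicode result
    u'Vote submitted. Votes are updated regularly at %s.' % 'http://example.com/x.html'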
dosagelib/comic.py

@@ -63,11 +63,11 @@ class ComicImage(object):
         if maintype == 'image':
             self.ext = '.' + subtype.replace('jpeg', 'jpg')
         self.contentLength = int(self.urlobj.headers.get('content-length', 0))
-        out.debug('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))
+        out.debug(u'... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))

     def save(self, basepath):
         """Save comic URL to filename on disk."""
-        out.info("Get image URL %s" % self.url, level=1)
+        out.info(u"Get image URL %s" % self.url, level=1)
         self.connect()
         filename = "%s%s" % (self.filename, self.ext)
         comicDir = os.path.join(basepath, self.dirname)
@@ -76,15 +76,15 @@ class ComicImage(object):
         fn = os.path.join(comicDir, filename)
         # compare with >= since content length could be the compressed size
         if os.path.isfile(fn) and os.path.getsize(fn) >= self.contentLength:
-            out.info('Skipping existing file "%s".' % fn)
+            out.info(u'Skipping existing file "%s".' % fn)
             return fn, False
         content = self.urlobj.content
         if not content:
-            out.warn("Empty content from %s, try again..." % self.url)
+            out.warn(u"Empty content from %s, try again..." % self.url)
             self.connect()
             content = self.urlobj.content
         try:
-            out.debug('Writing comic to file %s...' % fn)
+            out.debug(u'Writing comic to file %s...' % fn)
             with open(fn, 'wb') as comicOut:
                 comicOut.write(content)
                 comicOut.flush()
@@ -97,6 +97,6 @@ class ComicImage(object):
             os.remove(fn)
             raise
         else:
-            out.info("Saved %s (%s)." % (fn, strsize(size)))
+            out.info(u"Saved %s (%s)." % (fn, strsize(size)))
         getHandler().comicDownloaded(self, fn)
         return fn, True

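One distinction worth noting in this file: the image payload (content) is raw bytes and is written unchanged through open(fn, 'wb'); only the human-readable log text becomes unicode. A minimal sketch of that split, with hypothetical names, assuming the log stream expects encoded bytes:

    # Python 2 sketch: bytes for payloads, unicode for messages
    def save_blob(fn, content, log_stream, encoding='utf-8'):
        with open(fn, 'wb') as f:
            f.write(content)                    # payload stays bytes
        msg = u'Saved %s (%d bytes).' % (fn, len(content))
        log_stream.write(msg.encode(encoding) + '\n')  # encode text once, at the edge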
dosagelib/scraper.py

@@ -110,32 +110,32 @@ class _BasicScraper(object):
         imageUrls = set(map(self.imageUrlModifier, imageUrls))
         if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
             patterns = [x.pattern for x in makeSequence(self.imageSearch)]
-            out.warn("found %d images instead of 1 at %s with patterns %s" % (len(imageUrls), url, patterns))
+            out.warn(u"found %d images instead of 1 at %s with patterns %s" % (len(imageUrls), url, patterns))
             image = sorted(imageUrls)[0]
-            out.warn("choosing image %s" % image)
+            out.warn(u"choosing image %s" % image)
             imageUrls = (image,)
         elif not imageUrls:
             patterns = [x.pattern for x in makeSequence(self.imageSearch)]
-            out.warn("found no images at %s with patterns %s" % (url, patterns))
+            out.warn(u"found no images at %s with patterns %s" % (url, patterns))
         return ComicStrip(self.getName(), url, imageUrls, self.namer, self.session)

     def getStrips(self, maxstrips=None):
         """Get comic strips."""
         if maxstrips:
-            word = "strip" if maxstrips == 1 else "strips"
-            msg = 'Retrieving %d %s' % (maxstrips, word)
+            word = u"strip" if maxstrips == 1 else "strips"
+            msg = u'Retrieving %d %s' % (maxstrips, word)
         else:
-            msg = 'Retrieving all strips'
+            msg = u'Retrieving all strips'
         if self.indexes:
             if len(self.indexes) == 1:
-                msg += " for index %s" % self.indexes[0]
+                msg += u" for index %s" % self.indexes[0]
             else:
-                msg += " for indexes %s" % self.indexes
+                msg += u" for indexes %s" % self.indexes
             urls = [self.getIndexStripUrl(index) for index in self.indexes]
         else:
             urls = [self.getLatestUrl()]
         if self.adult:
-            msg += " (including adult content)"
+            msg += u" (including adult content)"
         out.info(msg)
         for url in urls:
             for strip in self.getStripsFor(url, maxstrips):
@@ -147,10 +147,10 @@ class _BasicScraper(object):
         self.hitFirstStripUrl = False
         seen_urls = set()
         while url:
-            out.info('Get strip URL %s' % url, level=1)
+            out.info(u'Get strip URL %s' % url, level=1)
             data, baseUrl = getPageContent(url, self.session)
             if self.shouldSkipUrl(url):
-                out.info('Skipping URL %s' % url)
+                out.info(u'Skipping URL %s' % url)
                 self.skippedUrls.add(url)
             else:
                 try:
@@ -159,7 +159,7 @@ class _BasicScraper(object):
                     # image not found
                     out.exception(msg)
             if self.firstStripUrl == url:
-                out.debug("Stop at first URL %s" % url)
+                out.debug(u"Stop at first URL %s" % url)
                 self.hitFirstStripUrl = True
                 break
             if maxstrips is not None:
@@ -170,7 +170,7 @@ class _BasicScraper(object):
             seen_urls.add(url)
             if prevUrl in seen_urls:
                 # avoid recursive URL loops
-                out.warn("Already seen previous URL %r" % prevUrl)
+                out.warn(u"Already seen previous URL %r" % prevUrl)
                 break
             url = prevUrl
         if url and self.waitSeconds:
@@ -184,10 +184,10 @@ class _BasicScraper(object):
                 prevUrl = fetchUrl(url, data, baseUrl, self.prevSearch)
             except ValueError as msg:
                 # assume there is no previous URL, but print a warning
-                out.warn("%s Assuming no previous comic strips exist." % msg)
+                out.warn(u"%s Assuming no previous comic strips exist." % msg)
             else:
                 prevUrl = self.prevUrlModifier(prevUrl)
-                out.debug("Matched previous URL %s" % prevUrl)
+                out.debug(u"Matched previous URL %s" % prevUrl)
                 getHandler().comicPageLink(self.getName(), url, prevUrl)
         return prevUrl
@@ -294,12 +294,12 @@ def get_scraperclasses():
     """
     global _scraperclasses
     if _scraperclasses is None:
-        out.debug("Loading comic modules...")
+        out.debug(u"Loading comic modules...")
         modules = loader.get_modules()
         plugins = loader.get_plugins(modules, _BasicScraper)
        _scraperclasses = list(plugins)
         check_scrapers()
-        out.debug("... %d modules loaded." % len(_scraperclasses))
+        out.debug(u"... %d modules loaded." % len(_scraperclasses))
     return _scraperclasses

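The conversion in getStrips is deliberately asymmetric: only the first branch becomes word = u"strip" while the else branch stays a byte string. That is harmless in Python 2, because pure-ASCII byte strings coerce losslessly when interpolated into or concatenated with unicode, which is also what the later msg += u"..." lines rely on:

    # Python 2: ASCII byte strings mix cleanly with unicode
    u'Retrieving %d %s' % (2, 'strips')      # -> u'Retrieving 2 strips'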
dosagelib/util.py

@@ -201,7 +201,7 @@ def getPageContent(url, session, max_content_bytes=MaxContentBytes):
         tries -= 1
     if not isValidPageContent(data):
         raise ValueError("Got invalid page content from %s: %r" % (url, data))
-    out.debug("Got page content %r" % data, level=3)
+    out.debug(u"Got page content %r" % data, level=3)
     # determine base URL
     baseUrl = None
     match = baseSearch.search(data)
@@ -234,7 +234,7 @@ def fetchUrls(url, data, baseUrl, urlSearch):
             searchUrl = match.group(1)
             if not searchUrl:
                 raise ValueError("Pattern %s matched empty URL at %s." % (search.pattern, url))
-            out.debug('matched URL %r with pattern %s' % (searchUrl, search.pattern))
+            out.debug(u'matched URL %r with pattern %s' % (searchUrl, search.pattern))
             searchUrls.append(normaliseURL(urljoin(baseUrl, searchUrl)))
         if searchUrls:
             # do not search other links if one pattern matched
@@ -318,12 +318,12 @@ def urlopen(url, session, referrer=None, max_content_bytes=None,
             timeout=ConnectionTimeoutSecs, raise_for_status=True,
             stream=False, data=None):
     """Open an URL and return the response object."""
-    out.debug('Open URL %s' % url)
+    out.debug(u'Open URL %s' % url)
     headers = {'User-Agent': UserAgent}
     if referrer:
         headers['Referer'] = referrer
-    out.debug('Sending headers %s' % headers, level=3)
-    out.debug('Sending cookies %s' % session.cookies)
+    out.debug(u'Sending headers %s' % headers, level=3)
+    out.debug(u'Sending cookies %s' % session.cookies)
     kwargs = {
         "headers": headers,
         "timeout": timeout,
@@ -340,10 +340,10 @@ def urlopen(url, session, referrer=None, max_content_bytes=None,
     else:
         kwargs['data'] = data
         func = session.post
-        out.debug('Sending POST data %s' % data, level=3)
+        out.debug(u'Sending POST data %s' % data, level=3)
     try:
         req = func(url, **kwargs)
-        out.debug('Response cookies: %s' % req.cookies)
+        out.debug(u'Response cookies: %s' % req.cookies)
         check_content_size(url, req.headers, max_content_bytes)
         if raise_for_status:
             req.raise_for_status()
@@ -393,7 +393,7 @@ def getRelativePath(basepath, path):

 def getQueryParams(url):
     """Get URL query parameters."""
     query = urlsplit(url)[3]
-    out.debug('Extracting query parameters from %r (%r)...' % (url, query))
+    out.debug(u'Extracting query parameters from %r (%r)...' % (url, query))
     return cgi.parse_qs(query)
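Finally, several debug formats in this file interpolate raw page data with %r. In Python 2, repr() always returns an ASCII-safe byte string (non-ASCII bytes are escaped), so combining %r values with a u'' format cannot raise:

    # Python 2: repr() escapes non-ASCII, so u'' formats with %r are robust
    u'Got page content %r' % '\xc3\xa9'      # -> u"Got page content '\\xc3\\xa9'"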