diff --git a/dosage b/dosage
index d91eafb38..b6e616306 100755
--- a/dosage
+++ b/dosage
@@ -218,13 +218,13 @@ def vote(scraperobj):
         out.debug('Vote answer %r' % answer)
         if answer == 'counted':
             url = configuration.Url + 'comics/%s.html' % name.replace('/', '_')
-            out.info('Vote submitted. Votes are updated regularly at %s.' % url)
+            out.info(u'Vote submitted. Votes are updated regularly at %s.' % url)
         elif answer == 'no':
-            out.info('Vote not submitted - your vote has already been submitted before.')
+            out.info(u'Vote not submitted - your vote has already been submitted before.')
         elif answer == 'noname':
-            out.warn('The comic %s cannot be voted.' % name)
+            out.warn(u'The comic %s cannot be voted.' % name)
         else:
-            out.warn('Error submitting vote parameters: %r' % answer)
+            out.warn(u'Error submitting vote parameters: %r' % answer)
     except Exception as msg:
         out.exception(msg)
         errors += 1
@@ -246,14 +246,14 @@ def getStrips(scraperobj, options):
     out.context = scraperobj.getName()
     try:
         if scraperobj.isComplete(options.basepath):
-            out.info("All comics are already downloaded.")
+            out.info(u"All comics are already downloaded.")
             return 0
         for strip in scraperobj.getStrips(numstrips):
             _errors, skipped = saveComicStrip(strip, options.basepath, options.dry_run)
             errors += _errors
             if skipped and options.cont:
                 # stop when retrieval skipped an image for one comic strip
-                out.info("Stop retrieval because image file already exists")
+                out.info(u"Stop retrieval because image file already exists")
                 break
         if options.all and not (errors or options.dry_run or
                                 options.cont or scraperobj.indexes):
@@ -292,15 +292,15 @@ def doList(columnList=True, verbose=False):
     else:
         fd = sys.stdout
     out.setStream(fd)
-    out.info('Available comic scrapers:')
-    out.info('Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
-    out.info('Non-english comics are tagged with [%s].' % TAG_LANG)
+    out.info(u'Available comic scrapers:')
+    out.info(u'Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
+    out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
     scrapers = sorted(getScrapers(['@@']), key=lambda s: s.getName())
     if columnList:
         num = doColumnList(scrapers)
     else:
         num = doSingleList(scrapers, verbose=verbose)
-    out.info('%d supported comics.' % num)
+    out.info(u'%d supported comics.' % num)
     if page:
         pydoc.pager(fd.getvalue())
     return 0
@@ -326,7 +326,7 @@ def doColumnList(scrapers):
     maxlen = max(len(name) for name in names)
     namesPerLine = max(screenWidth // (maxlen + 1), 1)
     while names:
-        out.info(''.join(name.ljust(maxlen) for name in names[:namesPerLine]))
+        out.info(u''.join(name.ljust(maxlen) for name in names[:namesPerLine]))
         del names[:namesPerLine]
     return num

diff --git a/dosagelib/comic.py b/dosagelib/comic.py
index 3944d5335..96bcbae67 100644
--- a/dosagelib/comic.py
+++ b/dosagelib/comic.py
@@ -63,11 +63,11 @@ class ComicImage(object):
         if maintype == 'image':
             self.ext = '.' + subtype.replace('jpeg', 'jpg')
         self.contentLength = int(self.urlobj.headers.get('content-length', 0))
-        out.debug('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))
+        out.debug(u'... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))

     def save(self, basepath):
         """Save comic URL to filename on disk."""
-        out.info("Get image URL %s" % self.url, level=1)
+        out.info(u"Get image URL %s" % self.url, level=1)
         self.connect()
         filename = "%s%s" % (self.filename, self.ext)
         comicDir = os.path.join(basepath, self.dirname)
@@ -76,15 +76,15 @@ class ComicImage(object):
         fn = os.path.join(comicDir, filename)
         # compare with >= since content length could be the compressed size
         if os.path.isfile(fn) and os.path.getsize(fn) >= self.contentLength:
-            out.info('Skipping existing file "%s".' % fn)
+            out.info(u'Skipping existing file "%s".' % fn)
             return fn, False
         content = self.urlobj.content
         if not content:
-            out.warn("Empty content from %s, try again..." % self.url)
+            out.warn(u"Empty content from %s, try again..." % self.url)
             self.connect()
             content = self.urlobj.content
         try:
-            out.debug('Writing comic to file %s...' % fn)
+            out.debug(u'Writing comic to file %s...' % fn)
             with open(fn, 'wb') as comicOut:
                 comicOut.write(content)
                 comicOut.flush()
@@ -97,6 +97,6 @@ class ComicImage(object):
                 os.remove(fn)
             raise
         else:
-            out.info("Saved %s (%s)." % (fn, strsize(size)))
+            out.info(u"Saved %s (%s)." % (fn, strsize(size)))
             getHandler().comicDownloaded(self, fn)
         return fn, True
diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py
index 29c3347f8..c5b9e3561 100644
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -110,32 +110,32 @@ class _BasicScraper(object):
         imageUrls = set(map(self.imageUrlModifier, imageUrls))
         if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
             patterns = [x.pattern for x in makeSequence(self.imageSearch)]
-            out.warn("found %d images instead of 1 at %s with patterns %s" % (len(imageUrls), url, patterns))
+            out.warn(u"found %d images instead of 1 at %s with patterns %s" % (len(imageUrls), url, patterns))
             image = sorted(imageUrls)[0]
-            out.warn("choosing image %s" % image)
+            out.warn(u"choosing image %s" % image)
             imageUrls = (image,)
         elif not imageUrls:
             patterns = [x.pattern for x in makeSequence(self.imageSearch)]
-            out.warn("found no images at %s with patterns %s" % (url, patterns))
+            out.warn(u"found no images at %s with patterns %s" % (url, patterns))
         return ComicStrip(self.getName(), url, imageUrls, self.namer, self.session)

     def getStrips(self, maxstrips=None):
         """Get comic strips."""
         if maxstrips:
-            word = "strip" if maxstrips == 1 else "strips"
-            msg = 'Retrieving %d %s' % (maxstrips, word)
+            word = u"strip" if maxstrips == 1 else "strips"
+            msg = u'Retrieving %d %s' % (maxstrips, word)
         else:
-            msg = 'Retrieving all strips'
+            msg = u'Retrieving all strips'
         if self.indexes:
             if len(self.indexes) == 1:
-                msg += " for index %s" % self.indexes[0]
+                msg += u" for index %s" % self.indexes[0]
             else:
-                msg += " for indexes %s" % self.indexes
+                msg += u" for indexes %s" % self.indexes
             urls = [self.getIndexStripUrl(index) for index in self.indexes]
         else:
             urls = [self.getLatestUrl()]
         if self.adult:
-            msg += " (including adult content)"
+            msg += u" (including adult content)"
         out.info(msg)
         for url in urls:
             for strip in self.getStripsFor(url, maxstrips):
@@ -147,10 +147,10 @@ class _BasicScraper(object):
         self.hitFirstStripUrl = False
         seen_urls = set()
         while url:
-            out.info('Get strip URL %s' % url, level=1)
+            out.info(u'Get strip URL %s' % url, level=1)
             data, baseUrl = getPageContent(url, self.session)
             if self.shouldSkipUrl(url):
-                out.info('Skipping URL %s' % url)
+                out.info(u'Skipping URL %s' % url)
                 self.skippedUrls.add(url)
             else:
                 try:
@@ -159,7 +159,7 @@ class _BasicScraper(object):
                     # image not found
                     out.exception(msg)
             if self.firstStripUrl == url:
-                out.debug("Stop at first URL %s" % url)
+                out.debug(u"Stop at first URL %s" % url)
                 self.hitFirstStripUrl = True
                 break
             if maxstrips is not None:
@@ -170,7 +170,7 @@ class _BasicScraper(object):
             seen_urls.add(url)
             if prevUrl in seen_urls:
                 # avoid recursive URL loops
-                out.warn("Already seen previous URL %r" % prevUrl)
+                out.warn(u"Already seen previous URL %r" % prevUrl)
                 break
             url = prevUrl
             if url and self.waitSeconds:
@@ -184,10 +184,10 @@ class _BasicScraper(object):
                 prevUrl = fetchUrl(url, data, baseUrl, self.prevSearch)
             except ValueError as msg:
                 # assume there is no previous URL, but print a warning
-                out.warn("%s Assuming no previous comic strips exist." % msg)
+                out.warn(u"%s Assuming no previous comic strips exist." % msg)
             else:
                 prevUrl = self.prevUrlModifier(prevUrl)
-                out.debug("Matched previous URL %s" % prevUrl)
+                out.debug(u"Matched previous URL %s" % prevUrl)
                 getHandler().comicPageLink(self.getName(), url, prevUrl)
         return prevUrl

@@ -294,12 +294,12 @@ def get_scraperclasses():
     """
     global _scraperclasses
     if _scraperclasses is None:
-        out.debug("Loading comic modules...")
+        out.debug(u"Loading comic modules...")
         modules = loader.get_modules()
         plugins = loader.get_plugins(modules, _BasicScraper)
         _scraperclasses = list(plugins)
         check_scrapers()
-        out.debug("... %d modules loaded." % len(_scraperclasses))
+        out.debug(u"... %d modules loaded." % len(_scraperclasses))
     return _scraperclasses


diff --git a/dosagelib/util.py b/dosagelib/util.py
index 9dc602057..a813f93e1 100644
--- a/dosagelib/util.py
+++ b/dosagelib/util.py
@@ -201,7 +201,7 @@ def getPageContent(url, session, max_content_bytes=MaxContentBytes):
         tries -= 1
     if not isValidPageContent(data):
         raise ValueError("Got invalid page content from %s: %r" % (url, data))
-    out.debug("Got page content %r" % data, level=3)
+    out.debug(u"Got page content %r" % data, level=3)
     # determine base URL
     baseUrl = None
     match = baseSearch.search(data)
@@ -234,7 +234,7 @@ def fetchUrls(url, data, baseUrl, urlSearch):
             searchUrl = match.group(1)
             if not searchUrl:
                 raise ValueError("Pattern %s matched empty URL at %s." % (search.pattern, url))
-            out.debug('matched URL %r with pattern %s' % (searchUrl, search.pattern))
+            out.debug(u'matched URL %r with pattern %s' % (searchUrl, search.pattern))
             searchUrls.append(normaliseURL(urljoin(baseUrl, searchUrl)))
         if searchUrls:
             # do not search other links if one pattern matched
@@ -318,12 +318,12 @@ def urlopen(url, session, referrer=None, max_content_bytes=None,
             timeout=ConnectionTimeoutSecs, raise_for_status=True,
             stream=False, data=None):
     """Open an URL and return the response object."""
-    out.debug('Open URL %s' % url)
+    out.debug(u'Open URL %s' % url)
     headers = {'User-Agent': UserAgent}
     if referrer:
         headers['Referer'] = referrer
-    out.debug('Sending headers %s' % headers, level=3)
-    out.debug('Sending cookies %s' % session.cookies)
+    out.debug(u'Sending headers %s' % headers, level=3)
+    out.debug(u'Sending cookies %s' % session.cookies)
     kwargs = {
         "headers": headers,
         "timeout": timeout,
@@ -340,10 +340,10 @@ def urlopen(url, session, referrer=None, max_content_bytes=None,
     else:
         kwargs['data'] = data
        func = session.post
-        out.debug('Sending POST data %s' % data, level=3)
+        out.debug(u'Sending POST data %s' % data, level=3)
     try:
         req = func(url, **kwargs)
-        out.debug('Response cookies: %s' % req.cookies)
+        out.debug(u'Response cookies: %s' % req.cookies)
         check_content_size(url, req.headers, max_content_bytes)
         if raise_for_status:
             req.raise_for_status()
@@ -393,7 +393,7 @@ def getRelativePath(basepath, path):
 def getQueryParams(url):
     """Get URL query parameters."""
     query = urlsplit(url)[3]
-    out.debug('Extracting query parameters from %r (%r)...' % (url, query))
+    out.debug(u'Extracting query parameters from %r (%r)...' % (url, query))
     return cgi.parse_qs(query)
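
Note (not part of the patch): the change only adds the u'' prefix to the log-message
literals, so every message is a unicode object from the start on Python 2 and the
output layer can handle encoding once. A plausible motivation is the failure mode
sketched below; the filename and the byte template are made up for illustration and
do not appear in the patch.

    # Python 2 semantics; 'fn' is a hypothetical filename containing non-ASCII.
    fn = u'comics/Kn\xf6rf/strip.png'

    # A unicode template always yields a unicode result, regardless of arguments.
    print(type(u"Saved %s." % fn))          # <type 'unicode'>

    # A byte-string template with non-ASCII bytes is implicitly decoded as ASCII
    # when formatted with a unicode argument, which fails.
    try:
        "Gr\xc3\xb6\xc3\x9fe von %s" % fn
    except UnicodeDecodeError as err:
        print("implicit ASCII decode failed: %s" % err)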