More unicode output fixes.
parent 110d070c4f
commit ebdc1e6359

4 changed files with 42 additions and 42 deletions
dosage | 22
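
All four files make the same change: string literals used for output messages gain a u prefix, so the formatted messages passed to out.debug/out.info/out.warn are unicode objects under Python 2. A minimal sketch of why that helps, with write_msg as a hypothetical stand-in for dosage's actual output layer: a u'' format string always yields unicode regardless of the interpolated values, so the output layer can encode once at the stream boundary instead of guessing the message type.

    # -*- coding: utf-8 -*-
    # Python 2 sketch; write_msg is a hypothetical helper, not dosage's output module.
    import sys

    def write_msg(stream, msg, encoding='utf-8'):
        # Encode unicode messages once, at the output boundary.
        if isinstance(msg, unicode):
            msg = msg.encode(encoding, 'replace')
        stream.write(msg + '\n')

    name = u'G\xfcnther'  # e.g. a comic name or filename with non-ASCII characters

    # With a byte-string format, the result type depends on the interpolated values,
    # and byte literals that themselves contain non-ASCII raise UnicodeDecodeError
    # when mixed with unicode values. With a u'' format the result is always unicode:
    msg = u'Saved comic %s.' % name
    write_msg(sys.stdout, msg)
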
dosage
@@ -218,13 +218,13 @@ def vote(scraperobj):
         out.debug('Vote answer %r' % answer)
         if answer == 'counted':
             url = configuration.Url + 'comics/%s.html' % name.replace('/', '_')
-            out.info('Vote submitted. Votes are updated regularly at %s.' % url)
+            out.info(u'Vote submitted. Votes are updated regularly at %s.' % url)
         elif answer == 'no':
-            out.info('Vote not submitted - your vote has already been submitted before.')
+            out.info(u'Vote not submitted - your vote has already been submitted before.')
         elif answer == 'noname':
-            out.warn('The comic %s cannot be voted.' % name)
+            out.warn(u'The comic %s cannot be voted.' % name)
         else:
-            out.warn('Error submitting vote parameters: %r' % answer)
+            out.warn(u'Error submitting vote parameters: %r' % answer)
     except Exception as msg:
         out.exception(msg)
         errors += 1
@@ -246,14 +246,14 @@ def getStrips(scraperobj, options):
     out.context = scraperobj.getName()
     try:
         if scraperobj.isComplete(options.basepath):
-            out.info("All comics are already downloaded.")
+            out.info(u"All comics are already downloaded.")
             return 0
         for strip in scraperobj.getStrips(numstrips):
             _errors, skipped = saveComicStrip(strip, options.basepath, options.dry_run)
             errors += _errors
             if skipped and options.cont:
                 # stop when retrieval skipped an image for one comic strip
-                out.info("Stop retrieval because image file already exists")
+                out.info(u"Stop retrieval because image file already exists")
                 break
         if options.all and not (errors or options.dry_run or
                 options.cont or scraperobj.indexes):
@@ -292,15 +292,15 @@ def doList(columnList=True, verbose=False):
     else:
         fd = sys.stdout
     out.setStream(fd)
-    out.info('Available comic scrapers:')
-    out.info('Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
-    out.info('Non-english comics are tagged with [%s].' % TAG_LANG)
+    out.info(u'Available comic scrapers:')
+    out.info(u'Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
+    out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
     scrapers = sorted(getScrapers(['@@']), key=lambda s: s.getName())
     if columnList:
         num = doColumnList(scrapers)
     else:
         num = doSingleList(scrapers, verbose=verbose)
-    out.info('%d supported comics.' % num)
+    out.info(u'%d supported comics.' % num)
     if page:
         pydoc.pager(fd.getvalue())
     return 0
@@ -326,7 +326,7 @@ def doColumnList(scrapers):
     maxlen = max(len(name) for name in names)
     namesPerLine = max(screenWidth // (maxlen + 1), 1)
     while names:
-        out.info(''.join(name.ljust(maxlen) for name in names[:namesPerLine]))
+        out.info(u''.join(name.ljust(maxlen) for name in names[:namesPerLine]))
         del names[:namesPerLine]
     return num
 
@@ -63,11 +63,11 @@ class ComicImage(object):
         if maintype == 'image':
             self.ext = '.' + subtype.replace('jpeg', 'jpg')
         self.contentLength = int(self.urlobj.headers.get('content-length', 0))
-        out.debug('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))
+        out.debug(u'... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))
 
     def save(self, basepath):
         """Save comic URL to filename on disk."""
-        out.info("Get image URL %s" % self.url, level=1)
+        out.info(u"Get image URL %s" % self.url, level=1)
         self.connect()
         filename = "%s%s" % (self.filename, self.ext)
         comicDir = os.path.join(basepath, self.dirname)
@@ -76,15 +76,15 @@ class ComicImage(object):
         fn = os.path.join(comicDir, filename)
         # compare with >= since content length could be the compressed size
         if os.path.isfile(fn) and os.path.getsize(fn) >= self.contentLength:
-            out.info('Skipping existing file "%s".' % fn)
+            out.info(u'Skipping existing file "%s".' % fn)
             return fn, False
         content = self.urlobj.content
         if not content:
-            out.warn("Empty content from %s, try again..." % self.url)
+            out.warn(u"Empty content from %s, try again..." % self.url)
             self.connect()
             content = self.urlobj.content
         try:
-            out.debug('Writing comic to file %s...' % fn)
+            out.debug(u'Writing comic to file %s...' % fn)
             with open(fn, 'wb') as comicOut:
                 comicOut.write(content)
                 comicOut.flush()
@@ -97,6 +97,6 @@ class ComicImage(object):
             os.remove(fn)
             raise
         else:
-            out.info("Saved %s (%s)." % (fn, strsize(size)))
+            out.info(u"Saved %s (%s)." % (fn, strsize(size)))
             getHandler().comicDownloaded(self, fn)
         return fn, True
 
@@ -110,32 +110,32 @@ class _BasicScraper(object):
         imageUrls = set(map(self.imageUrlModifier, imageUrls))
         if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
             patterns = [x.pattern for x in makeSequence(self.imageSearch)]
-            out.warn("found %d images instead of 1 at %s with patterns %s" % (len(imageUrls), url, patterns))
+            out.warn(u"found %d images instead of 1 at %s with patterns %s" % (len(imageUrls), url, patterns))
             image = sorted(imageUrls)[0]
-            out.warn("choosing image %s" % image)
+            out.warn(u"choosing image %s" % image)
             imageUrls = (image,)
         elif not imageUrls:
             patterns = [x.pattern for x in makeSequence(self.imageSearch)]
-            out.warn("found no images at %s with patterns %s" % (url, patterns))
+            out.warn(u"found no images at %s with patterns %s" % (url, patterns))
         return ComicStrip(self.getName(), url, imageUrls, self.namer, self.session)
 
     def getStrips(self, maxstrips=None):
         """Get comic strips."""
         if maxstrips:
-            word = "strip" if maxstrips == 1 else "strips"
-            msg = 'Retrieving %d %s' % (maxstrips, word)
+            word = u"strip" if maxstrips == 1 else "strips"
+            msg = u'Retrieving %d %s' % (maxstrips, word)
         else:
-            msg = 'Retrieving all strips'
+            msg = u'Retrieving all strips'
         if self.indexes:
             if len(self.indexes) == 1:
-                msg += " for index %s" % self.indexes[0]
+                msg += u" for index %s" % self.indexes[0]
             else:
-                msg += " for indexes %s" % self.indexes
+                msg += u" for indexes %s" % self.indexes
             urls = [self.getIndexStripUrl(index) for index in self.indexes]
         else:
             urls = [self.getLatestUrl()]
         if self.adult:
-            msg += " (including adult content)"
+            msg += u" (including adult content)"
         out.info(msg)
         for url in urls:
             for strip in self.getStripsFor(url, maxstrips):
@@ -147,10 +147,10 @@ class _BasicScraper(object):
         self.hitFirstStripUrl = False
         seen_urls = set()
         while url:
-            out.info('Get strip URL %s' % url, level=1)
+            out.info(u'Get strip URL %s' % url, level=1)
             data, baseUrl = getPageContent(url, self.session)
             if self.shouldSkipUrl(url):
-                out.info('Skipping URL %s' % url)
+                out.info(u'Skipping URL %s' % url)
                 self.skippedUrls.add(url)
             else:
                 try:
@@ -159,7 +159,7 @@ class _BasicScraper(object):
                     # image not found
                     out.exception(msg)
             if self.firstStripUrl == url:
-                out.debug("Stop at first URL %s" % url)
+                out.debug(u"Stop at first URL %s" % url)
                 self.hitFirstStripUrl = True
                 break
             if maxstrips is not None:
@@ -170,7 +170,7 @@ class _BasicScraper(object):
             seen_urls.add(url)
             if prevUrl in seen_urls:
                 # avoid recursive URL loops
-                out.warn("Already seen previous URL %r" % prevUrl)
+                out.warn(u"Already seen previous URL %r" % prevUrl)
                 break
             url = prevUrl
             if url and self.waitSeconds:
@@ -184,10 +184,10 @@ class _BasicScraper(object):
             prevUrl = fetchUrl(url, data, baseUrl, self.prevSearch)
         except ValueError as msg:
             # assume there is no previous URL, but print a warning
-            out.warn("%s Assuming no previous comic strips exist." % msg)
+            out.warn(u"%s Assuming no previous comic strips exist." % msg)
         else:
             prevUrl = self.prevUrlModifier(prevUrl)
-            out.debug("Matched previous URL %s" % prevUrl)
+            out.debug(u"Matched previous URL %s" % prevUrl)
             getHandler().comicPageLink(self.getName(), url, prevUrl)
         return prevUrl
 
@@ -294,12 +294,12 @@ def get_scraperclasses():
     """
     global _scraperclasses
     if _scraperclasses is None:
-        out.debug("Loading comic modules...")
+        out.debug(u"Loading comic modules...")
         modules = loader.get_modules()
         plugins = loader.get_plugins(modules, _BasicScraper)
         _scraperclasses = list(plugins)
         check_scrapers()
-        out.debug("... %d modules loaded." % len(_scraperclasses))
+        out.debug(u"... %d modules loaded." % len(_scraperclasses))
     return _scraperclasses
 
 
@@ -201,7 +201,7 @@ def getPageContent(url, session, max_content_bytes=MaxContentBytes):
         tries -= 1
     if not isValidPageContent(data):
         raise ValueError("Got invalid page content from %s: %r" % (url, data))
-    out.debug("Got page content %r" % data, level=3)
+    out.debug(u"Got page content %r" % data, level=3)
     # determine base URL
     baseUrl = None
     match = baseSearch.search(data)
@@ -234,7 +234,7 @@ def fetchUrls(url, data, baseUrl, urlSearch):
             searchUrl = match.group(1)
             if not searchUrl:
                 raise ValueError("Pattern %s matched empty URL at %s." % (search.pattern, url))
-            out.debug('matched URL %r with pattern %s' % (searchUrl, search.pattern))
+            out.debug(u'matched URL %r with pattern %s' % (searchUrl, search.pattern))
             searchUrls.append(normaliseURL(urljoin(baseUrl, searchUrl)))
         if searchUrls:
             # do not search other links if one pattern matched
@@ -318,12 +318,12 @@ def urlopen(url, session, referrer=None, max_content_bytes=None,
             timeout=ConnectionTimeoutSecs, raise_for_status=True,
             stream=False, data=None):
     """Open an URL and return the response object."""
-    out.debug('Open URL %s' % url)
+    out.debug(u'Open URL %s' % url)
     headers = {'User-Agent': UserAgent}
     if referrer:
         headers['Referer'] = referrer
-    out.debug('Sending headers %s' % headers, level=3)
-    out.debug('Sending cookies %s' % session.cookies)
+    out.debug(u'Sending headers %s' % headers, level=3)
+    out.debug(u'Sending cookies %s' % session.cookies)
     kwargs = {
         "headers": headers,
         "timeout": timeout,
@@ -340,10 +340,10 @@ def urlopen(url, session, referrer=None, max_content_bytes=None,
     else:
         kwargs['data'] = data
         func = session.post
-        out.debug('Sending POST data %s' % data, level=3)
+        out.debug(u'Sending POST data %s' % data, level=3)
     try:
         req = func(url, **kwargs)
-        out.debug('Response cookies: %s' % req.cookies)
+        out.debug(u'Response cookies: %s' % req.cookies)
         check_content_size(url, req.headers, max_content_bytes)
         if raise_for_status:
             req.raise_for_status()
@@ -393,7 +393,7 @@ def getRelativePath(basepath, path):
 def getQueryParams(url):
     """Get URL query parameters."""
     query = urlsplit(url)[3]
-    out.debug('Extracting query parameters from %r (%r)...' % (url, query))
+    out.debug(u'Extracting query parameters from %r (%r)...' % (url, query))
     return cgi.parse_qs(query)