#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
#     ___
#    /   \___  ___  __ _  __ _  ___
#   /  /\ / _ \/ __|/ _` |/ _` |/ _ \
#  / /_// (_) \__ \ (_| | (_| |  __/
# /___,' \___/|___/\__,_|\__, |\___|
#                        |___/
from __future__ import division, print_function

import sys
import os
import argparse
import pydoc
from io import StringIO

from dosagelib import events, scraper, configuration
from dosagelib.output import out
from dosagelib.util import internal_error, getDirname, strlimit, getLangName
from dosagelib.ansicolor import get_columns


class ArgumentParser(argparse.ArgumentParser):
    """Custom argument parser."""

    def print_help(self, file=None):
        """Paginate the help message on TTYs."""
        msg = self.format_help()
        if file is None:
            file = sys.stdout
        if hasattr(file, "isatty") and file.isatty():
            pydoc.pager(msg)
        else:
            print(msg, file=file)


Examples = """\
EXAMPLES
List available comics (ca. 3000 at the moment):
  dosage -l

Get the latest strip of, for example, CalvinAndHobbes and save it in the
"Comics" directory:
  dosage CalvinAndHobbes

If you have already downloaded several comics and want to get the latest
strips of all of them:
  dosage --continue @

On Unix, xargs(1) can download several comic strips in parallel,
for example using up to 4 processes:
  cd Comics && find . -type d | xargs -n1 -P4 dosage -b . -v
"""


def setupOptions():
    """Construct option parser.
    @return: new option parser
    @rtype: argparse.ArgumentParser
    """
    kwargs = dict(
        description="A comic downloader and archiver.",
        epilog=Examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser = ArgumentParser(**kwargs)
    parser.add_argument('-v', '--verbose', action='count', default=0,
        help='provides verbose output, use multiple times for more verbosity')
    parser.add_argument('-n', '--numstrips', action='store', type=int, default=0,
        help='traverse and retrieve the given number of comic strips; use --all to retrieve all comic strips')
    parser.add_argument('-a', '--all', action='store_true',
        help='traverse and retrieve all comic strips')
    parser.add_argument('-c', '--continue', action='store_true', dest='cont',
        help='traverse and retrieve comic strips until an existing one is found')
    parser.add_argument('-b', '--basepath', action='store', default='Comics',
        help='set the path to create individual comic directories in, default is Comics',
        metavar='PATH')
    parser.add_argument('--baseurl', action='store',
        help='the base URL of your comics directory (for RSS, HTML, etc.); this should correspond to --basepath',
        metavar='PATH')
    parser.add_argument('-l', '--list', action='store_true',
        help='list available comic modules')
    parser.add_argument('--singlelist', action='store_true',
        help='list available comic modules in a single list')
    parser.add_argument('--version', action='store_true',
        help='display the version number')
    parser.add_argument('--vote', action='store_true',
        help='vote for the selected comics')
    parser.add_argument('-m', '--modulehelp', action='store_true',
        help='display help for comic modules')
    parser.add_argument('-t', '--timestamps', action='store_true',
        help='print timestamps for all output at any info level')
    parser.add_argument('-o', '--output', action='append', dest='handler',
        choices=events.getHandlerNames(),
        help='sets output handlers for downloaded comics')
    parser.add_argument('--adult', action='store_true',
        help='confirms that you are old enough to view adult content')
    # used for development testing prev/next matching
    parser.add_argument('--dry-run', action='store_true',
        help=argparse.SUPPRESS)
    # multimatch is only used for development, e.g. testing if all comics
    # of a scripted plugin are working
    parser.add_argument('--multimatch', action='store_true',
        help=argparse.SUPPRESS)
    parser.add_argument('comic', nargs='*',
        help='comic module name (including case insensitive substrings)')
    try:
        import argcomplete
        argcomplete.autocomplete(parser)
    except ImportError:
        pass
    return parser
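
# Illustrative sketch of how the parser above behaves (not executed by this
# script): '-v' uses action='count' so repeating it raises the verbosity
# level, and positional comic names collect into a list via nargs='*'.
#
#   parser = setupOptions()
#   opts = parser.parse_args(['-v', '-v', '--adult', 'CalvinAndHobbes'])
#   assert opts.verbose == 2
#   assert opts.adult is True
#   assert opts.comic == ['CalvinAndHobbes']
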
def displayVersion(verbose):
    """Display application name, version, copyright and license."""
    print(configuration.App)
    print(configuration.Copyright)
    print(configuration.Freeware)
    print("For support see", configuration.SupportUrl)
    if verbose:
        # search for updates
        from dosagelib.updater import check_update
        result, value = check_update()
        if result:
            if value:
                version, url = value
                if url is None:
                    # current version is newer than online version
                    text = ('Detected local or development version %(currentversion)s. '
                            'Available version of %(app)s is %(version)s.')
                else:
                    # display update link
                    text = ('A new version %(version)s of %(app)s is '
                            'available at %(url)s.')
                attrs = dict(version=version, app=configuration.AppName,
                    url=url, currentversion=configuration.Version)
                print(text % attrs)
        else:
            if value is None:
                value = 'invalid update file syntax'
            text = ('An error occurred while checking for an '
                    'update of %(app)s: %(error)s.')
            attrs = dict(error=value, app=configuration.AppName)
            print(text % attrs)
    return 0


def setOutputInfo(options):
    """Set global output level and timestamp option."""
    out.level = 0
    out.level += options.verbose
    out.timestamps = options.timestamps
    # debug urllib3
    #from requests.packages.urllib3 import add_stderr_logger
    #add_stderr_logger()


def saveComicStrip(strip, basepath, dryrun):
    """Save a comic strip which can consist of multiple images."""
    errors = 0
    allskipped = True
    for image in strip.getImages():
        try:
            if dryrun:
                filename, saved = "", False
            else:
                filename, saved = image.save(basepath)
            if saved:
                allskipped = False
        except Exception as msg:
            out.exception('Could not save image at %s to %s: %r' % (
                image.referrer, image.filename, msg))
            errors += 1
    return errors, allskipped


def displayHelp(options):
    """Print help for comic strips."""
    errors = 0
    try:
        for scraperobj in getScrapers(options.comic, options.basepath):
            errors += displayComicHelp(scraperobj)
    except ValueError as msg:
        out.exception(msg)
        return 2
    return errors


def displayComicHelp(scraperobj):
    """Print description and help for a comic."""
    out.context = getScraperName(scraperobj)
    try:
        out.info(u"URL: " + scraperobj.url)
        if scraperobj.description:
            out.info(u"Description: " + scraperobj.description)
        if scraperobj.lang:
            out.info(u"Language: " + getLangName(scraperobj.lang))
        if scraperobj.genres:
            out.info(u"Genres: " + ", ".join(scraperobj.genres))
        if scraperobj.help:
            for line in scraperobj.help.splitlines():
                out.info(line)
        return 0
    except ValueError as msg:
        out.exception(msg)
        return 1
    finally:
        out.context = u''


def getComics(options):
    """Retrieve comics."""
    if options.handler:
        for name in set(options.handler):
            events.addHandler(name, options.basepath, options.baseurl)
    events.getHandler().start()
    errors = 0
    try:
        for scraperobj in getScrapers(options.comic, options.basepath,
                options.adult, options.multimatch):
            if options.vote:
                errors += vote(scraperobj)
            else:
                errors += getStrips(scraperobj, options)
    except ValueError as msg:
        out.exception(msg)
        errors += 1
    finally:
        events.getHandler().end()
    return errors
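
# A minimal sketch of the event-handler lifecycle driven by getComics() above
# (illustrative only; the 'rss' name and arguments are assumptions -- valid
# names come from events.getHandlerNames()):
#
#   events.addHandler('rss', 'Comics', 'http://example.com/comics/')  # hypothetical args
#   events.getHandler().start()
#   # ... strips are downloaded; the handler records each saved image ...
#   events.getHandler().end()
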
def vote(scraperobj):
    """Vote for the given comic scraper."""
    errors = 0
    name = scraperobj.getName()
    out.context = name
    try:
        answer = scraperobj.vote()
        out.debug(u'Vote answer %r' % answer)
        if answer == 'counted':
            url = configuration.Url + 'comics/%s.html' % name.replace('/', '_')
            out.info(u'Vote submitted. Votes are updated regularly at %s.' % url)
        elif answer == 'no':
            out.info(u'Vote not submitted - you have already voted for this comic.')
        elif answer == 'noname':
            out.warn(u'The comic %s cannot be voted for.' % name)
        else:
            out.warn(u'Error submitting vote parameters: %r' % answer)
    except Exception as msg:
        out.exception(msg)
        errors += 1
    finally:
        out.context = u''
    return errors


def getStrips(scraperobj, options):
    """Get all strips from a scraper."""
    errors = 0
    if options.all or options.cont:
        numstrips = None
    elif options.numstrips:
        numstrips = options.numstrips
    else:
        # get current strip
        numstrips = 1
    out.context = scraperobj.getName()
    try:
        if scraperobj.isComplete(options.basepath):
            out.info(u"All comics are already downloaded.")
            return 0
        for strip in scraperobj.getStrips(numstrips):
            _errors, skipped = saveComicStrip(strip, options.basepath, options.dry_run)
            errors += _errors
            if skipped and options.cont:
                # stop when retrieval skipped an image for one comic strip
                out.info(u"Stopping retrieval because the image file already exists")
                break
        if options.all and not (errors or options.dry_run or
                options.cont or scraperobj.indexes):
            scraperobj.setComplete(options.basepath)
    except Exception as msg:
        out.exception(msg)
        errors += 1
    finally:
        out.context = u''
    return errors


def run(options):
    """Execute comic commands."""
    setOutputInfo(options)
    if options.version:
        return displayVersion(options.verbose)
    if options.list:
        return doList()
    if options.singlelist:
        return doList(columnList=False, verbose=options.verbose)
    # after this a list of comic strips is needed
    if not options.comic:
        out.warn(u'No comics specified, bailing out!')
        return 1
    if options.modulehelp:
        return displayHelp(options)
    return getComics(options)


def doList(columnList=True, verbose=False):
    """List available comics."""
    page = hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
    if page:
        fd = StringIO(u'')
        out.setStream(fd)
    out.info(u'Available comic scrapers:')
    out.info(u'Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
    out.info(u'Non-English comics are tagged with [%s].' % TAG_LANG)
    scrapers = sorted(getScrapers(['@@']), key=lambda s: s.getName())
    if columnList:
        num = doColumnList(scrapers)
    else:
        num = doSingleList(scrapers, verbose=verbose)
    out.info(u'%d supported comics.' % num)
    if page:
        pydoc.pager(fd.getvalue())
    return 0
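
# Note on the paging above: when stdout is a TTY, doList() redirects all
# output into a StringIO buffer via out.setStream() and hands the collected
# text to pydoc.pager(), so the long comic list is scrollable instead of
# flooding the terminal.
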
def doSingleList(scrapers, verbose=False):
    """Print the list of scraper names, one per line."""
    num = 0
    for num, scraperobj in enumerate(scrapers, 1):
        if verbose:
            displayComicHelp(scraperobj)
        else:
            out.info(getScraperName(scraperobj))
    return num


def doColumnList(scrapers):
    """Print the list of scraper names with multiple names per line."""
    screenWidth = get_columns(sys.stdout)
    # limit name length so at least two columns are there
    limit = (screenWidth // 2) - 8
    names = [getScraperName(scraperobj, limit=limit) for scraperobj in scrapers]
    num = len(names)
    maxlen = max(len(name) for name in names)
    # e.g. an 80-column screen with maxlen 19 gives 80 // 20 = 4 names per line
    namesPerLine = max(screenWidth // (maxlen + 1), 1)
    while names:
        out.info(u''.join(name.ljust(maxlen) for name in names[:namesPerLine]))
        del names[:namesPerLine]
    return num


TAG_ADULT = "adult"
TAG_LANG = "lang"


def getScraperName(scraperobj, limit=None):
    """Get the comic scraper name, including adult and language tags."""
    tags = []
    if scraperobj.adult:
        tags.append(TAG_ADULT)
    if scraperobj.lang != "en":
        tags.append("%s:%s" % (TAG_LANG, scraperobj.lang))
    if tags:
        suffix = " [" + ", ".join(tags) + "]"
    else:
        suffix = ""
    name = scraperobj.getName()
    if limit is not None:
        name = strlimit(name, limit)
    return name + suffix


def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False):
    """Get scraper objects for the given comics."""
    if '@' in comics:
        # only scrapers whose directory already exists
        if len(comics) > 1:
            out.warn(u"using '@' as comic name ignores all other specified comics.")
        for scraperclass in scraper.get_scraperclasses():
            dirname = getDirname(scraperclass.getName())
            if os.path.isdir(os.path.join(basepath, dirname)):
                if not adult and scraperclass.adult:
                    warn_adult(scraperclass)
                    continue
                yield scraperclass()
    elif '@@' in comics:
        # all scrapers
        for scraperclass in scraper.get_scraperclasses():
            if not adult and scraperclass.adult:
                warn_adult(scraperclass)
                continue
            yield scraperclass()
    else:
        # get only selected comic scrapers
        # store them in a set to eliminate duplicates
        scrapers = set()
        for comic in comics:
            # strip trailing path separators; helpful when using shell
            # completion to pick comics to get
            comic = comic.rstrip(os.path.sep)
            if basepath and comic.startswith(basepath):
                # make the following command work:
                # find Comics -type d | xargs -n1 -P10 dosage -b Comics
                comic = comic[len(basepath):].lstrip(os.sep)
            if ':' in comic:
                # a comic name can carry strip indexes, e.g. "name:index1,index2"
                name, index = comic.split(':', 1)
                indexes = index.split(',')
            else:
                name = comic
                indexes = None
            scraperclasses = scraper.find_scraperclasses(name,
                multiple_allowed=multiple_allowed)
            for scraperclass in scraperclasses:
                if not adult and scraperclass.adult:
                    warn_adult(scraperclass)
                    continue
                scraperobj = scraperclass(indexes=indexes)
                if scraperobj not in scrapers:
                    scrapers.add(scraperobj)
                    yield scraperobj


def warn_adult(scraperclass):
    """Print a warning about adult content."""
    out.warn(u"skipping adult comic %s; use the --adult option to confirm your age" % scraperclass.getName())


def main():
    """Parse options and execute commands."""
    try:
        parser = setupOptions()
        res = run(parser.parse_args())
    except KeyboardInterrupt:
        print("Aborted.")
        res = 1
    except Exception:
        internal_error()
        res = 2
    return res


def profile():
    """Profile the loading of all scrapers."""
    import cProfile
    cProfile.run("scraper.get_scraperclasses()", "dosage.prof")


def viewprof():
    """View profile stats."""
    import pstats
    stats = pstats.Stats("dosage.prof")
    stats.strip_dirs().sort_stats("cumulative").print_stats(100)


if __name__ == '__main__':
    sys.exit(main())
    #profile()
    #viewprof()
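
# To profile scraper loading instead of running normally, swap the calls in
# the __main__ block above: comment out sys.exit(main()), then call profile()
# and afterwards viewprof() to print the collected statistics.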