#!/usr/bin/env python # -*- coding: iso-8859-1 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2013 Bastian Kleineidam # ___ # / \___ ___ __ _ __ _ ___ # / /\ / _ \/ __|/ _` |/ _` |/ _ \ # / /_// (_) \__ \ (_| | (_| | __/ # /___,' \___/|___/\__,_|\__, |\___| # |___/ from __future__ import print_function import sys import os import argparse import pydoc from cStringIO import StringIO from dosagelib import events, scraper from dosagelib.output import out from dosagelib.util import internal_error, getDirname, strlimit, getLangName from dosagelib.ansicolor import get_columns from dosagelib.configuration import App, Freeware, Copyright, SupportUrl class ArgumentParser(argparse.ArgumentParser): """Custom argument parser.""" def print_help(self, file=None): """Paginate help message on TTYs.""" msg = self.format_help() if file is None: file = sys.stdout if hasattr(file, "isatty") and file.isatty(): pydoc.pager(msg) else: print(msg, file=file) Examples = """\ EXAMPLES List available comics (ca. 3000 at the moment): dosage -l Get the latest comic of for example CalvinAndHobbes and save it in the "Comics" directory: dosage CalvinAndHobbes If you already have downloaded several comics and want to get the latest strips of all of them: dosage --continue @ On Unix, xargs(1) can download several comic strips in parallel, for example using up to 4 processes: cd Comics && find . -type d | xargs -n1 -P4 dosage -b . -v """ def setupOptions(): """Construct option parser. @return: new option parser @rtype argparse.ArgumentParser """ kwargs = dict( description = "A comic downloader and archiver.", epilog = Examples, formatter_class=argparse.RawDescriptionHelpFormatter, ) if sys.argv[0].endswith("mainline"): out.warn("the 'mainline' program is deprecated, please use the new 'dosage' program") kwargs["prog"] = "dosage" parser = ArgumentParser(**kwargs) parser.add_argument('-v', '--verbose', action='count', default=0, help='provides verbose output, use multiple times for more verbosity') parser.add_argument('-n', '--numstrips', action='store', type=int, default=0, help='traverse and retrieve the given number of comic strips; use --all to retrieve all comic strips') parser.add_argument('-a', '--all', action='store_true', help='traverse and retrieve all comic strips') parser.add_argument('-c', '--continue', action='store_true', dest='cont', help='traverse and retrieve comic strips until an existing one is found') parser.add_argument('-b', '--basepath', action='store', default='Comics', help='set the path to create invidivual comic directories in, default is Comics', metavar='PATH') parser.add_argument('--baseurl', action='store', help='the base URL of your comics directory (for RSS, HTML, etc.); this should correspond to --base-path', metavar='PATH') parser.add_argument('-l', '--list', action='store_true', help='list available comic modules') parser.add_argument('--singlelist', action='store_true', help='list available comic modules in a single list') parser.add_argument('--version', action='store_true', help='display the version number') parser.add_argument('-m', '--modulehelp', action='store_true', help='display help for comic modules') parser.add_argument('-t', '--timestamps', action='store_true', help='print timestamps for all output at any info level') parser.add_argument('-o', '--output', action='append', dest='handler', choices=events.getHandlerNames(), help='sets output handlers for downloaded comics') parser.add_argument('--adult', action='store_true', help='confirms that you are old enough to view adult content') parser.add_argument('--multimatch', action='store_true', help=argparse.SUPPRESS) parser.add_argument('comic', nargs='*', help='comic module name (including case insensitive substrings)') try: import argcomplete argcomplete.autocomplete(parser) except ImportError: pass return parser def displayVersion(): """Display application name, version, copyright and license.""" print(App) print(Copyright) print(Freeware) print("For support see", SupportUrl) return 0 def setOutputInfo(options): """Set global output level and timestamp option.""" out.level = 0 out.level += options.verbose out.timestamps = options.timestamps def saveComicStrip(strip, basepath): """Save a comic strip which can consist of multiple images.""" errors = 0 allskipped = True for image in strip.getImages(): try: filename, saved = image.save(basepath) if saved: allskipped = False except Exception as msg: out.exception('Could not save image at %s to %s: %s' % (image.referrer, image.filename, msg)) errors += 1 return errors, allskipped def displayHelp(comics): """Print help for comic strips.""" try: for scraperobj in getScrapers(comics): displayComicHelp(scraperobj) except ValueError as msg: out.exception(msg) return 1 return 0 def displayComicHelp(scraperobj): """Print description and help for a comic.""" out.context = getScraperName(scraperobj) try: if scraperobj.description: out.info("Description: " + scraperobj.description) if scraperobj.lang: out.info("Language: " + getLangName(scraperobj.lang)) if scraperobj.help: for line in scraperobj.help.splitlines(): out.info(line) finally: out.context = '' def getComics(options): """Retrieve comics.""" errors = 0 if options.handler: for name in set(options.handler): events.addHandler(name, options.basepath, options.baseurl) events.getHandler().start() try: for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch): errors += getStrips(scraperobj, options) except ValueError as msg: out.exception(msg) errors += 1 finally: events.getHandler().end() return errors def getStrips(scraperobj, options): """Get all strips from a scraper.""" errors = 0 if options.all: numstrips = None elif options.numstrips: numstrips = options.numstrips else: # get current strip numstrips = 1 try: out.context = scraperobj.getName() for strip in scraperobj.getStrips(numstrips): _errors, skipped = saveComicStrip(strip, options.basepath) errors += _errors if skipped and options.cont: # stop when retrieval skipped an image for one comic strip out.info("Stop retrieval because image file already exists") break except Exception as msg: out.exception(msg) errors += 1 finally: out.context = '' return errors def run(options): """Execute comic commands.""" setOutputInfo(options) if options.version: return displayVersion() if options.list: return doList() if options.singlelist: return doList(columnList=False, verbose=options.verbose) # after this a list of comic strips is needed if not options.comic: out.warn('No comics specified, bailing out!') return 1 if options.modulehelp: return displayHelp(options.comic) return getComics(options) def doList(columnList=True, verbose=False): """List available comics.""" page = hasattr(sys.stdout, "isatty") and sys.stdout.isatty() if page: fd = StringIO() else: fd = sys.stdout out.setStream(fd) out.info('Available comic scrapers:') out.info('Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT) out.info('Non-english comics are tagged with [%s].' % TAG_LANG) scrapers = sorted(getScrapers(['@@']), key=lambda s: s.getName()) if columnList: num = doColumnList(scrapers) else: num = doSingleList(scrapers, verbose=verbose) out.info('%d supported comics.' % num) if page: pydoc.pager(fd.getvalue()) return 0 def doSingleList(scrapers, verbose=False): """Get list of scraper names, one per line.""" for num, scraperobj in enumerate(scrapers): if verbose: displayComicHelp(scraperobj) else: out.info(getScraperName(scraperobj)) return num def doColumnList(scrapers): """Get list of scraper names with multiple names per line.""" screenWidth = get_columns(sys.stdout) # limit name length so at least two columns are there limit = (screenWidth / 2) - 8 names = [getScraperName(scraperobj, limit=limit) for scraperobj in scrapers] num = len(names) maxlen = max(len(name) for name in names) namesPerLine = max(int(screenWidth / (maxlen + 1)), 1) while names: out.info(''.join(name.ljust(maxlen) for name in names[:namesPerLine])) del names[:namesPerLine] return num TAG_ADULT = "adult" TAG_LANG = "lang" def getScraperName(scraperobj, limit=None): """Get comic scraper name.""" tags = [] if scraperobj.adult: tags.append(TAG_ADULT) if scraperobj.lang != "en": tags.append("%s:%s" % (TAG_LANG, scraperobj.lang)) if tags: suffix = " [" + ", ".join(tags) + "]" else: suffix = "" name = scraperobj.getName() if limit is not None: name = strlimit(name, limit) return name + suffix def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False): """Get scraper objects for the given comics.""" if '@' in comics: # only scrapers whose directory already exists if len(comics) > 1: out.warn("using '@' as comic name ignores all other specified comics.") for scraperclass in scraper.get_scraperclasses(): dirname = getDirname(scraperclass.getName()) if os.path.isdir(os.path.join(basepath, dirname)): if not adult and scraperclass.adult: warn_adult(scraperclass) continue yield scraperclass() elif '@@' in comics: # all scrapers for scraperclass in scraper.get_scraperclasses(): if not adult and scraperclass.adult: warn_adult(scraperclass) continue yield scraperclass() else: # get only selected comic scrapers # store them in a set to eliminate duplicates scrapers = set() for comic in comics: if basepath and comic.startswith(basepath): # make the following command work: # find Comics -type d | xargs -n1 -P10 dosage -b Comics comic = comic[len(basepath):].lstrip(os.sep) if ':' in comic: name, index = comic.split(':', 1) indexes = index.split(',') else: name = comic indexes = None scraperclasses = scraper.find_scraperclasses(name, multiple_allowed=multiple_allowed) for scraperclass in scraperclasses: if not adult and scraperclass.adult: warn_adult(scraperclass) continue scraperobj = scraperclass(indexes=indexes) if scraperobj not in scrapers: scrapers.add(scraperobj) yield scraperobj def warn_adult(scraperclass): """Print warning about adult content.""" out.warn("skipping adult comic %s; use the --adult option to confirm your age" % scraperclass.getName()) def main(): """Parse options and execute commands.""" try: parser = setupOptions() res = run(parser.parse_args()) except KeyboardInterrupt: print("Aborted.") res = 1 except Exception: internal_error() res = 2 return res def profile(): """Profile the loading of all scrapers.""" import cProfile cProfile.run("scraper.get_scraperclasses()", "dosage.prof") def viewprof(): """View profile stats.""" import pstats stats = pstats.Stats("dosage.prof") stats.strip_dirs().sort_stats("cumulative").print_stats(100) if __name__ == '__main__': sys.exit(main()) #profile() #viewprof()