diff --git a/mainline b/mainline
deleted file mode 120000
index b77cbddce..000000000
--- a/mainline
+++ /dev/null
@@ -1 +0,0 @@
-dosage
\ No newline at end of file
diff --git a/mainline b/mainline
new file mode 100755
index 000000000..76c5a987f
--- /dev/null
+++ b/mainline
@@ -0,0 +1,363 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
+# Copyright (C) 2012-2013 Bastian Kleineidam
+#   ___
+#  /   \___  ___  __ _  __ _  ___
+# / /\ / _ \/ __|/ _` |/ _` |/ _ \
+#/ /_// (_) \__ \ (_| | (_| |  __/
+#/___,' \___/|___/\__,_|\__, |\___|
+#                       |___/
+
+from __future__ import print_function
+import sys
+import os
+import argparse
+import pydoc
+from cStringIO import StringIO
+
+from dosagelib import events, scraper
+from dosagelib.output import out
+from dosagelib.util import internal_error, getDirname, strlimit, getLangName
+from dosagelib.ansicolor import get_columns
+from dosagelib.configuration import App, Freeware, Copyright, SupportUrl
+
+
+class ArgumentParser(argparse.ArgumentParser):
+    """Custom argument parser."""
+
+    def print_help(self, file=None):
+        """Paginate help message on TTYs."""
+        msg = self.format_help()
+        if file is None:
+            file = sys.stdout
+        if hasattr(file, "isatty") and file.isatty():
+            pydoc.pager(msg)
+        else:
+            print(msg, file=file)
+
+
+Examples = """\
+EXAMPLES
+List available comics (ca. 3000 at the moment):
+  dosage -l
+
+Get the latest comic of, for example, CalvinAndHobbes and save it in the
+"Comics" directory:
+  dosage CalvinAndHobbes
+
+If you have already downloaded several comics and want to get the latest
+strips of all of them:
+  dosage --continue @
+
+On Unix, xargs(1) can download several comic strips in parallel,
+for example using up to 4 processes:
+  cd Comics && find . -type d | xargs -n1 -P4 dosage -b . -v
+"""
+
+
+def setupOptions():
+    """Construct option parser.
+    @return: new option parser
+    @rtype argparse.ArgumentParser
+    """
+    kwargs = dict(
+        description = "A comic downloader and archiver.",
+        epilog = Examples,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    if sys.argv[0].endswith("mainline"):
+        out.warn("the 'mainline' program is deprecated, please use the new 'dosage' program")
+        kwargs["prog"] = "dosage"
+    parser = ArgumentParser(**kwargs)
+    parser.add_argument('-v', '--verbose', action='count', default=0, help='provides verbose output, use multiple times for more verbosity')
+    parser.add_argument('-n', '--numstrips', action='store', type=int, default=0, help='traverse and retrieve the given number of comic strips; use --all to retrieve all comic strips')
+    parser.add_argument('-a', '--all', action='store_true', help='traverse and retrieve all comic strips')
+    parser.add_argument('-c', '--continue', action='store_true', dest='cont', help='traverse and retrieve comic strips until an existing one is found')
+    parser.add_argument('-b', '--basepath', action='store', default='Comics', help='set the path to create individual comic directories in, default is Comics', metavar='PATH')
+    parser.add_argument('--baseurl', action='store', help='the base URL of your comics directory (for RSS, HTML, etc.); this should correspond to --basepath', metavar='PATH')
+    parser.add_argument('-l', '--list', action='store_true', help='list available comic modules')
+    parser.add_argument('--singlelist', action='store_true', help='list available comic modules in a single list')
+    parser.add_argument('--version', action='store_true', help='display the version number')
+    parser.add_argument('-m', '--modulehelp', action='store_true', help='display help for comic modules')
+    parser.add_argument('-t', '--timestamps', action='store_true', help='print timestamps for all output at any info level')
+    parser.add_argument('-o', '--output', action='append', dest='handler', choices=events.getHandlerNames(), help='sets output handlers for downloaded comics')
+    parser.add_argument('--adult', action='store_true', help='confirms that you are old enough to view adult content')
+    parser.add_argument('--multimatch', action='store_true', help=argparse.SUPPRESS)
+    parser.add_argument('comic', nargs='*', help='comic module name (including case insensitive substrings)')
+    try:
+        import argcomplete
+        argcomplete.autocomplete(parser)
+    except ImportError:
+        pass
+    return parser
+
+
+def displayVersion():
+    """Display application name, version, copyright and license."""
+    print(App)
+    print(Copyright)
+    print(Freeware)
+    print("For support see", SupportUrl)
+    return 0
+
+
+def setOutputInfo(options):
+    """Set global output level and timestamp option."""
+    out.level = 0
+    out.level += options.verbose
+    out.timestamps = options.timestamps
+
+
+def saveComicStrip(strip, basepath):
+    """Save a comic strip which can consist of multiple images."""
+    errors = 0
+    allskipped = True
+    for image in strip.getImages():
+        try:
+            filename, saved = image.save(basepath)
+            if saved:
+                allskipped = False
+        except Exception as msg:
+            out.exception('Could not save image at %s to %s: %s' % (image.referrer, image.filename, msg))
+            errors += 1
+    return errors, allskipped
+
+
+def displayHelp(comics):
+    """Print help for comic strips."""
+    try:
+        for scraperobj in getScrapers(comics):
+            displayComicHelp(scraperobj)
+    except ValueError as msg:
+        out.exception(msg)
+        return 1
+    return 0
+
+
+def displayComicHelp(scraperobj):
+    """Print description and help for a comic."""
+    out.context = getScraperName(scraperobj)
+    try:
+        if scraperobj.description:
+            out.info("Description: " + scraperobj.description)
+        if scraperobj.lang:
+            out.info("Language: " + getLangName(scraperobj.lang))
+        if scraperobj.help:
+            for line in scraperobj.help.splitlines():
+                out.info(line)
+    finally:
+        out.context = ''
+
+
+def getComics(options):
+    """Retrieve comics."""
+    errors = 0
+    if options.handler:
+        for name in set(options.handler):
+            events.addHandler(name, options.basepath, options.baseurl)
+    events.getHandler().start()
+    try:
+        for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
+            errors += getStrips(scraperobj, options)
+    except ValueError as msg:
+        out.exception(msg)
+        errors += 1
+    finally:
+        events.getHandler().end()
+    return errors
+
+
+def getStrips(scraperobj, options):
+    """Get all strips from a scraper."""
+    errors = 0
+    if options.all:
+        numstrips = None
+    elif options.numstrips:
+        numstrips = options.numstrips
+    else:
+        # get current strip
+        numstrips = 1
+    try:
+        out.context = scraperobj.getName()
+        for strip in scraperobj.getStrips(numstrips):
+            _errors, skipped = saveComicStrip(strip, options.basepath)
+            errors += _errors
+            if skipped and options.cont:
+                # stop when retrieval skipped an image for one comic strip
+                out.info("Stop retrieval because image file already exists")
+                break
+    except Exception as msg:
+        out.exception(msg)
+        errors += 1
+    finally:
+        out.context = ''
+    return errors
+
+
+def run(options):
+    """Execute comic commands."""
+    setOutputInfo(options)
+    if options.version:
+        return displayVersion()
+    if options.list:
+        return doList()
+    if options.singlelist:
+        return doList(columnList=False, verbose=options.verbose)
+    # after this a list of comic strips is needed
+    if not options.comic:
+        out.warn('No comics specified, bailing out!')
+        return 1
+    if options.modulehelp:
+        return displayHelp(options.comic)
+    return getComics(options)
+
+
+def doList(columnList=True, verbose=False):
+    """List available comics."""
+    page = hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
+    if page:
+        fd = StringIO()
+    else:
+        fd = sys.stdout
+    out.setStream(fd)
+    out.info('Available comic scrapers:')
+    out.info('Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
+    out.info('Non-English comics are tagged with [%s].' % TAG_LANG)
+    scrapers = sorted(getScrapers(['@@']), key=lambda s: s.getName())
+    if columnList:
+        num = doColumnList(scrapers)
+    else:
+        num = doSingleList(scrapers, verbose=verbose)
+    out.info('%d supported comics.' % num)
+    if page:
+        pydoc.pager(fd.getvalue())
+    return 0
+
+
+def doSingleList(scrapers, verbose=False):
+    """Get list of scraper names, one per line."""
+    for num, scraperobj in enumerate(scrapers):
+        if verbose:
+            displayComicHelp(scraperobj)
+        else:
+            out.info(getScraperName(scraperobj))
+    return num
+
+
+def doColumnList(scrapers):
+    """Get list of scraper names with multiple names per line."""
+    screenWidth = get_columns(sys.stdout)
+    # limit name length so at least two columns are there
+    limit = (screenWidth / 2) - 8
+    names = [getScraperName(scraperobj, limit=limit) for scraperobj in scrapers]
+    num = len(names)
+    maxlen = max(len(name) for name in names)
+    namesPerLine = max(int(screenWidth / (maxlen + 1)), 1)
+    while names:
+        out.info(''.join(name.ljust(maxlen) for name in names[:namesPerLine]))
+        del names[:namesPerLine]
+    return num
+
+TAG_ADULT = "adult"
+TAG_LANG = "lang"
+
+def getScraperName(scraperobj, limit=None):
+    """Get comic scraper name."""
+    tags = []
+    if scraperobj.adult:
+        tags.append(TAG_ADULT)
+    if scraperobj.lang != "en":
+        tags.append("%s:%s" % (TAG_LANG, scraperobj.lang))
+    if tags:
+        suffix = " [" + ", ".join(tags) + "]"
+    else:
+        suffix = ""
+    name = scraperobj.getName()
+    if limit is not None:
+        name = strlimit(name, limit)
+    return name + suffix
+
+
+def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False):
+    """Get scraper objects for the given comics."""
+    if '@' in comics:
+        # only scrapers whose directory already exists
+        if len(comics) > 1:
+            out.warn("using '@' as comic name ignores all other specified comics.")
+        for scraperclass in scraper.get_scraperclasses():
+            dirname = getDirname(scraperclass.getName())
+            if os.path.isdir(os.path.join(basepath, dirname)):
+                if not adult and scraperclass.adult:
+                    warn_adult(scraperclass)
+                    continue
+                yield scraperclass()
+    elif '@@' in comics:
+        # all scrapers
+        for scraperclass in scraper.get_scraperclasses():
+            if not adult and scraperclass.adult:
+                warn_adult(scraperclass)
+                continue
+            yield scraperclass()
+    else:
+        # get only selected comic scrapers
+        # store them in a set to eliminate duplicates
+        scrapers = set()
+        for comic in comics:
+            if basepath and comic.startswith(basepath):
+                # make the following command work:
+                # find Comics -type d | xargs -n1 -P10 dosage -b Comics
+                comic = comic[len(basepath):].lstrip(os.sep)
+            if ':' in comic:
+                name, index = comic.split(':', 1)
+                indexes = index.split(',')
+            else:
+                name = comic
+                indexes = None
+            scraperclasses = scraper.find_scraperclasses(name, multiple_allowed=multiple_allowed)
+            for scraperclass in scraperclasses:
+                if not adult and scraperclass.adult:
+                    warn_adult(scraperclass)
+                    continue
+                scraperobj = scraperclass(indexes=indexes)
+                if scraperobj not in scrapers:
+                    scrapers.add(scraperobj)
+                    yield scraperobj
+
+
+def warn_adult(scraperclass):
+    """Print warning about adult content."""
+    out.warn("skipping adult comic %s; use the --adult option to confirm your age" % scraperclass.getName())
+
+
+def main():
+    """Parse options and execute commands."""
+    try:
+        parser = setupOptions()
+        res = run(parser.parse_args())
+    except KeyboardInterrupt:
+        print("Aborted.")
+        res = 1
+    except Exception:
+        internal_error()
+        res = 2
+    return res
+
+
+def profile():
+    """Profile the loading of all scrapers."""
+    import cProfile
+    cProfile.run("scraper.get_scraperclasses()", "dosage.prof")
+
+
+def viewprof():
+    """View profile stats."""
+    import pstats
+    stats = pstats.Stats("dosage.prof")
+    stats.strip_dirs().sort_stats("cumulative").print_stats(100)
+
+
+if __name__ == '__main__':
+    sys.exit(main())
+    #profile()
+    #viewprof()
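
For reference, a minimal sketch of the download flow that getComics()/getStrips()/saveComicStrip() implement above, using only the dosagelib calls visible in this file. The comic name "CalvinAndHobbes" and the "Comics" directory are illustrative placeholders, not part of the diff:

    from dosagelib import scraper

    basepath = "Comics"                                # same default as --basepath
    for scraperclass in scraper.find_scraperclasses("CalvinAndHobbes"):
        scraperobj = scraperclass()                    # no index list, like a plain "dosage NAME" call
        for strip in scraperobj.getStrips(1):          # 1 == the current strip, the default above
            for image in strip.getImages():
                filename, saved = image.save(basepath) # returns (filename, saved) as in saveComicStrip()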