Make mainline a hardlink.
This commit is contained in:
parent
fa3b9d8fa5
commit
fcb50ee72f
1 changed files with 363 additions and 1 deletions
1
mainline
1
mainline
|
@ -1 +0,0 @@
|
|||
dosage
|
363
mainline
Executable file
363
mainline
Executable file
|
@ -0,0 +1,363 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2013 Bastian Kleineidam
|
||||
# ___
|
||||
# / \___ ___ __ _ __ _ ___
|
||||
# / /\ / _ \/ __|/ _` |/ _` |/ _ \
|
||||
# / /_// (_) \__ \ (_| | (_| | __/
|
||||
# /___,' \___/|___/\__,_|\__, |\___|
|
||||
# |___/
|
||||
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
import pydoc
|
||||
from cStringIO import StringIO
|
||||
|
||||
from dosagelib import events, scraper
|
||||
from dosagelib.output import out
|
||||
from dosagelib.util import internal_error, getDirname, strlimit, getLangName
|
||||
from dosagelib.ansicolor import get_columns
|
||||
from dosagelib.configuration import App, Freeware, Copyright, SupportUrl
|
||||
|
||||
|
||||
class ArgumentParser(argparse.ArgumentParser):
    """Argument parser that pages long help output on terminals."""

    def print_help(self, file=None):
        """Paginate help message on TTYs."""
        text = self.format_help()
        out_file = sys.stdout if file is None else file
        if hasattr(out_file, "isatty") and out_file.isatty():
            # Interactive terminal: route the (long) help text through a pager.
            pydoc.pager(text)
        else:
            print(text, file=out_file)
|
||||
|
||||
|
||||
Examples = """\
|
||||
EXAMPLES
|
||||
List available comics (ca. 3000 at the moment):
|
||||
dosage -l
|
||||
|
||||
Get the latest comic of for example CalvinAndHobbes and save it in the "Comics"
|
||||
directory:
|
||||
dosage CalvinAndHobbes
|
||||
|
||||
If you already have downloaded several comics and want to get the latest
|
||||
strips of all of them:
|
||||
dosage --continue @
|
||||
|
||||
On Unix, xargs(1) can download several comic strips in parallel,
|
||||
for example using up to 4 processes:
|
||||
cd Comics && find . -type d | xargs -n1 -P4 dosage -b . -v
|
||||
"""
|
||||
|
||||
|
||||
def setupOptions():
    """Construct option parser.
    @return: new option parser
    @rtype argparse.ArgumentParser
    """
    kwargs = dict(
        description = "A comic downloader and archiver.",
        epilog = Examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    if sys.argv[0].endswith("mainline"):
        # Transitional alias: the script used to be installed as 'mainline'.
        out.warn("the 'mainline' program is deprecated, please use the new 'dosage' program")
        kwargs["prog"] = "dosage"
    parser = ArgumentParser(**kwargs)
    parser.add_argument('-v', '--verbose', action='count', default=0, help='provides verbose output, use multiple times for more verbosity')
    parser.add_argument('-n', '--numstrips', action='store', type=int, default=0, help='traverse and retrieve the given number of comic strips; use --all to retrieve all comic strips')
    parser.add_argument('-a', '--all', action='store_true', help='traverse and retrieve all comic strips')
    parser.add_argument('-c', '--continue', action='store_true', dest='cont', help='traverse and retrieve comic strips until an existing one is found')
    # Typo fix in help text: "invidivual" -> "individual".
    parser.add_argument('-b', '--basepath', action='store', default='Comics', help='set the path to create individual comic directories in, default is Comics', metavar='PATH')
    # Fixed option reference: the real option is --basepath, not --base-path.
    parser.add_argument('--baseurl', action='store', help='the base URL of your comics directory (for RSS, HTML, etc.); this should correspond to --basepath', metavar='PATH')
    parser.add_argument('-l', '--list', action='store_true', help='list available comic modules')
    parser.add_argument('--singlelist', action='store_true', help='list available comic modules in a single list')
    parser.add_argument('--version', action='store_true', help='display the version number')
    parser.add_argument('-m', '--modulehelp', action='store_true', help='display help for comic modules')
    parser.add_argument('-t', '--timestamps', action='store_true', help='print timestamps for all output at any info level')
    parser.add_argument('-o', '--output', action='append', dest='handler', choices=events.getHandlerNames(), help='sets output handlers for downloaded comics')
    parser.add_argument('--adult', action='store_true', help='confirms that you are old enough to view adult content')
    # Hidden option (help suppressed in --help output).
    parser.add_argument('--multimatch', action='store_true', help=argparse.SUPPRESS)
    parser.add_argument('comic', nargs='*', help='comic module name (including case insensitive substrings)')
    try:
        # Optional shell tab completion, only if argcomplete is installed.
        import argcomplete
        argcomplete.autocomplete(parser)
    except ImportError:
        pass
    return parser
|
||||
|
||||
|
||||
def displayVersion():
    """Show application name, version, copyright, license and support URL."""
    for info_line in (App, Copyright, Freeware):
        print(info_line)
    print("For support see", SupportUrl)
    return 0
|
||||
|
||||
|
||||
def setOutputInfo(options):
    """Apply verbosity and timestamp options to the global output object."""
    # options.verbose defaults to 0 and counts the -v flags given.
    out.level = options.verbose
    out.timestamps = options.timestamps
|
||||
|
||||
|
||||
def saveComicStrip(strip, basepath):
    """Save all images of one comic strip.

    Returns a tuple (number of save errors, True if every image was skipped).
    """
    num_errors = 0
    all_skipped = True
    for image in strip.getImages():
        try:
            filename, was_saved = image.save(basepath)
        except Exception as msg:
            out.exception('Could not save image at %s to %s: %s' % (image.referrer, image.filename, msg))
            num_errors += 1
        else:
            if was_saved:
                all_skipped = False
    return num_errors, all_skipped
|
||||
|
||||
|
||||
def displayHelp(comics):
    """Show the module help text for each matching comic scraper."""
    result = 0
    try:
        for comic_scraper in getScrapers(comics):
            displayComicHelp(comic_scraper)
    except ValueError as msg:
        out.exception(msg)
        result = 1
    return result
|
||||
|
||||
|
||||
def displayComicHelp(scraperobj):
    """Print description, language and usage help for one comic scraper."""
    out.context = getScraperName(scraperobj)
    try:
        description = scraperobj.description
        if description:
            out.info("Description: " + description)
        language = scraperobj.lang
        if language:
            out.info("Language: " + getLangName(language))
        helptext = scraperobj.help
        if helptext:
            for helpline in helptext.splitlines():
                out.info(helpline)
    finally:
        # Always reset the logging context, even if printing fails.
        out.context = ''
|
||||
|
||||
|
||||
def getComics(options):
    """Download comics according to the parsed command line options.

    Returns the number of errors encountered.
    """
    num_errors = 0
    # Install any requested output event handlers (each name only once).
    if options.handler:
        for handler_name in set(options.handler):
            events.addHandler(handler_name, options.basepath, options.baseurl)
    events.getHandler().start()
    try:
        scraper_iter = getScrapers(options.comic, options.basepath,
                                   options.adult, options.multimatch)
        for scraperobj in scraper_iter:
            num_errors += getStrips(scraperobj, options)
    except ValueError as msg:
        out.exception(msg)
        num_errors += 1
    finally:
        events.getHandler().end()
    return num_errors
|
||||
|
||||
|
||||
def getStrips(scraperobj, options):
    """Download strips from one scraper; return the number of errors."""
    num_errors = 0
    # --all wins over -n; with neither flag, fetch only the current strip.
    if options.all:
        limit = None
    else:
        limit = options.numstrips if options.numstrips else 1
    try:
        out.context = scraperobj.getName()
        for strip in scraperobj.getStrips(limit):
            strip_errors, skipped = saveComicStrip(strip, options.basepath)
            num_errors += strip_errors
            if skipped and options.cont:
                # An already-existing image means we caught up with a
                # previous run, so stop traversing for this comic.
                out.info("Stop retrieval because image file already exists")
                break
    except Exception as msg:
        out.exception(msg)
        num_errors += 1
    finally:
        out.context = ''
    return num_errors
|
||||
|
||||
|
||||
def run(options):
    """Execute the comic command selected by the parsed options."""
    setOutputInfo(options)
    # Informational modes that need no comic names come first.
    if options.version:
        return displayVersion()
    if options.list:
        return doList()
    if options.singlelist:
        return doList(columnList=False, verbose=options.verbose)
    # Everything below operates on an explicit list of comic names.
    if options.comic:
        if options.modulehelp:
            return displayHelp(options.comic)
        return getComics(options)
    out.warn('No comics specified, bailing out!')
    return 1
|
||||
|
||||
|
||||
def doList(columnList=True, verbose=False):
    """List available comics, paging the output on a TTY."""
    use_pager = hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
    # On a terminal, buffer everything and replay it through a pager.
    fd = StringIO() if use_pager else sys.stdout
    out.setStream(fd)
    out.info('Available comic scrapers:')
    out.info('Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
    out.info('Non-english comics are tagged with [%s].' % TAG_LANG)
    scrapers = sorted(getScrapers(['@@']), key=lambda s: s.getName())
    if columnList:
        num = doColumnList(scrapers)
    else:
        num = doSingleList(scrapers, verbose=verbose)
    out.info('%d supported comics.' % num)
    if use_pager:
        pydoc.pager(fd.getvalue())
    return 0
|
||||
|
||||
|
||||
def doSingleList(scrapers, verbose=False):
    """Print one scraper name (or its full help) per line.

    @return: the number of listed scrapers
    """
    # Count with enumerate(..., start=1) so the return value is the
    # number of comics.  The previous code returned the zero-based
    # index of the last scraper (undercounting by one in the
    # "%d supported comics." summary) and raised NameError when the
    # scraper list was empty.
    num = 0
    for num, scraperobj in enumerate(scrapers, start=1):
        if verbose:
            displayComicHelp(scraperobj)
        else:
            out.info(getScraperName(scraperobj))
    return num
|
||||
|
||||
|
||||
def doColumnList(scrapers):
    """Print scraper names in as many columns as fit the screen width.

    @return: the number of listed scrapers
    """
    screenWidth = get_columns(sys.stdout)
    # Limit name length so at least two columns fit; integer division
    # keeps the limit an int under Python 3 semantics as well.
    limit = (screenWidth // 2) - 8
    names = [getScraperName(scraperobj, limit=limit) for scraperobj in scrapers]
    num = len(names)
    if not names:
        # max() below raises ValueError on an empty sequence.
        return 0
    maxlen = max(len(name) for name in names)
    namesPerLine = max(screenWidth // (maxlen + 1), 1)
    while names:
        out.info(''.join(name.ljust(maxlen) for name in names[:namesPerLine]))
        del names[:namesPerLine]
    return num
|
||||
|
||||
# Tags shown in brackets behind scraper names in listings.
TAG_ADULT = "adult"
TAG_LANG = "lang"


def getScraperName(scraperobj, limit=None):
    """Build the display name of a scraper, with adult/language tags."""
    tags = []
    if scraperobj.adult:
        tags.append(TAG_ADULT)
    if scraperobj.lang != "en":
        tags.append("%s:%s" % (TAG_LANG, scraperobj.lang))
    suffix = " [" + ", ".join(tags) + "]" if tags else ""
    name = scraperobj.getName()
    if limit is not None:
        # Truncate long names so column listings stay aligned.
        name = strlimit(name, limit)
    return name + suffix
|
||||
|
||||
|
||||
def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False):
    """Get scraper objects for the given comics.

    A generator.  Special comic names: '@' yields only scrapers whose
    download directory already exists under basepath; '@@' yields all
    scrapers.  Otherwise each entry is a scraper name, optionally with
    ':index1,index2,...' appended.  Adult scrapers are skipped with a
    warning unless adult is true.
    """
    if '@' in comics:
        # only scrapers whose directory already exists
        if len(comics) > 1:
            out.warn("using '@' as comic name ignores all other specified comics.")
        for scraperclass in scraper.get_scraperclasses():
            dirname = getDirname(scraperclass.getName())
            # NOTE(review): basepath is assumed non-None here ('@' mode is
            # only reached from getComics(), which passes one) — confirm.
            if os.path.isdir(os.path.join(basepath, dirname)):
                if not adult and scraperclass.adult:
                    warn_adult(scraperclass)
                    continue
                yield scraperclass()
    elif '@@' in comics:
        # all scrapers
        for scraperclass in scraper.get_scraperclasses():
            if not adult and scraperclass.adult:
                warn_adult(scraperclass)
                continue
            yield scraperclass()
    else:
        # get only selected comic scrapers
        # store them in a set to eliminate duplicates
        scrapers = set()
        for comic in comics:
            if basepath and comic.startswith(basepath):
                # Strip a leading basepath so directory names work as
                # comic names; this makes the following command work:
                # find Comics -type d | xargs -n1 -P10 dosage -b Comics
                comic = comic[len(basepath):].lstrip(os.sep)
            # Optional strip indexes after a colon: "Name:idx1,idx2".
            if ':' in comic:
                name, index = comic.split(':', 1)
                indexes = index.split(',')
            else:
                name = comic
                indexes = None
            scraperclasses = scraper.find_scraperclasses(name, multiple_allowed=multiple_allowed)
            for scraperclass in scraperclasses:
                if not adult and scraperclass.adult:
                    warn_adult(scraperclass)
                    continue
                scraperobj = scraperclass(indexes=indexes)
                # Yield each scraper only once even if matched by
                # several command line arguments.
                if scraperobj not in scrapers:
                    scrapers.add(scraperobj)
                    yield scraperobj
|
||||
|
||||
|
||||
def warn_adult(scraperclass):
    """Warn that an adult comic is skipped without the --adult option."""
    msg = "skipping adult comic %s; use the --adult option to confirm your age" % scraperclass.getName()
    out.warn(msg)
|
||||
|
||||
|
||||
def main():
    """Parse the command line and run; return a process exit status."""
    try:
        res = run(setupOptions().parse_args())
    except KeyboardInterrupt:
        print("Aborted.")
        res = 1
    except Exception:
        # Unexpected failure: internal_error() reports it to the user.
        internal_error()
        res = 2
    return res
|
||||
|
||||
|
||||
def profile():
    """Profile the loading of all scrapers, writing stats to dosage.prof."""
    # Imported lazily: only needed for this development helper.
    import cProfile
    cProfile.run("scraper.get_scraperclasses()", "dosage.prof")
|
||||
|
||||
|
||||
def viewprof():
    """Show the top cumulative-time entries recorded in dosage.prof."""
    # Imported lazily: only needed for this development helper.
    import pstats
    profile_stats = pstats.Stats("dosage.prof")
    profile_stats.strip_dirs().sort_stats("cumulative").print_stats(100)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Normal entry point; the commented calls below are development
    # helpers for ad-hoc profiling of scraper loading.
    sys.exit(main())
    #profile()
    #viewprof()
|
Loading…
Reference in a new issue