dosage/dosage

#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
#     ___
#    /   \___  ___  __ _  __ _  ___
#   / /\ / _ \/ __|/ _` |/ _` |/ _ \
#  / /_// (_) \__ \ (_| | (_| |  __/
# /___,' \___/|___/\__,_|\__, |\___|
#                        |___/

from __future__ import division, print_function
import sys
import os
import argparse
import pydoc
import threading
from io import StringIO
try:
    from Queue import Queue, Empty
except ImportError:
    from queue import Queue, Empty
try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse

from dosagelib import events, scraper, configuration, singleton
from dosagelib.output import out
from dosagelib.util import internal_error, getDirname, strlimit, getLangName
from dosagelib.ansicolor import get_columns


class ArgumentParser(argparse.ArgumentParser):
    """Argument parser whose help output is paged on terminals."""

    def print_help(self, file=None):
        """Write the formatted help text.

        When the target stream reports itself as a TTY the text is handed
        to the pydoc pager, otherwise it is printed to the stream.

        @param file: stream to write to; sys.stdout is used when None
        """
        text = self.format_help()
        stream = sys.stdout if file is None else file
        interactive = hasattr(stream, "isatty") and stream.isatty()
        if interactive:
            pydoc.pager(text)
            return
        print(text, file=stream)


# Usage examples; setupOptions() passes this text as the epilog of the
# command line help.
Examples = """\
EXAMPLES
List available comics (ca. 3000 at the moment):
  dosage -l

Get the latest comic of for example CalvinAndHobbes and save it in the "Comics"
directory:
  dosage CalvinAndHobbes

If you already have downloaded several comics and want to get the latest
strips of all of them:
  dosage --continue @
"""


def setupOptions():
    """Construct the command line option parser.

    All options understood by the program are registered here. If the
    optional argcomplete module can be imported it is hooked up to the
    parser; when it is missing that step is silently skipped.

    @return: new option parser
    @rtype: argparse.ArgumentParser
    """
    parser = ArgumentParser(
        description="A comic downloader and archiver.",
        epilog=Examples,
        # keep the line breaks of the examples in the epilog
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('-v', '--verbose', action='count', default=0,
        help='provides verbose output, use multiple times for more verbosity')
    parser.add_argument('-n', '--numstrips', action='store', type=int, default=0,
        help='traverse and retrieve the given number of comic strips; '
             'use --all to retrieve all comic strips')
    parser.add_argument('-a', '--all', action='store_true',
        help='traverse and retrieve all comic strips')
    parser.add_argument('-c', '--continue', action='store_true', dest='cont',
        help='traverse and retrieve comic strips until an existing one is found')
    parser.add_argument('-b', '--basepath', action='store', default='Comics',
        help='set the path to create individual comic directories in, '
             'default is Comics',
        metavar='PATH')
    # the help text must name the real option, which is --basepath
    parser.add_argument('--baseurl', action='store',
        help='the base URL of your comics directory (for RSS, HTML, etc.); '
             'this should correspond to --basepath',
        metavar='PATH')
    parser.add_argument('-l', '--list', action='store_true',
        help='list available comic modules')
    parser.add_argument('--singlelist', action='store_true',
        help='list available comic modules in a single list')
    parser.add_argument('--version', action='store_true',
        help='display the version number')
    parser.add_argument('--vote', action='store_true',
        help='vote for the selected comics')
    parser.add_argument('-m', '--modulehelp', action='store_true',
        help='display help for comic modules')
    parser.add_argument('-t', '--timestamps', action='store_true',
        help='print timestamps for all output at any info level')
    parser.add_argument('-o', '--output', action='append', dest='handler',
        choices=events.getHandlerNames(),
        help='sets output handlers for downloaded comics')
    parser.add_argument('--adult', action='store_true',
        help='confirms that you are old enough to view adult content')
    # used for development testing prev/next matching
    parser.add_argument('--dry-run', action='store_true', help=argparse.SUPPRESS)
    # multimatch is only used for development, eg. testing if all comics of
    # a scripted plugin are working
    parser.add_argument('--multimatch', action='store_true', help=argparse.SUPPRESS)
    parser.add_argument('comic', nargs='*',
        help='comic module name (including case insensitive substrings)')
    try:
        import argcomplete
        argcomplete.autocomplete(parser)
    except ImportError:
        # shell completion is optional
        pass
    return parser


def displayVersion(verbose):
    """Print application name, copyright, license and support information.

    With a true verbose value an update check is run as well and its
    outcome (newer version, development version or error) is printed.

    @param verbose: when true, also check for updates
    @return: always 0
    """
    print(configuration.App)
    print(configuration.Copyright)
    print(configuration.Freeware)
    print("For support see", configuration.SupportUrl)
    if not verbose:
        return 0
    # search for updates
    from dosagelib.updater import check_update
    succeeded, info = check_update()
    if not succeeded:
        # on failure the second value is the error description, if any
        if info is None:
            info = 'invalid update file syntax'
        template = ('An error occured while checking for an '
                    'update of %(app)s: %(error)s.')
        print(template % dict(error=info, app=configuration.AppName))
        return 0
    if not info:
        # check worked but there is nothing to report
        return 0
    version, url = info
    if url is None:
        # current version is newer than online version
        template = ('Detected local or development version %(currentversion)s. '
                    'Available version of %(app)s is %(version)s.')
    else:
        # display update link
        template = ('A new version %(version)s of %(app)s is '
                    'available at %(url)s.')
    values = dict(version=version, app=configuration.AppName,
                  url=url, currentversion=configuration.Version)
    print(template % values)
    return 0


def setOutputInfo(options):
    """Configure the global output object from the parsed options.

    The timestamp flag is copied over and the output level is set to the
    number of times the verbose option was given.
    """
    out.timestamps = options.timestamps
    # start at level zero and raise it by the verbosity count
    out.level = 0
    out.level += options.verbose
    # debug urllib3
    #from requests.packages.urllib3 import add_stderr_logger
    #add_stderr_logger()


def saveComicStrip(strip, basepath, dryrun):
    """Save all images of a comic strip.

    A strip can consist of several images; each one is saved on its own
    and a failure of one image does not stop the others.

    @param strip: object whose getImages() provides the images
    @param basepath: directory passed on to each image's save()
    @param dryrun: when true nothing is saved at all
    @return: tuple (number of failed images, True if no image was saved)
    """
    failures = 0
    nothing_saved = True
    for image in strip.getImages():
        try:
            # a dry run behaves like an image that did not need saving
            _name, was_saved = ("", False) if dryrun else image.save(basepath)
            if was_saved:
                nothing_saved = False
        except Exception as msg:
            out.exception('Could not save image at %s to %s: %r' % (image.referrer, image.filename, msg))
            failures += 1
    return failures, nothing_saved


def displayHelp(options):
    """Show the help of every comic module selected on the command line.

    @return: sum of the displayComicHelp() results, or 2 when a
      ValueError is raised while the scrapers are processed
    """
    try:
        selected = getScrapers(options.comic, options.basepath)
        return sum(displayComicHelp(scraperobj) for scraperobj in selected)
    except ValueError as err:
        out.exception(err)
        return 2


def displayComicHelp(scraperobj):
    """Output URL, description, language, genres and help text of a comic.

    The output context is set to the scraper name while printing and is
    always cleared again afterwards.

    @return: 0 on success, 1 when a ValueError occurred
    """
    out.context = getScraperName(scraperobj)
    status = 0
    try:
        out.info(u"URL: " + scraperobj.url)
        # the remaining fields are optional and only shown when set
        if scraperobj.description:
            out.info(u"Description: " + scraperobj.description)
        if scraperobj.lang:
            out.info(u"Language: " + getLangName(scraperobj.lang))
        if scraperobj.genres:
            out.info(u"Genres: " + ", ".join(scraperobj.genres))
        if scraperobj.help:
            for helpline in scraperobj.help.splitlines():
                out.info(helpline)
    except ValueError as err:
        out.exception(err)
        status = 1
    finally:
        out.context = u''
    return status


# the comic scraper job queue; getComics() puts scraper objects in and the
# ComicGetter threads take them out
jobs = Queue()
# ensure threads download only from one host at a time; maps the value
# returned by get_hostname() to the lock for that host
host_locks = {}


def get_hostname(url):
    """Return the lowercased network location part of the given URL."""
    netloc = urlparse(url).netloc
    return netloc.lower()


# global lock; ComicGetter.run() holds it while calling get_host_lock() and
# while updating the comic_errors counter
lock = threading.Lock()
def get_host_lock(url):
    """Get the lock belonging to the host of the given URL.

    The lock is created and stored in host_locks the first time a host
    is seen; later calls for the same host return the stored lock.
    This function does not acquire the global lock itself.
    """
    hostname = get_hostname(url)
    # a new Lock is constructed on every call, but setdefault() only stores
    # it when the hostname has no entry yet
    return host_locks.setdefault(hostname, threading.Lock())

# error count summed up by the ComicGetter threads; getComics() adds it to
# its own error count
comic_errors = 0


class ComicGetter(threading.Thread):
    """Get all strips of a comic in a thread.

    Each thread takes scraper objects from the global job queue until the
    queue is empty. While a scraper is processed the thread carries the
    name of the comic.
    """

    def __init__(self, options):
        """Store options and remember the original thread name."""
        super(ComicGetter, self).__init__()
        self.options = options
        # the name attribute replaces the deprecated getName()/setName()
        self.origname = self.name

    def run(self):
        """Process jobs from the queue until it is empty.

        Every job taken from the queue is marked as done, even when
        processing it fails; otherwise jobs.join() in the main thread
        would wait forever. A failed job is logged and counted as one
        error in the global comic_errors counter.
        """
        global comic_errors
        while True:
            try:
                # non-blocking get: Empty means there is no work left
                scraperobj = jobs.get(False)
            except Empty:
                break
            try:
                self.name = scraperobj.getName()
                with lock:
                    host_lock = get_host_lock(scraperobj.url)
                # only one thread at a time downloads from a given host
                with host_lock:
                    errors = getStrips(scraperobj, self.options)
                with lock:
                    comic_errors += errors
            except Exception as msg:
                # keep the worker alive so the remaining jobs get processed
                out.exception(msg)
                with lock:
                    comic_errors += 1
            finally:
                jobs.task_done()
                self.name = self.origname


def getComics(options):
    """Download the selected comics with a pool of worker threads.

    The requested output handlers are registered, all selected scrapers
    are queued as jobs and between one and ten ComicGetter threads are
    started to work on them. The event handler is ended in every case.

    @return: number of errors, including those counted by the threads
    """
    # register each requested output handler once
    for handler_name in set(options.handler or ()):
        events.addHandler(handler_name, options.basepath, options.baseurl)
    events.getHandler().start()
    failures = 0
    try:
        selected = getScrapers(options.comic, options.basepath, options.adult, options.multimatch)
        for scraperobj in selected:
            jobs.put(scraperobj)
        # start threads: one per job, but at least one and at most ten
        worker_count = max(1, min(10, jobs.qsize()))
        for _ in range(worker_count):
            ComicGetter(options).start()
        # wait for threads to finish
        jobs.join()
    except ValueError as err:
        out.exception(err)
        failures += 1
    finally:
        events.getHandler().end()
    return failures + comic_errors


def voteComics(options):
    """Submit a vote for every selected comic.

    @return: number of failed votes; a ValueError raised while the
      scrapers are processed adds one more error and stops the loop
    """
    failed = 0
    try:
        selected = getScrapers(options.comic, options.basepath, options.adult, options.multimatch)
        for scraperobj in selected:
            failed += voteComic(scraperobj)
    except ValueError as err:
        out.exception(err)
        failed += 1
    return failed


def voteComic(scraperobj):
    """Submit a vote for one comic scraper and report the answer.

    The output context is set to the scraper name for the duration of the
    call and cleared afterwards.

    @return: 1 when an exception occurred, else 0
    """
    out.context = getScraperName(scraperobj)
    try:
        comicname = scraperobj.getName()
        reply = scraperobj.vote()
        out.debug(u'Vote answer %r' % reply)
        if reply == 'counted':
            # slashes in the comic name become underscores in the page name
            page = 'comics/%s.html' % comicname.replace('/', '_')
            url = configuration.Url + page
            out.info(u'Vote submitted. Votes are updated regularly at %s.' % url)
        elif reply == 'no':
            out.info(u'Vote not submitted - your vote has already been submitted before.')
        elif reply == 'noname':
            out.warn(u'The comic %s cannot be voted.' % comicname)
        else:
            out.warn(u'Error submitting vote parameters: %r' % reply)
        return 0
    except Exception as err:
        out.exception(err)
        return 1
    finally:
        out.context = u''


def getStrips(scraperobj, options):
    """Retrieve and save the strips of one scraper.

    The number of strips depends on the options: no limit for --all and
    --continue, the given count for --numstrips, else only one strip.
    With --continue, retrieval stops at the first strip for which no
    image was saved.

    @return: number of errors that occurred
    """
    if options.all or options.cont:
        # no limit
        wanted = None
    else:
        # the given number, or just the current strip
        wanted = options.numstrips or 1
    failures = 0
    try:
        if scraperobj.isComplete(options.basepath):
            out.info(u"All comics are already downloaded.")
            return 0
        for strip in scraperobj.getStrips(wanted):
            strip_failures, nothing_saved = saveComicStrip(strip, options.basepath, options.dry_run)
            failures += strip_failures
            if nothing_saved and options.cont:
                # stop when retrieval skipped an image for one comic strip
                out.info(u"Stop retrieval because image file already exists")
                break
        # only an error free, real and unrestricted --all run is complete
        mark_complete = (options.all and not failures
                         and not options.dry_run and not options.cont
                         and not scraperobj.indexes)
        if mark_complete:
            scraperobj.setComplete(options.basepath)
    except Exception as err:
        out.exception(err)
        failures += 1
    return failures


def run(options):
    """Dispatch to the command selected by the parsed options.

    Version display and the two list commands work without comic names;
    module help, voting and downloading need at least one.

    @return: exit code of the executed command
    """
    setOutputInfo(options)
    # ensure only one instance of dosage is running
    # NOTE(review): the object is bound to a local for the whole call,
    # presumably so the guard stays active -- confirm in dosagelib.singleton
    instance_guard = singleton.SingleInstance()
    if options.version:
        return displayVersion(options.verbose)
    if options.list:
        return doList()
    if options.singlelist:
        return doList(columnList=False, verbose=options.verbose)
    # after this a list of comic strips is needed
    if not options.comic:
        out.warn(u'No comics specified, bailing out!')
        return 1
    if options.modulehelp:
        command = displayHelp
    elif options.vote:
        command = voteComics
    else:
        command = getComics
    return command(options)


def doList(columnList=True, verbose=False):
    """Print the list of all available comic scrapers.

    When standard output is a TTY the output is collected in a buffer
    and shown with the pydoc pager.

    @param columnList: print several names per line instead of one
    @param verbose: passed on to doSingleList()
    @return: always 0
    """
    use_pager = hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
    if use_pager:
        buf = StringIO(u'')
        out.setStream(buf)
    out.info(u'Available comic scrapers:')
    out.info(u'Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
    out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
    # '@@' selects all scrapers
    scrapers = list(getScrapers(['@@']))
    scrapers.sort(key=lambda s: s.getName())
    if columnList:
        count = doColumnList(scrapers)
    else:
        count = doSingleList(scrapers, verbose=verbose)
    out.info(u'%d supported comics.' % count)
    if use_pager:
        pydoc.pager(buf.getvalue())
    return 0


def doSingleList(scrapers, verbose=False):
    """Print the scraper names, one per line.

    @param scrapers: iterable of scraper objects
    @param verbose: print the full comic help instead of only the name
    @return: number of scrapers listed, 0 for an empty iterable
    """
    # preset so that an empty iterable yields 0 instead of an unbound name
    num = 0
    # count from 1 so that num is the number of entries, not the last index
    for num, scraperobj in enumerate(scrapers, 1):
        if verbose:
            displayComicHelp(scraperobj)
        else:
            out.info(getScraperName(scraperobj))
    return num


def doColumnList(scrapers):
    """Print the scraper names with multiple names per line.

    @param scrapers: iterable of scraper objects
    @return: number of scrapers listed, 0 for an empty iterable
    """
    scrapers = list(scrapers)
    if not scrapers:
        # nothing to print; max() below would fail on an empty sequence
        return 0
    screenWidth = get_columns(sys.stdout)
    # limit name length so at least two columns are there
    limit = (screenWidth // 2) - 8
    names = [getScraperName(scraperobj, limit=limit) for scraperobj in scrapers]
    num = len(names)
    maxlen = max(len(name) for name in names)
    namesPerLine = max(screenWidth // (maxlen + 1), 1)
    # NOTE(review): the column count reserves maxlen + 1 characters per name
    # but names are padded to maxlen only, so the longest name touches its
    # neighbour -- confirm whether a separating space is wanted
    for start in range(0, num, namesPerLine):
        row = names[start:start + namesPerLine]
        out.info(u''.join(name.ljust(maxlen) for name in row))
    return num

# tags shown in square brackets behind a comic name
TAG_ADULT = "adult"
TAG_LANG = "lang"


def getScraperName(scraperobj, limit=None):
    """Build the display name of a comic scraper.

    The name is followed by a bracketed tag list when the comic is marked
    as adult and/or its language is not "en".

    @param limit: when not None the name (without tags) is shortened with
      strlimit() to this length
    @return: display name including the tag suffix
    """
    labels = []
    if scraperobj.adult:
        labels.append(TAG_ADULT)
    if scraperobj.lang != "en":
        labels.append("%s:%s" % (TAG_LANG, scraperobj.lang))
    shown = scraperobj.getName()
    if limit is not None:
        shown = strlimit(shown, limit)
    if not labels:
        return shown
    return shown + " [%s]" % ", ".join(labels)


def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False):
    """Generate scraper objects for the given comics.

    Two names are special: '@' selects every scraper whose comic
    directory already exists below basepath, '@@' selects all scrapers.
    Any other entry is looked up with scraper.find_scraperclasses(); it
    may be followed by ':' and a comma separated list of indexes which
    is handed to the scraper. Each scraper object is yielded only once.

    @param comics: list of comic names given by the user
    @param basepath: base directory of the comics; needed for '@' and
      stripped from the front of names that start with it
    @param adult: when false, adult comics are skipped with a warning
    @param multiple_allowed: passed to scraper.find_scraperclasses()
    @return: generator of scraper objects
    """
    if '@' in comics:
        # only scrapers whose directory already exists
        if len(comics) > 1:
            out.warn(u"using '@' as comic name ignores all other specified comics.")
        for scraperclass in scraper.get_scraperclasses():
            dirname = getDirname(scraperclass.getName())
            if os.path.isdir(os.path.join(basepath, dirname)):
                if not adult and scraperclass.adult:
                    warn_adult(scraperclass)
                    continue
                yield scraperclass()
    elif '@@' in comics:
        # all scrapers
        for scraperclass in scraper.get_scraperclasses():
            if not adult and scraperclass.adult:
                warn_adult(scraperclass)
                continue
            yield scraperclass()
    else:
        # get only selected comic scrapers
        # store them in a set to eliminate duplicates
        scrapers = set()
        for comic in comics:
            # Helpful when using shell completion to pick comics to get:
            # remove trailing path separators. Strings are immutable, so
            # the stripped result has to be assigned back.
            comic = comic.rstrip(os.path.sep)
            if basepath and comic.startswith(basepath):
                # make the following command work:
                # find Comics -type d | xargs -n1 -P10 dosage -b Comics
                comic = comic[len(basepath):].lstrip(os.sep)
            if ':' in comic:
                # "name:index1,index2" restricts the scraper to these indexes
                name, index = comic.split(':', 1)
                indexes = index.split(',')
            else:
                name = comic
                indexes = None
            scraperclasses = scraper.find_scraperclasses(name, multiple_allowed=multiple_allowed)
            for scraperclass in scraperclasses:
                if not adult and scraperclass.adult:
                    warn_adult(scraperclass)
                    continue
                scraperobj = scraperclass(indexes=indexes)
                if scraperobj not in scrapers:
                    scrapers.add(scraperobj)
                    yield scraperobj


def warn_adult(scraperclass):
    """Warn that an adult comic is skipped and how to enable it."""
    comicname = scraperclass.getName()
    out.warn(u"skipping adult comic %s; use the --adult option to confirm your age" % comicname)


def main():
    """Parse the command line and run the selected command.

    @return: result of run(); 1 after a keyboard interrupt, 2 after any
      other exception (which is reported with internal_error())
    """
    try:
        options = setupOptions().parse_args()
        return run(options)
    except KeyboardInterrupt:
        print("Aborted.")
        return 1
    except Exception:
        internal_error()
        return 2


def profile():
    """Profile loading all scraper classes and store the stats in dosage.prof."""
    import cProfile
    statement = "scraper.get_scraperclasses()"
    cProfile.run(statement, "dosage.prof")


def viewprof():
    """Print the top 100 entries of dosage.prof sorted by cumulative time."""
    import pstats
    stats = pstats.Stats("dosage.prof")
    # each call modifies the Stats object in place
    stats.strip_dirs()
    stats.sort_stats("cumulative")
    stats.print_stats(100)


if __name__ == '__main__':
    # use the return value of main() as process exit code
    sys.exit(main())
    # development helpers, disabled by default
    #profile()
    #viewprof()