Make threads interruptable.

2014-03-04 18:38:46 +01:00 · 2014-03-04 18:38:46 +01:00 · 15ef59262a
commit 15ef59262a
parent 1a96b63137
3 changed files with 251 additions and 190 deletions
--- a/doc/changelog.txt
+++ b/doc/changelog.txt
@ -1,10 +1,12 @@
 Dosage 2.13 (released xx.xx.2014)

 Features:
- comics: Added OhJoySexToy, TheGentlemansArmchair.
+- comics: Added OhJoySexToy, TheGentlemansArmchair, Underling, DongeonsAndDenizens,
+  GrimTalesFromDownBelow, TheLandscaper, DieFruehreifen, MonsieurLeChien.

 Fixes:
 - comics: Fixed EvilInc, FredoAndPidjin.
+- cmdline: Make download threads interruptible with Ctrl-C.


 Dosage 2.12 (released 24.1.2014)
--- a/195
+++ b/195
@ -14,20 +14,10 @@ import sys
 import os
 import argparse
 import pydoc
-import threading
 from io import StringIO
-try:
-    from Queue import Queue, Empty
-except ImportError:
-    from queue import Queue, Empty
-try:
-    from urllib.parse import urlparse
-except ImportError:
-    from urlparse import urlparse
-
-from dosagelib import events, scraper, configuration, singleton
+from dosagelib import events, configuration, singleton, director
 from dosagelib.output import out
-from dosagelib.util import internal_error, getDirname, strlimit, getLangName
+from dosagelib.util import internal_error, strlimit, getLangName
 from dosagelib.ansicolor import get_columns


@ -142,29 +132,11 @@ def setOutputInfo(options):
    #add_stderr_logger()


-def saveComicStrip(strip, basepath, dryrun):
-    """Save a comic strip which can consist of multiple images."""
-    errors = 0
-    allskipped = True
-    for image in strip.getImages():
-        try:
-            if dryrun:
-                filename, saved = "", False
-            else:
-                filename, saved = image.save(basepath)
-            if saved:
-                allskipped = False
-        except Exception as msg:
-            out.exception('Could not save image at %s to %s: %r' % (image.referrer, image.filename, msg))
-            errors += 1
-    return errors, allskipped
-
-
 def displayHelp(options):
    """Print help for comic strips."""
    errors = 0
    try:
-        for scraperobj in getScrapers(options.comic, options.basepath):
+        for scraperobj in director.getScrapers(options.comic, options.basepath):
            errors += displayComicHelp(scraperobj)
    except ValueError as msg:
        out.exception(msg)
@ -195,83 +167,11 @@ def displayComicHelp(scraperobj):
        out.context = orig_context


-# the comic scraper job queue
-jobs = Queue()
-# ensure threads download only from one host at a time
-host_locks = {}
-
-
-def get_hostname(url):
-    """Get hostname from URL."""
-    return list(urlparse(url))[1].lower()
-
-
-lock = threading.Lock()
-def get_host_lock(url):
-    """Get lock object for given URL host."""
-    hostname = get_hostname(url)
-    return host_locks.setdefault(hostname, threading.Lock())
-
-comic_errors = 0
-
-
-class ComicGetter(threading.Thread):
-    """Get all strips of a comic in a thread."""
-
-    def __init__(self, options):
-        """Store options."""
-        super(ComicGetter, self).__init__()
-        self.options = options
-        self.origname = self.getName()
-
-    def run(self):
-        """Process from queue until it is empty."""
-        global comic_errors
-        while True:
-            try:
-                scraperobj = jobs.get(False)
-                self.setName(scraperobj.getName())
-                with lock:
-                    host_lock = get_host_lock(scraperobj.url)
-                with host_lock:
-                    errors = getStrips(scraperobj, self.options)
-                with lock:
-                    comic_errors += errors
-                jobs.task_done()
-                self.setName(self.origname)
-            except Empty:
-                break
-
-
-def getComics(options):
-    """Retrieve comics."""
-    if options.handler:
-        for name in set(options.handler):
-            events.addHandler(name, options.basepath, options.baseurl)
-    events.getHandler().start()
-    errors = 0
-    try:
-        for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
-            jobs.put(scraperobj)
-        # start threads
-        num_threads = max(1, min(10, jobs.qsize()))
-        for i in range(num_threads):
-            ComicGetter(options).start()
-        # wait for threads to finish
-        jobs.join()
-    except ValueError as msg:
-        out.exception(msg)
-        errors += 1
-    finally:
-        events.getHandler().end()
-    return errors + comic_errors
-
-
 def voteComics(options):
    """Vote for comics."""
    errors = 0
    try:
-        for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
+        for scraperobj in director.getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
            errors += voteComic(scraperobj)
    except ValueError as msg:
        out.exception(msg)
@ -305,36 +205,6 @@ def voteComic(scraperobj):
    return errors


-def getStrips(scraperobj, options):
-    """Get all strips from a scraper."""
-    errors = 0
-    if options.all or options.cont:
-        numstrips = None
-    elif options.numstrips:
-        numstrips = options.numstrips
-    else:
-        # get current strip
-        numstrips = 1
-    try:
-        if scraperobj.isComplete(options.basepath):
-            out.info(u"All comics are already downloaded.")
-            return 0
-        for strip in scraperobj.getStrips(numstrips):
-            _errors, skipped = saveComicStrip(strip, options.basepath, options.dry_run)
-            errors += _errors
-            if skipped and options.cont:
-                # stop when retrieval skipped an image for one comic strip
-                out.info(u"Stop retrieval because image file already exists")
-                break
-        if options.all and not (errors or options.dry_run or
-                                options.cont or scraperobj.indexes):
-            scraperobj.setComplete(options.basepath)
-    except Exception as msg:
-        out.exception(msg)
-        errors += 1
-    return errors
-
-
 def run(options):
    """Execute comic commands."""
    setOutputInfo(options)
@ -354,7 +224,7 @@ def run(options):
        return displayHelp(options)
    if options.vote:
        return voteComics(options)
-    return getComics(options)
+    return director.getComics(options)


 def doList(columnList=True, verbose=False):
@ -369,7 +239,7 @@ def doList(columnList=True, verbose=False):
        out.info(u'Available comic scrapers:')
        out.info(u'Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
        out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
-        scrapers = sorted(getScrapers(['@@']), key=lambda s: s.getName())
+        scrapers = sorted(director.getAllScrapers(), key=lambda s: s.getName())
        if columnList:
            num = doColumnList(scrapers)
        else:
@ -426,59 +296,6 @@ def getScraperName(scraperobj, limit=None):
    return name + suffix


-def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False):
-    """Get scraper objects for the given comics."""
-    if '@' in comics:
-        # only scrapers whose directory already exists
-        if len(comics) > 1:
-            out.warn(u"using '@' as comic name ignores all other specified comics.")
-        for scraperclass in scraper.get_scraperclasses():
-            dirname = getDirname(scraperclass.getName())
-            if os.path.isdir(os.path.join(basepath, dirname)):
-                if not adult and scraperclass.adult:
-                    warn_adult(scraperclass)
-                    continue
-                yield scraperclass()
-    elif '@@' in comics:
-        # all scrapers
-        for scraperclass in scraper.get_scraperclasses():
-            if not adult and scraperclass.adult:
-                warn_adult(scraperclass)
-                continue
-            yield scraperclass()
-    else:
-        # get only selected comic scrapers
-        # store them in a set to eliminate duplicates
-        scrapers = set()
-        for comic in comics:
-            # Helpful when using shell completion to pick comics to get
-            comic.rstrip(os.path.sep)
-            if basepath and comic.startswith(basepath):
-                # make the following command work:
-                # find Comics -type d | xargs -n1 -P10 dosage -b Comics
-                comic = comic[len(basepath):].lstrip(os.sep)
-            if ':' in comic:
-                name, index = comic.split(':', 1)
-                indexes = index.split(',')
-            else:
-                name = comic
-                indexes = None
-            scraperclasses = scraper.find_scraperclasses(name, multiple_allowed=multiple_allowed)
-            for scraperclass in scraperclasses:
-                if not adult and scraperclass.adult:
-                    warn_adult(scraperclass)
-                    continue
-                scraperobj = scraperclass(indexes=indexes)
-                if scraperobj not in scrapers:
-                    scrapers.add(scraperobj)
-                    yield scraperobj
-
-
-def warn_adult(scraperclass):
-    """Print warning about adult content."""
-    out.warn(u"skipping adult comic %s; use the --adult option to confirm your age" % scraperclass.getName())
-
-
 def main():
    """Parse options and execute commands."""
    try:
--- a/dosagelib/director.py
+++ b/dosagelib/director.py
@ -0,0 +1,242 @@
+# -*- coding: iso-8859-1 -*-
+# Copyright (C) 2014 Bastian Kleineidam
+import os
+import threading
+try:
+    import thread
+except ImportError:
+    # Python 3 renamed the low-level thread module
+    import _thread as thread
+try:
+    from Queue import Queue, Empty
+except ImportError:
+    from queue import Queue, Empty
+try:
+    from urllib.parse import urlparse
+except ImportError:
+    from urlparse import urlparse
+from .output import out
+from . import events, scraper
+from .util import getDirname
+
+
+class ComicQueue(Queue):
+    """The comic scraper job queue.
+
+    Adds a join() that waits in slices of a given length and a clear() that
+    discards all jobs no consumer has fetched yet.
+    """
+
+    def join(self, timeout=None):
+        """Block until all items in the queue have been gotten and processed.
+
+        The count of unfinished tasks goes up whenever an item is added to the
+        queue. The count goes down whenever a consumer thread calls task_done()
+        to indicate the item was retrieved and all work on it is complete.
+
+        When the count of unfinished tasks drops to zero, join() unblocks.
+
+        The timeout is the number of seconds of each single wait before the
+        counter is checked again. It is a polling interval and not a deadline:
+        join() only returns once the counter reaches zero.
+        NOTE(review): the polling presumably lets the main thread notice
+        Ctrl-C while waiting - confirm.
+        """
+        with self.all_tasks_done:
+            while self.unfinished_tasks:
+                self.all_tasks_done.wait(timeout)
+
+    def clear(self):
+        """Remove all queue entries that have not been fetched yet.
+
+        The discarded entries are subtracted from the unfinished task count;
+        otherwise join() would wait forever for task_done() calls that can
+        never happen. Jobs a consumer already fetched stay counted until that
+        consumer calls task_done().
+        """
+        with self.mutex:
+            self.unfinished_tasks -= len(self.queue)
+            self.queue.clear()
+            if not self.unfinished_tasks:
+                # nothing is in flight anymore: wake up threads in join()
+                self.all_tasks_done.notify_all()
+            # the queue has room again for blocked producers
+            self.not_full.notify_all()
+
+
+# Registry of per-host locks, keyed by lowercased network location; it
+# ensures that threads download from only one host at a time.
+host_locks = {}
+
+
+def get_hostname(url):
+    """Return the lowercased network location part of the given URL."""
+    parts = urlparse(url)
+    return parts.netloc.lower()
+
+
+# Held by ComicGetter.getStrips() while it looks up a host lock.
+lock = threading.Lock()
+
+
+def get_host_lock(url):
+    """Return the lock object shared by all URLs of the given URL's host.
+
+    The lock is stored in host_locks the first time a host is seen.
+    """
+    candidate = threading.Lock()
+    return host_locks.setdefault(get_hostname(url), candidate)
+
+
+class ComicGetter(threading.Thread):
+    """Get all strips of a comic in a thread.
+
+    Scraper objects are taken from the module-level jobs queue until the
+    queue is empty or stop() has been called. Errors are counted in the
+    errors attribute.
+    """
+
+    def __init__(self, options):
+        """Store options."""
+        super(ComicGetter, self).__init__()
+        self.options = options
+        # the thread is renamed after the comic it works on; remember the
+        # initial name to restore it afterwards
+        self.origname = self.name
+        # set by stop() to end processing as soon as possible
+        self.stopped = False
+        # number of errors this thread encountered over all its comics
+        self.errors = 0
+
+    def run(self):
+        """Process from queue until it is empty."""
+        try:
+            while not self.stopped:
+                # non-blocking get raises Empty when no job is left
+                scraperobj = jobs.get(False)
+                self.name = scraperobj.getName()
+                try:
+                    self.getStrips(scraperobj)
+                finally:
+                    # always mark the job as done so that jobs.join() can return
+                    jobs.task_done()
+                    self.name = self.origname
+        except Empty:
+            pass
+        except KeyboardInterrupt:
+            thread.interrupt_main()
+
+    def getStrips(self, scraperobj):
+        """Download comic strips."""
+        # the module lock guards the lookup in the host lock registry
+        with lock:
+            host_lock = get_host_lock(scraperobj.url)
+        # only one thread at a time downloads from the same host
+        with host_lock:
+            self._getStrips(scraperobj)
+
+    def _getStrips(self, scraperobj):
+        """Get all strips from a scraper.
+
+        Errors are added to self.errors; nothing is returned. With the "all"
+        option the comic is marked complete afterwards, but only when this
+        comic was retrieved without errors and without being stopped.
+        """
+        if self.options.all or self.options.cont:
+            numstrips = None
+        elif self.options.numstrips:
+            numstrips = self.options.numstrips
+        else:
+            # get current strip
+            numstrips = 1
+        # self.errors also counts errors of comics handled earlier by this
+        # thread, so remember the value to detect errors of this comic only
+        errors_before = self.errors
+        try:
+            if scraperobj.isComplete(self.options.basepath):
+                out.info(u"All comics are already downloaded.")
+                return
+            for strip in scraperobj.getStrips(numstrips):
+                skipped = self.saveComicStrip(strip)
+                if skipped and self.options.cont:
+                    # stop when retrieval skipped an image for one comic strip
+                    out.info(u"Stop retrieval because image file already exists")
+                    break
+                if self.stopped:
+                    break
+            failed = self.errors != errors_before
+            # an interrupted run has not retrieved everything, so it must
+            # not mark the comic as complete
+            if self.options.all and not (failed or self.stopped or
+                                         self.options.dry_run or
+                                         self.options.cont or scraperobj.indexes):
+                scraperobj.setComplete(self.options.basepath)
+        except Exception as msg:
+            out.exception(msg)
+            self.errors += 1
+
+    def saveComicStrip(self, strip):
+        """Save a comic strip which can consist of multiple images.
+
+        Returns True if no image of the strip was saved. Errors are logged
+        and counted in self.errors.
+        """
+        allskipped = True
+        for image in strip.getImages():
+            try:
+                if self.options.dry_run:
+                    saved = False
+                else:
+                    _filename, saved = image.save(self.options.basepath)
+                if saved:
+                    allskipped = False
+                if self.stopped:
+                    break
+            except Exception as msg:
+                out.exception('Could not save image at %s to %s: %r' % (image.referrer, image.filename, msg))
+                self.errors += 1
+        return allskipped
+
+    def stop(self):
+        """Ask the thread to finish after the image it currently handles."""
+        self.stopped = True
+
+
+# queue of scraper objects waiting to be processed by ComicGetter threads
+jobs = ComicQueue()
+# ComicGetter threads started by getComics(); finish() asks them to stop
+threads = []
+
+
+def getComics(options):
+    """Retrieve comics.
+
+    Registers the requested event handlers, queues one job per selected
+    scraper, starts the download threads and waits until the queue has been
+    processed. On Ctrl-C the threads are asked to stop via finish().
+
+    Returns the number of errors, including the errors the download threads
+    counted up to the point of an interrupt.
+    """
+    if options.handler:
+        for name in set(options.handler):
+            events.addHandler(name, options.basepath, options.baseurl)
+    events.getHandler().start()
+    errors = 0
+    # threads started by this call; the module-level list is never reset
+    # and could hold threads of an earlier call
+    started = []
+    try:
+        for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
+            jobs.put(scraperobj)
+        # start threads
+        # XXX limited to one thread; was max(1, min(10, jobs.qsize()))
+        num_threads = 1
+        for _ in range(num_threads):
+            t = ComicGetter(options)
+            threads.append(t)
+            started.append(t)
+            t.start()
+        # wait for threads to finish, checking once per second
+        jobs.join(1)
+    except ValueError as msg:
+        out.exception(msg)
+        errors += 1
+    except KeyboardInterrupt:
+        finish()
+    finally:
+        events.getHandler().end()
+    # collected after the try block so that errors counted before an
+    # interrupt are not lost
+    errors += sum(t.errors for t in started)
+    return errors
+
+
+def finish():
+    """Handle an interrupt of the download.
+
+    Asks every thread in the module-level threads list to stop and removes
+    the remaining entries from the job queue, logging a warning before and
+    after.
+    """
+    out.warn("Interrupted!")
+    for worker in threads:
+        worker.stop()
+    jobs.clear()
+    out.warn("Waiting for download threads to finish.")
+
+
+def getAllScrapers():
+    """Get scraper objects for all comics."""
+    # '@@' is the comic name that selects every scraper
+    everything = ['@@']
+    return getScrapers(everything)
+
+
+def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False):
+    """Get scraper objects for the given comics.
+
+    This is a generator. The entries of comics select the scrapers:
+      - '@' selects every comic whose directory already exists below
+        basepath (basepath must be given in this case)
+      - '@@' selects all comics
+      - any other entry is a comic name, optionally followed by ':' and a
+        comma-separated list of indexes; a leading basepath and a trailing
+        path separator are removed from it
+    Adult comics are skipped with a warning unless adult is true.
+    multiple_allowed is passed on to scraper.find_scraperclasses().
+    """
+    if '@' in comics:
+        # only scrapers whose directory already exists
+        if len(comics) > 1:
+            out.warn(u"using '@' as comic name ignores all other specified comics.")
+        for scraperclass in scraper.get_scraperclasses():
+            dirname = getDirname(scraperclass.getName())
+            if os.path.isdir(os.path.join(basepath, dirname)):
+                if not adult and scraperclass.adult:
+                    warn_adult(scraperclass)
+                    continue
+                yield scraperclass()
+    elif '@@' in comics:
+        # all scrapers
+        for scraperclass in scraper.get_scraperclasses():
+            if not adult and scraperclass.adult:
+                warn_adult(scraperclass)
+                continue
+            yield scraperclass()
+    else:
+        # get only selected comic scrapers
+        # store them in a set to eliminate duplicates
+        scrapers = set()
+        for comic in comics:
+            # Helpful when using shell completion to pick comics to get:
+            # remove the path separator the shell appends to directories.
+            # Strings are immutable, so the result has to be assigned.
+            comic = comic.rstrip(os.path.sep)
+            if basepath and comic.startswith(basepath):
+                # make the following command work:
+                # find Comics -type d | xargs -n1 -P10 dosage -b Comics
+                comic = comic[len(basepath):].lstrip(os.sep)
+            if ':' in comic:
+                name, index = comic.split(':', 1)
+                indexes = index.split(',')
+            else:
+                name = comic
+                indexes = None
+            scraperclasses = scraper.find_scraperclasses(name, multiple_allowed=multiple_allowed)
+            for scraperclass in scraperclasses:
+                if not adult and scraperclass.adult:
+                    warn_adult(scraperclass)
+                    continue
+                scraperobj = scraperclass(indexes=indexes)
+                if scraperobj not in scrapers:
+                    scrapers.add(scraperobj)
+                    yield scraperobj
+
+
+def warn_adult(scraperclass):
+    """Warn that the given adult comic is skipped without --adult."""
+    comicname = scraperclass.getName()
+    out.warn(u"skipping adult comic %s; use the --adult option to confirm your age" % comicname)