Implement parallel downloading.

2014-01-05 16:01:11 +01:00 · 2014-01-05 16:01:11 +01:00 · 1a3d3f517b
commit 1a3d3f517b
parent 365fd17802
1 changed files with 75 additions and 5 deletions
--- a/80
+++ b/80
@ -14,7 +14,16 @@ import sys
 import os
 import argparse
 import pydoc
 import threading
 from io import StringIO
 try:
    from Queue import Queue, Empty
 except ImportError:
    from queue import Queue, Empty
 try:
    from urllib.parse import urlparse
 except ImportError:
    from urlparse import urlparse
 from dosagelib import events, scraper, configuration, singleton
 from dosagelib.output import out
@ -189,6 +198,50 @@ def displayComicHelp(scraperobj):
        out.context = u''
 # the comic scraper job queue
 jobs = Queue()
 # per-host locks: ensure that only one thread at a time downloads from any given host
 host_locks = {}
 def get_hostname(url):
    """Return the lowercased network location part of the given URL.

    This is the netloc field of the parsed URL, so a port or user
    information given in the URL is part of the returned string.
    """
    netloc = urlparse(url).netloc
    return netloc.lower()
 lock = threading.Lock()
 def get_host_lock(url):
    """Return the lock object registered for the host of the given URL.

    The first request for a host stores a new lock in host_locks; later
    requests for the same host get that same lock back.
    """
    host = get_hostname(url)
    candidate = threading.Lock()
    # setdefault() keeps an already registered lock and drops the candidate
    return host_locks.setdefault(host, candidate)
 comic_errors = 0
 class ComicGetter(threading.Thread):
    """Get all strips of a comic in a thread.

    Each instance takes scraper objects from the module-level jobs queue
    and runs getStrips() on them until the queue is empty.
    """

    def __init__(self, options):
        """Store options, which are handed through to getStrips()."""
        super(ComicGetter, self).__init__()
        self.options = options

    def run(self):
        """Process from queue until it is empty.

        The error count returned by getStrips() for each job is added to
        the module-level comic_errors counter. A job that raises an
        exception is logged and counted as one error, so the thread keeps
        working and the queue bookkeeping stays correct.
        """
        global comic_errors
        while True:
            # Only the non-blocking queue read may signal "no more work".
            try:
                scraperobj = jobs.get(False)
            except Empty:
                break
            try:
                try:
                    with lock:
                        host_lock = get_host_lock(scraperobj.url)
                    # hold the host lock for the whole download of this comic
                    with host_lock:
                        errors = getStrips(scraperobj, self.options)
                except Exception as msg:
                    # Thread boundary: log the failure instead of letting
                    # the worker die with unprocessed jobs in the queue.
                    out.exception(msg)
                    errors = 1
                with lock:
                    comic_errors += errors
            finally:
                # Always mark the job as done, and only after the counter
                # update: otherwise jobs.join() would block forever on a
                # failed job, or return before the errors are recorded.
                jobs.task_done()
 def getComics(options):
    """Retrieve comics."""
    if options.handler:
@ -198,19 +251,34 @@ def getComics(options):
    errors = 0
    try:
        for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
-            if options.vote:
+            jobs.put(scraperobj)
-                errors += vote(scraperobj)
+        # start threads
-            else:
+        num_threads = max(1, min(10, jobs.qsize()))
-                errors += getStrips(scraperobj, options)
+        for i in range(num_threads):
            ComicGetter(options).start()
        # wait for threads to finish
        jobs.join()
    except ValueError as msg:
        out.exception(msg)
        errors += 1
    finally:
        events.getHandler().end()
    return errors + comic_errors
 def voteComics(options):
    """Vote for every comic selected by the options.

    Returns the sum of the error counts reported by voteComic(), plus one
    if a ValueError was raised while getting scrapers or voting.
    """
    failures = 0
    try:
        selected = getScrapers(
            options.comic,
            options.basepath,
            options.adult,
            options.multimatch,
        )
        for scraperobj in selected:
            failures = failures + voteComic(scraperobj)
    except ValueError as msg:
        out.exception(msg)
        failures = failures + 1
    return failures
-def vote(scraperobj):
+def voteComic(scraperobj):
    """Vote for given comic scraper."""
    errors = 0
    name = scraperobj.getName()
@ -285,6 +353,8 @@ def run(options):
        return 1
    if options.modulehelp:
        return displayHelp(options)
    if options.vote:
        return voteComics(options)
    return getComics(options)