Implement parallel downloading.

Comics to download are put on a job queue and fetched by up to ten
worker threads. A per-host lock ensures that only one thread downloads
from a given host at a time, so no site is hit by several connections
at once. Voting no longer runs inside getComics(); it is handled by
the new voteComics() function.

Author: Bastian Kleineidam
Date:   2014-01-05 16:01:11 +01:00
parent 365fd17802
commit 1a3d3f517b

dosage

@@ -14,7 +14,16 @@ import sys
 import os
 import argparse
 import pydoc
+import threading
 from io import StringIO
+try:
+    from Queue import Queue, Empty
+except ImportError:
+    from queue import Queue, Empty
+try:
+    from urllib.parse import urlparse
+except ImportError:
+    from urlparse import urlparse
 
 from dosagelib import events, scraper, configuration, singleton
 from dosagelib.output import out
@@ -189,6 +198,50 @@ def displayComicHelp(scraperobj):
     out.context = u''
 
 
+# the comic scraper job queue
+jobs = Queue()
+
+# ensure threads download only from one host at a time
+host_locks = {}
+
+def get_hostname(url):
+    """Get hostname from URL."""
+    return list(urlparse(url))[1].lower()
+
+
+lock = threading.Lock()
+def get_host_lock(url):
+    hostname = get_hostname(url)
+    return host_locks.setdefault(hostname, threading.Lock())
+
+comic_errors = 0
+
+
+class ComicGetter(threading.Thread):
+    """Get all strips of a comic in a thread."""
+
+    def __init__(self, options):
+        """Store options."""
+        super(ComicGetter, self).__init__()
+        self.options = options
+
+    def run(self):
+        """Process from queue until it is empty."""
+        global comic_errors
+        while True:
+            try:
+                scraperobj = jobs.get(False)
+                with lock:
+                    host_lock = get_host_lock(scraperobj.url)
+                with host_lock:
+                    errors = getStrips(scraperobj, self.options)
+                with lock:
+                    comic_errors += errors
+                jobs.task_done()
+            except Empty:
+                break
+
+
 def getComics(options):
     """Retrieve comics."""
     if options.handler:
@@ -198,19 +251,34 @@ def getComics(options):
     errors = 0
     try:
         for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
-            if options.vote:
-                errors += vote(scraperobj)
-            else:
-                errors += getStrips(scraperobj, options)
+            jobs.put(scraperobj)
+        # start threads
+        num_threads = max(1, min(10, jobs.qsize()))
+        for i in range(num_threads):
+            ComicGetter(options).start()
+        # wait for threads to finish
+        jobs.join()
     except ValueError as msg:
         out.exception(msg)
         errors += 1
     finally:
         events.getHandler().end()
+    return errors + comic_errors
+
+
+def voteComics(options):
+    """Vote for comics."""
+    errors = 0
+    try:
+        for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
+            errors += voteComic(scraperobj)
+    except ValueError as msg:
+        out.exception(msg)
+        errors += 1
     return errors
 
 
-def vote(scraperobj):
+def voteComic(scraperobj):
     """Vote for given comic scraper."""
     errors = 0
     name = scraperobj.getName()
@@ -285,6 +353,8 @@ def run(options):
         return 1
     if options.modulehelp:
         return displayHelp(options)
+    if options.vote:
+        return voteComics(options)
     return getComics(options)
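
Below is a minimal, self-contained sketch of the queue/worker/per-host-lock
pattern this commit introduces. It is illustrative only: fetch(), Worker, and
the example URLs are hypothetical stand-ins (the real worker is ComicGetter
and the real work happens in getStrips()), it targets Python 3 only for
brevity, and the try/finally around task_done() is a defensive touch that the
commit itself does not use.

import threading
from queue import Queue, Empty
from urllib.parse import urlparse

jobs = Queue()
host_locks = {}          # hostname -> Lock, created on demand
lock = threading.Lock()  # guards host_locks and the error counter
errors = 0

def get_host_lock(url):
    """Return the lock for this URL's host, creating it if needed."""
    hostname = urlparse(url)[1].lower()
    return host_locks.setdefault(hostname, threading.Lock())

def fetch(url):
    """Hypothetical stand-in for the real download work."""
    print("downloading", url)
    return 0  # number of errors encountered

class Worker(threading.Thread):
    def run(self):
        global errors
        while True:
            try:
                url = jobs.get(False)  # non-blocking; Empty ends the thread
            except Empty:
                break
            try:
                with lock:                # serialize access to host_locks
                    host_lock = get_host_lock(url)
                with host_lock:           # one download per host at a time
                    result = fetch(url)
                with lock:
                    errors += result
            finally:
                jobs.task_done()          # always signal, so join() returns

urls = ["http://a.example/1", "http://a.example/2", "http://b.example/1"]
for url in urls:
    jobs.put(url)
for _ in range(max(1, min(10, jobs.qsize()))):  # same thread cap as the commit
    Worker().start()
jobs.join()  # blocks until every queued job has been marked done
print("errors:", errors)

The thread count is bounded between 1 and 10 by the number of queued jobs, so
short runs do not spawn idle threads. Note that the commit takes the global
lock before touching host_locks; dict.setdefault() happens to be atomic in
CPython, but the explicit lock makes the intent portable.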