Implement parallel downloading.
This commit is contained in:
parent
365fd17802
commit
1a3d3f517b
1 changed files with 75 additions and 5 deletions
80
dosage
80
dosage
|
@ -14,7 +14,16 @@ import sys
|
||||||
import os
|
import os
|
||||||
import argparse
|
import argparse
|
||||||
import pydoc
|
import pydoc
|
||||||
|
import threading
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
try:
|
||||||
|
from Queue import Queue, Empty
|
||||||
|
except ImportError:
|
||||||
|
from queue import Queue, Empty
|
||||||
|
try:
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
except ImportError:
|
||||||
|
from urlparse import urlparse
|
||||||
|
|
||||||
from dosagelib import events, scraper, configuration, singleton
|
from dosagelib import events, scraper, configuration, singleton
|
||||||
from dosagelib.output import out
|
from dosagelib.output import out
|
||||||
|
@ -189,6 +198,50 @@ def displayComicHelp(scraperobj):
|
||||||
out.context = u''
|
out.context = u''
|
||||||
|
|
||||||
|
|
||||||
|
# the comic scraper job queue
|
||||||
|
jobs = Queue()
|
||||||
|
# per-host locks: ensure at most one thread downloads from any given host at a time
|
||||||
|
host_locks = {}
|
||||||
|
|
||||||
|
|
||||||
|
def get_hostname(url):
    """Get the lowercased hostname from a URL.

    Only the host itself is returned: credentials (``user:pw@``) and the
    port (``:8080``) are stripped, so that every URL pointing at the same
    host yields the same key.

    @param url: the URL to inspect
    @return: the lowercased hostname, or an empty string if the URL has
        no network location (e.g. a relative URL)
    """
    # .hostname is None when the URL has no netloc; keep returning a string.
    return urlparse(url).hostname or ""
|
||||||
|
|
||||||
|
|
||||||
|
# Global mutex: held while looking up/creating a per-host lock and while
# updating the shared comic_errors counter (see ComicGetter.run).
lock = threading.Lock()
|
||||||
|
def get_host_lock(url):
    """Return the lock associated with the host of the given URL.

    A new lock is registered in host_locks the first time a host is seen;
    later calls for the same host return that same lock object.
    """
    return host_locks.setdefault(get_hostname(url), threading.Lock())
|
||||||
|
|
||||||
|
# Running total of the errors returned by getStrips() across all worker
# threads; added to the result of getComics().
comic_errors = 0
|
||||||
|
|
||||||
|
|
||||||
|
class ComicGetter(threading.Thread):
    """Worker thread that gets all strips of the comics found in the job queue."""

    def __init__(self, options):
        """Store the options that are passed on to getStrips() for every job."""
        super(ComicGetter, self).__init__()
        self.options = options

    def run(self):
        """Process scraper jobs from the queue until it is empty.

        Every job taken from the queue is always marked as done, even when
        getting its strips fails, so that jobs.join() cannot block forever.
        A failing job is logged and counted as one error.
        """
        global comic_errors
        while True:
            # Only the non-blocking get() may signal an empty queue.
            try:
                scraperobj = jobs.get(False)
            except Empty:
                break
            errors = 0
            try:
                # The global lock protects the host_locks dictionary.
                with lock:
                    host_lock = get_host_lock(scraperobj.url)
                # Only one thread at a time may download from this host.
                with host_lock:
                    errors = getStrips(scraperobj, self.options)
            except Exception as msg:
                # Thread boundary: report the failure instead of dying
                # silently and leaving the remaining jobs unprocessed.
                out.exception(msg)
                errors = 1
            finally:
                with lock:
                    comic_errors += errors
                jobs.task_done()
|
||||||
|
|
||||||
|
|
||||||
def getComics(options):
|
def getComics(options):
|
||||||
"""Retrieve comics."""
|
"""Retrieve comics."""
|
||||||
if options.handler:
|
if options.handler:
|
||||||
|
@ -198,19 +251,34 @@ def getComics(options):
|
||||||
errors = 0
|
errors = 0
|
||||||
try:
|
try:
|
||||||
for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
|
for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
|
||||||
if options.vote:
|
jobs.put(scraperobj)
|
||||||
errors += vote(scraperobj)
|
# start threads
|
||||||
else:
|
num_threads = max(1, min(10, jobs.qsize()))
|
||||||
errors += getStrips(scraperobj, options)
|
for i in range(num_threads):
|
||||||
|
ComicGetter(options).start()
|
||||||
|
# wait for threads to finish
|
||||||
|
jobs.join()
|
||||||
except ValueError as msg:
|
except ValueError as msg:
|
||||||
out.exception(msg)
|
out.exception(msg)
|
||||||
errors += 1
|
errors += 1
|
||||||
finally:
|
finally:
|
||||||
events.getHandler().end()
|
events.getHandler().end()
|
||||||
|
return errors + comic_errors
|
||||||
|
|
||||||
|
|
||||||
|
def voteComics(options):
    """Cast a vote for every selected comic and return the number of errors."""
    failures = 0
    try:
        # Scraper lookup stays inside the try block: a ValueError raised
        # while finding scrapers is reported and counted as one error.
        scrapers = getScrapers(options.comic, options.basepath, options.adult, options.multimatch)
        for comic in scrapers:
            failures += voteComic(comic)
    except ValueError as err:
        out.exception(err)
        failures += 1
    return failures
|
||||||
|
|
||||||
|
|
||||||
def vote(scraperobj):
|
def voteComic(scraperobj):
|
||||||
"""Vote for given comic scraper."""
|
"""Vote for given comic scraper."""
|
||||||
errors = 0
|
errors = 0
|
||||||
name = scraperobj.getName()
|
name = scraperobj.getName()
|
||||||
|
@ -285,6 +353,8 @@ def run(options):
|
||||||
return 1
|
return 1
|
||||||
if options.modulehelp:
|
if options.modulehelp:
|
||||||
return displayHelp(options)
|
return displayHelp(options)
|
||||||
|
if options.vote:
|
||||||
|
return voteComics(options)
|
||||||
return getComics(options)
|
return getComics(options)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue