Make threads interruptible.
parent 1a96b63137
commit 15ef59262a
3 changed files with 251 additions and 190 deletions
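
The gist of the change: the download workers move from the dosage script into a new dosagelib/director module, each worker checks a cooperative stopped flag, a worker that catches Ctrl-C forwards it to the main thread via thread.interrupt_main(), and the main thread waits on the job queue with a timeout instead of blocking forever, so KeyboardInterrupt can be delivered. A minimal sketch of that pattern; the names Worker, jobs and the timed-join loop are illustrative, not the committed code:

    # Sketch of the interruptible-worker pattern this commit adopts
    # (illustrative names; not the committed code).
    import threading
    try:
        import thread                       # Python 2
    except ImportError:
        import _thread as thread            # Python 3
    try:
        from Queue import Queue, Empty      # Python 2
    except ImportError:
        from queue import Queue, Empty      # Python 3

    jobs = Queue()

    class Worker(threading.Thread):
        def __init__(self):
            super(Worker, self).__init__()
            self.stopped = False

        def run(self):
            try:
                while not self.stopped:
                    try:
                        job = jobs.get(False)   # non-blocking: exit when drained
                    except Empty:
                        break
                    try:
                        job()
                    finally:
                        jobs.task_done()
            except KeyboardInterrupt:
                thread.interrupt_main()         # forward Ctrl-C to the main thread

    if __name__ == "__main__":
        for n in range(5):
            jobs.put(lambda n=n: print("job %d" % n))
        workers = [Worker() for _ in range(2)]
        for w in workers:
            w.start()
        try:
            while any(w.is_alive() for w in workers):
                for w in workers:
                    w.join(0.1)                 # timed join keeps Ctrl-C deliverable
        except KeyboardInterrupt:
            for w in workers:
                w.stopped = True                # ask workers to stop cooperatively

The non-blocking jobs.get(False) lets drained workers exit on their own, so only an interrupt needs the stop flag.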
@@ -1,10 +1,12 @@
 Dosage 2.13 (released xx.xx.2014)
 
 Features:
-- comics: Added OhJoySexToy, TheGentlemansArmchair.
+- comics: Added OhJoySexToy, TheGentlemansArmchair, Underling, DongeonsAndDenizens,
+  GrimTalesFromDownBelow, TheLandscaper, DieFruehreifen, MonsieurLeChien.
 
 Fixes:
 - comics: Fixed EvilInc, FredoAndPidjin.
+- cmdline: Make download threads interruptible with Ctrl-C
 
 
 Dosage 2.12 (released 24.1.2014)
dosage (195 changed lines)
@@ -14,20 +14,10 @@ import sys
 import os
 import argparse
 import pydoc
-import threading
 from io import StringIO
-try:
-    from Queue import Queue, Empty
-except ImportError:
-    from queue import Queue, Empty
-try:
-    from urllib.parse import urlparse
-except ImportError:
-    from urlparse import urlparse
 
-from dosagelib import events, scraper, configuration, singleton
+from dosagelib import events, configuration, singleton, director
 from dosagelib.output import out
-from dosagelib.util import internal_error, getDirname, strlimit, getLangName
+from dosagelib.util import internal_error, strlimit, getLangName
 from dosagelib.ansicolor import get_columns
 
-
@@ -142,29 +132,11 @@ def setOutputInfo(options):
     #add_stderr_logger()
 
 
-def saveComicStrip(strip, basepath, dryrun):
-    """Save a comic strip which can consist of multiple images."""
-    errors = 0
-    allskipped = True
-    for image in strip.getImages():
-        try:
-            if dryrun:
-                filename, saved = "", False
-            else:
-                filename, saved = image.save(basepath)
-            if saved:
-                allskipped = False
-        except Exception as msg:
-            out.exception('Could not save image at %s to %s: %r' % (image.referrer, image.filename, msg))
-            errors += 1
-    return errors, allskipped
-
-
 def displayHelp(options):
     """Print help for comic strips."""
     errors = 0
     try:
-        for scraperobj in getScrapers(options.comic, options.basepath):
+        for scraperobj in director.getScrapers(options.comic, options.basepath):
             errors += displayComicHelp(scraperobj)
     except ValueError as msg:
         out.exception(msg)
@@ -195,83 +167,11 @@ def displayComicHelp(scraperobj):
     out.context = orig_context
 
 
-# the comic scraper job queue
-jobs = Queue()
-# ensure threads download only from one host at a time
-host_locks = {}
-
-
-def get_hostname(url):
-    """Get hostname from URL."""
-    return list(urlparse(url))[1].lower()
-
-
-lock = threading.Lock()
-def get_host_lock(url):
-    """Get lock object for given URL host."""
-    hostname = get_hostname(url)
-    return host_locks.setdefault(hostname, threading.Lock())
-
-comic_errors = 0
-
-
-class ComicGetter(threading.Thread):
-    """Get all strips of a comic in a thread."""
-
-    def __init__(self, options):
-        """Store options."""
-        super(ComicGetter, self).__init__()
-        self.options = options
-        self.origname = self.getName()
-
-    def run(self):
-        """Process from queue until it is empty."""
-        global comic_errors
-        while True:
-            try:
-                scraperobj = jobs.get(False)
-                self.setName(scraperobj.getName())
-                with lock:
-                    host_lock = get_host_lock(scraperobj.url)
-                with host_lock:
-                    errors = getStrips(scraperobj, self.options)
-                with lock:
-                    comic_errors += errors
-                jobs.task_done()
-                self.setName(self.origname)
-            except Empty:
-                break
-
-
-def getComics(options):
-    """Retrieve comics."""
-    if options.handler:
-        for name in set(options.handler):
-            events.addHandler(name, options.basepath, options.baseurl)
-    events.getHandler().start()
-    errors = 0
-    try:
-        for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
-            jobs.put(scraperobj)
-        # start threads
-        num_threads = max(1, min(10, jobs.qsize()))
-        for i in range(num_threads):
-            ComicGetter(options).start()
-        # wait for threads to finish
-        jobs.join()
-    except ValueError as msg:
-        out.exception(msg)
-        errors += 1
-    finally:
-        events.getHandler().end()
-    return errors + comic_errors
-
-
 def voteComics(options):
     """Vote for comics."""
     errors = 0
     try:
-        for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
+        for scraperobj in director.getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
             errors += voteComic(scraperobj)
     except ValueError as msg:
         out.exception(msg)
@@ -305,36 +205,6 @@ def voteComic(scraperobj):
     return errors
 
 
-def getStrips(scraperobj, options):
-    """Get all strips from a scraper."""
-    errors = 0
-    if options.all or options.cont:
-        numstrips = None
-    elif options.numstrips:
-        numstrips = options.numstrips
-    else:
-        # get current strip
-        numstrips = 1
-    try:
-        if scraperobj.isComplete(options.basepath):
-            out.info(u"All comics are already downloaded.")
-            return 0
-        for strip in scraperobj.getStrips(numstrips):
-            _errors, skipped = saveComicStrip(strip, options.basepath, options.dry_run)
-            errors += _errors
-            if skipped and options.cont:
-                # stop when retrieval skipped an image for one comic strip
-                out.info(u"Stop retrieval because image file already exists")
-                break
-        if options.all and not (errors or options.dry_run or
-                                options.cont or scraperobj.indexes):
-            scraperobj.setComplete(options.basepath)
-    except Exception as msg:
-        out.exception(msg)
-        errors += 1
-    return errors
-
-
 def run(options):
     """Execute comic commands."""
     setOutputInfo(options)
@@ -354,7 +224,7 @@ def run(options):
         return displayHelp(options)
     if options.vote:
         return voteComics(options)
-    return getComics(options)
+    return director.getComics(options)
 
 
 def doList(columnList=True, verbose=False):
@@ -369,7 +239,7 @@ def doList(columnList=True, verbose=False):
     out.info(u'Available comic scrapers:')
     out.info(u'Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
     out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
-    scrapers = sorted(getScrapers(['@@']), key=lambda s: s.getName())
+    scrapers = sorted(director.getAllScrapers(), key=lambda s: s.getName())
     if columnList:
         num = doColumnList(scrapers)
     else:
@@ -426,59 +296,6 @@ def getScraperName(scraperobj, limit=None):
     return name + suffix
 
 
-def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False):
-    """Get scraper objects for the given comics."""
-    if '@' in comics:
-        # only scrapers whose directory already exists
-        if len(comics) > 1:
-            out.warn(u"using '@' as comic name ignores all other specified comics.")
-        for scraperclass in scraper.get_scraperclasses():
-            dirname = getDirname(scraperclass.getName())
-            if os.path.isdir(os.path.join(basepath, dirname)):
-                if not adult and scraperclass.adult:
-                    warn_adult(scraperclass)
-                    continue
-                yield scraperclass()
-    elif '@@' in comics:
-        # all scrapers
-        for scraperclass in scraper.get_scraperclasses():
-            if not adult and scraperclass.adult:
-                warn_adult(scraperclass)
-                continue
-            yield scraperclass()
-    else:
-        # get only selected comic scrapers
-        # store them in a set to eliminate duplicates
-        scrapers = set()
-        for comic in comics:
-            # Helpful when using shell completion to pick comics to get
-            comic.rstrip(os.path.sep)
-            if basepath and comic.startswith(basepath):
-                # make the following command work:
-                # find Comics -type d | xargs -n1 -P10 dosage -b Comics
-                comic = comic[len(basepath):].lstrip(os.sep)
-            if ':' in comic:
-                name, index = comic.split(':', 1)
-                indexes = index.split(',')
-            else:
-                name = comic
-                indexes = None
-            scraperclasses = scraper.find_scraperclasses(name, multiple_allowed=multiple_allowed)
-            for scraperclass in scraperclasses:
-                if not adult and scraperclass.adult:
-                    warn_adult(scraperclass)
-                    continue
-                scraperobj = scraperclass(indexes=indexes)
-                if scraperobj not in scrapers:
-                    scrapers.add(scraperobj)
-                    yield scraperobj
-
-
-def warn_adult(scraperclass):
-    """Print warning about adult content."""
-    out.warn(u"skipping adult comic %s; use the --adult option to confirm your age" % scraperclass.getName())
-
-
 def main():
     """Parse options and execute commands."""
     try:
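
A side note on an idiom kept across the refactor: get_host_lock() hands every thread the same Lock for a given hostname, and the global lock serializes the lookup so two threads cannot create different locks for one host. A standalone sketch under those assumptions (download() and the example URL are hypothetical, not from the patch):

    import threading
    try:
        from urllib.parse import urlparse  # Python 3
    except ImportError:
        from urlparse import urlparse      # Python 2

    host_locks = {}
    lock = threading.Lock()

    def get_host_lock(url):
        """Get the one Lock object shared by all threads for this host."""
        hostname = urlparse(url)[1].lower()
        return host_locks.setdefault(hostname, threading.Lock())

    def download(url):
        with lock:                  # serialize per-host lock creation/lookup
            host_lock = get_host_lock(url)
        with host_lock:             # at most one download per host at a time
            print("downloading %s" % url)

    download("http://example.com/comic/1")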
|
242
dosagelib/director.py
Normal file
242
dosagelib/director.py
Normal file
|
@@ -0,0 +1,242 @@
+# -*- coding: iso-8859-1 -*-
+# Copyright (C) 2014 Bastian Kleineidam
+import threading
+import thread  # Python 2 only; Python 3 renamed this module to _thread
+import os
+try:
+    from Queue import Queue, Empty
+except ImportError:
+    from queue import Queue, Empty
+try:
+    from urllib.parse import urlparse
+except ImportError:
+    from urlparse import urlparse
+from .output import out
+from . import events, scraper
+from .util import getDirname
+
+
+class ComicQueue(Queue):
+    """The comic scraper job queue."""
+
+    def join(self, timeout=None):
+        """Blocks until all items in the Queue have been gotten and processed.
+
+        The count of unfinished tasks goes up whenever an item is added to the
+        queue. The count goes down whenever a consumer thread calls task_done()
+        to indicate the item was retrieved and all work on it is complete.
+
+        When the count of unfinished tasks drops to zero, join() unblocks.
+        """
+        self.all_tasks_done.acquire()
+        try:
+            while self.unfinished_tasks:
+                self.all_tasks_done.wait(timeout)
+        finally:
+            self.all_tasks_done.release()
+
+    def clear(self):
+        """Remove all queue entries."""
+        self.mutex.acquire()
+        self.queue.clear()
+        self.mutex.release()
+
+
+# ensure threads download only from one host at a time
+host_locks = {}
+
+
+def get_hostname(url):
+    """Get hostname from URL."""
+    return list(urlparse(url))[1].lower()
+
+
+lock = threading.Lock()
+def get_host_lock(url):
+    """Get lock object for given URL host."""
+    hostname = get_hostname(url)
+    return host_locks.setdefault(hostname, threading.Lock())
+
+
+class ComicGetter(threading.Thread):
+    """Get all strips of a comic in a thread."""
+
+    def __init__(self, options):
+        """Store options."""
+        super(ComicGetter, self).__init__()
+        self.options = options
+        self.origname = self.getName()
+        self.stopped = False
+        self.errors = 0
+
+    def run(self):
+        """Process from queue until it is empty."""
+        try:
+            while not self.stopped:
+                scraperobj = jobs.get(False)
+                self.setName(scraperobj.getName())
+                try:
+                    self.getStrips(scraperobj)
+                finally:
+                    jobs.task_done()
+                    self.setName(self.origname)
+        except Empty:
+            pass
+        except KeyboardInterrupt:
+            thread.interrupt_main()
+
+    def getStrips(self, scraperobj):
+        """Download comic strips."""
+        with lock:
+            host_lock = get_host_lock(scraperobj.url)
+        with host_lock:
+            self._getStrips(scraperobj)
+
+    def _getStrips(self, scraperobj):
+        """Get all strips from a scraper."""
+        if self.options.all or self.options.cont:
+            numstrips = None
+        elif self.options.numstrips:
+            numstrips = self.options.numstrips
+        else:
+            # get current strip
+            numstrips = 1
+        try:
+            if scraperobj.isComplete(self.options.basepath):
+                out.info(u"All comics are already downloaded.")
+                return 0
+            for strip in scraperobj.getStrips(numstrips):
+                skipped = self.saveComicStrip(strip)
+                if skipped and self.options.cont:
+                    # stop when retrieval skipped an image for one comic strip
+                    out.info(u"Stop retrieval because image file already exists")
+                    break
+                if self.stopped:
+                    break
+            if self.options.all and not (self.errors or self.options.dry_run or
+                                         self.options.cont or scraperobj.indexes):
+                scraperobj.setComplete(self.options.basepath)
+        except Exception as msg:
+            out.exception(msg)
+            self.errors += 1
+
+    def saveComicStrip(self, strip):
+        """Save a comic strip which can consist of multiple images."""
+        allskipped = True
+        for image in strip.getImages():
+            try:
+                if self.options.dry_run:
+                    filename, saved = "", False
+                else:
+                    filename, saved = image.save(self.options.basepath)
+                if saved:
+                    allskipped = False
+                if self.stopped:
+                    break
+            except Exception as msg:
+                out.exception('Could not save image at %s to %s: %r' % (image.referrer, image.filename, msg))
+                self.errors += 1
+        return allskipped
+
+    def stop(self):
+        self.stopped = True
+
+
+jobs = ComicQueue()
+threads = []
+
+
+def getComics(options):
+    """Retrieve comics."""
+    if options.handler:
+        for name in set(options.handler):
+            events.addHandler(name, options.basepath, options.baseurl)
+    events.getHandler().start()
+    errors = 0
+    try:
+        for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
+            jobs.put(scraperobj)
+        # start threads
+        num_threads = 1  # XXX max(1, min(10, jobs.qsize()))
+        for i in range(num_threads):
+            t = ComicGetter(options)
+            threads.append(t)
+            t.start()
+        # wait for threads to finish
+        jobs.join(1)
+        for t in threads:
+            errors += t.errors
+    except ValueError as msg:
+        out.exception(msg)
+        errors += 1
+    except KeyboardInterrupt:
+        finish()
+    finally:
+        events.getHandler().end()
+    return errors
+
+
+def finish():
+    out.warn("Interrupted!")
+    for t in threads:
+        t.stop()
+    jobs.clear()
+    out.warn("Waiting for download threads to finish.")
+
+
+def getAllScrapers():
+    """Get all scrapers."""
+    return getScrapers(['@@'])
+
+
+def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False):
+    """Get scraper objects for the given comics."""
+    if '@' in comics:
+        # only scrapers whose directory already exists
+        if len(comics) > 1:
+            out.warn(u"using '@' as comic name ignores all other specified comics.")
+        for scraperclass in scraper.get_scraperclasses():
+            dirname = getDirname(scraperclass.getName())
+            if os.path.isdir(os.path.join(basepath, dirname)):
+                if not adult and scraperclass.adult:
+                    warn_adult(scraperclass)
+                    continue
+                yield scraperclass()
+    elif '@@' in comics:
+        # all scrapers
+        for scraperclass in scraper.get_scraperclasses():
+            if not adult and scraperclass.adult:
+                warn_adult(scraperclass)
+                continue
+            yield scraperclass()
+    else:
+        # get only selected comic scrapers
+        # store them in a set to eliminate duplicates
+        scrapers = set()
+        for comic in comics:
+            # Helpful when using shell completion to pick comics to get
+            comic.rstrip(os.path.sep)
+            if basepath and comic.startswith(basepath):
+                # make the following command work:
+                # find Comics -type d | xargs -n1 -P10 dosage -b Comics
+                comic = comic[len(basepath):].lstrip(os.sep)
+            if ':' in comic:
+                name, index = comic.split(':', 1)
+                indexes = index.split(',')
+            else:
+                name = comic
+                indexes = None
+            scraperclasses = scraper.find_scraperclasses(name, multiple_allowed=multiple_allowed)
+            for scraperclass in scraperclasses:
+                if not adult and scraperclass.adult:
+                    warn_adult(scraperclass)
+                    continue
+                scraperobj = scraperclass(indexes=indexes)
+                if scraperobj not in scrapers:
+                    scrapers.add(scraperobj)
+                    yield scraperobj
+
+
+def warn_adult(scraperclass):
+    """Print warning about adult content."""
+    out.warn(u"skipping adult comic %s; use the --adult option to confirm your age" % scraperclass.getName())