New feature: Comic modules can be "disabled".
This is modeled on the "adult" feature, except that the user cannot override it from the command line. Each comic module can override the classmethod getDisabledReasons to give the user a reason why the module is disabled. The reason is shown in the comic list (-l or --singlelist), and a disabled module refuses to run, printing the same message. This is currently used to disable modules based on _ParserScraper when the lxml Python module is missing.
parent d495d95ee0 · commit e92a3fb3a1
3 changed files with 63 additions and 32 deletions
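To make the new hook concrete, here is a minimal sketch of a comic module that uses it. The module name FooComic and its URL are made up, and the snippet assumes a dosage checkout with dosagelib importable; only the getDisabledReasons() contract (return a dict mapping a short reason id to an explanatory string, or an empty dict when the module may run) is what this commit defines:

# A minimal sketch of a comic module using the new hook. "FooComic" and
# its URL are hypothetical; the getDisabledReasons() contract is the one
# added to the Scraper base class in this commit.
try:
    from lxml import html
except ImportError:
    html = None

from dosagelib.scraper import _BasicScraper


class FooComic(_BasicScraper):
    url = 'http://foocomic.example.com/'

    @classmethod
    def getDisabledReasons(cls):
        res = {}
        if html is None:
            res['lxml'] = u"This module needs the lxml (python-lxml) python module which is not installed."
        return res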
dosage | 31 changed lines
@@ -136,7 +136,7 @@ def displayHelp(options):
     """Print help for comic strips."""
     errors = 0
     try:
-        for scraperobj in director.getScrapers(options.comic, options.basepath):
+        for scraperobj in director.getScrapers(options.comic, options.basepath, listing=True):
             errors += displayComicHelp(scraperobj)
     except ValueError as msg:
         out.exception(msg)
@@ -239,12 +239,17 @@ def doList(columnList=True, verbose=False):
     out.info(u'Available comic scrapers:')
     out.info(u'Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
     out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
-    scrapers = sorted(director.getAllScrapers(), key=lambda s: s.getName())
+    scrapers = sorted(director.getAllScrapers(listing=True), key=lambda s: s.getName())
     if columnList:
-        num = doColumnList(scrapers)
+        num, disabled = doColumnList(scrapers)
     else:
-        num = doSingleList(scrapers, verbose=verbose)
+        num, disabled = doSingleList(scrapers, verbose=verbose)
     out.info(u'%d supported comics.' % num)
+    if disabled:
+        out.info('')
+        out.info(u'Some comics are disabled, they are tagged with [%s:REASON], where REASON is one of:' % TAG_DISABLED)
+        for k in disabled:
+            out.info(u' %-10s %s' % (k, disabled[k]))
     if page:
         pydoc.pager(fd.getvalue())
     return 0
@@ -254,38 +259,46 @@ def doList(columnList=True, verbose=False):

 def doSingleList(scrapers, verbose=False):
     """Get list of scraper names, one per line."""
+    disabled = {}
     for num, scraperobj in enumerate(scrapers):
         if verbose:
             displayComicHelp(scraperobj)
         else:
-            out.info(getScraperName(scraperobj))
-    return num
+            out.info(getScraperName(scraperobj, reasons=disabled))
+    return num, disabled


 def doColumnList(scrapers):
     """Get list of scraper names with multiple names per line."""
+    disabled = {}
     screenWidth = get_columns(sys.stdout)
     # limit name length so at least two columns are there
     limit = (screenWidth // 2) - 8
-    names = [getScraperName(scraperobj, limit=limit) for scraperobj in scrapers]
+    names = [getScraperName(scraperobj, limit=limit, reasons=disabled) for scraperobj in scrapers]
     num = len(names)
     maxlen = max(len(name) for name in names)
     namesPerLine = max(screenWidth // (maxlen + 1), 1)
     while names:
         out.info(u''.join(name.ljust(maxlen) for name in names[:namesPerLine]))
         del names[:namesPerLine]
-    return num
+    return num, disabled


 TAG_ADULT = "adult"
 TAG_LANG = "lang"
+TAG_DISABLED = "dis"


-def getScraperName(scraperobj, limit=None):
+def getScraperName(scraperobj, limit=None, reasons=None):
     """Get comic scraper name."""
     tags = []
     if scraperobj.adult:
         tags.append(TAG_ADULT)
     if scraperobj.lang != "en":
         tags.append("%s:%s" % (TAG_LANG, scraperobj.lang))
+    disabled = scraperobj.getDisabledReasons()
+    if disabled:
+        reasons.update(disabled)
+        for reason in disabled:
+            tags.append("%s:%s" % (TAG_DISABLED, reason))
     if tags:
         suffix = " [" + ", ".join(tags) + "]"
     else:
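With the listing changes above, dosage -l on a system without lxml should print something along these lines (sample output; the comic names and the total are placeholders):

Available comic scrapers:
Comics tagged with [adult] require age confirmation with the --adult option.
Non-english comics are tagged with [lang].
SomeBasicComic             SomeParserComic [dis:lxml]
1234 supported comics.

Some comics are disabled, they are tagged with [dis:REASON], where REASON is one of:
 lxml       This module needs the lxml (python-lxml) python module which is not installed.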
@@ -189,12 +189,12 @@ def finish():
     out.warn("Waiting for download threads to finish.")


-def getAllScrapers():
+def getAllScrapers(listing=False):
     """Get all scrapers."""
-    return getScrapers(['@@'])
+    return getScrapers(['@@'], listing=listing)


-def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False):
+def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listing=False):
     """Get scraper objects for the given comics."""
     if '@' in comics:
         # only scrapers whose directory already exists
@@ -203,12 +203,12 @@ def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False):
         for scraperclass in scraper.get_scraperclasses():
             dirname = getDirname(scraperclass.getName())
             if os.path.isdir(os.path.join(basepath, dirname)):
-                if shouldRunScraper(scraperclass, adult):
+                if shouldRunScraper(scraperclass, adult, listing):
                     yield scraperclass()
     elif '@@' in comics:
         # all scrapers
         for scraperclass in scraper.get_scraperclasses():
-            if shouldRunScraper(scraperclass, adult):
+            if shouldRunScraper(scraperclass, adult, listing):
                 yield scraperclass()
     else:
         # get only selected comic scrapers
@@ -229,20 +229,30 @@ def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False):
                 indexes = None
             scraperclasses = scraper.find_scraperclasses(name, multiple_allowed=multiple_allowed)
             for scraperclass in scraperclasses:
-                if shouldRunScraper(scraperclass, adult):
+                if shouldRunScraper(scraperclass, adult, listing):
                     scraperobj = scraperclass(indexes=indexes)
                     if scraperobj not in scrapers:
                         scrapers.add(scraperobj)
                         yield scraperobj


-def shouldRunScraper(scraperclass, adult=True):
+def shouldRunScraper(scraperclass, adult=True, listing=False):
+    if listing:
+        return True
     if not adult and scraperclass.adult:
         warn_adult(scraperclass)
         return False
+    reasons = scraperclass.getDisabledReasons()
+    if reasons:
+        warn_disabled(scraperclass, reasons)
+        return False
     return True


 def warn_adult(scraperclass):
     """Print warning about adult content."""
     out.warn(u"skipping adult comic %s; use the --adult option to confirm your age" % scraperclass.getName())
+
+
+def warn_disabled(scraperclass, reasons):
+    """Print warning about disabled comic modules."""
+    out.warn(u"Skipping comic %s: %s" % (scraperclass.getName(), ' '.join(reasons.values())))
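A short sketch of how the two call paths through director differ after this change (assumes a dosage checkout with dosagelib importable; the variable names are illustrative):

from dosagelib import director

# Listing path: disabled modules are still yielded, so the comic list can
# tag them with [dis:...] instead of hiding them.
all_modules = list(director.getAllScrapers(listing=True))

# Download path: shouldRunScraper() filters disabled modules out, and
# warn_disabled() prints the reason for each one skipped.
runnable = list(director.getScrapers(['@@']))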
@@ -10,6 +10,13 @@ try:
     from urllib.parse import urljoin
 except ImportError:
     from urlparse import urljoin
+
+try:
+    from lxml import html
+    from lxml.html.defs import link_attrs as html_link_attrs
+except ImportError:
+    html = None
+
 from . import loader, configuration, util
 from .util import (getPageContent, makeSequence, get_system_uid, urlopen,
                    getDirname, unescape, tagre, normaliseURL)
@@ -308,6 +315,16 @@ class Scraper(object):
     def fetchText(cls, url, data, textSearch, optional):
         raise ValueError("No implementation for fetchText!")

+    @classmethod
+    def getDisabledReasons(cls):
+        """
+        Get a dict of reasons why this comic module is disabled. The key is a
+        short (unique) identifier, the value is a string explaining why the
+        module is deactivated. If the module is not disabled, just return an
+        empty dict.
+        """
+        return {}
+

 class _BasicScraper(Scraper):
     """
@@ -389,23 +406,8 @@ class _ParserScraper(Scraper):
     of the HTML element and returns that.
     """

-    @classmethod
-    def xpath(cls, expr):
-        return expr
-
-    @classmethod
-    def css(cls, expr, attr=None):
-        return expr
-
     @classmethod
     def getPage(cls, url):
-        try:
-            from lxml import html
-        except ImportError:
-            raise ValueError(u"Skipping comic %s: Needs lxml (python-lxml) installed." % cls.getName())
-        from lxml.html.defs import link_attrs
-        cls.link_attrs = link_attrs
-        cls.html = html
         tree = html.document_fromstring(getPageContent(url, cls.session))
         tree.make_links_absolute(url)
         return tree
@@ -418,7 +420,7 @@ class _ParserScraper(Scraper):
         for search in searches:
             for match in data.xpath(search):
                 try:
-                    for attrib in cls.link_attrs:
+                    for attrib in html_link_attrs:
                         if attrib in match.attrib:
                             searchUrls.append(match.get(attrib))
                 except AttributeError:
@@ -450,6 +452,12 @@ class _ParserScraper(Scraper):
         else:
             return None

+    @classmethod
+    def getDisabledReasons(cls):
+        res = {}
+        if html is None:
+            res['lxml'] = u"This module needs the lxml (python-lxml) python module which is not installed."
+        return res

 def find_scraperclasses(comic, multiple_allowed=False):
     """Get a list comic scraper classes. Can return more than one entries if
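The guarded import at the top of the file is what lets a missing dependency be reported instead of crashing at import time; the same pattern, distilled into a standalone sketch:

# Standalone illustration of the optional-dependency pattern used above:
# try the import once at module load, remember the failure, and report it
# later via the getDisabledReasons() mechanism instead of raising.
try:
    from lxml import html
except ImportError:
    html = None


def get_disabled_reasons():
    res = {}
    if html is None:
        res['lxml'] = u"This module needs the lxml (python-lxml) python module which is not installed."
    return res


if __name__ == '__main__':
    print(get_disabled_reasons() or 'lxml installed, module enabled')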