Rename get_scrapers to get_scraperclasses

This commit is contained in:
Bastian Kleineidam 2013-02-13 19:59:13 +01:00
parent 96bf9ef523
commit 312d117ff3
7 changed files with 33 additions and 30 deletions

20
dosage
View file

@ -7,6 +7,7 @@ from __future__ import print_function
import sys import sys
import os import os
import optparse import optparse
from collections import OrderedDict
from dosagelib import events, scraper from dosagelib import events, scraper
from dosagelib.output import out from dosagelib.output import out
@ -208,7 +209,7 @@ def getScrapers(comics, basepath=None, adult=True):
# only scrapers whose directory already exists # only scrapers whose directory already exists
if len(comics) > 1: if len(comics) > 1:
out.warn("using '@' as comic name ignores all other specified comics.") out.warn("using '@' as comic name ignores all other specified comics.")
for scraperclass in scraper.get_scrapers(): for scraperclass in scraper.get_scraperclasses():
if not adult and scraperclass.adult: if not adult and scraperclass.adult:
warn_adult(scraperclass) warn_adult(scraperclass)
continue continue
@ -217,15 +218,15 @@ def getScrapers(comics, basepath=None, adult=True):
yield scraperclass() yield scraperclass()
elif '@@' in comics: elif '@@' in comics:
# all scrapers # all scrapers
for scraperclass in scraper.get_scrapers(): for scraperclass in scraper.get_scraperclasses():
if not adult and scraperclass.adult: if not adult and scraperclass.adult:
warn_adult(scraperclass) warn_adult(scraperclass)
continue continue
yield scraperclass() yield scraperclass()
else: else:
# get only selected comic scrapers # get only selected comic scrapers
# store them in a list to catch naming errors early # store them in an ordered set to eliminate duplicates
scrapers = [] scrapers = OrderedDict()
for comic in comics: for comic in comics:
if ':' in comic: if ':' in comic:
name, index = comic.split(':', 1) name, index = comic.split(':', 1)
@ -233,13 +234,16 @@ def getScrapers(comics, basepath=None, adult=True):
else: else:
name = comic name = comic
indexes = None indexes = None
# XXX if matchmulti, search for a list of scrapers
scraperclass = scraper.get_scraper(name) scraperclass = scraper.get_scraper(name)
if not adult and scraperclass.adult: if not adult and scraperclass.adult:
warn_adult(scraperclass) warn_adult(scraperclass)
continue continue
scrapers.append(scraperclass(indexes=indexes)) scraperobj = scraperclass(indexes=indexes)
for s in scrapers: if scraperobj not in scrapers:
yield s scrapers[scraperobj] = True
for scraperobj in scrapers:
yield scraperobj
def warn_adult(scraperclass): def warn_adult(scraperclass):
@ -269,7 +273,7 @@ def main():
def profile(): def profile():
"""Profile the loading of all scrapers.""" """Profile the loading of all scrapers."""
import cProfile import cProfile
cProfile.run("scraper.get_scrapers()", "dosage.prof") cProfile.run("scraper.get_scraperclasses()", "dosage.prof")
def viewprof(): def viewprof():

View file

@ -166,7 +166,7 @@ def get_scraper(comic):
raise ValueError("empty comic name") raise ValueError("empty comic name")
candidates = [] candidates = []
cname = comic.lower() cname = comic.lower()
for scraperclass in get_scrapers(): for scraperclass in get_scraperclasses():
lname = scraperclass.get_name().lower() lname = scraperclass.get_name().lower()
if lname == cname: if lname == cname:
# perfect match # perfect match
@ -182,29 +182,29 @@ def get_scraper(comic):
raise ValueError('comic %r not found' % comic) raise ValueError('comic %r not found' % comic)
_scrapers = None _scraperclasses = None
def get_scrapers(): def get_scraperclasses():
"""Find all comic scraper classes in the plugins directory. """Find all comic scraper classes in the plugins directory.
The result is cached. The result is cached.
@return: list of _BasicScraper classes @return: list of _BasicScraper classes
@rtype: list of _BasicScraper @rtype: list of _BasicScraper
""" """
global _scrapers global _scraperclasses
if _scrapers is None: if _scraperclasses is None:
out.debug("Loading comic modules...") out.debug("Loading comic modules...")
modules = loader.get_modules() modules = loader.get_modules()
plugins = loader.get_plugins(modules, _BasicScraper) plugins = loader.get_plugins(modules, _BasicScraper)
_scrapers = list(plugins) _scraperclasses = list(plugins)
_scrapers.sort(key=lambda s: s.get_name()) _scraperclasses.sort(key=lambda s: s.get_name())
check_scrapers() check_scrapers()
out.debug("... %d modules loaded." % len(_scrapers)) out.debug("... %d modules loaded." % len(_scraperclasses))
return _scrapers return _scraperclasses
def check_scrapers(): def check_scrapers():
"""Check for duplicate scraper class names.""" """Check for duplicate scraper class names."""
d = {} d = {}
for scraperclass in _scrapers: for scraperclass in _scraperclasses:
name = scraperclass.get_name().lower() name = scraperclass.get_name().lower()
if name in d: if name in d:
name1 = scraperclass.get_name() name1 = scraperclass.get_name()

View file

@ -10,7 +10,7 @@ import os
import requests import requests
sys.path.append(os.path.join(os.path.dirname(__file__), "..")) sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from dosagelib.util import getPageContent, asciify, unescape from dosagelib.util import getPageContent, asciify, unescape
from dosagelib.scraper import get_scrapers from dosagelib.scraper import get_scraperclasses
from scriptutil import contains_case_insensitive, capfirst, save_result, load_result, truncate_name from scriptutil import contains_case_insensitive, capfirst, save_result, load_result, truncate_name
json_file = __file__.replace(".py", ".json") json_file = __file__.replace(".py", ".json")
@ -64,7 +64,7 @@ def has_comic(name):
("KeenSpot/%s" % name).lower(), ("KeenSpot/%s" % name).lower(),
("SmackJeeves/%s" % name).lower(), ("SmackJeeves/%s" % name).lower(),
] ]
for scraperclass in get_scrapers(): for scraperclass in get_scraperclasses():
lname = scraperclass.get_name().lower() lname = scraperclass.get_name().lower()
if lname in names: if lname in names:
return True return True

View file

@ -10,7 +10,7 @@ import os
import requests import requests
sys.path.append(os.path.join(os.path.dirname(__file__), "..")) sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from dosagelib.util import tagre, getPageContent, asciify, unescape from dosagelib.util import tagre, getPageContent, asciify, unescape
from dosagelib.scraper import get_scrapers from dosagelib.scraper import get_scraperclasses
from scriptutil import contains_case_insensitive, capfirst, save_result, load_result, truncate_name from scriptutil import contains_case_insensitive, capfirst, save_result, load_result, truncate_name
json_file = __file__.replace(".py", ".json") json_file = __file__.replace(".py", ".json")
@ -78,7 +78,7 @@ def get_results():
def has_creators_comic(name): def has_creators_comic(name):
"""Test if comic name already exists.""" """Test if comic name already exists."""
cname = "Creators/%s" % name cname = "Creators/%s" % name
for scraperclass in get_scrapers(): for scraperclass in get_scraperclasses():
lname = scraperclass.get_name().lower() lname = scraperclass.get_name().lower()
if lname == cname.lower(): if lname == cname.lower():
return True return True

View file

@ -10,7 +10,7 @@ import os
import requests import requests
sys.path.append(os.path.join(os.path.dirname(__file__), "..")) sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from dosagelib.util import getPageContent, asciify, unescape, tagre from dosagelib.util import getPageContent, asciify, unescape, tagre
from dosagelib.scraper import get_scrapers from dosagelib.scraper import get_scraperclasses
from scriptutil import contains_case_insensitive, capfirst, save_result, load_result, truncate_name from scriptutil import contains_case_insensitive, capfirst, save_result, load_result, truncate_name
json_file = __file__.replace(".py", ".json") json_file = __file__.replace(".py", ".json")
@ -406,7 +406,7 @@ def has_comic(name):
"""Check if comic name already exists.""" """Check if comic name already exists."""
cname = ("Creators/%s" % name).lower() cname = ("Creators/%s" % name).lower()
gname = ("GoComics/%s" % name).lower() gname = ("GoComics/%s" % name).lower()
for scraperclass in get_scrapers(): for scraperclass in get_scraperclasses():
lname = scraperclass.get_name().lower() lname = scraperclass.get_name().lower()
if lname == cname or lname == gname: if lname == cname or lname == gname:
return True return True

View file

@ -7,8 +7,7 @@ import time
import cgi import cgi
import codecs import codecs
sys.path.append(os.path.join(os.path.dirname(__file__), "..")) sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from dosagelib.scraper import get_scrapers from dosagelib.scraper import get_scraperclasses
from dosagelib.configuration import Version as DosageVersion
from scriptutil import load_result, save_result from scriptutil import load_result, save_result
json_file = __file__.replace(".py", ".json") json_file = __file__.replace(".py", ".json")
@ -101,9 +100,9 @@ def strdate(t):
def get_testscraper(line): def get_testscraper(line):
"""Get scraper from test output line.""" """Get scraper from test output line."""
classname = line.split('::')[1][4:] classname = line.split('::')[1][4:]
for scraper in get_scrapers(): for scraperclass in get_scraperclasses():
if scraper.__name__ == classname: if scraperclass.__name__ == classname:
return scraper return scraperclass
raise ValueError("Scraper %r not found" % classname) raise ValueError("Scraper %r not found" % classname)

View file

@ -7,7 +7,7 @@ from dosagelib import scraper, util
class TestComicNames(TestCase): class TestComicNames(TestCase):
def test_names(self): def test_names(self):
for scraperclass in scraper.get_scrapers(): for scraperclass in scraper.get_scraperclasses():
name = scraperclass.get_name() name = scraperclass.get_name()
self.assertTrue(name.count('/') <= 1, name) self.assertTrue(name.count('/') <= 1, name)
if '/' in name: if '/' in name: