Convert scraper cache to a class

This should make it easier to extend with additional entries.
2020-10-01 18:49:14 +02:00 · 2020-10-01 18:49:14 +02:00 · 9237bd62b2
commit 9237bd62b2
parent 5ec0710d26
8 changed files with 92 additions and 83 deletions
--- a/dosagelib/cmd.py
+++ b/dosagelib/cmd.py
@ -1,13 +1,14 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher
 import argparse
 import os

-from . import events, configuration, singleton, director, scraper
+from . import events, configuration, singleton, director
 from . import AppName, __version__
 from .output import out
+from .scraper import scrapers as allscrapers
 from .util import internal_error, strlimit


@ -243,7 +244,7 @@ def do_list(column_list=True, verbose=False, listall=False):
        out.info(u'Comics tagged with [{}] require age confirmation'
            ' with the --adult option.'.format(TAG_ADULT))
        out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
-        scrapers = sorted(scraper.get_scrapers(listall),
+        scrapers = sorted(allscrapers.get(listall),
                          key=lambda s: s.name.lower())
        if column_list:
            num, disabled = do_column_list(scrapers)
--- a/dosagelib/director.py
+++ b/dosagelib/director.py
@ -10,7 +10,8 @@ from queue import Queue, Empty
 from urllib.parse import urlparse

 from .output import out
-from . import events, scraper
+from .scraper import scrapers as allscrapers
+from . import events


 class ComicQueue(Queue):
@ -209,7 +210,7 @@ def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listi
            else:
                name = comic
                indexes = None
-            found_scrapers = scraper.find_scrapers(name, multiple_allowed=multiple_allowed)
+            found_scrapers = allscrapers.find(name, multiple_allowed=multiple_allowed)
            for scraperobj in found_scrapers:
                if shouldRunScraper(scraperobj, adult, listing):
                    # FIXME: Find a better way to work with indexes
@ -220,7 +221,7 @@ def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listi


 def get_existing_comics(basepath=None, adult=True, listing=False):
-    for scraperobj in scraper.get_scrapers(include_removed=True):
+    for scraperobj in allscrapers.get(include_removed=True):
        dirname = scraperobj.get_download_dir(basepath)
        if os.path.isdir(dirname):
            if shouldRunScraper(scraperobj, adult, listing):
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@ -536,7 +536,15 @@ class _ParserScraper(Scraper):
        return res


-def find_scrapers(comic, multiple_allowed=False):
+class Cache:
+    """Cache for comic scraper objects. The cache is initialized on first use.
+    This is cached, since iterating & loading a complete package might be quite
+    slow.
+    """
+    def __init__(self):
+        self.data = None
+
+    def find(self, comic, multiple_allowed=False):
        """Get a list comic scraper objects.

        Can return more than one entry if multiple_allowed is True, else it raises
@ -547,7 +555,7 @@ def find_scrapers(comic, multiple_allowed=False):
            raise ValueError("empty comic name")
        candidates = []
        cname = comic.lower()
-    for scrapers in get_scrapers(include_removed=True):
+        for scrapers in self.get(include_removed=True):
            lname = scrapers.name.lower()
            if lname == cname:
                # perfect match
@ -564,36 +572,31 @@ def find_scrapers(comic, multiple_allowed=False):
            raise ValueError('comic %r not found' % comic)
        return candidates

+    def load(self):
+        out.debug("Loading comic modules...")
+        modules = loader.get_modules('plugins')
+        plugins = list(loader.get_plugins(modules, Scraper))
+        self.data = list([m for x in plugins for m in x.getmodules()])
+        self.validate()
+        out.debug("... %d modules loaded from %d classes." % (
+            len(self.data), len(plugins)))

-_scrapers = None
-
-
-def get_scrapers(include_removed=False):
+    def get(self, include_removed=False):
        """Find all comic scraper classes in the plugins directory.
-    The result is cached.
        @return: list of Scraper classes
        @rtype: list of Scraper
        """
-    global _scrapers
-    if _scrapers is None:
-        out.debug(u"Loading comic modules...")
-        modules = loader.get_modules('plugins')
-        plugins = list(loader.get_plugins(modules, Scraper))
-        _scrapers = sorted([m for x in plugins for m in x.getmodules()],
-                           key=lambda p: p.name)
-        check_scrapers()
-        out.debug(u"... %d modules loaded from %d classes." % (
-            len(_scrapers), len(plugins)))
+        if not self.data:
+            self.load()
        if include_removed:
-        return _scrapers
+            return self.data
        else:
-        return [x for x in _scrapers if x.url]
+            return [x for x in self.data if x.url]

-
-def check_scrapers():
+    def validate(self):
        """Check for duplicate scraper names."""
        d = {}
-    for scraper in _scrapers:
+        for scraper in self.data:
            name = scraper.name.lower()
            if name in d:
                name1 = scraper.name
@ -601,3 +604,6 @@ def check_scrapers():
                raise ValueError('duplicate scrapers %s and %s found' %
                                 (name1, name2))
                d[name] = scraper
+
+
+scrapers = Cache()
--- a/scripts/mklanguages.py
+++ b/scripts/mklanguages.py
@ -7,7 +7,7 @@
 import os
 import codecs

-from dosagelib.scraper import get_scrapers
+from dosagelib.scraper import scrapers


 def main():
@ -24,7 +24,7 @@ def main():

 def get_used_languages():
    languages = {}
-    for scraperobj in get_scrapers():
+    for scraperobj in scrapers.get():
        lang = scraperobj.lang
        if lang not in languages:
            languages[lang] = scraperobj.language()
--- a/scripts/scriptutil.py
+++ b/scripts/scriptutil.py
@ -12,8 +12,9 @@ import time

 import lxml

+from dosagelib.scraper import scrapers
 from dosagelib.util import get_page
-from dosagelib import scraper, http
+from dosagelib import http


 def first_lower(x):
@ -129,7 +130,7 @@ class ComicListUpdater(object):
        """Check if comic name already exists."""
        names = [(tmpl % name).lower() for tmpl in self.dup_templates]
        if names:
-            for scraperobj in scraper.get_scrapers():
+            for scraperobj in scrapers.get():
                lname = scraperobj.name.lower()
                if lname in names:
                    return scraperobj.name
--- a/tests/modules/conftest.py
+++ b/tests/modules/conftest.py
@ -1,21 +1,21 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher
 import re
 import os

 import pytest
 from xdist.dsession import LoadScopeScheduling

-from dosagelib import scraper
+from dosagelib.scraper import scrapers


 def get_test_scrapers():
    """Return scrapers that should be tested."""
    if "TESTALL" in os.environ:
        # test all comics (this will take some time)
-        return scraper.get_scrapers()
+        return scrapers.get()
    if 'TESTCOMICS' in os.environ:
        scraper_pattern = re.compile(os.environ['TESTCOMICS'])
    else:
@ -31,7 +31,7 @@ def get_test_scrapers():
        scraper_pattern = re.compile('^(' + '|'.join(testscrapernames) + ')$')

    return [
-        scraperobj for scraperobj in scraper.get_scrapers()
+        scraperobj for scraperobj in scrapers.get()
        if scraper_pattern.match(scraperobj.name)
    ]

--- a/tests/test_comicnames.py
+++ b/tests/test_comicnames.py
@ -1,17 +1,17 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher
 import re

-from dosagelib import scraper
+from dosagelib.scraper import scrapers
 from dosagelib.plugins import old


 class TestComicNames(object):

    def test_names(self):
-        for scraperobj in scraper.get_scrapers():
+        for scraperobj in scrapers.get():
            name = scraperobj.name
            assert name.count('/') <= 1
            if '/' in name:
@ -21,10 +21,10 @@ class TestComicNames(object):
            assert re.sub("[^0-9a-zA-Z_]", "", comicname) == comicname

    def test_renamed(self):
-        for scraperobj in scraper.get_scrapers(include_removed=True):
+        for scraperobj in scrapers.get(include_removed=True):
            if not isinstance(scraperobj, old.Renamed):
                continue
            assert len(scraperobj.getDisabledReasons()) > 0
            # Renamed scraper should only point to a non-disabled scraper
-            newscraper = scraper.find_scrapers(scraperobj.newname)[0]
+            newscraper = scrapers.find(scraperobj.newname)[0]
            assert len(newscraper.getDisabledReasons()) == 0
--- a/tests/test_scraper.py
+++ b/tests/test_scraper.py
@ -1,26 +1,26 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2013-2014 Bastian Kleineidam
-# Copyright (C) 2015-2016 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher
 import pytest
-from dosagelib import scraper
+from dosagelib.scraper import scrapers


 class TestScraper(object):
    """Test scraper module functions."""

    def test_get_scrapers(self):
-        for scraperobj in scraper.get_scrapers():
+        for scraperobj in scrapers.get():
            scraperobj.indexes = ["bla"]
            assert scraperobj.url, "missing url in %s" % scraperobj.name

    def test_find_scrapers_single(self):
-        result = scraper.find_scrapers("xkcd")
+        result = scrapers.find("xkcd")
        assert len(result) == 1

    def test_find_scrapers_multi(self):
-        result = scraper.find_scrapers("a", multiple_allowed=True)
+        result = scrapers.find("a", multiple_allowed=True)
        assert len(result) > 1

    def test_find_scrapers_error(self):
        with pytest.raises(ValueError, match='empty comic name'):
-            scraper.find_scrapers('')
+            scrapers.find('')