Add some basic type annotations

2022-05-28 19:33:16 +02:00 · 2022-05-28 19:33:16 +02:00 · 9b95171f37
commit 9b95171f37
parent f3b8ebf0be
5 changed files with 29 additions and 24 deletions
--- a/dosagelib/director.py
+++ b/dosagelib/director.py
@ -1,12 +1,13 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 import os
 import threading
 import _thread
 from queue import Queue, Empty
+from typing import Dict
 from urllib.parse import urlparse

 from .output import out
@ -41,7 +42,7 @@ class ComicQueue(Queue):


 # ensure threads download only from one host at a time
-host_locks = {}
+host_locks: Dict[str, threading.Lock] = {}


 def get_hostname(url):
--- a/dosagelib/plugins/c.py
+++ b/dosagelib/plugins/c.py
@ -1,9 +1,10 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2021 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 from re import compile, escape
+from typing import List

 from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import bounceStarter, indirectStarter
@ -132,7 +133,7 @@ class CatenaManor(_ParserScraper):
    imageSearch = '//img[@class="comicthumbnail"]'
    multipleImagesPerStrip = True
    endOfLife = True
-    strips = []
+    strips: List[str] = []

    def starter(self):
        # Retrieve archive links and select valid range
--- a/dosagelib/plugins/common.py
+++ b/dosagelib/plugins/common.py
@ -1,8 +1,10 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
+from typing import Sequence, Union
+
 from ..scraper import _ParserScraper

 # Common base classes for comics with the same structure (same hosting
@ -39,7 +41,7 @@ class _WPWebcomic(_ParserScraper):


 class _ComicControlScraper(_ParserScraper):
-    imageSearch = '//img[@id="cc-comic"]'
+    imageSearch: Union[Sequence[str], str] = '//img[@id="cc-comic"]'
    prevSearch = '//a[@rel="prev"]'
    nextSearch = '//a[@rel="next"]'
    latestSearch = '//a[@rel="last"]'
--- a/dosagelib/plugins/j.py
+++ b/dosagelib/plugins/j.py
@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2021 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 from re import compile, escape

 from ..scraper import _BasicScraper
@ -42,5 +42,5 @@ class JoeAndMonkey(_BasicScraper):

 class JohnnyWander(_ComicControlScraper):
    imageSearch = ('//ul[d:class("cc-showbig")]/li/@data-src',
-                   _ComicControlScraper.imageSearch)
+        '//img[@id="cc-comic"]')
    url = 'http://www.johnnywander.com/'
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@ -1,12 +1,13 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2021 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 import html
 import os
 import re
 import warnings
 from urllib.parse import urljoin
+from typing import Optional, Union, Pattern, Sequence

 import lxml
 from lxml.html.defs import link_attrs as html_link_attrs
@ -42,60 +43,60 @@ class GeoblockedException(IOError):
        super().__init__('It seems your current location is geo-blocked.')


-class Scraper(object):
+class Scraper:
    '''Base class for all comic scrapers, but without a specific scrape
    implementation.'''

    # The URL for the comic strip
-    url = None
+    url: Optional[str] = None

    # A string that is interpolated with the strip index to yield the URL for a
    # particular strip.
-    stripUrl = None
+    stripUrl: Optional[str] = None

    # Stop search for previous URLs at this URL
-    firstStripUrl = None
+    firstStripUrl: Optional[str] = None

    # if more than one image per URL is expected
-    multipleImagesPerStrip = False
+    multipleImagesPerStrip: bool = False

    # set to True if this comic contains adult content
-    adult = False
+    adult: bool = False

    # set to True if this comic will not get updated anymore
-    endOfLife = False
+    endOfLife: bool = False

    # language of the comic (two-letter ISO 639-1 code)
-    lang = 'en'
+    lang: str = 'en'

    # an expression that will locate the URL for the previous strip in a page
    # this can also be a list or tuple
-    prevSearch = None
+    prevSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None

    # an expression that will locate the strip image URLs in a page
    # this can also be a list or tuple
-    imageSearch = None
+    imageSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None

    # an expression to store a text together with the image
    # sometimes comic strips have additional text info for each comic
-    textSearch = None
+    textSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None

    # Is the additional text required or optional?  When it is required (the
    # default), you see an error message whenever a comic page is encountered
    # that does not have the text
-    textOptional = False
+    textOptional: bool = False

    # usually the index format help
-    help = ''
+    help: str = ''

    # Specifying a list of HTTP error codes which should be handled as a
    # successful request.  This is a workaround for some comics which return
    # regular pages with strange HTTP codes. By default, all HTTP errors raise
    # exceptions.
-    allow_errors = ()
+    allow_errors: Sequence[int] = ()

    # HTTP session for configuration & cookies
-    session = http.default_session
+    session: http.Session = http.default_session

    @classmethod
    def getmodules(cls):