Add some basic type annotations

This commit is contained in:
Tobias Gruetzmacher 2022-05-28 19:33:16 +02:00
parent f3b8ebf0be
commit 9b95171f37
5 changed files with 29 additions and 24 deletions

View file

@@ -1,12 +1,13 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2020 Tobias Gruetzmacher # Copyright (C) 2015-2022 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring # Copyright (C) 2019-2020 Daniel Ring
import os import os
import threading import threading
import _thread import _thread
from queue import Queue, Empty from queue import Queue, Empty
from typing import Dict
from urllib.parse import urlparse from urllib.parse import urlparse
from .output import out from .output import out
@@ -41,7 +42,7 @@ class ComicQueue(Queue):
# ensure threads download only from one host at a time # ensure threads download only from one host at a time
host_locks = {} host_locks: Dict[str, threading.Lock] = {}
def get_hostname(url): def get_hostname(url):

View file

@@ -1,9 +1,10 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2021 Tobias Gruetzmacher # Copyright (C) 2015-2022 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring # Copyright (C) 2019-2020 Daniel Ring
from re import compile, escape from re import compile, escape
from typing import List
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import bounceStarter, indirectStarter from ..helpers import bounceStarter, indirectStarter
@@ -132,7 +133,7 @@ class CatenaManor(_ParserScraper):
imageSearch = '//img[@class="comicthumbnail"]' imageSearch = '//img[@class="comicthumbnail"]'
multipleImagesPerStrip = True multipleImagesPerStrip = True
endOfLife = True endOfLife = True
strips = [] strips: List[str] = []
def starter(self): def starter(self):
# Retrieve archive links and select valid range # Retrieve archive links and select valid range

View file

@@ -1,8 +1,10 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2020 Tobias Gruetzmacher # Copyright (C) 2015-2022 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring # Copyright (C) 2019-2020 Daniel Ring
from typing import Sequence, Union
from ..scraper import _ParserScraper from ..scraper import _ParserScraper
# Common base classes for comics with the same structure (same hosting # Common base classes for comics with the same structure (same hosting
@@ -39,7 +41,7 @@ class _WPWebcomic(_ParserScraper):
class _ComicControlScraper(_ParserScraper): class _ComicControlScraper(_ParserScraper):
imageSearch = '//img[@id="cc-comic"]' imageSearch: Union[Sequence[str], str] = '//img[@id="cc-comic"]'
prevSearch = '//a[@rel="prev"]' prevSearch = '//a[@rel="prev"]'
nextSearch = '//a[@rel="next"]' nextSearch = '//a[@rel="next"]'
latestSearch = '//a[@rel="last"]' latestSearch = '//a[@rel="last"]'

View file

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2021 Tobias Gruetzmacher # Copyright (C) 2015-2022 Tobias Gruetzmacher
from re import compile, escape from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
@@ -42,5 +42,5 @@ class JoeAndMonkey(_BasicScraper):
class JohnnyWander(_ComicControlScraper): class JohnnyWander(_ComicControlScraper):
imageSearch = ('//ul[d:class("cc-showbig")]/li/@data-src', imageSearch = ('//ul[d:class("cc-showbig")]/li/@data-src',
_ComicControlScraper.imageSearch) '//img[@id="cc-comic"]')
url = 'http://www.johnnywander.com/' url = 'http://www.johnnywander.com/'

View file

@@ -1,12 +1,13 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2021 Tobias Gruetzmacher # Copyright (C) 2015-2022 Tobias Gruetzmacher
import html import html
import os import os
import re import re
import warnings import warnings
from urllib.parse import urljoin from urllib.parse import urljoin
from typing import Optional, Union, Pattern, Sequence
import lxml import lxml
from lxml.html.defs import link_attrs as html_link_attrs from lxml.html.defs import link_attrs as html_link_attrs
@@ -42,60 +43,60 @@ class GeoblockedException(IOError):
super().__init__('It seems your current location is geo-blocked.') super().__init__('It seems your current location is geo-blocked.')
class Scraper(object): class Scraper:
'''Base class for all comic scraper, but without a specific scrape '''Base class for all comic scraper, but without a specific scrape
implementation.''' implementation.'''
# The URL for the comic strip # The URL for the comic strip
url = None url: Optional[str] = None
# A string that is interpolated with the strip index to yield the URL for a # A string that is interpolated with the strip index to yield the URL for a
# particular strip. # particular strip.
stripUrl = None stripUrl: Optional[str] = None
# Stop search for previous URLs at this URL # Stop search for previous URLs at this URL
firstStripUrl = None firstStripUrl: Optional[str] = None
# if more than one image per URL is expected # if more than one image per URL is expected
multipleImagesPerStrip = False multipleImagesPerStrip: bool = False
# set to True if this comic contains adult content # set to True if this comic contains adult content
adult = False adult: bool = False
# set to True if this comic will not get updated anymore # set to True if this comic will not get updated anymore
endOfLife = False endOfLife: bool = False
# language of the comic (two-letter ISO 639-1 code) # language of the comic (two-letter ISO 639-1 code)
lang = 'en' lang: str = 'en'
# an expression that will locate the URL for the previous strip in a page # an expression that will locate the URL for the previous strip in a page
# this can also be a list or tuple # this can also be a list or tuple
prevSearch = None prevSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None
# an expression that will locate the strip image URLs strip in a page # an expression that will locate the strip image URLs strip in a page
# this can also be a list or tuple # this can also be a list or tuple
imageSearch = None imageSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None
# an expression to store a text together with the image # an expression to store a text together with the image
# sometimes comic strips have additional text info for each comic # sometimes comic strips have additional text info for each comic
textSearch = None textSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None
# Is the additional text required or optional? When it is required (the # Is the additional text required or optional? When it is required (the
# default), you see an error message whenever a comic page is encountered # default), you see an error message whenever a comic page is encountered
# that does not have the text # that does not have the text
textOptional = False textOptional: bool = False
# usually the index format help # usually the index format help
help = '' help: str = ''
# Specifying a list of HTTP error codes which should be handled as a # Specifying a list of HTTP error codes which should be handled as a
# successful request. This is a workaround for some comics which return # successful request. This is a workaround for some comics which return
# regular pages with strange HTTP codes. By default, all HTTP errors raise # regular pages with strange HTTP codes. By default, all HTTP errors raise
# exceptions. # exceptions.
allow_errors = () allow_errors: Sequence[int] = ()
# HTTP session for configuration & cookies # HTTP session for configuration & cookies
session = http.default_session session: http.Session = http.default_session
@classmethod @classmethod
def getmodules(cls): def getmodules(cls):