Add some basic type annotations

This commit is contained in:
Tobias Gruetzmacher 2022-05-28 19:33:16 +02:00
parent f3b8ebf0be
commit 9b95171f37
5 changed files with 29 additions and 24 deletions

View file

@ -1,12 +1,13 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2020 Tobias Gruetzmacher
# Copyright (C) 2015-2022 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring
import os
import threading
import _thread
from queue import Queue, Empty
from typing import Dict
from urllib.parse import urlparse
from .output import out
@ -41,7 +42,7 @@ class ComicQueue(Queue):
# ensure threads download only from one host at a time
host_locks = {}
host_locks: Dict[str, threading.Lock] = {}
def get_hostname(url):

View file

@ -1,9 +1,10 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2021 Tobias Gruetzmacher
# Copyright (C) 2015-2022 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring
from re import compile, escape
from typing import List
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import bounceStarter, indirectStarter
@ -132,7 +133,7 @@ class CatenaManor(_ParserScraper):
imageSearch = '//img[@class="comicthumbnail"]'
multipleImagesPerStrip = True
endOfLife = True
strips = []
strips: List[str] = []
def starter(self):
# Retrieve archive links and select valid range

View file

@ -1,8 +1,10 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2020 Tobias Gruetzmacher
# Copyright (C) 2015-2022 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring
from typing import Sequence, Union
from ..scraper import _ParserScraper
# Common base classes for comics with the same structure (same hosting
@ -39,7 +41,7 @@ class _WPWebcomic(_ParserScraper):
class _ComicControlScraper(_ParserScraper):
imageSearch = '//img[@id="cc-comic"]'
imageSearch: Union[Sequence[str], str] = '//img[@id="cc-comic"]'
prevSearch = '//a[@rel="prev"]'
nextSearch = '//a[@rel="next"]'
latestSearch = '//a[@rel="last"]'

View file

@ -1,7 +1,7 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2021 Tobias Gruetzmacher
# Copyright (C) 2015-2022 Tobias Gruetzmacher
from re import compile, escape
from ..scraper import _BasicScraper
@ -42,5 +42,5 @@ class JoeAndMonkey(_BasicScraper):
class JohnnyWander(_ComicControlScraper):
imageSearch = ('//ul[d:class("cc-showbig")]/li/@data-src',
_ComicControlScraper.imageSearch)
'//img[@id="cc-comic"]')
url = 'http://www.johnnywander.com/'

View file

@ -1,12 +1,13 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2021 Tobias Gruetzmacher
# Copyright (C) 2015-2022 Tobias Gruetzmacher
import html
import os
import re
import warnings
from urllib.parse import urljoin
from typing import Optional, Union, Pattern, Sequence
import lxml
from lxml.html.defs import link_attrs as html_link_attrs
@ -42,60 +43,60 @@ class GeoblockedException(IOError):
super().__init__('It seems your current location is geo-blocked.')
class Scraper(object):
class Scraper:
'''Base class for all comic scrapers, but without a specific scrape
implementation.'''
# The URL for the comic strip
url = None
url: Optional[str] = None
# A string that is interpolated with the strip index to yield the URL for a
# particular strip.
stripUrl = None
stripUrl: Optional[str] = None
# Stop search for previous URLs at this URL
firstStripUrl = None
firstStripUrl: Optional[str] = None
# if more than one image per URL is expected
multipleImagesPerStrip = False
multipleImagesPerStrip: bool = False
# set to True if this comic contains adult content
adult = False
adult: bool = False
# set to True if this comic will not get updated anymore
endOfLife = False
endOfLife: bool = False
# language of the comic (two-letter ISO 639-1 code)
lang = 'en'
lang: str = 'en'
# an expression that will locate the URL for the previous strip in a page
# this can also be a list or tuple
prevSearch = None
prevSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None
# an expression that will locate the strip image URLs in a page
# this can also be a list or tuple
imageSearch = None
imageSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None
# an expression to store a text together with the image
# sometimes comic strips have additional text info for each comic
textSearch = None
textSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None
# Is the additional text required or optional? When it is required (the
# default), you see an error message whenever a comic page is encountered
# that does not have the text
textOptional = False
textOptional: bool = False
# usually the index format help
help = ''
help: str = ''
# Specifying a list of HTTP error codes which should be handled as a
# successful request. This is a workaround for some comics which return
# regular pages with strange HTTP codes. By default, all HTTP errors raise
# exceptions.
allow_errors = ()
allow_errors: Sequence[int] = ()
# HTTP session for configuration & cookies
session = http.default_session
session: http.Session = http.default_session
@classmethod
def getmodules(cls):