Add some basic type annotations
parent f3b8ebf0be
commit 9b95171f37
5 changed files with 29 additions and 24 deletions
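For context, a minimal sketch of the annotation idiom this commit introduces (the class and attribute names below are illustrative only, not part of the diff): class-level defaults get explicit types so a checker such as mypy can validate subclass overrides.

# Illustrative sketch (hypothetical names), mirroring the pattern in the diff:
# class attributes are annotated in place so a type checker can verify overrides.
from typing import Optional, Pattern, Sequence, Union

class ExampleScraper:
    # may remain unset, hence Optional with a None default
    url: Optional[str] = None
    # a single XPath/regex expression or a sequence of them
    imageSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None
    lang: str = 'en'
    allow_errors: Sequence[int] = ()

class ExampleComic(ExampleScraper):
    url = 'https://example.com/'        # narrows Optional[str] to str
    imageSearch = '//img[@id="comic"]'  # single XPath string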
@@ -1,12 +1,13 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 import os
 import threading
 import _thread
 from queue import Queue, Empty
+from typing import Dict
 from urllib.parse import urlparse

 from .output import out
@@ -41,7 +42,7 @@ class ComicQueue(Queue):


 # ensure threads download only from one host at a time
-host_locks = {}
+host_locks: Dict[str, threading.Lock] = {}


 def get_hostname(url):

@@ -1,9 +1,10 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2021 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 from re import compile, escape
+from typing import List

 from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import bounceStarter, indirectStarter
@@ -132,7 +133,7 @@ class CatenaManor(_ParserScraper):
     imageSearch = '//img[@class="comicthumbnail"]'
     multipleImagesPerStrip = True
     endOfLife = True
-    strips = []
+    strips: List[str] = []

     def starter(self):
         # Retrieve archive links and select valid range

@@ -1,8 +1,10 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
+from typing import Sequence, Union
+
 from ..scraper import _ParserScraper

 # Common base classes for comics with the same structure (same hosting
@@ -39,7 +41,7 @@ class _WPWebcomic(_ParserScraper):


 class _ComicControlScraper(_ParserScraper):
-    imageSearch = '//img[@id="cc-comic"]'
+    imageSearch: Union[Sequence[str], str] = '//img[@id="cc-comic"]'
     prevSearch = '//a[@rel="prev"]'
     nextSearch = '//a[@rel="next"]'
     latestSearch = '//a[@rel="last"]'

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2021 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 from re import compile, escape

 from ..scraper import _BasicScraper
@@ -42,5 +42,5 @@ class JoeAndMonkey(_BasicScraper):

 class JohnnyWander(_ComicControlScraper):
     imageSearch = ('//ul[d:class("cc-showbig")]/li/@data-src',
-                   _ComicControlScraper.imageSearch)
+                   '//img[@id="cc-comic"]')
     url = 'http://www.johnnywander.com/'

@@ -1,12 +1,13 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2021 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 import html
 import os
 import re
 import warnings
 from urllib.parse import urljoin
+from typing import Optional, Union, Pattern, Sequence

 import lxml
 from lxml.html.defs import link_attrs as html_link_attrs
@@ -42,60 +43,60 @@ class GeoblockedException(IOError):
         super().__init__('It seems your current location is geo-blocked.')


-class Scraper(object):
+class Scraper:
     '''Base class for all comic scraper, but without a specific scrape
     implementation.'''

     # The URL for the comic strip
-    url = None
+    url: Optional[str] = None

     # A string that is interpolated with the strip index to yield the URL for a
     # particular strip.
-    stripUrl = None
+    stripUrl: Optional[str] = None

     # Stop search for previous URLs at this URL
-    firstStripUrl = None
+    firstStripUrl: Optional[str] = None

     # if more than one image per URL is expected
-    multipleImagesPerStrip = False
+    multipleImagesPerStrip: bool = False

     # set to True if this comic contains adult content
-    adult = False
+    adult: bool = False

     # set to True if this comic will not get updated anymore
-    endOfLife = False
+    endOfLife: bool = False

     # langauge of the comic (two-letter ISO 639-1 code)
-    lang = 'en'
+    lang: str = 'en'

     # an expression that will locate the URL for the previous strip in a page
     # this can also be a list or tuple
-    prevSearch = None
+    prevSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None

     # an expression that will locate the strip image URLs strip in a page
     # this can also be a list or tuple
-    imageSearch = None
+    imageSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None

     # an expression to store a text together with the image
     # sometimes comic strips have additional text info for each comic
-    textSearch = None
+    textSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None

     # Is the additional text required or optional? When it is required (the
     # default), you see an error message whenever a comic page is encountered
     # that does not have the text
-    textOptional = False
+    textOptional: bool = False

     # usually the index format help
-    help = ''
+    help: str = ''

     # Specifing a list of HTTP error codes which should be handled as a
     # successful request. This is a workaround for some comics which return
     # regular pages with strange HTTP codes. By default, all HTTP errors raise
     # exceptions.
-    allow_errors = ()
+    allow_errors: Sequence[int] = ()

     # HTTP session for configuration & cookies
-    session = http.default_session
+    session: http.Session = http.default_session

     @classmethod
     def getmodules(cls):