Add some basic type annotations
This commit is contained in:
parent
f3b8ebf0be
commit
9b95171f37
5 changed files with 29 additions and 24 deletions
|
@ -1,12 +1,13 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# Copyright (C) 2019-2020 Daniel Ring
|
||||||
import os
|
import os
|
||||||
import threading
|
import threading
|
||||||
import _thread
|
import _thread
|
||||||
from queue import Queue, Empty
|
from queue import Queue, Empty
|
||||||
|
from typing import Dict
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from .output import out
|
from .output import out
|
||||||
|
@ -41,7 +42,7 @@ class ComicQueue(Queue):
|
||||||
|
|
||||||
|
|
||||||
# ensure threads download only from one host at a time
|
# ensure threads download only from one host at a time
|
||||||
host_locks = {}
|
host_locks: Dict[str, threading.Lock] = {}
|
||||||
|
|
||||||
|
|
||||||
def get_hostname(url):
|
def get_hostname(url):
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2021 Tobias Gruetzmacher
|
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# Copyright (C) 2019-2020 Daniel Ring
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
from typing import List
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import bounceStarter, indirectStarter
|
from ..helpers import bounceStarter, indirectStarter
|
||||||
|
@ -132,7 +133,7 @@ class CatenaManor(_ParserScraper):
|
||||||
imageSearch = '//img[@class="comicthumbnail"]'
|
imageSearch = '//img[@class="comicthumbnail"]'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
strips = []
|
strips: List[str] = []
|
||||||
|
|
||||||
def starter(self):
|
def starter(self):
|
||||||
# Retrieve archive links and select valid range
|
# Retrieve archive links and select valid range
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# Copyright (C) 2019-2020 Daniel Ring
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from ..scraper import _ParserScraper
|
from ..scraper import _ParserScraper
|
||||||
|
|
||||||
# Common base classes for comics with the same structure (same hosting
|
# Common base classes for comics with the same structure (same hosting
|
||||||
|
@ -39,7 +41,7 @@ class _WPWebcomic(_ParserScraper):
|
||||||
|
|
||||||
|
|
||||||
class _ComicControlScraper(_ParserScraper):
|
class _ComicControlScraper(_ParserScraper):
|
||||||
imageSearch = '//img[@id="cc-comic"]'
|
imageSearch: Union[Sequence[str], str] = '//img[@id="cc-comic"]'
|
||||||
prevSearch = '//a[@rel="prev"]'
|
prevSearch = '//a[@rel="prev"]'
|
||||||
nextSearch = '//a[@rel="next"]'
|
nextSearch = '//a[@rel="next"]'
|
||||||
latestSearch = '//a[@rel="last"]'
|
latestSearch = '//a[@rel="last"]'
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2021 Tobias Gruetzmacher
|
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
|
@ -42,5 +42,5 @@ class JoeAndMonkey(_BasicScraper):
|
||||||
|
|
||||||
class JohnnyWander(_ComicControlScraper):
|
class JohnnyWander(_ComicControlScraper):
|
||||||
imageSearch = ('//ul[d:class("cc-showbig")]/li/@data-src',
|
imageSearch = ('//ul[d:class("cc-showbig")]/li/@data-src',
|
||||||
_ComicControlScraper.imageSearch)
|
'//img[@id="cc-comic"]')
|
||||||
url = 'http://www.johnnywander.com/'
|
url = 'http://www.johnnywander.com/'
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2021 Tobias Gruetzmacher
|
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||||
import html
|
import html
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import warnings
|
import warnings
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
from typing import Optional, Union, Pattern, Sequence
|
||||||
|
|
||||||
import lxml
|
import lxml
|
||||||
from lxml.html.defs import link_attrs as html_link_attrs
|
from lxml.html.defs import link_attrs as html_link_attrs
|
||||||
|
@ -42,60 +43,60 @@ class GeoblockedException(IOError):
|
||||||
super().__init__('It seems your current location is geo-blocked.')
|
super().__init__('It seems your current location is geo-blocked.')
|
||||||
|
|
||||||
|
|
||||||
class Scraper(object):
|
class Scraper:
|
||||||
'''Base class for all comic scrapers, but without a specific scrape
|
'''Base class for all comic scrapers, but without a specific scrape
|
||||||
implementation.'''
|
implementation.'''
|
||||||
|
|
||||||
# The URL for the comic strip
|
# The URL for the comic strip
|
||||||
url = None
|
url: Optional[str] = None
|
||||||
|
|
||||||
# A string that is interpolated with the strip index to yield the URL for a
|
# A string that is interpolated with the strip index to yield the URL for a
|
||||||
# particular strip.
|
# particular strip.
|
||||||
stripUrl = None
|
stripUrl: Optional[str] = None
|
||||||
|
|
||||||
# Stop search for previous URLs at this URL
|
# Stop search for previous URLs at this URL
|
||||||
firstStripUrl = None
|
firstStripUrl: Optional[str] = None
|
||||||
|
|
||||||
# if more than one image per URL is expected
|
# if more than one image per URL is expected
|
||||||
multipleImagesPerStrip = False
|
multipleImagesPerStrip: bool = False
|
||||||
|
|
||||||
# set to True if this comic contains adult content
|
# set to True if this comic contains adult content
|
||||||
adult = False
|
adult: bool = False
|
||||||
|
|
||||||
# set to True if this comic will not get updated anymore
|
# set to True if this comic will not get updated anymore
|
||||||
endOfLife = False
|
endOfLife: bool = False
|
||||||
|
|
||||||
# language of the comic (two-letter ISO 639-1 code)
|
# language of the comic (two-letter ISO 639-1 code)
|
||||||
lang = 'en'
|
lang: str = 'en'
|
||||||
|
|
||||||
# an expression that will locate the URL for the previous strip in a page
|
# an expression that will locate the URL for the previous strip in a page
|
||||||
# this can also be a list or tuple
|
# this can also be a list or tuple
|
||||||
prevSearch = None
|
prevSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None
|
||||||
|
|
||||||
# an expression that will locate the strip image URLs in a page
|
# an expression that will locate the strip image URLs in a page
|
||||||
# this can also be a list or tuple
|
# this can also be a list or tuple
|
||||||
imageSearch = None
|
imageSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None
|
||||||
|
|
||||||
# an expression to store a text together with the image
|
# an expression to store a text together with the image
|
||||||
# sometimes comic strips have additional text info for each comic
|
# sometimes comic strips have additional text info for each comic
|
||||||
textSearch = None
|
textSearch: Optional[Union[Sequence[Union[str, Pattern]], str, Pattern]] = None
|
||||||
|
|
||||||
# Is the additional text required or optional? When it is required (the
|
# Is the additional text required or optional? When it is required (the
|
||||||
# default), you see an error message whenever a comic page is encountered
|
# default), you see an error message whenever a comic page is encountered
|
||||||
# that does not have the text
|
# that does not have the text
|
||||||
textOptional = False
|
textOptional: bool = False
|
||||||
|
|
||||||
# usually the index format help
|
# usually the index format help
|
||||||
help = ''
|
help: str = ''
|
||||||
|
|
||||||
# Specifying a list of HTTP error codes which should be handled as a
|
# Specifying a list of HTTP error codes which should be handled as a
|
||||||
# successful request. This is a workaround for some comics which return
|
# successful request. This is a workaround for some comics which return
|
||||||
# regular pages with strange HTTP codes. By default, all HTTP errors raise
|
# regular pages with strange HTTP codes. By default, all HTTP errors raise
|
||||||
# exceptions.
|
# exceptions.
|
||||||
allow_errors = ()
|
allow_errors: Sequence[int] = ()
|
||||||
|
|
||||||
# HTTP session for configuration & cookies
|
# HTTP session for configuration & cookies
|
||||||
session = http.default_session
|
session: http.Session = http.default_session
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getmodules(cls):
|
def getmodules(cls):
|
||||||
|
|
Loading…
Reference in a new issue