Add some more type annotations

This commit is contained in:
Tobias Gruetzmacher 2024-02-18 16:53:17 +01:00
parent ee22169cc5
commit 7b9ca867fb
No known key found for this signature in database
2 changed files with 42 additions and 39 deletions

View file

@ -1,12 +1,15 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
from __future__ import annotations
import os
import glob
import codecs
import contextlib
from datetime import datetime
from typing import Iterator
from .output import out
from .util import unquote, getFilename, urlopen, strsize
@ -14,27 +17,27 @@ from .events import getHandler
# Maximum content size for images
MaxImageBytes = 1024 * 1024 * 20 # 20 MB
MAX_IMAGE_BYTES = 1024 * 1024 * 20 # 20 MB
# RFC 1123 format, as preferred by RFC 2616
RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT"
class ComicStrip(object):
class ComicStrip:
"""A list of comic image URLs."""
def __init__(self, scraper, strip_url, image_urls, text=None):
def __init__(self, scraper, strip_url: str, image_urls: list[str] | tuple[str, ...], text=None) -> None:
"""Store the scraper, the strip page URL, the image URL list and the
optional text.

image_urls is a collection of URL strings rather than a single string:
getImages() iterates over it and hands each element to getDownloader()
as one URL.
"""
self.scraper = scraper
self.strip_url = strip_url
# NOTE(review): annotated as list/tuple because Scraper.getComicStrip
# passes either the result of uniq() or a one-element tuple; widen the
# annotation if other callers pass a different iterable.
self.image_urls = image_urls
self.text = text
def getImages(self):
def getImages(self) -> Iterator[ComicImage]:
"""Yield one image downloader for each URL in self.image_urls.

This is a generator, so each downloader is only created when the
caller asks for the next item.
"""
for image_url in self.image_urls:
yield self.getDownloader(image_url)
def getDownloader(self, url):
def getDownloader(self, url: str) -> ComicImage:
"""Get an image downloader."""
filename = self.scraper.namer(url, self.strip_url)
if filename is None:
@ -43,7 +46,7 @@ class ComicStrip(object):
text=self.text)
class ComicImage(object):
class ComicImage:
"""A comic image downloader."""
ChunkBytes = 1024 * 100 # 100KB
@ -64,7 +67,7 @@ class ComicImage(object):
headers['If-Modified-Since'] = lastchange.strftime(RFC_1123_DT_STR)
self.urlobj = urlopen(self.url, self.scraper.session,
referrer=self.referrer,
max_content_bytes=MaxImageBytes, stream=True,
max_content_bytes=MAX_IMAGE_BYTES, stream=True,
headers=headers)
if self.urlobj.status_code == 304: # Not modified
return

View file

@ -119,45 +119,45 @@ class Scraper:
if val:
self._indexes = tuple(sorted(val))
def __init__(self, name):
def __init__(self, name: str) -> None:
"""Initialize internal variables."""
self.name = name
self.urls = set()
self.urls: set[str] = set()
self._indexes = ()
self.skippedUrls = set()
self.skippedUrls: set[str] = set()
self.hitFirstStripUrl = False
def __hash__(self):
def __hash__(self) -> int:
"""Get hash value of the (name, indexes) pair."""
return hash((self.name, self.indexes))
def shouldSkipUrl(self, url, data):
def shouldSkipUrl(self, url: str, data) -> bool:
"""Determine if search for images in given URL should be skipped.

This implementation ignores both arguments and never skips a URL.
"""
return False
def getComicStrip(self, url, data):
def getComicStrip(self, url, data) -> ComicStrip:
"""Get comic strip downloader for given URL and data."""
imageUrls = self.extract_image_urls(url, data)
urls = self.extract_image_urls(url, data)
# map modifier function on image URLs
imageUrls = [self.imageUrlModifier(x, data) for x in imageUrls]
urls = [self.imageUrlModifier(x, data) for x in urls]
# remove duplicate URLs
imageUrls = uniq(imageUrls)
if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
urls = uniq(urls)
if len(urls) > 1 and not self.multipleImagesPerStrip:
out.warn(
u"Found %d images instead of 1 at %s with expressions %s" %
(len(imageUrls), url, prettyMatcherList(self.imageSearch)))
image = imageUrls[0]
out.warn(u"Choosing image %s" % image)
imageUrls = (image,)
elif not imageUrls:
out.warn(u"Found no images at %s with expressions %s" % (url,
(len(urls), url, prettyMatcherList(self.imageSearch)))
image = urls[0]
out.warn("Choosing image %s" % image)
urls = (image,)
elif not urls:
out.warn("Found no images at %s with expressions %s" % (url,
prettyMatcherList(self.imageSearch)))
if self.textSearch:
text = self.fetchText(url, data, self.textSearch,
optional=self.textOptional)
else:
text = None
return ComicStrip(self, url, imageUrls, text=text)
return ComicStrip(self, url, urls, text=text)
def getStrips(self, maxstrips=None):
"""Get comic strips."""
@ -217,7 +217,7 @@ class Scraper:
break
url = prevUrl
def isfirststrip(self, url):
def isfirststrip(self, url: str) -> bool:
"""Check if the specified URL is the first strip of a comic. This is
specially for comics taken from archive.org, since the base URL of
archive.org changes whenever pages are taken from a different
@ -228,7 +228,7 @@ class Scraper:
currenturl = ARCHIVE_ORG_URL.sub('', url)
return firsturl == currenturl
def getPrevUrl(self, url, data):
def getPrevUrl(self, url: str, data) -> str | None:
"""Find previous URL."""
prevUrl = None
if self.prevSearch:
@ -243,40 +243,40 @@ class Scraper:
getHandler().comicPageLink(self, url, prevUrl)
return prevUrl
def getIndexStripUrl(self, index):
def getIndexStripUrl(self, index: str) -> str:
"""Get comic strip URL from index.

The index is substituted into self.stripUrl with the % operator.
"""
return self.stripUrl % index
def starter(self):
def starter(self) -> str:
"""Get starter URL from where to scrape comic strips.

This implementation returns self.url unchanged.
"""
return self.url
def namer(self, image_url, page_url):
def namer(self, image_url: str, page_url: str) -> str | None:
"""Return filename for given image and page URL.

This implementation ignores both URLs and returns None.
"""
# Bare return: the result is None.
return
def link_modifier(self, fromurl, tourl):
def link_modifier(self, fromurl: str, tourl: str) -> str:
"""Optional modification of parsed link (previous/back/latest) URLs.
Useful if there are domain redirects. The default implementation does
not modify the URL: it ignores fromurl and returns tourl as given.
"""
return tourl
def imageUrlModifier(self, image_url, data):
def imageUrlModifier(self, image_url: str, data) -> str:
"""Optional modification of parsed image URLs. Useful if the URL
needs to be fixed before usage. The default implementation does
not modify the URL: it ignores data and returns image_url as given.
The given data is the URL page data.
"""
return image_url
def vote(self):
def vote(self) -> None:
"""Cast a public vote for this comic.

Posts the comic name (with every '/' replaced by '_') and the value of
get_system_uid() to configuration.VoteUrl through self.session, then
calls raise_for_status() on the response.
"""
uid = get_system_uid()
data = {"name": self.name.replace('/', '_'), "uid": uid}
response = self.session.post(configuration.VoteUrl, data=data)
# NOTE(review): presumably raises on an HTTP error status - confirm
# against the session type in use.
response.raise_for_status()
def get_download_dir(self, basepath):
def get_download_dir(self, basepath: str) -> str:
"""Try to find the correct download directory, ignoring case
differences."""
path = basepath
@ -294,16 +294,16 @@ class Scraper:
path = os.path.join(path, part)
return path
def getCompleteFile(self, basepath):
def getCompleteFile(self, basepath: str) -> str:
"""Get filename indicating all comics are downloaded.

The result is the path of "complete.txt" inside the directory returned
by get_download_dir(basepath).
"""
dirname = self.get_download_dir(basepath)
return os.path.join(dirname, "complete.txt")
def isComplete(self, basepath):
def isComplete(self, basepath: str) -> bool:
"""Check if all comics are downloaded.

True when the path from getCompleteFile(basepath) exists as a regular
file.
"""
return os.path.isfile(self.getCompleteFile(basepath))
def setComplete(self, basepath):
def setComplete(self, basepath: str) -> None:
"""Set complete flag for this comic, i.e. all comics are downloaded."""
if self.endOfLife:
filename = self.getCompleteFile(basepath)