Add some more type annotations

This commit is contained in:
Tobias Gruetzmacher 2024-02-18 16:53:17 +01:00
parent ee22169cc5
commit 7b9ca867fb
No known key found for this signature in database
2 changed files with 42 additions and 39 deletions

View file

@ -1,12 +1,15 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher # SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
from __future__ import annotations
import os import os
import glob import glob
import codecs import codecs
import contextlib import contextlib
from datetime import datetime from datetime import datetime
from typing import Iterator
from .output import out from .output import out
from .util import unquote, getFilename, urlopen, strsize from .util import unquote, getFilename, urlopen, strsize
@ -14,27 +17,27 @@ from .events import getHandler
# Maximum content size for images # Maximum content size for images
MaxImageBytes = 1024 * 1024 * 20 # 20 MB MAX_IMAGE_BYTES = 1024 * 1024 * 20 # 20 MB
# RFC 1123 format, as preferred by RFC 2616 # RFC 1123 format, as preferred by RFC 2616
RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT" RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT"
class ComicStrip(object): class ComicStrip:
"""A list of comic image URLs.""" """A list of comic image URLs."""
def __init__(self, scraper, strip_url, image_urls, text=None): def __init__(self, scraper, strip_url: str, image_urls: str, text=None) -> None:
"""Store the image URL list.""" """Store the image URL list."""
self.scraper = scraper self.scraper = scraper
self.strip_url = strip_url self.strip_url = strip_url
self.image_urls = image_urls self.image_urls = image_urls
self.text = text self.text = text
def getImages(self): def getImages(self) -> Iterator[ComicImage]:
"""Get a list of image downloaders.""" """Get a list of image downloaders."""
for image_url in self.image_urls: for image_url in self.image_urls:
yield self.getDownloader(image_url) yield self.getDownloader(image_url)
def getDownloader(self, url): def getDownloader(self, url: str) -> ComicImage:
"""Get an image downloader.""" """Get an image downloader."""
filename = self.scraper.namer(url, self.strip_url) filename = self.scraper.namer(url, self.strip_url)
if filename is None: if filename is None:
@ -43,7 +46,7 @@ class ComicStrip(object):
text=self.text) text=self.text)
class ComicImage(object): class ComicImage:
"""A comic image downloader.""" """A comic image downloader."""
ChunkBytes = 1024 * 100 # 100KB ChunkBytes = 1024 * 100 # 100KB
@ -64,7 +67,7 @@ class ComicImage(object):
headers['If-Modified-Since'] = lastchange.strftime(RFC_1123_DT_STR) headers['If-Modified-Since'] = lastchange.strftime(RFC_1123_DT_STR)
self.urlobj = urlopen(self.url, self.scraper.session, self.urlobj = urlopen(self.url, self.scraper.session,
referrer=self.referrer, referrer=self.referrer,
max_content_bytes=MaxImageBytes, stream=True, max_content_bytes=MAX_IMAGE_BYTES, stream=True,
headers=headers) headers=headers)
if self.urlobj.status_code == 304: # Not modified if self.urlobj.status_code == 304: # Not modified
return return

View file

@ -119,45 +119,45 @@ class Scraper:
if val: if val:
self._indexes = tuple(sorted(val)) self._indexes = tuple(sorted(val))
def __init__(self, name): def __init__(self, name: str) -> None:
"""Initialize internal variables.""" """Initialize internal variables."""
self.name = name self.name = name
self.urls = set() self.urls: set[str] = set()
self._indexes = () self._indexes = ()
self.skippedUrls = set() self.skippedUrls: set[str] = set()
self.hitFirstStripUrl = False self.hitFirstStripUrl = False
def __hash__(self): def __hash__(self) -> int:
"""Get hash value from name and index list.""" """Get hash value from name and index list."""
return hash((self.name, self.indexes)) return hash((self.name, self.indexes))
def shouldSkipUrl(self, url, data): def shouldSkipUrl(self, url: str, data) -> bool:
"""Determine if search for images in given URL should be skipped.""" """Determine if search for images in given URL should be skipped."""
return False return False
def getComicStrip(self, url, data): def getComicStrip(self, url, data) -> ComicStrip:
"""Get comic strip downloader for given URL and data.""" """Get comic strip downloader for given URL and data."""
imageUrls = self.extract_image_urls(url, data) urls = self.extract_image_urls(url, data)
# map modifier function on image URLs # map modifier function on image URLs
imageUrls = [self.imageUrlModifier(x, data) for x in imageUrls] urls = [self.imageUrlModifier(x, data) for x in urls]
# remove duplicate URLs # remove duplicate URLs
imageUrls = uniq(imageUrls) urls = uniq(urls)
if len(imageUrls) > 1 and not self.multipleImagesPerStrip: if len(urls) > 1 and not self.multipleImagesPerStrip:
out.warn( out.warn(
u"Found %d images instead of 1 at %s with expressions %s" % u"Found %d images instead of 1 at %s with expressions %s" %
(len(imageUrls), url, prettyMatcherList(self.imageSearch))) (len(urls), url, prettyMatcherList(self.imageSearch)))
image = imageUrls[0] image = urls[0]
out.warn(u"Choosing image %s" % image) out.warn("Choosing image %s" % image)
imageUrls = (image,) urls = (image,)
elif not imageUrls: elif not urls:
out.warn(u"Found no images at %s with expressions %s" % (url, out.warn("Found no images at %s with expressions %s" % (url,
prettyMatcherList(self.imageSearch))) prettyMatcherList(self.imageSearch)))
if self.textSearch: if self.textSearch:
text = self.fetchText(url, data, self.textSearch, text = self.fetchText(url, data, self.textSearch,
optional=self.textOptional) optional=self.textOptional)
else: else:
text = None text = None
return ComicStrip(self, url, imageUrls, text=text) return ComicStrip(self, url, urls, text=text)
def getStrips(self, maxstrips=None): def getStrips(self, maxstrips=None):
"""Get comic strips.""" """Get comic strips."""
@ -217,7 +217,7 @@ class Scraper:
break break
url = prevUrl url = prevUrl
def isfirststrip(self, url): def isfirststrip(self, url: str) -> bool:
"""Check if the specified URL is the first strip of a comic. This is """Check if the specified URL is the first strip of a comic. This is
specially for comics taken from archive.org, since the base URL of specially for comics taken from archive.org, since the base URL of
archive.org changes whenever pages are taken from a different archive.org changes whenever pages are taken from a different
@ -228,7 +228,7 @@ class Scraper:
currenturl = ARCHIVE_ORG_URL.sub('', url) currenturl = ARCHIVE_ORG_URL.sub('', url)
return firsturl == currenturl return firsturl == currenturl
def getPrevUrl(self, url, data): def getPrevUrl(self, url: str, data) -> str | None:
"""Find previous URL.""" """Find previous URL."""
prevUrl = None prevUrl = None
if self.prevSearch: if self.prevSearch:
@ -243,40 +243,40 @@ class Scraper:
getHandler().comicPageLink(self, url, prevUrl) getHandler().comicPageLink(self, url, prevUrl)
return prevUrl return prevUrl
def getIndexStripUrl(self, index): def getIndexStripUrl(self, index: str) -> str:
"""Get comic strip URL from index.""" """Get comic strip URL from index."""
return self.stripUrl % index return self.stripUrl % index
def starter(self): def starter(self) -> str:
"""Get starter URL from where to scrape comic strips.""" """Get starter URL from where to scrape comic strips."""
return self.url return self.url
def namer(self, image_url, page_url): def namer(self, image_url: str, page_url: str) -> str | None:
"""Return filename for given image and page URL.""" """Return filename for given image and page URL."""
return return
def link_modifier(self, fromurl, tourl): def link_modifier(self, fromurl: str, tourl: str) -> str:
"""Optional modification of parsed link (previous/back/latest) URLs. """Optional modification of parsed link (previous/back/latest) URLs.
Useful if there are domain redirects. The default implementation does Useful if there are domain redirects. The default implementation does
not modify the URL. not modify the URL.
""" """
return tourl return tourl
def imageUrlModifier(self, image_url, data): def imageUrlModifier(self, image_url: str, data) -> str:
"""Optional modification of parsed image URLs. Useful if the URL """Optional modification of parsed image URLs. Useful if the URL
needs to be fixed before usage. The default implementation does needs to be fixed before usage. The default implementation does
not modify the URL. The given data is the URL page data. not modify the URL. The given data is the URL page data.
""" """
return image_url return image_url
def vote(self): def vote(self) -> None:
"""Cast a public vote for this comic.""" """Cast a public vote for this comic."""
uid = get_system_uid() uid = get_system_uid()
data = {"name": self.name.replace('/', '_'), "uid": uid} data = {"name": self.name.replace('/', '_'), "uid": uid}
response = self.session.post(configuration.VoteUrl, data=data) response = self.session.post(configuration.VoteUrl, data=data)
response.raise_for_status() response.raise_for_status()
def get_download_dir(self, basepath): def get_download_dir(self, basepath: str) -> str:
"""Try to find the corect download directory, ignoring case """Try to find the corect download directory, ignoring case
differences.""" differences."""
path = basepath path = basepath
@ -294,16 +294,16 @@ class Scraper:
path = os.path.join(path, part) path = os.path.join(path, part)
return path return path
def getCompleteFile(self, basepath): def getCompleteFile(self, basepath: str) -> str:
"""Get filename indicating all comics are downloaded.""" """Get filename indicating all comics are downloaded."""
dirname = self.get_download_dir(basepath) dirname = self.get_download_dir(basepath)
return os.path.join(dirname, "complete.txt") return os.path.join(dirname, "complete.txt")
def isComplete(self, basepath): def isComplete(self, basepath: str) -> bool:
"""Check if all comics are downloaded.""" """Check if all comics are downloaded."""
return os.path.isfile(self.getCompleteFile(basepath)) return os.path.isfile(self.getCompleteFile(basepath))
def setComplete(self, basepath): def setComplete(self, basepath: str) -> None:
"""Set complete flag for this comic, ie. all comics are downloaded.""" """Set complete flag for this comic, ie. all comics are downloaded."""
if self.endOfLife: if self.endOfLife:
filename = self.getCompleteFile(basepath) filename = self.getCompleteFile(basepath)