Add some more type annotations

parent ee22169cc5
commit 7b9ca867fb

2 changed files with 42 additions and 39 deletions
@@ -1,12 +1,15 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2016 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
+# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
+# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
+from __future__ import annotations
 
 import os
 import glob
 import codecs
 import contextlib
 from datetime import datetime
+from typing import Iterator
 
 from .output import out
+from .util import unquote, getFilename, urlopen, strsize
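
The newly added `from __future__ import annotations` postpones the evaluation of all annotations (PEP 563): they are stored as plain strings and never executed at import time, which lets the rest of the diff write forms such as `str | None` (PEP 604) and `set[str]` (PEP 585) even on interpreter versions where those would not be valid at runtime. A minimal standalone sketch of the effect, not code from this repository:

```python
from __future__ import annotations  # store annotations as strings (PEP 563)

from typing import Iterator


def first_or_none(items: list[str]) -> str | None:
    """Return the first item, or None if the list is empty."""
    return items[0] if items else None


def countdown(start: int) -> Iterator[int]:
    """A generator; Iterator[int] documents what it yields."""
    while start > 0:
        yield start
        start -= 1


# The annotations are plain strings, so they never raise at import time:
print(first_or_none.__annotations__)   # {'items': 'list[str]', 'return': 'str | None'}
print(list(countdown(3)))              # [3, 2, 1]
```
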
@@ -14,27 +17,27 @@ from .events import getHandler
 
 
 # Maximum content size for images
-MaxImageBytes = 1024 * 1024 * 20 # 20 MB
+MAX_IMAGE_BYTES = 1024 * 1024 * 20 # 20 MB
 # RFC 1123 format, as preferred by RFC 2616
 RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT"
 
 
-class ComicStrip(object):
+class ComicStrip:
     """A list of comic image URLs."""
 
-    def __init__(self, scraper, strip_url, image_urls, text=None):
+    def __init__(self, scraper, strip_url: str, image_urls: str, text=None) -> None:
         """Store the image URL list."""
         self.scraper = scraper
         self.strip_url = strip_url
         self.image_urls = image_urls
         self.text = text
 
-    def getImages(self):
+    def getImages(self) -> Iterator[ComicImage]:
         """Get a list of image downloaders."""
         for image_url in self.image_urls:
             yield self.getDownloader(image_url)
 
-    def getDownloader(self, url):
+    def getDownloader(self, url: str) -> ComicImage:
         """Get an image downloader."""
         filename = self.scraper.namer(url, self.strip_url)
         if filename is None:
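
`getImages()` and `getDownloader()` now name `ComicImage` in their return annotations even though that class is only defined further down in the file; with postponed annotations such a forward reference is resolved by type checkers, never at runtime. A standalone sketch of the same pattern, with illustrative names that are not from the diff:

```python
from __future__ import annotations

from typing import Iterator


class Strip:
    def __init__(self, urls: list[str]) -> None:
        self.urls = urls

    def images(self) -> Iterator[Image]:  # Image is defined below; fine under PEP 563
        for url in self.urls:
            yield Image(url)


class Image:
    def __init__(self, url: str) -> None:
        self.url = url


for image in Strip(["a.png", "b.png"]).images():
    print(image.url)
```
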
@@ -43,7 +46,7 @@ class ComicStrip(object):
             text=self.text)
 
 
-class ComicImage(object):
+class ComicImage:
     """A comic image downloader."""
 
     ChunkBytes = 1024 * 100 # 100KB
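
Dropping the explicit `(object)` base is a pure cleanup: on Python 3 every class already inherits from `object`, so `class ComicImage:` and `class ComicImage(object):` define identical classes. A quick standalone check:

```python
class WithBase(object):
    pass


class WithoutBase:
    pass


# Both have object as their implicit base class.
print(WithBase.__mro__)     # (<class '__main__.WithBase'>, <class 'object'>)
print(WithoutBase.__mro__)  # (<class '__main__.WithoutBase'>, <class 'object'>)
```
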
@@ -64,7 +67,7 @@ class ComicImage(object):
             headers['If-Modified-Since'] = lastchange.strftime(RFC_1123_DT_STR)
         self.urlobj = urlopen(self.url, self.scraper.session,
             referrer=self.referrer,
-            max_content_bytes=MaxImageBytes, stream=True,
+            max_content_bytes=MAX_IMAGE_BYTES, stream=True,
             headers=headers)
         if self.urlobj.status_code == 304: # Not modified
             return
@@ -119,45 +119,45 @@ class Scraper:
         if val:
             self._indexes = tuple(sorted(val))
 
-    def __init__(self, name):
+    def __init__(self, name: str) -> None:
         """Initialize internal variables."""
         self.name = name
-        self.urls = set()
+        self.urls: set[str] = set()
         self._indexes = ()
-        self.skippedUrls = set()
+        self.skippedUrls: set[str] = set()
         self.hitFirstStripUrl = False
 
-    def __hash__(self):
+    def __hash__(self) -> int:
         """Get hash value from name and index list."""
         return hash((self.name, self.indexes))
 
-    def shouldSkipUrl(self, url, data):
+    def shouldSkipUrl(self, url: str, data) -> bool:
         """Determine if search for images in given URL should be skipped."""
         return False
 
-    def getComicStrip(self, url, data):
+    def getComicStrip(self, url, data) -> ComicStrip:
         """Get comic strip downloader for given URL and data."""
-        imageUrls = self.extract_image_urls(url, data)
+        urls = self.extract_image_urls(url, data)
         # map modifier function on image URLs
-        imageUrls = [self.imageUrlModifier(x, data) for x in imageUrls]
+        urls = [self.imageUrlModifier(x, data) for x in urls]
         # remove duplicate URLs
-        imageUrls = uniq(imageUrls)
-        if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
+        urls = uniq(urls)
+        if len(urls) > 1 and not self.multipleImagesPerStrip:
             out.warn(
                 u"Found %d images instead of 1 at %s with expressions %s" %
-                (len(imageUrls), url, prettyMatcherList(self.imageSearch)))
-            image = imageUrls[0]
-            out.warn(u"Choosing image %s" % image)
-            imageUrls = (image,)
-        elif not imageUrls:
-            out.warn(u"Found no images at %s with expressions %s" % (url,
+                (len(urls), url, prettyMatcherList(self.imageSearch)))
+            image = urls[0]
+            out.warn("Choosing image %s" % image)
+            urls = (image,)
+        elif not urls:
+            out.warn("Found no images at %s with expressions %s" % (url,
                 prettyMatcherList(self.imageSearch)))
         if self.textSearch:
             text = self.fetchText(url, data, self.textSearch,
                 optional=self.textOptional)
         else:
             text = None
-        return ComicStrip(self, url, imageUrls, text=text)
+        return ComicStrip(self, url, urls, text=text)
 
     def getStrips(self, maxstrips=None):
         """Get comic strips."""
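
The annotated assignments `self.urls: set[str] = set()` and `self.skippedUrls: set[str] = set()` tell a type checker what a bare `set()` leaves ambiguous: the element type. A minimal sketch of why that matters, using a hypothetical class rather than the repository's code:

```python
from __future__ import annotations


class Crawler:
    def __init__(self, name: str) -> None:
        self.name = name
        # Without the annotation a checker only infers set[Any] from set().
        self.urls: set[str] = set()
        self.skipped: set[str] = set()

    def remember(self, url: str) -> None:
        self.urls.add(url)


crawler = Crawler("example")
crawler.remember("https://example.com/1")
# A type checker would reject the next line, since 42 is not a str:
# crawler.urls.add(42)
print(sorted(crawler.urls))
```
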
@@ -217,7 +217,7 @@ class Scraper:
                 break
             url = prevUrl
 
-    def isfirststrip(self, url):
+    def isfirststrip(self, url: str) -> bool:
         """Check if the specified URL is the first strip of a comic. This is
         specially for comics taken from archive.org, since the base URL of
         archive.org changes whenever pages are taken from a different
@@ -228,7 +228,7 @@ class Scraper:
         currenturl = ARCHIVE_ORG_URL.sub('', url)
         return firsturl == currenturl
 
-    def getPrevUrl(self, url, data):
+    def getPrevUrl(self, url: str, data) -> str | None:
         """Find previous URL."""
         prevUrl = None
         if self.prevSearch:
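
`getPrevUrl()` can legitimately come back empty, and `-> str | None` makes that explicit, so a checker pushes callers to handle the `None` case before treating the result as a URL. A standalone sketch of the calling pattern, with a hypothetical function that is not part of the diff:

```python
from __future__ import annotations


def previous_url(links: dict[str, str], url: str) -> str | None:
    """Return the stored previous URL, or None when there is none."""
    return links.get(url)


links = {"page/2": "page/1"}

prev = previous_url(links, "page/2")
if prev is not None:        # narrow str | None down to str before using it
    print("go back to", prev)
else:
    print("already at the first page")
```
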
@@ -243,40 +243,40 @@ class Scraper:
             getHandler().comicPageLink(self, url, prevUrl)
         return prevUrl
 
-    def getIndexStripUrl(self, index):
+    def getIndexStripUrl(self, index: str) -> str:
         """Get comic strip URL from index."""
         return self.stripUrl % index
 
-    def starter(self):
+    def starter(self) -> str:
         """Get starter URL from where to scrape comic strips."""
         return self.url
 
-    def namer(self, image_url, page_url):
+    def namer(self, image_url: str, page_url: str) -> str | None:
         """Return filename for given image and page URL."""
         return
 
-    def link_modifier(self, fromurl, tourl):
+    def link_modifier(self, fromurl: str, tourl: str) -> str:
         """Optional modification of parsed link (previous/back/latest) URLs.
         Useful if there are domain redirects. The default implementation does
         not modify the URL.
         """
         return tourl
 
-    def imageUrlModifier(self, image_url, data):
+    def imageUrlModifier(self, image_url: str, data) -> str:
         """Optional modification of parsed image URLs. Useful if the URL
         needs to be fixed before usage. The default implementation does
         not modify the URL. The given data is the URL page data.
         """
         return image_url
 
-    def vote(self):
+    def vote(self) -> None:
         """Cast a public vote for this comic."""
         uid = get_system_uid()
         data = {"name": self.name.replace('/', '_'), "uid": uid}
         response = self.session.post(configuration.VoteUrl, data=data)
         response.raise_for_status()
 
-    def get_download_dir(self, basepath):
+    def get_download_dir(self, basepath: str) -> str:
         """Try to find the corect download directory, ignoring case
         differences."""
         path = basepath
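
`namer()`, `link_modifier()` and `imageUrlModifier()` read as hook methods whose defaults do nothing, so annotating the base signatures documents the contract an override has to keep, for example that `namer()` may return `None` to fall back to a default filename. A hedged sketch of such an override, using a simplified stand-in base class and a hypothetical subclass rather than the project's real code:

```python
from __future__ import annotations


class BaseScraper:
    """Simplified stand-in for the annotated base class in this diff."""

    def namer(self, image_url: str, page_url: str) -> str | None:
        """Default hook: no custom filename, let the caller derive one."""
        return None

    def link_modifier(self, fromurl: str, tourl: str) -> str:
        """Default hook: leave parsed links untouched."""
        return tourl


class ExampleComic(BaseScraper):
    def namer(self, image_url: str, page_url: str) -> str | None:
        # The override keeps the annotated signature of the base hook.
        return page_url.rstrip("/").rsplit("/", 1)[-1] + ".png"


print(ExampleComic().namer("https://img.example/abc.png", "https://example.com/strip/42/"))
# prints: 42.png
```
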
@@ -294,16 +294,16 @@ class Scraper:
             path = os.path.join(path, part)
         return path
 
-    def getCompleteFile(self, basepath):
+    def getCompleteFile(self, basepath: str) -> str:
         """Get filename indicating all comics are downloaded."""
         dirname = self.get_download_dir(basepath)
         return os.path.join(dirname, "complete.txt")
 
-    def isComplete(self, basepath):
+    def isComplete(self, basepath: str) -> bool:
         """Check if all comics are downloaded."""
         return os.path.isfile(self.getCompleteFile(basepath))
 
-    def setComplete(self, basepath):
+    def setComplete(self, basepath: str) -> None:
         """Set complete flag for this comic, ie. all comics are downloaded."""
         if self.endOfLife:
             filename = self.getCompleteFile(basepath)
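
With signatures like `isComplete(self, basepath: str) -> bool` in place, a static checker can catch call-site mistakes that previously only surfaced at runtime. A hedged illustration, assuming a checker such as mypy is used (this diff does not show the project's actual tooling) and using a hypothetical stand-in class:

```python
from __future__ import annotations

import os


class Downloads:
    """Hypothetical stand-in mirroring the annotated helpers in this diff."""

    def completeFile(self, basepath: str) -> str:
        return os.path.join(basepath, "complete.txt")

    def isComplete(self, basepath: str) -> bool:
        return os.path.isfile(self.completeFile(basepath))


d = Downloads()
print(d.isComplete("/tmp/comics"))   # fine: str in, bool out
# A checker such as mypy would flag the next call, since 123 is not a str:
# d.isComplete(123)
```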