Stricter style checking & related style fixes

This commit is contained in:
Tobias Gruetzmacher 2020-10-11 20:15:27 +02:00
parent e84bbe2667
commit e64635e86b
34 changed files with 104 additions and 90 deletions

View file

@ -2,5 +2,6 @@
# Copyright (C) 2020 Tobias Gruetzmacher
import os
def get_hook_dirs():
return [os.path.dirname(__file__)]

View file

@ -301,12 +301,12 @@ def do_list(column_list=True, verbose=False, listall=False):
def do_single_list(scrapers, verbose=False):
    """Get list of scraper names, one per line.

    For every scraper either the full comic help is displayed (when
    ``verbose`` is true) or its tagged name is written via ``out.info``.

    :param scrapers: sized collection of scraper objects to list.
    :param verbose: show the detailed help for each scraper instead of
        just its tagged name.
    :return: tuple ``(count, disabled)`` where ``count`` is the number of
        scrapers listed and ``disabled`` is the dict handed to
        ``get_tagged_scraper_name`` as ``reasons`` (presumably filled with
        the reasons why scrapers are disabled -- confirm against that
        helper).
    """
    disabled = {}
    for scraperobj in scrapers:
        if verbose:
            display_comic_help(scraperobj)
        else:
            out.info(get_tagged_scraper_name(scraperobj, reasons=disabled))
    # The count is exactly the number of scrapers: the former
    # ``enumerate`` index was 0-based, hence its ``+ 1``; ``len()`` already
    # is the count and must not be incremented again.
    return len(scrapers), disabled
def do_column_list(scrapers):
@ -359,11 +359,10 @@ def main(args=None):
try:
options = setup_options().parse_args(args=args)
options.basepath = os.path.expanduser(options.basepath)
res = run(options)
return run(options)
except KeyboardInterrupt:
print("Aborted.")
res = 1
return 1
except Exception:
internal_error()
res = 2
return res
return 2

View file

@ -163,7 +163,7 @@ def getComics(options):
jobs.put(scraperobj)
# start threads
num_threads = min(options.parallel, jobs.qsize())
for i in range(num_threads):
for _i in range(num_threads):
t = ComicGetter(options, jobs)
threads.append(t)
t.start()

View file

@ -118,7 +118,7 @@ class RSSEventHandler(EventHandler):
title,
imageUrl,
description,
util.rfc822date(time.time())
util.rfc822date(time.time()),
)
if self.newfile:
@ -164,8 +164,7 @@ class HtmlEventHandler(EventHandler):
"""Get filename from date."""
fn = time.strftime('comics-%Y%m%d', date)
fn = os.path.join(self.basepath, 'html', fn + ".html")
fn = os.path.abspath(fn)
return fn
return os.path.abspath(fn)
def addNavLinks(self):
if self.yesterdayUrl:
@ -270,8 +269,7 @@ class JSONEventHandler(EventHandler):
def jsonFn(self, scraper):
"""Get filename for the JSON file for a comic."""
fn = os.path.join(scraper.get_download_dir(self.basepath), 'dosage.json')
fn = os.path.abspath(fn)
return fn
return os.path.abspath(fn)
def getComicData(self, scraper):
"""Return dictionary with comic info."""

View file

@ -122,7 +122,7 @@ class AHClub(_WPNaviIn):
nav = {
'ah-club-2-cover': 'ah-club-1-page-24',
'ah-club-3-cover': 'ah-club-2-page-28',
'ah-club-4-cover': 'ah-club-3-page-22'
'ah-club-4-cover': 'ah-club-3-page-22',
}
def getPrevUrl(self, url, data):

View file

@ -8,7 +8,7 @@ from re import compile, escape
from ..util import tagre
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPNaviIn, _WPWebcomic
from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPWebcomic
class BackOffice(_WPNavi):
@ -95,8 +95,7 @@ class Beetlebum(_BasicScraper):
def namer(self, image_url, page_url):
indexes = tuple(page_url.rstrip('/').split('/')[-4:])
name = '%s-%s-%s-%s' % indexes
name = name + '_' + image_url.split('/')[-1]
return name
return name + '_' + image_url.split('/')[-1]
class Bethellium(_WPWebcomic):
@ -265,8 +264,8 @@ class Brink(_WordPressScraper):
class BroodHollow(_WordPressScraper):
url = 'http://broodhollow.chainsawsuit.com/'
firstStripUrl = 'http://broodhollow.chainsawsuit.com/page/2012/10/06/book-1-curious-little-thing'
url = 'https://broodhollow.chainsawsuit.com/'
firstStripUrl = url + 'page/2012/10/06/book-1-curious-little-thing'
def shouldSkipUrl(self, url, data):
return data.xpath('//div[@id="comic"]//iframe')
@ -297,7 +296,7 @@ class ButtercupFestival(_ParserScraper):
imageSearch = '//center/img'
prevSearch = (
'//a[img[contains(@src, "previous")]]', # 3-x
'//a[text()="previous"]' # 2-x
'//a[text()="previous"]', # 2-x
)

View file

@ -249,8 +249,8 @@ class CigarroAndCerveja(_ParserScraper):
url = 'http://www.cigarro.ca/'
stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % 'reacquaintance'
imageSearch = '//div[@id="comic"]//img',
prevSearch = '//a[contains(text()," Prev")]',
imageSearch = '//div[@id="comic"]//img'
prevSearch = '//a[contains(text()," Prev")]'
class ClanOfTheCats(_WordPressScraper):
@ -488,4 +488,4 @@ class CynWolf(_ParserScraper):
endOfLife = True
def shouldSkipUrl(self, url, data):
return '2016/the-end' in url # video
return '2016/the-end' in url # video

View file

@ -67,7 +67,7 @@ class ComicFury(_ParserScraper):
return "%s_%s%s" % (self.prefix, num, ext)
@classmethod
def getmodules(cls):
def getmodules(cls): # noqa: Allowed to be long
return (
# These were once in the list below, but fell out from the index...
cls('BadassologyByMichaelBay', 'strudelology'),

View file

@ -20,7 +20,7 @@ class ComicSherpa(_ParserScraper):
return self.url + '&uc_full_date=%s' % index
@classmethod
def getmodules(cls):
def getmodules(cls): # noqa: Allowed to be long
return (
# do not edit anything below since these entries are generated from
# scripts/comicsherpa.py

View file

@ -26,7 +26,7 @@ class ComicsKingdom(_BasicScraper):
return tourl
@classmethod
def getmodules(cls):
def getmodules(cls): # noqa: Allowed to be long
return (
# Some comics are not listed on the "all" page (too old?)

View file

@ -39,12 +39,13 @@ class TheCyantianChronicles(_WordPressScraper):
cls('Darius', 'dbook-01', '03102010', last='darius-end'),
cls('DracoVulpes', 'draco-vulpes', 'draco-vulpes'),
cls('GenoworksSaga', 'genoworks-saga', '07012004'),
cls('GralenCraggHall', 'gchall', '07152002', last='chapter-6-05', nav={'chapter-5': '02152005'}),
cls('GralenCraggHall', 'gchall', '07152002', last='chapter-6-05',
nav={'chapter-5': '02152005'}),
cls('Kiet', 'kiet', 'kiet-c01'),
cls('NoAngel', 'no-angel', '08112001', last='12142006'),
cls('Pawprints', 'pawprints', 'airboard-page-1', last='pawprints-sheana-10'),
cls('RandomRamblings', 'random-ramblings', 'darrik'),
cls('SinkOrSwim', 'sos', 'sink-or-swim', last='ricochete-and-seraphim')
cls('SinkOrSwim', 'sos', 'sink-or-swim', last='ricochete-and-seraphim'),
)
@ -85,5 +86,5 @@ class ShivaeComics(_WordPressScraper):
cls('Extras', 'extras', '01012012', nav={'12302012': '08152013'}),
cls('Pure', 'pure', '04082002', last='chapter-6-page-1'),
cls('SerinFairyHunter', 'serin', 'character-serin'),
cls('SivineBlades', 'sivine', '06302002', last='10242008')
cls('SivineBlades', 'sivine', '06302002', last='10242008'),
)

View file

@ -52,5 +52,5 @@ class Derideal(_ParserScraper):
cls('LRE', 'RLE', 'the-leyend-of-the-rose-cover'),
cls('ProjectPrime', 'project-prime', 'custus-part-i-cover'),
cls('PurpurinaEffect', 'purpurina-effect', 'purpurina-effect-cover'),
cls('TheVoid', 'the-void', 'the-void-cover')
cls('TheVoid', 'the-void', 'the-void-cover'),
)

View file

@ -38,5 +38,5 @@ class DMFA(_ParserScraper):
cls('Matilda', 'Ma_001', last='Ma_060'),
cls('PerfectDate', 'PD_01', last='PD_18'),
cls('TakePride', 'P_01', last='P_08'),
cls('Valentines', 'Vol_VDay001', last='Vol_VDaylast')
cls('Valentines', 'Vol_VDay001', last='Vol_VDaylast'),
)

View file

@ -120,10 +120,9 @@ class Erfworld(_ParserScraper):
def fetchUrls(self, url, data, urlSearch):
# Return the main logo for text-only pages
try:
imageUrls = super(Erfworld, self).fetchUrls(url, data, urlSearch)
return super().fetchUrls(url, data, urlSearch)
except ValueError:
imageUrls = super(Erfworld, self).fetchUrls(url, data, '//li[@class="erf-logo"]//img')
return imageUrls
return super().fetchUrls(url, data, '//li[@class="erf-logo"]//img')
def namer(self, imageUrl, pageUrl):
# Fix inconsistent filenames

View file

@ -34,7 +34,7 @@ class GoComics(_ParserScraper):
return data.xpath('//img[contains(@src, "content-error-missing")]')
@classmethod
def getmodules(cls):
def getmodules(cls): # noqa: Allowed to be long
return (
# old comics removed from the listing
cls('HeavenlyNostrils', 'heavenly-nostrils'),

View file

@ -26,9 +26,7 @@ class HagarTheHorrible(_BasicScraper):
url = 'http://www.hagardunor.net/comics.php'
data = self.getPage(url)
pattern = compile(tagre("a", "href", self.prevUrl))
for starturl in self.fetchUrls(url, data, pattern):
pass
return starturl
return self.fetchUrls(url, data, pattern)[-1]
# "Hiatus", navigation missing

View file

@ -54,7 +54,8 @@ class KeenSpot(_ParserScraper):
# Not on frontpage...
cls('Buzzboy', 'buzzboy'),
cls('EveryoneLovesAdis', 'adis'),
cls('GeneCatlowAlternate', 'genecatlow', last='20170302', adult=True, path='altd/%s.html'),
cls('GeneCatlowAlternate', 'genecatlow', last='20170302',
adult=True, path='altd/%s.html'),
# do not edit anything below since these entries are generated from
# scripts/update_plugins.sh

View file

@ -107,14 +107,13 @@ class LifeAsRendered(_ParserScraper):
'0500': '0501',
'0508': '0509',
'0558': '0559',
'0577': '05extra'
'0577': '05extra',
}
def namer(self, imageUrl, pageUrl):
# Fix inconsistent filenames
filename = imageUrl.rsplit('/', 1)[-1]
filename = filename.replace('ReN', 'N').replace('N01P', 'A02S')
return filename
return filename.replace('ReN', 'N').replace('N01P', 'A02S')
def fetchUrls(self, url, data, urlSearch):
# Fix missing image link
@ -167,7 +166,7 @@ class LittleTales(_ParserScraper):
starter = bounceStarter
nav = {
'517': '515',
'449': '447'
'449': '447',
}
def namer(self, imageUrl, pageUrl):
@ -190,8 +189,9 @@ class LoadingArtist(_ParserScraper):
class LoFiJinks(_WPNaviIn):
url = 'http://hijinksensue.com/comic/learning-to-love-again/'
firstStripUrl = 'http://hijinksensue.com/comic/lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'
baseUrl = 'https://hijinksensue.com/comic/'
url = baseUrl + 'learning-to-love-again/'
firstStripUrl = baseUrl + 'lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'
endOfLife = True

View file

@ -42,7 +42,7 @@ class NamirDeiter(_ParserScraper):
cls('SpareParts', 'sparepartscomics.com', first='20031022', last='20080331'),
cls('TheNDU', 'thendu.com'),
cls('WonderKittens', 'wonderkittens.com'),
cls('YouSayItFirst', 'yousayitfirst.com', first='20040220', last='20130125')
cls('YouSayItFirst', 'yousayitfirst.com', first='20040220', last='20130125'),
)

View file

@ -27,7 +27,7 @@ class Removed(Scraper):
return {'rem-' + self.reason: self.REASONS[self.reason]}
@classmethod
def getmodules(cls):
def getmodules(cls): # noqa: Allowed to be long
return (
# Removed in 2.16
cls('AbleAndBaker'),
@ -675,7 +675,7 @@ class Renamed(Scraper):
return {'ren-%i' % self.i: self.MSG % self.newname}
@classmethod
def getmodules(cls):
def getmodules(cls): # noqa: Allowed to be long
return (
# Renamed in 2.16
cls('1997', '1977'),

View file

@ -5,12 +5,12 @@ from ..scraper import _ParserScraper
class RHJunior(_ParserScraper):
stripUrl = 'http://www.rhjunior.com/%s/'
stripUrl = 'https://www.rhjunior.com/%s/'
imageSearch = '//div[contains(@class, "entry-content")]//img'
multipleImagesPerStrip = True
def __init__(self, name, sub, prev, first, last=None):
super(RHJunior, self).__init__('RHJunior/' + name)
super().__init__('RHJunior/' + name)
self.prevSearch = ('//a[@rel="prev"]', '//a[@title="' + prev + '"]')
self.url = self.stripUrl % ('comics/' + sub)
self.firstStripUrl = self.stripUrl % (sub + '-' + first)
@ -22,10 +22,16 @@ class RHJunior(_ParserScraper):
@classmethod
def getmodules(cls):
return (
cls('GoblinHollow', 'goblin-hollow', '', '0001', last='7'),
cls('NipAndTuck', 'nip-and-tuck', 'Nip and Tuck', '0000'),
cls('QuentynQuinnSpaceRanger', 'quentyn-quinn-space-ranger', 'Quentyn Quinn, Space Ranger', '0001'),
cls('TalesOfTheQuestor', 'tales-of-the-questor', 'Tales of the Questor', 'cover'),
cls('TheJournalOfEnniasLongscript', 'the-journal-of-ennias-longscript', '', '0001', last='0111'),
cls('TheProbabilityBomb', 'the-probability-bomb', 'the Probability Bomb', 'kickstarter')
cls('GoblinHollow', 'goblin-hollow',
'', '0001', last='7'),
cls('NipAndTuck', 'nip-and-tuck',
'Nip and Tuck', '0000'),
cls('QuentynQuinnSpaceRanger', 'quentyn-quinn-space-ranger',
'Quentyn Quinn, Space Ranger', '0001'),
cls('TalesOfTheQuestor', 'tales-of-the-questor',
'Tales of the Questor', 'cover'),
cls('TheJournalOfEnniasLongscript', 'the-journal-of-ennias-longscript',
'', '0001', last='0111'),
cls('TheProbabilityBomb', 'the-probability-bomb',
'the Probability Bomb', 'kickstarter'),
)

View file

@ -371,7 +371,7 @@ class SoloLeveling(_ParserScraper):
'88-0_5d9e0dedb942e/03.': '88-0_5d9e0dedb942e/03b.',
'88-0_5d9e0dedb942e/05.': '88-0_5d9e0dedb942e/05a.',
'88-0_5d9e0dedb942e/30.': '88-0_5d9e0dedb942e/30a.',
'87-0_5d94cdebd9df7/01a.': '87-0_5d94cdebd9df7/01c.'
'87-0_5d94cdebd9df7/01a.': '87-0_5d94cdebd9df7/01c.',
}
def imageUrlModifier(self, imageUrl, data):
@ -533,7 +533,7 @@ class SSDD(_ParserScraper):
self.stripUrl % '20050504',
self.stripUrl % '20040705',
self.stripUrl % '20030418',
self.stripUrl % '20030214'
self.stripUrl % '20030214',
)
@ -598,6 +598,7 @@ class StarfireAgency(_WPWebcomic):
self.currentChapter = self.currentChapter - 1
return filename
class StarTrip(_ComicControlScraper):
url = 'https://www.startripcomic.com/'
@ -684,11 +685,11 @@ class SurvivingTheWorld(_ParserScraper):
imageSearch = (
'//div[@class="img"]/img', # When there's one image per strip
'//div[@class="img"]/p/img', # When there's multiple images per strip
'//td/img' # Special case for Lesson1296.html
'//td/img', # Special case for Lesson1296.html
)
prevSearch = (
'//li[@class="previous"]/a',
'//td/a' # Special case for Lesson1296.html
'//td/a', # Special case for Lesson1296.html
)
multipleImagesPerStrip = True
help = 'Index format: name'

View file

@ -1,7 +1,7 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
# Copyright (C) 2015-2020 Tobias Gruetzmacher
from .common import _WordPressScraper
@ -9,16 +9,17 @@ class SandraAndWoo(_WordPressScraper):
prevSearch = '//a[@rel="prev"]'
def __init__(self, name, urlName, firstUrl, lang='en'):
super(SandraAndWoo, self).__init__(name)
super().__init__(name)
self.url = 'http://www.sandraandwoo.com/' + urlName
self.firstStripUrl = self.url + firstUrl
self.lang = lang
@classmethod
def getmodules(cls):
return [
return (
cls('Gaia', 'gaia/', '2000/01/01/welcome-to-gaia/'),
cls('GaiaGerman', 'gaiade/', '2000/01/01/welcome-to-gaia/', lang='de'),
cls('SandraAndWoo', '', '2000/01/01/welcome-to-sandra-and-woo/'),
cls('SandraAndWooGerman', 'woode/', '2008/10/19/ein-ausgefuchster-waschbar/', lang='de'),
]
cls('SandraAndWooGerman', 'woode/',
'2008/10/19/ein-ausgefuchster-waschbar/', lang='de'),
)

View file

@ -61,7 +61,7 @@ class SmackJeeves(_ParserScraper):
'titleNo': self._comicid,
'articleNo': url.rsplit('=', 1)[1],
'page': 1,
'order': 'new'
'order': 'new',
})
response.raise_for_status()
comments = response.json()['result']['list']
@ -70,10 +70,10 @@ class SmackJeeves(_ParserScraper):
return comment['commentText']
return None
else:
super(SmackJeeves, self).fetchText(url, data, textSearch, optional)
return super().fetchText(url, data, textSearch, optional)
@classmethod
def getmodules(cls):
def getmodules(cls): # noqa: Allowed to be long
return (
cls('20TimesKirby', 91583),
cls('2Kingdoms', 112096, endOfLife=True),

View file

@ -44,7 +44,7 @@ class WebToons(_ParserScraper):
return "%s-%03d.%s" % (episodeNum, imageNum, imageExt)
@classmethod
def getmodules(cls):
def getmodules(cls): # noqa: Allowed to be long
return (
# START AUTOUPDATE
cls('1000', 'action/one-thousand', 1217),

View file

@ -117,7 +117,7 @@ class Scraper(object):
"""Initialize internal variables."""
self.name = name
self.urls = set()
self._indexes = tuple()
self._indexes = ()
self.skippedUrls = set()
self.hitFirstStripUrl = False
@ -247,7 +247,7 @@ class Scraper(object):
def namer(self, image_url, page_url):
"""Return filename for given image and page URL."""
return None
return
def link_modifier(self, fromurl, tourl):
"""Optional modification of parsed link (previous/back/latest) URLs.
@ -342,19 +342,18 @@ class Scraper(object):
Return language of the comic as a human-readable language name instead
of a 2-character ISO639-1 code.
"""
lang = 'Unknown (%s)' % self.lang
if pycountry is None:
if self.lang in languages.Languages:
lang = languages.Languages[self.lang]
return languages.Languages[self.lang]
else:
try:
lang = pycountry.languages.get(alpha_2=self.lang).name
return pycountry.languages.get(alpha_2=self.lang).name
except KeyError:
try:
lang = pycountry.languages.get(alpha2=self.lang).name
return pycountry.languages.get(alpha2=self.lang).name
except KeyError:
pass
return lang
return 'Unknown (%s)' % self.lang
def geoblocked(self):
"""Helper method to indicate that the user is most probably geo-blocked."""
@ -467,8 +466,7 @@ class _ParserScraper(Scraper):
return tree
def _parse_page(self, data):
tree = lxml.html.document_fromstring(data)
return tree
return lxml.html.document_fromstring(data)
def fetchUrls(self, url, data, urlSearch):
"""Search all entries for given XPath in a HTML page."""

View file

@ -56,7 +56,7 @@ class SingleInstance(object):
try:
fcntl.lockf(self.fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
# lockf raised IOError on Python < 3.3; since 3.3 IOError is an alias of OSError
except (IOError, OSError):
except OSError:
self.exit(exit_code)
self.initialized = True

View file

@ -1,7 +1,7 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2019 Tobias Gruetzmacher
# Copyright (C) 2015-2020 Tobias Gruetzmacher
import html
import os
import re

View file

@ -5,7 +5,7 @@ from lxml import etree
NS = {
'd': 'https://dosage.rocks/xpath',
're': 'http://exslt.org/regular-expressions'
're': 'http://exslt.org/regular-expressions',
}

View file

@ -196,5 +196,4 @@ def format_name(text):
"""Format a comic name."""
name = html.unescape(text)
name = "".join(capfirst(x) for x in name.split(" "))
name = asciify(name.translate(TRANS))
return name
return asciify(name.translate(TRANS))

View file

@ -4,6 +4,8 @@ description = a comic strip downloader and archiver
long_description = file: README.md
long_description_content_type = text/markdown
url = https://dosage.rocks
maintainer = Tobias Gruetzmacher
maintainer_email = tobias-dosage@23.gs
license = MIT License
license_file = COPYING
platforms = Any
@ -23,8 +25,6 @@ classifiers =
Topic :: Internet :: WWW/HTTP
Topic :: Multimedia :: Graphics
keywords = comic,webcomic,downloader,archiver,crawler
maintainer = Tobias Gruetzmacher
maintainer_email = tobias-dosage@23.gs
project_urls =
Code = https://github.com/webcomics/dosage
Issue tracker = https://github.com/webcomics/dosage/issues
@ -56,16 +56,26 @@ bash =
css =
cssselect
dev =
pytest-cov
pytest-xdist
responses
flake8
flake8-2020;python_version>'3.5'
flake8-breakpoint;python_version>'3.5'
flake8-bugbear
flake8-coding
flake8-commas
flake8-comprehensions
flake8-eradicate
flake8-fixme
flake8-functions
flake8-future-import
flake8-logging-format
flake8-no-fstring;python_version>'3.5'
flake8-pytest
flake8-pytest-style;python_version>'3.5'
flake8-strings;python_version>'3.5'
pytest-cov
pytest-xdist
responses
setup-cfg-fmt
[bdist_wheel]
universal = 1

View file

@ -26,7 +26,7 @@ def get_test_scrapers():
# complex _ParserScraper
'GoComics/CalvinAndHobbes',
# _WordPressScraper
'GrrlPower'
'GrrlPower',
]
scraper_pattern = re.compile('^(' + '|'.join(testscrapernames) + ')$')
@ -39,7 +39,7 @@ def get_test_scrapers():
def pytest_generate_tests(metafunc):
if 'scraperobj' in metafunc.fixturenames:
scrapers = get_test_scrapers()
scraperids = list(x.name for x in scrapers)
scraperids = [x.name for x in scrapers]
metafunc.parametrize('scraperobj', scrapers, ids=scraperids)

View file

@ -176,5 +176,5 @@ class TestDosage(object):
images = data['pages'][page]['images']
assert len(images) == 2
for imgurl, imgfile in images.items():
for imgfile in images.values():
assert directory.join(imgfile).check(file=1)

View file

@ -43,3 +43,6 @@ ignore = E127,E128,E241,FI12,FI14,FI15,FI50,FI51,FI53,FI54,FI55,W504
[pytest]
filterwarnings = default
junit_family = xunit2
[isort]