Stricter style checking & related style fixes

Tobias Gruetzmacher 2020-10-11 20:15:27 +02:00
parent e84bbe2667
commit e64635e86b
34 changed files with 104 additions and 90 deletions

View file

@@ -2,5 +2,6 @@
 # Copyright (C) 2020 Tobias Gruetzmacher
 import os
 
+
 def get_hook_dirs():
     return [os.path.dirname(__file__)]

View file

@@ -301,12 +301,12 @@ def do_list(column_list=True, verbose=False, listall=False):
 def do_single_list(scrapers, verbose=False):
     """Get list of scraper names, one per line."""
     disabled = {}
-    for num, scraperobj in enumerate(scrapers):
+    for scraperobj in scrapers:
         if verbose:
             display_comic_help(scraperobj)
         else:
             out.info(get_tagged_scraper_name(scraperobj, reasons=disabled))
-    return num + 1, disabled
+    return len(scrapers), disabled
 
 
 def do_column_list(scrapers):
@@ -359,11 +359,10 @@ def main(args=None):
     try:
         options = setup_options().parse_args(args=args)
         options.basepath = os.path.expanduser(options.basepath)
-        res = run(options)
+        return run(options)
     except KeyboardInterrupt:
         print("Aborted.")
-        res = 1
+        return 1
     except Exception:
         internal_error()
-        res = 2
-    return res
+        return 2

View file

@@ -163,7 +163,7 @@ def getComics(options):
         jobs.put(scraperobj)
     # start threads
     num_threads = min(options.parallel, jobs.qsize())
-    for i in range(num_threads):
+    for _i in range(num_threads):
         t = ComicGetter(options, jobs)
         threads.append(t)
         t.start()
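
Note: the `_i` rename marks the loop counter as intentionally unused; the leading underscore is the convention checkers such as flake8-bugbear (B007, "loop control variable not used within the loop body") look for. A minimal sketch of the pattern, with a hypothetical worker function:

    for _attempt in range(3):   # counter is never read inside the body
        spawn_worker()          # hypothetical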

View file

@@ -118,7 +118,7 @@ class RSSEventHandler(EventHandler):
             title,
             imageUrl,
             description,
-            util.rfc822date(time.time())
+            util.rfc822date(time.time()),
         )
 
         if self.newfile:
@@ -164,8 +164,7 @@ class HtmlEventHandler(EventHandler):
         """Get filename from date."""
         fn = time.strftime('comics-%Y%m%d', date)
         fn = os.path.join(self.basepath, 'html', fn + ".html")
-        fn = os.path.abspath(fn)
-        return fn
+        return os.path.abspath(fn)
 
     def addNavLinks(self):
         if self.yesterdayUrl:
@@ -270,8 +269,7 @@ class JSONEventHandler(EventHandler):
     def jsonFn(self, scraper):
         """Get filename for the JSON file for a comic."""
         fn = os.path.join(scraper.get_download_dir(self.basepath), 'dosage.json')
-        fn = os.path.abspath(fn)
-        return fn
+        return os.path.abspath(fn)
 
     def getComicData(self, scraper):
         """Return dictionary with comic info."""

View file

@@ -122,7 +122,7 @@ class AHClub(_WPNaviIn):
     nav = {
         'ah-club-2-cover': 'ah-club-1-page-24',
         'ah-club-3-cover': 'ah-club-2-page-28',
-        'ah-club-4-cover': 'ah-club-3-page-22'
+        'ah-club-4-cover': 'ah-club-3-page-22',
     }
 
     def getPrevUrl(self, url, data):

View file

@@ -8,7 +8,7 @@ from re import compile, escape
 from ..util import tagre
 from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import indirectStarter
-from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPNaviIn, _WPWebcomic
+from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPWebcomic
 
 
 class BackOffice(_WPNavi):
@@ -95,8 +95,7 @@ class Beetlebum(_BasicScraper):
     def namer(self, image_url, page_url):
         indexes = tuple(page_url.rstrip('/').split('/')[-4:])
         name = '%s-%s-%s-%s' % indexes
-        name = name + '_' + image_url.split('/')[-1]
-        return name
+        return name + '_' + image_url.split('/')[-1]
 
 
 class Bethellium(_WPWebcomic):
@@ -265,8 +264,8 @@ class Brink(_WordPressScraper):
 class BroodHollow(_WordPressScraper):
-    url = 'http://broodhollow.chainsawsuit.com/'
-    firstStripUrl = 'http://broodhollow.chainsawsuit.com/page/2012/10/06/book-1-curious-little-thing'
+    url = 'https://broodhollow.chainsawsuit.com/'
+    firstStripUrl = url + 'page/2012/10/06/book-1-curious-little-thing'
 
     def shouldSkipUrl(self, url, data):
         return data.xpath('//div[@id="comic"]//iframe')
@@ -297,7 +296,7 @@ class ButtercupFestival(_ParserScraper):
     imageSearch = '//center/img'
     prevSearch = (
         '//a[img[contains(@src, "previous")]]',  # 3-x
-        '//a[text()="previous"]'  # 2-x
+        '//a[text()="previous"]',  # 2-x
     )

View file

@@ -249,8 +249,8 @@ class CigarroAndCerveja(_ParserScraper):
     url = 'http://www.cigarro.ca/'
     stripUrl = url + 'comic/%s/'
     firstStripUrl = stripUrl % 'reacquaintance'
-    imageSearch = '//div[@id="comic"]//img',
-    prevSearch = '//a[contains(text()," Prev")]',
+    imageSearch = '//div[@id="comic"]//img'
+    prevSearch = '//a[contains(text()," Prev")]'
 
 
 class ClanOfTheCats(_WordPressScraper):
@@ -488,4 +488,4 @@ class CynWolf(_ParserScraper):
     endOfLife = True
 
     def shouldSkipUrl(self, url, data):
-        return '2016/the-end' in url # video
+        return '2016/the-end' in url  # video
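
Note: the two commas dropped from CigarroAndCerveja are not the same cleanup as the trailing commas added elsewhere in this commit. After a bare assignment, a trailing comma builds a 1-tuple, so the old attributes were tuples rather than strings, something the matching machinery may only have tolerated by accident:

    imageSearch = '//div[@id="comic"]//img',   # == ('//div[@id="comic"]//img',)
    imageSearch = '//div[@id="comic"]//img'    # a plain XPath string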

View file

@@ -67,7 +67,7 @@ class ComicFury(_ParserScraper):
         return "%s_%s%s" % (self.prefix, num, ext)
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # These were once in the list below, but fell out from the index...
             cls('BadassologyByMichaelBay', 'strudelology'),
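
Note: in the `# noqa: Allowed to be long` comments added to several getmodules() definitions, the text after the colon is a justification for human readers, not a valid flake8 code list, so flake8 most likely treats these as blanket `# noqa` lines that silence every check there (presumably the function-length limit from the newly added flake8-functions). A code-specific suppression would instead name the code, e.g. `# noqa: E501`.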

View file

@@ -20,7 +20,7 @@ class ComicSherpa(_ParserScraper):
         return self.url + '&uc_full_date=%s' % index
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # do not edit anything below since these entries are generated from
             # scripts/comicsherpa.py

View file

@@ -26,7 +26,7 @@ class ComicsKingdom(_BasicScraper):
         return tourl
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # Some comics are not listed on the "all" page (too old?)

View file

@@ -39,12 +39,13 @@ class TheCyantianChronicles(_WordPressScraper):
             cls('Darius', 'dbook-01', '03102010', last='darius-end'),
             cls('DracoVulpes', 'draco-vulpes', 'draco-vulpes'),
             cls('GenoworksSaga', 'genoworks-saga', '07012004'),
-            cls('GralenCraggHall', 'gchall', '07152002', last='chapter-6-05', nav={'chapter-5': '02152005'}),
+            cls('GralenCraggHall', 'gchall', '07152002', last='chapter-6-05',
+                nav={'chapter-5': '02152005'}),
             cls('Kiet', 'kiet', 'kiet-c01'),
             cls('NoAngel', 'no-angel', '08112001', last='12142006'),
             cls('Pawprints', 'pawprints', 'airboard-page-1', last='pawprints-sheana-10'),
             cls('RandomRamblings', 'random-ramblings', 'darrik'),
-            cls('SinkOrSwim', 'sos', 'sink-or-swim', last='ricochete-and-seraphim')
+            cls('SinkOrSwim', 'sos', 'sink-or-swim', last='ricochete-and-seraphim'),
         )
@@ -85,5 +86,5 @@ class ShivaeComics(_WordPressScraper):
             cls('Extras', 'extras', '01012012', nav={'12302012': '08152013'}),
             cls('Pure', 'pure', '04082002', last='chapter-6-page-1'),
             cls('SerinFairyHunter', 'serin', 'character-serin'),
-            cls('SivineBlades', 'sivine', '06302002', last='10242008')
+            cls('SivineBlades', 'sivine', '06302002', last='10242008'),
         )

View file

@@ -52,5 +52,5 @@ class Derideal(_ParserScraper):
             cls('LRE', 'RLE', 'the-leyend-of-the-rose-cover'),
             cls('ProjectPrime', 'project-prime', 'custus-part-i-cover'),
             cls('PurpurinaEffect', 'purpurina-effect', 'purpurina-effect-cover'),
-            cls('TheVoid', 'the-void', 'the-void-cover')
+            cls('TheVoid', 'the-void', 'the-void-cover'),
         )

View file

@@ -38,5 +38,5 @@ class DMFA(_ParserScraper):
             cls('Matilda', 'Ma_001', last='Ma_060'),
             cls('PerfectDate', 'PD_01', last='PD_18'),
             cls('TakePride', 'P_01', last='P_08'),
-            cls('Valentines', 'Vol_VDay001', last='Vol_VDaylast')
+            cls('Valentines', 'Vol_VDay001', last='Vol_VDaylast'),
         )

View file

@@ -120,10 +120,9 @@ class Erfworld(_ParserScraper):
     def fetchUrls(self, url, data, urlSearch):
         # Return the main logo for text-only pages
         try:
-            imageUrls = super(Erfworld, self).fetchUrls(url, data, urlSearch)
+            return super().fetchUrls(url, data, urlSearch)
         except ValueError:
-            imageUrls = super(Erfworld, self).fetchUrls(url, data, '//li[@class="erf-logo"]//img')
-        return imageUrls
+            return super().fetchUrls(url, data, '//li[@class="erf-logo"]//img')
 
     def namer(self, imageUrl, pageUrl):
         # Fix inconsistent filenames
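
Note: the super() cleanups throughout this commit rely on Python 3's zero-argument form, which resolves to the enclosing class and the current instance; inside Erfworld.fetchUrls, super() is exactly equivalent to the old super(Erfworld, self). For illustration:

    class Base:
        def greet(self):
            return 'hello'

    class Child(Base):
        def greet(self):
            # same as super(Child, self).greet()
            return super().greet() + '!'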

View file

@@ -34,7 +34,7 @@ class GoComics(_ParserScraper):
         return data.xpath('//img[contains(@src, "content-error-missing")]')
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # old comics removed from the listing
             cls('HeavenlyNostrils', 'heavenly-nostrils'),

View file

@@ -26,9 +26,7 @@ class HagarTheHorrible(_BasicScraper):
         url = 'http://www.hagardunor.net/comics.php'
         data = self.getPage(url)
         pattern = compile(tagre("a", "href", self.prevUrl))
-        for starturl in self.fetchUrls(url, data, pattern):
-            pass
-        return starturl
+        return self.fetchUrls(url, data, pattern)[-1]
 
     # "Hiatus", navigation missing

View file

@@ -54,7 +54,8 @@ class KeenSpot(_ParserScraper):
             # Not on frontpage...
             cls('Buzzboy', 'buzzboy'),
             cls('EveryoneLovesAdis', 'adis'),
-            cls('GeneCatlowAlternate', 'genecatlow', last='20170302', adult=True, path='altd/%s.html'),
+            cls('GeneCatlowAlternate', 'genecatlow', last='20170302',
+                adult=True, path='altd/%s.html'),
 
             # do not edit anything below since these entries are generated from
             # scripts/update_plugins.sh

View file

@@ -107,14 +107,13 @@ class LifeAsRendered(_ParserScraper):
         '0500': '0501',
         '0508': '0509',
         '0558': '0559',
-        '0577': '05extra'
+        '0577': '05extra',
     }
 
     def namer(self, imageUrl, pageUrl):
         # Fix inconsistent filenames
         filename = imageUrl.rsplit('/', 1)[-1]
-        filename = filename.replace('ReN', 'N').replace('N01P', 'A02S')
-        return filename
+        return filename.replace('ReN', 'N').replace('N01P', 'A02S')
 
     def fetchUrls(self, url, data, urlSearch):
         # Fix missing image link
@@ -167,7 +166,7 @@ class LittleTales(_ParserScraper):
     starter = bounceStarter
     nav = {
         '517': '515',
-        '449': '447'
+        '449': '447',
     }
 
     def namer(self, imageUrl, pageUrl):
@@ -190,8 +189,9 @@ class LoadingArtist(_ParserScraper):
 class LoFiJinks(_WPNaviIn):
-    url = 'http://hijinksensue.com/comic/learning-to-love-again/'
-    firstStripUrl = 'http://hijinksensue.com/comic/lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'
+    baseUrl = 'https://hijinksensue.com/comic/'
+    url = baseUrl + 'learning-to-love-again/'
+    firstStripUrl = baseUrl + 'lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'
     endOfLife = True

View file

@@ -42,7 +42,7 @@ class NamirDeiter(_ParserScraper):
             cls('SpareParts', 'sparepartscomics.com', first='20031022', last='20080331'),
             cls('TheNDU', 'thendu.com'),
             cls('WonderKittens', 'wonderkittens.com'),
-            cls('YouSayItFirst', 'yousayitfirst.com', first='20040220', last='20130125')
+            cls('YouSayItFirst', 'yousayitfirst.com', first='20040220', last='20130125'),
         )

View file

@@ -27,7 +27,7 @@ class Removed(Scraper):
         return {'rem-' + self.reason: self.REASONS[self.reason]}
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # Removed in 2.16
             cls('AbleAndBaker'),
@@ -675,7 +675,7 @@ class Renamed(Scraper):
         return {'ren-%i' % self.i: self.MSG % self.newname}
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # Renamed in 2.16
             cls('1997', '1977'),

View file

@@ -5,12 +5,12 @@ from ..scraper import _ParserScraper
 class RHJunior(_ParserScraper):
-    stripUrl = 'http://www.rhjunior.com/%s/'
+    stripUrl = 'https://www.rhjunior.com/%s/'
     imageSearch = '//div[contains(@class, "entry-content")]//img'
     multipleImagesPerStrip = True
 
     def __init__(self, name, sub, prev, first, last=None):
-        super(RHJunior, self).__init__('RHJunior/' + name)
+        super().__init__('RHJunior/' + name)
         self.prevSearch = ('//a[@rel="prev"]', '//a[@title="' + prev + '"]')
         self.url = self.stripUrl % ('comics/' + sub)
         self.firstStripUrl = self.stripUrl % (sub + '-' + first)
@@ -22,10 +22,16 @@ class RHJunior(_ParserScraper):
     @classmethod
     def getmodules(cls):
         return (
-            cls('GoblinHollow', 'goblin-hollow', '', '0001', last='7'),
-            cls('NipAndTuck', 'nip-and-tuck', 'Nip and Tuck', '0000'),
-            cls('QuentynQuinnSpaceRanger', 'quentyn-quinn-space-ranger', 'Quentyn Quinn, Space Ranger', '0001'),
-            cls('TalesOfTheQuestor', 'tales-of-the-questor', 'Tales of the Questor', 'cover'),
-            cls('TheJournalOfEnniasLongscript', 'the-journal-of-ennias-longscript', '', '0001', last='0111'),
-            cls('TheProbabilityBomb', 'the-probability-bomb', 'the Probability Bomb', 'kickstarter')
+            cls('GoblinHollow', 'goblin-hollow',
+                '', '0001', last='7'),
+            cls('NipAndTuck', 'nip-and-tuck',
+                'Nip and Tuck', '0000'),
+            cls('QuentynQuinnSpaceRanger', 'quentyn-quinn-space-ranger',
+                'Quentyn Quinn, Space Ranger', '0001'),
+            cls('TalesOfTheQuestor', 'tales-of-the-questor',
+                'Tales of the Questor', 'cover'),
+            cls('TheJournalOfEnniasLongscript', 'the-journal-of-ennias-longscript',
+                '', '0001', last='0111'),
+            cls('TheProbabilityBomb', 'the-probability-bomb',
+                'the Probability Bomb', 'kickstarter'),
         )

View file

@@ -371,7 +371,7 @@ class SoloLeveling(_ParserScraper):
         '88-0_5d9e0dedb942e/03.': '88-0_5d9e0dedb942e/03b.',
         '88-0_5d9e0dedb942e/05.': '88-0_5d9e0dedb942e/05a.',
         '88-0_5d9e0dedb942e/30.': '88-0_5d9e0dedb942e/30a.',
-        '87-0_5d94cdebd9df7/01a.': '87-0_5d94cdebd9df7/01c.'
+        '87-0_5d94cdebd9df7/01a.': '87-0_5d94cdebd9df7/01c.',
     }
 
     def imageUrlModifier(self, imageUrl, data):
@@ -533,7 +533,7 @@ class SSDD(_ParserScraper):
             self.stripUrl % '20050504',
             self.stripUrl % '20040705',
             self.stripUrl % '20030418',
-            self.stripUrl % '20030214'
+            self.stripUrl % '20030214',
         )
@@ -598,6 +598,7 @@ class StarfireAgency(_WPWebcomic):
             self.currentChapter = self.currentChapter - 1
         return filename
 
+
 class StarTrip(_ComicControlScraper):
     url = 'https://www.startripcomic.com/'
@@ -684,11 +685,11 @@ class SurvivingTheWorld(_ParserScraper):
     imageSearch = (
         '//div[@class="img"]/img',  # When there's one image per strip
         '//div[@class="img"]/p/img',  # When there's multiple images per strip
-        '//td/img'  # Special case for Lesson1296.html
+        '//td/img',  # Special case for Lesson1296.html
     )
     prevSearch = (
         '//li[@class="previous"]/a',
-        '//td/a'  # Special case for Lesson1296.html
+        '//td/a',  # Special case for Lesson1296.html
     )
     multipleImagesPerStrip = True
     help = 'Index format: name'

View file

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2016 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher
 from .common import _WordPressScraper
@@ -9,16 +9,17 @@ class SandraAndWoo(_WordPressScraper):
     prevSearch = '//a[@rel="prev"]'
 
     def __init__(self, name, urlName, firstUrl, lang='en'):
-        super(SandraAndWoo, self).__init__(name)
+        super().__init__(name)
         self.url = 'http://www.sandraandwoo.com/' + urlName
         self.firstStripUrl = self.url + firstUrl
         self.lang = lang
 
     @classmethod
     def getmodules(cls):
-        return [
+        return (
             cls('Gaia', 'gaia/', '2000/01/01/welcome-to-gaia/'),
             cls('GaiaGerman', 'gaiade/', '2000/01/01/welcome-to-gaia/', lang='de'),
             cls('SandraAndWoo', '', '2000/01/01/welcome-to-sandra-and-woo/'),
-            cls('SandraAndWooGerman', 'woode/', '2008/10/19/ein-ausgefuchster-waschbar/', lang='de'),
-        ]
+            cls('SandraAndWooGerman', 'woode/',
+                '2008/10/19/ein-ausgefuchster-waschbar/', lang='de'),
+        )

View file

@@ -61,7 +61,7 @@ class SmackJeeves(_ParserScraper):
                 'titleNo': self._comicid,
                 'articleNo': url.rsplit('=', 1)[1],
                 'page': 1,
-                'order': 'new'
+                'order': 'new',
             })
         response.raise_for_status()
         comments = response.json()['result']['list']
@@ -70,10 +70,10 @@ class SmackJeeves(_ParserScraper):
                     return comment['commentText']
             return None
         else:
-            super(SmackJeeves, self).fetchText(url, data, textSearch, optional)
+            return super().fetchText(url, data, textSearch, optional)
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             cls('20TimesKirby', 91583),
             cls('2Kingdoms', 112096, endOfLife=True),

View file

@@ -44,7 +44,7 @@ class WebToons(_ParserScraper):
         return "%s-%03d.%s" % (episodeNum, imageNum, imageExt)
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # START AUTOUPDATE
             cls('1000', 'action/one-thousand', 1217),

View file

@@ -117,7 +117,7 @@ class Scraper(object):
         """Initialize internal variables."""
         self.name = name
         self.urls = set()
-        self._indexes = tuple()
+        self._indexes = ()
         self.skippedUrls = set()
         self.hitFirstStripUrl = False
@@ -247,7 +247,7 @@ class Scraper(object):
     def namer(self, image_url, page_url):
         """Return filename for given image and page URL."""
-        return None
+        return
 
     def link_modifier(self, fromurl, tourl):
         """Optional modification of parsed link (previous/back/latest) URLs.
@@ -342,19 +342,18 @@ class Scraper(object):
         Return language of the comic as a human-readable language name instead
         of a 2-character ISO639-1 code.
         """
-        lang = 'Unknown (%s)' % self.lang
         if pycountry is None:
             if self.lang in languages.Languages:
-                lang = languages.Languages[self.lang]
+                return languages.Languages[self.lang]
         else:
             try:
-                lang = pycountry.languages.get(alpha_2=self.lang).name
+                return pycountry.languages.get(alpha_2=self.lang).name
             except KeyError:
                 try:
-                    lang = pycountry.languages.get(alpha2=self.lang).name
+                    return pycountry.languages.get(alpha2=self.lang).name
                 except KeyError:
                     pass
-        return lang
+        return 'Unknown (%s)' % self.lang
@@ -467,8 +466,7 @@ class _ParserScraper(Scraper):
         return tree
 
     def _parse_page(self, data):
-        tree = lxml.html.document_fromstring(data)
-        return tree
+        return lxml.html.document_fromstring(data)
 
     def fetchUrls(self, url, data, urlSearch):
         """Search all entries for given XPath in a HTML page."""

View file

@@ -56,7 +56,7 @@ class SingleInstance(object):
             try:
                 fcntl.lockf(self.fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
-            # raises IOError on Python << 3.3, else OSError
-            except (IOError, OSError):
+            except OSError:
                 self.exit(exit_code)
             self.initialized = True
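
Note: since Python 3.3, IOError is a plain alias of OSError, so catching both was redundant once Python 2 support was gone; the deleted comment documented exactly that historical split. This holds on any modern interpreter:

    assert IOError is OSError   # True on Python 3.3+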

View file

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher
 import html
 import os
 import re

View file

@@ -5,7 +5,7 @@ from lxml import etree
 NS = {
     'd': 'https://dosage.rocks/xpath',
-    're': 'http://exslt.org/regular-expressions'
+    're': 'http://exslt.org/regular-expressions',
 }

View file

@@ -196,5 +196,4 @@ def format_name(text):
     """Format a comic name."""
     name = html.unescape(text)
     name = "".join(capfirst(x) for x in name.split(" "))
-    name = asciify(name.translate(TRANS))
-    return name
+    return asciify(name.translate(TRANS))

View file

@@ -4,6 +4,8 @@ description = a comic strip downloader and archiver
 long_description = file: README.md
 long_description_content_type = text/markdown
 url = https://dosage.rocks
+maintainer = Tobias Gruetzmacher
+maintainer_email = tobias-dosage@23.gs
 license = MIT License
 license_file = COPYING
 platforms = Any
@@ -23,8 +25,6 @@ classifiers =
     Topic :: Internet :: WWW/HTTP
     Topic :: Multimedia :: Graphics
 keywords = comic,webcomic,downloader,archiver,crawler
-maintainer = Tobias Gruetzmacher
-maintainer_email = tobias-dosage@23.gs
 project_urls =
     Code = https://github.com/webcomics/dosage
     Issue tracker = https://github.com/webcomics/dosage/issues
@@ -56,16 +56,26 @@ bash =
 css =
     cssselect
 dev =
-    pytest-cov
-    pytest-xdist
-    responses
     flake8
     flake8-2020;python_version>'3.5'
+    flake8-breakpoint;python_version>'3.5'
+    flake8-bugbear
     flake8-coding
+    flake8-commas
+    flake8-comprehensions
+    flake8-eradicate
+    flake8-fixme
+    flake8-functions
     flake8-future-import
+    flake8-logging-format
    flake8-no-fstring;python_version>'3.5'
     flake8-pytest
     flake8-pytest-style;python_version>'3.5'
+    flake8-strings;python_version>'3.5'
+    pytest-cov
+    pytest-xdist
+    responses
+    setup-cfg-fmt
 
 [bdist_wheel]
 universal = 1
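
Note: the reshuffled dev extra is what enforces the edits above: flake8-commas wants the trailing commas added to multiline literals, flake8-comprehensions flags list(generator) calls like the one fixed in the test helpers, and flake8-bugbear covers unused loop variables. Assuming the extra is installed from a checkout, the checks should run as:

    pip install -e .[dev]
    flake8 dosagelib tests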

View file

@@ -26,7 +26,7 @@ def get_test_scrapers():
         # complex _ParserScraper
         'GoComics/CalvinAndHobbes',
         # _WordPressScraper
-        'GrrlPower'
+        'GrrlPower',
     ]
     scraper_pattern = re.compile('^(' + '|'.join(testscrapernames) + ')$')
@@ -39,7 +39,7 @@ def get_test_scrapers():
 def pytest_generate_tests(metafunc):
     if 'scraperobj' in metafunc.fixturenames:
         scrapers = get_test_scrapers()
-        scraperids = list(x.name for x in scrapers)
+        scraperids = [x.name for x in scrapers]
         metafunc.parametrize('scraperobj', scrapers, ids=scraperids)
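
Note: list(x.name for x in scrapers) builds the list through a needless intermediate generator; the comprehension is the direct spelling and is what flake8-comprehensions reports as C400 ("unnecessary generator - rewrite as a list comprehension"):

    scraperids = list(x.name for x in scrapers)   # flagged C400
    scraperids = [x.name for x in scrapers]       # equivalent, preferred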

View file

@@ -176,5 +176,5 @@ class TestDosage(object):
             images = data['pages'][page]['images']
             assert len(images) == 2
-            for imgurl, imgfile in images.items():
+            for imgfile in images.values():
                 assert directory.join(imgfile).check(file=1)

View file

@@ -43,3 +43,6 @@ ignore = E127,E128,E241,FI12,FI14,FI15,FI50,FI51,FI53,FI54,FI55,W504
 [pytest]
 filterwarnings = default
 junit_family = xunit2
+
+[isort]