Stricter style checking & related style fixes
commit e64635e86b (parent e84bbe2667)
34 changed files with 104 additions and 90 deletions

@@ -2,5 +2,6 @@
 # Copyright (C) 2020 Tobias Gruetzmacher
 import os
 
+
 def get_hook_dirs():
     return [os.path.dirname(__file__)]

@@ -301,12 +301,12 @@ def do_list(column_list=True, verbose=False, listall=False):
 def do_single_list(scrapers, verbose=False):
     """Get list of scraper names, one per line."""
     disabled = {}
-    for num, scraperobj in enumerate(scrapers):
+    for scraperobj in scrapers:
         if verbose:
             display_comic_help(scraperobj)
         else:
             out.info(get_tagged_scraper_name(scraperobj, reasons=disabled))
-    return num + 1, disabled
+    return len(scrapers), disabled
 
 
 def do_column_list(scrapers):

@@ -359,11 +359,10 @@ def main(args=None):
     try:
         options = setup_options().parse_args(args=args)
         options.basepath = os.path.expanduser(options.basepath)
-        res = run(options)
+        return run(options)
     except KeyboardInterrupt:
         print("Aborted.")
-        res = 1
+        return 1
     except Exception:
         internal_error()
-        res = 2
-    return res
+        return 2

@@ -163,7 +163,7 @@ def getComics(options):
         jobs.put(scraperobj)
     # start threads
     num_threads = min(options.parallel, jobs.qsize())
-    for i in range(num_threads):
+    for _i in range(num_threads):
         t = ComicGetter(options, jobs)
         threads.append(t)
         t.start()

@@ -118,7 +118,7 @@ class RSSEventHandler(EventHandler):
             title,
             imageUrl,
             description,
-            util.rfc822date(time.time())
+            util.rfc822date(time.time()),
         )
 
         if self.newfile:

@@ -164,8 +164,7 @@ class HtmlEventHandler(EventHandler):
         """Get filename from date."""
         fn = time.strftime('comics-%Y%m%d', date)
         fn = os.path.join(self.basepath, 'html', fn + ".html")
-        fn = os.path.abspath(fn)
-        return fn
+        return os.path.abspath(fn)
 
     def addNavLinks(self):
         if self.yesterdayUrl:

@@ -270,8 +269,7 @@ class JSONEventHandler(EventHandler):
     def jsonFn(self, scraper):
         """Get filename for the JSON file for a comic."""
         fn = os.path.join(scraper.get_download_dir(self.basepath), 'dosage.json')
-        fn = os.path.abspath(fn)
-        return fn
+        return os.path.abspath(fn)
 
     def getComicData(self, scraper):
         """Return dictionary with comic info."""

@@ -122,7 +122,7 @@ class AHClub(_WPNaviIn):
     nav = {
         'ah-club-2-cover': 'ah-club-1-page-24',
         'ah-club-3-cover': 'ah-club-2-page-28',
-        'ah-club-4-cover': 'ah-club-3-page-22'
+        'ah-club-4-cover': 'ah-club-3-page-22',
     }
 
     def getPrevUrl(self, url, data):

@@ -8,7 +8,7 @@ from re import compile, escape
 from ..util import tagre
 from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import indirectStarter
-from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPNaviIn, _WPWebcomic
+from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPWebcomic
 
 
 class BackOffice(_WPNavi):

@@ -95,8 +95,7 @@ class Beetlebum(_BasicScraper):
     def namer(self, image_url, page_url):
         indexes = tuple(page_url.rstrip('/').split('/')[-4:])
         name = '%s-%s-%s-%s' % indexes
-        name = name + '_' + image_url.split('/')[-1]
-        return name
+        return name + '_' + image_url.split('/')[-1]
 
 
 class Bethellium(_WPWebcomic):

@@ -265,8 +264,8 @@ class Brink(_WordPressScraper):
 
 
 class BroodHollow(_WordPressScraper):
-    url = 'http://broodhollow.chainsawsuit.com/'
-    firstStripUrl = 'http://broodhollow.chainsawsuit.com/page/2012/10/06/book-1-curious-little-thing'
+    url = 'https://broodhollow.chainsawsuit.com/'
+    firstStripUrl = url + 'page/2012/10/06/book-1-curious-little-thing'
 
     def shouldSkipUrl(self, url, data):
         return data.xpath('//div[@id="comic"]//iframe')

@@ -297,7 +296,7 @@ class ButtercupFestival(_ParserScraper):
     imageSearch = '//center/img'
     prevSearch = (
         '//a[img[contains(@src, "previous")]]',  # 3-x
-        '//a[text()="previous"]'  # 2-x
+        '//a[text()="previous"]',  # 2-x
     )
 
 

@@ -249,8 +249,8 @@ class CigarroAndCerveja(_ParserScraper):
     url = 'http://www.cigarro.ca/'
     stripUrl = url + 'comic/%s/'
     firstStripUrl = stripUrl % 'reacquaintance'
-    imageSearch = '//div[@id="comic"]//img',
-    prevSearch = '//a[contains(text()," Prev")]',
+    imageSearch = '//div[@id="comic"]//img'
+    prevSearch = '//a[contains(text()," Prev")]'
 
 
 class ClanOfTheCats(_WordPressScraper):

@@ -488,4 +488,4 @@ class CynWolf(_ParserScraper):
     endOfLife = True
 
     def shouldSkipUrl(self, url, data):
-        return '2016/the-end' in url # video
+        return '2016/the-end' in url  # video

@@ -67,7 +67,7 @@ class ComicFury(_ParserScraper):
         return "%s_%s%s" % (self.prefix, num, ext)
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # These were once in the list below, but fell out from the index...
             cls('BadassologyByMichaelBay', 'strudelology'),

@@ -20,7 +20,7 @@ class ComicSherpa(_ParserScraper):
         return self.url + '&uc_full_date=%s' % index
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # do not edit anything below since these entries are generated from
             # scripts/comicsherpa.py

@@ -26,7 +26,7 @@ class ComicsKingdom(_BasicScraper):
         return tourl
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # Some comics are not listed on the "all" page (too old?)
 

@@ -39,12 +39,13 @@ class TheCyantianChronicles(_WordPressScraper):
             cls('Darius', 'dbook-01', '03102010', last='darius-end'),
             cls('DracoVulpes', 'draco-vulpes', 'draco-vulpes'),
             cls('GenoworksSaga', 'genoworks-saga', '07012004'),
-            cls('GralenCraggHall', 'gchall', '07152002', last='chapter-6-05', nav={'chapter-5': '02152005'}),
+            cls('GralenCraggHall', 'gchall', '07152002', last='chapter-6-05',
+                nav={'chapter-5': '02152005'}),
             cls('Kiet', 'kiet', 'kiet-c01'),
             cls('NoAngel', 'no-angel', '08112001', last='12142006'),
             cls('Pawprints', 'pawprints', 'airboard-page-1', last='pawprints-sheana-10'),
             cls('RandomRamblings', 'random-ramblings', 'darrik'),
-            cls('SinkOrSwim', 'sos', 'sink-or-swim', last='ricochete-and-seraphim')
+            cls('SinkOrSwim', 'sos', 'sink-or-swim', last='ricochete-and-seraphim'),
         )
 
 

@@ -85,5 +86,5 @@ class ShivaeComics(_WordPressScraper):
             cls('Extras', 'extras', '01012012', nav={'12302012': '08152013'}),
             cls('Pure', 'pure', '04082002', last='chapter-6-page-1'),
             cls('SerinFairyHunter', 'serin', 'character-serin'),
-            cls('SivineBlades', 'sivine', '06302002', last='10242008')
+            cls('SivineBlades', 'sivine', '06302002', last='10242008'),
         )

@@ -52,5 +52,5 @@ class Derideal(_ParserScraper):
             cls('LRE', 'RLE', 'the-leyend-of-the-rose-cover'),
             cls('ProjectPrime', 'project-prime', 'custus-part-i-cover'),
             cls('PurpurinaEffect', 'purpurina-effect', 'purpurina-effect-cover'),
-            cls('TheVoid', 'the-void', 'the-void-cover')
+            cls('TheVoid', 'the-void', 'the-void-cover'),
         )

@@ -38,5 +38,5 @@ class DMFA(_ParserScraper):
             cls('Matilda', 'Ma_001', last='Ma_060'),
             cls('PerfectDate', 'PD_01', last='PD_18'),
             cls('TakePride', 'P_01', last='P_08'),
-            cls('Valentines', 'Vol_VDay001', last='Vol_VDaylast')
+            cls('Valentines', 'Vol_VDay001', last='Vol_VDaylast'),
         )

@@ -120,10 +120,9 @@ class Erfworld(_ParserScraper):
     def fetchUrls(self, url, data, urlSearch):
         # Return the main logo for text-only pages
         try:
-            imageUrls = super(Erfworld, self).fetchUrls(url, data, urlSearch)
+            return super().fetchUrls(url, data, urlSearch)
         except ValueError:
-            imageUrls = super(Erfworld, self).fetchUrls(url, data, '//li[@class="erf-logo"]//img')
-        return imageUrls
+            return super().fetchUrls(url, data, '//li[@class="erf-logo"]//img')
 
     def namer(self, imageUrl, pageUrl):
         # Fix inconsistent filenames

@@ -34,7 +34,7 @@ class GoComics(_ParserScraper):
         return data.xpath('//img[contains(@src, "content-error-missing")]')
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # old comics removed from the listing
             cls('HeavenlyNostrils', 'heavenly-nostrils'),

@@ -26,9 +26,7 @@ class HagarTheHorrible(_BasicScraper):
         url = 'http://www.hagardunor.net/comics.php'
         data = self.getPage(url)
         pattern = compile(tagre("a", "href", self.prevUrl))
-        for starturl in self.fetchUrls(url, data, pattern):
-            pass
-        return starturl
+        return self.fetchUrls(url, data, pattern)[-1]
 
 
 # "Hiatus", navigation missing

@@ -54,7 +54,8 @@ class KeenSpot(_ParserScraper):
             # Not on frontpage...
             cls('Buzzboy', 'buzzboy'),
             cls('EveryoneLovesAdis', 'adis'),
-            cls('GeneCatlowAlternate', 'genecatlow', last='20170302', adult=True, path='altd/%s.html'),
+            cls('GeneCatlowAlternate', 'genecatlow', last='20170302',
+                adult=True, path='altd/%s.html'),
 
             # do not edit anything below since these entries are generated from
             # scripts/update_plugins.sh

@@ -107,14 +107,13 @@ class LifeAsRendered(_ParserScraper):
         '0500': '0501',
         '0508': '0509',
         '0558': '0559',
-        '0577': '05extra'
+        '0577': '05extra',
     }
 
     def namer(self, imageUrl, pageUrl):
         # Fix inconsistent filenames
         filename = imageUrl.rsplit('/', 1)[-1]
-        filename = filename.replace('ReN', 'N').replace('N01P', 'A02S')
-        return filename
+        return filename.replace('ReN', 'N').replace('N01P', 'A02S')
 
     def fetchUrls(self, url, data, urlSearch):
         # Fix missing image link

@@ -167,7 +166,7 @@ class LittleTales(_ParserScraper):
     starter = bounceStarter
     nav = {
         '517': '515',
-        '449': '447'
+        '449': '447',
     }
 
     def namer(self, imageUrl, pageUrl):

@@ -190,8 +189,9 @@ class LoadingArtist(_ParserScraper):
 
 
 class LoFiJinks(_WPNaviIn):
-    url = 'http://hijinksensue.com/comic/learning-to-love-again/'
-    firstStripUrl = 'http://hijinksensue.com/comic/lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'
+    baseUrl = 'https://hijinksensue.com/comic/'
+    url = baseUrl + 'learning-to-love-again/'
+    firstStripUrl = baseUrl + 'lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'
     endOfLife = True
 
 

|
@ -42,7 +42,7 @@ class NamirDeiter(_ParserScraper):
|
|||
cls('SpareParts', 'sparepartscomics.com', first='20031022', last='20080331'),
|
||||
cls('TheNDU', 'thendu.com'),
|
||||
cls('WonderKittens', 'wonderkittens.com'),
|
||||
cls('YouSayItFirst', 'yousayitfirst.com', first='20040220', last='20130125')
|
||||
cls('YouSayItFirst', 'yousayitfirst.com', first='20040220', last='20130125'),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@@ -27,7 +27,7 @@ class Removed(Scraper):
         return {'rem-' + self.reason: self.REASONS[self.reason]}
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # Removed in 2.16
             cls('AbleAndBaker'),

@@ -675,7 +675,7 @@ class Renamed(Scraper):
         return {'ren-%i' % self.i: self.MSG % self.newname}
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # Renamed in 2.16
             cls('1997', '1977'),

@@ -5,12 +5,12 @@ from ..scraper import _ParserScraper
 
 
 class RHJunior(_ParserScraper):
-    stripUrl = 'http://www.rhjunior.com/%s/'
+    stripUrl = 'https://www.rhjunior.com/%s/'
     imageSearch = '//div[contains(@class, "entry-content")]//img'
     multipleImagesPerStrip = True
 
     def __init__(self, name, sub, prev, first, last=None):
-        super(RHJunior, self).__init__('RHJunior/' + name)
+        super().__init__('RHJunior/' + name)
         self.prevSearch = ('//a[@rel="prev"]', '//a[@title="' + prev + '"]')
         self.url = self.stripUrl % ('comics/' + sub)
         self.firstStripUrl = self.stripUrl % (sub + '-' + first)

@@ -22,10 +22,16 @@ class RHJunior(_ParserScraper):
     @classmethod
     def getmodules(cls):
         return (
-            cls('GoblinHollow', 'goblin-hollow', '', '0001', last='7'),
-            cls('NipAndTuck', 'nip-and-tuck', 'Nip and Tuck', '0000'),
-            cls('QuentynQuinnSpaceRanger', 'quentyn-quinn-space-ranger', 'Quentyn Quinn, Space Ranger', '0001'),
-            cls('TalesOfTheQuestor', 'tales-of-the-questor', 'Tales of the Questor', 'cover'),
-            cls('TheJournalOfEnniasLongscript', 'the-journal-of-ennias-longscript', '', '0001', last='0111'),
-            cls('TheProbabilityBomb', 'the-probability-bomb', 'the Probability Bomb', 'kickstarter')
+            cls('GoblinHollow', 'goblin-hollow',
+                '', '0001', last='7'),
+            cls('NipAndTuck', 'nip-and-tuck',
+                'Nip and Tuck', '0000'),
+            cls('QuentynQuinnSpaceRanger', 'quentyn-quinn-space-ranger',
+                'Quentyn Quinn, Space Ranger', '0001'),
+            cls('TalesOfTheQuestor', 'tales-of-the-questor',
+                'Tales of the Questor', 'cover'),
+            cls('TheJournalOfEnniasLongscript', 'the-journal-of-ennias-longscript',
+                '', '0001', last='0111'),
+            cls('TheProbabilityBomb', 'the-probability-bomb',
+                'the Probability Bomb', 'kickstarter'),
         )

@@ -371,7 +371,7 @@ class SoloLeveling(_ParserScraper):
         '88-0_5d9e0dedb942e/03.': '88-0_5d9e0dedb942e/03b.',
         '88-0_5d9e0dedb942e/05.': '88-0_5d9e0dedb942e/05a.',
         '88-0_5d9e0dedb942e/30.': '88-0_5d9e0dedb942e/30a.',
-        '87-0_5d94cdebd9df7/01a.': '87-0_5d94cdebd9df7/01c.'
+        '87-0_5d94cdebd9df7/01a.': '87-0_5d94cdebd9df7/01c.',
     }
 
     def imageUrlModifier(self, imageUrl, data):

@@ -533,7 +533,7 @@ class SSDD(_ParserScraper):
             self.stripUrl % '20050504',
             self.stripUrl % '20040705',
             self.stripUrl % '20030418',
-            self.stripUrl % '20030214'
+            self.stripUrl % '20030214',
         )
 
 

@@ -598,6 +598,7 @@ class StarfireAgency(_WPWebcomic):
         self.currentChapter = self.currentChapter - 1
         return filename
 
+
 class StarTrip(_ComicControlScraper):
     url = 'https://www.startripcomic.com/'
 

@@ -684,11 +685,11 @@ class SurvivingTheWorld(_ParserScraper):
     imageSearch = (
         '//div[@class="img"]/img',  # When there's one image per strip
         '//div[@class="img"]/p/img',  # When there's multiple images per strip
-        '//td/img'  # Special case for Lesson1296.html
+        '//td/img',  # Special case for Lesson1296.html
     )
     prevSearch = (
         '//li[@class="previous"]/a',
-        '//td/a'  # Special case for Lesson1296.html
+        '//td/a',  # Special case for Lesson1296.html
     )
     multipleImagesPerStrip = True
     help = 'Index format: name'

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2016 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher
 from .common import _WordPressScraper
 
 

@@ -9,16 +9,17 @@ class SandraAndWoo(_WordPressScraper):
     prevSearch = '//a[@rel="prev"]'
 
     def __init__(self, name, urlName, firstUrl, lang='en'):
-        super(SandraAndWoo, self).__init__(name)
+        super().__init__(name)
         self.url = 'http://www.sandraandwoo.com/' + urlName
         self.firstStripUrl = self.url + firstUrl
         self.lang = lang
 
     @classmethod
     def getmodules(cls):
-        return [
+        return (
             cls('Gaia', 'gaia/', '2000/01/01/welcome-to-gaia/'),
             cls('GaiaGerman', 'gaiade/', '2000/01/01/welcome-to-gaia/', lang='de'),
             cls('SandraAndWoo', '', '2000/01/01/welcome-to-sandra-and-woo/'),
-            cls('SandraAndWooGerman', 'woode/', '2008/10/19/ein-ausgefuchster-waschbar/', lang='de'),
-        ]
+            cls('SandraAndWooGerman', 'woode/',
+                '2008/10/19/ein-ausgefuchster-waschbar/', lang='de'),
+        )

@@ -61,7 +61,7 @@ class SmackJeeves(_ParserScraper):
             'titleNo': self._comicid,
             'articleNo': url.rsplit('=', 1)[1],
             'page': 1,
-            'order': 'new'
+            'order': 'new',
         })
         response.raise_for_status()
         comments = response.json()['result']['list']

@@ -70,10 +70,10 @@ class SmackJeeves(_ParserScraper):
                     return comment['commentText']
             return None
         else:
-            super(SmackJeeves, self).fetchText(url, data, textSearch, optional)
+            return super().fetchText(url, data, textSearch, optional)
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             cls('20TimesKirby', 91583),
             cls('2Kingdoms', 112096, endOfLife=True),

@@ -44,7 +44,7 @@ class WebToons(_ParserScraper):
         return "%s-%03d.%s" % (episodeNum, imageNum, imageExt)
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # START AUTOUPDATE
             cls('1000', 'action/one-thousand', 1217),

@@ -117,7 +117,7 @@ class Scraper(object):
         """Initialize internal variables."""
         self.name = name
         self.urls = set()
-        self._indexes = tuple()
+        self._indexes = ()
         self.skippedUrls = set()
         self.hitFirstStripUrl = False
 

@@ -247,7 +247,7 @@ class Scraper(object):
 
     def namer(self, image_url, page_url):
         """Return filename for given image and page URL."""
-        return None
+        return
 
     def link_modifier(self, fromurl, tourl):
         """Optional modification of parsed link (previous/back/latest) URLs.

|
|||
Return language of the comic as a human-readable language name instead
|
||||
of a 2-character ISO639-1 code.
|
||||
"""
|
||||
lang = 'Unknown (%s)' % self.lang
|
||||
if pycountry is None:
|
||||
if self.lang in languages.Languages:
|
||||
lang = languages.Languages[self.lang]
|
||||
return languages.Languages[self.lang]
|
||||
else:
|
||||
try:
|
||||
lang = pycountry.languages.get(alpha_2=self.lang).name
|
||||
return pycountry.languages.get(alpha_2=self.lang).name
|
||||
except KeyError:
|
||||
try:
|
||||
lang = pycountry.languages.get(alpha2=self.lang).name
|
||||
return pycountry.languages.get(alpha2=self.lang).name
|
||||
except KeyError:
|
||||
pass
|
||||
return lang
|
||||
return 'Unknown (%s)' % self.lang
|
||||
|
||||
def geoblocked(self):
|
||||
"""Helper method to indicate that the user is most probably geo-blocked."""
|
||||
|
@@ -467,8 +466,7 @@ class _ParserScraper(Scraper):
         return tree
 
     def _parse_page(self, data):
-        tree = lxml.html.document_fromstring(data)
-        return tree
+        return lxml.html.document_fromstring(data)
 
     def fetchUrls(self, url, data, urlSearch):
         """Search all entries for given XPath in a HTML page."""

@@ -56,7 +56,7 @@ class SingleInstance(object):
         try:
             fcntl.lockf(self.fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
-        # raises IOError on Python << 3.3, else OSError
-        except (IOError, OSError):
+        except OSError:
             self.exit(exit_code)
         self.initialized = True
 

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher
 import html
 import os
 import re

@@ -5,7 +5,7 @@ from lxml import etree
 
 NS = {
     'd': 'https://dosage.rocks/xpath',
-    're': 'http://exslt.org/regular-expressions'
+    're': 'http://exslt.org/regular-expressions',
 }
 
 

@@ -196,5 +196,4 @@ def format_name(text):
    """Format a comic name."""
    name = html.unescape(text)
    name = "".join(capfirst(x) for x in name.split(" "))
-   name = asciify(name.translate(TRANS))
-   return name
+   return asciify(name.translate(TRANS))

setup.cfg (20 changed lines)

@@ -4,6 +4,8 @@ description = a comic strip downloader and archiver
 long_description = file: README.md
 long_description_content_type = text/markdown
 url = https://dosage.rocks
+maintainer = Tobias Gruetzmacher
+maintainer_email = tobias-dosage@23.gs
 license = MIT License
 license_file = COPYING
 platforms = Any

@@ -23,8 +25,6 @@ classifiers =
     Topic :: Internet :: WWW/HTTP
     Topic :: Multimedia :: Graphics
 keywords = comic,webcomic,downloader,archiver,crawler
-maintainer = Tobias Gruetzmacher
-maintainer_email = tobias-dosage@23.gs
 project_urls =
     Code = https://github.com/webcomics/dosage
     Issue tracker = https://github.com/webcomics/dosage/issues

@@ -56,16 +56,26 @@ bash =
 css =
     cssselect
 dev =
-    pytest-cov
-    pytest-xdist
-    responses
+    flake8
+    flake8-2020;python_version>'3.5'
+    flake8-breakpoint;python_version>'3.5'
+    flake8-bugbear
+    flake8-coding
+    flake8-commas
+    flake8-comprehensions
+    flake8-eradicate
+    flake8-fixme
+    flake8-functions
+    flake8-future-import
+    flake8-logging-format
+    flake8-no-fstring;python_version>'3.5'
+    flake8-pytest
+    flake8-pytest-style;python_version>'3.5'
+    flake8-strings;python_version>'3.5'
+    pytest-cov
+    pytest-xdist
+    responses
+    setup-cfg-fmt
 
 [bdist_wheel]
 universal = 1

@@ -26,7 +26,7 @@ def get_test_scrapers():
         # complex _ParserScraper
         'GoComics/CalvinAndHobbes',
         # _WordPressScraper
-        'GrrlPower'
+        'GrrlPower',
     ]
     scraper_pattern = re.compile('^(' + '|'.join(testscrapernames) + ')$')
 

@@ -39,7 +39,7 @@ def get_test_scrapers():
 def pytest_generate_tests(metafunc):
     if 'scraperobj' in metafunc.fixturenames:
         scrapers = get_test_scrapers()
-        scraperids = list(x.name for x in scrapers)
+        scraperids = [x.name for x in scrapers]
         metafunc.parametrize('scraperobj', scrapers, ids=scraperids)
 
 

@@ -176,5 +176,5 @@ class TestDosage(object):
         images = data['pages'][page]['images']
         assert len(images) == 2
 
-        for imgurl, imgfile in images.items():
+        for imgfile in images.values():
             assert directory.join(imgfile).check(file=1)

tox.ini (3 changed lines)

@@ -43,3 +43,6 @@ ignore = E127,E128,E241,FI12,FI14,FI15,FI50,FI51,FI53,FI54,FI55,W504
 [pytest]
 filterwarnings = default
 junit_family = xunit2
+
+[isort]