Stricter style checking & related style fixes

This commit is contained in:
parent e84bbe2667
commit e64635e86b

34 changed files with 104 additions and 90 deletions
@@ -2,5 +2,6 @@
 # Copyright (C) 2020 Tobias Gruetzmacher
 import os
 
+
 def get_hook_dirs():
     return [os.path.dirname(__file__)]

@@ -301,12 +301,12 @@ def do_list(column_list=True, verbose=False, listall=False):
 def do_single_list(scrapers, verbose=False):
     """Get list of scraper names, one per line."""
     disabled = {}
-    for num, scraperobj in enumerate(scrapers):
+    for scraperobj in scrapers:
         if verbose:
             display_comic_help(scraperobj)
         else:
             out.info(get_tagged_scraper_name(scraperobj, reasons=disabled))
-    return num + 1, disabled
+    return len(scrapers), disabled
 
 
 def do_column_list(scrapers):

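Note: the rewritten return value is unchanged in behavior — for a non-empty
sequence, the last index produced by enumerate() is len(seq) - 1, so the old
num + 1 equals len(scrapers). A minimal check in plain Python (the values here
are illustrative, not from dosage):

    scrapers = ['a', 'b', 'c']
    for num, _scraper in enumerate(scrapers):
        pass
    assert num + 1 == len(scrapers)  # both equal 3
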
@@ -359,11 +359,10 @@ def main(args=None):
     try:
         options = setup_options().parse_args(args=args)
         options.basepath = os.path.expanduser(options.basepath)
-        res = run(options)
+        return run(options)
     except KeyboardInterrupt:
         print("Aborted.")
-        res = 1
+        return 1
     except Exception:
         internal_error()
-        res = 2
-    return res
+        return 2

@@ -163,7 +163,7 @@ def getComics(options):
         jobs.put(scraperobj)
     # start threads
     num_threads = min(options.parallel, jobs.qsize())
-    for i in range(num_threads):
+    for _i in range(num_threads):
         t = ComicGetter(options, jobs)
         threads.append(t)
         t.start()

@@ -118,7 +118,7 @@ class RSSEventHandler(EventHandler):
             title,
             imageUrl,
             description,
-            util.rfc822date(time.time())
+            util.rfc822date(time.time()),
         )
 
         if self.newfile:

@@ -164,8 +164,7 @@ class HtmlEventHandler(EventHandler):
         """Get filename from date."""
         fn = time.strftime('comics-%Y%m%d', date)
         fn = os.path.join(self.basepath, 'html', fn + ".html")
-        fn = os.path.abspath(fn)
-        return fn
+        return os.path.abspath(fn)
 
     def addNavLinks(self):
         if self.yesterdayUrl:

@@ -270,8 +269,7 @@ class JSONEventHandler(EventHandler):
     def jsonFn(self, scraper):
         """Get filename for the JSON file for a comic."""
         fn = os.path.join(scraper.get_download_dir(self.basepath), 'dosage.json')
-        fn = os.path.abspath(fn)
-        return fn
+        return os.path.abspath(fn)
 
     def getComicData(self, scraper):
         """Return dictionary with comic info."""

@@ -122,7 +122,7 @@ class AHClub(_WPNaviIn):
     nav = {
         'ah-club-2-cover': 'ah-club-1-page-24',
         'ah-club-3-cover': 'ah-club-2-page-28',
-        'ah-club-4-cover': 'ah-club-3-page-22'
+        'ah-club-4-cover': 'ah-club-3-page-22',
     }
 
     def getPrevUrl(self, url, data):

@@ -8,7 +8,7 @@ from re import compile, escape
 from ..util import tagre
 from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import indirectStarter
-from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPNaviIn, _WPWebcomic
+from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPWebcomic
 
 
 class BackOffice(_WPNavi):

@@ -95,8 +95,7 @@ class Beetlebum(_BasicScraper):
     def namer(self, image_url, page_url):
         indexes = tuple(page_url.rstrip('/').split('/')[-4:])
         name = '%s-%s-%s-%s' % indexes
-        name = name + '_' + image_url.split('/')[-1]
-        return name
+        return name + '_' + image_url.split('/')[-1]
 
 
 class Bethellium(_WPWebcomic):

@@ -265,8 +264,8 @@ class Brink(_WordPressScraper):
 
 
 class BroodHollow(_WordPressScraper):
-    url = 'http://broodhollow.chainsawsuit.com/'
-    firstStripUrl = 'http://broodhollow.chainsawsuit.com/page/2012/10/06/book-1-curious-little-thing'
+    url = 'https://broodhollow.chainsawsuit.com/'
+    firstStripUrl = url + 'page/2012/10/06/book-1-curious-little-thing'
 
     def shouldSkipUrl(self, url, data):
         return data.xpath('//div[@id="comic"]//iframe')

@@ -297,7 +296,7 @@ class ButtercupFestival(_ParserScraper):
     imageSearch = '//center/img'
     prevSearch = (
         '//a[img[contains(@src, "previous")]]',  # 3-x
-        '//a[text()="previous"]'  # 2-x
+        '//a[text()="previous"]',  # 2-x
     )
 
 

@@ -249,8 +249,8 @@ class CigarroAndCerveja(_ParserScraper):
     url = 'http://www.cigarro.ca/'
     stripUrl = url + 'comic/%s/'
     firstStripUrl = stripUrl % 'reacquaintance'
-    imageSearch = '//div[@id="comic"]//img',
-    prevSearch = '//a[contains(text()," Prev")]',
+    imageSearch = '//div[@id="comic"]//img'
+    prevSearch = '//a[contains(text()," Prev")]'
 
 
 class ClanOfTheCats(_WordPressScraper):

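Note: the imageSearch/prevSearch change above also removes accidental tuples —
in Python, a bare trailing comma after an assignment silently builds a
one-element tuple. A standalone sketch in plain Python (not dosage code):

    imageSearch = '//div[@id="comic"]//img',      # trailing comma -> 1-tuple
    prevSearch = '//a[contains(text()," Prev")]'  # no comma -> plain string
    print(type(imageSearch).__name__)  # tuple
    print(type(prevSearch).__name__)   # str
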
@@ -488,4 +488,4 @@ class CynWolf(_ParserScraper):
     endOfLife = True
 
     def shouldSkipUrl(self, url, data):
-        return '2016/the-end' in url # video
+        return '2016/the-end' in url  # video

@@ -67,7 +67,7 @@ class ComicFury(_ParserScraper):
         return "%s_%s%s" % (self.prefix, num, ext)
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # These were once in the list below, but fell out from the index...
             cls('BadassologyByMichaelBay', 'strudelology'),

@@ -20,7 +20,7 @@ class ComicSherpa(_ParserScraper):
         return self.url + '&uc_full_date=%s' % index
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # do not edit anything below since these entries are generated from
             # scripts/comicsherpa.py

@@ -26,7 +26,7 @@ class ComicsKingdom(_BasicScraper):
         return tourl
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # Some comics are not listed on the "all" page (too old?)
 

@@ -39,12 +39,13 @@ class TheCyantianChronicles(_WordPressScraper):
             cls('Darius', 'dbook-01', '03102010', last='darius-end'),
             cls('DracoVulpes', 'draco-vulpes', 'draco-vulpes'),
             cls('GenoworksSaga', 'genoworks-saga', '07012004'),
-            cls('GralenCraggHall', 'gchall', '07152002', last='chapter-6-05', nav={'chapter-5': '02152005'}),
+            cls('GralenCraggHall', 'gchall', '07152002', last='chapter-6-05',
+                nav={'chapter-5': '02152005'}),
             cls('Kiet', 'kiet', 'kiet-c01'),
             cls('NoAngel', 'no-angel', '08112001', last='12142006'),
             cls('Pawprints', 'pawprints', 'airboard-page-1', last='pawprints-sheana-10'),
             cls('RandomRamblings', 'random-ramblings', 'darrik'),
-            cls('SinkOrSwim', 'sos', 'sink-or-swim', last='ricochete-and-seraphim')
+            cls('SinkOrSwim', 'sos', 'sink-or-swim', last='ricochete-and-seraphim'),
         )
 
 

@@ -85,5 +86,5 @@ class ShivaeComics(_WordPressScraper):
             cls('Extras', 'extras', '01012012', nav={'12302012': '08152013'}),
             cls('Pure', 'pure', '04082002', last='chapter-6-page-1'),
             cls('SerinFairyHunter', 'serin', 'character-serin'),
-            cls('SivineBlades', 'sivine', '06302002', last='10242008')
+            cls('SivineBlades', 'sivine', '06302002', last='10242008'),
         )

@@ -52,5 +52,5 @@ class Derideal(_ParserScraper):
             cls('LRE', 'RLE', 'the-leyend-of-the-rose-cover'),
             cls('ProjectPrime', 'project-prime', 'custus-part-i-cover'),
             cls('PurpurinaEffect', 'purpurina-effect', 'purpurina-effect-cover'),
-            cls('TheVoid', 'the-void', 'the-void-cover')
+            cls('TheVoid', 'the-void', 'the-void-cover'),
         )

@@ -38,5 +38,5 @@ class DMFA(_ParserScraper):
             cls('Matilda', 'Ma_001', last='Ma_060'),
             cls('PerfectDate', 'PD_01', last='PD_18'),
             cls('TakePride', 'P_01', last='P_08'),
-            cls('Valentines', 'Vol_VDay001', last='Vol_VDaylast')
+            cls('Valentines', 'Vol_VDay001', last='Vol_VDaylast'),
         )

@@ -120,10 +120,9 @@ class Erfworld(_ParserScraper):
     def fetchUrls(self, url, data, urlSearch):
         # Return the main logo for text-only pages
         try:
-            imageUrls = super(Erfworld, self).fetchUrls(url, data, urlSearch)
+            return super().fetchUrls(url, data, urlSearch)
         except ValueError:
-            imageUrls = super(Erfworld, self).fetchUrls(url, data, '//li[@class="erf-logo"]//img')
-        return imageUrls
+            return super().fetchUrls(url, data, '//li[@class="erf-logo"]//img')
 
     def namer(self, imageUrl, pageUrl):
         # Fix inconsistent filenames

@@ -34,7 +34,7 @@ class GoComics(_ParserScraper):
         return data.xpath('//img[contains(@src, "content-error-missing")]')
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # old comics removed from the listing
             cls('HeavenlyNostrils', 'heavenly-nostrils'),

@@ -26,9 +26,7 @@ class HagarTheHorrible(_BasicScraper):
         url = 'http://www.hagardunor.net/comics.php'
         data = self.getPage(url)
         pattern = compile(tagre("a", "href", self.prevUrl))
-        for starturl in self.fetchUrls(url, data, pattern):
-            pass
-        return starturl
+        return self.fetchUrls(url, data, pattern)[-1]
 
 
 # "Hiatus", navigation missing

@@ -54,7 +54,8 @@ class KeenSpot(_ParserScraper):
             # Not on frontpage...
             cls('Buzzboy', 'buzzboy'),
             cls('EveryoneLovesAdis', 'adis'),
-            cls('GeneCatlowAlternate', 'genecatlow', last='20170302', adult=True, path='altd/%s.html'),
+            cls('GeneCatlowAlternate', 'genecatlow', last='20170302',
+                adult=True, path='altd/%s.html'),
 
             # do not edit anything below since these entries are generated from
             # scripts/update_plugins.sh

@@ -107,14 +107,13 @@ class LifeAsRendered(_ParserScraper):
         '0500': '0501',
         '0508': '0509',
         '0558': '0559',
-        '0577': '05extra'
+        '0577': '05extra',
     }
 
     def namer(self, imageUrl, pageUrl):
         # Fix inconsistent filenames
         filename = imageUrl.rsplit('/', 1)[-1]
-        filename = filename.replace('ReN', 'N').replace('N01P', 'A02S')
-        return filename
+        return filename.replace('ReN', 'N').replace('N01P', 'A02S')
 
     def fetchUrls(self, url, data, urlSearch):
         # Fix missing image link

@@ -167,7 +166,7 @@ class LittleTales(_ParserScraper):
     starter = bounceStarter
     nav = {
         '517': '515',
-        '449': '447'
+        '449': '447',
     }
 
     def namer(self, imageUrl, pageUrl):

@@ -190,8 +189,9 @@ class LoadingArtist(_ParserScraper):
 
 
 class LoFiJinks(_WPNaviIn):
-    url = 'http://hijinksensue.com/comic/learning-to-love-again/'
-    firstStripUrl = 'http://hijinksensue.com/comic/lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'
+    baseUrl = 'https://hijinksensue.com/comic/'
+    url = baseUrl + 'learning-to-love-again/'
+    firstStripUrl = baseUrl + 'lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'
     endOfLife = True
 
 

@@ -42,7 +42,7 @@ class NamirDeiter(_ParserScraper):
             cls('SpareParts', 'sparepartscomics.com', first='20031022', last='20080331'),
             cls('TheNDU', 'thendu.com'),
             cls('WonderKittens', 'wonderkittens.com'),
-            cls('YouSayItFirst', 'yousayitfirst.com', first='20040220', last='20130125')
+            cls('YouSayItFirst', 'yousayitfirst.com', first='20040220', last='20130125'),
         )
 
 

@@ -27,7 +27,7 @@ class Removed(Scraper):
         return {'rem-' + self.reason: self.REASONS[self.reason]}
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # Removed in 2.16
             cls('AbleAndBaker'),

@@ -675,7 +675,7 @@ class Renamed(Scraper):
         return {'ren-%i' % self.i: self.MSG % self.newname}
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # Renamed in 2.16
             cls('1997', '1977'),

@@ -5,12 +5,12 @@ from ..scraper import _ParserScraper
 
 
 class RHJunior(_ParserScraper):
-    stripUrl = 'http://www.rhjunior.com/%s/'
+    stripUrl = 'https://www.rhjunior.com/%s/'
     imageSearch = '//div[contains(@class, "entry-content")]//img'
     multipleImagesPerStrip = True
 
     def __init__(self, name, sub, prev, first, last=None):
-        super(RHJunior, self).__init__('RHJunior/' + name)
+        super().__init__('RHJunior/' + name)
         self.prevSearch = ('//a[@rel="prev"]', '//a[@title="' + prev + '"]')
         self.url = self.stripUrl % ('comics/' + sub)
         self.firstStripUrl = self.stripUrl % (sub + '-' + first)

@@ -22,10 +22,16 @@ class RHJunior(_ParserScraper):
     @classmethod
     def getmodules(cls):
         return (
-            cls('GoblinHollow', 'goblin-hollow', '', '0001', last='7'),
-            cls('NipAndTuck', 'nip-and-tuck', 'Nip and Tuck', '0000'),
-            cls('QuentynQuinnSpaceRanger', 'quentyn-quinn-space-ranger', 'Quentyn Quinn, Space Ranger', '0001'),
-            cls('TalesOfTheQuestor', 'tales-of-the-questor', 'Tales of the Questor', 'cover'),
-            cls('TheJournalOfEnniasLongscript', 'the-journal-of-ennias-longscript', '', '0001', last='0111'),
-            cls('TheProbabilityBomb', 'the-probability-bomb', 'the Probability Bomb', 'kickstarter')
+            cls('GoblinHollow', 'goblin-hollow',
+                '', '0001', last='7'),
+            cls('NipAndTuck', 'nip-and-tuck',
+                'Nip and Tuck', '0000'),
+            cls('QuentynQuinnSpaceRanger', 'quentyn-quinn-space-ranger',
+                'Quentyn Quinn, Space Ranger', '0001'),
+            cls('TalesOfTheQuestor', 'tales-of-the-questor',
+                'Tales of the Questor', 'cover'),
+            cls('TheJournalOfEnniasLongscript', 'the-journal-of-ennias-longscript',
+                '', '0001', last='0111'),
+            cls('TheProbabilityBomb', 'the-probability-bomb',
+                'the Probability Bomb', 'kickstarter'),
         )

@@ -371,7 +371,7 @@ class SoloLeveling(_ParserScraper):
         '88-0_5d9e0dedb942e/03.': '88-0_5d9e0dedb942e/03b.',
         '88-0_5d9e0dedb942e/05.': '88-0_5d9e0dedb942e/05a.',
         '88-0_5d9e0dedb942e/30.': '88-0_5d9e0dedb942e/30a.',
-        '87-0_5d94cdebd9df7/01a.': '87-0_5d94cdebd9df7/01c.'
+        '87-0_5d94cdebd9df7/01a.': '87-0_5d94cdebd9df7/01c.',
     }
 
     def imageUrlModifier(self, imageUrl, data):

@@ -533,7 +533,7 @@ class SSDD(_ParserScraper):
             self.stripUrl % '20050504',
             self.stripUrl % '20040705',
             self.stripUrl % '20030418',
-            self.stripUrl % '20030214'
+            self.stripUrl % '20030214',
         )
 
 

@@ -598,6 +598,7 @@ class StarfireAgency(_WPWebcomic):
             self.currentChapter = self.currentChapter - 1
         return filename
 
+
 class StarTrip(_ComicControlScraper):
     url = 'https://www.startripcomic.com/'
 

@@ -684,11 +685,11 @@ class SurvivingTheWorld(_ParserScraper):
     imageSearch = (
         '//div[@class="img"]/img',  # When there's one image per strip
         '//div[@class="img"]/p/img',  # When there's multiple images per strip
-        '//td/img'  # Special case for Lesson1296.html
+        '//td/img',  # Special case for Lesson1296.html
     )
     prevSearch = (
         '//li[@class="previous"]/a',
-        '//td/a'  # Special case for Lesson1296.html
+        '//td/a',  # Special case for Lesson1296.html
     )
     multipleImagesPerStrip = True
     help = 'Index format: name'

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2016 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher
 from .common import _WordPressScraper
 
 

@@ -9,16 +9,17 @@ class SandraAndWoo(_WordPressScraper):
     prevSearch = '//a[@rel="prev"]'
 
     def __init__(self, name, urlName, firstUrl, lang='en'):
-        super(SandraAndWoo, self).__init__(name)
+        super().__init__(name)
         self.url = 'http://www.sandraandwoo.com/' + urlName
         self.firstStripUrl = self.url + firstUrl
         self.lang = lang
 
     @classmethod
     def getmodules(cls):
-        return [
+        return (
             cls('Gaia', 'gaia/', '2000/01/01/welcome-to-gaia/'),
             cls('GaiaGerman', 'gaiade/', '2000/01/01/welcome-to-gaia/', lang='de'),
             cls('SandraAndWoo', '', '2000/01/01/welcome-to-sandra-and-woo/'),
-            cls('SandraAndWooGerman', 'woode/', '2008/10/19/ein-ausgefuchster-waschbar/', lang='de'),
-        ]
+            cls('SandraAndWooGerman', 'woode/',
+                '2008/10/19/ein-ausgefuchster-waschbar/', lang='de'),
+        )

@@ -61,7 +61,7 @@ class SmackJeeves(_ParserScraper):
             'titleNo': self._comicid,
             'articleNo': url.rsplit('=', 1)[1],
             'page': 1,
-            'order': 'new'
+            'order': 'new',
         })
         response.raise_for_status()
         comments = response.json()['result']['list']

@@ -70,10 +70,10 @@ class SmackJeeves(_ParserScraper):
                     return comment['commentText']
             return None
         else:
-            super(SmackJeeves, self).fetchText(url, data, textSearch, optional)
+            return super().fetchText(url, data, textSearch, optional)
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             cls('20TimesKirby', 91583),
             cls('2Kingdoms', 112096, endOfLife=True),

@@ -44,7 +44,7 @@ class WebToons(_ParserScraper):
         return "%s-%03d.%s" % (episodeNum, imageNum, imageExt)
 
     @classmethod
-    def getmodules(cls):
+    def getmodules(cls):  # noqa: Allowed to be long
         return (
             # START AUTOUPDATE
             cls('1000', 'action/one-thousand', 1217),

@@ -117,7 +117,7 @@ class Scraper(object):
         """Initialize internal variables."""
         self.name = name
         self.urls = set()
-        self._indexes = tuple()
+        self._indexes = ()
         self.skippedUrls = set()
         self.hitFirstStripUrl = False
 

@@ -247,7 +247,7 @@ class Scraper(object):
 
     def namer(self, image_url, page_url):
         """Return filename for given image and page URL."""
-        return None
+        return
 
     def link_modifier(self, fromurl, tourl):
         """Optional modification of parsed link (previous/back/latest) URLs.

@@ -342,19 +342,18 @@ class Scraper(object):
         Return language of the comic as a human-readable language name instead
         of a 2-character ISO639-1 code.
         """
-        lang = 'Unknown (%s)' % self.lang
         if pycountry is None:
             if self.lang in languages.Languages:
-                lang = languages.Languages[self.lang]
+                return languages.Languages[self.lang]
         else:
             try:
-                lang = pycountry.languages.get(alpha_2=self.lang).name
+                return pycountry.languages.get(alpha_2=self.lang).name
             except KeyError:
                 try:
-                    lang = pycountry.languages.get(alpha2=self.lang).name
+                    return pycountry.languages.get(alpha2=self.lang).name
                 except KeyError:
                     pass
-        return lang
+        return 'Unknown (%s)' % self.lang
 
     def geoblocked(self):
         """Helper method to indicate that the user is most probably geo-blocked."""

@@ -467,8 +466,7 @@ class _ParserScraper(Scraper):
         return tree
 
     def _parse_page(self, data):
-        tree = lxml.html.document_fromstring(data)
-        return tree
+        return lxml.html.document_fromstring(data)
 
     def fetchUrls(self, url, data, urlSearch):
         """Search all entries for given XPath in a HTML page."""

@@ -56,7 +56,7 @@ class SingleInstance(object):
         try:
             fcntl.lockf(self.fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
         # raises IOError on Python << 3.3, else OSError
-        except (IOError, OSError):
+        except OSError:
             self.exit(exit_code)
         self.initialized = True
 

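Note: the simplified except clause above is safe because IOError has been an
alias of OSError since Python 3.3, so catching both was redundant. Quick check
in any Python 3 interpreter:

    >>> IOError is OSError
    True
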
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher
 import html
 import os
 import re

@@ -5,7 +5,7 @@ from lxml import etree
 
 NS = {
     'd': 'https://dosage.rocks/xpath',
-    're': 'http://exslt.org/regular-expressions'
+    're': 'http://exslt.org/regular-expressions',
 }
 
 

@@ -196,5 +196,4 @@ def format_name(text):
     """Format a comic name."""
     name = html.unescape(text)
     name = "".join(capfirst(x) for x in name.split(" "))
-    name = asciify(name.translate(TRANS))
-    return name
+    return asciify(name.translate(TRANS))

setup.cfg (20 changed lines)

@@ -4,6 +4,8 @@ description = a comic strip downloader and archiver
 long_description = file: README.md
 long_description_content_type = text/markdown
 url = https://dosage.rocks
+maintainer = Tobias Gruetzmacher
+maintainer_email = tobias-dosage@23.gs
 license = MIT License
 license_file = COPYING
 platforms = Any

@@ -23,8 +25,6 @@ classifiers =
     Topic :: Internet :: WWW/HTTP
     Topic :: Multimedia :: Graphics
 keywords = comic,webcomic,downloader,archiver,crawler
-maintainer = Tobias Gruetzmacher
-maintainer_email = tobias-dosage@23.gs
 project_urls =
     Code = https://github.com/webcomics/dosage
     Issue tracker = https://github.com/webcomics/dosage/issues

@@ -56,16 +56,26 @@ bash =
 css =
     cssselect
 dev =
-    pytest-cov
-    pytest-xdist
-    responses
     flake8
     flake8-2020;python_version>'3.5'
+    flake8-breakpoint;python_version>'3.5'
+    flake8-bugbear
     flake8-coding
+    flake8-commas
+    flake8-comprehensions
+    flake8-eradicate
+    flake8-fixme
+    flake8-functions
     flake8-future-import
+    flake8-logging-format
     flake8-no-fstring;python_version>'3.5'
     flake8-pytest
     flake8-pytest-style;python_version>'3.5'
+    flake8-strings;python_version>'3.5'
+    pytest-cov
+    pytest-xdist
+    responses
+    setup-cfg-fmt
 
 [bdist_wheel]
 universal = 1

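Note: the expanded dev extra above is what you would pull in for local linting
and tests. One plausible invocation from a checkout (a suggestion, not part of
this commit; the quotes guard against shell globbing on the brackets):

    pip install -e ".[dev]"
    flake8
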
@@ -26,7 +26,7 @@ def get_test_scrapers():
         # complex _ParserScraper
         'GoComics/CalvinAndHobbes',
         # _WordPressScraper
-        'GrrlPower'
+        'GrrlPower',
     ]
     scraper_pattern = re.compile('^(' + '|'.join(testscrapernames) + ')$')
 

@@ -39,7 +39,7 @@ def get_test_scrapers():
 def pytest_generate_tests(metafunc):
     if 'scraperobj' in metafunc.fixturenames:
         scrapers = get_test_scrapers()
-        scraperids = list(x.name for x in scrapers)
+        scraperids = [x.name for x in scrapers]
         metafunc.parametrize('scraperobj', scrapers, ids=scraperids)
 
 

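Note: the scraperids rewrite above is the pattern flake8-comprehensions flags
as C400 (unnecessary generator): wrapping a generator expression in list()
where a list comprehension is the direct spelling. Both produce equal lists; a
standalone sketch with made-up values:

    names = ['GrrlPower', 'GoComics/CalvinAndHobbes']
    assert list(n.lower() for n in names) == [n.lower() for n in names]
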
@@ -176,5 +176,5 @@ class TestDosage(object):
         images = data['pages'][page]['images']
         assert len(images) == 2
 
-        for imgurl, imgfile in images.items():
+        for imgfile in images.values():
             assert directory.join(imgfile).check(file=1)

tox.ini (3 changed lines)

@@ -43,3 +43,6 @@ ignore = E127,E128,E241,FI12,FI14,FI15,FI50,FI51,FI53,FI54,FI55,W504
 [pytest]
 filterwarnings = default
 junit_family = xunit2
+
+[isort]