Merge pull request #163 from Techwolfy/upstream-nsfw-tagging

Fix missing NSFW tags on several comics
Tobias Gruetzmacher 2020-04-29 21:31:42 +02:00 committed by GitHub
commit 684ba1f909
10 changed files with 48 additions and 27 deletions
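
Most of the changes below simply set the `adult` attribute on individual comic modules. As background, a downloader can use such a flag to skip NSFW modules unless the user explicitly opts in; the following is a minimal sketch of that filtering step (the helper name, the `scrapers` list and the `allow_adult` option are illustrative assumptions, not dosage's actual implementation):

def select_comics(scrapers, allow_adult=False):
    # Keep only modules the user is allowed to fetch; 'adult' defaults to False.
    selected = []
    for scraper in scrapers:
        if getattr(scraper, 'adult', False) and not allow_adult:
            # Tagged NSFW and the opt-in flag was not given, so skip it.
            continue
        selected.append(scraper)
    return selected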


@@ -352,20 +352,17 @@ class AntiheroForHire(_ParserScraper):
     imageSearch = '//div[@class="image-wrapper"]//img[not(@class="thumb-image")]'
     multipleImagesPerStrip = True
     endOfLife = True
-    archive = []
 
     def starter(self):
         # Build list of chapters for navigation
         page = self.getPage(self.url)
-        archiveLinks = page.xpath('//ul[@class="archive-group-list"]//a[contains(@class, "archive-item-link")]')
-        for link in archiveLinks:
-            self.archive.append(link.get('href'))
-        return self.archive[0]
+        self.chapters = page.xpath('//ul[@class="archive-group-list"]//a[contains(@class, "archive-item-link")]/@href')
+        return self.chapters[0]
 
     def getPrevUrl(self, url, data):
         # Retrieve previous chapter from list
-        index = self.archive.index(url) + 1
-        return self.archive[index] if index < len(self.archive) else None
+        index = self.chapters.index(url) + 1
+        return self.chapters[index] if index < len(self.chapters) else None
 
 
 class AppleGeeks(_BasicScraper):
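
Besides the rename, the refactor above replaces the class-level archive = [] with a self.chapters list built inside starter(). A class-level mutable default is shared by every instance, so repeated use can accumulate stale entries; a tiny standalone illustration of that pitfall (hypothetical class, not part of the scraper API):

class Archive:
    links = []                       # one list shared by the whole class

    def load(self, new_links):
        for link in new_links:
            self.links.append(link)  # mutates the shared class attribute

first = Archive()
first.load(['/chapter-2', '/chapter-1'])
second = Archive()
print(second.links)                  # ['/chapter-2', '/chapter-1'] leaks into the new instance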


@@ -123,6 +123,7 @@ class BetterDays(_ParserScraper):
     firstStripUrl = stripUrl % '2003/04/post-2'
     imageSearch = '//img[contains(@src, "/betterdays/comic/")]'
     prevSearch = '//a[contains(text(), "Previous")]'
+    adult = True
     endOfLife = True
     help = 'Index format: yyyy/mm/<your guess>'


@@ -49,7 +49,7 @@ class ComicFury(_ParserScraper):
     help = 'Index format: n'
     starter = bounceStarter
 
-    def __init__(self, name, sub, lang=None):
+    def __init__(self, name, sub, lang=None, adult=False):
         super(ComicFury, self).__init__('ComicFury/' + name)
         self.prefix = name
         self.url = 'http://%s.webcomic.ws/comics/' % sub
@@ -57,6 +57,8 @@ class ComicFury(_ParserScraper):
         self.firstStripUrl = self.stripUrl % '1'
         if lang:
             self.lang = lang
+        if adult:
+            self.adult = adult
 
     def namer(self, image_url, page_url):
         parts = page_url.split('/')
@@ -376,7 +378,7 @@ class ComicFury(_ParserScraper):
             cls('Droned', 'droned'),
             cls('DRouggs', 'drouggs'),
             cls('DrugsAndKisses', 'd-and-k'),
-            cls('Druids', 'druids'),
+            cls('Druids', 'druids', adult=True),
             cls('DubCity', 'dubcity'),
             cls('DueEast', 'dueeast'),
             cls('DuelingHeroes', 'duelingheroes'),


@@ -95,6 +95,7 @@ class Flipside(_ParserScraper):
     firstStripUrl = stripUrl % '1'
     imageSearch = '//img[contains(@src, "comic/")]'
     prevSearch = '//a[@rel="prev"]'
+    adult = True
     help = 'Index format: nnnn'


@@ -10,6 +10,8 @@ class KeenSpot(_ParserScraper):
     multipleImagesPerStrip = True
     imageSearch = (
         '//img[contains(@src, "/comics/")]',
+        # Gene Catlow Alternate
+        '//img[contains(@src, "/altcomics/")]',
         # Shockwave Darkside
         '//img[contains(@src, "/comics2D/")]',
         '//img[contains(@src, "com/shockwave")]',
@@ -34,7 +36,7 @@ class KeenSpot(_ParserScraper):
     )
     help = 'Index format: yyyymmdd'
 
-    def __init__(self, name, sub, last=None, path='d/%s.html'):
+    def __init__(self, name, sub, last=None, adult=False, path='d/%s.html'):
         super(KeenSpot, self).__init__('KeenSpot/' + name)
         self.url = 'http://%s.keenspot.com/' % sub
         self.stripUrl = self.url + path
@@ -43,12 +45,16 @@ class KeenSpot(_ParserScraper):
             self.url = self.stripUrl % last
             self.endOfLife = True
 
+        if adult:
+            self.adult = adult
+
     @classmethod
     def getmodules(cls):
         return (
             # Not on frontpage...
             cls('Buzzboy', 'buzzboy'),
             cls('EveryoneLovesAdis', 'adis'),
+            cls('GeneCatlowAlternate', 'genecatlow', last='20170302', adult=True, path='altd/%s.html'),
 
             # do not edit anything below since these entries are generated from
             # scripts/update_plugins.sh
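
Following the constructor above, the new GeneCatlowAlternate entry combines sub='genecatlow', path='altd/%s.html' and last='20170302', so the module is pinned to its final strip and marked endOfLife. A quick sketch of the URL that substitution produces:

url = 'http://%s.keenspot.com/' % 'genecatlow'   # sub
stripUrl = url + 'altd/%s.html'                  # path
print(stripUrl % '20170302')                     # http://genecatlow.keenspot.com/altd/20170302.html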


@@ -103,22 +103,30 @@ class Nicky510(_WPNavi):
 class Nightshift(_ParserScraper):
-    url = 'http://poecatcomix.com/comic-titles/nightshift/'
-    stripUrl = url + '%s/'
-    firstStripUrl = stripUrl % 'nightshift-volume1/ns-chapter-1'
-    imageSearch = '//div[@id="gallery-1"]//img'
-    prevSearch = ('//a[./span[text()="PAST CHAPTER"]]',
-                  '//a[./span[text()="LAST CHAPTER"]]')
-    latestSearch = '//a[./img[contains(@src, "Latest-Page")]]'
-    starter = indirectStarter
-    multipleImagesPerStrip = True
+    url = 'https://poecatcomix.com/nightshift-static/'
+    stripUrl = 'https://poecatcomix.com/comic/%s/'
+    firstStripUrl = stripUrl % 'ns1-page-cover'
+    imageSearch = '//div[@class="mangapress-media-img"]/img'
+    prevSearch = '//li[@class="link-prev"]/a'
+    latestSearch = '//li[@class="link-last"]/a/@href'
     adult = True
 
+    def starter(self):
+        # Build list of chapters for navigation
+        indexPage = self.getPage(self.url)
+        self.chapters = indexPage.xpath('//a[./img[contains(@class, "attachment-large")]]/@href')
+        chapterPage = self.getPage(self.chapters[-1])
+        return chapterPage.xpath(self.latestSearch)[0]
+
+    def getPrevUrl(self, url, data):
+        # Retrieve previous chapter from list
+        if url in self.chapters:
+            chapterPage = self.getPage(self.chapters[self.chapters.index(url) - 1])
+            return chapterPage.xpath(self.latestSearch)[0]
+        return super(Nightshift, self).getPrevUrl(url, data)
+
     def namer(self, imageUrl, pageUrl):
-        # Prepend chapter title to page filenames
-        chapter = pageUrl.rstrip('/').rsplit('/', 1)[-1].replace('ns-', 'ns1-')
-        page = imageUrl.rsplit('/', 1)[-1]
-        return chapter + '_' + page
+        return pageUrl.rstrip('/').rsplit('/', 1)[-1] + '.' + imageUrl.rsplit('.', 1)[-1]
 
 
 class Nimona(_ParserScraper):
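
The namer change mirrors the site's switch from chapter galleries to one image per page: the old version joined a chapter slug and an image filename, while the new one names each file after its page slug plus the image extension. A quick check of the new expression (the page URL is the firstStripUrl from above; the image URL is made up for illustration):

pageUrl = 'https://poecatcomix.com/comic/ns1-page-cover/'
imageUrl = 'https://poecatcomix.com/wp-content/uploads/ns1-page-cover.jpg'
print(pageUrl.rstrip('/').rsplit('/', 1)[-1] + '.' + imageUrl.rsplit('.', 1)[-1])
# -> ns1-page-cover.jpg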


@@ -133,6 +133,7 @@ class OriginalLife(_ParserScraper):
     firstStripUrl = stripUrl % '2009/06/001'
     imageSearch = '//img[contains(@src, "/originallife/comic/")]'
     prevSearch = '//a[contains(text(), "Previous")]'
+    adult = True
     help = 'Index format: yyyy/mm/<your guess>'
@@ -149,6 +150,7 @@ class OutOfPlacers(_WordPressScraper):
     url = 'http://www.valsalia.com/'
     stripUrl = url + 'comic/%s/'
     firstStripUrl = stripUrl % 'prologue/01'
+    adult = True
 
 
 class OverCompensating(_BasicScraper):


@@ -67,6 +67,7 @@ class PeanutBerrySundae(_ParserScraper):
                    '//img[contains(@src, "page")]')
     latestSearch = '//a[contains(@href, "peanut-berry-sundae")]'
     starter = indirectStarter
+    adult = True
 
     def getPrevUrl(self, url, data):
         # Replace missing navigation links


@@ -19,19 +19,21 @@ class PetiteSymphony(_WPNavi):
     @classmethod
     def getmodules(cls):
         return (
-            cls("knuckleup"),
-            cls("sangria"),
+            cls('knuckleup'),
+            cls('sangria'),
         )
 
 
 class ComicsBreak(_WordPressScraper):
-    def __init__(self, name, archive=None):
+    def __init__(self, name, archive=None, adult=False):
         super(ComicsBreak, self).__init__('ComicsBreak/' + name)
         self.url = 'http://%s.comicsbreak.com/' % name.lower()
         if archive:
             self.url = 'https://web.archive.org/web/{}/{}'.format(
                 archive, self.url)
             self.endOfLife = True
+        if adult:
+            self.adult = adult
 
     def namer(self, imageUrl, pageUrl):
         if self.name == 'ComicsBreak/Djandora':
@@ -49,5 +51,5 @@ class ComicsBreak(_WordPressScraper):
     def getmodules(cls):
         return (
             cls('Djandora', archive='20170923062433'),
-            cls("Generation17"),
+            cls('Generation17', adult=True),
         )


@@ -138,6 +138,7 @@ class Ryugou(_WPWebcomic):
     stripUrl = url + 'comic/%s/'
     firstStripUrl = 'ryugou-chapter-1-cover'
     starter = bounceStarter
+    adult = True
 
     def namer(self, imageUrl, pageUrl):
         title = pageUrl.rstrip('/').rsplit('/', 1)[-1]