Merge pull request #163 from Techwolfy/upstream-nsfw-tagging
Fix missing NSFW tags on several comics
This commit is contained in:
commit
684ba1f909
10 changed files with 48 additions and 27 deletions
|
@ -352,20 +352,17 @@ class AntiheroForHire(_ParserScraper):
|
|||
imageSearch = '//div[@class="image-wrapper"]//img[not(@class="thumb-image")]'
|
||||
multipleImagesPerStrip = True
|
||||
endOfLife = True
|
||||
archive = []
|
||||
|
||||
def starter(self):
|
||||
# Build list of chapters for navigation
|
||||
page = self.getPage(self.url)
|
||||
archiveLinks = page.xpath('//ul[@class="archive-group-list"]//a[contains(@class, "archive-item-link")]')
|
||||
for link in archiveLinks:
|
||||
self.archive.append(link.get('href'))
|
||||
return self.archive[0]
|
||||
self.chapters = page.xpath('//ul[@class="archive-group-list"]//a[contains(@class, "archive-item-link")]/@href')
|
||||
return self.chapters[0]
|
||||
|
||||
def getPrevUrl(self, url, data):
|
||||
# Retrieve previous chapter from list
|
||||
index = self.archive.index(url) + 1
|
||||
return self.archive[index] if index < len(self.archive) else None
|
||||
index = self.chapters.index(url) + 1
|
||||
return self.chapters[index] if index < len(self.chapters) else None
|
||||
|
||||
|
||||
class AppleGeeks(_BasicScraper):
|
||||
|
|
|
@ -123,6 +123,7 @@ class BetterDays(_ParserScraper):
|
|||
firstStripUrl = stripUrl % '2003/04/post-2'
|
||||
imageSearch = '//img[contains(@src, "/betterdays/comic/")]'
|
||||
prevSearch = '//a[contains(text(), "Previous")]'
|
||||
adult = True
|
||||
endOfLife = True
|
||||
help = 'Index format: yyyy/mm/<your guess>'
|
||||
|
||||
|
|
|
@ -49,7 +49,7 @@ class ComicFury(_ParserScraper):
|
|||
help = 'Index format: n'
|
||||
starter = bounceStarter
|
||||
|
||||
def __init__(self, name, sub, lang=None):
|
||||
def __init__(self, name, sub, lang=None, adult=False):
|
||||
super(ComicFury, self).__init__('ComicFury/' + name)
|
||||
self.prefix = name
|
||||
self.url = 'http://%s.webcomic.ws/comics/' % sub
|
||||
|
@ -57,6 +57,8 @@ class ComicFury(_ParserScraper):
|
|||
self.firstStripUrl = self.stripUrl % '1'
|
||||
if lang:
|
||||
self.lang = lang
|
||||
if adult:
|
||||
self.adult = adult
|
||||
|
||||
def namer(self, image_url, page_url):
|
||||
parts = page_url.split('/')
|
||||
|
@ -376,7 +378,7 @@ class ComicFury(_ParserScraper):
|
|||
cls('Droned', 'droned'),
|
||||
cls('DRouggs', 'drouggs'),
|
||||
cls('DrugsAndKisses', 'd-and-k'),
|
||||
cls('Druids', 'druids'),
|
||||
cls('Druids', 'druids', adult=True),
|
||||
cls('DubCity', 'dubcity'),
|
||||
cls('DueEast', 'dueeast'),
|
||||
cls('DuelingHeroes', 'duelingheroes'),
|
||||
|
|
|
@ -95,6 +95,7 @@ class Flipside(_ParserScraper):
|
|||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = '//img[contains(@src, "comic/")]'
|
||||
prevSearch = '//a[@rel="prev"]'
|
||||
adult = True
|
||||
help = 'Index format: nnnn'
|
||||
|
||||
|
||||
|
|
|
@ -10,6 +10,8 @@ class KeenSpot(_ParserScraper):
|
|||
multipleImagesPerStrip = True
|
||||
imageSearch = (
|
||||
'//img[contains(@src, "/comics/")]',
|
||||
# Gene Catlow Alternate
|
||||
'//img[contains(@src, "/altcomics/")]',
|
||||
# Shockwave Darkside
|
||||
'//img[contains(@src, "/comics2D/")]',
|
||||
'//img[contains(@src, "com/shockwave")]',
|
||||
|
@ -34,7 +36,7 @@ class KeenSpot(_ParserScraper):
|
|||
)
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
def __init__(self, name, sub, last=None, path='d/%s.html'):
|
||||
def __init__(self, name, sub, last=None, adult=False, path='d/%s.html'):
|
||||
super(KeenSpot, self).__init__('KeenSpot/' + name)
|
||||
self.url = 'http://%s.keenspot.com/' % sub
|
||||
self.stripUrl = self.url + path
|
||||
|
@ -43,12 +45,16 @@ class KeenSpot(_ParserScraper):
|
|||
self.url = self.stripUrl % last
|
||||
self.endOfLife = True
|
||||
|
||||
if adult:
|
||||
self.adult = adult
|
||||
|
||||
@classmethod
|
||||
def getmodules(cls):
|
||||
return (
|
||||
# Not on frontpage...
|
||||
cls('Buzzboy', 'buzzboy'),
|
||||
cls('EveryoneLovesAdis', 'adis'),
|
||||
cls('GeneCatlowAlternate', 'genecatlow', last='20170302', adult=True, path='altd/%s.html'),
|
||||
|
||||
# do not edit anything below since these entries are generated from
|
||||
# scripts/update_plugins.sh
|
||||
|
|
|
@ -103,22 +103,30 @@ class Nicky510(_WPNavi):
|
|||
|
||||
|
||||
class Nightshift(_ParserScraper):
|
||||
url = 'http://poecatcomix.com/comic-titles/nightshift/'
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % 'nightshift-volume1/ns-chapter-1'
|
||||
imageSearch = '//div[@id="gallery-1"]//img'
|
||||
prevSearch = ('//a[./span[text()="PAST CHAPTER"]]',
|
||||
'//a[./span[text()="LAST CHAPTER"]]')
|
||||
latestSearch = '//a[./img[contains(@src, "Latest-Page")]]'
|
||||
starter = indirectStarter
|
||||
multipleImagesPerStrip = True
|
||||
url = 'https://poecatcomix.com/nightshift-static/'
|
||||
stripUrl = 'https://poecatcomix.com/comic/%s/'
|
||||
firstStripUrl = stripUrl % 'ns1-page-cover'
|
||||
imageSearch = '//div[@class="mangapress-media-img"]/img'
|
||||
prevSearch = '//li[@class="link-prev"]/a'
|
||||
latestSearch = '//li[@class="link-last"]/a/@href'
|
||||
adult = True
|
||||
|
||||
def starter(self):
|
||||
# Build list of chapters for navigation
|
||||
indexPage = self.getPage(self.url)
|
||||
self.chapters = indexPage.xpath('//a[./img[contains(@class, "attachment-large")]]/@href')
|
||||
chapterPage = self.getPage(self.chapters[-1])
|
||||
return chapterPage.xpath(self.latestSearch)[0]
|
||||
|
||||
def getPrevUrl(self, url, data):
|
||||
# Retrieve previous chapter from list
|
||||
if url in self.chapters:
|
||||
chapterPage = self.getPage(self.chapters[self.chapters.index(url) - 1])
|
||||
return chapterPage.xpath(self.latestSearch)[0]
|
||||
return super(Nightshift, self).getPrevUrl(url, data)
|
||||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
# Prepend chapter title to page filenames
|
||||
chapter = pageUrl.rstrip('/').rsplit('/', 1)[-1].replace('ns-', 'ns1-')
|
||||
page = imageUrl.rsplit('/', 1)[-1]
|
||||
return chapter + '_' + page
|
||||
return pageUrl.rstrip('/').rsplit('/', 1)[-1] + '.' + imageUrl.rsplit('.', 1)[-1]
|
||||
|
||||
|
||||
class Nimona(_ParserScraper):
|
||||
|
|
|
@ -133,6 +133,7 @@ class OriginalLife(_ParserScraper):
|
|||
firstStripUrl = stripUrl % '2009/06/001'
|
||||
imageSearch = '//img[contains(@src, "/originallife/comic/")]'
|
||||
prevSearch = '//a[contains(text(), "Previous")]'
|
||||
adult = True
|
||||
help = 'Index format: yyyy/mm/<your guess>'
|
||||
|
||||
|
||||
|
@ -149,6 +150,7 @@ class OutOfPlacers(_WordPressScraper):
|
|||
url = 'http://www.valsalia.com/'
|
||||
stripUrl = url + 'comic/%s/'
|
||||
firstStripUrl = stripUrl % 'prologue/01'
|
||||
adult = True
|
||||
|
||||
|
||||
class OverCompensating(_BasicScraper):
|
||||
|
|
|
@ -67,6 +67,7 @@ class PeanutBerrySundae(_ParserScraper):
|
|||
'//img[contains(@src, "page")]')
|
||||
latestSearch = '//a[contains(@href, "peanut-berry-sundae")]'
|
||||
starter = indirectStarter
|
||||
adult = True
|
||||
|
||||
def getPrevUrl(self, url, data):
|
||||
# Replace missing navigation links
|
||||
|
|
|
@ -19,19 +19,21 @@ class PetiteSymphony(_WPNavi):
|
|||
@classmethod
|
||||
def getmodules(cls):
|
||||
return (
|
||||
cls("knuckleup"),
|
||||
cls("sangria"),
|
||||
cls('knuckleup'),
|
||||
cls('sangria'),
|
||||
)
|
||||
|
||||
|
||||
class ComicsBreak(_WordPressScraper):
|
||||
def __init__(self, name, archive=None):
|
||||
def __init__(self, name, archive=None, adult=False):
|
||||
super(ComicsBreak, self).__init__('ComicsBreak/' + name)
|
||||
self.url = 'http://%s.comicsbreak.com/' % name.lower()
|
||||
if archive:
|
||||
self.url = 'https://web.archive.org/web/{}/{}'.format(
|
||||
archive, self.url)
|
||||
self.endOfLife = True
|
||||
if adult:
|
||||
self.adult = adult
|
||||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
if self.name == 'ComicsBreak/Djandora':
|
||||
|
@ -49,5 +51,5 @@ class ComicsBreak(_WordPressScraper):
|
|||
def getmodules(cls):
|
||||
return (
|
||||
cls('Djandora', archive='20170923062433'),
|
||||
cls("Generation17"),
|
||||
cls('Generation17', adult=True),
|
||||
)
|
||||
|
|
|
@ -138,6 +138,7 @@ class Ryugou(_WPWebcomic):
|
|||
stripUrl = url + 'comic/%s/'
|
||||
firstStripUrl = 'ryugou-chapter-1-cover'
|
||||
starter = bounceStarter
|
||||
adult = True
|
||||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
title = pageUrl.rstrip('/').rsplit('/', 1)[-1]
|
||||
|
|
Loading…
Reference in a new issue