Merge pull request #211 from Techwolfy/upstream-new-comics-3
Add 38 comics and Tapastic scraper
This commit is contained in:
commit
9dab501ca9
30 changed files with 541 additions and 300 deletions
|
@ -106,27 +106,6 @@ class AGirlAndHerFed(_ParserScraper):
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
class AHClub(_WPNaviIn):
|
|
||||||
baseUrl = 'http://rickgriffinstudios.com/'
|
|
||||||
url = baseUrl + 'ah-club/'
|
|
||||||
stripUrl = baseUrl + 'comic-post/%s/'
|
|
||||||
firstStripUrl = stripUrl % 'cover'
|
|
||||||
latestSearch = '//a[contains(@title, "Permanent Link")]'
|
|
||||||
starter = indirectStarter
|
|
||||||
nav = {
|
|
||||||
'ah-club-2-cover': 'ah-club-1-page-24',
|
|
||||||
'ah-club-3-cover': 'ah-club-2-page-28',
|
|
||||||
'ah-club-4-cover': 'ah-club-3-page-22',
|
|
||||||
}
|
|
||||||
|
|
||||||
def getPrevUrl(self, url, data):
|
|
||||||
# Links between chapters
|
|
||||||
url = url.rstrip('/').rsplit('/', 1)[-1]
|
|
||||||
if self.nav and url in self.nav:
|
|
||||||
return self.stripUrl % self.nav[url]
|
|
||||||
return super(AHClub, self).getPrevUrl(url, data)
|
|
||||||
|
|
||||||
|
|
||||||
class AhoiPolloi(_ParserScraper):
|
class AhoiPolloi(_ParserScraper):
|
||||||
url = 'https://ahoipolloi.blogger.de/'
|
url = 'https://ahoipolloi.blogger.de/'
|
||||||
stripUrl = url + '?day=%s'
|
stripUrl = url + '?day=%s'
|
||||||
|
|
|
@ -188,10 +188,13 @@ class CavesAndCritters(_WPWebcomic):
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
|
|
||||||
class Centralia2050(_WordPressScraper):
|
class Centralia2050(_ParserScraper):
|
||||||
url = 'http://centralia2050.com/'
|
url = 'https://centralia2050.com/'
|
||||||
stripUrl = url + 'comic/%s/'
|
stripUrl = url + 'comic/%s/'
|
||||||
firstStripUrl = stripUrl % 'ch1cover'
|
firstStripUrl = stripUrl % 'ch1cover'
|
||||||
|
imageSearch = '//div[@id="spliced-comic"]//img'
|
||||||
|
prevSearch = '//a[@class="previous-comic"]'
|
||||||
|
nextSearch = '//a[@class="next-comic"]'
|
||||||
starter = bounceStarter
|
starter = bounceStarter
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
def namer(self, imageUrl, pageUrl):
|
||||||
|
@ -267,9 +270,9 @@ class Cloudscratcher(_ParserScraper):
|
||||||
url = 'http://www.cloudscratcher.com/'
|
url = 'http://www.cloudscratcher.com/'
|
||||||
stripUrl = url + 'comic.php?page=%s'
|
stripUrl = url + 'comic.php?page=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = '//div[@id="main_content"]//img[contains(@src, "comic")]'
|
imageSearch = '//img[contains(@src, "pages/")]'
|
||||||
prevSearch = '//a[./img[contains(@src, "previous-page")]]'
|
prevSearch = '//a[./img[@alt="Previous Page"]]'
|
||||||
latestSearch = '//a[@alt="Newest_Page"]'
|
latestSearch = '//a[./img[@alt="Comic"]]'
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
|
@ -277,7 +280,7 @@ class CollegeCatastrophe(_ParserScraper):
|
||||||
url = 'https://www.tigerknight.com/cc'
|
url = 'https://www.tigerknight.com/cc'
|
||||||
stripUrl = url + '/%s'
|
stripUrl = url + '/%s'
|
||||||
firstStripUrl = stripUrl % '2000-11-10'
|
firstStripUrl = stripUrl % '2000-11-10'
|
||||||
imageSearch = '//img[@class="comic-image"]'
|
imageSearch = '//img[d:class("comic-image")]'
|
||||||
prevSearch = '//a[./span[contains(text(), "Previous")]]'
|
prevSearch = '//a[./span[contains(text(), "Previous")]]'
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
|
|
|
@ -123,6 +123,7 @@ class ComicFury(_ParserScraper):
|
||||||
cls('AgentBishop', 'agentbishop'),
|
cls('AgentBishop', 'agentbishop'),
|
||||||
cls('AHappierKindOfSad', 'ahappierkindofsad'),
|
cls('AHappierKindOfSad', 'ahappierkindofsad'),
|
||||||
cls('AlbinoBrothers', 'albinobros'),
|
cls('AlbinoBrothers', 'albinobros'),
|
||||||
|
cls('Alderwood', 'alderwood'),
|
||||||
cls('AlexanderAndLucasRebooted', 'alexanderandlucas'),
|
cls('AlexanderAndLucasRebooted', 'alexanderandlucas'),
|
||||||
cls('AliaTerra', 'alia-terra'),
|
cls('AliaTerra', 'alia-terra'),
|
||||||
cls('AlienIrony', 'alien-irony'),
|
cls('AlienIrony', 'alien-irony'),
|
||||||
|
@ -336,6 +337,7 @@ class ComicFury(_ParserScraper):
|
||||||
cls('DeadFingers', 'deadfingers'),
|
cls('DeadFingers', 'deadfingers'),
|
||||||
cls('DeadRabbitCa', 'afairtrade'),
|
cls('DeadRabbitCa', 'afairtrade'),
|
||||||
cls('DeepBlue', 'deepblue'),
|
cls('DeepBlue', 'deepblue'),
|
||||||
|
cls('DeerMe', 'deerme'),
|
||||||
cls('DefineHero', 'definehero'),
|
cls('DefineHero', 'definehero'),
|
||||||
cls('DELIA', 'delia'),
|
cls('DELIA', 'delia'),
|
||||||
cls('DemasPokmonAdventure', 'nuzlocke-dema'),
|
cls('DemasPokmonAdventure', 'nuzlocke-dema'),
|
||||||
|
@ -462,6 +464,7 @@ class ComicFury(_ParserScraper):
|
||||||
cls('GrippsBrain', 'grippsbrain'),
|
cls('GrippsBrain', 'grippsbrain'),
|
||||||
cls('GrokBoop', 'grokboop'),
|
cls('GrokBoop', 'grokboop'),
|
||||||
cls('GrowingTroubles', 'growingtroubles'),
|
cls('GrowingTroubles', 'growingtroubles'),
|
||||||
|
cls('Guardia', 'guardia-tales-of-halgeis'),
|
||||||
cls('GUS', 'gus'),
|
cls('GUS', 'gus'),
|
||||||
cls('HalloweenCameoCaper2012', 'halloween2012'),
|
cls('HalloweenCameoCaper2012', 'halloween2012'),
|
||||||
cls('HalloweenCameoCaper2013', 'halloween2013'),
|
cls('HalloweenCameoCaper2013', 'halloween2013'),
|
||||||
|
@ -575,7 +578,6 @@ class ComicFury(_ParserScraper):
|
||||||
cls('KingdomOfTheDinosaurs', 'dinosaurkingdom'),
|
cls('KingdomOfTheDinosaurs', 'dinosaurkingdom'),
|
||||||
cls('KingdomPrettyCure', 'kingdomprettycure'),
|
cls('KingdomPrettyCure', 'kingdomprettycure'),
|
||||||
cls('KirbyVsShyGuy', 'kvsg'),
|
cls('KirbyVsShyGuy', 'kvsg'),
|
||||||
cls('Kitsune', 'kitsune'),
|
|
||||||
cls('KMLsSticks', 'kmlssticks'),
|
cls('KMLsSticks', 'kmlssticks'),
|
||||||
cls('KnavesEnd', 'knavesend'),
|
cls('KnavesEnd', 'knavesend'),
|
||||||
cls('KnightGuy', 'knightguy'),
|
cls('KnightGuy', 'knightguy'),
|
||||||
|
@ -672,7 +674,6 @@ class ComicFury(_ParserScraper):
|
||||||
cls('MonochromeRainbow', 'monobow'),
|
cls('MonochromeRainbow', 'monobow'),
|
||||||
cls('MonsterBait', 'deadnight'),
|
cls('MonsterBait', 'deadnight'),
|
||||||
cls('MonsterInTheKingdom', 'monster'),
|
cls('MonsterInTheKingdom', 'monster'),
|
||||||
cls('MonsterSoup', 'monstersoup'),
|
|
||||||
cls('MonstersWithBenefits', 'failmonsters'),
|
cls('MonstersWithBenefits', 'failmonsters'),
|
||||||
cls('MonstroniverseAdventures', 'monstroniverse'),
|
cls('MonstroniverseAdventures', 'monstroniverse'),
|
||||||
cls('MoonlitBrew', 'moonlitbrew'),
|
cls('MoonlitBrew', 'moonlitbrew'),
|
||||||
|
@ -800,8 +801,6 @@ class ComicFury(_ParserScraper):
|
||||||
cls('RequiemsGate', 'requiemsgate'),
|
cls('RequiemsGate', 'requiemsgate'),
|
||||||
cls('ReSetArt', 'resetfanarts'),
|
cls('ReSetArt', 'resetfanarts'),
|
||||||
cls('ResidentWeirdo', 'residentweirdo'),
|
cls('ResidentWeirdo', 'residentweirdo'),
|
||||||
cls('ResNullius', 'resnullius'),
|
|
||||||
cls('ResNulliusCS', 'resnulliuscs'),
|
|
||||||
cls('ReturnOfWonderland', 'returnofwonderland'),
|
cls('ReturnOfWonderland', 'returnofwonderland'),
|
||||||
cls('Revive', 'revive'),
|
cls('Revive', 'revive'),
|
||||||
cls('RexAfterDark', 'rexafterdark'),
|
cls('RexAfterDark', 'rexafterdark'),
|
||||||
|
@ -845,6 +844,7 @@ class ComicFury(_ParserScraper):
|
||||||
cls('ShakingOffSorceryPL', 'shakingoffsorcery-pl'),
|
cls('ShakingOffSorceryPL', 'shakingoffsorcery-pl'),
|
||||||
cls('ShamanQuest', 'shamanquest'),
|
cls('ShamanQuest', 'shamanquest'),
|
||||||
cls('ShatteredSkies', 'shatteredskies'),
|
cls('ShatteredSkies', 'shatteredskies'),
|
||||||
|
cls('Sharak', 'sharak'),
|
||||||
cls('Shenanigans', 's'),
|
cls('Shenanigans', 's'),
|
||||||
cls('ShenaniganSquares', 'ss-comic'),
|
cls('ShenaniganSquares', 'ss-comic'),
|
||||||
cls('ShikuTheFirstAndFinal', 'shiku'),
|
cls('ShikuTheFirstAndFinal', 'shiku'),
|
||||||
|
@ -856,6 +856,7 @@ class ComicFury(_ParserScraper):
|
||||||
cls('SixteenCandlesHuntersAgency', 'sixteencandles'),
|
cls('SixteenCandlesHuntersAgency', 'sixteencandles'),
|
||||||
cls('Skeeter', 'herecomesskeeter'),
|
cls('Skeeter', 'herecomesskeeter'),
|
||||||
cls('Sketchy', 'sketchy'),
|
cls('Sketchy', 'sketchy'),
|
||||||
|
cls('Skylords', 'skylords'),
|
||||||
cls('SlugMan', 'slug-man'),
|
cls('SlugMan', 'slug-man'),
|
||||||
cls('SmallTownValues', 'smalltownvalues'),
|
cls('SmallTownValues', 'smalltownvalues'),
|
||||||
cls('SmitheeZombieHunter', 'smitheezombiehunter'),
|
cls('SmitheeZombieHunter', 'smitheezombiehunter'),
|
||||||
|
@ -1096,6 +1097,7 @@ class ComicFury(_ParserScraper):
|
||||||
cls('VampireCatgirlPart2', 'vampirecatgirl2'),
|
cls('VampireCatgirlPart2', 'vampirecatgirl2'),
|
||||||
cls('VeldaGirlDetective', 'veldagirldetective'),
|
cls('VeldaGirlDetective', 'veldagirldetective'),
|
||||||
cls('Verboten', 'verboten'),
|
cls('Verboten', 'verboten'),
|
||||||
|
cls('VHV', 'vhv'),
|
||||||
cls('Victory', 'victoryadventures'),
|
cls('Victory', 'victoryadventures'),
|
||||||
cls('ViewHub', 'viewhub'),
|
cls('ViewHub', 'viewhub'),
|
||||||
cls('ViolentBlue', 'violentblue'),
|
cls('ViolentBlue', 'violentblue'),
|
||||||
|
|
|
@ -84,19 +84,6 @@ class DeepFried(_BasicScraper):
|
||||||
help = 'Index format: none'
|
help = 'Index format: none'
|
||||||
|
|
||||||
|
|
||||||
class DeerMe(_ParserScraper):
|
|
||||||
url = 'http://deerme.net/comics/'
|
|
||||||
stripUrl = url + '%s'
|
|
||||||
firstStripUrl = stripUrl % '1'
|
|
||||||
imageSearch = ('//img[@id="comicimage"]', '//img[@id="latestcomicimage"]')
|
|
||||||
prevSearch = '//a[@rel="prev"]'
|
|
||||||
nextSearch = '//a[@rel="next"]'
|
|
||||||
starter = bounceStarter
|
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
|
||||||
return pageUrl.rsplit('/', 1)[-1] + '.' + imageUrl.rsplit('.', 1)[-1]
|
|
||||||
|
|
||||||
|
|
||||||
class Delve(_WordPressScraper):
|
class Delve(_WordPressScraper):
|
||||||
url = 'https://thisis.delvecomic.com/NewWP/'
|
url = 'https://thisis.delvecomic.com/NewWP/'
|
||||||
stripUrl = url + 'comic/%s/'
|
stripUrl = url + 'comic/%s/'
|
||||||
|
@ -239,6 +226,13 @@ class DoemainOfOurOwn(_ParserScraper):
|
||||||
return filename
|
return filename
|
||||||
|
|
||||||
|
|
||||||
|
class DoesNotPlayWellWithOthers(_WPNavi):
|
||||||
|
url = 'http://www.doesnotplaywellwithothers.com/'
|
||||||
|
stripUrl = url + 'comic/%s'
|
||||||
|
firstStripUrl = stripUrl % 'pwc-0001'
|
||||||
|
adult = True
|
||||||
|
|
||||||
|
|
||||||
class DoghouseDiaries(_ParserScraper):
|
class DoghouseDiaries(_ParserScraper):
|
||||||
url = 'http://thedoghousediaries.com/'
|
url = 'http://thedoghousediaries.com/'
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
|
|
|
@ -170,12 +170,13 @@ class Erstwhile(_WPNavi):
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
|
|
||||||
|
|
||||||
class Everblue(_ParserScraper):
|
class Everblue(_ComicControlScraper):
|
||||||
url = 'http://www.everblue-comic.com/comic/'
|
url = 'http://www.everblue-comic.com/comic/'
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = '//article[@id="comic"]//img'
|
|
||||||
prevSearch = '//a[contains(@class, "prev")]'
|
def namer(self, imageUrl, pageUrl):
|
||||||
|
return imageUrl.rsplit('/', 1)[-1].split('-', 1)[1]
|
||||||
|
|
||||||
|
|
||||||
class EverybodyLovesEricRaymond(_ParserScraper):
|
class EverybodyLovesEricRaymond(_ParserScraper):
|
||||||
|
|
|
@ -21,13 +21,22 @@ class FalconTwin(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
class FalseStart(_ParserScraper):
|
class FalseStart(_ParserScraper):
|
||||||
url = 'https://boneitiscomics.com/falsestart.php'
|
baseUrl = 'https://boneitisindustries.com/'
|
||||||
stripUrl = url + '?pg=%s'
|
url = baseUrl + 'comics/false-start/'
|
||||||
firstStripUrl = stripUrl % '1'
|
stripUrl = baseUrl + 'comic/%s/'
|
||||||
imageSearch = '//div[@class="page"]//img'
|
firstStripUrl = stripUrl % 'false-start-chapter-zero-page-1'
|
||||||
prevSearch = '//a[@id="prev"]'
|
imageSearch = '//div[@id="content"]//img[d:class("size-full")]'
|
||||||
|
prevSearch = '//a[./span[d:class("ticon-chevron-left")]]'
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
|
def starter(self):
|
||||||
|
archivePage = self.getPage(self.url)
|
||||||
|
self.archive = archivePage.xpath('//div[contains(@class, "vcex-portfolio-grid")]//a/@href')
|
||||||
|
return self.archive[-1]
|
||||||
|
|
||||||
|
def getPrevUrl(self, url, data):
|
||||||
|
return self.archive[self.archive.index(url) - 1]
|
||||||
|
|
||||||
|
|
||||||
class Faneurysm(_WPNaviIn):
|
class Faneurysm(_WPNaviIn):
|
||||||
url = 'http://hijinksensue.com/comic/think-only-tree/'
|
url = 'http://hijinksensue.com/comic/think-only-tree/'
|
||||||
|
@ -144,7 +153,7 @@ class FoxDad(_ParserScraper):
|
||||||
url = 'https://foxdad.com/'
|
url = 'https://foxdad.com/'
|
||||||
stripUrl = url + 'post/%s'
|
stripUrl = url + 'post/%s'
|
||||||
firstStripUrl = stripUrl % '149683014997/some-people-are-just-different-support-the-comic'
|
firstStripUrl = stripUrl % '149683014997/some-people-are-just-different-support-the-comic'
|
||||||
imageSearch = '//figure[@class="photo-hires-item"]//img'
|
imageSearch = ('//figure[@class="photo-hires-item"]//img', '//figure[@class="tmblr-full"]//img')
|
||||||
prevSearch = '//a[@class="previous-button"]'
|
prevSearch = '//a[@class="previous-button"]'
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
def namer(self, imageUrl, pageUrl):
|
||||||
|
|
|
@ -145,19 +145,6 @@ class GrrlPower(_WordPressScraper):
|
||||||
self.session.add_throttle('grrlpowercomic.com', 1.0, 1.5)
|
self.session.add_throttle('grrlpowercomic.com', 1.0, 1.5)
|
||||||
|
|
||||||
|
|
||||||
class Guardia(_ParserScraper):
|
|
||||||
url = 'https://ssp-comics.com/comics/Guardia/'
|
|
||||||
stripUrl = url + '?page=%s'
|
|
||||||
firstStripUrl = stripUrl % '1'
|
|
||||||
imageSearch = '//img[contains(@src, "comics/Guardia/")]'
|
|
||||||
prevSearch = '//a[./button[@id="prevButton"]]'
|
|
||||||
nextSearch = '//a[./button[@id="nextButton"]]'
|
|
||||||
starter = bounceStarter
|
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
|
||||||
return pageUrl.rsplit('=', 1)[-1] + '.' + imageUrl.rsplit('.', 1)[-1]
|
|
||||||
|
|
||||||
|
|
||||||
class GuildedAge(_WordPressScraper):
|
class GuildedAge(_WordPressScraper):
|
||||||
url = 'http://guildedage.net/'
|
url = 'http://guildedage.net/'
|
||||||
firstStripUrl = url + 'comic/chapter-1-cover/'
|
firstStripUrl = url + 'comic/chapter-1-cover/'
|
||||||
|
|
|
@ -5,12 +5,21 @@
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# Copyright (C) 2019-2020 Daniel Ring
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from ..helpers import bounceStarter, indirectStarter
|
from ..helpers import bounceStarter, indirectStarter
|
||||||
from .common import _ComicControlScraper, _WordPressScraper, _WPNaviIn
|
from .common import _ComicControlScraper, _WordPressScraper, _WPNaviIn
|
||||||
|
|
||||||
|
|
||||||
|
class Hackles(_ParserScraper):
|
||||||
|
url = 'http://hackles.org/'
|
||||||
|
stripUrl = url + 'cgi-bin/archives.pl?request=%s'
|
||||||
|
firstStripUrl = stripUrl % '1'
|
||||||
|
imageSearch = '//img[contains(@src, "strips/")]'
|
||||||
|
prevSearch = '//a[text()="< previous"]'
|
||||||
|
endOfLife = True
|
||||||
|
|
||||||
|
|
||||||
class HagarTheHorrible(_BasicScraper):
|
class HagarTheHorrible(_BasicScraper):
|
||||||
url = 'http://www.hagarthehorrible.net/'
|
url = 'http://www.hagarthehorrible.net/'
|
||||||
stripUrl = 'http://www.hagardunor.net/comicstrips_us.php?serietype=9&colortype=1&serieno=%s'
|
stripUrl = 'http://www.hagardunor.net/comicstrips_us.php?serietype=9&colortype=1&serieno=%s'
|
||||||
|
@ -107,12 +116,6 @@ class HijinksEnsuePhoto(_WPNaviIn):
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
|
|
||||||
|
|
||||||
class Housepets(_WordPressScraper):
|
|
||||||
url = 'http://www.housepetscomic.com/'
|
|
||||||
stripUrl = url + 'comic/%s/'
|
|
||||||
firstStripUrl = '2008/06/02/when-boredom-strikes'
|
|
||||||
|
|
||||||
|
|
||||||
class HowToBeAWerewolf(_ComicControlScraper):
|
class HowToBeAWerewolf(_ComicControlScraper):
|
||||||
url = 'http://howtobeawerewolf.com/'
|
url = 'http://howtobeawerewolf.com/'
|
||||||
stripUrl = url + 'comic/%s'
|
stripUrl = url + 'comic/%s'
|
||||||
|
|
|
@ -35,14 +35,6 @@ class IDreamOfAJeanieBottle(_WordPressScraper):
|
||||||
url = 'http://jeaniebottle.com/'
|
url = 'http://jeaniebottle.com/'
|
||||||
|
|
||||||
|
|
||||||
class InsignificantOtters(_WPWebcomic):
|
|
||||||
stripUrl = 'https://www.thedepthscomic.com/i-otters/%s/'
|
|
||||||
firstStripUrl = stripUrl % 'page-1'
|
|
||||||
url = firstStripUrl
|
|
||||||
imageSearch = '//div[contains(@class, "webcomic-media")]//img'
|
|
||||||
starter = indirectStarter
|
|
||||||
|
|
||||||
|
|
||||||
class InternetWebcomic(_WPNavi):
|
class InternetWebcomic(_WPNavi):
|
||||||
url = 'http://www.internet-webcomic.com/'
|
url = 'http://www.internet-webcomic.com/'
|
||||||
stripUrl = url + '?p=%s'
|
stripUrl = url + '?p=%s'
|
||||||
|
|
|
@ -25,6 +25,7 @@ class KemonoCafe(_ParserScraper):
|
||||||
def namer(self, imageUrl, pageUrl):
|
def namer(self, imageUrl, pageUrl):
|
||||||
# Strip date from filenames
|
# Strip date from filenames
|
||||||
filename = imageUrl.rsplit('/', 1)[-1]
|
filename = imageUrl.rsplit('/', 1)[-1]
|
||||||
|
if not 'ultrarosa' in pageUrl:
|
||||||
if filename[4] == '-' and filename[7] == '-':
|
if filename[4] == '-' and filename[7] == '-':
|
||||||
filename = filename[10:]
|
filename = filename[10:]
|
||||||
if filename[0] == '-' or filename[0] == '_':
|
if filename[0] == '-' or filename[0] == '_':
|
||||||
|
@ -47,6 +48,7 @@ class KemonoCafe(_ParserScraper):
|
||||||
def getmodules(cls):
|
def getmodules(cls):
|
||||||
return (
|
return (
|
||||||
cls('AddictiveScience', 'addictivescience', 'page0001'),
|
cls('AddictiveScience', 'addictivescience', 'page0001'),
|
||||||
|
cls('Bethellium', 'bethellium', 'c01p00'),
|
||||||
cls('CaribbeanBlue', 'cb', 'page000', last='page325'),
|
cls('CaribbeanBlue', 'cb', 'page000', last='page325'),
|
||||||
cls('IMew', 'imew', 'imew00', last='imew50'),
|
cls('IMew', 'imew', 'imew00', last='imew50'),
|
||||||
cls('Knighthood', 'knighthood', 'kh0001'),
|
cls('Knighthood', 'knighthood', 'kh0001'),
|
||||||
|
@ -54,7 +56,9 @@ class KemonoCafe(_ParserScraper):
|
||||||
cls('LasLindas', 'laslindas', 'll0001', adult=True),
|
cls('LasLindas', 'laslindas', 'll0001', adult=True),
|
||||||
cls('Paprika', 'paprika', 'page000'),
|
cls('Paprika', 'paprika', 'page000'),
|
||||||
cls('PracticeMakesPerfect', 'pmp', 'title-001'),
|
cls('PracticeMakesPerfect', 'pmp', 'title-001'),
|
||||||
|
cls('PrincessBunny', 'princessbunny', 'pg001'),
|
||||||
cls('Rascals', 'rascals', 'rascals-pg-0', adult=True),
|
cls('Rascals', 'rascals', 'rascals-pg-0', adult=True),
|
||||||
cls('TheEyeOfRamalach', 'theeye', 'theeye-page01'),
|
cls('TheEyeOfRamalach', 'theeye', 'theeye-page01'),
|
||||||
cls('TinaOfTheSouth', 'tots', 'tos-01-01'),
|
cls('TinaOfTheSouth', 'tots', 'tos-01-01'),
|
||||||
|
cls('UltraRosa', 'ultrarosa', 'pg001'),
|
||||||
)
|
)
|
||||||
|
|
|
@ -187,6 +187,12 @@ class MonsieurLeChien(_BasicScraper):
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
class MonsterSoup(_WordPressScraper):
|
||||||
|
url = 'https://monstersoupcomic.com/'
|
||||||
|
stripUrl = url + '?comic=%s'
|
||||||
|
firstStripUrl = stripUrl % 'chapter-1-cover'
|
||||||
|
|
||||||
|
|
||||||
class Moonlace(_WPWebcomic):
|
class Moonlace(_WPWebcomic):
|
||||||
url = 'https://moonlace.darkbluecomics.com/'
|
url = 'https://moonlace.darkbluecomics.com/'
|
||||||
stripUrl = url + 'comic/%s/'
|
stripUrl = url + 'comic/%s/'
|
||||||
|
|
|
@ -7,63 +7,86 @@ from ..scraper import _ParserScraper
|
||||||
|
|
||||||
|
|
||||||
class MangaDex(_ParserScraper):
|
class MangaDex(_ParserScraper):
|
||||||
imageSearch = '//img[contains(@class, "_images")]/@data-url'
|
|
||||||
prevSearch = '//a[contains(@class, "_prevEpisode")]'
|
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
|
|
||||||
def __init__(self, name, mangaid):
|
def __init__(self, name, mangaId):
|
||||||
super(MangaDex, self).__init__('MangaDex/' + name)
|
super(MangaDex, self).__init__('MangaDex/' + name)
|
||||||
|
|
||||||
baseUrl = 'https://mangadex.org/api/'
|
baseUrl = 'https://api.mangadex.org/'
|
||||||
self.url = baseUrl + '?id=%s&type=manga' % str(mangaid)
|
self.url = baseUrl + 'manga/%s' % mangaId
|
||||||
self.stripUrl = baseUrl + '?id=%s&type=chapter'
|
self.chaptersUrl = baseUrl + 'manga/%s/feed?translatedLanguage[]=en&order[chapter]=desc&limit=500' % mangaId
|
||||||
|
self.stripUrl = baseUrl + 'chapter/%s'
|
||||||
|
self.cdnUrl = baseUrl + 'at-home/server/%s'
|
||||||
|
self.imageUrl = 'https://uploads.mangadex.org/data/%s/%%s'
|
||||||
|
|
||||||
def starter(self):
|
def starter(self):
|
||||||
# Retrieve manga metadata from API
|
# Retrieve manga metadata from API
|
||||||
manga = self.session.get(self.url)
|
mangaData = self.session.get(self.url)
|
||||||
manga.raise_for_status()
|
mangaData.raise_for_status()
|
||||||
mangaData = manga.json()
|
manga = mangaData.json()['data']
|
||||||
|
|
||||||
|
# Retrieve chapter list from API
|
||||||
|
chapterList = []
|
||||||
|
chapterTotal = 1
|
||||||
|
chapterOffset = 0
|
||||||
|
while len(chapterList) < chapterTotal:
|
||||||
|
chapterData = self.session.get(self.chaptersUrl + '&offset=%d' % chapterOffset)
|
||||||
|
chapterData.raise_for_status()
|
||||||
|
chapterBlock = chapterData.json()
|
||||||
|
chapterTotal = chapterBlock['total']
|
||||||
|
chapterOffset = chapterBlock['offset'] + chapterBlock['limit']
|
||||||
|
chapterList.extend(chapterBlock['data'])
|
||||||
|
|
||||||
# Determine if manga is complete and/or adult
|
# Determine if manga is complete and/or adult
|
||||||
if mangaData['manga']['last_chapter'] != '0':
|
if manga['attributes']['lastChapter'] != '0':
|
||||||
for ch in mangaData['chapter']:
|
for chapter in chapterList:
|
||||||
if mangaData['chapter'][ch]['chapter'] == mangaData['manga']['last_chapter']:
|
if chapter['attributes']['chapter'] == manga['attributes']['lastChapter']:
|
||||||
self.endOfLife = True
|
self.endOfLife = True
|
||||||
if mangaData['manga']['hentai'] != '0':
|
break
|
||||||
|
|
||||||
|
if manga['attributes']['contentRating'] != 'safe':
|
||||||
self.adult = True
|
self.adult = True
|
||||||
|
|
||||||
# Prepare chapter list
|
# Prepare chapter list
|
||||||
self.chapters = []
|
self.chapters = []
|
||||||
for ch in mangaData['chapter']:
|
for chapter in chapterList:
|
||||||
if mangaData['chapter'][ch]['lang_code'] != 'gb':
|
if chapter['attributes']['chapter'] == '':
|
||||||
continue
|
continue
|
||||||
if len(self.chapters) < 1:
|
if chapter['attributes']['pages'] == 0:
|
||||||
self.chapters.append(ch)
|
|
||||||
continue
|
continue
|
||||||
if mangaData['chapter'][ch]['chapter'] == mangaData['chapter'][self.chapters[-1]]['chapter']:
|
if len(self.chapters) >= 1 and chapter['attributes']['chapter'] == self.chapters[-1]['attributes']['chapter']:
|
||||||
continue
|
continue
|
||||||
if mangaData['chapter'][ch]['chapter'] == '':
|
self.chapters.append(chapter)
|
||||||
continue
|
|
||||||
self.chapters.append(ch)
|
|
||||||
self.chapters.reverse()
|
self.chapters.reverse()
|
||||||
|
|
||||||
# Find first and last chapter
|
# Find first and last chapter
|
||||||
self.firstStripUrl = self.stripUrl % self.chapters[0]
|
self.firstStripUrl = self.stripUrl % self.chapters[0]['id']
|
||||||
return self.stripUrl % self.chapters[-1]
|
return self.stripUrl % self.chapters[-1]['id']
|
||||||
|
|
||||||
def getPrevUrl(self, url, data):
|
def getPrevUrl(self, url, data):
|
||||||
chapter = url.replace('&type=chapter', '').rsplit('=', 1)[-1]
|
# Determine previous chapter ID from cached list
|
||||||
return self.stripUrl % self.chapters[self.chapters.index(chapter) - 1]
|
chapterId = url.rsplit('/', 1)[-1]
|
||||||
|
chapter = list(filter(lambda c: c['id'] == chapterId, self.chapters))
|
||||||
|
if len(chapter) == 0:
|
||||||
|
return None
|
||||||
|
return self.stripUrl % self.chapters[self.chapters.index(chapter[0]) - 1]['id']
|
||||||
|
|
||||||
def fetchUrls(self, url, data, urlSearch):
|
def fetchUrls(self, url, data, urlSearch):
|
||||||
# Retrieve chapter metadata from API
|
# Retrieve chapter metadata from API
|
||||||
chapterData = json.loads(data.text_content())
|
chapterData = json.loads(data.text_content())
|
||||||
self.chapter = chapterData['chapter']
|
self.chapter = chapterData['data']
|
||||||
|
cdnData = self.session.get(self.cdnUrl % self.chapter['id'])
|
||||||
|
cdnData.raise_for_status()
|
||||||
|
cdnBlock = cdnData.json()
|
||||||
|
|
||||||
# Save link order for position-based filenames
|
# Save link order for position-based filenames
|
||||||
imageUrl = chapterData['server'] + chapterData['hash'] + '/%s'
|
imageUrl = self.imageUrl % cdnBlock['chapter']['hash']
|
||||||
self.imageUrls = [imageUrl % page for page in chapterData['page_array']]
|
self.imageUrls = [imageUrl % page for page in cdnBlock['chapter']['data']]
|
||||||
return self.imageUrls
|
return self.imageUrls
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
def namer(self, imageUrl, pageUrl):
|
||||||
# Construct filename from episode number and page index in array
|
# Construct filename from episode number and page index in array
|
||||||
chapterNum = self.chapter
|
chapterNum = self.chapter['attributes']['chapter'] if self.chapter['attributes']['chapter'] != None else 0
|
||||||
pageNum = self.imageUrls.index(imageUrl)
|
pageNum = self.imageUrls.index(imageUrl)
|
||||||
pageExt = imageUrl.rsplit('.')[-1]
|
pageExt = imageUrl.rsplit('.')[-1]
|
||||||
return '%s-%02d.%s' % (chapterNum, pageNum, pageExt)
|
return '%s-%02d.%s' % (chapterNum, pageNum, pageExt)
|
||||||
|
@ -71,44 +94,58 @@ class MangaDex(_ParserScraper):
|
||||||
@classmethod
|
@classmethod
|
||||||
def getmodules(cls):
|
def getmodules(cls):
|
||||||
return (
|
return (
|
||||||
cls('AttackonTitan', 429),
|
cls('AttackOnTitan', '304ceac3-8cdb-4fe7-acf7-2b6ff7a60613'),
|
||||||
cls('Beastars', 20523),
|
cls('Beastars', 'f5e3baad-3cd4-427c-a2ec-ad7d776b370d'),
|
||||||
cls('BokuNoKokoroNoYabaiYatsu', 23811),
|
cls('BokuNoKokoroNoYabaiYatsu', '3df1a9a3-a1be-47a3-9e90-9b3e55b1d0ac'),
|
||||||
cls('DeliciousinDungeon', 13871),
|
cls('CheerfulAmnesia', 'f9448f90-c068-4b6a-8c85-03d739aef255'),
|
||||||
cls('DragonDrive', 5165),
|
cls('DoChokkyuuKareshiXKanojo', 'efb62763-c940-4495-aba5-69c192a999a4'),
|
||||||
cls('FuguushokuKajishiDakedoSaikyouDesu', 56319),
|
cls('DeliciousinDungeon', 'd90ea6cb-7bc3-4d80-8af0-28557e6c4e17'),
|
||||||
cls('GanbareDoukiChan', 46585),
|
cls('DragonDrive', '5c06ae70-b5cf-431a-bcd5-262a411de527'),
|
||||||
cls('HangingOutWithAGamerGirl', 42490),
|
cls('FuguushokuKajishiDakedoSaikyouDesu', '17b3b648-fd89-4a69-9a42-6068ffbfa7a7'),
|
||||||
cls('HoriMiya', 6770),
|
cls('GanbareDoukiChan', '190616bc-7da6-45fd-abd4-dd2ca656c183'),
|
||||||
cls('HowToOpenATriangularRiceball', 19305),
|
cls('HangingOutWithAGamerGirl', 'de9e3b62-eac5-4c0a-917d-ffccad694381'),
|
||||||
cls('InterspeciesReviewers', 20796),
|
cls('HoriMiya', 'a25e46ec-30f7-4db6-89df-cacbc1d9a900'),
|
||||||
cls('JahySamaWaKujikenai', 22369),
|
cls('HowToOpenATriangularRiceball', '6ebd90ce-d5e8-49c0-a4bc-e02e0f8ecb93'),
|
||||||
cls('JingaiNoYomeToIchaIchaSuru', 22651),
|
cls('HunterXHunter', 'db692d58-4b13-4174-ae8c-30c515c0689c'),
|
||||||
cls('KawaiiJoushiWoKomarasetai', 17910),
|
cls('IchaichasuruToOkaneGaWaichauFutariNoHanashi', '8eaaec7d-7aa7-490e-8d52-5a3d0a28e78b'),
|
||||||
cls('KanojoOkarishimasu', 22151),
|
cls('InterspeciesReviewers', '1b2fddf9-1385-4f3c-b37a-cf86a9428b1a'),
|
||||||
cls('Lv2KaraCheatDattaMotoYuushaKouhoNoMattariIsekaiLife', 33797),
|
cls('JahySamaWaKujikenai', '2f4e5f5b-d930-4266-8c8a-c4cf9a81e51f'),
|
||||||
cls('MaouNoOreGaDoreiElfWoYomeNiShitandaGaDouMederebaIi', 25495),
|
cls('JingaiNoYomeToIchaIchaSuru', '809d2493-df3c-4e72-a57e-3e0026cae9fb'),
|
||||||
cls('ModernMoGal', 30308),
|
cls('KaetteKudasaiAkutsuSan', '737a846b-2e67-4d63-9f7e-f54b3beebac4'),
|
||||||
cls('MyTinySenpaiFromWork', 43610),
|
cls('KawaiiJoushiWoKomarasetai', '23b7cc7a-df89-4049-af28-1fa78f88713e'),
|
||||||
cls('OMaidensinYourSavageSeason', 22030),
|
cls('KanojoOkarishimasu', '32fdfe9b-6e11-4a13-9e36-dcd8ea77b4e4'),
|
||||||
cls('OokamiShounenWaKyouMoUsoOKasaneru', 14569),
|
cls('KoiToUtatane', 'f7d40a27-e289-45b3-9c68-d1cb251897e6'),
|
||||||
cls('OokamiToKoshinryou', 1168),
|
cls('Lv2KaraCheatDattaMotoYuushaKouhoNoMattariIsekaiLife', '58bc83a0-1808-484e-88b9-17e167469e23'),
|
||||||
cls('OtomeYoukaiZakuro', 4533),
|
cls('MaouNoOreGaDoreiElfWoYomeNiShitandaGaDouMederebaIi', '55ace2fb-e157-4d76-9e72-67c6bd762a39'),
|
||||||
cls('OversimplifiedSCP', 32834),
|
cls('ModernMoGal', 'b1953f80-36f7-492c-b0f8-e9dd0ad01752'),
|
||||||
cls('PashiriNaBokuToKoisuruBanchouSan', 25862),
|
cls('MyTinySenpaiFromWork', '28ed63af-61f8-43af-bac3-762030c72963'),
|
||||||
cls('PleaseDontBullyMeNagatoro', 22631),
|
cls('OMaidensinYourSavageSeason', 'c4613b7d-7a6e-48f9-82f0-bce3dd33383a'),
|
||||||
cls('PleaseDontBullyMeNagatoroComicAnthology', 31004),
|
cls('OokamiShounenWaKyouMoUsoOKasaneru', '5e77d9e2-2e44-431a-a995-5fefd411e55e'),
|
||||||
cls('PleaseTellMeGalkochan', 12702),
|
cls('OokamiToKoshinryou', 'de900fd3-c94c-4148-bbcb-ca56eaeb57a4'),
|
||||||
cls('SaekiSanWaNemutteru', 28834),
|
cls('OtomeYoukaiZakuro', 'c1fa97be-0f1f-4686-84bc-806881c97d53'),
|
||||||
cls('SenpaiGaUzaiKouhaiNoHanashi', 23825),
|
cls('OversimplifiedSCP', 'e911fe33-a9b3-43dc-9eb7-f5ee081a6dc8'),
|
||||||
cls('SewayakiKitsuneNoSenkoSan', 22723),
|
cls('PashiriNaBokuToKoisuruBanchouSan', '838e5b3a-51c8-44cf-b6e2-68193416f6fe'),
|
||||||
cls('SousouNoFrieren', 48045),
|
cls('PleaseDontBullyMeNagatoro', 'd86cf65b-5f6c-437d-a0af-19a31f94ec55'),
|
||||||
cls('SwordArtOnline', 1360),
|
cls('PleaseDontBullyMeNagatoroComicAnthology', '2a4bc9ec-2d70-428a-8b46-27f6218ed267'),
|
||||||
cls('SwordArtOnlineProgressive', 9604),
|
cls('PleaseTellMeGalkochan', '7a2f2f6b-a6a6-4149-879b-3fc2f6916549'),
|
||||||
cls('TamenDeGushi', 13939),
|
cls('RebuildWorld', '99182618-ae92-4aec-a5df-518659b7b613'),
|
||||||
cls('TheWolfAndRedRidingHood', 31079),
|
cls('SaekiSanWaNemutteru', 'd9aecdab-8aef-4b90-98d5-32e86faffb28'),
|
||||||
cls('TomoChanWaOnnanoko', 15722),
|
cls('SeijoSamaIieToorisugariNoMamonotsukaiDesu', 'd4c40e73-251a-4bcb-a5a6-1edeec1e00e7'),
|
||||||
cls('TonikakuKawaii', 23439),
|
cls('SenpaiGaUzaiKouhaiNoHanashi', 'af38f328-8df1-4b4c-a272-e737625c3ddc'),
|
||||||
cls('YotsubaAnd', 311),
|
cls('SewayakiKitsuneNoSenkoSan', 'c26269c7-0f5d-4966-8cd5-b79acb86fb7a'),
|
||||||
cls('YuYuHakusho', 1738),
|
cls('ShinNoJitsuryokuWaGirigiriMadeKakushiteIyouToOmou', '22fda941-e603-4601-a536-c3ad6d004ba8'),
|
||||||
|
cls('SoloLeveling', '32d76d19-8a05-4db0-9fc2-e0b0648fe9d0'),
|
||||||
|
cls('SousouNoFrieren', 'b0b721ff-c388-4486-aa0f-c2b0bb321512'),
|
||||||
|
cls('SwordArtOnline', '3dd0b814-23f4-4342-b75b-f206598534f6'),
|
||||||
|
cls('SwordArtOnlineProgressive', '22ea3f54-11e4-4932-a527-89d63d3a62d9'),
|
||||||
|
cls('TadokoroSan', '8ffbfa2f-23fa-4490-848e-942581a4d873'),
|
||||||
|
cls('TamenDeGushi', '3f1453fb-9dac-4aca-a2ea-69613856c952'),
|
||||||
|
cls('TenseiShitaraSlimeDattaKen', 'e78a489b-6632-4d61-b00b-5206f5b8b22b'),
|
||||||
|
cls('TheNewGate', 'b41bef1e-7df9-4255-bd82-ecf570fec566'),
|
||||||
|
cls('TheWolfAndRedRidingHood', 'a7d1283b-ed38-4659-b8bc-47bfca5ccb8a'),
|
||||||
|
cls('TomoChanWaOnnanoko', '76ee7069-23b4-493c-bc44-34ccbf3051a8'),
|
||||||
|
cls('TonikakuKawaii', '30f3ac69-21b6-45ad-a110-d011b7aaadaa'),
|
||||||
|
cls('UramikoiKoiUramikoi', '009b6788-48f3-4e78-975c-097f54def7ab'),
|
||||||
|
cls('YotsubaAnd', '58be6aa6-06cb-4ca5-bd20-f1392ce451fb'),
|
||||||
|
cls('YuYuHakusho', '44a5cbe1-0204-4cc7-a1ff-0fda2ac004b6'),
|
||||||
)
|
)
|
||||||
|
|
|
@ -152,7 +152,7 @@ class NineToNine(_ParserScraper):
|
||||||
url = 'https://www.tigerknight.com/99'
|
url = 'https://www.tigerknight.com/99'
|
||||||
stripUrl = url + '/%s'
|
stripUrl = url + '/%s'
|
||||||
firstStripUrl = stripUrl % '2014-01-01'
|
firstStripUrl = stripUrl % '2014-01-01'
|
||||||
imageSearch = '//img[@class="comic-image"]'
|
imageSearch = '//img[d:class("comic-image")]'
|
||||||
prevSearch = '//a[./span[contains(text(), "Previous")]]'
|
prevSearch = '//a[./span[contains(text(), "Previous")]]'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
|
|
||||||
|
|
|
@ -617,12 +617,15 @@ class Removed(Scraper):
|
||||||
cls('ComicFury/GreenerGrass'),
|
cls('ComicFury/GreenerGrass'),
|
||||||
cls('ComicFury/HelloWanderingStar'),
|
cls('ComicFury/HelloWanderingStar'),
|
||||||
cls('ComicFury/Hodgemosh'),
|
cls('ComicFury/Hodgemosh'),
|
||||||
|
cls('ComicFury/Kitsune'),
|
||||||
cls('ComicFury/LaszloAndEdgar'),
|
cls('ComicFury/LaszloAndEdgar'),
|
||||||
cls('ComicFury/MegamanComic'),
|
cls('ComicFury/MegamanComic'),
|
||||||
cls('ComicFury/PatchworkPeople'),
|
cls('ComicFury/PatchworkPeople'),
|
||||||
cls('ComicFury/PornographyInFiveActs'),
|
cls('ComicFury/PornographyInFiveActs'),
|
||||||
cls('ComicFury/PoussireDeFe'),
|
cls('ComicFury/PoussireDeFe'),
|
||||||
cls('ComicFury/RED'),
|
cls('ComicFury/RED'),
|
||||||
|
cls('ComicFury/ResNullius'),
|
||||||
|
cls('ComicFury/ResNulliusCS'),
|
||||||
cls('ComicFury/Seed'),
|
cls('ComicFury/Seed'),
|
||||||
cls('ComicFury/TheAcryden'),
|
cls('ComicFury/TheAcryden'),
|
||||||
cls('ComicFury/TheHourlyComic'),
|
cls('ComicFury/TheHourlyComic'),
|
||||||
|
@ -955,6 +958,7 @@ class Removed(Scraper):
|
||||||
cls('MangaDex/HeavensDesignTeam', 'legal'),
|
cls('MangaDex/HeavensDesignTeam', 'legal'),
|
||||||
cls('MangaDex/SPYxFAMILY', 'legal'),
|
cls('MangaDex/SPYxFAMILY', 'legal'),
|
||||||
cls('Ryugou'),
|
cls('Ryugou'),
|
||||||
|
cls('SeelPeel'),
|
||||||
cls('SmackJeeves/20TimesKirby'),
|
cls('SmackJeeves/20TimesKirby'),
|
||||||
cls('SmackJeeves/2Kingdoms'),
|
cls('SmackJeeves/2Kingdoms'),
|
||||||
cls('SmackJeeves/355Days'),
|
cls('SmackJeeves/355Days'),
|
||||||
|
@ -1521,11 +1525,16 @@ class Removed(Scraper):
|
||||||
cls('Shivae/CafeAnime'),
|
cls('Shivae/CafeAnime'),
|
||||||
cls('Shivae/Extras'),
|
cls('Shivae/Extras'),
|
||||||
cls('SnafuComics/Titan'),
|
cls('SnafuComics/Titan'),
|
||||||
|
cls('StudioKhimera/Eorah', 'mov'),
|
||||||
cls('StuffNoOneToldMe'),
|
cls('StuffNoOneToldMe'),
|
||||||
|
cls('TaleOfTenThousand'),
|
||||||
cls('TheCyantianChronicles/CookieCaper'),
|
cls('TheCyantianChronicles/CookieCaper'),
|
||||||
cls('TheCyantianChronicles/Pawprints'),
|
cls('TheCyantianChronicles/Pawprints'),
|
||||||
|
cls('VGCats/Adventure'),
|
||||||
|
cls('VGCats/Super'),
|
||||||
cls('VictimsOfTheSystem'),
|
cls('VictimsOfTheSystem'),
|
||||||
cls('WebDesignerCOTW'),
|
cls('WebDesignerCOTW'),
|
||||||
|
cls('WintersLight'),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1560,6 +1569,7 @@ class Renamed(Scraper):
|
||||||
cls('ComicFury/Elektroanthology', 'ComicFury/ElektrosComicAnthology'),
|
cls('ComicFury/Elektroanthology', 'ComicFury/ElektrosComicAnthology'),
|
||||||
cls('ComicFury/ICanSeeYourFeels', 'ComicFury/SeeYourFeels'),
|
cls('ComicFury/ICanSeeYourFeels', 'ComicFury/SeeYourFeels'),
|
||||||
cls('ComicFury/MAGISAupdatesMonWedFri', 'ComicFury/MAGISAPARASAYOupdatesMonFri'),
|
cls('ComicFury/MAGISAupdatesMonWedFri', 'ComicFury/MAGISAPARASAYOupdatesMonFri'),
|
||||||
|
cls('ComicFury/MonsterSoup', 'MonsterSoup'),
|
||||||
cls('ComicFury/OopsComicAdventure', 'OopsComicAdventure'),
|
cls('ComicFury/OopsComicAdventure', 'OopsComicAdventure'),
|
||||||
cls('ComicFury/ThomasAndZachary', 'ComicFury/ThomasAndZacharyArchives'),
|
cls('ComicFury/ThomasAndZachary', 'ComicFury/ThomasAndZacharyArchives'),
|
||||||
cls('ComicGenesis/TheLounge', 'KeenSpot/TheLounge'),
|
cls('ComicGenesis/TheLounge', 'KeenSpot/TheLounge'),
|
||||||
|
@ -1601,18 +1611,24 @@ class Renamed(Scraper):
|
||||||
cls('ZebraGirl', 'ComicFury/ZebraGirl'),
|
cls('ZebraGirl', 'ComicFury/ZebraGirl'),
|
||||||
|
|
||||||
# Renamed in 3.0
|
# Renamed in 3.0
|
||||||
|
cls('AHClub', 'RickGriffinStudios/AHClub'),
|
||||||
cls('CrapIDrewOnMyLunchBreak', 'WebToons/CrapIDrewOnMyLunchBreak'),
|
cls('CrapIDrewOnMyLunchBreak', 'WebToons/CrapIDrewOnMyLunchBreak'),
|
||||||
|
cls('DeerMe', 'ComicFury/DeerMe'),
|
||||||
cls('GoComics/BloomCounty2017', 'GoComics/BloomCounty2019'),
|
cls('GoComics/BloomCounty2017', 'GoComics/BloomCounty2019'),
|
||||||
cls('GoComics/Cathy', 'GoComics/CathyClassics'),
|
cls('GoComics/Cathy', 'GoComics/CathyClassics'),
|
||||||
cls('GoComics/Owlturd', 'GoComics/ShenComix'),
|
cls('GoComics/Owlturd', 'GoComics/ShenComix'),
|
||||||
cls('GoComics/PeanutsEnEspanol', 'GoComics/SnoopyEnEspanol'),
|
cls('GoComics/PeanutsEnEspanol', 'GoComics/SnoopyEnEspanol'),
|
||||||
cls('GoComics/RipleysBelieveItOrNotSpanish', 'GoComics/RipleysAunqueUstedNoLoCrea'),
|
cls('GoComics/RipleysBelieveItOrNotSpanish', 'GoComics/RipleysAunqueUstedNoLoCrea'),
|
||||||
cls('GoComics/WebcomicName', 'WebcomicName'),
|
cls('GoComics/WebcomicName', 'WebcomicName'),
|
||||||
|
cls('Guardia', 'ComicFury/Guardia'),
|
||||||
cls('Shivae/BlackRose', 'BlackRose'),
|
cls('Shivae/BlackRose', 'BlackRose'),
|
||||||
cls('SmackJeeves/BlackTapestries', 'ComicFury/BlackTapestries'),
|
cls('SmackJeeves/BlackTapestries', 'ComicFury/BlackTapestries'),
|
||||||
cls('SmackJeeves/ByTheBook', 'ByTheBook'),
|
cls('SmackJeeves/ByTheBook', 'ByTheBook'),
|
||||||
cls('SmackJeeves/FurryExperience', 'ComicFury/FurryExperience'),
|
cls('SmackJeeves/FurryExperience', 'ComicFury/FurryExperience'),
|
||||||
cls('SmackJeeves/GrowingTroubles', 'ComicFury/GrowingTroubles'),
|
cls('SmackJeeves/GrowingTroubles', 'ComicFury/GrowingTroubles'),
|
||||||
cls('SmackJeeves/TheRealmOfKaerwyn', 'ComicFury/TheRealmOfKaerwyn'),
|
cls('SmackJeeves/TheRealmOfKaerwyn', 'ComicFury/TheRealmOfKaerwyn'),
|
||||||
|
cls('SoloLeveling', 'MangaDex/SoloLeveling'),
|
||||||
cls('StudioKhimera/Draconia', 'Draconia'),
|
cls('StudioKhimera/Draconia', 'Draconia'),
|
||||||
|
cls('TracesOfThePast', 'RickGriffinStudios/TracesOfThePast'),
|
||||||
|
cls('TracesOfThePast/NSFW', 'RickGriffinStudios/TracesOfThePastNSFW'),
|
||||||
)
|
)
|
||||||
|
|
|
@ -33,8 +33,9 @@ class ProjectFuture(_ParserScraper):
|
||||||
cls('Emily', 'emily', '01-00'),
|
cls('Emily', 'emily', '01-00'),
|
||||||
cls('FishingTrip', 'fishing', '01-00'),
|
cls('FishingTrip', 'fishing', '01-00'),
|
||||||
cls('HeadsYouLose', 'heads', '00-01', last='07-12'),
|
cls('HeadsYouLose', 'heads', '00-01', last='07-12'),
|
||||||
|
cls('IPanther', 'panther', '00'),
|
||||||
cls('NiallsStory', 'niall', '00'),
|
cls('NiallsStory', 'niall', '00'),
|
||||||
cls('ProjectFuture', 'strip', '0'),
|
cls('ProjectFuture', 'strip', '0', last='664'),
|
||||||
cls('RedValentine', 'redvalentine', '1', last='6'),
|
cls('RedValentine', 'redvalentine', '1', last='6'),
|
||||||
cls('ShortStories', 'shorts', '01-00'),
|
cls('ShortStories', 'shorts', '01-00'),
|
||||||
cls('StrangeBedfellows', 'bedfellows', '1', last='6'),
|
cls('StrangeBedfellows', 'bedfellows', '1', last='6'),
|
||||||
|
@ -46,4 +47,5 @@ class ProjectFuture(_ParserScraper):
|
||||||
cls('TheSierraChronicles', 'sierra', '0', last='29'),
|
cls('TheSierraChronicles', 'sierra', '0', last='29'),
|
||||||
cls('TheTuppenyMan', 'tuppenny', '00', last='16'),
|
cls('TheTuppenyMan', 'tuppenny', '00', last='16'),
|
||||||
cls('TurningANewPage', 'azrael', '1', last='54'),
|
cls('TurningANewPage', 'azrael', '1', last='54'),
|
||||||
|
cls('Xerian', 'xerian', '01-00'),
|
||||||
)
|
)
|
||||||
|
|
|
@ -116,9 +116,8 @@ class Requiem(_WordPressScraper):
|
||||||
class Replay(_ParserScraper):
|
class Replay(_ParserScraper):
|
||||||
url = 'http://replaycomic.com/'
|
url = 'http://replaycomic.com/'
|
||||||
stripUrl = url + 'comic/%s/'
|
stripUrl = url + 'comic/%s/'
|
||||||
url = stripUrl % 'trying-it-out'
|
|
||||||
firstStripUrl = stripUrl % 'red-desert'
|
firstStripUrl = stripUrl % 'red-desert'
|
||||||
imageSearch = '//div[@id="comic"]//img'
|
imageSearch = '//div[@id="comic"]//img[@alt]'
|
||||||
prevSearch = '//a[contains(@class, "comic-nav-previous")]'
|
prevSearch = '//a[contains(@class, "comic-nav-previous")]'
|
||||||
nextSearch = '//a[contains(@class, "comic-nav-next")]'
|
nextSearch = '//a[contains(@class, "comic-nav-next")]'
|
||||||
|
|
||||||
|
|
59
dosagelib/plugins/rickgriffinstudios.py
Normal file
59
dosagelib/plugins/rickgriffinstudios.py
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
# Copyright (C) 2019-2020 Tobias Gruetzmacher
|
||||||
|
# Copyright (C) 2019-2022 Daniel Ring
|
||||||
|
from ..helpers import indirectStarter
|
||||||
|
from .common import _WordPressScraper, _WPNaviIn
|
||||||
|
|
||||||
|
|
||||||
|
class Housepets(_WordPressScraper):
    # Scraper for the comic hosted at housepetscomic.com.
    url = 'http://www.housepetscomic.com/'
    # Strip pages live below comic/, addressed by a dated slug.
    stripUrl = url + 'comic/%s/'
    # Expand the index through stripUrl so the value is a full page URL,
    # like every other firstStripUrl in these plugins; the bare index could
    # never compare equal to a page URL.
    firstStripUrl = stripUrl % '2008/06/02/when-boredom-strikes'
|
||||||
|
|
||||||
|
|
||||||
|
class RickGriffinStudios(_WPNaviIn):
    # Comics hosted on rickgriffinstudios.com. One instance per comic is
    # created by getmodules().
    baseUrl = 'http://rickgriffinstudios.com/'
    stripUrl = baseUrl + 'comic-post/%s/'
    latestSearch = '//a[contains(@title, "Permanent Link")]'
    starter = indirectStarter
    # Optional mapping: page slug -> slug of the page that precedes it.
    nav = None

    def __init__(self, name, sub, first, last=None, adult=False, nav=None):
        """Configure one comic.

        name  -- comic name, registered as 'RickGriffinStudios/<name>'
        sub   -- path segment of the comic's page below baseUrl
        first -- slug of the first strip
        last  -- slug of the final strip; when given the comic is marked
                 endOfLife and starts directly at that strip
        adult -- select the NSFW variant and flag the comic as adult
        nav   -- dict of slug -> previous slug, used where a page has no
                 usable link to the previous chapter
        """
        super().__init__('RickGriffinStudios/' + name)
        self.url = self.baseUrl + sub + '/'
        self.firstStripUrl = self.stripUrl % first

        if last:
            # Finished comic: start at the known last strip and fall back to
            # the parent's starter instead of indirectStarter.
            self.url = self.stripUrl % last
            self.starter = super(RickGriffinStudios, self).starter
            self.endOfLife = True

        if adult:
            self.latestSearch = '//a[contains(@title, "NSFW")]'
            self.adult = True

        if nav:
            self.nav = nav

    def getPrevUrl(self, url, data):
        """Return the previous page URL, applying the nav overrides first."""
        # Links between chapters. The slug lives in its own variable so the
        # parent still receives the complete page URL, not just its last
        # path segment.
        slug = url.rstrip('/').rsplit('/', 1)[-1]
        if self.nav and slug in self.nav:
            return self.stripUrl % self.nav[slug]
        return super(RickGriffinStudios, self).getPrevUrl(url, data)

    @classmethod
    def getmodules(cls):
        """Return one scraper instance per supported comic."""
        return (
            cls('AHClub', 'ah-club', 'cover', nav={
                'ah-club-2-cover': 'ah-club-1-page-24',
                'ah-club-3-cover': 'ah-club-2-page-28',
                'ah-club-4-cover': 'ah-club-3-page-22',
                'ah-club-5-cover': 'ah-club-4-page-24'
            }),
            cls('HayvenCelestia', 'hayven-celestia', 'skinchange-p1'),
            cls('TheStoryboard', 'the-storyboard', 'the-storyboard-001'),
            cls('TracesOfThePast', 'in-the-new-age', 'totp-page-1'),
            cls('TracesOfThePastNSFW', 'in-the-new-age', 'totp-page-1-nsfw', adult=True),
            cls('ZootopiaNightTerrors', 'zootopia-night-terrors', 'zootopia-night-terrors-p1', 'zootopia-night-terrors-p7'),
        )
|
|
@ -156,13 +156,6 @@ class Science(_ParserScraper):
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
|
|
||||||
|
|
||||||
class SeelPeel(_WPNaviIn):
|
|
||||||
url = 'https://seelpeel.com/'
|
|
||||||
stripUrl = url + 'comic/%s/'
|
|
||||||
firstStripUrl = stripUrl % 'seelpeel-goes-live'
|
|
||||||
multipleImagesPerStrip = True
|
|
||||||
|
|
||||||
|
|
||||||
class SequentialArt(_ParserScraper):
|
class SequentialArt(_ParserScraper):
|
||||||
url = 'https://www.collectedcurios.com/sequentialart.php'
|
url = 'https://www.collectedcurios.com/sequentialart.php'
|
||||||
stripUrl = url + '?s=%s'
|
stripUrl = url + '?s=%s'
|
||||||
|
@ -216,6 +209,12 @@ class Sheldon(_BasicScraper):
|
||||||
help = 'Index format: yymmdd'
|
help = 'Index format: yymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
class Shifters(_WPNavi):
    # Site root; the newest strip is looked up from here.
    url = 'http://shiftersonline.com/'
    # Strip pages sit directly below the root, addressed by slug.
    stripUrl = url + '%s/'
    # Oldest strip in the archive.
    firstStripUrl = stripUrl % 'shifters-redux-promo'
|
||||||
|
|
||||||
|
|
||||||
class ShipInABottle(_WordPressScraper):
|
class ShipInABottle(_WordPressScraper):
|
||||||
url = 'http://shipinbottle.pepsaga.com/'
|
url = 'http://shipinbottle.pepsaga.com/'
|
||||||
stripUrl = url + '?p=%s'
|
stripUrl = url + '?p=%s'
|
||||||
|
@ -356,66 +355,6 @@ class SodiumEyes(_WordPressScraper):
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
|
|
||||||
|
|
||||||
class SoloLeveling(_ParserScraper):
|
|
||||||
url = 'https://w3.sololeveling.net/'
|
|
||||||
stripUrl = url + 'manga/solo-leveling-chapter-%s/'
|
|
||||||
firstStripUrl = stripUrl % '1'
|
|
||||||
imageSearch = '//div[@class="img_container"]//img'
|
|
||||||
prevSearch = '//a[@rel="prev"]'
|
|
||||||
latestSearch = '//table[@class="chap_tab"]//a'
|
|
||||||
starter = indirectStarter
|
|
||||||
multipleImagesPerStrip = True
|
|
||||||
imageUrlFixes = {
|
|
||||||
'94-0_5dd574efda419/28.': '94-0_5dd574efda419/28a.',
|
|
||||||
'92-0_5dc2fcb9ed562/22.': '92-0_5dc2fcb9ed562/22s.',
|
|
||||||
'91-0_5db9b881ac2f0/20k.': '91-0_5db9b881ac2f0/20l.',
|
|
||||||
'91-0_5db9b881ac2f0/23.': '91-0_5db9b881ac2f0/23a.',
|
|
||||||
'90-0_5db08467ca2b1/07.': '90-0_5db08467ca2b1/07a.',
|
|
||||||
'90-0_5db08467ca2b1/09.': '90-0_5db08467ca2b1/09a.',
|
|
||||||
'90-0_5db08467ca2b1/13.': '90-0_5db08467ca2b1/13a.',
|
|
||||||
'90-0_5db08467ca2b1/14.': '90-0_5db08467ca2b1/14a.',
|
|
||||||
'90-0_5db08467ca2b1/21.': '90-0_5db08467ca2b1/21a.',
|
|
||||||
'90-0_5db08467ca2b1/22.': '90-0_5db08467ca2b1/22a.',
|
|
||||||
'88-0_5d9e0dedb942e/03.': '88-0_5d9e0dedb942e/03b.',
|
|
||||||
'88-0_5d9e0dedb942e/05.': '88-0_5d9e0dedb942e/05a.',
|
|
||||||
'88-0_5d9e0dedb942e/30.': '88-0_5d9e0dedb942e/30a.',
|
|
||||||
'87-0_5d94cdebd9df7/01a.': '87-0_5d94cdebd9df7/01c.',
|
|
||||||
}
|
|
||||||
|
|
||||||
def imageUrlModifier(self, imageUrl, data):
|
|
||||||
if 'url=' in imageUrl:
|
|
||||||
imageUrl = imageUrl.split('url=')[1].split('&')[0]
|
|
||||||
for fix in self.imageUrlFixes:
|
|
||||||
imageUrl = imageUrl.replace(fix, self.imageUrlFixes[fix])
|
|
||||||
return imageUrl
|
|
||||||
|
|
||||||
def fetchUrls(self, url, data, urlSearch):
|
|
||||||
# Save link order for position-based filenames
|
|
||||||
self.imageUrls = super(SoloLeveling, self).fetchUrls(url, data, urlSearch)
|
|
||||||
self.imageUrls = [self.imageUrlModifier(x, data) for x in self.imageUrls]
|
|
||||||
return self.imageUrls
|
|
||||||
|
|
||||||
def getPage(self, url):
|
|
||||||
try:
|
|
||||||
return super().getPage(url)
|
|
||||||
except HTTPError as e:
|
|
||||||
# CloudFlare WAF
|
|
||||||
if e.response.status_code == 403 and '1020' in e.response.text:
|
|
||||||
self.geoblocked()
|
|
||||||
else:
|
|
||||||
raise e
|
|
||||||
|
|
||||||
def getPrevUrl(self, url, data):
|
|
||||||
return self.stripUrl % str(int(url.strip('/').rsplit('-', 1)[-1]) - 1)
|
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
|
||||||
# Construct filename from episode number and image position on page
|
|
||||||
episodeNum = pageUrl.strip('/').rsplit('-', 1)[-1]
|
|
||||||
imageNum = self.imageUrls.index(imageUrl)
|
|
||||||
imageExt = imageUrl.rsplit('.', 1)[-1]
|
|
||||||
return "%s-%03d.%s" % (episodeNum, imageNum, imageExt)
|
|
||||||
|
|
||||||
|
|
||||||
class SomethingPositive(_ParserScraper):
|
class SomethingPositive(_ParserScraper):
|
||||||
url = 'https://www.somethingpositive.net/'
|
url = 'https://www.somethingpositive.net/'
|
||||||
stripUrl = url + 'sp%s.shtml'
|
stripUrl = url + 'sp%s.shtml'
|
||||||
|
@ -660,6 +599,19 @@ class StrongFemaleProtagonist(_ParserScraper):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class StupidFox(_ParserScraper):
    # Site root and per-strip page pattern (slug appended to the root).
    url = 'http://stupidfox.net/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % 'hello'
    imageSearch = '//div[@class="comicmid"]//img'
    prevSearch = '//a[@accesskey="p"]'

    def namer(self, imageUrl, pageUrl):
        """Name the file after the comic image's title attribute.

        The strip page is fetched and the first title found by imageSearch
        is used, with ' - ' and spaces turned into dashes; the extension is
        taken from the image URL.
        """
        titles = self.getPage(pageUrl).xpath(self.imageSearch + '/@title')
        stem = titles[0].replace(' - ', '-').replace(' ', '-')
        extension = imageUrl.rsplit('.', 1)[-1]
        return stem + '.' + extension
|
||||||
|
|
||||||
|
|
||||||
class SuburbanJungle(_ParserScraper):
|
class SuburbanJungle(_ParserScraper):
|
||||||
url = 'http://suburbanjungleclassic.com/'
|
url = 'http://suburbanjungleclassic.com/'
|
||||||
stripUrl = url + '?p=%s'
|
stripUrl = url + '?p=%s'
|
||||||
|
@ -675,8 +627,9 @@ class SuburbanJungleRoughHousing(_WordPressScraper):
|
||||||
|
|
||||||
|
|
||||||
class Supercell(_ParserScraper):
|
class Supercell(_ParserScraper):
|
||||||
url = 'https://www.supercellcomic.com/'
|
baseUrl = 'https://www.supercellcomic.com/'
|
||||||
stripUrl = url + 'pages/%s.html'
|
url = baseUrl + 'latest.html'
|
||||||
|
stripUrl = baseUrl + 'pages/%s.html'
|
||||||
firstStripUrl = stripUrl % '0001'
|
firstStripUrl = stripUrl % '0001'
|
||||||
imageSearch = '//img[@class="comicStretch"]'
|
imageSearch = '//img[@class="comicStretch"]'
|
||||||
prevSearch = '//div[@class="comicnav"]/a[./img[contains(@src, "comnav_02")]]'
|
prevSearch = '//div[@class="comicnav"]/a[./img[contains(@src, "comnav_02")]]'
|
||||||
|
@ -707,6 +660,6 @@ class SwordsAndSausages(_ParserScraper):
|
||||||
url = 'https://www.tigerknight.com/ss'
|
url = 'https://www.tigerknight.com/ss'
|
||||||
stripUrl = url + '/%s'
|
stripUrl = url + '/%s'
|
||||||
firstStripUrl = stripUrl % '1-1'
|
firstStripUrl = stripUrl % '1-1'
|
||||||
imageSearch = '//img[@class="comic-image"]'
|
imageSearch = '//img[d:class("comic-image")]'
|
||||||
prevSearch = '//a[./span[contains(text(), "Previous")]]'
|
prevSearch = '//a[./span[contains(text(), "Previous")]]'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
|
|
|
@ -43,16 +43,17 @@ class StudioKhimera(_ParserScraper):
|
||||||
@classmethod
|
@classmethod
|
||||||
def getmodules(cls):
|
def getmodules(cls):
|
||||||
return (
|
return (
|
||||||
cls('Eorah', 'eorah'),
|
|
||||||
cls('Mousechievous', 'mousechievous'),
|
cls('Mousechievous', 'mousechievous'),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class UberQuest(_WordPressScraper):
|
class UberQuest(_WordPressScraper):
|
||||||
name = 'StudioKhimera/UberQuest'
|
name = 'StudioKhimera/UberQuest'
|
||||||
url = 'https://uberquest.studiokhimera.com/'
|
stripUrl = 'https://uberquest.studiokhimera.com/comic/page/%s/'
|
||||||
stripUrl = url + 'comic/%s/'
|
url = stripUrl % 'latest'
|
||||||
firstStripUrl = stripUrl % 'chapter-1-cover'
|
firstStripUrl = stripUrl % 'cover'
|
||||||
|
imageSearch = '//div[@class="prj--comic-image"]/img'
|
||||||
|
prevSearch = '//uq-image-button[d:class("prj--comic-control-prev")]'
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
def namer(self, imageUrl, pageUrl):
|
||||||
# Fix inconsistent filenames
|
# Fix inconsistent filenames
|
||||||
|
|
|
@ -23,14 +23,6 @@ class TailsAndTactics(_ParserScraper):
|
||||||
prevSearch = '//a[text()=" Back"]'
|
prevSearch = '//a[text()=" Back"]'
|
||||||
|
|
||||||
|
|
||||||
class TaleOfTenThousand(_ParserScraper):
|
|
||||||
url = 'http://www.t10k.club/'
|
|
||||||
stripUrl = url + 'comic/%s'
|
|
||||||
firstStripUrl = stripUrl % '1-01_00'
|
|
||||||
imageSearch = '//article[@id="comic"]//img'
|
|
||||||
prevSearch = '//a[d:class("prev")]'
|
|
||||||
|
|
||||||
|
|
||||||
class TekMage(_WPNavi):
|
class TekMage(_WPNavi):
|
||||||
url = 'https://tekmagecomic.com/'
|
url = 'https://tekmagecomic.com/'
|
||||||
stripUrl = url + 'comic/%s/'
|
stripUrl = url + 'comic/%s/'
|
||||||
|
@ -61,6 +53,26 @@ class TheBrads(_ParserScraper):
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
|
|
||||||
|
class TheChroniclesOfHuxcyn(_WordPressScraper):
    url = 'https://huxcyn.com/'
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % 'opening-001'

    def namer(self, imageUrl, pageUrl):
        """Return a normalised file name for the image at imageUrl.

        Known odd upload names are mapped to the 'TCoH...' scheme, names
        starting with '0' get the 'TCoH' prefix, and names starting with
        '3' are renumbered as 'TCoH<40 + n>.<ext>', where n is the number
        between the first and second underscore.
        """
        # Fix inconsistent filenames
        filename = imageUrl.rsplit('/', 1)[-1]
        filename = filename.replace('IMG_0504', 'TCoH109')
        filename = filename.replace('tcoh', 'TCoH')
        filename = filename.replace('1599151639.xizana_f3a6458e-8d94-4259-bec3-5a92706fe493_jpeg', 'october.2020.cover')
        filename = filename.replace('huxonsword', 'october.2020.huxonsword')
        filename = filename.replace('New_Canvas100pageswebimage', 'TCoH100')
        # startswith() instead of indexing, so an empty name cannot raise.
        if filename.startswith('0'):
            filename = 'TCoH' + filename
        elif filename.startswith('3'):
            stem, extension = filename.rsplit('.', 1)
            pagenum = int(stem.split('_', 1)[1].split('_', 1)[0])
            # Keep the '.' between page number and extension; without it the
            # result was e.g. 'TCoH45jpg'.
            filename = 'TCoH' + str(40 + pagenum) + '.' + extension
        return filename
|
||||||
|
|
||||||
|
|
||||||
class TheClassMenagerie(_ParserScraper):
|
class TheClassMenagerie(_ParserScraper):
|
||||||
stripUrl = 'http://www.theclassm.com/d/%s.html'
|
stripUrl = 'http://www.theclassm.com/d/%s.html'
|
||||||
|
@ -278,26 +290,6 @@ class ToonHole(_WordPressScraper):
|
||||||
return url in (self.url + "comic/if-game-of-thrones-was-animated/",)
|
return url in (self.url + "comic/if-game-of-thrones-was-animated/",)
|
||||||
|
|
||||||
|
|
||||||
class TracesOfThePast(_WPNaviIn):
|
|
||||||
baseUrl = 'http://rickgriffinstudios.com/'
|
|
||||||
url = baseUrl + 'in-the-new-age/'
|
|
||||||
stripUrl = baseUrl + 'comic-post/%s/'
|
|
||||||
firstStripUrl = stripUrl % 'totp-page-1'
|
|
||||||
latestSearch = '//a[contains(@title, "Permanent Link")]'
|
|
||||||
starter = indirectStarter
|
|
||||||
|
|
||||||
|
|
||||||
class TracesOfThePastNSFW(_WPNaviIn):
|
|
||||||
name = 'TracesOfThePast/NSFW'
|
|
||||||
baseUrl = 'http://rickgriffinstudios.com/'
|
|
||||||
url = baseUrl + 'in-the-new-age/'
|
|
||||||
stripUrl = baseUrl + 'comic-post/%s/'
|
|
||||||
firstStripUrl = stripUrl % 'totp-page-1-nsfw'
|
|
||||||
latestSearch = '//a[contains(@title, "NSFW")]'
|
|
||||||
starter = indirectStarter
|
|
||||||
adult = True
|
|
||||||
|
|
||||||
|
|
||||||
class TrippingOverYou(_BasicScraper):
|
class TrippingOverYou(_BasicScraper):
|
||||||
url = 'http://www.trippingoveryou.com/'
|
url = 'http://www.trippingoveryou.com/'
|
||||||
stripUrl = url + 'comic/%s'
|
stripUrl = url + 'comic/%s'
|
||||||
|
|
70
dosagelib/plugins/tapastic.py
Normal file
70
dosagelib/plugins/tapastic.py
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
# Copyright (C) 2019-2020 Tobias Gruetzmacher
|
||||||
|
# Copyright (C) 2019-2020 Daniel Ring
|
||||||
|
from ..scraper import _ParserScraper
|
||||||
|
from ..helpers import indirectStarter
|
||||||
|
|
||||||
|
|
||||||
|
class Tapastic(_ParserScraper):
    # Scraper for series hosted on tapas.io. One instance per series is
    # created by getmodules().
    baseUrl = 'https://tapas.io/'
    imageSearch = '//article[contains(@class, "js-episode-article")]//img/@data-src'
    prevSearch = '//a[contains(@class, "js-prev-ep-btn")]'
    latestSearch = '//ul[contains(@class, "js-episode-list")]//a'
    multipleImagesPerStrip = True

    def __init__(self, name, url):
        """Configure one series.

        name -- comic name, registered as 'Tapastic/<name>'
        url  -- the series' path segment below 'series/'
        """
        super(Tapastic, self).__init__('Tapastic/' + name)
        self.url = self.baseUrl + 'series/' + url + '/info'
        self.stripUrl = self.baseUrl + 'episode/%s'

    def starter(self):
        """Return the URL of the newest episode of the series."""
        # Retrieve comic metadata from info page
        info = self.getPage(self.url)
        series = info.xpath('//@data-series-id')[0]
        # Retrieve comic metadata from API
        data = self.session.get(self.baseUrl + 'series/' + series + '/episodes?sort=NEWEST')
        data.raise_for_status()
        episodes = data.json()['data']['body']
        # 'body' is a markup fragment; with sort=NEWEST the first data-id in
        # it is taken as the latest episode.
        return self.stripUrl % episodes.split('data-id="')[1].split('"')[0]

    def getPrevUrl(self, url, data):
        """Return the URL of the episode preceding the one at url."""
        # Retrieve comic metadata from API
        data = self.session.get(url + '/info')
        data.raise_for_status()
        apiData = data.json()['data']
        if apiData['scene'] == 2:
            # The previous episode is recorded as the first strip here --
            # presumably 'scene' numbers episodes from 1; confirm against
            # the API.
            self.firstStripUrl = self.stripUrl % apiData['prev_ep_id']
        return self.stripUrl % apiData['prev_ep_id']

    def fetchUrls(self, url, data, urlSearch):
        """Fetch the matching URLs and remember them for namer()."""
        # Save link order for position-based filenames
        self.imageUrls = super().fetchUrls(url, data, urlSearch)
        return self.imageUrls

    def namer(self, imageUrl, pageUrl):
        """Build '<episode>[-<position>].<ext>' for one image of an episode."""
        # Construct filename from episode number and image position on page
        episodeNum = pageUrl.rsplit('/', 1)[-1]
        imageNum = self.imageUrls.index(imageUrl)
        # The extension belongs to the image, not to the episode page URL.
        imageExt = imageUrl.rsplit('.', 1)[-1]
        if len(self.imageUrls) > 1:
            filename = "%s-%d.%s" % (episodeNum, imageNum, imageExt)
        else:
            filename = "%s.%s" % (episodeNum, imageExt)
        return filename

    @classmethod
    def getmodules(cls):
        """Return one scraper instance per supported series."""
        return (
            # Manually-added comics
            cls('AmpleTime', 'Ample-Time'),
            cls('InsignificantOtters', 'IOtters'),
            cls('NoFuture', 'NoFuture'),
            cls('OrensForge', 'OrensForge'),
            cls('RavenWolf', 'RavenWolf'),
            cls('SyntheticInstinct', 'Synthetic-Instinct'),
            cls('TheCatTheVineAndTheVictory', 'The-Cat-The-Vine-and-The-Victory'),
            cls('VenturaCityDrifters', 'Ventura-City-Drifters'),

            # START AUTOUPDATE
            # END AUTOUPDATE
        )
|
|
@ -4,6 +4,8 @@
|
||||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# Copyright (C) 2019-2020 Daniel Ring
|
||||||
from re import compile
|
from re import compile
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
|
@ -27,7 +29,21 @@ class Undertow(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
class unDivine(_ComicControlScraper):
|
class unDivine(_ComicControlScraper):
|
||||||
url = 'http://undivinecomic.com/'
|
url = 'https://www.undivinecomic.com/'
|
||||||
|
stripUrl = url + 'comic/%s'
|
||||||
|
firstStripUrl = stripUrl % 'page-1'
|
||||||
|
|
||||||
|
def namer(self, imageUrl, pageUrl):
    """Return a cleaned-up file name for the image at imageUrl.

    Spaces become dashes, a handful of UUID-style names are mapped to
    readable ones, and an 11-character prefix is dropped when the name is
    longer than 15 characters, starts with a digit and has '-' at index 10.
    """
    # Fix inconsistent filenames
    renames = (
        ('10B311D9-0992-4D74-AEB8-DAB714DA67C6', 'UD-322'),
        ('99266624-7EF7-4E99-9EC9-DDB5F59CBDFD', 'UD-311'),
        ('33C6A5A1-F703-4A0A-BCD5-DE1A09359D8E', 'UD-310'),
        ('6CE01E81-C299-43C7-A221-8DE0670EFA30', 'ch4endbonusq4'),
        ('DB66D93B-1FE5-49C7-90E0-FFF981DCD6B3', 'bipolar'),
    )
    name = imageUrl.rsplit('/', 1)[-1].replace(' ', '-')
    for odd, readable in renames:
        name = name.replace(odd, readable)
    # NOTE(review): the dropped prefix looks like a 10-digit upload stamp --
    # confirm against real image URLs.
    prefixed = len(name) > 15 and name[0].isdigit() and name[10] == '-'
    return name[11:] if prefixed else name
|
||||||
|
|
||||||
|
|
||||||
class UnicornJelly(_BasicScraper):
|
class UnicornJelly(_BasicScraper):
|
||||||
|
@ -45,13 +61,29 @@ class Unsounded(_ParserScraper):
|
||||||
startUrl = url + 'comic+index/'
|
startUrl = url + 'comic+index/'
|
||||||
stripUrl = url + 'comic/ch%s/ch%s_%s.html'
|
stripUrl = url + 'comic/ch%s/ch%s_%s.html'
|
||||||
firstStripUrl = stripUrl % ('01', '01', '01')
|
firstStripUrl = stripUrl % ('01', '01', '01')
|
||||||
imageSearch = '//img[contains(@src, "pageart/")]'
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
prevSearch = '//a[d:class("back")]'
|
prevSearch = '//a[d:class("back")]'
|
||||||
latestSearch = '//div[@id="chapter_box"][1]//a[last()]'
|
latestSearch = '//div[@id="chapter_box"][1]//a[last()]'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
help = 'Index format: chapter-page'
|
help = 'Index format: chapter-page'
|
||||||
|
|
||||||
|
def fetchUrls(self, url, data, urlSearch):
    """Return the URLs found by urlSearch plus any CSS background art.

    url       -- URL of the page being scraped; relative background paths
                 are resolved against it
    data      -- parsed page tree, serialised here to scan inline styles
    urlSearch -- search expression passed through to the parent class
    """
    imageUrls = super(Unsounded, self).fetchUrls(url, data, urlSearch)
    # Include background for multi-image pages
    # [^)]* stops at the closing parenthesis of this url(...); a greedy
    # '.*' would run on to the last ')' in the serialised page.
    imageRegex = compile(r'background-image: url\((pageart/[^)]*)\)')
    # Serialise to text directly: str() of the bytes result is the b'...'
    # repr, not the markup itself.
    markup = etree.tostring(data, encoding='unicode')
    for match in imageRegex.finditer(markup):
        # Append to the list that is returned (the former target name was
        # undefined) and resolve the relative path against the page URL.
        imageUrls.append(urljoin(url, match.group(1)))
    return imageUrls
|
||||||
|
|
||||||
|
def namer(self, imageUrl, pageUrl):
    """Return the image file name, prefixed with the chapter when needed.

    When the image's base name (text before the first '.') differs from
    the page's, the part of the page name before its first '_' is put in
    front of the image name, joined by '_'.
    """
    imageName = imageUrl.rsplit('/', 1)[-1]
    pageName = pageUrl.rsplit('/', 1)[-1]
    if imageName.split('.', 1)[0] == pageName.split('.', 1)[0]:
        return imageName
    return '_'.join((pageName.split('_', 1)[0], imageName))
|
||||||
|
|
||||||
def getPrevUrl(self, url, data):
|
def getPrevUrl(self, url, data):
|
||||||
# Fix missing navigation links between chapters
|
# Fix missing navigation links between chapters
|
||||||
if 'ch13/you_let_me_fall' in url:
|
if 'ch13/you_let_me_fall' in url:
|
||||||
|
|
|
@ -8,13 +8,22 @@ from ..helpers import bounceStarter, indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class VampireHunterBoyfriends(_ParserScraper):
|
class VampireHunterBoyfriends(_ParserScraper):
|
||||||
url = 'https://boneitiscomics.com/vhb.php'
|
baseUrl = 'https://boneitisindustries.com/'
|
||||||
stripUrl = url + '?pg=%s'
|
url = baseUrl + 'comics/vampire-hunter-boyfriends/'
|
||||||
firstStripUrl = stripUrl % '1'
|
stripUrl = baseUrl + 'comic/%s/'
|
||||||
imageSearch = '//div[@class="page"]//img'
|
firstStripUrl = stripUrl % 'vampire-hunter-boyfriends-chapter-1-cover'
|
||||||
prevSearch = '//a[@id="prev"]'
|
imageSearch = '//div[@id="content"]//img[d:class("size-full")]'
|
||||||
|
prevSearch = '//a[./span[d:class("ticon-chevron-left")]]'
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
|
def starter(self):
    """Load the archive page and return the last link listed on it.

    The full list of links is kept in self.archive for getPrevUrl().
    """
    linkQuery = '//div[contains(@class, "vcex-portfolio-grid")]//a/@href'
    self.archive = self.getPage(self.url).xpath(linkQuery)
    return self.archive[-1]
|
||||||
|
|
||||||
|
def getPrevUrl(self, url, data):
    """Return the archive entry just before url, or None at the start.

    The archive list is built by starter(); it is loaded here on demand
    when starter() has not run yet.
    Raises ValueError if url is not part of the archive.
    """
    archive = getattr(self, 'archive', None)
    if archive is None:
        # starter() fills self.archive as a side effect.
        self.starter()
        archive = self.archive
    position = archive.index(url)
    if position == 0:
        # Index -1 would wrap around to the newest entry; stop instead.
        return None
    return archive[position - 1]
|
||||||
|
|
||||||
|
|
||||||
class Vexxarr(_ParserScraper):
|
class Vexxarr(_ParserScraper):
|
||||||
baseUrl = 'http://www.vexxarr.com/'
|
baseUrl = 'http://www.vexxarr.com/'
|
||||||
|
@ -40,20 +49,6 @@ class VGCats(_ParserScraper):
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
class VGCatsAdventure(VGCats):
|
|
||||||
name = 'VGCats/Adventure'
|
|
||||||
url = 'http://www.vgcats.com/ffxi/'
|
|
||||||
stripUrl = url + '?strip_id=%s'
|
|
||||||
imageSearch = '//p/img[contains(@src, "images/")]'
|
|
||||||
|
|
||||||
|
|
||||||
class VGCatsSuper(VGCats):
|
|
||||||
name = 'VGCats/Super'
|
|
||||||
url = 'http://www.vgcats.com/super/'
|
|
||||||
stripUrl = url + '?strip_id=%s'
|
|
||||||
imageSearch = '//p/img[contains(@src, "images/")]'
|
|
||||||
|
|
||||||
|
|
||||||
class VickiFox(_ParserScraper):
|
class VickiFox(_ParserScraper):
|
||||||
url = 'http://www.vickifox.com/comic/strip'
|
url = 'http://www.vickifox.com/comic/strip'
|
||||||
stripUrl = url + '?id=%s'
|
stripUrl = url + '?id=%s'
|
||||||
|
|
|
@ -133,14 +133,6 @@ class WildeLife(_ComicControlScraper):
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
|
|
||||||
|
|
||||||
class WintersLight(_ParserScraper):
|
|
||||||
url = 'https://winterslight.anaria.net/'
|
|
||||||
stripUrl = url + 'comic/%s'
|
|
||||||
firstStripUrl = stripUrl % 'winterslight00'
|
|
||||||
imageSearch = '//img[contains(@src, "comic/pages/")]'
|
|
||||||
prevSearch = '//a[contains(text(), "Previous")]'
|
|
||||||
|
|
||||||
|
|
||||||
class Wonderella(_BasicScraper):
|
class Wonderella(_BasicScraper):
|
||||||
url = 'http://nonadventures.com/'
|
url = 'http://nonadventures.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
|
@ -232,9 +224,3 @@ class WormWorldSagaGerman(WormWorldSaga):
|
||||||
|
|
||||||
class WormWorldSagaSpanish(WormWorldSaga):
|
class WormWorldSagaSpanish(WormWorldSaga):
|
||||||
lang = 'es'
|
lang = 'es'
|
||||||
|
|
||||||
|
|
||||||
class Wrongside(_WPNavi):
|
|
||||||
url = 'http://www.ayzewi.com/comic/'
|
|
||||||
stripUrl = url + '?comic=%s'
|
|
||||||
firstStripUrl = stripUrl % 'intro-2'
|
|
||||||
|
|
|
@ -115,6 +115,7 @@ class WebToons(_ParserScraper):
|
||||||
cls('CityOfBlank', 'sf/city-of-blank', 1895),
|
cls('CityOfBlank', 'sf/city-of-blank', 1895),
|
||||||
cls('CityOfWalls', 'drama/city-of-wall', 505),
|
cls('CityOfWalls', 'drama/city-of-wall', 505),
|
||||||
cls('CityVamps', 'challenge/city-vamps-', 119224),
|
cls('CityVamps', 'challenge/city-vamps-', 119224),
|
||||||
|
cls('ClawShot', 'challenge/clawshot', 621465),
|
||||||
cls('ClusterFudge', 'slice-of-life/cluster-fudge', 355),
|
cls('ClusterFudge', 'slice-of-life/cluster-fudge', 355),
|
||||||
cls('CodeAdam', 'action/code-adam', 1657),
|
cls('CodeAdam', 'action/code-adam', 1657),
|
||||||
cls('CookingComically', 'tiptoon/cooking-comically', 622),
|
cls('CookingComically', 'tiptoon/cooking-comically', 622),
|
||||||
|
@ -134,7 +135,7 @@ class WebToons(_ParserScraper):
|
||||||
cls('DEADDAYS', 'horror/dead-days', 293),
|
cls('DEADDAYS', 'horror/dead-days', 293),
|
||||||
cls('Debunkers', 'challenge/debunkers', 148475),
|
cls('Debunkers', 'challenge/debunkers', 148475),
|
||||||
cls('DEEP', 'thriller/deep', 364),
|
cls('DEEP', 'thriller/deep', 364),
|
||||||
cls('Defects', 'challenge/defects', 221106),
|
cls('Defects', 'fantasy/defects', 2731),
|
||||||
cls('Denma', 'sf/denma', 921),
|
cls('Denma', 'sf/denma', 921),
|
||||||
cls('Dents', 'sf/dents', 671),
|
cls('Dents', 'sf/dents', 671),
|
||||||
cls('Deor', 'fantasy/deor', 1663),
|
cls('Deor', 'fantasy/deor', 1663),
|
||||||
|
@ -224,6 +225,7 @@ class WebToons(_ParserScraper):
|
||||||
cls('JustPancakes', 'comedy/just-pancakes', 1651),
|
cls('JustPancakes', 'comedy/just-pancakes', 1651),
|
||||||
cls('KidsAreAllRight', 'drama/kids-are-all-right', 283),
|
cls('KidsAreAllRight', 'drama/kids-are-all-right', 283),
|
||||||
cls('Killstagram', 'thriller/killstagram', 1971),
|
cls('Killstagram', 'thriller/killstagram', 1971),
|
||||||
|
cls('KissItGoodbye', 'challenge/kiss-it-goodbye', 443703),
|
||||||
cls('KindOfConfidential', 'romance/kind-of-confidential', 663),
|
cls('KindOfConfidential', 'romance/kind-of-confidential', 663),
|
||||||
cls('KindOfLove', 'slice-of-life/kind-of-love', 1850),
|
cls('KindOfLove', 'slice-of-life/kind-of-love', 1850),
|
||||||
cls('KnightRun', 'sf/knight-run', 67),
|
cls('KnightRun', 'sf/knight-run', 67),
|
||||||
|
@ -317,6 +319,7 @@ class WebToons(_ParserScraper):
|
||||||
cls('RANDOMPHILIA', 'comedy/randomphilia', 386),
|
cls('RANDOMPHILIA', 'comedy/randomphilia', 386),
|
||||||
cls('Rebirth', 'sf/rebirth', 1412),
|
cls('Rebirth', 'sf/rebirth', 1412),
|
||||||
cls('RefundHighSchool', 'fantasy/refundhighschool', 1360),
|
cls('RefundHighSchool', 'fantasy/refundhighschool', 1360),
|
||||||
|
cls('ReturnToPlayer', 'action/return-to-player', 2574),
|
||||||
cls('RiseFromAshes', 'supernatural/rise-from-ashes', 959),
|
cls('RiseFromAshes', 'supernatural/rise-from-ashes', 959),
|
||||||
cls('RoarStreetJournal', 'slice-of-life/roar-street-journal', 397),
|
cls('RoarStreetJournal', 'slice-of-life/roar-street-journal', 397),
|
||||||
cls('RoomOfSwords', 'sf/room-of-swords', 1261),
|
cls('RoomOfSwords', 'sf/room-of-swords', 1261),
|
||||||
|
@ -380,12 +383,14 @@ class WebToons(_ParserScraper):
|
||||||
cls('TheGirlDownstairs', 'romance/the-girl-downstairs', 1809),
|
cls('TheGirlDownstairs', 'romance/the-girl-downstairs', 1809),
|
||||||
cls('THEGIRLFROMCLASS', 'drama/the-girl-from-class', 73),
|
cls('THEGIRLFROMCLASS', 'drama/the-girl-from-class', 73),
|
||||||
cls('TheGodOfHighSchool', 'action/the-god-of-high-school', 66),
|
cls('TheGodOfHighSchool', 'action/the-god-of-high-school', 66),
|
||||||
|
cls('TheGreenhouse', 'challenge/the-greenhouse-gl', 278312),
|
||||||
cls('TheKissBet', 'romance/the-kiss-bet', 1617),
|
cls('TheKissBet', 'romance/the-kiss-bet', 1617),
|
||||||
cls('TheLifeOfTheThreeBears', 'slice-of-life/the-life-of-the-three-bears', 390),
|
cls('TheLifeOfTheThreeBears', 'slice-of-life/the-life-of-the-three-bears', 390),
|
||||||
cls('ThePurpleHeart', 'super-hero/the-purple-heart', 723),
|
cls('ThePurpleHeart', 'super-hero/the-purple-heart', 723),
|
||||||
cls('TheRedBook', 'horror/the-red-book', 467),
|
cls('TheRedBook', 'horror/the-red-book', 467),
|
||||||
cls('TheRedHook', 'super-hero/the-red-hook', 643),
|
cls('TheRedHook', 'super-hero/the-red-hook', 643),
|
||||||
cls('TheRedKing', 'supernatural/the-red-king', 1687),
|
cls('TheRedKing', 'supernatural/the-red-king', 1687),
|
||||||
|
cls('TheRoomies', 'challenge/the-roomies-archive', 513669),
|
||||||
cls('TheShadowProphet', 'drama/the-shadow-prophet', 1881),
|
cls('TheShadowProphet', 'drama/the-shadow-prophet', 1881),
|
||||||
cls('TheSoundOfYourHeart', 'comedy/the-sound-of-your-heart', 269),
|
cls('TheSoundOfYourHeart', 'comedy/the-sound-of-your-heart', 269),
|
||||||
cls('TheSteamDragonExpress', 'fantasy/steam-dragon-express', 1270),
|
cls('TheSteamDragonExpress', 'fantasy/steam-dragon-express', 1270),
|
||||||
|
@ -412,6 +417,7 @@ class WebToons(_ParserScraper):
|
||||||
cls('UnderPrin', 'supernatural/underprin', 78),
|
cls('UnderPrin', 'supernatural/underprin', 78),
|
||||||
cls('UnderTheAegis', 'fantasy/under-the-aegis', 436),
|
cls('UnderTheAegis', 'fantasy/under-the-aegis', 436),
|
||||||
cls('UnholyBlood', 'supernatural/unholy-blood', 1262),
|
cls('UnholyBlood', 'supernatural/unholy-blood', 1262),
|
||||||
|
cls('UnintentionalGame', 'challenge/unintentional-game', 162674),
|
||||||
cls('UnknownCaller', 'thriller/ar-toon', 775),
|
cls('UnknownCaller', 'thriller/ar-toon', 775),
|
||||||
cls('UnlovableReplacement', 'romance/unlovable-replacement', 1762),
|
cls('UnlovableReplacement', 'romance/unlovable-replacement', 1762),
|
||||||
cls('UnluckyIsAsLuckyDoes', 'comedy/unlucky-is-as-lucky-does', 1554),
|
cls('UnluckyIsAsLuckyDoes', 'comedy/unlucky-is-as-lucky-does', 1554),
|
||||||
|
|
75
dosagelib/plugins/wrongside.py
Normal file
75
dosagelib/plugins/wrongside.py
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
# Copyright (C) 2019-2020 Tobias Gruetzmacher
|
||||||
|
# Copyright (C) 2019-2022 Daniel Ring
|
||||||
|
from ..scraper import _ParserScraper
|
||||||
|
from ..helpers import indirectStarter
|
||||||
|
|
||||||
|
|
||||||
|
class Wrongside(_ParserScraper):
    """Scraper for the Wrongside gallery (category 5) on ayzewi.com."""

    baseUrl = 'http://ayzewi.com/maingallery3/'
    url = baseUrl + 'index.php?/category/5'
    stripUrl = baseUrl + 'picture.php?%s'
    firstStripUrl = stripUrl % '/175/category/21'
    imageSearch = '//img[@id="theMainImage"]/@src'
    prevSearch = '//a[contains(@title, "Previous :")]'

    def starter(self):
        """Build ``self.archive`` and return its first entry.

        Every album linked from the category page is fetched, and the last
        thumbnail link of each album is appended to ``self.archive``.
        """
        albumLinks = self.getPage(self.url).xpath('//ul[@class="albThumbs"]//a/@href')
        self.archive = lastPictures = []
        for albumLink in albumLinks:
            albumPage = self.getPage(albumLink)
            lastPictures.append(albumPage.xpath('(//ul[@id="thumbnails"]//a/@href)[last()]')[0])
        return lastPictures[0]

    def getPrevUrl(self, url, data):
        """Return the previous page, jumping to a stored album when needed.

        If the page has no "Previous" link and ``self.archive`` still holds
        entries, the last stored entry is popped and returned; otherwise the
        inherited lookup is used.
        """
        hasPrevLink = bool(data.xpath(self.prevSearch))
        if hasPrevLink or not self.archive:
            return super().getPrevUrl(url, data)
        return self.archive.pop()

    def namer(self, imageUrl, pageUrl):
        """Name the image after the page heading, keeping the file extension.

        The strip page is fetched again to read its ``browsePath`` heading;
        double quotes are removed from the heading text.
        """
        heading = self.getPage(pageUrl).xpath('//div[@class="browsePath"]/h2/text()')[0]
        extension = imageUrl.rsplit('.', 1)[-1]
        return heading.replace('"', '') + '.' + extension
|
||||||
|
|
||||||
|
|
||||||
|
class WrongsideBeginnings(Wrongside):
    """Wrongside scraper variant pointed at gallery category 4."""

    name = 'Wrongside/Beginnings'
    # baseUrl and stripUrl are restated because attributes inherited from
    # Wrongside cannot be referenced by bare name inside this class body.
    baseUrl = 'http://ayzewi.com/maingallery3/'
    stripUrl = baseUrl + 'picture.php?%s'
    url = baseUrl + 'index.php?/category/4'
    firstStripUrl = stripUrl % '/2/category/18'
|
||||||
|
|
||||||
|
|
||||||
|
class WrongsideSideStories(_ParserScraper):
    """Parametrised scraper for the Wrongside side-story galleries."""

    baseUrl = 'http://ayzewi.com/maingallery3/'
    stripUrl = baseUrl + 'picture.php?%s'
    imageSearch = '//img[@id="theMainImage"]/@src'
    prevSearch = '//a[contains(@title, "Previous :")]'
    latestSearch = '(//ul[@id="thumbnails"]//a/@href)[last()]'
    starter = indirectStarter

    def __init__(self, name, category, first, last=None):
        """Configure one side story.

        ``name`` is appended to ``'Wrongside/'`` to form the scraper name,
        ``category`` is the gallery category id and ``first`` the picture id
        of the first strip (both strings). A truthy ``last`` marks the
        comic as finished via ``endOfLife``.
        """
        super().__init__('Wrongside/' + name)
        categoryPath = '/category/' + category
        self.url = self.baseUrl + 'index.php?' + categoryPath
        self.firstStripUrl = self.stripUrl % ('/' + first + categoryPath)

        if last:
            self.endOfLife = True

    @classmethod
    def getmodules(cls):
        """Return one scraper instance per known side story."""
        # (name, category id, first picture id)
        stories = (
            ('AnarkisRising', '7', '302'),
            ('CommonsDreams', '9', '324'),
            ('Faith', '11', '349'),
            ('Sarah', '10', '337'),
            ('ThereAreNoAviansHere', '8', '313'),
            ('TheScientificProphet', '13', '358'),
            ('TheStrangers', '12', '361'),
        )
        return tuple(cls(*story) for story in stories)

    def namer(self, imageUrl, pageUrl):
        """Name the image after the page heading, keeping the file extension.

        The strip page is fetched again to read its ``browsePath`` heading;
        double quotes are removed from the heading text.
        """
        heading = self.getPage(pageUrl).xpath('//div[@class="browsePath"]/h2/text()')[0]
        extension = imageUrl.rsplit('.', 1)[-1]
        return heading.replace('"', '') + '.' + extension
|
|
@ -9,7 +9,7 @@ d=$(dirname $0)
|
||||||
if [ $# -ge 1 ]; then
|
if [ $# -ge 1 ]; then
|
||||||
list="$*"
|
list="$*"
|
||||||
else
|
else
|
||||||
list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot webcomicfactory webtoons"
|
list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot tapastic webcomicfactory webtoons"
|
||||||
fi
|
fi
|
||||||
for script in $list; do
|
for script in $list; do
|
||||||
echo "Executing ${script}.py"
|
echo "Executing ${script}.py"
|
||||||
|
|
36
scripts/tapastic.py
Normal file
36
scripts/tapastic.py
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
# Copyright (C) 2019-2020 Tobias Gruetzmacher
|
||||||
|
# Copyright (C) 2019-2020 Daniel Ring
|
||||||
|
"""
|
||||||
|
Script to get a list of Tapastic comics and save the info in a
|
||||||
|
JSON file for further processing.
|
||||||
|
"""
|
||||||
|
from urllib.parse import urlsplit, parse_qs
|
||||||
|
|
||||||
|
from scriptutil import ComicListUpdater
|
||||||
|
from dosagelib.util import check_robotstxt
|
||||||
|
|
||||||
|
|
||||||
|
class TapasticUpdater(ComicListUpdater):
    """Collects comics from the Tapas "top comics" listing pages."""

    def collect_results(self):
        """Fetch the first 10 listing pages and register every comic found."""
        listingUrl = 'https://tapas.io/comics?browse=ALL&sort_type=LIKE&pageNumber='
        pageCount = 10

        # All pages are downloaded up front, then scanned for title links.
        pages = [self.get_url(listingUrl + str(number), robot=False)
                 for number in range(pageCount)]
        for page in pages:
            for link in page.xpath('//a[@class="preferred title"]'):
                self.add_comic(link.text, link.attrib['href'])

    def get_entry(self, name, url):
        """Format one comic as a ``cls(...)`` source line.

        Spaces and apostrophes are stripped from ``name``; the numeric id is
        read from the ``title_no`` query parameter of ``url``.
        """
        shortName = name.replace(' ', '').replace('\'', '')
        query = parse_qs(urlsplit(url).query)
        titleNum = int(query['title_no'][0])
        # Drop the last path segment and the '/series/' marker.
        path = url.rsplit('/', 1)[0].replace('/series/', '')
        return u"cls('%s', '%s', %d)," % (shortName, path, titleNum)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
TapasticUpdater(__file__).run()
|
|
@ -11,7 +11,7 @@ d=$(dirname $0)
|
||||||
if [ $# -ge 1 ]; then
|
if [ $# -ge 1 ]; then
|
||||||
list="$*"
|
list="$*"
|
||||||
else
|
else
|
||||||
list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot webcomicfactory webtoons"
|
list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot tapastic webcomicfactory webtoons"
|
||||||
fi
|
fi
|
||||||
for script in $list; do
|
for script in $list; do
|
||||||
target="${d}/../dosagelib/plugins/${script}.py"
|
target="${d}/../dosagelib/plugins/${script}.py"
|
||||||
|
|
|
@ -7,8 +7,6 @@ import responses
|
||||||
|
|
||||||
import dosagelib.cmd
|
import dosagelib.cmd
|
||||||
import httpmocks
|
import httpmocks
|
||||||
from dosagelib.plugins.s import SoloLeveling
|
|
||||||
from dosagelib.scraper import GeoblockedException
|
|
||||||
|
|
||||||
|
|
||||||
def cmd(*options):
|
def cmd(*options):
|
||||||
|
@ -43,7 +41,11 @@ class TestModules(object):
|
||||||
cmd('--basepath', str(tmpdir), 'CalvinAndHobbesEnEspanol:2012/07/22')
|
cmd('--basepath', str(tmpdir), 'CalvinAndHobbesEnEspanol:2012/07/22')
|
||||||
|
|
||||||
@responses.activate
|
@responses.activate
|
||||||
|
@pytest.mark.skip(reason="SoloeLeveling was removed, so we have no way to test this...")
|
||||||
def test_sololeveling_geoblock(self, tmpdir):
|
def test_sololeveling_geoblock(self, tmpdir):
|
||||||
|
from dosagelib.plugins.s import SoloLeveling
|
||||||
|
from dosagelib.scraper import GeoblockedException
|
||||||
|
|
||||||
responses.add(responses.GET, 'https://w3.sololeveling.net/',
|
responses.add(responses.GET, 'https://w3.sololeveling.net/',
|
||||||
'<span>1020</span>', status=403)
|
'<span>1020</span>', status=403)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue