Merge pull request #211 from Techwolfy/upstream-new-comics-3

Add 38 comics and Tapastic scraper
Tobias Gruetzmacher 2022-04-16 00:38:12 +02:00 committed by GitHub
commit 9dab501ca9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
30 changed files with 541 additions and 300 deletions


@@ -106,27 +106,6 @@ class AGirlAndHerFed(_ParserScraper):
help = 'Index format: nnn'
class AHClub(_WPNaviIn):
baseUrl = 'http://rickgriffinstudios.com/'
url = baseUrl + 'ah-club/'
stripUrl = baseUrl + 'comic-post/%s/'
firstStripUrl = stripUrl % 'cover'
latestSearch = '//a[contains(@title, "Permanent Link")]'
starter = indirectStarter
nav = {
'ah-club-2-cover': 'ah-club-1-page-24',
'ah-club-3-cover': 'ah-club-2-page-28',
'ah-club-4-cover': 'ah-club-3-page-22',
}
def getPrevUrl(self, url, data):
# Links between chapters
url = url.rstrip('/').rsplit('/', 1)[-1]
if self.nav and url in self.nav:
return self.stripUrl % self.nav[url]
return super(AHClub, self).getPrevUrl(url, data)
class AhoiPolloi(_ParserScraper):
url = 'https://ahoipolloi.blogger.de/'
stripUrl = url + '?day=%s'


@@ -188,10 +188,13 @@ class CavesAndCritters(_WPWebcomic):
adult = True
class Centralia2050(_WordPressScraper):
url = 'http://centralia2050.com/'
class Centralia2050(_ParserScraper):
url = 'https://centralia2050.com/'
stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % 'ch1cover'
imageSearch = '//div[@id="spliced-comic"]//img'
prevSearch = '//a[@class="previous-comic"]'
nextSearch = '//a[@class="next-comic"]'
starter = bounceStarter
def namer(self, imageUrl, pageUrl):
@@ -267,9 +270,9 @@ class Cloudscratcher(_ParserScraper):
url = 'http://www.cloudscratcher.com/'
stripUrl = url + 'comic.php?page=%s'
firstStripUrl = stripUrl % '1'
imageSearch = '//div[@id="main_content"]//img[contains(@src, "comic")]'
prevSearch = '//a[./img[contains(@src, "previous-page")]]'
latestSearch = '//a[@alt="Newest_Page"]'
imageSearch = '//img[contains(@src, "pages/")]'
prevSearch = '//a[./img[@alt="Previous Page"]]'
latestSearch = '//a[./img[@alt="Comic"]]'
starter = indirectStarter
@@ -277,7 +280,7 @@ class CollegeCatastrophe(_ParserScraper):
url = 'https://www.tigerknight.com/cc'
stripUrl = url + '/%s'
firstStripUrl = stripUrl % '2000-11-10'
imageSearch = '//img[@class="comic-image"]'
imageSearch = '//img[d:class("comic-image")]'
prevSearch = '//a[./span[contains(text(), "Previous")]]'
endOfLife = True
multipleImagesPerStrip = True


@@ -123,6 +123,7 @@ class ComicFury(_ParserScraper):
cls('AgentBishop', 'agentbishop'),
cls('AHappierKindOfSad', 'ahappierkindofsad'),
cls('AlbinoBrothers', 'albinobros'),
cls('Alderwood', 'alderwood'),
cls('AlexanderAndLucasRebooted', 'alexanderandlucas'),
cls('AliaTerra', 'alia-terra'),
cls('AlienIrony', 'alien-irony'),
@@ -336,6 +337,7 @@ class ComicFury(_ParserScraper):
cls('DeadFingers', 'deadfingers'),
cls('DeadRabbitCa', 'afairtrade'),
cls('DeepBlue', 'deepblue'),
cls('DeerMe', 'deerme'),
cls('DefineHero', 'definehero'),
cls('DELIA', 'delia'),
cls('DemasPokmonAdventure', 'nuzlocke-dema'),
@@ -462,6 +464,7 @@ class ComicFury(_ParserScraper):
cls('GrippsBrain', 'grippsbrain'),
cls('GrokBoop', 'grokboop'),
cls('GrowingTroubles', 'growingtroubles'),
cls('Guardia', 'guardia-tales-of-halgeis'),
cls('GUS', 'gus'),
cls('HalloweenCameoCaper2012', 'halloween2012'),
cls('HalloweenCameoCaper2013', 'halloween2013'),
@@ -575,7 +578,6 @@ class ComicFury(_ParserScraper):
cls('KingdomOfTheDinosaurs', 'dinosaurkingdom'),
cls('KingdomPrettyCure', 'kingdomprettycure'),
cls('KirbyVsShyGuy', 'kvsg'),
cls('Kitsune', 'kitsune'),
cls('KMLsSticks', 'kmlssticks'),
cls('KnavesEnd', 'knavesend'),
cls('KnightGuy', 'knightguy'),
@@ -672,7 +674,6 @@ class ComicFury(_ParserScraper):
cls('MonochromeRainbow', 'monobow'),
cls('MonsterBait', 'deadnight'),
cls('MonsterInTheKingdom', 'monster'),
cls('MonsterSoup', 'monstersoup'),
cls('MonstersWithBenefits', 'failmonsters'),
cls('MonstroniverseAdventures', 'monstroniverse'),
cls('MoonlitBrew', 'moonlitbrew'),
@@ -800,8 +801,6 @@ class ComicFury(_ParserScraper):
cls('RequiemsGate', 'requiemsgate'),
cls('ReSetArt', 'resetfanarts'),
cls('ResidentWeirdo', 'residentweirdo'),
cls('ResNullius', 'resnullius'),
cls('ResNulliusCS', 'resnulliuscs'),
cls('ReturnOfWonderland', 'returnofwonderland'),
cls('Revive', 'revive'),
cls('RexAfterDark', 'rexafterdark'),
@@ -845,6 +844,7 @@ class ComicFury(_ParserScraper):
cls('ShakingOffSorceryPL', 'shakingoffsorcery-pl'),
cls('ShamanQuest', 'shamanquest'),
cls('ShatteredSkies', 'shatteredskies'),
cls('Sharak', 'sharak'),
cls('Shenanigans', 's'),
cls('ShenaniganSquares', 'ss-comic'),
cls('ShikuTheFirstAndFinal', 'shiku'),
@@ -856,6 +856,7 @@ class ComicFury(_ParserScraper):
cls('SixteenCandlesHuntersAgency', 'sixteencandles'),
cls('Skeeter', 'herecomesskeeter'),
cls('Sketchy', 'sketchy'),
cls('Skylords', 'skylords'),
cls('SlugMan', 'slug-man'),
cls('SmallTownValues', 'smalltownvalues'),
cls('SmitheeZombieHunter', 'smitheezombiehunter'),
@@ -1096,6 +1097,7 @@ class ComicFury(_ParserScraper):
cls('VampireCatgirlPart2', 'vampirecatgirl2'),
cls('VeldaGirlDetective', 'veldagirldetective'),
cls('Verboten', 'verboten'),
cls('VHV', 'vhv'),
cls('Victory', 'victoryadventures'),
cls('ViewHub', 'viewhub'),
cls('ViolentBlue', 'violentblue'),


@@ -84,19 +84,6 @@ class DeepFried(_BasicScraper):
help = 'Index format: none'
class DeerMe(_ParserScraper):
url = 'http://deerme.net/comics/'
stripUrl = url + '%s'
firstStripUrl = stripUrl % '1'
imageSearch = ('//img[@id="comicimage"]', '//img[@id="latestcomicimage"]')
prevSearch = '//a[@rel="prev"]'
nextSearch = '//a[@rel="next"]'
starter = bounceStarter
def namer(self, imageUrl, pageUrl):
return pageUrl.rsplit('/', 1)[-1] + '.' + imageUrl.rsplit('.', 1)[-1]
class Delve(_WordPressScraper):
url = 'https://thisis.delvecomic.com/NewWP/'
stripUrl = url + 'comic/%s/'
@@ -239,6 +226,13 @@ class DoemainOfOurOwn(_ParserScraper):
return filename
class DoesNotPlayWellWithOthers(_WPNavi):
url = 'http://www.doesnotplaywellwithothers.com/'
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'pwc-0001'
adult = True
class DoghouseDiaries(_ParserScraper):
url = 'http://thedoghousediaries.com/'
stripUrl = url + '%s'


@@ -170,12 +170,13 @@ class Erstwhile(_WPNavi):
endOfLife = True
class Everblue(_ParserScraper):
class Everblue(_ComicControlScraper):
url = 'http://www.everblue-comic.com/comic/'
stripUrl = url + '%s'
firstStripUrl = stripUrl % '1'
imageSearch = '//article[@id="comic"]//img'
prevSearch = '//a[contains(@class, "prev")]'
def namer(self, imageUrl, pageUrl):
return imageUrl.rsplit('/', 1)[-1].split('-', 1)[1]
class EverybodyLovesEricRaymond(_ParserScraper):


@@ -21,13 +21,22 @@ class FalconTwin(_BasicScraper):
class FalseStart(_ParserScraper):
url = 'https://boneitiscomics.com/falsestart.php'
stripUrl = url + '?pg=%s'
firstStripUrl = stripUrl % '1'
imageSearch = '//div[@class="page"]//img'
prevSearch = '//a[@id="prev"]'
baseUrl = 'https://boneitisindustries.com/'
url = baseUrl + 'comics/false-start/'
stripUrl = baseUrl + 'comic/%s/'
firstStripUrl = stripUrl % 'false-start-chapter-zero-page-1'
imageSearch = '//div[@id="content"]//img[d:class("size-full")]'
prevSearch = '//a[./span[d:class("ticon-chevron-left")]]'
adult = True
def starter(self):
archivePage = self.getPage(self.url)
self.archive = archivePage.xpath('//div[contains(@class, "vcex-portfolio-grid")]//a/@href')
return self.archive[-1]
def getPrevUrl(self, url, data):
return self.archive[self.archive.index(url) - 1]
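This cached-archive navigation (repeated for VampireHunterBoyfriends later in the diff) fetches the portfolio grid once and then steps backwards through it. A minimal standalone sketch of the pattern, with a hypothetical archive list standing in for the grid XPath results:

# Sketch of the cached-archive pattern above; the URLs are made up.
archive = [
    'https://example.com/comic/page-1/',
    'https://example.com/comic/page-2/',
    'https://example.com/comic/page-3/',
]

def starter():
    # The last grid entry is the newest page.
    return archive[-1]

def getPrevUrl(url):
    # Step one entry back in the cached list. At the first page,
    # index(url) - 1 is -1 and would wrap to the newest entry, so the
    # scraper relies on firstStripUrl to stop before that happens.
    return archive[archive.index(url) - 1]

assert starter() == 'https://example.com/comic/page-3/'
assert getPrevUrl(archive[1]) == archive[0]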
class Faneurysm(_WPNaviIn):
url = 'http://hijinksensue.com/comic/think-only-tree/'
@@ -144,7 +153,7 @@ class FoxDad(_ParserScraper):
url = 'https://foxdad.com/'
stripUrl = url + 'post/%s'
firstStripUrl = stripUrl % '149683014997/some-people-are-just-different-support-the-comic'
imageSearch = '//figure[@class="photo-hires-item"]//img'
imageSearch = ('//figure[@class="photo-hires-item"]//img', '//figure[@class="tmblr-full"]//img')
prevSearch = '//a[@class="previous-button"]'
def namer(self, imageUrl, pageUrl):


@@ -145,19 +145,6 @@ class GrrlPower(_WordPressScraper):
self.session.add_throttle('grrlpowercomic.com', 1.0, 1.5)
class Guardia(_ParserScraper):
url = 'https://ssp-comics.com/comics/Guardia/'
stripUrl = url + '?page=%s'
firstStripUrl = stripUrl % '1'
imageSearch = '//img[contains(@src, "comics/Guardia/")]'
prevSearch = '//a[./button[@id="prevButton"]]'
nextSearch = '//a[./button[@id="nextButton"]]'
starter = bounceStarter
def namer(self, imageUrl, pageUrl):
return pageUrl.rsplit('=', 1)[-1] + '.' + imageUrl.rsplit('.', 1)[-1]
class GuildedAge(_WordPressScraper):
url = 'http://guildedage.net/'
firstStripUrl = url + 'comic/chapter-1-cover/'


@@ -5,12 +5,21 @@
# Copyright (C) 2019-2020 Daniel Ring
from re import compile, escape
from ..scraper import _BasicScraper
from ..scraper import _BasicScraper, _ParserScraper
from ..util import tagre
from ..helpers import bounceStarter, indirectStarter
from .common import _ComicControlScraper, _WordPressScraper, _WPNaviIn
class Hackles(_ParserScraper):
url = 'http://hackles.org/'
stripUrl = url + 'cgi-bin/archives.pl?request=%s'
firstStripUrl = stripUrl % '1'
imageSearch = '//img[contains(@src, "strips/")]'
prevSearch = '//a[text()="< previous"]'
endOfLife = True
class HagarTheHorrible(_BasicScraper):
url = 'http://www.hagarthehorrible.net/'
stripUrl = 'http://www.hagardunor.net/comicstrips_us.php?serietype=9&colortype=1&serieno=%s'
@@ -107,12 +116,6 @@ class HijinksEnsuePhoto(_WPNaviIn):
endOfLife = True
class Housepets(_WordPressScraper):
url = 'http://www.housepetscomic.com/'
stripUrl = url + 'comic/%s/'
firstStripUrl = '2008/06/02/when-boredom-strikes'
class HowToBeAWerewolf(_ComicControlScraper):
url = 'http://howtobeawerewolf.com/'
stripUrl = url + 'comic/%s'


@@ -35,14 +35,6 @@ class IDreamOfAJeanieBottle(_WordPressScraper):
url = 'http://jeaniebottle.com/'
class InsignificantOtters(_WPWebcomic):
stripUrl = 'https://www.thedepthscomic.com/i-otters/%s/'
firstStripUrl = stripUrl % 'page-1'
url = firstStripUrl
imageSearch = '//div[contains(@class, "webcomic-media")]//img'
starter = indirectStarter
class InternetWebcomic(_WPNavi):
url = 'http://www.internet-webcomic.com/'
stripUrl = url + '?p=%s'


@@ -25,10 +25,11 @@ class KemonoCafe(_ParserScraper):
def namer(self, imageUrl, pageUrl):
# Strip date from filenames
filename = imageUrl.rsplit('/', 1)[-1]
if filename[4] == '-' and filename[7] == '-':
filename = filename[10:]
if filename[0] == '-' or filename[0] == '_':
filename = filename[1:]
if 'ultrarosa' not in pageUrl:
if filename[4] == '-' and filename[7] == '-':
filename = filename[10:]
if filename[0] == '-' or filename[0] == '_':
filename = filename[1:]
# Fix duplicate filenames
if 'paprika' in pageUrl and '69-2' in pageUrl:
filename = filename.replace('69', '69-2')
@@ -47,6 +48,7 @@ class KemonoCafe(_ParserScraper):
def getmodules(cls):
return (
cls('AddictiveScience', 'addictivescience', 'page0001'),
cls('Bethellium', 'bethellium', 'c01p00'),
cls('CaribbeanBlue', 'cb', 'page000', last='page325'),
cls('IMew', 'imew', 'imew00', last='imew50'),
cls('Knighthood', 'knighthood', 'kh0001'),
@@ -54,7 +56,9 @@ class KemonoCafe(_ParserScraper):
cls('LasLindas', 'laslindas', 'll0001', adult=True),
cls('Paprika', 'paprika', 'page000'),
cls('PracticeMakesPerfect', 'pmp', 'title-001'),
cls('PrincessBunny', 'princessbunny', 'pg001'),
cls('Rascals', 'rascals', 'rascals-pg-0', adult=True),
cls('TheEyeOfRamalach', 'theeye', 'theeye-page01'),
cls('TinaOfTheSouth', 'tots', 'tos-01-01'),
cls('UltraRosa', 'ultrarosa', 'pg001'),
)
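The namer change above skips the date-stripping step for UltraRosa, whose filenames carry no date prefix. The same logic as a runnable sketch (sample filenames and URLs are made up):

def strip_date(filename, pageUrl):
    # Strip a leading YYYY-MM-DD prefix plus separator, except for UltraRosa.
    if 'ultrarosa' not in pageUrl:
        if filename[4] == '-' and filename[7] == '-':
            filename = filename[10:]
        if filename[0] in ('-', '_'):
            filename = filename[1:]
    return filename

print(strip_date('2022-04-16-page001.png', 'https://example.com/knighthood/x'))  # page001.png
print(strip_date('pg001.png', 'https://example.com/ultrarosa/x'))                # pg001.png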


@@ -187,6 +187,12 @@ class MonsieurLeChien(_BasicScraper):
help = 'Index format: n'
class MonsterSoup(_WordPressScraper):
url = 'https://monstersoupcomic.com/'
stripUrl = url + '?comic=%s'
firstStripUrl = stripUrl % 'chapter-1-cover'
class Moonlace(_WPWebcomic):
url = 'https://moonlace.darkbluecomics.com/'
stripUrl = url + 'comic/%s/'


@@ -7,63 +7,86 @@ from ..scraper import _ParserScraper
class MangaDex(_ParserScraper):
imageSearch = '//img[contains(@class, "_images")]/@data-url'
prevSearch = '//a[contains(@class, "_prevEpisode")]'
multipleImagesPerStrip = True
def __init__(self, name, mangaid):
def __init__(self, name, mangaId):
super(MangaDex, self).__init__('MangaDex/' + name)
baseUrl = 'https://mangadex.org/api/'
self.url = baseUrl + '?id=%s&type=manga' % str(mangaid)
self.stripUrl = baseUrl + '?id=%s&type=chapter'
baseUrl = 'https://api.mangadex.org/'
self.url = baseUrl + 'manga/%s' % mangaId
self.chaptersUrl = baseUrl + 'manga/%s/feed?translatedLanguage[]=en&order[chapter]=desc&limit=500' % mangaId
self.stripUrl = baseUrl + 'chapter/%s'
self.cdnUrl = baseUrl + 'at-home/server/%s'
self.imageUrl = 'https://uploads.mangadex.org/data/%s/%%s'
def starter(self):
# Retrieve manga metadata from API
manga = self.session.get(self.url)
manga.raise_for_status()
mangaData = manga.json()
mangaData = self.session.get(self.url)
mangaData.raise_for_status()
manga = mangaData.json()['data']
# Retrieve chapter list from API
chapterList = []
chapterTotal = 1
chapterOffset = 0
while len(chapterList) < chapterTotal:
chapterData = self.session.get(self.chaptersUrl + '&offset=%d' % chapterOffset)
chapterData.raise_for_status()
chapterBlock = chapterData.json()
chapterTotal = chapterBlock['total']
chapterOffset = chapterBlock['offset'] + chapterBlock['limit']
chapterList.extend(chapterBlock['data'])
# Determine if manga is complete and/or adult
if mangaData['manga']['last_chapter'] != '0':
for ch in mangaData['chapter']:
if mangaData['chapter'][ch]['chapter'] == mangaData['manga']['last_chapter']:
if manga['attributes']['lastChapter'] != '0':
for chapter in chapterList:
if chapter['attributes']['chapter'] == manga['attributes']['lastChapter']:
self.endOfLife = True
if mangaData['manga']['hentai'] != '0':
break
if manga['attributes']['contentRating'] != 'safe':
self.adult = True
# Prepare chapter list
self.chapters = []
for ch in mangaData['chapter']:
if mangaData['chapter'][ch]['lang_code'] != 'gb':
for chapter in chapterList:
if chapter['attributes']['chapter'] == '':
continue
if len(self.chapters) < 1:
self.chapters.append(ch)
if chapter['attributes']['pages'] == 0:
continue
if mangaData['chapter'][ch]['chapter'] == mangaData['chapter'][self.chapters[-1]]['chapter']:
if len(self.chapters) >= 1 and chapter['attributes']['chapter'] == self.chapters[-1]['attributes']['chapter']:
continue
if mangaData['chapter'][ch]['chapter'] == '':
continue
self.chapters.append(ch)
self.chapters.append(chapter)
self.chapters.reverse()
# Find first and last chapter
self.firstStripUrl = self.stripUrl % self.chapters[0]
return self.stripUrl % self.chapters[-1]
self.firstStripUrl = self.stripUrl % self.chapters[0]['id']
return self.stripUrl % self.chapters[-1]['id']
def getPrevUrl(self, url, data):
chapter = url.replace('&type=chapter', '').rsplit('=', 1)[-1]
return self.stripUrl % self.chapters[self.chapters.index(chapter) - 1]
# Determine previous chapter ID from cached list
chapterId = url.rsplit('/', 1)[-1]
chapter = list(filter(lambda c: c['id'] == chapterId, self.chapters))
if len(chapter) == 0:
return None
return self.stripUrl % self.chapters[self.chapters.index(chapter[0]) - 1]['id']
def fetchUrls(self, url, data, urlSearch):
# Retrieve chapter metadata from API
chapterData = json.loads(data.text_content())
self.chapter = chapterData['chapter']
self.chapter = chapterData['data']
cdnData = self.session.get(self.cdnUrl % self.chapter['id'])
cdnData.raise_for_status()
cdnBlock = cdnData.json()
# Save link order for position-based filenames
imageUrl = chapterData['server'] + chapterData['hash'] + '/%s'
self.imageUrls = [imageUrl % page for page in chapterData['page_array']]
imageUrl = self.imageUrl % cdnBlock['chapter']['hash']
self.imageUrls = [imageUrl % page for page in cdnBlock['chapter']['data']]
return self.imageUrls
def namer(self, imageUrl, pageUrl):
# Construct filename from episode number and page index in array
chapterNum = self.chapter
chapterNum = self.chapter['attributes']['chapter'] if self.chapter['attributes']['chapter'] is not None else 0
pageNum = self.imageUrls.index(imageUrl)
pageExt = imageUrl.rsplit('.')[-1]
return '%s-%02d.%s' % (chapterNum, pageNum, pageExt)
@@ -71,44 +94,58 @@ class MangaDex(_ParserScraper):
@classmethod
def getmodules(cls):
return (
cls('AttackonTitan', 429),
cls('Beastars', 20523),
cls('BokuNoKokoroNoYabaiYatsu', 23811),
cls('DeliciousinDungeon', 13871),
cls('DragonDrive', 5165),
cls('FuguushokuKajishiDakedoSaikyouDesu', 56319),
cls('GanbareDoukiChan', 46585),
cls('HangingOutWithAGamerGirl', 42490),
cls('HoriMiya', 6770),
cls('HowToOpenATriangularRiceball', 19305),
cls('InterspeciesReviewers', 20796),
cls('JahySamaWaKujikenai', 22369),
cls('JingaiNoYomeToIchaIchaSuru', 22651),
cls('KawaiiJoushiWoKomarasetai', 17910),
cls('KanojoOkarishimasu', 22151),
cls('Lv2KaraCheatDattaMotoYuushaKouhoNoMattariIsekaiLife', 33797),
cls('MaouNoOreGaDoreiElfWoYomeNiShitandaGaDouMederebaIi', 25495),
cls('ModernMoGal', 30308),
cls('MyTinySenpaiFromWork', 43610),
cls('OMaidensinYourSavageSeason', 22030),
cls('OokamiShounenWaKyouMoUsoOKasaneru', 14569),
cls('OokamiToKoshinryou', 1168),
cls('OtomeYoukaiZakuro', 4533),
cls('OversimplifiedSCP', 32834),
cls('PashiriNaBokuToKoisuruBanchouSan', 25862),
cls('PleaseDontBullyMeNagatoro', 22631),
cls('PleaseDontBullyMeNagatoroComicAnthology', 31004),
cls('PleaseTellMeGalkochan', 12702),
cls('SaekiSanWaNemutteru', 28834),
cls('SenpaiGaUzaiKouhaiNoHanashi', 23825),
cls('SewayakiKitsuneNoSenkoSan', 22723),
cls('SousouNoFrieren', 48045),
cls('SwordArtOnline', 1360),
cls('SwordArtOnlineProgressive', 9604),
cls('TamenDeGushi', 13939),
cls('TheWolfAndRedRidingHood', 31079),
cls('TomoChanWaOnnanoko', 15722),
cls('TonikakuKawaii', 23439),
cls('YotsubaAnd', 311),
cls('YuYuHakusho', 1738),
cls('AttackOnTitan', '304ceac3-8cdb-4fe7-acf7-2b6ff7a60613'),
cls('Beastars', 'f5e3baad-3cd4-427c-a2ec-ad7d776b370d'),
cls('BokuNoKokoroNoYabaiYatsu', '3df1a9a3-a1be-47a3-9e90-9b3e55b1d0ac'),
cls('CheerfulAmnesia', 'f9448f90-c068-4b6a-8c85-03d739aef255'),
cls('DoChokkyuuKareshiXKanojo', 'efb62763-c940-4495-aba5-69c192a999a4'),
cls('DeliciousinDungeon', 'd90ea6cb-7bc3-4d80-8af0-28557e6c4e17'),
cls('DragonDrive', '5c06ae70-b5cf-431a-bcd5-262a411de527'),
cls('FuguushokuKajishiDakedoSaikyouDesu', '17b3b648-fd89-4a69-9a42-6068ffbfa7a7'),
cls('GanbareDoukiChan', '190616bc-7da6-45fd-abd4-dd2ca656c183'),
cls('HangingOutWithAGamerGirl', 'de9e3b62-eac5-4c0a-917d-ffccad694381'),
cls('HoriMiya', 'a25e46ec-30f7-4db6-89df-cacbc1d9a900'),
cls('HowToOpenATriangularRiceball', '6ebd90ce-d5e8-49c0-a4bc-e02e0f8ecb93'),
cls('HunterXHunter', 'db692d58-4b13-4174-ae8c-30c515c0689c'),
cls('IchaichasuruToOkaneGaWaichauFutariNoHanashi', '8eaaec7d-7aa7-490e-8d52-5a3d0a28e78b'),
cls('InterspeciesReviewers', '1b2fddf9-1385-4f3c-b37a-cf86a9428b1a'),
cls('JahySamaWaKujikenai', '2f4e5f5b-d930-4266-8c8a-c4cf9a81e51f'),
cls('JingaiNoYomeToIchaIchaSuru', '809d2493-df3c-4e72-a57e-3e0026cae9fb'),
cls('KaetteKudasaiAkutsuSan', '737a846b-2e67-4d63-9f7e-f54b3beebac4'),
cls('KawaiiJoushiWoKomarasetai', '23b7cc7a-df89-4049-af28-1fa78f88713e'),
cls('KanojoOkarishimasu', '32fdfe9b-6e11-4a13-9e36-dcd8ea77b4e4'),
cls('KoiToUtatane', 'f7d40a27-e289-45b3-9c68-d1cb251897e6'),
cls('Lv2KaraCheatDattaMotoYuushaKouhoNoMattariIsekaiLife', '58bc83a0-1808-484e-88b9-17e167469e23'),
cls('MaouNoOreGaDoreiElfWoYomeNiShitandaGaDouMederebaIi', '55ace2fb-e157-4d76-9e72-67c6bd762a39'),
cls('ModernMoGal', 'b1953f80-36f7-492c-b0f8-e9dd0ad01752'),
cls('MyTinySenpaiFromWork', '28ed63af-61f8-43af-bac3-762030c72963'),
cls('OMaidensinYourSavageSeason', 'c4613b7d-7a6e-48f9-82f0-bce3dd33383a'),
cls('OokamiShounenWaKyouMoUsoOKasaneru', '5e77d9e2-2e44-431a-a995-5fefd411e55e'),
cls('OokamiToKoshinryou', 'de900fd3-c94c-4148-bbcb-ca56eaeb57a4'),
cls('OtomeYoukaiZakuro', 'c1fa97be-0f1f-4686-84bc-806881c97d53'),
cls('OversimplifiedSCP', 'e911fe33-a9b3-43dc-9eb7-f5ee081a6dc8'),
cls('PashiriNaBokuToKoisuruBanchouSan', '838e5b3a-51c8-44cf-b6e2-68193416f6fe'),
cls('PleaseDontBullyMeNagatoro', 'd86cf65b-5f6c-437d-a0af-19a31f94ec55'),
cls('PleaseDontBullyMeNagatoroComicAnthology', '2a4bc9ec-2d70-428a-8b46-27f6218ed267'),
cls('PleaseTellMeGalkochan', '7a2f2f6b-a6a6-4149-879b-3fc2f6916549'),
cls('RebuildWorld', '99182618-ae92-4aec-a5df-518659b7b613'),
cls('SaekiSanWaNemutteru', 'd9aecdab-8aef-4b90-98d5-32e86faffb28'),
cls('SeijoSamaIieToorisugariNoMamonotsukaiDesu', 'd4c40e73-251a-4bcb-a5a6-1edeec1e00e7'),
cls('SenpaiGaUzaiKouhaiNoHanashi', 'af38f328-8df1-4b4c-a272-e737625c3ddc'),
cls('SewayakiKitsuneNoSenkoSan', 'c26269c7-0f5d-4966-8cd5-b79acb86fb7a'),
cls('ShinNoJitsuryokuWaGirigiriMadeKakushiteIyouToOmou', '22fda941-e603-4601-a536-c3ad6d004ba8'),
cls('SoloLeveling', '32d76d19-8a05-4db0-9fc2-e0b0648fe9d0'),
cls('SousouNoFrieren', 'b0b721ff-c388-4486-aa0f-c2b0bb321512'),
cls('SwordArtOnline', '3dd0b814-23f4-4342-b75b-f206598534f6'),
cls('SwordArtOnlineProgressive', '22ea3f54-11e4-4932-a527-89d63d3a62d9'),
cls('TadokoroSan', '8ffbfa2f-23fa-4490-848e-942581a4d873'),
cls('TamenDeGushi', '3f1453fb-9dac-4aca-a2ea-69613856c952'),
cls('TenseiShitaraSlimeDattaKen', 'e78a489b-6632-4d61-b00b-5206f5b8b22b'),
cls('TheNewGate', 'b41bef1e-7df9-4255-bd82-ecf570fec566'),
cls('TheWolfAndRedRidingHood', 'a7d1283b-ed38-4659-b8bc-47bfca5ccb8a'),
cls('TomoChanWaOnnanoko', '76ee7069-23b4-493c-bc44-34ccbf3051a8'),
cls('TonikakuKawaii', '30f3ac69-21b6-45ad-a110-d011b7aaadaa'),
cls('UramikoiKoiUramikoi', '009b6788-48f3-4e78-975c-097f54def7ab'),
cls('YotsubaAnd', '58be6aa6-06cb-4ca5-bd20-f1392ce451fb'),
cls('YuYuHakusho', '44a5cbe1-0204-4cc7-a1ff-0fda2ac004b6'),
)
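The rewritten starter above walks the chapter feed using the total/offset/limit fields returned by the API. That pagination loop as a standalone function, using requests directly (endpoint and parameters as in the class; pass any manga UUID from the list above):

import requests

def fetch_all_chapters(mangaId):
    # Page through the MangaDex chapter feed until 'total' entries arrive;
    # each response carries limit/offset/total for the next request.
    url = ('https://api.mangadex.org/manga/%s/feed'
           '?translatedLanguage[]=en&order[chapter]=desc&limit=500' % mangaId)
    chapters = []
    chapterTotal = 1
    chapterOffset = 0
    while len(chapters) < chapterTotal:
        data = requests.get(url + '&offset=%d' % chapterOffset)
        data.raise_for_status()
        block = data.json()
        chapterTotal = block['total']
        chapterOffset = block['offset'] + block['limit']
        chapters.extend(block['data'])
    return chapters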


@@ -152,7 +152,7 @@ class NineToNine(_ParserScraper):
url = 'https://www.tigerknight.com/99'
stripUrl = url + '/%s'
firstStripUrl = stripUrl % '2014-01-01'
imageSearch = '//img[@class="comic-image"]'
imageSearch = '//img[d:class("comic-image")]'
prevSearch = '//a[./span[contains(text(), "Previous")]]'
multipleImagesPerStrip = True


@@ -617,12 +617,15 @@ class Removed(Scraper):
cls('ComicFury/GreenerGrass'),
cls('ComicFury/HelloWanderingStar'),
cls('ComicFury/Hodgemosh'),
cls('ComicFury/Kitsune'),
cls('ComicFury/LaszloAndEdgar'),
cls('ComicFury/MegamanComic'),
cls('ComicFury/PatchworkPeople'),
cls('ComicFury/PornographyInFiveActs'),
cls('ComicFury/PoussireDeFe'),
cls('ComicFury/RED'),
cls('ComicFury/ResNullius'),
cls('ComicFury/ResNulliusCS'),
cls('ComicFury/Seed'),
cls('ComicFury/TheAcryden'),
cls('ComicFury/TheHourlyComic'),
@@ -955,6 +958,7 @@ class Removed(Scraper):
cls('MangaDex/HeavensDesignTeam', 'legal'),
cls('MangaDex/SPYxFAMILY', 'legal'),
cls('Ryugou'),
cls('SeelPeel'),
cls('SmackJeeves/20TimesKirby'),
cls('SmackJeeves/2Kingdoms'),
cls('SmackJeeves/355Days'),
@@ -1521,11 +1525,16 @@ class Removed(Scraper):
cls('Shivae/CafeAnime'),
cls('Shivae/Extras'),
cls('SnafuComics/Titan'),
cls('StudioKhimera/Eorah', 'mov'),
cls('StuffNoOneToldMe'),
cls('TaleOfTenThousand'),
cls('TheCyantianChronicles/CookieCaper'),
cls('TheCyantianChronicles/Pawprints'),
cls('VGCats/Adventure'),
cls('VGCats/Super'),
cls('VictimsOfTheSystem'),
cls('WebDesignerCOTW'),
cls('WintersLight'),
)
@@ -1560,6 +1569,7 @@ class Renamed(Scraper):
cls('ComicFury/Elektroanthology', 'ComicFury/ElektrosComicAnthology'),
cls('ComicFury/ICanSeeYourFeels', 'ComicFury/SeeYourFeels'),
cls('ComicFury/MAGISAupdatesMonWedFri', 'ComicFury/MAGISAPARASAYOupdatesMonFri'),
cls('ComicFury/MonsterSoup', 'MonsterSoup'),
cls('ComicFury/OopsComicAdventure', 'OopsComicAdventure'),
cls('ComicFury/ThomasAndZachary', 'ComicFury/ThomasAndZacharyArchives'),
cls('ComicGenesis/TheLounge', 'KeenSpot/TheLounge'),
@@ -1601,18 +1611,24 @@ class Renamed(Scraper):
cls('ZebraGirl', 'ComicFury/ZebraGirl'),
# Renamed in 3.0
cls('AHClub', 'RickGriffinStudios/AHClub'),
cls('CrapIDrewOnMyLunchBreak', 'WebToons/CrapIDrewOnMyLunchBreak'),
cls('DeerMe', 'ComicFury/DeerMe'),
cls('GoComics/BloomCounty2017', 'GoComics/BloomCounty2019'),
cls('GoComics/Cathy', 'GoComics/CathyClassics'),
cls('GoComics/Owlturd', 'GoComics/ShenComix'),
cls('GoComics/PeanutsEnEspanol', 'GoComics/SnoopyEnEspanol'),
cls('GoComics/RipleysBelieveItOrNotSpanish', 'GoComics/RipleysAunqueUstedNoLoCrea'),
cls('GoComics/WebcomicName', 'WebcomicName'),
cls('Guardia', 'ComicFury/Guardia'),
cls('Shivae/BlackRose', 'BlackRose'),
cls('SmackJeeves/BlackTapestries', 'ComicFury/BlackTapestries'),
cls('SmackJeeves/ByTheBook', 'ByTheBook'),
cls('SmackJeeves/FurryExperience', 'ComicFury/FurryExperience'),
cls('SmackJeeves/GrowingTroubles', 'ComicFury/GrowingTroubles'),
cls('SmackJeeves/TheRealmOfKaerwyn', 'ComicFury/TheRealmOfKaerwyn'),
cls('SoloLeveling', 'MangaDex/SoloLeveling'),
cls('StudioKhimera/Draconia', 'Draconia'),
cls('TracesOfThePast', 'RickGriffinStudios/TracesOfThePast'),
cls('TracesOfThePast/NSFW', 'RickGriffinStudios/TracesOfThePastNSFW'),
)


@@ -33,8 +33,9 @@ class ProjectFuture(_ParserScraper):
cls('Emily', 'emily', '01-00'),
cls('FishingTrip', 'fishing', '01-00'),
cls('HeadsYouLose', 'heads', '00-01', last='07-12'),
cls('IPanther', 'panther', '00'),
cls('NiallsStory', 'niall', '00'),
cls('ProjectFuture', 'strip', '0'),
cls('ProjectFuture', 'strip', '0', last='664'),
cls('RedValentine', 'redvalentine', '1', last='6'),
cls('ShortStories', 'shorts', '01-00'),
cls('StrangeBedfellows', 'bedfellows', '1', last='6'),
@@ -46,4 +47,5 @@ class ProjectFuture(_ParserScraper):
cls('TheSierraChronicles', 'sierra', '0', last='29'),
cls('TheTuppenyMan', 'tuppenny', '00', last='16'),
cls('TurningANewPage', 'azrael', '1', last='54'),
cls('Xerian', 'xerian', '01-00'),
)


@@ -116,9 +116,8 @@ class Requiem(_WordPressScraper):
class Replay(_ParserScraper):
url = 'http://replaycomic.com/'
stripUrl = url + 'comic/%s/'
url = stripUrl % 'trying-it-out'
firstStripUrl = stripUrl % 'red-desert'
imageSearch = '//div[@id="comic"]//img'
imageSearch = '//div[@id="comic"]//img[@alt]'
prevSearch = '//a[contains(@class, "comic-nav-previous")]'
nextSearch = '//a[contains(@class, "comic-nav-next")]'


@@ -0,0 +1,59 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2019-2020 Tobias Gruetzmacher
# Copyright (C) 2019-2022 Daniel Ring
from ..helpers import indirectStarter
from .common import _WordPressScraper, _WPNaviIn
class Housepets(_WordPressScraper):
url = 'http://www.housepetscomic.com/'
stripUrl = url + 'comic/%s/'
firstStripUrl = '2008/06/02/when-boredom-strikes'
class RickGriffinStudios(_WPNaviIn):
baseUrl = 'http://rickgriffinstudios.com/'
stripUrl = baseUrl + 'comic-post/%s/'
latestSearch = '//a[contains(@title, "Permanent Link")]'
starter = indirectStarter
nav = None
def __init__(self, name, sub, first, last=None, adult=False, nav=None):
super().__init__('RickGriffinStudios/' + name)
self.url = self.baseUrl + sub + '/'
self.firstStripUrl = self.stripUrl % first
if last:
self.url = self.stripUrl % last
self.starter = super(RickGriffinStudios, self).starter
self.endOfLife = True
if adult:
self.latestSearch = '//a[contains(@title, "NSFW")]'
self.adult = True
if nav:
self.nav = nav
def getPrevUrl(self, url, data):
# Links between chapters
url = url.rstrip('/').rsplit('/', 1)[-1]
if self.nav and url in self.nav:
return self.stripUrl % self.nav[url]
return super(RickGriffinStudios, self).getPrevUrl(url, data)
@classmethod
def getmodules(cls):
return (
cls('AHClub', 'ah-club', 'cover', nav={
'ah-club-2-cover': 'ah-club-1-page-24',
'ah-club-3-cover': 'ah-club-2-page-28',
'ah-club-4-cover': 'ah-club-3-page-22',
'ah-club-5-cover': 'ah-club-4-page-24'
}),
cls('HayvenCelestia', 'hayven-celestia', 'skinchange-p1'),
cls('TheStoryboard', 'the-storyboard', 'the-storyboard-001'),
cls('TracesOfThePast', 'in-the-new-age', 'totp-page-1'),
cls('TracesOfThePastNSFW', 'in-the-new-age', 'totp-page-1-nsfw', adult=True),
cls('ZootopiaNightTerrors', 'zootopia-night-terrors', 'zootopia-night-terrors-p1', 'zootopia-night-terrors-p7'),
)
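The nav mapping bridges chapter boundaries: a cover page has no in-comic "previous" link, so getPrevUrl extracts its slug and rewrites it to the last page of the preceding chapter. The slug handling in isolation, with values from the AHClub entry above:

nav = {'ah-club-2-cover': 'ah-club-1-page-24'}
stripUrl = 'http://rickgriffinstudios.com/comic-post/%s/'

url = 'http://rickgriffinstudios.com/comic-post/ah-club-2-cover/'
slug = url.rstrip('/').rsplit('/', 1)[-1]   # 'ah-club-2-cover'
print(stripUrl % nav[slug])
# http://rickgriffinstudios.com/comic-post/ah-club-1-page-24/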


@@ -156,13 +156,6 @@ class Science(_ParserScraper):
endOfLife = True
class SeelPeel(_WPNaviIn):
url = 'https://seelpeel.com/'
stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % 'seelpeel-goes-live'
multipleImagesPerStrip = True
class SequentialArt(_ParserScraper):
url = 'https://www.collectedcurios.com/sequentialart.php'
stripUrl = url + '?s=%s'
@@ -216,6 +209,12 @@ class Sheldon(_BasicScraper):
help = 'Index format: yymmdd'
class Shifters(_WPNavi):
url = 'http://shiftersonline.com/'
stripUrl = url + '%s/'
firstStripUrl = stripUrl % 'shifters-redux-promo'
class ShipInABottle(_WordPressScraper):
url = 'http://shipinbottle.pepsaga.com/'
stripUrl = url + '?p=%s'
@@ -356,66 +355,6 @@ class SodiumEyes(_WordPressScraper):
endOfLife = True
class SoloLeveling(_ParserScraper):
url = 'https://w3.sololeveling.net/'
stripUrl = url + 'manga/solo-leveling-chapter-%s/'
firstStripUrl = stripUrl % '1'
imageSearch = '//div[@class="img_container"]//img'
prevSearch = '//a[@rel="prev"]'
latestSearch = '//table[@class="chap_tab"]//a'
starter = indirectStarter
multipleImagesPerStrip = True
imageUrlFixes = {
'94-0_5dd574efda419/28.': '94-0_5dd574efda419/28a.',
'92-0_5dc2fcb9ed562/22.': '92-0_5dc2fcb9ed562/22s.',
'91-0_5db9b881ac2f0/20k.': '91-0_5db9b881ac2f0/20l.',
'91-0_5db9b881ac2f0/23.': '91-0_5db9b881ac2f0/23a.',
'90-0_5db08467ca2b1/07.': '90-0_5db08467ca2b1/07a.',
'90-0_5db08467ca2b1/09.': '90-0_5db08467ca2b1/09a.',
'90-0_5db08467ca2b1/13.': '90-0_5db08467ca2b1/13a.',
'90-0_5db08467ca2b1/14.': '90-0_5db08467ca2b1/14a.',
'90-0_5db08467ca2b1/21.': '90-0_5db08467ca2b1/21a.',
'90-0_5db08467ca2b1/22.': '90-0_5db08467ca2b1/22a.',
'88-0_5d9e0dedb942e/03.': '88-0_5d9e0dedb942e/03b.',
'88-0_5d9e0dedb942e/05.': '88-0_5d9e0dedb942e/05a.',
'88-0_5d9e0dedb942e/30.': '88-0_5d9e0dedb942e/30a.',
'87-0_5d94cdebd9df7/01a.': '87-0_5d94cdebd9df7/01c.',
}
def imageUrlModifier(self, imageUrl, data):
if 'url=' in imageUrl:
imageUrl = imageUrl.split('url=')[1].split('&')[0]
for fix in self.imageUrlFixes:
imageUrl = imageUrl.replace(fix, self.imageUrlFixes[fix])
return imageUrl
def fetchUrls(self, url, data, urlSearch):
# Save link order for position-based filenames
self.imageUrls = super(SoloLeveling, self).fetchUrls(url, data, urlSearch)
self.imageUrls = [self.imageUrlModifier(x, data) for x in self.imageUrls]
return self.imageUrls
def getPage(self, url):
try:
return super().getPage(url)
except HTTPError as e:
# CloudFlare WAF
if e.response.status_code == 403 and '1020' in e.response.text:
self.geoblocked()
else:
raise e
def getPrevUrl(self, url, data):
return self.stripUrl % str(int(url.strip('/').rsplit('-', 1)[-1]) - 1)
def namer(self, imageUrl, pageUrl):
# Construct filename from episode number and image position on page
episodeNum = pageUrl.strip('/').rsplit('-', 1)[-1]
imageNum = self.imageUrls.index(imageUrl)
imageExt = imageUrl.rsplit('.', 1)[-1]
return "%s-%03d.%s" % (episodeNum, imageNum, imageExt)
class SomethingPositive(_ParserScraper):
url = 'https://www.somethingpositive.net/'
stripUrl = url + 'sp%s.shtml'
@@ -660,6 +599,19 @@ class StrongFemaleProtagonist(_ParserScraper):
)
class StupidFox(_ParserScraper):
url = 'http://stupidfox.net/'
stripUrl = url + '%s'
firstStripUrl = stripUrl % 'hello'
imageSearch = '//div[@class="comicmid"]//img'
prevSearch = '//a[@accesskey="p"]'
def namer(self, imageUrl, pageUrl):
page = self.getPage(pageUrl)
title = page.xpath(self.imageSearch + '/@title')[0].replace(' - ', '-').replace(' ', '-')
return title + '.' + imageUrl.rsplit('.', 1)[-1]
class SuburbanJungle(_ParserScraper):
url = 'http://suburbanjungleclassic.com/'
stripUrl = url + '?p=%s'
@@ -675,8 +627,9 @@ class SuburbanJungleRoughHousing(_WordPressScraper):
class Supercell(_ParserScraper):
url = 'https://www.supercellcomic.com/'
stripUrl = url + 'pages/%s.html'
baseUrl = 'https://www.supercellcomic.com/'
url = baseUrl + 'latest.html'
stripUrl = baseUrl + 'pages/%s.html'
firstStripUrl = stripUrl % '0001'
imageSearch = '//img[@class="comicStretch"]'
prevSearch = '//div[@class="comicnav"]/a[./img[contains(@src, "comnav_02")]]'
@@ -707,6 +660,6 @@ class SwordsAndSausages(_ParserScraper):
url = 'https://www.tigerknight.com/ss'
stripUrl = url + '/%s'
firstStripUrl = stripUrl % '1-1'
imageSearch = '//img[@class="comic-image"]'
imageSearch = '//img[d:class("comic-image")]'
prevSearch = '//a[./span[contains(text(), "Previous")]]'
multipleImagesPerStrip = True


@@ -43,16 +43,17 @@ class StudioKhimera(_ParserScraper):
@classmethod
def getmodules(cls):
return (
cls('Eorah', 'eorah'),
cls('Mousechievous', 'mousechievous'),
)
class UberQuest(_WordPressScraper):
name = 'StudioKhimera/UberQuest'
url = 'https://uberquest.studiokhimera.com/'
stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % 'chapter-1-cover'
stripUrl = 'https://uberquest.studiokhimera.com/comic/page/%s/'
url = stripUrl % 'latest'
firstStripUrl = stripUrl % 'cover'
imageSearch = '//div[@class="prj--comic-image"]/img'
prevSearch = '//uq-image-button[d:class("prj--comic-control-prev")]'
def namer(self, imageUrl, pageUrl):
# Fix inconsistent filenames


@@ -23,14 +23,6 @@ class TailsAndTactics(_ParserScraper):
prevSearch = '//a[text()=" Back"]'
class TaleOfTenThousand(_ParserScraper):
url = 'http://www.t10k.club/'
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % '1-01_00'
imageSearch = '//article[@id="comic"]//img'
prevSearch = '//a[d:class("prev")]'
class TekMage(_WPNavi):
url = 'https://tekmagecomic.com/'
stripUrl = url + 'comic/%s/'
@@ -61,6 +53,26 @@ class TheBrads(_ParserScraper):
multipleImagesPerStrip = True
endOfLife = True
class TheChroniclesOfHuxcyn(_WordPressScraper):
url = 'https://huxcyn.com/'
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'opening-001'
def namer(self, imageUrl, pageUrl):
# Fix inconsistent filenames
filename = imageUrl.rsplit('/', 1)[-1]
filename = filename.replace('IMG_0504', 'TCoH109')
filename = filename.replace('tcoh', 'TCoH')
filename = filename.replace('1599151639.xizana_f3a6458e-8d94-4259-bec3-5a92706fe493_jpeg', 'october.2020.cover')
filename = filename.replace('huxonsword', 'october.2020.huxonsword')
filename = filename.replace('New_Canvas100pageswebimage', 'TCoH100')
if filename[0] == '0':
filename = 'TCoH' + filename
elif filename[0] == '3':
pagenum = int(filename.rsplit('.', 1)[0].split('_', 1)[1].split('_', 1)[0])
filename = 'TCoH' + str(40 + pagenum) + '.' + filename.rsplit('.', 1)[-1]
return filename
class TheClassMenagerie(_ParserScraper):
stripUrl = 'http://www.theclassm.com/d/%s.html'
@@ -278,26 +290,6 @@ class ToonHole(_WordPressScraper):
return url in (self.url + "comic/if-game-of-thrones-was-animated/",)
class TracesOfThePast(_WPNaviIn):
baseUrl = 'http://rickgriffinstudios.com/'
url = baseUrl + 'in-the-new-age/'
stripUrl = baseUrl + 'comic-post/%s/'
firstStripUrl = stripUrl % 'totp-page-1'
latestSearch = '//a[contains(@title, "Permanent Link")]'
starter = indirectStarter
class TracesOfThePastNSFW(_WPNaviIn):
name = 'TracesOfThePast/NSFW'
baseUrl = 'http://rickgriffinstudios.com/'
url = baseUrl + 'in-the-new-age/'
stripUrl = baseUrl + 'comic-post/%s/'
firstStripUrl = stripUrl % 'totp-page-1-nsfw'
latestSearch = '//a[contains(@title, "NSFW")]'
starter = indirectStarter
adult = True
class TrippingOverYou(_BasicScraper):
url = 'http://www.trippingoveryou.com/'
stripUrl = url + 'comic/%s'


@@ -0,0 +1,70 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2019-2020 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring
from ..scraper import _ParserScraper
from ..helpers import indirectStarter
class Tapastic(_ParserScraper):
baseUrl = 'https://tapas.io/'
imageSearch = '//article[contains(@class, "js-episode-article")]//img/@data-src'
prevSearch = '//a[contains(@class, "js-prev-ep-btn")]'
latestSearch = '//ul[contains(@class, "js-episode-list")]//a'
multipleImagesPerStrip = True
def __init__(self, name, url):
super(Tapastic, self).__init__('Tapastic/' + name)
self.url = self.baseUrl + 'series/' + url + '/info'
self.stripUrl = self.baseUrl + 'episode/%s'
def starter(self):
# Retrieve comic metadata from info page
info = self.getPage(self.url)
series = info.xpath('//@data-series-id')[0]
# Retrieve comic metadata from API
data = self.session.get(self.baseUrl + 'series/' + series + '/episodes?sort=NEWEST')
data.raise_for_status()
episodes = data.json()['data']['body']
return self.stripUrl % episodes.split('data-id="')[1].split('"')[0]
def getPrevUrl(self, url, data):
# Retrieve comic metadata from API
data = self.session.get(url + '/info')
data.raise_for_status()
apiData = data.json()['data']
if apiData['scene'] == 2:
self.firstStripUrl = self.stripUrl % apiData['prev_ep_id']
return self.stripUrl % apiData['prev_ep_id']
def fetchUrls(self, url, data, urlSearch):
# Save link order for position-based filenames
self.imageUrls = super().fetchUrls(url, data, urlSearch)
return self.imageUrls
def namer(self, imageUrl, pageUrl):
# Construct filename from episode number and image position on page
episodeNum = pageUrl.rsplit('/', 1)[-1]
imageNum = self.imageUrls.index(imageUrl)
imageExt = imageUrl.rsplit('.', 1)[-1]
if len(self.imageUrls) > 1:
filename = "%s-%d.%s" % (episodeNum, imageNum, imageExt)
else:
filename = "%s.%s" % (episodeNum, imageExt)
return filename
@classmethod
def getmodules(cls):
return (
# Manually-added comics
cls('AmpleTime', 'Ample-Time'),
cls('InsignificantOtters', 'IOtters'),
cls('NoFuture', 'NoFuture'),
cls('OrensForge', 'OrensForge'),
cls('RavenWolf', 'RavenWolf'),
cls('SyntheticInstinct', 'Synthetic-Instinct'),
cls('TheCatTheVineAndTheVictory', 'The-Cat-The-Vine-and-The-Victory'),
cls('VenturaCityDrifters', 'Ventura-City-Drifters'),
# START AUTOUPDATE
# END AUTOUPDATE
)
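The starter above relies on an undocumented listing endpoint whose JSON 'body' field is an HTML fragment; the first data-id attribute in that fragment is the newest episode. Just that call, as a sketch (endpoint and parsing as in the class; the helper name is ours):

import requests

def latest_episode_id(seriesId):
    # seriesId comes from the data-series-id attribute on the series info page.
    data = requests.get(
        'https://tapas.io/series/%s/episodes?sort=NEWEST' % seriesId)
    data.raise_for_status()
    body = data.json()['data']['body']
    # The 'body' field is HTML; the first data-id is the latest episode.
    return body.split('data-id="')[1].split('"')[0]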


@@ -4,6 +4,8 @@
# Copyright (C) 2015-2020 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring
from re import compile
from urllib.parse import urljoin
from lxml import etree
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
@@ -27,7 +29,21 @@ class Undertow(_BasicScraper):
class unDivine(_ComicControlScraper):
url = 'http://undivinecomic.com/'
url = 'https://www.undivinecomic.com/'
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'page-1'
def namer(self, imageUrl, pageUrl):
# Fix inconsistent filenames
filename = imageUrl.rsplit('/', 1)[-1].replace(' ', '-')
filename = filename.replace('10B311D9-0992-4D74-AEB8-DAB714DA67C6', 'UD-322')
filename = filename.replace('99266624-7EF7-4E99-9EC9-DDB5F59CBDFD', 'UD-311')
filename = filename.replace('33C6A5A1-F703-4A0A-BCD5-DE1A09359D8E', 'UD-310')
filename = filename.replace('6CE01E81-C299-43C7-A221-8DE0670EFA30', 'ch4endbonusq4')
filename = filename.replace('DB66D93B-1FE5-49C7-90E0-FFF981DCD6B3', 'bipolar')
if len(filename) > 15 and filename[0].isdigit() and filename[10] == '-':
filename = filename[11:]
return filename
class UnicornJelly(_BasicScraper):
@@ -45,13 +61,29 @@ class Unsounded(_ParserScraper):
startUrl = url + 'comic+index/'
stripUrl = url + 'comic/ch%s/ch%s_%s.html'
firstStripUrl = stripUrl % ('01', '01', '01')
imageSearch = '//img[contains(@src, "pageart/")]'
imageSearch = '//div[@id="comic"]//img'
prevSearch = '//a[d:class("back")]'
latestSearch = '//div[@id="chapter_box"][1]//a[last()]'
multipleImagesPerStrip = True
starter = indirectStarter
help = 'Index format: chapter-page'
def fetchUrls(self, url, data, urlSearch):
imageUrls = super(Unsounded, self).fetchUrls(url, data, urlSearch)
# Include background images for multi-image pages
imageRegex = compile(r'background-image: url\((pageart/.*)\)')
for match in imageRegex.finditer(str(etree.tostring(data))):
imageUrls.append(urljoin(url, match.group(1)))
return imageUrls
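For reference, the background-image regex picks page art out of inline styles; a quick check against a made-up style attribute:

from re import compile

imageRegex = compile(r'background-image: url\((pageart/.*)\)')
sample = '<div style="background-image: url(pageart/ch13_72.jpg)"></div>'
for match in imageRegex.finditer(sample):
    print(match.group(1))  # pageart/ch13_72.jpg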
def namer(self, imageUrl, pageUrl):
filename = imageUrl.rsplit('/', 1)[-1]
pagename = pageUrl.rsplit('/', 1)[-1]
if pagename.split('.', 1)[0] != filename.split('.', 1)[0]:
filename = pagename.split('_', 1)[0] + '_' + filename
return filename
def getPrevUrl(self, url, data):
# Fix missing navigation links between chapters
if 'ch13/you_let_me_fall' in url:


@@ -8,13 +8,22 @@ from ..helpers import bounceStarter, indirectStarter
class VampireHunterBoyfriends(_ParserScraper):
url = 'https://boneitiscomics.com/vhb.php'
stripUrl = url + '?pg=%s'
firstStripUrl = stripUrl % '1'
imageSearch = '//div[@class="page"]//img'
prevSearch = '//a[@id="prev"]'
baseUrl = 'https://boneitisindustries.com/'
url = baseUrl + 'comics/vampire-hunter-boyfriends/'
stripUrl = baseUrl + 'comic/%s/'
firstStripUrl = stripUrl % 'vampire-hunter-boyfriends-chapter-1-cover'
imageSearch = '//div[@id="content"]//img[d:class("size-full")]'
prevSearch = '//a[./span[d:class("ticon-chevron-left")]]'
adult = True
def starter(self):
archivePage = self.getPage(self.url)
self.archive = archivePage.xpath('//div[contains(@class, "vcex-portfolio-grid")]//a/@href')
return self.archive[-1]
def getPrevUrl(self, url, data):
return self.archive[self.archive.index(url) - 1]
class Vexxarr(_ParserScraper):
baseUrl = 'http://www.vexxarr.com/'
@@ -40,20 +49,6 @@ class VGCats(_ParserScraper):
help = 'Index format: n (unpadded)'
class VGCatsAdventure(VGCats):
name = 'VGCats/Adventure'
url = 'http://www.vgcats.com/ffxi/'
stripUrl = url + '?strip_id=%s'
imageSearch = '//p/img[contains(@src, "images/")]'
class VGCatsSuper(VGCats):
name = 'VGCats/Super'
url = 'http://www.vgcats.com/super/'
stripUrl = url + '?strip_id=%s'
imageSearch = '//p/img[contains(@src, "images/")]'
class VickiFox(_ParserScraper):
url = 'http://www.vickifox.com/comic/strip'
stripUrl = url + '?id=%s'


@@ -133,14 +133,6 @@ class WildeLife(_ComicControlScraper):
firstStripUrl = stripUrl % '1'
class WintersLight(_ParserScraper):
url = 'https://winterslight.anaria.net/'
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'winterslight00'
imageSearch = '//img[contains(@src, "comic/pages/")]'
prevSearch = '//a[contains(text(), "Previous")]'
class Wonderella(_BasicScraper):
url = 'http://nonadventures.com/'
rurl = escape(url)
@@ -232,9 +224,3 @@ class WormWorldSagaGerman(WormWorldSaga):
class WormWorldSagaSpanish(WormWorldSaga):
lang = 'es'
class Wrongside(_WPNavi):
url = 'http://www.ayzewi.com/comic/'
stripUrl = url + '?comic=%s'
firstStripUrl = stripUrl % 'intro-2'


@@ -115,6 +115,7 @@ class WebToons(_ParserScraper):
cls('CityOfBlank', 'sf/city-of-blank', 1895),
cls('CityOfWalls', 'drama/city-of-wall', 505),
cls('CityVamps', 'challenge/city-vamps-', 119224),
cls('ClawShot', 'challenge/clawshot', 621465),
cls('ClusterFudge', 'slice-of-life/cluster-fudge', 355),
cls('CodeAdam', 'action/code-adam', 1657),
cls('CookingComically', 'tiptoon/cooking-comically', 622),
@@ -134,7 +135,7 @@ class WebToons(_ParserScraper):
cls('DEADDAYS', 'horror/dead-days', 293),
cls('Debunkers', 'challenge/debunkers', 148475),
cls('DEEP', 'thriller/deep', 364),
cls('Defects', 'challenge/defects', 221106),
cls('Defects', 'fantasy/defects', 2731),
cls('Denma', 'sf/denma', 921),
cls('Dents', 'sf/dents', 671),
cls('Deor', 'fantasy/deor', 1663),
@@ -224,6 +225,7 @@ class WebToons(_ParserScraper):
cls('JustPancakes', 'comedy/just-pancakes', 1651),
cls('KidsAreAllRight', 'drama/kids-are-all-right', 283),
cls('Killstagram', 'thriller/killstagram', 1971),
cls('KissItGoodbye', 'challenge/kiss-it-goodbye', 443703),
cls('KindOfConfidential', 'romance/kind-of-confidential', 663),
cls('KindOfLove', 'slice-of-life/kind-of-love', 1850),
cls('KnightRun', 'sf/knight-run', 67),
@@ -317,6 +319,7 @@ class WebToons(_ParserScraper):
cls('RANDOMPHILIA', 'comedy/randomphilia', 386),
cls('Rebirth', 'sf/rebirth', 1412),
cls('RefundHighSchool', 'fantasy/refundhighschool', 1360),
cls('ReturnToPlayer', 'action/return-to-player', 2574),
cls('RiseFromAshes', 'supernatural/rise-from-ashes', 959),
cls('RoarStreetJournal', 'slice-of-life/roar-street-journal', 397),
cls('RoomOfSwords', 'sf/room-of-swords', 1261),
@@ -380,12 +383,14 @@ class WebToons(_ParserScraper):
cls('TheGirlDownstairs', 'romance/the-girl-downstairs', 1809),
cls('THEGIRLFROMCLASS', 'drama/the-girl-from-class', 73),
cls('TheGodOfHighSchool', 'action/the-god-of-high-school', 66),
cls('TheGreenhouse', 'challenge/the-greenhouse-gl', 278312),
cls('TheKissBet', 'romance/the-kiss-bet', 1617),
cls('TheLifeOfTheThreeBears', 'slice-of-life/the-life-of-the-three-bears', 390),
cls('ThePurpleHeart', 'super-hero/the-purple-heart', 723),
cls('TheRedBook', 'horror/the-red-book', 467),
cls('TheRedHook', 'super-hero/the-red-hook', 643),
cls('TheRedKing', 'supernatural/the-red-king', 1687),
cls('TheRoomies', 'challenge/the-roomies-archive', 513669),
cls('TheShadowProphet', 'drama/the-shadow-prophet', 1881),
cls('TheSoundOfYourHeart', 'comedy/the-sound-of-your-heart', 269),
cls('TheSteamDragonExpress', 'fantasy/steam-dragon-express', 1270),
@@ -412,6 +417,7 @@ class WebToons(_ParserScraper):
cls('UnderPrin', 'supernatural/underprin', 78),
cls('UnderTheAegis', 'fantasy/under-the-aegis', 436),
cls('UnholyBlood', 'supernatural/unholy-blood', 1262),
cls('UnintentionalGame', 'challenge/unintentional-game', 162674),
cls('UnknownCaller', 'thriller/ar-toon', 775),
cls('UnlovableReplacement', 'romance/unlovable-replacement', 1762),
cls('UnluckyIsAsLuckyDoes', 'comedy/unlucky-is-as-lucky-does', 1554),


@@ -0,0 +1,75 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2019-2020 Tobias Gruetzmacher
# Copyright (C) 2019-2022 Daniel Ring
from ..scraper import _ParserScraper
from ..helpers import indirectStarter
class Wrongside(_ParserScraper):
baseUrl = 'http://ayzewi.com/maingallery3/'
url = baseUrl + 'index.php?/category/5'
stripUrl = baseUrl + 'picture.php?%s'
firstStripUrl = stripUrl % '/175/category/21'
imageSearch = '//img[@id="theMainImage"]/@src'
prevSearch = '//a[contains(@title, "Previous :")]'
def starter(self):
archivePage = self.getPage(self.url)
chapterUrls = archivePage.xpath('//ul[@class="albThumbs"]//a/@href')
self.archive = []
for chapterUrl in chapterUrls:
chapterPage = self.getPage(chapterUrl)
self.archive.append(chapterPage.xpath('(//ul[@id="thumbnails"]//a/@href)[last()]')[0])
return self.archive[0]
def getPrevUrl(self, url, data):
if data.xpath(self.prevSearch) == [] and len(self.archive) > 0:
return self.archive.pop()
return super(Wrongside, self).getPrevUrl(url, data)
def namer(self, imageUrl, pageUrl):
page = self.getPage(pageUrl)
title = page.xpath('//div[@class="browsePath"]/h2/text()')[0]
return title.replace('"', '') + '.' + imageUrl.rsplit('.', 1)[-1]
class WrongsideBeginnings(Wrongside):
name = 'Wrongside/Beginnings'
baseUrl = 'http://ayzewi.com/maingallery3/'
url = baseUrl + 'index.php?/category/4'
stripUrl = baseUrl + 'picture.php?%s'
firstStripUrl = stripUrl % '/2/category/18'
class WrongsideSideStories(_ParserScraper):
baseUrl = 'http://ayzewi.com/maingallery3/'
stripUrl = baseUrl + 'picture.php?%s'
imageSearch = '//img[@id="theMainImage"]/@src'
prevSearch = '//a[contains(@title, "Previous :")]'
latestSearch = '(//ul[@id="thumbnails"]//a/@href)[last()]'
starter = indirectStarter
def __init__(self, name, category, first, last=None):
super().__init__('Wrongside/' + name)
self.url = self.baseUrl + 'index.php?/category/' + category
self.firstStripUrl = self.stripUrl % ('/' + first + '/category/' + category)
if last:
self.endOfLife = True
@classmethod
def getmodules(cls):
return (
cls('AnarkisRising', '7', '302'),
cls('CommonsDreams', '9', '324'),
cls('Faith', '11', '349'),
cls('Sarah', '10', '337'),
cls('ThereAreNoAviansHere', '8', '313'),
cls('TheScientificProphet', '13', '358'),
cls('TheStrangers', '12', '361'),
)
def namer(self, imageUrl, pageUrl):
page = self.getPage(pageUrl)
title = page.xpath('//div[@class="browsePath"]/h2/text()')[0]
return title.replace('"', '') + '.' + imageUrl.rsplit('.', 1)[-1]
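The Piwigo-style gallery URLs built by WrongsideSideStories.__init__ are easiest to see with one module spelled out; the values come from the cls('Faith', '11', '349') entry above:

baseUrl = 'http://ayzewi.com/maingallery3/'
category, first = '11', '349'
url = baseUrl + 'index.php?/category/' + category
firstStripUrl = baseUrl + 'picture.php?' + '/' + first + '/category/' + category
print(url)            # http://ayzewi.com/maingallery3/index.php?/category/11
print(firstStripUrl)  # http://ayzewi.com/maingallery3/picture.php?/349/category/11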


@@ -9,7 +9,7 @@ d=$(dirname $0)
if [ $# -ge 1 ]; then
list="$*"
else
list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot webcomicfactory webtoons"
list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot tapastic webcomicfactory webtoons"
fi
for script in $list; do
echo "Executing ${script}.py"

scripts/tapastic.py (new file, 36 lines)

@@ -0,0 +1,36 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
# Copyright (C) 2019-2020 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring
"""
Script to get a list of Tapastic comics and save the info in a
JSON file for further processing.
"""
from urllib.parse import urlsplit, parse_qs
from scriptutil import ComicListUpdater
from dosagelib.util import check_robotstxt
class TapasticUpdater(ComicListUpdater):
def collect_results(self):
# Retrieve the first 10 top comics list pages
url = 'https://tapas.io/comics?browse=ALL&sort_type=LIKE&pageNumber='
count = 10
data = [self.get_url(url + str(i), robot=False) for i in range(0, count)]
for page in data:
for comiclink in page.xpath('//a[@class="preferred title"]'):
comicurl = comiclink.attrib['href']
name = comiclink.text
self.add_comic(name, comicurl)
def get_entry(self, name, url):
shortName = name.replace(' ', '').replace('\'', '')
titleNum = int(parse_qs(urlsplit(url).query)['title_no'][0])
url = url.rsplit('/', 1)[0].replace('/series/', '')
return u"cls('%s', '%s', %d)," % (shortName, url, titleNum)
if __name__ == '__main__':
TapasticUpdater(__file__).run()


@@ -11,7 +11,7 @@ d=$(dirname $0)
if [ $# -ge 1 ]; then
list="$*"
else
list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot webcomicfactory webtoons"
list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot tapastic webcomicfactory webtoons"
fi
for script in $list; do
target="${d}/../dosagelib/plugins/${script}.py"


@@ -7,8 +7,6 @@ import responses
import dosagelib.cmd
import httpmocks
from dosagelib.plugins.s import SoloLeveling
from dosagelib.scraper import GeoblockedException
def cmd(*options):
@@ -43,7 +41,11 @@ class TestModules(object):
cmd('--basepath', str(tmpdir), 'CalvinAndHobbesEnEspanol:2012/07/22')
@responses.activate
@pytest.mark.skip(reason="SoloeLeveling was removed, so we have no way to test this...")
def test_sololeveling_geoblock(self, tmpdir):
from dosagelib.plugins.s import SoloLeveling
from dosagelib.scraper import GeoblockedException
responses.add(responses.GET, 'https://w3.sololeveling.net/',
'<span>1020</span>', status=403)