diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index e082134d5..8734adb19 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -106,27 +106,6 @@ class AGirlAndHerFed(_ParserScraper): help = 'Index format: nnn' -class AHClub(_WPNaviIn): - baseUrl = 'http://rickgriffinstudios.com/' - url = baseUrl + 'ah-club/' - stripUrl = baseUrl + 'comic-post/%s/' - firstStripUrl = stripUrl % 'cover' - latestSearch = '//a[contains(@title, "Permanent Link")]' - starter = indirectStarter - nav = { - 'ah-club-2-cover': 'ah-club-1-page-24', - 'ah-club-3-cover': 'ah-club-2-page-28', - 'ah-club-4-cover': 'ah-club-3-page-22', - } - - def getPrevUrl(self, url, data): - # Links between chapters - url = url.rstrip('/').rsplit('/', 1)[-1] - if self.nav and url in self.nav: - return self.stripUrl % self.nav[url] - return super(AHClub, self).getPrevUrl(url, data) - - class AhoiPolloi(_ParserScraper): url = 'https://ahoipolloi.blogger.de/' stripUrl = url + '?day=%s' diff --git a/dosagelib/plugins/c.py b/dosagelib/plugins/c.py index f29c0d1f0..7f26ce0bf 100644 --- a/dosagelib/plugins/c.py +++ b/dosagelib/plugins/c.py @@ -188,10 +188,13 @@ class CavesAndCritters(_WPWebcomic): adult = True -class Centralia2050(_WordPressScraper): - url = 'http://centralia2050.com/' +class Centralia2050(_ParserScraper): + url = 'https://centralia2050.com/' stripUrl = url + 'comic/%s/' firstStripUrl = stripUrl % 'ch1cover' + imageSearch = '//div[@id="spliced-comic"]//img' + prevSearch = '//a[@class="previous-comic"]' + nextSearch = '//a[@class="next-comic"]' starter = bounceStarter def namer(self, imageUrl, pageUrl): @@ -267,9 +270,9 @@ class Cloudscratcher(_ParserScraper): url = 'http://www.cloudscratcher.com/' stripUrl = url + 'comic.php?page=%s' firstStripUrl = stripUrl % '1' - imageSearch = '//div[@id="main_content"]//img[contains(@src, "comic")]' - prevSearch = '//a[./img[contains(@src, "previous-page")]]' - latestSearch = '//a[@alt="Newest_Page"]' + imageSearch = '//img[contains(@src, "pages/")]' + prevSearch = '//a[./img[@alt="Previous Page"]]' + latestSearch = '//a[./img[@alt="Comic"]]' starter = indirectStarter @@ -277,7 +280,7 @@ class CollegeCatastrophe(_ParserScraper): url = 'https://www.tigerknight.com/cc' stripUrl = url + '/%s' firstStripUrl = stripUrl % '2000-11-10' - imageSearch = '//img[@class="comic-image"]' + imageSearch = '//img[d:class("comic-image")]' prevSearch = '//a[./span[contains(text(), "Previous")]]' endOfLife = True multipleImagesPerStrip = True diff --git a/dosagelib/plugins/comicfury.py b/dosagelib/plugins/comicfury.py index ff98e2ef4..eb293fd5c 100644 --- a/dosagelib/plugins/comicfury.py +++ b/dosagelib/plugins/comicfury.py @@ -123,6 +123,7 @@ class ComicFury(_ParserScraper): cls('AgentBishop', 'agentbishop'), cls('AHappierKindOfSad', 'ahappierkindofsad'), cls('AlbinoBrothers', 'albinobros'), + cls('Alderwood', 'alderwood'), cls('AlexanderAndLucasRebooted', 'alexanderandlucas'), cls('AliaTerra', 'alia-terra'), cls('AlienIrony', 'alien-irony'), @@ -336,6 +337,7 @@ class ComicFury(_ParserScraper): cls('DeadFingers', 'deadfingers'), cls('DeadRabbitCa', 'afairtrade'), cls('DeepBlue', 'deepblue'), + cls('DeerMe', 'deerme'), cls('DefineHero', 'definehero'), cls('DELIA', 'delia'), cls('DemasPokmonAdventure', 'nuzlocke-dema'), @@ -462,6 +464,7 @@ class ComicFury(_ParserScraper): cls('GrippsBrain', 'grippsbrain'), cls('GrokBoop', 'grokboop'), cls('GrowingTroubles', 'growingtroubles'), + cls('Guardia', 'guardia-tales-of-halgeis'), cls('GUS', 'gus'), cls('HalloweenCameoCaper2012', 'halloween2012'), cls('HalloweenCameoCaper2013', 'halloween2013'), @@ -575,7 +578,6 @@ class ComicFury(_ParserScraper): cls('KingdomOfTheDinosaurs', 'dinosaurkingdom'), cls('KingdomPrettyCure', 'kingdomprettycure'), cls('KirbyVsShyGuy', 'kvsg'), - cls('Kitsune', 'kitsune'), cls('KMLsSticks', 'kmlssticks'), cls('KnavesEnd', 'knavesend'), cls('KnightGuy', 'knightguy'), @@ -672,7 +674,6 @@ class ComicFury(_ParserScraper): cls('MonochromeRainbow', 'monobow'), cls('MonsterBait', 'deadnight'), cls('MonsterInTheKingdom', 'monster'), - cls('MonsterSoup', 'monstersoup'), cls('MonstersWithBenefits', 'failmonsters'), cls('MonstroniverseAdventures', 'monstroniverse'), cls('MoonlitBrew', 'moonlitbrew'), @@ -800,8 +801,6 @@ class ComicFury(_ParserScraper): cls('RequiemsGate', 'requiemsgate'), cls('ReSetArt', 'resetfanarts'), cls('ResidentWeirdo', 'residentweirdo'), - cls('ResNullius', 'resnullius'), - cls('ResNulliusCS', 'resnulliuscs'), cls('ReturnOfWonderland', 'returnofwonderland'), cls('Revive', 'revive'), cls('RexAfterDark', 'rexafterdark'), @@ -845,6 +844,7 @@ class ComicFury(_ParserScraper): cls('ShakingOffSorceryPL', 'shakingoffsorcery-pl'), cls('ShamanQuest', 'shamanquest'), cls('ShatteredSkies', 'shatteredskies'), + cls('Sharak', 'sharak'), cls('Shenanigans', 's'), cls('ShenaniganSquares', 'ss-comic'), cls('ShikuTheFirstAndFinal', 'shiku'), @@ -856,6 +856,7 @@ class ComicFury(_ParserScraper): cls('SixteenCandlesHuntersAgency', 'sixteencandles'), cls('Skeeter', 'herecomesskeeter'), cls('Sketchy', 'sketchy'), + cls('Skylords', 'skylords'), cls('SlugMan', 'slug-man'), cls('SmallTownValues', 'smalltownvalues'), cls('SmitheeZombieHunter', 'smitheezombiehunter'), @@ -1096,6 +1097,7 @@ class ComicFury(_ParserScraper): cls('VampireCatgirlPart2', 'vampirecatgirl2'), cls('VeldaGirlDetective', 'veldagirldetective'), cls('Verboten', 'verboten'), + cls('VHV', 'vhv'), cls('Victory', 'victoryadventures'), cls('ViewHub', 'viewhub'), cls('ViolentBlue', 'violentblue'), diff --git a/dosagelib/plugins/d.py b/dosagelib/plugins/d.py index bf49824be..7cce6578f 100644 --- a/dosagelib/plugins/d.py +++ b/dosagelib/plugins/d.py @@ -84,19 +84,6 @@ class DeepFried(_BasicScraper): help = 'Index format: none' -class DeerMe(_ParserScraper): - url = 'http://deerme.net/comics/' - stripUrl = url + '%s' - firstStripUrl = stripUrl % '1' - imageSearch = ('//img[@id="comicimage"]', '//img[@id="latestcomicimage"]') - prevSearch = '//a[@rel="prev"]' - nextSearch = '//a[@rel="next"]' - starter = bounceStarter - - def namer(self, imageUrl, pageUrl): - return pageUrl.rsplit('/', 1)[-1] + '.' + imageUrl.rsplit('.', 1)[-1] - - class Delve(_WordPressScraper): url = 'https://thisis.delvecomic.com/NewWP/' stripUrl = url + 'comic/%s/' @@ -239,6 +226,13 @@ class DoemainOfOurOwn(_ParserScraper): return filename +class DoesNotPlayWellWithOthers(_WPNavi): + url = 'http://www.doesnotplaywellwithothers.com/' + stripUrl = url + 'comic/%s' + firstStripUrl = stripUrl % 'pwc-0001' + adult = True + + class DoghouseDiaries(_ParserScraper): url = 'http://thedoghousediaries.com/' stripUrl = url + '%s' diff --git a/dosagelib/plugins/e.py b/dosagelib/plugins/e.py index c70053a4c..049518024 100644 --- a/dosagelib/plugins/e.py +++ b/dosagelib/plugins/e.py @@ -170,12 +170,13 @@ class Erstwhile(_WPNavi): endOfLife = True -class Everblue(_ParserScraper): +class Everblue(_ComicControlScraper): url = 'http://www.everblue-comic.com/comic/' stripUrl = url + '%s' firstStripUrl = stripUrl % '1' - imageSearch = '//article[@id="comic"]//img' - prevSearch = '//a[contains(@class, "prev")]' + + def namer(self, imageUrl, pageUrl): + return imageUrl.rsplit('/', 1)[-1].split('-', 1)[1] class EverybodyLovesEricRaymond(_ParserScraper): diff --git a/dosagelib/plugins/f.py b/dosagelib/plugins/f.py index 3bd69e1d1..0078d69db 100644 --- a/dosagelib/plugins/f.py +++ b/dosagelib/plugins/f.py @@ -21,13 +21,22 @@ class FalconTwin(_BasicScraper): class FalseStart(_ParserScraper): - url = 'https://boneitiscomics.com/falsestart.php' - stripUrl = url + '?pg=%s' - firstStripUrl = stripUrl % '1' - imageSearch = '//div[@class="page"]//img' - prevSearch = '//a[@id="prev"]' + baseUrl = 'https://boneitisindustries.com/' + url = baseUrl + 'comics/false-start/' + stripUrl = baseUrl + 'comic/%s/' + firstStripUrl = stripUrl % 'false-start-chapter-zero-page-1' + imageSearch = '//div[@id="content"]//img[d:class("size-full")]' + prevSearch = '//a[./span[d:class("ticon-chevron-left")]]' adult = True + def starter(self): + archivePage = self.getPage(self.url) + self.archive = archivePage.xpath('//div[contains(@class, "vcex-portfolio-grid")]//a/@href') + return self.archive[-1] + + def getPrevUrl(self, url, data): + return self.archive[self.archive.index(url) - 1] + class Faneurysm(_WPNaviIn): url = 'http://hijinksensue.com/comic/think-only-tree/' @@ -144,7 +153,7 @@ class FoxDad(_ParserScraper): url = 'https://foxdad.com/' stripUrl = url + 'post/%s' firstStripUrl = stripUrl % '149683014997/some-people-are-just-different-support-the-comic' - imageSearch = '//figure[@class="photo-hires-item"]//img' + imageSearch = ('//figure[@class="photo-hires-item"]//img', '//figure[@class="tmblr-full"]//img') prevSearch = '//a[@class="previous-button"]' def namer(self, imageUrl, pageUrl): diff --git a/dosagelib/plugins/g.py b/dosagelib/plugins/g.py index da53e9615..d4b72acf3 100644 --- a/dosagelib/plugins/g.py +++ b/dosagelib/plugins/g.py @@ -145,19 +145,6 @@ class GrrlPower(_WordPressScraper): self.session.add_throttle('grrlpowercomic.com', 1.0, 1.5) -class Guardia(_ParserScraper): - url = 'https://ssp-comics.com/comics/Guardia/' - stripUrl = url + '?page=%s' - firstStripUrl = stripUrl % '1' - imageSearch = '//img[contains(@src, "comics/Guardia/")]' - prevSearch = '//a[./button[@id="prevButton"]]' - nextSearch = '//a[./button[@id="nextButton"]]' - starter = bounceStarter - - def namer(self, imageUrl, pageUrl): - return pageUrl.rsplit('=', 1)[-1] + '.' + imageUrl.rsplit('.', 1)[-1] - - class GuildedAge(_WordPressScraper): url = 'http://guildedage.net/' firstStripUrl = url + 'comic/chapter-1-cover/' diff --git a/dosagelib/plugins/h.py b/dosagelib/plugins/h.py index 0ae21bce7..21b0fbcc3 100644 --- a/dosagelib/plugins/h.py +++ b/dosagelib/plugins/h.py @@ -5,12 +5,21 @@ # Copyright (C) 2019-2020 Daniel Ring from re import compile, escape -from ..scraper import _BasicScraper +from ..scraper import _BasicScraper, _ParserScraper from ..util import tagre from ..helpers import bounceStarter, indirectStarter from .common import _ComicControlScraper, _WordPressScraper, _WPNaviIn +class Hackles(_ParserScraper): + url = 'http://hackles.org/' + stripUrl = url + 'cgi-bin/archives.pl?request=%s' + firstStripUrl = stripUrl % '1' + imageSearch = '//img[contains(@src, "strips/")]' + prevSearch = '//a[text()="< previous"]' + endOfLife = True + + class HagarTheHorrible(_BasicScraper): url = 'http://www.hagarthehorrible.net/' stripUrl = 'http://www.hagardunor.net/comicstrips_us.php?serietype=9&colortype=1&serieno=%s' @@ -107,12 +116,6 @@ class HijinksEnsuePhoto(_WPNaviIn): endOfLife = True -class Housepets(_WordPressScraper): - url = 'http://www.housepetscomic.com/' - stripUrl = url + 'comic/%s/' - firstStripUrl = '2008/06/02/when-boredom-strikes' - - class HowToBeAWerewolf(_ComicControlScraper): url = 'http://howtobeawerewolf.com/' stripUrl = url + 'comic/%s' diff --git a/dosagelib/plugins/i.py b/dosagelib/plugins/i.py index c5493a1cf..979536bb8 100644 --- a/dosagelib/plugins/i.py +++ b/dosagelib/plugins/i.py @@ -35,14 +35,6 @@ class IDreamOfAJeanieBottle(_WordPressScraper): url = 'http://jeaniebottle.com/' -class InsignificantOtters(_WPWebcomic): - stripUrl = 'https://www.thedepthscomic.com/i-otters/%s/' - firstStripUrl = stripUrl % 'page-1' - url = firstStripUrl - imageSearch = '//div[contains(@class, "webcomic-media")]//img' - starter = indirectStarter - - class InternetWebcomic(_WPNavi): url = 'http://www.internet-webcomic.com/' stripUrl = url + '?p=%s' diff --git a/dosagelib/plugins/kemonocafe.py b/dosagelib/plugins/kemonocafe.py index b45668247..38fd68279 100644 --- a/dosagelib/plugins/kemonocafe.py +++ b/dosagelib/plugins/kemonocafe.py @@ -25,10 +25,11 @@ class KemonoCafe(_ParserScraper): def namer(self, imageUrl, pageUrl): # Strip date from filenames filename = imageUrl.rsplit('/', 1)[-1] - if filename[4] == '-' and filename[7] == '-': - filename = filename[10:] - if filename[0] == '-' or filename[0] == '_': - filename = filename[1:] + if not 'ultrarosa' in pageUrl: + if filename[4] == '-' and filename[7] == '-': + filename = filename[10:] + if filename[0] == '-' or filename[0] == '_': + filename = filename[1:] # Fix duplicate filenames if 'paprika' in pageUrl and '69-2' in pageUrl: filename = filename.replace('69', '69-2') @@ -47,6 +48,7 @@ class KemonoCafe(_ParserScraper): def getmodules(cls): return ( cls('AddictiveScience', 'addictivescience', 'page0001'), + cls('Bethellium', 'bethellium', 'c01p00'), cls('CaribbeanBlue', 'cb', 'page000', last='page325'), cls('IMew', 'imew', 'imew00', last='imew50'), cls('Knighthood', 'knighthood', 'kh0001'), @@ -54,7 +56,9 @@ class KemonoCafe(_ParserScraper): cls('LasLindas', 'laslindas', 'll0001', adult=True), cls('Paprika', 'paprika', 'page000'), cls('PracticeMakesPerfect', 'pmp', 'title-001'), + cls('PrincessBunny', 'princessbunny', 'pg001'), cls('Rascals', 'rascals', 'rascals-pg-0', adult=True), cls('TheEyeOfRamalach', 'theeye', 'theeye-page01'), cls('TinaOfTheSouth', 'tots', 'tos-01-01'), + cls('UltraRosa', 'ultrarosa', 'pg001'), ) diff --git a/dosagelib/plugins/m.py b/dosagelib/plugins/m.py index 1fbfb89cd..bef5d6f57 100644 --- a/dosagelib/plugins/m.py +++ b/dosagelib/plugins/m.py @@ -187,6 +187,12 @@ class MonsieurLeChien(_BasicScraper): help = 'Index format: n' +class MonsterSoup(_WordPressScraper): + url = 'https://monstersoupcomic.com/' + stripUrl = url + '?comic=%s' + firstStripUrl = stripUrl % 'chapter-1-cover' + + class Moonlace(_WPWebcomic): url = 'https://moonlace.darkbluecomics.com/' stripUrl = url + 'comic/%s/' diff --git a/dosagelib/plugins/mangadex.py b/dosagelib/plugins/mangadex.py index 7f48d9e0e..ea943edd3 100644 --- a/dosagelib/plugins/mangadex.py +++ b/dosagelib/plugins/mangadex.py @@ -7,63 +7,86 @@ from ..scraper import _ParserScraper class MangaDex(_ParserScraper): - imageSearch = '//img[contains(@class, "_images")]/@data-url' - prevSearch = '//a[contains(@class, "_prevEpisode")]' multipleImagesPerStrip = True - def __init__(self, name, mangaid): + def __init__(self, name, mangaId): super(MangaDex, self).__init__('MangaDex/' + name) - baseUrl = 'https://mangadex.org/api/' - self.url = baseUrl + '?id=%s&type=manga' % str(mangaid) - self.stripUrl = baseUrl + '?id=%s&type=chapter' + baseUrl = 'https://api.mangadex.org/' + self.url = baseUrl + 'manga/%s' % mangaId + self.chaptersUrl = baseUrl + 'manga/%s/feed?translatedLanguage[]=en&order[chapter]=desc&limit=500' % mangaId + self.stripUrl = baseUrl + 'chapter/%s' + self.cdnUrl = baseUrl + 'at-home/server/%s' + self.imageUrl = 'https://uploads.mangadex.org/data/%s/%%s' def starter(self): # Retrieve manga metadata from API - manga = self.session.get(self.url) - manga.raise_for_status() - mangaData = manga.json() + mangaData = self.session.get(self.url) + mangaData.raise_for_status() + manga = mangaData.json()['data'] + + # Retrieve chapter list from API + chapterList = [] + chapterTotal = 1 + chapterOffset = 0 + while len(chapterList) < chapterTotal: + chapterData = self.session.get(self.chaptersUrl + '&offset=%d' % chapterOffset) + chapterData.raise_for_status() + chapterBlock = chapterData.json() + chapterTotal = chapterBlock['total'] + chapterOffset = chapterBlock['offset'] + chapterBlock['limit'] + chapterList.extend(chapterBlock['data']) + # Determine if manga is complete and/or adult - if mangaData['manga']['last_chapter'] != '0': - for ch in mangaData['chapter']: - if mangaData['chapter'][ch]['chapter'] == mangaData['manga']['last_chapter']: + if manga['attributes']['lastChapter'] != '0': + for chapter in chapterList: + if chapter['attributes']['chapter'] == manga['attributes']['lastChapter']: self.endOfLife = True - if mangaData['manga']['hentai'] != '0': + break + + if manga['attributes']['contentRating'] != 'safe': self.adult = True + # Prepare chapter list self.chapters = [] - for ch in mangaData['chapter']: - if mangaData['chapter'][ch]['lang_code'] != 'gb': + for chapter in chapterList: + if chapter['attributes']['chapter'] == '': continue - if len(self.chapters) < 1: - self.chapters.append(ch) + if chapter['attributes']['pages'] == 0: continue - if mangaData['chapter'][ch]['chapter'] == mangaData['chapter'][self.chapters[-1]]['chapter']: + if len(self.chapters) >= 1 and chapter['attributes']['chapter'] == self.chapters[-1]['attributes']['chapter']: continue - if mangaData['chapter'][ch]['chapter'] == '': - continue - self.chapters.append(ch) + self.chapters.append(chapter) self.chapters.reverse() + # Find first and last chapter - self.firstStripUrl = self.stripUrl % self.chapters[0] - return self.stripUrl % self.chapters[-1] + self.firstStripUrl = self.stripUrl % self.chapters[0]['id'] + return self.stripUrl % self.chapters[-1]['id'] def getPrevUrl(self, url, data): - chapter = url.replace('&type=chapter', '').rsplit('=', 1)[-1] - return self.stripUrl % self.chapters[self.chapters.index(chapter) - 1] + # Determine previous chapter ID from cached list + chapterId = url.rsplit('/', 1)[-1] + chapter = list(filter(lambda c: c['id'] == chapterId, self.chapters)) + if len(chapter) == 0: + return None + return self.stripUrl % self.chapters[self.chapters.index(chapter[0]) - 1]['id'] def fetchUrls(self, url, data, urlSearch): # Retrieve chapter metadata from API chapterData = json.loads(data.text_content()) - self.chapter = chapterData['chapter'] + self.chapter = chapterData['data'] + cdnData = self.session.get(self.cdnUrl % self.chapter['id']) + cdnData.raise_for_status() + cdnBlock = cdnData.json() + # Save link order for position-based filenames - imageUrl = chapterData['server'] + chapterData['hash'] + '/%s' - self.imageUrls = [imageUrl % page for page in chapterData['page_array']] + imageUrl = self.imageUrl % cdnBlock['chapter']['hash'] + self.imageUrls = [imageUrl % page for page in cdnBlock['chapter']['data']] return self.imageUrls def namer(self, imageUrl, pageUrl): # Construct filename from episode number and page index in array - chapterNum = self.chapter + chapterNum = self.chapter['attributes']['chapter'] if self.chapter['attributes']['chapter'] != None else 0 pageNum = self.imageUrls.index(imageUrl) pageExt = imageUrl.rsplit('.')[-1] return '%s-%02d.%s' % (chapterNum, pageNum, pageExt) @@ -71,44 +94,58 @@ class MangaDex(_ParserScraper): @classmethod def getmodules(cls): return ( - cls('AttackonTitan', 429), - cls('Beastars', 20523), - cls('BokuNoKokoroNoYabaiYatsu', 23811), - cls('DeliciousinDungeon', 13871), - cls('DragonDrive', 5165), - cls('FuguushokuKajishiDakedoSaikyouDesu', 56319), - cls('GanbareDoukiChan', 46585), - cls('HangingOutWithAGamerGirl', 42490), - cls('HoriMiya', 6770), - cls('HowToOpenATriangularRiceball', 19305), - cls('InterspeciesReviewers', 20796), - cls('JahySamaWaKujikenai', 22369), - cls('JingaiNoYomeToIchaIchaSuru', 22651), - cls('KawaiiJoushiWoKomarasetai', 17910), - cls('KanojoOkarishimasu', 22151), - cls('Lv2KaraCheatDattaMotoYuushaKouhoNoMattariIsekaiLife', 33797), - cls('MaouNoOreGaDoreiElfWoYomeNiShitandaGaDouMederebaIi', 25495), - cls('ModernMoGal', 30308), - cls('MyTinySenpaiFromWork', 43610), - cls('OMaidensinYourSavageSeason', 22030), - cls('OokamiShounenWaKyouMoUsoOKasaneru', 14569), - cls('OokamiToKoshinryou', 1168), - cls('OtomeYoukaiZakuro', 4533), - cls('OversimplifiedSCP', 32834), - cls('PashiriNaBokuToKoisuruBanchouSan', 25862), - cls('PleaseDontBullyMeNagatoro', 22631), - cls('PleaseDontBullyMeNagatoroComicAnthology', 31004), - cls('PleaseTellMeGalkochan', 12702), - cls('SaekiSanWaNemutteru', 28834), - cls('SenpaiGaUzaiKouhaiNoHanashi', 23825), - cls('SewayakiKitsuneNoSenkoSan', 22723), - cls('SousouNoFrieren', 48045), - cls('SwordArtOnline', 1360), - cls('SwordArtOnlineProgressive', 9604), - cls('TamenDeGushi', 13939), - cls('TheWolfAndRedRidingHood', 31079), - cls('TomoChanWaOnnanoko', 15722), - cls('TonikakuKawaii', 23439), - cls('YotsubaAnd', 311), - cls('YuYuHakusho', 1738), + cls('AttackOnTitan', '304ceac3-8cdb-4fe7-acf7-2b6ff7a60613'), + cls('Beastars', 'f5e3baad-3cd4-427c-a2ec-ad7d776b370d'), + cls('BokuNoKokoroNoYabaiYatsu', '3df1a9a3-a1be-47a3-9e90-9b3e55b1d0ac'), + cls('CheerfulAmnesia', 'f9448f90-c068-4b6a-8c85-03d739aef255'), + cls('DoChokkyuuKareshiXKanojo', 'efb62763-c940-4495-aba5-69c192a999a4'), + cls('DeliciousinDungeon', 'd90ea6cb-7bc3-4d80-8af0-28557e6c4e17'), + cls('DragonDrive', '5c06ae70-b5cf-431a-bcd5-262a411de527'), + cls('FuguushokuKajishiDakedoSaikyouDesu', '17b3b648-fd89-4a69-9a42-6068ffbfa7a7'), + cls('GanbareDoukiChan', '190616bc-7da6-45fd-abd4-dd2ca656c183'), + cls('HangingOutWithAGamerGirl', 'de9e3b62-eac5-4c0a-917d-ffccad694381'), + cls('HoriMiya', 'a25e46ec-30f7-4db6-89df-cacbc1d9a900'), + cls('HowToOpenATriangularRiceball', '6ebd90ce-d5e8-49c0-a4bc-e02e0f8ecb93'), + cls('HunterXHunter', 'db692d58-4b13-4174-ae8c-30c515c0689c'), + cls('IchaichasuruToOkaneGaWaichauFutariNoHanashi', '8eaaec7d-7aa7-490e-8d52-5a3d0a28e78b'), + cls('InterspeciesReviewers', '1b2fddf9-1385-4f3c-b37a-cf86a9428b1a'), + cls('JahySamaWaKujikenai', '2f4e5f5b-d930-4266-8c8a-c4cf9a81e51f'), + cls('JingaiNoYomeToIchaIchaSuru', '809d2493-df3c-4e72-a57e-3e0026cae9fb'), + cls('KaetteKudasaiAkutsuSan', '737a846b-2e67-4d63-9f7e-f54b3beebac4'), + cls('KawaiiJoushiWoKomarasetai', '23b7cc7a-df89-4049-af28-1fa78f88713e'), + cls('KanojoOkarishimasu', '32fdfe9b-6e11-4a13-9e36-dcd8ea77b4e4'), + cls('KoiToUtatane', 'f7d40a27-e289-45b3-9c68-d1cb251897e6'), + cls('Lv2KaraCheatDattaMotoYuushaKouhoNoMattariIsekaiLife', '58bc83a0-1808-484e-88b9-17e167469e23'), + cls('MaouNoOreGaDoreiElfWoYomeNiShitandaGaDouMederebaIi', '55ace2fb-e157-4d76-9e72-67c6bd762a39'), + cls('ModernMoGal', 'b1953f80-36f7-492c-b0f8-e9dd0ad01752'), + cls('MyTinySenpaiFromWork', '28ed63af-61f8-43af-bac3-762030c72963'), + cls('OMaidensinYourSavageSeason', 'c4613b7d-7a6e-48f9-82f0-bce3dd33383a'), + cls('OokamiShounenWaKyouMoUsoOKasaneru', '5e77d9e2-2e44-431a-a995-5fefd411e55e'), + cls('OokamiToKoshinryou', 'de900fd3-c94c-4148-bbcb-ca56eaeb57a4'), + cls('OtomeYoukaiZakuro', 'c1fa97be-0f1f-4686-84bc-806881c97d53'), + cls('OversimplifiedSCP', 'e911fe33-a9b3-43dc-9eb7-f5ee081a6dc8'), + cls('PashiriNaBokuToKoisuruBanchouSan', '838e5b3a-51c8-44cf-b6e2-68193416f6fe'), + cls('PleaseDontBullyMeNagatoro', 'd86cf65b-5f6c-437d-a0af-19a31f94ec55'), + cls('PleaseDontBullyMeNagatoroComicAnthology', '2a4bc9ec-2d70-428a-8b46-27f6218ed267'), + cls('PleaseTellMeGalkochan', '7a2f2f6b-a6a6-4149-879b-3fc2f6916549'), + cls('RebuildWorld', '99182618-ae92-4aec-a5df-518659b7b613'), + cls('SaekiSanWaNemutteru', 'd9aecdab-8aef-4b90-98d5-32e86faffb28'), + cls('SeijoSamaIieToorisugariNoMamonotsukaiDesu', 'd4c40e73-251a-4bcb-a5a6-1edeec1e00e7'), + cls('SenpaiGaUzaiKouhaiNoHanashi', 'af38f328-8df1-4b4c-a272-e737625c3ddc'), + cls('SewayakiKitsuneNoSenkoSan', 'c26269c7-0f5d-4966-8cd5-b79acb86fb7a'), + cls('ShinNoJitsuryokuWaGirigiriMadeKakushiteIyouToOmou', '22fda941-e603-4601-a536-c3ad6d004ba8'), + cls('SoloLeveling', '32d76d19-8a05-4db0-9fc2-e0b0648fe9d0'), + cls('SousouNoFrieren', 'b0b721ff-c388-4486-aa0f-c2b0bb321512'), + cls('SwordArtOnline', '3dd0b814-23f4-4342-b75b-f206598534f6'), + cls('SwordArtOnlineProgressive', '22ea3f54-11e4-4932-a527-89d63d3a62d9'), + cls('TadokoroSan', '8ffbfa2f-23fa-4490-848e-942581a4d873'), + cls('TamenDeGushi', '3f1453fb-9dac-4aca-a2ea-69613856c952'), + cls('TenseiShitaraSlimeDattaKen', 'e78a489b-6632-4d61-b00b-5206f5b8b22b'), + cls('TheNewGate', 'b41bef1e-7df9-4255-bd82-ecf570fec566'), + cls('TheWolfAndRedRidingHood', 'a7d1283b-ed38-4659-b8bc-47bfca5ccb8a'), + cls('TomoChanWaOnnanoko', '76ee7069-23b4-493c-bc44-34ccbf3051a8'), + cls('TonikakuKawaii', '30f3ac69-21b6-45ad-a110-d011b7aaadaa'), + cls('UramikoiKoiUramikoi', '009b6788-48f3-4e78-975c-097f54def7ab'), + cls('YotsubaAnd', '58be6aa6-06cb-4ca5-bd20-f1392ce451fb'), + cls('YuYuHakusho', '44a5cbe1-0204-4cc7-a1ff-0fda2ac004b6'), ) diff --git a/dosagelib/plugins/n.py b/dosagelib/plugins/n.py index 2fb7f4009..2c45ddc53 100644 --- a/dosagelib/plugins/n.py +++ b/dosagelib/plugins/n.py @@ -152,7 +152,7 @@ class NineToNine(_ParserScraper): url = 'https://www.tigerknight.com/99' stripUrl = url + '/%s' firstStripUrl = stripUrl % '2014-01-01' - imageSearch = '//img[@class="comic-image"]' + imageSearch = '//img[d:class("comic-image")]' prevSearch = '//a[./span[contains(text(), "Previous")]]' multipleImagesPerStrip = True diff --git a/dosagelib/plugins/old.py b/dosagelib/plugins/old.py index 1ba2a7546..da12ea04d 100644 --- a/dosagelib/plugins/old.py +++ b/dosagelib/plugins/old.py @@ -617,12 +617,15 @@ class Removed(Scraper): cls('ComicFury/GreenerGrass'), cls('ComicFury/HelloWanderingStar'), cls('ComicFury/Hodgemosh'), + cls('ComicFury/Kitsune'), cls('ComicFury/LaszloAndEdgar'), cls('ComicFury/MegamanComic'), cls('ComicFury/PatchworkPeople'), cls('ComicFury/PornographyInFiveActs'), cls('ComicFury/PoussireDeFe'), cls('ComicFury/RED'), + cls('ComicFury/ResNullius'), + cls('ComicFury/ResNulliusCS'), cls('ComicFury/Seed'), cls('ComicFury/TheAcryden'), cls('ComicFury/TheHourlyComic'), @@ -955,6 +958,7 @@ class Removed(Scraper): cls('MangaDex/HeavensDesignTeam', 'legal'), cls('MangaDex/SPYxFAMILY', 'legal'), cls('Ryugou'), + cls('SeelPeel'), cls('SmackJeeves/20TimesKirby'), cls('SmackJeeves/2Kingdoms'), cls('SmackJeeves/355Days'), @@ -1521,11 +1525,16 @@ class Removed(Scraper): cls('Shivae/CafeAnime'), cls('Shivae/Extras'), cls('SnafuComics/Titan'), + cls('StudioKhimera/Eorah', 'mov'), cls('StuffNoOneToldMe'), + cls('TaleOfTenThousand'), cls('TheCyantianChronicles/CookieCaper'), cls('TheCyantianChronicles/Pawprints'), + cls('VGCats/Adventure'), + cls('VGCats/Super'), cls('VictimsOfTheSystem'), cls('WebDesignerCOTW'), + cls('WintersLight'), ) @@ -1560,6 +1569,7 @@ class Renamed(Scraper): cls('ComicFury/Elektroanthology', 'ComicFury/ElektrosComicAnthology'), cls('ComicFury/ICanSeeYourFeels', 'ComicFury/SeeYourFeels'), cls('ComicFury/MAGISAupdatesMonWedFri', 'ComicFury/MAGISAPARASAYOupdatesMonFri'), + cls('ComicFury/MonsterSoup', 'MonsterSoup'), cls('ComicFury/OopsComicAdventure', 'OopsComicAdventure'), cls('ComicFury/ThomasAndZachary', 'ComicFury/ThomasAndZacharyArchives'), cls('ComicGenesis/TheLounge', 'KeenSpot/TheLounge'), @@ -1601,18 +1611,24 @@ class Renamed(Scraper): cls('ZebraGirl', 'ComicFury/ZebraGirl'), # Renamed in 3.0 + cls('AHClub', 'RickGriffinStudios/AHClub'), cls('CrapIDrewOnMyLunchBreak', 'WebToons/CrapIDrewOnMyLunchBreak'), + cls('DeerMe', 'ComicFury/DeerMe'), cls('GoComics/BloomCounty2017', 'GoComics/BloomCounty2019'), cls('GoComics/Cathy', 'GoComics/CathyClassics'), cls('GoComics/Owlturd', 'GoComics/ShenComix'), cls('GoComics/PeanutsEnEspanol', 'GoComics/SnoopyEnEspanol'), cls('GoComics/RipleysBelieveItOrNotSpanish', 'GoComics/RipleysAunqueUstedNoLoCrea'), cls('GoComics/WebcomicName', 'WebcomicName'), + cls('Guardia', 'ComicFury/Guardia'), cls('Shivae/BlackRose', 'BlackRose'), cls('SmackJeeves/BlackTapestries', 'ComicFury/BlackTapestries'), cls('SmackJeeves/ByTheBook', 'ByTheBook'), cls('SmackJeeves/FurryExperience', 'ComicFury/FurryExperience'), cls('SmackJeeves/GrowingTroubles', 'ComicFury/GrowingTroubles'), cls('SmackJeeves/TheRealmOfKaerwyn', 'ComicFury/TheRealmOfKaerwyn'), + cls('SoloLeveling', 'MangaDex/SoloLeveling'), cls('StudioKhimera/Draconia', 'Draconia'), + cls('TracesOfThePast', 'RickGriffinStudios/TracesOfThePast'), + cls('TracesOfThePast/NSFW', 'RickGriffinStudios/TracesOfThePastNSFW'), ) diff --git a/dosagelib/plugins/projectfuture.py b/dosagelib/plugins/projectfuture.py index 6a7853bb9..1ebad3055 100644 --- a/dosagelib/plugins/projectfuture.py +++ b/dosagelib/plugins/projectfuture.py @@ -33,8 +33,9 @@ class ProjectFuture(_ParserScraper): cls('Emily', 'emily', '01-00'), cls('FishingTrip', 'fishing', '01-00'), cls('HeadsYouLose', 'heads', '00-01', last='07-12'), + cls('IPanther', 'panther', '00'), cls('NiallsStory', 'niall', '00'), - cls('ProjectFuture', 'strip', '0'), + cls('ProjectFuture', 'strip', '0', last='664'), cls('RedValentine', 'redvalentine', '1', last='6'), cls('ShortStories', 'shorts', '01-00'), cls('StrangeBedfellows', 'bedfellows', '1', last='6'), @@ -46,4 +47,5 @@ class ProjectFuture(_ParserScraper): cls('TheSierraChronicles', 'sierra', '0', last='29'), cls('TheTuppenyMan', 'tuppenny', '00', last='16'), cls('TurningANewPage', 'azrael', '1', last='54'), + cls('Xerian', 'xerian', '01-00'), ) diff --git a/dosagelib/plugins/r.py b/dosagelib/plugins/r.py index 95725338e..69947e968 100644 --- a/dosagelib/plugins/r.py +++ b/dosagelib/plugins/r.py @@ -116,9 +116,8 @@ class Requiem(_WordPressScraper): class Replay(_ParserScraper): url = 'http://replaycomic.com/' stripUrl = url + 'comic/%s/' - url = stripUrl % 'trying-it-out' firstStripUrl = stripUrl % 'red-desert' - imageSearch = '//div[@id="comic"]//img' + imageSearch = '//div[@id="comic"]//img[@alt]' prevSearch = '//a[contains(@class, "comic-nav-previous")]' nextSearch = '//a[contains(@class, "comic-nav-next")]' diff --git a/dosagelib/plugins/rickgriffinstudios.py b/dosagelib/plugins/rickgriffinstudios.py new file mode 100644 index 000000000..c57ae6dc9 --- /dev/null +++ b/dosagelib/plugins/rickgriffinstudios.py @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: MIT +# Copyright (C) 2019-2020 Tobias Gruetzmacher +# Copyright (C) 2019-2022 Daniel Ring +from ..helpers import indirectStarter +from .common import _WordPressScraper, _WPNaviIn + + +class Housepets(_WordPressScraper): + url = 'http://www.housepetscomic.com/' + stripUrl = url + 'comic/%s/' + firstStripUrl = '2008/06/02/when-boredom-strikes' + + +class RickGriffinStudios(_WPNaviIn): + baseUrl = 'http://rickgriffinstudios.com/' + stripUrl = baseUrl + 'comic-post/%s/' + latestSearch = '//a[contains(@title, "Permanent Link")]' + starter = indirectStarter + nav = None + + def __init__(self, name, sub, first, last=None, adult=False, nav=None): + super().__init__('RickGriffinStudios/' + name) + self.url = self.baseUrl + sub + '/' + self.firstStripUrl = self.stripUrl % first + + if last: + self.url = self.stripUrl % last + self.starter = super(RickGriffinStudios, self).starter + self.endOfLife = True + + if adult: + self.latestSearch = '//a[contains(@title, "NSFW")]' + self.adult = True + + if nav: + self.nav = nav + + def getPrevUrl(self, url, data): + # Links between chapters + url = url.rstrip('/').rsplit('/', 1)[-1] + if self.nav and url in self.nav: + return self.stripUrl % self.nav[url] + return super(RickGriffinStudios, self).getPrevUrl(url, data) + + @classmethod + def getmodules(cls): + return ( + cls('AHClub', 'ah-club', 'cover', nav={ + 'ah-club-2-cover': 'ah-club-1-page-24', + 'ah-club-3-cover': 'ah-club-2-page-28', + 'ah-club-4-cover': 'ah-club-3-page-22', + 'ah-club-5-cover': 'ah-club-4-page-24' + }), + cls('HayvenCelestia', 'hayven-celestia', 'skinchange-p1'), + cls('TheStoryboard', 'the-storyboard', 'the-storyboard-001'), + cls('TracesOfThePast', 'in-the-new-age', 'totp-page-1'), + cls('TracesOfThePastNSFW', 'in-the-new-age', 'totp-page-1-nsfw', adult=True), + cls('ZootopiaNightTerrors', 'zootopia-night-terrors', 'zootopia-night-terrors-p1', 'zootopia-night-terrors-p7'), + ) diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index e6e07e6b5..a543977dd 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -156,13 +156,6 @@ class Science(_ParserScraper): endOfLife = True -class SeelPeel(_WPNaviIn): - url = 'https://seelpeel.com/' - stripUrl = url + 'comic/%s/' - firstStripUrl = stripUrl % 'seelpeel-goes-live' - multipleImagesPerStrip = True - - class SequentialArt(_ParserScraper): url = 'https://www.collectedcurios.com/sequentialart.php' stripUrl = url + '?s=%s' @@ -216,6 +209,12 @@ class Sheldon(_BasicScraper): help = 'Index format: yymmdd' +class Shifters(_WPNavi): + url = 'http://shiftersonline.com/' + stripUrl = url + '%s/' + firstStripUrl = stripUrl % 'shifters-redux-promo' + + class ShipInABottle(_WordPressScraper): url = 'http://shipinbottle.pepsaga.com/' stripUrl = url + '?p=%s' @@ -356,66 +355,6 @@ class SodiumEyes(_WordPressScraper): endOfLife = True -class SoloLeveling(_ParserScraper): - url = 'https://w3.sololeveling.net/' - stripUrl = url + 'manga/solo-leveling-chapter-%s/' - firstStripUrl = stripUrl % '1' - imageSearch = '//div[@class="img_container"]//img' - prevSearch = '//a[@rel="prev"]' - latestSearch = '//table[@class="chap_tab"]//a' - starter = indirectStarter - multipleImagesPerStrip = True - imageUrlFixes = { - '94-0_5dd574efda419/28.': '94-0_5dd574efda419/28a.', - '92-0_5dc2fcb9ed562/22.': '92-0_5dc2fcb9ed562/22s.', - '91-0_5db9b881ac2f0/20k.': '91-0_5db9b881ac2f0/20l.', - '91-0_5db9b881ac2f0/23.': '91-0_5db9b881ac2f0/23a.', - '90-0_5db08467ca2b1/07.': '90-0_5db08467ca2b1/07a.', - '90-0_5db08467ca2b1/09.': '90-0_5db08467ca2b1/09a.', - '90-0_5db08467ca2b1/13.': '90-0_5db08467ca2b1/13a.', - '90-0_5db08467ca2b1/14.': '90-0_5db08467ca2b1/14a.', - '90-0_5db08467ca2b1/21.': '90-0_5db08467ca2b1/21a.', - '90-0_5db08467ca2b1/22.': '90-0_5db08467ca2b1/22a.', - '88-0_5d9e0dedb942e/03.': '88-0_5d9e0dedb942e/03b.', - '88-0_5d9e0dedb942e/05.': '88-0_5d9e0dedb942e/05a.', - '88-0_5d9e0dedb942e/30.': '88-0_5d9e0dedb942e/30a.', - '87-0_5d94cdebd9df7/01a.': '87-0_5d94cdebd9df7/01c.', - } - - def imageUrlModifier(self, imageUrl, data): - if 'url=' in imageUrl: - imageUrl = imageUrl.split('url=')[1].split('&')[0] - for fix in self.imageUrlFixes: - imageUrl = imageUrl.replace(fix, self.imageUrlFixes[fix]) - return imageUrl - - def fetchUrls(self, url, data, urlSearch): - # Save link order for position-based filenames - self.imageUrls = super(SoloLeveling, self).fetchUrls(url, data, urlSearch) - self.imageUrls = [self.imageUrlModifier(x, data) for x in self.imageUrls] - return self.imageUrls - - def getPage(self, url): - try: - return super().getPage(url) - except HTTPError as e: - # CloudFlare WAF - if e.response.status_code == 403 and '1020' in e.response.text: - self.geoblocked() - else: - raise e - - def getPrevUrl(self, url, data): - return self.stripUrl % str(int(url.strip('/').rsplit('-', 1)[-1]) - 1) - - def namer(self, imageUrl, pageUrl): - # Construct filename from episode number and image position on page - episodeNum = pageUrl.strip('/').rsplit('-', 1)[-1] - imageNum = self.imageUrls.index(imageUrl) - imageExt = imageUrl.rsplit('.', 1)[-1] - return "%s-%03d.%s" % (episodeNum, imageNum, imageExt) - - class SomethingPositive(_ParserScraper): url = 'https://www.somethingpositive.net/' stripUrl = url + 'sp%s.shtml' @@ -660,6 +599,19 @@ class StrongFemaleProtagonist(_ParserScraper): ) +class StupidFox(_ParserScraper): + url = 'http://stupidfox.net/' + stripUrl = url + '%s' + firstStripUrl = stripUrl % 'hello' + imageSearch = '//div[@class="comicmid"]//img' + prevSearch = '//a[@accesskey="p"]' + + def namer(self, imageUrl, pageUrl): + page = self.getPage(pageUrl) + title = page.xpath(self.imageSearch + '/@title')[0].replace(' - ', '-').replace(' ', '-') + return title + '.' + imageUrl.rsplit('.', 1)[-1] + + class SuburbanJungle(_ParserScraper): url = 'http://suburbanjungleclassic.com/' stripUrl = url + '?p=%s' @@ -675,8 +627,9 @@ class SuburbanJungleRoughHousing(_WordPressScraper): class Supercell(_ParserScraper): - url = 'https://www.supercellcomic.com/' - stripUrl = url + 'pages/%s.html' + baseUrl = 'https://www.supercellcomic.com/' + url = baseUrl + 'latest.html' + stripUrl = baseUrl + 'pages/%s.html' firstStripUrl = stripUrl % '0001' imageSearch = '//img[@class="comicStretch"]' prevSearch = '//div[@class="comicnav"]/a[./img[contains(@src, "comnav_02")]]' @@ -707,6 +660,6 @@ class SwordsAndSausages(_ParserScraper): url = 'https://www.tigerknight.com/ss' stripUrl = url + '/%s' firstStripUrl = stripUrl % '1-1' - imageSearch = '//img[@class="comic-image"]' + imageSearch = '//img[d:class("comic-image")]' prevSearch = '//a[./span[contains(text(), "Previous")]]' multipleImagesPerStrip = True diff --git a/dosagelib/plugins/studiokhimera.py b/dosagelib/plugins/studiokhimera.py index 267a578f4..66dd0447c 100644 --- a/dosagelib/plugins/studiokhimera.py +++ b/dosagelib/plugins/studiokhimera.py @@ -43,16 +43,17 @@ class StudioKhimera(_ParserScraper): @classmethod def getmodules(cls): return ( - cls('Eorah', 'eorah'), cls('Mousechievous', 'mousechievous'), ) class UberQuest(_WordPressScraper): name = 'StudioKhimera/UberQuest' - url = 'https://uberquest.studiokhimera.com/' - stripUrl = url + 'comic/%s/' - firstStripUrl = stripUrl % 'chapter-1-cover' + stripUrl = 'https://uberquest.studiokhimera.com/comic/page/%s/' + url = stripUrl % 'latest' + firstStripUrl = stripUrl % 'cover' + imageSearch = '//div[@class="prj--comic-image"]/img' + prevSearch = '//uq-image-button[d:class("prj--comic-control-prev")]' def namer(self, imageUrl, pageUrl): # Fix inconsistent filenames diff --git a/dosagelib/plugins/t.py b/dosagelib/plugins/t.py index 0792f63e1..7e8c1b369 100644 --- a/dosagelib/plugins/t.py +++ b/dosagelib/plugins/t.py @@ -23,14 +23,6 @@ class TailsAndTactics(_ParserScraper): prevSearch = '//a[text()=" Back"]' -class TaleOfTenThousand(_ParserScraper): - url = 'http://www.t10k.club/' - stripUrl = url + 'comic/%s' - firstStripUrl = stripUrl % '1-01_00' - imageSearch = '//article[@id="comic"]//img' - prevSearch = '//a[d:class("prev")]' - - class TekMage(_WPNavi): url = 'https://tekmagecomic.com/' stripUrl = url + 'comic/%s/' @@ -61,6 +53,26 @@ class TheBrads(_ParserScraper): multipleImagesPerStrip = True endOfLife = True +class TheChroniclesOfHuxcyn(_WordPressScraper): + url = 'https://huxcyn.com/' + stripUrl = url + 'comic/%s' + firstStripUrl = stripUrl % 'opening-001' + + def namer(self, imageUrl, pageUrl): + # Fix inconsistent filenames + filename = imageUrl.rsplit('/', 1)[-1] + filename = filename.replace('IMG_0504', 'TCoH109') + filename = filename.replace('tcoh', 'TCoH') + filename = filename.replace('1599151639.xizana_f3a6458e-8d94-4259-bec3-5a92706fe493_jpeg', 'october.2020.cover') + filename = filename.replace('huxonsword', 'october.2020.huxonsword') + filename = filename.replace('New_Canvas100pageswebimage', 'TCoH100') + if filename[0] == '0': + filename = 'TCoH' + filename + elif filename[0] == '3': + pagenum = int(filename.rsplit('.', 1)[0].split('_', 1)[1].split('_', 1)[0]) + filename = 'TCoH' + str(40 + pagenum) + filename.rsplit('.', 1)[-1] + return filename + class TheClassMenagerie(_ParserScraper): stripUrl = 'http://www.theclassm.com/d/%s.html' @@ -278,26 +290,6 @@ class ToonHole(_WordPressScraper): return url in (self.url + "comic/if-game-of-thrones-was-animated/",) -class TracesOfThePast(_WPNaviIn): - baseUrl = 'http://rickgriffinstudios.com/' - url = baseUrl + 'in-the-new-age/' - stripUrl = baseUrl + 'comic-post/%s/' - firstStripUrl = stripUrl % 'totp-page-1' - latestSearch = '//a[contains(@title, "Permanent Link")]' - starter = indirectStarter - - -class TracesOfThePastNSFW(_WPNaviIn): - name = 'TracesOfThePast/NSFW' - baseUrl = 'http://rickgriffinstudios.com/' - url = baseUrl + 'in-the-new-age/' - stripUrl = baseUrl + 'comic-post/%s/' - firstStripUrl = stripUrl % 'totp-page-1-nsfw' - latestSearch = '//a[contains(@title, "NSFW")]' - starter = indirectStarter - adult = True - - class TrippingOverYou(_BasicScraper): url = 'http://www.trippingoveryou.com/' stripUrl = url + 'comic/%s' diff --git a/dosagelib/plugins/tapastic.py b/dosagelib/plugins/tapastic.py new file mode 100644 index 000000000..ae840de56 --- /dev/null +++ b/dosagelib/plugins/tapastic.py @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: MIT +# Copyright (C) 2019-2020 Tobias Gruetzmacher +# Copyright (C) 2019-2020 Daniel Ring +from ..scraper import _ParserScraper +from ..helpers import indirectStarter + + +class Tapastic(_ParserScraper): + baseUrl = 'https://tapas.io/' + imageSearch = '//article[contains(@class, "js-episode-article")]//img/@data-src' + prevSearch = '//a[contains(@class, "js-prev-ep-btn")]' + latestSearch = '//ul[contains(@class, "js-episode-list")]//a' + multipleImagesPerStrip = True + + def __init__(self, name, url): + super(Tapastic, self).__init__('Tapastic/' + name) + self.url = self.baseUrl + 'series/' + url + '/info' + self.stripUrl = self.baseUrl + 'episode/%s' + + def starter(self): + # Retrieve comic metadata from info page + info = self.getPage(self.url) + series = info.xpath('//@data-series-id')[0] + # Retrieve comic metadata from API + data = self.session.get(self.baseUrl + 'series/' + series + '/episodes?sort=NEWEST') + data.raise_for_status() + episodes = data.json()['data']['body'] + return self.stripUrl % episodes.split('data-id="')[1].split('"')[0] + + def getPrevUrl(self, url, data): + # Retrieve comic metadata from API + data = self.session.get(url + '/info') + data.raise_for_status() + apiData = data.json()['data'] + if apiData['scene'] == 2: + self.firstStripUrl = self.stripUrl % apiData['prev_ep_id'] + return self.stripUrl % apiData['prev_ep_id'] + + def fetchUrls(self, url, data, urlSearch): + # Save link order for position-based filenames + self.imageUrls = super().fetchUrls(url, data, urlSearch) + return self.imageUrls + + def namer(self, imageUrl, pageUrl): + # Construct filename from episode number and image position on page + episodeNum = pageUrl.rsplit('/', 1)[-1] + imageNum = self.imageUrls.index(imageUrl) + imageExt = pageUrl.rsplit('.', 1)[-1] + if len(self.imageUrls) > 1: + filename = "%s-%d.%s" % (episodeNum, imageNum, imageExt) + else: + filename = "%s.%s" % (episodeNum, imageExt) + return filename + + @classmethod + def getmodules(cls): + return ( + # Manually-added comics + cls('AmpleTime', 'Ample-Time'), + cls('InsignificantOtters', 'IOtters'), + cls('NoFuture', 'NoFuture'), + cls('OrensForge', 'OrensForge'), + cls('RavenWolf', 'RavenWolf'), + cls('SyntheticInstinct', 'Synthetic-Instinct'), + cls('TheCatTheVineAndTheVictory', 'The-Cat-The-Vine-and-The-Victory'), + cls('VenturaCityDrifters', 'Ventura-City-Drifters'), + + # START AUTOUPDATE + # END AUTOUPDATE + ) diff --git a/dosagelib/plugins/u.py b/dosagelib/plugins/u.py index 0ef199e12..983d565f5 100644 --- a/dosagelib/plugins/u.py +++ b/dosagelib/plugins/u.py @@ -4,6 +4,8 @@ # Copyright (C) 2015-2020 Tobias Gruetzmacher # Copyright (C) 2019-2020 Daniel Ring from re import compile +from urllib.parse import urljoin +from lxml import etree from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter @@ -27,7 +29,21 @@ class Undertow(_BasicScraper): class unDivine(_ComicControlScraper): - url = 'http://undivinecomic.com/' + url = 'https://www.undivinecomic.com/' + stripUrl = url + 'comic/%s' + firstStripUrl = stripUrl % 'page-1' + + def namer(self, imageUrl, pageUrl): + # Fix inconsistent filenames + filename = imageUrl.rsplit('/', 1)[-1].replace(' ', '-') + filename = filename.replace('10B311D9-0992-4D74-AEB8-DAB714DA67C6', 'UD-322') + filename = filename.replace('99266624-7EF7-4E99-9EC9-DDB5F59CBDFD', 'UD-311') + filename = filename.replace('33C6A5A1-F703-4A0A-BCD5-DE1A09359D8E', 'UD-310') + filename = filename.replace('6CE01E81-C299-43C7-A221-8DE0670EFA30', 'ch4endbonusq4') + filename = filename.replace('DB66D93B-1FE5-49C7-90E0-FFF981DCD6B3', 'bipolar') + if len(filename) > 15 and filename[0].isdigit() and filename[10] == '-': + filename = filename[11:] + return filename class UnicornJelly(_BasicScraper): @@ -45,13 +61,29 @@ class Unsounded(_ParserScraper): startUrl = url + 'comic+index/' stripUrl = url + 'comic/ch%s/ch%s_%s.html' firstStripUrl = stripUrl % ('01', '01', '01') - imageSearch = '//img[contains(@src, "pageart/")]' + imageSearch = '//div[@id="comic"]//img' prevSearch = '//a[d:class("back")]' latestSearch = '//div[@id="chapter_box"][1]//a[last()]' multipleImagesPerStrip = True starter = indirectStarter help = 'Index format: chapter-page' + def fetchUrls(self, url, data, urlSearch): + imageUrls = super(Unsounded, self).fetchUrls(url, data, urlSearch) + # Include background for multi-image pages + imageRegex = compile(r'background-image: url\((pageart/.*)\)') + for match in imageRegex.finditer(str(etree.tostring(data))): + print(match) + searchUrls.append(normaliseURL(urljoin(data[1], match.group(1)))) + return imageUrls + + def namer(self, imageUrl, pageUrl): + filename = imageUrl.rsplit('/', 1)[-1] + pagename = pageUrl.rsplit('/', 1)[-1] + if pagename.split('.', 1)[0] != filename.split('.', 1)[0]: + filename = pagename.split('_', 1)[0] + '_' + filename + return filename + def getPrevUrl(self, url, data): # Fix missing navigation links between chapters if 'ch13/you_let_me_fall' in url: diff --git a/dosagelib/plugins/v.py b/dosagelib/plugins/v.py index d375a7ee0..e807882e8 100644 --- a/dosagelib/plugins/v.py +++ b/dosagelib/plugins/v.py @@ -8,13 +8,22 @@ from ..helpers import bounceStarter, indirectStarter class VampireHunterBoyfriends(_ParserScraper): - url = 'https://boneitiscomics.com/vhb.php' - stripUrl = url + '?pg=%s' - firstStripUrl = stripUrl % '1' - imageSearch = '//div[@class="page"]//img' - prevSearch = '//a[@id="prev"]' + baseUrl = 'https://boneitisindustries.com/' + url = baseUrl + 'comics/vampire-hunter-boyfriends/' + stripUrl = baseUrl + 'comic/%s/' + firstStripUrl = stripUrl % 'vampire-hunter-boyfriends-chapter-1-cover' + imageSearch = '//div[@id="content"]//img[d:class("size-full")]' + prevSearch = '//a[./span[d:class("ticon-chevron-left")]]' adult = True + def starter(self): + archivePage = self.getPage(self.url) + self.archive = archivePage.xpath('//div[contains(@class, "vcex-portfolio-grid")]//a/@href') + return self.archive[-1] + + def getPrevUrl(self, url, data): + return self.archive[self.archive.index(url) - 1] + class Vexxarr(_ParserScraper): baseUrl = 'http://www.vexxarr.com/' @@ -40,20 +49,6 @@ class VGCats(_ParserScraper): help = 'Index format: n (unpadded)' -class VGCatsAdventure(VGCats): - name = 'VGCats/Adventure' - url = 'http://www.vgcats.com/ffxi/' - stripUrl = url + '?strip_id=%s' - imageSearch = '//p/img[contains(@src, "images/")]' - - -class VGCatsSuper(VGCats): - name = 'VGCats/Super' - url = 'http://www.vgcats.com/super/' - stripUrl = url + '?strip_id=%s' - imageSearch = '//p/img[contains(@src, "images/")]' - - class VickiFox(_ParserScraper): url = 'http://www.vickifox.com/comic/strip' stripUrl = url + '?id=%s' diff --git a/dosagelib/plugins/w.py b/dosagelib/plugins/w.py index 3f8f80cf1..63d5657cc 100644 --- a/dosagelib/plugins/w.py +++ b/dosagelib/plugins/w.py @@ -133,14 +133,6 @@ class WildeLife(_ComicControlScraper): firstStripUrl = stripUrl % '1' -class WintersLight(_ParserScraper): - url = 'https://winterslight.anaria.net/' - stripUrl = url + 'comic/%s' - firstStripUrl = stripUrl % 'winterslight00' - imageSearch = '//img[contains(@src, "comic/pages/")]' - prevSearch = '//a[contains(text(), "Previous")]' - - class Wonderella(_BasicScraper): url = 'http://nonadventures.com/' rurl = escape(url) @@ -232,9 +224,3 @@ class WormWorldSagaGerman(WormWorldSaga): class WormWorldSagaSpanish(WormWorldSaga): lang = 'es' - - -class Wrongside(_WPNavi): - url = 'http://www.ayzewi.com/comic/' - stripUrl = url + '?comic=%s' - firstStripUrl = stripUrl % 'intro-2' diff --git a/dosagelib/plugins/webtoons.py b/dosagelib/plugins/webtoons.py index 2a2c6fe91..e9c9ccb9d 100644 --- a/dosagelib/plugins/webtoons.py +++ b/dosagelib/plugins/webtoons.py @@ -115,6 +115,7 @@ class WebToons(_ParserScraper): cls('CityOfBlank', 'sf/city-of-blank', 1895), cls('CityOfWalls', 'drama/city-of-wall', 505), cls('CityVamps', 'challenge/city-vamps-', 119224), + cls('ClawShot', 'challenge/clawshot', 621465), cls('ClusterFudge', 'slice-of-life/cluster-fudge', 355), cls('CodeAdam', 'action/code-adam', 1657), cls('CookingComically', 'tiptoon/cooking-comically', 622), @@ -134,7 +135,7 @@ class WebToons(_ParserScraper): cls('DEADDAYS', 'horror/dead-days', 293), cls('Debunkers', 'challenge/debunkers', 148475), cls('DEEP', 'thriller/deep', 364), - cls('Defects', 'challenge/defects', 221106), + cls('Defects', 'fantasy/defects', 2731), cls('Denma', 'sf/denma', 921), cls('Dents', 'sf/dents', 671), cls('Deor', 'fantasy/deor', 1663), @@ -224,6 +225,7 @@ class WebToons(_ParserScraper): cls('JustPancakes', 'comedy/just-pancakes', 1651), cls('KidsAreAllRight', 'drama/kids-are-all-right', 283), cls('Killstagram', 'thriller/killstagram', 1971), + cls('KissItGoodbye', 'challenge/kiss-it-goodbye', 443703), cls('KindOfConfidential', 'romance/kind-of-confidential', 663), cls('KindOfLove', 'slice-of-life/kind-of-love', 1850), cls('KnightRun', 'sf/knight-run', 67), @@ -317,6 +319,7 @@ class WebToons(_ParserScraper): cls('RANDOMPHILIA', 'comedy/randomphilia', 386), cls('Rebirth', 'sf/rebirth', 1412), cls('RefundHighSchool', 'fantasy/refundhighschool', 1360), + cls('ReturnToPlayer', 'action/return-to-player', 2574), cls('RiseFromAshes', 'supernatural/rise-from-ashes', 959), cls('RoarStreetJournal', 'slice-of-life/roar-street-journal', 397), cls('RoomOfSwords', 'sf/room-of-swords', 1261), @@ -380,12 +383,14 @@ class WebToons(_ParserScraper): cls('TheGirlDownstairs', 'romance/the-girl-downstairs', 1809), cls('THEGIRLFROMCLASS', 'drama/the-girl-from-class', 73), cls('TheGodOfHighSchool', 'action/the-god-of-high-school', 66), + cls('TheGreenhouse', 'challenge/the-greenhouse-gl', 278312), cls('TheKissBet', 'romance/the-kiss-bet', 1617), cls('TheLifeOfTheThreeBears', 'slice-of-life/the-life-of-the-three-bears', 390), cls('ThePurpleHeart', 'super-hero/the-purple-heart', 723), cls('TheRedBook', 'horror/the-red-book', 467), cls('TheRedHook', 'super-hero/the-red-hook', 643), cls('TheRedKing', 'supernatural/the-red-king', 1687), + cls('TheRoomies', 'challenge/the-roomies-archive', 513669), cls('TheShadowProphet', 'drama/the-shadow-prophet', 1881), cls('TheSoundOfYourHeart', 'comedy/the-sound-of-your-heart', 269), cls('TheSteamDragonExpress', 'fantasy/steam-dragon-express', 1270), @@ -412,6 +417,7 @@ class WebToons(_ParserScraper): cls('UnderPrin', 'supernatural/underprin', 78), cls('UnderTheAegis', 'fantasy/under-the-aegis', 436), cls('UnholyBlood', 'supernatural/unholy-blood', 1262), + cls('UnintentionalGame', 'challenge/unintentional-game', 162674), cls('UnknownCaller', 'thriller/ar-toon', 775), cls('UnlovableReplacement', 'romance/unlovable-replacement', 1762), cls('UnluckyIsAsLuckyDoes', 'comedy/unlucky-is-as-lucky-does', 1554), diff --git a/dosagelib/plugins/wrongside.py b/dosagelib/plugins/wrongside.py new file mode 100644 index 000000000..785ed7ec5 --- /dev/null +++ b/dosagelib/plugins/wrongside.py @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: MIT +# Copyright (C) 2019-2020 Tobias Gruetzmacher +# Copyright (C) 2019-2022 Daniel Ring +from ..scraper import _ParserScraper +from ..helpers import indirectStarter + + +class Wrongside(_ParserScraper): + baseUrl = 'http://ayzewi.com/maingallery3/' + url = baseUrl + 'index.php?/category/5' + stripUrl = baseUrl + 'picture.php?%s' + firstStripUrl = stripUrl % '/175/category/21' + imageSearch = '//img[@id="theMainImage"]/@src' + prevSearch = '//a[contains(@title, "Previous :")]' + + def starter(self): + archivePage = self.getPage(self.url) + chapterUrls = archivePage.xpath('//ul[@class="albThumbs"]//a/@href') + self.archive = [] + for chapterUrl in chapterUrls: + chapterPage = self.getPage(chapterUrl) + self.archive.append(chapterPage.xpath('(//ul[@id="thumbnails"]//a/@href)[last()]')[0]) + return self.archive[0] + + def getPrevUrl(self, url, data): + if data.xpath(self.prevSearch) == [] and len(self.archive) > 0: + return self.archive.pop() + return super(Wrongside, self).getPrevUrl(url, data) + + def namer(self, imageUrl, pageUrl): + page = self.getPage(pageUrl) + title = page.xpath('//div[@class="browsePath"]/h2/text()')[0] + return title.replace('"', '') + '.' + imageUrl.rsplit('.', 1)[-1] + + +class WrongsideBeginnings(Wrongside): + name = 'Wrongside/Beginnings' + baseUrl = 'http://ayzewi.com/maingallery3/' + url = baseUrl + 'index.php?/category/4' + stripUrl = baseUrl + 'picture.php?%s' + firstStripUrl = stripUrl % '/2/category/18' + + +class WrongsideSideStories(_ParserScraper): + baseUrl = 'http://ayzewi.com/maingallery3/' + stripUrl = baseUrl + 'picture.php?%s' + imageSearch = '//img[@id="theMainImage"]/@src' + prevSearch = '//a[contains(@title, "Previous :")]' + latestSearch = '(//ul[@id="thumbnails"]//a/@href)[last()]' + starter = indirectStarter + + def __init__(self, name, category, first, last=None): + super().__init__('Wrongside/' + name) + self.url = self.baseUrl + 'index.php?/category/' + category + self.firstStripUrl = self.stripUrl % ('/' + first + '/category/' + category) + + if last: + self.endOfLife = True + + @classmethod + def getmodules(cls): + return ( + cls('AnarkisRising', '7', '302'), + cls('CommonsDreams', '9', '324'), + cls('Faith', '11', '349'), + cls('Sarah', '10', '337'), + cls('ThereAreNoAviansHere', '8', '313'), + cls('TheScientificProphet', '13', '358'), + cls('TheStrangers', '12', '361'), + ) + + def namer(self, imageUrl, pageUrl): + page = self.getPage(pageUrl) + title = page.xpath('//div[@class="browsePath"]/h2/text()')[0] + return title.replace('"', '') + '.' + imageUrl.rsplit('.', 1)[-1] diff --git a/scripts/generate_json.sh b/scripts/generate_json.sh index c28c21ddc..6ff7617c5 100755 --- a/scripts/generate_json.sh +++ b/scripts/generate_json.sh @@ -9,7 +9,7 @@ d=$(dirname $0) if [ $# -ge 1 ]; then list="$*" else - list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot webcomicfactory webtoons" + list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot tapastic webcomicfactory webtoons" fi for script in $list; do echo "Executing ${script}.py" diff --git a/scripts/tapastic.py b/scripts/tapastic.py new file mode 100644 index 000000000..d285c024e --- /dev/null +++ b/scripts/tapastic.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright (C) 2019-2020 Tobias Gruetzmacher +# Copyright (C) 2019-2020 Daniel Ring +""" +Script to get a list of Tapastic comics and save the info in a +JSON file for further processing. +""" +from urllib.parse import urlsplit, parse_qs + +from scriptutil import ComicListUpdater +from dosagelib.util import check_robotstxt + + +class TapasticUpdater(ComicListUpdater): + def collect_results(self): + # Retrieve the first 10 top comics list pages + url = 'https://tapas.io/comics?browse=ALL&sort_type=LIKE&pageNumber=' + count = 10 + + data = [self.get_url(url + str(i), robot=False) for i in range(0, count)] + for page in data: + for comiclink in page.xpath('//a[@class="preferred title"]'): + comicurl = comiclink.attrib['href'] + name = comiclink.text + self.add_comic(name, comicurl) + + def get_entry(self, name, url): + shortName = name.replace(' ', '').replace('\'', '') + titleNum = int(parse_qs(urlsplit(url).query)['title_no'][0]) + url = url.rsplit('/', 1)[0].replace('/series/', '') + return u"cls('%s', '%s', %d)," % (shortName, url, titleNum) + + +if __name__ == '__main__': + TapasticUpdater(__file__).run() diff --git a/scripts/update_plugins.sh b/scripts/update_plugins.sh index 97bc7d3e6..8ce0a9126 100755 --- a/scripts/update_plugins.sh +++ b/scripts/update_plugins.sh @@ -11,7 +11,7 @@ d=$(dirname $0) if [ $# -ge 1 ]; then list="$*" else - list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot webcomicfactory webtoons" + list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot tapastic webcomicfactory webtoons" fi for script in $list; do target="${d}/../dosagelib/plugins/${script}.py" diff --git a/tests/test_modules.py b/tests/test_modules.py index 84f11ab58..c6ec1e021 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -7,8 +7,6 @@ import responses import dosagelib.cmd import httpmocks -from dosagelib.plugins.s import SoloLeveling -from dosagelib.scraper import GeoblockedException def cmd(*options): @@ -43,7 +41,11 @@ class TestModules(object): cmd('--basepath', str(tmpdir), 'CalvinAndHobbesEnEspanol:2012/07/22') @responses.activate + @pytest.mark.skip(reason="SoloeLeveling was removed, so we have no way to test this...") def test_sololeveling_geoblock(self, tmpdir): + from dosagelib.plugins.s import SoloLeveling + from dosagelib.scraper import GeoblockedException + responses.add(responses.GET, 'https://w3.sololeveling.net/', '1020', status=403)