Add site engine for NamirDeiter comics and migrate existing comics

This commit is contained in:
Techwolf 2019-07-11 23:31:22 -07:00 committed by Tobias Gruetzmacher
parent 4795613343
commit 0fed6c2c36
6 changed files with 64 additions and 52 deletions

View file

@@ -347,15 +347,6 @@ class Annyseed(_ParserScraper):
return tourl return tourl
class ApartmentForTwo(_ParserScraper):
    """Scraper for the Apartment for Two webcomic.

    Removed in this commit; superseded by the shared NamirDeiter site engine.
    """
    url = 'https://apartmentfor2.com/'
    # Strips are addressed by date through the comics/ index script.
    stripUrl = url + 'comics/index.php?date=%s'
    firstStripUrl = url + 'comics/'
    # Comic image: an <img> inside a link whose src points into comics/.
    imageSearch = '//a/img[contains(@src, "comics/")]'
    # Previous-strip link: an <a> wrapping an image named "previous".
    prevSearch = '//a[./img[contains(@src, "previous")]]'
    help = 'Index format: yyyymmdd'
class AntiheroForHire(_ParserScraper): class AntiheroForHire(_ParserScraper):
stripUrl = 'https://www.giantrobot.club/antihero-for-hire/%s' stripUrl = 'https://www.giantrobot.club/antihero-for-hire/%s'
firstStripUrl = stripUrl % '2016/6/8/entrance-vigil' firstStripUrl = stripUrl % '2016/6/8/entrance-vigil'

View file

@@ -19,17 +19,6 @@ class Namesake(_ComicControlScraper):
firstStripUrl = stripUrl % 'the-journey-begins' firstStripUrl = stripUrl % 'the-journey-begins'
class NamirDeiter(_ParserScraper):
    """Scraper for the Namir Deiter webcomic.

    Removed in this commit; superseded by the shared NamirDeiter site engine.
    """
    baseUrl = 'https://www.namirdeiter.com/comics/'
    # Strips are addressed by date through the index script.
    stripUrl = baseUrl + 'index.php?date=%s'
    # Start crawling from the final strip (comic has ended).
    url = stripUrl % '20150410'
    firstStripUrl = baseUrl
    imageSearch = '//a/img'
    # Previous-strip link is a plain "Previous" text link.
    prevSearch = '//a[text()="Previous"]'
    # Comic is finished; no new strips are expected.
    endOfLife = True
    help = 'Index format: yyyymmdd'
class NatalieDee(_BasicScraper): class NatalieDee(_BasicScraper):
url = 'http://www.nataliedee.com/' url = 'http://www.nataliedee.com/'
rurl = escape(url) rurl = escape(url)
@@ -115,15 +104,6 @@ class Nicky510(_WPNavi):
endOfLife = True endOfLife = True
class NicoleAndDerek(_ParserScraper):
    """Scraper for the Nicole and Derek webcomic.

    Removed in this commit; superseded by the shared NamirDeiter site engine.
    """
    url = 'https://nicoleandderek.com/'
    # Strips are addressed by date through the comics/ index script.
    stripUrl = url + 'comics/index.php?date=%s'
    firstStripUrl = url + 'comics/'
    # Comic image: an <img> inside a link whose src points into comics/.
    imageSearch = '//a/img[contains(@src, "comics/")]'
    # Previous-strip link: an <a> wrapping an image named "previous".
    prevSearch = '//a[./img[contains(@src, "previous")]]'
    help = 'Index format: yyyymmdd'
class Nightshift(_ParserScraper): class Nightshift(_ParserScraper):
url = 'http://www.poecatcomix.com/comics/nightshift/' url = 'http://www.poecatcomix.com/comics/nightshift/'
stripUrl = url + '%s/' stripUrl = url + '%s/'

View file

@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2019-2020 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from .common import _ParserScraper
class NamirDeiter(_ParserScraper):
    """Shared scraper engine for comics hosted on NamirDeiter-family sites.

    Each hosted comic follows the same layout: strips live under
    ``comics/index.php?date=YYYYMMDD`` and link back with a "previous"
    image or text link.  Concrete comics are produced by :meth:`getmodules`.
    """
    imageSearch = '//img[contains(@src, "comics/")]'
    # Try the semantic rel="prev" link first, then the image-button and
    # plain-text fallbacks used by older site designs.
    prevSearch = ('//a[@rel="prev"]',
                  '//a[./img[contains(@src, "previous")]]',
                  '//a[contains(text(), "Previous")]')

    def __init__(self, name, baseUrl, first=None, last=None):
        # The flagship comic keeps its bare name; every other comic is
        # grouped under the "NamirDeiter/" prefix.
        fullName = name if name == 'NamirDeiter' else 'NamirDeiter/' + name
        super(NamirDeiter, self).__init__(fullName)
        siteRoot = 'https://' + baseUrl + '/'
        self.url = siteRoot
        self.stripUrl = siteRoot + 'comics/index.php?date=%s'
        # Without an explicit first date, the bare comics/ index serves as
        # the earliest page.
        self.firstStripUrl = self.stripUrl % first if first else siteRoot + 'comics/'
        if last:
            # The comic has ended: start crawling from its final strip and
            # mark it as end-of-life.
            self.url = self.stripUrl % last
            self.endOfLife = True

    def link_modifier(self, fromurl, tourl):
        # Links are often absolute and keep jumping between http and https
        return tourl.replace('http:', 'https:').replace('/www.', '/')

    @classmethod
    def getmodules(cls):
        """Build the scraper instances for all NamirDeiter-family comics."""
        return (
            cls('ApartmentForTwo', 'apartmentfor2.com'),
            cls('NamirDeiter', 'namirdeiter.com', last='20150410'),
            cls('NicoleAndDerek', 'nicoleandderek.com'),
            cls('OneHundredPercentCat', 'ndunlimited.com/100cat', last='20121001'),
            cls('SpareParts', 'sparepartscomics.com', first='20031022', last='20080331'),
            cls('TheNDU', 'thendu.com'),
            cls('WonderKittens', 'wonderkittens.com'),
            cls('YouSayItFirst', 'yousayitfirst.com', first='20040220', last='20130125'),
        )
class UnlikeMinerva(_ParserScraper):
    """Scraper for Unlike Minerva, grouped with the NamirDeiter comics."""
    name = 'NamirDeiter/UnlikeMinerva'
    baseUrl = 'https://unlikeminerva.com/archive/index.php'
    # Strips are addressed by week number.
    stripUrl = baseUrl + '?week=%s'
    # Start crawling from the last week (127); the archive begins at week 26.
    url = stripUrl % '127'
    firstStripUrl = stripUrl % '26'
    imageSearch = '//img[contains(@src, "archive/")]'
    # Previous-strip link: an <a> wrapping an image named "previous".
    prevSearch = '//a[./img[contains(@src, "previous")]]'
    # A single week page can carry several strip images.
    multipleImagesPerStrip = True
    # Comic is finished; no new strips are expected.
    endOfLife = True

View file

@@ -635,6 +635,7 @@ class Renamed(Scraper):
return ( return (
# Renamed in 2.16 # Renamed in 2.16
cls('1997', '1977'), cls('1997', '1977'),
cls('ApartmentForTwo', 'NamirDeiter/ApartmentForTwo'),
cls('Catena', 'CatenaManor/CatenaCafe'), cls('Catena', 'CatenaManor/CatenaCafe'),
cls('ComicFury/Alya', 'ComicFury/AlyaTheLastChildOfLight'), cls('ComicFury/Alya', 'ComicFury/AlyaTheLastChildOfLight'),
cls('ComicFury/Boatcrash', 'ComicFury/BoatcrashChronicles'), cls('ComicFury/Boatcrash', 'ComicFury/BoatcrashChronicles'),
@@ -726,6 +727,7 @@ class Renamed(Scraper):
cls('KeenSpot/Newshounds', 'Newshounds'), cls('KeenSpot/Newshounds', 'Newshounds'),
cls('KeenSpot/SinFest', 'SinFest'), cls('KeenSpot/SinFest', 'SinFest'),
cls('KeenSpot/TheGodChild', 'GodChild'), cls('KeenSpot/TheGodChild', 'GodChild'),
cls('NicoleAndDerek', 'NamirDeiter/NicoleAndDerek'),
cls('OnTheFasttrack', 'ComicsKingdom/OnTheFastrack'), cls('OnTheFasttrack', 'ComicsKingdom/OnTheFastrack'),
cls('PetiteSymphony/Djandora', 'ComicsBreak/Djandora'), cls('PetiteSymphony/Djandora', 'ComicsBreak/Djandora'),
cls('PetiteSymphony/Generation17', 'ComicsBreak/Generation17'), cls('PetiteSymphony/Generation17', 'ComicsBreak/Generation17'),
@@ -739,6 +741,8 @@ class Renamed(Scraper):
cls('SmackJeeves/RiversideExtras', 'RiversideExtras'), cls('SmackJeeves/RiversideExtras', 'RiversideExtras'),
cls('SmackJeeves/StarTrip', 'StarTrip'), cls('SmackJeeves/StarTrip', 'StarTrip'),
cls('TracyAndTristan', 'ComicFury/TracyAndTristan'), cls('TracyAndTristan', 'ComicFury/TracyAndTristan'),
cls('UnlikeMinerva', 'NamirDeiter/UnlikeMinerva'),
cls('Wulffmorgenthaler', 'WuMo'), cls('Wulffmorgenthaler', 'WuMo'),
cls('YouSayItFirst', 'NamirDeiter/YouSayItFirst'),
cls('ZebraGirl', 'ComicFury/ZebraGirl'), cls('ZebraGirl', 'ComicFury/ZebraGirl'),
) )

View file

@@ -42,18 +42,6 @@ class UnicornJelly(_BasicScraper):
help = 'Index format: nnn' help = 'Index format: nnn'
class UnlikeMinerva(_ParserScraper):
    """Scraper for the Unlike Minerva webcomic.

    Removed in this commit; moved next to the NamirDeiter site engine.
    """
    baseUrl = 'https://unlikeminerva.com/archive/index.php'
    # Strips are addressed by week number.
    stripUrl = baseUrl + '?week=%s'
    # Start crawling from the last week (127); the archive begins at week 26.
    url = stripUrl % '127'
    firstStripUrl = stripUrl % '26'
    imageSearch = '//img[contains(@src, "archive/")]'
    # Previous-strip link: an <a> wrapping an image named "previous".
    prevSearch = '//a[./img[contains(@src, "previous")]]'
    # A single week page can carry several strip images.
    multipleImagesPerStrip = True
    # Comic is finished; no new strips are expected.
    endOfLife = True
    help = 'Index format: number'
class Unsounded(_ParserScraper): class Unsounded(_ParserScraper):
url = 'http://www.casualvillain.com/Unsounded/' url = 'http://www.casualvillain.com/Unsounded/'
startUrl = url + 'comic+index/' startUrl = url + 'comic+index/'

View file

@@ -5,19 +5,8 @@
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
from ..scraper import _ParserScraper
from .common import _WordPressScraper from .common import _WordPressScraper
class YAFGC(_WordPressScraper): class YAFGC(_WordPressScraper):
url = 'http://yafgc.net/' url = 'http://yafgc.net/'
class YouSayItFirst(_ParserScraper):
    """Scraper for the You Say It First webcomic.

    Removed in this commit; superseded by the shared NamirDeiter site engine.
    """
    # Strips are addressed by date through the comics/ index script.
    stripUrl = 'https://www.yousayitfirst.com/comics/index.php?date=%s'
    # Start crawling from the final strip (comic has ended).
    url = stripUrl % '20130125'
    firstStripUrl = stripUrl % '20040220'
    imageSearch = '//a/img'
    # Previous-strip link is a plain "Previous" text link.
    prevSearch = '//a[text()="Previous"]'
    # Comic is finished; no new strips are expected.
    endOfLife = True
    help = 'Index format: yyyymmdd'