Minor fixes to several strips (#158)

* Fix Twokinds
* Fix XKCD
* Fix Unsounded
* Fix SluggyFreelance
* Fix Oglaf
* Fix missing and incorrect renames
* Fix WLP/PeterIsTheWolf{General,Adult}
This commit is contained in:
Daniel Ring 2020-04-06 04:23:23 -07:00 committed by GitHub
parent d9988bc55d
commit e1821e23ba
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 19 additions and 29 deletions

View file

@@ -47,11 +47,10 @@ class OffWhite(_ParserScraper):
class Oglaf(_ParserScraper): class Oglaf(_ParserScraper):
url = 'http://oglaf.com/' url = 'http://oglaf.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % 'cumsprite'
imageSearch = '//img[@id="strip"]' imageSearch = '//img[@id="strip"]'
# search for "previous story" only prevSearch = '//a[@rel="prev"]'
prevSearch = '//link[@rel="prev"]' nextSearch = '//a[@rel="next"]'
# search for "next page"
nextSearch = '//link[@rel="next"]'
multipleImagesPerStrip = True multipleImagesPerStrip = True
adult = True adult = True

View file

@@ -635,7 +635,7 @@ class Renamed(Scraper):
# Renamed in 2.16 # Renamed in 2.16
cls('1997', '1977'), cls('1997', '1977'),
cls('ApartmentForTwo', 'NamirDeiter/ApartmentForTwo'), cls('ApartmentForTwo', 'NamirDeiter/ApartmentForTwo'),
cls('Catena', 'CatenaManor/CatenaCafe'), cls('Catena', 'CatenaManor'),
cls('ComicFury/Alya', 'ComicFury/AlyaTheLastChildOfLight'), cls('ComicFury/Alya', 'ComicFury/AlyaTheLastChildOfLight'),
cls('ComicFury/Boatcrash', 'ComicFury/BoatcrashChronicles'), cls('ComicFury/Boatcrash', 'ComicFury/BoatcrashChronicles'),
cls('ComicFury/Crimsonpixel', 'ComicFury/CrimsonPixelComics'), cls('ComicFury/Crimsonpixel', 'ComicFury/CrimsonPixelComics'),

View file

@@ -286,6 +286,7 @@ class SlightlyDamned(_ComicControlScraper):
class SluggyFreelance(_ParserScraper): class SluggyFreelance(_ParserScraper):
url = 'http://sluggy.com/' url = 'http://sluggy.com/'
stripUrl = 'http://archives.sluggy.com/book.php?chapter=%s' stripUrl = 'http://archives.sluggy.com/book.php?chapter=%s'
firstStripUrl = stripUrl % '1'
imageSearch = '//div[%s]/img/@data-src' % xpath_class('comic_content') imageSearch = '//div[%s]/img/@data-src' % xpath_class('comic_content')
prevSearch = '//div[%s]/a' % xpath_class('previous') prevSearch = '//div[%s]/a' % xpath_class('previous')
latestSearch = '//a[%s]' % xpath_class('archives_link') latestSearch = '//a[%s]' % xpath_class('archives_link')
@@ -294,9 +295,8 @@ class SluggyFreelance(_ParserScraper):
help = 'Index format: chapter' help = 'Index format: chapter'
def namer(self, imageurl, pageurl): def namer(self, imageurl, pageurl):
"""Remove random noise from name.""" # Remove random noise from filename
fn = imageurl.rsplit('/', 1)[-1] return imageurl.rsplit('/', 1)[-1].split('.pagespeed', 1)[0]
return sub(r'\.(png|gif|jpg).*\.\1', '', fn)
class SMBC(_ComicControlScraper): class SMBC(_ComicControlScraper):

View file

@@ -45,8 +45,8 @@ class Unsounded(_ParserScraper):
startUrl = url + 'comic+index/' startUrl = url + 'comic+index/'
stripUrl = url + 'comic/ch%s/ch%s_%s.html' stripUrl = url + 'comic/ch%s/ch%s_%s.html'
firstStripUrl = stripUrl % ('01', '01', '01') firstStripUrl = stripUrl % ('01', '01', '01')
imageSearch = '//img[contains(@src, "/pageart/ch")]' imageSearch = '//img[contains(@src, "pageart/")]'
prevSearch = '//a[{}]'.format(xpath_class('back')) prevSearch = '//a[%s]' % xpath_class('back')
latestSearch = '//div[@id="chapter_box"][1]//a[last()]' latestSearch = '//div[@id="chapter_box"][1]//a[last()]'
multipleImagesPerStrip = True multipleImagesPerStrip = True
starter = indirectStarter starter = indirectStarter
@@ -59,7 +59,6 @@ class Unsounded(_ParserScraper):
return super(Unsounded, self).getPrevUrl(url, data) return super(Unsounded, self).getPrevUrl(url, data)
def getIndexStripUrl(self, index): def getIndexStripUrl(self, index):
"""Get comic strip URL from index."""
chapter, num = index.split('-') chapter, num = index.split('-')
return self.stripUrl % (chapter, chapter, num) return self.stripUrl % (chapter, chapter, num)

View file

@@ -10,7 +10,7 @@ from ..helpers import bounceStarter
class _WLPComics(_ParserScraper): class _WLPComics(_ParserScraper):
imageSearch = '//center/*/img[contains(@alt, " Comic")]' imageSearch = '//img[contains(@alt, " Comic")]'
prevSearch = '//a[contains(text(), "Previous ")]' prevSearch = '//a[contains(text(), "Previous ")]'
nextSearch = '//a[contains(text(), "Next ")]' nextSearch = '//a[contains(text(), "Next ")]'
starter = bounceStarter starter = bounceStarter
@@ -23,24 +23,19 @@ class _WLPComics(_ParserScraper):
return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' + return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' +
image_url.rsplit('/', 1)[-1]) image_url.rsplit('/', 1)[-1])
def getIndexStripUrl(self, index):
return self.url + '%s.html' % index
class ChichiChan(_WLPComics): class ChichiChan(_WLPComics):
url = 'http://www.wlpcomics.com/adult/chichi/' url = 'http://www.wlpcomics.com/adult/chichi/'
stripUrl = url + '%s.html'
adult = True adult = True
class ChocolateMilkMaid(_WLPComics): class ChocolateMilkMaid(_WLPComics):
# Newer pages seem to be broken # Newer pages seem to be broken
baseurl = 'http://www.wlpcomics.com/adult/cm/' stripUrl = 'http://www.wlpcomics.com/adult/cm/%s.html'
url = baseurl + '264.html' url = stripUrl % '264'
adult = True adult = True
def getIndexStripUrl(self, index):
return self.baseurl + '%s.html' % index
def link_modifier(self, fromurl, tourl): def link_modifier(self, fromurl, tourl):
"""Bugfix for self-referencing pages...""" """Bugfix for self-referencing pages..."""
if tourl == fromurl: if tourl == fromurl:
@@ -53,6 +48,7 @@ class ChocolateMilkMaid(_WLPComics):
class MaidAttack(_WLPComics): class MaidAttack(_WLPComics):
url = 'http://www.wlpcomics.com/general/maidattack/' url = 'http://www.wlpcomics.com/general/maidattack/'
stripUrl = url + '%s.html'
class PeterIsTheWolfAdult(_WLPComics): class PeterIsTheWolfAdult(_WLPComics):
@@ -96,6 +92,7 @@ class PeterIsTheWolfGeneral(_WLPComics):
class Stellar(_WLPComics): class Stellar(_WLPComics):
url = 'http://www.wlpcomics.com/adult/stellar/' url = 'http://www.wlpcomics.com/adult/stellar/'
stripUrl = url + '%s.html'
adult = True adult = True
def link_modifier(self, fromurl, tourl): def link_modifier(self, fromurl, tourl):

View file

@@ -7,29 +7,24 @@ from ..scraper import _ParserScraper
from ..helpers import bounceStarter from ..helpers import bounceStarter
class Xkcd(_ParserScraper): class XKCD(_ParserScraper):
name = 'xkcd' name = 'xkcd'
url = 'https://xkcd.com/' url = 'https://xkcd.com/'
starter = bounceStarter
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = '//div[@id="comic"]//img' imageSearch = '//div[@id="comic"]//img'
textSearch = imageSearch + '/@title'
prevSearch = '//a[@rel="prev"]' prevSearch = '//a[@rel="prev"]'
nextSearch = '//a[@rel="next"]' nextSearch = '//a[@rel="next"]'
starter = bounceStarter
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
textSearch = '//div[@id="comic"]//img/@title'
def namer(self, image_url, page_url): def namer(self, image_url, page_url):
index = int(page_url.rstrip('/').rsplit('/', 1)[-1]) index = int(page_url.rstrip('/').rsplit('/', 1)[-1])
name = image_url.rsplit('/', 1)[-1].split('.')[0] name = image_url.rsplit('/', 1)[-1].split('.')[0]
return '%03d-%s' % (index, name) return '%04d-%s' % (index, name)
def imageUrlModifier(self, url, data): def imageUrlModifier(self, url, data):
if url and '/large/' in data: if url and '/large/' in data:
return url.replace(".png", "_large.png") return url.replace(".png", "_large.png")
return url return url
def shouldSkipUrl(self, url, data):
return url in (
self.stripUrl % '1663', # Garden
)