Minor fixes to several strips (#158)
* Fix Twokinds
* Fix XKCD
* Fix Unsounded
* Fix SluggyFreelance
* Fix Oglaf
* Fix missing and incorrect renames
* Fix WLP/PeterIsTheWolf{General,Adult}
This commit is contained in:
parent
d9988bc55d
commit
e1821e23ba
6 changed files with 19 additions and 29 deletions
|
@ -47,11 +47,10 @@ class OffWhite(_ParserScraper):
|
||||||
class Oglaf(_ParserScraper):
|
class Oglaf(_ParserScraper):
|
||||||
url = 'http://oglaf.com/'
|
url = 'http://oglaf.com/'
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
|
firstStripUrl = stripUrl % 'cumsprite'
|
||||||
imageSearch = '//img[@id="strip"]'
|
imageSearch = '//img[@id="strip"]'
|
||||||
# search for "previous story" only
|
prevSearch = '//a[@rel="prev"]'
|
||||||
prevSearch = '//link[@rel="prev"]'
|
nextSearch = '//a[@rel="next"]'
|
||||||
# search for "next page"
|
|
||||||
nextSearch = '//link[@rel="next"]'
|
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
|
|
|
@ -635,7 +635,7 @@ class Renamed(Scraper):
|
||||||
# Renamed in 2.16
|
# Renamed in 2.16
|
||||||
cls('1997', '1977'),
|
cls('1997', '1977'),
|
||||||
cls('ApartmentForTwo', 'NamirDeiter/ApartmentForTwo'),
|
cls('ApartmentForTwo', 'NamirDeiter/ApartmentForTwo'),
|
||||||
cls('Catena', 'CatenaManor/CatenaCafe'),
|
cls('Catena', 'CatenaManor'),
|
||||||
cls('ComicFury/Alya', 'ComicFury/AlyaTheLastChildOfLight'),
|
cls('ComicFury/Alya', 'ComicFury/AlyaTheLastChildOfLight'),
|
||||||
cls('ComicFury/Boatcrash', 'ComicFury/BoatcrashChronicles'),
|
cls('ComicFury/Boatcrash', 'ComicFury/BoatcrashChronicles'),
|
||||||
cls('ComicFury/Crimsonpixel', 'ComicFury/CrimsonPixelComics'),
|
cls('ComicFury/Crimsonpixel', 'ComicFury/CrimsonPixelComics'),
|
||||||
|
|
|
@ -286,6 +286,7 @@ class SlightlyDamned(_ComicControlScraper):
|
||||||
class SluggyFreelance(_ParserScraper):
|
class SluggyFreelance(_ParserScraper):
|
||||||
url = 'http://sluggy.com/'
|
url = 'http://sluggy.com/'
|
||||||
stripUrl = 'http://archives.sluggy.com/book.php?chapter=%s'
|
stripUrl = 'http://archives.sluggy.com/book.php?chapter=%s'
|
||||||
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = '//div[%s]/img/@data-src' % xpath_class('comic_content')
|
imageSearch = '//div[%s]/img/@data-src' % xpath_class('comic_content')
|
||||||
prevSearch = '//div[%s]/a' % xpath_class('previous')
|
prevSearch = '//div[%s]/a' % xpath_class('previous')
|
||||||
latestSearch = '//a[%s]' % xpath_class('archives_link')
|
latestSearch = '//a[%s]' % xpath_class('archives_link')
|
||||||
|
@ -294,9 +295,8 @@ class SluggyFreelance(_ParserScraper):
|
||||||
help = 'Index format: chapter'
|
help = 'Index format: chapter'
|
||||||
|
|
||||||
def namer(self, imageurl, pageurl):
|
def namer(self, imageurl, pageurl):
|
||||||
"""Remove random noise from name."""
|
# Remove random noise from filename
|
||||||
fn = imageurl.rsplit('/', 1)[-1]
|
return imageurl.rsplit('/', 1)[-1].split('.pagespeed', 1)[0]
|
||||||
return sub(r'\.(png|gif|jpg).*\.\1', '', fn)
|
|
||||||
|
|
||||||
|
|
||||||
class SMBC(_ComicControlScraper):
|
class SMBC(_ComicControlScraper):
|
||||||
|
|
|
@ -45,8 +45,8 @@ class Unsounded(_ParserScraper):
|
||||||
startUrl = url + 'comic+index/'
|
startUrl = url + 'comic+index/'
|
||||||
stripUrl = url + 'comic/ch%s/ch%s_%s.html'
|
stripUrl = url + 'comic/ch%s/ch%s_%s.html'
|
||||||
firstStripUrl = stripUrl % ('01', '01', '01')
|
firstStripUrl = stripUrl % ('01', '01', '01')
|
||||||
imageSearch = '//img[contains(@src, "/pageart/ch")]'
|
imageSearch = '//img[contains(@src, "pageart/")]'
|
||||||
prevSearch = '//a[{}]'.format(xpath_class('back'))
|
prevSearch = '//a[%s]' % xpath_class('back')
|
||||||
latestSearch = '//div[@id="chapter_box"][1]//a[last()]'
|
latestSearch = '//div[@id="chapter_box"][1]//a[last()]'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
@ -59,7 +59,6 @@ class Unsounded(_ParserScraper):
|
||||||
return super(Unsounded, self).getPrevUrl(url, data)
|
return super(Unsounded, self).getPrevUrl(url, data)
|
||||||
|
|
||||||
def getIndexStripUrl(self, index):
|
def getIndexStripUrl(self, index):
|
||||||
"""Get comic strip URL from index."""
|
|
||||||
chapter, num = index.split('-')
|
chapter, num = index.split('-')
|
||||||
return self.stripUrl % (chapter, chapter, num)
|
return self.stripUrl % (chapter, chapter, num)
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ from ..helpers import bounceStarter
|
||||||
|
|
||||||
|
|
||||||
class _WLPComics(_ParserScraper):
|
class _WLPComics(_ParserScraper):
|
||||||
imageSearch = '//center/*/img[contains(@alt, " Comic")]'
|
imageSearch = '//img[contains(@alt, " Comic")]'
|
||||||
prevSearch = '//a[contains(text(), "Previous ")]'
|
prevSearch = '//a[contains(text(), "Previous ")]'
|
||||||
nextSearch = '//a[contains(text(), "Next ")]'
|
nextSearch = '//a[contains(text(), "Next ")]'
|
||||||
starter = bounceStarter
|
starter = bounceStarter
|
||||||
|
@ -23,24 +23,19 @@ class _WLPComics(_ParserScraper):
|
||||||
return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' +
|
return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' +
|
||||||
image_url.rsplit('/', 1)[-1])
|
image_url.rsplit('/', 1)[-1])
|
||||||
|
|
||||||
def getIndexStripUrl(self, index):
|
|
||||||
return self.url + '%s.html' % index
|
|
||||||
|
|
||||||
|
|
||||||
class ChichiChan(_WLPComics):
|
class ChichiChan(_WLPComics):
|
||||||
url = 'http://www.wlpcomics.com/adult/chichi/'
|
url = 'http://www.wlpcomics.com/adult/chichi/'
|
||||||
|
stripUrl = url + '%s.html'
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
|
|
||||||
class ChocolateMilkMaid(_WLPComics):
|
class ChocolateMilkMaid(_WLPComics):
|
||||||
# Newer pages seem to be broken
|
# Newer pages seem to be broken
|
||||||
baseurl = 'http://www.wlpcomics.com/adult/cm/'
|
stripUrl = 'http://www.wlpcomics.com/adult/cm/%s.html'
|
||||||
url = baseurl + '264.html'
|
url = stripUrl % '264'
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
def getIndexStripUrl(self, index):
|
|
||||||
return self.baseurl + '%s.html' % index
|
|
||||||
|
|
||||||
def link_modifier(self, fromurl, tourl):
|
def link_modifier(self, fromurl, tourl):
|
||||||
"""Bugfix for self-referencing pages..."""
|
"""Bugfix for self-referencing pages..."""
|
||||||
if tourl == fromurl:
|
if tourl == fromurl:
|
||||||
|
@ -53,6 +48,7 @@ class ChocolateMilkMaid(_WLPComics):
|
||||||
|
|
||||||
class MaidAttack(_WLPComics):
|
class MaidAttack(_WLPComics):
|
||||||
url = 'http://www.wlpcomics.com/general/maidattack/'
|
url = 'http://www.wlpcomics.com/general/maidattack/'
|
||||||
|
stripUrl = url + '%s.html'
|
||||||
|
|
||||||
|
|
||||||
class PeterIsTheWolfAdult(_WLPComics):
|
class PeterIsTheWolfAdult(_WLPComics):
|
||||||
|
@ -96,6 +92,7 @@ class PeterIsTheWolfGeneral(_WLPComics):
|
||||||
|
|
||||||
class Stellar(_WLPComics):
|
class Stellar(_WLPComics):
|
||||||
url = 'http://www.wlpcomics.com/adult/stellar/'
|
url = 'http://www.wlpcomics.com/adult/stellar/'
|
||||||
|
stripUrl = url + '%s.html'
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
def link_modifier(self, fromurl, tourl):
|
def link_modifier(self, fromurl, tourl):
|
||||||
|
|
|
@ -7,29 +7,24 @@ from ..scraper import _ParserScraper
|
||||||
from ..helpers import bounceStarter
|
from ..helpers import bounceStarter
|
||||||
|
|
||||||
|
|
||||||
class Xkcd(_ParserScraper):
|
class XKCD(_ParserScraper):
|
||||||
name = 'xkcd'
|
name = 'xkcd'
|
||||||
url = 'https://xkcd.com/'
|
url = 'https://xkcd.com/'
|
||||||
starter = bounceStarter
|
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = '//div[@id="comic"]//img'
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
|
textSearch = imageSearch + '/@title'
|
||||||
prevSearch = '//a[@rel="prev"]'
|
prevSearch = '//a[@rel="prev"]'
|
||||||
nextSearch = '//a[@rel="next"]'
|
nextSearch = '//a[@rel="next"]'
|
||||||
|
starter = bounceStarter
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
textSearch = '//div[@id="comic"]//img/@title'
|
|
||||||
|
|
||||||
def namer(self, image_url, page_url):
|
def namer(self, image_url, page_url):
|
||||||
index = int(page_url.rstrip('/').rsplit('/', 1)[-1])
|
index = int(page_url.rstrip('/').rsplit('/', 1)[-1])
|
||||||
name = image_url.rsplit('/', 1)[-1].split('.')[0]
|
name = image_url.rsplit('/', 1)[-1].split('.')[0]
|
||||||
return '%03d-%s' % (index, name)
|
return '%04d-%s' % (index, name)
|
||||||
|
|
||||||
def imageUrlModifier(self, url, data):
|
def imageUrlModifier(self, url, data):
|
||||||
if url and '/large/' in data:
|
if url and '/large/' in data:
|
||||||
return url.replace(".png", "_large.png")
|
return url.replace(".png", "_large.png")
|
||||||
return url
|
return url
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
|
||||||
return url in (
|
|
||||||
self.stripUrl % '1663', # Garden
|
|
||||||
)
|
|
||||||
|
|
Loading…
Reference in a new issue