Minor fixes to several strips (#158)

* Fix Twokinds
* Fix XKCD
* Fix Unsounded
* Fix SluggyFreelance
* Fix Oglaf
* Fix missing and incorrect renames
* Fix WLP/PeterIsTheWolf{General,Adult}
This commit is contained in:
parent
d9988bc55d
commit
e1821e23ba
6 changed files with 19 additions and 29 deletions
|
@ -47,11 +47,10 @@ class OffWhite(_ParserScraper):
|
|||
class Oglaf(_ParserScraper):
    """Oglaf webcomic (adult content); stories may span several pages."""
    url = 'http://oglaf.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'cumsprite'
    imageSearch = '//img[@id="strip"]'
    # The rel=prev/next anchors step through individual pages, which also
    # crosses story boundaries — needed since stories have multiple pages.
    prevSearch = '//a[@rel="prev"]'
    nextSearch = '//a[@rel="next"]'
    multipleImagesPerStrip = True
    adult = True
|
||||
|
||||
|
|
|
@ -635,7 +635,7 @@ class Renamed(Scraper):
|
|||
# Renamed in 2.16
|
||||
cls('1997', '1977'),
|
||||
cls('ApartmentForTwo', 'NamirDeiter/ApartmentForTwo'),
|
||||
cls('Catena', 'CatenaManor/CatenaCafe'),
|
||||
cls('Catena', 'CatenaManor'),
|
||||
cls('ComicFury/Alya', 'ComicFury/AlyaTheLastChildOfLight'),
|
||||
cls('ComicFury/Boatcrash', 'ComicFury/BoatcrashChronicles'),
|
||||
cls('ComicFury/Crimsonpixel', 'ComicFury/CrimsonPixelComics'),
|
||||
|
|
|
@ -286,6 +286,7 @@ class SlightlyDamned(_ComicControlScraper):
|
|||
class SluggyFreelance(_ParserScraper):
|
||||
url = 'http://sluggy.com/'
|
||||
stripUrl = 'http://archives.sluggy.com/book.php?chapter=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = '//div[%s]/img/@data-src' % xpath_class('comic_content')
|
||||
prevSearch = '//div[%s]/a' % xpath_class('previous')
|
||||
latestSearch = '//a[%s]' % xpath_class('archives_link')
|
||||
|
@ -294,9 +295,8 @@ class SluggyFreelance(_ParserScraper):
|
|||
help = 'Index format: chapter'
|
||||
|
||||
def namer(self, imageurl, pageurl):
    """Strip mod_pagespeed cache noise from the image filename.

    PageSpeed rewrites names like ``foo.gif.pagespeed.ce.HASH.gif``;
    keep only the original ``foo.gif`` part.
    """
    return imageurl.rsplit('/', 1)[-1].split('.pagespeed', 1)[0]
|
||||
|
||||
|
||||
class SMBC(_ComicControlScraper):
|
||||
|
|
|
@ -45,8 +45,8 @@ class Unsounded(_ParserScraper):
|
|||
startUrl = url + 'comic+index/'
|
||||
stripUrl = url + 'comic/ch%s/ch%s_%s.html'
|
||||
firstStripUrl = stripUrl % ('01', '01', '01')
|
||||
imageSearch = '//img[contains(@src, "/pageart/ch")]'
|
||||
prevSearch = '//a[{}]'.format(xpath_class('back'))
|
||||
imageSearch = '//img[contains(@src, "pageart/")]'
|
||||
prevSearch = '//a[%s]' % xpath_class('back')
|
||||
latestSearch = '//div[@id="chapter_box"][1]//a[last()]'
|
||||
multipleImagesPerStrip = True
|
||||
starter = indirectStarter
|
||||
|
@ -59,7 +59,6 @@ class Unsounded(_ParserScraper):
|
|||
return super(Unsounded, self).getPrevUrl(url, data)
|
||||
|
||||
def getIndexStripUrl(self, index):
    """Get comic strip URL from index.

    *index* is ``'<chapter>-<page>'``; the chapter appears twice in the
    URL template (directory and file name), hence the repeated argument.
    """
    chapter, num = index.split('-')
    return self.stripUrl % (chapter, chapter, num)
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ from ..helpers import bounceStarter
|
|||
|
||||
|
||||
class _WLPComics(_ParserScraper):
|
||||
imageSearch = '//center/*/img[contains(@alt, " Comic")]'
|
||||
imageSearch = '//img[contains(@alt, " Comic")]'
|
||||
prevSearch = '//a[contains(text(), "Previous ")]'
|
||||
nextSearch = '//a[contains(text(), "Next ")]'
|
||||
starter = bounceStarter
|
||||
|
@ -23,24 +23,19 @@ class _WLPComics(_ParserScraper):
|
|||
return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' +
|
||||
image_url.rsplit('/', 1)[-1])
|
||||
|
||||
def getIndexStripUrl(self, index):
    """Return the strip URL for *index* (a plain page number/name)."""
    return self.url + '%s.html' % index
|
||||
|
||||
|
||||
class ChichiChan(_WLPComics):
    """Chichi Chan strip on WLP Comics (adult content)."""
    url = 'http://www.wlpcomics.com/adult/chichi/'
    stripUrl = url + '%s.html'
    adult = True
|
||||
|
||||
|
||||
class ChocolateMilkMaid(_WLPComics):
|
||||
# Newer pages seem to be broken
|
||||
baseurl = 'http://www.wlpcomics.com/adult/cm/'
|
||||
url = baseurl + '264.html'
|
||||
stripUrl = 'http://www.wlpcomics.com/adult/cm/%s.html'
|
||||
url = stripUrl % '264'
|
||||
adult = True
|
||||
|
||||
def getIndexStripUrl(self, index):
|
||||
return self.baseurl + '%s.html' % index
|
||||
|
||||
def link_modifier(self, fromurl, tourl):
|
||||
"""Bugfix for self-referencing pages..."""
|
||||
if tourl == fromurl:
|
||||
|
@ -53,6 +48,7 @@ class ChocolateMilkMaid(_WLPComics):
|
|||
|
||||
class MaidAttack(_WLPComics):
    """Maid Attack strip on WLP Comics (general audience)."""
    url = 'http://www.wlpcomics.com/general/maidattack/'
    stripUrl = url + '%s.html'
|
||||
|
||||
|
||||
class PeterIsTheWolfAdult(_WLPComics):
|
||||
|
@ -96,6 +92,7 @@ class PeterIsTheWolfGeneral(_WLPComics):
|
|||
|
||||
class Stellar(_WLPComics):
|
||||
url = 'http://www.wlpcomics.com/adult/stellar/'
|
||||
stripUrl = url + '%s.html'
|
||||
adult = True
|
||||
|
||||
def link_modifier(self, fromurl, tourl):
|
||||
|
|
|
@ -7,29 +7,24 @@ from ..scraper import _ParserScraper
|
|||
from ..helpers import bounceStarter
|
||||
|
||||
|
||||
class XKCD(_ParserScraper):
    """xkcd webcomic scraper.

    The class is named XKCD, but ``name`` pins the canonical lowercase
    module name so existing user configurations keep working.
    """
    name = 'xkcd'
    url = 'https://xkcd.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '1'
    imageSearch = '//div[@id="comic"]//img'
    # The alt-text lives in the image's title attribute.
    textSearch = imageSearch + '/@title'
    prevSearch = '//a[@rel="prev"]'
    nextSearch = '//a[@rel="next"]'
    starter = bounceStarter
    help = 'Index format: n (unpadded)'

    def namer(self, image_url, page_url):
        """Prefix the image name with the zero-padded strip number.

        Four digits are needed since xkcd passed strip #1000 long ago.
        """
        index = int(page_url.rstrip('/').rsplit('/', 1)[-1])
        name = image_url.rsplit('/', 1)[-1].split('.')[0]
        return '%04d-%s' % (index, name)

    def imageUrlModifier(self, url, data):
        """Use the large variant of the strip when the page offers one."""
        if url and '/large/' in data:
            return url.replace(".png", "_large.png")
        return url

    def shouldSkipUrl(self, url, data):
        """Skip interactive strips that have no downloadable image."""
        return url in (
            self.stripUrl % '1663',  # Garden
        )
|
||||
|
|
Loading…
Reference in a new issue