From e1821e23baf648d4dbe2d70cea57fc8ae8f0d8ec Mon Sep 17 00:00:00 2001 From: Daniel Ring Date: Mon, 6 Apr 2020 04:23:23 -0700 Subject: [PATCH] Minor fixes to several strips (#158) * Fix Twokinds * Fix XKCD * Fix Unsounded * Fix SluggyFreelance * Fix Oglaf * Fix missing and incorrect renames * Fix WLP/PeterIsTheWolf{General,Adult} --- dosagelib/plugins/o.py | 7 +++---- dosagelib/plugins/old.py | 2 +- dosagelib/plugins/s.py | 6 +++--- dosagelib/plugins/u.py | 5 ++--- dosagelib/plugins/wlpcomics.py | 15 ++++++--------- dosagelib/plugins/x.py | 13 ++++--------- 6 files changed, 19 insertions(+), 29 deletions(-) diff --git a/dosagelib/plugins/o.py b/dosagelib/plugins/o.py index 664186f35..5d16e5adb 100644 --- a/dosagelib/plugins/o.py +++ b/dosagelib/plugins/o.py @@ -47,11 +47,10 @@ class OffWhite(_ParserScraper): class Oglaf(_ParserScraper): url = 'http://oglaf.com/' stripUrl = url + '%s/' + firstStripUrl = stripUrl % 'cumsprite' imageSearch = '//img[@id="strip"]' - # search for "previous story" only - prevSearch = '//link[@rel="prev"]' - # search for "next page" - nextSearch = '//link[@rel="next"]' + prevSearch = '//a[@rel="prev"]' + nextSearch = '//a[@rel="next"]' multipleImagesPerStrip = True adult = True diff --git a/dosagelib/plugins/old.py b/dosagelib/plugins/old.py index bc61bc299..6ee0e74ec 100644 --- a/dosagelib/plugins/old.py +++ b/dosagelib/plugins/old.py @@ -635,7 +635,7 @@ class Renamed(Scraper): # Renamed in 2.16 cls('1997', '1977'), cls('ApartmentForTwo', 'NamirDeiter/ApartmentForTwo'), - cls('Catena', 'CatenaManor/CatenaCafe'), + cls('Catena', 'CatenaManor'), cls('ComicFury/Alya', 'ComicFury/AlyaTheLastChildOfLight'), cls('ComicFury/Boatcrash', 'ComicFury/BoatcrashChronicles'), cls('ComicFury/Crimsonpixel', 'ComicFury/CrimsonPixelComics'), diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index 31f2dff36..1b1d66994 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -286,6 +286,7 @@ class SlightlyDamned(_ComicControlScraper): class SluggyFreelance(_ParserScraper): url = 'http://sluggy.com/' stripUrl = 'http://archives.sluggy.com/book.php?chapter=%s' + firstStripUrl = stripUrl % '1' imageSearch = '//div[%s]/img/@data-src' % xpath_class('comic_content') prevSearch = '//div[%s]/a' % xpath_class('previous') latestSearch = '//a[%s]' % xpath_class('archives_link') @@ -294,9 +295,8 @@ class SluggyFreelance(_ParserScraper): help = 'Index format: chapter' def namer(self, imageurl, pageurl): - """Remove random noise from name.""" - fn = imageurl.rsplit('/', 1)[-1] - return sub(r'\.(png|gif|jpg).*\.\1', '', fn) + # Remove random noise from filename + return imageurl.rsplit('/', 1)[-1].split('.pagespeed', 1)[0] class SMBC(_ComicControlScraper): diff --git a/dosagelib/plugins/u.py b/dosagelib/plugins/u.py index a9e99828b..8f3ca5b4a 100644 --- a/dosagelib/plugins/u.py +++ b/dosagelib/plugins/u.py @@ -45,8 +45,8 @@ class Unsounded(_ParserScraper): startUrl = url + 'comic+index/' stripUrl = url + 'comic/ch%s/ch%s_%s.html' firstStripUrl = stripUrl % ('01', '01', '01') - imageSearch = '//img[contains(@src, "/pageart/ch")]' - prevSearch = '//a[{}]'.format(xpath_class('back')) + imageSearch = '//img[contains(@src, "pageart/")]' + prevSearch = '//a[%s]' % xpath_class('back') latestSearch = '//div[@id="chapter_box"][1]//a[last()]' multipleImagesPerStrip = True starter = indirectStarter @@ -59,7 +59,6 @@ class Unsounded(_ParserScraper): return super(Unsounded, self).getPrevUrl(url, data) def getIndexStripUrl(self, index): - """Get comic strip URL from index.""" chapter, num = index.split('-') return self.stripUrl % (chapter, chapter, num) diff --git a/dosagelib/plugins/wlpcomics.py b/dosagelib/plugins/wlpcomics.py index 3624fee70..95a1d9822 100644 --- a/dosagelib/plugins/wlpcomics.py +++ b/dosagelib/plugins/wlpcomics.py @@ -10,7 +10,7 @@ from ..helpers import bounceStarter class _WLPComics(_ParserScraper): - imageSearch = '//center/*/img[contains(@alt, " Comic")]' + imageSearch = '//img[contains(@alt, " Comic")]' prevSearch = '//a[contains(text(), "Previous ")]' nextSearch = '//a[contains(text(), "Next ")]' starter = bounceStarter @@ -23,24 +23,19 @@ class _WLPComics(_ParserScraper): return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' + image_url.rsplit('/', 1)[-1]) - def getIndexStripUrl(self, index): - return self.url + '%s.html' % index - class ChichiChan(_WLPComics): url = 'http://www.wlpcomics.com/adult/chichi/' + stripUrl = url + '%s.html' adult = True class ChocolateMilkMaid(_WLPComics): # Newer pages seem to be broken - baseurl = 'http://www.wlpcomics.com/adult/cm/' - url = baseurl + '264.html' + stripUrl = 'http://www.wlpcomics.com/adult/cm/%s.html' + url = stripUrl % '264' adult = True - def getIndexStripUrl(self, index): - return self.baseurl + '%s.html' % index - def link_modifier(self, fromurl, tourl): """Bugfix for self-referencing pages...""" if tourl == fromurl: @@ -53,6 +48,7 @@ class ChocolateMilkMaid(_WLPComics): class MaidAttack(_WLPComics): url = 'http://www.wlpcomics.com/general/maidattack/' + stripUrl = url + '%s.html' class PeterIsTheWolfAdult(_WLPComics): @@ -96,6 +92,7 @@ class PeterIsTheWolfGeneral(_WLPComics): class Stellar(_WLPComics): url = 'http://www.wlpcomics.com/adult/stellar/' + stripUrl = url + '%s.html' adult = True def link_modifier(self, fromurl, tourl): diff --git a/dosagelib/plugins/x.py b/dosagelib/plugins/x.py index 40738887a..d57364cb5 100644 --- a/dosagelib/plugins/x.py +++ b/dosagelib/plugins/x.py @@ -7,29 +7,24 @@ from ..scraper import _ParserScraper from ..helpers import bounceStarter -class Xkcd(_ParserScraper): +class XKCD(_ParserScraper): name = 'xkcd' url = 'https://xkcd.com/' - starter = bounceStarter stripUrl = url + '%s/' firstStripUrl = stripUrl % '1' imageSearch = '//div[@id="comic"]//img' + textSearch = imageSearch + '/@title' prevSearch = '//a[@rel="prev"]' nextSearch = '//a[@rel="next"]' + starter = bounceStarter help = 'Index format: n (unpadded)' - textSearch = '//div[@id="comic"]//img/@title' def namer(self, image_url, page_url): index = int(page_url.rstrip('/').rsplit('/', 1)[-1]) name = image_url.rsplit('/', 1)[-1].split('.')[0] - return '%03d-%s' % (index, name) + return '%04d-%s' % (index, name) def imageUrlModifier(self, url, data): if url and '/large/' in data: return url.replace(".png", "_large.png") return url - - def shouldSkipUrl(self, url, data): - return url in ( - self.stripUrl % '1663', # Garden - )