Apply link modifier to all links.

Previously, this hook was only the "previous link modifier"; now it can also
modify "next" and "latest" links. Additionally, the modifier is given the
URL of the page the link was found on, so those cases can be distinguished.
This commit is contained in:
Tobias Gruetzmacher 2016-11-01 01:12:16 +01:00
parent 7fc05f75f5
commit bc755d09a3
5 changed files with 24 additions and 22 deletions

View file

@ -34,9 +34,11 @@ def bounceStarter(self):
This needs the url and nextSearch properties be defined on the class. This needs the url and nextSearch properties be defined on the class.
""" """
data = self.getPage(self.url) data = self.getPage(self.url)
url1 = self.fetchUrl(self.url, data, self.prevSearch) prevurl = self.fetchUrl(self.url, data, self.prevSearch)
data = self.getPage(url1) prevurl = self.link_modifier(self.url, prevurl)
return self.fetchUrl(url1, data, self.nextSearch) data = self.getPage(prevurl)
nexturl = self.fetchUrl(prevurl, data, self.nextSearch)
return self.link_modifier(prevurl, nexturl)
def indirectStarter(self): def indirectStarter(self):
@ -48,4 +50,5 @@ def indirectStarter(self):
'latestSearch' is used on the page content to find the latest strip.""" 'latestSearch' is used on the page content to find the latest strip."""
url = self.startUrl if hasattr(self, "startUrl") else self.url url = self.startUrl if hasattr(self, "startUrl") else self.url
data = self.getPage(url) data = self.getPage(url)
return self.fetchUrl(url, data, self.latestSearch) newurl = self.fetchUrl(url, data, self.latestSearch)
return self.link_modifier(url, newurl)

View file

@ -178,11 +178,11 @@ class Chester5000XYV(_WordPressScraper):
adult = True adult = True
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
def prevUrlModifier(self, prev_url): def link_modifier(self, fromurl, tourl):
"""Bugfix for link to blog""" """Bugfix for link to blog"""
if prev_url == self.stripUrl % '714': if tourl == self.stripUrl % '714':
return self.stripUrl % '710' return self.stripUrl % '710'
return prev_url return tourl
class Chisuji(_WordPressScraper): class Chisuji(_WordPressScraper):

View file

@ -24,13 +24,12 @@ class ComicGenesis(_BasicScraper):
multipleImagesPerStrip = True multipleImagesPerStrip = True
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
def prevUrlModifier(self, prev_url): def link_modifier(self, fromurl, tourl):
if prev_url: return tourl.replace(
return prev_url.replace( "keenspace.com", "comicgenesis.com").replace(
"keenspace.com", "comicgenesis.com").replace( "keenspot.com", "comicgenesis.com").replace(
"keenspot.com", "comicgenesis.com").replace( "toonspace.com", "comicgenesis.com").replace(
"toonspace.com", "comicgenesis.com").replace( "comicgen.com", "comicgenesis.com")
"comicgen.com", "comicgenesis.com")
def __init__(self, name, sub=None, last=None, baseUrl=None): def __init__(self, name, sub=None, last=None, baseUrl=None):
super(ComicGenesis, self).__init__('ComicGenesis/' + name) super(ComicGenesis, self).__init__('ComicGenesis/' + name)

View file

@ -55,8 +55,8 @@ class Stellar(_WLPComics):
url = 'http://www.wlpcomics.com/adult/stellar/' url = 'http://www.wlpcomics.com/adult/stellar/'
adult = True adult = True
def prevUrlModifier(self, prev_url): def link_modifier(self, fromurl, tourl):
"""Bugfix for empty page...""" """Bugfix for empty page..."""
if prev_url == self.url + '075.html': if tourl == self.url + '075.html':
return self.url + '074.html' return self.url + '074.html'
return prev_url return tourl

View file

@ -220,7 +220,7 @@ class Scraper(object):
# assume there is no previous URL, but print a warning # assume there is no previous URL, but print a warning
out.warn(u"%s Assuming no previous comic strips exist." % msg) out.warn(u"%s Assuming no previous comic strips exist." % msg)
else: else:
prevUrl = self.prevUrlModifier(prevUrl) prevUrl = self.link_modifier(url, prevUrl)
out.debug(u"Found previous URL %s" % prevUrl) out.debug(u"Found previous URL %s" % prevUrl)
getHandler().comicPageLink(self, url, prevUrl) getHandler().comicPageLink(self, url, prevUrl)
return prevUrl return prevUrl
@ -237,12 +237,12 @@ class Scraper(object):
"""Return filename for given image and page URL.""" """Return filename for given image and page URL."""
return None return None
def prevUrlModifier(self, prev_url): def link_modifier(self, fromurl, tourl):
"""Optional modification of parsed previous URLs. Useful if """Optional modification of parsed link (previous/back/latest) URLs.
there are domain redirects. The default implementation does Useful if there are domain redirects. The default implementation does
not modify the URL. not modify the URL.
""" """
return prev_url return tourl
def imageUrlModifier(self, image_url, data): def imageUrlModifier(self, image_url, data):
"""Optional modification of parsed image URLs. Useful if the URL """Optional modification of parsed image URLs. Useful if the URL