Apply link modifier to all links.

Previously this was only the "previous link modifier"; now it can also
modify "next" and "latest" links. Additionally, the modifier is given
the current URL, so those cases can be distinguished.
This commit is contained in:
Tobias Gruetzmacher 2016-11-01 01:12:16 +01:00
parent 7fc05f75f5
commit bc755d09a3
5 changed files with 24 additions and 22 deletions

View file

@ -34,9 +34,11 @@ def bounceStarter(self):
This needs the url and nextSearch properties be defined on the class.
"""
data = self.getPage(self.url)
url1 = self.fetchUrl(self.url, data, self.prevSearch)
data = self.getPage(url1)
return self.fetchUrl(url1, data, self.nextSearch)
prevurl = self.fetchUrl(self.url, data, self.prevSearch)
prevurl = self.link_modifier(self.url, prevurl)
data = self.getPage(prevurl)
nexturl = self.fetchUrl(prevurl, data, self.nextSearch)
return self.link_modifier(prevurl, nexturl)
def indirectStarter(self):
@ -48,4 +50,5 @@ def indirectStarter(self):
'latestSearch' is used on the page content to find the latest strip."""
url = self.startUrl if hasattr(self, "startUrl") else self.url
data = self.getPage(url)
return self.fetchUrl(url, data, self.latestSearch)
newurl = self.fetchUrl(url, data, self.latestSearch)
return self.link_modifier(url, newurl)

View file

@ -178,11 +178,11 @@ class Chester5000XYV(_WordPressScraper):
adult = True
help = 'Index format: n (unpadded)'
def prevUrlModifier(self, prev_url):
def link_modifier(self, fromurl, tourl):
"""Bugfix for link to blog"""
if prev_url == self.stripUrl % '714':
if tourl == self.stripUrl % '714':
return self.stripUrl % '710'
return prev_url
return tourl
class Chisuji(_WordPressScraper):

View file

@ -24,13 +24,12 @@ class ComicGenesis(_BasicScraper):
multipleImagesPerStrip = True
help = 'Index format: yyyymmdd'
def prevUrlModifier(self, prev_url):
if prev_url:
return prev_url.replace(
"keenspace.com", "comicgenesis.com").replace(
"keenspot.com", "comicgenesis.com").replace(
"toonspace.com", "comicgenesis.com").replace(
"comicgen.com", "comicgenesis.com")
def link_modifier(self, fromurl, tourl):
return tourl.replace(
"keenspace.com", "comicgenesis.com").replace(
"keenspot.com", "comicgenesis.com").replace(
"toonspace.com", "comicgenesis.com").replace(
"comicgen.com", "comicgenesis.com")
def __init__(self, name, sub=None, last=None, baseUrl=None):
super(ComicGenesis, self).__init__('ComicGenesis/' + name)

View file

@ -55,8 +55,8 @@ class Stellar(_WLPComics):
url = 'http://www.wlpcomics.com/adult/stellar/'
adult = True
def prevUrlModifier(self, prev_url):
def link_modifier(self, fromurl, tourl):
"""Bugfix for empty page..."""
if prev_url == self.url + '075.html':
if tourl == self.url + '075.html':
return self.url + '074.html'
return prev_url
return tourl

View file

@ -220,7 +220,7 @@ class Scraper(object):
# assume there is no previous URL, but print a warning
out.warn(u"%s Assuming no previous comic strips exist." % msg)
else:
prevUrl = self.prevUrlModifier(prevUrl)
prevUrl = self.link_modifier(url, prevUrl)
out.debug(u"Found previous URL %s" % prevUrl)
getHandler().comicPageLink(self, url, prevUrl)
return prevUrl
@ -237,12 +237,12 @@ class Scraper(object):
"""Return filename for given image and page URL."""
return None
def prevUrlModifier(self, prev_url):
"""Optional modification of parsed previous URLs. Useful if
there are domain redirects. The default implementation does
def link_modifier(self, fromurl, tourl):
"""Optional modification of parsed link (previous/back/latest) URLs.
Useful if there are domain redirects. The default implementation does
not modify the URL.
"""
return prev_url
return tourl
def imageUrlModifier(self, image_url, data):
"""Optional modification of parsed image URLs. Useful if the URL