Apply link modifier to all links.
This was previously only the "previous link modifier"; now it can also modify "next" and "latest" links. Additionally, the modifier is given the current URL, so those cases can be distinguished.
This commit is contained in:
parent
7fc05f75f5
commit
bc755d09a3
5 changed files with 24 additions and 22 deletions
|
@ -34,9 +34,11 @@ def bounceStarter(self):
|
||||||
This needs the url and nextSearch properties be defined on the class.
|
This needs the url and nextSearch properties be defined on the class.
|
||||||
"""
|
"""
|
||||||
data = self.getPage(self.url)
|
data = self.getPage(self.url)
|
||||||
url1 = self.fetchUrl(self.url, data, self.prevSearch)
|
prevurl = self.fetchUrl(self.url, data, self.prevSearch)
|
||||||
data = self.getPage(url1)
|
prevurl = self.link_modifier(self.url, prevurl)
|
||||||
return self.fetchUrl(url1, data, self.nextSearch)
|
data = self.getPage(prevurl)
|
||||||
|
nexturl = self.fetchUrl(prevurl, data, self.nextSearch)
|
||||||
|
return self.link_modifier(prevurl, nexturl)
|
||||||
|
|
||||||
|
|
||||||
def indirectStarter(self):
|
def indirectStarter(self):
|
||||||
|
@ -48,4 +50,5 @@ def indirectStarter(self):
|
||||||
'latestSearch' is used on the page content to find the latest strip."""
|
'latestSearch' is used on the page content to find the latest strip."""
|
||||||
url = self.startUrl if hasattr(self, "startUrl") else self.url
|
url = self.startUrl if hasattr(self, "startUrl") else self.url
|
||||||
data = self.getPage(url)
|
data = self.getPage(url)
|
||||||
return self.fetchUrl(url, data, self.latestSearch)
|
newurl = self.fetchUrl(url, data, self.latestSearch)
|
||||||
|
return self.link_modifier(url, newurl)
|
||||||
|
|
|
@ -178,11 +178,11 @@ class Chester5000XYV(_WordPressScraper):
|
||||||
adult = True
|
adult = True
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
def prevUrlModifier(self, prev_url):
|
def link_modifier(self, fromurl, tourl):
|
||||||
"""Bugfix for link to blog"""
|
"""Bugfix for link to blog"""
|
||||||
if prev_url == self.stripUrl % '714':
|
if tourl == self.stripUrl % '714':
|
||||||
return self.stripUrl % '710'
|
return self.stripUrl % '710'
|
||||||
return prev_url
|
return tourl
|
||||||
|
|
||||||
|
|
||||||
class Chisuji(_WordPressScraper):
|
class Chisuji(_WordPressScraper):
|
||||||
|
|
|
@ -24,13 +24,12 @@ class ComicGenesis(_BasicScraper):
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
def prevUrlModifier(self, prev_url):
|
def link_modifier(self, fromurl, tourl):
|
||||||
if prev_url:
|
return tourl.replace(
|
||||||
return prev_url.replace(
|
"keenspace.com", "comicgenesis.com").replace(
|
||||||
"keenspace.com", "comicgenesis.com").replace(
|
"keenspot.com", "comicgenesis.com").replace(
|
||||||
"keenspot.com", "comicgenesis.com").replace(
|
"toonspace.com", "comicgenesis.com").replace(
|
||||||
"toonspace.com", "comicgenesis.com").replace(
|
"comicgen.com", "comicgenesis.com")
|
||||||
"comicgen.com", "comicgenesis.com")
|
|
||||||
|
|
||||||
def __init__(self, name, sub=None, last=None, baseUrl=None):
|
def __init__(self, name, sub=None, last=None, baseUrl=None):
|
||||||
super(ComicGenesis, self).__init__('ComicGenesis/' + name)
|
super(ComicGenesis, self).__init__('ComicGenesis/' + name)
|
||||||
|
|
|
@ -55,8 +55,8 @@ class Stellar(_WLPComics):
|
||||||
url = 'http://www.wlpcomics.com/adult/stellar/'
|
url = 'http://www.wlpcomics.com/adult/stellar/'
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
def prevUrlModifier(self, prev_url):
|
def link_modifier(self, fromurl, tourl):
|
||||||
"""Bugfix for empty page..."""
|
"""Bugfix for empty page..."""
|
||||||
if prev_url == self.url + '075.html':
|
if tourl == self.url + '075.html':
|
||||||
return self.url + '074.html'
|
return self.url + '074.html'
|
||||||
return prev_url
|
return tourl
|
||||||
|
|
|
@ -220,7 +220,7 @@ class Scraper(object):
|
||||||
# assume there is no previous URL, but print a warning
|
# assume there is no previous URL, but print a warning
|
||||||
out.warn(u"%s Assuming no previous comic strips exist." % msg)
|
out.warn(u"%s Assuming no previous comic strips exist." % msg)
|
||||||
else:
|
else:
|
||||||
prevUrl = self.prevUrlModifier(prevUrl)
|
prevUrl = self.link_modifier(url, prevUrl)
|
||||||
out.debug(u"Found previous URL %s" % prevUrl)
|
out.debug(u"Found previous URL %s" % prevUrl)
|
||||||
getHandler().comicPageLink(self, url, prevUrl)
|
getHandler().comicPageLink(self, url, prevUrl)
|
||||||
return prevUrl
|
return prevUrl
|
||||||
|
@ -237,12 +237,12 @@ class Scraper(object):
|
||||||
"""Return filename for given image and page URL."""
|
"""Return filename for given image and page URL."""
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def prevUrlModifier(self, prev_url):
|
def link_modifier(self, fromurl, tourl):
|
||||||
"""Optional modification of parsed previous URLs. Useful if
|
"""Optional modification of parsed link (previous/back/latest) URLs.
|
||||||
there are domain redirects. The default implementation does
|
Useful if there are domain redirects. The default implementation does
|
||||||
not modify the URL.
|
not modify the URL.
|
||||||
"""
|
"""
|
||||||
return prev_url
|
return tourl
|
||||||
|
|
||||||
def imageUrlModifier(self, image_url, data):
|
def imageUrlModifier(self, image_url, data):
|
||||||
"""Optional modification of parsed image URLs. Useful if the URL
|
"""Optional modification of parsed image URLs. Useful if the URL
|
||||||
|
|
Loading…
Reference in a new issue