Apply link modifier to all links.
Previously this was only the "previous link modifier"; now it can also modify "next" and "latest" links. Additionally, the modifier is given the current URL, so those cases can be distinguished.
This commit is contained in:
parent
7fc05f75f5
commit
bc755d09a3
5 changed files with 24 additions and 22 deletions
|
@ -34,9 +34,11 @@ def bounceStarter(self):
|
|||
This needs the url and nextSearch properties to be defined on the class.
|
||||
"""
|
||||
data = self.getPage(self.url)
|
||||
url1 = self.fetchUrl(self.url, data, self.prevSearch)
|
||||
data = self.getPage(url1)
|
||||
return self.fetchUrl(url1, data, self.nextSearch)
|
||||
prevurl = self.fetchUrl(self.url, data, self.prevSearch)
|
||||
prevurl = self.link_modifier(self.url, prevurl)
|
||||
data = self.getPage(prevurl)
|
||||
nexturl = self.fetchUrl(prevurl, data, self.nextSearch)
|
||||
return self.link_modifier(prevurl, nexturl)
|
||||
|
||||
|
||||
def indirectStarter(self):
|
||||
|
@ -48,4 +50,5 @@ def indirectStarter(self):
|
|||
'latestSearch' is used on the page content to find the latest strip."""
|
||||
url = self.startUrl if hasattr(self, "startUrl") else self.url
|
||||
data = self.getPage(url)
|
||||
return self.fetchUrl(url, data, self.latestSearch)
|
||||
newurl = self.fetchUrl(url, data, self.latestSearch)
|
||||
return self.link_modifier(url, newurl)
|
||||
|
|
|
@ -178,11 +178,11 @@ class Chester5000XYV(_WordPressScraper):
|
|||
adult = True
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
def prevUrlModifier(self, prev_url):
|
||||
def link_modifier(self, fromurl, tourl):
|
||||
"""Bugfix for link to blog"""
|
||||
if prev_url == self.stripUrl % '714':
|
||||
if tourl == self.stripUrl % '714':
|
||||
return self.stripUrl % '710'
|
||||
return prev_url
|
||||
return tourl
|
||||
|
||||
|
||||
class Chisuji(_WordPressScraper):
|
||||
|
|
|
@ -24,13 +24,12 @@ class ComicGenesis(_BasicScraper):
|
|||
multipleImagesPerStrip = True
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
def prevUrlModifier(self, prev_url):
|
||||
if prev_url:
|
||||
return prev_url.replace(
|
||||
"keenspace.com", "comicgenesis.com").replace(
|
||||
"keenspot.com", "comicgenesis.com").replace(
|
||||
"toonspace.com", "comicgenesis.com").replace(
|
||||
"comicgen.com", "comicgenesis.com")
|
||||
def link_modifier(self, fromurl, tourl):
|
||||
return tourl.replace(
|
||||
"keenspace.com", "comicgenesis.com").replace(
|
||||
"keenspot.com", "comicgenesis.com").replace(
|
||||
"toonspace.com", "comicgenesis.com").replace(
|
||||
"comicgen.com", "comicgenesis.com")
|
||||
|
||||
def __init__(self, name, sub=None, last=None, baseUrl=None):
|
||||
super(ComicGenesis, self).__init__('ComicGenesis/' + name)
|
||||
|
|
|
@ -55,8 +55,8 @@ class Stellar(_WLPComics):
|
|||
url = 'http://www.wlpcomics.com/adult/stellar/'
|
||||
adult = True
|
||||
|
||||
def prevUrlModifier(self, prev_url):
|
||||
def link_modifier(self, fromurl, tourl):
|
||||
"""Bugfix for empty page..."""
|
||||
if prev_url == self.url + '075.html':
|
||||
if tourl == self.url + '075.html':
|
||||
return self.url + '074.html'
|
||||
return prev_url
|
||||
return tourl
|
||||
|
|
|
@ -220,7 +220,7 @@ class Scraper(object):
|
|||
# assume there is no previous URL, but print a warning
|
||||
out.warn(u"%s Assuming no previous comic strips exist." % msg)
|
||||
else:
|
||||
prevUrl = self.prevUrlModifier(prevUrl)
|
||||
prevUrl = self.link_modifier(url, prevUrl)
|
||||
out.debug(u"Found previous URL %s" % prevUrl)
|
||||
getHandler().comicPageLink(self, url, prevUrl)
|
||||
return prevUrl
|
||||
|
@ -237,12 +237,12 @@ class Scraper(object):
|
|||
"""Return filename for given image and page URL."""
|
||||
return None
|
||||
|
||||
def prevUrlModifier(self, prev_url):
|
||||
"""Optional modification of parsed previous URLs. Useful if
|
||||
there are domain redirects. The default implementation does
|
||||
def link_modifier(self, fromurl, tourl):
|
||||
"""Optional modification of parsed link (previous/next/latest) URLs.
|
||||
Useful if there are domain redirects. The default implementation does
|
||||
not modify the URL.
|
||||
"""
|
||||
return prev_url
|
||||
return tourl
|
||||
|
||||
def imageUrlModifier(self, image_url, data):
|
||||
"""Optional modification of parsed image URLs. Useful if the URL
|
||||
|
|
Loading…
Reference in a new issue