Implement Oglaf's strange navigation (fixes #33)

(also should fix wummel#91)
This commit is contained in:
Tobias Gruetzmacher 2016-05-21 02:38:07 +02:00
parent 51008a975b
commit 4630ea047c

View file

@ -22,19 +22,32 @@ class OctopusPie(_ParserScraper):
help = 'Index format: yyyy-mm-dd/nnn-strip-name' help = 'Index format: yyyy-mm-dd/nnn-strip-name'
class Oglaf(_BasicScraper): class Oglaf(_ParserScraper):
url = 'http://oglaf.com/' url = 'http://oglaf.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://media\.oglaf\.com/comic/[^"]+)', before="strip")) imageSearch = '//img[@id="strip"]'
prevSearch = ( # search for "previous story" only
# first search for "next page" URLs prevSearch = '//a[div[@id="pvs"]]'
compile(tagre("a", "href", r'(/[^"]+/\d+/)') + tagre("div", "id", "nx")), # search for "next page"
# then for "prev story" nextSearch = '//a[div[@id="nx"]]'
compile(tagre("a", "href", r'(/[^"]+)') + tagre("div", "id", "pvs?")), multipleImagesPerStrip = True
)
help = 'Index format: stripname'
adult = True adult = True
def fetchUrls(self, url, data, search):
    """Collect the URLs matching ``search``, following a story's extra pages.

    For every search the parent implementation is called on the given
    page. When the search is the image search, the "next page" links
    (``self.nextSearch``) are additionally followed for as long as the
    first link found still starts with ``url``, and the images found on
    each of those pages are appended to the result.

    :param url: URL of the page that ``data`` was loaded from.
    :param data: parsed page content, as returned by ``self.getPage``.
    :param search: search expression to evaluate on the page.
    :returns: list of matching URLs; for the image search this includes
        the matches from all follow-up pages that were visited.
    :raises ValueError: presumably raised by the parent implementation
        when ``search`` matches nothing (this method itself treats
        ``ValueError`` from the next-page lookup as "no next page") --
        TODO confirm against the base class.
    """
    urls = list(super(Oglaf, self).fetchUrls(url, data, search))
    if search != self.imageSearch:
        return urls

    try:
        nexturls = self.fetchUrls(url, data, self.nextSearch)
    except ValueError:
        # No "next page" link on this page: nothing more to collect.
        return urls

    # Only links that extend the starting URL are followed; anything else
    # is treated as leaving the current story.
    while nexturls and nexturls[0].startswith(url):
        pageurl = nexturls[0]
        data = self.getPage(pageurl)
        # Pass the URL of the page actually being parsed (a string), not
        # the whole list of candidate links.
        urls.extend(super(Oglaf, self).fetchUrls(pageurl, data, search))
        try:
            nexturls = self.fetchUrls(pageurl, data, self.nextSearch)
        except ValueError:
            # Same handling as for the first page: a follow-up page
            # without a "next page" link ends the walk instead of
            # discarding the images collected so far.
            break
    return urls
class OhJoySexToy(_WordPressScraper): class OhJoySexToy(_WordPressScraper):
url = 'http://www.ohjoysextoy.com/' url = 'http://www.ohjoysextoy.com/'