2016-04-01 22:14:31 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2016-10-28 22:21:41 +00:00
|
|
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
2016-04-01 22:14:31 +00:00
|
|
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
2017-02-13 21:41:17 +00:00
|
|
|
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
2016-04-01 22:14:31 +00:00
|
|
|
|
|
|
|
from __future__ import absolute_import, division, print_function
|
|
|
|
|
|
|
|
from ..scraper import _ParserScraper
|
2017-02-13 21:41:17 +00:00
|
|
|
from ..helpers import indirectStarter, xpath_class
|
2016-04-01 22:14:31 +00:00
|
|
|
|
|
|
|
# Common base classes for comics with the same structure (same hosting
|
|
|
|
# software, for example) go here. Since those are shared by many modules,
|
|
|
|
# please don't use lists of expressions, as that makes it hard to track which
|
|
|
|
# expression is for which comics.
|
|
|
|
|
2016-04-10 21:04:34 +00:00
|
|
|
|
2016-04-01 22:14:31 +00:00
|
|
|
class _WordPressScraper(_ParserScraper):
    """Shared XPath search expressions for WordPress-style comic pages.

    Each attribute is a single XPath string (deliberately not a list, see
    the note at the top of this module). The class tests are built with the
    ``xpath_class`` helper imported from ``..helpers``.
    """

    # The comic image is any <img> nested inside the element with id "comic".
    imageSearch = '//div[@id="comic"]//img'

    # Navigation anchors, selected by their "comic-nav-*" class.
    prevSearch = '//a[%s]' % xpath_class('comic-nav-previous')
    nextSearch = '//a[%s]' % xpath_class('comic-nav-next')
    latestSearch = '//a[%s]' % xpath_class('comic-nav-last')
|
2017-05-21 23:17:05 +00:00
|
|
|
|
|
|
|
|
|
|
|
class _WPNavi(_WordPressScraper):
    """WordPress variant whose "previous" link uses the ``navi-prev`` class.

    Only ``prevSearch`` is overridden; every other search expression is
    inherited from ``_WordPressScraper``.
    """

    prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
2016-04-01 22:14:31 +00:00
|
|
|
|
|
|
|
|
2016-05-01 23:25:34 +00:00
|
|
|
class _WPNaviIn(_WordPressScraper):
    """WordPress variant whose "previous" link uses the ``navi-prev-in`` class.

    Only ``prevSearch`` is overridden; every other search expression is
    inherited from ``_WordPressScraper``.
    """

    prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
|
|
|
|
|
|
|
|
|
2019-10-18 07:49:55 +00:00
|
|
|
class _WPWebcomic(_WordPressScraper):
    """Search expressions for WordPress sites using ``*-webcomic-link`` classes.

    All four expressions inherited from ``_WordPressScraper`` are replaced.
    The class tests are built with the ``xpath_class`` helper imported from
    ``..helpers``.
    """

    # The comic image is any <img> below a <div> matching "webcomic-image".
    imageSearch = '//div[{}]//img'.format(xpath_class('webcomic-image'))

    # Navigation anchors. Every expression starts with "//" (descendant
    # search from the document root); a leading "///" is not a valid XPath
    # location path and would fail when the expression is evaluated.
    prevSearch = '//a[{}]'.format(xpath_class('previous-webcomic-link'))
    nextSearch = '//a[{}]'.format(xpath_class('next-webcomic-link'))
    latestSearch = '//a[{}]'.format(xpath_class('last-webcomic-link'))
|
|
|
|
|
|
|
|
|
2016-04-03 22:12:53 +00:00
|
|
|
class _ComicControlScraper(_ParserScraper):
    """Shared XPath search expressions for ComicControl-style pages.

    The image is located by its fixed id, and the navigation anchors by
    their ``rel`` attribute.
    """

    # The comic image carries the id "cc-comic".
    imageSearch = '//img[@id="cc-comic"]'

    # Navigation anchors, selected by rel="prev" / "next" / "last".
    prevSearch = '//a[@rel="prev"]'
    nextSearch = '//a[@rel="next"]'
    latestSearch = '//a[@rel="last"]'
|
2016-05-06 23:50:10 +00:00
|
|
|
|
|
|
|
|
|
|
|
class _TumblrScraper(_ParserScraper):
    """Base class for comics hosted on Tumblr.

    Uses ``indirectStarter`` (imported from ``..helpers``) as the starter and
    derives image names from the post URL.
    """

    starter = indirectStarter

    def namer(self, image_url, page_url):
        """Build an image name from the Tumblr post URL.

        Post URLs look like ``http://host/post/num/name``. Splitting on
        ``/`` gives ``['http:', '', 'host', 'post', 'num', 'name']``, so the
        post number is at index 4 and the optional name at index 5.

        Returns ``num_name`` when the name segment is present, otherwise
        just ``num``. ``image_url`` is not used. Raises ``IndexError`` when
        the URL has fewer than five ``/``-separated segments.
        """
        segments = page_url.split('/')
        post_number = segments[4]
        if len(segments) <= 5:
            # No name segment after the post number.
            return post_number
        return '%s_%s' % (post_number, segments[5])

    def shouldSkipUrl(self, url, data):
        """Return the iframes found inside the element with id "post".

        Reblogged posts are embedded in an iframe, so a non-empty result
        marks a page that should be skipped. ``url`` is not used.
        """
        reblog_frames = data.xpath('//div[@id="post"]//iframe')
        return reblog_frames
|