Add comic "turnoff" (closes #139)
This commit is contained in:
parent
f1fb85a744
commit
6fd3282047
3 changed files with 34 additions and 1 deletions
|
@ -5,7 +5,11 @@
|
||||||
|
|
||||||
from __future__ import absolute_import, division, print_function
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from re import compile, escape
|
from re import compile, escape, MULTILINE
|
||||||
|
try:
|
||||||
|
from functools import cached_property
|
||||||
|
except ImportError:
|
||||||
|
from cached_property import cached_property
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import indirectStarter, xpath_class
|
from ..helpers import indirectStarter, xpath_class
|
||||||
|
@ -190,6 +194,33 @@ class TumbleDryComics(_WordPressScraper):
|
||||||
return filename
|
return filename
|
||||||
|
|
||||||
|
|
||||||
|
class Turnoff(_ParserScraper):
    """Scraper for the comic published at https://turnoff.us/."""

    name = 'turnoff'
    url = 'https://turnoff.us/'
    imageSearch = '//article[%s]//img' % xpath_class('post-content')
    prevSearch = '//div[%s]//a' % xpath_class('prev')
    stripUrl = url + 'geek/%s'
    firstStripUrl = stripUrl % 'tcp-buddies'
    multipleImagesPerStrip = True

    @cached_property
    def comics_order(self):
        """Return the comic names found on the start page, in reversed order.

        The names are the parts following ``/geek/`` on lines of the start
        page that consist solely of a quoted ``"/geek/<name>",`` entry. The
        page is fetched once per instance; the result is cached afterwards.
        """
        # Neither the images nor the pages contain information about dates
        # or indices. However, the order of the images can be extracted from
        # the JavaScript embedded in the start page.
        entry_pattern = compile(r"""^\s*"/geek/(.*)",\s*$""", flags=MULTILINE)
        page_source = self.session.get(self.url).text
        names = entry_pattern.findall(page_source)
        return names[::-1]

    def namer(self, image_url, page_url):
        """Build the file name ``<index>-<image file name>`` for an image.

        ``index`` is the 1-based position of the page's comic name (the last
        path segment of ``page_url``) within ``comics_order``, zero-padded to
        at least three digits. If the name is not listed, the total number of
        known comics is used instead.
        """
        slug = page_url.rsplit('/', 1)[-1]
        basename = image_url.rsplit('/', 1)[-1]
        try:
            position = self.comics_order.index(slug) + 1
        except ValueError:
            # NOTE(review): presumably this covers the start page, whose URL
            # carries no comic name and which shows the newest comic - verify.
            position = len(self.comics_order)
        return "%03d-%s" % (position, basename)
|
||||||
|
|
||||||
|
|
||||||
class TwoGuysAndGuy(_BasicScraper):
|
class TwoGuysAndGuy(_BasicScraper):
|
||||||
url = 'http://www.twogag.com/'
|
url = 'http://www.twogag.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
|
|
|
@ -4,3 +4,4 @@ requests>=2.0
|
||||||
six
|
six
|
||||||
backports.shutil_get_terminal_size; python_version<'3.3'
|
backports.shutil_get_terminal_size; python_version<'3.3'
|
||||||
backports.functools_lru_cache; python_version<'3.2'
|
backports.functools_lru_cache; python_version<'3.2'
|
||||||
|
cached_property; python_version<'3.8'
|
||||||
|
|
|
@ -40,6 +40,7 @@ install_requires =
|
||||||
six
|
six
|
||||||
backports.shutil_get_terminal_size; python_version<'3.3'
|
backports.shutil_get_terminal_size; python_version<'3.3'
|
||||||
backports.functools_lru_cache; python_version<'3.2'
|
backports.functools_lru_cache; python_version<'3.2'
|
||||||
|
cached_property; python_version<'3.8'
|
||||||
|
|
||||||
[options.entry_points]
|
[options.entry_points]
|
||||||
console_scripts =
|
console_scripts =
|
||||||
|
|
Loading…
Reference in a new issue