Add comic "turnoff" (closes #139)

This commit is contained in:
Jakob Kogler 2019-07-14 08:06:31 +02:00 committed by Tobias Gruetzmacher
parent f1fb85a744
commit 6fd3282047
3 changed files with 34 additions and 1 deletions

View file

@ -5,7 +5,11 @@
from __future__ import absolute_import, division, print_function
from re import compile, escape
from re import compile, escape, MULTILINE
try:
from functools import cached_property
except ImportError:
from cached_property import cached_property
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, xpath_class
@ -190,6 +194,33 @@ class TumbleDryComics(_WordPressScraper):
return filename
# Scraper for the "turnoff" comic hosted at https://turnoff.us/.
#
# Strip pages carry no date or running number, so image file names are
# prefixed with an index derived from the comic list that is embedded in
# the start page's JavaScript (see ``comics_order`` and ``namer``).
class Turnoff(_ParserScraper):
    name = 'turnoff'
    url = 'https://turnoff.us/'
    imageSearch = '//article[%s]//img' % xpath_class('post-content')
    prevSearch = '//div[%s]//a' % xpath_class('prev')
    stripUrl = url + 'geek/%s'
    firstStripUrl = stripUrl % 'tcp-buddies'
    multipleImagesPerStrip = True

    @cached_property
    def comics_order(self):
        """Return the list of comic slugs used to number the strips.

        The start page (``self.url``) is fetched once per instance (the
        result is cached by ``cached_property``) and every line of the
        form ``"/geek/<slug>",`` is collected. The list is returned in
        reverse of the order in which the slugs appear on the page.
        NOTE(review): presumably the page lists the newest comic first, so
        reversing gives older comics lower numbers - confirm against the site.
        """
        # Neither the images nor the pages contain information about dates or indices.
        # However we can extract the order of the images from the JavaScript.
        html = self.session.get(self.url).text
        list_regex = compile(r"""^\s*"/geek/(.*)",\s*$""", flags=MULTILINE)
        # Drop a possible trailing slash so the slugs compare equal to the
        # names that namer() derives from page URLs.
        slugs = [slug.rstrip('/') for slug in list_regex.findall(html)]
        slugs.reverse()
        return slugs

    def namer(self, image_url, page_url):
        """Build the file name for an image as ``<index>-<original name>``.

        ``index`` is the 1-based position of the page's slug in
        ``comics_order``, zero-padded to three digits. When the slug is
        not in that list, the length of the list is used instead.

        A trailing slash on ``page_url`` is ignored; without that the
        last path segment would be empty and every strip would silently
        end up with the fallback index.
        """
        comic_name = page_url.rstrip('/').split('/')[-1]
        order = self.comics_order
        try:
            index = order.index(comic_name) + 1
        except ValueError:
            # Slug is not part of the list scraped from the start page.
            index = len(order)
        file_name = image_url.split('/')[-1]
        return "%03d-%s" % (index, file_name)
class TwoGuysAndGuy(_BasicScraper):
url = 'http://www.twogag.com/'
rurl = escape(url)

View file

@ -4,3 +4,4 @@ requests>=2.0
six
backports.shutil_get_terminal_size; python_version<'3.3'
backports.functools_lru_cache; python_version<'3.2'
cached_property; python_version<'3.8'

View file

@ -40,6 +40,7 @@ install_requires =
six
backports.shutil_get_terminal_size; python_version<'3.3'
backports.functools_lru_cache; python_version<'3.2'
cached_property; python_version<'3.8'
[options.entry_points]
console_scripts =