From 6fd328204758483dbaf12e868d2192a2b96e05c2 Mon Sep 17 00:00:00 2001
From: Jakob Kogler
Date: Sun, 14 Jul 2019 08:06:31 +0200
Subject: [PATCH] Add comic "turnoff" (closes #139)

---
 dosagelib/plugins/t.py | 44 +++++++++++++++++++++++++++++++++++++++++++-
 requirements.txt       |  1 +
 setup.cfg              |  1 +
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/dosagelib/plugins/t.py b/dosagelib/plugins/t.py
index e5e85193c..1a3b3abab 100644
--- a/dosagelib/plugins/t.py
+++ b/dosagelib/plugins/t.py
@@ -5,7 +5,11 @@
 
 from __future__ import absolute_import, division, print_function
 
-from re import compile, escape
+from re import compile, escape, MULTILINE
+try:
+    from functools import cached_property
+except ImportError:
+    from cached_property import cached_property
 
 from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import indirectStarter, xpath_class
@@ -190,6 +194,44 @@ class TumbleDryComics(_WordPressScraper):
         return filename
 
 
+class Turnoff(_ParserScraper):
+    """Scraper for turnoff.us; files are numbered using comics_order."""
+
+    name = 'turnoff'
+    url = 'https://turnoff.us/'
+    imageSearch = '//article[%s]//img' % xpath_class('post-content')
+    prevSearch = '//div[%s]//a' % xpath_class('prev')
+    stripUrl = url + 'geek/%s'
+    firstStripUrl = stripUrl % 'tcp-buddies'
+    multipleImagesPerStrip = True
+
+    @cached_property
+    def comics_order(self):
+        """Return the comic names listed on the front page, in reversed order.
+
+        Computed on first access and then cached on the instance.
+        """
+        # Neither the images nor the pages contain information about dates or indices.
+        # However, we can extract the order of the images from the JavaScript.
+        html = self.session.get(self.url).text
+        list_regex = compile(r"""^\s*"/geek/(.*)",\s*$""", flags=MULTILINE)
+        return list(reversed(list_regex.findall(html)))
+
+    def namer(self, image_url, page_url):
+        """Return the image file name prefixed with the comic's 1-based index.
+
+        A comic missing from ``comics_order`` gets the length of that list.
+        """
+        # Ignore a trailing slash, otherwise the comic name would be empty.
+        comic_name = page_url.rstrip('/').split('/')[-1]
+        try:
+            index = self.comics_order.index(comic_name) + 1
+        except ValueError:
+            index = len(self.comics_order)
+        file_name = image_url.split('/')[-1]
+        return "%03d-%s" % (index, file_name)
+
+
 class TwoGuysAndGuy(_BasicScraper):
     url = 'http://www.twogag.com/'
     rurl = escape(url)
diff --git a/requirements.txt b/requirements.txt
index 2f36880e2..af3012a41 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ requests>=2.0
 six
 backports.shutil_get_terminal_size; python_version<'3.3'
 backports.functools_lru_cache; python_version<'3.2'
+cached_property; python_version<'3.8'
diff --git a/setup.cfg b/setup.cfg
index 66f97ace0..2841dea4a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -40,6 +40,7 @@ install_requires =
     six
     backports.shutil_get_terminal_size; python_version<'3.3'
     backports.functools_lru_cache; python_version<'3.2'
+    cached_property; python_version<'3.8'
 
 [options.entry_points]
 console_scripts =