From 0bcfb8a82e0257b63d7432e9380946270af63a05 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Mon, 4 Apr 2016 00:12:53 +0200 Subject: [PATCH] Move ComicControl into common module. - Move all comics using ComicControl into alphabetical files. - Add BalderDash & Picklewhistle --- dosagelib/plugins/b.py | 6 ++++- dosagelib/plugins/comiccontrol.py | 29 ----------------------- dosagelib/plugins/common.py | 5 ++++ dosagelib/plugins/g.py | 5 ++++ dosagelib/plugins/k.py | 7 ++++++ dosagelib/plugins/l.py | 6 ++++- dosagelib/plugins/m.py | 6 ++++- dosagelib/plugins/n.py | 14 ++---------- dosagelib/plugins/p.py | 6 ++++- dosagelib/plugins/s.py | 38 +++++++++++-------------------- 10 files changed, 52 insertions(+), 70 deletions(-) delete mode 100644 dosagelib/plugins/comiccontrol.py diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py index b45c36db5..996c264a9 100644 --- a/dosagelib/plugins/b.py +++ b/dosagelib/plugins/b.py @@ -10,7 +10,7 @@ from re import compile, escape from ..util import tagre, getPageContent from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter -from .common import _WordPressScraper, _ComicPressScraper +from .common import _ComicControlScraper, _ComicPressScraper, _WordPressScraper class BackwaterPlanet(_BasicScraper): @@ -40,6 +40,10 @@ class BadMachinery(_BasicScraper): help = 'Index format: yyyymmdd' +class BalderDash(_ComicControlScraper): + url = 'http://www.balderdashcomic.com/' + + class Bardsworth(_WordPressScraper): url = 'http://www.bardsworth.com/' starter = indirectStarter('http://www.bardsworth.com/', diff --git a/dosagelib/plugins/comiccontrol.py b/dosagelib/plugins/comiccontrol.py deleted file mode 100644 index e5185744a..000000000 --- a/dosagelib/plugins/comiccontrol.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- -from dosagelib.helpers import indirectStarter -from ..scraper import make_scraper, _ParserScraper - - -def add(name, url, firstUrl=None, starter=None, textSearch=None, lang=None): - attrs = dict( - name=name, - url=url, - imageSearch=['//div[@id="cc-comicbody"]//img'], - prevSearch=['//a[@rel="prev"]'] - ) - if lang: - attrs['lang'] = lang - if firstUrl: - attrs['firstUrl'] = url + firstUrl - if starter: - attrs['starter'] = starter - if textSearch: - attrs['textSearch'] = textSearch - globals()[name] = make_scraper(name, _ParserScraper, **attrs) - - -add('GoGetARoomie', 'http://www.gogetaroomie.com') -add('KiwiBlitz', 'http://www.kiwiblitz.com') -add('LetsSpeakEnglish', 'http://www.marycagle.com') -add('Metacarpolis', 'http://www.metacarpolis.com') -add('Spinnerette', 'http://www.spinnyverse.com') -add('StreetFighter', 'http://www.streetfightercomics.com') diff --git a/dosagelib/plugins/common.py b/dosagelib/plugins/common.py index f00cc3ab7..734543706 100644 --- a/dosagelib/plugins/common.py +++ b/dosagelib/plugins/common.py @@ -23,3 +23,8 @@ class _WordPressScraper(_ParserScraper): class _ComicPressScraper(_WordPressScraper): prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev-in ')]" + + +class _ComicControlScraper(_ParserScraper): + imageSearch = '//img[@id="cc-comic"]' + prevSearch = '//a[@rel="prev"]' diff --git a/dosagelib/plugins/g.py b/dosagelib/plugins/g.py index 8b2060685..4b058a787 100644 --- a/dosagelib/plugins/g.py +++ b/dosagelib/plugins/g.py @@ -9,6 +9,7 @@ from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter from ..util import tagre +from .common import _ComicControlScraper class Galaxion(_BasicScraper): @@ -128,6 +129,10 @@ class GoblinsComic(_ParserScraper): help = 'Index format: ddmmyyyy' +class GoGetARoomie(_ComicControlScraper): + url = 'http://www.gogetaroomie.com' + + class GoneWithTheBlastwave(_BasicScraper): url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1' starter = indirectStarter( diff --git a/dosagelib/plugins/k.py b/dosagelib/plugins/k.py index c86d1dcc3..13380101d 100644 --- a/dosagelib/plugins/k.py +++ b/dosagelib/plugins/k.py @@ -4,10 +4,13 @@ # Copyright (C) 2015-2016 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function + from re import compile, escape, IGNORECASE + from ..scraper import _BasicScraper from ..util import tagre from ..helpers import indirectStarter +from .common import _ComicControlScraper class KevinAndKell(_BasicScraper): @@ -47,6 +50,10 @@ class KickInTheHead(_BasicScraper): help = 'Index format: yyyy/mm/dd/stripname' +class KiwiBlitz(_ComicControlScraper): + url = 'http://www.kiwiblitz.com' + + class Krakow(_BasicScraper): url = 'http://www.krakow.krakowstudios.com/' stripUrl = url + 'archive.php?date=%s' diff --git a/dosagelib/plugins/l.py b/dosagelib/plugins/l.py index 8766cf28b..8e7d5dc0f 100644 --- a/dosagelib/plugins/l.py +++ b/dosagelib/plugins/l.py @@ -10,7 +10,7 @@ from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper from ..helpers import bounceStarter, indirectStarter from ..util import tagre -from .common import _WordPressScraper, WP_LATEST_SEARCH +from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH class Lackadaisy(_BasicScraper): @@ -70,6 +70,10 @@ class LeastICouldDo(_BasicScraper): help = 'Index format: yyyymmdd' +class LetsSpeakEnglish(_ComicControlScraper): + url = 'http://www.marycagle.com' + + class Lint(_BasicScraper): url = 'http://www.purnicellin.com/lint/' rurl = escape(url) diff --git a/dosagelib/plugins/m.py b/dosagelib/plugins/m.py index 019091087..24a0b5ac9 100755 --- a/dosagelib/plugins/m.py +++ b/dosagelib/plugins/m.py @@ -9,7 +9,7 @@ from re import compile, escape, IGNORECASE from ..scraper import _BasicScraper, _ParserScraper from ..util import tagre -from .common import _WordPressScraper +from .common import _ComicControlScraper, _WordPressScraper class MacHall(_BasicScraper): @@ -128,6 +128,10 @@ class MenageA3(_BasicScraper): help = 'Index format: name' +class Metacarpolis(_ComicControlScraper): + url = 'http://www.metacarpolis.com' + + class Misfile(_BasicScraper): url = 'http://www.misfile.com/' stripUrl = url + '?date=%s' diff --git a/dosagelib/plugins/n.py b/dosagelib/plugins/n.py index 9eeea81b2..325ee9f26 100644 --- a/dosagelib/plugins/n.py +++ b/dosagelib/plugins/n.py @@ -10,21 +10,11 @@ from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter from ..util import tagre -from .common import _WordPressScraper, WP_LATEST_SEARCH +from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH -class Namesake(_BasicScraper): +class Namesake(_ComicControlScraper): url = 'http://namesakecomic.com/' - stripUrl = url + 'comic/%s/' - firstStripUrl = stripUrl % 'prologue-cover-3' - imageSearch = compile(tagre("img", "src", r'([^"]*/wp-content/uploads/[^"]+)', after='title=')) - prevSearch = compile(tagre("a", "href", r'([^"]*/comic/[^"]+)', after='navi-prev')) - help = 'Index format: name' - - @classmethod - def namer(cls, imageUrl, pageUrl): - imgmatch = compile(r'uploads/(\d+)/(\d+)/(.+)$').search(imageUrl) - return '-'.join(imgmatch.groups()) class NamirDeiter(_BasicScraper): diff --git a/dosagelib/plugins/p.py b/dosagelib/plugins/p.py index f617b3025..0b3eb38fa 100755 --- a/dosagelib/plugins/p.py +++ b/dosagelib/plugins/p.py @@ -8,7 +8,7 @@ from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper from ..helpers import bounceStarter, queryNamer, indirectStarter from ..util import tagre -from .common import _WordPressScraper +from .common import _ComicControlScraper, _WordPressScraper class PandyLand(_WordPressScraper): @@ -148,6 +148,10 @@ class PHDComics(_BasicScraper): ) +class Picklewhistle(_ComicControlScraper): + url = 'http://www.picklewhistle.com/' + + class PicPakDog(_BasicScraper): url = 'http://www.picpak.net/' rurl = escape(url) diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index 9a4d619d4..7d6718f41 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -4,13 +4,15 @@ # Copyright (C) 2015-2016 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function + from re import compile, escape, IGNORECASE, sub from os.path import splitext -from datetime import datetime +import datetime + from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter, bounceStarter from ..util import tagre, getPageContent -from .common import _WordPressScraper +from .common import _ComicControlScraper, _WordPressScraper class SabrinaOnline(_BasicScraper): @@ -199,7 +201,6 @@ class ShermansLagoon(_BasicScraper): def namer(cls, imageUrl, pageUrl): name = pageUrl.rsplit('/', 3)[2] if name == "shermanslagoon.com": - import datetime name = datetime.date.today().strftime("%B-%d-%Y").lower() # name is monthname-day-year month, day, year = name.split('-') @@ -260,29 +261,8 @@ class SkinDeep(_BasicScraper): help = 'Index format: custom' -class SleeplessDomain(_ParserScraper): +class SleeplessDomain(_ComicControlScraper): url = 'http://www.sleeplessdomain.com/' - stripUrl = url + 'comic/%s' - firstStripUrl = stripUrl % 'chapter-1-cover' - css = True - imageSearch = 'img#cc-comic' - prevSearch = 'div.nav a.prev' - starter = bounceStarter(url, 'div.nav a.next') - help = 'Index format: chapter-X-page-Y (unpadded)' - - @classmethod - def namer(cls, imageUrl, pageUrl): - """Image file name is UNIX time stamp & something for most of the comics...""" - start = '' - tsmatch = compile(r'/(\d+)-').search(imageUrl) - if tsmatch: - start = datetime.utcfromtimestamp( - int(tsmatch.group(1))).strftime("%Y-%m-%d") - else: - # There were only chapter 1, page 4 and 5 not matching when writing - # this... - start = '2015-04-11x' - return start + "-" + pageUrl.rsplit('/', 1)[-1] class SlightlyDamned(_WordPressScraper): @@ -463,6 +443,10 @@ class SpareParts(_BasicScraper): help = 'Index format: yyyymmdd' +class Spinnerette(_ComicControlScraper): + url = 'http://www.spinnyverse.com' + + class SPQRBlues(_WordPressScraper): url = 'http://spqrblues.com/IV/' @@ -528,6 +512,10 @@ class StrawberryDeathCake(_BasicScraper): after="previous")) +class StreetFighter(_ComicControlScraper): + url = 'http://www.streetfightercomics.com' + + class StrongFemaleProtagonist(_ParserScraper): url = 'http://strongfemaleprotagonist.com/' stripUrl = url + '%s/'