Move ComicControl into common module.

- Move all comics using ComicControl into alphabetical files.
- Add BalderDash & Picklewhistle
This commit is contained in:
Tobias Gruetzmacher 2016-04-04 00:12:53 +02:00
parent 0d453a6858
commit 0bcfb8a82e
10 changed files with 52 additions and 70 deletions

View file

@ -10,7 +10,7 @@ from re import compile, escape
from ..util import tagre, getPageContent
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from .common import _WordPressScraper, _ComicPressScraper
from .common import _ComicControlScraper, _ComicPressScraper, _WordPressScraper
class BackwaterPlanet(_BasicScraper):
@ -40,6 +40,10 @@ class BadMachinery(_BasicScraper):
help = 'Index format: yyyymmdd'
class BalderDash(_ComicControlScraper):
    # Only the site URL is set here; the search expressions are inherited
    # from the _ComicControlScraper base class.
    url = 'http://www.balderdashcomic.com/'
class Bardsworth(_WordPressScraper):
url = 'http://www.bardsworth.com/'
starter = indirectStarter('http://www.bardsworth.com/',

View file

@ -1,29 +0,0 @@
# -*- coding: utf-8 -*-
from dosagelib.helpers import indirectStarter
from ..scraper import make_scraper, _ParserScraper
def add(name, url, firstUrl=None, starter=None, textSearch=None, lang=None):
    """Build a ComicControl scraper class and publish it in this module.

    The class is created with ``make_scraper`` on top of ``_ParserScraper``
    and stored in the module globals under ``name``.

    name       -- class name, also passed as the ``name`` attribute.
    url        -- base URL of the comic site.
    firstUrl   -- optional suffix; when truthy, ``url + firstUrl`` is stored
                  as the ``firstUrl`` attribute.
    starter    -- optional starter, passed through when truthy.
    textSearch -- optional text search expression, passed through when truthy.
    lang       -- optional language value, passed through when truthy.
    """
    # Attributes every generated scraper receives.
    attrs = {
        'name': name,
        'url': url,
        'imageSearch': ['//div[@id="cc-comicbody"]//img'],
        'prevSearch': ['//a[@rel="prev"]'],
    }
    # Optional attributes, in the order they are added; falsy values are
    # left out entirely so the generated class does not carry the key.
    optional = (
        ('lang', lang),
        ('firstUrl', url + firstUrl if firstUrl else None),
        ('starter', starter),
        ('textSearch', textSearch),
    )
    attrs.update((key, value) for key, value in optional if value)
    globals()[name] = make_scraper(name, _ParserScraper, **attrs)
# One add() call per site: the first argument is the scraper class name,
# the second is the site's base URL. No optional arguments are used.
add('GoGetARoomie', 'http://www.gogetaroomie.com')
add('KiwiBlitz', 'http://www.kiwiblitz.com')
add('LetsSpeakEnglish', 'http://www.marycagle.com')
add('Metacarpolis', 'http://www.metacarpolis.com')
add('Spinnerette', 'http://www.spinnyverse.com')
add('StreetFighter', 'http://www.streetfightercomics.com')

View file

@ -23,3 +23,8 @@ class _WordPressScraper(_ParserScraper):
class _ComicPressScraper(_WordPressScraper):
    # Overrides only the previous-page link search: an <a> element whose
    # class attribute contains the whitespace-separated token "navi-prev-in".
    prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev-in ')]"
class _ComicControlScraper(_ParserScraper):
    # Shared base for the ComicControl comics; subclasses in this commit
    # only supply their site URL.
    # XPath for the comic image: the <img> element with id "cc-comic".
    imageSearch = '//img[@id="cc-comic"]'
    # XPath for the previous-page link: any <a> element with rel="prev".
    prevSearch = '//a[@rel="prev"]'

View file

@ -9,6 +9,7 @@ from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from ..util import tagre
from .common import _ComicControlScraper
class Galaxion(_BasicScraper):
@ -128,6 +129,10 @@ class GoblinsComic(_ParserScraper):
help = 'Index format: ddmmyyyy'
class GoGetARoomie(_ComicControlScraper):
    # Only the site URL is set here; the search expressions are inherited
    # from the _ComicControlScraper base class.
    url = 'http://www.gogetaroomie.com'
class GoneWithTheBlastwave(_BasicScraper):
url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
starter = indirectStarter(

View file

@ -4,10 +4,13 @@
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper
from ..util import tagre
from ..helpers import indirectStarter
from .common import _ComicControlScraper
class KevinAndKell(_BasicScraper):
@ -47,6 +50,10 @@ class KickInTheHead(_BasicScraper):
help = 'Index format: yyyy/mm/dd/stripname'
class KiwiBlitz(_ComicControlScraper):
    # Only the site URL is set here; the search expressions are inherited
    # from the _ComicControlScraper base class.
    url = 'http://www.kiwiblitz.com'
class Krakow(_BasicScraper):
url = 'http://www.krakow.krakowstudios.com/'
stripUrl = url + 'archive.php?date=%s'

View file

@ -10,7 +10,7 @@ from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import bounceStarter, indirectStarter
from ..util import tagre
from .common import _WordPressScraper, WP_LATEST_SEARCH
from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH
class Lackadaisy(_BasicScraper):
@ -70,6 +70,10 @@ class LeastICouldDo(_BasicScraper):
help = 'Index format: yyyymmdd'
class LetsSpeakEnglish(_ComicControlScraper):
    # Only the site URL is set here; the search expressions are inherited
    # from the _ComicControlScraper base class.
    url = 'http://www.marycagle.com'
class Lint(_BasicScraper):
url = 'http://www.purnicellin.com/lint/'
rurl = escape(url)

View file

@ -9,7 +9,7 @@ from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper, _ParserScraper
from ..util import tagre
from .common import _WordPressScraper
from .common import _ComicControlScraper, _WordPressScraper
class MacHall(_BasicScraper):
@ -128,6 +128,10 @@ class MenageA3(_BasicScraper):
help = 'Index format: name'
class Metacarpolis(_ComicControlScraper):
    # Only the site URL is set here; the search expressions are inherited
    # from the _ComicControlScraper base class.
    url = 'http://www.metacarpolis.com'
class Misfile(_BasicScraper):
url = 'http://www.misfile.com/'
stripUrl = url + '?date=%s'

View file

@ -10,21 +10,11 @@ from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from ..util import tagre
from .common import _WordPressScraper, WP_LATEST_SEARCH
from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH
class Namesake(_BasicScraper):
class Namesake(_ComicControlScraper):
url = 'http://namesakecomic.com/'
stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % 'prologue-cover-3'
imageSearch = compile(tagre("img", "src", r'([^"]*/wp-content/uploads/[^"]+)', after='title='))
prevSearch = compile(tagre("a", "href", r'([^"]*/comic/[^"]+)', after='navi-prev'))
help = 'Index format: name'
@classmethod
def namer(cls, imageUrl, pageUrl):
imgmatch = compile(r'uploads/(\d+)/(\d+)/(.+)$').search(imageUrl)
return '-'.join(imgmatch.groups())
class NamirDeiter(_BasicScraper):

View file

@ -8,7 +8,7 @@ from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import bounceStarter, queryNamer, indirectStarter
from ..util import tagre
from .common import _WordPressScraper
from .common import _ComicControlScraper, _WordPressScraper
class PandyLand(_WordPressScraper):
@ -148,6 +148,10 @@ class PHDComics(_BasicScraper):
)
class Picklewhistle(_ComicControlScraper):
    # Only the site URL is set here; the search expressions are inherited
    # from the _ComicControlScraper base class.
    url = 'http://www.picklewhistle.com/'
class PicPakDog(_BasicScraper):
url = 'http://www.picpak.net/'
rurl = escape(url)

View file

@ -4,13 +4,15 @@
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE, sub
from os.path import splitext
from datetime import datetime
import datetime
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, bounceStarter
from ..util import tagre, getPageContent
from .common import _WordPressScraper
from .common import _ComicControlScraper, _WordPressScraper
class SabrinaOnline(_BasicScraper):
@ -199,7 +201,6 @@ class ShermansLagoon(_BasicScraper):
def namer(cls, imageUrl, pageUrl):
name = pageUrl.rsplit('/', 3)[2]
if name == "shermanslagoon.com":
import datetime
name = datetime.date.today().strftime("%B-%d-%Y").lower()
# name is monthname-day-year
month, day, year = name.split('-')
@ -260,29 +261,8 @@ class SkinDeep(_BasicScraper):
help = 'Index format: custom'
class SleeplessDomain(_ParserScraper):
class SleeplessDomain(_ComicControlScraper):
url = 'http://www.sleeplessdomain.com/'
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'chapter-1-cover'
css = True
imageSearch = 'img#cc-comic'
prevSearch = 'div.nav a.prev'
starter = bounceStarter(url, 'div.nav a.next')
help = 'Index format: chapter-X-page-Y (unpadded)'
@classmethod
def namer(cls, imageUrl, pageUrl):
"""Image file name is UNIX time stamp & something for most of the comics..."""
start = ''
tsmatch = compile(r'/(\d+)-').search(imageUrl)
if tsmatch:
start = datetime.utcfromtimestamp(
int(tsmatch.group(1))).strftime("%Y-%m-%d")
else:
# There were only chapter 1, page 4 and 5 not matching when writing
# this...
start = '2015-04-11x'
return start + "-" + pageUrl.rsplit('/', 1)[-1]
class SlightlyDamned(_WordPressScraper):
@ -463,6 +443,10 @@ class SpareParts(_BasicScraper):
help = 'Index format: yyyymmdd'
class Spinnerette(_ComicControlScraper):
    # Only the site URL is set here; the search expressions are inherited
    # from the _ComicControlScraper base class.
    url = 'http://www.spinnyverse.com'
class SPQRBlues(_WordPressScraper):
url = 'http://spqrblues.com/IV/'
@ -528,6 +512,10 @@ class StrawberryDeathCake(_BasicScraper):
after="previous"))
class StreetFighter(_ComicControlScraper):
    # Only the site URL is set here; the search expressions are inherited
    # from the _ComicControlScraper base class.
    url = 'http://www.streetfightercomics.com'
class StrongFemaleProtagonist(_ParserScraper):
url = 'http://strongfemaleprotagonist.com/'
stripUrl = url + '%s/'