Move ComicControl into common module.
- Move all comics using ComicControl into alphabetical files.
- Add BalderDash & Picklewhistle
This commit is contained in:
parent
0d453a6858
commit
0bcfb8a82e
10 changed files with 52 additions and 70 deletions
|
@ -10,7 +10,7 @@ from re import compile, escape
|
|||
from ..util import tagre, getPageContent
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from .common import _WordPressScraper, _ComicPressScraper
|
||||
from .common import _ComicControlScraper, _ComicPressScraper, _WordPressScraper
|
||||
|
||||
|
||||
class BackwaterPlanet(_BasicScraper):
|
||||
|
@ -40,6 +40,10 @@ class BadMachinery(_BasicScraper):
|
|||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class BalderDash(_ComicControlScraper):
|
||||
url = 'http://www.balderdashcomic.com/'
|
||||
|
||||
|
||||
class Bardsworth(_WordPressScraper):
|
||||
url = 'http://www.bardsworth.com/'
|
||||
starter = indirectStarter('http://www.bardsworth.com/',
|
||||
|
|
|
@ -1,29 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from dosagelib.helpers import indirectStarter
|
||||
from ..scraper import make_scraper, _ParserScraper
|
||||
|
||||
|
||||
def add(name, url, firstUrl=None, starter=None, textSearch=None, lang=None):
|
||||
attrs = dict(
|
||||
name=name,
|
||||
url=url,
|
||||
imageSearch=['//div[@id="cc-comicbody"]//img'],
|
||||
prevSearch=['//a[@rel="prev"]']
|
||||
)
|
||||
if lang:
|
||||
attrs['lang'] = lang
|
||||
if firstUrl:
|
||||
attrs['firstUrl'] = url + firstUrl
|
||||
if starter:
|
||||
attrs['starter'] = starter
|
||||
if textSearch:
|
||||
attrs['textSearch'] = textSearch
|
||||
globals()[name] = make_scraper(name, _ParserScraper, **attrs)
|
||||
|
||||
|
||||
add('GoGetARoomie', 'http://www.gogetaroomie.com')
|
||||
add('KiwiBlitz', 'http://www.kiwiblitz.com')
|
||||
add('LetsSpeakEnglish', 'http://www.marycagle.com')
|
||||
add('Metacarpolis', 'http://www.metacarpolis.com')
|
||||
add('Spinnerette', 'http://www.spinnyverse.com')
|
||||
add('StreetFighter', 'http://www.streetfightercomics.com')
|
|
@ -23,3 +23,8 @@ class _WordPressScraper(_ParserScraper):
|
|||
|
||||
class _ComicPressScraper(_WordPressScraper):
|
||||
prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev-in ')]"
|
||||
|
||||
|
||||
class _ComicControlScraper(_ParserScraper):
|
||||
imageSearch = '//img[@id="cc-comic"]'
|
||||
prevSearch = '//a[@rel="prev"]'
|
||||
|
|
|
@ -9,6 +9,7 @@ from re import compile, escape
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _ComicControlScraper
|
||||
|
||||
|
||||
class Galaxion(_BasicScraper):
|
||||
|
@ -128,6 +129,10 @@ class GoblinsComic(_ParserScraper):
|
|||
help = 'Index format: ddmmyyyy'
|
||||
|
||||
|
||||
class GoGetARoomie(_ComicControlScraper):
|
||||
url = 'http://www.gogetaroomie.com'
|
||||
|
||||
|
||||
class GoneWithTheBlastwave(_BasicScraper):
|
||||
url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
|
||||
starter = indirectStarter(
|
||||
|
|
|
@ -4,10 +4,13 @@
|
|||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile, escape, IGNORECASE
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
from ..helpers import indirectStarter
|
||||
from .common import _ComicControlScraper
|
||||
|
||||
|
||||
class KevinAndKell(_BasicScraper):
|
||||
|
@ -47,6 +50,10 @@ class KickInTheHead(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
|
||||
|
||||
class KiwiBlitz(_ComicControlScraper):
|
||||
url = 'http://www.kiwiblitz.com'
|
||||
|
||||
|
||||
class Krakow(_BasicScraper):
|
||||
url = 'http://www.krakow.krakowstudios.com/'
|
||||
stripUrl = url + 'archive.php?date=%s'
|
||||
|
|
|
@ -10,7 +10,7 @@ from re import compile, escape
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import bounceStarter, indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper, WP_LATEST_SEARCH
|
||||
from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH
|
||||
|
||||
|
||||
class Lackadaisy(_BasicScraper):
|
||||
|
@ -70,6 +70,10 @@ class LeastICouldDo(_BasicScraper):
|
|||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class LetsSpeakEnglish(_ComicControlScraper):
|
||||
url = 'http://www.marycagle.com'
|
||||
|
||||
|
||||
class Lint(_BasicScraper):
|
||||
url = 'http://www.purnicellin.com/lint/'
|
||||
rurl = escape(url)
|
||||
|
|
|
@ -9,7 +9,7 @@ from re import compile, escape, IGNORECASE
|
|||
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper
|
||||
from .common import _ComicControlScraper, _WordPressScraper
|
||||
|
||||
|
||||
class MacHall(_BasicScraper):
|
||||
|
@ -128,6 +128,10 @@ class MenageA3(_BasicScraper):
|
|||
help = 'Index format: name'
|
||||
|
||||
|
||||
class Metacarpolis(_ComicControlScraper):
|
||||
url = 'http://www.metacarpolis.com'
|
||||
|
||||
|
||||
class Misfile(_BasicScraper):
|
||||
url = 'http://www.misfile.com/'
|
||||
stripUrl = url + '?date=%s'
|
||||
|
|
|
@ -10,21 +10,11 @@ from re import compile, escape
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper, WP_LATEST_SEARCH
|
||||
from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH
|
||||
|
||||
|
||||
class Namesake(_BasicScraper):
|
||||
class Namesake(_ComicControlScraper):
|
||||
url = 'http://namesakecomic.com/'
|
||||
stripUrl = url + 'comic/%s/'
|
||||
firstStripUrl = stripUrl % 'prologue-cover-3'
|
||||
imageSearch = compile(tagre("img", "src", r'([^"]*/wp-content/uploads/[^"]+)', after='title='))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]*/comic/[^"]+)', after='navi-prev'))
|
||||
help = 'Index format: name'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
imgmatch = compile(r'uploads/(\d+)/(\d+)/(.+)$').search(imageUrl)
|
||||
return '-'.join(imgmatch.groups())
|
||||
|
||||
|
||||
class NamirDeiter(_BasicScraper):
|
||||
|
|
|
@ -8,7 +8,7 @@ from re import compile, escape
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import bounceStarter, queryNamer, indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper
|
||||
from .common import _ComicControlScraper, _WordPressScraper
|
||||
|
||||
|
||||
class PandyLand(_WordPressScraper):
|
||||
|
@ -148,6 +148,10 @@ class PHDComics(_BasicScraper):
|
|||
)
|
||||
|
||||
|
||||
class Picklewhistle(_ComicControlScraper):
|
||||
url = 'http://www.picklewhistle.com/'
|
||||
|
||||
|
||||
class PicPakDog(_BasicScraper):
|
||||
url = 'http://www.picpak.net/'
|
||||
rurl = escape(url)
|
||||
|
|
|
@ -4,13 +4,15 @@
|
|||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile, escape, IGNORECASE, sub
|
||||
from os.path import splitext
|
||||
from datetime import datetime
|
||||
import datetime
|
||||
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter, bounceStarter
|
||||
from ..util import tagre, getPageContent
|
||||
from .common import _WordPressScraper
|
||||
from .common import _ComicControlScraper, _WordPressScraper
|
||||
|
||||
|
||||
class SabrinaOnline(_BasicScraper):
|
||||
|
@ -199,7 +201,6 @@ class ShermansLagoon(_BasicScraper):
|
|||
def namer(cls, imageUrl, pageUrl):
|
||||
name = pageUrl.rsplit('/', 3)[2]
|
||||
if name == "shermanslagoon.com":
|
||||
import datetime
|
||||
name = datetime.date.today().strftime("%B-%d-%Y").lower()
|
||||
# name is monthname-day-year
|
||||
month, day, year = name.split('-')
|
||||
|
@ -260,29 +261,8 @@ class SkinDeep(_BasicScraper):
|
|||
help = 'Index format: custom'
|
||||
|
||||
|
||||
class SleeplessDomain(_ParserScraper):
|
||||
class SleeplessDomain(_ComicControlScraper):
|
||||
url = 'http://www.sleeplessdomain.com/'
|
||||
stripUrl = url + 'comic/%s'
|
||||
firstStripUrl = stripUrl % 'chapter-1-cover'
|
||||
css = True
|
||||
imageSearch = 'img#cc-comic'
|
||||
prevSearch = 'div.nav a.prev'
|
||||
starter = bounceStarter(url, 'div.nav a.next')
|
||||
help = 'Index format: chapter-X-page-Y (unpadded)'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
"""Image file name is UNIX time stamp & something for most of the comics..."""
|
||||
start = ''
|
||||
tsmatch = compile(r'/(\d+)-').search(imageUrl)
|
||||
if tsmatch:
|
||||
start = datetime.utcfromtimestamp(
|
||||
int(tsmatch.group(1))).strftime("%Y-%m-%d")
|
||||
else:
|
||||
# There were only chapter 1, page 4 and 5 not matching when writing
|
||||
# this...
|
||||
start = '2015-04-11x'
|
||||
return start + "-" + pageUrl.rsplit('/', 1)[-1]
|
||||
|
||||
|
||||
class SlightlyDamned(_WordPressScraper):
|
||||
|
@ -463,6 +443,10 @@ class SpareParts(_BasicScraper):
|
|||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class Spinnerette(_ComicControlScraper):
|
||||
url = 'http://www.spinnyverse.com'
|
||||
|
||||
|
||||
class SPQRBlues(_WordPressScraper):
|
||||
url = 'http://spqrblues.com/IV/'
|
||||
|
||||
|
@ -528,6 +512,10 @@ class StrawberryDeathCake(_BasicScraper):
|
|||
after="previous"))
|
||||
|
||||
|
||||
class StreetFighter(_ComicControlScraper):
|
||||
url = 'http://www.streetfightercomics.com'
|
||||
|
||||
|
||||
class StrongFemaleProtagonist(_ParserScraper):
|
||||
url = 'http://strongfemaleprotagonist.com/'
|
||||
stripUrl = url + '%s/'
|
||||
|
|
Loading…
Reference in a new issue