Move more comics to common WordPressScraper.

This commit is contained in:
Tobias Gruetzmacher 2016-04-10 23:04:34 +02:00
parent f6e605e146
commit fa98f6ddbf
16 changed files with 137 additions and 231 deletions

View file

@ -4,11 +4,13 @@
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape, MULTILINE
from ..util import tagre
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import regexNamer, bounceStarter, indirectStarter
from .common import _WordPressScraper, _ComicPressScraper, WP_LATEST_SEARCH
from .common import _WordPressScraper, xpath_class, WP_LATEST_SEARCH
class AbstruseGoose(_BasicScraper):
@ -67,15 +69,14 @@ class Achewood(_BasicScraper):
namer = regexNamer(compile(r'date=(\d+)'))
class AfterStrife(_BasicScraper):
class AfterStrife(_WordPressScraper):
baseUrl = 'http://afterstrife.com/'
rurl = escape(baseUrl)
stripUrl = baseUrl + '?p=%s'
url = stripUrl % '262'
firstStripUrl = stripUrl % '1'
imageSearch = compile(r'<img src="(%sstrips/.+?)"' % rurl)
prevSearch = compile(r'<a href="(.+?)" class="navi navi-prev"')
prevSearch = '//a[%s]' % xpath_class('navi-prev')
help = 'Index format: nnn'
endOfLife = True
class AGirlAndHerFed(_BasicScraper):
@ -99,13 +100,9 @@ class AhoiPolloi(_ParserScraper):
help = 'Index format: yyyymmdd'
class AhoyEarth(_ParserScraper):
class AhoyEarth(_WordPressScraper):
url = 'http://www.ahoyearth.com/'
stripUrl = url + '%s/'
css = True
imageSearch = '#comic-1 img'
prevSearch = '.navi-prev'
help = 'Index format: ddmmyyyy'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
class AirForceBlues(_WordPressScraper):
@ -124,8 +121,9 @@ class ALessonIsLearned(_BasicScraper):
help = 'Index format: nnn'
class Alice(_ComicPressScraper):
class Alice(_WordPressScraper):
url = 'http://www.alicecomics.com/'
prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
starter = indirectStarter('http://www.alicecomics.com/',
'//a[text()="Latest Alice!"]')

View file

@ -10,8 +10,8 @@ from re import compile, escape
from ..util import tagre
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from .common import (_ComicControlScraper, _ComicPressScraper,
_WordPressScraper, WP_PREV_SEARCH)
from .common import (_ComicControlScraper, _WordPressScraper, WP_PREV_SEARCH,
xpath_class)
class BackwaterPlanet(_BasicScraper):
@ -61,15 +61,10 @@ class Baroquen(_BasicScraper):
help = 'Index format: yyyy/mm/dd/strip-name'
class Bearmageddon(_BasicScraper):
class Bearmageddon(_WordPressScraper):
url = 'http://bearmageddon.com/'
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2011/08/01/page-1'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
after='navi-prev'))
help = 'Index format: yyyy/mm/dd/stripname'
firstStripUrl = url + '2011/08/01/page-1/'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
class Beetlebum(_BasicScraper):
@ -334,8 +329,9 @@ class Buni(_WordPressScraper):
url = 'http://www.bunicomic.com/'
class BusinessCat(_ComicPressScraper):
class BusinessCat(_WordPressScraper):
url = 'http://www.businesscat.happyjar.com/'
prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
class ButtercupFestival(_ParserScraper):

View file

@ -10,7 +10,7 @@ from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import bounceStarter, indirectStarter
from ..util import tagre
from .common import _WordPressScraper, _ComicPressScraper
from .common import _WordPressScraper
class Caggage(_BasicScraper):
@ -295,8 +295,9 @@ class CowboyJedi(_WordPressScraper):
url = 'http://www.cowboyjedi.com/'
class CraftedFables(_ComicPressScraper):
class CraftedFables(_WordPressScraper):
url = 'http://www.caf-fiends.net/comicpress/'
prevSearch = '//a[@rel="prev"]'
class CrapIDrewOnMyLunchBreak(_BasicScraper):

View file

@ -12,8 +12,15 @@ from ..scraper import _ParserScraper
# please don't use lists of expressions, as that makes it hard to track which
# expression is for which comic.
WP_LATEST_SEARCH = '//a[contains(concat(" ", @class, " "), " comic-nav-last ")]'
WP_PREV_SEARCH = '//a[contains(concat(" ", @class, " "), " comic-nav-previous ")]'
def xpath_class(name):
    """Return an XPath predicate that matches elements having a CSS class.

    The returned string is a boolean expression meant to be placed inside
    the ``[...]`` of an XPath step, for example
    ``'//a[%s]' % xpath_class('navi-prev')``.

    The ``class`` attribute is padded with spaces on both sides so that
    ``name`` only matches as a whole token (``navi-prev`` does not match
    ``navi-prev-in``). It is passed through ``normalize-space()`` first so
    that tokens separated by tabs, newlines or repeated spaces are found
    as well.

    :param name: the class token to look for; it is inserted verbatim, so
        it must not contain double quotes.
    :returns: the XPath predicate as a string.
    """
    return ('contains(concat(" ", normalize-space(@class), " "), " %s ")'
            % name)
WP_LATEST_SEARCH = '//a[%s]' % xpath_class('comic-nav-last')
WP_PREV_SEARCH = '//a[%s]' % xpath_class('comic-nav-previous')
class _WordPressScraper(_ParserScraper):
@ -21,10 +28,6 @@ class _WordPressScraper(_ParserScraper):
prevSearch = WP_PREV_SEARCH
class _ComicPressScraper(_WordPressScraper):
prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev-in ')]"
class _ComicControlScraper(_ParserScraper):
imageSearch = '//img[@id="cc-comic"]'
prevSearch = '//a[@rel="prev"]'

View file

@ -3,19 +3,25 @@
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, bounceStarter
from ..util import tagre
from .common import _WordPressScraper, xpath_class
class DailyDose(_BasicScraper):
url = 'http://dailydoseofcomics.com/'
starter = indirectStarter(url,
compile(tagre("a", "href", r'(http://dailydoseofcomics\.com/[^"]+)', after="preview")))
starter = indirectStarter(
url, compile(tagre("a", "href",
r'(http://dailydoseofcomics\.com/[^"]+)',
after="preview")))
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'([^"]+)', before="align(?:none|center)"))
imageSearch = compile(tagre("img", "src", r'([^"]+)',
before="align(?:none|center)"))
prevSearch = compile(tagre("a", "href", r'(http://dailydoseofcomics\.com/[^"]+)', after="prev"))
help = 'Index format: stripname'
@ -30,8 +36,8 @@ class DamnLol(_BasicScraper):
compile(tagre("img", "src", r'(%spics/[^"]+)' % rurl)),
)
help = 'Index format: stripname-number'
starter = bounceStarter(url,
compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="next")))
starter = bounceStarter(
url, compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="next")))
@classmethod
def namer(cls, imageUrl, pageUrl):
@ -47,7 +53,7 @@ class Damonk(_BasicScraper):
firstStripUrl = stripUrl % '20060522'
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') +
tagre("img", "src", r'/images/previous_day\.gif'))
tagre("img", "src", r'/images/previous_day\.gif'))
help = 'Index format: yyyymmdd'
@ -64,19 +70,15 @@ class DarthsAndDroids(_BasicScraper):
url = 'http://www.darthsanddroids.net/'
stripUrl = url + 'episodes/%s.html'
firstStripUrl = stripUrl % '0001'
prevSearch = compile(tagre("a", "href", r'(/episodes/\d\d\d\d.html)') + '&lt;PREVIOUS' )
prevSearch = compile(tagre("a", "href", r'(/episodes/\d\d\d\d.html)') +
'&lt;PREVIOUS')
imageSearch = compile(tagre("img", "src", r'(/comics/darths\d\d\d\d\.jpg)'))
class DasLebenIstKeinPonyhof(_BasicScraper):
class DasLebenIstKeinPonyhof(_WordPressScraper):
url = 'http://sarahburrini.com/wordpress/'
rurl = escape(url)
stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % 'mein-erster-webcomic'
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))
firstStripUrl = url + 'comic/mein-erster-webcomic/'
multipleImagesPerStrip = True
prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="navi-prev"))
help = 'Index format: stripname'
lang = 'de'
@ -133,7 +135,8 @@ class DieFruehreifen(_BasicScraper):
stripUrl = url + '?id=%s&order=DESC'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'([^"]*/strips/[Ff]rueh_?[Ss]trip_\d+.jpg)'))
prevSearch = compile(tagre("a", "href", r"(index\.php\?id=\d+&order=DESC)") + tagre("img","id",r"naechster"))
prevSearch = compile(tagre("a", "href", r"(index\.php\?id=\d+&order=DESC)") +
tagre("img", "id", r"naechster"))
help = 'Index format: n (unpadded)'
lang = 'de'
@ -144,7 +147,7 @@ class DieselSweeties(_BasicScraper):
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(/hstrips/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/archive/\d+)') +
tagre("img", "src", r'(?:http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png|/ximages/prev\.gif)'))
tagre("img", "src", r'(?:http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png|/ximages/prev\.gif)'))
help = 'Index format: n (unpadded)'
@classmethod
@ -174,19 +177,15 @@ class DMFA(_BasicScraper):
firstStripUrl = stripUrl % '001'
imageSearch = compile(tagre("img", "src", r'((?:Comics/|Vol)[^"]+)'))
multipleImagesPerStrip = True
prevSearch = compile(tagre("a", "href", r'((?:Comics/)?Vol[^"]+)')+
tagre("img", "src", r'(?:../)?Images/comicprev\.gif'))
prevSearch = compile(tagre("a", "href", r'((?:Comics/)?Vol[^"]+)') +
tagre("img", "src", r'(?:../)?Images/comicprev\.gif'))
help = 'Index format: nnn (normally, some specials)'
class DoctorCat(_ParserScraper):
url = "http://doctorcatmd.com/"
stripUrl = url + "comic/%s"
firstStripUrl = stripUrl % "doctor-cat"
css = True
imageSearch = '#comic img'
prevSearch = '.navi-prev'
help = 'Index format: stripname'
class DoctorCat(_WordPressScraper):
url = 'http://doctorcatmd.com/'
firstStripUrl = url + 'comic/doctor-cat'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
class DoemainOfOurOwn(_BasicScraper):
@ -221,8 +220,11 @@ class DorkTower(_BasicScraper):
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '1997/01/01/shadis-magazine-strip-1'
imageSearch = compile(tagre("div", "class", "entry-content") + "\s*<p>\s*" + tagre("img", "src", r'(%sfiles/[0-9]+/[0-9]+/[^"]*Dork[^"]+\.(?:gif|jpg))' % rurl, after=' alt'))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl)+"Previous")
imageSearch = compile(tagre("div", "class", "entry-content") +
"\s*<p>\s*" +
tagre("img", "src", r'(%sfiles/[0-9]+/[0-9]+/[^"]*Dork[^"]+\.(?:gif|jpg))' % rurl,
after=' alt'))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + "Previous")
help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy'
@ -250,9 +252,11 @@ class DresdenCodak(_BasicScraper):
firstStripUrl = url + '2007/02/08/pom/'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) +
tagre("img", "src", r"%sm_prev2?\.png" % rurl, quote=""))
starter = indirectStarter(url, compile(tagre("div", "id", "preview") +
tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl)))
tagre("img", "src", r"%sm_prev2?\.png" % rurl,
quote=""))
starter = indirectStarter(
url, compile(tagre("div", "id", "preview") +
tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl)))
class DrFun(_BasicScraper):
@ -309,6 +313,7 @@ class DumbingOfAge(_BasicScraper):
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
help = 'Index format: yyyy/comic/book-num/seriesname/stripname'
class DungeonsAndDenizens(_BasicScraper):
url = 'http://dungeond.com/'
stripUrl = url + r'\d+/\d+/\d+/%s/'

View file

@ -10,7 +10,7 @@ from re import compile, escape, IGNORECASE
from ..helpers import indirectStarter
from ..scraper import _BasicScraper, _ParserScraper
from ..util import tagre
from .common import _WordPressScraper, WP_LATEST_SEARCH
from .common import _WordPressScraper, WP_LATEST_SEARCH, xpath_class
class EarthsongSaga(_ParserScraper):
@ -83,12 +83,10 @@ class EdibleDirt(_BasicScraper):
help = 'Index format: number'
class EdmundFinney(_ParserScraper):
class EdmundFinney(_WordPressScraper):
url = 'http://eqcomics.com/'
firstStripUrl = url + '2009/03/08/sunday-aliens/'
imageSearch = '//div[@id="comic"]//img'
prevSearch = '//a[@class="navi navi-prev"]'
help = 'Index format: yyyy/mm/dd/stripname'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
class EerieCuties(_BasicScraper):
@ -174,13 +172,10 @@ class ErrantStory(_BasicScraper):
help = 'Index format: yyyy-mm-dd/num'
class Erstwhile(_ParserScraper):
class Erstwhile(_WordPressScraper):
url = 'http://www.erstwhiletales.com/'
stripUrl = url + '%s/'
css = True
imageSearch = 'div.comicpane a img'
prevSearch = 'a.navi-prev'
help = 'Index format: title-nn'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
endOfLife = True
class Eryl(_WordPressScraper):
@ -199,14 +194,10 @@ class EverybodyLovesEricRaymond(_BasicScraper):
help = 'Index format: name-of-old-comic'
class EverydayBlues(_BasicScraper):
class EverydayBlues(_WordPressScraper):
url = 'http://everydayblues.everydayblues.net/'
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2010/02/11/sometimes'
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+/)' % rurl, after="navi-prev"))
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+-[^"]+)' % rurl))
help = 'Index format: yyyy/mm/dd/stripname'
firstStripUrl = url + '2010/02/11/sometimes/'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
class EvilDiva(_BasicScraper):
@ -242,15 +233,11 @@ class Exiern(_BasicScraper):
help = 'Index format: yyyy/mm/dd/stripname'
class ExploitationNow(_BasicScraper):
class ExploitationNow(_WordPressScraper):
url = 'http://www.exploitationnow.com/'
rurl = escape(url)
stripUrl = url + '%s'
firstStripUrl = stripUrl % '2000-07-07/9'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
after="navi-prev"))
help = 'Index format: yyyy-mm-dd/num'
firstStripUrl = url + '2000-07-07/9'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
endOfLife = True
class ExtraLife(_BasicScraper):

View file

@ -4,12 +4,13 @@
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from ..util import tagre
from .common import _ComicControlScraper
from .common import _ComicControlScraper, _WordPressScraper, xpath_class
class Galaxion(_BasicScraper):
@ -150,15 +151,10 @@ class GoneWithTheBlastwave(_BasicScraper):
return '%02d' % int(compile(r'nro=(\d+)').search(pageUrl).group(1))
class GrrlPower(_BasicScraper):
class GrrlPower(_WordPressScraper):
url = 'http://grrlpowercomic.com/'
rurl = escape(url)
stripUrl = url + 'archives/%s'
firstStripUrl = stripUrl % '48'
imageSearch = compile(tagre("img", "src", r'(.*/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(.*/archives/\d+)',
after="navi-prev"))
help = 'Index format: number'
firstStripUrl = url + 'archives/48'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
class GUComics(_BasicScraper):

View file

@ -4,10 +4,12 @@
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape
from ..scraper import _BasicScraper
from ..util import tagre
from .common import _WordPressScraper
from .common import _WordPressScraper, xpath_class
class IAmArg(_BasicScraper):
@ -34,13 +36,11 @@ class IDreamOfAJeanieBottle(_WordPressScraper):
url = 'http://jeaniebottle.com/'
class InternetWebcomic(_BasicScraper):
class InternetWebcomic(_WordPressScraper):
url = 'http://www.internet-webcomic.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '30'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"/]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi navi-prev"))
prevSearch = '//a[%s]' % xpath_class('navi-prev')
help = 'Index format: n'

View file

@ -10,7 +10,7 @@ from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper
from ..util import tagre
from ..helpers import indirectStarter
from .common import _ComicControlScraper
from .common import _ComicControlScraper, _WordPressScraper, xpath_class
class KevinAndKell(_BasicScraper):
@ -37,17 +37,10 @@ class Key(_BasicScraper):
help = 'Index format: nnn'
class KickInTheHead(_BasicScraper):
class KickInTheHead(_WordPressScraper):
url = 'http://www.kickinthehead.org/'
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2003/03/20/ipod-envy'
imageSearch = compile(
tagre("img", "src",
r'(%skickinthehead3/comics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
after="navi-prev"))
help = 'Index format: yyyy/mm/dd/stripname'
firstStripUrl = url + '2003/03/20/ipod-envy/'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
class KiwiBlitz(_ComicControlScraper):

View file

@ -10,7 +10,8 @@ from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from ..util import tagre
from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH
from .common import (_ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH,
xpath_class)
class Namesake(_ComicControlScraper):
@ -108,7 +109,7 @@ class NichtLustig(_BasicScraper):
class Nicky510(_WordPressScraper):
url = 'http://www.nickyitis.com/'
prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev ')]"
prevSearch = '//a[%s]' % xpath_class('navi-prev')
class Nimona(_BasicScraper):

View file

@ -10,7 +10,7 @@ from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from ..util import tagre
from .common import _WordPressScraper
from .common import _WordPressScraper, xpath_class
class OctopusPie(_ParserScraper):
@ -23,17 +23,6 @@ class OctopusPie(_ParserScraper):
help = 'Index format: yyyy-mm-dd/nnn-strip-name'
class OddFish(_BasicScraper):
url = 'http://www.odd-fish.net/'
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % 'tv-tentacles'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
after="navi-prev"))
help = 'Index format: stripname'
class Oglaf(_BasicScraper):
url = 'http://oglaf.com/'
stripUrl = url + '%s/'
@ -48,18 +37,11 @@ class Oglaf(_BasicScraper):
adult = True
class OhJoySexToy(_BasicScraper):
class OhJoySexToy(_WordPressScraper):
url = 'http://www.ohjoysextoy.com/'
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % 'introduction'
imageSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" +
tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
after='navi navi-prev'))
textSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" +
tagre("img", "alt", r'([^"]+)'))
help = 'Index Format: name'
firstStripUrl = url + 'introduction/'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
textSearch = '//div[@id="comic"]//img/@alt'
adult = True
@ -119,16 +101,11 @@ class OnTheFastrack(_BasicScraper):
return "%s.gif" % name.title()
class Optipess(_BasicScraper):
class Optipess(_WordPressScraper):
url = 'http://www.optipess.com/'
stripUrl = url + '%s'
firstStripUrl = url + '2008/12/01/jason-friend-of-the-butterflies/'
imageSearch = compile(tagre("img", "src",
r'(%scomics/[x|\d]+[^"]+\.[^"]+)' % url))
prevSearch = compile(tagre("a", "href", r'([^"]+)',
after="navi navi-prev"))
textSearch = compile(tagre("img", "alt", r'([^"]+)', before=url))
help = 'Index format: yyyy/mm/dd/stripname'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
textSearch = '//div[@id="comic"]//img/@alt'
class OrnerBoy(_BasicScraper):

View file

@ -4,11 +4,13 @@
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import bounceStarter, queryNamer, indirectStarter
from ..util import tagre
from .common import _ComicControlScraper, _WordPressScraper
from .common import _ComicControlScraper, _WordPressScraper, xpath_class
class PandyLand(_WordPressScraper):
@ -40,14 +42,9 @@ class ParallelUniversum(_BasicScraper):
lang = 'de'
class PartiallyClips(_BasicScraper):
class PartiallyClips(_WordPressScraper):
url = 'http://partiallyclips.com/'
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2001/10/28/screaming-woman'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname'
firstStripUrl = url + 'comic/screaming-woman/'
class PastelDefender(_BasicScraper):
@ -252,25 +249,14 @@ class PS238(_ParserScraper):
help = 'Index format: yyyy-mm-dd'
class PunksAndNerds(_BasicScraper):
class PunksAndNerds(_WordPressScraper):
url = 'http://www.punksandnerds.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '15'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
after="navi-prev"))
prevSearch = '//a[%s]' % xpath_class('navi-prev')
help = 'Index format: nnn'
class PunksAndNerdsOld(_BasicScraper):
url = 'http://original.punksandnerds.com/'
stripUrl = url + 'd/%s.html'
imageSearch = compile(r' src="(/comics/.+?)"')
prevSearch = compile(r'><strong><a href="(.+?)"[^>]+?><img[^>]+?src="/previouscomic.gif">')
help = 'Index format: yyyymmdd'
class PvPonline(_BasicScraper):
url = 'http://pvponline.com/comic'
stripUrl = url + '%s'

View file

@ -12,7 +12,8 @@ import datetime
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, bounceStarter
from ..util import tagre
from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH
from .common import (_ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH,
xpath_class)
class SabrinaOnline(_BasicScraper):
@ -36,15 +37,10 @@ class SabrinaOnline(_BasicScraper):
return archivepages[-1]
class SafelyEndangered(_BasicScraper):
class SafelyEndangered(_WordPressScraper):
url = 'http://www.safelyendangered.com/'
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'ignored'
imageSearch = compile(tagre("img", "src", r'(http://www\.safelyendangered\.com/wp-content/uploads/\d+/\d+/[^"]+\.[a-z]+).*'))
prevSearch = compile(tagre("a", "href", r'([^"]+)',
after="navi navi-prev"))
textSearch = compile(tagre("img", "title", r'([^"]+)', before=r'http://www\.safelyendangered\.com/wp-content/uploads'))
help = 'Index format: yyyy/mm/stripname'
firstStripUrl = url + 'comic/ignored/'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
class SailorsunOrg(_WordPressScraper):
@ -209,15 +205,9 @@ class ShermansLagoon(_BasicScraper):
return "%s-%s-%s" % (year, month, day)
class Shivae(_BasicScraper):
url = 'http://shivae.net/'
rurl = escape(url)
stripUrl = url + 'blog/%s/'
firstStripUrl = stripUrl % '2007/09/21/09212007'
imageSearch = compile(tagre("img", "src", r'(%swp-content/blogs\.dir/\d+/files/\d+/\d+/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sblog/[^"]+)' % rurl,
after="navi-prev"))
help = 'Index format: yyyy/mm/dd/stripname'
class Shivae(_WordPressScraper):
url = 'http://shivae.com/'
firstStripUrl = url + 'gnip/ck-chapter-01/caidenkoel-title-01/'
class Shortpacked(_ParserScraper):
@ -229,14 +219,9 @@ class Shortpacked(_ParserScraper):
help = 'Index format: nnn'
class ShotgunShuffle(_BasicScraper):
class ShotgunShuffle(_WordPressScraper):
url = 'http://shotgunshuffle.com/'
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'pilot/'
imageSearch = compile(tagre("img", "src", r'(http://shotgunshuffle.com/wp-content/uploads/\d+/\d+/\d+-[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)',
after="navi navi-prev"))
help = 'Index format: stripname'
firstStripUrl = url + 'comic/pilot/'
class SinFest(_BasicScraper):
@ -362,7 +347,7 @@ class SpaceTrawler(_WordPressScraper):
base_url = 'http://spacetrawler.com/'
url = base_url + '2013/12/24/spacetrawler-379/'
firstStripUrl = base_url + '2010/01/01/spacetrawler-4/'
prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev ')]"
prevSearch = '//a[%s]' % xpath_class('navi-prev')
endOfLife = True

View file

@ -4,22 +4,19 @@
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape
from ..scraper import _BasicScraper
from ..helpers import indirectStarter
from ..util import tagre
from .common import _WordPressScraper, xpath_class
class Underling(_BasicScraper):
class Underling(_WordPressScraper):
url = 'http://underlingcomic.com/'
stripUrl = url
rurl = escape(url)
firstStripUrl = stripUrl + 'page-one/'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]*)' % rurl))
prevSearch = compile(tagre("a", "href", r'([^"]+)',
after=r'class="[^"]*navi-prev'))
help = 'Index format: nnn'
firstStripUrl = url + 'page-one/'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
class Undertow(_BasicScraper):

View file

@ -4,11 +4,13 @@
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper
from ..util import tagre
from ..helpers import indirectStarter
from .common import _ComicControlScraper, _WordPressScraper
class WapsiSquare(_BasicScraper):
@ -108,15 +110,10 @@ class WhiteNoise(_BasicScraper):
help = 'Index format: n'
class Whomp(_BasicScraper):
class Whomp(_ComicControlScraper):
url = 'http://www.whompcomic.com/'
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2010/06/14/06142010'
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
after="navi-prev"))
help = 'Index format: yyyy/mm/dd/stripname'
firstStripUrl = url + 'comic/06152010'
textSearch = '//img[@id="cc-comic"]/@title'
class WhyTheLongFace(_BasicScraper):
@ -193,16 +190,13 @@ class WorldOfMrToast(_BasicScraper):
def getPrevUrl(self, url, data, baseUrl):
idx = self.prevurls.index(url)
try:
return self.prevurls[idx+1]
return self.prevurls[idx + 1]
except IndexError:
return None
class WorldOfWarcraftEh(_BasicScraper):
class WorldOfWarcraftEh(_WordPressScraper):
url = 'http://woweh.com/'
stripUrl = None
imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
prevSearch = compile(r'woweh.com/(\?p=.+:?)".+:?="prev')
class WormWorldSaga(_BasicScraper):
@ -242,12 +236,3 @@ class WormWorldSagaGerman(WormWorldSaga):
class WormWorldSagaSpanish(WormWorldSaga):
lang = 'es'
class WotNow(_BasicScraper):
url = 'http://shadowburn.binmode.com/wotnow/'
stripUrl = url + 'comic.php?comic_id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(r'<IMG SRC="(comics/.+?)"')
prevSearch = compile(r'<A HREF="(.+?)"><IMG SRC="images/b_prev.gif" ')
help = 'Index format: n (unpadded)'

View file

@ -10,6 +10,7 @@ from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..util import tagre
from ..helpers import bounceStarter
from .common import _WordPressScraper, xpath_class
class ZapComic(_ParserScraper):
@ -38,16 +39,11 @@ class Zapiro(_BasicScraper):
return name
class ZenPencils(_BasicScraper):
class ZenPencils(_WordPressScraper):
url = 'http://zenpencils.com/'
rurl = escape(url)
multipleImagesPerStrip = True
stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % '1-ralph-waldo-emerson-make-them-cry'
prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+/)' % rurl,
after="navi-prev"))
imageSearch = compile(tagre("img", "src", r'(http://cdn\.zenpencils\.com/wp-content/uploads/\d+[^"]+)'))
help = 'Index format: num-stripname'
firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
class ZombieHunters(_BasicScraper):