Move more comics to common WordPressScraper.
This commit is contained in:
parent
f6e605e146
commit
fa98f6ddbf
16 changed files with 137 additions and 231 deletions
|
@ -4,11 +4,13 @@
|
|||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile, escape, MULTILINE
|
||||
|
||||
from ..util import tagre
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import regexNamer, bounceStarter, indirectStarter
|
||||
from .common import _WordPressScraper, _ComicPressScraper, WP_LATEST_SEARCH
|
||||
from .common import _WordPressScraper, xpath_class, WP_LATEST_SEARCH
|
||||
|
||||
|
||||
class AbstruseGoose(_BasicScraper):
|
||||
|
@ -67,15 +69,14 @@ class Achewood(_BasicScraper):
|
|||
namer = regexNamer(compile(r'date=(\d+)'))
|
||||
|
||||
|
||||
class AfterStrife(_BasicScraper):
|
||||
class AfterStrife(_WordPressScraper):
|
||||
baseUrl = 'http://afterstrife.com/'
|
||||
rurl = escape(baseUrl)
|
||||
stripUrl = baseUrl + '?p=%s'
|
||||
url = stripUrl % '262'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = compile(r'<img src="(%sstrips/.+?)"' % rurl)
|
||||
prevSearch = compile(r'<a href="(.+?)" class="navi navi-prev"')
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
help = 'Index format: nnn'
|
||||
endOfLife = True
|
||||
|
||||
|
||||
class AGirlAndHerFed(_BasicScraper):
|
||||
|
@ -99,13 +100,9 @@ class AhoiPolloi(_ParserScraper):
|
|||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class AhoyEarth(_ParserScraper):
|
||||
class AhoyEarth(_WordPressScraper):
|
||||
url = 'http://www.ahoyearth.com/'
|
||||
stripUrl = url + '%s/'
|
||||
css = True
|
||||
imageSearch = '#comic-1 img'
|
||||
prevSearch = '.navi-prev'
|
||||
help = 'Index format: ddmmyyyy'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
|
||||
|
||||
class AirForceBlues(_WordPressScraper):
|
||||
|
@ -124,8 +121,9 @@ class ALessonIsLearned(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class Alice(_ComicPressScraper):
|
||||
class Alice(_WordPressScraper):
|
||||
url = 'http://www.alicecomics.com/'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
|
||||
starter = indirectStarter('http://www.alicecomics.com/',
|
||||
'//a[text()="Latest Alice!"]')
|
||||
|
||||
|
|
|
@ -10,8 +10,8 @@ from re import compile, escape
|
|||
from ..util import tagre
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from .common import (_ComicControlScraper, _ComicPressScraper,
|
||||
_WordPressScraper, WP_PREV_SEARCH)
|
||||
from .common import (_ComicControlScraper, _WordPressScraper, WP_PREV_SEARCH,
|
||||
xpath_class)
|
||||
|
||||
|
||||
class BackwaterPlanet(_BasicScraper):
|
||||
|
@ -61,15 +61,10 @@ class Baroquen(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/strip-name'
|
||||
|
||||
|
||||
class Bearmageddon(_BasicScraper):
|
||||
class Bearmageddon(_WordPressScraper):
|
||||
url = 'http://bearmageddon.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '2011/08/01/page-1'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
|
||||
after='navi-prev'))
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
firstStripUrl = url + '2011/08/01/page-1/'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
|
||||
|
||||
class Beetlebum(_BasicScraper):
|
||||
|
@ -334,8 +329,9 @@ class Buni(_WordPressScraper):
|
|||
url = 'http://www.bunicomic.com/'
|
||||
|
||||
|
||||
class BusinessCat(_ComicPressScraper):
|
||||
class BusinessCat(_WordPressScraper):
|
||||
url = 'http://www.businesscat.happyjar.com/'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
|
||||
|
||||
|
||||
class ButtercupFestival(_ParserScraper):
|
||||
|
|
|
@ -10,7 +10,7 @@ from re import compile, escape
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import bounceStarter, indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper, _ComicPressScraper
|
||||
from .common import _WordPressScraper
|
||||
|
||||
|
||||
class Caggage(_BasicScraper):
|
||||
|
@ -295,8 +295,9 @@ class CowboyJedi(_WordPressScraper):
|
|||
url = 'http://www.cowboyjedi.com/'
|
||||
|
||||
|
||||
class CraftedFables(_ComicPressScraper):
|
||||
class CraftedFables(_WordPressScraper):
|
||||
url = 'http://www.caf-fiends.net/comicpress/'
|
||||
prevSearch = '//a[@rel="prev"]'
|
||||
|
||||
|
||||
class CrapIDrewOnMyLunchBreak(_BasicScraper):
|
||||
|
|
|
@ -12,8 +12,15 @@ from ..scraper import _ParserScraper
|
|||
# please don't use lists of expressions, as that makes it hard to track which
|
||||
# expression is for which comics.
|
||||
|
||||
WP_LATEST_SEARCH = '//a[contains(concat(" ", @class, " "), " comic-nav-last ")]'
|
||||
WP_PREV_SEARCH = '//a[contains(concat(" ", @class, " "), " comic-nav-previous ")]'
|
||||
|
||||
def xpath_class(name):
    """Return an XPath predicate which matches a tag having a given class.

    The result is a bare boolean expression (without surrounding brackets)
    and is meant to be embedded in a location step by the caller, e.g.
    ``'//a[%s]' % xpath_class('navi-prev')``.

    The ``class`` attribute is padded with spaces on both sides so that the
    class name only matches as a whole token ("navi-prev" does not match
    "navi-prev-in"). ``normalize-space()`` collapses tabs, newlines and
    repeated blanks first, so classes separated by whitespace other than a
    single space are found as well.

    :param name: the class name to look for; it is inserted verbatim, so it
        must not contain a double quote.
    :returns: the XPath predicate as a string.
    """
    # The one-element tuple keeps the formatting correct even if a caller
    # accidentally passes a tuple as name.
    return ('contains(concat(" ", normalize-space(@class), " "), " %s ")' %
            (name,))
|
||||
|
||||
|
||||
WP_LATEST_SEARCH = '//a[%s]' % xpath_class('comic-nav-last')
|
||||
WP_PREV_SEARCH = '//a[%s]' % xpath_class('comic-nav-previous')
|
||||
|
||||
|
||||
class _WordPressScraper(_ParserScraper):
|
||||
|
@ -21,10 +28,6 @@ class _WordPressScraper(_ParserScraper):
|
|||
prevSearch = WP_PREV_SEARCH
|
||||
|
||||
|
||||
class _ComicPressScraper(_WordPressScraper):
|
||||
prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev-in ')]"
|
||||
|
||||
|
||||
class _ComicControlScraper(_ParserScraper):
|
||||
imageSearch = '//img[@id="cc-comic"]'
|
||||
prevSearch = '//a[@rel="prev"]'
|
||||
|
|
|
@ -3,19 +3,25 @@
|
|||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile, escape
|
||||
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter, bounceStarter
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper, xpath_class
|
||||
|
||||
|
||||
class DailyDose(_BasicScraper):
|
||||
url = 'http://dailydoseofcomics.com/'
|
||||
starter = indirectStarter(url,
|
||||
compile(tagre("a", "href", r'(http://dailydoseofcomics\.com/[^"]+)', after="preview")))
|
||||
starter = indirectStarter(
|
||||
url, compile(tagre("a", "href",
|
||||
r'(http://dailydoseofcomics\.com/[^"]+)',
|
||||
after="preview")))
|
||||
stripUrl = url + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'([^"]+)', before="align(?:none|center)"))
|
||||
imageSearch = compile(tagre("img", "src", r'([^"]+)',
|
||||
before="align(?:none|center)"))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://dailydoseofcomics\.com/[^"]+)', after="prev"))
|
||||
help = 'Index format: stripname'
|
||||
|
||||
|
@ -30,8 +36,8 @@ class DamnLol(_BasicScraper):
|
|||
compile(tagre("img", "src", r'(%spics/[^"]+)' % rurl)),
|
||||
)
|
||||
help = 'Index format: stripname-number'
|
||||
starter = bounceStarter(url,
|
||||
compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="next")))
|
||||
starter = bounceStarter(
|
||||
url, compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="next")))
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
|
@ -64,19 +70,15 @@ class DarthsAndDroids(_BasicScraper):
|
|||
url = 'http://www.darthsanddroids.net/'
|
||||
stripUrl = url + 'episodes/%s.html'
|
||||
firstStripUrl = stripUrl % '0001'
|
||||
prevSearch = compile(tagre("a", "href", r'(/episodes/\d\d\d\d.html)') + '<PREVIOUS' )
|
||||
prevSearch = compile(tagre("a", "href", r'(/episodes/\d\d\d\d.html)') +
|
||||
'<PREVIOUS')
|
||||
imageSearch = compile(tagre("img", "src", r'(/comics/darths\d\d\d\d\.jpg)'))
|
||||
|
||||
|
||||
class DasLebenIstKeinPonyhof(_BasicScraper):
|
||||
class DasLebenIstKeinPonyhof(_WordPressScraper):
|
||||
url = 'http://sarahburrini.com/wordpress/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'comic/%s/'
|
||||
firstStripUrl = stripUrl % 'mein-erster-webcomic'
|
||||
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))
|
||||
firstStripUrl = url + 'comic/mein-erster-webcomic/'
|
||||
multipleImagesPerStrip = True
|
||||
prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="navi-prev"))
|
||||
help = 'Index format: stripname'
|
||||
lang = 'de'
|
||||
|
||||
|
||||
|
@ -133,7 +135,8 @@ class DieFruehreifen(_BasicScraper):
|
|||
stripUrl = url + '?id=%s&order=DESC'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = compile(tagre("img", "src", r'([^"]*/strips/[Ff]rueh_?[Ss]trip_\d+.jpg)'))
|
||||
prevSearch = compile(tagre("a", "href", r"(index\.php\?id=\d+&order=DESC)") + tagre("img","id",r"naechster"))
|
||||
prevSearch = compile(tagre("a", "href", r"(index\.php\?id=\d+&order=DESC)") +
|
||||
tagre("img", "id", r"naechster"))
|
||||
help = 'Index format: n (unpadded)'
|
||||
lang = 'de'
|
||||
|
||||
|
@ -174,19 +177,15 @@ class DMFA(_BasicScraper):
|
|||
firstStripUrl = stripUrl % '001'
|
||||
imageSearch = compile(tagre("img", "src", r'((?:Comics/|Vol)[^"]+)'))
|
||||
multipleImagesPerStrip = True
|
||||
prevSearch = compile(tagre("a", "href", r'((?:Comics/)?Vol[^"]+)')+
|
||||
prevSearch = compile(tagre("a", "href", r'((?:Comics/)?Vol[^"]+)') +
|
||||
tagre("img", "src", r'(?:../)?Images/comicprev\.gif'))
|
||||
help = 'Index format: nnn (normally, some specials)'
|
||||
|
||||
|
||||
class DoctorCat(_ParserScraper):
|
||||
url = "http://doctorcatmd.com/"
|
||||
stripUrl = url + "comic/%s"
|
||||
firstStripUrl = stripUrl % "doctor-cat"
|
||||
css = True
|
||||
imageSearch = '#comic img'
|
||||
prevSearch = '.navi-prev'
|
||||
help = 'Index format: stripname'
|
||||
class DoctorCat(_WordPressScraper):
|
||||
url = 'http://doctorcatmd.com/'
|
||||
firstStripUrl = url + 'comic/doctor-cat'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
|
||||
|
||||
class DoemainOfOurOwn(_BasicScraper):
|
||||
|
@ -221,8 +220,11 @@ class DorkTower(_BasicScraper):
|
|||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '1997/01/01/shadis-magazine-strip-1'
|
||||
imageSearch = compile(tagre("div", "class", "entry-content") + "\s*<p>\s*" + tagre("img", "src", r'(%sfiles/[0-9]+/[0-9]+/[^"]*Dork[^"]+\.(?:gif|jpg))' % rurl, after=' alt'))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl)+"Previous")
|
||||
imageSearch = compile(tagre("div", "class", "entry-content") +
|
||||
"\s*<p>\s*" +
|
||||
tagre("img", "src", r'(%sfiles/[0-9]+/[0-9]+/[^"]*Dork[^"]+\.(?:gif|jpg))' % rurl,
|
||||
after=' alt'))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + "Previous")
|
||||
help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy'
|
||||
|
||||
|
||||
|
@ -250,8 +252,10 @@ class DresdenCodak(_BasicScraper):
|
|||
firstStripUrl = url + '2007/02/08/pom/'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) +
|
||||
tagre("img", "src", r"%sm_prev2?\.png" % rurl, quote=""))
|
||||
starter = indirectStarter(url, compile(tagre("div", "id", "preview") +
|
||||
tagre("img", "src", r"%sm_prev2?\.png" % rurl,
|
||||
quote=""))
|
||||
starter = indirectStarter(
|
||||
url, compile(tagre("div", "id", "preview") +
|
||||
tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl)))
|
||||
|
||||
|
||||
|
@ -309,6 +313,7 @@ class DumbingOfAge(_BasicScraper):
|
|||
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
||||
help = 'Index format: yyyy/comic/book-num/seriesname/stripname'
|
||||
|
||||
|
||||
class DungeonsAndDenizens(_BasicScraper):
|
||||
url = 'http://dungeond.com/'
|
||||
stripUrl = url + r'\d+/\d+/\d+/%s/'
|
||||
|
|
|
@ -10,7 +10,7 @@ from re import compile, escape, IGNORECASE
|
|||
from ..helpers import indirectStarter
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper, WP_LATEST_SEARCH
|
||||
from .common import _WordPressScraper, WP_LATEST_SEARCH, xpath_class
|
||||
|
||||
|
||||
class EarthsongSaga(_ParserScraper):
|
||||
|
@ -83,12 +83,10 @@ class EdibleDirt(_BasicScraper):
|
|||
help = 'Index format: number'
|
||||
|
||||
|
||||
class EdmundFinney(_ParserScraper):
|
||||
class EdmundFinney(_WordPressScraper):
|
||||
url = 'http://eqcomics.com/'
|
||||
firstStripUrl = url + '2009/03/08/sunday-aliens/'
|
||||
imageSearch = '//div[@id="comic"]//img'
|
||||
prevSearch = '//a[@class="navi navi-prev"]'
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
|
||||
|
||||
class EerieCuties(_BasicScraper):
|
||||
|
@ -174,13 +172,10 @@ class ErrantStory(_BasicScraper):
|
|||
help = 'Index format: yyyy-mm-dd/num'
|
||||
|
||||
|
||||
class Erstwhile(_ParserScraper):
|
||||
class Erstwhile(_WordPressScraper):
|
||||
url = 'http://www.erstwhiletales.com/'
|
||||
stripUrl = url + '%s/'
|
||||
css = True
|
||||
imageSearch = 'div.comicpane a img'
|
||||
prevSearch = 'a.navi-prev'
|
||||
help = 'Index format: title-nn'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
endOfLife = True
|
||||
|
||||
|
||||
class Eryl(_WordPressScraper):
|
||||
|
@ -199,14 +194,10 @@ class EverybodyLovesEricRaymond(_BasicScraper):
|
|||
help = 'Index format: name-of-old-comic'
|
||||
|
||||
|
||||
class EverydayBlues(_BasicScraper):
|
||||
class EverydayBlues(_WordPressScraper):
|
||||
url = 'http://everydayblues.everydayblues.net/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '2010/02/11/sometimes'
|
||||
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+/)' % rurl, after="navi-prev"))
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+-[^"]+)' % rurl))
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
firstStripUrl = url + '2010/02/11/sometimes/'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
|
||||
|
||||
class EvilDiva(_BasicScraper):
|
||||
|
@ -242,15 +233,11 @@ class Exiern(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
|
||||
|
||||
class ExploitationNow(_BasicScraper):
|
||||
class ExploitationNow(_WordPressScraper):
|
||||
url = 'http://www.exploitationnow.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s'
|
||||
firstStripUrl = stripUrl % '2000-07-07/9'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
|
||||
after="navi-prev"))
|
||||
help = 'Index format: yyyy-mm-dd/num'
|
||||
firstStripUrl = url + '2000-07-07/9'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
endOfLife = True
|
||||
|
||||
|
||||
class ExtraLife(_BasicScraper):
|
||||
|
|
|
@ -4,12 +4,13 @@
|
|||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile, escape
|
||||
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _ComicControlScraper
|
||||
from .common import _ComicControlScraper, _WordPressScraper, xpath_class
|
||||
|
||||
|
||||
class Galaxion(_BasicScraper):
|
||||
|
@ -150,15 +151,10 @@ class GoneWithTheBlastwave(_BasicScraper):
|
|||
return '%02d' % int(compile(r'nro=(\d+)').search(pageUrl).group(1))
|
||||
|
||||
|
||||
class GrrlPower(_BasicScraper):
|
||||
class GrrlPower(_WordPressScraper):
|
||||
url = 'http://grrlpowercomic.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'archives/%s'
|
||||
firstStripUrl = stripUrl % '48'
|
||||
imageSearch = compile(tagre("img", "src", r'(.*/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(.*/archives/\d+)',
|
||||
after="navi-prev"))
|
||||
help = 'Index format: number'
|
||||
firstStripUrl = url + 'archives/48'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
|
||||
|
||||
class GUComics(_BasicScraper):
|
||||
|
|
|
@ -4,10 +4,12 @@
|
|||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile, escape
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper
|
||||
from .common import _WordPressScraper, xpath_class
|
||||
|
||||
|
||||
class IAmArg(_BasicScraper):
|
||||
|
@ -34,13 +36,11 @@ class IDreamOfAJeanieBottle(_WordPressScraper):
|
|||
url = 'http://jeaniebottle.com/'
|
||||
|
||||
|
||||
class InternetWebcomic(_BasicScraper):
|
||||
class InternetWebcomic(_WordPressScraper):
|
||||
url = 'http://www.internet-webcomic.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '?p=%s'
|
||||
firstStripUrl = stripUrl % '30'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"/]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi navi-prev"))
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
help = 'Index format: n'
|
||||
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ from re import compile, escape, IGNORECASE
|
|||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
from ..helpers import indirectStarter
|
||||
from .common import _ComicControlScraper
|
||||
from .common import _ComicControlScraper, _WordPressScraper, xpath_class
|
||||
|
||||
|
||||
class KevinAndKell(_BasicScraper):
|
||||
|
@ -37,17 +37,10 @@ class Key(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class KickInTheHead(_BasicScraper):
|
||||
class KickInTheHead(_WordPressScraper):
|
||||
url = 'http://www.kickinthehead.org/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '2003/03/20/ipod-envy'
|
||||
imageSearch = compile(
|
||||
tagre("img", "src",
|
||||
r'(%skickinthehead3/comics/\d+-\d+-\d+[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
|
||||
after="navi-prev"))
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
firstStripUrl = url + '2003/03/20/ipod-envy/'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
|
||||
|
||||
class KiwiBlitz(_ComicControlScraper):
|
||||
|
|
|
@ -10,7 +10,8 @@ from re import compile, escape
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH
|
||||
from .common import (_ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH,
|
||||
xpath_class)
|
||||
|
||||
|
||||
class Namesake(_ComicControlScraper):
|
||||
|
@ -108,7 +109,7 @@ class NichtLustig(_BasicScraper):
|
|||
|
||||
class Nicky510(_WordPressScraper):
|
||||
url = 'http://www.nickyitis.com/'
|
||||
prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev ')]"
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
|
||||
|
||||
class Nimona(_BasicScraper):
|
||||
|
|
|
@ -10,7 +10,7 @@ from re import compile, escape
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper
|
||||
from .common import _WordPressScraper, xpath_class
|
||||
|
||||
|
||||
class OctopusPie(_ParserScraper):
|
||||
|
@ -23,17 +23,6 @@ class OctopusPie(_ParserScraper):
|
|||
help = 'Index format: yyyy-mm-dd/nnn-strip-name'
|
||||
|
||||
|
||||
class OddFish(_BasicScraper):
|
||||
url = 'http://www.odd-fish.net/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % 'tv-tentacles'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
|
||||
after="navi-prev"))
|
||||
help = 'Index format: stripname'
|
||||
|
||||
|
||||
class Oglaf(_BasicScraper):
|
||||
url = 'http://oglaf.com/'
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -48,18 +37,11 @@ class Oglaf(_BasicScraper):
|
|||
adult = True
|
||||
|
||||
|
||||
class OhJoySexToy(_BasicScraper):
|
||||
class OhJoySexToy(_WordPressScraper):
|
||||
url = 'http://www.ohjoysextoy.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % 'introduction'
|
||||
imageSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" +
|
||||
tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
|
||||
after='navi navi-prev'))
|
||||
textSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" +
|
||||
tagre("img", "alt", r'([^"]+)'))
|
||||
help = 'Index Format: name'
|
||||
firstStripUrl = url + 'introduction/'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
textSearch = '//div[@id="comic"]//img/@alt'
|
||||
adult = True
|
||||
|
||||
|
||||
|
@ -119,16 +101,11 @@ class OnTheFastrack(_BasicScraper):
|
|||
return "%s.gif" % name.title()
|
||||
|
||||
|
||||
class Optipess(_BasicScraper):
|
||||
class Optipess(_WordPressScraper):
|
||||
url = 'http://www.optipess.com/'
|
||||
stripUrl = url + '%s'
|
||||
firstStripUrl = url + '2008/12/01/jason-friend-of-the-butterflies/'
|
||||
imageSearch = compile(tagre("img", "src",
|
||||
r'(%scomics/[x|\d]+[^"]+\.[^"]+)' % url))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||
after="navi navi-prev"))
|
||||
textSearch = compile(tagre("img", "alt", r'([^"]+)', before=url))
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
textSearch = '//div[@id="comic"]//img/@alt'
|
||||
|
||||
|
||||
class OrnerBoy(_BasicScraper):
|
||||
|
|
|
@ -4,11 +4,13 @@
|
|||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile, escape
|
||||
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import bounceStarter, queryNamer, indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _ComicControlScraper, _WordPressScraper
|
||||
from .common import _ComicControlScraper, _WordPressScraper, xpath_class
|
||||
|
||||
|
||||
class PandyLand(_WordPressScraper):
|
||||
|
@ -40,14 +42,9 @@ class ParallelUniversum(_BasicScraper):
|
|||
lang = 'de'
|
||||
|
||||
|
||||
class PartiallyClips(_BasicScraper):
|
||||
class PartiallyClips(_WordPressScraper):
|
||||
url = 'http://partiallyclips.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '2001/10/28/screaming-woman'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
firstStripUrl = url + 'comic/screaming-woman/'
|
||||
|
||||
|
||||
class PastelDefender(_BasicScraper):
|
||||
|
@ -252,25 +249,14 @@ class PS238(_ParserScraper):
|
|||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
class PunksAndNerds(_BasicScraper):
|
||||
class PunksAndNerds(_WordPressScraper):
|
||||
url = 'http://www.punksandnerds.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '?p=%s'
|
||||
firstStripUrl = stripUrl % '15'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
|
||||
after="navi-prev"))
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class PunksAndNerdsOld(_BasicScraper):
|
||||
url = 'http://original.punksandnerds.com/'
|
||||
stripUrl = url + 'd/%s.html'
|
||||
imageSearch = compile(r' src="(/comics/.+?)"')
|
||||
prevSearch = compile(r'><strong><a href="(.+?)"[^>]+?><img[^>]+?src="/previouscomic.gif">')
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class PvPonline(_BasicScraper):
|
||||
url = 'http://pvponline.com/comic'
|
||||
stripUrl = url + '%s'
|
||||
|
|
|
@ -12,7 +12,8 @@ import datetime
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter, bounceStarter
|
||||
from ..util import tagre
|
||||
from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH
|
||||
from .common import (_ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH,
|
||||
xpath_class)
|
||||
|
||||
|
||||
class SabrinaOnline(_BasicScraper):
|
||||
|
@ -36,15 +37,10 @@ class SabrinaOnline(_BasicScraper):
|
|||
return archivepages[-1]
|
||||
|
||||
|
||||
class SafelyEndangered(_BasicScraper):
|
||||
class SafelyEndangered(_WordPressScraper):
|
||||
url = 'http://www.safelyendangered.com/'
|
||||
stripUrl = url + 'comic/%s'
|
||||
firstStripUrl = stripUrl % 'ignored'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.safelyendangered\.com/wp-content/uploads/\d+/\d+/[^"]+\.[a-z]+).*'))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||
after="navi navi-prev"))
|
||||
textSearch = compile(tagre("img", "title", r'([^"]+)', before=r'http://www\.safelyendangered\.com/wp-content/uploads'))
|
||||
help = 'Index format: yyyy/mm/stripname'
|
||||
firstStripUrl = url + 'comic/ignored/'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
|
||||
|
||||
class SailorsunOrg(_WordPressScraper):
|
||||
|
@ -209,15 +205,9 @@ class ShermansLagoon(_BasicScraper):
|
|||
return "%s-%s-%s" % (year, month, day)
|
||||
|
||||
|
||||
class Shivae(_BasicScraper):
|
||||
url = 'http://shivae.net/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'blog/%s/'
|
||||
firstStripUrl = stripUrl % '2007/09/21/09212007'
|
||||
imageSearch = compile(tagre("img", "src", r'(%swp-content/blogs\.dir/\d+/files/\d+/\d+/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%sblog/[^"]+)' % rurl,
|
||||
after="navi-prev"))
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
class Shivae(_WordPressScraper):
|
||||
url = 'http://shivae.com/'
|
||||
firstStripUrl = url + 'gnip/ck-chapter-01/caidenkoel-title-01/'
|
||||
|
||||
|
||||
class Shortpacked(_ParserScraper):
|
||||
|
@ -229,14 +219,9 @@ class Shortpacked(_ParserScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class ShotgunShuffle(_BasicScraper):
|
||||
class ShotgunShuffle(_WordPressScraper):
|
||||
url = 'http://shotgunshuffle.com/'
|
||||
stripUrl = url + 'comic/%s'
|
||||
firstStripUrl = stripUrl % 'pilot/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://shotgunshuffle.com/wp-content/uploads/\d+/\d+/\d+-[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||
after="navi navi-prev"))
|
||||
help = 'Index format: stripname'
|
||||
firstStripUrl = url + 'comic/pilot/'
|
||||
|
||||
|
||||
class SinFest(_BasicScraper):
|
||||
|
@ -362,7 +347,7 @@ class SpaceTrawler(_WordPressScraper):
|
|||
base_url = 'http://spacetrawler.com/'
|
||||
url = base_url + '2013/12/24/spacetrawler-379/'
|
||||
firstStripUrl = base_url + '2010/01/01/spacetrawler-4/'
|
||||
prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev ')]"
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
endOfLife = True
|
||||
|
||||
|
||||
|
|
|
@ -4,22 +4,19 @@
|
|||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile, escape
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper, xpath_class
|
||||
|
||||
|
||||
class Underling(_BasicScraper):
|
||||
class Underling(_WordPressScraper):
|
||||
url = 'http://underlingcomic.com/'
|
||||
stripUrl = url
|
||||
rurl = escape(url)
|
||||
firstStripUrl = stripUrl + 'page-one/'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]*)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||
after=r'class="[^"]*navi-prev'))
|
||||
help = 'Index format: nnn'
|
||||
firstStripUrl = url + 'page-one/'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
|
||||
|
||||
class Undertow(_BasicScraper):
|
||||
|
|
|
@ -4,11 +4,13 @@
|
|||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile, escape, IGNORECASE
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
from ..helpers import indirectStarter
|
||||
from .common import _ComicControlScraper, _WordPressScraper
|
||||
|
||||
|
||||
class WapsiSquare(_BasicScraper):
|
||||
|
@ -108,15 +110,10 @@ class WhiteNoise(_BasicScraper):
|
|||
help = 'Index format: n'
|
||||
|
||||
|
||||
class Whomp(_BasicScraper):
|
||||
class Whomp(_ComicControlScraper):
|
||||
url = 'http://www.whompcomic.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '2010/06/14/06142010'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
|
||||
after="navi-prev"))
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
firstStripUrl = url + 'comic/06152010'
|
||||
textSearch = '//img[@id="cc-comic"]/@title'
|
||||
|
||||
|
||||
class WhyTheLongFace(_BasicScraper):
|
||||
|
@ -193,16 +190,13 @@ class WorldOfMrToast(_BasicScraper):
|
|||
def getPrevUrl(self, url, data, baseUrl):
|
||||
idx = self.prevurls.index(url)
|
||||
try:
|
||||
return self.prevurls[idx+1]
|
||||
return self.prevurls[idx + 1]
|
||||
except IndexError:
|
||||
return None
|
||||
|
||||
|
||||
class WorldOfWarcraftEh(_BasicScraper):
|
||||
class WorldOfWarcraftEh(_WordPressScraper):
|
||||
url = 'http://woweh.com/'
|
||||
stripUrl = None
|
||||
imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
|
||||
prevSearch = compile(r'woweh.com/(\?p=.+:?)".+:?="prev')
|
||||
|
||||
|
||||
class WormWorldSaga(_BasicScraper):
|
||||
|
@ -242,12 +236,3 @@ class WormWorldSagaGerman(WormWorldSaga):
|
|||
|
||||
class WormWorldSagaSpanish(WormWorldSaga):
|
||||
lang = 'es'
|
||||
|
||||
|
||||
class WotNow(_BasicScraper):
|
||||
url = 'http://shadowburn.binmode.com/wotnow/'
|
||||
stripUrl = url + 'comic.php?comic_id=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = compile(r'<IMG SRC="(comics/.+?)"')
|
||||
prevSearch = compile(r'<A HREF="(.+?)"><IMG SRC="images/b_prev.gif" ')
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
|
|
@ -10,6 +10,7 @@ from re import compile, escape
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..util import tagre
|
||||
from ..helpers import bounceStarter
|
||||
from .common import _WordPressScraper, xpath_class
|
||||
|
||||
|
||||
class ZapComic(_ParserScraper):
|
||||
|
@ -38,16 +39,11 @@ class Zapiro(_BasicScraper):
|
|||
return name
|
||||
|
||||
|
||||
class ZenPencils(_BasicScraper):
|
||||
class ZenPencils(_WordPressScraper):
|
||||
url = 'http://zenpencils.com/'
|
||||
rurl = escape(url)
|
||||
multipleImagesPerStrip = True
|
||||
stripUrl = url + 'comic/%s/'
|
||||
firstStripUrl = stripUrl % '1-ralph-waldo-emerson-make-them-cry'
|
||||
prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+/)' % rurl,
|
||||
after="navi-prev"))
|
||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.zenpencils\.com/wp-content/uploads/\d+[^"]+)'))
|
||||
help = 'Index format: num-stripname'
|
||||
firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
|
||||
|
||||
class ZombieHunters(_BasicScraper):
|
||||
|
|
Loading…
Reference in a new issue