Read starter parameters from class.
This allows starters to be specified in a more declarative and dynamic way.
This commit is contained in:
parent
b865a171f9
commit
42e43fa4e6
23 changed files with 186 additions and 140 deletions
|
@ -1,8 +1,13 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from .util import getQueryParams
|
from .util import getQueryParams
|
||||||
|
|
||||||
|
|
||||||
def queryNamer(paramName, usePageUrl=False):
|
def queryNamer(paramName, usePageUrl=False):
|
||||||
"""Get name from URL query part."""
|
"""Get name from URL query part."""
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -25,23 +30,32 @@ def regexNamer(regex, usePageUrl=False):
|
||||||
return _namer
|
return _namer
|
||||||
|
|
||||||
|
|
||||||
def bounceStarter(url, nextSearch):
|
def bounceStarter():
|
||||||
"""Get start URL by "bouncing" back and forth one time."""
|
"""Get start URL by "bouncing" back and forth one time.
|
||||||
|
|
||||||
|
This needs the url and nextSearch properties to be defined on the class.
|
||||||
|
"""
|
||||||
@classmethod
|
@classmethod
|
||||||
def _starter(cls):
|
def _starter(cls):
|
||||||
"""Get bounced start URL."""
|
"""Get bounced start URL."""
|
||||||
data = cls.getPage(url)
|
data = cls.getPage(cls.url)
|
||||||
url1 = cls.fetchUrl(url, data, cls.prevSearch)
|
url1 = cls.fetchUrl(cls.url, data, cls.prevSearch)
|
||||||
data = cls.getPage(url1)
|
data = cls.getPage(url1)
|
||||||
return cls.fetchUrl(url1, data, nextSearch)
|
return cls.fetchUrl(url1, data, cls.nextSearch)
|
||||||
return _starter
|
return _starter
|
||||||
|
|
||||||
|
|
||||||
def indirectStarter(url, latestSearch):
|
def indirectStarter():
|
||||||
"""Get start URL by indirection."""
|
"""Get start URL by indirection.
|
||||||
|
|
||||||
|
This is useful for comics where the latest comic can't be reached at a
|
||||||
|
stable URL. If the class has an attribute 'startUrl', this page is fetched
|
||||||
|
first, otherwise the page at 'url' is fetched. After that, the attribute
|
||||||
|
'latestSearch' is used on the page content to find the latest strip."""
|
||||||
@classmethod
|
@classmethod
|
||||||
def _starter(cls):
|
def _starter(cls):
|
||||||
"""Get indirect start URL."""
|
"""Get indirect start URL."""
|
||||||
|
url = cls.startUrl if hasattr(cls, "startUrl") else cls.url
|
||||||
data = cls.getPage(url)
|
data = cls.getPage(url)
|
||||||
return cls.fetchUrl(url, data, latestSearch)
|
return cls.fetchUrl(url, data, cls.latestSearch)
|
||||||
return _starter
|
return _starter
|
||||||
|
|
|
@ -16,8 +16,7 @@ from .common import _WordPressScraper, xpath_class, WP_LATEST_SEARCH
|
||||||
class AbstruseGoose(_BasicScraper):
|
class AbstruseGoose(_BasicScraper):
|
||||||
url = 'http://abstrusegoose.com/'
|
url = 'http://abstrusegoose.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
starter = bounceStarter(
|
starter = bounceStarter()
|
||||||
url, compile(tagre('a', 'href', r'(%s\d+)' % rurl) + "Next »"))
|
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre('img', 'src',
|
imageSearch = compile(tagre('img', 'src',
|
||||||
|
@ -81,7 +80,6 @@ class AfterStrife(_WordPressScraper):
|
||||||
|
|
||||||
class AGirlAndHerFed(_BasicScraper):
|
class AGirlAndHerFed(_BasicScraper):
|
||||||
url = 'http://www.agirlandherfed.com/'
|
url = 'http://www.agirlandherfed.com/'
|
||||||
starter = bounceStarter(url, compile(r'<a href="([^"]+)">[^>]+Back'))
|
|
||||||
stripUrl = url + '1.%s.html'
|
stripUrl = url + '1.%s.html'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)'))
|
imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)'))
|
||||||
|
@ -114,7 +112,6 @@ class ALessonIsLearned(_BasicScraper):
|
||||||
url = 'http://www.alessonislearned.com/'
|
url = 'http://www.alessonislearned.com/'
|
||||||
prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)",
|
prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)",
|
||||||
quote="'") + r"[^>]+previous")
|
quote="'") + r"[^>]+previous")
|
||||||
starter = indirectStarter(url, prevSearch)
|
|
||||||
stripUrl = url + 'index.php?comic=%s'
|
stripUrl = url + 'index.php?comic=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)"))
|
imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)"))
|
||||||
|
@ -124,8 +121,8 @@ class ALessonIsLearned(_BasicScraper):
|
||||||
class Alice(_WordPressScraper):
|
class Alice(_WordPressScraper):
|
||||||
url = 'http://www.alicecomics.com/'
|
url = 'http://www.alicecomics.com/'
|
||||||
prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
|
prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
|
||||||
starter = indirectStarter('http://www.alicecomics.com/',
|
latestSearch = '//a[text()="Latest Alice!"]'
|
||||||
'//a[text()="Latest Alice!"]')
|
starter = indirectStarter()
|
||||||
|
|
||||||
|
|
||||||
class AlienLovesPredator(_BasicScraper):
|
class AlienLovesPredator(_BasicScraper):
|
||||||
|
@ -264,7 +261,8 @@ class ARedTailsDream(_BasicScraper):
|
||||||
class Ashes(_WordPressScraper):
|
class Ashes(_WordPressScraper):
|
||||||
url = 'http://www.flowerlarkstudios.com/comic/prologue/10232009/'
|
url = 'http://www.flowerlarkstudios.com/comic/prologue/10232009/'
|
||||||
firstStripUrl = url
|
firstStripUrl = url
|
||||||
starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)
|
latestSearch = WP_LATEST_SEARCH
|
||||||
|
starter = indirectStarter()
|
||||||
|
|
||||||
|
|
||||||
class ASkeweredParadise(_BasicScraper):
|
class ASkeweredParadise(_BasicScraper):
|
||||||
|
@ -289,12 +287,13 @@ class ASofterWorld(_ParserScraper):
|
||||||
class AstronomyPOTD(_ParserScraper):
|
class AstronomyPOTD(_ParserScraper):
|
||||||
baseUrl = 'http://apod.nasa.gov/apod/'
|
baseUrl = 'http://apod.nasa.gov/apod/'
|
||||||
url = baseUrl + 'astropix.html'
|
url = baseUrl + 'astropix.html'
|
||||||
starter = bounceStarter(url, '//a[text()=">"]')
|
starter = bounceStarter()
|
||||||
stripUrl = baseUrl + 'ap%s.html'
|
stripUrl = baseUrl + 'ap%s.html'
|
||||||
firstStripUrl = stripUrl % '061012'
|
firstStripUrl = stripUrl % '061012'
|
||||||
imageSearch = '//a/img'
|
imageSearch = '//a/img'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
prevSearch = '//a[text()="<"]'
|
prevSearch = '//a[text()="<"]'
|
||||||
|
nextSearch = '//a[text()=">"]'
|
||||||
help = 'Index format: yymmdd'
|
help = 'Index format: yymmdd'
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
|
|
|
@ -47,8 +47,8 @@ class BalderDash(_ComicControlScraper):
|
||||||
|
|
||||||
class Bardsworth(_WordPressScraper):
|
class Bardsworth(_WordPressScraper):
|
||||||
url = 'http://www.bardsworth.com/'
|
url = 'http://www.bardsworth.com/'
|
||||||
starter = indirectStarter('http://www.bardsworth.com/',
|
latestSearch = '//a[@rel="bookmark"]'
|
||||||
'//a[@rel="bookmark"]')
|
starter = indirectStarter()
|
||||||
|
|
||||||
|
|
||||||
class Baroquen(_BasicScraper):
|
class Baroquen(_BasicScraper):
|
||||||
|
@ -72,12 +72,15 @@ class Beetlebum(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % '2006/03/10/quiz-fur-ruskiphile'
|
firstStripUrl = stripUrl % '2006/03/10/quiz-fur-ruskiphile'
|
||||||
starter = indirectStarter(url, compile(tagre('a', 'href', r'(%s\d{4}/\d{2}/\d{2}/[^"]+)' % rurl, after='bookmark')))
|
starter = indirectStarter()
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)'))
|
imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)'))
|
||||||
prevSearch = compile(tagre('a', 'href',
|
prevSearch = compile(tagre('a', 'href',
|
||||||
r'(%s\d{4}/\d{2}/\d{2}/[^"]*)' % rurl,
|
r'(%s\d{4}/\d{2}/\d{2}/[^"]*)' % rurl,
|
||||||
after='prev'))
|
after='prev'))
|
||||||
|
latestSearch = compile(tagre('a', 'href',
|
||||||
|
r'(%s\d{4}/\d{2}/\d{2}/[^"]+)' % rurl,
|
||||||
|
after='bookmark'))
|
||||||
help = 'Index format: yyyy/mm/dd/striptitle'
|
help = 'Index format: yyyy/mm/dd/striptitle'
|
||||||
lang = 'de'
|
lang = 'de'
|
||||||
|
|
||||||
|
@ -223,7 +226,8 @@ class BoredAndEvil(_BasicScraper):
|
||||||
firstStripUrl = stripUrl % '2004-06-07'
|
firstStripUrl = stripUrl % '2004-06-07'
|
||||||
imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)'))
|
||||||
prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on.gif')
|
prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on.gif')
|
||||||
starter = indirectStarter(url, prevSearch)
|
latestSearch = prevSearch
|
||||||
|
starter = indirectStarter()
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -55,7 +55,7 @@ class Carciphona(_BasicScraper):
|
||||||
after="prevarea"))
|
after="prevarea"))
|
||||||
latestSearch = compile(tagre("a", "href",
|
latestSearch = compile(tagre("a", "href",
|
||||||
r'(view\.php\?page=[0-9]+[^"]*)'))
|
r'(view\.php\?page=[0-9]+[^"]*)'))
|
||||||
starter = indirectStarter(url, latestSearch)
|
starter = indirectStarter()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
@ -275,10 +275,11 @@ class CoolCatStudio(_BasicScraper):
|
||||||
|
|
||||||
class CorydonCafe(_ParserScraper):
|
class CorydonCafe(_ParserScraper):
|
||||||
url = 'http://corydoncafe.com/'
|
url = 'http://corydoncafe.com/'
|
||||||
starter = indirectStarter(url, '//ul//a')
|
starter = indirectStarter()
|
||||||
stripUrl = url + '%s.php'
|
stripUrl = url + '%s.php'
|
||||||
imageSearch = "//center[2]//img"
|
imageSearch = "//center[2]//img"
|
||||||
prevSearch = '//a[@title="prev"]'
|
prevSearch = '//a[@title="prev"]'
|
||||||
|
latestSearch = '//ul//a'
|
||||||
help = 'Index format: yyyy/stripname'
|
help = 'Index format: yyyy/stripname'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -345,14 +346,15 @@ class CucumberQuest(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + 'cq/%s/'
|
stripUrl = url + 'cq/%s/'
|
||||||
firstStripUrl = stripUrl % 'page-1'
|
firstStripUrl = stripUrl % 'page-1'
|
||||||
starter = indirectStarter(url + 'recent.html',
|
startUrl = url + 'recent.html'
|
||||||
compile(r'window\.location="(/cq/[^"]+/)"'))
|
starter = indirectStarter()
|
||||||
imageSearch = (
|
imageSearch = (
|
||||||
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)),
|
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)),
|
||||||
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)),
|
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)),
|
||||||
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/bonus[^"]+)' % rurl)),
|
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/bonus[^"]+)' % rurl)),
|
||||||
)
|
)
|
||||||
prevSearch = compile(tagre("a", "href", r'(%scq/[^"]+/)' % rurl, after="previous"))
|
prevSearch = compile(tagre("a", "href", r'(%scq/[^"]+/)' % rurl, after="previous"))
|
||||||
|
latestSearch = compile(r'window\.location="(/cq/[^"]+/)"')
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
|
||||||
|
@ -377,11 +379,12 @@ class Curvy(_ParserScraper):
|
||||||
|
|
||||||
class CyanideAndHappiness(_BasicScraper):
|
class CyanideAndHappiness(_BasicScraper):
|
||||||
url = 'http://www.explosm.net/comics/'
|
url = 'http://www.explosm.net/comics/'
|
||||||
starter = bounceStarter(url, compile(tagre("a", "href", r"(/comics/\d+/)", after="next-comic")))
|
starter = bounceStarter()
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '15'
|
firstStripUrl = stripUrl % '15'
|
||||||
imageSearch = compile(tagre("img", "src", r'(//files.explosm.net/comics/[^"]+)', before="main-comic"))
|
imageSearch = compile(tagre("img", "src", r'(//files.explosm.net/comics/[^"]+)', before="main-comic"))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/comics/\d+/)', after="previous-comic"))
|
prevSearch = compile(tagre("a", "href", r'(/comics/\d+/)', after="previous-comic"))
|
||||||
|
nextSearch = compile(tagre("a", "href", r"(/comics/\d+/)", after="next-comic"))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
|
|
|
@ -13,17 +13,12 @@ from ..util import tagre
|
||||||
from .common import _WordPressScraper, xpath_class
|
from .common import _WordPressScraper, xpath_class
|
||||||
|
|
||||||
|
|
||||||
class DailyDose(_BasicScraper):
|
class DailyDose(_ParserScraper):
|
||||||
url = 'http://dailydoseofcomics.com/'
|
url = 'http://dailydoseofcomics.com/'
|
||||||
starter = indirectStarter(
|
starter = indirectStarter()
|
||||||
url, compile(tagre("a", "href",
|
imageSearch = '//p/a/img'
|
||||||
r'(http://dailydoseofcomics\.com/[^"]+)',
|
prevSearch = '//a[@rel="prev"]'
|
||||||
after="preview")))
|
latestSearch = '//a[@rel="bookmark"]'
|
||||||
stripUrl = url + '%s/'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'([^"]+)',
|
|
||||||
before="align(?:none|center)"))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://dailydoseofcomics\.com/[^"]+)', after="prev"))
|
|
||||||
help = 'Index format: stripname'
|
|
||||||
|
|
||||||
|
|
||||||
class DamnLol(_BasicScraper):
|
class DamnLol(_BasicScraper):
|
||||||
|
@ -31,13 +26,13 @@ class DamnLol(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '%s.html'
|
stripUrl = url + '%s.html'
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
||||||
|
nextSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="next"))
|
||||||
imageSearch = (
|
imageSearch = (
|
||||||
compile(tagre("img", "src", r'(%si/[^"]+)' % rurl)),
|
compile(tagre("img", "src", r'(%si/[^"]+)' % rurl)),
|
||||||
compile(tagre("img", "src", r'(%spics/[^"]+)' % rurl)),
|
compile(tagre("img", "src", r'(%spics/[^"]+)' % rurl)),
|
||||||
)
|
)
|
||||||
help = 'Index format: stripname-number'
|
help = 'Index format: stripname-number'
|
||||||
starter = bounceStarter(
|
starter = bounceStarter()
|
||||||
url, compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="next")))
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
@ -160,9 +155,12 @@ class Dilbert(_BasicScraper):
|
||||||
url = 'http://dilbert.com/'
|
url = 'http://dilbert.com/'
|
||||||
stripUrl = url + '/strip/%s/'
|
stripUrl = url + '/strip/%s/'
|
||||||
firstStripUrl = stripUrl % '1989-04-16'
|
firstStripUrl = stripUrl % '1989-04-16'
|
||||||
starter = indirectStarter(url, compile(tagre("a", "href", r'(http://dilbert.com/strip/[0-9-]*)', after="Click to see")))
|
starter = indirectStarter()
|
||||||
prevSearch = compile(tagre("a", "href", r'(/strip/\d+-\d+-\d+)', after="Older Strip"))
|
prevSearch = compile(tagre("a", "href", r'(/strip/\d+-\d+-\d+)', after="Older Strip"))
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://assets.amuniversal.com/\w+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://assets.amuniversal.com/\w+)'))
|
||||||
|
latestSearch = compile(tagre("a", "href",
|
||||||
|
r'(http://dilbert.com/strip/[0-9-]*)',
|
||||||
|
after="Click to see"))
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -254,9 +252,10 @@ class DresdenCodak(_BasicScraper):
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) +
|
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) +
|
||||||
tagre("img", "src", r"%sm_prev2?\.png" % rurl,
|
tagre("img", "src", r"%sm_prev2?\.png" % rurl,
|
||||||
quote=""))
|
quote=""))
|
||||||
starter = indirectStarter(
|
latestSearch = compile(tagre("div", "id", "preview") +
|
||||||
url, compile(tagre("div", "id", "preview") +
|
tagre("a", "href",
|
||||||
tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl)))
|
r'(%s\d+/\d+/\d+/[^"]+)' % rurl))
|
||||||
|
starter = indirectStarter()
|
||||||
|
|
||||||
|
|
||||||
class DrFun(_BasicScraper):
|
class DrFun(_BasicScraper):
|
||||||
|
|
|
@ -15,9 +15,10 @@ from .common import _WordPressScraper, WP_LATEST_SEARCH, xpath_class
|
||||||
|
|
||||||
class EarthsongSaga(_ParserScraper):
|
class EarthsongSaga(_ParserScraper):
|
||||||
url = 'http://earthsongsaga.com/index.php'
|
url = 'http://earthsongsaga.com/index.php'
|
||||||
starter = indirectStarter(url, '//div[@id="leftmenu"]/span[1]/a[1]')
|
starter = indirectStarter()
|
||||||
imageSearch = '//div[@id="comic"]//img'
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
prevSearch = '//a[@title="Previous"]'
|
prevSearch = '//a[@title="Previous"]'
|
||||||
|
latestSearch = '//div[@id="leftmenu"]/span[1]/a[1]'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def fetchUrls(cls, url, data, urlSearch):
|
def fetchUrls(cls, url, data, urlSearch):
|
||||||
|
@ -43,21 +44,23 @@ class EarthsongSaga(_ParserScraper):
|
||||||
class EasilyAmused(_WordPressScraper):
|
class EasilyAmused(_WordPressScraper):
|
||||||
url = 'http://www.flowerlarkstudios.com/comic/college-daze/ea01/'
|
url = 'http://www.flowerlarkstudios.com/comic/college-daze/ea01/'
|
||||||
firstStripUrl = url
|
firstStripUrl = url
|
||||||
starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)
|
latestSearch = WP_LATEST_SEARCH
|
||||||
|
starter = indirectStarter()
|
||||||
|
|
||||||
|
|
||||||
class EatLiver(_BasicScraper):
|
class EatLiver(_BasicScraper):
|
||||||
url = 'http://www.eatliver.com/'
|
url = 'http://www.eatliver.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
starter = indirectStarter(url, compile(
|
starter = indirectStarter()
|
||||||
tagre("a", "href", r'(i\.php\?n=\d+)') +
|
|
||||||
tagre("img", "src", r'img/small/[^"]+') + r"</a>\s*<br"))
|
|
||||||
stripUrl = url + "i.php?n=%s"
|
stripUrl = url + "i.php?n=%s"
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl,
|
imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl,
|
||||||
before="image_src"))
|
before="image_src"))
|
||||||
prevSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
|
prevSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
|
||||||
"<< Previous")
|
"<< Previous")
|
||||||
|
latestSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
|
||||||
|
tagre("img", "src", r'img/small/[^"]+') +
|
||||||
|
r"</a>\s*<br")
|
||||||
|
|
||||||
|
|
||||||
class EatThatToast(_BasicScraper):
|
class EatThatToast(_BasicScraper):
|
||||||
|
@ -181,7 +184,8 @@ class Erstwhile(_WordPressScraper):
|
||||||
class Eryl(_WordPressScraper):
|
class Eryl(_WordPressScraper):
|
||||||
url = 'http://www.flowerlarkstudios.com/comic/prologue-migration/page-i/'
|
url = 'http://www.flowerlarkstudios.com/comic/prologue-migration/page-i/'
|
||||||
firstStripUrl = url
|
firstStripUrl = url
|
||||||
starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)
|
latestSearch = WP_LATEST_SEARCH
|
||||||
|
starter = indirectStarter()
|
||||||
help = 'This was known as DarkWings in previous Dosage versions'
|
help = 'This was known as DarkWings in previous Dosage versions'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -26,10 +26,9 @@ class FantasyRealms(_BasicScraper):
|
||||||
stripUrl = url + 'manga/%s.php'
|
stripUrl = url + 'manga/%s.php'
|
||||||
imageSearch = compile(r'<img src="(\d{1,4}.\w{3,4})" width="540"', IGNORECASE)
|
imageSearch = compile(r'<img src="(\d{1,4}.\w{3,4})" width="540"', IGNORECASE)
|
||||||
prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
|
prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
|
||||||
|
latestSearch = compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE)
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
starter = indirectStarter(
|
starter = indirectStarter()
|
||||||
url,
|
|
||||||
compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE))
|
|
||||||
|
|
||||||
|
|
||||||
class FauxPas(_BasicScraper):
|
class FauxPas(_BasicScraper):
|
||||||
|
@ -47,8 +46,9 @@ class FeyWinds(_BasicScraper):
|
||||||
stripUrl = baseUrl + 'comic/page.php?id=%s'
|
stripUrl = baseUrl + 'comic/page.php?id=%s'
|
||||||
imageSearch = compile(r"(../comic/pages//.+?)'")
|
imageSearch = compile(r"(../comic/pages//.+?)'")
|
||||||
prevSearch = compile(r"(page.php\?id=.+?)'.+?navprevious.png")
|
prevSearch = compile(r"(page.php\?id=.+?)'.+?navprevious.png")
|
||||||
|
latestSearch = compile(r'(comic/page.php\?id.+?)"')
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
starter = indirectStarter(url, compile(r'(comic/page.php\?id.+?)"'))
|
starter = indirectStarter()
|
||||||
|
|
||||||
|
|
||||||
class FilibusterCartoons(_BasicScraper):
|
class FilibusterCartoons(_BasicScraper):
|
||||||
|
@ -159,9 +159,9 @@ class FredoAndPidjin(_BasicScraper):
|
||||||
)
|
)
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
prevSearch = compile(tagre('a', 'href', '([^"]+)') + "Prev</a>")
|
prevSearch = compile(tagre('a', 'href', '([^"]+)') + "Prev</a>")
|
||||||
starter = indirectStarter(
|
latestSearch = compile(tagre('a', 'href', "(" + url +
|
||||||
url,
|
r'\d\d\d\d/\d\d/\d\d/[^"]+/)'))
|
||||||
compile(tagre('a', 'href', "(" + url + r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))
|
starter = indirectStarter()
|
||||||
|
|
||||||
|
|
||||||
class Freefall(_BasicScraper):
|
class Freefall(_BasicScraper):
|
||||||
|
|
|
@ -27,15 +27,15 @@ class Garanos(_BasicScraper):
|
||||||
baseUrl = 'http://garanos.alexheberling.com/'
|
baseUrl = 'http://garanos.alexheberling.com/'
|
||||||
rurl = escape(baseUrl)
|
rurl = escape(baseUrl)
|
||||||
url = baseUrl + 'pages/page-1/'
|
url = baseUrl + 'pages/page-1/'
|
||||||
starter = indirectStarter(
|
starter = indirectStarter()
|
||||||
url, compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
|
|
||||||
after="nav-last")))
|
|
||||||
stripUrl = baseUrl + 'pages/page-%s'
|
stripUrl = baseUrl + 'pages/page-%s'
|
||||||
imageSearch = compile(
|
imageSearch = compile(
|
||||||
tagre("img", "src",
|
tagre("img", "src",
|
||||||
r'(%swp-content/uploads/sites/\d+/\d+/\d+/[^"]+)' % rurl))
|
r'(%swp-content/uploads/sites/\d+/\d+/\d+/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
|
prevSearch = compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
|
||||||
after="prev"))
|
after="prev"))
|
||||||
|
latestSearch = compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
|
||||||
|
after="nav-last"))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
@ -136,14 +136,14 @@ class GoGetARoomie(_ComicControlScraper):
|
||||||
|
|
||||||
class GoneWithTheBlastwave(_BasicScraper):
|
class GoneWithTheBlastwave(_BasicScraper):
|
||||||
url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
|
url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
|
||||||
starter = indirectStarter(
|
starter = indirectStarter()
|
||||||
url, compile(r'href="(index.php\?p=comic&nro=\d+)">' +
|
|
||||||
r'<img src="images/page/default/latest'))
|
|
||||||
stripUrl = url[:-1] + '%s'
|
stripUrl = url[:-1] + '%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(r'<img.+src=".+(/comics/.+?)"')
|
imageSearch = compile(r'<img.+src=".+(/comics/.+?)"')
|
||||||
prevSearch = compile(r'href="(index.php\?p=comic&nro=\d+)">' +
|
prevSearch = compile(r'href="(index.php\?p=comic&nro=\d+)">' +
|
||||||
r'<img src="images/page/default/previous')
|
r'<img src="images/page/default/previous')
|
||||||
|
latestSearch = compile(r'href="(index.php\?p=comic&nro=\d+)">' +
|
||||||
|
r'<img src="images/page/default/latest')
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
|
@ -41,15 +41,15 @@ class _HappyJar(_WordPressScraper):
|
||||||
class HarkAVagrant(_BasicScraper):
|
class HarkAVagrant(_BasicScraper):
|
||||||
url = 'http://www.harkavagrant.com/'
|
url = 'http://www.harkavagrant.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
starter = bounceStarter(
|
starter = bounceStarter()
|
||||||
url, compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
|
|
||||||
tagre("img", "src", "buttonnext.png")))
|
|
||||||
stripUrl = url + 'index.php?id=%s'
|
stripUrl = url + 'index.php?id=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl,
|
imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl,
|
||||||
after='BORDER'))
|
after='BORDER'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
|
prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
|
||||||
tagre("img", "src", "buttonprevious.png"))
|
tagre("img", "src", "buttonprevious.png"))
|
||||||
|
nextSearch = compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
|
||||||
|
tagre("img", "src", "buttonnext.png"))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
|
@ -1,8 +1,12 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
|
@ -30,9 +34,12 @@ class JerkCity(_BasicScraper):
|
||||||
class JimBenton(_BasicScraper):
|
class JimBenton(_BasicScraper):
|
||||||
url = 'http://www.jimbenton.com/page14/page14.html'
|
url = 'http://www.jimbenton.com/page14/page14.html'
|
||||||
stripUrl = 'http://www.jimbenton.com/page14/files/JimBentonComic-%s.html'
|
stripUrl = 'http://www.jimbenton.com/page14/files/JimBentonComic-%s.html'
|
||||||
starter = indirectStarter(url, compile(tagre("a", "href", r'(files/JimBentonComic-[^>]+\.html)', quote="")))
|
starter = indirectStarter()
|
||||||
imageSearch = compile(tagre("img", "src", r'(JimBentonComic-[^"]+)', before="photo-frame"))
|
imageSearch = compile(tagre("img", "src", r'(JimBentonComic-[^"]+)',
|
||||||
prevSearch = compile(tagre("a", "href", r'(JimBentonComic-[^>]+\.html)', quote="") + "Next")
|
before="photo-frame"))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(JimBentonComic-[^>]+\.html)',
|
||||||
|
quote="") + "Next")
|
||||||
|
latestSearch = compile(tagre("a", "href", r'(files/JimBentonComic-[^>]+\.html)', quote=""))
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
|
||||||
|
@ -58,6 +65,7 @@ class JustAnotherEscape(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + 'index.cgi?date=%s'
|
stripUrl = url + 'index.cgi?date=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s/index\.cgi\?date=\d+)' % rurl)
|
prevSearch = compile(tagre("a", "href",
|
||||||
+ tagre("img", "alt", "Previous Comic"))
|
r'(%s/index\.cgi\?date=\d+)' % rurl) +
|
||||||
|
tagre("img", "alt", "Previous Comic"))
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
|
@ -9,7 +9,6 @@ from re import compile, escape, IGNORECASE
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from ..helpers import indirectStarter
|
|
||||||
from .common import _ComicControlScraper, _WordPressScraper, xpath_class
|
from .common import _ComicControlScraper, _WordPressScraper, xpath_class
|
||||||
|
|
||||||
|
|
||||||
|
@ -81,4 +80,3 @@ class KuroShouri(_BasicScraper):
|
||||||
tagre("a", "href", r'(%s\?webcomic_post\=[^"]+)' % rurl,
|
tagre("a", "href", r'(%s\?webcomic_post\=[^"]+)' % rurl,
|
||||||
after="previous"))
|
after="previous"))
|
||||||
help = 'Index format: chapter-n-page-m'
|
help = 'Index format: chapter-n-page-m'
|
||||||
starter = indirectStarter(url, prevSearch)
|
|
||||||
|
|
|
@ -21,10 +21,10 @@ class Lackadaisy(_BasicScraper):
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.lackadaisycats\.com/comic/[^"]*)'))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.lackadaisycats\.com/comic/[^"]*)'))
|
||||||
prevSearch = compile(tagre("a", "href", r"(/comic\.php\?comicid=[0-9]+)") +
|
prevSearch = compile(tagre("a", "href", r"(/comic\.php\?comicid=[0-9]+)") +
|
||||||
"< Previous")
|
"< Previous")
|
||||||
|
nextSearch = compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") +
|
||||||
|
"Next")
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
starter = bounceStarter(
|
starter = bounceStarter()
|
||||||
url, compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") +
|
|
||||||
"Next"))
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
@ -37,7 +37,8 @@ class Lackadaisy(_BasicScraper):
|
||||||
class Laiyu(_WordPressScraper):
|
class Laiyu(_WordPressScraper):
|
||||||
url = 'http://www.flowerlarkstudios.com/comic/preliminary-concepts/welcome/'
|
url = 'http://www.flowerlarkstudios.com/comic/preliminary-concepts/welcome/'
|
||||||
firstStripUrl = url
|
firstStripUrl = url
|
||||||
starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)
|
latestSearch = WP_LATEST_SEARCH
|
||||||
|
starter = indirectStarter()
|
||||||
|
|
||||||
|
|
||||||
class LasLindas(_BasicScraper):
|
class LasLindas(_BasicScraper):
|
||||||
|
@ -64,9 +65,9 @@ class LeastICouldDo(_BasicScraper):
|
||||||
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d{8,9}\.\w{1,4})' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d{8,9}\.\w{1,4})' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
|
prevSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
|
||||||
after="Previous"))
|
after="Previous"))
|
||||||
starter = indirectStarter(
|
latestSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
|
||||||
url, compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
|
after="feature-comic"))
|
||||||
after="feature-comic")))
|
starter = indirectStarter()
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
@ -110,12 +111,11 @@ class LoadingArtist(_ParserScraper):
|
||||||
|
|
||||||
class LookingForGroup(_ParserScraper):
|
class LookingForGroup(_ParserScraper):
|
||||||
url = 'http://www.lfgcomic.com/'
|
url = 'http://www.lfgcomic.com/'
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + 'page/%s/'
|
stripUrl = url + 'page/%s/'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
css = True
|
css = True
|
||||||
imageSearch = '#comic img'
|
imageSearch = '#comic img'
|
||||||
prevSearch = '#comic-left > a'
|
prevSearch = '#comic-left > a'
|
||||||
starter = indirectStarter(url, '#header-dropdown-comic-lfg > a:nth-of-type(2)')
|
latestSearch = '#header-dropdown-comic-lfg > a:nth-of-type(2)'
|
||||||
nameSearch = compile(r'/page/([-0-9]+)/')
|
starter = indirectStarter()
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
|
@ -102,9 +102,9 @@ class NichtLustig(_BasicScraper):
|
||||||
lang = 'de'
|
lang = 'de'
|
||||||
imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
|
imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
|
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
|
||||||
|
latestSearch = compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)'))
|
||||||
help = 'Index format: yymmdd'
|
help = 'Index format: yymmdd'
|
||||||
starter = indirectStarter(
|
starter = indirectStarter()
|
||||||
url, compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)')))
|
|
||||||
|
|
||||||
|
|
||||||
class Nicky510(_WordPressScraper):
|
class Nicky510(_WordPressScraper):
|
||||||
|
@ -136,7 +136,8 @@ class NobodyScores(_BasicScraper):
|
||||||
class NoMoreSavePoints(_WordPressScraper):
|
class NoMoreSavePoints(_WordPressScraper):
|
||||||
url = 'http://www.flowerlarkstudios.com/comic/no-more-save-points/mushroom-hopping/'
|
url = 'http://www.flowerlarkstudios.com/comic/no-more-save-points/mushroom-hopping/'
|
||||||
firstStripUrl = url
|
firstStripUrl = url
|
||||||
starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)
|
latestSearch = WP_LATEST_SEARCH
|
||||||
|
starter = indirectStarter()
|
||||||
|
|
||||||
|
|
||||||
class NoNeedForBushido(_BasicScraper):
|
class NoNeedForBushido(_BasicScraper):
|
||||||
|
@ -149,10 +150,10 @@ class NoNeedForBushido(_BasicScraper):
|
||||||
after="attachment-full"))
|
after="attachment-full"))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
|
prevSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
|
||||||
after="previous-webcomic"))
|
after="previous-webcomic"))
|
||||||
|
latestSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
|
||||||
|
after="last-webcomic"))
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
starter = indirectStarter(
|
starter = indirectStarter()
|
||||||
url, compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
|
|
||||||
after="last-webcomic")))
|
|
||||||
|
|
||||||
|
|
||||||
class NotInventedHere(_BasicScraper):
|
class NotInventedHere(_BasicScraper):
|
||||||
|
|
|
@ -8,7 +8,6 @@ from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import indirectStarter
|
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from .common import _WordPressScraper, xpath_class
|
from .common import _WordPressScraper, xpath_class
|
||||||
|
|
||||||
|
@ -53,7 +52,6 @@ class OkCancel(_BasicScraper):
|
||||||
imageSearch = compile(tagre("img", "src", r'(%sstrips/okcancel\d{8}\.gif)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%sstrips/okcancel\d{8}\.gif)' % rurl))
|
||||||
prevSearch = compile(tagre("div", "class", "previous") +
|
prevSearch = compile(tagre("div", "class", "previous") +
|
||||||
tagre("a", "href", r'(%scomic/\d{1,4}\.html)' % rurl))
|
tagre("a", "href", r'(%scomic/\d{1,4}\.html)' % rurl))
|
||||||
starter = indirectStarter(url, prevSearch)
|
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -20,12 +20,13 @@ class PandyLand(_WordPressScraper):
|
||||||
|
|
||||||
class ParadigmShift(_BasicScraper):
|
class ParadigmShift(_BasicScraper):
|
||||||
url = 'http://www.paradigmshiftmanga.com/'
|
url = 'http://www.paradigmshiftmanga.com/'
|
||||||
starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)',
|
starter = indirectStarter()
|
||||||
after="next-comic-link")))
|
|
||||||
stripUrl = url + 'ps/%s.html'
|
stripUrl = url + 'ps/%s.html'
|
||||||
imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)'))
|
imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||||
after="previous-comic-link"))
|
after="previous-comic-link"))
|
||||||
|
latestSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||||
|
after="next-comic-link"))
|
||||||
help = 'Index format: custom'
|
help = 'Index format: custom'
|
||||||
|
|
||||||
|
|
||||||
|
@ -72,7 +73,6 @@ class PennyAndAggie(_BasicScraper):
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.pennyandaggie\.com/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.pennyandaggie\.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r"(index\.php\?p\=\d+)", quote="'") +
|
prevSearch = compile(tagre("a", "href", r"(index\.php\?p\=\d+)", quote="'") +
|
||||||
tagre("img", "src", r'%simages/previous_day\.gif' % rurl, quote=""))
|
tagre("img", "src", r'%simages/previous_day\.gif' % rurl, quote=""))
|
||||||
starter = indirectStarter(url, prevSearch)
|
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
@ -162,11 +162,12 @@ class PicPakDog(_BasicScraper):
|
||||||
|
|
||||||
class PiledHigherAndDeeper(_BasicScraper):
|
class PiledHigherAndDeeper(_BasicScraper):
|
||||||
url = 'http://www.phdcomics.com/comics.php'
|
url = 'http://www.phdcomics.com/comics.php'
|
||||||
starter = bounceStarter(url, compile(r'<a href=(archive\.php\?comicid=\d+)>.*<img [^>]*next_button\.gif'))
|
starter = bounceStarter()
|
||||||
stripUrl = url + '?comicid=%s'
|
stripUrl = url + '?comicid=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd\d+s\d?\.\w{3,4})', quote=""))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd\d+s\d?\.\w{3,4})', quote=""))
|
||||||
prevSearch = compile(r'<a href=((comics/)?archive\.php\?comicid=\d+)>.*<img [^>]*prev_button\.gif')
|
prevSearch = compile(r'<a href=((comics/)?archive\.php\?comicid=\d+)>.*<img [^>]*prev_button\.gif')
|
||||||
|
nextSearch = compile(r'<a href=(archive\.php\?comicid=\d+)>.*<img [^>]*next_button\.gif')
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
namer = queryNamer('comicid', usePageUrl=True)
|
namer = queryNamer('comicid', usePageUrl=True)
|
||||||
|
|
||||||
|
@ -204,9 +205,9 @@ class PokeyThePenguin(_ParserScraper):
|
||||||
stripUrl = url + 'index%s.html'
|
stripUrl = url + 'index%s.html'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = '//p/img'
|
imageSearch = '//p/img'
|
||||||
prevSearch = True
|
latestSearch = '(//a)[last()]'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
starter = indirectStarter(url, "(//a)[last()]")
|
starter = indirectStarter()
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
def getPrevUrl(self, url, data):
|
def getPrevUrl(self, url, data):
|
||||||
|
@ -230,22 +231,22 @@ class PoorlyDrawnLines(_BasicScraper):
|
||||||
|
|
||||||
class Precocious(_BasicScraper):
|
class Precocious(_BasicScraper):
|
||||||
url = 'http://www.precociouscomic.com/'
|
url = 'http://www.precociouscomic.com/'
|
||||||
starter = indirectStarter(
|
starter = indirectStarter()
|
||||||
url, compile(tagre("a", "href", r'(/archive/comic/[^"]+)') +
|
|
||||||
tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
|
|
||||||
)
|
|
||||||
stripUrl = url + 'archive/comic/%s'
|
stripUrl = url + 'archive/comic/%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
|
imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png"))
|
prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png"))
|
||||||
|
latestSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') +
|
||||||
|
tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
|
||||||
help = 'Index format: yyyy/mm/dd'
|
help = 'Index format: yyyy/mm/dd'
|
||||||
|
|
||||||
|
|
||||||
class PS238(_ParserScraper):
|
class PS238(_ParserScraper):
|
||||||
url = 'http://ps238.nodwick.com/'
|
url = 'http://ps238.nodwick.com/'
|
||||||
stripUrl = url + '/comic/%s/'
|
stripUrl = url + '/comic/%s/'
|
||||||
starter = bounceStarter(url, '//a[@class="comic-nav-base comic-nav-next"]')
|
starter = bounceStarter()
|
||||||
imageSearch = '//div[@id="comic"]//img'
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
prevSearch = '//a[@class="comic-nav-base comic-nav-previous"]'
|
prevSearch = '//a[@class="comic-nav-base comic-nav-previous"]'
|
||||||
|
nextSearch = '//a[@class="comic-nav-base comic-nav-next"]'
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,13 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
from ..scraper import _BasicScraper
|
|
||||||
from ..scraper import _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import bounceStarter
|
from ..helpers import bounceStarter
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
|
|
||||||
|
@ -16,6 +19,7 @@ class RadioactivePanda(_BasicScraper):
|
||||||
prevSearch = compile(r'<a href="(/comic/.*?)".+?previous_btn')
|
prevSearch = compile(r'<a href="(/comic/.*?)".+?previous_btn')
|
||||||
help = 'Index format: n (no padding)'
|
help = 'Index format: n (no padding)'
|
||||||
|
|
||||||
|
|
||||||
class RalfTheDestroyer(_ParserScraper):
|
class RalfTheDestroyer(_ParserScraper):
|
||||||
url = 'http://ralfthedestroyer.com/'
|
url = 'http://ralfthedestroyer.com/'
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
|
@ -47,12 +51,12 @@ class RealmOfAtland(_BasicScraper):
|
||||||
class RedMeat(_BasicScraper):
|
class RedMeat(_BasicScraper):
|
||||||
baseUrl = 'http://www.redmeat.com/redmeat/'
|
baseUrl = 'http://www.redmeat.com/redmeat/'
|
||||||
url = baseUrl + 'current/index.html'
|
url = baseUrl + 'current/index.html'
|
||||||
starter = bounceStarter(url,
|
starter = bounceStarter()
|
||||||
compile(tagre("a", "href", r'(http://www\.redmeat\.com/[^"]*)', after="next")))
|
|
||||||
stripUrl = baseUrl + '%s/index.html'
|
stripUrl = baseUrl + '%s/index.html'
|
||||||
firstStripUrl = stripUrl % '1996-06-10'
|
firstStripUrl = stripUrl % '1996-06-10'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.redmeat\.com/imager/b/redmeat/[^"]*\.png)'))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.redmeat\.com/imager/b/redmeat/[^"]*\.png)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://www\.redmeat\.com/[^"]*)', after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(http://www\.redmeat\.com/[^"]*)', after="prev"))
|
||||||
|
nextSearch = compile(tagre("a", "href", r'(http://www\.redmeat\.com/[^"]*)', after="next"))
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
@ -81,7 +85,8 @@ class RomanticallyApocalyptic(_BasicScraper):
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%sart/\d+[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%sart/\d+[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\d+[^"]+)' % rurl)+"\s*"+tagre('span', 'class', 'spritePrevious'))
|
prevSearch = compile(tagre("a", "href", r'(%s\d+[^"]+)' % rurl) + "\s*" +
|
||||||
|
tagre('span', 'class', 'spritePrevious'))
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
|
@ -101,5 +106,6 @@ class Ruthe(_BasicScraper):
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
lang = 'de'
|
lang = 'de'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/?cartoons/strip_\d+[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(/?cartoons/strip_\d+[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/cartoon/\d+/datum/asc/)')+'vorheriger')
|
prevSearch = compile(tagre("a", "href", r'(/cartoon/\d+/datum/asc/)') +
|
||||||
|
'vorheriger')
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
|
@ -69,9 +69,10 @@ class ScandinaviaAndTheWorld(_ParserScraper):
|
||||||
url = 'http://satwcomic.com/'
|
url = 'http://satwcomic.com/'
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
|
firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
|
||||||
starter = indirectStarter(url, '//a[text()="View latest comic"]')
|
starter = indirectStarter()
|
||||||
imageSearch = '//img[@itemprop="image"]'
|
imageSearch = '//img[@itemprop="image"]'
|
||||||
prevSearch = '//a[@accesskey="p"]'
|
prevSearch = '//a[@accesskey="p"]'
|
||||||
|
latestSearch = '//a[text()="View latest comic"]'
|
||||||
textSearch = '//span[@itemprop="articleBody"]'
|
textSearch = '//span[@itemprop="articleBody"]'
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
@ -194,9 +195,9 @@ class SexyLosers(_BasicScraper):
|
||||||
stripUrl = url + '%s.html'
|
stripUrl = url + '%s.html'
|
||||||
imageSearch = compile(r'<img src\s*=\s*"\s*(comics/[\w\.]+?)"', IGNORECASE)
|
imageSearch = compile(r'<img src\s*=\s*"\s*(comics/[\w\.]+?)"', IGNORECASE)
|
||||||
prevSearch = compile(r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<', IGNORECASE)
|
prevSearch = compile(r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<', IGNORECASE)
|
||||||
|
latestSearch = compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>', IGNORECASE)
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
starter = indirectStarter(url,
|
starter = indirectStarter()
|
||||||
compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>', IGNORECASE))
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
@ -333,7 +334,8 @@ class SnowFlame(_WordPressScraper):
|
||||||
url = 'http://www.snowflamecomic.com/'
|
url = 'http://www.snowflamecomic.com/'
|
||||||
stripUrl = url + '?comic=snowflame-%s-%s'
|
stripUrl = url + '?comic=snowflame-%s-%s'
|
||||||
firstStripUrl = stripUrl % ('01', '01')
|
firstStripUrl = stripUrl % ('01', '01')
|
||||||
starter = bounceStarter(url, WP_LATEST_SEARCH)
|
starter = bounceStarter()
|
||||||
|
nextSearch = WP_LATEST_SEARCH
|
||||||
help = 'Index format: chapter-page'
|
help = 'Index format: chapter-page'
|
||||||
|
|
||||||
def getIndexStripUrl(self, index):
|
def getIndexStripUrl(self, index):
|
||||||
|
@ -392,8 +394,9 @@ class Spamusement(_BasicScraper):
|
||||||
imageSearch = compile(r'<img src="(%sgfx/\d+\..+?)"' % rurl, IGNORECASE)
|
imageSearch = compile(r'<img src="(%sgfx/\d+\..+?)"' % rurl, IGNORECASE)
|
||||||
prevSearch = compile(r'<a href="(%sindex.php/comics/view/.+?)">' % rurl,
|
prevSearch = compile(r'<a href="(%sindex.php/comics/view/.+?)">' % rurl,
|
||||||
IGNORECASE)
|
IGNORECASE)
|
||||||
|
latestSearch = prevSearch
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
starter = indirectStarter(url, prevSearch)
|
starter = indirectStarter()
|
||||||
|
|
||||||
|
|
||||||
class SpareParts(_BasicScraper):
|
class SpareParts(_BasicScraper):
|
||||||
|
@ -504,8 +507,7 @@ class StuffNoOneToldMe(_BasicScraper):
|
||||||
stripUrl = url + '%s.html'
|
stripUrl = url + '%s.html'
|
||||||
firstStripUrl = stripUrl % '2010/05/01'
|
firstStripUrl = stripUrl % '2010/05/01'
|
||||||
olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
|
olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
|
||||||
starter = indirectStarter(
|
starter = indirectStarter()
|
||||||
url, compile(tagre("a", "href", olderHref, quote="'")))
|
|
||||||
imageSearch = (
|
imageSearch = (
|
||||||
compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') +
|
compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') +
|
||||||
r"(?:</a>|<br />)"),
|
r"(?:</a>|<br />)"),
|
||||||
|
@ -515,6 +517,7 @@ class StuffNoOneToldMe(_BasicScraper):
|
||||||
)
|
)
|
||||||
prevSearch = compile(tagre("a", "href", olderHref, quote="'",
|
prevSearch = compile(tagre("a", "href", olderHref, quote="'",
|
||||||
before="older-link"))
|
before="older-link"))
|
||||||
|
latestSearch = compile(tagre("a", "href", olderHref, quote="'"))
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
help = 'Index format: yyyy/mm/stripname'
|
help = 'Index format: yyyy/mm/stripname'
|
||||||
|
|
||||||
|
|
|
@ -76,14 +76,14 @@ class TheNoob(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
class TheOrderOfTheStick(_BasicScraper):
|
class TheOrderOfTheStick(_BasicScraper):
|
||||||
baseUrl = 'http://www.giantitp.com/'
|
url = 'http://www.giantitp.com/'
|
||||||
url = baseUrl + 'comics/oots0863.html'
|
stripUrl = url + 'comics/oots%s.html'
|
||||||
stripUrl = baseUrl + 'comics/oots%s.html'
|
|
||||||
firstStripUrl = stripUrl % '0001'
|
firstStripUrl = stripUrl % '0001'
|
||||||
imageSearch = compile(r'<IMG src="(/comics/images/[^"]+)">')
|
imageSearch = compile(r'<IMG src="(/comics/images/[^"]+)">')
|
||||||
prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
|
prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
|
||||||
|
latestSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"')
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
starter = indirectStarter(baseUrl, compile(r'<A href="(/comics/oots\d{4}\.html)"'))
|
starter = indirectStarter()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
@ -116,9 +116,13 @@ class TheThinHLine(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + 'post/%s'
|
stripUrl = url + 'post/%s'
|
||||||
firstStripUrl = stripUrl % '3517345105'
|
firstStripUrl = stripUrl % '3517345105'
|
||||||
imageSearch = compile(tagre('img', 'data-src', r'([^"]+media.tumblr.com/[^"]+)', before='content-image'))
|
imageSearch = compile(tagre('img', 'data-src',
|
||||||
|
r'([^"]+media.tumblr.com/[^"]+)',
|
||||||
|
before='content-image'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + '></a>')
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') + '></a>')
|
||||||
starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after='class="timestamp"')))
|
latestSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||||
|
after='class="timestamp"'))
|
||||||
|
starter = indirectStarter()
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))
|
indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))
|
||||||
|
|
|
@ -21,12 +21,10 @@ class Underling(_WordPressScraper):
|
||||||
|
|
||||||
class Undertow(_BasicScraper):
|
class Undertow(_BasicScraper):
|
||||||
url = 'http://undertow.dreamshards.org/'
|
url = 'http://undertow.dreamshards.org/'
|
||||||
stripUrl = url + '%s'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'([^"]+\.jpg)'))
|
imageSearch = compile(tagre("img", "src", r'([^"]+\.jpg)'))
|
||||||
prevSearch = compile(r'href="(.+?)".+?teynpoint')
|
prevSearch = compile(r'href="(.+?)".+?teynpoint')
|
||||||
help = 'Index format: good luck !'
|
latestSearch = compile(r'href="(.+?)".+?Most recent page')
|
||||||
starter = indirectStarter(url,
|
starter = indirectStarter()
|
||||||
compile(r'href="(.+?)".+?Most recent page'))
|
|
||||||
|
|
||||||
|
|
||||||
class UnicornJelly(_BasicScraper):
|
class UnicornJelly(_BasicScraper):
|
||||||
|
@ -46,9 +44,10 @@ class Unsounded(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
imageSearch = compile(tagre("img", "src", r'(pageart/[^"]*)'))
|
imageSearch = compile(tagre("img", "src", r'(pageart/[^"]*)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]*)', after='class="back'))
|
prevSearch = compile(tagre("a", "href", r'([^"]*)', after='class="back'))
|
||||||
starter = indirectStarter(
|
latestSearch = compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) +
|
||||||
url, compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) +
|
tagre("img", "src",
|
||||||
tagre("img", "src", r"%simages/newpages\.png" % rurl)))
|
r"%simages/newpages\.png" % rurl))
|
||||||
|
starter = indirectStarter()
|
||||||
help = 'Index format: chapter-number'
|
help = 'Index format: chapter-number'
|
||||||
|
|
||||||
def getIndexStripUrl(self, index):
|
def getIndexStripUrl(self, index):
|
||||||
|
|
|
@ -45,7 +45,7 @@ class WayfarersMoon(_BasicScraper):
|
||||||
class WebDesignerCOTW(_BasicScraper):
|
class WebDesignerCOTW(_BasicScraper):
|
||||||
url = 'http://www.webdesignerdepot.com/'
|
url = 'http://www.webdesignerdepot.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
starter = indirectStarter(url, compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+/)' % rurl)))
|
starter = indirectStarter()
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1'
|
firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1'
|
||||||
imageSearch = (
|
imageSearch = (
|
||||||
|
@ -57,6 +57,7 @@ class WebDesignerCOTW(_BasicScraper):
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl,
|
prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl,
|
||||||
before='prev', quote="'"))
|
before='prev', quote="'"))
|
||||||
|
latestSearch = compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+/)' % rurl))
|
||||||
help = 'Index format: yyyy/mm/stripname'
|
help = 'Index format: yyyy/mm/stripname'
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
|
|
|
@ -4,13 +4,13 @@ from ..scraper import make_scraper
|
||||||
from .common import _WordPressScraper
|
from .common import _WordPressScraper
|
||||||
|
|
||||||
|
|
||||||
def add(name, url, starter=None):
|
def add(name, start):
|
||||||
attrs = dict(
|
attrs = dict(
|
||||||
name=name,
|
name=name,
|
||||||
url=url
|
url='http://hijinksensue.com/',
|
||||||
|
latestSearch=start,
|
||||||
|
starter=indirectStarter()
|
||||||
)
|
)
|
||||||
if starter:
|
|
||||||
attrs['starter'] = starter
|
|
||||||
globals()[name] = make_scraper(name, _WordPressScraper, **attrs)
|
globals()[name] = make_scraper(name, _WordPressScraper, **attrs)
|
||||||
|
|
||||||
|
|
||||||
|
@ -22,4 +22,4 @@ for (name, starterXPath) in [
|
||||||
('HijinksEnsueConvention', '//h4[text()="Latest Fancy Convention Sketches"]/..//a'),
|
('HijinksEnsueConvention', '//h4[text()="Latest Fancy Convention Sketches"]/..//a'),
|
||||||
('HijinksEnsuePhoto', '//h4[text()="Latest Fancy Photo Comic"]/..//a')
|
('HijinksEnsuePhoto', '//h4[text()="Latest Fancy Photo Comic"]/..//a')
|
||||||
]:
|
]:
|
||||||
add(name, 'http://hijinksensue.com/', starter=indirectStarter('http://hijinksensue.com/', starterXPath))
|
add(name, starterXPath)
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from re import compile
|
from re import compile
|
||||||
|
|
||||||
|
@ -12,13 +15,13 @@ from ..util import tagre
|
||||||
class Xkcd(_BasicScraper):
|
class Xkcd(_BasicScraper):
|
||||||
name = 'xkcd'
|
name = 'xkcd'
|
||||||
url = 'http://xkcd.com/'
|
url = 'http://xkcd.com/'
|
||||||
starter = bounceStarter(url, compile(tagre("a", "href", r'(/\d+/)',
|
starter = bounceStarter()
|
||||||
before="next")))
|
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src",
|
imageSearch = compile(tagre("img", "src",
|
||||||
r'(//imgs\.xkcd\.com/comics/[^"]+)'))
|
r'(//imgs\.xkcd\.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/\d+/)', before="prev"))
|
prevSearch = compile(tagre("a", "href", r'(/\d+/)', before="prev"))
|
||||||
|
nextSearch = compile(tagre("a", "href", r'(/\d+/)', before="next"))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
textSearch = compile(tagre("img", "title", r'([^"]+)',
|
textSearch = compile(tagre("img", "title", r'([^"]+)',
|
||||||
before=r'//imgs\.xkcd\.com/comics/'))
|
before=r'//imgs\.xkcd\.com/comics/'))
|
||||||
|
|
|
@ -22,15 +22,16 @@ class ZapComic(_ParserScraper):
|
||||||
|
|
||||||
class Zapiro(_BasicScraper):
|
class Zapiro(_BasicScraper):
|
||||||
url = 'http://www.mg.co.za/zapiro/'
|
url = 'http://www.mg.co.za/zapiro/'
|
||||||
starter = bounceStarter(
|
starter = bounceStarter()
|
||||||
url, compile(tagre("li", "class", r'nav_older') +
|
|
||||||
tagre("a", "href", r'(http://mg\.co\.za/cartoon/[^"]+)')))
|
|
||||||
stripUrl = 'http://mg.co.za/cartoon/%s'
|
stripUrl = 'http://mg.co.za/cartoon/%s'
|
||||||
firstStripUrl = stripUrl % 'zapiro_681'
|
firstStripUrl = stripUrl % 'zapiro_681'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))
|
||||||
prevSearch = compile(tagre("li", "class", r'nav_older') +
|
prevSearch = compile(tagre("li", "class", r'nav_older') +
|
||||||
tagre("a", "href",
|
tagre("a", "href",
|
||||||
r'(http://mg\.co\.za/cartoon/[^"]+)'))
|
r'(http://mg\.co\.za/cartoon/[^"]+)'))
|
||||||
|
nextSearch = compile(tagre("li", "class", r'nav_older') +
|
||||||
|
tagre("a", "href",
|
||||||
|
r'(http://mg\.co\.za/cartoon/[^"]+)'))
|
||||||
help = 'Index format: yyyy-mm-dd-stripname'
|
help = 'Index format: yyyy-mm-dd-stripname'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
Loading…
Reference in a new issue