Refactor: Convert starter to simple method.
This commit is contained in:
parent
1aebdce5d2
commit
0468f2f31a
30 changed files with 145 additions and 158 deletions
|
@ -30,32 +30,24 @@ def regexNamer(regex, usePageUrl=False):
|
||||||
return _namer
|
return _namer
|
||||||
|
|
||||||
|
|
||||||
def bounceStarter():
|
def bounceStarter(self):
|
||||||
"""Get start URL by "bouncing" back and forth one time.
|
"""Get start URL by "bouncing" back and forth one time.
|
||||||
|
|
||||||
This needs the url, prevSearch and nextSearch properties to be defined on the class.
|
This needs the url, prevSearch and nextSearch properties to be defined on the class.
|
||||||
"""
|
"""
|
||||||
@classmethod
|
data = self.getPage(self.url)
|
||||||
def _starter(cls):
|
url1 = self.fetchUrl(self.url, data, self.prevSearch)
|
||||||
"""Get bounced start URL."""
|
data = self.getPage(url1)
|
||||||
data = cls.getPage(cls.url)
|
return self.fetchUrl(url1, data, self.nextSearch)
|
||||||
url1 = cls.fetchUrl(cls.url, data, cls.prevSearch)
|
|
||||||
data = cls.getPage(url1)
|
|
||||||
return cls.fetchUrl(url1, data, cls.nextSearch)
|
|
||||||
return _starter
|
|
||||||
|
|
||||||
|
|
||||||
def indirectStarter():
|
def indirectStarter(self):
|
||||||
"""Get start URL by indirection.
|
"""Get start URL by indirection.
|
||||||
|
|
||||||
This is useful for comics where the latest comic can't be reached at a
|
This is useful for comics where the latest comic can't be reached at a
|
||||||
stable URL. If the class has an attribute 'startUrl', this page is fetched
|
stable URL. If the class has an attribute 'startUrl', this page is fetched
|
||||||
first, otherwise the page at 'url' is fetched. After that, the attribute
|
first, otherwise the page at 'url' is fetched. After that, the attribute
|
||||||
'latestSearch' is used on the page content to find the latest strip."""
|
'latestSearch' is used on the page content to find the latest strip."""
|
||||||
@classmethod
|
url = self.startUrl if hasattr(self, "startUrl") else self.url
|
||||||
def _starter(cls):
|
data = self.getPage(url)
|
||||||
"""Get indirect start URL."""
|
return self.fetchUrl(url, data, self.latestSearch)
|
||||||
url = cls.startUrl if hasattr(cls, "startUrl") else cls.url
|
|
||||||
data = cls.getPage(url)
|
|
||||||
return cls.fetchUrl(url, data, cls.latestSearch)
|
|
||||||
return _starter
|
|
||||||
|
|
|
@ -16,7 +16,7 @@ from .common import _WordPressScraper, xpath_class, WP_LATEST_SEARCH
|
||||||
class AbstruseGoose(_BasicScraper):
|
class AbstruseGoose(_BasicScraper):
|
||||||
url = 'http://abstrusegoose.com/'
|
url = 'http://abstrusegoose.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
starter = bounceStarter()
|
starter = bounceStarter
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre('img', 'src',
|
imageSearch = compile(tagre('img', 'src',
|
||||||
|
@ -122,7 +122,7 @@ class Alice(_WordPressScraper):
|
||||||
url = 'http://www.alicecomics.com/'
|
url = 'http://www.alicecomics.com/'
|
||||||
prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
|
prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
|
||||||
latestSearch = '//a[text()="Latest Alice!"]'
|
latestSearch = '//a[text()="Latest Alice!"]'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class AlienLovesPredator(_BasicScraper):
|
class AlienLovesPredator(_BasicScraper):
|
||||||
|
@ -262,7 +262,7 @@ class Ashes(_WordPressScraper):
|
||||||
url = 'http://www.flowerlarkstudios.com/comic/prologue/10232009/'
|
url = 'http://www.flowerlarkstudios.com/comic/prologue/10232009/'
|
||||||
firstStripUrl = url
|
firstStripUrl = url
|
||||||
latestSearch = WP_LATEST_SEARCH
|
latestSearch = WP_LATEST_SEARCH
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class ASkeweredParadise(_BasicScraper):
|
class ASkeweredParadise(_BasicScraper):
|
||||||
|
@ -287,7 +287,7 @@ class ASofterWorld(_ParserScraper):
|
||||||
class AstronomyPOTD(_ParserScraper):
|
class AstronomyPOTD(_ParserScraper):
|
||||||
baseUrl = 'http://apod.nasa.gov/apod/'
|
baseUrl = 'http://apod.nasa.gov/apod/'
|
||||||
url = baseUrl + 'astropix.html'
|
url = baseUrl + 'astropix.html'
|
||||||
starter = bounceStarter()
|
starter = bounceStarter
|
||||||
stripUrl = baseUrl + 'ap%s.html'
|
stripUrl = baseUrl + 'ap%s.html'
|
||||||
firstStripUrl = stripUrl % '061012'
|
firstStripUrl = stripUrl % '061012'
|
||||||
imageSearch = '//a/img'
|
imageSearch = '//a/img'
|
||||||
|
|
|
@ -48,7 +48,7 @@ class BalderDash(_ComicControlScraper):
|
||||||
class Bardsworth(_WordPressScraper):
|
class Bardsworth(_WordPressScraper):
|
||||||
url = 'http://www.bardsworth.com/'
|
url = 'http://www.bardsworth.com/'
|
||||||
latestSearch = '//a[@rel="bookmark"]'
|
latestSearch = '//a[@rel="bookmark"]'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class Baroquen(_BasicScraper):
|
class Baroquen(_BasicScraper):
|
||||||
|
@ -72,7 +72,7 @@ class Beetlebum(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % '2006/03/10/quiz-fur-ruskiphile'
|
firstStripUrl = stripUrl % '2006/03/10/quiz-fur-ruskiphile'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)'))
|
imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)'))
|
||||||
prevSearch = compile(tagre('a', 'href',
|
prevSearch = compile(tagre('a', 'href',
|
||||||
|
@ -227,7 +227,7 @@ class BoredAndEvil(_BasicScraper):
|
||||||
imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)'))
|
||||||
prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on.gif')
|
prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on.gif')
|
||||||
latestSearch = prevSearch
|
latestSearch = prevSearch
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -55,7 +55,7 @@ class Carciphona(_BasicScraper):
|
||||||
after="prevarea"))
|
after="prevarea"))
|
||||||
latestSearch = compile(tagre("a", "href",
|
latestSearch = compile(tagre("a", "href",
|
||||||
r'(view\.php\?page=[0-9]+[^"]*)'))
|
r'(view\.php\?page=[0-9]+[^"]*)'))
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
@ -275,7 +275,7 @@ class CoolCatStudio(_BasicScraper):
|
||||||
|
|
||||||
class CorydonCafe(_ParserScraper):
|
class CorydonCafe(_ParserScraper):
|
||||||
url = 'http://corydoncafe.com/'
|
url = 'http://corydoncafe.com/'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
stripUrl = url + '%s.php'
|
stripUrl = url + '%s.php'
|
||||||
imageSearch = "//center[2]//img"
|
imageSearch = "//center[2]//img"
|
||||||
prevSearch = '//a[@title="prev"]'
|
prevSearch = '//a[@title="prev"]'
|
||||||
|
@ -347,7 +347,7 @@ class CucumberQuest(_BasicScraper):
|
||||||
stripUrl = url + 'cq/%s/'
|
stripUrl = url + 'cq/%s/'
|
||||||
firstStripUrl = stripUrl % 'page-1'
|
firstStripUrl = stripUrl % 'page-1'
|
||||||
startUrl = url + 'recent.html'
|
startUrl = url + 'recent.html'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
imageSearch = (
|
imageSearch = (
|
||||||
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)),
|
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)),
|
||||||
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)),
|
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)),
|
||||||
|
@ -379,7 +379,7 @@ class Curvy(_ParserScraper):
|
||||||
|
|
||||||
class CyanideAndHappiness(_BasicScraper):
|
class CyanideAndHappiness(_BasicScraper):
|
||||||
url = 'http://www.explosm.net/comics/'
|
url = 'http://www.explosm.net/comics/'
|
||||||
starter = bounceStarter()
|
starter = bounceStarter
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '15'
|
firstStripUrl = stripUrl % '15'
|
||||||
imageSearch = compile(tagre("img", "src", r'(//files.explosm.net/comics/[^"]+)', before="main-comic"))
|
imageSearch = compile(tagre("img", "src", r'(//files.explosm.net/comics/[^"]+)', before="main-comic"))
|
||||||
|
|
|
@ -1,7 +1,12 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from re import compile
|
from re import compile
|
||||||
|
|
||||||
from ..scraper import make_scraper
|
from ..scraper import make_scraper
|
||||||
from ..util import tagre, getQueryParams
|
from ..util import tagre, getQueryParams
|
||||||
|
|
||||||
|
@ -11,6 +16,7 @@ _prevSearch = compile(_linkTag + tagre("img", "src", r"previous\.gif"))
|
||||||
_nextSearch = compile(_linkTag + tagre("img", "src", r"next\.gif"))
|
_nextSearch = compile(_linkTag + tagre("img", "src", r"next\.gif"))
|
||||||
_lastSearch = compile(_linkTag + tagre("img", "src", r"last\.gif"))
|
_lastSearch = compile(_linkTag + tagre("img", "src", r"last\.gif"))
|
||||||
|
|
||||||
|
|
||||||
def add(name, shortName, imageFolder=None, lastStrip=None):
|
def add(name, shortName, imageFolder=None, lastStrip=None):
|
||||||
classname = 'CloneManga_%s' % name
|
classname = 'CloneManga_%s' % name
|
||||||
_url = 'http://manga.clone-army.org'
|
_url = 'http://manga.clone-army.org'
|
||||||
|
@ -22,22 +28,21 @@ def add(name, shortName, imageFolder=None, lastStrip=None):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
return '%03d' % int(getQueryParams(pageUrl)['page'][0])
|
return '%03d' % int(getQueryParams(pageUrl)['page'][0])
|
||||||
|
|
||||||
@classmethod
|
def _starter(self):
|
||||||
def _starter(cls):
|
|
||||||
# first, try hopping to previous and next comic
|
# first, try hopping to previous and next comic
|
||||||
data = cls.getPage(baseUrl)
|
data = self.getPage(baseUrl)
|
||||||
try:
|
try:
|
||||||
url = cls.fetchUrl(baseUrl, data, _prevSearch)
|
url = self.fetchUrl(baseUrl, data, _prevSearch)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
# no previous link found, try hopping to last comic
|
# no previous link found, try hopping to last comic
|
||||||
return cls.fetchUrl(baseUrl, data, _lastSearch)
|
return self.fetchUrl(baseUrl, data, _lastSearch)
|
||||||
else:
|
else:
|
||||||
data = cls.getPage(url)
|
data = self.getPage(url)
|
||||||
return cls.fetchUrl(url, data, _nextSearch)
|
return self.fetchUrl(url, data, _nextSearch)
|
||||||
|
|
||||||
attrs = dict(
|
attrs = dict(
|
||||||
name='CloneManga/' + name,
|
name='CloneManga/' + name,
|
||||||
stripUrl = baseUrl + '?page=%s',
|
stripUrl=baseUrl + '?page=%s',
|
||||||
imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (_url, imageFolder), after="center")),
|
imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (_url, imageFolder), after="center")),
|
||||||
prevSearch=_prevSearch,
|
prevSearch=_prevSearch,
|
||||||
help='Index format: n',
|
help='Index format: n',
|
||||||
|
|
|
@ -20,7 +20,7 @@ class _ComicFury(_ParserScraper):
|
||||||
prevSearch = ('//a[@rel="prev"]', XPATH_LINK % "Previous")
|
prevSearch = ('//a[@rel="prev"]', XPATH_LINK % "Previous")
|
||||||
nextSearch = ('//a[@rel="next"]', XPATH_LINK % "Next")
|
nextSearch = ('//a[@rel="next"]', XPATH_LINK % "Next")
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
starter = bounceStarter()
|
starter = bounceStarter
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
|
|
@ -18,11 +18,10 @@ class _Creators(_ParserScraper):
|
||||||
def getName(cls):
|
def getName(cls):
|
||||||
return 'Creators/' + cls.__name__
|
return 'Creators/' + cls.__name__
|
||||||
|
|
||||||
@classmethod
|
def starter(self):
|
||||||
def starter(cls):
|
start = self.url + self.path
|
||||||
start = cls.url + cls.path
|
data = self.getPage(start)
|
||||||
data = cls.getPage(start)
|
return self.fetchUrl(start, data, self.latestSearch)
|
||||||
return cls.fetchUrl(start, data, cls.latestSearch)
|
|
||||||
|
|
||||||
|
|
||||||
class _CreatorsEs(_Creators):
|
class _CreatorsEs(_Creators):
|
||||||
|
|
|
@ -15,7 +15,7 @@ from .common import _WordPressScraper, xpath_class
|
||||||
|
|
||||||
class DailyDose(_ParserScraper):
|
class DailyDose(_ParserScraper):
|
||||||
url = 'http://dailydoseofcomics.com/'
|
url = 'http://dailydoseofcomics.com/'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
imageSearch = '//p/a/img'
|
imageSearch = '//p/a/img'
|
||||||
prevSearch = '//a[@rel="prev"]'
|
prevSearch = '//a[@rel="prev"]'
|
||||||
latestSearch = '//a[@rel="bookmark"]'
|
latestSearch = '//a[@rel="bookmark"]'
|
||||||
|
@ -32,7 +32,7 @@ class DamnLol(_BasicScraper):
|
||||||
compile(tagre("img", "src", r'(%spics/[^"]+)' % rurl)),
|
compile(tagre("img", "src", r'(%spics/[^"]+)' % rurl)),
|
||||||
)
|
)
|
||||||
help = 'Index format: stripname-number'
|
help = 'Index format: stripname-number'
|
||||||
starter = bounceStarter()
|
starter = bounceStarter
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
@ -155,7 +155,7 @@ class Dilbert(_BasicScraper):
|
||||||
url = 'http://dilbert.com/'
|
url = 'http://dilbert.com/'
|
||||||
stripUrl = url + '/strip/%s/'
|
stripUrl = url + '/strip/%s/'
|
||||||
firstStripUrl = stripUrl % '1989-04-16'
|
firstStripUrl = stripUrl % '1989-04-16'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
prevSearch = compile(tagre("a", "href", r'(/strip/\d+-\d+-\d+)', after="Older Strip"))
|
prevSearch = compile(tagre("a", "href", r'(/strip/\d+-\d+-\d+)', after="Older Strip"))
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://assets.amuniversal.com/\w+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://assets.amuniversal.com/\w+)'))
|
||||||
latestSearch = compile(tagre("a", "href",
|
latestSearch = compile(tagre("a", "href",
|
||||||
|
@ -255,7 +255,7 @@ class DresdenCodak(_BasicScraper):
|
||||||
latestSearch = compile(tagre("div", "id", "preview") +
|
latestSearch = compile(tagre("div", "id", "preview") +
|
||||||
tagre("a", "href",
|
tagre("a", "href",
|
||||||
r'(%s\d+/\d+/\d+/[^"]+)' % rurl))
|
r'(%s\d+/\d+/\d+/[^"]+)' % rurl))
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class DrFun(_BasicScraper):
|
class DrFun(_BasicScraper):
|
||||||
|
|
|
@ -1,18 +1,24 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from re import compile
|
from re import compile
|
||||||
|
|
||||||
from ..scraper import make_scraper
|
from ..scraper import make_scraper
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
|
|
||||||
|
|
||||||
# note: adding the compile() functions inside add() is a major performance hog
|
# note: adding the compile() functions inside add() is a major performance hog
|
||||||
_imageSearch = compile(tagre("img", "src", r'(https://s3\.amazonaws\.com/media\.drunkduck\.com/[^"]+)', before="page-image"))
|
_imageSearch = compile(tagre("img", "src", r'(https://s3\.amazonaws\.com/media\.drunkduck\.com/[^"]+)', before="page-image"))
|
||||||
_linkSearch = tagre("a", "href", r'(/[^"]+/\d+/)')
|
_linkSearch = tagre("a", "href", r'(/[^"]+/\d+/)')
|
||||||
_prevSearch = compile(_linkSearch + tagre("img", "class", "arrow_prev"))
|
_prevSearch = compile(_linkSearch + tagre("img", "class", "arrow_prev"))
|
||||||
_nextSearch = compile(_linkSearch + tagre("img", "class", "arrow_next"))
|
_nextSearch = compile(_linkSearch + tagre("img", "class", "arrow_next"))
|
||||||
_lastSearch = compile(_linkSearch + tagre("img", "class", "arrow_last"))
|
_lastSearch = compile(_linkSearch + tagre("img", "class", "arrow_last"))
|
||||||
|
|
||||||
|
|
||||||
def add(name, path):
|
def add(name, path):
|
||||||
# XXX disallowed by the server administrator
|
# XXX disallowed by the server administrator
|
||||||
classname = '_DrunkDuck_%s' % name
|
classname = '_DrunkDuck_%s' % name
|
||||||
|
@ -24,18 +30,17 @@ def add(name, path):
|
||||||
ext = imageUrl.rsplit('.')[-1]
|
ext = imageUrl.rsplit('.')[-1]
|
||||||
return '%d.%s' % (index, ext)
|
return '%d.%s' % (index, ext)
|
||||||
|
|
||||||
@classmethod
|
def _starter(self):
|
||||||
def _starter(cls):
|
|
||||||
# first, try hopping to previous and next comic
|
# first, try hopping to previous and next comic
|
||||||
data = cls.getPage(_url)
|
data = self.getPage(_url)
|
||||||
try:
|
try:
|
||||||
url = cls.fetchUrl(_url, data, _prevSearch)
|
url = self.fetchUrl(_url, data, _prevSearch)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
# no previous link found, try hopping to last comic
|
# no previous link found, try hopping to last comic
|
||||||
return cls.fetchUrl(_url, data, _lastSearch)
|
return self.fetchUrl(_url, data, _lastSearch)
|
||||||
else:
|
else:
|
||||||
data = cls.getPage(url)
|
data = self.getPage(url)
|
||||||
return cls.fetchUrl(url, data, _nextSearch)
|
return self.fetchUrl(url, data, _nextSearch)
|
||||||
|
|
||||||
attrs = dict(
|
attrs = dict(
|
||||||
name = 'DrunkDuck/' + name,
|
name = 'DrunkDuck/' + name,
|
||||||
|
|
8
dosagelib/plugins/e.py
Executable file → Normal file
8
dosagelib/plugins/e.py
Executable file → Normal file
|
@ -15,7 +15,7 @@ from .common import _WordPressScraper, WP_LATEST_SEARCH, xpath_class
|
||||||
|
|
||||||
class EarthsongSaga(_ParserScraper):
|
class EarthsongSaga(_ParserScraper):
|
||||||
url = 'http://earthsongsaga.com/index.php'
|
url = 'http://earthsongsaga.com/index.php'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
imageSearch = '//div[@id="comic"]//img'
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
prevSearch = '//a[@title="Previous"]'
|
prevSearch = '//a[@title="Previous"]'
|
||||||
latestSearch = '//div[@id="leftmenu"]/span[1]/a[1]'
|
latestSearch = '//div[@id="leftmenu"]/span[1]/a[1]'
|
||||||
|
@ -45,13 +45,13 @@ class EasilyAmused(_WordPressScraper):
|
||||||
url = 'http://www.flowerlarkstudios.com/comic/college-daze/ea01/'
|
url = 'http://www.flowerlarkstudios.com/comic/college-daze/ea01/'
|
||||||
firstStripUrl = url
|
firstStripUrl = url
|
||||||
latestSearch = WP_LATEST_SEARCH
|
latestSearch = WP_LATEST_SEARCH
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class EatLiver(_BasicScraper):
|
class EatLiver(_BasicScraper):
|
||||||
url = 'http://www.eatliver.com/'
|
url = 'http://www.eatliver.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
stripUrl = url + "i.php?n=%s"
|
stripUrl = url + "i.php?n=%s"
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl,
|
imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl,
|
||||||
|
@ -185,7 +185,7 @@ class Eryl(_WordPressScraper):
|
||||||
url = 'http://www.flowerlarkstudios.com/comic/prologue-migration/page-i/'
|
url = 'http://www.flowerlarkstudios.com/comic/prologue-migration/page-i/'
|
||||||
firstStripUrl = url
|
firstStripUrl = url
|
||||||
latestSearch = WP_LATEST_SEARCH
|
latestSearch = WP_LATEST_SEARCH
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
help = 'This was known as DarkWings in previous Dosage versions'
|
help = 'This was known as DarkWings in previous Dosage versions'
|
||||||
|
|
||||||
|
|
||||||
|
|
6
dosagelib/plugins/f.py
Executable file → Normal file
6
dosagelib/plugins/f.py
Executable file → Normal file
|
@ -28,7 +28,7 @@ class FantasyRealms(_BasicScraper):
|
||||||
prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
|
prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
|
||||||
latestSearch = compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE)
|
latestSearch = compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE)
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class FauxPas(_BasicScraper):
|
class FauxPas(_BasicScraper):
|
||||||
|
@ -48,7 +48,7 @@ class FeyWinds(_BasicScraper):
|
||||||
prevSearch = compile(r"(page.php\?id=.+?)'.+?navprevious.png")
|
prevSearch = compile(r"(page.php\?id=.+?)'.+?navprevious.png")
|
||||||
latestSearch = compile(r'(comic/page.php\?id.+?)"')
|
latestSearch = compile(r'(comic/page.php\?id.+?)"')
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class FilibusterCartoons(_BasicScraper):
|
class FilibusterCartoons(_BasicScraper):
|
||||||
|
@ -161,7 +161,7 @@ class FredoAndPidjin(_BasicScraper):
|
||||||
prevSearch = compile(tagre('a', 'href', '([^"]+)') + "Prev</a>")
|
prevSearch = compile(tagre('a', 'href', '([^"]+)') + "Prev</a>")
|
||||||
latestSearch = compile(tagre('a', 'href', "(" + url +
|
latestSearch = compile(tagre('a', 'href', "(" + url +
|
||||||
r'\d\d\d\d/\d\d/\d\d/[^"]+/)'))
|
r'\d\d\d\d/\d\d/\d\d/[^"]+/)'))
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class Freefall(_BasicScraper):
|
class Freefall(_BasicScraper):
|
||||||
|
|
|
@ -27,7 +27,7 @@ class Garanos(_BasicScraper):
|
||||||
baseUrl = 'http://garanos.alexheberling.com/'
|
baseUrl = 'http://garanos.alexheberling.com/'
|
||||||
rurl = escape(baseUrl)
|
rurl = escape(baseUrl)
|
||||||
url = baseUrl + 'pages/page-1/'
|
url = baseUrl + 'pages/page-1/'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
stripUrl = baseUrl + 'pages/page-%s'
|
stripUrl = baseUrl + 'pages/page-%s'
|
||||||
imageSearch = compile(
|
imageSearch = compile(
|
||||||
tagre("img", "src",
|
tagre("img", "src",
|
||||||
|
@ -136,7 +136,7 @@ class GoGetARoomie(_ComicControlScraper):
|
||||||
|
|
||||||
class GoneWithTheBlastwave(_BasicScraper):
|
class GoneWithTheBlastwave(_BasicScraper):
|
||||||
url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
|
url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
stripUrl = url[:-1] + '%s'
|
stripUrl = url[:-1] + '%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(r'<img.+src=".+(/comics/.+?)"')
|
imageSearch = compile(r'<img.+src=".+(/comics/.+?)"')
|
||||||
|
|
|
@ -20,13 +20,12 @@ class _GoComics(_ParserScraper):
|
||||||
def getName(cls):
|
def getName(cls):
|
||||||
return 'GoComics/' + cls.__name__[2:]
|
return 'GoComics/' + cls.__name__[2:]
|
||||||
|
|
||||||
@classmethod
|
def starter(self):
|
||||||
def starter(cls):
|
url1 = self.url + self.path
|
||||||
url1 = cls.url + cls.path
|
data = self.getPage(url1)
|
||||||
data = cls.getPage(url1)
|
url2 = self.fetchUrl(url1, data, self.prevSearch)
|
||||||
url2 = cls.fetchUrl(url1, data, cls.prevSearch)
|
data = self.getPage(url2)
|
||||||
data = cls.getPage(url2)
|
return self.fetchUrl(url2, data, self.nextSearch)
|
||||||
return cls.fetchUrl(url2, data, cls.nextSearch)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, image_url, page_url):
|
def namer(cls, image_url, page_url):
|
||||||
|
|
|
@ -22,13 +22,12 @@ class HagarTheHorrible(_BasicScraper):
|
||||||
prevSearch = compile(tagre("a", "href", prevUrl, after="Previous"))
|
prevSearch = compile(tagre("a", "href", prevUrl, after="Previous"))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
@classmethod
|
def starter(self):
|
||||||
def starter(cls):
|
|
||||||
"""Return last gallery link."""
|
"""Return last gallery link."""
|
||||||
url = 'http://www.hagardunor.net/comics.php'
|
url = 'http://www.hagardunor.net/comics.php'
|
||||||
data = cls.getPage(url)
|
data = self.getPage(url)
|
||||||
pattern = compile(tagre("a", "href", cls.prevUrl))
|
pattern = compile(tagre("a", "href", self.prevUrl))
|
||||||
for starturl in cls.fetchUrls(url, data, pattern):
|
for starturl in self.fetchUrls(url, data, pattern):
|
||||||
pass
|
pass
|
||||||
return starturl
|
return starturl
|
||||||
|
|
||||||
|
@ -41,7 +40,7 @@ class _HappyJar(_WordPressScraper):
|
||||||
class HarkAVagrant(_BasicScraper):
|
class HarkAVagrant(_BasicScraper):
|
||||||
url = 'http://www.harkavagrant.com/'
|
url = 'http://www.harkavagrant.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
starter = bounceStarter()
|
starter = bounceStarter
|
||||||
stripUrl = url + 'index.php?id=%s'
|
stripUrl = url + 'index.php?id=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl,
|
imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl,
|
||||||
|
|
|
@ -34,7 +34,7 @@ class JerkCity(_BasicScraper):
|
||||||
class JimBenton(_BasicScraper):
|
class JimBenton(_BasicScraper):
|
||||||
url = 'http://www.jimbenton.com/page14/page14.html'
|
url = 'http://www.jimbenton.com/page14/page14.html'
|
||||||
stripUrl = 'http://www.jimbenton.com/page14/files/JimBentonComic-%s.html'
|
stripUrl = 'http://www.jimbenton.com/page14/files/JimBentonComic-%s.html'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
imageSearch = compile(tagre("img", "src", r'(JimBentonComic-[^"]+)',
|
imageSearch = compile(tagre("img", "src", r'(JimBentonComic-[^"]+)',
|
||||||
before="photo-frame"))
|
before="photo-frame"))
|
||||||
prevSearch = compile(tagre("a", "href", r'(JimBentonComic-[^>]+\.html)',
|
prevSearch = compile(tagre("a", "href", r'(JimBentonComic-[^>]+\.html)',
|
||||||
|
|
|
@ -24,7 +24,7 @@ class Lackadaisy(_BasicScraper):
|
||||||
nextSearch = compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") +
|
nextSearch = compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") +
|
||||||
"Next")
|
"Next")
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
starter = bounceStarter()
|
starter = bounceStarter
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
@ -38,7 +38,7 @@ class Laiyu(_WordPressScraper):
|
||||||
url = 'http://www.flowerlarkstudios.com/comic/preliminary-concepts/welcome/'
|
url = 'http://www.flowerlarkstudios.com/comic/preliminary-concepts/welcome/'
|
||||||
firstStripUrl = url
|
firstStripUrl = url
|
||||||
latestSearch = WP_LATEST_SEARCH
|
latestSearch = WP_LATEST_SEARCH
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class LasLindas(_BasicScraper):
|
class LasLindas(_BasicScraper):
|
||||||
|
@ -67,7 +67,7 @@ class LeastICouldDo(_BasicScraper):
|
||||||
after="Previous"))
|
after="Previous"))
|
||||||
latestSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
|
latestSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
|
||||||
after="feature-comic"))
|
after="feature-comic"))
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
@ -117,5 +117,5 @@ class LookingForGroup(_ParserScraper):
|
||||||
imageSearch = '#comic img'
|
imageSearch = '#comic img'
|
||||||
prevSearch = '#comic-left > a'
|
prevSearch = '#comic-left > a'
|
||||||
latestSearch = '#header-dropdown-comic-lfg > a:nth-of-type(2)'
|
latestSearch = '#header-dropdown-comic-lfg > a:nth-of-type(2)'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
|
@ -104,7 +104,7 @@ class NichtLustig(_BasicScraper):
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
|
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
|
||||||
latestSearch = compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)'))
|
latestSearch = compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)'))
|
||||||
help = 'Index format: yymmdd'
|
help = 'Index format: yymmdd'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class Nicky510(_WordPressScraper):
|
class Nicky510(_WordPressScraper):
|
||||||
|
@ -137,7 +137,7 @@ class NoMoreSavePoints(_WordPressScraper):
|
||||||
url = 'http://www.flowerlarkstudios.com/comic/no-more-save-points/mushroom-hopping/'
|
url = 'http://www.flowerlarkstudios.com/comic/no-more-save-points/mushroom-hopping/'
|
||||||
firstStripUrl = url
|
firstStripUrl = url
|
||||||
latestSearch = WP_LATEST_SEARCH
|
latestSearch = WP_LATEST_SEARCH
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class NoNeedForBushido(_BasicScraper):
|
class NoNeedForBushido(_BasicScraper):
|
||||||
|
@ -153,7 +153,7 @@ class NoNeedForBushido(_BasicScraper):
|
||||||
latestSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
|
latestSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
|
||||||
after="last-webcomic"))
|
after="last-webcomic"))
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class NotInventedHere(_BasicScraper):
|
class NotInventedHere(_BasicScraper):
|
||||||
|
|
|
@ -11,9 +11,8 @@ class _NuklearPower(_ParserScraper):
|
||||||
prevSearch = '//a[@rel="prev"]'
|
prevSearch = '//a[@rel="prev"]'
|
||||||
imageSearch = '//div[@id="comic"]/img'
|
imageSearch = '//div[@id="comic"]/img'
|
||||||
|
|
||||||
@classmethod
|
def starter(self):
|
||||||
def starter(cls):
|
return self.url + self.path + '/'
|
||||||
return cls.url + cls.path + '/'
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getName(cls):
|
def getName(cls):
|
||||||
|
|
20
dosagelib/plugins/p.py
Executable file → Normal file
20
dosagelib/plugins/p.py
Executable file → Normal file
|
@ -20,7 +20,7 @@ class PandyLand(_WordPressScraper):
|
||||||
|
|
||||||
class ParadigmShift(_BasicScraper):
|
class ParadigmShift(_BasicScraper):
|
||||||
url = 'http://www.paradigmshiftmanga.com/'
|
url = 'http://www.paradigmshiftmanga.com/'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
stripUrl = url + 'ps/%s.html'
|
stripUrl = url + 'ps/%s.html'
|
||||||
imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)'))
|
imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||||
|
@ -86,6 +86,7 @@ class PennyArcade(_BasicScraper):
|
||||||
before="btnPrev"))
|
before="btnPrev"))
|
||||||
nextSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
|
nextSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
|
||||||
before="btnNext"))
|
before="btnNext"))
|
||||||
|
starter = bounceStarter
|
||||||
help = 'Index format: yyyy/mm/dd/'
|
help = 'Index format: yyyy/mm/dd/'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -99,15 +100,6 @@ class PennyArcade(_BasicScraper):
|
||||||
prevUrl = "%s/%s/%s" % (dummy, yyyy, mm)
|
prevUrl = "%s/%s/%s" % (dummy, yyyy, mm)
|
||||||
return prevUrl
|
return prevUrl
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def starter(cls):
|
|
||||||
"""Get bounced start URL."""
|
|
||||||
data = cls.getPage(cls.url)
|
|
||||||
url1 = cls.fetchUrl(cls.url, data, cls.prevSearch)
|
|
||||||
data = cls.getPage(url1)
|
|
||||||
url2 = cls.fetchUrl(url1, data, cls.nextSearch)
|
|
||||||
return cls.prevUrlModifier(url2)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
p = pageUrl.split('/')
|
p = pageUrl.split('/')
|
||||||
|
@ -162,7 +154,7 @@ class PicPakDog(_BasicScraper):
|
||||||
|
|
||||||
class PiledHigherAndDeeper(_BasicScraper):
|
class PiledHigherAndDeeper(_BasicScraper):
|
||||||
url = 'http://www.phdcomics.com/comics.php'
|
url = 'http://www.phdcomics.com/comics.php'
|
||||||
starter = bounceStarter()
|
starter = bounceStarter
|
||||||
stripUrl = url + '?comicid=%s'
|
stripUrl = url + '?comicid=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd\d+s\d?\.\w{3,4})', quote=""))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd\d+s\d?\.\w{3,4})', quote=""))
|
||||||
|
@ -207,7 +199,7 @@ class PokeyThePenguin(_ParserScraper):
|
||||||
imageSearch = '//p/img'
|
imageSearch = '//p/img'
|
||||||
latestSearch = '(//a)[last()]'
|
latestSearch = '(//a)[last()]'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
def getPrevUrl(self, url, data):
|
def getPrevUrl(self, url, data):
|
||||||
|
@ -231,7 +223,7 @@ class PoorlyDrawnLines(_BasicScraper):
|
||||||
|
|
||||||
class Precocious(_BasicScraper):
|
class Precocious(_BasicScraper):
|
||||||
url = 'http://www.precociouscomic.com/'
|
url = 'http://www.precociouscomic.com/'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
stripUrl = url + 'archive/comic/%s'
|
stripUrl = url + 'archive/comic/%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
|
imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png"))
|
prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png"))
|
||||||
|
@ -243,7 +235,7 @@ class Precocious(_BasicScraper):
|
||||||
class PS238(_ParserScraper):
|
class PS238(_ParserScraper):
|
||||||
url = 'http://ps238.nodwick.com/'
|
url = 'http://ps238.nodwick.com/'
|
||||||
stripUrl = url + '/comic/%s/'
|
stripUrl = url + '/comic/%s/'
|
||||||
starter = bounceStarter()
|
starter = bounceStarter
|
||||||
imageSearch = '//div[@id="comic"]//img'
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
prevSearch = '//a[@class="comic-nav-base comic-nav-previous"]'
|
prevSearch = '//a[@class="comic-nav-base comic-nav-previous"]'
|
||||||
nextSearch = '//a[@class="comic-nav-base comic-nav-next"]'
|
nextSearch = '//a[@class="comic-nav-base comic-nav-next"]'
|
||||||
|
|
|
@ -51,7 +51,7 @@ class RealmOfAtland(_BasicScraper):
|
||||||
class RedMeat(_BasicScraper):
|
class RedMeat(_BasicScraper):
|
||||||
baseUrl = 'http://www.redmeat.com/redmeat/'
|
baseUrl = 'http://www.redmeat.com/redmeat/'
|
||||||
url = baseUrl + 'current/index.html'
|
url = baseUrl + 'current/index.html'
|
||||||
starter = bounceStarter()
|
starter = bounceStarter
|
||||||
stripUrl = baseUrl + '%s/index.html'
|
stripUrl = baseUrl + '%s/index.html'
|
||||||
firstStripUrl = stripUrl % '1996-06-10'
|
firstStripUrl = stripUrl % '1996-06-10'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.redmeat\.com/imager/b/redmeat/[^"]*\.png)'))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.redmeat\.com/imager/b/redmeat/[^"]*\.png)'))
|
||||||
|
|
|
@ -27,13 +27,12 @@ class SabrinaOnline(_BasicScraper):
|
||||||
adult = True
|
adult = True
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
|
|
||||||
@classmethod
|
def starter(self):
|
||||||
def starter(cls):
|
|
||||||
"""Pick last one in a list of archive pages."""
|
"""Pick last one in a list of archive pages."""
|
||||||
archive = cls.url + 'archive.html'
|
archive = self.url + 'archive.html'
|
||||||
data = cls.getPage(archive)
|
data = self.getPage(archive)
|
||||||
search = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)"))
|
search = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)"))
|
||||||
archivepages = cls.fetchUrls(archive, data, search)
|
archivepages = self.fetchUrls(archive, data, search)
|
||||||
return archivepages[-1]
|
return archivepages[-1]
|
||||||
|
|
||||||
|
|
||||||
|
@ -69,7 +68,7 @@ class ScandinaviaAndTheWorld(_ParserScraper):
|
||||||
url = 'http://satwcomic.com/'
|
url = 'http://satwcomic.com/'
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
|
firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
imageSearch = '//img[@itemprop="image"]'
|
imageSearch = '//img[@itemprop="image"]'
|
||||||
prevSearch = '//a[@accesskey="p"]'
|
prevSearch = '//a[@accesskey="p"]'
|
||||||
latestSearch = '//a[text()="View latest comic"]'
|
latestSearch = '//a[text()="View latest comic"]'
|
||||||
|
@ -166,14 +165,13 @@ class ScurryAndCover(_ParserScraper):
|
||||||
image = images[0]
|
image = images[0]
|
||||||
return [cls.url + '/images/pages/' + image + '-xsmall.png']
|
return [cls.url + '/images/pages/' + image + '-xsmall.png']
|
||||||
|
|
||||||
@classmethod
|
def starter(self):
|
||||||
def starter(cls):
|
|
||||||
"""Go forward as far as possibe, then start."""
|
"""Go forward as far as possibe, then start."""
|
||||||
url = cls.url
|
url = self.url
|
||||||
while True:
|
while True:
|
||||||
data = cls.getPage(url)
|
data = self.getPage(url)
|
||||||
try:
|
try:
|
||||||
url = cls.fetchUrl(url, data, cls.nextSearch)
|
url = self.fetchUrl(url, data, self.nextSearch)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
break
|
break
|
||||||
return url
|
return url
|
||||||
|
@ -197,7 +195,7 @@ class SexyLosers(_BasicScraper):
|
||||||
prevSearch = compile(r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<', IGNORECASE)
|
prevSearch = compile(r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<', IGNORECASE)
|
||||||
latestSearch = compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>', IGNORECASE)
|
latestSearch = compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>', IGNORECASE)
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
@ -334,7 +332,7 @@ class SnowFlame(_WordPressScraper):
|
||||||
url = 'http://www.snowflamecomic.com/'
|
url = 'http://www.snowflamecomic.com/'
|
||||||
stripUrl = url + '?comic=snowflame-%s-%s'
|
stripUrl = url + '?comic=snowflame-%s-%s'
|
||||||
firstStripUrl = stripUrl % ('01', '01')
|
firstStripUrl = stripUrl % ('01', '01')
|
||||||
starter = bounceStarter()
|
starter = bounceStarter
|
||||||
nextSearch = WP_LATEST_SEARCH
|
nextSearch = WP_LATEST_SEARCH
|
||||||
help = 'Index format: chapter-page'
|
help = 'Index format: chapter-page'
|
||||||
|
|
||||||
|
@ -396,7 +394,7 @@ class Spamusement(_BasicScraper):
|
||||||
IGNORECASE)
|
IGNORECASE)
|
||||||
latestSearch = prevSearch
|
latestSearch = prevSearch
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class SpareParts(_BasicScraper):
|
class SpareParts(_BasicScraper):
|
||||||
|
@ -507,7 +505,7 @@ class StuffNoOneToldMe(_BasicScraper):
|
||||||
stripUrl = url + '%s.html'
|
stripUrl = url + '%s.html'
|
||||||
firstStripUrl = stripUrl % '2010/05/01'
|
firstStripUrl = stripUrl % '2010/05/01'
|
||||||
olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
|
olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
imageSearch = (
|
imageSearch = (
|
||||||
compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') +
|
compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') +
|
||||||
r"(?:</a>|<br />)"),
|
r"(?:</a>|<br />)"),
|
||||||
|
|
|
@ -1,10 +1,16 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from re import compile
|
from re import compile
|
||||||
|
|
||||||
from ..scraper import make_scraper
|
from ..scraper import make_scraper
|
||||||
from ..util import tagre, quote, case_insensitive_re
|
from ..util import tagre, quote, case_insensitive_re
|
||||||
|
|
||||||
|
|
||||||
# SmackJeeves is a crawlers nightmare - users are allowed to edit HTML directly.
|
# SmackJeeves is a crawlers nightmare - users are allowed to edit HTML directly.
|
||||||
# That's why there are so much different search patterns.
|
# That's why there are so much different search patterns.
|
||||||
|
|
||||||
|
@ -31,6 +37,7 @@ _nextSearch = (
|
||||||
compile(_linkSearch + tagre("img", "src", r"[^']+/(?:forthnav)\.png[^']*", quote="'")),
|
compile(_linkSearch + tagre("img", "src", r"[^']+/(?:forthnav)\.png[^']*", quote="'")),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def add(name, url, adult, bounce):
|
def add(name, url, adult, bounce):
|
||||||
classname = 'SmackJeeves_' + name
|
classname = 'SmackJeeves_' + name
|
||||||
|
|
||||||
|
@ -41,15 +48,14 @@ def add(name, url, adult, bounce):
|
||||||
return 'http://www.smackjeeves.com/mature.php?ref=' + quote(pageUrl)
|
return 'http://www.smackjeeves.com/mature.php?ref=' + quote(pageUrl)
|
||||||
return pageUrl
|
return pageUrl
|
||||||
|
|
||||||
@classmethod
|
def _starter(self):
|
||||||
def _starter(cls):
|
|
||||||
"""Get start URL."""
|
"""Get start URL."""
|
||||||
url1 = modifier(url)
|
url1 = modifier(url)
|
||||||
data = cls.getPage(url1)
|
data = self.getPage(url1)
|
||||||
url2 = cls.fetchUrl(url1, data, cls.prevSearch)
|
url2 = self.fetchUrl(url1, data, self.prevSearch)
|
||||||
if bounce:
|
if bounce:
|
||||||
data = cls.getPage(url2)
|
data = self.getPage(url2)
|
||||||
url3 = cls.fetchUrl(url2, data, _nextSearch)
|
url3 = self.fetchUrl(url2, data, _nextSearch)
|
||||||
return modifier(url3)
|
return modifier(url3)
|
||||||
return modifier(url2)
|
return modifier(url2)
|
||||||
|
|
||||||
|
@ -76,7 +82,8 @@ def add(name, url, adult, bounce):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# do not edit anything below since these entries are generated from scripts/update.sh
|
# do not edit anything below since these entries are generated from
|
||||||
|
# scripts/update_plugins.sh
|
||||||
# DO NOT REMOVE
|
# DO NOT REMOVE
|
||||||
add('20TimesKirby', 'http://20xkirby.smackjeeves.com/comics/', False, True)
|
add('20TimesKirby', 'http://20xkirby.smackjeeves.com/comics/', False, True)
|
||||||
add('2Kingdoms', 'http://2kingdoms.smackjeeves.com/comics/', False, False)
|
add('2Kingdoms', 'http://2kingdoms.smackjeeves.com/comics/', False, False)
|
||||||
|
@ -110,7 +117,7 @@ add('AlwaysRainingHere', 'http://alwaysraininghere.smackjeeves.com/comics/', Fal
|
||||||
add('Amaravati', 'http://amaravati.smackjeeves.com/comics/', False, True)
|
add('Amaravati', 'http://amaravati.smackjeeves.com/comics/', False, True)
|
||||||
add('AmorVincitOmnia', 'http://avo.smackjeeves.com/comics/', True, True)
|
add('AmorVincitOmnia', 'http://avo.smackjeeves.com/comics/', True, True)
|
||||||
add('AmsdenEstate', 'http://monsterous.smackjeeves.com/comics/', False, True)
|
add('AmsdenEstate', 'http://monsterous.smackjeeves.com/comics/', False, True)
|
||||||
#add('Amya', 'http://amya.smackjeeves.com/comics/', False, True)
|
# add('Amya', 'http://amya.smackjeeves.com/comics/', False, True)
|
||||||
add('Anathemacomics', 'http://anathema-comics.smackjeeves.com/comics/', False, True)
|
add('Anathemacomics', 'http://anathema-comics.smackjeeves.com/comics/', False, True)
|
||||||
add('AngelBeast', 'http://angel-beast.smackjeeves.com/comics/', False, True)
|
add('AngelBeast', 'http://angel-beast.smackjeeves.com/comics/', False, True)
|
||||||
add('AngelGuardian', 'http://angel-guardian.smackjeeves.com/comics/', False, True)
|
add('AngelGuardian', 'http://angel-guardian.smackjeeves.com/comics/', False, True)
|
||||||
|
@ -176,7 +183,7 @@ add('Cambion', 'http://cambion.smackjeeves.com/comics/', True, True)
|
||||||
add('CaptiveSoul', 'http://captive-soul.smackjeeves.com/comics/', False, True)
|
add('CaptiveSoul', 'http://captive-soul.smackjeeves.com/comics/', False, True)
|
||||||
add('Captor', 'http://captor.smackjeeves.com/comics/', False, True)
|
add('Captor', 'http://captor.smackjeeves.com/comics/', False, True)
|
||||||
add('CaravanaTaleofGodsandMen', 'http://www.caravantale.com/comics/', False, True)
|
add('CaravanaTaleofGodsandMen', 'http://www.caravantale.com/comics/', False, True)
|
||||||
#add('Carciphona', 'http://carciphona.smackjeeves.com/comics/', False, True)
|
# add('Carciphona', 'http://carciphona.smackjeeves.com/comics/', False, True)
|
||||||
add('Cataclysm', 'http://cataclysm.smackjeeves.com/comics/', False, True)
|
add('Cataclysm', 'http://cataclysm.smackjeeves.com/comics/', False, True)
|
||||||
add('Catnip', 'http://catnipmanga.smackjeeves.com/comics/', True, True)
|
add('Catnip', 'http://catnipmanga.smackjeeves.com/comics/', True, True)
|
||||||
add('Cerintha', 'http://cerintha.smackjeeves.com/comics/', False, True)
|
add('Cerintha', 'http://cerintha.smackjeeves.com/comics/', False, True)
|
||||||
|
@ -281,7 +288,7 @@ add('FinalArcanum', 'http://finalarcanum.smackjeeves.com/comics/', False, True)
|
||||||
add('FireWire', 'http://firewire.smackjeeves.com/comics/', False, True)
|
add('FireWire', 'http://firewire.smackjeeves.com/comics/', False, True)
|
||||||
add('FireredLisasReise', 'http://lisasreise.smackjeeves.com/comics/', False, True)
|
add('FireredLisasReise', 'http://lisasreise.smackjeeves.com/comics/', False, True)
|
||||||
add('FlyorFail', 'http://flyorfail.smackjeeves.com/comics/', False, False)
|
add('FlyorFail', 'http://flyorfail.smackjeeves.com/comics/', False, False)
|
||||||
#add('FootLoose', 'http://footloose.smackjeeves.com/comics/', False, True)
|
# add('FootLoose', 'http://footloose.smackjeeves.com/comics/', False, True)
|
||||||
add('ForcedSeduction', 'http://forced-seduction.smackjeeves.com/comics/', False, True)
|
add('ForcedSeduction', 'http://forced-seduction.smackjeeves.com/comics/', False, True)
|
||||||
add('ForestHill', 'http://www.foresthillcomic.org/comics/', False, False)
|
add('ForestHill', 'http://www.foresthillcomic.org/comics/', False, False)
|
||||||
add('ForgettheDistance', 'http://forgetthedistance.smackjeeves.com/comics/', True, True)
|
add('ForgettheDistance', 'http://forgetthedistance.smackjeeves.com/comics/', True, True)
|
||||||
|
@ -474,7 +481,7 @@ add('MythsofUnovaAWhiteNuzlockeRunHardMode', 'http://mythsofunova.smackjeeves.co
|
||||||
add('NIK', 'http://nik.smackjeeves.com/comics/', False, True)
|
add('NIK', 'http://nik.smackjeeves.com/comics/', False, True)
|
||||||
add('Nah', 'http://thecomicformerlyknownasgenlab.smackjeeves.com/comics/', False, True)
|
add('Nah', 'http://thecomicformerlyknownasgenlab.smackjeeves.com/comics/', False, True)
|
||||||
add('Negligence', 'http://negligence.smackjeeves.com/comics/', False, True)
|
add('Negligence', 'http://negligence.smackjeeves.com/comics/', False, True)
|
||||||
#add('NekotheKitty', 'http://www.nekothekitty.net/comics/', False, True)
|
# add('NekotheKitty', 'http://www.nekothekitty.net/comics/', False, True)
|
||||||
add('NeoCrystalAdventures', 'http://neocrystaladventures.smackjeeves.com/comics/', False, True)
|
add('NeoCrystalAdventures', 'http://neocrystaladventures.smackjeeves.com/comics/', False, True)
|
||||||
add('NeonGlow', 'http://neonglow.smackjeeves.com/comics/', False, True)
|
add('NeonGlow', 'http://neonglow.smackjeeves.com/comics/', False, True)
|
||||||
add('NevertheHero', 'http://neverthehero.smackjeeves.com/comics/', False, True)
|
add('NevertheHero', 'http://neverthehero.smackjeeves.com/comics/', False, True)
|
||||||
|
@ -766,7 +773,7 @@ add('WhenSheWasBad', 'http://whenshewasbad.smackjeeves.com/comics/', False, True
|
||||||
add('Whenweweresilent', 'http://silence.smackjeeves.com/comics/', False, False)
|
add('Whenweweresilent', 'http://silence.smackjeeves.com/comics/', False, False)
|
||||||
add('WhereaboutsOfTime', 'http://wot.smackjeeves.com/comics/', False, True)
|
add('WhereaboutsOfTime', 'http://wot.smackjeeves.com/comics/', False, True)
|
||||||
add('WhiteHeart', 'http://whiteheart.smackjeeves.com/comics/', True, False)
|
add('WhiteHeart', 'http://whiteheart.smackjeeves.com/comics/', True, False)
|
||||||
#add('WhiteNoise', 'http://white-noise.smackjeeves.com/comics/', False, True)
|
# add('WhiteNoise', 'http://white-noise.smackjeeves.com/comics/', False, True)
|
||||||
add('WildWingBoys', 'http://wwb.smackjeeves.com/comics/', False, True)
|
add('WildWingBoys', 'http://wwb.smackjeeves.com/comics/', False, True)
|
||||||
add('WildWingBoysKoathArc', 'http://wwbka.smackjeeves.com/comics/', False, True)
|
add('WildWingBoysKoathArc', 'http://wwbka.smackjeeves.com/comics/', False, True)
|
||||||
add('Wildflowers', 'http://wildflowers.smackjeeves.com/comics/', False, True)
|
add('Wildflowers', 'http://wildflowers.smackjeeves.com/comics/', False, True)
|
||||||
|
|
15
dosagelib/plugins/t.py
Executable file → Normal file
15
dosagelib/plugins/t.py
Executable file → Normal file
|
@ -83,7 +83,7 @@ class TheOrderOfTheStick(_BasicScraper):
|
||||||
prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
|
prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
|
||||||
latestSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"')
|
latestSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"')
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
@ -122,7 +122,7 @@ class TheThinHLine(_BasicScraper):
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + '></a>')
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') + '></a>')
|
||||||
latestSearch = compile(tagre("a", "href", r'([^"]+)',
|
latestSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||||
after='class="timestamp"'))
|
after='class="timestamp"'))
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))
|
indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))
|
||||||
|
@ -180,17 +180,14 @@ class ThreePanelSoul(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
class ThunderAndLightning(_BasicScraper):
|
class ThunderAndLightning(_BasicScraper):
|
||||||
url = 'http://www.talcomic.com/wp/'
|
baseUrl = 'http://www.talcomic.com/wp/'
|
||||||
rurl = escape(url)
|
url = baseUrl + '?latestcomic'
|
||||||
stripUrl = url + '%s/'
|
rurl = escape(baseUrl)
|
||||||
|
stripUrl = baseUrl + '%s/'
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
help = 'Index format: yyyy/mm/dd/page-nn'
|
help = 'Index format: yyyy/mm/dd/page-nn'
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def starter(cls):
|
|
||||||
return cls.url + '?latestcomic'
|
|
||||||
|
|
||||||
|
|
||||||
class TinyKittenTeeth(_BasicScraper):
|
class TinyKittenTeeth(_BasicScraper):
|
||||||
url = 'http://www.tinykittenteeth.com/'
|
url = 'http://www.tinykittenteeth.com/'
|
||||||
|
|
|
@ -24,7 +24,7 @@ class Undertow(_BasicScraper):
|
||||||
imageSearch = compile(tagre("img", "src", r'([^"]+\.jpg)'))
|
imageSearch = compile(tagre("img", "src", r'([^"]+\.jpg)'))
|
||||||
prevSearch = compile(r'href="(.+?)".+?teynpoint')
|
prevSearch = compile(r'href="(.+?)".+?teynpoint')
|
||||||
latestSearch = compile(r'href="(.+?)".+?Most recent page')
|
latestSearch = compile(r'href="(.+?)".+?Most recent page')
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class UnicornJelly(_BasicScraper):
|
class UnicornJelly(_BasicScraper):
|
||||||
|
@ -47,7 +47,7 @@ class Unsounded(_BasicScraper):
|
||||||
latestSearch = compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) +
|
latestSearch = compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) +
|
||||||
tagre("img", "src",
|
tagre("img", "src",
|
||||||
r"%simages/newpages\.png" % rurl))
|
r"%simages/newpages\.png" % rurl))
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
help = 'Index format: chapter-number'
|
help = 'Index format: chapter-number'
|
||||||
|
|
||||||
def getIndexStripUrl(self, index):
|
def getIndexStripUrl(self, index):
|
||||||
|
|
|
@ -45,7 +45,7 @@ class WayfarersMoon(_BasicScraper):
|
||||||
class WebDesignerCOTW(_BasicScraper):
|
class WebDesignerCOTW(_BasicScraper):
|
||||||
url = 'http://www.webdesignerdepot.com/'
|
url = 'http://www.webdesignerdepot.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
starter = indirectStarter()
|
starter = indirectStarter
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1'
|
firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1'
|
||||||
imageSearch = (
|
imageSearch = (
|
||||||
|
@ -211,10 +211,9 @@ class WormWorldSaga(_BasicScraper):
|
||||||
latestChapter = 5
|
latestChapter = 5
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
|
|
||||||
@classmethod
|
def starter(self):
|
||||||
def starter(cls):
|
|
||||||
return '%schapters/chapter%02d/%s/index.php' % (
|
return '%schapters/chapter%02d/%s/index.php' % (
|
||||||
cls.url, cls.latestChapter, cls.lang.upper())
|
self.url, self.latestChapter, self.lang.upper())
|
||||||
|
|
||||||
def getPrevUrl(self, url, data):
|
def getPrevUrl(self, url, data):
|
||||||
"""Find previous URL."""
|
"""Find previous URL."""
|
||||||
|
|
|
@ -6,15 +6,11 @@
|
||||||
from __future__ import absolute_import, division, print_function
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from .common import _WordPressScraper, WP_LATEST_SEARCH
|
from .common import _WordPressScraper, WP_LATEST_SEARCH
|
||||||
|
from ..helpers import indirectStarter
|
||||||
|
|
||||||
class _WebcomicFactory(_WordPressScraper):
|
class _WebcomicFactory(_WordPressScraper):
|
||||||
@classmethod
|
starter = indirectStarter
|
||||||
def starter(cls):
|
latestSearch = WP_LATEST_SEARCH
|
||||||
"""this is basically helpers.indirectStarter, but dynamically selecting
|
|
||||||
the right parameters."""
|
|
||||||
data = cls.getPage(cls.firstStripUrl)
|
|
||||||
return cls.fetchUrl(cls.firstStripUrl, data, WP_LATEST_SEARCH)
|
|
||||||
|
|
||||||
|
|
||||||
# do not edit anything below since these entries are generated from
|
# do not edit anything below since these entries are generated from
|
||||||
|
|
|
@ -13,7 +13,7 @@ class _WLPComics(_ParserScraper):
|
||||||
imageSearch = '//center/*/img[contains(@alt, " Comic")]'
|
imageSearch = '//center/*/img[contains(@alt, " Comic")]'
|
||||||
prevSearch = '//a[contains(text(), "Previous ")]'
|
prevSearch = '//a[contains(text(), "Previous ")]'
|
||||||
nextSearch = '//a[contains(text(), "Next ")]'
|
nextSearch = '//a[contains(text(), "Next ")]'
|
||||||
starter = bounceStarter()
|
starter = bounceStarter
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
|
@ -9,7 +9,7 @@ def add(name, start):
|
||||||
name=name,
|
name=name,
|
||||||
url='http://hijinksensue.com/',
|
url='http://hijinksensue.com/',
|
||||||
latestSearch=start,
|
latestSearch=start,
|
||||||
starter=indirectStarter()
|
starter=indirectStarter
|
||||||
)
|
)
|
||||||
globals()[name] = make_scraper(name, _WordPressScraper, **attrs)
|
globals()[name] = make_scraper(name, _WordPressScraper, **attrs)
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@ from ..util import tagre
|
||||||
class Xkcd(_BasicScraper):
|
class Xkcd(_BasicScraper):
|
||||||
name = 'xkcd'
|
name = 'xkcd'
|
||||||
url = 'http://xkcd.com/'
|
url = 'http://xkcd.com/'
|
||||||
starter = bounceStarter()
|
starter = bounceStarter
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src",
|
imageSearch = compile(tagre("img", "src",
|
||||||
|
|
|
@ -22,7 +22,7 @@ class ZapComic(_ParserScraper):
|
||||||
|
|
||||||
class Zapiro(_BasicScraper):
|
class Zapiro(_BasicScraper):
|
||||||
url = 'http://www.mg.co.za/zapiro/'
|
url = 'http://www.mg.co.za/zapiro/'
|
||||||
starter = bounceStarter()
|
starter = bounceStarter
|
||||||
stripUrl = 'http://mg.co.za/cartoon/%s'
|
stripUrl = 'http://mg.co.za/cartoon/%s'
|
||||||
firstStripUrl = stripUrl % 'zapiro_681'
|
firstStripUrl = stripUrl % 'zapiro_681'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))
|
||||||
|
|
Loading…
Reference in a new issue