Refactor: Convert starter to simple method.
This commit is contained in:
parent
1aebdce5d2
commit
0468f2f31a
30 changed files with 145 additions and 158 deletions
|
@ -30,32 +30,24 @@ def regexNamer(regex, usePageUrl=False):
|
|||
return _namer
|
||||
|
||||
|
||||
def bounceStarter():
|
||||
def bounceStarter(self):
|
||||
"""Get start URL by "bouncing" back and forth one time.
|
||||
|
||||
This needs the url, prevSearch and nextSearch properties to be defined on the class.
|
||||
"""
|
||||
@classmethod
|
||||
def _starter(cls):
|
||||
"""Get bounced start URL."""
|
||||
data = cls.getPage(cls.url)
|
||||
url1 = cls.fetchUrl(cls.url, data, cls.prevSearch)
|
||||
data = cls.getPage(url1)
|
||||
return cls.fetchUrl(url1, data, cls.nextSearch)
|
||||
return _starter
|
||||
data = self.getPage(self.url)
|
||||
url1 = self.fetchUrl(self.url, data, self.prevSearch)
|
||||
data = self.getPage(url1)
|
||||
return self.fetchUrl(url1, data, self.nextSearch)
|
||||
|
||||
|
||||
def indirectStarter():
|
||||
def indirectStarter(self):
|
||||
"""Get start URL by indirection.
|
||||
|
||||
This is useful for comics where the latest comic can't be reached at a
|
||||
stable URL. If the class has an attribute 'startUrl', this page is fetched
|
||||
first, otherwise the page at 'url' is fetched. After that, the attribute
|
||||
'latestSearch' is used on the page content to find the latest strip."""
|
||||
@classmethod
|
||||
def _starter(cls):
|
||||
"""Get indirect start URL."""
|
||||
url = cls.startUrl if hasattr(cls, "startUrl") else cls.url
|
||||
data = cls.getPage(url)
|
||||
return cls.fetchUrl(url, data, cls.latestSearch)
|
||||
return _starter
|
||||
url = self.startUrl if hasattr(self, "startUrl") else self.url
|
||||
data = self.getPage(url)
|
||||
return self.fetchUrl(url, data, self.latestSearch)
|
||||
|
|
|
@ -16,7 +16,7 @@ from .common import _WordPressScraper, xpath_class, WP_LATEST_SEARCH
|
|||
class AbstruseGoose(_BasicScraper):
|
||||
url = 'http://abstrusegoose.com/'
|
||||
rurl = escape(url)
|
||||
starter = bounceStarter()
|
||||
starter = bounceStarter
|
||||
stripUrl = url + '%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = compile(tagre('img', 'src',
|
||||
|
@ -122,7 +122,7 @@ class Alice(_WordPressScraper):
|
|||
url = 'http://www.alicecomics.com/'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
|
||||
latestSearch = '//a[text()="Latest Alice!"]'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class AlienLovesPredator(_BasicScraper):
|
||||
|
@ -262,7 +262,7 @@ class Ashes(_WordPressScraper):
|
|||
url = 'http://www.flowerlarkstudios.com/comic/prologue/10232009/'
|
||||
firstStripUrl = url
|
||||
latestSearch = WP_LATEST_SEARCH
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class ASkeweredParadise(_BasicScraper):
|
||||
|
@ -287,7 +287,7 @@ class ASofterWorld(_ParserScraper):
|
|||
class AstronomyPOTD(_ParserScraper):
|
||||
baseUrl = 'http://apod.nasa.gov/apod/'
|
||||
url = baseUrl + 'astropix.html'
|
||||
starter = bounceStarter()
|
||||
starter = bounceStarter
|
||||
stripUrl = baseUrl + 'ap%s.html'
|
||||
firstStripUrl = stripUrl % '061012'
|
||||
imageSearch = '//a/img'
|
||||
|
|
|
@ -48,7 +48,7 @@ class BalderDash(_ComicControlScraper):
|
|||
class Bardsworth(_WordPressScraper):
|
||||
url = 'http://www.bardsworth.com/'
|
||||
latestSearch = '//a[@rel="bookmark"]'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class Baroquen(_BasicScraper):
|
||||
|
@ -72,7 +72,7 @@ class Beetlebum(_BasicScraper):
|
|||
rurl = escape(url)
|
||||
stripUrl = url + '%s'
|
||||
firstStripUrl = stripUrl % '2006/03/10/quiz-fur-ruskiphile'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
multipleImagesPerStrip = True
|
||||
imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)'))
|
||||
prevSearch = compile(tagre('a', 'href',
|
||||
|
@ -227,7 +227,7 @@ class BoredAndEvil(_BasicScraper):
|
|||
imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)'))
|
||||
prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on.gif')
|
||||
latestSearch = prevSearch
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ class Carciphona(_BasicScraper):
|
|||
after="prevarea"))
|
||||
latestSearch = compile(tagre("a", "href",
|
||||
r'(view\.php\?page=[0-9]+[^"]*)'))
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
|
@ -275,7 +275,7 @@ class CoolCatStudio(_BasicScraper):
|
|||
|
||||
class CorydonCafe(_ParserScraper):
|
||||
url = 'http://corydoncafe.com/'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
stripUrl = url + '%s.php'
|
||||
imageSearch = "//center[2]//img"
|
||||
prevSearch = '//a[@title="prev"]'
|
||||
|
@ -347,7 +347,7 @@ class CucumberQuest(_BasicScraper):
|
|||
stripUrl = url + 'cq/%s/'
|
||||
firstStripUrl = stripUrl % 'page-1'
|
||||
startUrl = url + 'recent.html'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
imageSearch = (
|
||||
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)),
|
||||
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)),
|
||||
|
@ -379,7 +379,7 @@ class Curvy(_ParserScraper):
|
|||
|
||||
class CyanideAndHappiness(_BasicScraper):
|
||||
url = 'http://www.explosm.net/comics/'
|
||||
starter = bounceStarter()
|
||||
starter = bounceStarter
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '15'
|
||||
imageSearch = compile(tagre("img", "src", r'(//files.explosm.net/comics/[^"]+)', before="main-comic"))
|
||||
|
|
|
@ -1,7 +1,12 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile
|
||||
|
||||
from ..scraper import make_scraper
|
||||
from ..util import tagre, getQueryParams
|
||||
|
||||
|
@ -11,6 +16,7 @@ _prevSearch = compile(_linkTag + tagre("img", "src", r"previous\.gif"))
|
|||
_nextSearch = compile(_linkTag + tagre("img", "src", r"next\.gif"))
|
||||
_lastSearch = compile(_linkTag + tagre("img", "src", r"last\.gif"))
|
||||
|
||||
|
||||
def add(name, shortName, imageFolder=None, lastStrip=None):
|
||||
classname = 'CloneManga_%s' % name
|
||||
_url = 'http://manga.clone-army.org'
|
||||
|
@ -22,18 +28,17 @@ def add(name, shortName, imageFolder=None, lastStrip=None):
|
|||
def namer(cls, imageUrl, pageUrl):
|
||||
return '%03d' % int(getQueryParams(pageUrl)['page'][0])
|
||||
|
||||
@classmethod
|
||||
def _starter(cls):
|
||||
def _starter(self):
|
||||
# first, try hopping to previous and next comic
|
||||
data = cls.getPage(baseUrl)
|
||||
data = self.getPage(baseUrl)
|
||||
try:
|
||||
url = cls.fetchUrl(baseUrl, data, _prevSearch)
|
||||
url = self.fetchUrl(baseUrl, data, _prevSearch)
|
||||
except ValueError:
|
||||
# no previous link found, try hopping to last comic
|
||||
return cls.fetchUrl(baseUrl, data, _lastSearch)
|
||||
return self.fetchUrl(baseUrl, data, _lastSearch)
|
||||
else:
|
||||
data = cls.getPage(url)
|
||||
return cls.fetchUrl(url, data, _nextSearch)
|
||||
data = self.getPage(url)
|
||||
return self.fetchUrl(url, data, _nextSearch)
|
||||
|
||||
attrs = dict(
|
||||
name='CloneManga/' + name,
|
||||
|
|
|
@ -20,7 +20,7 @@ class _ComicFury(_ParserScraper):
|
|||
prevSearch = ('//a[@rel="prev"]', XPATH_LINK % "Previous")
|
||||
nextSearch = ('//a[@rel="next"]', XPATH_LINK % "Next")
|
||||
help = 'Index format: n'
|
||||
starter = bounceStarter()
|
||||
starter = bounceStarter
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
|
|
|
@ -18,11 +18,10 @@ class _Creators(_ParserScraper):
|
|||
def getName(cls):
|
||||
return 'Creators/' + cls.__name__
|
||||
|
||||
@classmethod
|
||||
def starter(cls):
|
||||
start = cls.url + cls.path
|
||||
data = cls.getPage(start)
|
||||
return cls.fetchUrl(start, data, cls.latestSearch)
|
||||
def starter(self):
|
||||
start = self.url + self.path
|
||||
data = self.getPage(start)
|
||||
return self.fetchUrl(start, data, self.latestSearch)
|
||||
|
||||
|
||||
class _CreatorsEs(_Creators):
|
||||
|
|
|
@ -15,7 +15,7 @@ from .common import _WordPressScraper, xpath_class
|
|||
|
||||
class DailyDose(_ParserScraper):
|
||||
url = 'http://dailydoseofcomics.com/'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
imageSearch = '//p/a/img'
|
||||
prevSearch = '//a[@rel="prev"]'
|
||||
latestSearch = '//a[@rel="bookmark"]'
|
||||
|
@ -32,7 +32,7 @@ class DamnLol(_BasicScraper):
|
|||
compile(tagre("img", "src", r'(%spics/[^"]+)' % rurl)),
|
||||
)
|
||||
help = 'Index format: stripname-number'
|
||||
starter = bounceStarter()
|
||||
starter = bounceStarter
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
|
@ -155,7 +155,7 @@ class Dilbert(_BasicScraper):
|
|||
url = 'http://dilbert.com/'
|
||||
stripUrl = url + '/strip/%s/'
|
||||
firstStripUrl = stripUrl % '1989-04-16'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
prevSearch = compile(tagre("a", "href", r'(/strip/\d+-\d+-\d+)', after="Older Strip"))
|
||||
imageSearch = compile(tagre("img", "src", r'(http://assets.amuniversal.com/\w+)'))
|
||||
latestSearch = compile(tagre("a", "href",
|
||||
|
@ -255,7 +255,7 @@ class DresdenCodak(_BasicScraper):
|
|||
latestSearch = compile(tagre("div", "id", "preview") +
|
||||
tagre("a", "href",
|
||||
r'(%s\d+/\d+/\d+/[^"]+)' % rurl))
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class DrFun(_BasicScraper):
|
||||
|
|
|
@ -1,11 +1,16 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile
|
||||
|
||||
from ..scraper import make_scraper
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
# note: adding the compile() functions inside add() is a major performance hog
|
||||
_imageSearch = compile(tagre("img", "src", r'(https://s3\.amazonaws\.com/media\.drunkduck\.com/[^"]+)', before="page-image"))
|
||||
_linkSearch = tagre("a", "href", r'(/[^"]+/\d+/)')
|
||||
|
@ -13,6 +18,7 @@ _prevSearch = compile(_linkSearch + tagre("img", "class", "arrow_prev"))
|
|||
_nextSearch = compile(_linkSearch + tagre("img", "class", "arrow_next"))
|
||||
_lastSearch = compile(_linkSearch + tagre("img", "class", "arrow_last"))
|
||||
|
||||
|
||||
def add(name, path):
|
||||
# XXX disallowed by the server administrator
|
||||
classname = '_DrunkDuck_%s' % name
|
||||
|
@ -24,18 +30,17 @@ def add(name, path):
|
|||
ext = imageUrl.rsplit('.')[-1]
|
||||
return '%d.%s' % (index, ext)
|
||||
|
||||
@classmethod
|
||||
def _starter(cls):
|
||||
def _starter(self):
|
||||
# first, try hopping to previous and next comic
|
||||
data = cls.getPage(_url)
|
||||
data = self.getPage(_url)
|
||||
try:
|
||||
url = cls.fetchUrl(_url, data, _prevSearch)
|
||||
url = self.fetchUrl(_url, data, _prevSearch)
|
||||
except ValueError:
|
||||
# no previous link found, try hopping to last comic
|
||||
return cls.fetchUrl(_url, data, _lastSearch)
|
||||
return self.fetchUrl(_url, data, _lastSearch)
|
||||
else:
|
||||
data = cls.getPage(url)
|
||||
return cls.fetchUrl(url, data, _nextSearch)
|
||||
data = self.getPage(url)
|
||||
return self.fetchUrl(url, data, _nextSearch)
|
||||
|
||||
attrs = dict(
|
||||
name = 'DrunkDuck/' + name,
|
||||
|
|
8
dosagelib/plugins/e.py
Executable file → Normal file
8
dosagelib/plugins/e.py
Executable file → Normal file
|
@ -15,7 +15,7 @@ from .common import _WordPressScraper, WP_LATEST_SEARCH, xpath_class
|
|||
|
||||
class EarthsongSaga(_ParserScraper):
|
||||
url = 'http://earthsongsaga.com/index.php'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
imageSearch = '//div[@id="comic"]//img'
|
||||
prevSearch = '//a[@title="Previous"]'
|
||||
latestSearch = '//div[@id="leftmenu"]/span[1]/a[1]'
|
||||
|
@ -45,13 +45,13 @@ class EasilyAmused(_WordPressScraper):
|
|||
url = 'http://www.flowerlarkstudios.com/comic/college-daze/ea01/'
|
||||
firstStripUrl = url
|
||||
latestSearch = WP_LATEST_SEARCH
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class EatLiver(_BasicScraper):
|
||||
url = 'http://www.eatliver.com/'
|
||||
rurl = escape(url)
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
stripUrl = url + "i.php?n=%s"
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl,
|
||||
|
@ -185,7 +185,7 @@ class Eryl(_WordPressScraper):
|
|||
url = 'http://www.flowerlarkstudios.com/comic/prologue-migration/page-i/'
|
||||
firstStripUrl = url
|
||||
latestSearch = WP_LATEST_SEARCH
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
help = 'This was known as DarkWings in previous Dosage versions'
|
||||
|
||||
|
||||
|
|
6
dosagelib/plugins/f.py
Executable file → Normal file
6
dosagelib/plugins/f.py
Executable file → Normal file
|
@ -28,7 +28,7 @@ class FantasyRealms(_BasicScraper):
|
|||
prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
|
||||
latestSearch = compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE)
|
||||
help = 'Index format: nnn'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class FauxPas(_BasicScraper):
|
||||
|
@ -48,7 +48,7 @@ class FeyWinds(_BasicScraper):
|
|||
prevSearch = compile(r"(page.php\?id=.+?)'.+?navprevious.png")
|
||||
latestSearch = compile(r'(comic/page.php\?id.+?)"')
|
||||
help = 'Index format: n (unpadded)'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class FilibusterCartoons(_BasicScraper):
|
||||
|
@ -161,7 +161,7 @@ class FredoAndPidjin(_BasicScraper):
|
|||
prevSearch = compile(tagre('a', 'href', '([^"]+)') + "Prev</a>")
|
||||
latestSearch = compile(tagre('a', 'href', "(" + url +
|
||||
r'\d\d\d\d/\d\d/\d\d/[^"]+/)'))
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class Freefall(_BasicScraper):
|
||||
|
|
|
@ -27,7 +27,7 @@ class Garanos(_BasicScraper):
|
|||
baseUrl = 'http://garanos.alexheberling.com/'
|
||||
rurl = escape(baseUrl)
|
||||
url = baseUrl + 'pages/page-1/'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
stripUrl = baseUrl + 'pages/page-%s'
|
||||
imageSearch = compile(
|
||||
tagre("img", "src",
|
||||
|
@ -136,7 +136,7 @@ class GoGetARoomie(_ComicControlScraper):
|
|||
|
||||
class GoneWithTheBlastwave(_BasicScraper):
|
||||
url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
stripUrl = url[:-1] + '%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = compile(r'<img.+src=".+(/comics/.+?)"')
|
||||
|
|
|
@ -20,13 +20,12 @@ class _GoComics(_ParserScraper):
|
|||
def getName(cls):
|
||||
return 'GoComics/' + cls.__name__[2:]
|
||||
|
||||
@classmethod
|
||||
def starter(cls):
|
||||
url1 = cls.url + cls.path
|
||||
data = cls.getPage(url1)
|
||||
url2 = cls.fetchUrl(url1, data, cls.prevSearch)
|
||||
data = cls.getPage(url2)
|
||||
return cls.fetchUrl(url2, data, cls.nextSearch)
|
||||
def starter(self):
|
||||
url1 = self.url + self.path
|
||||
data = self.getPage(url1)
|
||||
url2 = self.fetchUrl(url1, data, self.prevSearch)
|
||||
data = self.getPage(url2)
|
||||
return self.fetchUrl(url2, data, self.nextSearch)
|
||||
|
||||
@classmethod
|
||||
def namer(cls, image_url, page_url):
|
||||
|
|
|
@ -22,13 +22,12 @@ class HagarTheHorrible(_BasicScraper):
|
|||
prevSearch = compile(tagre("a", "href", prevUrl, after="Previous"))
|
||||
help = 'Index format: number'
|
||||
|
||||
@classmethod
|
||||
def starter(cls):
|
||||
def starter(self):
|
||||
"""Return last gallery link."""
|
||||
url = 'http://www.hagardunor.net/comics.php'
|
||||
data = cls.getPage(url)
|
||||
pattern = compile(tagre("a", "href", cls.prevUrl))
|
||||
for starturl in cls.fetchUrls(url, data, pattern):
|
||||
data = self.getPage(url)
|
||||
pattern = compile(tagre("a", "href", self.prevUrl))
|
||||
for starturl in self.fetchUrls(url, data, pattern):
|
||||
pass
|
||||
return starturl
|
||||
|
||||
|
@ -41,7 +40,7 @@ class _HappyJar(_WordPressScraper):
|
|||
class HarkAVagrant(_BasicScraper):
|
||||
url = 'http://www.harkavagrant.com/'
|
||||
rurl = escape(url)
|
||||
starter = bounceStarter()
|
||||
starter = bounceStarter
|
||||
stripUrl = url + 'index.php?id=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl,
|
||||
|
|
|
@ -34,7 +34,7 @@ class JerkCity(_BasicScraper):
|
|||
class JimBenton(_BasicScraper):
|
||||
url = 'http://www.jimbenton.com/page14/page14.html'
|
||||
stripUrl = 'http://www.jimbenton.com/page14/files/JimBentonComic-%s.html'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
imageSearch = compile(tagre("img", "src", r'(JimBentonComic-[^"]+)',
|
||||
before="photo-frame"))
|
||||
prevSearch = compile(tagre("a", "href", r'(JimBentonComic-[^>]+\.html)',
|
||||
|
|
|
@ -24,7 +24,7 @@ class Lackadaisy(_BasicScraper):
|
|||
nextSearch = compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") +
|
||||
"Next")
|
||||
help = 'Index format: n'
|
||||
starter = bounceStarter()
|
||||
starter = bounceStarter
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
|
@ -38,7 +38,7 @@ class Laiyu(_WordPressScraper):
|
|||
url = 'http://www.flowerlarkstudios.com/comic/preliminary-concepts/welcome/'
|
||||
firstStripUrl = url
|
||||
latestSearch = WP_LATEST_SEARCH
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class LasLindas(_BasicScraper):
|
||||
|
@ -67,7 +67,7 @@ class LeastICouldDo(_BasicScraper):
|
|||
after="Previous"))
|
||||
latestSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
|
||||
after="feature-comic"))
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
@ -117,5 +117,5 @@ class LookingForGroup(_ParserScraper):
|
|||
imageSearch = '#comic img'
|
||||
prevSearch = '#comic-left > a'
|
||||
latestSearch = '#header-dropdown-comic-lfg > a:nth-of-type(2)'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
help = 'Index format: nnn'
|
||||
|
|
|
@ -104,7 +104,7 @@ class NichtLustig(_BasicScraper):
|
|||
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
|
||||
latestSearch = compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)'))
|
||||
help = 'Index format: yymmdd'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class Nicky510(_WordPressScraper):
|
||||
|
@ -137,7 +137,7 @@ class NoMoreSavePoints(_WordPressScraper):
|
|||
url = 'http://www.flowerlarkstudios.com/comic/no-more-save-points/mushroom-hopping/'
|
||||
firstStripUrl = url
|
||||
latestSearch = WP_LATEST_SEARCH
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class NoNeedForBushido(_BasicScraper):
|
||||
|
@ -153,7 +153,7 @@ class NoNeedForBushido(_BasicScraper):
|
|||
latestSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
|
||||
after="last-webcomic"))
|
||||
help = 'Index format: nnn'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class NotInventedHere(_BasicScraper):
|
||||
|
|
|
@ -11,9 +11,8 @@ class _NuklearPower(_ParserScraper):
|
|||
prevSearch = '//a[@rel="prev"]'
|
||||
imageSearch = '//div[@id="comic"]/img'
|
||||
|
||||
@classmethod
|
||||
def starter(cls):
|
||||
return cls.url + cls.path + '/'
|
||||
def starter(self):
|
||||
return self.url + self.path + '/'
|
||||
|
||||
@classmethod
|
||||
def getName(cls):
|
||||
|
|
20
dosagelib/plugins/p.py
Executable file → Normal file
20
dosagelib/plugins/p.py
Executable file → Normal file
|
@ -20,7 +20,7 @@ class PandyLand(_WordPressScraper):
|
|||
|
||||
class ParadigmShift(_BasicScraper):
|
||||
url = 'http://www.paradigmshiftmanga.com/'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
stripUrl = url + 'ps/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)'))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||
|
@ -86,6 +86,7 @@ class PennyArcade(_BasicScraper):
|
|||
before="btnPrev"))
|
||||
nextSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
|
||||
before="btnNext"))
|
||||
starter = bounceStarter
|
||||
help = 'Index format: yyyy/mm/dd/'
|
||||
|
||||
@classmethod
|
||||
|
@ -99,15 +100,6 @@ class PennyArcade(_BasicScraper):
|
|||
prevUrl = "%s/%s/%s" % (dummy, yyyy, mm)
|
||||
return prevUrl
|
||||
|
||||
@classmethod
|
||||
def starter(cls):
|
||||
"""Get bounced start URL."""
|
||||
data = cls.getPage(cls.url)
|
||||
url1 = cls.fetchUrl(cls.url, data, cls.prevSearch)
|
||||
data = cls.getPage(url1)
|
||||
url2 = cls.fetchUrl(url1, data, cls.nextSearch)
|
||||
return cls.prevUrlModifier(url2)
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
p = pageUrl.split('/')
|
||||
|
@ -162,7 +154,7 @@ class PicPakDog(_BasicScraper):
|
|||
|
||||
class PiledHigherAndDeeper(_BasicScraper):
|
||||
url = 'http://www.phdcomics.com/comics.php'
|
||||
starter = bounceStarter()
|
||||
starter = bounceStarter
|
||||
stripUrl = url + '?comicid=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd\d+s\d?\.\w{3,4})', quote=""))
|
||||
|
@ -207,7 +199,7 @@ class PokeyThePenguin(_ParserScraper):
|
|||
imageSearch = '//p/img'
|
||||
latestSearch = '(//a)[last()]'
|
||||
multipleImagesPerStrip = True
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
help = 'Index format: number'
|
||||
|
||||
def getPrevUrl(self, url, data):
|
||||
|
@ -231,7 +223,7 @@ class PoorlyDrawnLines(_BasicScraper):
|
|||
|
||||
class Precocious(_BasicScraper):
|
||||
url = 'http://www.precociouscomic.com/'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
stripUrl = url + 'archive/comic/%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png"))
|
||||
|
@ -243,7 +235,7 @@ class Precocious(_BasicScraper):
|
|||
class PS238(_ParserScraper):
|
||||
url = 'http://ps238.nodwick.com/'
|
||||
stripUrl = url + '/comic/%s/'
|
||||
starter = bounceStarter()
|
||||
starter = bounceStarter
|
||||
imageSearch = '//div[@id="comic"]//img'
|
||||
prevSearch = '//a[@class="comic-nav-base comic-nav-previous"]'
|
||||
nextSearch = '//a[@class="comic-nav-base comic-nav-next"]'
|
||||
|
|
|
@ -51,7 +51,7 @@ class RealmOfAtland(_BasicScraper):
|
|||
class RedMeat(_BasicScraper):
|
||||
baseUrl = 'http://www.redmeat.com/redmeat/'
|
||||
url = baseUrl + 'current/index.html'
|
||||
starter = bounceStarter()
|
||||
starter = bounceStarter
|
||||
stripUrl = baseUrl + '%s/index.html'
|
||||
firstStripUrl = stripUrl % '1996-06-10'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.redmeat\.com/imager/b/redmeat/[^"]*\.png)'))
|
||||
|
|
|
@ -27,13 +27,12 @@ class SabrinaOnline(_BasicScraper):
|
|||
adult = True
|
||||
multipleImagesPerStrip = True
|
||||
|
||||
@classmethod
|
||||
def starter(cls):
|
||||
def starter(self):
|
||||
"""Pick last one in a list of archive pages."""
|
||||
archive = cls.url + 'archive.html'
|
||||
data = cls.getPage(archive)
|
||||
archive = self.url + 'archive.html'
|
||||
data = self.getPage(archive)
|
||||
search = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)"))
|
||||
archivepages = cls.fetchUrls(archive, data, search)
|
||||
archivepages = self.fetchUrls(archive, data, search)
|
||||
return archivepages[-1]
|
||||
|
||||
|
||||
|
@ -69,7 +68,7 @@ class ScandinaviaAndTheWorld(_ParserScraper):
|
|||
url = 'http://satwcomic.com/'
|
||||
stripUrl = url + '%s'
|
||||
firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
imageSearch = '//img[@itemprop="image"]'
|
||||
prevSearch = '//a[@accesskey="p"]'
|
||||
latestSearch = '//a[text()="View latest comic"]'
|
||||
|
@ -166,14 +165,13 @@ class ScurryAndCover(_ParserScraper):
|
|||
image = images[0]
|
||||
return [cls.url + '/images/pages/' + image + '-xsmall.png']
|
||||
|
||||
@classmethod
|
||||
def starter(cls):
|
||||
def starter(self):
|
||||
"""Go forward as far as possible, then start."""
|
||||
url = cls.url
|
||||
url = self.url
|
||||
while True:
|
||||
data = cls.getPage(url)
|
||||
data = self.getPage(url)
|
||||
try:
|
||||
url = cls.fetchUrl(url, data, cls.nextSearch)
|
||||
url = self.fetchUrl(url, data, self.nextSearch)
|
||||
except ValueError:
|
||||
break
|
||||
return url
|
||||
|
@ -197,7 +195,7 @@ class SexyLosers(_BasicScraper):
|
|||
prevSearch = compile(r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<', IGNORECASE)
|
||||
latestSearch = compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>', IGNORECASE)
|
||||
help = 'Index format: nnn'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
|
@ -334,7 +332,7 @@ class SnowFlame(_WordPressScraper):
|
|||
url = 'http://www.snowflamecomic.com/'
|
||||
stripUrl = url + '?comic=snowflame-%s-%s'
|
||||
firstStripUrl = stripUrl % ('01', '01')
|
||||
starter = bounceStarter()
|
||||
starter = bounceStarter
|
||||
nextSearch = WP_LATEST_SEARCH
|
||||
help = 'Index format: chapter-page'
|
||||
|
||||
|
@ -396,7 +394,7 @@ class Spamusement(_BasicScraper):
|
|||
IGNORECASE)
|
||||
latestSearch = prevSearch
|
||||
help = 'Index format: n (unpadded)'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class SpareParts(_BasicScraper):
|
||||
|
@ -507,7 +505,7 @@ class StuffNoOneToldMe(_BasicScraper):
|
|||
stripUrl = url + '%s.html'
|
||||
firstStripUrl = stripUrl % '2010/05/01'
|
||||
olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
imageSearch = (
|
||||
compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') +
|
||||
r"(?:</a>|<br />)"),
|
||||
|
|
|
@ -1,10 +1,16 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile
|
||||
|
||||
from ..scraper import make_scraper
|
||||
from ..util import tagre, quote, case_insensitive_re
|
||||
|
||||
|
||||
# SmackJeeves is a crawler's nightmare - users are allowed to edit HTML directly.
|
||||
# That's why there are so many different search patterns.
|
||||
|
||||
|
@ -31,6 +37,7 @@ _nextSearch = (
|
|||
compile(_linkSearch + tagre("img", "src", r"[^']+/(?:forthnav)\.png[^']*", quote="'")),
|
||||
)
|
||||
|
||||
|
||||
def add(name, url, adult, bounce):
|
||||
classname = 'SmackJeeves_' + name
|
||||
|
||||
|
@ -41,15 +48,14 @@ def add(name, url, adult, bounce):
|
|||
return 'http://www.smackjeeves.com/mature.php?ref=' + quote(pageUrl)
|
||||
return pageUrl
|
||||
|
||||
@classmethod
|
||||
def _starter(cls):
|
||||
def _starter(self):
|
||||
"""Get start URL."""
|
||||
url1 = modifier(url)
|
||||
data = cls.getPage(url1)
|
||||
url2 = cls.fetchUrl(url1, data, cls.prevSearch)
|
||||
data = self.getPage(url1)
|
||||
url2 = self.fetchUrl(url1, data, self.prevSearch)
|
||||
if bounce:
|
||||
data = cls.getPage(url2)
|
||||
url3 = cls.fetchUrl(url2, data, _nextSearch)
|
||||
data = self.getPage(url2)
|
||||
url3 = self.fetchUrl(url2, data, _nextSearch)
|
||||
return modifier(url3)
|
||||
return modifier(url2)
|
||||
|
||||
|
@ -76,7 +82,8 @@ def add(name, url, adult, bounce):
|
|||
)
|
||||
|
||||
|
||||
# do not edit anything below since these entries are generated from scripts/update.sh
|
||||
# do not edit anything below since these entries are generated from
|
||||
# scripts/update_plugins.sh
|
||||
# DO NOT REMOVE
|
||||
add('20TimesKirby', 'http://20xkirby.smackjeeves.com/comics/', False, True)
|
||||
add('2Kingdoms', 'http://2kingdoms.smackjeeves.com/comics/', False, False)
|
||||
|
|
15
dosagelib/plugins/t.py
Executable file → Normal file
15
dosagelib/plugins/t.py
Executable file → Normal file
|
@ -83,7 +83,7 @@ class TheOrderOfTheStick(_BasicScraper):
|
|||
prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
|
||||
latestSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"')
|
||||
help = 'Index format: n (unpadded)'
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
|
@ -122,7 +122,7 @@ class TheThinHLine(_BasicScraper):
|
|||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + '></a>')
|
||||
latestSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||
after='class="timestamp"'))
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
adult = True
|
||||
|
||||
indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))
|
||||
|
@ -180,17 +180,14 @@ class ThreePanelSoul(_BasicScraper):
|
|||
|
||||
|
||||
class ThunderAndLightning(_BasicScraper):
|
||||
url = 'http://www.talcomic.com/wp/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
baseUrl = 'http://www.talcomic.com/wp/'
|
||||
url = baseUrl + '?latestcomic'
|
||||
rurl = escape(baseUrl)
|
||||
stripUrl = baseUrl + '%s/'
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
help = 'Index format: yyyy/mm/dd/page-nn'
|
||||
|
||||
@classmethod
|
||||
def starter(cls):
|
||||
return cls.url + '?latestcomic'
|
||||
|
||||
|
||||
class TinyKittenTeeth(_BasicScraper):
|
||||
url = 'http://www.tinykittenteeth.com/'
|
||||
|
|
|
@ -24,7 +24,7 @@ class Undertow(_BasicScraper):
|
|||
imageSearch = compile(tagre("img", "src", r'([^"]+\.jpg)'))
|
||||
prevSearch = compile(r'href="(.+?)".+?teynpoint')
|
||||
latestSearch = compile(r'href="(.+?)".+?Most recent page')
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class UnicornJelly(_BasicScraper):
|
||||
|
@ -47,7 +47,7 @@ class Unsounded(_BasicScraper):
|
|||
latestSearch = compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) +
|
||||
tagre("img", "src",
|
||||
r"%simages/newpages\.png" % rurl))
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
help = 'Index format: chapter-number'
|
||||
|
||||
def getIndexStripUrl(self, index):
|
||||
|
|
|
@ -45,7 +45,7 @@ class WayfarersMoon(_BasicScraper):
|
|||
class WebDesignerCOTW(_BasicScraper):
|
||||
url = 'http://www.webdesignerdepot.com/'
|
||||
rurl = escape(url)
|
||||
starter = indirectStarter()
|
||||
starter = indirectStarter
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1'
|
||||
imageSearch = (
|
||||
|
@ -211,10 +211,9 @@ class WormWorldSaga(_BasicScraper):
|
|||
latestChapter = 5
|
||||
multipleImagesPerStrip = True
|
||||
|
||||
@classmethod
|
||||
def starter(cls):
|
||||
def starter(self):
|
||||
return '%schapters/chapter%02d/%s/index.php' % (
|
||||
cls.url, cls.latestChapter, cls.lang.upper())
|
||||
self.url, self.latestChapter, self.lang.upper())
|
||||
|
||||
def getPrevUrl(self, url, data):
|
||||
"""Find previous URL."""
|
||||
|
|
|
@ -6,15 +6,11 @@
|
|||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from .common import _WordPressScraper, WP_LATEST_SEARCH
|
||||
|
||||
from ..helpers import indirectStarter
|
||||
|
||||
class _WebcomicFactory(_WordPressScraper):
|
||||
@classmethod
|
||||
def starter(cls):
|
||||
"""this is basically helpers.indirectStarter, but dynamically selecting
|
||||
the right parameters."""
|
||||
data = cls.getPage(cls.firstStripUrl)
|
||||
return cls.fetchUrl(cls.firstStripUrl, data, WP_LATEST_SEARCH)
|
||||
starter = indirectStarter
|
||||
latestSearch = WP_LATEST_SEARCH
|
||||
|
||||
|
||||
# do not edit anything below since these entries are generated from
|
||||
|
|
|
@ -13,7 +13,7 @@ class _WLPComics(_ParserScraper):
|
|||
imageSearch = '//center/*/img[contains(@alt, " Comic")]'
|
||||
prevSearch = '//a[contains(text(), "Previous ")]'
|
||||
nextSearch = '//a[contains(text(), "Next ")]'
|
||||
starter = bounceStarter()
|
||||
starter = bounceStarter
|
||||
help = 'Index format: nnn'
|
||||
|
||||
@classmethod
|
||||
|
|
|
@ -9,7 +9,7 @@ def add(name, start):
|
|||
name=name,
|
||||
url='http://hijinksensue.com/',
|
||||
latestSearch=start,
|
||||
starter=indirectStarter()
|
||||
starter=indirectStarter
|
||||
)
|
||||
globals()[name] = make_scraper(name, _WordPressScraper, **attrs)
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ from ..util import tagre
|
|||
class Xkcd(_BasicScraper):
|
||||
name = 'xkcd'
|
||||
url = 'http://xkcd.com/'
|
||||
starter = bounceStarter()
|
||||
starter = bounceStarter
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = compile(tagre("img", "src",
|
||||
|
|
|
@ -22,7 +22,7 @@ class ZapComic(_ParserScraper):
|
|||
|
||||
class Zapiro(_BasicScraper):
|
||||
url = 'http://www.mg.co.za/zapiro/'
|
||||
starter = bounceStarter()
|
||||
starter = bounceStarter
|
||||
stripUrl = 'http://mg.co.za/cartoon/%s'
|
||||
firstStripUrl = stripUrl % 'zapiro_681'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))
|
||||
|
|
Loading…
Reference in a new issue