Refactor: Convert starter to simple method.

This commit is contained in:
Tobias Gruetzmacher 2016-04-13 20:01:51 +02:00
parent 1aebdce5d2
commit 0468f2f31a
30 changed files with 145 additions and 158 deletions

View file

@ -30,32 +30,24 @@ def regexNamer(regex, usePageUrl=False):
return _namer
def bounceStarter():
def bounceStarter(self):
"""Get start URL by "bouncing" back and forth one time.
This needs the url, prevSearch and nextSearch properties to be defined on the class.
"""
@classmethod
def _starter(cls):
"""Get bounced start URL."""
data = cls.getPage(cls.url)
url1 = cls.fetchUrl(cls.url, data, cls.prevSearch)
data = cls.getPage(url1)
return cls.fetchUrl(url1, data, cls.nextSearch)
return _starter
data = self.getPage(self.url)
url1 = self.fetchUrl(self.url, data, self.prevSearch)
data = self.getPage(url1)
return self.fetchUrl(url1, data, self.nextSearch)
def indirectStarter():
def indirectStarter(self):
"""Get start URL by indirection.
This is useful for comics where the latest comic can't be reached at a
stable URL. If the class has an attribute 'startUrl', this page is fetched
first, otherwise the page at 'url' is fetched. After that, the attribute
'latestSearch' is used on the page content to find the latest strip."""
@classmethod
def _starter(cls):
"""Get indirect start URL."""
url = cls.startUrl if hasattr(cls, "startUrl") else cls.url
data = cls.getPage(url)
return cls.fetchUrl(url, data, cls.latestSearch)
return _starter
url = self.startUrl if hasattr(self, "startUrl") else self.url
data = self.getPage(url)
return self.fetchUrl(url, data, self.latestSearch)

View file

@ -16,7 +16,7 @@ from .common import _WordPressScraper, xpath_class, WP_LATEST_SEARCH
class AbstruseGoose(_BasicScraper):
url = 'http://abstrusegoose.com/'
rurl = escape(url)
starter = bounceStarter()
starter = bounceStarter
stripUrl = url + '%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre('img', 'src',
@ -122,7 +122,7 @@ class Alice(_WordPressScraper):
url = 'http://www.alicecomics.com/'
prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
latestSearch = '//a[text()="Latest Alice!"]'
starter = indirectStarter()
starter = indirectStarter
class AlienLovesPredator(_BasicScraper):
@ -262,7 +262,7 @@ class Ashes(_WordPressScraper):
url = 'http://www.flowerlarkstudios.com/comic/prologue/10232009/'
firstStripUrl = url
latestSearch = WP_LATEST_SEARCH
starter = indirectStarter()
starter = indirectStarter
class ASkeweredParadise(_BasicScraper):
@ -287,7 +287,7 @@ class ASofterWorld(_ParserScraper):
class AstronomyPOTD(_ParserScraper):
baseUrl = 'http://apod.nasa.gov/apod/'
url = baseUrl + 'astropix.html'
starter = bounceStarter()
starter = bounceStarter
stripUrl = baseUrl + 'ap%s.html'
firstStripUrl = stripUrl % '061012'
imageSearch = '//a/img'

View file

@ -48,7 +48,7 @@ class BalderDash(_ComicControlScraper):
class Bardsworth(_WordPressScraper):
url = 'http://www.bardsworth.com/'
latestSearch = '//a[@rel="bookmark"]'
starter = indirectStarter()
starter = indirectStarter
class Baroquen(_BasicScraper):
@ -72,7 +72,7 @@ class Beetlebum(_BasicScraper):
rurl = escape(url)
stripUrl = url + '%s'
firstStripUrl = stripUrl % '2006/03/10/quiz-fur-ruskiphile'
starter = indirectStarter()
starter = indirectStarter
multipleImagesPerStrip = True
imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)'))
prevSearch = compile(tagre('a', 'href',
@ -227,7 +227,7 @@ class BoredAndEvil(_BasicScraper):
imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)'))
prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on.gif')
latestSearch = prevSearch
starter = indirectStarter()
starter = indirectStarter
help = 'Index format: yyyy-mm-dd'

View file

@ -55,7 +55,7 @@ class Carciphona(_BasicScraper):
after="prevarea"))
latestSearch = compile(tagre("a", "href",
r'(view\.php\?page=[0-9]+[^"]*)'))
starter = indirectStarter()
starter = indirectStarter
@classmethod
def namer(cls, imageUrl, pageUrl):
@ -275,7 +275,7 @@ class CoolCatStudio(_BasicScraper):
class CorydonCafe(_ParserScraper):
url = 'http://corydoncafe.com/'
starter = indirectStarter()
starter = indirectStarter
stripUrl = url + '%s.php'
imageSearch = "//center[2]//img"
prevSearch = '//a[@title="prev"]'
@ -347,7 +347,7 @@ class CucumberQuest(_BasicScraper):
stripUrl = url + 'cq/%s/'
firstStripUrl = stripUrl % 'page-1'
startUrl = url + 'recent.html'
starter = indirectStarter()
starter = indirectStarter
imageSearch = (
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)),
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)),
@ -379,7 +379,7 @@ class Curvy(_ParserScraper):
class CyanideAndHappiness(_BasicScraper):
url = 'http://www.explosm.net/comics/'
starter = bounceStarter()
starter = bounceStarter
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '15'
imageSearch = compile(tagre("img", "src", r'(//files.explosm.net/comics/[^"]+)', before="main-comic"))

View file

@ -1,7 +1,12 @@
# -*- coding: iso-8859-1 -*-
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile
from ..scraper import make_scraper
from ..util import tagre, getQueryParams
@ -11,6 +16,7 @@ _prevSearch = compile(_linkTag + tagre("img", "src", r"previous\.gif"))
_nextSearch = compile(_linkTag + tagre("img", "src", r"next\.gif"))
_lastSearch = compile(_linkTag + tagre("img", "src", r"last\.gif"))
def add(name, shortName, imageFolder=None, lastStrip=None):
classname = 'CloneManga_%s' % name
_url = 'http://manga.clone-army.org'
@ -22,22 +28,21 @@ def add(name, shortName, imageFolder=None, lastStrip=None):
def namer(cls, imageUrl, pageUrl):
return '%03d' % int(getQueryParams(pageUrl)['page'][0])
@classmethod
def _starter(cls):
def _starter(self):
# first, try hopping to previous and next comic
data = cls.getPage(baseUrl)
data = self.getPage(baseUrl)
try:
url = cls.fetchUrl(baseUrl, data, _prevSearch)
url = self.fetchUrl(baseUrl, data, _prevSearch)
except ValueError:
# no previous link found, try hopping to last comic
return cls.fetchUrl(baseUrl, data, _lastSearch)
return self.fetchUrl(baseUrl, data, _lastSearch)
else:
data = cls.getPage(url)
return cls.fetchUrl(url, data, _nextSearch)
data = self.getPage(url)
return self.fetchUrl(url, data, _nextSearch)
attrs = dict(
name='CloneManga/' + name,
stripUrl = baseUrl + '?page=%s',
stripUrl=baseUrl + '?page=%s',
imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (_url, imageFolder), after="center")),
prevSearch=_prevSearch,
help='Index format: n',

View file

@ -20,7 +20,7 @@ class _ComicFury(_ParserScraper):
prevSearch = ('//a[@rel="prev"]', XPATH_LINK % "Previous")
nextSearch = ('//a[@rel="next"]', XPATH_LINK % "Next")
help = 'Index format: n'
starter = bounceStarter()
starter = bounceStarter
@classmethod
def namer(cls, imageUrl, pageUrl):

View file

@ -18,11 +18,10 @@ class _Creators(_ParserScraper):
def getName(cls):
return 'Creators/' + cls.__name__
@classmethod
def starter(cls):
start = cls.url + cls.path
data = cls.getPage(start)
return cls.fetchUrl(start, data, cls.latestSearch)
def starter(self):
start = self.url + self.path
data = self.getPage(start)
return self.fetchUrl(start, data, self.latestSearch)
class _CreatorsEs(_Creators):

View file

@ -15,7 +15,7 @@ from .common import _WordPressScraper, xpath_class
class DailyDose(_ParserScraper):
url = 'http://dailydoseofcomics.com/'
starter = indirectStarter()
starter = indirectStarter
imageSearch = '//p/a/img'
prevSearch = '//a[@rel="prev"]'
latestSearch = '//a[@rel="bookmark"]'
@ -32,7 +32,7 @@ class DamnLol(_BasicScraper):
compile(tagre("img", "src", r'(%spics/[^"]+)' % rurl)),
)
help = 'Index format: stripname-number'
starter = bounceStarter()
starter = bounceStarter
@classmethod
def namer(cls, imageUrl, pageUrl):
@ -155,7 +155,7 @@ class Dilbert(_BasicScraper):
url = 'http://dilbert.com/'
stripUrl = url + '/strip/%s/'
firstStripUrl = stripUrl % '1989-04-16'
starter = indirectStarter()
starter = indirectStarter
prevSearch = compile(tagre("a", "href", r'(/strip/\d+-\d+-\d+)', after="Older Strip"))
imageSearch = compile(tagre("img", "src", r'(http://assets.amuniversal.com/\w+)'))
latestSearch = compile(tagre("a", "href",
@ -255,7 +255,7 @@ class DresdenCodak(_BasicScraper):
latestSearch = compile(tagre("div", "id", "preview") +
tagre("a", "href",
r'(%s\d+/\d+/\d+/[^"]+)' % rurl))
starter = indirectStarter()
starter = indirectStarter
class DrFun(_BasicScraper):

View file

@ -1,18 +1,24 @@
# -*- coding: iso-8859-1 -*-
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile
from ..scraper import make_scraper
from ..util import tagre
# note: adding the compile() functions inside add() is a major performance hog
_imageSearch = compile(tagre("img", "src", r'(https://s3\.amazonaws\.com/media\.drunkduck\.com/[^"]+)', before="page-image"))
_imageSearch = compile(tagre("img", "src", r'(https://s3\.amazonaws\.com/media\.drunkduck\.com/[^"]+)', before="page-image"))
_linkSearch = tagre("a", "href", r'(/[^"]+/\d+/)')
_prevSearch = compile(_linkSearch + tagre("img", "class", "arrow_prev"))
_nextSearch = compile(_linkSearch + tagre("img", "class", "arrow_next"))
_lastSearch = compile(_linkSearch + tagre("img", "class", "arrow_last"))
def add(name, path):
# XXX disallowed by the server administrator
classname = '_DrunkDuck_%s' % name
@ -24,18 +30,17 @@ def add(name, path):
ext = imageUrl.rsplit('.')[-1]
return '%d.%s' % (index, ext)
@classmethod
def _starter(cls):
def _starter(self):
# first, try hopping to previous and next comic
data = cls.getPage(_url)
data = self.getPage(_url)
try:
url = cls.fetchUrl(_url, data, _prevSearch)
url = self.fetchUrl(_url, data, _prevSearch)
except ValueError:
# no previous link found, try hopping to last comic
return cls.fetchUrl(_url, data, _lastSearch)
return self.fetchUrl(_url, data, _lastSearch)
else:
data = cls.getPage(url)
return cls.fetchUrl(url, data, _nextSearch)
data = self.getPage(url)
return self.fetchUrl(url, data, _nextSearch)
attrs = dict(
name = 'DrunkDuck/' + name,

8
dosagelib/plugins/e.py Executable file → Normal file
View file

@ -15,7 +15,7 @@ from .common import _WordPressScraper, WP_LATEST_SEARCH, xpath_class
class EarthsongSaga(_ParserScraper):
url = 'http://earthsongsaga.com/index.php'
starter = indirectStarter()
starter = indirectStarter
imageSearch = '//div[@id="comic"]//img'
prevSearch = '//a[@title="Previous"]'
latestSearch = '//div[@id="leftmenu"]/span[1]/a[1]'
@ -45,13 +45,13 @@ class EasilyAmused(_WordPressScraper):
url = 'http://www.flowerlarkstudios.com/comic/college-daze/ea01/'
firstStripUrl = url
latestSearch = WP_LATEST_SEARCH
starter = indirectStarter()
starter = indirectStarter
class EatLiver(_BasicScraper):
url = 'http://www.eatliver.com/'
rurl = escape(url)
starter = indirectStarter()
starter = indirectStarter
stripUrl = url + "i.php?n=%s"
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl,
@ -185,7 +185,7 @@ class Eryl(_WordPressScraper):
url = 'http://www.flowerlarkstudios.com/comic/prologue-migration/page-i/'
firstStripUrl = url
latestSearch = WP_LATEST_SEARCH
starter = indirectStarter()
starter = indirectStarter
help = 'This was known as DarkWings in previous Dosage versions'

6
dosagelib/plugins/f.py Executable file → Normal file
View file

@ -28,7 +28,7 @@ class FantasyRealms(_BasicScraper):
prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
latestSearch = compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE)
help = 'Index format: nnn'
starter = indirectStarter()
starter = indirectStarter
class FauxPas(_BasicScraper):
@ -48,7 +48,7 @@ class FeyWinds(_BasicScraper):
prevSearch = compile(r"(page.php\?id=.+?)'.+?navprevious.png")
latestSearch = compile(r'(comic/page.php\?id.+?)"')
help = 'Index format: n (unpadded)'
starter = indirectStarter()
starter = indirectStarter
class FilibusterCartoons(_BasicScraper):
@ -161,7 +161,7 @@ class FredoAndPidjin(_BasicScraper):
prevSearch = compile(tagre('a', 'href', '([^"]+)') + "Prev</a>")
latestSearch = compile(tagre('a', 'href', "(" + url +
r'\d\d\d\d/\d\d/\d\d/[^"]+/)'))
starter = indirectStarter()
starter = indirectStarter
class Freefall(_BasicScraper):

View file

@ -27,7 +27,7 @@ class Garanos(_BasicScraper):
baseUrl = 'http://garanos.alexheberling.com/'
rurl = escape(baseUrl)
url = baseUrl + 'pages/page-1/'
starter = indirectStarter()
starter = indirectStarter
stripUrl = baseUrl + 'pages/page-%s'
imageSearch = compile(
tagre("img", "src",
@ -136,7 +136,7 @@ class GoGetARoomie(_ComicControlScraper):
class GoneWithTheBlastwave(_BasicScraper):
url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
starter = indirectStarter()
starter = indirectStarter
stripUrl = url[:-1] + '%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(r'<img.+src=".+(/comics/.+?)"')

View file

@ -20,13 +20,12 @@ class _GoComics(_ParserScraper):
def getName(cls):
return 'GoComics/' + cls.__name__[2:]
@classmethod
def starter(cls):
url1 = cls.url + cls.path
data = cls.getPage(url1)
url2 = cls.fetchUrl(url1, data, cls.prevSearch)
data = cls.getPage(url2)
return cls.fetchUrl(url2, data, cls.nextSearch)
def starter(self):
url1 = self.url + self.path
data = self.getPage(url1)
url2 = self.fetchUrl(url1, data, self.prevSearch)
data = self.getPage(url2)
return self.fetchUrl(url2, data, self.nextSearch)
@classmethod
def namer(cls, image_url, page_url):

View file

@ -22,13 +22,12 @@ class HagarTheHorrible(_BasicScraper):
prevSearch = compile(tagre("a", "href", prevUrl, after="Previous"))
help = 'Index format: number'
@classmethod
def starter(cls):
def starter(self):
"""Return last gallery link."""
url = 'http://www.hagardunor.net/comics.php'
data = cls.getPage(url)
pattern = compile(tagre("a", "href", cls.prevUrl))
for starturl in cls.fetchUrls(url, data, pattern):
data = self.getPage(url)
pattern = compile(tagre("a", "href", self.prevUrl))
for starturl in self.fetchUrls(url, data, pattern):
pass
return starturl
@ -41,7 +40,7 @@ class _HappyJar(_WordPressScraper):
class HarkAVagrant(_BasicScraper):
url = 'http://www.harkavagrant.com/'
rurl = escape(url)
starter = bounceStarter()
starter = bounceStarter
stripUrl = url + 'index.php?id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl,

View file

@ -34,7 +34,7 @@ class JerkCity(_BasicScraper):
class JimBenton(_BasicScraper):
url = 'http://www.jimbenton.com/page14/page14.html'
stripUrl = 'http://www.jimbenton.com/page14/files/JimBentonComic-%s.html'
starter = indirectStarter()
starter = indirectStarter
imageSearch = compile(tagre("img", "src", r'(JimBentonComic-[^"]+)',
before="photo-frame"))
prevSearch = compile(tagre("a", "href", r'(JimBentonComic-[^>]+\.html)',

View file

@ -24,7 +24,7 @@ class Lackadaisy(_BasicScraper):
nextSearch = compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") +
"Next")
help = 'Index format: n'
starter = bounceStarter()
starter = bounceStarter
@classmethod
def namer(cls, imageUrl, pageUrl):
@ -38,7 +38,7 @@ class Laiyu(_WordPressScraper):
url = 'http://www.flowerlarkstudios.com/comic/preliminary-concepts/welcome/'
firstStripUrl = url
latestSearch = WP_LATEST_SEARCH
starter = indirectStarter()
starter = indirectStarter
class LasLindas(_BasicScraper):
@ -67,7 +67,7 @@ class LeastICouldDo(_BasicScraper):
after="Previous"))
latestSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
after="feature-comic"))
starter = indirectStarter()
starter = indirectStarter
help = 'Index format: yyyymmdd'
@ -117,5 +117,5 @@ class LookingForGroup(_ParserScraper):
imageSearch = '#comic img'
prevSearch = '#comic-left > a'
latestSearch = '#header-dropdown-comic-lfg > a:nth-of-type(2)'
starter = indirectStarter()
starter = indirectStarter
help = 'Index format: nnn'

View file

@ -104,7 +104,7 @@ class NichtLustig(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
latestSearch = compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)'))
help = 'Index format: yymmdd'
starter = indirectStarter()
starter = indirectStarter
class Nicky510(_WordPressScraper):
@ -137,7 +137,7 @@ class NoMoreSavePoints(_WordPressScraper):
url = 'http://www.flowerlarkstudios.com/comic/no-more-save-points/mushroom-hopping/'
firstStripUrl = url
latestSearch = WP_LATEST_SEARCH
starter = indirectStarter()
starter = indirectStarter
class NoNeedForBushido(_BasicScraper):
@ -153,7 +153,7 @@ class NoNeedForBushido(_BasicScraper):
latestSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
after="last-webcomic"))
help = 'Index format: nnn'
starter = indirectStarter()
starter = indirectStarter
class NotInventedHere(_BasicScraper):

View file

@ -11,9 +11,8 @@ class _NuklearPower(_ParserScraper):
prevSearch = '//a[@rel="prev"]'
imageSearch = '//div[@id="comic"]/img'
@classmethod
def starter(cls):
return cls.url + cls.path + '/'
def starter(self):
return self.url + self.path + '/'
@classmethod
def getName(cls):

20
dosagelib/plugins/p.py Executable file → Normal file
View file

@ -20,7 +20,7 @@ class PandyLand(_WordPressScraper):
class ParadigmShift(_BasicScraper):
url = 'http://www.paradigmshiftmanga.com/'
starter = indirectStarter()
starter = indirectStarter
stripUrl = url + 'ps/%s.html'
imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)',
@ -86,6 +86,7 @@ class PennyArcade(_BasicScraper):
before="btnPrev"))
nextSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
before="btnNext"))
starter = bounceStarter
help = 'Index format: yyyy/mm/dd/'
@classmethod
@ -99,15 +100,6 @@ class PennyArcade(_BasicScraper):
prevUrl = "%s/%s/%s" % (dummy, yyyy, mm)
return prevUrl
@classmethod
def starter(cls):
"""Get bounced start URL."""
data = cls.getPage(cls.url)
url1 = cls.fetchUrl(cls.url, data, cls.prevSearch)
data = cls.getPage(url1)
url2 = cls.fetchUrl(url1, data, cls.nextSearch)
return cls.prevUrlModifier(url2)
@classmethod
def namer(cls, imageUrl, pageUrl):
p = pageUrl.split('/')
@ -162,7 +154,7 @@ class PicPakDog(_BasicScraper):
class PiledHigherAndDeeper(_BasicScraper):
url = 'http://www.phdcomics.com/comics.php'
starter = bounceStarter()
starter = bounceStarter
stripUrl = url + '?comicid=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd\d+s\d?\.\w{3,4})', quote=""))
@ -207,7 +199,7 @@ class PokeyThePenguin(_ParserScraper):
imageSearch = '//p/img'
latestSearch = '(//a)[last()]'
multipleImagesPerStrip = True
starter = indirectStarter()
starter = indirectStarter
help = 'Index format: number'
def getPrevUrl(self, url, data):
@ -231,7 +223,7 @@ class PoorlyDrawnLines(_BasicScraper):
class Precocious(_BasicScraper):
url = 'http://www.precociouscomic.com/'
starter = indirectStarter()
starter = indirectStarter
stripUrl = url + 'archive/comic/%s'
imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png"))
@ -243,7 +235,7 @@ class Precocious(_BasicScraper):
class PS238(_ParserScraper):
url = 'http://ps238.nodwick.com/'
stripUrl = url + '/comic/%s/'
starter = bounceStarter()
starter = bounceStarter
imageSearch = '//div[@id="comic"]//img'
prevSearch = '//a[@class="comic-nav-base comic-nav-previous"]'
nextSearch = '//a[@class="comic-nav-base comic-nav-next"]'

View file

@ -51,7 +51,7 @@ class RealmOfAtland(_BasicScraper):
class RedMeat(_BasicScraper):
baseUrl = 'http://www.redmeat.com/redmeat/'
url = baseUrl + 'current/index.html'
starter = bounceStarter()
starter = bounceStarter
stripUrl = baseUrl + '%s/index.html'
firstStripUrl = stripUrl % '1996-06-10'
imageSearch = compile(tagre("img", "src", r'(http://www\.redmeat\.com/imager/b/redmeat/[^"]*\.png)'))

View file

@ -27,13 +27,12 @@ class SabrinaOnline(_BasicScraper):
adult = True
multipleImagesPerStrip = True
@classmethod
def starter(cls):
def starter(self):
"""Pick last one in a list of archive pages."""
archive = cls.url + 'archive.html'
data = cls.getPage(archive)
archive = self.url + 'archive.html'
data = self.getPage(archive)
search = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)"))
archivepages = cls.fetchUrls(archive, data, search)
archivepages = self.fetchUrls(archive, data, search)
return archivepages[-1]
@ -69,7 +68,7 @@ class ScandinaviaAndTheWorld(_ParserScraper):
url = 'http://satwcomic.com/'
stripUrl = url + '%s'
firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
starter = indirectStarter()
starter = indirectStarter
imageSearch = '//img[@itemprop="image"]'
prevSearch = '//a[@accesskey="p"]'
latestSearch = '//a[text()="View latest comic"]'
@ -166,14 +165,13 @@ class ScurryAndCover(_ParserScraper):
image = images[0]
return [cls.url + '/images/pages/' + image + '-xsmall.png']
@classmethod
def starter(cls):
def starter(self):
"""Go forward as far as possible, then start."""
url = cls.url
url = self.url
while True:
data = cls.getPage(url)
data = self.getPage(url)
try:
url = cls.fetchUrl(url, data, cls.nextSearch)
url = self.fetchUrl(url, data, self.nextSearch)
except ValueError:
break
return url
@ -197,7 +195,7 @@ class SexyLosers(_BasicScraper):
prevSearch = compile(r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<', IGNORECASE)
latestSearch = compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>', IGNORECASE)
help = 'Index format: nnn'
starter = indirectStarter()
starter = indirectStarter
@classmethod
def namer(cls, imageUrl, pageUrl):
@ -334,7 +332,7 @@ class SnowFlame(_WordPressScraper):
url = 'http://www.snowflamecomic.com/'
stripUrl = url + '?comic=snowflame-%s-%s'
firstStripUrl = stripUrl % ('01', '01')
starter = bounceStarter()
starter = bounceStarter
nextSearch = WP_LATEST_SEARCH
help = 'Index format: chapter-page'
@ -396,7 +394,7 @@ class Spamusement(_BasicScraper):
IGNORECASE)
latestSearch = prevSearch
help = 'Index format: n (unpadded)'
starter = indirectStarter()
starter = indirectStarter
class SpareParts(_BasicScraper):
@ -507,7 +505,7 @@ class StuffNoOneToldMe(_BasicScraper):
stripUrl = url + '%s.html'
firstStripUrl = stripUrl % '2010/05/01'
olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
starter = indirectStarter()
starter = indirectStarter
imageSearch = (
compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') +
r"(?:</a>|<br />)"),

View file

@ -1,10 +1,16 @@
# -*- coding: iso-8859-1 -*-
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile
from ..scraper import make_scraper
from ..util import tagre, quote, case_insensitive_re
# SmackJeeves is a crawler's nightmare - users are allowed to edit HTML directly.
# That's why there are so many different search patterns.
@ -31,6 +37,7 @@ _nextSearch = (
compile(_linkSearch + tagre("img", "src", r"[^']+/(?:forthnav)\.png[^']*", quote="'")),
)
def add(name, url, adult, bounce):
classname = 'SmackJeeves_' + name
@ -41,15 +48,14 @@ def add(name, url, adult, bounce):
return 'http://www.smackjeeves.com/mature.php?ref=' + quote(pageUrl)
return pageUrl
@classmethod
def _starter(cls):
def _starter(self):
"""Get start URL."""
url1 = modifier(url)
data = cls.getPage(url1)
url2 = cls.fetchUrl(url1, data, cls.prevSearch)
data = self.getPage(url1)
url2 = self.fetchUrl(url1, data, self.prevSearch)
if bounce:
data = cls.getPage(url2)
url3 = cls.fetchUrl(url2, data, _nextSearch)
data = self.getPage(url2)
url3 = self.fetchUrl(url2, data, _nextSearch)
return modifier(url3)
return modifier(url2)
@ -76,7 +82,8 @@ def add(name, url, adult, bounce):
)
# do not edit anything below since these entries are generated from scripts/update.sh
# do not edit anything below since these entries are generated from
# scripts/update_plugins.sh
# DO NOT REMOVE
add('20TimesKirby', 'http://20xkirby.smackjeeves.com/comics/', False, True)
add('2Kingdoms', 'http://2kingdoms.smackjeeves.com/comics/', False, False)
@ -110,7 +117,7 @@ add('AlwaysRainingHere', 'http://alwaysraininghere.smackjeeves.com/comics/', Fal
add('Amaravati', 'http://amaravati.smackjeeves.com/comics/', False, True)
add('AmorVincitOmnia', 'http://avo.smackjeeves.com/comics/', True, True)
add('AmsdenEstate', 'http://monsterous.smackjeeves.com/comics/', False, True)
#add('Amya', 'http://amya.smackjeeves.com/comics/', False, True)
# add('Amya', 'http://amya.smackjeeves.com/comics/', False, True)
add('Anathemacomics', 'http://anathema-comics.smackjeeves.com/comics/', False, True)
add('AngelBeast', 'http://angel-beast.smackjeeves.com/comics/', False, True)
add('AngelGuardian', 'http://angel-guardian.smackjeeves.com/comics/', False, True)
@ -176,7 +183,7 @@ add('Cambion', 'http://cambion.smackjeeves.com/comics/', True, True)
add('CaptiveSoul', 'http://captive-soul.smackjeeves.com/comics/', False, True)
add('Captor', 'http://captor.smackjeeves.com/comics/', False, True)
add('CaravanaTaleofGodsandMen', 'http://www.caravantale.com/comics/', False, True)
#add('Carciphona', 'http://carciphona.smackjeeves.com/comics/', False, True)
# add('Carciphona', 'http://carciphona.smackjeeves.com/comics/', False, True)
add('Cataclysm', 'http://cataclysm.smackjeeves.com/comics/', False, True)
add('Catnip', 'http://catnipmanga.smackjeeves.com/comics/', True, True)
add('Cerintha', 'http://cerintha.smackjeeves.com/comics/', False, True)
@ -281,7 +288,7 @@ add('FinalArcanum', 'http://finalarcanum.smackjeeves.com/comics/', False, True)
add('FireWire', 'http://firewire.smackjeeves.com/comics/', False, True)
add('FireredLisasReise', 'http://lisasreise.smackjeeves.com/comics/', False, True)
add('FlyorFail', 'http://flyorfail.smackjeeves.com/comics/', False, False)
#add('FootLoose', 'http://footloose.smackjeeves.com/comics/', False, True)
# add('FootLoose', 'http://footloose.smackjeeves.com/comics/', False, True)
add('ForcedSeduction', 'http://forced-seduction.smackjeeves.com/comics/', False, True)
add('ForestHill', 'http://www.foresthillcomic.org/comics/', False, False)
add('ForgettheDistance', 'http://forgetthedistance.smackjeeves.com/comics/', True, True)
@ -474,7 +481,7 @@ add('MythsofUnovaAWhiteNuzlockeRunHardMode', 'http://mythsofunova.smackjeeves.co
add('NIK', 'http://nik.smackjeeves.com/comics/', False, True)
add('Nah', 'http://thecomicformerlyknownasgenlab.smackjeeves.com/comics/', False, True)
add('Negligence', 'http://negligence.smackjeeves.com/comics/', False, True)
#add('NekotheKitty', 'http://www.nekothekitty.net/comics/', False, True)
# add('NekotheKitty', 'http://www.nekothekitty.net/comics/', False, True)
add('NeoCrystalAdventures', 'http://neocrystaladventures.smackjeeves.com/comics/', False, True)
add('NeonGlow', 'http://neonglow.smackjeeves.com/comics/', False, True)
add('NevertheHero', 'http://neverthehero.smackjeeves.com/comics/', False, True)
@ -766,7 +773,7 @@ add('WhenSheWasBad', 'http://whenshewasbad.smackjeeves.com/comics/', False, True
add('Whenweweresilent', 'http://silence.smackjeeves.com/comics/', False, False)
add('WhereaboutsOfTime', 'http://wot.smackjeeves.com/comics/', False, True)
add('WhiteHeart', 'http://whiteheart.smackjeeves.com/comics/', True, False)
#add('WhiteNoise', 'http://white-noise.smackjeeves.com/comics/', False, True)
# add('WhiteNoise', 'http://white-noise.smackjeeves.com/comics/', False, True)
add('WildWingBoys', 'http://wwb.smackjeeves.com/comics/', False, True)
add('WildWingBoysKoathArc', 'http://wwbka.smackjeeves.com/comics/', False, True)
add('Wildflowers', 'http://wildflowers.smackjeeves.com/comics/', False, True)

15
dosagelib/plugins/t.py Executable file → Normal file
View file

@ -83,7 +83,7 @@ class TheOrderOfTheStick(_BasicScraper):
prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
latestSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"')
help = 'Index format: n (unpadded)'
starter = indirectStarter()
starter = indirectStarter
@classmethod
def namer(cls, imageUrl, pageUrl):
@ -122,7 +122,7 @@ class TheThinHLine(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'([^"]+)') + '&gt;</a>')
latestSearch = compile(tagre("a", "href", r'([^"]+)',
after='class="timestamp"'))
starter = indirectStarter()
starter = indirectStarter
adult = True
indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))
@ -180,17 +180,14 @@ class ThreePanelSoul(_BasicScraper):
class ThunderAndLightning(_BasicScraper):
url = 'http://www.talcomic.com/wp/'
rurl = escape(url)
stripUrl = url + '%s/'
baseUrl = 'http://www.talcomic.com/wp/'
url = baseUrl + '?latestcomic'
rurl = escape(baseUrl)
stripUrl = baseUrl + '%s/'
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
help = 'Index format: yyyy/mm/dd/page-nn'
@classmethod
def starter(cls):
return cls.url + '?latestcomic'
class TinyKittenTeeth(_BasicScraper):
url = 'http://www.tinykittenteeth.com/'

View file

@ -24,7 +24,7 @@ class Undertow(_BasicScraper):
imageSearch = compile(tagre("img", "src", r'([^"]+\.jpg)'))
prevSearch = compile(r'href="(.+?)".+?teynpoint')
latestSearch = compile(r'href="(.+?)".+?Most recent page')
starter = indirectStarter()
starter = indirectStarter
class UnicornJelly(_BasicScraper):
@ -47,7 +47,7 @@ class Unsounded(_BasicScraper):
latestSearch = compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) +
tagre("img", "src",
r"%simages/newpages\.png" % rurl))
starter = indirectStarter()
starter = indirectStarter
help = 'Index format: chapter-number'
def getIndexStripUrl(self, index):

View file

@ -45,7 +45,7 @@ class WayfarersMoon(_BasicScraper):
class WebDesignerCOTW(_BasicScraper):
url = 'http://www.webdesignerdepot.com/'
rurl = escape(url)
starter = indirectStarter()
starter = indirectStarter
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1'
imageSearch = (
@ -211,10 +211,9 @@ class WormWorldSaga(_BasicScraper):
latestChapter = 5
multipleImagesPerStrip = True
@classmethod
def starter(cls):
def starter(self):
return '%schapters/chapter%02d/%s/index.php' % (
cls.url, cls.latestChapter, cls.lang.upper())
self.url, self.latestChapter, self.lang.upper())
def getPrevUrl(self, url, data):
"""Find previous URL."""

View file

@ -6,15 +6,11 @@
from __future__ import absolute_import, division, print_function
from .common import _WordPressScraper, WP_LATEST_SEARCH
from ..helpers import indirectStarter
class _WebcomicFactory(_WordPressScraper):
@classmethod
def starter(cls):
"""this is basically helpers.indirectStarter, but dynamically selecting
the right parameters."""
data = cls.getPage(cls.firstStripUrl)
return cls.fetchUrl(cls.firstStripUrl, data, WP_LATEST_SEARCH)
starter = indirectStarter
latestSearch = WP_LATEST_SEARCH
# do not edit anything below since these entries are generated from

View file

@ -13,7 +13,7 @@ class _WLPComics(_ParserScraper):
imageSearch = '//center/*/img[contains(@alt, " Comic")]'
prevSearch = '//a[contains(text(), "Previous ")]'
nextSearch = '//a[contains(text(), "Next ")]'
starter = bounceStarter()
starter = bounceStarter
help = 'Index format: nnn'
@classmethod

View file

@ -9,7 +9,7 @@ def add(name, start):
name=name,
url='http://hijinksensue.com/',
latestSearch=start,
starter=indirectStarter()
starter=indirectStarter
)
globals()[name] = make_scraper(name, _WordPressScraper, **attrs)

View file

@ -15,7 +15,7 @@ from ..util import tagre
class Xkcd(_BasicScraper):
name = 'xkcd'
url = 'http://xkcd.com/'
starter = bounceStarter()
starter = bounceStarter
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src",

View file

@ -22,7 +22,7 @@ class ZapComic(_ParserScraper):
class Zapiro(_BasicScraper):
url = 'http://www.mg.co.za/zapiro/'
starter = bounceStarter()
starter = bounceStarter
stripUrl = 'http://mg.co.za/cartoon/%s'
firstStripUrl = stripUrl % 'zapiro_681'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))