Refactor: Convert starter to simple method.

This commit is contained in:
Tobias Gruetzmacher 2016-04-13 20:01:51 +02:00
parent 1aebdce5d2
commit 0468f2f31a
30 changed files with 145 additions and 158 deletions

View file

@ -30,32 +30,24 @@ def regexNamer(regex, usePageUrl=False):
return _namer return _namer
def bounceStarter(): def bounceStarter(self):
"""Get start URL by "bouncing" back and forth one time. """Get start URL by "bouncing" back and forth one time.
This needs the url, prevSearch and nextSearch properties to be defined on the class. This needs the url, prevSearch and nextSearch properties to be defined on the class.
""" """
@classmethod data = self.getPage(self.url)
def _starter(cls): url1 = self.fetchUrl(self.url, data, self.prevSearch)
"""Get bounced start URL.""" data = self.getPage(url1)
data = cls.getPage(cls.url) return self.fetchUrl(url1, data, self.nextSearch)
url1 = cls.fetchUrl(cls.url, data, cls.prevSearch)
data = cls.getPage(url1)
return cls.fetchUrl(url1, data, cls.nextSearch)
return _starter
def indirectStarter(): def indirectStarter(self):
"""Get start URL by indirection. """Get start URL by indirection.
This is useful for comics where the latest comic can't be reached at a This is useful for comics where the latest comic can't be reached at a
stable URL. If the class has an attribute 'startUrl', this page is fetched stable URL. If the class has an attribute 'startUrl', this page is fetched
first, otherwise the page at 'url' is fetched. After that, the attribute first, otherwise the page at 'url' is fetched. After that, the attribute
'latestSearch' is used on the page content to find the latest strip.""" 'latestSearch' is used on the page content to find the latest strip."""
@classmethod url = self.startUrl if hasattr(self, "startUrl") else self.url
def _starter(cls): data = self.getPage(url)
"""Get indirect start URL.""" return self.fetchUrl(url, data, self.latestSearch)
url = cls.startUrl if hasattr(cls, "startUrl") else cls.url
data = cls.getPage(url)
return cls.fetchUrl(url, data, cls.latestSearch)
return _starter

View file

@ -16,7 +16,7 @@ from .common import _WordPressScraper, xpath_class, WP_LATEST_SEARCH
class AbstruseGoose(_BasicScraper): class AbstruseGoose(_BasicScraper):
url = 'http://abstrusegoose.com/' url = 'http://abstrusegoose.com/'
rurl = escape(url) rurl = escape(url)
starter = bounceStarter() starter = bounceStarter
stripUrl = url + '%s' stripUrl = url + '%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre('img', 'src', imageSearch = compile(tagre('img', 'src',
@ -122,7 +122,7 @@ class Alice(_WordPressScraper):
url = 'http://www.alicecomics.com/' url = 'http://www.alicecomics.com/'
prevSearch = '//a[%s]' % xpath_class('navi-prev-in') prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
latestSearch = '//a[text()="Latest Alice!"]' latestSearch = '//a[text()="Latest Alice!"]'
starter = indirectStarter() starter = indirectStarter
class AlienLovesPredator(_BasicScraper): class AlienLovesPredator(_BasicScraper):
@ -262,7 +262,7 @@ class Ashes(_WordPressScraper):
url = 'http://www.flowerlarkstudios.com/comic/prologue/10232009/' url = 'http://www.flowerlarkstudios.com/comic/prologue/10232009/'
firstStripUrl = url firstStripUrl = url
latestSearch = WP_LATEST_SEARCH latestSearch = WP_LATEST_SEARCH
starter = indirectStarter() starter = indirectStarter
class ASkeweredParadise(_BasicScraper): class ASkeweredParadise(_BasicScraper):
@ -287,7 +287,7 @@ class ASofterWorld(_ParserScraper):
class AstronomyPOTD(_ParserScraper): class AstronomyPOTD(_ParserScraper):
baseUrl = 'http://apod.nasa.gov/apod/' baseUrl = 'http://apod.nasa.gov/apod/'
url = baseUrl + 'astropix.html' url = baseUrl + 'astropix.html'
starter = bounceStarter() starter = bounceStarter
stripUrl = baseUrl + 'ap%s.html' stripUrl = baseUrl + 'ap%s.html'
firstStripUrl = stripUrl % '061012' firstStripUrl = stripUrl % '061012'
imageSearch = '//a/img' imageSearch = '//a/img'

View file

@ -48,7 +48,7 @@ class BalderDash(_ComicControlScraper):
class Bardsworth(_WordPressScraper): class Bardsworth(_WordPressScraper):
url = 'http://www.bardsworth.com/' url = 'http://www.bardsworth.com/'
latestSearch = '//a[@rel="bookmark"]' latestSearch = '//a[@rel="bookmark"]'
starter = indirectStarter() starter = indirectStarter
class Baroquen(_BasicScraper): class Baroquen(_BasicScraper):
@ -72,7 +72,7 @@ class Beetlebum(_BasicScraper):
rurl = escape(url) rurl = escape(url)
stripUrl = url + '%s' stripUrl = url + '%s'
firstStripUrl = stripUrl % '2006/03/10/quiz-fur-ruskiphile' firstStripUrl = stripUrl % '2006/03/10/quiz-fur-ruskiphile'
starter = indirectStarter() starter = indirectStarter
multipleImagesPerStrip = True multipleImagesPerStrip = True
imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)')) imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)'))
prevSearch = compile(tagre('a', 'href', prevSearch = compile(tagre('a', 'href',
@ -227,7 +227,7 @@ class BoredAndEvil(_BasicScraper):
imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)'))
prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on.gif') prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on.gif')
latestSearch = prevSearch latestSearch = prevSearch
starter = indirectStarter() starter = indirectStarter
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'

View file

@ -55,7 +55,7 @@ class Carciphona(_BasicScraper):
after="prevarea")) after="prevarea"))
latestSearch = compile(tagre("a", "href", latestSearch = compile(tagre("a", "href",
r'(view\.php\?page=[0-9]+[^"]*)')) r'(view\.php\?page=[0-9]+[^"]*)'))
starter = indirectStarter() starter = indirectStarter
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
@ -275,7 +275,7 @@ class CoolCatStudio(_BasicScraper):
class CorydonCafe(_ParserScraper): class CorydonCafe(_ParserScraper):
url = 'http://corydoncafe.com/' url = 'http://corydoncafe.com/'
starter = indirectStarter() starter = indirectStarter
stripUrl = url + '%s.php' stripUrl = url + '%s.php'
imageSearch = "//center[2]//img" imageSearch = "//center[2]//img"
prevSearch = '//a[@title="prev"]' prevSearch = '//a[@title="prev"]'
@ -347,7 +347,7 @@ class CucumberQuest(_BasicScraper):
stripUrl = url + 'cq/%s/' stripUrl = url + 'cq/%s/'
firstStripUrl = stripUrl % 'page-1' firstStripUrl = stripUrl % 'page-1'
startUrl = url + 'recent.html' startUrl = url + 'recent.html'
starter = indirectStarter() starter = indirectStarter
imageSearch = ( imageSearch = (
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)), compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)),
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)), compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)),
@ -379,7 +379,7 @@ class Curvy(_ParserScraper):
class CyanideAndHappiness(_BasicScraper): class CyanideAndHappiness(_BasicScraper):
url = 'http://www.explosm.net/comics/' url = 'http://www.explosm.net/comics/'
starter = bounceStarter() starter = bounceStarter
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '15' firstStripUrl = stripUrl % '15'
imageSearch = compile(tagre("img", "src", r'(//files.explosm.net/comics/[^"]+)', before="main-comic")) imageSearch = compile(tagre("img", "src", r'(//files.explosm.net/comics/[^"]+)', before="main-comic"))

View file

@ -1,7 +1,12 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile from re import compile
from ..scraper import make_scraper from ..scraper import make_scraper
from ..util import tagre, getQueryParams from ..util import tagre, getQueryParams
@ -11,6 +16,7 @@ _prevSearch = compile(_linkTag + tagre("img", "src", r"previous\.gif"))
_nextSearch = compile(_linkTag + tagre("img", "src", r"next\.gif")) _nextSearch = compile(_linkTag + tagre("img", "src", r"next\.gif"))
_lastSearch = compile(_linkTag + tagre("img", "src", r"last\.gif")) _lastSearch = compile(_linkTag + tagre("img", "src", r"last\.gif"))
def add(name, shortName, imageFolder=None, lastStrip=None): def add(name, shortName, imageFolder=None, lastStrip=None):
classname = 'CloneManga_%s' % name classname = 'CloneManga_%s' % name
_url = 'http://manga.clone-army.org' _url = 'http://manga.clone-army.org'
@ -22,22 +28,21 @@ def add(name, shortName, imageFolder=None, lastStrip=None):
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
return '%03d' % int(getQueryParams(pageUrl)['page'][0]) return '%03d' % int(getQueryParams(pageUrl)['page'][0])
@classmethod def _starter(self):
def _starter(cls):
# first, try hopping to previous and next comic # first, try hopping to previous and next comic
data = cls.getPage(baseUrl) data = self.getPage(baseUrl)
try: try:
url = cls.fetchUrl(baseUrl, data, _prevSearch) url = self.fetchUrl(baseUrl, data, _prevSearch)
except ValueError: except ValueError:
# no previous link found, try hopping to last comic # no previous link found, try hopping to last comic
return cls.fetchUrl(baseUrl, data, _lastSearch) return self.fetchUrl(baseUrl, data, _lastSearch)
else: else:
data = cls.getPage(url) data = self.getPage(url)
return cls.fetchUrl(url, data, _nextSearch) return self.fetchUrl(url, data, _nextSearch)
attrs = dict( attrs = dict(
name='CloneManga/' + name, name='CloneManga/' + name,
stripUrl = baseUrl + '?page=%s', stripUrl=baseUrl + '?page=%s',
imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (_url, imageFolder), after="center")), imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (_url, imageFolder), after="center")),
prevSearch=_prevSearch, prevSearch=_prevSearch,
help='Index format: n', help='Index format: n',

View file

@ -20,7 +20,7 @@ class _ComicFury(_ParserScraper):
prevSearch = ('//a[@rel="prev"]', XPATH_LINK % "Previous") prevSearch = ('//a[@rel="prev"]', XPATH_LINK % "Previous")
nextSearch = ('//a[@rel="next"]', XPATH_LINK % "Next") nextSearch = ('//a[@rel="next"]', XPATH_LINK % "Next")
help = 'Index format: n' help = 'Index format: n'
starter = bounceStarter() starter = bounceStarter
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):

View file

@ -18,11 +18,10 @@ class _Creators(_ParserScraper):
def getName(cls): def getName(cls):
return 'Creators/' + cls.__name__ return 'Creators/' + cls.__name__
@classmethod def starter(self):
def starter(cls): start = self.url + self.path
start = cls.url + cls.path data = self.getPage(start)
data = cls.getPage(start) return self.fetchUrl(start, data, self.latestSearch)
return cls.fetchUrl(start, data, cls.latestSearch)
class _CreatorsEs(_Creators): class _CreatorsEs(_Creators):

View file

@ -15,7 +15,7 @@ from .common import _WordPressScraper, xpath_class
class DailyDose(_ParserScraper): class DailyDose(_ParserScraper):
url = 'http://dailydoseofcomics.com/' url = 'http://dailydoseofcomics.com/'
starter = indirectStarter() starter = indirectStarter
imageSearch = '//p/a/img' imageSearch = '//p/a/img'
prevSearch = '//a[@rel="prev"]' prevSearch = '//a[@rel="prev"]'
latestSearch = '//a[@rel="bookmark"]' latestSearch = '//a[@rel="bookmark"]'
@ -32,7 +32,7 @@ class DamnLol(_BasicScraper):
compile(tagre("img", "src", r'(%spics/[^"]+)' % rurl)), compile(tagre("img", "src", r'(%spics/[^"]+)' % rurl)),
) )
help = 'Index format: stripname-number' help = 'Index format: stripname-number'
starter = bounceStarter() starter = bounceStarter
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
@ -155,7 +155,7 @@ class Dilbert(_BasicScraper):
url = 'http://dilbert.com/' url = 'http://dilbert.com/'
stripUrl = url + '/strip/%s/' stripUrl = url + '/strip/%s/'
firstStripUrl = stripUrl % '1989-04-16' firstStripUrl = stripUrl % '1989-04-16'
starter = indirectStarter() starter = indirectStarter
prevSearch = compile(tagre("a", "href", r'(/strip/\d+-\d+-\d+)', after="Older Strip")) prevSearch = compile(tagre("a", "href", r'(/strip/\d+-\d+-\d+)', after="Older Strip"))
imageSearch = compile(tagre("img", "src", r'(http://assets.amuniversal.com/\w+)')) imageSearch = compile(tagre("img", "src", r'(http://assets.amuniversal.com/\w+)'))
latestSearch = compile(tagre("a", "href", latestSearch = compile(tagre("a", "href",
@ -255,7 +255,7 @@ class DresdenCodak(_BasicScraper):
latestSearch = compile(tagre("div", "id", "preview") + latestSearch = compile(tagre("div", "id", "preview") +
tagre("a", "href", tagre("a", "href",
r'(%s\d+/\d+/\d+/[^"]+)' % rurl)) r'(%s\d+/\d+/\d+/[^"]+)' % rurl))
starter = indirectStarter() starter = indirectStarter
class DrFun(_BasicScraper): class DrFun(_BasicScraper):

View file

@ -1,18 +1,24 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile from re import compile
from ..scraper import make_scraper from ..scraper import make_scraper
from ..util import tagre from ..util import tagre
# note: adding the compile() functions inside add() is a major performance hog # note: adding the compile() functions inside add() is a major performance hog
_imageSearch = compile(tagre("img", "src", r'(https://s3\.amazonaws\.com/media\.drunkduck\.com/[^"]+)', before="page-image")) _imageSearch = compile(tagre("img", "src", r'(https://s3\.amazonaws\.com/media\.drunkduck\.com/[^"]+)', before="page-image"))
_linkSearch = tagre("a", "href", r'(/[^"]+/\d+/)') _linkSearch = tagre("a", "href", r'(/[^"]+/\d+/)')
_prevSearch = compile(_linkSearch + tagre("img", "class", "arrow_prev")) _prevSearch = compile(_linkSearch + tagre("img", "class", "arrow_prev"))
_nextSearch = compile(_linkSearch + tagre("img", "class", "arrow_next")) _nextSearch = compile(_linkSearch + tagre("img", "class", "arrow_next"))
_lastSearch = compile(_linkSearch + tagre("img", "class", "arrow_last")) _lastSearch = compile(_linkSearch + tagre("img", "class", "arrow_last"))
def add(name, path): def add(name, path):
# XXX disallowed by the server administrator # XXX disallowed by the server administrator
classname = '_DrunkDuck_%s' % name classname = '_DrunkDuck_%s' % name
@ -24,18 +30,17 @@ def add(name, path):
ext = imageUrl.rsplit('.')[-1] ext = imageUrl.rsplit('.')[-1]
return '%d.%s' % (index, ext) return '%d.%s' % (index, ext)
@classmethod def _starter(self):
def _starter(cls):
# first, try hopping to previous and next comic # first, try hopping to previous and next comic
data = cls.getPage(_url) data = self.getPage(_url)
try: try:
url = cls.fetchUrl(_url, data, _prevSearch) url = self.fetchUrl(_url, data, _prevSearch)
except ValueError: except ValueError:
# no previous link found, try hopping to last comic # no previous link found, try hopping to last comic
return cls.fetchUrl(_url, data, _lastSearch) return self.fetchUrl(_url, data, _lastSearch)
else: else:
data = cls.getPage(url) data = self.getPage(url)
return cls.fetchUrl(url, data, _nextSearch) return self.fetchUrl(url, data, _nextSearch)
attrs = dict( attrs = dict(
name = 'DrunkDuck/' + name, name = 'DrunkDuck/' + name,

8
dosagelib/plugins/e.py Executable file → Normal file
View file

@ -15,7 +15,7 @@ from .common import _WordPressScraper, WP_LATEST_SEARCH, xpath_class
class EarthsongSaga(_ParserScraper): class EarthsongSaga(_ParserScraper):
url = 'http://earthsongsaga.com/index.php' url = 'http://earthsongsaga.com/index.php'
starter = indirectStarter() starter = indirectStarter
imageSearch = '//div[@id="comic"]//img' imageSearch = '//div[@id="comic"]//img'
prevSearch = '//a[@title="Previous"]' prevSearch = '//a[@title="Previous"]'
latestSearch = '//div[@id="leftmenu"]/span[1]/a[1]' latestSearch = '//div[@id="leftmenu"]/span[1]/a[1]'
@ -45,13 +45,13 @@ class EasilyAmused(_WordPressScraper):
url = 'http://www.flowerlarkstudios.com/comic/college-daze/ea01/' url = 'http://www.flowerlarkstudios.com/comic/college-daze/ea01/'
firstStripUrl = url firstStripUrl = url
latestSearch = WP_LATEST_SEARCH latestSearch = WP_LATEST_SEARCH
starter = indirectStarter() starter = indirectStarter
class EatLiver(_BasicScraper): class EatLiver(_BasicScraper):
url = 'http://www.eatliver.com/' url = 'http://www.eatliver.com/'
rurl = escape(url) rurl = escape(url)
starter = indirectStarter() starter = indirectStarter
stripUrl = url + "i.php?n=%s" stripUrl = url + "i.php?n=%s"
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl, imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl,
@ -185,7 +185,7 @@ class Eryl(_WordPressScraper):
url = 'http://www.flowerlarkstudios.com/comic/prologue-migration/page-i/' url = 'http://www.flowerlarkstudios.com/comic/prologue-migration/page-i/'
firstStripUrl = url firstStripUrl = url
latestSearch = WP_LATEST_SEARCH latestSearch = WP_LATEST_SEARCH
starter = indirectStarter() starter = indirectStarter
help = 'This was known as DarkWings in previous Dosage versions' help = 'This was known as DarkWings in previous Dosage versions'

6
dosagelib/plugins/f.py Executable file → Normal file
View file

@ -28,7 +28,7 @@ class FantasyRealms(_BasicScraper):
prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE) prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
latestSearch = compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE) latestSearch = compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE)
help = 'Index format: nnn' help = 'Index format: nnn'
starter = indirectStarter() starter = indirectStarter
class FauxPas(_BasicScraper): class FauxPas(_BasicScraper):
@ -48,7 +48,7 @@ class FeyWinds(_BasicScraper):
prevSearch = compile(r"(page.php\?id=.+?)'.+?navprevious.png") prevSearch = compile(r"(page.php\?id=.+?)'.+?navprevious.png")
latestSearch = compile(r'(comic/page.php\?id.+?)"') latestSearch = compile(r'(comic/page.php\?id.+?)"')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
starter = indirectStarter() starter = indirectStarter
class FilibusterCartoons(_BasicScraper): class FilibusterCartoons(_BasicScraper):
@ -161,7 +161,7 @@ class FredoAndPidjin(_BasicScraper):
prevSearch = compile(tagre('a', 'href', '([^"]+)') + "Prev</a>") prevSearch = compile(tagre('a', 'href', '([^"]+)') + "Prev</a>")
latestSearch = compile(tagre('a', 'href', "(" + url + latestSearch = compile(tagre('a', 'href', "(" + url +
r'\d\d\d\d/\d\d/\d\d/[^"]+/)')) r'\d\d\d\d/\d\d/\d\d/[^"]+/)'))
starter = indirectStarter() starter = indirectStarter
class Freefall(_BasicScraper): class Freefall(_BasicScraper):

View file

@ -27,7 +27,7 @@ class Garanos(_BasicScraper):
baseUrl = 'http://garanos.alexheberling.com/' baseUrl = 'http://garanos.alexheberling.com/'
rurl = escape(baseUrl) rurl = escape(baseUrl)
url = baseUrl + 'pages/page-1/' url = baseUrl + 'pages/page-1/'
starter = indirectStarter() starter = indirectStarter
stripUrl = baseUrl + 'pages/page-%s' stripUrl = baseUrl + 'pages/page-%s'
imageSearch = compile( imageSearch = compile(
tagre("img", "src", tagre("img", "src",
@ -136,7 +136,7 @@ class GoGetARoomie(_ComicControlScraper):
class GoneWithTheBlastwave(_BasicScraper): class GoneWithTheBlastwave(_BasicScraper):
url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1' url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
starter = indirectStarter() starter = indirectStarter
stripUrl = url[:-1] + '%s' stripUrl = url[:-1] + '%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(r'<img.+src=".+(/comics/.+?)"') imageSearch = compile(r'<img.+src=".+(/comics/.+?)"')

View file

@ -20,13 +20,12 @@ class _GoComics(_ParserScraper):
def getName(cls): def getName(cls):
return 'GoComics/' + cls.__name__[2:] return 'GoComics/' + cls.__name__[2:]
@classmethod def starter(self):
def starter(cls): url1 = self.url + self.path
url1 = cls.url + cls.path data = self.getPage(url1)
data = cls.getPage(url1) url2 = self.fetchUrl(url1, data, self.prevSearch)
url2 = cls.fetchUrl(url1, data, cls.prevSearch) data = self.getPage(url2)
data = cls.getPage(url2) return self.fetchUrl(url2, data, self.nextSearch)
return cls.fetchUrl(url2, data, cls.nextSearch)
@classmethod @classmethod
def namer(cls, image_url, page_url): def namer(cls, image_url, page_url):

View file

@ -22,13 +22,12 @@ class HagarTheHorrible(_BasicScraper):
prevSearch = compile(tagre("a", "href", prevUrl, after="Previous")) prevSearch = compile(tagre("a", "href", prevUrl, after="Previous"))
help = 'Index format: number' help = 'Index format: number'
@classmethod def starter(self):
def starter(cls):
"""Return last gallery link.""" """Return last gallery link."""
url = 'http://www.hagardunor.net/comics.php' url = 'http://www.hagardunor.net/comics.php'
data = cls.getPage(url) data = self.getPage(url)
pattern = compile(tagre("a", "href", cls.prevUrl)) pattern = compile(tagre("a", "href", self.prevUrl))
for starturl in cls.fetchUrls(url, data, pattern): for starturl in self.fetchUrls(url, data, pattern):
pass pass
return starturl return starturl
@ -41,7 +40,7 @@ class _HappyJar(_WordPressScraper):
class HarkAVagrant(_BasicScraper): class HarkAVagrant(_BasicScraper):
url = 'http://www.harkavagrant.com/' url = 'http://www.harkavagrant.com/'
rurl = escape(url) rurl = escape(url)
starter = bounceStarter() starter = bounceStarter
stripUrl = url + 'index.php?id=%s' stripUrl = url + 'index.php?id=%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl, imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl,

View file

@ -34,7 +34,7 @@ class JerkCity(_BasicScraper):
class JimBenton(_BasicScraper): class JimBenton(_BasicScraper):
url = 'http://www.jimbenton.com/page14/page14.html' url = 'http://www.jimbenton.com/page14/page14.html'
stripUrl = 'http://www.jimbenton.com/page14/files/JimBentonComic-%s.html' stripUrl = 'http://www.jimbenton.com/page14/files/JimBentonComic-%s.html'
starter = indirectStarter() starter = indirectStarter
imageSearch = compile(tagre("img", "src", r'(JimBentonComic-[^"]+)', imageSearch = compile(tagre("img", "src", r'(JimBentonComic-[^"]+)',
before="photo-frame")) before="photo-frame"))
prevSearch = compile(tagre("a", "href", r'(JimBentonComic-[^>]+\.html)', prevSearch = compile(tagre("a", "href", r'(JimBentonComic-[^>]+\.html)',

View file

@ -24,7 +24,7 @@ class Lackadaisy(_BasicScraper):
nextSearch = compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") + nextSearch = compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") +
"Next") "Next")
help = 'Index format: n' help = 'Index format: n'
starter = bounceStarter() starter = bounceStarter
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
@ -38,7 +38,7 @@ class Laiyu(_WordPressScraper):
url = 'http://www.flowerlarkstudios.com/comic/preliminary-concepts/welcome/' url = 'http://www.flowerlarkstudios.com/comic/preliminary-concepts/welcome/'
firstStripUrl = url firstStripUrl = url
latestSearch = WP_LATEST_SEARCH latestSearch = WP_LATEST_SEARCH
starter = indirectStarter() starter = indirectStarter
class LasLindas(_BasicScraper): class LasLindas(_BasicScraper):
@ -67,7 +67,7 @@ class LeastICouldDo(_BasicScraper):
after="Previous")) after="Previous"))
latestSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl, latestSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
after="feature-comic")) after="feature-comic"))
starter = indirectStarter() starter = indirectStarter
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
@ -117,5 +117,5 @@ class LookingForGroup(_ParserScraper):
imageSearch = '#comic img' imageSearch = '#comic img'
prevSearch = '#comic-left > a' prevSearch = '#comic-left > a'
latestSearch = '#header-dropdown-comic-lfg > a:nth-of-type(2)' latestSearch = '#header-dropdown-comic-lfg > a:nth-of-type(2)'
starter = indirectStarter() starter = indirectStarter
help = 'Index format: nnn' help = 'Index format: nnn'

View file

@ -104,7 +104,7 @@ class NichtLustig(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)')) prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
latestSearch = compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)')) latestSearch = compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)'))
help = 'Index format: yymmdd' help = 'Index format: yymmdd'
starter = indirectStarter() starter = indirectStarter
class Nicky510(_WordPressScraper): class Nicky510(_WordPressScraper):
@ -137,7 +137,7 @@ class NoMoreSavePoints(_WordPressScraper):
url = 'http://www.flowerlarkstudios.com/comic/no-more-save-points/mushroom-hopping/' url = 'http://www.flowerlarkstudios.com/comic/no-more-save-points/mushroom-hopping/'
firstStripUrl = url firstStripUrl = url
latestSearch = WP_LATEST_SEARCH latestSearch = WP_LATEST_SEARCH
starter = indirectStarter() starter = indirectStarter
class NoNeedForBushido(_BasicScraper): class NoNeedForBushido(_BasicScraper):
@ -153,7 +153,7 @@ class NoNeedForBushido(_BasicScraper):
latestSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl, latestSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
after="last-webcomic")) after="last-webcomic"))
help = 'Index format: nnn' help = 'Index format: nnn'
starter = indirectStarter() starter = indirectStarter
class NotInventedHere(_BasicScraper): class NotInventedHere(_BasicScraper):

View file

@ -11,9 +11,8 @@ class _NuklearPower(_ParserScraper):
prevSearch = '//a[@rel="prev"]' prevSearch = '//a[@rel="prev"]'
imageSearch = '//div[@id="comic"]/img' imageSearch = '//div[@id="comic"]/img'
@classmethod def starter(self):
def starter(cls): return self.url + self.path + '/'
return cls.url + cls.path + '/'
@classmethod @classmethod
def getName(cls): def getName(cls):

20
dosagelib/plugins/p.py Executable file → Normal file
View file

@ -20,7 +20,7 @@ class PandyLand(_WordPressScraper):
class ParadigmShift(_BasicScraper): class ParadigmShift(_BasicScraper):
url = 'http://www.paradigmshiftmanga.com/' url = 'http://www.paradigmshiftmanga.com/'
starter = indirectStarter() starter = indirectStarter
stripUrl = url + 'ps/%s.html' stripUrl = url + 'ps/%s.html'
imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)')) imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', prevSearch = compile(tagre("a", "href", r'([^"]+)',
@ -86,6 +86,7 @@ class PennyArcade(_BasicScraper):
before="btnPrev")) before="btnPrev"))
nextSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, nextSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
before="btnNext")) before="btnNext"))
starter = bounceStarter
help = 'Index format: yyyy/mm/dd/' help = 'Index format: yyyy/mm/dd/'
@classmethod @classmethod
@ -99,15 +100,6 @@ class PennyArcade(_BasicScraper):
prevUrl = "%s/%s/%s" % (dummy, yyyy, mm) prevUrl = "%s/%s/%s" % (dummy, yyyy, mm)
return prevUrl return prevUrl
@classmethod
def starter(cls):
"""Get bounced start URL."""
data = cls.getPage(cls.url)
url1 = cls.fetchUrl(cls.url, data, cls.prevSearch)
data = cls.getPage(url1)
url2 = cls.fetchUrl(url1, data, cls.nextSearch)
return cls.prevUrlModifier(url2)
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
p = pageUrl.split('/') p = pageUrl.split('/')
@ -162,7 +154,7 @@ class PicPakDog(_BasicScraper):
class PiledHigherAndDeeper(_BasicScraper): class PiledHigherAndDeeper(_BasicScraper):
url = 'http://www.phdcomics.com/comics.php' url = 'http://www.phdcomics.com/comics.php'
starter = bounceStarter() starter = bounceStarter
stripUrl = url + '?comicid=%s' stripUrl = url + '?comicid=%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd\d+s\d?\.\w{3,4})', quote="")) imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd\d+s\d?\.\w{3,4})', quote=""))
@ -207,7 +199,7 @@ class PokeyThePenguin(_ParserScraper):
imageSearch = '//p/img' imageSearch = '//p/img'
latestSearch = '(//a)[last()]' latestSearch = '(//a)[last()]'
multipleImagesPerStrip = True multipleImagesPerStrip = True
starter = indirectStarter() starter = indirectStarter
help = 'Index format: number' help = 'Index format: number'
def getPrevUrl(self, url, data): def getPrevUrl(self, url, data):
@ -231,7 +223,7 @@ class PoorlyDrawnLines(_BasicScraper):
class Precocious(_BasicScraper): class Precocious(_BasicScraper):
url = 'http://www.precociouscomic.com/' url = 'http://www.precociouscomic.com/'
starter = indirectStarter() starter = indirectStarter
stripUrl = url + 'archive/comic/%s' stripUrl = url + 'archive/comic/%s'
imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))')) imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png")) prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png"))
@ -243,7 +235,7 @@ class Precocious(_BasicScraper):
class PS238(_ParserScraper): class PS238(_ParserScraper):
url = 'http://ps238.nodwick.com/' url = 'http://ps238.nodwick.com/'
stripUrl = url + '/comic/%s/' stripUrl = url + '/comic/%s/'
starter = bounceStarter() starter = bounceStarter
imageSearch = '//div[@id="comic"]//img' imageSearch = '//div[@id="comic"]//img'
prevSearch = '//a[@class="comic-nav-base comic-nav-previous"]' prevSearch = '//a[@class="comic-nav-base comic-nav-previous"]'
nextSearch = '//a[@class="comic-nav-base comic-nav-next"]' nextSearch = '//a[@class="comic-nav-base comic-nav-next"]'

View file

@ -51,7 +51,7 @@ class RealmOfAtland(_BasicScraper):
class RedMeat(_BasicScraper): class RedMeat(_BasicScraper):
baseUrl = 'http://www.redmeat.com/redmeat/' baseUrl = 'http://www.redmeat.com/redmeat/'
url = baseUrl + 'current/index.html' url = baseUrl + 'current/index.html'
starter = bounceStarter() starter = bounceStarter
stripUrl = baseUrl + '%s/index.html' stripUrl = baseUrl + '%s/index.html'
firstStripUrl = stripUrl % '1996-06-10' firstStripUrl = stripUrl % '1996-06-10'
imageSearch = compile(tagre("img", "src", r'(http://www\.redmeat\.com/imager/b/redmeat/[^"]*\.png)')) imageSearch = compile(tagre("img", "src", r'(http://www\.redmeat\.com/imager/b/redmeat/[^"]*\.png)'))

View file

@ -27,13 +27,12 @@ class SabrinaOnline(_BasicScraper):
adult = True adult = True
multipleImagesPerStrip = True multipleImagesPerStrip = True
@classmethod def starter(self):
def starter(cls):
"""Pick last one in a list of archive pages.""" """Pick last one in a list of archive pages."""
archive = cls.url + 'archive.html' archive = self.url + 'archive.html'
data = cls.getPage(archive) data = self.getPage(archive)
search = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)")) search = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)"))
archivepages = cls.fetchUrls(archive, data, search) archivepages = self.fetchUrls(archive, data, search)
return archivepages[-1] return archivepages[-1]
@ -69,7 +68,7 @@ class ScandinaviaAndTheWorld(_ParserScraper):
url = 'http://satwcomic.com/' url = 'http://satwcomic.com/'
stripUrl = url + '%s' stripUrl = url + '%s'
firstStripUrl = stripUrl % 'sweden-denmark-and-norway' firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
starter = indirectStarter() starter = indirectStarter
imageSearch = '//img[@itemprop="image"]' imageSearch = '//img[@itemprop="image"]'
prevSearch = '//a[@accesskey="p"]' prevSearch = '//a[@accesskey="p"]'
latestSearch = '//a[text()="View latest comic"]' latestSearch = '//a[text()="View latest comic"]'
@ -166,14 +165,13 @@ class ScurryAndCover(_ParserScraper):
image = images[0] image = images[0]
return [cls.url + '/images/pages/' + image + '-xsmall.png'] return [cls.url + '/images/pages/' + image + '-xsmall.png']
@classmethod def starter(self):
def starter(cls):
"""Go forward as far as possible, then start.""" """Go forward as far as possible, then start."""
url = cls.url url = self.url
while True: while True:
data = cls.getPage(url) data = self.getPage(url)
try: try:
url = cls.fetchUrl(url, data, cls.nextSearch) url = self.fetchUrl(url, data, self.nextSearch)
except ValueError: except ValueError:
break break
return url return url
@ -197,7 +195,7 @@ class SexyLosers(_BasicScraper):
prevSearch = compile(r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<', IGNORECASE) prevSearch = compile(r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<', IGNORECASE)
latestSearch = compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>', IGNORECASE) latestSearch = compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>', IGNORECASE)
help = 'Index format: nnn' help = 'Index format: nnn'
starter = indirectStarter() starter = indirectStarter
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
@ -334,7 +332,7 @@ class SnowFlame(_WordPressScraper):
url = 'http://www.snowflamecomic.com/' url = 'http://www.snowflamecomic.com/'
stripUrl = url + '?comic=snowflame-%s-%s' stripUrl = url + '?comic=snowflame-%s-%s'
firstStripUrl = stripUrl % ('01', '01') firstStripUrl = stripUrl % ('01', '01')
starter = bounceStarter() starter = bounceStarter
nextSearch = WP_LATEST_SEARCH nextSearch = WP_LATEST_SEARCH
help = 'Index format: chapter-page' help = 'Index format: chapter-page'
@ -396,7 +394,7 @@ class Spamusement(_BasicScraper):
IGNORECASE) IGNORECASE)
latestSearch = prevSearch latestSearch = prevSearch
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
starter = indirectStarter() starter = indirectStarter
class SpareParts(_BasicScraper): class SpareParts(_BasicScraper):
@ -507,7 +505,7 @@ class StuffNoOneToldMe(_BasicScraper):
stripUrl = url + '%s.html' stripUrl = url + '%s.html'
firstStripUrl = stripUrl % '2010/05/01' firstStripUrl = stripUrl % '2010/05/01'
olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)" olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
starter = indirectStarter() starter = indirectStarter
imageSearch = ( imageSearch = (
compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') + compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') +
r"(?:</a>|<br />)"), r"(?:</a>|<br />)"),

View file

@ -1,10 +1,16 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile from re import compile
from ..scraper import make_scraper from ..scraper import make_scraper
from ..util import tagre, quote, case_insensitive_re from ..util import tagre, quote, case_insensitive_re
# SmackJeeves is a crawler's nightmare - users are allowed to edit HTML directly. # SmackJeeves is a crawler's nightmare - users are allowed to edit HTML directly.
# That's why there are so many different search patterns. # That's why there are so many different search patterns.
@ -31,6 +37,7 @@ _nextSearch = (
compile(_linkSearch + tagre("img", "src", r"[^']+/(?:forthnav)\.png[^']*", quote="'")), compile(_linkSearch + tagre("img", "src", r"[^']+/(?:forthnav)\.png[^']*", quote="'")),
) )
def add(name, url, adult, bounce): def add(name, url, adult, bounce):
classname = 'SmackJeeves_' + name classname = 'SmackJeeves_' + name
@ -41,15 +48,14 @@ def add(name, url, adult, bounce):
return 'http://www.smackjeeves.com/mature.php?ref=' + quote(pageUrl) return 'http://www.smackjeeves.com/mature.php?ref=' + quote(pageUrl)
return pageUrl return pageUrl
@classmethod def _starter(self):
def _starter(cls):
"""Get start URL.""" """Get start URL."""
url1 = modifier(url) url1 = modifier(url)
data = cls.getPage(url1) data = self.getPage(url1)
url2 = cls.fetchUrl(url1, data, cls.prevSearch) url2 = self.fetchUrl(url1, data, self.prevSearch)
if bounce: if bounce:
data = cls.getPage(url2) data = self.getPage(url2)
url3 = cls.fetchUrl(url2, data, _nextSearch) url3 = self.fetchUrl(url2, data, _nextSearch)
return modifier(url3) return modifier(url3)
return modifier(url2) return modifier(url2)
@ -76,7 +82,8 @@ def add(name, url, adult, bounce):
) )
# do not edit anything below since these entries are generated from scripts/update.sh # do not edit anything below since these entries are generated from
# scripts/update_plugins.sh
# DO NOT REMOVE # DO NOT REMOVE
add('20TimesKirby', 'http://20xkirby.smackjeeves.com/comics/', False, True) add('20TimesKirby', 'http://20xkirby.smackjeeves.com/comics/', False, True)
add('2Kingdoms', 'http://2kingdoms.smackjeeves.com/comics/', False, False) add('2Kingdoms', 'http://2kingdoms.smackjeeves.com/comics/', False, False)
@ -110,7 +117,7 @@ add('AlwaysRainingHere', 'http://alwaysraininghere.smackjeeves.com/comics/', Fal
add('Amaravati', 'http://amaravati.smackjeeves.com/comics/', False, True) add('Amaravati', 'http://amaravati.smackjeeves.com/comics/', False, True)
add('AmorVincitOmnia', 'http://avo.smackjeeves.com/comics/', True, True) add('AmorVincitOmnia', 'http://avo.smackjeeves.com/comics/', True, True)
add('AmsdenEstate', 'http://monsterous.smackjeeves.com/comics/', False, True) add('AmsdenEstate', 'http://monsterous.smackjeeves.com/comics/', False, True)
#add('Amya', 'http://amya.smackjeeves.com/comics/', False, True) # add('Amya', 'http://amya.smackjeeves.com/comics/', False, True)
add('Anathemacomics', 'http://anathema-comics.smackjeeves.com/comics/', False, True) add('Anathemacomics', 'http://anathema-comics.smackjeeves.com/comics/', False, True)
add('AngelBeast', 'http://angel-beast.smackjeeves.com/comics/', False, True) add('AngelBeast', 'http://angel-beast.smackjeeves.com/comics/', False, True)
add('AngelGuardian', 'http://angel-guardian.smackjeeves.com/comics/', False, True) add('AngelGuardian', 'http://angel-guardian.smackjeeves.com/comics/', False, True)
@ -176,7 +183,7 @@ add('Cambion', 'http://cambion.smackjeeves.com/comics/', True, True)
add('CaptiveSoul', 'http://captive-soul.smackjeeves.com/comics/', False, True) add('CaptiveSoul', 'http://captive-soul.smackjeeves.com/comics/', False, True)
add('Captor', 'http://captor.smackjeeves.com/comics/', False, True) add('Captor', 'http://captor.smackjeeves.com/comics/', False, True)
add('CaravanaTaleofGodsandMen', 'http://www.caravantale.com/comics/', False, True) add('CaravanaTaleofGodsandMen', 'http://www.caravantale.com/comics/', False, True)
#add('Carciphona', 'http://carciphona.smackjeeves.com/comics/', False, True) # add('Carciphona', 'http://carciphona.smackjeeves.com/comics/', False, True)
add('Cataclysm', 'http://cataclysm.smackjeeves.com/comics/', False, True) add('Cataclysm', 'http://cataclysm.smackjeeves.com/comics/', False, True)
add('Catnip', 'http://catnipmanga.smackjeeves.com/comics/', True, True) add('Catnip', 'http://catnipmanga.smackjeeves.com/comics/', True, True)
add('Cerintha', 'http://cerintha.smackjeeves.com/comics/', False, True) add('Cerintha', 'http://cerintha.smackjeeves.com/comics/', False, True)
@ -281,7 +288,7 @@ add('FinalArcanum', 'http://finalarcanum.smackjeeves.com/comics/', False, True)
add('FireWire', 'http://firewire.smackjeeves.com/comics/', False, True) add('FireWire', 'http://firewire.smackjeeves.com/comics/', False, True)
add('FireredLisasReise', 'http://lisasreise.smackjeeves.com/comics/', False, True) add('FireredLisasReise', 'http://lisasreise.smackjeeves.com/comics/', False, True)
add('FlyorFail', 'http://flyorfail.smackjeeves.com/comics/', False, False) add('FlyorFail', 'http://flyorfail.smackjeeves.com/comics/', False, False)
#add('FootLoose', 'http://footloose.smackjeeves.com/comics/', False, True) # add('FootLoose', 'http://footloose.smackjeeves.com/comics/', False, True)
add('ForcedSeduction', 'http://forced-seduction.smackjeeves.com/comics/', False, True) add('ForcedSeduction', 'http://forced-seduction.smackjeeves.com/comics/', False, True)
add('ForestHill', 'http://www.foresthillcomic.org/comics/', False, False) add('ForestHill', 'http://www.foresthillcomic.org/comics/', False, False)
add('ForgettheDistance', 'http://forgetthedistance.smackjeeves.com/comics/', True, True) add('ForgettheDistance', 'http://forgetthedistance.smackjeeves.com/comics/', True, True)
@ -474,7 +481,7 @@ add('MythsofUnovaAWhiteNuzlockeRunHardMode', 'http://mythsofunova.smackjeeves.co
add('NIK', 'http://nik.smackjeeves.com/comics/', False, True) add('NIK', 'http://nik.smackjeeves.com/comics/', False, True)
add('Nah', 'http://thecomicformerlyknownasgenlab.smackjeeves.com/comics/', False, True) add('Nah', 'http://thecomicformerlyknownasgenlab.smackjeeves.com/comics/', False, True)
add('Negligence', 'http://negligence.smackjeeves.com/comics/', False, True) add('Negligence', 'http://negligence.smackjeeves.com/comics/', False, True)
#add('NekotheKitty', 'http://www.nekothekitty.net/comics/', False, True) # add('NekotheKitty', 'http://www.nekothekitty.net/comics/', False, True)
add('NeoCrystalAdventures', 'http://neocrystaladventures.smackjeeves.com/comics/', False, True) add('NeoCrystalAdventures', 'http://neocrystaladventures.smackjeeves.com/comics/', False, True)
add('NeonGlow', 'http://neonglow.smackjeeves.com/comics/', False, True) add('NeonGlow', 'http://neonglow.smackjeeves.com/comics/', False, True)
add('NevertheHero', 'http://neverthehero.smackjeeves.com/comics/', False, True) add('NevertheHero', 'http://neverthehero.smackjeeves.com/comics/', False, True)
@ -766,7 +773,7 @@ add('WhenSheWasBad', 'http://whenshewasbad.smackjeeves.com/comics/', False, True
add('Whenweweresilent', 'http://silence.smackjeeves.com/comics/', False, False) add('Whenweweresilent', 'http://silence.smackjeeves.com/comics/', False, False)
add('WhereaboutsOfTime', 'http://wot.smackjeeves.com/comics/', False, True) add('WhereaboutsOfTime', 'http://wot.smackjeeves.com/comics/', False, True)
add('WhiteHeart', 'http://whiteheart.smackjeeves.com/comics/', True, False) add('WhiteHeart', 'http://whiteheart.smackjeeves.com/comics/', True, False)
#add('WhiteNoise', 'http://white-noise.smackjeeves.com/comics/', False, True) # add('WhiteNoise', 'http://white-noise.smackjeeves.com/comics/', False, True)
add('WildWingBoys', 'http://wwb.smackjeeves.com/comics/', False, True) add('WildWingBoys', 'http://wwb.smackjeeves.com/comics/', False, True)
add('WildWingBoysKoathArc', 'http://wwbka.smackjeeves.com/comics/', False, True) add('WildWingBoysKoathArc', 'http://wwbka.smackjeeves.com/comics/', False, True)
add('Wildflowers', 'http://wildflowers.smackjeeves.com/comics/', False, True) add('Wildflowers', 'http://wildflowers.smackjeeves.com/comics/', False, True)

15
dosagelib/plugins/t.py Executable file → Normal file
View file

@ -83,7 +83,7 @@ class TheOrderOfTheStick(_BasicScraper):
prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"') prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
latestSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"') latestSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
starter = indirectStarter() starter = indirectStarter
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
@ -122,7 +122,7 @@ class TheThinHLine(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'([^"]+)') + '&gt;</a>') prevSearch = compile(tagre("a", "href", r'([^"]+)') + '&gt;</a>')
latestSearch = compile(tagre("a", "href", r'([^"]+)', latestSearch = compile(tagre("a", "href", r'([^"]+)',
after='class="timestamp"')) after='class="timestamp"'))
starter = indirectStarter() starter = indirectStarter
adult = True adult = True
indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl)) indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))
@ -180,17 +180,14 @@ class ThreePanelSoul(_BasicScraper):
class ThunderAndLightning(_BasicScraper): class ThunderAndLightning(_BasicScraper):
url = 'http://www.talcomic.com/wp/' baseUrl = 'http://www.talcomic.com/wp/'
rurl = escape(url) url = baseUrl + '?latestcomic'
stripUrl = url + '%s/' rurl = escape(baseUrl)
stripUrl = baseUrl + '%s/'
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
help = 'Index format: yyyy/mm/dd/page-nn' help = 'Index format: yyyy/mm/dd/page-nn'
@classmethod
def starter(cls):
return cls.url + '?latestcomic'
class TinyKittenTeeth(_BasicScraper): class TinyKittenTeeth(_BasicScraper):
url = 'http://www.tinykittenteeth.com/' url = 'http://www.tinykittenteeth.com/'

View file

@ -24,7 +24,7 @@ class Undertow(_BasicScraper):
imageSearch = compile(tagre("img", "src", r'([^"]+\.jpg)')) imageSearch = compile(tagre("img", "src", r'([^"]+\.jpg)'))
prevSearch = compile(r'href="(.+?)".+?teynpoint') prevSearch = compile(r'href="(.+?)".+?teynpoint')
latestSearch = compile(r'href="(.+?)".+?Most recent page') latestSearch = compile(r'href="(.+?)".+?Most recent page')
starter = indirectStarter() starter = indirectStarter
class UnicornJelly(_BasicScraper): class UnicornJelly(_BasicScraper):
@ -47,7 +47,7 @@ class Unsounded(_BasicScraper):
latestSearch = compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) + latestSearch = compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) +
tagre("img", "src", tagre("img", "src",
r"%simages/newpages\.png" % rurl)) r"%simages/newpages\.png" % rurl))
starter = indirectStarter() starter = indirectStarter
help = 'Index format: chapter-number' help = 'Index format: chapter-number'
def getIndexStripUrl(self, index): def getIndexStripUrl(self, index):

View file

@ -45,7 +45,7 @@ class WayfarersMoon(_BasicScraper):
class WebDesignerCOTW(_BasicScraper): class WebDesignerCOTW(_BasicScraper):
url = 'http://www.webdesignerdepot.com/' url = 'http://www.webdesignerdepot.com/'
rurl = escape(url) rurl = escape(url)
starter = indirectStarter() starter = indirectStarter
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1' firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1'
imageSearch = ( imageSearch = (
@ -211,10 +211,9 @@ class WormWorldSaga(_BasicScraper):
latestChapter = 5 latestChapter = 5
multipleImagesPerStrip = True multipleImagesPerStrip = True
@classmethod def starter(self):
def starter(cls):
return '%schapters/chapter%02d/%s/index.php' % ( return '%schapters/chapter%02d/%s/index.php' % (
cls.url, cls.latestChapter, cls.lang.upper()) self.url, self.latestChapter, self.lang.upper())
def getPrevUrl(self, url, data): def getPrevUrl(self, url, data):
"""Find previous URL.""" """Find previous URL."""

View file

@ -6,15 +6,11 @@
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
from .common import _WordPressScraper, WP_LATEST_SEARCH from .common import _WordPressScraper, WP_LATEST_SEARCH
from ..helpers import indirectStarter
class _WebcomicFactory(_WordPressScraper): class _WebcomicFactory(_WordPressScraper):
@classmethod starter = indirectStarter
def starter(cls): latestSearch = WP_LATEST_SEARCH
"""this is basically helpers.indirectStarter, but dynamically selecting
the right parameters."""
data = cls.getPage(cls.firstStripUrl)
return cls.fetchUrl(cls.firstStripUrl, data, WP_LATEST_SEARCH)
# do not edit anything below since these entries are generated from # do not edit anything below since these entries are generated from

View file

@ -13,7 +13,7 @@ class _WLPComics(_ParserScraper):
imageSearch = '//center/*/img[contains(@alt, " Comic")]' imageSearch = '//center/*/img[contains(@alt, " Comic")]'
prevSearch = '//a[contains(text(), "Previous ")]' prevSearch = '//a[contains(text(), "Previous ")]'
nextSearch = '//a[contains(text(), "Next ")]' nextSearch = '//a[contains(text(), "Next ")]'
starter = bounceStarter() starter = bounceStarter
help = 'Index format: nnn' help = 'Index format: nnn'
@classmethod @classmethod

View file

@ -9,7 +9,7 @@ def add(name, start):
name=name, name=name,
url='http://hijinksensue.com/', url='http://hijinksensue.com/',
latestSearch=start, latestSearch=start,
starter=indirectStarter() starter=indirectStarter
) )
globals()[name] = make_scraper(name, _WordPressScraper, **attrs) globals()[name] = make_scraper(name, _WordPressScraper, **attrs)

View file

@ -15,7 +15,7 @@ from ..util import tagre
class Xkcd(_BasicScraper): class Xkcd(_BasicScraper):
name = 'xkcd' name = 'xkcd'
url = 'http://xkcd.com/' url = 'http://xkcd.com/'
starter = bounceStarter() starter = bounceStarter
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", imageSearch = compile(tagre("img", "src",

View file

@ -22,7 +22,7 @@ class ZapComic(_ParserScraper):
class Zapiro(_BasicScraper): class Zapiro(_BasicScraper):
url = 'http://www.mg.co.za/zapiro/' url = 'http://www.mg.co.za/zapiro/'
starter = bounceStarter() starter = bounceStarter
stripUrl = 'http://mg.co.za/cartoon/%s' stripUrl = 'http://mg.co.za/cartoon/%s'
firstStripUrl = stripUrl % 'zapiro_681' firstStripUrl = stripUrl % 'zapiro_681'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))