Fixed & removed some comics in s.py.
This commit is contained in:
parent
6bbdcfb341
commit
9feaf245f2
1 changed file with 24 additions and 108 deletions
|
@ -11,16 +11,18 @@ import datetime
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import indirectStarter, bounceStarter
|
from ..helpers import indirectStarter, bounceStarter
|
||||||
from ..util import tagre, getPageContent
|
from ..util import tagre
|
||||||
from .common import _ComicControlScraper, _WordPressScraper
|
from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH
|
||||||
|
|
||||||
|
|
||||||
class SabrinaOnline(_BasicScraper):
|
class SabrinaOnline(_BasicScraper):
|
||||||
url = 'http://sabrina-online.com/'
|
url = 'http://sabrina-online.com/'
|
||||||
|
stripUrl = url + '%s.html'
|
||||||
|
firstStripUrl = stripUrl % '1996-01'
|
||||||
imageSearch = compile(tagre("a", "href", r'(strips/[^"]*)'))
|
imageSearch = compile(tagre("a", "href", r'(strips/[^"]*)'))
|
||||||
prevSearch = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)") +
|
prevSearch = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)") +
|
||||||
tagre("img", "src", "b_back.gif"))
|
tagre("img", "src", "b_back.gif"))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: yyyy-qq'
|
||||||
adult = True
|
adult = True
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
|
|
||||||
|
@ -28,10 +30,10 @@ class SabrinaOnline(_BasicScraper):
|
||||||
def starter(cls):
|
def starter(cls):
|
||||||
"""Pick last one in a list of archive pages."""
|
"""Pick last one in a list of archive pages."""
|
||||||
archive = cls.url + 'archive.html'
|
archive = cls.url + 'archive.html'
|
||||||
data = getPageContent(archive, cls.session)
|
data = cls.getPage(archive)
|
||||||
search = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)"))
|
search = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)"))
|
||||||
archivepages = search.findall(data)
|
archivepages = cls.fetchUrls(archive, data, search)
|
||||||
return cls.url + archivepages[-1]
|
return archivepages[-1]
|
||||||
|
|
||||||
|
|
||||||
class SafelyEndangered(_BasicScraper):
|
class SafelyEndangered(_BasicScraper):
|
||||||
|
@ -308,58 +310,11 @@ class SMBC(_ParserScraper):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class SnowFlakes(_BasicScraper):
|
class SnowFlame(_WordPressScraper):
|
||||||
url = 'http://www.snowflakescomic.com/'
|
|
||||||
stripUrl = url + '?id=%s&sl=%s'
|
|
||||||
firstStripUrl = stripUrl % ('103', '1')
|
|
||||||
endOfLife = True
|
|
||||||
imageSearch = (
|
|
||||||
compile(tagre("img", "src", r'(comics/[^"]+)')),
|
|
||||||
compile(tagre("img", "src",
|
|
||||||
r'(http://www.snowflakescomic.com/comics/[^"]+)')),
|
|
||||||
)
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(/\?id=\d+\&sl=\d)', quote="") +
|
|
||||||
tagre("img", "src", r'images/nav_prior-ON\.gif'))
|
|
||||||
help = 'Index format: number'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def starter(cls):
|
|
||||||
return cls.stripUrl % ('530', '5')
|
|
||||||
|
|
||||||
def getIndexStripUrl(self, index):
|
|
||||||
return self.stripUrl % (index, index[0])
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def namer(cls, imageUrl, pageUrl):
|
|
||||||
"""Use strip index number for image name."""
|
|
||||||
index = int(compile(r'id=(\d+)').search(pageUrl).group(1))
|
|
||||||
ext = imageUrl.rsplit('.', 1)[1]
|
|
||||||
return "SnowFlakes-%d.%s" % (index, ext)
|
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
|
||||||
"""Skip pages without images."""
|
|
||||||
return url in (
|
|
||||||
self.stripUrl % ('279', '2'), # no comic
|
|
||||||
self.stripUrl % ('278', '2'), # no comic
|
|
||||||
self.stripUrl % ('277', '2'), # no comic
|
|
||||||
self.stripUrl % ('276', '2'), # no comic
|
|
||||||
self.stripUrl % ('275', '2'), # no comic
|
|
||||||
self.stripUrl % ('214', '2'), # no comic
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class SnowFlame(_BasicScraper):
|
|
||||||
url = 'http://www.snowflamecomic.com/'
|
url = 'http://www.snowflamecomic.com/'
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '?comic=snowflame-%s-%s'
|
stripUrl = url + '?comic=snowflame-%s-%s'
|
||||||
firstStripUrl = stripUrl % ('01', '01')
|
firstStripUrl = stripUrl % ('01', '01')
|
||||||
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl, after="Snow[Ff]lame "))
|
starter = bounceStarter(url, WP_LATEST_SEARCH)
|
||||||
prevSearch = compile(tagre("span", "class", "mininav-prev") +
|
|
||||||
tagre("a", "href",
|
|
||||||
r'(%s\?comic=snowflame[^"]+)' % rurl))
|
|
||||||
starter = bounceStarter(
|
|
||||||
url, compile(tagre("span", "class", "mininav-next") +
|
|
||||||
tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl)))
|
|
||||||
help = 'Index format: chapter-page'
|
help = 'Index format: chapter-page'
|
||||||
|
|
||||||
def getIndexStripUrl(self, index):
|
def getIndexStripUrl(self, index):
|
||||||
|
@ -375,15 +330,8 @@ class SnowFlame(_BasicScraper):
|
||||||
return "%s-%s-%s" % (chapter, page, filename)
|
return "%s-%s-%s" % (chapter, page, filename)
|
||||||
|
|
||||||
|
|
||||||
class SodiumEyes(_BasicScraper):
|
class SodiumEyes(_WordPressScraper):
|
||||||
url = 'http://sodiumeyes.com/'
|
url = 'http://sodiumeyes.com/'
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '%s/'
|
|
||||||
firstStripUrl = stripUrl % '2007/11/08/damning-evidence'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomic/[^ ]+)' % rurl,
|
|
||||||
quote=""))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
|
||||||
|
|
||||||
|
|
||||||
class SomethingPositive(_BasicScraper):
|
class SomethingPositive(_BasicScraper):
|
||||||
|
@ -410,15 +358,12 @@ class Sorcery101(_BasicScraper):
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
|
||||||
class SpaceTrawler(_BasicScraper):
|
class SpaceTrawler(_WordPressScraper):
|
||||||
url = 'http://spacetrawler.com/'
|
base_url = 'http://spacetrawler.com/'
|
||||||
rurl = escape(url)
|
url = base_url + '2013/12/24/spacetrawler-379/'
|
||||||
stripUrl = url + '%s/'
|
firstStripUrl = base_url + '2010/01/01/spacetrawler-4/'
|
||||||
firstStripUrl = stripUrl % '2010/01/01/spacetrawler-4'
|
prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev ')]"
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
endOfLife = True
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
|
|
||||||
after="navi-prev"))
|
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
|
||||||
|
|
||||||
|
|
||||||
class Spamusement(_BasicScraper):
|
class Spamusement(_BasicScraper):
|
||||||
|
@ -453,22 +398,20 @@ class SPQRBlues(_WordPressScraper):
|
||||||
|
|
||||||
class StandStillStaySilent(_ParserScraper):
|
class StandStillStaySilent(_ParserScraper):
|
||||||
url = 'http://www.sssscomic.com/comic.php'
|
url = 'http://www.sssscomic.com/comic.php'
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '?page=%s'
|
stripUrl = url + '?page=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = '//img[@class="comicnormal"]'
|
imageSearch = '//img[@class="comicnormal"]'
|
||||||
prevSearch = '//a//div[@id="navprev"]'
|
prevSearch = '//a[div[@id="navprev"]]'
|
||||||
help = 'Index Format: number'
|
help = 'Index Format: number'
|
||||||
|
|
||||||
|
|
||||||
class StarCrossdDestiny(_BasicScraper):
|
class StarCrossdDestiny(_ParserScraper):
|
||||||
baseUrl = 'http://www.starcrossd.net/'
|
baseUrl = 'http://www.starcrossd.net/'
|
||||||
rurl = escape(baseUrl)
|
|
||||||
url = baseUrl + 'comic.html'
|
url = baseUrl + 'comic.html'
|
||||||
stripUrl = baseUrl + 'archives/%s.html'
|
stripUrl = baseUrl + 'archives/%s.html'
|
||||||
firstStripUrl = stripUrl % '00000001'
|
firstStripUrl = stripUrl % '00000001'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://(?:www\.)?starcrossd\.net/(?:ch1|strips|book2)/[^"]+)'))
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
prevSearch = compile(r'<a href="(%s(?:ch1/)?archives/\d+\.html)"[^>]*"[^"]*"[^>]*>prev' % rurl, IGNORECASE)
|
prevSearch = '//a[text()="prev"]'
|
||||||
help = 'Index format: nnnnnnnn'
|
help = 'Index format: nnnnnnnn'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -520,7 +463,7 @@ class StrongFemaleProtagonist(_ParserScraper):
|
||||||
url = 'http://strongfemaleprotagonist.com/'
|
url = 'http://strongfemaleprotagonist.com/'
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
css = True
|
css = True
|
||||||
imageSearch = 'article p:first-child img'
|
imageSearch = 'article p img'
|
||||||
prevSearch = 'div.nav-previous > a'
|
prevSearch = 'div.nav-previous > a'
|
||||||
help = 'Index format: issue-?/page-??'
|
help = 'Index format: issue-?/page-??'
|
||||||
|
|
||||||
|
@ -533,20 +476,10 @@ class StrongFemaleProtagonist(_ParserScraper):
|
||||||
self.stripUrl % 'issue-5/newspaper',
|
self.stripUrl % 'issue-5/newspaper',
|
||||||
self.stripUrl % 'issue-5/hiatus-1',
|
self.stripUrl % 'issue-5/hiatus-1',
|
||||||
self.stripUrl % 'issue-5/hiatus-2',
|
self.stripUrl % 'issue-5/hiatus-2',
|
||||||
|
self.stripUrl % 'ssue-1/no-page',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class Stubble(_BasicScraper):
|
|
||||||
url = 'http://stubblecomics.com/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '?p=%s'
|
|
||||||
firstStripUrl = stripUrl % '4'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
|
|
||||||
after="navi-prev"))
|
|
||||||
help = 'Index format: number'
|
|
||||||
|
|
||||||
|
|
||||||
class StuffNoOneToldMe(_BasicScraper):
|
class StuffNoOneToldMe(_BasicScraper):
|
||||||
url = 'http://www.snotm.com/'
|
url = 'http://www.snotm.com/'
|
||||||
stripUrl = url + '%s.html'
|
stripUrl = url + '%s.html'
|
||||||
|
@ -585,22 +518,5 @@ class StuffNoOneToldMe(_BasicScraper):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class SuburbanTribe(_BasicScraper):
|
class SupernormalStep(_ComicControlScraper):
|
||||||
url = 'http://www.pixelwhip.com/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '?p=%s'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
|
|
||||||
after="prev"))
|
|
||||||
help = 'Index format: nnnn'
|
|
||||||
|
|
||||||
|
|
||||||
class SupernormalStep(_BasicScraper):
|
|
||||||
url = 'http://supernormalstep.com/'
|
url = 'http://supernormalstep.com/'
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '?p=%s'
|
|
||||||
firstStripUrl = stripUrl % '8'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
|
|
||||||
after="prev"))
|
|
||||||
help = 'Index format: number'
|
|
||||||
|
|
Loading…
Reference in a new issue