BloomingFaeries: Don't download every page twice.
(Also, simplify namer, switch to _ParserScraper)
This commit is contained in:
parent
8db6f8e8b7
commit
6bbdcfb341
2 changed files with 11 additions and 20 deletions
|
@ -7,10 +7,11 @@ from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
from ..util import tagre, getPageContent
|
from ..util import tagre
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
from .common import _ComicControlScraper, _ComicPressScraper, _WordPressScraper
|
from .common import (_ComicControlScraper, _ComicPressScraper,
|
||||||
|
_WordPressScraper, WP_PREV_SEARCH)
|
||||||
|
|
||||||
|
|
||||||
class BackwaterPlanet(_BasicScraper):
|
class BackwaterPlanet(_BasicScraper):
|
||||||
|
@ -179,26 +180,16 @@ class BloodBound(_WordPressScraper):
|
||||||
firstStripUrl = 'http://bloodboundcomic.com/comic/06112006/'
|
firstStripUrl = 'http://bloodboundcomic.com/comic/06112006/'
|
||||||
|
|
||||||
|
|
||||||
class BloomingFaeries(_BasicScraper):
|
class BloomingFaeries(_ParserScraper):
|
||||||
adult = True
|
adult = True
|
||||||
url = 'http://www.bloomingfaeries.com/'
|
url = 'http://www.bloomingfaeries.com/'
|
||||||
stripUrl = url + 'comic/public/%s/'
|
firstStripUrl = url + 'comic/public/pit-stop/'
|
||||||
firstStripUrl = stripUrl % "pit-stop"
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www.bloomingfaeries.com/wp-content/uploads[^"]+)', after='title'))
|
prevSearch = WP_PREV_SEARCH
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
|
||||||
after='comic-nav-base comic-nav-previous'))
|
|
||||||
help = 'Index format: stripname'
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, image_url, page_url):
|
||||||
bf = imageUrl.split('/')
|
return "_".join(image_url.rsplit('/', 3)[1:])
|
||||||
name = bf[-1]
|
|
||||||
re = compile(tagre("div", "class", r'comic-id-([^"]+)'))
|
|
||||||
content = getPageContent(pageUrl, cls.session)
|
|
||||||
match = re.search(content)
|
|
||||||
if not match:
|
|
||||||
return None
|
|
||||||
return "BF%s_%s" % (match.group(1), name)
|
|
||||||
|
|
||||||
|
|
||||||
class BMovieComic(_BasicScraper):
|
class BMovieComic(_BasicScraper):
|
||||||
|
|
|
@ -12,13 +12,13 @@ from ..scraper import _ParserScraper
|
||||||
# please don't use lists of expression, as that makes it hard to track which
|
# please don't use lists of expression, as that makes it hard to track which
|
||||||
# expression is for which comics.
|
# expression is for which comics.
|
||||||
|
|
||||||
|
|
||||||
WP_LATEST_SEARCH = '//a[contains(concat(" ", @class, " "), " comic-nav-last ")]'
|
WP_LATEST_SEARCH = '//a[contains(concat(" ", @class, " "), " comic-nav-last ")]'
|
||||||
|
WP_PREV_SEARCH = '//a[contains(concat(" ", @class, " "), " comic-nav-previous ")]'
|
||||||
|
|
||||||
|
|
||||||
class _WordPressScraper(_ParserScraper):
|
class _WordPressScraper(_ParserScraper):
|
||||||
imageSearch = '//div[@id="comic"]//img'
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
prevSearch = "//a[contains(concat(' ', @class, ' '), ' comic-nav-previous ')]"
|
prevSearch = WP_PREV_SEARCH
|
||||||
|
|
||||||
|
|
||||||
class _ComicPressScraper(_WordPressScraper):
|
class _ComicPressScraper(_WordPressScraper):
|
||||||
|
|
Loading…
Reference in a new issue