Fix multiple imgs for json flag & ZenPencils bouncer (#133)
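When using the JSON output flag, if a page has more than one image, the result of the dictionary's keys() call cannot be indexed like a list, so the keys are now converted to a list first. For the ZenPencils comic, the bounce starter was missing, which caused the page URL to be saved as the root URL.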
This commit is contained in:
parent
7c1ee6a47c
commit
6c8814fe40
7 changed files with 34 additions and 3 deletions
|
@ -290,7 +290,7 @@ class JSONEventHandler(EventHandler):
|
||||||
|
|
||||||
# If there's already an image for this page start keeping track of their order
|
# If there's already an image for this page start keeping track of their order
|
||||||
if len(pageInfo['images'].keys()) == 1:
|
if len(pageInfo['images'].keys()) == 1:
|
||||||
pageInfo['imagesOrder'] = [pageInfo['images'].keys()[0]]
|
pageInfo['imagesOrder'] = list(pageInfo['images'].keys())
|
||||||
if 'imagesOrder' in pageInfo.keys():
|
if 'imagesOrder' in pageInfo.keys():
|
||||||
pageInfo['imagesOrder'].append(comic.url)
|
pageInfo['imagesOrder'].append(comic.url)
|
||||||
|
|
||||||
|
|
|
@ -33,9 +33,12 @@ class Zapiro(_ParserScraper):
|
||||||
|
|
||||||
|
|
||||||
class ZenPencils(_WPNavi):
|
class ZenPencils(_WPNavi):
|
||||||
url = 'http://zenpencils.com/'
|
url = 'https://zenpencils.com/'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'
|
firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'
|
||||||
|
starter = bounceStarter
|
||||||
|
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||||
|
nextSearch = '//a[%s]' % xpath_class('navi-next')
|
||||||
|
|
||||||
|
|
||||||
class ZombieHunters(_BasicScraper):
|
class ZombieHunters(_BasicScraper):
|
||||||
|
|
|
@ -47,6 +47,11 @@ def bloomingfaeries():
|
||||||
|
|
||||||
add(GET, re.compile(r'http://www\.bloomingfaeries\.com/.*\.jpg'), _img(), content_type='image/jpeg')
|
add(GET, re.compile(r'http://www\.bloomingfaeries\.com/.*\.jpg'), _img(), content_type='image/jpeg')
|
||||||
|
|
||||||
|
def zenpencils():
|
||||||
|
add(GET, 'https://zenpencils.com/', _content('zp-home'))
|
||||||
|
add(GET, 'https://zenpencils.com/comic/missing/', _content('zp-223'))
|
||||||
|
add(GET, 'https://zenpencils.com/comic/lifejacket/', _content('zp-222'))
|
||||||
|
add(GET, re.compile(r'https://cdn-zenpencils\.netdna-ssl\.com/wp-content/uploads/.*\.jpg'), _img(), content_type='image/jpeg')
|
||||||
|
|
||||||
def vote():
|
def vote():
|
||||||
add(POST, 'http://gaecounter.appspot.com/count/', 'no')
|
add(POST, 'http://gaecounter.appspot.com/count/', 'no')
|
||||||
|
|
BIN
tests/responses/zp-222.html.gz
Normal file
BIN
tests/responses/zp-222.html.gz
Normal file
Binary file not shown.
BIN
tests/responses/zp-223.html.gz
Normal file
BIN
tests/responses/zp-223.html.gz
Normal file
Binary file not shown.
BIN
tests/responses/zp-home.html.gz
Normal file
BIN
tests/responses/zp-home.html.gz
Normal file
Binary file not shown.
|
@ -10,7 +10,7 @@ import responses
|
||||||
|
|
||||||
import dosagelib.cmd
|
import dosagelib.cmd
|
||||||
import httpmocks
|
import httpmocks
|
||||||
|
import json
|
||||||
|
|
||||||
def cmd(*options):
|
def cmd(*options):
|
||||||
"""'Fake' run dosage with given options."""
|
"""'Fake' run dosage with given options."""
|
||||||
|
@ -70,3 +70,26 @@ class TestDosage(object):
|
||||||
def test_fetch_indexed(self, tmpdir):
|
def test_fetch_indexed(self, tmpdir):
|
||||||
httpmocks.xkcd()
|
httpmocks.xkcd()
|
||||||
cmd_ok("-n", "2", "-v", "-b", str(tmpdir), "xkcd:303")
|
cmd_ok("-n", "2", "-v", "-b", str(tmpdir), "xkcd:303")
|
||||||
|
|
||||||
|
@responses.activate
|
||||||
|
def test_json_page_key_bounce_and_multi_image(self, tmpdir):
|
||||||
|
httpmocks.zenpencils()
|
||||||
|
print(tmpdir)
|
||||||
|
cmd_ok("-v", "-b", str(tmpdir), "-o", "json", "ZenPencils")
|
||||||
|
|
||||||
|
directory = tmpdir.join('ZenPencils')
|
||||||
|
f = directory.join('dosage.json').open(encoding='utf-8')
|
||||||
|
data = json.load(f)
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
pages = data['pages']
|
||||||
|
assert len(pages) == 1
|
||||||
|
|
||||||
|
page = list(pages.keys())[0]
|
||||||
|
assert page == 'https://zenpencils.com/comic/missing/'
|
||||||
|
|
||||||
|
images = data['pages'][page]['images']
|
||||||
|
assert len(images) == 2
|
||||||
|
|
||||||
|
for imgurl, imgfile in images.items():
|
||||||
|
assert directory.join(imgfile).check(file=1)
|
||||||
|
|
Loading…
Reference in a new issue