Fix multiple imgs for json flag & ZenPencils bouncer (#133)

When using the JSON output flag, if a page has more than one image,
the dictionary's keys view cannot be indexed like a list.

For the ZenPencils comic, the bounce starter was missing, so the page
URL was saved as the root URL.
This commit is contained in:
Gervásio Júnior 2019-06-19 02:09:33 -03:00 committed by Tobias Gruetzmacher
parent 7c1ee6a47c
commit 6c8814fe40
7 changed files with 34 additions and 3 deletions

View file

@ -290,7 +290,7 @@ class JSONEventHandler(EventHandler):
# If there's already an image for this page start keeping track of their order # If there's already an image for this page start keeping track of their order
if len(pageInfo['images'].keys()) == 1: if len(pageInfo['images'].keys()) == 1:
pageInfo['imagesOrder'] = [pageInfo['images'].keys()[0]] pageInfo['imagesOrder'] = list(pageInfo['images'].keys())
if 'imagesOrder' in pageInfo.keys(): if 'imagesOrder' in pageInfo.keys():
pageInfo['imagesOrder'].append(comic.url) pageInfo['imagesOrder'].append(comic.url)

View file

@ -33,9 +33,12 @@ class Zapiro(_ParserScraper):
class ZenPencils(_WPNavi): class ZenPencils(_WPNavi):
url = 'http://zenpencils.com/' url = 'https://zenpencils.com/'
multipleImagesPerStrip = True multipleImagesPerStrip = True
firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/' firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'
starter = bounceStarter
prevSearch = '//a[%s]' % xpath_class('navi-prev')
nextSearch = '//a[%s]' % xpath_class('navi-next')
class ZombieHunters(_BasicScraper): class ZombieHunters(_BasicScraper):

View file

@ -47,6 +47,11 @@ def bloomingfaeries():
add(GET, re.compile(r'http://www\.bloomingfaeries\.com/.*\.jpg'), _img(), content_type='image/jpeg') add(GET, re.compile(r'http://www\.bloomingfaeries\.com/.*\.jpg'), _img(), content_type='image/jpeg')
def zenpencils():
    """Register the mocked GET responses used by the ZenPencils test.

    Three page URLs are registered with their stored content fixtures,
    followed by a catch-all pattern for JPEG images on the CDN host.
    """
    # (page URL, fixture name) pairs, registered in this order.
    page_fixtures = (
        ('https://zenpencils.com/', 'zp-home'),
        ('https://zenpencils.com/comic/missing/', 'zp-223'),
        ('https://zenpencils.com/comic/lifejacket/', 'zp-222'),
    )
    for page_url, fixture_name in page_fixtures:
        add(GET, page_url, _content(fixture_name))

    # Any .jpg under the CDN upload directory gets the same image response.
    cdn_image_pattern = re.compile(
        r'https://cdn-zenpencils\.netdna-ssl\.com/wp-content/uploads/.*\.jpg')
    add(GET, cdn_image_pattern, _img(), content_type='image/jpeg')
def vote(): def vote():
add(POST, 'http://gaecounter.appspot.com/count/', 'no') add(POST, 'http://gaecounter.appspot.com/count/', 'no')

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -10,7 +10,7 @@ import responses
import dosagelib.cmd import dosagelib.cmd
import httpmocks import httpmocks
import json
def cmd(*options): def cmd(*options):
"""'Fake' run dosage with given options.""" """'Fake' run dosage with given options."""
@ -70,3 +70,26 @@ class TestDosage(object):
def test_fetch_indexed(self, tmpdir): def test_fetch_indexed(self, tmpdir):
httpmocks.xkcd() httpmocks.xkcd()
cmd_ok("-n", "2", "-v", "-b", str(tmpdir), "xkcd:303") cmd_ok("-n", "2", "-v", "-b", str(tmpdir), "xkcd:303")
@responses.activate
def test_json_page_key_bounce_and_multi_image(self, tmpdir):
    """Check the JSON output for a bounced comic page with several images.

    Runs dosage for ZenPencils with JSON output against mocked HTTP
    responses, then verifies that ``dosage.json`` records exactly one
    page, keyed by the comic page URL rather than the site root, and
    that this page lists two images which both exist on disk.
    """
    httpmocks.zenpencils()
    cmd_ok("-v", "-b", str(tmpdir), "-o", "json", "ZenPencils")

    directory = tmpdir.join('ZenPencils')
    # Context manager so the handle is closed even if parsing fails.
    with directory.join('dosage.json').open(encoding='utf-8') as f:
        data = json.load(f)

    pages = data['pages']
    assert len(pages) == 1
    page = next(iter(pages))
    assert page == 'https://zenpencils.com/comic/missing/'

    images = pages[page]['images']
    assert len(images) == 2
    # Only the stored file names matter here; the image URLs are the keys.
    for imgfile in images.values():
        assert directory.join(imgfile).check(file=1)