Fix some comics.
This commit is contained in:
parent 5006ed7f40
commit eba2f0089d
6 changed files with 24 additions and 25 deletions
|
@ -17,10 +17,10 @@ class FetchComicError(IOError):
|
||||||
class ComicStrip(object):
|
class ComicStrip(object):
|
||||||
"""A list of comic image URLs."""
|
"""A list of comic image URLs."""
|
||||||
|
|
||||||
def __init__(self, name, parentUrl, imageUrls, namer):
|
def __init__(self, name, stripUrl, imageUrls, namer):
|
||||||
"""Store the image URL list."""
|
"""Store the image URL list."""
|
||||||
self.name = name
|
self.name = name
|
||||||
self.parentUrl = parentUrl
|
self.stripUrl = stripUrl
|
||||||
self.imageUrls = imageUrls
|
self.imageUrls = imageUrls
|
||||||
self.namer = namer
|
self.namer = namer
|
||||||
|
|
||||||
|
@ -31,10 +31,10 @@ class ComicStrip(object):
|
||||||
|
|
||||||
def getDownloader(self, url):
|
def getDownloader(self, url):
|
||||||
"""Get an image downloader."""
|
"""Get an image downloader."""
|
||||||
filename = self.namer(url, self.parentUrl)
|
filename = self.namer(url, self.stripUrl)
|
||||||
if filename is None:
|
if filename is None:
|
||||||
filename = url.rsplit('/', 1)[1]
|
filename = url.rsplit('/', 1)[1]
|
||||||
return ComicImage(self.name, url, self.parentUrl, filename)
|
return ComicImage(self.name, url, self.stripUrl, filename)
|
||||||
|
|
||||||
|
|
||||||
class ComicImage(object):
|
class ComicImage(object):
|
||||||
|
|
|
@ -19,7 +19,9 @@ def regexNamer(regex):
|
||||||
"""Get name from regular expression."""
|
"""Get name from regular expression."""
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _namer(imageUrl, pageUrl):
|
def _namer(imageUrl, pageUrl):
|
||||||
return regex.search(imageUrl).group(1)
|
mo = regex.search(imageUrl)
|
||||||
|
if mo:
|
||||||
|
return mo.group(1)
|
||||||
return _namer
|
return _namer
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -73,7 +73,7 @@ class Alice(_BasicScraper):
|
||||||
latestUrl = 'http://alice.alicecomics.com/'
|
latestUrl = 'http://alice.alicecomics.com/'
|
||||||
stripUrl = 'http://alice.alicecomics.com/wp-content/webcomic/alicecomics/%s.jpg'
|
stripUrl = 'http://alice.alicecomics.com/wp-content/webcomic/alicecomics/%s.jpg'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://alice\.alicecomics\.com/wp-content/webcomic/alicecomics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://alice\.alicecomics\.com/wp-content/webcomic/alicecomics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://alice.alicecomics.com/archive/[^!]+)', after="previous"))
|
prevSearch = compile(tagre("a", "href", r'(http://alice.alicecomics.com/archive/[^"]+)', after="previous"))
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ class BadlyDrawnKitties(_BasicScraper):
|
||||||
latestUrl = 'http://www.badlydrawnkitties.com/'
|
latestUrl = 'http://www.badlydrawnkitties.com/'
|
||||||
stripUrl = 'http://www.badlydrawnkitties.com/new/%s.html'
|
stripUrl = 'http://www.badlydrawnkitties.com/new/%s.html'
|
||||||
imageSearch = compile(r'<img src="(/new/.+?)">')
|
imageSearch = compile(r'<img src="(/new/.+?)">')
|
||||||
prevSearch = compile(r'"(/new/.+?)".+?previous.gif')
|
prevSearch = compile(tagre("a", "href", r'(/[^"]+)') + tagre("img", "src", r'/images/previous\.gif'))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
@ -123,7 +123,7 @@ class ButternutSquash(_BasicScraper):
|
||||||
latestUrl = 'http://www.butternutsquash.net/'
|
latestUrl = 'http://www.butternutsquash.net/'
|
||||||
stripUrl = 'http://www.butternutsquash.net/v3/%s'
|
stripUrl = 'http://www.butternutsquash.net/v3/%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.butternutsquash\.net/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.butternutsquash\.net/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://www\.butternutsquash\.net/[^!]+)', after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(http://www\.butternutsquash\.net/[^"]+)', after="prev"))
|
||||||
help = 'Index format: yyyy/mm/dd/strip-name-author-name'
|
help = 'Index format: yyyy/mm/dd/strip-name-author-name'
|
||||||
|
|
||||||
|
|
||||||
|
@ -245,8 +245,8 @@ class BloodBound(_BasicScraper):
|
||||||
class BookOfBiff(_BasicScraper):
|
class BookOfBiff(_BasicScraper):
|
||||||
latestUrl = 'http://www.thebookofbiff.com/'
|
latestUrl = 'http://www.thebookofbiff.com/'
|
||||||
stripUrl = 'http://www.thebookofbiff.com/%s'
|
stripUrl = 'http://www.thebookofbiff.com/%s'
|
||||||
imageSearch = compile(r'<img src="(http://www.thebookofbiff.com/comics/.+?)"')
|
imageSearch = compile(tagre("img", "src", r'([^"]+/comics/[^"]+)'))
|
||||||
prevSearch = compile(r'<a href="(http://www.thebookofbiff.com/.+?)">◄ Previous</a>')
|
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
|
||||||
help = 'Index format: yyyy/mm/dd/stripnum-strip-name'
|
help = 'Index format: yyyy/mm/dd/stripnum-strip-name'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -52,7 +52,7 @@ class Catena(_BasicScraper):
|
||||||
latestUrl = 'http://catenamanor.com/'
|
latestUrl = 'http://catenamanor.com/'
|
||||||
stripUrl = 'http://catenamanor.com/%s.gif'
|
stripUrl = 'http://catenamanor.com/%s.gif'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://catenamanor\.com/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://catenamanor\.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'[^"]+', after='rel="prev"'))
|
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"'))
|
||||||
help = 'Index format: yyyy-mm-dd-<name>'
|
help = 'Index format: yyyy-mm-dd-<name>'
|
||||||
|
|
||||||
|
|
||||||
|
@ -127,15 +127,6 @@ class Comedity(_BasicScraper):
|
||||||
help = 'Index format: n (no padding)'
|
help = 'Index format: n (no padding)'
|
||||||
|
|
||||||
|
|
||||||
class Comet7(_BasicScraper):
|
|
||||||
latestUrl = 'http://www.comet7.com/'
|
|
||||||
imageUrl = 'http://www.comet7.com/archive_page.php?id=%s'
|
|
||||||
imageSearch = compile(r'"(.*?/strips/.*?)"')
|
|
||||||
prevSearch = compile(r'"(.*?)".*?previous_stripf')
|
|
||||||
help = 'Index format: n (unpadded)'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Commissioned(_BasicScraper):
|
class Commissioned(_BasicScraper):
|
||||||
latestUrl = 'http://www.commissionedcomic.com/'
|
latestUrl = 'http://www.commissionedcomic.com/'
|
||||||
stripUrl = 'http://www.commissionedcomic.com/index.php?strip=%s'
|
stripUrl = 'http://www.commissionedcomic.com/index.php?strip=%s'
|
||||||
|
@ -148,8 +139,8 @@ class Commissioned(_BasicScraper):
|
||||||
class CoolCatStudio(_BasicScraper):
|
class CoolCatStudio(_BasicScraper):
|
||||||
latestUrl = 'http://www.coolcatstudio.com/'
|
latestUrl = 'http://www.coolcatstudio.com/'
|
||||||
stripUrl = 'http://www.coolcatstudio.com/index.php?p=%s'
|
stripUrl = 'http://www.coolcatstudio.com/index.php?p=%s'
|
||||||
imageSearch = compile(r'(/comics/.+?)"')
|
imageSearch = compile(tagre("img", "src", r'(http://www.coolcatstudio.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(r"href='(.+?)'>PREV")
|
prevSearch = compile(tagre("a", "href", r'(http://www\.coolcatstudio\.com/strips-cat/[^"]+)', before="cniprevt"))
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
@ -214,7 +205,7 @@ def cloneManga(name, shortName, lastStrip=None):
|
||||||
name='CloneManga/' + name,
|
name='CloneManga/' + name,
|
||||||
starter=starter,
|
starter=starter,
|
||||||
stripUrl=stripUrl,
|
stripUrl=stripUrl,
|
||||||
imageSearch=compile(tagre("img", "src", r'((?:%s)?/%s/[^"]+)' % (url, shortName), after="center")),
|
imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (url, shortName), after="center")),
|
||||||
prevSearch=compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"previous\.gif")),
|
prevSearch=compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"previous\.gif")),
|
||||||
help='Index format: n',
|
help='Index format: n',
|
||||||
namer=namer)
|
namer=namer)
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
import tempfile
|
import tempfile
|
||||||
import shutil
|
import shutil
|
||||||
|
import re
|
||||||
from itertools import islice
|
from itertools import islice
|
||||||
from unittest import TestCase
|
from unittest import TestCase
|
||||||
from dosagelib import scraper
|
from dosagelib import scraper
|
||||||
|
@ -18,7 +19,7 @@ class _ComicTester(TestCase):
|
||||||
|
|
||||||
def test_comic(self):
|
def test_comic(self):
|
||||||
# Test a scraper. It must be able to traverse backward for
|
# Test a scraper. It must be able to traverse backward for
|
||||||
# at least 5 pages from the start, and find strip images
|
# at least 5 strips from the start, and find strip images
|
||||||
# on at least 4 pages.
|
# on at least 4 pages.
|
||||||
scraperobj = self.scraperclass()
|
scraperobj = self.scraperclass()
|
||||||
num = empty = 0
|
num = empty = 0
|
||||||
|
@ -27,7 +28,12 @@ class _ComicTester(TestCase):
|
||||||
for image in strip.getImages():
|
for image in strip.getImages():
|
||||||
images += 1
|
images += 1
|
||||||
self.save(image)
|
self.save(image)
|
||||||
if not images:
|
if images:
|
||||||
|
# test that the stripUrl regex matches the retrieved strip URL
|
||||||
|
urlmatch = re.escape(self.scraperclass.stripUrl).replace("%s", r".+")
|
||||||
|
mo = re.compile(urlmatch).match(strip.stripUrl)
|
||||||
|
self.check(mo is not None, 'strip URL %r does not match %r' % (strip.stripUrl, self.scraperclass.stripUrl))
|
||||||
|
else:
|
||||||
empty += 1
|
empty += 1
|
||||||
num += 1
|
num += 1
|
||||||
self.check(num >= 4, 'traversal failed after %d strips.' % num)
|
self.check(num >= 4, 'traversal failed after %d strips.' % num)
|
||||||
|
|
Loading…
Reference in a new issue