diff --git a/dosagelib/comic.py b/dosagelib/comic.py
index f1924e2d5..563fb705e 100644
--- a/dosagelib/comic.py
+++ b/dosagelib/comic.py
@@ -17,10 +17,10 @@ class FetchComicError(IOError):
class ComicStrip(object):
"""A list of comic image URLs."""
- def __init__(self, name, parentUrl, imageUrls, namer):
+ def __init__(self, name, stripUrl, imageUrls, namer):
"""Store the image URL list."""
self.name = name
- self.parentUrl = parentUrl
+ self.stripUrl = stripUrl
self.imageUrls = imageUrls
self.namer = namer
@@ -31,10 +31,10 @@ class ComicStrip(object):
def getDownloader(self, url):
"""Get an image downloader."""
- filename = self.namer(url, self.parentUrl)
+ filename = self.namer(url, self.stripUrl)
if filename is None:
filename = url.rsplit('/', 1)[1]
- return ComicImage(self.name, url, self.parentUrl, filename)
+ return ComicImage(self.name, url, self.stripUrl, filename)
class ComicImage(object):
diff --git a/dosagelib/helpers.py b/dosagelib/helpers.py
index 021155632..64f16d8c1 100644
--- a/dosagelib/helpers.py
+++ b/dosagelib/helpers.py
@@ -19,7 +19,9 @@ def regexNamer(regex):
"""Get name from regular expression."""
@staticmethod
def _namer(imageUrl, pageUrl):
- return regex.search(imageUrl).group(1)
+ mo = regex.search(imageUrl)
+ # No match: return None explicitly so the caller can fall back to a default name.
+ return mo.group(1) if mo else None
return _namer
diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py
index a771feecd..5591683aa 100644
--- a/dosagelib/plugins/a.py
+++ b/dosagelib/plugins/a.py
@@ -73,7 +73,7 @@ class Alice(_BasicScraper):
latestUrl = 'http://alice.alicecomics.com/'
stripUrl = 'http://alice.alicecomics.com/wp-content/webcomic/alicecomics/%s.jpg'
imageSearch = compile(tagre("img", "src", r'(http://alice\.alicecomics\.com/wp-content/webcomic/alicecomics/[^"]+)'))
- prevSearch = compile(tagre("a", "href", r'(http://alice.alicecomics.com/archive/[^!]+)', after="previous"))
+ prevSearch = compile(tagre("a", "href", r'(http://alice\.alicecomics\.com/archive/[^"]+)', after="previous"))
help = 'Index format: yyyy-mm-dd'
diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py
index 2c12093d7..2fd6c87c3 100644
--- a/dosagelib/plugins/b.py
+++ b/dosagelib/plugins/b.py
@@ -10,7 +10,7 @@ class BadlyDrawnKitties(_BasicScraper):
latestUrl = 'http://www.badlydrawnkitties.com/'
stripUrl = 'http://www.badlydrawnkitties.com/new/%s.html'
imageSearch = compile(r'')
- prevSearch = compile(r'"(/new/.+?)".+?previous.gif')
+ prevSearch = compile(tagre("a", "href", r'(/[^"]+)') + tagre("img", "src", r'/images/previous\.gif'))
help = 'Index format: n (unpadded)'
@@ -123,7 +123,7 @@ class ButternutSquash(_BasicScraper):
latestUrl = 'http://www.butternutsquash.net/'
stripUrl = 'http://www.butternutsquash.net/v3/%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.butternutsquash\.net/comics/[^"]+)'))
- prevSearch = compile(tagre("a", "href", r'(http://www\.butternutsquash\.net/[^!]+)', after="prev"))
+ prevSearch = compile(tagre("a", "href", r'(http://www\.butternutsquash\.net/[^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/strip-name-author-name'
@@ -245,8 +245,8 @@ class BloodBound(_BasicScraper):
class BookOfBiff(_BasicScraper):
latestUrl = 'http://www.thebookofbiff.com/'
stripUrl = 'http://www.thebookofbiff.com/%s'
- imageSearch = compile(r'◄ Previous')
+ imageSearch = compile(tagre("img", "src", r'([^"]+/comics/[^"]+)'))
+ prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
help = 'Index format: yyyy/mm/dd/stripnum-strip-name'
diff --git a/dosagelib/plugins/c.py b/dosagelib/plugins/c.py
index a8c1d8ba5..77a4b97b4 100644
--- a/dosagelib/plugins/c.py
+++ b/dosagelib/plugins/c.py
@@ -52,7 +52,7 @@ class Catena(_BasicScraper):
latestUrl = 'http://catenamanor.com/'
stripUrl = 'http://catenamanor.com/%s.gif'
imageSearch = compile(tagre("img", "src", r'(http://catenamanor\.com/comics/[^"]+)'))
- prevSearch = compile(tagre("a", "href", r'[^"]+', after='rel="prev"'))
+ prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"'))
help = 'Index format: yyyy-mm-dd-'
@@ -127,15 +127,6 @@ class Comedity(_BasicScraper):
help = 'Index format: n (no padding)'
-class Comet7(_BasicScraper):
- latestUrl = 'http://www.comet7.com/'
- imageUrl = 'http://www.comet7.com/archive_page.php?id=%s'
- imageSearch = compile(r'"(.*?/strips/.*?)"')
- prevSearch = compile(r'"(.*?)".*?previous_stripf')
- help = 'Index format: n (unpadded)'
-
-
-
class Commissioned(_BasicScraper):
latestUrl = 'http://www.commissionedcomic.com/'
stripUrl = 'http://www.commissionedcomic.com/index.php?strip=%s'
@@ -148,8 +139,8 @@ class Commissioned(_BasicScraper):
class CoolCatStudio(_BasicScraper):
latestUrl = 'http://www.coolcatstudio.com/'
stripUrl = 'http://www.coolcatstudio.com/index.php?p=%s'
- imageSearch = compile(r'(/comics/.+?)"')
- prevSearch = compile(r"href='(.+?)'>PREV")
+ imageSearch = compile(tagre("img", "src", r'(http://www\.coolcatstudio\.com/comics/[^"]+)'))
+ prevSearch = compile(tagre("a", "href", r'(http://www\.coolcatstudio\.com/strips-cat/[^"]+)', before="cniprevt"))
help = 'Index format: n'
@@ -214,7 +205,7 @@ def cloneManga(name, shortName, lastStrip=None):
name='CloneManga/' + name,
starter=starter,
stripUrl=stripUrl,
- imageSearch=compile(tagre("img", "src", r'((?:%s)?/%s/[^"]+)' % (url, shortName), after="center")),
+ imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (url, shortName), after="center")),
prevSearch=compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"previous\.gif")),
help='Index format: n',
namer=namer)
diff --git a/tests/test_comics.py b/tests/test_comics.py
index 35b735081..af7d2432b 100644
--- a/tests/test_comics.py
+++ b/tests/test_comics.py
@@ -3,6 +3,7 @@
# Copyright (C) 2012 Bastian Kleineidam
import tempfile
import shutil
+import re
from itertools import islice
from unittest import TestCase
from dosagelib import scraper
@@ -18,7 +19,7 @@ class _ComicTester(TestCase):
def test_comic(self):
# Test a scraper. It must be able to traverse backward for
- # at least 5 pages from the start, and find strip images
+ # at least 5 strips from the start, and find strip images
# on at least 4 pages.
scraperobj = self.scraperclass()
num = empty = 0
@@ -27,7 +28,12 @@ class _ComicTester(TestCase):
for image in strip.getImages():
images += 1
self.save(image)
- if not images:
+ if images:
+ # Test that the retrieved strip URL fits the stripUrl template. The literal parts around "%s" are escaped separately, because older re.escape() versions also escape "%" and would break a later replace of "%s".
+ urlmatch = r".+".join(re.escape(part) for part in self.scraperclass.stripUrl.split("%s"))
+ mo = re.compile(urlmatch).match(strip.stripUrl)
+ self.check(mo is not None, 'strip URL %r does not match %r' % (strip.stripUrl, self.scraperclass.stripUrl))
+ else:
empty += 1
num += 1
self.check(num >= 4, 'traversal failed after %d strips.' % num)