Improved comic test.

This commit is contained in:
Bastian Kleineidam 2012-11-25 07:56:46 +01:00
parent 958a788550
commit 7e91c83753
2 changed files with 14 additions and 41 deletions

View file

@@ -8,27 +8,18 @@ from ..helpers import indirectStarter
from ..util import tagre from ..util import tagre
class TalesOfPylea(_BasicScraper):
latestUrl = 'http://talesofpylea.com/'
stripUrl = latestUrl + '%s/'
imageSearch = compile(r'<img src="(istrip_files/strips/.+?)"')
prevSearch = compile(r' <a href="(.+?)">Back</a>')
help = 'Index format: nnn'
class TheNoob(_BasicScraper): class TheNoob(_BasicScraper):
latestUrl = 'http://www.thenoobcomic.com/index.php' latestUrl = 'http://www.thenoobcomic.com/index.php'
stripUrl = latestUrl + '?pos=%' stripUrl = latestUrl + '?pos=%'
imageSearch = compile(r'<img src="(/headquarters/comics/.+?)"') imageSearch = compile(tagre("img", "src", r'(/headquarters/comics/[^"]+)'))
prevSearch = compile(r'<a class="comic_nav_previous_button" href="(.+?)"></a>') prevSearch = compile(tagre("a", "href", r'(\?pos=\d+)', before="comic_nav_previous_button"))
help = 'Index format: nnnn' help = 'Index format: nnnn'
class TheOrderOfTheStick(_BasicScraper): class TheOrderOfTheStick(_BasicScraper):
latestUrl = 'http://www.giantitp.com/' latestUrl = 'http://www.giantitp.com/comics/oots0863.html'
stripUrl = latestUrl + 'comics/images/%s' stripUrl = latestUrl + 'comics/oots%s.html'
imageSearch = compile(r'<IMG src="(/comics/images/.+?)">') imageSearch = compile(r'<IMG src="(/comics/images/.+?)">')
prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"') prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@@ -38,7 +29,7 @@ class TheOrderOfTheStick(_BasicScraper):
class TheParkingLotIsFull(_BasicScraper): class TheParkingLotIsFull(_BasicScraper):
latestUrl = 'http://plif.courageunfettered.com/archive/arch2002.htm' latestUrl = 'http://plif.courageunfettered.com/archive/arch2002.htm'
stripUrl = 'http://plif.courageunfettered.com/archive/wc%s.gif' stripUrl = 'http://plif.courageunfettered.com/archive/arch%s.htm'
imageSearch = compile(r'<td align="center"><A TARGET=_parent HREF="(wc\d+\..+?)">') imageSearch = compile(r'<td align="center"><A TARGET=_parent HREF="(wc\d+\..+?)">')
prevSearch = compile(r'-\s*\n\s*<A HREF="(arch\d{4}\.htm)">\d{4}</A>') prevSearch = compile(r'-\s*\n\s*<A HREF="(arch\d{4}\.htm)">\d{4}</A>')
help = 'Index format: nnn' help = 'Index format: nnn'
@@ -47,37 +38,28 @@ class TheParkingLotIsFull(_BasicScraper):
class TheWotch(_BasicScraper): class TheWotch(_BasicScraper):
latestUrl = 'http://www.thewotch.com/' latestUrl = 'http://www.thewotch.com/'
stripUrl = latestUrl + '?epDate=%s' stripUrl = latestUrl + '?date=%s'
imageSearch = compile(r"<img.+?src='(comics/.+?)'") imageSearch = compile(r"<img.+?src='(comics/.+?)'")
prevSearch = compile(r"<link rel='Previous' href='(\?epDate=\d+-\d+-\d+)'") prevSearch = compile(r"<link rel='Previous' href='(\?date=\d+-\d+-\d+)'")
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
class Thorn(_BasicScraper):
latestUrl = 'http://www.mimisgrotto.com/thorn/index.html'
stripUrl = 'http://www.mimisgrotto.com/thorn/%s.html'
imageSearch = compile(r'"(strips/.+?)"')
prevSearch = compile(r'(\d[\d][\d].html)">Prev')
help = 'Index format: nnn'
class TinyKittenTeeth(_BasicScraper): class TinyKittenTeeth(_BasicScraper):
latestUrl = 'http://www.tinykittenteeth.com/' latestUrl = 'http://www.tinykittenteeth.com/'
stripUrl = latestUrl + 'index.php?current=%s' stripUrl = latestUrl + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.tinykittenteeth\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://www\.tinykittenteeth\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous")) prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
help = 'Index format: n (unpadded)' help = 'Index format: yyyy/mm/dd/stripname (unpadded)'
class TwoTwoOneFour(_BasicScraper): class TwoTwoOneFour(_BasicScraper):
latestUrl = 'http://www.nitrocosm.com/go/2214_classic/' latestUrl = 'http://www.nitrocosm.com/go/2214_classic/'
stripUrl = latestUrl + '%s/' stripUrl = latestUrl + '%s/'
imageSearch = compile(r'<img class="gallery_display" src="([^"]+)"') imageSearch = compile(tagre("img", "src", r'(http://content\.nitrocosm\.com/[^"]+)', before="gallery_display"))
prevSearch = compile(r'<a href="([^"]+)"[^>]*><button type="submit" class="nav_btn_previous">') prevSearch = compile(tagre("a", "href", r'(http://www\.nitrocosm\.com/go/2214_classic/\d+/)', after="Previous"))
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
class TheWhiteboard(_BasicScraper): class TheWhiteboard(_BasicScraper):
latestUrl = 'http://www.the-whiteboard.com/' latestUrl = 'http://www.the-whiteboard.com/'
stripUrl = latestUrl + 'auto%s.html' stripUrl = latestUrl + 'auto%s.html'
@@ -103,12 +85,3 @@ class TheOuterQuarter(_BasicScraper):
imageSearch = compile(r'<img src="(http://theouterquarter.com/comics/.+?)"') imageSearch = compile(r'<img src="(http://theouterquarter.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">') prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
help = 'Index format: nnn' help = 'Index format: nnn'
class TheHorrificAdventuresOfFranky(_BasicScraper):
latestUrl = 'http://www.boneyardfranky.com/'
stripUrl = latestUrl + '?p=%s'
imageSearch = compile(r'<img src="(http://www.boneyardfranky.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
help = 'Index format: nnn'

View file

@@ -28,14 +28,14 @@ class _ComicTester(TestCase):
for image in strip.getImages(): for image in strip.getImages():
images += 1 images += 1
self.save(image) self.save(image)
if images == 0:
empty += 1
if num > 0: if num > 0:
self.check_stripurl(strip) self.check_stripurl(strip)
else:
empty += 1
num += 1 num += 1
if self.scraperclass.prevSearch: if self.scraperclass.prevSearch:
self.check(num >= 4, 'traversal failed after %d strips, check the prevSearch pattern.' % num) self.check(num >= 4, 'traversal failed after %d strips, check the prevSearch pattern.' % num)
self.check(empty <= 1, 'failed to find images on %d pages, check the imageSearch pattern.' % empty) self.check(empty == 0, 'failed to find images on %d pages, check the imageSearch pattern.' % empty)
def check_stripurl(self, strip): def check_stripurl(self, strip):
if not self.scraperclass.stripUrl: if not self.scraperclass.stripUrl: