Use tagre matching function.

This commit is contained in:
Bastian Kleineidam 2012-09-26 14:42:28 +02:00
parent ef9d5e86e9
commit 4a53639e79

View file

@ -1,6 +1,7 @@
# -*- coding: iso-8859-1 -*- # -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
from re import compile, MULTILINE from re import compile, MULTILINE
from ..util import tagre
from ..helpers import _BasicScraper, regexNamer, bounceStarter, indirectStarter from ..helpers import _BasicScraper, regexNamer, bounceStarter, indirectStarter
@ -8,15 +9,15 @@ from ..helpers import _BasicScraper, regexNamer, bounceStarter, indirectStarter
class ALessonIsLearned(_BasicScraper): class ALessonIsLearned(_BasicScraper):
latestUrl = 'http://www.alessonislearned.com/' latestUrl = 'http://www.alessonislearned.com/'
imageUrl = 'http://www.alessonislearned.com/lesson%s.html' imageUrl = 'http://www.alessonislearned.com/lesson%s.html'
imageSearch = compile(r'<img src="(cmx/.+?)"') imageSearch = compile(tagre("img", "src", r"(cmx/.+?)"))
prevSearch = compile(r"<a href='(index.php\?comic=.+?)'.+?previous") prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=.+?)")+r".+?previous")
help = 'Index format: nnn' help = 'Index format: nnn'
class ASofterWorld(_BasicScraper): class ASofterWorld(_BasicScraper):
latestUrl = 'http://www.asofterworld.com/' latestUrl = 'http://www.asofterworld.com/'
imageUrl = 'http://www.asofterworld.com/index.php?id=%s' imageUrl = 'http://www.asofterworld.com/index.php?id=%s'
imageSearch = compile(r'<img src="(http://www.asofterworld.com/clean/[^"]+)"') imageSearch = compile(tagre("img", "src", r'(http://www\.asofterworld\.com/clean/[^"]+)'))
prevSearch = compile(r'"([^"]+)">back') prevSearch = compile(r'"([^"]+)">back')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@ -24,8 +25,8 @@ class ASofterWorld(_BasicScraper):
class AbleAndBaker(_BasicScraper): class AbleAndBaker(_BasicScraper):
latestUrl = 'http://www.jimburgessdesign.com/comics/index.php' latestUrl = 'http://www.jimburgessdesign.com/comics/index.php'
imageUrl = 'http://www.jimburgessdesign.com/comics/index.php?comic=%s' imageUrl = 'http://www.jimburgessdesign.com/comics/index.php?comic=%s'
imageSearch = compile(r'<img src="(comics/.+?)"') imageSearch = compile(tagre('img', 'src', r'(comics/.+)'))
prevSearch = compile(r'<a href="(.+\d+?)".+previous.gif') prevSearch = compile(tagre('a', 'href', r'(.+\d+)') + '.+?previous.gif')
help = 'Index format: nnn' help = 'Index format: nnn'
@ -40,26 +41,25 @@ class AbominableCharlesChristopher(_BasicScraper):
class AbstractGender(_BasicScraper): class AbstractGender(_BasicScraper):
latestUrl = 'http://www.abstractgender.com/' latestUrl = 'http://www.abstractgender.com/'
imageUrl = 'http://www.abstractgender.com/?comic=%s' imageUrl = 'http://www.abstractgender.com/?comic=%s'
imageSearch = compile(r'<img[^>]+src="(comics/\d+\.\w+)"') imageSearch = compile(tagre('img', 'src', r'(comics/\d+\.\w+)'))
prevSearch = compile(r'<a\W+href="(\?comic=\d+)"><img[^>]+id="comic_menu_prev"') prevSearch = compile(tagre('a', 'href', r"(\?comic=\d+)")+tagre("img", "id", "comic_menu_prev"))
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
class AbsurdNotions(_BasicScraper): class AbsurdNotions(_BasicScraper):
latestUrl = 'http://www.absurdnotions.org/page129.html' latestUrl = 'http://www.absurdnotions.org/page129.html'
imageUrl = 'http://www.absurdnotions.org/page%s.html' imageUrl = 'http://www.absurdnotions.org/page%s.html'
imageSearch = compile(r'<IMG SRC="(an[^"]+)"') imageSearch = compile(tagre('img', 'src', r'(an[^"]+)'))
prevSearch = compile(r'HREF="([^"]+)"><IMG SRC="nprev\.gif"') prevSearch = compile(tagre('a', 'href', r'([^"]+)') + tagre('img', 'src', 'nprev\.gif'))
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
class AbstruseGoose(_BasicScraper): class AbstruseGoose(_BasicScraper):
starter = bounceStarter('http://abstrusegoose.com/', starter = bounceStarter('http://abstrusegoose.com/',
compile(r'<a href = "(http://abstrusegoose.com/\d+)">Next &raquo;</a>')) compile(tagre('a', 'href', r'(http://abstrusegoose\.com/\d+)')+"Next &raquo;</a>"))
imageUrl = 'http://abstrusegoose.com/c%s.html' imageUrl = 'http://abstrusegoose.com/c%s.html'
imageSearch = compile(r'<img[^<]+src="(http://abstrusegoose.com/strips/[^<>"]+)"') imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)'))
prevSearch = compile(r'<a href = "(http://abstrusegoose.com/\d+)">&laquo; Previous</a>') prevSearch = compile(tagre('a', 'href', r'(http://abstrusegoose\.com/\d+)') + r'&laquo; Previous</a>')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@classmethod @classmethod
@ -69,16 +69,14 @@ class AbstruseGoose(_BasicScraper):
return 'c%03d-%s' % (index, name) return 'c%03d-%s' % (index, name)
class AcademyVale(_BasicScraper): class AcademyVale(_BasicScraper):
latestUrl = 'http://imagerie.com/vale/' latestUrl = 'http://imagerie.com/vale/'
imageUrl = 'http://imagerie.com/vale/avarch.cgi?%s' imageUrl = 'http://imagerie.com/vale/avarch.cgi?%s'
imageSearch = compile(r'<IMG.+?SRC="(avale\d{4}-\d{2}\..*?)"') imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\..*?)'))
prevSearch = compile(r'HREF=(avarch.*?)><IMG SRC="AVNavBack.gif"') prevSearch = compile(tagre('a', 'href', r'(avarch.*?)') + tagre('img', 'src', 'AVNavBack\.gif'))
help = 'Index format: nnn' help = 'Index format: nnn'
class Alice(_BasicScraper): class Alice(_BasicScraper):
latestUrl = 'http://alice.alicecomics.com/' latestUrl = 'http://alice.alicecomics.com/'
imageUrl = 'http://alice.alicecomics.com/%s' imageUrl = 'http://alice.alicecomics.com/%s'
@ -87,7 +85,6 @@ class Alice(_BasicScraper):
help = 'Index format: non' help = 'Index format: non'
class AlienLovesPredator(_BasicScraper): class AlienLovesPredator(_BasicScraper):
imageUrl = 'http://alienlovespredator.com/%s' imageUrl = 'http://alienlovespredator.com/%s'
imageSearch = compile(r'<img src="(.+?)"[^>]+>(<center>\n|\n|</center>\n)<div style="height: 2px;">&nbsp;</div>', MULTILINE) imageSearch = compile(r'<img src="(.+?)"[^>]+>(<center>\n|\n|</center>\n)<div style="height: 2px;">&nbsp;</div>', MULTILINE)
@ -104,11 +101,10 @@ class AlienLovesPredator(_BasicScraper):
return '%s-%s-%s-%s' % (vol, num, ccc, ddd) return '%s-%s-%s-%s' % (vol, num, ccc, ddd)
class AnarchySD(_BasicScraper): class AnarchySD(_BasicScraper):
imageUrl = 'http://www.anarchycomic.com/page%s.php' imageUrl = 'http://www.anarchycomic.com/page%s.php'
imageSearch = compile(r'<img.+src="../(images/page\d+\..+?)"') imageSearch = compile(tagre('img', 'src', r'../(images/page\d+\..+?)'))
prevSearch = compile(r'<a href="(page\d+\.php)">PREVIOUS PAGE') prevSearch = compile(tagre('a', 'href', r'(page\d+\.php)')+'PREVIOUS PAGE')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
starter = indirectStarter( starter = indirectStarter(
'http://www.anarchycomic.com/page1.php', 'http://www.anarchycomic.com/page1.php',