Use tagre matching function.
This commit is contained in:
parent
ef9d5e86e9
commit
4a53639e79
1 changed file with 17 additions and 21 deletions
|
@ -1,6 +1,7 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: iso-8859-1 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
from re import compile, MULTILINE
|
from re import compile, MULTILINE
|
||||||
|
from ..util import tagre
|
||||||
|
|
||||||
from ..helpers import _BasicScraper, regexNamer, bounceStarter, indirectStarter
|
from ..helpers import _BasicScraper, regexNamer, bounceStarter, indirectStarter
|
||||||
|
|
||||||
|
@ -8,15 +9,15 @@ from ..helpers import _BasicScraper, regexNamer, bounceStarter, indirectStarter
|
||||||
class ALessonIsLearned(_BasicScraper):
|
class ALessonIsLearned(_BasicScraper):
|
||||||
latestUrl = 'http://www.alessonislearned.com/'
|
latestUrl = 'http://www.alessonislearned.com/'
|
||||||
imageUrl = 'http://www.alessonislearned.com/lesson%s.html'
|
imageUrl = 'http://www.alessonislearned.com/lesson%s.html'
|
||||||
imageSearch = compile(r'<img src="(cmx/.+?)"')
|
imageSearch = compile(tagre("img", "src", r"(cmx/.+?)"))
|
||||||
prevSearch = compile(r"<a href='(index.php\?comic=.+?)'.+?previous")
|
prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=.+?)")+r".+?previous")
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
class ASofterWorld(_BasicScraper):
|
class ASofterWorld(_BasicScraper):
|
||||||
latestUrl = 'http://www.asofterworld.com/'
|
latestUrl = 'http://www.asofterworld.com/'
|
||||||
imageUrl = 'http://www.asofterworld.com/index.php?id=%s'
|
imageUrl = 'http://www.asofterworld.com/index.php?id=%s'
|
||||||
imageSearch = compile(r'<img src="(http://www.asofterworld.com/clean/[^"]+)"')
|
imageSearch = compile(tagre("img", "src", r'(http://www\.asofterworld\.com/clean/[^"]+)'))
|
||||||
prevSearch = compile(r'"([^"]+)">back')
|
prevSearch = compile(r'"([^"]+)">back')
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
@ -24,8 +25,8 @@ class ASofterWorld(_BasicScraper):
|
||||||
class AbleAndBaker(_BasicScraper):
|
class AbleAndBaker(_BasicScraper):
|
||||||
latestUrl = 'http://www.jimburgessdesign.com/comics/index.php'
|
latestUrl = 'http://www.jimburgessdesign.com/comics/index.php'
|
||||||
imageUrl = 'http://www.jimburgessdesign.com/comics/index.php?comic=%s'
|
imageUrl = 'http://www.jimburgessdesign.com/comics/index.php?comic=%s'
|
||||||
imageSearch = compile(r'<img src="(comics/.+?)"')
|
imageSearch = compile(tagre('img', 'src', r'(comics/.+)'))
|
||||||
prevSearch = compile(r'<a href="(.+\d+?)".+previous.gif')
|
prevSearch = compile(tagre('a', 'href', r'(.+\d+)') + '.+?previous.gif')
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
@ -40,26 +41,25 @@ class AbominableCharlesChristopher(_BasicScraper):
|
||||||
class AbstractGender(_BasicScraper):
|
class AbstractGender(_BasicScraper):
|
||||||
latestUrl = 'http://www.abstractgender.com/'
|
latestUrl = 'http://www.abstractgender.com/'
|
||||||
imageUrl = 'http://www.abstractgender.com/?comic=%s'
|
imageUrl = 'http://www.abstractgender.com/?comic=%s'
|
||||||
imageSearch = compile(r'<img[^>]+src="(comics/\d+\.\w+)"')
|
imageSearch = compile(tagre('img', 'src', r'(comics/\d+\.\w+)'))
|
||||||
prevSearch = compile(r'<a\W+href="(\?comic=\d+)"><img[^>]+id="comic_menu_prev"')
|
prevSearch = compile(tagre('a', 'href', r"(\?comic=\d+)")+tagre("img", "id", "comic_menu_prev"))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
class AbsurdNotions(_BasicScraper):
|
class AbsurdNotions(_BasicScraper):
|
||||||
latestUrl = 'http://www.absurdnotions.org/page129.html'
|
latestUrl = 'http://www.absurdnotions.org/page129.html'
|
||||||
imageUrl = 'http://www.absurdnotions.org/page%s.html'
|
imageUrl = 'http://www.absurdnotions.org/page%s.html'
|
||||||
imageSearch = compile(r'<IMG SRC="(an[^"]+)"')
|
imageSearch = compile(tagre('img', 'src', r'(an[^"]+)'))
|
||||||
prevSearch = compile(r'HREF="([^"]+)"><IMG SRC="nprev\.gif"')
|
prevSearch = compile(tagre('a', 'href', r'([^"]+)') + tagre('img', 'src', 'nprev\.gif'))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class AbstruseGoose(_BasicScraper):
|
class AbstruseGoose(_BasicScraper):
|
||||||
starter = bounceStarter('http://abstrusegoose.com/',
|
starter = bounceStarter('http://abstrusegoose.com/',
|
||||||
compile(r'<a href = "(http://abstrusegoose.com/\d+)">Next »</a>'))
|
compile(tagre('a', 'href', r'(http://abstrusegoose\.com/\d+)')+"Next »</a>"))
|
||||||
imageUrl = 'http://abstrusegoose.com/c%s.html'
|
imageUrl = 'http://abstrusegoose.com/c%s.html'
|
||||||
imageSearch = compile(r'<img[^<]+src="(http://abstrusegoose.com/strips/[^<>"]+)"')
|
imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)'))
|
||||||
prevSearch = compile(r'<a href = "(http://abstrusegoose.com/\d+)">« Previous</a>')
|
prevSearch = compile(tagre('a', 'href', r'(http://abstrusegoose\.com/\d+)') + r'« Previous</a>')
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -69,16 +69,14 @@ class AbstruseGoose(_BasicScraper):
|
||||||
return 'c%03d-%s' % (index, name)
|
return 'c%03d-%s' % (index, name)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class AcademyVale(_BasicScraper):
|
class AcademyVale(_BasicScraper):
|
||||||
latestUrl = 'http://imagerie.com/vale/'
|
latestUrl = 'http://imagerie.com/vale/'
|
||||||
imageUrl = 'http://imagerie.com/vale/avarch.cgi?%s'
|
imageUrl = 'http://imagerie.com/vale/avarch.cgi?%s'
|
||||||
imageSearch = compile(r'<IMG.+?SRC="(avale\d{4}-\d{2}\..*?)"')
|
imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\..*?)'))
|
||||||
prevSearch = compile(r'HREF=(avarch.*?)><IMG SRC="AVNavBack.gif"')
|
prevSearch = compile(tagre('a', 'href', r'(avarch.*?)') + tagre('img', 'src', 'AVNavBack\.gif'))
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Alice(_BasicScraper):
|
class Alice(_BasicScraper):
|
||||||
latestUrl = 'http://alice.alicecomics.com/'
|
latestUrl = 'http://alice.alicecomics.com/'
|
||||||
imageUrl = 'http://alice.alicecomics.com/%s'
|
imageUrl = 'http://alice.alicecomics.com/%s'
|
||||||
|
@ -87,7 +85,6 @@ class Alice(_BasicScraper):
|
||||||
help = 'Index format: non'
|
help = 'Index format: non'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class AlienLovesPredator(_BasicScraper):
|
class AlienLovesPredator(_BasicScraper):
|
||||||
imageUrl = 'http://alienlovespredator.com/%s'
|
imageUrl = 'http://alienlovespredator.com/%s'
|
||||||
imageSearch = compile(r'<img src="(.+?)"[^>]+>(<center>\n|\n|</center>\n)<div style="height: 2px;"> </div>', MULTILINE)
|
imageSearch = compile(r'<img src="(.+?)"[^>]+>(<center>\n|\n|</center>\n)<div style="height: 2px;"> </div>', MULTILINE)
|
||||||
|
@ -104,11 +101,10 @@ class AlienLovesPredator(_BasicScraper):
|
||||||
return '%s-%s-%s-%s' % (vol, num, ccc, ddd)
|
return '%s-%s-%s-%s' % (vol, num, ccc, ddd)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class AnarchySD(_BasicScraper):
|
class AnarchySD(_BasicScraper):
|
||||||
imageUrl = 'http://www.anarchycomic.com/page%s.php'
|
imageUrl = 'http://www.anarchycomic.com/page%s.php'
|
||||||
imageSearch = compile(r'<img.+src="../(images/page\d+\..+?)"')
|
imageSearch = compile(tagre('img', 'src', r'../(images/page\d+\..+?)'))
|
||||||
prevSearch = compile(r'<a href="(page\d+\.php)">PREVIOUS PAGE')
|
prevSearch = compile(tagre('a', 'href', r'(page\d+\.php)')+'PREVIOUS PAGE')
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
starter = indirectStarter(
|
starter = indirectStarter(
|
||||||
'http://www.anarchycomic.com/page1.php',
|
'http://www.anarchycomic.com/page1.php',
|
||||||
|
|
Loading…
Reference in a new issue