Fix some modules (a&b).
This commit is contained in:
parent
c04c62e92b
commit
b1d2650615
3 changed files with 13 additions and 15 deletions
|
@ -10,7 +10,7 @@ from re import compile, escape, MULTILINE
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import regexNamer, bounceStarter, indirectStarter
|
from ..helpers import regexNamer, bounceStarter, indirectStarter
|
||||||
from .common import _WordPressScraper, _WPNaviIn, xpath_class, WP_LATEST_SEARCH
|
from .common import _WordPressScraper, xpath_class, WP_LATEST_SEARCH
|
||||||
|
|
||||||
|
|
||||||
class AbstruseGoose(_BasicScraper):
|
class AbstruseGoose(_BasicScraper):
|
||||||
|
@ -117,7 +117,7 @@ class ALessonIsLearned(_BasicScraper):
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
class Alice(_WPNaviIn):
|
class Alice(_WordPressScraper):
|
||||||
url = 'http://www.alicecomics.com/'
|
url = 'http://www.alicecomics.com/'
|
||||||
latestSearch = '//a[text()="Latest Alice!"]'
|
latestSearch = '//a[text()="Latest Alice!"]'
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
@ -152,8 +152,11 @@ class AlphaLuna(_BasicScraper):
|
||||||
url = 'http://www.alphaluna.net/'
|
url = 'http://www.alphaluna.net/'
|
||||||
stripUrl = url + 'issue-%s/'
|
stripUrl = url + 'issue-%s/'
|
||||||
firstStripUrl = stripUrl % '1/cover'
|
firstStripUrl = stripUrl % '1/cover'
|
||||||
imageSearch = compile(tagre("a", "href", r'[^"]*/(?:issue-|support/upcoming)[^"]+') + tagre("img", "src", r'([^"]*/PAGINAS/[^"]+)'))
|
imageSearch = compile(tagre("a", "href",
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "alt", "Prev"))
|
r'[^"]*/(?:issue-|support/upcoming)[^"]+') +
|
||||||
|
tagre("img", "src", r'([^"]*/PAGINAS/[^"]+)'))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') +
|
||||||
|
tagre("img", "alt", "Prev"))
|
||||||
help = 'Index format: issue/page (e.g. 4/05)'
|
help = 'Index format: issue/page (e.g. 4/05)'
|
||||||
|
|
||||||
|
|
||||||
|
@ -263,16 +266,6 @@ class Ashes(_WordPressScraper):
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class ASkeweredParadise(_BasicScraper):
|
|
||||||
url = 'http://aspcomics.net/'
|
|
||||||
stripUrl = url + 'comic/%s'
|
|
||||||
firstStripUrl = stripUrl % '001'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+'))
|
|
||||||
prevSearch = compile(tagre("a", "href", "(/comic/\d+)") +
|
|
||||||
r"[^>]+Previous")
|
|
||||||
help = 'Index format: nnn'
|
|
||||||
|
|
||||||
|
|
||||||
class ASofterWorld(_ParserScraper):
|
class ASofterWorld(_ParserScraper):
|
||||||
url = 'http://www.asofterworld.com/'
|
url = 'http://www.asofterworld.com/'
|
||||||
stripUrl = url + 'index.php?id=%s'
|
stripUrl = url + 'index.php?id=%s'
|
||||||
|
|
|
@ -57,7 +57,8 @@ class Baroquen(_BasicScraper):
|
||||||
class Bearmageddon(_WordPressScraper):
|
class Bearmageddon(_WordPressScraper):
|
||||||
url = 'http://bearmageddon.com/'
|
url = 'http://bearmageddon.com/'
|
||||||
firstStripUrl = url + '2011/08/01/page-1/'
|
firstStripUrl = url + '2011/08/01/page-1/'
|
||||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
latestSearch = '//a[div[%s]]' % xpath_class('latest-page')
|
||||||
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class Beetlebum(_BasicScraper):
|
class Beetlebum(_BasicScraper):
|
||||||
|
@ -210,6 +211,9 @@ class BroodHollow(_WordPressScraper):
|
||||||
url = 'http://broodhollow.chainsawsuit.com/'
|
url = 'http://broodhollow.chainsawsuit.com/'
|
||||||
firstStripUrl = 'http://broodhollow.chainsawsuit.com/page/2012/10/06/book-1-curious-little-thing'
|
firstStripUrl = 'http://broodhollow.chainsawsuit.com/page/2012/10/06/book-1-curious-little-thing'
|
||||||
|
|
||||||
|
def shouldSkipUrl(self, url, data):
|
||||||
|
return data.xpath('//div[@id="comic"]//iframe')
|
||||||
|
|
||||||
|
|
||||||
class Buni(_WordPressScraper):
|
class Buni(_WordPressScraper):
|
||||||
url = 'http://www.bunicomic.com/'
|
url = 'http://www.bunicomic.com/'
|
||||||
|
|
|
@ -32,6 +32,7 @@ class Removed(Scraper):
|
||||||
cls('AlsoBagels'),
|
cls('AlsoBagels'),
|
||||||
cls('Antics'),
|
cls('Antics'),
|
||||||
cls('Arcamax/BleekerTheRechargeableDog'),
|
cls('Arcamax/BleekerTheRechargeableDog'),
|
||||||
|
cls('ASkeweredParadise'),
|
||||||
cls('BackwaterPlanet'),
|
cls('BackwaterPlanet'),
|
||||||
cls('BigFatWhale'),
|
cls('BigFatWhale'),
|
||||||
cls('Blip'),
|
cls('Blip'),
|
||||||
|
|
Loading…
Reference in a new issue