Fix WLP comics.
This commit is contained in:
parent
af2e57d850
commit
ad7a297964
1 changed file with 62 additions and 29 deletions
|
@@ -1,39 +1,72 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
from re import compile
|
from __future__ import absolute_import, division, print_function
|
||||||
from ..util import tagre
|
|
||||||
from ..scraper import make_scraper
|
from ..scraper import _ParserScraper
|
||||||
from ..helpers import bounceStarter
|
|
||||||
|
|
||||||
|
|
||||||
_imageSearch = compile(tagre("img", "src", r'(http://www\.wlpcomics\.com/(?:adult|general)/[^"]+/comics/[^"]+)'))
|
class _WLPComics(_ParserScraper):
|
||||||
_prevSearch = compile(tagre("a", "href", r'(\w+.html)') + 'Previous')
|
imageSearch = '//center/*/img[contains(@alt, " Comic")]'
|
||||||
_nextSearch = compile(tagre("a", "href", r'(\w+.html)') + 'Next')
|
prevSearch = '//a[contains(text(), "Previous ")]'
|
||||||
|
nextSearch = '//a[contains(text(), "Next ")]'
|
||||||
|
help = 'Index format: nnn'
|
||||||
def add(name, path):
|
|
||||||
baseUrl = 'http://www.wlpcomics.com/' + path
|
|
||||||
classname = 'WLP_' + name
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def getName(cls):
|
||||||
return pageUrl.split('/')[-1].split('.')[0]
|
return 'WLP/' + cls.__name__
|
||||||
|
|
||||||
globals()[classname] = make_scraper(classname,
|
@classmethod
|
||||||
name = 'WLP/' + name,
|
def starter(cls):
|
||||||
url = baseUrl,
|
"""Get bounced start URL."""
|
||||||
starter = bounceStarter(baseUrl, _nextSearch),
|
data = cls.getPage(cls.url)
|
||||||
stripUrl = baseUrl + '%s.html',
|
url2 = cls.fetchUrl(cls.url, data, cls.prevSearch)
|
||||||
imageSearch = _imageSearch,
|
data = cls.getPage(url2)
|
||||||
prevSearch = _prevSearch,
|
return cls.fetchUrl(url2, data, cls.nextSearch)
|
||||||
namer = namer,
|
|
||||||
help = 'Index format: nnn',
|
@classmethod
|
||||||
)
|
def namer(cls, image_url, page_url):
|
||||||
|
return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' +
|
||||||
|
image_url.rsplit('/', 1)[-1])
|
||||||
|
|
||||||
|
def getIndexStripUrl(self, index):
|
||||||
|
return self.url + '%s.html'
|
||||||
|
|
||||||
|
|
||||||
add('ChichiChan', 'adult/chichi/')
|
class ChichiChan(_WLPComics):
|
||||||
add('ChocolateMilkMaid', 'adult/cm/')
|
url = 'http://www.wlpcomics.com/adult/chichi/'
|
||||||
add('MaidAttack', 'general/maidattack/')
|
adult = True
|
||||||
add('ShadowChasers', 'general/shadowchasers/')
|
|
||||||
|
|
||||||
|
class ChocolateMilkMaid(_WLPComics):
|
||||||
|
# Newer pages seem to be broken
|
||||||
|
url = 'http://www.wlpcomics.com/adult/cm/262.html'
|
||||||
|
adult = True
|
||||||
|
|
||||||
|
|
||||||
|
class MaidAttack(_WLPComics):
|
||||||
|
url = 'http://www.wlpcomics.com/general/maidattack/'
|
||||||
|
|
||||||
|
|
||||||
|
class PeterIsTheWolfAdult(_WLPComics):
|
||||||
|
url = 'http://www.peteristhewolf.com/adult/home.html'
|
||||||
|
adult = True
|
||||||
|
|
||||||
|
|
||||||
|
class PeterIsTheWolfGeneral(_WLPComics):
|
||||||
|
url = 'http://www.peteristhewolf.com/general/'
|
||||||
|
|
||||||
|
|
||||||
|
class Stellar(_WLPComics):
|
||||||
|
url = 'http://www.wlpcomics.com/adult/stellar/'
|
||||||
|
adult = True
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def fetchUrls(cls, url, data, urlSearch):
|
||||||
|
"""Bugfix for empty page..."""
|
||||||
|
urls = super(Stellar, cls).fetchUrls(url, data, urlSearch)
|
||||||
|
if cls.url + '075.html' in urls:
|
||||||
|
urls = [cls.url + '074.html']
|
||||||
|
return urls
|
||||||
|
|
Loading…
Reference in a new issue