Fix WLP comics.

This commit is contained in:
Tobias Gruetzmacher 2016-04-11 01:07:21 +02:00
parent af2e57d850
commit ad7a297964

View file

@ -1,39 +1,72 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from re import compile from __future__ import absolute_import, division, print_function
from ..util import tagre
from ..scraper import make_scraper from ..scraper import _ParserScraper
from ..helpers import bounceStarter
_imageSearch = compile(tagre("img", "src", r'(http://www\.wlpcomics\.com/(?:adult|general)/[^"]+/comics/[^"]+)')) class _WLPComics(_ParserScraper):
_prevSearch = compile(tagre("a", "href", r'(\w+.html)') + 'Previous') imageSearch = '//center/*/img[contains(@alt, " Comic")]'
_nextSearch = compile(tagre("a", "href", r'(\w+.html)') + 'Next') prevSearch = '//a[contains(text(), "Previous ")]'
nextSearch = '//a[contains(text(), "Next ")]'
help = 'Index format: nnn'
def add(name, path):
baseUrl = 'http://www.wlpcomics.com/' + path
classname = 'WLP_' + name
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def getName(cls):
return pageUrl.split('/')[-1].split('.')[0] return 'WLP/' + cls.__name__
globals()[classname] = make_scraper(classname, @classmethod
name = 'WLP/' + name, def starter(cls):
url = baseUrl, """Get bounced start URL."""
starter = bounceStarter(baseUrl, _nextSearch), data = cls.getPage(cls.url)
stripUrl = baseUrl + '%s.html', url2 = cls.fetchUrl(cls.url, data, cls.prevSearch)
imageSearch = _imageSearch, data = cls.getPage(url2)
prevSearch = _prevSearch, return cls.fetchUrl(url2, data, cls.nextSearch)
namer = namer,
help = 'Index format: nnn', @classmethod
) def namer(cls, image_url, page_url):
return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' +
image_url.rsplit('/', 1)[-1])
def getIndexStripUrl(self, index):
    """Return the page URL for the strip identified by ``index``.

    The URL is built by appending ``<index>.html`` to ``self.url``
    (the class help text describes the index format as ``nnn``).

    The previous version returned ``self.url + '%s.html'`` without
    substituting ``index``, so the result contained a literal ``%s``.
    """
    # ``%`` binds tighter than ``+``, so only the template is formatted
    # and any ``%`` characters in ``self.url`` are left untouched.
    return self.url + '%s.html' % index
add('ChichiChan', 'adult/chichi/') class ChichiChan(_WLPComics):
add('ChocolateMilkMaid', 'adult/cm/') url = 'http://www.wlpcomics.com/adult/chichi/'
add('MaidAttack', 'general/maidattack/') adult = True
add('ShadowChasers', 'general/shadowchasers/')
# Scraper entry for the "ChocolateMilkMaid" comic, built on _WLPComics.
class ChocolateMilkMaid(_WLPComics):
# Newer pages seem to be broken, so the start URL points at page 262
# rather than at the comic's directory index.
# NOTE(review): getIndexStripUrl appends to `url`; since this value is a
# page and not a directory, index lookups presumably misbehave - confirm.
url = 'http://www.wlpcomics.com/adult/cm/262.html'
# Presumably marks the comic as adult content (the URL lives under
# /adult/) - confirm against the scraper base class.
adult = True
# Scraper entry for the "MaidAttack" comic, built on _WLPComics.
class MaidAttack(_WLPComics):
# Directory URL of the comic; no `adult` attribute is set on this class.
url = 'http://www.wlpcomics.com/general/maidattack/'
# Scraper entry for the adult edition of "Peter Is The Wolf", hosted on
# peteristhewolf.com rather than wlpcomics.com, built on _WLPComics.
class PeterIsTheWolfAdult(_WLPComics):
# NOTE(review): this URL names a page (home.html), not a directory;
# getIndexStripUrl appends to `url`, so index lookups presumably need
# checking - confirm.
url = 'http://www.peteristhewolf.com/adult/home.html'
# Presumably marks the comic as adult content (the URL lives under
# /adult/) - confirm against the scraper base class.
adult = True
# Scraper entry for the general-audience edition of "Peter Is The Wolf",
# hosted on peteristhewolf.com, built on _WLPComics.
class PeterIsTheWolfGeneral(_WLPComics):
# Directory URL of the comic; no `adult` attribute is set on this class.
url = 'http://www.peteristhewolf.com/general/'
class Stellar(_WLPComics):
    """Scraper entry for the "Stellar" comic, built on _WLPComics."""

    url = 'http://www.wlpcomics.com/adult/stellar/'
    adult = True

    @classmethod
    def fetchUrls(cls, url, data, urlSearch):
        """Work around an empty page in the archive.

        Delegates to the parent implementation; if the URLs it finds
        include page 075, the result is replaced by a single link to
        page 074. Otherwise the parent's result is returned unchanged.
        """
        found = super(Stellar, cls).fetchUrls(url, data, urlSearch)
        empty_page = cls.url + '075.html'
        if empty_page not in found:
            return found
        return [cls.url + '074.html']