dosage/dosagelib/plugins/wlpcomics.py

# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2020 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring

from __future__ import absolute_import, division, print_function

import re

from ..scraper import _ParserScraper
from ..helpers import bounceStarter


class _WLPComics(_ParserScraper):
    # Settings shared by every comic in this module: XPath-style expressions
    # for the strip image and for the "Previous"/"Next" navigation links.
    imageSearch = '//center/*/img[contains(@alt, " Comic")]'
    prevSearch = '//a[contains(text(), "Previous ")]'
    nextSearch = '//a[contains(text(), "Next ")]'
    starter = bounceStarter
    help = 'Index format: nnn'

    def __init__(self, name):
        """Initialise the scraper under the name ``'WLP/' + name``."""
        qualified_name = 'WLP/' + name
        super(_WLPComics, self).__init__(qualified_name)

    def namer(self, image_url, page_url):
        """Return ``<page stem>_<image file name>`` for a strip image.

        The page stem is the last path segment of ``page_url`` cut at its
        first dot; the image file name is the last path segment of
        ``image_url``, kept whole.
        """
        page_file = page_url.rsplit('/', 1)[-1]
        image_file = image_url.rsplit('/', 1)[-1]
        page_stem = page_file.split('.')[0]
        return '_'.join((page_stem, image_file))

    def getIndexStripUrl(self, index):
        """Return the page URL for ``index``: ``url`` + ``'<index>.html'``."""
        page = '%s.html' % index
        return self.url + page


class ChichiChan(_WLPComics):
    # Directory URL of the comic; the inherited getIndexStripUrl appends
    # '<index>.html' to it.
    url = 'http://www.wlpcomics.com/adult/chichi/'
    # NOTE(review): presumably marks the comic as adult content for the
    # scraper framework -- confirm against the scraper base class.
    adult = True


class ChocolateMilkMaid(_WLPComics):
    # Newer pages seem to be broken
    baseurl = 'http://www.wlpcomics.com/adult/cm/'
    url = baseurl + '264.html'
    adult = True

    def getIndexStripUrl(self, index):
        """Return the page URL for ``index``, built from ``baseurl``.

        ``url`` names a specific page (264.html) rather than the comic
        directory, so the directory URL is used here instead.
        """
        page = '%s.html' % index
        return self.baseurl + page

    def link_modifier(self, fromurl, tourl):
        """Bugfix for self-referencing pages...

        A link pointing back at the page it was found on is rewritten to
        the page numbered one lower (zero-padded to three digits). A link
        from page 263 to page 265 is redirected to page 264. Every other
        link is returned unchanged.
        """
        def previous_page(match):
            # Decrement the numeric page name captured by the pattern.
            number = int(match.group(1)) - 1
            return '/%03i.ht' % number

        if tourl == fromurl:
            return re.sub(r'/(\d+)\.ht', previous_page, tourl)
        skips_264 = '263.html' in fromurl and '265.html' in tourl
        return (self.baseurl + '264.html') if skips_264 else tourl


class MaidAttack(_WLPComics):
    # Directory URL of the comic; the inherited getIndexStripUrl appends
    # '<index>.html' to it.
    url = 'http://www.wlpcomics.com/general/maidattack/'


class PeterIsTheWolfAdult(_WLPComics):
    # Page URL template; '%s' is replaced by the page name (e.g. '001').
    stripUrl = 'http://www.peteristhewolf.com/adult/%s.html'
    url = stripUrl % 'home'
    firstStripUrl = stripUrl % '001'
    multipleImagesPerStrip = True
    adult = True

    def getIndexStripUrl(self, index):
        """Return the page URL for ``index``, built from ``stripUrl``.

        ``url`` here is a full page URL ('.../home.html'), so the inherited
        implementation (``url`` + ``'<index>.html'``) would produce
        '.../home.html001.html'. Use the page template instead.
        """
        return self.stripUrl % index

    def namer(self, imageUrl, pageUrl):
        """Return ``<page stem>_<image file name>`` for a strip image.

        When ``imageUrl`` contains 'adult', '_adult' is inserted in front
        of the first dot of the resulting name ('001_001.jpg' becomes
        '001_001_adult.jpg'). Everything after that dot is kept, and a
        name without any dot simply gets '_adult' appended.
        """
        pageStem = pageUrl.rsplit('/', 1)[-1].split('.')[0]
        name = pageStem + '_' + imageUrl.rsplit('/', 1)[-1]
        if 'adult' not in imageUrl:
            return name
        # partition() never raises and keeps the full remainder, unlike
        # indexing the result of split('.').
        stem, dot, rest = name.partition('.')
        if not dot:
            return name + '_adult'
        return stem + '_adult.' + rest

    def getPrevUrl(self, url, data):
        """Return the previous page URL, stepping over a navigation loop.

        Page 194 is sent to page 193 directly; every other page defers to
        the parent implementation.
        """
        # Fix loop in site navigation
        if url == self.stripUrl % '194':
            return self.stripUrl % '193'
        return super(PeterIsTheWolfAdult, self).getPrevUrl(url, data)


class PeterIsTheWolfGeneral(_WLPComics):
    url = 'http://www.peteristhewolf.com/general/'
    # Page URL template; '%s' is replaced by the page name (e.g. '001').
    stripUrl = url + '%s.html'
    firstStripUrl = stripUrl % '001'

    def getPrevUrl(self, url, data):
        """Return the previous page URL, stepping over navigation loops.

        The pages listed below are sent to a fixed predecessor; every
        other page defers to the parent implementation.
        """
        # Fix loops in site navigation: (page, page to go to instead)
        overrides = (
            ('406', '405'),
            ('230', '229'),
            ('229', '228'),
            ('153', '152'),
        )
        for page, predecessor in overrides:
            if url == self.stripUrl % page:
                return self.stripUrl % predecessor
        return super(PeterIsTheWolfGeneral, self).getPrevUrl(url, data)


class Stellar(_WLPComics):
    url = 'http://www.wlpcomics.com/adult/stellar/'
    adult = True

    def link_modifier(self, fromurl, tourl):
        """Bugfix for empty page...

        Links to page 075 are redirected to page 074; all other links are
        returned unchanged.
        """
        empty_page = self.url + '075.html'
        if tourl != empty_page:
            return tourl
        return self.url + '074.html'
Fix WLP comics. 2016-04-10 23:07:21 +00:00			`# -*- coding: utf-8 -*-`
Fixup copyright years. 2016-10-28 22:21:41 +00:00			`# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs`
Updated copyright. 2014-01-05 15:50:57 +00:00			`# Copyright (C) 2012-2014 Bastian Kleineidam`
Add self to authors list, update copyright headers 2020-01-13 06:34:05 +00:00			`# Copyright (C) 2015-2020 Tobias Gruetzmacher`
			`# Copyright (C) 2019-2020 Daniel Ring`
Fix some comics. 2012-11-26 06:13:32 +00:00
Fix WLP comics. 2016-04-10 23:07:21 +00:00			`from __future__ import absolute_import, division, print_function`
Fix some comics. 2012-11-26 06:13:32 +00:00
Small fix to the WLP module. 2016-11-01 01:27:29 +00:00			`import re`

Fix WLP comics. 2016-04-10 23:07:21 +00:00			`from ..scraper import _ParserScraper`
Use default bounceStarter for site modules. 2016-04-12 23:24:13 +00:00			`from ..helpers import bounceStarter`
Fix some comics. 2012-11-26 06:13:32 +00:00
Add comic scripts, add fixes and other stuff. 2012-11-28 17:15:12 +00:00
Fix WLP comics. 2016-04-10 23:07:21 +00:00			`class _WLPComics(_ParserScraper):`
			`imageSearch = '//center/*/img[contains(@alt, " Comic")]'`
			`prevSearch = '//a[contains(text(), "Previous ")]'`
			`nextSearch = '//a[contains(text(), "Next ")]'`
Refactor: Convert starter to simple method. 2016-04-13 18:01:51 +00:00			`starter = bounceStarter`
Fix WLP comics. 2016-04-10 23:07:21 +00:00			`help = 'Index format: nnn'`
Add comic scripts, add fixes and other stuff. 2012-11-28 17:15:12 +00:00
Refactor: Introduce generator methods for scrapers This allows one comic module class to generate multiple scrapers. This change is to support a more dynamic module system as described in #42. 2016-05-20 23:18:42 +00:00			`def __init__(self, name):`
			`super(_WLPComics, self).__init__('WLP/' + name)`
Fix WLP comics. 2016-04-10 23:07:21 +00:00
Refactor: Make namer a method. When #42 is realized, the naming of files might differ between comic modules, so the namer's logical location is the instance, not the class. 2016-04-21 06:20:49 +00:00			`def namer(self, image_url, page_url):`
Fix WLP comics. 2016-04-10 23:07:21 +00:00			`return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' +`
			`image_url.rsplit('/', 1)[-1])`

			`def getIndexStripUrl(self, index):`
Small fix to the WLP module. 2016-11-01 01:27:29 +00:00			`return self.url + '%s.html' % index`
Fix WLP comics. 2016-04-10 23:07:21 +00:00

			`class ChichiChan(_WLPComics):`
			`url = 'http://www.wlpcomics.com/adult/chichi/'`
			`adult = True`


			`class ChocolateMilkMaid(_WLPComics):`
			`# Newer pages seem to be broken`
Small fix to the WLP module. 2016-11-01 01:27:29 +00:00			`baseurl = 'http://www.wlpcomics.com/adult/cm/'`
			`url = baseurl + '264.html'`
Fix WLP comics. 2016-04-10 23:07:21 +00:00			`adult = True`

Small fix to the WLP module. 2016-11-01 01:27:29 +00:00			`def getIndexStripUrl(self, index):`
			`return self.baseurl + '%s.html' % index`

			`def link_modifier(self, fromurl, tourl):`
			`"""Bugfix for self-referencing pages..."""`
			`if tourl == fromurl:`
			`return re.sub(r'/(\d+)\.ht',`
			`lambda m: '/%03i.ht' % (int(m.group(1)) - 1), tourl)`
			`if '263.html' in fromurl and '265.html' in tourl:`
			`return self.baseurl + '264.html'`
			`return tourl`

Fix WLP comics. 2016-04-10 23:07:21 +00:00
			`class MaidAttack(_WLPComics):`
			`url = 'http://www.wlpcomics.com/general/maidattack/'`


			`class PeterIsTheWolfAdult(_WLPComics):`
Fix WLP/PeterIsTheWolf 2019-12-17 09:33:48 +00:00			`stripUrl = 'http://www.peteristhewolf.com/adult/%s.html'`
			`url = stripUrl % 'home'`
			`firstStripUrl = stripUrl % '001'`
			`multipleImagesPerStrip = True`
Fix WLP comics. 2016-04-10 23:07:21 +00:00			`adult = True`

Fix WLP/PeterIsTheWolf 2019-06-27 04:19:24 +00:00			`def namer(self, imageUrl, pageUrl):`
			`name = pageUrl.rsplit('/', 1)[-1].split('.')[0] + '_' + imageUrl.rsplit('/', 1)[-1]`
			`if 'adult' in imageUrl:`
			`name = name.split('.')`
			`return name[0] + '_adult.' + name[1]`
			`return name`

Fix WLP/PeterIsTheWolf 2019-12-17 09:33:48 +00:00			`def getPrevUrl(self, url, data):`
			`# Fix loop in site navigation`
			`if url == self.stripUrl % '194':`
			`return self.stripUrl % '193'`
			`return super(PeterIsTheWolfAdult, self).getPrevUrl(url, data)`

Fix WLP comics. 2016-04-10 23:07:21 +00:00
			`class PeterIsTheWolfGeneral(_WLPComics):`
			`url = 'http://www.peteristhewolf.com/general/'`
Fix WLP/PeterIsTheWolf 2019-12-17 09:33:48 +00:00			`stripUrl = url + '%s.html'`
			`firstStripUrl = stripUrl % '001'`

			`def getPrevUrl(self, url, data):`
			`# Fix loops in site navigation`
			`if url == self.stripUrl % '406':`
			`return self.stripUrl % '405'`
			`if url == self.stripUrl % '230':`
			`return self.stripUrl % '229'`
			`if url == self.stripUrl % '229':`
			`return self.stripUrl % '228'`
			`if url == self.stripUrl % '153':`
			`return self.stripUrl % '152'`
			`return super(PeterIsTheWolfGeneral, self).getPrevUrl(url, data)`
Fix WLP comics. 2016-04-10 23:07:21 +00:00

			`class Stellar(_WLPComics):`
			`url = 'http://www.wlpcomics.com/adult/stellar/'`
			`adult = True`
Fix some comics. 2012-11-26 06:13:32 +00:00
Apply link modifier to all links. This was previously only the "previous link modifier", now it can also modify "next" and "latest" links. Additionally, the modifier is given the current URL, so those cases can be distinguished. 2016-11-01 00:12:16 +00:00			`def link_modifier(self, fromurl, tourl):`
Fix WLP comics. 2016-04-10 23:07:21 +00:00			`"""Bugfix for empty page..."""`
Apply link modifier to all links. This was previously only the "previous link modifier", now it can also modify "next" and "latest" links. Additionally, the modifier is given the current URL, so those cases can be distinguished. 2016-11-01 00:12:16 +00:00			`if tourl == self.url + '075.html':`
Refactor: All the other class methods. Turns out, it would have been better if all methods had been instance methods and not class methods. This finished a big chunk of the rework needed for #42. 2016-04-21 21:52:31 +00:00			`return self.url + '074.html'`
Apply link modifier to all links. This was previously only the "previous link modifier", now it can also modify "next" and "latest" links. Additionally, the modifier is given the current URL, so those cases can be distinguished. 2016-11-01 00:12:16 +00:00			`return tourl`