# dosage/dosagelib/plugins/wlpcomics.py

# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2021 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring
import re

from ..scraper import _ParserScraper
from ..helpers import bounceStarter


class _WLPComics(_ParserScraper):
    """Shared base class for the WLP comic scrapers in this module.

    Holds the XPath expressions used to find the comic image and the
    previous/next links, registers every scraper under the ``WLP/`` name
    prefix and builds file names from the page and image URLs.
    """

    help = 'Index format: nnn'
    starter = bounceStarter
    # XPath expressions for the strip image and the navigation links.
    imageSearch = '//img[contains(@alt, " Comic")]'
    nextSearch = '//a[contains(text(), "Next ")]'
    prevSearch = '//a[contains(text(), "Previous ")]'

    def __init__(self, name):
        """Initialise the scraper under the name ``WLP/<name>``."""
        super().__init__('WLP/' + name)

    def namer(self, image_url, page_url):
        """Return ``<page stem>_<image file name>`` for an image.

        The page stem is the last path component of ``page_url`` cut at
        its first dot; the image file name is the last path component of
        ``image_url``.
        """
        page_stem = page_url.rpartition('/')[2].partition('.')[0]
        image_file = image_url.rpartition('/')[2]
        return page_stem + '_' + image_file


class ChichiChan(_WLPComics):
    """Scraper for the comic served from ``/adult/chichi/`` on wlpcomics.com."""

    adult = True
    # Base URL of the comic; individual strips live at <url><index>.html.
    url = 'http://www.wlpcomics.com/adult/chichi/'
    stripUrl = url + '%s.html'


class ChocolateMilkMaid(_WLPComics):
    """Scraper for the comic served from ``/adult/cm/`` on wlpcomics.com."""

    adult = True
    stripUrl = 'http://www.wlpcomics.com/adult/cm/%s.html'
    # Newer pages seem to be broken, so page 264 is used as the start URL.
    url = stripUrl % '264'

    def link_modifier(self, fromurl, tourl):
        """Repair known-bad navigation links of this comic.

        A link that points back at the page it was found on is rewritten
        to the page numbered one lower (zero-padded to three digits). A
        link leading from page 263 to page 265 is redirected to page 264.
        Any other link is returned unchanged.
        """
        if tourl != fromurl:
            if '263.html' in fromurl and '265.html' in tourl:
                return self.stripUrl % '264'
            return tourl

        def step_back(match):
            # Decrement the numeric page index captured from the URL.
            return '/%03i.ht' % (int(match.group(1)) - 1)

        return re.sub(r'/(\d+)\.ht', step_back, tourl)


class MaidAttack(_WLPComics):
    """Scraper for the comic served from ``/general/maidattack/`` on wlpcomics.com."""

    # Base URL of the comic; individual strips live at <url><index>.html.
    url = 'http://www.wlpcomics.com/general/maidattack/'
    stripUrl = url + '%s.html'


class PeterIsTheWolfAdult(_WLPComics):
    """Scraper for the ``/adult/`` edition of the comic on peteristhewolf.com."""

    stripUrl = 'http://www.peteristhewolf.com/adult/%s.html'
    # navigation to newest page is broken
    url = stripUrl % '427'
    firstStripUrl = stripUrl % '001'
    multipleImagesPerStrip = True
    adult = True
    endOfLife = True

    def namer(self, imageUrl, pageUrl):
        """Return the file name for an image, tagging adult images.

        The base name is ``<page stem>_<image file name>`` as built by
        the parent class. If ``imageUrl`` contains ``adult``, ``_adult``
        is inserted directly before the file extension.
        """
        name = super().namer(imageUrl, pageUrl)
        if 'adult' not in imageUrl:
            return name
        # Split on the LAST dot only, so that image names containing
        # additional dots keep their complete stem and real extension.
        stem, dot, extension = name.rpartition('.')
        if not dot:
            # No extension at all: just append the marker instead of failing.
            return name + '_adult'
        return stem + '_adult' + dot + extension

    def getPrevUrl(self, url, data):
        """Return the previous strip URL, bypassing a navigation loop.

        Page 194 is sent directly to page 193; every other page uses the
        parent class lookup.
        """
        # Fix loop in site navigation
        if url == self.stripUrl % '194':
            return self.stripUrl % '193'
        return super().getPrevUrl(url, data)


class PeterIsTheWolfGeneral(_WLPComics):
    """Scraper for the ``/general/`` edition of the comic on peteristhewolf.com."""

    endOfLife = True
    stripUrl = 'http://www.peteristhewolf.com/general/%s.html'
    firstStripUrl = stripUrl % '001'
    # navigation to newest page is broken
    url = stripUrl % '427'

    def getPrevUrl(self, url, data):
        """Return the previous strip URL, bypassing navigation loops.

        A few pages are mapped to an explicit predecessor; every other
        page uses the parent class lookup.
        """
        # Fix loops in site navigation: page index -> index to go to instead.
        loop_fixes = {
            '406': '405',
            '230': '229',
            '229': '228',
            '153': '152',
        }
        for broken, target in loop_fixes.items():
            if url == self.stripUrl % broken:
                return self.stripUrl % target
        return super().getPrevUrl(url, data)


class Stellar(_WLPComics):
    """Scraper for the comic served from ``/adult/stellar/`` on wlpcomics.com."""

    adult = True
    # Base URL of the comic; individual strips live at <url><index>.html.
    url = 'http://www.wlpcomics.com/adult/stellar/'
    stripUrl = url + '%s.html'

    def link_modifier(self, fromurl, tourl):
        """Redirect links to the empty page 075 to page 074.

        All other links are returned unchanged.
        """
        empty_page = self.url + '075.html'
        return self.url + '074.html' if tourl == empty_page else tourl
Update file headers The default encoding for source files is UTF-8 since Python 3, so we can drop all encoding headers. While we are at it, just replace them with SPDX headers. 2020-04-18 13:45:44 +02:00			`# SPDX-License-Identifier: MIT`
Fixup copyright years. 2016-10-29 00:21:41 +02:00			`# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs`
Updated copyright. 2014-01-05 16:50:57 +01:00			`# Copyright (C) 2012-2014 Bastian Kleineidam`
Fix some navigation issues in WLP comics 2021-03-20 01:20:09 +01:00			`# Copyright (C) 2015-2021 Tobias Gruetzmacher`
Add self to authors list, update copyright headers 2020-01-12 22:34:05 -08:00			`# Copyright (C) 2019-2020 Daniel Ring`
Small fix to the WLP module. 2016-11-01 02:27:29 +01:00			`import re`

Fix WLP comics. 2016-04-11 01:07:21 +02:00			`from ..scraper import _ParserScraper`
Use default bounceStarter for site modules. 2016-04-13 01:24:13 +02:00			`from ..helpers import bounceStarter`
Fix some comics. 2012-11-26 07:13:32 +01:00
Add comic scripts, add fixes and other stuff. 2012-11-28 18:15:12 +01:00
Fix WLP comics. 2016-04-11 01:07:21 +02:00			`class _WLPComics(_ParserScraper):`
Minor fixes to several strips (#158) * Fix Twokinds * Fix XKCD * Fix Unsounded * Fix SluggyFreelance * Fix Oglaf * Fix missing and incorrect renames * Fix WLP/PeterIsTheWolf{General,Adult} 2020-04-06 04:23:23 -07:00			`imageSearch = '//img[contains(@alt, " Comic")]'`
Fix WLP comics. 2016-04-11 01:07:21 +02:00			`prevSearch = '//a[contains(text(), "Previous ")]'`
			`nextSearch = '//a[contains(text(), "Next ")]'`
Refactor: Convert starter to simple method. 2016-04-13 20:01:51 +02:00			`starter = bounceStarter`
Fix WLP comics. 2016-04-11 01:07:21 +02:00			`help = 'Index format: nnn'`
Add comic scripts, add fixes and other stuff. 2012-11-28 18:15:12 +01:00
Refactor: Introduce generator methods for scrapers This allows one comic module class to generate multiple scrapers. This change is to support a more dynamic module system as described in #42. 2016-05-21 01:18:42 +02:00			`def __init__(self, name):`
			`super(_WLPComics, self).__init__('WLP/' + name)`
Fix WLP comics. 2016-04-11 01:07:21 +02:00
Refactor: Make namer a method. When #42 is realized, the naming of files might differ between comic modules, so the namer's logical location is the instance, not the class. 2016-04-21 08:20:49 +02:00			`def namer(self, image_url, page_url):`
Fix WLP comics. 2016-04-11 01:07:21 +02:00			`return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' +`
			`image_url.rsplit('/', 1)[-1])`


			`class ChichiChan(_WLPComics):`
			`url = 'http://www.wlpcomics.com/adult/chichi/'`
Minor fixes to several strips (#158) * Fix Twokinds * Fix XKCD * Fix Unsounded * Fix SluggyFreelance * Fix Oglaf * Fix missing and incorrect renames * Fix WLP/PeterIsTheWolf{General,Adult} 2020-04-06 04:23:23 -07:00			`stripUrl = url + '%s.html'`
Fix WLP comics. 2016-04-11 01:07:21 +02:00			`adult = True`


			`class ChocolateMilkMaid(_WLPComics):`
			`# Newer pages seem to be broken`
Minor fixes to several strips (#158) * Fix Twokinds * Fix XKCD * Fix Unsounded * Fix SluggyFreelance * Fix Oglaf * Fix missing and incorrect renames * Fix WLP/PeterIsTheWolf{General,Adult} 2020-04-06 04:23:23 -07:00			`stripUrl = 'http://www.wlpcomics.com/adult/cm/%s.html'`
			`url = stripUrl % '264'`
Fix WLP comics. 2016-04-11 01:07:21 +02:00			`adult = True`

Small fix to the WLP module. 2016-11-01 02:27:29 +01:00			`def link_modifier(self, fromurl, tourl):`
			`"""Bugfix for self-referencing pages..."""`
			`if tourl == fromurl:`
			`return re.sub(r'/(\d+)\.ht',`
			`lambda m: '/%03i.ht' % (int(m.group(1)) - 1), tourl)`
			`if '263.html' in fromurl and '265.html' in tourl:`
Fix some navigation issues in WLP comics 2021-03-20 01:20:09 +01:00			`return self.stripUrl % '264'`
Small fix to the WLP module. 2016-11-01 02:27:29 +01:00			`return tourl`

Fix WLP comics. 2016-04-11 01:07:21 +02:00
			`class MaidAttack(_WLPComics):`
			`url = 'http://www.wlpcomics.com/general/maidattack/'`
Minor fixes to several strips (#158) * Fix Twokinds * Fix XKCD * Fix Unsounded * Fix SluggyFreelance * Fix Oglaf * Fix missing and incorrect renames * Fix WLP/PeterIsTheWolf{General,Adult} 2020-04-06 04:23:23 -07:00			`stripUrl = url + '%s.html'`
Fix WLP comics. 2016-04-11 01:07:21 +02:00

			`class PeterIsTheWolfAdult(_WLPComics):`
Fix WLP/PeterIsTheWolf 2019-12-17 01:33:48 -08:00			`stripUrl = 'http://www.peteristhewolf.com/adult/%s.html'`
Fix some navigation issues in WLP comics 2021-03-20 01:20:09 +01:00			`# navigation to newest page is broken`
			`url = stripUrl % '427'`
Fix WLP/PeterIsTheWolf 2019-12-17 01:33:48 -08:00			`firstStripUrl = stripUrl % '001'`
			`multipleImagesPerStrip = True`
Fix WLP comics. 2016-04-11 01:07:21 +02:00			`adult = True`
Fix some navigation issues in WLP comics 2021-03-20 01:20:09 +01:00			`endOfLife = True`
Fix WLP comics. 2016-04-11 01:07:21 +02:00
Fix WLP/PeterIsTheWolf 2019-06-26 21:19:24 -07:00			`def namer(self, imageUrl, pageUrl):`
			`name = pageUrl.rsplit('/', 1)[-1].split('.')[0] + '_' + imageUrl.rsplit('/', 1)[-1]`
			`if 'adult' in imageUrl:`
			`name = name.split('.')`
			`return name[0] + '_adult.' + name[1]`
			`return name`

Fix WLP/PeterIsTheWolf 2019-12-17 01:33:48 -08:00			`def getPrevUrl(self, url, data):`
			`# Fix loop in site navigation`
			`if url == self.stripUrl % '194':`
			`return self.stripUrl % '193'`
			`return super(PeterIsTheWolfAdult, self).getPrevUrl(url, data)`

Fix WLP comics. 2016-04-11 01:07:21 +02:00
			`class PeterIsTheWolfGeneral(_WLPComics):`
Fix some navigation issues in WLP comics 2021-03-20 01:20:09 +01:00			`stripUrl = 'http://www.peteristhewolf.com/general/%s.html'`
			`# navigation to newest page is broken`
			`url = stripUrl % '427'`
Fix WLP/PeterIsTheWolf 2019-12-17 01:33:48 -08:00			`firstStripUrl = stripUrl % '001'`
Fix some navigation issues in WLP comics 2021-03-20 01:20:09 +01:00			`endOfLife = True`
Fix WLP/PeterIsTheWolf 2019-12-17 01:33:48 -08:00
			`def getPrevUrl(self, url, data):`
			`# Fix loops in site navigation`
			`if url == self.stripUrl % '406':`
			`return self.stripUrl % '405'`
			`if url == self.stripUrl % '230':`
			`return self.stripUrl % '229'`
			`if url == self.stripUrl % '229':`
			`return self.stripUrl % '228'`
			`if url == self.stripUrl % '153':`
			`return self.stripUrl % '152'`
			`return super(PeterIsTheWolfGeneral, self).getPrevUrl(url, data)`
Fix WLP comics. 2016-04-11 01:07:21 +02:00

			`class Stellar(_WLPComics):`
			`url = 'http://www.wlpcomics.com/adult/stellar/'`
Minor fixes to several strips (#158) * Fix Twokinds * Fix XKCD * Fix Unsounded * Fix SluggyFreelance * Fix Oglaf * Fix missing and incorrect renames * Fix WLP/PeterIsTheWolf{General,Adult} 2020-04-06 04:23:23 -07:00			`stripUrl = url + '%s.html'`
Fix WLP comics. 2016-04-11 01:07:21 +02:00			`adult = True`
Fix some comics. 2012-11-26 07:13:32 +01:00
Apply link modifier to all links. This was previously only the "previous link modifier", now it can also modify "next" and "latest" links. Additionally, the modifier is given the current URL, so those cases can be distinguished. 2016-11-01 01:12:16 +01:00			`def link_modifier(self, fromurl, tourl):`
Fix WLP comics. 2016-04-11 01:07:21 +02:00			`"""Bugfix for empty page..."""`
Apply link modifier to all links. This was previously only the "previous link modifier", now it can also modify "next" and "latest" links. Additionally, the modifier is given the current URL, so those cases can be distinguished. 2016-11-01 01:12:16 +01:00			`if tourl == self.url + '075.html':`
Refactor: All the other class methods. Turns out, it would have been better if all methods had been instance methods and not class methods. This finished a big chunk of the rework needed for #42. 2016-04-21 23:52:31 +02:00			`return self.url + '074.html'`
Apply link modifier to all links. This was previously only the "previous link modifier", now it can also modify "next" and "latest" links. Additionally, the modifier is given the current URL, so those cases can be distinguished. 2016-11-01 01:12:16 +01:00			`return tourl`