dosage/dosagelib/plugins/i.py

# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2022 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring
from re import compile, escape

from ..scraper import BasicScraper, ParserScraper
from ..util import tagre
from .common import WordPressScraper, WordPressNavi


class IAmArg(BasicScraper):
    """Regex-based scraper configuration for iamarg.com.

    Strip pages are addressed as ``<url><yyyy/mm/dd/stripname>/``.
    ``tagre`` is a project helper imported from ``..util``; it presumably
    builds a regex for the given tag/attribute pair - confirm there.
    """
    url = 'http://iamarg.com/'
    # Regex-escaped form of the base URL, for embedding in patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2011/05/08/05082011'
    # Protocol-relative image source under /comics/, named with a
    # digits-digits-digits prefix.
    imageSearch = compile(tagre(
        "img",
        "src",
        r'(//iamarg.com/comics/\d+-\d+-\d+[^"]+)',
    ))
    # Link back to an earlier strip: base URL followed by three numeric
    # path segments and the strip name; "prev" is passed as ``after``.
    prevSearch = compile(tagre(
        "a",
        "href",
        '(' + rurl + r'\d+/\d+/\d+/[^"]+)',
        after="prev",
    ))
    help = 'Index format: yyyy/mm/dd/stripname'


class ICanBarelyDraw(BasicScraper):
    """Regex-based scraper configuration for icanbarelydraw.com.

    Strip pages are addressed as ``<url><number>``. ``tagre`` is a project
    helper imported from ``..util``; it presumably builds a regex for the
    given tag/attribute pair - confirm there.
    """
    url = 'http://www.icanbarelydraw.com/comic/'
    # Regex-escaped form of the base URL, for embedding in patterns below.
    rurl = escape(url)
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '39'
    # Image source below <url>comics/ with a digits-digits-digits- prefix.
    imageSearch = compile(tagre(
        "img",
        "src",
        '(' + rurl + r'comics/\d+-\d+-\d+-[^"]+)',
    ))
    # Any link to a numbered strip page below the base URL.
    prevSearch = compile(tagre(
        "a",
        "href",
        '(' + rurl + r'\d+)',
    ))
    help = 'Index format: number'


class IDreamOfAJeanieBottle(WordPressScraper):
    """Scraper configuration for jeaniebottle.com.

    Only the start URL is declared here; all other settings come from
    ``WordPressScraper`` (imported from ``.common``).
    """
    url = 'http://jeaniebottle.com/'


class InternetWebcomic(WordPressNavi):
    """Scraper configuration for internet-webcomic.com.

    Only URLs and the help text are declared here; all other settings come
    from ``WordPressNavi`` (imported from ``.common``).
    """
    url = 'http://www.internet-webcomic.com/'
    # Strip pages are addressed through a ``?p=<n>`` query parameter.
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '30'
    help = 'Index format: n'


class Inverloch(ParserScraper):
    """XPath-based scraper configuration for Inverloch on seraph-inn.com.

    Pages are addressed by number through the ``page`` query parameter.
    """
    stripUrl = 'https://www.seraph-inn.com/view.php?story=inverloch&page=%s'
    # The start URL is pinned to a fixed page number (763) rather than a
    # "latest" page.
    url = stripUrl % '763'
    firstStripUrl = stripUrl % '2'
    imageSearch = '//img[@class="page"]'
    # NOTE: the link text is matched exactly, including the leading space.
    prevSearch = '//p[@class="comic-nav"]/a[text()=" Previous"]'
    # NOTE(review): both flags are interpreted by the base scraper, which is
    # not visible here. Presumably: a page may carry several images, and the
    # comic is finished - confirm against ..scraper.
    multipleImagesPerStrip = True
    endOfLife = True


class IRovedOut(ParserScraper):
    """XPath-based scraper configuration for irovedout.com.

    Strip pages are addressed as ``<url><name>/``. Image file names are
    derived from the strip page itself (see ``namer``), and the newest
    strip is located through the "last" navigation link (see ``starter``).
    """
    url = 'https://www.irovedout.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'iro'
    imageSearch = '//div[@id="comic"]//img'
    prevSearch = '//a[@class="navi comic-nav-previous navi-prev"]'

    def namer(self, imageUrl, pageUrl):
        """Return ``<comic id>-<file name>`` for the strip at pageUrl.

        The comic id is the ``class`` attribute of ``div#comic-wrap`` with
        ``comic-id-`` removed, zero-padded to five characters. The second
        part is taken from the first comic image found on the page, not
        from imageUrl.

        Raises IndexError when the page lacks the expected elements or the
        image source has fewer than eight ``/``-separated components.
        """
        page = self.getPage(pageUrl)
        wrapClass = page.xpath('//div[@id="comic-wrap"]/@class')[0]
        strip = wrapClass.replace('comic-id-', '')
        # Reuse the image XPath declared on the class so the two cannot
        # drift apart.
        source = page.xpath(self.imageSearch + '/@src')[0]
        # Component 7 is the file name for sources shaped like
        # scheme://host/a/b/c/d/<file>; presumably the site's upload
        # layout - verify before changing.
        parts = source.split('/')
        return "{0}-{1}".format(strip.zfill(5), parts[7])

    def starter(self):
        """Return last gallery link."""
        # Start from the class-level URL instead of a second hard-coded copy.
        data = self.getPage(self.url)
        return data.xpath('//a[@class="navi navi-last"]/@href')[0]


class IrregularWebcomic(BasicScraper):
    """Regex-based scraper configuration for irregularwebcomic.net.

    Strip pages are addressed as ``<url><nnn>.html``.
    """
    url = 'http://www.irregularwebcomic.net/'
    stripUrl = url + '%s.html'
    firstStripUrl = stripUrl % '1'
    # Captures the src of an <img> whose path contains "comics/" and ends in
    # png, jpg or gif. Every ``.*`` here is greedy; the inner group captures
    # the file extension.
    imageSearch = compile(r'<img .*src="(.*comics/.*(png|jpg|gif))".*>')
    # Captures the href of an anchor whose text starts with "Previous ";
    # two href shapes are accepted: /<n>.html and /cgi-bin/comic.pl?comic=<n>.
    prevSearch = compile(r'<a href="(/\d+\.html|/cgi-bin/comic\.pl\?comic=\d+)">Previous ')
    help = 'Index format: nnn'


class IslaAukate(ParserScraper):
    """XPath-based scraper configuration for Isla Aukate (default archive)
    on overlordcomic.com.
    """
    url = 'https://overlordcomic.com/archive/default/latest'
    stripUrl = 'https://overlordcomic.com/archive/default/pages/%s'
    firstStripUrl = stripUrl % '001'
    imageSearch = '//div[@id="comicpage"]/img'
    prevSearch = '//nav[@class="comicnav"]/a[text()="Prev"]'

    def namer(self, imageUrl, pageUrl):
        """Return the image file name without its trailing ``_suffix``.

        The last path component of imageUrl is split into stem and
        extension; everything from the last underscore of the stem onwards
        is dropped, e.g. ``Aukate001_abc.png`` -> ``Aukate001.png``.
        A name without an underscore is returned unchanged (previously its
        extension was appended a second time).
        """
        filename = imageUrl.rsplit('/', 1)[-1]
        stem, dot, extension = filename.rpartition('.')
        if not dot:
            # No extension to re-attach; only drop the suffix.
            return filename.rsplit('_', 1)[0]
        return stem.rsplit('_', 1)[0] + dot + extension


class IslaAukateColor(ParserScraper):
    """XPath-based scraper configuration for Isla Aukate (color archive)
    on overlordcomic.com.
    """
    url = 'https://overlordcomic.com/archive/color/latest'
    stripUrl = 'https://overlordcomic.com/archive/color/pages/%s'
    firstStripUrl = stripUrl % '001'
    imageSearch = '//div[@id="comicpage"]/img'
    prevSearch = '//nav[@class="comicnav"]/a[text()="Prev"]'

    def namer(self, imageUrl, pageUrl):
        """Return the image file name without its trailing ``_suffix``.

        Names starting with a digit get an ``Aukate`` prefix first. The
        name is then split into stem and extension and everything from the
        last underscore of the stem onwards is dropped, e.g.
        ``001_abc.png`` -> ``Aukate001.png``. A name without an underscore
        keeps its single extension (previously it was appended twice).
        """
        # Fix filenames of early comics
        filename = imageUrl.rsplit('/', 1)[-1]
        # Slice instead of index so an empty name does not raise IndexError.
        if filename[:1].isdigit():
            filename = 'Aukate' + filename
        stem, dot, extension = filename.rpartition('.')
        if not dot:
            # No extension to re-attach; only drop the suffix.
            return filename.rsplit('_', 1)[0]
        return stem.rsplit('_', 1)[0] + dot + extension


class ItsWalky(WordPressScraper):
    """Scraper configuration for itswalky.com.

    Only the start URL is declared here; all other settings come from
    ``WordPressScraper`` (imported from ``.common``).
    """
    url = 'http://www.itswalky.com/'
Update file headers The default encoding for source files is UTF-8 since Python 3, so we can drop all encoding headers. While we are at it, just replace them with SPDX headers. 2020-04-18 11:45:44 +00:00			`# SPDX-License-Identifier: MIT`
Fixup copyright years. 2016-10-28 22:21:41 +00:00			`# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs`
Updated copyright. 2014-01-05 15:50:57 +00:00			`# Copyright (C) 2012-2014 Bastian Kleineidam`
Clean up some minor warnings 2022-05-28 15:52:42 +00:00			`# Copyright (C) 2015-2022 Tobias Gruetzmacher`
Add self to authors list, update copyright headers 2020-01-13 06:34:05 +00:00			`# Copyright (C) 2019-2020 Daniel Ring`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`from re import compile, escape`
Move more comics to common WordPressScraper. 2016-04-10 21:04:34 +00:00
Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`from ..scraper import BasicScraper, ParserScraper`
Updated documentation and fix some comics. 2012-11-20 17:53:53 +00:00			`from ..util import tagre`
Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`from .common import WordPressScraper, WordPressNavi`
Initial commit to Github. 2012-06-20 19:58:13 +00:00

Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class IAmArg(BasicScraper):`
Added some comic strips and cleanup the scraper code. 2013-03-06 19:00:30 +00:00			`url = 'http://iamarg.com/'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`rurl = escape(url)`
Added some comic strips and cleanup the scraper code. 2013-03-06 19:00:30 +00:00			`stripUrl = url + '%s/'`
			`firstStripUrl = stripUrl % '2011/05/08/05082011'`
Fixed IAmArg 2015-04-15 18:43:06 +00:00			`imageSearch = compile(tagre("img", "src", r'(//iamarg.com/comics/\d+-\d+-\d+[^"]+)'))`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))`
Added some comic strips and cleanup the scraper code. 2013-03-06 19:00:30 +00:00			`help = 'Index format: yyyy/mm/dd/stripname'`


Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class ICanBarelyDraw(BasicScraper):`
Add ICanBarelyDraw 2013-07-04 10:22:20 +00:00			`url = 'http://www.icanbarelydraw.com/comic/'`
			`rurl = escape(url)`
			`stripUrl = url + '%s'`
			`firstStripUrl = stripUrl % '39'`
			`imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+-[^"]+)' % rurl))`
			`prevSearch = compile(tagre("a", "href", r'(%s\d+)' % rurl))`
			`help = 'Index format: number'`


Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class IDreamOfAJeanieBottle(WordPressScraper):`
Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-01 22:14:31 +00:00			`url = 'http://jeaniebottle.com/'`


Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class InternetWebcomic(WordPressNavi):`
Added EdmundFinney, Gaia, GaiaGerman, InternetWebcomic, NotInventedHere, RedsPlanet, RomanticallyApocalyptic, ScandinaviaAndTheWorld, TheGamerCat, Weregeek 2013-12-10 18:50:21 +00:00			`url = 'http://www.internet-webcomic.com/'`
			`stripUrl = url + '?p=%s'`
			`firstStripUrl = stripUrl % '30'`
			`help = 'Index format: n'`


Add Inverloch 2022-05-21 06:59:50 +00:00			`class Inverloch(ParserScraper):`
			`stripUrl = 'https://www.seraph-inn.com/view.php?story=inverloch&page=%s'`
			`url = stripUrl % '763'`
			`firstStripUrl = stripUrl % '2'`
			`imageSearch = '//img[@class="page"]'`
			`prevSearch = '//p[@class="comic-nav"]/a[text()=" Previous"]'`
			`multipleImagesPerStrip = True`
			`endOfLife = True`


feat: Added "I Roved Out" 2024-01-16 03:57:34 +00:00			`class IRovedOut(ParserScraper):`
			`url = 'https://www.irovedout.com/'`
			`stripUrl = url + '%s/'`
			`firstStripUrl = stripUrl % 'iro'`
			`imageSearch = '//div[@id="comic"]//img'`
			`prevSearch = '//a[@class="navi comic-nav-previous navi-prev"]'`

			`def namer(self, imageUrl, pageUrl):`
			`page = self.getPage(pageUrl)`
			`strip = page.xpath('//div[@id="comic-wrap"]/@class')[0].replace('comic-id-', '')`
			`parts = page.xpath('//div[@id="comic"]//img/@src')[0].split('/')`
fix: correcting the links 2024-01-16 04:07:05 +00:00			`return "{0}-{1}".format(strip.zfill(5), parts[7])`
feat: Added "I Roved Out" 2024-01-16 03:57:34 +00:00
			`def starter(self):`
			`"""Return last gallery link."""`
			`url = 'https://www.irovedout.com/'`
			`data = self.getPage(url)`
			`link = data.xpath('//a[@class="navi navi-last"]/@href').pop(0)`
			`return link`


Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class IrregularWebcomic(BasicScraper):`
Sort comics. 2013-03-06 19:21:10 +00:00			`url = 'http://www.irregularwebcomic.net/'`
			`stripUrl = url + '%s.html'`
Add firstStripUrls. 2013-04-10 21:57:09 +00:00			`firstStripUrl = stripUrl % '1'`
Sort comics. 2013-03-06 19:21:10 +00:00			`imageSearch = compile(r'<img .*src="(.*comics/.*(png|jpg|gif))".*>')`
			`prevSearch = compile(r'<a href="(/\d+\.html|/cgi-bin/comic\.pl\?comic=\d+)">Previous ')`
			`help = 'Index format: nnn'`
Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-01 22:14:31 +00:00

Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class IslaAukate(ParserScraper):`
Add IslaAukate and IslaAukateColor 2019-06-19 08:27:06 +00:00			`url = 'https://overlordcomic.com/archive/default/latest'`
			`stripUrl = 'https://overlordcomic.com/archive/default/pages/%s'`
			`firstStripUrl = stripUrl % '001'`
			`imageSearch = '//div[@id="comicpage"]/img'`
			`prevSearch = '//nav[@class="comicnav"]/a[text()="Prev"]'`

Fix IslaAukate and IslaAukateColor 2020-04-11 23:51:08 +00:00			`def namer(self, imageUrl, pageUrl):`
			`filename = imageUrl.rsplit('/', 1)[-1]`
			`return filename.rsplit('_', 1)[0] + '.' + filename.rsplit('.', 1)[-1]`

Add IslaAukate and IslaAukateColor 2019-06-19 08:27:06 +00:00
Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class IslaAukateColor(ParserScraper):`
Add IslaAukate and IslaAukateColor 2019-06-19 08:27:06 +00:00			`url = 'https://overlordcomic.com/archive/color/latest'`
			`stripUrl = 'https://overlordcomic.com/archive/color/pages/%s'`
			`firstStripUrl = stripUrl % '001'`
			`imageSearch = '//div[@id="comicpage"]/img'`
			`prevSearch = '//nav[@class="comicnav"]/a[text()="Prev"]'`

			`def namer(self, imageUrl, pageUrl):`
			`# Fix filenames of early comics`
			`filename = imageUrl.rsplit('/', 1)[-1]`
			`if filename[0].isdigit():`
			`filename = 'Aukate' + filename`
Fix IslaAukate and IslaAukateColor 2020-04-11 23:51:08 +00:00			`return filename.rsplit('_', 1)[0] + '.' + filename.rsplit('.', 1)[-1]`
Add IslaAukate and IslaAukateColor 2019-06-19 08:27:06 +00:00

Deprecate underscore-prefixed parent classes This is trying to strike a balance between updating as much existing classes as possible, but not making the diff too big... 2022-06-06 10:08:32 +00:00			`class ItsWalky(WordPressScraper):`
Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-01 22:14:31 +00:00			`url = 'http://www.itswalky.com/'`