Sort comics alphabetically & PEP8 style fixes.
This commit is contained in:
parent
dda920f353
commit
7f1e136d8b
17 changed files with 1186 additions and 1031 deletions
|
@ -1,31 +1,23 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape, MULTILINE
|
from re import compile, escape, MULTILINE
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import regexNamer, bounceStarter, indirectStarter
|
from ..helpers import regexNamer, bounceStarter, indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class AbsurdNotions(_BasicScraper):
|
|
||||||
baseUrl = 'http://www.absurdnotions.org/'
|
|
||||||
url = baseUrl + 'page129.html'
|
|
||||||
stripUrl = baseUrl + 'page%s.html'
|
|
||||||
firstStripUrl = stripUrl % '1'
|
|
||||||
imageSearch = compile(tagre('img', 'src', r'(an[^"]+)'))
|
|
||||||
multipleImagesPerStrip = True
|
|
||||||
prevSearch = compile(tagre('a', 'href', r'([^"]+)') + tagre('img', 'src', 'nprev\.gif'))
|
|
||||||
help = 'Index format: n (unpadded)'
|
|
||||||
|
|
||||||
|
|
||||||
class AbstruseGoose(_BasicScraper):
|
class AbstruseGoose(_BasicScraper):
|
||||||
url = 'http://abstrusegoose.com/'
|
url = 'http://abstrusegoose.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
starter = bounceStarter(url, compile(tagre('a', 'href', r'(%s\d+)' % rurl)+"Next »"))
|
starter = bounceStarter(url, compile(tagre('a', 'href', r'(%s\d+)' % rurl)+"Next »"))
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)'))
|
imageSearch = compile(tagre('img', 'src',
|
||||||
|
r'(http://abstrusegoose\.com/strips/[^<>"]+)'))
|
||||||
prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'« Previous')
|
prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'« Previous')
|
||||||
nextSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'Next »')
|
nextSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'Next »')
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
@ -38,12 +30,25 @@ class AbstruseGoose(_BasicScraper):
|
||||||
return 'c%03d-%s' % (index, name)
|
return 'c%03d-%s' % (index, name)
|
||||||
|
|
||||||
|
|
||||||
|
class AbsurdNotions(_BasicScraper):
    # Scraper definition for absurdnotions.org; strip pages are addressed as
    # page<n>.html below baseUrl.
    baseUrl = 'http://www.absurdnotions.org/'
    url = baseUrl + 'page129.html'
    stripUrl = baseUrl + 'page%s.html'
    firstStripUrl = stripUrl % '1'
    # Captures <img> src values that start with "an".
    imageSearch = compile(tagre('img', 'src', r'(an[^"]+)'))
    multipleImagesPerStrip = True
    # Captures the href of an <a> tag that is followed by the nprev.gif image.
    # Both fragments are raw strings so the regex escape "\." is handed to the
    # regex engine verbatim rather than relying on Python tolerating an
    # unknown string escape (deprecated, a SyntaxWarning on newer Pythons).
    prevSearch = compile(tagre('a', 'href', r'([^"]+)') +
                         tagre('img', 'src', r'nprev\.gif'))
    help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
class AcademyVale(_BasicScraper):
    # Scraper definition for Academy Vale; strips are served by avarch.cgi
    # with the index passed as the query string.
    url = 'http://www.imagerie.com/vale/'
    stripUrl = url + 'avarch.cgi?%s'
    firstStripUrl = stripUrl % '001'
    # Captures image names of the form avaleNNNN-NN.gif.
    imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)'))
    # The href is matched with quote="" (passed through to tagre unchanged),
    # followed by the AVNavBack.gif image. The image pattern is a raw string so
    # "\." is a regex escape and not an invalid Python string escape.
    prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") +
                         tagre('img', 'src', r'AVNavBack\.gif'))
    help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
@ -52,7 +57,8 @@ class Achewood(_BasicScraper):
|
||||||
stripUrl = url + 'index.php?date=%s'
|
stripUrl = url + 'index.php?date=%s'
|
||||||
firstStripUrl = stripUrl % '00000000'
|
firstStripUrl = stripUrl % '00000000'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)'))
|
imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous"))
|
prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)',
|
||||||
|
after="Previous"))
|
||||||
help = 'Index format: mmddyyyy'
|
help = 'Index format: mmddyyyy'
|
||||||
namer = regexNamer(compile(r'date=(\d+)'))
|
namer = regexNamer(compile(r'date=(\d+)'))
|
||||||
|
|
||||||
|
@ -70,8 +76,7 @@ class AfterStrife(_BasicScraper):
|
||||||
|
|
||||||
class AGirlAndHerFed(_BasicScraper):
|
class AGirlAndHerFed(_BasicScraper):
|
||||||
url = 'http://www.agirlandherfed.com/'
|
url = 'http://www.agirlandherfed.com/'
|
||||||
starter = bounceStarter(url,
|
starter = bounceStarter(url, compile(r'<a href="([^"]+)">[^>]+Back'))
|
||||||
compile(r'<a href="([^"]+)">[^>]+Back'))
|
|
||||||
stripUrl = url + '1.%s.html'
|
stripUrl = url + '1.%s.html'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)'))
|
imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)'))
|
||||||
|
@ -79,24 +84,16 @@ class AGirlAndHerFed(_BasicScraper):
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
class AhoyEarth(_ParserScraper):
|
|
||||||
url = 'http://www.ahoyearth.com/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '%s/'
|
|
||||||
css = True
|
|
||||||
imageSearch = '#comic-1 img'
|
|
||||||
prevSearch = '.navi-prev'
|
|
||||||
help = 'Index format: ddmmyyyy'
|
|
||||||
|
|
||||||
|
|
||||||
class AhoiPolloi(_BasicScraper):
|
class AhoiPolloi(_BasicScraper):
|
||||||
url = 'http://ahoipolloi.blogger.de/'
|
url = 'http://ahoipolloi.blogger.de/'
|
||||||
stripUrl = url + '?day=%s'
|
stripUrl = url + '?day=%s'
|
||||||
firstStripUrl = stripUrl % '20060306'
|
firstStripUrl = stripUrl % '20060306'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
lang = 'de'
|
lang = 'de'
|
||||||
imageSearch = compile(tagre('img', 'src', r'(/static/antville/ahoipolloi/images/[^"]+)'))
|
imageSearch = compile(tagre('img', 'src',
|
||||||
prevSearch = compile(tagre('a', 'href', r'(http://ahoipolloi\.blogger\.de/\?day=\d+)'))
|
r'(/static/antville/ahoipolloi/images/[^"]+)'))
|
||||||
|
prevSearch = compile(tagre('a', 'href',
|
||||||
|
r'(http://ahoipolloi\.blogger\.de/\?day=\d+)'))
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -104,6 +101,15 @@ class AhoiPolloi(_BasicScraper):
|
||||||
return imageUrl.rsplit('/', 1)[1]
|
return imageUrl.rsplit('/', 1)[1]
|
||||||
|
|
||||||
|
|
||||||
|
class AhoyEarth(_ParserScraper):
    # Scraper definition for ahoyearth.com. The two search values are plain
    # selector strings and the class sets css = True alongside them.
    help = "Index format: ddmmyyyy"

    url = "http://www.ahoyearth.com/"
    stripUrl = url + "%s/"

    css = True
    imageSearch = "#comic-1 img"
    prevSearch = ".navi-prev"
|
||||||
|
|
||||||
|
|
||||||
class AirForceBlues(_BasicScraper):
|
class AirForceBlues(_BasicScraper):
|
||||||
url = 'http://www.afblues.com/'
|
url = 'http://www.afblues.com/'
|
||||||
stripUrl = url + 'wordpress/%s/'
|
stripUrl = url + 'wordpress/%s/'
|
||||||
|
@ -115,7 +121,8 @@ class AirForceBlues(_BasicScraper):
|
||||||
|
|
||||||
class ALessonIsLearned(_BasicScraper):
|
class ALessonIsLearned(_BasicScraper):
|
||||||
url = 'http://www.alessonislearned.com/'
|
url = 'http://www.alessonislearned.com/'
|
||||||
prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")+r"[^>]+previous")
|
prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)",
|
||||||
|
quote="'")+r"[^>]+previous")
|
||||||
starter = indirectStarter(url, prevSearch)
|
starter = indirectStarter(url, prevSearch)
|
||||||
stripUrl = url + 'index.php?comic=%s'
|
stripUrl = url + 'index.php?comic=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
|
@ -127,7 +134,8 @@ class AlienLovesPredator(_BasicScraper):
|
||||||
url = 'http://alienlovespredator.com/'
|
url = 'http://alienlovespredator.com/'
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '2004/10/12/unavoidable-delay'
|
firstStripUrl = stripUrl % '2004/10/12/unavoidable-delay'
|
||||||
imageSearch = compile(tagre("img", "src", r'([^"]+)', after='border="1" alt="" width="750"'))
|
imageSearch = compile(tagre("img", "src", r'([^"]+)',
|
||||||
|
after='border="1" alt="" width="750"'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
|
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
|
||||||
help = 'Index format: yyyy/mm/dd/name'
|
help = 'Index format: yyyy/mm/dd/name'
|
||||||
|
|
||||||
|
@ -244,10 +252,13 @@ class Antics(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '?p=%s'
|
stripUrl = url + '?p=%s'
|
||||||
firstStripUrl = stripUrl % '3'
|
firstStripUrl = stripUrl % '3'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src",
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after='prev'))
|
r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
|
||||||
|
after='prev'))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
class AoiHouse(_ParserScraper):
|
class AoiHouse(_ParserScraper):
|
||||||
url = 'http://www.aoihouse.net/'
|
url = 'http://www.aoihouse.net/'
|
||||||
imageSearch = '//div[@id="comic"]/a[2]/img'
|
imageSearch = '//div[@id="comic"]/a[2]/img'
|
||||||
|
@ -270,10 +281,19 @@ class ARedTailsDream(_BasicScraper):
|
||||||
url = baseUrl + 'comic/recent.php'
|
url = baseUrl + 'comic/recent.php'
|
||||||
imageSearch = compile(tagre('img', 'src', r'(chapter.+?/eng[^"]*)'))
|
imageSearch = compile(tagre('img', 'src', r'(chapter.+?/eng[^"]*)'))
|
||||||
prevSearch = compile(tagre('a', 'href', r'(page\d+\.php)') +
|
prevSearch = compile(tagre('a', 'href', r'(page\d+\.php)') +
|
||||||
tagre("img", "src", r'.*?aprev.*?'))
|
tagre("img", "src", r'.*?aprev.*?'))
|
||||||
help = 'Index format: nn'
|
help = 'Index format: nn'
|
||||||
|
|
||||||
|
|
||||||
|
class ASkeweredParadise(_BasicScraper):
    # Scraper definition for aspcomics.net; strips live under comic/<index>.
    url = 'http://aspcomics.net/'
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % '001'
    # Captures the asp<number>.jpg image URL; the trailing [^"]+ consumes
    # whatever follows the .jpg inside the attribute without capturing it.
    imageSearch = compile(tagre(
        "img", "src",
        r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+'))
    # Raw string: "\d" must reach the regex engine as a digit class instead of
    # being an invalid (deprecated) Python string escape.
    prevSearch = compile(tagre("a", "href", r"(/comic/\d+)") +
                         r"[^>]+Previous")
    help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
class ASofterWorld(_ParserScraper):
|
class ASofterWorld(_ParserScraper):
|
||||||
url = 'http://www.asofterworld.com/'
|
url = 'http://www.asofterworld.com/'
|
||||||
stripUrl = url + 'index.php?id=%s'
|
stripUrl = url + 'index.php?id=%s'
|
||||||
|
@ -283,7 +303,6 @@ class ASofterWorld(_ParserScraper):
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class AstronomyPOTD(_BasicScraper):
|
class AstronomyPOTD(_BasicScraper):
|
||||||
baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/'
|
baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/'
|
||||||
url = baseUrl + 'astropix.html'
|
url = baseUrl + 'astropix.html'
|
||||||
|
@ -299,22 +318,13 @@ class AstronomyPOTD(_BasicScraper):
|
||||||
def shouldSkipUrl(self, url, data):
    """Skip pages without images."""
    # Indexes of pages marked as video-only in the original list.
    video_indexes = ('130217', '130218', '130226', '130424')
    skipped = tuple(self.stripUrl % index for index in video_indexes)
    return url in skipped
|
||||||
|
|
||||||
@classmethod
def namer(cls, imageUrl, pageUrl):
    """Build '<page stem minus its first two characters>-<image stem>'.

    The stem of each URL is the text after its last '/' and before the
    first '.' in that final path segment.
    """
    page_stem = pageUrl.rsplit('/', 1)[-1].partition('.')[0]
    image_stem = imageUrl.rsplit('/', 1)[-1].partition('.')[0]
    return '%s-%s' % (page_stem[2:], image_stem)
|
||||||
|
|
||||||
|
|
||||||
class ASkeweredParadise(_BasicScraper):
|
|
||||||
url = 'http://aspcomics.net/'
|
|
||||||
stripUrl = url + 'comic/%s'
|
|
||||||
firstStripUrl = stripUrl % '001'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+'))
|
|
||||||
prevSearch = compile(tagre("a", "href", "(/comic/\d+)")+r"[^>]+Previous")
|
|
||||||
help = 'Index format: nnn'
|
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
from ..util import tagre, getPageContent
|
from ..util import tagre, getPageContent
|
||||||
|
@ -22,7 +24,8 @@ class BadassMuthas(_BasicScraper):
|
||||||
stripUrl = url + '?%s'
|
stripUrl = url + '?%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/images/comicsissue[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(/images/comicsissue[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'/images/comicsbuttonBack\.gif'))
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') +
|
||||||
|
tagre("img", "src", r'/images/comicsbuttonBack\.gif'))
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
@ -51,7 +54,8 @@ class Bearmageddon(_BasicScraper):
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '2011/08/01/page-1'
|
firstStripUrl = stripUrl % '2011/08/01/page-1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after='navi-prev'))
|
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
|
||||||
|
after='navi-prev'))
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
|
||||||
|
@ -63,7 +67,9 @@ class Beetlebum(_BasicScraper):
|
||||||
starter = indirectStarter(url, compile(tagre('a', 'href', r'(%s\d{4}/\d{2}/\d{2}/[^"]+)' % rurl, after='bookmark')))
|
starter = indirectStarter(url, compile(tagre('a', 'href', r'(%s\d{4}/\d{2}/\d{2}/[^"]+)' % rurl, after='bookmark')))
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)'))
|
imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)'))
|
||||||
prevSearch = compile(tagre('a', 'href', r'(%s\d{4}/\d{2}/\d{2}/[^"]*)' % rurl, after='prev'))
|
prevSearch = compile(tagre('a', 'href',
|
||||||
|
r'(%s\d{4}/\d{2}/\d{2}/[^"]*)' % rurl,
|
||||||
|
after='prev'))
|
||||||
help = 'Index format: yyyy/mm/dd/striptitle'
|
help = 'Index format: yyyy/mm/dd/striptitle'
|
||||||
lang = 'de'
|
lang = 'de'
|
||||||
|
|
||||||
|
@ -71,7 +77,7 @@ class Beetlebum(_BasicScraper):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
indexes = tuple(pageUrl.rstrip('/').split('/')[-4:])
|
indexes = tuple(pageUrl.rstrip('/').split('/')[-4:])
|
||||||
name = '%s-%s-%s-%s' % indexes
|
name = '%s-%s-%s-%s' % indexes
|
||||||
name = name + '_' + imageUrl.split( '/' )[-1]
|
name = name + '_' + imageUrl.split('/')[-1]
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|
||||||
|
@ -89,14 +95,16 @@ class BetweenFailures(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + 'comics1/%s'
|
stripUrl = url + 'comics1/%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%scomics1/[^"]+)' % rurl, after="previous"))
|
prevSearch = compile(tagre("a", "href", r'(%scomics1/[^"]+)' % rurl,
|
||||||
|
after="previous"))
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
|
||||||
class BigFatWhale(_BasicScraper):
|
class BigFatWhale(_BasicScraper):
|
||||||
url = 'http://www.bigfatwhale.com/'
|
url = 'http://www.bigfatwhale.com/'
|
||||||
stripUrl = url + 'archives/bfw_%s.htm'
|
stripUrl = url + 'archives/bfw_%s.htm'
|
||||||
imageSearch = compile(tagre("img", "src", r'(archives/bfw_[^"]+|bfw_[^"]+)'))
|
imageSearch = compile(tagre("img", "src",
|
||||||
|
r'(archives/bfw_[^"]+|bfw_[^"]+)'))
|
||||||
prevSearch = compile(r' HREF="(.+?)" TARGET="_top" TITLE="Previous Cartoon"')
|
prevSearch = compile(r' HREF="(.+?)" TARGET="_top" TITLE="Previous Cartoon"')
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
@ -125,7 +133,8 @@ class BizarreUprising(_BasicScraper):
|
||||||
stripUrl = url + 'view/%s'
|
stripUrl = url + 'view/%s'
|
||||||
firstStripUrl = stripUrl % '1/awakening-splash'
|
firstStripUrl = stripUrl % '1/awakening-splash'
|
||||||
imageSearch = compile(tagre("img", "src", r'(comic/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(comic/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(view/\d+/[^"]+)') + tagre("img", "src", r'images/b_prev\.gif'))
|
prevSearch = compile(tagre("a", "href", r'(view/\d+/[^"]+)') +
|
||||||
|
tagre("img", "src", r'images/b_prev\.gif'))
|
||||||
help = 'Index format: n/name'
|
help = 'Index format: n/name'
|
||||||
|
|
||||||
|
|
||||||
|
@ -133,7 +142,8 @@ class BlankIt(_BasicScraper):
|
||||||
url = 'http://blankitcomics.com/'
|
url = 'http://blankitcomics.com/'
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '0001'
|
firstStripUrl = stripUrl % '0001'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://blankitcomics\.com/bicomics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src",
|
||||||
|
r'(http://blankitcomics\.com/bicomics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"'))
|
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"'))
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
@ -151,26 +161,28 @@ class Blip(_BasicScraper):
|
||||||
if prevUrl:
|
if prevUrl:
|
||||||
return prevUrl.replace("www.blipcomic.com", "blipcomic.com")
|
return prevUrl.replace("www.blipcomic.com", "blipcomic.com")
|
||||||
|
|
||||||
|
|
||||||
class BloomingFaeries(_BasicScraper):
    # Scraper definition for bloomingfaeries.com (flagged adult = True).
    adult = True
    url = 'http://www.bloomingfaeries.com/'
    stripUrl = url + 'comic/public/%s/'
    firstStripUrl = stripUrl % "pit-stop"
    # Dots in the host name are escaped so they match literal dots only,
    # consistent with the other absolute-URL patterns in this module.
    imageSearch = compile(tagre(
        "img", "src",
        r'(http://www\.bloomingfaeries\.com/wp-content/uploads[^"]+)',
        after='title'))
    prevSearch = compile(tagre("a", "href", r'([^"]+)',
                               after='comic-nav-base comic-nav-previous'))
    help = 'Index format: stripname'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return 'BF<comic id>_<image file name>' for a strip image.

        The comic id is read from a div whose class attribute matches
        'comic-id-...' in the content fetched from pageUrl. Returns None
        when no such div is found.
        """
        filename = imageUrl.split('/')[-1]
        # Named comicIdSearch rather than "re" so the local does not shadow
        # the name conventionally used for the regex module.
        comicIdSearch = compile(tagre("div", "class", r'comic-id-([^"]+)'))
        content = getPageContent(pageUrl, cls.session)
        match = comicIdSearch.search(content)
        if not match:
            return None
        return "BF%s_%s" % (match.group(1), filename)
|
||||||
|
|
||||||
|
|
||||||
class BMovieComic(_BasicScraper):
|
class BMovieComic(_BasicScraper):
|
||||||
url = 'http://www.bmoviecomic.com/'
|
url = 'http://www.bmoviecomic.com/'
|
||||||
|
@ -187,7 +199,9 @@ class BobWhite(_BasicScraper):
|
||||||
stripUrl = url + '?webcomic_post=%s'
|
stripUrl = url + '?webcomic_post=%s'
|
||||||
firstStripUrl = stripUrl % '20110504'
|
firstStripUrl = stripUrl % '20110504'
|
||||||
imageSearch = compile(tagre("img", "src", r"(%swp/wp-content/webcomic/untitled/\d+.jpg)" % rurl))
|
imageSearch = compile(tagre("img", "src", r"(%swp/wp-content/webcomic/untitled/\d+.jpg)" % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", "(%s\?webcomic_post=\d+)" % rurl)+r'[^"]+Previous')
|
prevSearch = compile(tagre("a", "href",
|
||||||
|
"(%s\?webcomic_post=\d+)" % rurl) +
|
||||||
|
r'[^"]+Previous')
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
@ -214,9 +228,11 @@ class BoxerHockey(_BasicScraper):
|
||||||
url = 'http://boxerhockey.fireball20xl.com/'
|
url = 'http://boxerhockey.fireball20xl.com/'
|
||||||
stripUrl = url + '?id=%s'
|
stripUrl = url + '?id=%s'
|
||||||
firstStripUrl = stripUrl % '56'
|
firstStripUrl = stripUrl % '56'
|
||||||
imageSearch = compile(tagre("img", "src", r'(img/comic/[^"]+)', after="comicimg"))
|
imageSearch = compile(tagre("img", "src", r'(img/comic/[^"]+)',
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://www\.boxerhockey\.com/\?id=\d+)') +
|
after="comicimg"))
|
||||||
r'[^>]+Previous')
|
prevSearch = compile(tagre("a", "href",
|
||||||
|
r'(http://www\.boxerhockey\.com/\?id=\d+)') +
|
||||||
|
r'[^>]+Previous')
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -230,7 +246,8 @@ class BoyOnAStickAndSlither(_BasicScraper):
|
||||||
stripUrl = url + 'page/%s'
|
stripUrl = url + 'page/%s'
|
||||||
firstStripUrl = stripUrl % '2'
|
firstStripUrl = stripUrl % '2'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)'))
|
imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/page/\d+)') + "<span>Next page")
|
prevSearch = compile(tagre("a", "href", r'(/page/\d+)') +
|
||||||
|
"<span>Next page")
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -276,16 +293,6 @@ class BrentalFlossGuest(BrentalFloss):
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
|
|
||||||
|
|
||||||
class Brink(_BasicScraper):
|
|
||||||
url = 'http://paperfangs.com/brink/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '?p=%s'
|
|
||||||
firstStripUrl = stripUrl % '5'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
|
||||||
help = 'Index format: number'
|
|
||||||
|
|
||||||
|
|
||||||
class BrightlyWound(_BasicScraper):
|
class BrightlyWound(_BasicScraper):
|
||||||
baseUrl = 'http://www.brightlywound.com/'
|
baseUrl = 'http://www.brightlywound.com/'
|
||||||
url = baseUrl + '?comic=137'
|
url = baseUrl + '?comic=137'
|
||||||
|
@ -296,6 +303,16 @@ class BrightlyWound(_BasicScraper):
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
class Brink(_BasicScraper):
    # Scraper definition for the Brink comic hosted on paperfangs.com;
    # strips are addressed by the ?p=<number> query parameter.
    help = "Index format: number"

    url = "http://paperfangs.com/brink/"
    rurl = escape(url)  # regex-escaped base URL, interpolated into patterns
    stripUrl = url + "?p=%s"
    firstStripUrl = stripUrl % "5"

    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")
    )
|
||||||
|
|
||||||
|
|
||||||
class ButtercupFestival(_ParserScraper):
|
class ButtercupFestival(_ParserScraper):
|
||||||
url = 'http://www.buttercupfestival.com/'
|
url = 'http://www.buttercupfestival.com/'
|
||||||
stripUrl = url + '%s.htm'
|
stripUrl = url + '%s.htm'
|
||||||
|
@ -305,16 +322,6 @@ class ButtercupFestival(_ParserScraper):
|
||||||
help = 'Index format: 2-number'
|
help = 'Index format: 2-number'
|
||||||
|
|
||||||
|
|
||||||
class ButterSafe(_BasicScraper):
|
|
||||||
url = 'http://buttersafe.com/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '%s/'
|
|
||||||
firstStripUrl = stripUrl % '2007/04/03/breakfast-sad-turtle'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\d+\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
|
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
|
||||||
|
|
||||||
|
|
||||||
class ButternutSquash(_BasicScraper):
|
class ButternutSquash(_BasicScraper):
|
||||||
url = 'http://www.butternutsquash.net/'
|
url = 'http://www.butternutsquash.net/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
|
@ -323,3 +330,14 @@ class ButternutSquash(_BasicScraper):
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
||||||
help = 'Index format: yyyy/mm/dd/strip-name-author-name'
|
help = 'Index format: yyyy/mm/dd/strip-name-author-name'
|
||||||
|
|
||||||
|
|
||||||
|
class ButterSafe(_BasicScraper):
    # Scraper definition for buttersafe.com; strips are addressed as
    # yyyy/mm/dd/stripname below the base URL.
    help = "Index format: yyyy/mm/dd/stripname"

    url = "http://buttersafe.com/"
    rurl = escape(url)  # regex-escaped base URL, interpolated into patterns
    stripUrl = url + "%s/"
    firstStripUrl = stripUrl % "2007/04/03/breakfast-sad-turtle"

    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    # NOTE(review): the leading "\d+\d+" requires at least two digits for the
    # first path component; kept exactly as-is -- confirm whether a single
    # "\d+" was intended.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+\d+/\d+/\d+/[^"]+)' % rurl, after="prev")
    )
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015 Tobias Gruetzmacher
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
|
@ -12,6 +12,7 @@ from ..helpers import bounceStarter, indirectStarter
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from .wordpress import _WordpressScraper
|
from .wordpress import _WordpressScraper
|
||||||
|
|
||||||
|
|
||||||
class Caggage(_BasicScraper):
|
class Caggage(_BasicScraper):
|
||||||
url = 'http://caggagecomic.com/'
|
url = 'http://caggagecomic.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
|
@ -21,6 +22,7 @@ class Caggage(_BasicScraper):
|
||||||
prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="prev"))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
class CampComic(_BasicScraper):
|
class CampComic(_BasicScraper):
|
||||||
url = 'http://campcomic.com/comic/'
|
url = 'http://campcomic.com/comic/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
|
@ -30,24 +32,28 @@ class CampComic(_BasicScraper):
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btn btnPrev"))
|
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btn btnPrev"))
|
||||||
help = 'Index Format: number'
|
help = 'Index Format: number'
|
||||||
|
|
||||||
|
|
||||||
class CaptainSNES(_BasicScraper):
    # Scraper definition for captainsnes.com; strips are addressed as
    # yyyy/mm/dd/nnn-stripname below the base URL.
    help = "Index format: yyyy/mm/dd/nnn-stripname"
    multipleImagesPerStrip = True

    url = "http://www.captainsnes.com/"
    rurl = escape(url)  # regex-escaped base URL, interpolated into patterns
    stripUrl = url + "%s/"
    firstStripUrl = stripUrl % "2001/07/10/the-mistake"

    # The image pattern passes quote="'" to tagre and excludes "'" from the
    # captured value.
    imageSearch = compile(
        tagre("img", "src", r"(%scomics/[^']+)" % rurl, quote="'")
    )
    # Captures the href of an <a> tag followed by a span with class "prev".
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl) +
        tagre("span", "class", "prev")
    )
|
||||||
|
|
||||||
|
|
||||||
class Carciphona(_BasicScraper):
|
class Carciphona(_BasicScraper):
|
||||||
url = 'http://carciphona.com/'
|
url = 'http://carciphona.com/'
|
||||||
stripUrl = url + 'view.php?page=%s&chapter=%s'
|
imageSearch = compile(tagre("div", "style",
|
||||||
imageSearch = compile(tagre("div", "style", r'background-image:url\((_pages[^)]*)\)'))
|
r'background-image:url\((_pages[^)]*)\)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(view\.php\?[^"]*)', after="prevarea"))
|
prevSearch = compile(tagre("a", "href", r'(view\.php\?[^"]*)',
|
||||||
latestSearch = compile(tagre("a", "href", r'(view\.php\?page=[0-9]+[^"]*)'))
|
after="prevarea"))
|
||||||
help = 'Index format: None'
|
latestSearch = compile(tagre("a", "href",
|
||||||
|
r'(view\.php\?page=[0-9]+[^"]*)'))
|
||||||
starter = indirectStarter(url, latestSearch)
|
starter = indirectStarter(url, latestSearch)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -61,8 +67,8 @@ class CaseyAndAndy(_BasicScraper):
|
||||||
stripUrl = url + 'view.php?strip=%s'
|
stripUrl = url + 'view.php?strip=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(Strip\d+\.gif)'))
|
imageSearch = compile(tagre("img", "src", r'(Strip\d+\.gif)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(view\.php\?strip=\d+)')
|
prevSearch = compile(tagre("a", "href", r'(view\.php\?strip=\d+)') +
|
||||||
+ tagre("img", "src", r'previous\.gif'))
|
tagre("img", "src", r'previous\.gif'))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
|
@ -70,8 +76,10 @@ class CasuallyKayla(_BasicScraper):
|
||||||
url = 'http://casuallykayla.com/'
|
url = 'http://casuallykayla.com/'
|
||||||
stripUrl = url + '?p=%s'
|
stripUrl = url + '?p=%s'
|
||||||
firstStripUrl = stripUrl % '89'
|
firstStripUrl = stripUrl % '89'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://casuallykayla\.com/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src",
|
||||||
prevSearch = compile(tagre("div", "class", r'nav-previous') + tagre("a", "href", r'([^"]+)'))
|
r'(http://casuallykayla\.com/comics/[^"]+)'))
|
||||||
|
prevSearch = compile(tagre("div", "class", r'nav-previous') +
|
||||||
|
tagre("a", "href", r'([^"]+)'))
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
@ -82,7 +90,9 @@ class Catalyst(_BasicScraper):
|
||||||
stripUrl = baseUrl + "comic.php?comic_id=%s"
|
stripUrl = baseUrl + "comic.php?comic_id=%s"
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'((?:%s)?comics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'((?:%s)?comics/[^"]+)' % rurl))
|
||||||
prevSearch = compile("<center>" + tagre("a", "href", r'(%scomic\.php\?comic_id=\d+)' % rurl))
|
prevSearch = compile("<center>" +
|
||||||
|
tagre("a", "href",
|
||||||
|
r'(%scomic\.php\?comic_id=\d+)' % rurl))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
|
@ -101,10 +111,12 @@ class CatAndGirl(_BasicScraper):
|
||||||
self.stripUrl % '4299',
|
self.stripUrl % '4299',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CatNine(_WordpressScraper):
|
class CatNine(_WordpressScraper):
|
||||||
url = 'http://cat-nine.net'
|
url = 'http://cat-nine.net'
|
||||||
firstStripUrl = 'http://cat-nine.net/comic/episode-1/first-day-for-everything/'
|
firstStripUrl = 'http://cat-nine.net/comic/episode-1/first-day-for-everything/'
|
||||||
|
|
||||||
|
|
||||||
class CatVersusHuman(_ParserScraper):
|
class CatVersusHuman(_ParserScraper):
|
||||||
url = 'http://www.catversushuman.com'
|
url = 'http://www.catversushuman.com'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
|
@ -130,7 +142,8 @@ class Champ2010(_BasicScraper):
|
||||||
stripUrl = baseUrl + '%s.html'
|
stripUrl = baseUrl + '%s.html'
|
||||||
firstStripUrl = stripUrl % 'champ1-1-10-fuck'
|
firstStripUrl = stripUrl % 'champ1-1-10-fuck'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="Previous"))
|
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
|
||||||
|
after="Previous"))
|
||||||
help = 'Index format: yy-dd-mm'
|
help = 'Index format: yy-dd-mm'
|
||||||
|
|
||||||
|
|
||||||
|
@ -138,8 +151,10 @@ class ChannelAte(_BasicScraper):
|
||||||
url = 'http://www.channelate.com/'
|
url = 'http://www.channelate.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src",
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
|
r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
||||||
|
prevSearch = compile(tagre("a", "href",
|
||||||
|
r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
|
||||||
help = 'Index format: yyyy/mm/dd/name'
|
help = 'Index format: yyyy/mm/dd/name'
|
||||||
|
|
||||||
|
|
||||||
|
@ -186,13 +201,15 @@ class CigarroAndCerveja(_ParserScraper):
|
||||||
imageSearch = '//div[@id="comic"]//img',
|
imageSearch = '//div[@id="comic"]//img',
|
||||||
prevSearch = '//a[contains(text()," Prev")]',
|
prevSearch = '//a[contains(text()," Prev")]',
|
||||||
|
|
||||||
|
|
||||||
class Collar6(_BasicScraper):
|
class Collar6(_BasicScraper):
|
||||||
url = 'http://collar6.com/'
|
url = 'http://collar6.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + 'archive/%s'
|
stripUrl = url + 'archive/%s'
|
||||||
firstStripUrl = stripUrl % 'collar-6-187'
|
firstStripUrl = stripUrl % 'collar-6-187'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/collar6/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/collar6/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous"))
|
prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl,
|
||||||
|
after="previous"))
|
||||||
help = 'Index format: <name>'
|
help = 'Index format: <name>'
|
||||||
|
|
||||||
|
|
||||||
|
@ -211,7 +228,8 @@ class Commissioned(_BasicScraper):
|
||||||
stripUrl = url + '?p=%s'
|
stripUrl = url + '?p=%s'
|
||||||
firstStripUrl = stripUrl % '139'
|
firstStripUrl = stripUrl % '139'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
|
||||||
|
after="prev"))
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
@ -222,7 +240,7 @@ class CompanyY(_BasicScraper):
|
||||||
firstStripUrl = stripUrl % '2009/08/14/coming-soon'
|
firstStripUrl = stripUrl % '2009/08/14/coming-soon'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("div", "class", r"nav-previous") +
|
prevSearch = compile(tagre("div", "class", r"nav-previous") +
|
||||||
tagre("a", "href", r'(%s[^"]+)' % rurl))
|
tagre("a", "href", r'(%s[^"]+)' % rurl))
|
||||||
help = 'Index format: yyyy/mm/dd/strip-name'
|
help = 'Index format: yyyy/mm/dd/strip-name'
|
||||||
|
|
||||||
|
|
||||||
|
@ -268,6 +286,15 @@ class CrapIDrewOnMyLunchBreak(_BasicScraper):
|
||||||
help = 'Index format: yyyy/mm/dd/name'
|
help = 'Index format: yyyy/mm/dd/name'
|
||||||
|
|
||||||
|
|
||||||
|
class CrimsonDark(_BasicScraper):
|
||||||
|
url = 'http://www.davidcsimon.com/crimsondark/'
|
||||||
|
stripUrl = url + 'index.php?view=comic&strip_id=%s'
|
||||||
|
firstStripUrl = stripUrl % '1'
|
||||||
|
imageSearch = compile(r'src="(.+?strips/.+?)"')
|
||||||
|
prevSearch = compile(r'<a href=[\'"](/crimsondark/index\.php\?view=comic&strip_id=\d+)[\'"]><img src=[\'"]themes/cdtheme/images/active_prev.png[\'"]')
|
||||||
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
class CtrlAltDel(_BasicScraper):
|
class CtrlAltDel(_BasicScraper):
|
||||||
url = 'http://www.cad-comic.com/cad/'
|
url = 'http://www.cad-comic.com/cad/'
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
|
@ -290,22 +317,13 @@ class CtrlAltDelSillies(CtrlAltDel):
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
|
|
||||||
|
|
||||||
class CrimsonDark(_BasicScraper):
|
|
||||||
url = 'http://www.davidcsimon.com/crimsondark/'
|
|
||||||
stripUrl = url + 'index.php?view=comic&strip_id=%s'
|
|
||||||
firstStripUrl = stripUrl % '1'
|
|
||||||
imageSearch = compile(r'src="(.+?strips/.+?)"')
|
|
||||||
prevSearch = compile(r'<a href=[\'"](/crimsondark/index\.php\?view=comic&strip_id=\d+)[\'"]><img src=[\'"]themes/cdtheme/images/active_prev.png[\'"]')
|
|
||||||
help = 'Index format: n (unpadded)'
|
|
||||||
|
|
||||||
|
|
||||||
class CucumberQuest(_BasicScraper):
|
class CucumberQuest(_BasicScraper):
|
||||||
url = 'http://cucumber.gigidigi.com/'
|
url = 'http://cucumber.gigidigi.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + 'cq/%s/'
|
stripUrl = url + 'cq/%s/'
|
||||||
firstStripUrl = stripUrl % 'page-1'
|
firstStripUrl = stripUrl % 'page-1'
|
||||||
starter = indirectStarter(url + 'recent.html',
|
starter = indirectStarter(url + 'recent.html',
|
||||||
compile(r'window\.location="(/cq/[^"]+/)"'))
|
compile(r'window\.location="(/cq/[^"]+/)"'))
|
||||||
imageSearch = (
|
imageSearch = (
|
||||||
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)),
|
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)),
|
||||||
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)),
|
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)),
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,7 +1,9 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape, IGNORECASE
|
from re import compile, escape, IGNORECASE
|
||||||
|
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
|
@ -18,13 +20,16 @@ class EarthsongSaga(_ParserScraper):
|
||||||
@classmethod
|
@classmethod
|
||||||
def fetchUrls(cls, url, data, urlSearch):
|
def fetchUrls(cls, url, data, urlSearch):
|
||||||
urls = super(EarthsongSaga, cls).fetchUrls(url, data, urlSearch)
|
urls = super(EarthsongSaga, cls).fetchUrls(url, data, urlSearch)
|
||||||
return [x.replace('earthsongsaga.com/../', 'earthsongsaga.com/') for x in urls]
|
return [x.replace('earthsongsaga.com/../',
|
||||||
|
'earthsongsaga.com/') for x in urls]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(\d+)\.\w+$', IGNORECASE).search(imageUrl)
|
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(\d+)\.\w+$',
|
||||||
|
IGNORECASE).search(imageUrl)
|
||||||
if not imgmatch:
|
if not imgmatch:
|
||||||
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/ch(\d+)cover\.\w+$', IGNORECASE).search(imageUrl)
|
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/ch(\d+)cover\.\w+$',
|
||||||
|
IGNORECASE).search(imageUrl)
|
||||||
suffix = "cover"
|
suffix = "cover"
|
||||||
else:
|
else:
|
||||||
suffix = ""
|
suffix = ""
|
||||||
|
@ -33,16 +38,18 @@ class EarthsongSaga(_ParserScraper):
|
||||||
int(imgmatch.group(3)), suffix)
|
int(imgmatch.group(3)), suffix)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class EatLiver(_BasicScraper):
|
class EatLiver(_BasicScraper):
|
||||||
url = 'http://www.eatliver.com/'
|
url = 'http://www.eatliver.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
starter = indirectStarter(url, compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
|
starter = indirectStarter(url, compile(
|
||||||
|
tagre("a", "href", r'(i\.php\?n=\d+)') +
|
||||||
tagre("img", "src", r'img/small/[^"]+') + r"</a>\s*<br"))
|
tagre("img", "src", r'img/small/[^"]+') + r"</a>\s*<br"))
|
||||||
stripUrl = url + "i.php?n=%s"
|
stripUrl = url + "i.php?n=%s"
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl, before="image_src"))
|
imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl,
|
||||||
prevSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') + "<< Previous")
|
before="image_src"))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
|
||||||
|
"<< Previous")
|
||||||
|
|
||||||
|
|
||||||
class EatThatToast(_BasicScraper):
|
class EatThatToast(_BasicScraper):
|
||||||
|
@ -64,6 +71,7 @@ class EdibleDirt(_BasicScraper):
|
||||||
prevSearch = compile(tagre("a", "href", r"(index\.php\?id=\d+)")+"Previous")
|
prevSearch = compile(tagre("a", "href", r"(index\.php\?id=\d+)")+"Previous")
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
class EdmundFinney(_ParserScraper):
|
class EdmundFinney(_ParserScraper):
|
||||||
url = 'http://eqcomics.com/'
|
url = 'http://eqcomics.com/'
|
||||||
firstStripUrl = url + '2009/03/08/sunday-aliens/'
|
firstStripUrl = url + '2009/03/08/sunday-aliens/'
|
||||||
|
@ -71,6 +79,7 @@ class EdmundFinney(_ParserScraper):
|
||||||
prevSearch = '//a[@class="navi navi-prev"]'
|
prevSearch = '//a[@class="navi navi-prev"]'
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
|
||||||
class EerieCuties(_BasicScraper):
|
class EerieCuties(_BasicScraper):
|
||||||
url = 'http://www.eeriecuties.com/'
|
url = 'http://www.eeriecuties.com/'
|
||||||
stripUrl = url + 'strips-ec/%s'
|
stripUrl = url + 'strips-ec/%s'
|
||||||
|
@ -79,37 +88,13 @@ class EerieCuties(_BasicScraper):
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
|
||||||
class Eriadan(_BasicScraper):
|
|
||||||
url = 'http://www.shockdom.com/webcomics/eriadan/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '%s/'
|
|
||||||
multipleImagesPerStrip = True
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(%sfiles/[^"]+)' % rurl, after='width="[68]00"'))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
|
|
||||||
help = 'Index format: yyyy/mm/dd/nnn (unpadded)'
|
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
|
||||||
return url in (
|
|
||||||
self.stripUrl % "2013/04/02/istruzioni-per-il-non-uso", # video
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class Erstwhile(_ParserScraper):
|
|
||||||
url = 'http://www.erstwhiletales.com/'
|
|
||||||
stripUrl = url + '%s/'
|
|
||||||
css = True
|
|
||||||
imageSearch = 'div.comicpane a img'
|
|
||||||
prevSearch = 'a.navi-prev'
|
|
||||||
help = 'Index format: title-nn'
|
|
||||||
|
|
||||||
|
|
||||||
class ElfOnlyInn(_BasicScraper):
|
class ElfOnlyInn(_BasicScraper):
|
||||||
url = 'http://www.elfonlyinn.net/'
|
url = 'http://www.elfonlyinn.net/'
|
||||||
stripUrl = url + 'd/%s.html'
|
stripUrl = url + 'd/%s.html'
|
||||||
firstStripUrl = stripUrl % '20020523'
|
firstStripUrl = stripUrl % '20020523'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') +
|
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') +
|
||||||
tagre("img", "src", r'/images/previous_day\.gif'))
|
tagre("img", "src", r'/images/previous_day\.gif'))
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
@ -117,8 +102,10 @@ class ElGoonishShive(_BasicScraper):
|
||||||
name = 'KeenSpot/ElGoonishShive'
|
name = 'KeenSpot/ElGoonishShive'
|
||||||
url = 'http://www.egscomics.com/'
|
url = 'http://www.egscomics.com/'
|
||||||
stripUrl = url + 'index.php?id=%s'
|
stripUrl = url + 'index.php?id=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)', after="comic"))
|
imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)',
|
||||||
prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)', after="prev"))
|
after="comic"))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)',
|
||||||
|
after="prev"))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
|
@ -126,8 +113,10 @@ class ElGoonishShiveNP(_BasicScraper):
|
||||||
name = 'KeenSpot/ElGoonishShiveNP'
|
name = 'KeenSpot/ElGoonishShiveNP'
|
||||||
url = 'http://www.egscomics.com/egsnp.php'
|
url = 'http://www.egscomics.com/egsnp.php'
|
||||||
stripUrl = url + '?id=%s'
|
stripUrl = url + '?id=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)', after="comic"))
|
imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)',
|
||||||
prevSearch = compile(tagre("a", "href", r'(/egsnp\.php\?id=\d+)', after="prev"))
|
after="comic"))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(/egsnp\.php\?id=\d+)',
|
||||||
|
after="prev"))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
|
@ -150,6 +139,22 @@ class EmergencyExit(_BasicScraper):
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
class Eriadan(_BasicScraper):
|
||||||
|
url = 'http://www.shockdom.com/webcomics/eriadan/'
|
||||||
|
rurl = escape(url)
|
||||||
|
stripUrl = url + '%s/'
|
||||||
|
multipleImagesPerStrip = True
|
||||||
|
imageSearch = compile(tagre("img", "src", r'(%sfiles/[^"]+)' % rurl,
|
||||||
|
after='width="[68]00"'))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
|
||||||
|
help = 'Index format: yyyy/mm/dd/nnn (unpadded)'
|
||||||
|
|
||||||
|
def shouldSkipUrl(self, url, data):
|
||||||
|
return url in (
|
||||||
|
self.stripUrl % "2013/04/02/istruzioni-per-il-non-uso", # video
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ErrantStory(_BasicScraper):
|
class ErrantStory(_BasicScraper):
|
||||||
url = 'http://www.errantstory.com/'
|
url = 'http://www.errantstory.com/'
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
|
@ -158,6 +163,15 @@ class ErrantStory(_BasicScraper):
|
||||||
help = 'Index format: yyyy-mm-dd/num'
|
help = 'Index format: yyyy-mm-dd/num'
|
||||||
|
|
||||||
|
|
||||||
|
class Erstwhile(_ParserScraper):
|
||||||
|
url = 'http://www.erstwhiletales.com/'
|
||||||
|
stripUrl = url + '%s/'
|
||||||
|
css = True
|
||||||
|
imageSearch = 'div.comicpane a img'
|
||||||
|
prevSearch = 'a.navi-prev'
|
||||||
|
help = 'Index format: title-nn'
|
||||||
|
|
||||||
|
|
||||||
class EverybodyLovesEricRaymond(_BasicScraper):
|
class EverybodyLovesEricRaymond(_BasicScraper):
|
||||||
url = 'http://geekz.co.uk/lovesraymond/'
|
url = 'http://geekz.co.uk/lovesraymond/'
|
||||||
stripUrl = url + 'archive/%s'
|
stripUrl = url + 'archive/%s'
|
||||||
|
@ -190,11 +204,13 @@ class EvilInc(_BasicScraper):
|
||||||
url = 'http://evil-inc.com/'
|
url = 'http://evil-inc.com/'
|
||||||
stripUrl = url + 'comic/%s'
|
stripUrl = url + 'comic/%s'
|
||||||
firstStripUrl = stripUrl % 'monday-3'
|
firstStripUrl = stripUrl % 'monday-3'
|
||||||
imageSearch = compile(tagre("div", "id", "comic") +
|
imageSearch = compile(
|
||||||
r'\s*.*\s*' + #filter out the variant href tag
|
tagre("div", "id", "comic") +
|
||||||
tagre("img", "src", r'(http://i\d\.wp\.com/evil-inc\.com/wp-content/uploads/[^"]+)'))
|
r'\s*.*\s*' + # filter out the variant href tag
|
||||||
|
tagre("img", "src",
|
||||||
|
r'(http://i\d\.wp\.com/evil-inc\.com/wp-content/uploads/[^"]+)'))
|
||||||
prevSearch = compile(tagre("span", "class", "mininav-prev") +
|
prevSearch = compile(tagre("span", "class", "mininav-prev") +
|
||||||
tagre("a", "href", r'([^"]+)'))
|
tagre("a", "href", r'([^"]+)'))
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
|
||||||
|
@ -214,7 +230,8 @@ class ExploitationNow(_BasicScraper):
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % '2000-07-07/9'
|
firstStripUrl = stripUrl % '2000-07-07/9'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev"))
|
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
|
||||||
|
after="navi-prev"))
|
||||||
help = 'Index format: yyyy-mm-dd/num'
|
help = 'Index format: yyyy-mm-dd/num'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape, IGNORECASE
|
from re import compile, escape, IGNORECASE
|
||||||
|
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
|
@ -24,7 +26,8 @@ class FantasyRealms(_BasicScraper):
|
||||||
imageSearch = compile(r'<img src="(\d{1,4}.\w{3,4})" width="540"', IGNORECASE)
|
imageSearch = compile(r'<img src="(\d{1,4}.\w{3,4})" width="540"', IGNORECASE)
|
||||||
prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
|
prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
starter = indirectStarter(url,
|
starter = indirectStarter(
|
||||||
|
url,
|
||||||
compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE))
|
compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE))
|
||||||
|
|
||||||
|
|
||||||
|
@ -82,7 +85,7 @@ class Flemcomics(_BasicScraper):
|
||||||
stripUrl = url + 'd/%s.html'
|
stripUrl = url + 'd/%s.html'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') +
|
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') +
|
||||||
tagre("img", "src", r'/images/previous_day\.jpg'))
|
tagre("img", "src", r'/images/previous_day\.jpg'))
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
@ -92,7 +95,8 @@ class Flipside(_BasicScraper):
|
||||||
stripUrl = url + '?i=%s'
|
stripUrl = url + '?i=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.flipside\.keenspot\.com/comic/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://cdn\.flipside\.keenspot\.com/comic/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\?i=\d+)' % rurl, after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(%s\?i=\d+)' % rurl,
|
||||||
|
after="prev"))
|
||||||
help = 'Index format: nnnn'
|
help = 'Index format: nnnn'
|
||||||
|
|
||||||
|
|
||||||
|
@ -112,7 +116,7 @@ class FonFlatter(_BasicScraper):
|
||||||
self.stripUrl % "2006/09/21/danke",
|
self.stripUrl % "2006/09/21/danke",
|
||||||
self.stripUrl % "2006/08/23/zgf-zuweilen-gestellte-fragen",
|
self.stripUrl % "2006/08/23/zgf-zuweilen-gestellte-fragen",
|
||||||
self.stripUrl % "2005/10/19/naq-never-asked-questions",
|
self.stripUrl % "2005/10/19/naq-never-asked-questions",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ForLackOfABetterComic(_BasicScraper):
|
class ForLackOfABetterComic(_BasicScraper):
|
||||||
|
@ -138,13 +142,6 @@ class FragileSpanish(_ParserScraper):
|
||||||
prevSearch = '//a[@class="comicnav" and contains(text(),"Anterior")]'
|
prevSearch = '//a[@class="comicnav" and contains(text(),"Anterior")]'
|
||||||
lang = 'es'
|
lang = 'es'
|
||||||
|
|
||||||
class Freefall(_BasicScraper):
|
|
||||||
url = 'http://freefall.purrsia.com/default.htm'
|
|
||||||
stripUrl = 'http://freefall.purrsia.com/ff%s/fc%s.htm'
|
|
||||||
imageSearch = compile(r'<img src="(/ff\d+/.+?.\w{3,4})"')
|
|
||||||
prevSearch = compile(r'<A HREF="(/ff\d+/.+?.htm)">Previous</A>')
|
|
||||||
help = 'Index format: nnnn/nnnnn'
|
|
||||||
|
|
||||||
|
|
||||||
class FredoAndPidjin(_BasicScraper):
|
class FredoAndPidjin(_BasicScraper):
|
||||||
url = 'http://www.pidjin.net/'
|
url = 'http://www.pidjin.net/'
|
||||||
|
@ -157,10 +154,19 @@ class FredoAndPidjin(_BasicScraper):
|
||||||
)
|
)
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
prevSearch = compile(tagre('a', 'href', '([^"]+)')+"Prev</a>")
|
prevSearch = compile(tagre('a', 'href', '([^"]+)')+"Prev</a>")
|
||||||
starter = indirectStarter(url,
|
starter = indirectStarter(
|
||||||
|
url,
|
||||||
compile(tagre('a', 'href', "("+url+r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))
|
compile(tagre('a', 'href', "("+url+r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))
|
||||||
|
|
||||||
|
|
||||||
|
class Freefall(_BasicScraper):
|
||||||
|
url = 'http://freefall.purrsia.com/default.htm'
|
||||||
|
stripUrl = 'http://freefall.purrsia.com/ff%s/fc%s.htm'
|
||||||
|
imageSearch = compile(r'<img src="(/ff\d+/.+?.\w{3,4})"')
|
||||||
|
prevSearch = compile(r'<A HREF="(/ff\d+/.+?.htm)">Previous</A>')
|
||||||
|
help = 'Index format: nnnn/nnnnn'
|
||||||
|
|
||||||
|
|
||||||
class FullFrontalNerdity(_BasicScraper):
|
class FullFrontalNerdity(_BasicScraper):
|
||||||
url = 'http://ffn.nodwick.com/'
|
url = 'http://ffn.nodwick.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
|
|
|
@ -1,15 +1,21 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from ..scraper import _ParserScraper
|
from ..scraper import _ParserScraper
|
||||||
|
|
||||||
|
|
||||||
class Footloose(_ParserScraper):
|
class Footloose(_ParserScraper):
|
||||||
url = 'http://footloosecomic.com/footloose.php'
|
url = 'http://footloosecomic.com/footloose.php'
|
||||||
imageSearch='//body/p[1]//img'
|
imageSearch = '//body/p[1]//img'
|
||||||
prevSearch='//body/a[2]'
|
prevSearch = '//body/a[2]'
|
||||||
|
|
||||||
|
|
||||||
class Cherry(Footloose):
|
class Cherry(Footloose):
|
||||||
url = 'http://footloosecomic.com/cherry/index.php'
|
url = 'http://footloosecomic.com/cherry/index.php'
|
||||||
|
|
||||||
|
|
||||||
class Desigaspring(Footloose):
|
class Desigaspring(Footloose):
|
||||||
url = 'http://footloosecomic.com/dspring/index.php'
|
url = 'http://footloosecomic.com/dspring/index.php'
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
|
@ -155,6 +156,16 @@ class GrrlPower(_BasicScraper):
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
|
class GUComics(_BasicScraper):
|
||||||
|
url = 'http://www.gucomics.com/'
|
||||||
|
stripUrl = url + '%s'
|
||||||
|
firstStripUrl = stripUrl % '20000710'
|
||||||
|
imageSearch = compile(tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)'))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(/\d+)') +
|
||||||
|
tagre("img", "src", r'/images/nav/prev\.png'))
|
||||||
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
class GunnerkriggCourt(_BasicScraper):
|
class GunnerkriggCourt(_BasicScraper):
|
||||||
url = 'http://www.gunnerkrigg.com/'
|
url = 'http://www.gunnerkrigg.com/'
|
||||||
stripUrl = url + '?p=%s'
|
stripUrl = url + '?p=%s'
|
||||||
|
@ -176,13 +187,3 @@ class Gunshow(_BasicScraper):
|
||||||
tagre("a", "href", r'([^"]+)') +
|
tagre("a", "href", r'([^"]+)') +
|
||||||
tagre("img", "src", r'[^"]*menu/small/previous\.gif'))
|
tagre("img", "src", r'[^"]*menu/small/previous\.gif'))
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
class GUComics(_BasicScraper):
|
|
||||||
url = 'http://www.gucomics.com/'
|
|
||||||
stripUrl = url + '%s'
|
|
||||||
firstStripUrl = stripUrl % '20000710'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)'))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(/\d+)') +
|
|
||||||
tagre("img", "src", r'/images/nav/prev\.png'))
|
|
||||||
help = 'Index format: yyyymmdd'
|
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape, IGNORECASE
|
from re import compile, escape, IGNORECASE
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
|
@ -25,21 +27,19 @@ class MadamAndEve(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
class Magellan(_ParserScraper):
|
class Magellan(_ParserScraper):
|
||||||
description = u'A comic strip about Superheroes and Not-Superheroes'
|
|
||||||
url = 'http://magellanverse.com/'
|
url = 'http://magellanverse.com/'
|
||||||
stripUrl = url + '%s/'
|
|
||||||
css = True
|
css = True
|
||||||
imageSearch = '#comic-1 > a:first-child img'
|
imageSearch = '#comic-1 > a:first-child img'
|
||||||
prevSearch = '.nav-previous > a'
|
prevSearch = '.nav-previous > a'
|
||||||
help = 'Index format: stripname'
|
|
||||||
|
|
||||||
|
|
||||||
class MagickChicks(_BasicScraper):
|
class MagickChicks(_BasicScraper):
|
||||||
url = 'http://www.magickchicks.com/'
|
url = 'http://www.magickchicks.com/'
|
||||||
stripUrl = url + 'strips-mc/%s'
|
stripUrl = url + 'strips-mc/%s'
|
||||||
firstStripUrl = stripUrl % 'tis_but_a_trifle'
|
firstStripUrl = stripUrl % 'tis_but_a_trifle'
|
||||||
imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]*/strips-mc/[^"]+)', before="cn[id]prevt"))
|
prevSearch = compile(tagre("a", "href", r'([^"]*/strips-mc/[^"]+)',
|
||||||
|
before="cn[id]prevt"))
|
||||||
help = 'Index format: name'
|
help = 'Index format: name'
|
||||||
|
|
||||||
|
|
||||||
|
@ -54,7 +54,6 @@ class ManlyGuysDoingManlyThings(_ParserScraper):
|
||||||
|
|
||||||
|
|
||||||
class MareInternum(_ParserScraper):
|
class MareInternum(_ParserScraper):
|
||||||
description = u'Mare Internum is an online science fiction graphic novel about the isolated inhabitants of the planet Mars. '
|
|
||||||
url = 'http://marecomic.com/'
|
url = 'http://marecomic.com/'
|
||||||
stripUrl = url + 'comics/ch%s'
|
stripUrl = url + 'comics/ch%s'
|
||||||
imageSearch = '//div[@id="comic"]//img'
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
|
@ -76,7 +75,8 @@ class MarriedToTheSea(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % '022806'
|
firstStripUrl = stripUrl % '022806'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%s\d+/[^"]+)' % rurl, before="overflow"))
|
imageSearch = compile(tagre("img", "src", r'(%s\d+/[^"]+)' % rurl,
|
||||||
|
before="overflow"))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<< Yesterday")
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<< Yesterday")
|
||||||
help = 'Index format: mmddyy'
|
help = 'Index format: mmddyy'
|
||||||
|
|
||||||
|
@ -85,6 +85,7 @@ class MarriedToTheSea(_BasicScraper):
|
||||||
unused, date, filename = imageUrl.rsplit('/', 2)
|
unused, date, filename = imageUrl.rsplit('/', 2)
|
||||||
return '%s-%s' % (date, filename)
|
return '%s-%s' % (date, filename)
|
||||||
|
|
||||||
|
|
||||||
class MaxOveracts(_ParserScraper):
|
class MaxOveracts(_ParserScraper):
|
||||||
url = 'http://occasionalcomics.com/'
|
url = 'http://occasionalcomics.com/'
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
|
@ -108,7 +109,8 @@ class MenageA3(_BasicScraper):
|
||||||
url = 'http://www.ma3comic.com/'
|
url = 'http://www.ma3comic.com/'
|
||||||
stripUrl = url + 'strips-ma3/%s'
|
stripUrl = url + 'strips-ma3/%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]*/strips-ma3/[^"]+)', before="cn[id]prev"))
|
prevSearch = compile(tagre("a", "href", r'([^"]*/strips-ma3/[^"]+)',
|
||||||
|
before="cn[id]prev"))
|
||||||
help = 'Index format: name'
|
help = 'Index format: name'
|
||||||
|
|
||||||
|
|
||||||
|
@ -117,40 +119,43 @@ class Misfile(_BasicScraper):
|
||||||
stripUrl = url + '?date=%s'
|
stripUrl = url + '?date=%s'
|
||||||
firstStripUrl = stripUrl % '2004-02-22'
|
firstStripUrl = stripUrl % '2004-02-22'
|
||||||
imageSearch = compile(tagre("img", "src", r"(comics/[^']+)", quote="'"))
|
imageSearch = compile(tagre("img", "src", r"(comics/[^']+)", quote="'"))
|
||||||
prevSearch = compile(tagre("link", "href", r"([^']+)", quote="'", before="Previous"))
|
prevSearch = compile(tagre("link", "href", r"([^']+)", quote="'",
|
||||||
|
before="Previous"))
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
class Moonsticks(_ParserScraper):
|
|
||||||
url = "http://moonsticks.org/"
|
|
||||||
stripUrl = url
|
|
||||||
imageSearch = "//div[@class='entry']//img"
|
|
||||||
prevSearch = u"//a[text()='« Previous']"
|
|
||||||
help = 'Index format: stripname'
|
|
||||||
|
|
||||||
|
|
||||||
class MonsieurLeChien(_BasicScraper):
|
class MonsieurLeChien(_BasicScraper):
|
||||||
url = 'http://www.monsieur-le-chien.fr/'
|
url = 'http://www.monsieur-le-chien.fr/'
|
||||||
stripUrl = url + 'index.php?planche=%s'
|
stripUrl = url + 'index.php?planche=%s'
|
||||||
firstStripUrl = stripUrl % '2'
|
firstStripUrl = stripUrl % '2'
|
||||||
lang = 'fr'
|
lang = 'fr'
|
||||||
imageSearch = compile(tagre("img", "src", r'(i/planches/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(i/planches/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", "i/precedent.gif"))
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') +
|
||||||
|
tagre("img", "src", "i/precedent.gif"))
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
class Moonsticks(_ParserScraper):
|
||||||
|
url = "http://moonsticks.org/"
|
||||||
|
imageSearch = "//div[@class='entry']//img"
|
||||||
|
prevSearch = u"//a[text()='« Previous']"
|
||||||
|
|
||||||
|
|
||||||
class MrLovenstein(_BasicScraper):
|
class MrLovenstein(_BasicScraper):
|
||||||
url = 'http://www.mrlovenstein.com/'
|
url = 'http://www.mrlovenstein.com/'
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + 'comic/%s#comic'
|
stripUrl = url + 'comic/%s#comic'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = (
|
imageSearch = (
|
||||||
#captures rollover comic
|
# captures rollover comic
|
||||||
compile(tagre("div", "class", r'comic_image') + "\s*.*\s*" + tagre("div", "style", r'display: none;') + "\s*.*\s*" + tagre("img", "src", r'(/images/comics/[^"]+)')),
|
compile(tagre("div", "class", r'comic_image') + "\s*.*\s*" +
|
||||||
#captures standard comic
|
tagre("div", "style", r'display: none;') + "\s*.*\s*" +
|
||||||
compile(tagre("img", "src", r'(/images/comics/[^"]+)', before="comic_main_image")),
|
tagre("img", "src", r'(/images/comics/[^"]+)')),
|
||||||
|
# captures standard comic
|
||||||
|
compile(tagre("img", "src", r'(/images/comics/[^"]+)',
|
||||||
|
before="comic_main_image")),
|
||||||
)
|
)
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", "/images/nav_left.png"))
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') +
|
||||||
|
tagre("img", "src", "/images/nav_left.png"))
|
||||||
textSearch = compile(r'<meta name="description" content="(.+?)" />')
|
textSearch = compile(r'<meta name="description" content="(.+?)" />')
|
||||||
help = 'Index Format: n'
|
help = 'Index Format: n'
|
||||||
|
|
||||||
|
@ -163,7 +168,8 @@ class MyCartoons(_BasicScraper):
|
||||||
compile(tagre("img", "src", r'(%swp-content/cartoons/(?:[^"]+/)?\d+-\d+-\d+[^"]+)' % rurl)),
|
compile(tagre("img", "src", r'(%swp-content/cartoons/(?:[^"]+/)?\d+-\d+-\d+[^"]+)' % rurl)),
|
||||||
compile(tagre("img", "src", r'(%scartoons/[^"]+/\d+-\d+-\d+[^"]+)' % rurl)),
|
compile(tagre("img", "src", r'(%scartoons/[^"]+/\d+-\d+-\d+[^"]+)' % rurl)),
|
||||||
)
|
)
|
||||||
prevSearch = compile(tagre("a", "href", r'(%spage/[^"]+)' % rurl) + "«")
|
prevSearch = compile(tagre("a", "href", r'(%spage/[^"]+)' % rurl) +
|
||||||
|
"«")
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
lang = 'de'
|
lang = 'de'
|
||||||
|
|
||||||
|
@ -172,4 +178,3 @@ class MysteriesOfTheArcana(_ParserScraper):
|
||||||
url = 'http://mysteriesofthearcana.com/'
|
url = 'http://mysteriesofthearcana.com/'
|
||||||
imageSearch = '//div[@id="comic"]//img'
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
prevSearch = '//a[@class="navprevious"]'
|
prevSearch = '//a[@class="navprevious"]'
|
||||||
help = 'Index format: n (unpadded)'
|
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
|
@ -37,7 +39,8 @@ class NatalieDee(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % '022806'
|
firstStripUrl = stripUrl % '022806'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%s\d+/[^"]+)' % rurl, before="overflow"))
|
imageSearch = compile(tagre("img", "src", r'(%s\d+/[^"]+)' % rurl,
|
||||||
|
before="overflow"))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<< Yesterday")
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<< Yesterday")
|
||||||
help = 'Index format: mmddyy'
|
help = 'Index format: mmddyy'
|
||||||
|
|
||||||
|
@ -47,6 +50,23 @@ class NatalieDee(_BasicScraper):
|
||||||
return '%s-%s' % (date, filename)
|
return '%s-%s' % (date, filename)
|
||||||
|
|
||||||
|
|
||||||
|
class NekkoAndJoruba(_BasicScraper):
|
||||||
|
url = 'http://www.nekkoandjoruba.com/'
|
||||||
|
stripUrl = url + '?p=%s'
|
||||||
|
firstStripUrl = stripUrl % '7'
|
||||||
|
imageSearch = compile(r'<img src="(http://www\.nekkoandjoruba\.com/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'<a href="(.+?)">‹</a>')
|
||||||
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
class NekoTheKitty(_ParserScraper):
|
||||||
|
url = 'http://www.nekothekitty.net/'
|
||||||
|
stripUrl = url + 'comics/%s'
|
||||||
|
firstStripUrl = stripUrl % '936393/001-video-games'
|
||||||
|
imageSearch = '//a[@id="comic_image"]/img'
|
||||||
|
prevSearch = '//a[text()="<-"]'
|
||||||
|
|
||||||
|
|
||||||
class NeoEarth(_BasicScraper):
|
class NeoEarth(_BasicScraper):
|
||||||
url = 'http://www.neo-earth.com/NE/'
|
url = 'http://www.neo-earth.com/NE/'
|
||||||
stripUrl = url + 'index.php?date=%s'
|
stripUrl = url + 'index.php?date=%s'
|
||||||
|
@ -72,23 +92,6 @@ class NewWorld(_BasicScraper):
|
||||||
help = 'Index format: yyyy/mm/dd/stripn'
|
help = 'Index format: yyyy/mm/dd/stripn'
|
||||||
|
|
||||||
|
|
||||||
class NekkoAndJoruba(_BasicScraper):
|
|
||||||
url = 'http://www.nekkoandjoruba.com/'
|
|
||||||
stripUrl = url + '?p=%s'
|
|
||||||
firstStripUrl = stripUrl % '7'
|
|
||||||
imageSearch = compile(r'<img src="(http://www\.nekkoandjoruba\.com/comics/.+?)"')
|
|
||||||
prevSearch = compile(r'<a href="(.+?)">‹</a>')
|
|
||||||
help = 'Index format: nnn'
|
|
||||||
|
|
||||||
|
|
||||||
class NekoTheKitty(_ParserScraper):
|
|
||||||
url = 'http://www.nekothekitty.net/'
|
|
||||||
stripUrl = url + 'comics/%s'
|
|
||||||
firstStripUrl = stripUrl % '936393/001-video-games'
|
|
||||||
imageSearch = '//a[@id="comic_image"]/img'
|
|
||||||
prevSearch = '//a[text()="<-"]'
|
|
||||||
|
|
||||||
|
|
||||||
class NichtLustig(_BasicScraper):
|
class NichtLustig(_BasicScraper):
|
||||||
url = 'http://www.nichtlustig.de/main.html'
|
url = 'http://www.nichtlustig.de/main.html'
|
||||||
stripUrl = 'http://static.nichtlustig.de/toondb/%s.html'
|
stripUrl = 'http://static.nichtlustig.de/toondb/%s.html'
|
||||||
|
@ -96,13 +99,12 @@ class NichtLustig(_BasicScraper):
|
||||||
imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
|
imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
|
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
|
||||||
help = 'Index format: yymmdd'
|
help = 'Index format: yymmdd'
|
||||||
starter = indirectStarter(url,
|
starter = indirectStarter(
|
||||||
compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)')))
|
url, compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)')))
|
||||||
|
|
||||||
|
|
||||||
class Nimona(_BasicScraper):
|
class Nimona(_BasicScraper):
|
||||||
url = 'http://gingerhaze.com/nimona/'
|
url = 'http://gingerhaze.com/nimona/'
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % "comic/page-1"
|
firstStripUrl = stripUrl % "comic/page-1"
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://gingerhaze\.com/sites/default/files/nimona-pages/.+?)'))
|
imageSearch = compile(tagre("img", "src", r'(http://gingerhaze\.com/sites/default/files/nimona-pages/.+?)'))
|
||||||
|
@ -111,20 +113,6 @@ class Nimona(_BasicScraper):
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
|
|
||||||
|
|
||||||
class Nnewts(_BasicScraper):
|
|
||||||
url = 'http://nnewts.com/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '%s/'
|
|
||||||
firstStripUrl = stripUrl % 'nnewts-page-1'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(%snewty/comics/[^"]+)' % rurl))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s(?:nnewts-)?page-\d+/)' % rurl, after="navi-prev"))
|
|
||||||
help = 'Index format: page-number'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getDisabledReasons(cls):
|
|
||||||
return {'cannotReadOnline': 'Comic is not available for reading online.'}
|
|
||||||
|
|
||||||
|
|
||||||
class NobodyScores(_BasicScraper):
|
class NobodyScores(_BasicScraper):
|
||||||
url = 'http://nobodyscores.loosenutstudio.com/'
|
url = 'http://nobodyscores.loosenutstudio.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
|
@ -143,11 +131,14 @@ class NoNeedForBushido(_BasicScraper):
|
||||||
imageSearch = compile(
|
imageSearch = compile(
|
||||||
tagre("a", "rel", "next") +
|
tagre("a", "rel", "next") +
|
||||||
tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl,
|
tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl,
|
||||||
after="attachment-full"))
|
after="attachment-full"))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl, after="previous-webcomic"))
|
prevSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
|
||||||
|
after="previous-webcomic"))
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
starter = indirectStarter(url,
|
starter = indirectStarter(
|
||||||
compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl, after="last-webcomic")))
|
url, compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
|
||||||
|
after="last-webcomic")))
|
||||||
|
|
||||||
|
|
||||||
class NotInventedHere(_BasicScraper):
|
class NotInventedHere(_BasicScraper):
|
||||||
url = 'http://notinventedhe.re/'
|
url = 'http://notinventedhe.re/'
|
||||||
|
@ -158,6 +149,7 @@ class NotInventedHere(_BasicScraper):
|
||||||
prevSearch = compile(tagre("a", "href", r'(/on/\d+-\d+-\d+)')+'\s*Previous')
|
prevSearch = compile(tagre("a", "href", r'(/on/\d+-\d+-\d+)')+'\s*Previous')
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
class Nukees(_BasicScraper):
|
class Nukees(_BasicScraper):
|
||||||
url = 'http://www.nukees.com/'
|
url = 'http://www.nukees.com/'
|
||||||
stripUrl = url + 'd/%s'
|
stripUrl = url + 'd/%s'
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
|
@ -47,9 +49,12 @@ class OhJoySexToy(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % 'introduction'
|
firstStripUrl = stripUrl % 'introduction'
|
||||||
imageSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" + tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" +
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after='navi navi-prev'))
|
tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
textSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" + tagre("img", "alt", r'([^"]+)'))
|
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
|
||||||
|
after='navi navi-prev'))
|
||||||
|
textSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" +
|
||||||
|
tagre("img", "alt", r'([^"]+)'))
|
||||||
help = 'Index Format: name'
|
help = 'Index Format: name'
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
|
@ -75,6 +80,15 @@ class OmakeTheater(_ParserScraper):
|
||||||
help = 'Index format: number (unpadded)'
|
help = 'Index format: number (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
class OneQuestion(_BasicScraper):
|
||||||
|
url = 'http://onequestioncomic.com/'
|
||||||
|
stripUrl = url + 'comic.php?strip_id=%s'
|
||||||
|
firstStripUrl = stripUrl % '1'
|
||||||
|
imageSearch = compile(tagre("img", "src", r'((?:\.\./)?istrip_files/strips/\d+\.\w{3,4})'))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') + tagre("img", "src", r'img/arrow_prev\.jpg'))
|
||||||
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
class OnTheFastrack(_BasicScraper):
|
class OnTheFastrack(_BasicScraper):
|
||||||
url = 'http://onthefastrack.com/'
|
url = 'http://onthefastrack.com/'
|
||||||
stripUrl = url + 'comics/%s'
|
stripUrl = url + 'comics/%s'
|
||||||
|
@ -82,7 +96,7 @@ class OnTheFastrack(_BasicScraper):
|
||||||
imageSearch = compile(r'(http://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+)"')
|
imageSearch = compile(r'(http://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+)"')
|
||||||
prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
|
prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
|
||||||
help = 'Index format: monthname-dd-yyyy'
|
help = 'Index format: monthname-dd-yyyy'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
name = pageUrl.rsplit('/', 3)[2]
|
name = pageUrl.rsplit('/', 3)[2]
|
||||||
|
@ -94,21 +108,14 @@ class OnTheFastrack(_BasicScraper):
|
||||||
return "%s.gif" % name.title()
|
return "%s.gif" % name.title()
|
||||||
|
|
||||||
|
|
||||||
class OneQuestion(_BasicScraper):
|
|
||||||
url = 'http://onequestioncomic.com/'
|
|
||||||
stripUrl = url + 'comic.php?strip_id=%s'
|
|
||||||
firstStripUrl = stripUrl % '1'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'((?:\.\./)?istrip_files/strips/\d+\.\w{3,4})'))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') + tagre("img", "src", r'img/arrow_prev\.jpg'))
|
|
||||||
help = 'Index format: n (unpadded)'
|
|
||||||
|
|
||||||
|
|
||||||
class Optipess(_BasicScraper):
|
class Optipess(_BasicScraper):
|
||||||
url = 'http://www.optipess.com/'
|
url = 'http://www.optipess.com/'
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = url + '2008/12/01/jason-friend-of-the-butterflies/'
|
firstStripUrl = url + '2008/12/01/jason-friend-of-the-butterflies/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[x|\d]+[^"]+\.[^"]+)' % url))
|
imageSearch = compile(tagre("img", "src",
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="navi navi-prev"))
|
r'(%scomics/[x|\d]+[^"]+\.[^"]+)' % url))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||||
|
after="navi navi-prev"))
|
||||||
textSearch = compile(tagre("img", "alt", r'([^"]+)', before=url))
|
textSearch = compile(tagre("img", "alt", r'([^"]+)', before=url))
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
@ -119,8 +126,9 @@ class OrnerBoy(_BasicScraper):
|
||||||
stripUrl = url + 'index.php?comicID=%s'
|
stripUrl = url + 'index.php?comicID=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(comics/\d+\.[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(comics/\d+\.[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?comicID=\d+)' % rurl) +
|
prevSearch = compile(tagre("a", "href",
|
||||||
tagre("img", "src", r'images/prev_a\.gif'))
|
r'(%sindex\.php\?comicID=\d+)' % rurl) +
|
||||||
|
tagre("img", "src", r'images/prev_a\.gif'))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
|
@ -138,6 +146,6 @@ class OverCompensating(_BasicScraper):
|
||||||
stripUrl = url + 'oc/index.php?comic=%s'
|
stripUrl = url + 'oc/index.php?comic=%s'
|
||||||
firstStripUrl = stripUrl % '0'
|
firstStripUrl = stripUrl % '0'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/oc/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(/oc/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href",
|
prevSearch = compile(tagre("a", "href", r'(/oc/index\.php\?comic=\d+)',
|
||||||
r'(/oc/index\.php\?comic=\d+)', after="go back"))
|
after="go back"))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import bounceStarter, queryNamer, indirectStarter
|
from ..helpers import bounceStarter, queryNamer, indirectStarter
|
||||||
|
@ -10,10 +12,12 @@ from ..util import tagre
|
||||||
|
|
||||||
class ParadigmShift(_BasicScraper):
|
class ParadigmShift(_BasicScraper):
|
||||||
url = 'http://www.paradigmshiftmanga.com/'
|
url = 'http://www.paradigmshiftmanga.com/'
|
||||||
starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after="next-comic-link")))
|
starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)',
|
||||||
|
after="next-comic-link")))
|
||||||
stripUrl = url + 'ps/%s.html'
|
stripUrl = url + 'ps/%s.html'
|
||||||
imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)'))
|
imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="previous-comic-link"))
|
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||||
|
after="previous-comic-link"))
|
||||||
help = 'Index format: custom'
|
help = 'Index format: custom'
|
||||||
|
|
||||||
|
|
||||||
|
@ -22,9 +26,10 @@ class ParallelUniversum(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '001-der-comic-ist-tot'
|
firstStripUrl = stripUrl % '001-der-comic-ist-tot'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src",
|
||||||
|
r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+/)' % rurl) +
|
prevSearch = compile(tagre("a", "href", r'(%s[^"]+/)' % rurl) +
|
||||||
tagre("span", "class", "prev"))
|
tagre("span", "class", "prev"))
|
||||||
help = 'Index format: number-stripname'
|
help = 'Index format: number-stripname'
|
||||||
lang = 'de'
|
lang = 'de'
|
||||||
|
|
||||||
|
@ -74,8 +79,10 @@ class PennyArcade(_BasicScraper):
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % '1998/11/18'
|
firstStripUrl = stripUrl % '1998/11/18'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btnPrev"))
|
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
|
||||||
nextSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btnNext"))
|
before="btnPrev"))
|
||||||
|
nextSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
|
||||||
|
before="btnNext"))
|
||||||
help = 'Index format: yyyy/mm/dd/'
|
help = 'Index format: yyyy/mm/dd/'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -110,7 +117,8 @@ class PeppermintSaga(_BasicScraper):
|
||||||
stripUrl = url + '?p=%s'
|
stripUrl = url + '?p=%s'
|
||||||
firstStripUrl = stripUrl % '3'
|
firstStripUrl = stripUrl % '3'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
|
||||||
|
after="prev"))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
|
@ -121,14 +129,16 @@ class PHDComics(_BasicScraper):
|
||||||
stripUrl = baseUrl + 'comics/archive.php?comicid=%s'
|
stripUrl = baseUrl + 'comics/archive.php?comicid=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd[^ ]+)', quote=""))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd[^ ]+)', quote=""))
|
||||||
prevSearch = compile(tagre("a", "href", r'((?:comics/)?archive\.php\?comicid=\d+)', quote="") +
|
prevSearch = compile(
|
||||||
|
tagre("a", "href", r'((?:comics/)?archive\.php\?comicid=\d+)',
|
||||||
|
quote="") +
|
||||||
tagre("img", "src", r'(?:comics/)?images/prev_button\.gif', quote=""))
|
tagre("img", "src", r'(?:comics/)?images/prev_button\.gif', quote=""))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
"""Skip pages without images."""
|
"""Skip pages without images."""
|
||||||
return url in (
|
return url in (
|
||||||
self.stripUrl % '1669', # video
|
self.stripUrl % '1669', # video
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -138,20 +148,11 @@ class PicPakDog(_BasicScraper):
|
||||||
stripUrl = url + 'comic/%s/'
|
stripUrl = url + 'comic/%s/'
|
||||||
firstStripUrl = stripUrl % 'dogs-cant-spell'
|
firstStripUrl = stripUrl % 'dogs-cant-spell'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+-[^"]+\.png)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+-[^"]+\.png)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="nav-prev"))
|
prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl,
|
||||||
|
after="nav-prev"))
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
|
||||||
class Pixel(_BasicScraper):
|
|
||||||
url = 'http://pixelcomic.net/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '%s'
|
|
||||||
firstStripUrl = stripUrl % '000.shtml'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(\d+\.png)'))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\d+\.(?:php|shtml))' % rurl, before="prev"))
|
|
||||||
help = 'Index format: nnn'
|
|
||||||
|
|
||||||
|
|
||||||
class PiledHigherAndDeeper(_BasicScraper):
|
class PiledHigherAndDeeper(_BasicScraper):
|
||||||
url = 'http://www.phdcomics.com/comics.php'
|
url = 'http://www.phdcomics.com/comics.php'
|
||||||
starter = bounceStarter(url, compile(r'<a href=(archive\.php\?comicid=\d+)>.*<img [^>]*next_button\.gif'))
|
starter = bounceStarter(url, compile(r'<a href=(archive\.php\?comicid=\d+)>.*<img [^>]*next_button\.gif'))
|
||||||
|
@ -172,6 +173,17 @@ class Pimpette(_ParserScraper):
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
class Pixel(_BasicScraper):
|
||||||
|
url = 'http://pixelcomic.net/'
|
||||||
|
rurl = escape(url)
|
||||||
|
stripUrl = url + '%s'
|
||||||
|
firstStripUrl = stripUrl % '000.shtml'
|
||||||
|
imageSearch = compile(tagre("img", "src", r'(\d+\.png)'))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(%s\d+\.(?:php|shtml))' % rurl,
|
||||||
|
before="prev"))
|
||||||
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
class PlanescapeSurvival(_BasicScraper):
|
class PlanescapeSurvival(_BasicScraper):
|
||||||
url = 'http://planescapecomic.com/'
|
url = 'http://planescapecomic.com/'
|
||||||
stripUrl = url + '%s.html'
|
stripUrl = url + '%s.html'
|
||||||
|
@ -204,14 +216,16 @@ class PoorlyDrawnLines(_BasicScraper):
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % 'campus-characters/'
|
firstStripUrl = stripUrl % 'campus-characters/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://poorlydrawnlines\.com/wp-content/uploads/\d+/\d+/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://poorlydrawnlines\.com/wp-content/uploads/\d+/\d+/[^"]+)'))
|
||||||
prevSearch = compile(tagre("li", "class", r'previous') + tagre("a", "href", r'(%s[^"]+)' % rurl))
|
prevSearch = compile(tagre("li", "class", r'previous') +
|
||||||
|
tagre("a", "href", r'(%s[^"]+)' % rurl))
|
||||||
help = 'Index Format: name'
|
help = 'Index Format: name'
|
||||||
|
|
||||||
|
|
||||||
class Precocious(_BasicScraper):
|
class Precocious(_BasicScraper):
|
||||||
url = 'http://www.precociouscomic.com/'
|
url = 'http://www.precociouscomic.com/'
|
||||||
starter = indirectStarter(url,
|
starter = indirectStarter(
|
||||||
compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
|
url, compile(tagre("a", "href", r'(/archive/comic/[^"]+)') +
|
||||||
|
tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
|
||||||
)
|
)
|
||||||
stripUrl = url + 'archive/comic/%s'
|
stripUrl = url + 'archive/comic/%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
|
imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
|
||||||
|
@ -234,7 +248,8 @@ class PunksAndNerds(_BasicScraper):
|
||||||
stripUrl = url + '?p=%s'
|
stripUrl = url + '?p=%s'
|
||||||
firstStripUrl = stripUrl % '15'
|
firstStripUrl = stripUrl % '15'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi-prev"))
|
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
|
||||||
|
after="navi-prev"))
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
@ -250,5 +265,6 @@ class PvPonline(_BasicScraper):
|
||||||
url = 'http://pvponline.com/comic'
|
url = 'http://pvponline.com/comic'
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://s3[^"]+\.amazonaws\.com/pvponlinenew/img/comic/\d+/\d+/pvp[^"]+\.jpg)'))
|
imageSearch = compile(tagre("img", "src", r'(http://s3[^"]+\.amazonaws\.com/pvponlinenew/img/comic/\d+/\d+/pvp[^"]+\.jpg)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', after="left divider"))
|
prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)',
|
||||||
|
after="left divider"))
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape, IGNORECASE, sub
|
from re import compile, escape, IGNORECASE, sub
|
||||||
from os.path import splitext, basename
|
from os.path import splitext
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import indirectStarter, bounceStarter
|
from ..helpers import indirectStarter, bounceStarter
|
||||||
|
@ -14,7 +16,7 @@ class SabrinaOnline(_BasicScraper):
|
||||||
url = 'http://sabrina-online.com/'
|
url = 'http://sabrina-online.com/'
|
||||||
imageSearch = compile(tagre("a", "href", r'(strips/[^"]*)'))
|
imageSearch = compile(tagre("a", "href", r'(strips/[^"]*)'))
|
||||||
prevSearch = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)") +
|
prevSearch = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)") +
|
||||||
tagre("img", "src", "b_back.gif"))
|
tagre("img", "src", "b_back.gif"))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
adult = True
|
adult = True
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
|
@ -32,9 +34,10 @@ class SabrinaOnline(_BasicScraper):
|
||||||
class SafelyEndangered(_BasicScraper):
|
class SafelyEndangered(_BasicScraper):
|
||||||
url = 'http://www.safelyendangered.com/'
|
url = 'http://www.safelyendangered.com/'
|
||||||
stripUrl = url + 'comic/%s'
|
stripUrl = url + 'comic/%s'
|
||||||
firstStripUrl = stripUrl % 'ignored'
|
firstStripUrl = stripUrl % 'ignored'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.safelyendangered\.com/wp-content/uploads/\d+/\d+/[^"]+\.[a-z]+).*'))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.safelyendangered\.com/wp-content/uploads/\d+/\d+/[^"]+\.[a-z]+).*'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="navi navi-prev"))
|
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||||
|
after="navi navi-prev"))
|
||||||
textSearch = compile(tagre("img", "title", r'([^"]+)', before=r'http://www\.safelyendangered\.com/wp-content/uploads'))
|
textSearch = compile(tagre("img", "title", r'([^"]+)', before=r'http://www\.safelyendangered\.com/wp-content/uploads'))
|
||||||
help = 'Index format: yyyy/mm/stripname'
|
help = 'Index format: yyyy/mm/stripname'
|
||||||
|
|
||||||
|
@ -84,9 +87,12 @@ class ScenesFromAMultiverse(_BasicScraper):
|
||||||
firstStripUrl = stripUrl % '2010/06/14/parenthood'
|
firstStripUrl = stripUrl % '2010/06/14/parenthood'
|
||||||
imageSearch = (
|
imageSearch = (
|
||||||
compile(tagre("div", "id", "comic") + r"\s*" +
|
compile(tagre("div", "id", "comic") + r"\s*" +
|
||||||
tagre("img", "src", r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')),
|
tagre("img", "src",
|
||||||
compile(tagre("div", "id", "comic") + r"\s*" + tagre("a", "href", r'[^"]*') +
|
r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')),
|
||||||
tagre("img", "src", r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')),
|
compile(tagre("div", "id", "comic") + r"\s*" +
|
||||||
|
tagre("a", "href", r'[^"]*') +
|
||||||
|
tagre("img", "src",
|
||||||
|
r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')),
|
||||||
)
|
)
|
||||||
prevSearch = compile(tagre("a", "href", r'(%scomic/\d+\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(%scomic/\d+\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
@ -98,7 +104,8 @@ class SchlockMercenary(_BasicScraper):
|
||||||
firstStripUrl = stripUrl % '2000-06-12'
|
firstStripUrl = stripUrl % '2000-06-12'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://static\.schlockmercenary\.com/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://static\.schlockmercenary\.com/comics/[^"]+)'))
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
prevSearch = compile(tagre("a", "href", r'(/\d+-\d+-\d+)', quote="'", after="nav-previous"))
|
prevSearch = compile(tagre("a", "href", r'(/\d+-\d+-\d+)', quote="'",
|
||||||
|
after="nav-previous"))
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
@ -137,8 +144,8 @@ class SequentialArt(_BasicScraper):
|
||||||
stripUrl = url + '?s=%s'
|
stripUrl = url + '?s=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'([^"]+)', before="strip"))
|
imageSearch = compile(tagre("img", "src", r'([^"]+)', before="strip"))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/sequentialart\.php\?s=\d+)')
|
prevSearch = compile(tagre("a", "href", r'(/sequentialart\.php\?s=\d+)') +
|
||||||
+ tagre("img", "src", "Nav_BackOne\.gif"))
|
tagre("img", "src", "Nav_BackOne\.gif"))
|
||||||
help = 'Index format: name'
|
help = 'Index format: name'
|
||||||
|
|
||||||
|
|
||||||
|
@ -165,7 +172,8 @@ class Sheldon(_BasicScraper):
|
||||||
stripUrl = url + 'archive/%s.html'
|
stripUrl = url + 'archive/%s.html'
|
||||||
firstStripUrl = stripUrl % '011130'
|
firstStripUrl = stripUrl % '011130'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl, after="sidenav-prev"))
|
prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl,
|
||||||
|
after="sidenav-prev"))
|
||||||
help = 'Index format: yymmdd'
|
help = 'Index format: yymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
@ -194,7 +202,8 @@ class Shivae(_BasicScraper):
|
||||||
stripUrl = url + 'blog/%s/'
|
stripUrl = url + 'blog/%s/'
|
||||||
firstStripUrl = stripUrl % '2007/09/21/09212007'
|
firstStripUrl = stripUrl % '2007/09/21/09212007'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%swp-content/blogs\.dir/\d+/files/\d+/\d+/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%swp-content/blogs\.dir/\d+/files/\d+/\d+/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%sblog/[^"]+)' % rurl, after="navi-prev"))
|
prevSearch = compile(tagre("a", "href", r'(%sblog/[^"]+)' % rurl,
|
||||||
|
after="navi-prev"))
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
|
||||||
|
@ -210,9 +219,10 @@ class Shortpacked(_ParserScraper):
|
||||||
class ShotgunShuffle(_BasicScraper):
|
class ShotgunShuffle(_BasicScraper):
|
||||||
url = 'http://shotgunshuffle.com/'
|
url = 'http://shotgunshuffle.com/'
|
||||||
stripUrl = url + 'comic/%s'
|
stripUrl = url + 'comic/%s'
|
||||||
firstStripUrl = stripUrl % 'pilot/'
|
firstStripUrl = stripUrl % 'pilot/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://shotgunshuffle.com/wp-content/uploads/\d+/\d+/\d+-[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://shotgunshuffle.com/wp-content/uploads/\d+/\d+/\d+-[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="navi navi-prev"))
|
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||||
|
after="navi navi-prev"))
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
|
||||||
|
@ -220,28 +230,19 @@ class SinFest(_BasicScraper):
|
||||||
name = 'KeenSpot/SinFest'
|
name = 'KeenSpot/SinFest'
|
||||||
url = 'http://www.sinfest.net/'
|
url = 'http://www.sinfest.net/'
|
||||||
stripUrl = url + 'view.php?date=%s'
|
stripUrl = url + 'view.php?date=%s'
|
||||||
imageSearch = compile(tagre("img","src", r'(btphp/comics/.+)', after="alt"))
|
imageSearch = compile(tagre("img", "src", r'(btphp/comics/.+)',
|
||||||
prevSearch = compile(tagre("a", "href", r'(view\.php\?date=.+)') + '\\s*' + tagre("img", "src", r'\.\./images/prev\.gif'))
|
after="alt"))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(view\.php\?date=.+)') + '\\s*' +
|
||||||
|
tagre("img", "src", r'\.\./images/prev\.gif'))
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
# XXX disallowed by robots.txt
|
|
||||||
class _Sketchesnatched(_BasicScraper):
|
|
||||||
url = 'http://sketchesnatched.blogspot.com/'
|
|
||||||
stripUrl = url + 'search?updated-max=%s%%2B01:00&max-results=1'
|
|
||||||
firstStripUrl = stripUrl % '2011-01-27T08:32:00'
|
|
||||||
imageSearch = compile(tagre("meta", "content", r"(http://\d+\.bp\.blogspot\.com/[^']+)",
|
|
||||||
after=r'image_url', quote="'"))
|
|
||||||
prevSearch = compile(tagre("a", "href", r"(http://sketchesnatched\.blogspot\.[a-z]+/search[^']+)",
|
|
||||||
before=r"blog-pager-older-link", quote="'"))
|
|
||||||
help = 'Index format: yyyy-mm-ddThh:mm:ss'
|
|
||||||
|
|
||||||
|
|
||||||
class SkinDeep(_BasicScraper):
|
class SkinDeep(_BasicScraper):
|
||||||
url = 'http://www.skindeepcomic.com/'
|
url = 'http://www.skindeepcomic.com/'
|
||||||
stripUrl = url + 'archive/%s/'
|
stripUrl = url + 'archive/%s/'
|
||||||
imageSearch = compile(r'<span class="webcomic-object[^>]*><img src="([^"]*)"')
|
imageSearch = compile(r'<span class="webcomic-object[^>]*><img src="([^"]*)"')
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="previous-webcomic-link"))
|
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||||
|
after="previous-webcomic-link"))
|
||||||
help = 'Index format: custom'
|
help = 'Index format: custom'
|
||||||
|
|
||||||
|
|
||||||
|
@ -261,7 +262,8 @@ class SleeplessDomain(_ParserScraper):
|
||||||
start = ''
|
start = ''
|
||||||
tsmatch = compile(r'/(\d+)-').search(imageUrl)
|
tsmatch = compile(r'/(\d+)-').search(imageUrl)
|
||||||
if tsmatch:
|
if tsmatch:
|
||||||
start = datetime.utcfromtimestamp(int(tsmatch.group(1))).strftime("%Y-%m-%d")
|
start = datetime.utcfromtimestamp(
|
||||||
|
int(tsmatch.group(1))).strftime("%Y-%m-%d")
|
||||||
else:
|
else:
|
||||||
# There were only chapter 1, page 4 and 5 not matching when writing
|
# There were only chapter 1, page 4 and 5 not matching when writing
|
||||||
# this...
|
# this...
|
||||||
|
@ -315,10 +317,11 @@ class SnowFlakes(_BasicScraper):
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
imageSearch = (
|
imageSearch = (
|
||||||
compile(tagre("img", "src", r'(comics/[^"]+)')),
|
compile(tagre("img", "src", r'(comics/[^"]+)')),
|
||||||
compile(tagre("img", "src", r'(http://www.snowflakescomic.com/comics/[^"]+)')),
|
compile(tagre("img", "src",
|
||||||
|
r'(http://www.snowflakescomic.com/comics/[^"]+)')),
|
||||||
)
|
)
|
||||||
prevSearch = compile(tagre("a", "href", r'(/\?id=\d+\&sl=\d)', quote="") +
|
prevSearch = compile(tagre("a", "href", r'(/\?id=\d+\&sl=\d)', quote="") +
|
||||||
tagre("img", "src", r'images/nav_prior-ON\.gif'))
|
tagre("img", "src", r'images/nav_prior-ON\.gif'))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -338,12 +341,12 @@ class SnowFlakes(_BasicScraper):
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
"""Skip pages without images."""
|
"""Skip pages without images."""
|
||||||
return url in (
|
return url in (
|
||||||
self.stripUrl % ('279', '2'), # no comic
|
self.stripUrl % ('279', '2'), # no comic
|
||||||
self.stripUrl % ('278', '2'), # no comic
|
self.stripUrl % ('278', '2'), # no comic
|
||||||
self.stripUrl % ('277', '2'), # no comic
|
self.stripUrl % ('277', '2'), # no comic
|
||||||
self.stripUrl % ('276', '2'), # no comic
|
self.stripUrl % ('276', '2'), # no comic
|
||||||
self.stripUrl % ('275', '2'), # no comic
|
self.stripUrl % ('275', '2'), # no comic
|
||||||
self.stripUrl % ('214', '2'), # no comic
|
self.stripUrl % ('214', '2'), # no comic
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -354,10 +357,11 @@ class SnowFlame(_BasicScraper):
|
||||||
firstStripUrl = stripUrl % ('01', '01')
|
firstStripUrl = stripUrl % ('01', '01')
|
||||||
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl, after="Snow[Ff]lame "))
|
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl, after="Snow[Ff]lame "))
|
||||||
prevSearch = compile(tagre("span", "class", "mininav-prev") +
|
prevSearch = compile(tagre("span", "class", "mininav-prev") +
|
||||||
tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl))
|
tagre("a", "href",
|
||||||
starter = bounceStarter(url,
|
r'(%s\?comic=snowflame[^"]+)' % rurl))
|
||||||
compile(tagre("span", "class", "mininav-next") +
|
starter = bounceStarter(
|
||||||
tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl)))
|
url, compile(tagre("span", "class", "mininav-next") +
|
||||||
|
tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl)))
|
||||||
help = 'Index format: chapter-page'
|
help = 'Index format: chapter-page'
|
||||||
|
|
||||||
def getIndexStripUrl(self, index):
|
def getIndexStripUrl(self, index):
|
||||||
|
@ -378,18 +382,33 @@ class SodiumEyes(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '2007/11/08/damning-evidence'
|
firstStripUrl = stripUrl % '2007/11/08/damning-evidence'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomic/[^ ]+)' % rurl, quote=""))
|
imageSearch = compile(tagre("img", "src", r'(%scomic/[^ ]+)' % rurl,
|
||||||
|
quote=""))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
|
||||||
|
class SomethingPositive(_BasicScraper):
|
||||||
|
url = 'http://www.somethingpositive.net/'
|
||||||
|
stripUrl = url + 'sp%s.shtml'
|
||||||
|
imageSearch = (
|
||||||
|
compile(tagre("img", "src", r'(sp\d+\.png)')),
|
||||||
|
compile(tagre("img", "src", r'(twither\.gif)')),
|
||||||
|
)
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(sp\d+\.shtml)') + "(?:" +
|
||||||
|
tagre("img", "src", r'images/previous\.gif') +
|
||||||
|
"|Previous)")
|
||||||
|
help = 'Index format: mmddyyyy'
|
||||||
|
|
||||||
|
|
||||||
class Sorcery101(_BasicScraper):
|
class Sorcery101(_BasicScraper):
|
||||||
baseUrl = 'http://www.sorcery101.net/'
|
baseUrl = 'http://www.sorcery101.net/'
|
||||||
url = baseUrl + 'sorcery-101/'
|
url = baseUrl + 'sorcery-101/'
|
||||||
rurl = escape(baseUrl)
|
rurl = escape(baseUrl)
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%ssorcery-101/[^"]+)' % rurl, after="previous-"))
|
prevSearch = compile(tagre("a", "href", r'(%ssorcery-101/[^"]+)' % rurl,
|
||||||
|
after="previous-"))
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
|
||||||
|
@ -399,7 +418,8 @@ class SpaceTrawler(_BasicScraper):
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '2010/01/01/spacetrawler-4'
|
firstStripUrl = stripUrl % '2010/01/01/spacetrawler-4'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="navi-prev"))
|
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
|
||||||
|
after="navi-prev"))
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
|
||||||
|
@ -408,7 +428,8 @@ class Spamusement(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + 'index.php/comics/view/%s'
|
stripUrl = url + 'index.php/comics/view/%s'
|
||||||
imageSearch = compile(r'<img src="(%sgfx/\d+\..+?)"' % rurl, IGNORECASE)
|
imageSearch = compile(r'<img src="(%sgfx/\d+\..+?)"' % rurl, IGNORECASE)
|
||||||
prevSearch = compile(r'<a href="(%sindex.php/comics/view/.+?)">' % rurl, IGNORECASE)
|
prevSearch = compile(r'<a href="(%sindex.php/comics/view/.+?)">' % rurl,
|
||||||
|
IGNORECASE)
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
starter = indirectStarter(url, prevSearch)
|
starter = indirectStarter(url, prevSearch)
|
||||||
|
|
||||||
|
@ -419,7 +440,8 @@ class SpareParts(_BasicScraper):
|
||||||
stripUrl = baseUrl + 'comics/index.php?date=%s'
|
stripUrl = baseUrl + 'comics/index.php?date=%s'
|
||||||
firstStripUrl = stripUrl % '20031022'
|
firstStripUrl = stripUrl % '20031022'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.sparepartscomics\.com/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.sparepartscomics\.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', quote="'") + "Previous Comic")
|
prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)',
|
||||||
|
quote="'") + "Previous Comic")
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
@ -433,6 +455,29 @@ class StandStillStaySilent(_ParserScraper):
|
||||||
help = 'Index Format: number'
|
help = 'Index Format: number'
|
||||||
|
|
||||||
|
|
||||||
|
class StarCrossdDestiny(_BasicScraper):
|
||||||
|
baseUrl = 'http://www.starcrossd.net/'
|
||||||
|
rurl = escape(baseUrl)
|
||||||
|
url = baseUrl + 'comic.html'
|
||||||
|
stripUrl = baseUrl + 'archives/%s.html'
|
||||||
|
firstStripUrl = stripUrl % '00000001'
|
||||||
|
imageSearch = compile(tagre("img", "src", r'(http://(?:www\.)?starcrossd\.net/(?:ch1|strips|book2)/[^"]+)'))
|
||||||
|
prevSearch = compile(r'<a href="(%s(?:ch1/)?archives/\d+\.html)"[^>]*"[^"]*"[^>]*>prev' % rurl, IGNORECASE)
|
||||||
|
help = 'Index format: nnnnnnnn'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
if imageUrl.find('ch1') == -1:
|
||||||
|
# At first all images were stored in a strips/ directory but
|
||||||
|
# that was changed with the introduction of book2
|
||||||
|
imageUrl = sub('(?:strips)|(?:images)', 'book1', imageUrl)
|
||||||
|
elif not imageUrl.find('strips') == -1:
|
||||||
|
imageUrl = imageUrl.replace('strips/', '')
|
||||||
|
directory, filename = imageUrl.split('/')[-2:]
|
||||||
|
filename, extension = splitext(filename)
|
||||||
|
return directory + '-' + filename
|
||||||
|
|
||||||
|
|
||||||
class StationV3(_ParserScraper):
|
class StationV3(_ParserScraper):
|
||||||
url = 'http://www.stationv3.com/'
|
url = 'http://www.stationv3.com/'
|
||||||
stripUrl = url + 'd/%s.html'
|
stripUrl = url + 'd/%s.html'
|
||||||
|
@ -447,62 +492,18 @@ class StickyDillyBuns(_BasicScraper):
|
||||||
stripUrl = url + 'strips-sdb/%s'
|
stripUrl = url + 'strips-sdb/%s'
|
||||||
firstStripUrl = stripUrl % 'awesome_leading_man'
|
firstStripUrl = stripUrl % 'awesome_leading_man'
|
||||||
imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]*/strips-sdb/[^"]+)', before="cn[id]prev"))
|
prevSearch = compile(tagre("a", "href", r'([^"]*/strips-sdb/[^"]+)',
|
||||||
|
before="cn[id]prev"))
|
||||||
help = 'Index format: name'
|
help = 'Index format: name'
|
||||||
|
|
||||||
|
|
||||||
class Stubble(_BasicScraper):
|
|
||||||
url = 'http://stubblecomics.com/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '?p=%s'
|
|
||||||
firstStripUrl = stripUrl % '4'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi-prev"))
|
|
||||||
help = 'Index format: number'
|
|
||||||
|
|
||||||
|
|
||||||
class StuffNoOneToldMe(_BasicScraper):
|
|
||||||
url = 'http://www.snotm.com/'
|
|
||||||
stripUrl = url + '%s.html'
|
|
||||||
firstStripUrl = stripUrl % '2010/05/01'
|
|
||||||
olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
|
|
||||||
starter = indirectStarter(url,
|
|
||||||
compile(tagre("a", "href", olderHref, quote="'")))
|
|
||||||
imageSearch = (
|
|
||||||
compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') + r"(?:</a>|<br />)"),
|
|
||||||
compile(tagre("img", "src", r'(http://\d+\.bp\.blogspot\.com/[^"]+)') + r"(?:(?: )?</a>|<span |<br />)"),
|
|
||||||
compile(tagre("img", "src", r'(https://lh\d+\.googleusercontent\.com/[^"]+)') + r"</a>"),
|
|
||||||
)
|
|
||||||
prevSearch = compile(tagre("a", "href", olderHref, quote="'", before="older-link"))
|
|
||||||
multipleImagesPerStrip = True
|
|
||||||
help = 'Index format: yyyy/mm/stripname'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def namer(cls, imageUrl, pageUrl):
|
|
||||||
"""Use page URL to construct meaningful image name."""
|
|
||||||
parts, year, month, stripname = pageUrl.rsplit('/', 3)
|
|
||||||
stripname = stripname.rsplit('.', 1)[0]
|
|
||||||
parts, imagename = imageUrl.rsplit('/', 1)
|
|
||||||
return '%s-%s-%s-%s' % (year, month, stripname, imagename)
|
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
|
||||||
"""Skip pages without images."""
|
|
||||||
return url in (
|
|
||||||
self.stripUrl % '2012/08/self-rant', # no comic
|
|
||||||
self.stripUrl % '2012/06/if-you-wonder-where-ive-been', # video
|
|
||||||
self.stripUrl % '2011/10/i-didnt-make-this-nor-have-anything-to', # video
|
|
||||||
self.stripUrl % '2010/12/first-snotm-fans-in-sao-paulo', # no comic
|
|
||||||
self.stripUrl % '2010/11/ear-infection', # no comic
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class StrawberryDeathCake(_BasicScraper):
|
class StrawberryDeathCake(_BasicScraper):
|
||||||
url = 'http://strawberrydeathcake.com/'
|
url = 'http://strawberrydeathcake.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + 'archive/%s/'
|
imageSearch = compile(tagre("img", "src",
|
||||||
imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/[^"]+)' % rurl))
|
r'(%swp-content/webcomic/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous"))
|
prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl,
|
||||||
help = 'Index format: stripname'
|
after="previous"))
|
||||||
|
|
||||||
|
|
||||||
class StrongFemaleProtagonist(_ParserScraper):
|
class StrongFemaleProtagonist(_ParserScraper):
|
||||||
|
@ -524,63 +525,72 @@ class StrongFemaleProtagonist(_ParserScraper):
|
||||||
self.stripUrl % 'issue-5/hiatus-2',
|
self.stripUrl % 'issue-5/hiatus-2',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Stubble(_BasicScraper):
|
||||||
|
url = 'http://stubblecomics.com/'
|
||||||
|
rurl = escape(url)
|
||||||
|
stripUrl = url + '?p=%s'
|
||||||
|
firstStripUrl = stripUrl % '4'
|
||||||
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
|
||||||
|
after="navi-prev"))
|
||||||
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
|
class StuffNoOneToldMe(_BasicScraper):
|
||||||
|
url = 'http://www.snotm.com/'
|
||||||
|
stripUrl = url + '%s.html'
|
||||||
|
firstStripUrl = stripUrl % '2010/05/01'
|
||||||
|
olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
|
||||||
|
starter = indirectStarter(
|
||||||
|
url, compile(tagre("a", "href", olderHref, quote="'")))
|
||||||
|
imageSearch = (
|
||||||
|
compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') +
|
||||||
|
r"(?:</a>|<br />)"),
|
||||||
|
compile(tagre("img", "src", r'(http://\d+\.bp\.blogspot\.com/[^"]+)') +
|
||||||
|
r"(?:(?: )?</a>|<span |<br />)"),
|
||||||
|
compile(tagre("img", "src", r'(https://lh\d+\.googleusercontent\.com/[^"]+)') + r"</a>"),
|
||||||
|
)
|
||||||
|
prevSearch = compile(tagre("a", "href", olderHref, quote="'",
|
||||||
|
before="older-link"))
|
||||||
|
multipleImagesPerStrip = True
|
||||||
|
help = 'Index format: yyyy/mm/stripname'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
"""Use page URL to construct meaningful image name."""
|
||||||
|
parts, year, month, stripname = pageUrl.rsplit('/', 3)
|
||||||
|
stripname = stripname.rsplit('.', 1)[0]
|
||||||
|
parts, imagename = imageUrl.rsplit('/', 1)
|
||||||
|
return '%s-%s-%s-%s' % (year, month, stripname, imagename)
|
||||||
|
|
||||||
|
def shouldSkipUrl(self, url, data):
|
||||||
|
"""Skip pages without images."""
|
||||||
|
return url in (
|
||||||
|
self.stripUrl % '2012/08/self-rant', # no comic
|
||||||
|
self.stripUrl % '2012/06/if-you-wonder-where-ive-been', # video
|
||||||
|
self.stripUrl % '2011/10/i-didnt-make-this-nor-have-anything-to', # video
|
||||||
|
self.stripUrl % '2010/12/first-snotm-fans-in-sao-paulo', # no comic
|
||||||
|
self.stripUrl % '2010/11/ear-infection', # no comic
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class SuburbanTribe(_BasicScraper):
|
class SuburbanTribe(_BasicScraper):
|
||||||
url = 'http://www.pixelwhip.com/'
|
url = 'http://www.pixelwhip.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '?p=%s'
|
stripUrl = url + '?p=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
|
||||||
|
after="prev"))
|
||||||
help = 'Index format: nnnn'
|
help = 'Index format: nnnn'
|
||||||
|
|
||||||
|
|
||||||
class SomethingPositive(_BasicScraper):
|
|
||||||
url = 'http://www.somethingpositive.net/'
|
|
||||||
stripUrl = url + 'sp%s.shtml'
|
|
||||||
imageSearch = (
|
|
||||||
compile(tagre("img", "src", r'(sp\d+\.png)')),
|
|
||||||
compile(tagre("img", "src", r'(twither\.gif)')),
|
|
||||||
)
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(sp\d+\.shtml)') +
|
|
||||||
"(?:" + tagre("img", "src", r'images/previous\.gif') + "|Previous)")
|
|
||||||
help = 'Index format: mmddyyyy'
|
|
||||||
|
|
||||||
|
|
||||||
class StarCrossdDestiny(_BasicScraper):
|
|
||||||
baseUrl = 'http://www.starcrossd.net/'
|
|
||||||
rurl = escape(baseUrl)
|
|
||||||
url = baseUrl + 'comic.html'
|
|
||||||
stripUrl = baseUrl + 'archives/%s.html'
|
|
||||||
firstStripUrl = stripUrl % '00000001'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://(?:www\.)?starcrossd\.net/(?:ch1|strips|book2)/[^"]+)'))
|
|
||||||
prevSearch = compile(r'<a href="(%s(?:ch1/)?archives/\d+\.html)"[^>]*"[^"]*"[^>]*>prev' % rurl, IGNORECASE)
|
|
||||||
help = 'Index format: nnnnnnnn'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def namer(cls, imageUrl, pageUrl):
|
|
||||||
if imageUrl.find('ch1') == -1:
|
|
||||||
# At first all images were stored in a strips/ directory but that was changed with the introduction of book2
|
|
||||||
imageUrl = sub('(?:strips)|(?:images)','book1',imageUrl)
|
|
||||||
elif not imageUrl.find('strips') == -1:
|
|
||||||
imageUrl = imageUrl.replace('strips/','')
|
|
||||||
directory, filename = imageUrl.split('/')[-2:]
|
|
||||||
filename, extension = splitext(filename)
|
|
||||||
return directory + '-' + filename
|
|
||||||
|
|
||||||
|
|
||||||
# XXX disallowed by robots.txt
|
|
||||||
class _StrangeCandy(_BasicScraper):
|
|
||||||
url = 'http://www.strangecandy.net/'
|
|
||||||
stripUrl = url + 'd/%s.html'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comics/\d+\.jpg)'))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') + tagre("img", "alt", "Previous comic"))
|
|
||||||
help = 'Index format: yyyyddmm'
|
|
||||||
|
|
||||||
|
|
||||||
class SupernormalStep(_BasicScraper):
|
class SupernormalStep(_BasicScraper):
|
||||||
url = 'http://supernormalstep.com/'
|
url = 'http://supernormalstep.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '?p=%s'
|
stripUrl = url + '?p=%s'
|
||||||
firstStripUrl = stripUrl % '8'
|
firstStripUrl = stripUrl % '8'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
|
||||||
|
after="prev"))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape, IGNORECASE
|
from re import compile, escape, IGNORECASE
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
|
@ -23,7 +25,8 @@ class TheDevilsPanties(_BasicScraper):
|
||||||
stripUrl = url + 'archives/%s'
|
stripUrl = url + 'archives/%s'
|
||||||
firstStripUrl = stripUrl % '300'
|
firstStripUrl = stripUrl % '300'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.thedevilspanties\.com/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://cdn\.thedevilspanties\.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/archives/\d+)', after="Previous"))
|
prevSearch = compile(tagre("a", "href", r'(/archives/\d+)',
|
||||||
|
after="Previous"))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
|
@ -42,16 +45,20 @@ class TheLandscaper(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + 'comic/%s'
|
stripUrl = url + 'comic/%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comics/comic/comic_page/[^"]+)'))
|
imageSearch = compile(tagre("img", "src",
|
||||||
prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)')+'‹ Previous')
|
r'(/comics/comic/comic_page/[^"]+)'))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)') +
|
||||||
|
'‹ Previous')
|
||||||
help = 'Index format: name'
|
help = 'Index format: name'
|
||||||
|
|
||||||
|
|
||||||
class TheNoob(_BasicScraper):
|
class TheNoob(_BasicScraper):
|
||||||
url = 'http://www.thenoobcomic.com/index.php'
|
url = 'http://www.thenoobcomic.com/index.php'
|
||||||
stripUrl = url + '?pos=%s'
|
stripUrl = url + '?pos=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/headquarters/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(/headquarters/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(\?pos=\d+)', before="comic_nav_previous_button"))
|
prevSearch = compile(tagre("a", "href", r'(\?pos=\d+)',
|
||||||
|
before="comic_nav_previous_button"))
|
||||||
help = 'Index format: nnnn'
|
help = 'Index format: nnnn'
|
||||||
|
|
||||||
|
|
||||||
|
@ -70,6 +77,16 @@ class TheOrderOfTheStick(_BasicScraper):
|
||||||
return pageUrl.rsplit('/', 1)[-1][:-5]
|
return pageUrl.rsplit('/', 1)[-1][:-5]
|
||||||
|
|
||||||
|
|
||||||
|
class TheOuterQuarter(_BasicScraper):
|
||||||
|
url = 'http://theouterquarter.com/'
|
||||||
|
rurl = escape(url)
|
||||||
|
stripUrl = url + 'comic/%s'
|
||||||
|
firstStripUrl = stripUrl % 'oq-the-first-take/4'
|
||||||
|
imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
|
||||||
|
prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
|
||||||
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
class TheParkingLotIsFull(_BasicScraper):
|
class TheParkingLotIsFull(_BasicScraper):
|
||||||
baseUrl = 'http://plif.courageunfettered.com/'
|
baseUrl = 'http://plif.courageunfettered.com/'
|
||||||
url = baseUrl + 'archive/arch2002.htm'
|
url = baseUrl + 'archive/arch2002.htm'
|
||||||
|
@ -81,6 +98,40 @@ class TheParkingLotIsFull(_BasicScraper):
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
class TheThinHLine(_BasicScraper):
|
||||||
|
url = 'http://thinhline.tumblr.com/'
|
||||||
|
rurl = escape(url)
|
||||||
|
stripUrl = url + 'post/%s'
|
||||||
|
firstStripUrl = stripUrl % '3517345105'
|
||||||
|
imageSearch = compile(tagre('img', 'data-src', r'([^"]+media.tumblr.com/[^"]+)', before='content-image'))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') + '></a>')
|
||||||
|
starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after='class="timestamp"')))
|
||||||
|
adult = True
|
||||||
|
|
||||||
|
indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))
|
||||||
|
|
||||||
|
def getComicStrip(self, url, data):
|
||||||
|
"""The comic strip image is in a separate page."""
|
||||||
|
pageUrl = self.fetchUrl(url, data, self.indirectImageSearch)
|
||||||
|
pageData = self.getPage(pageUrl)
|
||||||
|
return super(TheThinHLine, self).getComicStrip(pageUrl, pageData)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
"""Use page URL sequence which is apparently increasing."""
|
||||||
|
num = pageUrl.split('/')[-1]
|
||||||
|
ext = imageUrl.rsplit('.', 1)[1]
|
||||||
|
return "thethinhline-%s.%s" % (num, ext)
|
||||||
|
|
||||||
|
|
||||||
|
class TheWhiteboard(_BasicScraper):
|
||||||
|
url = 'http://www.the-whiteboard.com/'
|
||||||
|
stripUrl = url + 'auto%s.html'
|
||||||
|
imageSearch = compile(r'<img SRC="(autotwb\d{1,4}.+?|autowb\d{1,4}.+?)">', IGNORECASE)
|
||||||
|
prevSearch = compile(r' <a href="(.+?)">previous</a>', IGNORECASE)
|
||||||
|
help = 'Index format: twb or wb + n wg. twb1000'
|
||||||
|
|
||||||
|
|
||||||
class TheWotch(_BasicScraper):
|
class TheWotch(_BasicScraper):
|
||||||
url = 'http://www.thewotch.com/'
|
url = 'http://www.thewotch.com/'
|
||||||
stripUrl = url + '?date=%s'
|
stripUrl = url + '?date=%s'
|
||||||
|
@ -101,6 +152,16 @@ class ThisIsIndexed(_BasicScraper):
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
|
class ThreePanelSoul(_BasicScraper):
|
||||||
|
url = 'http://threepanelsoul.com/'
|
||||||
|
rurl = escape(url)
|
||||||
|
stripUrl = url + '%s/'
|
||||||
|
firstStripUrl = stripUrl % '2006/05/11/a-test-comic'
|
||||||
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
|
||||||
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
|
||||||
class ThunderAndLightning(_BasicScraper):
|
class ThunderAndLightning(_BasicScraper):
|
||||||
url = 'http://www.talcomic.com/wp/'
|
url = 'http://www.talcomic.com/wp/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
|
@ -137,68 +198,6 @@ class ToonHole(_BasicScraper):
|
||||||
return url in (self.stripUrl % "2013/03/if-game-of-thrones-was-animated",)
|
return url in (self.stripUrl % "2013/03/if-game-of-thrones-was-animated",)
|
||||||
|
|
||||||
|
|
||||||
class TwoLumps(_BasicScraper):
|
|
||||||
url = 'http://www.twolumps.net/'
|
|
||||||
stripUrl = url + 'd/%s.html'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)', after="prev"))
|
|
||||||
help = 'Index format: yyyymmdd'
|
|
||||||
|
|
||||||
|
|
||||||
class TheWhiteboard(_BasicScraper):
|
|
||||||
url = 'http://www.the-whiteboard.com/'
|
|
||||||
stripUrl = url + 'auto%s.html'
|
|
||||||
imageSearch = compile(r'<img SRC="(autotwb\d{1,4}.+?|autowb\d{1,4}.+?)">', IGNORECASE)
|
|
||||||
prevSearch = compile(r' <a href="(.+?)">previous</a>', IGNORECASE)
|
|
||||||
help = 'Index format: twb or wb + n wg. twb1000'
|
|
||||||
|
|
||||||
|
|
||||||
class TheOuterQuarter(_BasicScraper):
|
|
||||||
url = 'http://theouterquarter.com/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + 'comic/%s'
|
|
||||||
firstStripUrl = stripUrl % 'oq-the-first-take/4'
|
|
||||||
imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
|
|
||||||
prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
|
|
||||||
help = 'Index format: nnn'
|
|
||||||
|
|
||||||
|
|
||||||
class TheThinHLine(_BasicScraper):
|
|
||||||
url = 'http://thinhline.tumblr.com/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + 'post/%s'
|
|
||||||
firstStripUrl = stripUrl % '3517345105'
|
|
||||||
imageSearch = compile(tagre('img', 'data-src', r'([^"]+media.tumblr.com/[^"]+)', before='content-image'))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + '></a>')
|
|
||||||
starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after='class="timestamp"')))
|
|
||||||
adult = True
|
|
||||||
|
|
||||||
indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))
|
|
||||||
|
|
||||||
def getComicStrip(self, url, data):
|
|
||||||
"""The comic strip image is in a separate page."""
|
|
||||||
pageUrl = self.fetchUrl(url, data, self.indirectImageSearch)
|
|
||||||
pageData = self.getPage(pageUrl)
|
|
||||||
return super(TheThinHLine, self).getComicStrip(pageUrl, pageData)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def namer(cls, imageUrl, pageUrl):
|
|
||||||
"""Use page URL sequence which is apparently increasing."""
|
|
||||||
num = pageUrl.split('/')[-1]
|
|
||||||
ext = imageUrl.rsplit('.', 1)[1]
|
|
||||||
return "thethinhline-%s.%s" % (num, ext)
|
|
||||||
|
|
||||||
|
|
||||||
class ThreePanelSoul(_BasicScraper):
|
|
||||||
url = 'http://threepanelsoul.com/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '%s/'
|
|
||||||
firstStripUrl = stripUrl % '2006/05/11/a-test-comic'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
|
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
|
||||||
|
|
||||||
|
|
||||||
class TracyAndTristan(_BasicScraper):
|
class TracyAndTristan(_BasicScraper):
|
||||||
url = 'http://tandt.thecomicseries.com/'
|
url = 'http://tandt.thecomicseries.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
|
@ -214,6 +213,15 @@ class TwoGuysAndGuy(_BasicScraper):
|
||||||
stripUrl = url + 'archives/%s'
|
stripUrl = url + 'archives/%s'
|
||||||
firstStripUrl = stripUrl % '4'
|
firstStripUrl = stripUrl % '4'
|
||||||
imageSearch = compile(tagre('img', 'src', r'(%scomics/\d{4}-\d{2}-\d{2}[^"]*)' % rurl))
|
imageSearch = compile(tagre('img', 'src', r'(%scomics/\d{4}-\d{2}-\d{2}[^"]*)' % rurl))
|
||||||
prevSearch = compile(tagre('a', 'href', r'(%sarchives/\d+)' % rurl, after='title="Previous"'))
|
prevSearch = compile(tagre('a', 'href', r'(%sarchives/\d+)' % rurl,
|
||||||
|
after='title="Previous"'))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
|
|
||||||
|
class TwoLumps(_BasicScraper):
|
||||||
|
url = 'http://www.twolumps.net/'
|
||||||
|
stripUrl = url + 'd/%s.html'
|
||||||
|
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)', after="prev"))
|
||||||
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
|
@ -1,12 +1,15 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
from ..helpers import bounceStarter, indirectStarter
|
from ..helpers import indirectStarter
|
||||||
from ..util import getQueryParams, tagre
|
from ..util import tagre
|
||||||
|
|
||||||
|
|
||||||
class Underling(_BasicScraper):
|
class Underling(_BasicScraper):
|
||||||
url = 'http://underlingcomic.com/'
|
url = 'http://underlingcomic.com/'
|
||||||
|
@ -14,7 +17,8 @@ class Underling(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
firstStripUrl = stripUrl + 'page-one/'
|
firstStripUrl = stripUrl + 'page-one/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]*)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]*)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after = r'class="[^"]*navi-prev'))
|
prevSearch = compile(tagre("a", "href", r'([^"]+)',
|
||||||
|
after=r'class="[^"]*navi-prev'))
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
@ -45,26 +49,12 @@ class Unsounded(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
imageSearch = compile(tagre("img", "src", r'(pageart/[^"]*)'))
|
imageSearch = compile(tagre("img", "src", r'(pageart/[^"]*)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]*)', after='class="back'))
|
prevSearch = compile(tagre("a", "href", r'([^"]*)', after='class="back'))
|
||||||
starter = indirectStarter(url,
|
starter = indirectStarter(
|
||||||
compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) +
|
url, compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) +
|
||||||
tagre("img", "src", r"%simages/newpages\.png" % rurl)))
|
tagre("img", "src", r"%simages/newpages\.png" % rurl)))
|
||||||
help = 'Index format: chapter-number'
|
help = 'Index format: chapter-number'
|
||||||
|
|
||||||
def getIndexStripUrl(self, index):
|
def getIndexStripUrl(self, index):
|
||||||
"""Get comic strip URL from index."""
|
"""Get comic strip URL from index."""
|
||||||
chapter, num = index.split('-')
|
chapter, num = index.split('-')
|
||||||
return self.stripUrl % (chapter, chapter, num)
|
return self.stripUrl % (chapter, chapter, num)
|
||||||
|
|
||||||
|
|
||||||
# XXX disallowed by robots.txt
|
|
||||||
class _UserFriendly(_BasicScraper):
|
|
||||||
url = 'http://ars.userfriendly.org/cartoons/?mode=classic'
|
|
||||||
stripUrl = url + '&id=%s'
|
|
||||||
starter = bounceStarter(url, compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="">'))
|
|
||||||
imageSearch = compile(r'<img border="0" src="\s*(http://www.userfriendly.org/cartoons/archives/\d{2}\w{3}/.+?\.gif)"')
|
|
||||||
prevSearch = compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="Previous Cartoon">')
|
|
||||||
help = 'Index format: yyyymmdd'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def namer(cls, imageUrl, pageUrl):
|
|
||||||
return 'uf%s' % (getQueryParams(pageUrl)['id'][0][2:],)
|
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile
|
from re import compile
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
|
@ -23,28 +25,29 @@ class VGCats(_BasicScraper):
|
||||||
firstStripUrl = stripUrl % '0'
|
firstStripUrl = stripUrl % '0'
|
||||||
imageSearch = compile(tagre("img", "src", r'(images/\d{6}\.[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(images/\d{6}\.[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') +
|
prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') +
|
||||||
tagre("img", "src", r"back\.gif"))
|
tagre("img", "src", r"back\.gif"))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
class VGCatsSuper(VGCats):
|
|
||||||
name = 'VGCats/Super'
|
|
||||||
url = 'http://www.vgcats.com/super/'
|
|
||||||
stripUrl = url + '?strip_id=%s'
|
|
||||||
|
|
||||||
|
|
||||||
class VGCatsAdventure(VGCats):
|
class VGCatsAdventure(VGCats):
|
||||||
name = 'VGCats/Adventure'
|
name = 'VGCats/Adventure'
|
||||||
url = 'http://www.vgcats.com/ffxi/'
|
url = 'http://www.vgcats.com/ffxi/'
|
||||||
stripUrl = url + '?strip_id=%s'
|
stripUrl = url + '?strip_id=%s'
|
||||||
|
|
||||||
|
|
||||||
|
class VGCatsSuper(VGCats):
|
||||||
|
name = 'VGCats/Super'
|
||||||
|
url = 'http://www.vgcats.com/super/'
|
||||||
|
stripUrl = url + '?strip_id=%s'
|
||||||
|
|
||||||
|
|
||||||
class VictimsOfTheSystem(_BasicScraper):
|
class VictimsOfTheSystem(_BasicScraper):
|
||||||
url = 'http://www.votscomic.com/'
|
url = 'http://www.votscomic.com/'
|
||||||
stripUrl = url + '?id=%s.jpg'
|
stripUrl = url + '?id=%s.jpg'
|
||||||
firstStripUrl = stripUrl % '070103-002452'
|
firstStripUrl = stripUrl % '070103-002452'
|
||||||
imageSearch = compile(tagre("img", "src", r'(comicpro/strips/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(comicpro/strips/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(\?id=\d+-\d+\.jpg)') + "Previous")
|
prevSearch = compile(tagre("a", "href", r'(\?id=\d+-\d+\.jpg)') +
|
||||||
|
"Previous")
|
||||||
help = 'Index format: nnn-nnn'
|
help = 'Index format: nnn-nnn'
|
||||||
|
|
||||||
|
|
||||||
|
@ -52,7 +55,8 @@ class ViiviJaWagner(_BasicScraper):
|
||||||
url = 'http://www.hs.fi/viivijawagner/'
|
url = 'http://www.hs.fi/viivijawagner/'
|
||||||
stripUrl = None
|
stripUrl = None
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://hs\d+\.snstatic\.fi/webkuva/sarjis/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://hs\d+\.snstatic\.fi/webkuva/sarjis/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/viivijawagner/[^"]+)', before="prev-cm"))
|
prevSearch = compile(tagre("a", "href", r'(/viivijawagner/[^"]+)',
|
||||||
|
before="prev-cm"))
|
||||||
help = 'Index format: none'
|
help = 'Index format: none'
|
||||||
lang = 'fi'
|
lang = 'fi'
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape, IGNORECASE
|
from re import compile, escape, IGNORECASE
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
|
@ -24,7 +26,8 @@ class WastedTalent(_BasicScraper):
|
||||||
stripUrl = url + 'comic/%s'
|
stripUrl = url + 'comic/%s'
|
||||||
firstStripUrl = stripUrl % 'anime-crack'
|
firstStripUrl = stripUrl % 'anime-crack'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.wastedtalent\.ca/sites/default/files/imagecache/comic_full/comics/\d+/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.wastedtalent\.ca/sites/default/files/imagecache/comic_full/comics/\d+/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', after="comic_prev"))
|
prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)',
|
||||||
|
after="comic_prev"))
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
|
||||||
|
@ -50,7 +53,8 @@ class WebDesignerCOTW(_BasicScraper):
|
||||||
compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics/\d+\.[^"]+)')),
|
compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics/\d+\.[^"]+)')),
|
||||||
)
|
)
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl, before='prev', quote="'"))
|
prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl,
|
||||||
|
before='prev', quote="'"))
|
||||||
help = 'Index format: yyyy/mm/stripname'
|
help = 'Index format: yyyy/mm/stripname'
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
|
@ -78,8 +82,10 @@ class Weregeek(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '2006/11/27/'
|
firstStripUrl = stripUrl % '2006/11/27/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src",
|
||||||
prevSearch = compile(tagre("a", "href", r'((%s)?(/)?\d+/\d+/\d+/)'% rurl)+'\s*'+ tagre('img', 'src', '[^"]*previous_day.gif'))
|
r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'((%s)?/?\d+/\d+/\d+/)' % rurl) +
|
||||||
|
'\s*' + tagre('img', 'src', '[^"]*previous_day.gif'))
|
||||||
help = 'Index format: yyyy/mm/dd'
|
help = 'Index format: yyyy/mm/dd'
|
||||||
|
|
||||||
|
|
||||||
|
@ -108,7 +114,8 @@ class Whomp(_BasicScraper):
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '2010/06/14/06142010'
|
firstStripUrl = stripUrl % '2010/06/14/06142010'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="navi-prev"))
|
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
|
||||||
|
after="navi-prev"))
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
|
||||||
|
@ -118,7 +125,8 @@ class WhyTheLongFace(_BasicScraper):
|
||||||
url = baseUrl + 'wtlf200709.html'
|
url = baseUrl + 'wtlf200709.html'
|
||||||
stripUrl = baseUrl + 'wtlf%s.html'
|
stripUrl = baseUrl + 'wtlf%s.html'
|
||||||
firstStripUrl = stripUrl % '200306'
|
firstStripUrl = stripUrl % '200306'
|
||||||
imageSearch = compile(r'<img src="(%swtlf.+?|lf\d+.\w{1,4})"' % rurl, IGNORECASE)
|
imageSearch = compile(r'<img src="(%swtlf.+?|lf\d+.\w{1,4})"' % rurl,
|
||||||
|
IGNORECASE)
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ')
|
prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ')
|
||||||
help = 'Index format: yyyymm'
|
help = 'Index format: yyyymm'
|
||||||
|
@ -129,7 +137,8 @@ class Wigu(_BasicScraper):
|
||||||
stripUrl = url + 'oc/index.php?comic=%s'
|
stripUrl = url + 'oc/index.php?comic=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/oc/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(/oc/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/oc/index\.php\?comic=\d+)', after="go back"))
|
prevSearch = compile(tagre("a", "href", r'(/oc/index\.php\?comic=\d+)',
|
||||||
|
after="go back"))
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
@ -138,9 +147,11 @@ class Wonderella(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '2006/09/09/the-torment-of-a-thousand-yesterdays'
|
firstStripUrl = stripUrl % '2006/09/09/the-torment-of-a-thousand-yesterdays'
|
||||||
imageSearch = compile(tagre("div", "id", r"comic", quote=r'["\']') + r"\s*" +
|
imageSearch = compile(tagre("div", "id", r"comic", quote=r'["\']') +
|
||||||
tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
r"\s*" +
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
|
tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
|
||||||
|
after="prev"))
|
||||||
help = 'Index format: yyyy/mm/dd/name'
|
help = 'Index format: yyyy/mm/dd/name'
|
||||||
|
|
||||||
|
|
||||||
|
@ -187,6 +198,13 @@ class WorldOfMrToast(_BasicScraper):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class WorldOfWarcraftEh(_BasicScraper):
|
||||||
|
url = 'http://woweh.com/'
|
||||||
|
stripUrl = None
|
||||||
|
imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
|
||||||
|
prevSearch = compile(r'woweh.com/(\?p=.+:?)".+:?="prev')
|
||||||
|
|
||||||
|
|
||||||
class WormWorldSaga(_BasicScraper):
|
class WormWorldSaga(_BasicScraper):
|
||||||
url = 'http://www.wormworldsaga.com/'
|
url = 'http://www.wormworldsaga.com/'
|
||||||
stripUrl = url + 'chapters/%s/index.php'
|
stripUrl = url + 'chapters/%s/index.php'
|
||||||
|
@ -214,15 +232,17 @@ class WormWorldSaga(_BasicScraper):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class WormWorldSagaFrench(WormWorldSaga):
|
||||||
|
lang = 'fr'
|
||||||
|
|
||||||
|
|
||||||
class WormWorldSagaGerman(WormWorldSaga):
|
class WormWorldSagaGerman(WormWorldSaga):
|
||||||
lang = 'de'
|
lang = 'de'
|
||||||
|
|
||||||
|
|
||||||
class WormWorldSagaSpanish(WormWorldSaga):
|
class WormWorldSagaSpanish(WormWorldSaga):
|
||||||
lang = 'es'
|
lang = 'es'
|
||||||
|
|
||||||
class WormWorldSagaFrench(WormWorldSaga):
|
|
||||||
lang = 'fr'
|
|
||||||
|
|
||||||
|
|
||||||
class WotNow(_BasicScraper):
|
class WotNow(_BasicScraper):
|
||||||
url = 'http://shadowburn.binmode.com/wotnow/'
|
url = 'http://shadowburn.binmode.com/wotnow/'
|
||||||
|
@ -231,11 +251,3 @@ class WotNow(_BasicScraper):
|
||||||
imageSearch = compile(r'<IMG SRC="(comics/.+?)"')
|
imageSearch = compile(r'<IMG SRC="(comics/.+?)"')
|
||||||
prevSearch = compile(r'<A HREF="(.+?)"><IMG SRC="images/b_prev.gif" ')
|
prevSearch = compile(r'<A HREF="(.+?)"><IMG SRC="images/b_prev.gif" ')
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
# XXX disallowed by robots.txt
|
|
||||||
class _WorldOfWarcraftEh(_BasicScraper):
|
|
||||||
url = 'http://woweh.com/'
|
|
||||||
stripUrl = None
|
|
||||||
imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
|
|
||||||
prevSearch = compile(r'woweh.com/(\?p=.+:?)".+:?="prev')
|
|
||||||
|
|
Loading…
Reference in a new issue