Remove make_scraper for most WordPress comics.
- Drop KatzenfutterGeleespritzer because of its robots.txt. - Move all WordPress/ComicPress scrapers into the alphabetical plugin files. - Move _WordPressScraper & _ComicPressScraper into common.py. - Some smaller PEP8 fixes.
This commit is contained in:
parent
a7b6599cd4
commit
bb1f20d867
17 changed files with 268 additions and 103 deletions
|
@ -8,18 +8,22 @@ from re import compile, escape, MULTILINE
|
|||
from ..util import tagre
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import regexNamer, bounceStarter, indirectStarter
|
||||
from .common import _WordPressScraper, _ComicPressScraper
|
||||
|
||||
|
||||
class AbstruseGoose(_BasicScraper):
|
||||
url = 'http://abstrusegoose.com/'
|
||||
rurl = escape(url)
|
||||
starter = bounceStarter(url, compile(tagre('a', 'href', r'(%s\d+)' % rurl)+"Next »"))
|
||||
starter = bounceStarter(
|
||||
url, compile(tagre('a', 'href', r'(%s\d+)' % rurl) + "Next »"))
|
||||
stripUrl = url + '%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = compile(tagre('img', 'src',
|
||||
r'(http://abstrusegoose\.com/strips/[^<>"]+)'))
|
||||
prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'« Previous')
|
||||
nextSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'Next »')
|
||||
prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) +
|
||||
r'« Previous')
|
||||
nextSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) +
|
||||
r'Next »')
|
||||
help = 'Index format: n (unpadded)'
|
||||
textSearch = compile(tagre("img", "title", r'([^"]+)'))
|
||||
|
||||
|
@ -122,7 +126,7 @@ class AirForceBlues(_BasicScraper):
|
|||
class ALessonIsLearned(_BasicScraper):
|
||||
url = 'http://www.alessonislearned.com/'
|
||||
prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)",
|
||||
quote="'")+r"[^>]+previous")
|
||||
quote="'") + r"[^>]+previous")
|
||||
starter = indirectStarter(url, prevSearch)
|
||||
stripUrl = url + 'index.php?comic=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
|
@ -130,6 +134,12 @@ class ALessonIsLearned(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class Alice(_ComicPressScraper):
    # Image and previous-link lookups come from _ComicPressScraper.
    url = 'http://www.alicecomics.com/'
    # The starting strip is found indirectly: load `url` and follow the
    # anchor whose text is exactly "Latest Alice!".
    starter = indirectStarter(url, '//a[text()="Latest Alice!"]')
|
||||
|
||||
|
||||
class AlienLovesPredator(_BasicScraper):
|
||||
url = 'http://alienlovespredator.com/'
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -220,12 +230,17 @@ class AmazingSuperPowers(_BasicScraper):
|
|||
)
|
||||
|
||||
|
||||
class Amya(_WordPressScraper):
    # All search expressions are inherited from _WordPressScraper; only the
    # site URL is specific to this comic.
    url = 'http://www.amyachronicles.com/'
|
||||
|
||||
|
||||
class Angband(_BasicScraper):
    # Regex-driven scraper; strip pages are addressed by date through
    # view.php?date=<yyyy-mm-dd>.
    url = 'http://angband.calamarain.net/'
    stripUrl = url + 'view.php?date=%s'
    firstStripUrl = stripUrl % '2005-12-30'
    # Captures the relative image path starting with "comics/Scroll".
    imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)'))
    # Captures the view.php?date=... href of the link that is directly
    # followed by the text "Previous".
    prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)') +
                         "Previous")
    help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
|
@ -233,7 +248,7 @@ class Angels2200(_BasicScraper):
|
|||
url = 'http://www.janahoffmann.com/angels/'
|
||||
stripUrl = url + '%s'
|
||||
imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^']+)", quote="'"))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)')+"« Previous")
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "« Previous")
|
||||
help = 'Index format: yyyy/mm/dd/part-<n>-comic-<n>'
|
||||
|
||||
|
||||
|
@ -290,7 +305,8 @@ class ASkeweredParadise(_BasicScraper):
|
|||
stripUrl = url + 'comic/%s'
|
||||
firstStripUrl = stripUrl % '001'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+'))
|
||||
prevSearch = compile(tagre("a", "href", "(/comic/\d+)")+r"[^>]+Previous")
|
||||
prevSearch = compile(tagre("a", "href", "(/comic/\d+)") +
|
||||
r"[^>]+Previous")
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
@ -306,8 +322,8 @@ class ASofterWorld(_ParserScraper):
|
|||
class AstronomyPOTD(_BasicScraper):
|
||||
baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/'
|
||||
url = baseUrl + 'astropix.html'
|
||||
starter = bounceStarter(url,
|
||||
compile(tagre("a", "href", r'(ap\d{6}\.html)') + "></a>"))
|
||||
starter = bounceStarter(
|
||||
url, compile(tagre("a", "href", r'(ap\d{6}\.html)') + "></a>"))
|
||||
stripUrl = baseUrl + 'ap%s.html'
|
||||
firstStripUrl = stripUrl % '061012'
|
||||
imageSearch = compile(tagre("a", "href", r'(image/\d{4}/[^"]+)'))
|
||||
|
@ -328,3 +344,7 @@ class AstronomyPOTD(_BasicScraper):
|
|||
def namer(cls, imageUrl, pageUrl):
|
||||
return '%s-%s' % (pageUrl.split('/')[-1].split('.')[0][2:],
|
||||
imageUrl.split('/')[-1].split('.')[0])
|
||||
|
||||
|
||||
class AxeCop(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://axecop.com/comic/season-two/'
|
||||
|
|
|
@ -4,11 +4,13 @@
|
|||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile, escape
|
||||
|
||||
from ..util import tagre, getPageContent
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from .common import _WordPressScraper, _ComicPressScraper
|
||||
|
||||
|
||||
class BackwaterPlanet(_BasicScraper):
|
||||
|
@ -38,6 +40,12 @@ class BadMachinery(_BasicScraper):
|
|||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class Bardsworth(_WordPressScraper):
    # Image and previous-link lookups come from _WordPressScraper.
    url = 'http://www.bardsworth.com/'
    # The starting strip is found indirectly: load `url` and follow the
    # anchor carrying rel="bookmark".
    starter = indirectStarter(url, '//a[@rel="bookmark"]')
|
||||
|
||||
|
||||
class Baroquen(_BasicScraper):
|
||||
url = 'http://www.baroquencomics.com/'
|
||||
rurl = escape(url)
|
||||
|
@ -162,6 +170,11 @@ class Blip(_BasicScraper):
|
|||
return prevUrl.replace("www.blipcomic.com", "blipcomic.com")
|
||||
|
||||
|
||||
class BloodBound(_WordPressScraper):
    # Search expressions are inherited from _WordPressScraper.
    url = 'http://bloodboundcomic.com/'
    # Built from `url` so the host name is written only once.
    firstStripUrl = url + 'comic/06112006/'
|
||||
|
||||
|
||||
class BloomingFaeries(_BasicScraper):
|
||||
adult = True
|
||||
url = 'http://www.bloomingfaeries.com/'
|
||||
|
@ -255,6 +268,10 @@ class BoyOnAStickAndSlither(_BasicScraper):
|
|||
return pageUrl.rsplit('/')[-1]
|
||||
|
||||
|
||||
class BratHalla(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://brat-halla.com/'
|
||||
|
||||
|
||||
class BrentalFloss(_BasicScraper):
|
||||
url = 'http://brentalflossthecomic.com/'
|
||||
stripUrl = url + '?id=%s'
|
||||
|
@ -313,6 +330,19 @@ class Brink(_BasicScraper):
|
|||
help = 'Index format: number'
|
||||
|
||||
|
||||
class BroodHollow(_WordPressScraper):
    # Search expressions are inherited from _WordPressScraper.
    url = 'http://broodhollow.chainsawsuit.com/'
    # Built from `url`; note the path has no trailing slash.
    firstStripUrl = url + 'page/2012/10/06/book-1-curious-little-thing'
|
||||
|
||||
|
||||
class Buni(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://www.bunicomic.com/'
|
||||
|
||||
|
||||
class BusinessCat(_ComicPressScraper):
    # Uses the inherited _ComicPressScraper search expressions unchanged.
    url = 'http://www.businesscat.happyjar.com/'
|
||||
|
||||
|
||||
class ButtercupFestival(_ParserScraper):
|
||||
url = 'http://www.buttercupfestival.com/'
|
||||
stripUrl = url + '%s.htm'
|
||||
|
|
|
@ -10,7 +10,7 @@ from re import compile, escape
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import bounceStarter, indirectStarter
|
||||
from ..util import tagre
|
||||
from .wordpress import _WordpressScraper
|
||||
from .common import _WordPressScraper, _ComicPressScraper
|
||||
|
||||
|
||||
class Caggage(_BasicScraper):
|
||||
|
@ -19,7 +19,8 @@ class Caggage(_BasicScraper):
|
|||
stripUrl = url + 'archives/%s'
|
||||
firstStripUrl = stripUrl % '77'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="prev"))
|
||||
prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl,
|
||||
after="prev"))
|
||||
help = 'Index format: number'
|
||||
|
||||
|
||||
|
@ -102,7 +103,7 @@ class CatAndGirl(_BasicScraper):
|
|||
stripUrl = url + '?p=%s'
|
||||
firstStripUrl = stripUrl % '1602'
|
||||
imageSearch = compile(tagre("img", "src", r'(%sarchive/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)')+r"[^<]+Previous</a>")
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + r"[^<]+Previous</a>")
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
|
@ -112,11 +113,19 @@ class CatAndGirl(_BasicScraper):
|
|||
)
|
||||
|
||||
|
||||
class Catena(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://catenamanor.com/'
|
||||
|
||||
|
||||
class CatNine(_WordPressScraper):
    # Search expressions are inherited from _WordPressScraper.
    # NOTE: `url` has no trailing slash, so the path below starts with one.
    url = 'http://cat-nine.net'
    firstStripUrl = url + '/comic/episode-1/first-day-for-everything/'
|
||||
|
||||
|
||||
class CatsAndCameras(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://catsncameras.com/'
|
||||
|
||||
|
||||
class CatVersusHuman(_ParserScraper):
|
||||
url = 'http://www.catversushuman.com'
|
||||
multipleImagesPerStrip = True
|
||||
|
@ -277,6 +286,19 @@ class CorydonCafe(_ParserScraper):
|
|||
return pageUrl.split('/')[-1].split('.')[0]
|
||||
|
||||
|
||||
class CourtingDisaster(_WordPressScraper):
    # Search expressions are inherited from _WordPressScraper.
    url = 'http://www.courting-disaster.com/'
    # Built from `url` so the host name is written only once.
    firstStripUrl = url + 'comic/courting-disaster-17/'
|
||||
|
||||
|
||||
class CowboyJedi(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://www.cowboyjedi.com/'
|
||||
|
||||
|
||||
class CraftedFables(_ComicPressScraper):
    # Uses the inherited _ComicPressScraper search expressions unchanged.
    url = 'http://www.caf-fiends.net/comicpress/'
|
||||
|
||||
|
||||
class CrapIDrewOnMyLunchBreak(_BasicScraper):
|
||||
url = 'http://crap.jinwicked.com/'
|
||||
stripUrl = url + '%s/'
|
||||
|
|
22
dosagelib/plugins/common.py
Normal file
22
dosagelib/plugins/common.py
Normal file
|
@ -0,0 +1,22 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from ..scraper import _ParserScraper
|
||||
|
||||
# Common base classes for comics with the same structure (same hosting
|
||||
# software, for example) go here. Since those are shared by many modules,
|
||||
# please don't use lists of expressions, as that makes it hard to track which
|
||||
# expression is for which comics.
|
||||
|
||||
|
||||
class _WordPressScraper(_ParserScraper):
    # Shared XPath defaults; subclasses typically supply just `url` and
    # override an expression only where their site differs.

    # Every <img> anywhere below the <div id="comic"> element.
    imageSearch = '//div[@id="comic"]//img'
    # <a> elements whose space-separated class list contains the token
    # "comic-nav-previous". Padding @class with spaces on both sides stops
    # longer class names that merely contain that text from matching.
    prevSearch = "//a[contains(concat(' ', @class, ' '), ' comic-nav-previous ')]"
|
||||
|
||||
|
||||
class _ComicPressScraper(_WordPressScraper):
    # Keeps the inherited imageSearch and replaces only the previous-link
    # lookup: <a> elements whose class list contains the token
    # "navi-prev-in" (whole-token match via the space-padding idiom).
    prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev-in ')]"
|
|
@ -9,6 +9,7 @@ from re import compile, escape, IGNORECASE
|
|||
from ..util import tagre
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from .common import _WordPressScraper
|
||||
|
||||
|
||||
class FalconTwin(_BasicScraper):
|
||||
|
@ -129,6 +130,10 @@ class ForLackOfABetterComic(_BasicScraper):
|
|||
help = 'Index format: number'
|
||||
|
||||
|
||||
class FowlLanguage(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://www.fowllanguagecomics.com/'
|
||||
|
||||
|
||||
class Fragile(_ParserScraper):
|
||||
url = 'http://www.fragilestory.com/'
|
||||
imageSearch = '//div[@id="comic_strip"]/a[@class="nobg"]/img'
|
||||
|
@ -153,10 +158,10 @@ class FredoAndPidjin(_BasicScraper):
|
|||
compile(tagre('img', 'src', '(http://cdn\.pidjin\.net/wp-content/uploads/old/[^"]+\.[a-z]+)')),
|
||||
)
|
||||
multipleImagesPerStrip = True
|
||||
prevSearch = compile(tagre('a', 'href', '([^"]+)')+"Prev</a>")
|
||||
prevSearch = compile(tagre('a', 'href', '([^"]+)') + "Prev</a>")
|
||||
starter = indirectStarter(
|
||||
url,
|
||||
compile(tagre('a', 'href', "("+url+r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))
|
||||
compile(tagre('a', 'href', "(" + url + r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))
|
||||
|
||||
|
||||
class Freefall(_BasicScraper):
|
||||
|
|
|
@ -1,10 +1,15 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile, escape
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
from ..helpers import bounceStarter
|
||||
from .common import _WordPressScraper
|
||||
|
||||
|
||||
class HagarTheHorrible(_BasicScraper):
|
||||
|
@ -28,15 +33,20 @@ class HagarTheHorrible(_BasicScraper):
|
|||
return starturl
|
||||
|
||||
|
||||
class HappyJar(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://www.happyjar.com/'
|
||||
|
||||
|
||||
class HarkAVagrant(_BasicScraper):
|
||||
url = 'http://www.harkavagrant.com/'
|
||||
rurl = escape(url)
|
||||
starter = bounceStarter(url,
|
||||
compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
|
||||
starter = bounceStarter(
|
||||
url, compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
|
||||
tagre("img", "src", "buttonnext.png")))
|
||||
stripUrl = url + 'index.php?id=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl, after='BORDER'))
|
||||
imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl,
|
||||
after='BORDER'))
|
||||
prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
|
||||
tagre("img", "src", "buttonprevious.png"))
|
||||
help = 'Index format: number'
|
||||
|
@ -48,10 +58,16 @@ class HarkAVagrant(_BasicScraper):
|
|||
return '%s-%s' % (num, filename)
|
||||
|
||||
|
||||
class Hipsters(_WordPressScraper):
    # Search expressions are inherited from _WordPressScraper.
    url = 'http://www.hipsters-comic.com/'
    # Built from `url` so the host name is written only once.
    firstStripUrl = url + 'comic/hip01/'
|
||||
|
||||
|
||||
class HorribleVille(_BasicScraper):
    # Regex-driven scraper; strip pages live at d/<yyyymmdd>.html.
    url = 'http://horribleville.com/'
    stripUrl = url + 'd/%s.html'
    firstStripUrl = stripUrl % '20051220'
    # Captures the site-relative image path under /comics/.
    imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
    # Captures the /d/... href of the link that is directly followed by the
    # /images/previous.png button image.
    prevSearch = compile(tagre("a", "href", r'(/d/[^"]+)') +
                         tagre("img", "src", r'/images/previous\.png'))
    help = 'Index format: yyyymmdd'
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
from re import compile, escape
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper
|
||||
|
||||
|
||||
class IAmArg(_BasicScraper):
|
||||
|
@ -27,6 +30,10 @@ class ICanBarelyDraw(_BasicScraper):
|
|||
help = 'Index format: number'
|
||||
|
||||
|
||||
class IDreamOfAJeanieBottle(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://jeaniebottle.com/'
|
||||
|
||||
|
||||
class InternetWebcomic(_BasicScraper):
|
||||
url = 'http://www.internet-webcomic.com/'
|
||||
rurl = escape(url)
|
||||
|
@ -44,3 +51,7 @@ class IrregularWebcomic(_BasicScraper):
|
|||
imageSearch = compile(r'<img .*src="(.*comics/.*(png|jpg|gif))".*>')
|
||||
prevSearch = compile(r'<a href="(/\d+\.html|/cgi-bin/comic\.pl\?comic=\d+)">Previous ')
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class ItsWalky(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://www.itswalky.com/'
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
from re import compile, escape, IGNORECASE
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
|
|
|
@ -4,10 +4,12 @@
|
|||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile, escape, IGNORECASE
|
||||
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper
|
||||
|
||||
|
||||
class MacHall(_BasicScraper):
|
||||
|
@ -95,6 +97,10 @@ class MaxOveracts(_ParserScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class Meek(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://www.meekcomic.com/'
|
||||
|
||||
|
||||
class MegaTokyo(_BasicScraper):
|
||||
url = 'http://megatokyo.com/'
|
||||
stripUrl = url + 'strip/%s'
|
||||
|
@ -104,6 +110,14 @@ class MegaTokyo(_BasicScraper):
|
|||
help = 'Index format: nnnn'
|
||||
|
||||
|
||||
class Meiosis(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://meiosiswebcomic.com/'
|
||||
|
||||
|
||||
class Melonpool(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://www.melonpool.com/'
|
||||
|
||||
|
||||
class MenageA3(_BasicScraper):
|
||||
adult = True
|
||||
url = 'http://www.ma3comic.com/'
|
||||
|
@ -124,6 +138,12 @@ class Misfile(_BasicScraper):
|
|||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
class MistyTheMouse(_WordPressScraper):
    # imageSearch is inherited from _WordPressScraper.
    url = 'http://www.mistythemouse.com/'
    # Overrides the inherited previous-link XPath: here the link is the
    # anchor carrying rel="prev".
    prevSearch = '//a[@rel="prev"]'
    # Built from `url`; the first strip is addressed by query string.
    firstStripUrl = url + '?p=12'
|
||||
|
||||
|
||||
class MonsieurLeChien(_BasicScraper):
|
||||
url = 'http://www.monsieur-le-chien.fr/'
|
||||
stripUrl = url + 'index.php?planche=%s'
|
||||
|
|
|
@ -8,6 +8,7 @@ from re import compile, escape
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper
|
||||
|
||||
|
||||
class Namesake(_BasicScraper):
|
||||
|
@ -30,7 +31,7 @@ class NamirDeiter(_BasicScraper):
|
|||
stripUrl = url + 'comics/index.php?date=%s'
|
||||
firstStripUrl = stripUrl % '19991128'
|
||||
imageSearch = compile(tagre("img", "src", r"'?(%scomics/\d+\.jpg)'?" % rurl, quote=""))
|
||||
prevSearch = compile(tagre("a", "href", r'(%scomics/index\.php\?date=\d+)' % rurl, quote="'")+"Previous")
|
||||
prevSearch = compile(tagre("a", "href", r'(%scomics/index\.php\?date=\d+)' % rurl, quote="'") + "Previous")
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
@ -50,6 +51,11 @@ class NatalieDee(_BasicScraper):
|
|||
return '%s-%s' % (date, filename)
|
||||
|
||||
|
||||
class Nedroid(_WordPressScraper):
    # imageSearch is inherited from _WordPressScraper.
    url = 'http://nedroid.com/'
    # Overrides the inherited previous-link XPath: here the link is the
    # anchor carrying rel="prev".
    prevSearch = '//a[@rel="prev"]'
|
||||
|
||||
|
||||
class NekkoAndJoruba(_BasicScraper):
|
||||
url = 'http://www.nekkoandjoruba.com/'
|
||||
stripUrl = url + '?p=%s'
|
||||
|
@ -76,6 +82,11 @@ class NeoEarth(_BasicScraper):
|
|||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
class NerfNow(_WordPressScraper):
    # imageSearch is inherited from _WordPressScraper.
    url = 'https://www.nerfnow.com/'
    # Overrides the inherited previous-link XPath: the anchor that is a
    # direct child of <li id="nav_previous">.
    prevSearch = '//li[@id="nav_previous"]/a'
|
||||
|
||||
|
||||
class NewAdventuresOfBobbin(_BasicScraper):
|
||||
url = 'http://www.bobbin-comic.com/bobbin_strips/'
|
||||
imageSearch = compile(tagre("a", "href", r'(\d+\.gif)'))
|
||||
|
@ -103,6 +114,11 @@ class NichtLustig(_BasicScraper):
|
|||
url, compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)')))
|
||||
|
||||
|
||||
class Nicky510(_WordPressScraper):
    # imageSearch is inherited from _WordPressScraper.
    url = 'http://www.nickyitis.com/'
    # Overrides the inherited previous-link XPath: anchors whose class list
    # contains the token "navi-prev" (whole-token match via space padding).
    prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev ')]"
|
||||
|
||||
|
||||
class Nimona(_BasicScraper):
|
||||
url = 'http://gingerhaze.com/nimona/'
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -146,7 +162,8 @@ class NotInventedHere(_BasicScraper):
|
|||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % 'on/2009-9-21'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://thiswas.notinventedhe.re/on/\d+-\d+-\d+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/on/\d+-\d+-\d+)')+'\s*Previous')
|
||||
prevSearch = compile(tagre("a", "href", r'(/on/\d+-\d+-\d+)') +
|
||||
'\s*Previous')
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
|
|
|
@ -1,8 +1,13 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from re import compile, escape
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from ..util import tagre
|
||||
from ..scraper import _BasicScraper
|
||||
from .common import _WordPressScraper
|
||||
|
||||
|
||||
class NineteenSeventySeven(_WordPressScraper):
    # Search expressions are inherited from _WordPressScraper.
    # `name` is set explicitly to '1977'; presumably because a Python class
    # name cannot begin with a digit - confirm how the scraper base class
    # consumes `name`.
    name = '1977'
    url = 'http://1977thecomic.com/'
|
||||
|
|
|
@ -4,10 +4,13 @@
|
|||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile, escape
|
||||
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper
|
||||
|
||||
|
||||
class OctopusPie(_ParserScraper):
|
||||
|
@ -26,7 +29,8 @@ class OddFish(_BasicScraper):
|
|||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % 'tv-tentacles'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev"))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
|
||||
after="navi-prev"))
|
||||
help = 'Index format: stripname'
|
||||
|
||||
|
||||
|
@ -65,7 +69,8 @@ class OkCancel(_BasicScraper):
|
|||
stripUrl = url + 'comic/%s.html'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = compile(tagre("img", "src", r'(%sstrips/okcancel\d{8}\.gif)' % rurl))
|
||||
prevSearch = compile(tagre("div", "class", "previous") + tagre("a", "href", r'(%scomic/\d{1,4}\.html)' % rurl))
|
||||
prevSearch = compile(tagre("div", "class", "previous") +
|
||||
tagre("a", "href", r'(%scomic/\d{1,4}\.html)' % rurl))
|
||||
starter = indirectStarter(url, prevSearch)
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
@ -85,10 +90,16 @@ class OneQuestion(_BasicScraper):
|
|||
stripUrl = url + 'comic.php?strip_id=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = compile(tagre("img", "src", r'((?:\.\./)?istrip_files/strips/\d+\.\w{3,4})'))
|
||||
prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') + tagre("img", "src", r'img/arrow_prev\.jpg'))
|
||||
prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') +
|
||||
tagre("img", "src", r'img/arrow_prev\.jpg'))
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
class OnTheEdge(_WordPressScraper):
    # Search expressions are inherited from _WordPressScraper.
    url = 'http://ontheedgecomics.com/'
    # Built from `url` so the host name is written only once.
    firstStripUrl = url + 'comic/ote0001/'
|
||||
|
||||
|
||||
class OnTheFastrack(_BasicScraper):
|
||||
url = 'http://onthefastrack.com/'
|
||||
stripUrl = url + 'comics/%s'
|
||||
|
|
|
@ -8,6 +8,12 @@ from re import compile, escape
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import bounceStarter, queryNamer, indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper
|
||||
|
||||
|
||||
class PandyLand(_WordPressScraper):
    # Search expressions are inherited from _WordPressScraper.
    url = 'http://pandyland.net/'
    # Built from `url` so the host name is written only once.
    firstStripUrl = url + '1/'
|
||||
|
||||
|
||||
class ParadigmShift(_BasicScraper):
|
||||
|
|
|
@ -10,6 +10,7 @@ from datetime import datetime
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter, bounceStarter
|
||||
from ..util import tagre, getPageContent
|
||||
from .common import _WordPressScraper
|
||||
|
||||
|
||||
class SabrinaOnline(_BasicScraper):
|
||||
|
@ -42,6 +43,10 @@ class SafelyEndangered(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/stripname'
|
||||
|
||||
|
||||
class SailorsunOrg(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://sailorsun.org/'
|
||||
|
||||
|
||||
class SamAndFuzzy(_BasicScraper):
|
||||
url = 'http://www.samandfuzzy.com/'
|
||||
stripUrl = 'http://samandfuzzy.com/%s'
|
||||
|
@ -166,6 +171,11 @@ class SexyLosers(_BasicScraper):
|
|||
return index + '-' + title
|
||||
|
||||
|
||||
class Sharksplode(_WordPressScraper):
    # Image and previous-link lookups come from _WordPressScraper.
    url = 'http://sharksplode.com/'
    # XPath yielding the alt attribute of every <img> below <div id="comic">
    # (the same images the inherited imageSearch selects).
    textSearch = '//div[@id="comic"]//img/@alt'
|
||||
|
||||
|
||||
class Sheldon(_BasicScraper):
|
||||
url = 'http://www.sheldoncomics.com/'
|
||||
rurl = escape(url)
|
||||
|
@ -237,6 +247,10 @@ class SinFest(_BasicScraper):
|
|||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
class Sithrah(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://sithrah.com/'
|
||||
|
||||
|
||||
class SkinDeep(_BasicScraper):
|
||||
url = 'http://www.skindeepcomic.com/'
|
||||
stripUrl = url + 'archive/%s/'
|
||||
|
@ -271,6 +285,10 @@ class SleeplessDomain(_ParserScraper):
|
|||
return start + "-" + pageUrl.rsplit('/', 1)[-1]
|
||||
|
||||
|
||||
class SlightlyDamned(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://www.sdamned.com/'
|
||||
|
||||
|
||||
class SluggyFreelance(_BasicScraper):
|
||||
url = 'http://www.sluggy.com/'
|
||||
stripUrl = url + 'comics/archives/daily/%s'
|
||||
|
@ -445,6 +463,10 @@ class SpareParts(_BasicScraper):
|
|||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class SPQRBlues(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://spqrblues.com/IV/'
|
||||
|
||||
|
||||
class StandStillStaySilent(_ParserScraper):
|
||||
url = 'http://www.sssscomic.com/comic.php'
|
||||
rurl = escape(url)
|
||||
|
|
|
@ -8,6 +8,7 @@ from re import compile, escape, IGNORECASE
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper
|
||||
|
||||
|
||||
class TheBrads(_BasicScraper):
|
||||
|
@ -30,6 +31,10 @@ class TheDevilsPanties(_BasicScraper):
|
|||
help = 'Index format: number'
|
||||
|
||||
|
||||
class TheDreamlandChronicles(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://www.thedreamlandchronicles.com/'
|
||||
|
||||
|
||||
class TheGamerCat(_ParserScraper):
|
||||
url = "http://www.thegamercat.com/"
|
||||
stripUrl = url + "comic/%s/"
|
||||
|
@ -40,6 +45,10 @@ class TheGamerCat(_ParserScraper):
|
|||
help = 'Index format: stripname'
|
||||
|
||||
|
||||
class TheGentlemansArmchair(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://thegentlemansarmchair.com/'
|
||||
|
||||
|
||||
class TheLandscaper(_BasicScraper):
|
||||
url = 'http://landscaper.visual-assault.net/comic/latest'
|
||||
rurl = escape(url)
|
||||
|
@ -52,6 +61,10 @@ class TheLandscaper(_BasicScraper):
|
|||
help = 'Index format: name'
|
||||
|
||||
|
||||
class TheMelvinChronicles(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://melvin.jeaniebottle.com/'
|
||||
|
||||
|
||||
class TheNoob(_BasicScraper):
|
||||
url = 'http://www.thenoobcomic.com/index.php'
|
||||
stripUrl = url + '?pos=%s'
|
||||
|
|
|
@ -1,79 +1,19 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from dosagelib.helpers import indirectStarter
|
||||
from ..scraper import make_scraper, _ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..scraper import make_scraper
|
||||
from .common import _WordPressScraper
|
||||
|
||||
|
||||
class _WordpressScraper(_ParserScraper):
|
||||
imageSearch = ('//div[@id="comic"]//img',
|
||||
'//div[@class="webcomic-image"]//img')
|
||||
prevSearch = ("//a[contains(concat(' ', text(), ' '), ' Prev ')]",
|
||||
"//a[contains(concat(' ', text(), ' '), ' Previous ')]",
|
||||
"//a[contains(concat(' ', @class, ' '), ' navi-prev ')]",
|
||||
"//a[contains(concat(' ', @class, ' '), ' navi-prev-in ')]",
|
||||
"//a[contains(concat(' ', @class, ' '), ' navi-previous ')]",
|
||||
"//a[contains(concat(' ', @class, ' '), ' previous-webcomic-link ')]")
|
||||
|
||||
|
||||
def add(name, url, firstUrl=None, starter=None, textSearch=None, lang=None):
|
||||
def add(name, url, starter=None):
|
||||
attrs = dict(
|
||||
name=name,
|
||||
url=url
|
||||
)
|
||||
if lang:
|
||||
attrs['lang'] = lang
|
||||
if firstUrl:
|
||||
attrs['firstUrl'] = url + firstUrl
|
||||
if starter:
|
||||
attrs['starter'] = starter
|
||||
if textSearch:
|
||||
attrs['textSearch'] = textSearch
|
||||
globals()[name] = make_scraper(name, _WordpressScraper, **attrs)
|
||||
globals()[name] = make_scraper(name, _WordPressScraper, **attrs)
|
||||
|
||||
|
||||
class Amya(_WordpressScraper):
|
||||
url = 'http://www.amyachronicles.com/'
|
||||
|
||||
|
||||
add('1997', 'http://1977thecomic.com/')
|
||||
add('Alice', 'http://www.alicecomics.com/',
|
||||
starter=indirectStarter('http://www.alicecomics.com/', '//a[text()="Latest Alice!"]'))
|
||||
add('AxeCop', 'http://axecop.com/comic/season-two/')
|
||||
add('Bardsworth', 'http://www.bardsworth.com/')
|
||||
add('BloodBound', 'http://bloodboundcomic.com/', 'comic/06112006/')
|
||||
add('BratHalla', 'http://brat-halla.com/')
|
||||
add('BroodHollow', 'http://broodhollow.chainsawsuit.com/', 'page/2012/10/06/book-1-curious-little-thing')
|
||||
add('Buni', 'http://www.bunicomic.com/')
|
||||
add('BusinessCat', 'http://www.businesscat.happyjar.com/')
|
||||
add('Catena', 'http://catenamanor.com/')
|
||||
add('CatsAndCameras', 'http://catsncameras.com/')
|
||||
add('CraftedFables', 'http://www.caf-fiends.net/comicpress/')
|
||||
add('CourtingDisaster', 'http://www.courting-disaster.com/', 'comic/courting-disaster-17/')
|
||||
add('CowboyJedi', 'http://www.cowboyjedi.com/')
|
||||
add('FowlLanguage', 'http://www.fowllanguagecomics.com/')
|
||||
add('HappyJar', 'http://www.happyjar.com/')
|
||||
add('Hipsters', 'http://www.hipsters-comic.com/', 'comic/hip01/')
|
||||
add('IDreamOfAJeanieBottle', 'http://jeaniebottle.com/')
|
||||
add('ItsWalky', 'http://www.itswalky.com/')
|
||||
add('KatzenfutterGeleespritzer', 'http://www.katzenfuttergeleespritzer.de/', 'comics/gert-grendil/', lang='de')
|
||||
add('Meek', 'http://www.meekcomic.com/')
|
||||
add('Meiosis', 'http://meiosiswebcomic.com/')
|
||||
add('Melonpool', 'http://www.melonpool.com/')
|
||||
add('MistyTheMouse', 'http://www.mistythemouse.com/')
|
||||
add('Nedroid', 'http://nedroid.com/')
|
||||
add('NerfNow', 'https://www.nerfnow.com/')
|
||||
add('Nicky510', 'http://www.nickyitis.com/')
|
||||
add('OnTheEdge', 'http://ontheedgecomics.com/', 'comic/ote0001/')
|
||||
add('PandyLand', 'http://pandyland.net/', '1/')
|
||||
add('SailorsunOrg', 'http://sailorsun.org/')
|
||||
add('Sharksplode', 'http://sharksplode.com/', textSearch='//div[@id="comic"]//img/@alt')
|
||||
add('Sithrah', 'http://sithrah.com/')
|
||||
add('SlightlyDamned', 'http://www.sdamned.com/')
|
||||
add('SPQRBlues', 'http://spqrblues.com/IV/')
|
||||
add('TheDreamlandChronicles', 'http://www.thedreamlandchronicles.com/')
|
||||
add('TheGentlemansArmchair', 'http://thegentlemansarmchair.com/')
|
||||
add('TheMelvinChronicles', 'http://melvin.jeaniebottle.com/')
|
||||
add('YAFGC', 'http://yafgc.net/')
|
||||
|
||||
# all comics on HijiNKS ENSUE
|
||||
for (name, starterXPath) in [
|
||||
('HijinksEnsue', '//h4[text()="Read The Latest HijiNKS ENSUE"]/..//a'),
|
||||
|
|
|
@ -1,7 +1,11 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from re import compile
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
from __future__ import absolute_import, division, print_function
|
||||
from .common import _WordPressScraper
|
||||
|
||||
|
||||
class YAFGC(_WordPressScraper):
    # Uses the inherited _WordPressScraper search expressions unchanged.
    url = 'http://yafgc.net/'
|
||||
|
|
Loading…
Reference in a new issue