Remove make_scraper for most WordPress comics.

- Drop KatzenfutterGeleespritzer because of its robots.txt.
- Move all WordPress/ComicPress scrapers into alphabetical files.
- Move _WordPressScraper & _ComicPressScraper into common.py.
- Some smaller PEP8 fixes.
This commit is contained in:
Tobias Gruetzmacher 2016-04-02 00:14:31 +02:00
parent a7b6599cd4
commit bb1f20d867
17 changed files with 268 additions and 103 deletions

View file

@ -8,18 +8,22 @@ from re import compile, escape, MULTILINE
from ..util import tagre
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import regexNamer, bounceStarter, indirectStarter
from .common import _WordPressScraper, _ComicPressScraper
class AbstruseGoose(_BasicScraper):
url = 'http://abstrusegoose.com/'
rurl = escape(url)
starter = bounceStarter(url, compile(tagre('a', 'href', r'(%s\d+)' % rurl)+"Next »"))
starter = bounceStarter(
url, compile(tagre('a', 'href', r'(%s\d+)' % rurl) + "Next »"))
stripUrl = url + '%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre('img', 'src',
r'(http://abstrusegoose\.com/strips/[^<>"]+)'))
prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'&laquo; Previous')
nextSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'Next &raquo;')
prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) +
r'&laquo; Previous')
nextSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) +
r'Next &raquo;')
help = 'Index format: n (unpadded)'
textSearch = compile(tagre("img", "title", r'([^"]+)'))
@ -122,7 +126,7 @@ class AirForceBlues(_BasicScraper):
class ALessonIsLearned(_BasicScraper):
url = 'http://www.alessonislearned.com/'
prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)",
quote="'")+r"[^>]+previous")
quote="'") + r"[^>]+previous")
starter = indirectStarter(url, prevSearch)
stripUrl = url + 'index.php?comic=%s'
firstStripUrl = stripUrl % '1'
@ -130,6 +134,12 @@ class ALessonIsLearned(_BasicScraper):
help = 'Index format: nnn'
class Alice(_ComicPressScraper):
url = 'http://www.alicecomics.com/'
starter = indirectStarter('http://www.alicecomics.com/',
'//a[text()="Latest Alice!"]')
class AlienLovesPredator(_BasicScraper):
url = 'http://alienlovespredator.com/'
stripUrl = url + '%s/'
@ -220,12 +230,17 @@ class AmazingSuperPowers(_BasicScraper):
)
class Amya(_WordPressScraper):
url = 'http://www.amyachronicles.com/'
class Angband(_BasicScraper):
url = 'http://angband.calamarain.net/'
stripUrl = url + 'view.php?date=%s'
firstStripUrl = stripUrl % '2005-12-30'
imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)')+"Previous")
prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)') +
"Previous")
help = 'Index format: yyyy-mm-dd'
@ -233,7 +248,7 @@ class Angels2200(_BasicScraper):
url = 'http://www.janahoffmann.com/angels/'
stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^']+)", quote="'"))
prevSearch = compile(tagre("a", "href", r'([^"]+)')+"&laquo; Previous")
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "&laquo; Previous")
help = 'Index format: yyyy/mm/dd/part-<n>-comic-<n>'
@ -290,7 +305,8 @@ class ASkeweredParadise(_BasicScraper):
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % '001'
imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+'))
prevSearch = compile(tagre("a", "href", "(/comic/\d+)")+r"[^>]+Previous")
prevSearch = compile(tagre("a", "href", "(/comic/\d+)") +
r"[^>]+Previous")
help = 'Index format: nnn'
@ -306,8 +322,8 @@ class ASofterWorld(_ParserScraper):
class AstronomyPOTD(_BasicScraper):
baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/'
url = baseUrl + 'astropix.html'
starter = bounceStarter(url,
compile(tagre("a", "href", r'(ap\d{6}\.html)') + "&gt;</a>"))
starter = bounceStarter(
url, compile(tagre("a", "href", r'(ap\d{6}\.html)') + "&gt;</a>"))
stripUrl = baseUrl + 'ap%s.html'
firstStripUrl = stripUrl % '061012'
imageSearch = compile(tagre("a", "href", r'(image/\d{4}/[^"]+)'))
@ -328,3 +344,7 @@ class AstronomyPOTD(_BasicScraper):
def namer(cls, imageUrl, pageUrl):
return '%s-%s' % (pageUrl.split('/')[-1].split('.')[0][2:],
imageUrl.split('/')[-1].split('.')[0])
class AxeCop(_WordPressScraper):
url = 'http://axecop.com/comic/season-two/'

View file

@ -4,11 +4,13 @@
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape
from ..util import tagre, getPageContent
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from .common import _WordPressScraper, _ComicPressScraper
class BackwaterPlanet(_BasicScraper):
@ -38,6 +40,12 @@ class BadMachinery(_BasicScraper):
help = 'Index format: yyyymmdd'
class Bardsworth(_WordPressScraper):
url = 'http://www.bardsworth.com/'
starter = indirectStarter('http://www.bardsworth.com/',
'//a[@rel="bookmark"]')
class Baroquen(_BasicScraper):
url = 'http://www.baroquencomics.com/'
rurl = escape(url)
@ -162,6 +170,11 @@ class Blip(_BasicScraper):
return prevUrl.replace("www.blipcomic.com", "blipcomic.com")
class BloodBound(_WordPressScraper):
url = 'http://bloodboundcomic.com/'
firstStripUrl = 'http://bloodboundcomic.com/comic/06112006/'
class BloomingFaeries(_BasicScraper):
adult = True
url = 'http://www.bloomingfaeries.com/'
@ -255,6 +268,10 @@ class BoyOnAStickAndSlither(_BasicScraper):
return pageUrl.rsplit('/')[-1]
class BratHalla(_WordPressScraper):
url = 'http://brat-halla.com/'
class BrentalFloss(_BasicScraper):
url = 'http://brentalflossthecomic.com/'
stripUrl = url + '?id=%s'
@ -313,6 +330,19 @@ class Brink(_BasicScraper):
help = 'Index format: number'
class BroodHollow(_WordPressScraper):
url = 'http://broodhollow.chainsawsuit.com/'
firstStripUrl = 'http://broodhollow.chainsawsuit.com/page/2012/10/06/book-1-curious-little-thing'
class Buni(_WordPressScraper):
url = 'http://www.bunicomic.com/'
class BusinessCat(_ComicPressScraper):
url = 'http://www.businesscat.happyjar.com/'
class ButtercupFestival(_ParserScraper):
url = 'http://www.buttercupfestival.com/'
stripUrl = url + '%s.htm'

View file

@ -10,7 +10,7 @@ from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import bounceStarter, indirectStarter
from ..util import tagre
from .wordpress import _WordpressScraper
from .common import _WordPressScraper, _ComicPressScraper
class Caggage(_BasicScraper):
@ -19,7 +19,8 @@ class Caggage(_BasicScraper):
stripUrl = url + 'archives/%s'
firstStripUrl = stripUrl % '77'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="prev"))
prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl,
after="prev"))
help = 'Index format: number'
@ -102,7 +103,7 @@ class CatAndGirl(_BasicScraper):
stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '1602'
imageSearch = compile(tagre("img", "src", r'(%sarchive/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'([^"]+)')+r"[^<]+Previous</a>")
prevSearch = compile(tagre("a", "href", r'([^"]+)') + r"[^<]+Previous</a>")
help = 'Index format: n (unpadded)'
def shouldSkipUrl(self, url, data):
@ -112,11 +113,19 @@ class CatAndGirl(_BasicScraper):
)
class CatNine(_WordpressScraper):
class Catena(_WordPressScraper):
url = 'http://catenamanor.com/'
class CatNine(_WordPressScraper):
url = 'http://cat-nine.net'
firstStripUrl = 'http://cat-nine.net/comic/episode-1/first-day-for-everything/'
class CatsAndCameras(_WordPressScraper):
url = 'http://catsncameras.com/'
class CatVersusHuman(_ParserScraper):
url = 'http://www.catversushuman.com'
multipleImagesPerStrip = True
@ -277,6 +286,19 @@ class CorydonCafe(_ParserScraper):
return pageUrl.split('/')[-1].split('.')[0]
class CourtingDisaster(_WordPressScraper):
url = 'http://www.courting-disaster.com/'
firstStripUrl = 'http://www.courting-disaster.com/comic/courting-disaster-17/'
class CowboyJedi(_WordPressScraper):
url = 'http://www.cowboyjedi.com/'
class CraftedFables(_ComicPressScraper):
url = 'http://www.caf-fiends.net/comicpress/'
class CrapIDrewOnMyLunchBreak(_BasicScraper):
url = 'http://crap.jinwicked.com/'
stripUrl = url + '%s/'

View file

@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from ..scraper import _ParserScraper
# Common base classes for comics with the same structure (same hosting
# software, for example) go here. Since those are shared by many modules,
# please don't use lists of expressions, as that makes it hard to track which
# expression is for which comics.
class _WordPressScraper(_ParserScraper):
    # Shared base class for comics that use the WordPress page layout
    # (per the class name); subclasses normally only need to set `url`.

    # XPath for the "previous strip" link: an <a> whose class attribute
    # contains the whole token "comic-nav-previous". Padding @class with
    # spaces on both sides keeps longer class names that merely contain
    # this text from matching.
    prevSearch = "//a[contains(concat(' ', @class, ' '), ' comic-nav-previous ')]"

    # XPath for the comic image: any <img> nested below the <div> with
    # id="comic".
    imageSearch = '//div[@id="comic"]//img'
class _ComicPressScraper(_WordPressScraper):
    # Variant of _WordPressScraper that only overrides the "previous strip"
    # XPath; imageSearch is inherited unchanged from the parent class.
    # Matches an <a> whose class attribute contains the whole token
    # "navi-prev-in" (space padding avoids partial class-name matches).
    prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev-in ')]"

View file

@ -9,6 +9,7 @@ from re import compile, escape, IGNORECASE
from ..util import tagre
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from .common import _WordPressScraper
class FalconTwin(_BasicScraper):
@ -116,7 +117,7 @@ class FonFlatter(_BasicScraper):
self.stripUrl % "2006/09/21/danke",
self.stripUrl % "2006/08/23/zgf-zuweilen-gestellte-fragen",
self.stripUrl % "2005/10/19/naq-never-asked-questions",
)
)
class ForLackOfABetterComic(_BasicScraper):
@ -129,6 +130,10 @@ class ForLackOfABetterComic(_BasicScraper):
help = 'Index format: number'
class FowlLanguage(_WordPressScraper):
url = 'http://www.fowllanguagecomics.com/'
class Fragile(_ParserScraper):
url = 'http://www.fragilestory.com/'
imageSearch = '//div[@id="comic_strip"]/a[@class="nobg"]/img'
@ -153,10 +158,10 @@ class FredoAndPidjin(_BasicScraper):
compile(tagre('img', 'src', '(http://cdn\.pidjin\.net/wp-content/uploads/old/[^"]+\.[a-z]+)')),
)
multipleImagesPerStrip = True
prevSearch = compile(tagre('a', 'href', '([^"]+)')+"Prev</a>")
prevSearch = compile(tagre('a', 'href', '([^"]+)') + "Prev</a>")
starter = indirectStarter(
url,
compile(tagre('a', 'href', "("+url+r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))
compile(tagre('a', 'href', "(" + url + r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))
class Freefall(_BasicScraper):

View file

@ -1,10 +1,15 @@
# -*- coding: iso-8859-1 -*-
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..scraper import _BasicScraper
from ..util import tagre
from ..helpers import bounceStarter
from .common import _WordPressScraper
class HagarTheHorrible(_BasicScraper):
@ -28,17 +33,22 @@ class HagarTheHorrible(_BasicScraper):
return starturl
class HappyJar(_WordPressScraper):
url = 'http://www.happyjar.com/'
class HarkAVagrant(_BasicScraper):
url = 'http://www.harkavagrant.com/'
rurl = escape(url)
starter = bounceStarter(url,
compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
tagre("img", "src", "buttonnext.png")))
starter = bounceStarter(
url, compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
tagre("img", "src", "buttonnext.png")))
stripUrl = url + 'index.php?id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl, after='BORDER'))
imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl,
after='BORDER'))
prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
tagre("img", "src", "buttonprevious.png"))
tagre("img", "src", "buttonprevious.png"))
help = 'Index format: number'
@classmethod
@ -48,10 +58,16 @@ class HarkAVagrant(_BasicScraper):
return '%s-%s' % (num, filename)
class Hipsters(_WordPressScraper):
url = 'http://www.hipsters-comic.com/'
firstStripUrl = 'http://www.hipsters-comic.com/comic/hip01/'
class HorribleVille(_BasicScraper):
url = 'http://horribleville.com/'
stripUrl = url + 'd/%s.html'
firstStripUrl = stripUrl % '20051220'
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/d/[^"]+)') + tagre("img", "src", r'/images/previous\.png'))
prevSearch = compile(tagre("a", "href", r'(/d/[^"]+)') +
tagre("img", "src", r'/images/previous\.png'))
help = 'Index format: yyyymmdd'

View file

@ -1,10 +1,13 @@
# -*- coding: iso-8859-1 -*-
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape
from ..scraper import _BasicScraper
from ..util import tagre
from .common import _WordPressScraper
class IAmArg(_BasicScraper):
@ -27,6 +30,10 @@ class ICanBarelyDraw(_BasicScraper):
help = 'Index format: number'
class IDreamOfAJeanieBottle(_WordPressScraper):
url = 'http://jeaniebottle.com/'
class InternetWebcomic(_BasicScraper):
url = 'http://www.internet-webcomic.com/'
rurl = escape(url)
@ -44,3 +51,7 @@ class IrregularWebcomic(_BasicScraper):
imageSearch = compile(r'<img .*src="(.*comics/.*(png|jpg|gif))".*>')
prevSearch = compile(r'<a href="(/\d+\.html|/cgi-bin/comic\.pl\?comic=\d+)">Previous ')
help = 'Index format: nnn'
class ItsWalky(_WordPressScraper):
url = 'http://www.itswalky.com/'

View file

@ -3,6 +3,7 @@
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper
from ..util import tagre

View file

@ -4,10 +4,12 @@
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper, _ParserScraper
from ..util import tagre
from .common import _WordPressScraper
class MacHall(_BasicScraper):
@ -95,6 +97,10 @@ class MaxOveracts(_ParserScraper):
help = 'Index format: nnn'
class Meek(_WordPressScraper):
url = 'http://www.meekcomic.com/'
class MegaTokyo(_BasicScraper):
url = 'http://megatokyo.com/'
stripUrl = url + 'strip/%s'
@ -104,6 +110,14 @@ class MegaTokyo(_BasicScraper):
help = 'Index format: nnnn'
class Meiosis(_WordPressScraper):
url = 'http://meiosiswebcomic.com/'
class Melonpool(_WordPressScraper):
url = 'http://www.melonpool.com/'
class MenageA3(_BasicScraper):
adult = True
url = 'http://www.ma3comic.com/'
@ -124,6 +138,12 @@ class Misfile(_BasicScraper):
help = 'Index format: yyyy-mm-dd'
class MistyTheMouse(_WordPressScraper):
url = 'http://www.mistythemouse.com/'
prevSearch = '//a[@rel="prev"]'
firstStripUrl = 'http://www.mistythemouse.com/?p=12'
class MonsieurLeChien(_BasicScraper):
url = 'http://www.monsieur-le-chien.fr/'
stripUrl = url + 'index.php?planche=%s'

View file

@ -8,6 +8,7 @@ from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from ..util import tagre
from .common import _WordPressScraper
class Namesake(_BasicScraper):
@ -30,7 +31,7 @@ class NamirDeiter(_BasicScraper):
stripUrl = url + 'comics/index.php?date=%s'
firstStripUrl = stripUrl % '19991128'
imageSearch = compile(tagre("img", "src", r"'?(%scomics/\d+\.jpg)'?" % rurl, quote=""))
prevSearch = compile(tagre("a", "href", r'(%scomics/index\.php\?date=\d+)' % rurl, quote="'")+"Previous")
prevSearch = compile(tagre("a", "href", r'(%scomics/index\.php\?date=\d+)' % rurl, quote="'") + "Previous")
help = 'Index format: yyyymmdd'
@ -50,6 +51,11 @@ class NatalieDee(_BasicScraper):
return '%s-%s' % (date, filename)
class Nedroid(_WordPressScraper):
url = 'http://nedroid.com/'
prevSearch = '//a[@rel="prev"]'
class NekkoAndJoruba(_BasicScraper):
url = 'http://www.nekkoandjoruba.com/'
stripUrl = url + '?p=%s'
@ -76,6 +82,11 @@ class NeoEarth(_BasicScraper):
help = 'Index format: yyyy-mm-dd'
class NerfNow(_WordPressScraper):
url = 'https://www.nerfnow.com/'
prevSearch = '//li[@id="nav_previous"]/a'
class NewAdventuresOfBobbin(_BasicScraper):
url = 'http://www.bobbin-comic.com/bobbin_strips/'
imageSearch = compile(tagre("a", "href", r'(\d+\.gif)'))
@ -103,6 +114,11 @@ class NichtLustig(_BasicScraper):
url, compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)')))
class Nicky510(_WordPressScraper):
url = 'http://www.nickyitis.com/'
prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev ')]"
class Nimona(_BasicScraper):
url = 'http://gingerhaze.com/nimona/'
stripUrl = url + '%s/'
@ -146,7 +162,8 @@ class NotInventedHere(_BasicScraper):
stripUrl = url + '%s/'
firstStripUrl = stripUrl % 'on/2009-9-21'
imageSearch = compile(tagre("img", "src", r'(http://thiswas.notinventedhe.re/on/\d+-\d+-\d+)'))
prevSearch = compile(tagre("a", "href", r'(/on/\d+-\d+-\d+)')+'\s*Previous')
prevSearch = compile(tagre("a", "href", r'(/on/\d+-\d+-\d+)') +
'\s*Previous')
help = 'Index format: yyyy-mm-dd'

View file

@ -1,8 +1,13 @@
# -*- coding: iso-8859-1 -*-
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from re import compile, escape
from __future__ import absolute_import, division, print_function
from ..util import tagre
from ..scraper import _BasicScraper
from .common import _WordPressScraper
class NineteenSeventySeven(_WordPressScraper):
name = '1977'
url = 'http://1977thecomic.com/'

View file

@ -4,10 +4,13 @@
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from ..util import tagre
from .common import _WordPressScraper
class OctopusPie(_ParserScraper):
@ -26,7 +29,8 @@ class OddFish(_BasicScraper):
stripUrl = url + '%s/'
firstStripUrl = stripUrl % 'tv-tentacles'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev"))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
after="navi-prev"))
help = 'Index format: stripname'
@ -65,7 +69,8 @@ class OkCancel(_BasicScraper):
stripUrl = url + 'comic/%s.html'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(%sstrips/okcancel\d{8}\.gif)' % rurl))
prevSearch = compile(tagre("div", "class", "previous") + tagre("a", "href", r'(%scomic/\d{1,4}\.html)' % rurl))
prevSearch = compile(tagre("div", "class", "previous") +
tagre("a", "href", r'(%scomic/\d{1,4}\.html)' % rurl))
starter = indirectStarter(url, prevSearch)
help = 'Index format: yyyymmdd'
@ -85,10 +90,16 @@ class OneQuestion(_BasicScraper):
stripUrl = url + 'comic.php?strip_id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'((?:\.\./)?istrip_files/strips/\d+\.\w{3,4})'))
prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') + tagre("img", "src", r'img/arrow_prev\.jpg'))
prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') +
tagre("img", "src", r'img/arrow_prev\.jpg'))
help = 'Index format: n (unpadded)'
class OnTheEdge(_WordPressScraper):
url = 'http://ontheedgecomics.com/'
firstStripUrl = 'http://ontheedgecomics.com/comic/ote0001/'
class OnTheFastrack(_BasicScraper):
url = 'http://onthefastrack.com/'
stripUrl = url + 'comics/%s'

View file

@ -8,6 +8,12 @@ from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import bounceStarter, queryNamer, indirectStarter
from ..util import tagre
from .common import _WordPressScraper
class PandyLand(_WordPressScraper):
url = 'http://pandyland.net/'
firstStripUrl = 'http://pandyland.net/1/'
class ParadigmShift(_BasicScraper):

View file

@ -10,6 +10,7 @@ from datetime import datetime
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, bounceStarter
from ..util import tagre, getPageContent
from .common import _WordPressScraper
class SabrinaOnline(_BasicScraper):
@ -42,6 +43,10 @@ class SafelyEndangered(_BasicScraper):
help = 'Index format: yyyy/mm/stripname'
class SailorsunOrg(_WordPressScraper):
url = 'http://sailorsun.org/'
class SamAndFuzzy(_BasicScraper):
url = 'http://www.samandfuzzy.com/'
stripUrl = 'http://samandfuzzy.com/%s'
@ -166,6 +171,11 @@ class SexyLosers(_BasicScraper):
return index + '-' + title
class Sharksplode(_WordPressScraper):
url = 'http://sharksplode.com/'
textSearch = '//div[@id="comic"]//img/@alt'
class Sheldon(_BasicScraper):
url = 'http://www.sheldoncomics.com/'
rurl = escape(url)
@ -237,6 +247,10 @@ class SinFest(_BasicScraper):
help = 'Index format: yyyy-mm-dd'
class Sithrah(_WordPressScraper):
url = 'http://sithrah.com/'
class SkinDeep(_BasicScraper):
url = 'http://www.skindeepcomic.com/'
stripUrl = url + 'archive/%s/'
@ -271,6 +285,10 @@ class SleeplessDomain(_ParserScraper):
return start + "-" + pageUrl.rsplit('/', 1)[-1]
class SlightlyDamned(_WordPressScraper):
url = 'http://www.sdamned.com/'
class SluggyFreelance(_BasicScraper):
url = 'http://www.sluggy.com/'
stripUrl = url + 'comics/archives/daily/%s'
@ -445,6 +463,10 @@ class SpareParts(_BasicScraper):
help = 'Index format: yyyymmdd'
class SPQRBlues(_WordPressScraper):
url = 'http://spqrblues.com/IV/'
class StandStillStaySilent(_ParserScraper):
url = 'http://www.sssscomic.com/comic.php'
rurl = escape(url)

View file

@ -8,6 +8,7 @@ from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from ..util import tagre
from .common import _WordPressScraper
class TheBrads(_BasicScraper):
@ -30,6 +31,10 @@ class TheDevilsPanties(_BasicScraper):
help = 'Index format: number'
class TheDreamlandChronicles(_WordPressScraper):
url = 'http://www.thedreamlandchronicles.com/'
class TheGamerCat(_ParserScraper):
url = "http://www.thegamercat.com/"
stripUrl = url + "comic/%s/"
@ -40,6 +45,10 @@ class TheGamerCat(_ParserScraper):
help = 'Index format: stripname'
class TheGentlemansArmchair(_WordPressScraper):
url = 'http://thegentlemansarmchair.com/'
class TheLandscaper(_BasicScraper):
url = 'http://landscaper.visual-assault.net/comic/latest'
rurl = escape(url)
@ -52,6 +61,10 @@ class TheLandscaper(_BasicScraper):
help = 'Index format: name'
class TheMelvinChronicles(_WordPressScraper):
url = 'http://melvin.jeaniebottle.com/'
class TheNoob(_BasicScraper):
url = 'http://www.thenoobcomic.com/index.php'
stripUrl = url + '?pos=%s'

View file

@ -1,79 +1,19 @@
# -*- coding: utf-8 -*-
from dosagelib.helpers import indirectStarter
from ..scraper import make_scraper, _ParserScraper
from ..helpers import indirectStarter
from ..scraper import make_scraper
from .common import _WordPressScraper
class _WordpressScraper(_ParserScraper):
imageSearch = ('//div[@id="comic"]//img',
'//div[@class="webcomic-image"]//img')
prevSearch = ("//a[contains(concat(' ', text(), ' '), ' Prev ')]",
"//a[contains(concat(' ', text(), ' '), ' Previous ')]",
"//a[contains(concat(' ', @class, ' '), ' navi-prev ')]",
"//a[contains(concat(' ', @class, ' '), ' navi-prev-in ')]",
"//a[contains(concat(' ', @class, ' '), ' navi-previous ')]",
"//a[contains(concat(' ', @class, ' '), ' previous-webcomic-link ')]")
def add(name, url, firstUrl=None, starter=None, textSearch=None, lang=None):
def add(name, url, starter=None):
attrs = dict(
name=name,
url=url
)
if lang:
attrs['lang'] = lang
if firstUrl:
attrs['firstUrl'] = url + firstUrl
if starter:
attrs['starter'] = starter
if textSearch:
attrs['textSearch'] = textSearch
globals()[name] = make_scraper(name, _WordpressScraper, **attrs)
globals()[name] = make_scraper(name, _WordPressScraper, **attrs)
class Amya(_WordpressScraper):
url = 'http://www.amyachronicles.com/'
add('1997', 'http://1977thecomic.com/')
add('Alice', 'http://www.alicecomics.com/',
starter=indirectStarter('http://www.alicecomics.com/', '//a[text()="Latest Alice!"]'))
add('AxeCop', 'http://axecop.com/comic/season-two/')
add('Bardsworth', 'http://www.bardsworth.com/')
add('BloodBound', 'http://bloodboundcomic.com/', 'comic/06112006/')
add('BratHalla', 'http://brat-halla.com/')
add('BroodHollow', 'http://broodhollow.chainsawsuit.com/', 'page/2012/10/06/book-1-curious-little-thing')
add('Buni', 'http://www.bunicomic.com/')
add('BusinessCat', 'http://www.businesscat.happyjar.com/')
add('Catena', 'http://catenamanor.com/')
add('CatsAndCameras', 'http://catsncameras.com/')
add('CraftedFables', 'http://www.caf-fiends.net/comicpress/')
add('CourtingDisaster', 'http://www.courting-disaster.com/', 'comic/courting-disaster-17/')
add('CowboyJedi', 'http://www.cowboyjedi.com/')
add('FowlLanguage', 'http://www.fowllanguagecomics.com/')
add('HappyJar', 'http://www.happyjar.com/')
add('Hipsters', 'http://www.hipsters-comic.com/', 'comic/hip01/')
add('IDreamOfAJeanieBottle', 'http://jeaniebottle.com/')
add('ItsWalky', 'http://www.itswalky.com/')
add('KatzenfutterGeleespritzer', 'http://www.katzenfuttergeleespritzer.de/', 'comics/gert-grendil/', lang='de')
add('Meek', 'http://www.meekcomic.com/')
add('Meiosis', 'http://meiosiswebcomic.com/')
add('Melonpool', 'http://www.melonpool.com/')
add('MistyTheMouse', 'http://www.mistythemouse.com/')
add('Nedroid', 'http://nedroid.com/')
add('NerfNow', 'https://www.nerfnow.com/')
add('Nicky510', 'http://www.nickyitis.com/')
add('OnTheEdge', 'http://ontheedgecomics.com/', 'comic/ote0001/')
add('PandyLand', 'http://pandyland.net/', '1/')
add('SailorsunOrg', 'http://sailorsun.org/')
add('Sharksplode', 'http://sharksplode.com/', textSearch='//div[@id="comic"]//img/@alt')
add('Sithrah', 'http://sithrah.com/')
add('SlightlyDamned', 'http://www.sdamned.com/')
add('SPQRBlues', 'http://spqrblues.com/IV/')
add('TheDreamlandChronicles', 'http://www.thedreamlandchronicles.com/')
add('TheGentlemansArmchair', 'http://thegentlemansarmchair.com/')
add('TheMelvinChronicles', 'http://melvin.jeaniebottle.com/')
add('YAFGC', 'http://yafgc.net/')
# all comics on HijiNKS ENSUE
for (name, starterXPath) in [
('HijinksEnsue', '//h4[text()="Read The Latest HijiNKS ENSUE"]/..//a'),

View file

@ -1,7 +1,11 @@
# -*- coding: iso-8859-1 -*-
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from re import compile
from ..scraper import _BasicScraper
from ..util import tagre
from __future__ import absolute_import, division, print_function
from .common import _WordPressScraper
class YAFGC(_WordPressScraper):
url = 'http://yafgc.net/'