2016-04-21 19:28:41 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2013-03-11 20:50:49 +00:00
|
|
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
2014-01-05 15:50:57 +00:00
|
|
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
2016-04-21 19:28:41 +00:00
|
|
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
|
|
|
|
|
|
|
from __future__ import absolute_import, division, print_function
|
2013-03-11 20:50:49 +00:00
|
|
|
|
|
|
|
from re import compile
|
|
|
|
from ..scraper import make_scraper
|
|
|
|
from ..util import tagre
|
|
|
|
|
|
|
|
|
|
|
|
# Patterns shared by every ComicGenesis scraper created by add().
# NOTE(review): tagre(tag, attr, value) presumably builds a regex matching an
# HTML <tag> whose attribute `attr` matches `value` -- confirm in ..util.

# Strip image: an <img> whose src contains a "/comics/" path component; the
# whole src value is captured.
_imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))

# Previous-strip link: an <a> whose href ends in /d/<8 digits>.html (captured),
# directly followed by one of the known "previous" markers: the plain link
# text, an <img> with that alt text, or the images/back.gif button.
_prevSearch = compile(
    tagre("a", "href", r'([^"]*/d/\d{8}\.html)') +
    '(?:Previous comic' + '|' +
    tagre("img", "alt", "Previous comic") + '|' +
    # Raw string: "\." in a plain literal is an invalid escape sequence and
    # triggers a warning on current Python versions.
    tagre("img", "src", r"images/back\.gif") +
    ')')
|
|
|
|
|
2013-03-11 20:50:49 +00:00
|
|
|
|
|
|
|
def add(name, url):
    """Build a scraper class for one comic and store it in this module.

    The class returned by make_scraper() is placed in the module globals
    under the name ``ComicGenesis_<name>``.

    name -- comic identifier; used for the class name and as the suffix of
        the scraper name ``ComicGenesis/<name>``.
    url -- start URL of the comic, passed on unchanged. It is also used to
        derive the per-strip URL template.
    """
    classname = 'ComicGenesis_%s' % name

    # Everything before the first "/d/" is the archive base. A URL without
    # "/d/" is used as-is, so it has to end with a slash already.
    base, marker, _unused = url.partition('/d/')
    if marker:
        stripUrl = base + '/d/%s.html'
    else:
        stripUrl = url + 'd/%s.html'

    def _prevUrlModifier(self, prev_url):
        """Point links on the alternate host names at comicgenesis.com.

        Returns None when prev_url is empty or None.
        """
        if not prev_url:
            return None
        rewritten = prev_url
        # Order matters only in that it mirrors the original chain of
        # replace() calls, applied one after another.
        for other_host in (
            "keenspace.com",
            "keenspot.com",
            "toonspace.com",
            "comicgen.com",
        ):
            rewritten = rewritten.replace(other_host, "comicgenesis.com")
        return rewritten

    options = dict(
        name='ComicGenesis/' + name,
        url=url,
        stripUrl=stripUrl,
        imageSearch=_imageSearch,
        prevSearch=_prevSearch,
        prevUrlModifier=_prevUrlModifier,
        multipleImagesPerStrip=True,
        help='Index format: yyyymmdd',
    )
    globals()[classname] = make_scraper(classname, **options)
|
|
|
|
|
2013-03-12 19:47:38 +00:00
|
|
|
# Comicgenesis has a lot of comics, but most of them are disallowed by robots.txt
#
# Each add() call below registers one scraper class in this module, named
# ComicGenesis_<first argument>. The second argument is the comic's start
# URL: either the site root or a dated page below /d/.
# do not edit anything below since these entries are generated from scripts/update.sh
# DO NOT REMOVE
add('AAAAA', 'http://aaaaa.comicgenesis.com/')
add('AdventuresofKiltman', 'http://kiltman.comicgenesis.com/')
add('AmorModerno', 'http://amormoderno.comicgenesis.com/')
add('AnythingButRealLife', 'http://anythingbutreallife.comicgenesis.com/')
add('Ardra', 'http://ardra.comicgenesis.com/')
add('Artwork', 'http://artwork.comicgenesis.com/')
add('BabeintheWoods', 'http://babeinthewoods.comicgenesis.com/')
add('BackwaterPlanet', 'http://bobthespirit.comicgenesis.com/')
add('BendyStrawVampires', 'http://bsvampires.comicgenesis.com/')
add('BlindSight', 'http://blindsight.comicgenesis.com/')
add('BreakingtheDoldrum', 'http://breakingthedoldrum.comicgenesis.com/')
add('Candi', 'http://candicomics.com/')
add('CorporateLife', 'http://corporatelife.comicgenesis.com/')
add('DarkWelkin', 'http://darkwelkin.comicgenesis.com/')
add('DemonEater', 'http://demoneater.comicgenesis.com/')
add('DoodleDiaries', 'http://doodlediaries.comicgenesis.com/')
add('DormSweetDorm', 'http://dormsweetdorm.comicgenesis.com/')
add('DoubleyouTeeEff', 'http://doubleyouteeeff.comicgenesis.com/')
add('DragonsBane', 'http://jasonwhitewaterz.comicgenesis.com/')
add('Dreamaniac', 'http://dreamaniaccomic.comicgenesis.com/')
add('ElnifiChronicles', 'http://elnifichronicles.comicgenesis.com/')
add('EvesApple', 'http://evesapple.comicgenesis.com/')
add('FancyThat', 'http://fancythat.comicgenesis.com/')
add('FantasyQwest', 'http://creatorauthorman.comicgenesis.com/')
add('Fantazine', 'http://fantazin.comicgenesis.com/')
add('Flounderville', 'http://flounderville.comicgenesis.com/')
add('GEM', 'http://keltzy.comicgenesis.com/')
add('Gonefor300days', 'http://g4300d.comicgenesis.com/')
add('IBlameDanny', 'http://vileterror.comicgenesis.com/')
add('ImpendingDoom', 'http://impending.comicgenesis.com/')
add('InANutshell', 'http://nutshellcomics.comicgenesis.com/')
add('KernyMantisComics', 'http://kernymantis.comicgenesis.com/')
add('KitsuneJewel', 'http://kitsunejewel.comicgenesis.com/')
add('KittyCattyGames', 'http://kittycattygames.comicgenesis.com/')
add('KiwiDayN', 'http://kiwidayn.comicgenesis.com/')
add('KungFounded', 'http://kungfounded.comicgenesis.com/')
add('LabBratz', 'http://labbratz.comicgenesis.com/')
add('Laserwing', 'http://laserwing.comicgenesis.com/')
add('LumiasKingdom', 'http://lumia.comicgenesis.com/')
add('Majestic7', 'http://majestic7.comicgenesis.com/')
add('MaximumWhimsy', 'http://maximumwhimsy.comicgenesis.com/')
add('MenschunsererZeitGerman', 'http://muz.comicgenesis.com/')
add('MoonCrest24', 'http://mooncrest.comicgenesis.com/d/20121117.html')
add('Mushian', 'http://tentoumushi.comicgenesis.com/')
add('NightwolfCentral', 'http://nightwolfcentral.comicgenesis.com/')
add('NoTimeForLife', 'http://randyraven.comicgenesis.com/')
add('NoneMoreComic', 'http://nonemore.comicgenesis.com/')
add('ODCKS', 'http://odcks.comicgenesis.com/')
add('OfDoom', 'http://ofdoom.comicgenesis.com/')
add('OpportunityofaLifetime', 'http://carpathia.comicgenesis.com/')
add('Orbz', 'http://orbz.comicgenesis.com/')
add('OwMySanity', 'http://owmysanity.comicgenesis.com/')
add('PhantomThesis', 'http://phantomthesis.comicgenesis.com/')
add('ProfessorSaltinesAstrodynamicDirigible', 'http://drsaltine.comicgenesis.com/')
add('PsychicDyslexiaInstitute', 'http://pdi.comicgenesis.com/')
add('PublicidadeEnganosa', 'http://publicidadeenganosa.comicgenesis.com/')
add('RandomAxeOfKindness', 'http://randomaxe.comicgenesis.com/')
add('SalemUncommons', 'http://salemuncommons.comicgenesis.com/')
add('SamandElisAdventures', 'http://sameliadv.comicgenesis.com/')
add('SarahZero', 'http://plughead.comicgenesis.com/')
add('SixByNineCollege', 'http://sixbyninecollege.comicgenesis.com/')
add('SpoononHighandFireontheMountian', 'http://spoon.comicgenesis.com/')
add('SynapticMisfires', 'http://synapticmisfires.comicgenesis.com/')
add('TakingStock', 'http://mapaghimagsik.comicgenesis.com/')
add('TemplarArizona', 'http://templaraz.comicgenesis.com/')
add('TheAdventuresofKaniraBaxter', 'http://kanirabaxter.comicgenesis.com/')
add('TheAdventuresofVindibuddSuperheroInTraining', 'http://vindibudd.comicgenesis.com/d/20070720.html')
add('TheEasyBreather', 'http://easybreather.comicgenesis.com/')
add('TheLounge', 'http://thelounge.comicgenesis.com/')
add('TheMisadventuresofOkk', 'http://okk.comicgenesis.com/')
add('ThePath', 'http://thepath.comicgenesis.com/')
add('TheTalesofKalduras', 'http://kalduras.comicgenesis.com/')
add('Unconventional', 'http://unconventional.comicgenesis.com/')
add('WarMageNC17', 'http://warmage.comicgenesis.com/')
add('WebcomicTheWebcomicWebcomicWebcomicWebcomic', 'http://dannormnsanidey.comicgenesis.com/')
add('WhatYouDontSee', 'http://phantomlady4.comicgenesis.com/')
add('Wierdman', 'http://asa.comicgenesis.com/')
|