Fix some more comic modules (c-f).

This commit is contained in:
Tobias Gruetzmacher 2016-09-30 00:15:45 +02:00
parent 0637d20ec3
commit 98c98ddfab
5 changed files with 24 additions and 59 deletions

21
dosagelib/plugins/c.py Executable file → Normal file
View file

@@ -156,15 +156,9 @@ class Champ2010(_BasicScraper):
help = 'Index format: yy-dd-mm'
class ChannelAte(_BasicScraper):
class ChannelAte(_WordPressScraper):
url = 'http://www.channelate.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src",
r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href",
r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/name'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
class ChasingTheSunset(_BasicScraper):
@@ -274,17 +268,6 @@ class CourtingDisaster(_WordPressScraper):
firstStripUrl = 'http://www.courting-disaster.com/comic/courting-disaster-17/'
class CowboyJedi(_WordPressScraper):
url = 'http://www.cowboyjedi.com/'
nextSearch = '//a[%s]' % xpath_class('comic-nav-next')
starter = bounceStarter
def shouldSkipUrl(self, url, data):
return url in (
self.url + 'comic/darth-bart-wont-stop/',
)
class CraftedFables(_WordPressScraper):
url = 'http://www.caf-fiends.net/comicpress/'
prevSearch = '//a[@rel="prev"]'

View file

@@ -13,19 +13,12 @@ from ..util import tagre
from .common import _WordPressScraper, xpath_class
class DailyDose(_ParserScraper):
url = 'http://dailydoseofcomics.com/'
starter = indirectStarter
imageSearch = '//p/a/img'
prevSearch = '//a[@rel="prev"]'
latestSearch = '//a[@rel="bookmark"]'
class DamnLol(_ParserScraper):
url = 'http://www.damnlol.com/'
prevSearch = '//a[@id="prev"]'
nextSearch = '//a[@id="next"]'
imageSearch = '//div[@id="hideFooter"]/img'
# Classes for next and previous seem to be swapped...
prevSearch = '//a[%s]' % xpath_class("next")
nextSearch = '//a[%s]' % xpath_class("previous")
imageSearch = '//img[@id="post-image"]'
starter = bounceStarter
def namer(self, image_url, page_url):
@@ -122,7 +115,7 @@ class DieFruehreifen(_BasicScraper):
class DieselSweeties(_ParserScraper):
url = 'http://dieselsweeties.com/'
stripUrl = url + 'ics/%s/'
stripUrl = url + 'ics/%s'
firstStripUrl = stripUrl % '1'
imageSearch = '//img[@class="xomic"]'
prevSearch = '//div[@id="prev"]//a[contains(text(), "previous")]'
@@ -174,12 +167,6 @@ class DMFA(_BasicScraper):
help = 'Index format: nnn (normally, some specials)'
class DoctorCat(_WordPressScraper):
url = 'http://doctorcatmd.com/'
firstStripUrl = url + 'comic/doctor-cat'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
class DoemainOfOurOwn(_BasicScraper):
url = 'http://www.doemain.com/'
stripUrl = url + 'index.cgi/%s'
@@ -229,11 +216,11 @@ class Dracula(_BasicScraper):
help = 'Index format: nnn'
class DreamKeepersPrelude(_BasicScraper):
class DreamKeepersPrelude(_ParserScraper):
url = 'http://www.dreamkeeperscomic.com/Prelude.php'
stripUrl = url + '?pg=%s'
imageSearch = compile(r'(images/PreludeNew/.+?)"')
prevSearch = compile(r'(Prelude.php\?pg=.+?)"')
imageSearch = '//div[@class="Preludecomic"]/table//a/img'
prevSearch = '//a[@id="prev"]'
help = 'Index format: n'

View file

@@ -163,12 +163,7 @@ class EverybodyLovesEricRaymond(_BasicScraper):
help = 'Index format: name-of-old-comic'
class EverydayBlues(_WordPressScraper):
url = 'http://everydayblues.everydayblues.net/'
firstStripUrl = url + '2010/02/11/sometimes/'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
# Seems to be GeoBlocked from Europe?
class EvilDiva(_BasicScraper):
url = 'http://www.evildivacomics.com/'
stripUrl = url + '?p=%s'

View file

@@ -9,7 +9,7 @@ from re import compile, escape, IGNORECASE
from ..util import tagre
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from .common import _WPNaviIn, _WordPressScraper
from .common import _WPNaviIn, _WordPressScraper, xpath_class
class FalconTwin(_BasicScraper):
@@ -37,12 +37,12 @@ class FantasyRealms(_BasicScraper):
starter = indirectStarter
class FauxPas(_BasicScraper):
class FauxPas(_ParserScraper):
url = 'http://www.ozfoxes.net/cgi/pl-fp1.cgi'
stripUrl = url + '?%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(r'<img .*src="(.*fp/fp.*(png|jpg|gif))"')
prevSearch = compile(r'<a href="(pl-fp1\.cgi\?\d+)">Previous Strip')
imageSearch = '//img[@name]'
prevSearch = '//a[img[@alt="Previous"]]'
help = 'Index format: nnn'
@@ -126,19 +126,14 @@ class Fragile(_ParserScraper):
firstStripUrl = url + 'strips/chapter_01'
class FredoAndPidjin(_BasicScraper):
class FredoAndPidjin(_ParserScraper):
url = 'http://www.pidjin.net/'
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2006/02/19/goofy-monday'
help = 'Index format: yyyy/mm/dd/number-index'
imageSearch = (
compile(tagre('img', 'src', '(http://cdn\.pidjin\.net/wp-content/uploads/\d+/\d+/\d+[^"]+\.[a-z]+)')),
compile(tagre('img', 'src', '(http://cdn\.pidjin\.net/wp-content/uploads/old/[^"]+\.[a-z]+)')),
)
imageSearch = '//div[%s]//img' % xpath_class("episode")
multipleImagesPerStrip = True
prevSearch = compile(tagre('a', 'href', '([^"]+)') + "Prev</a>")
latestSearch = compile(tagre('a', 'href', "(" + url +
r'\d\d\d\d/\d\d/\d\d/[^"]+/)'))
prevSearch = '//span[%s]/a' % xpath_class("prev")
latestSearch = '//section[%s]//a' % xpath_class("latest")
starter = indirectStarter

View file

@@ -15,6 +15,7 @@ class Removed(Scraper):
'del': 'Comic was removed from the web.',
'block': 'The comic site is blocking us.',
'unk': 'Comic was removed for an unknown reason.',
'brk': 'Comic navigation is broken.',
}
def __init__(self, name, reason='del'):
@@ -173,14 +174,18 @@ class Removed(Scraper):
cls('ComicGenesis/CryHavoc'),
cls('ComicGenesis/SueosdelSur'),
cls('Commissioned'),
cls('CowboyJedi', 'brk'),
cls('Creators/BCinSpanish'),
cls('Creators/GirlsandSportsinSpanish'),
cls('Creators/RugratsinSpanish'),
cls('CtrlAltDel', 'block'),
cls('CtrlAltDel/Sillies', 'block'),
cls('DailyDose'),
cls('DeathToTheExtremist'),
cls('DoctorCat', 'brk'),
cls('Ellerbisms'),
cls('Eriadan'),
cls('EverydayBlues'),
cls('FeyWinds'),
cls('FilibusterCartoons'),
cls('FowlLanguage', 'block'),