dosage/dosagelib/plugins/w.py

214 lines
8 KiB
Python
Raw Normal View History

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
2013-02-05 18:51:46 +00:00
# Copyright (C) 2012-2013 Bastian Kleineidam
2012-11-21 20:57:26 +00:00
2012-11-26 06:13:32 +00:00
from re import compile, IGNORECASE
2012-06-20 19:58:13 +00:00
2012-10-11 10:03:12 +00:00
from ..scraper import _BasicScraper
2012-11-26 06:13:32 +00:00
from ..util import tagre
2013-04-03 18:30:51 +00:00
from ..helpers import indirectStarter
2012-06-20 19:58:13 +00:00
2013-03-06 19:21:10 +00:00
class WapsiSquare(_BasicScraper):
    # Scraper settings for the comic served from wapsisquare.com.
    url = "http://wapsisquare.com/"
    stripUrl = url + "comic/%s"
    help = "Index format: stripname"
    # Strip image: an <img> whose src points below /comics/ on the site.
    imageSearch = compile(
        r'<img src="(http://wapsisquare.com/comics/.+?)"')
    # Previous strip: href of the anchor whose link text is "Previous".
    prevSearch = compile(
        r'<a href="(.+?)"[^>]+?>Previous</a>')
2012-06-20 19:58:13 +00:00
2013-02-06 21:08:36 +00:00
class WastedTalent(_BasicScraper):
    # Scraper settings for the comic served from www.wastedtalent.ca.
    url = "http://www.wastedtalent.ca/"
    stripUrl = url + "comic/%s"
    help = "Index format: stripname"
    # Strip image: full-size comic files below the imagecache directory.
    imageSearch = compile(tagre(
        'img', 'src',
        r'(http://www\.wastedtalent\.ca/sites/default/files/imagecache'
        r'/comic_full/comics/\d+/[^"]+)'))
    # Previous strip: a relative /comic/ link followed by "comic_prev".
    prevSearch = compile(
        tagre('a', 'href', r'(/comic/[^"]+)', after='comic_prev'))
2013-03-06 19:21:10 +00:00
class WayfarersMoon(_BasicScraper):
    # Scraper settings for the comic served from www.wayfarersmoon.com.
    url = "http://www.wayfarersmoon.com/"
    stripUrl = url + "index.php?page=%s"
    help = "Index format: nn"
    # Strip image: an <img> with a site-relative src starting with /admin.
    imageSearch = compile(r'<img src="(/admin.+?)"')
    # Previous strip: the link that precedes the btn_back.gif button image.
    prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')
2013-04-03 18:30:51 +00:00
class WebDesignerCOTW(_BasicScraper):
    # Scraper settings for the "comics of the week" posts published on
    # www.webdesignerdepot.com.
    url = 'http://www.webdesignerdepot.com/'
    # The start page is found indirectly: the first link on the main page
    # that matches a "comics-of-the-week-<n>" post URL.
    starter = indirectStarter(
        url,
        compile(tagre(
            "a", "href",
            r'(http://www\.webdesignerdepot\.com/\d+/\d+/comics-of-the-week-\d+/)')))
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1'
    # Several upload path layouts are matched; one compiled pattern per layout.
    imageSearch = tuple(
        compile(tagre("img", "src", pattern))
        for pattern in (
            r'(http://netdna\.webdesignerdepot\.com/uploads/\d+/\d+/\d+s?\.[^"]+)',
            r'(http://netdna\.webdesignerdepot\.com/uploads/\d+/\d+/Christmas\d+\.[^"]+)',
            r'(http://netdna\.webdesignerdepot\.com/uploads/comics\d+[a-z0-9]*/\d+a?\.[^"]+)',
            r'(http://netdna\.webdesignerdepot\.com/uploads/comics/\d+\.[^"]+)',
        )
    )
    multipleImagesPerStrip = True
    # Previous post: a single-quoted <link> href preceded by "prev".
    prevSearch = compile(tagre(
        "link", "href",
        r"(http://www\.webdesignerdepot\.com/\d+/\d+/[^']+)",
        before='prev', quote="'"))
    help = 'Index format: yyyy/mm/stripname'
    description = (
        "The content revolves around web design, blogging and funny "
        "situations that we encounter in our daily lives as designers and "
        "this week we focus on Christmas. These great cartoons are created "
        "by Jerry King, an award-winning cartoonist whos one of the most "
        "published, prolific and versatile cartoonists in the world today."
    )

    def shouldSkipUrl(self, url):
        """Return True for URLs that are not comics-of-the-week posts."""
        return not ('comics-of-the-week' in url)

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Name an image "<week number>-<image file name>".

        The week number is taken from the "week-<n>" part of pageUrl and
        the file name is the last path component of imageUrl.
        """
        basename = imageUrl.rsplit('/', 1)[1]
        weekMatch = compile(r'week-(\d+)').search(pageUrl)
        return "%s-%s" % (weekMatch.group(1), basename)
2013-03-06 19:21:10 +00:00
class WeCanSleepTomorrow(_BasicScraper):
    # Scraper settings for the comic served from wecansleeptomorrow.com.
    url = "http://wecansleeptomorrow.com/"
    stripUrl = url + "%s/"
    help = "Index format: yyyy/mm/dd/stripname"
    # Strip image: an <img> whose src lies below /comics/ on the site.
    imageSearch = compile(tagre(
        'img', 'src',
        r'(http://wecansleeptomorrow\.com/comics/[^"]+)'))
    # Previous strip: an absolute site link followed by "prev".
    prevSearch = compile(tagre(
        'a', 'href',
        r'(http://wecansleeptomorrow\.com/[^"]+)',
        after='prev'))
2012-06-20 19:58:13 +00:00
class WhiteNinja(_BasicScraper):
    # Scraper settings for the comic served from www.whiteninjacomics.com.
    url = "http://www.whiteninjacomics.com/comics.shtml"
    stripUrl = "http://www.whiteninjacomics.com/comics/%s.shtml"
    help = "Index format: s (comic name)"
    # Strip image: an unquoted .gif src below /images/comics/; the negative
    # lookahead rejects file names starting with "t-".
    imageSearch = compile(
        r'<img src=(/images/comics/(?!t-).+?\.gif) border=0')
    # Previous strip: a /comics/...shtml path followed later by "previous".
    prevSearch = compile(
        r'(/comics/.+?shtml).+?previous')
class WhyTheLongFace(_BasicScraper):
    # Scraper settings for the comic pages under www.absurdnotions.org.
    url = "http://www.absurdnotions.org/wtlf200709.html"
    stripUrl = "http://www.absurdnotions.org/wtlf%s.html"
    help = "Index format: yyyymm"
    multipleImagesPerStrip = True
    # Strip image: either an absolute wtlf... URL or a relative lf<digits>
    # file name; matched case-insensitively.
    imageSearch = compile(
        r'<img src="(http://www.absurdnotions.org/wtlf.+?|lf\d+.\w{1,4})"',
        IGNORECASE)
    # Previous page: the link wrapping the nprev.gif button image.
    prevSearch = compile(
        r'HREF="(.+?)"><IMG SRC="nprev.gif" ')
class Wigu(_BasicScraper):
    # Scraper settings for the comic served from wigucomics.com.
    url = "http://wigucomics.com/"
    stripUrl = url + "adventures/index.php?comic=%s"
    help = "Index format: n"
    # Strip image: a site-relative src below /adventures/comics/.
    imageSearch = compile(
        tagre('img', 'src', r'(/adventures/comics/[^"]+)'))
    # Previous strip: a numbered comic link followed by "go back".
    prevSearch = compile(tagre(
        'a', 'href',
        r'(/adventures/index\.php\?comic=\d+)',
        after='go back'))
2012-06-20 19:58:13 +00:00
2013-02-06 21:08:36 +00:00
class Wonderella(_BasicScraper):
    # Scraper settings for the comic served from nonadventures.com.
    url = "http://nonadventures.com/"
    stripUrl = url + "%s/"
    help = "Index format: yyyy/mm/dd/name"
    # Strip image: an <img> whose src lies below /comics/ on the site.
    imageSearch = compile(tagre(
        'img', 'src',
        r'(http://nonadventures\.com/comics/[^"]+)'))
    # Previous strip: a dated post URL followed by "prev".
    prevSearch = compile(tagre(
        'a', 'href',
        r'(http://nonadventures\.com/\d+/\d+/\d+/[^"]+)',
        after='prev'))
class WorldOfMrToast(_BasicScraper):
    # Scraper settings for the comic pages on www.theimaginaryworld.com.
    # The site has no prev/next navigation, so the archive pages are listed
    # explicitly and getPrevUrl() walks that list.
    baseurl = 'http://www.theimaginaryworld.com/'
    url = baseurl + 'mrTcomicA.html'
    stripUrl = baseurl + '%s.html'
    imageSearch = compile(tagre("img", "src", r'(comic[^"]+)'))
    # list the archive links since there is no prev/next navigation
    # (ordered from the start page to the first strip page)
    prevurls = (
        url,
        baseurl + 'mrTcomicW02.html',
        baseurl + 'mrTcomicW01.html',
        baseurl + 'mrGcomic03.html',
        baseurl + 'mrGcomic02.html',
        baseurl + 'mrGcomic01.html',
        baseurl + 'mrTcomicT05.html',
        baseurl + 'mrTcomicT04.html',
        baseurl + 'mrTcomicT03.html',
        baseurl + 'mrTcomicT02.html',
        baseurl + 'mrTcomicT01.html',
        baseurl + 'mrTcomicIW3.html',
        baseurl + 'mrTcomicIW2.html',
        baseurl + 'mrTcomicIW1.html',
    )
    firstStripUrl = prevurls[-1]
    multipleImagesPerStrip = True
    help = 'Index format: none'

    def getPrevUrl(self, url, data, baseUrl):
        """Return the entry following url in prevurls.

        Returns None when url is the last entry or is not one of the
        listed archive pages; data and baseUrl are not used.
        """
        try:
            # index() raises ValueError for an unlisted URL and the
            # subscript raises IndexError past the last entry; both mean
            # there is no previous page.
            return self.prevurls[self.prevurls.index(url) + 1]
        except (ValueError, IndexError):
            return None
2013-03-21 17:33:16 +00:00
class WormWorldSaga(_BasicScraper):
    # Scraper settings for www.wormworldsaga.com. Pages are addressed per
    # chapter and language: chapters/chapter<NN>/<LANG>/index.php.
    url = 'http://www.wormworldsaga.com/'
    stripUrl = url + 'chapters/%s/index.php'
    firstStripUrl = stripUrl % 'chapter01/EN'
    imageSearch = (
        compile(tagre("img", "src", r'(images/CH\d+_\d+\.[^"]+)')),
        compile(tagre("img", "src", r'(panels/CH\d+_[^"]+)')),
    )
    # Chapter number that starter() begins with; getPrevUrl() counts down
    # from whatever chapter number appears in the URL, so raising this
    # value needs no further change.
    latestChapter = 4
    multipleImagesPerStrip = True

    @classmethod
    def starter(cls):
        """Return the index page URL of the latest chapter."""
        # NOTE(review): cls.lang is not defined in this class; presumably it
        # is inherited from _BasicScraper or set by subclasses -- confirm.
        return '%schapters/chapter%02d/%s/index.php' % (
            cls.url, cls.latestChapter, cls.lang.upper())

    def getPrevUrl(self, url, data, baseUrl):
        """Find previous URL.

        Replaces the two-digit "chapterNN" part of url with the preceding
        chapter number. Returns None for chapter 01 (or lower) and for URLs
        without a chapter number; data and baseUrl are not used.
        """
        found = compile(r'chapter(\d\d)').search(url)
        if found is None:
            return None
        number = int(found.group(1))
        if number <= 1:
            return None
        return url.replace(found.group(0), 'chapter%02d' % (number - 1))
class WormWorldSagaGerman(WormWorldSaga):
    # Same scraper as WormWorldSaga; only the language code is overridden.
    lang = "de"
class WormWorldSagaSpanish(WormWorldSaga):
    # Same scraper as WormWorldSaga; only the language code is overridden.
    lang = "es"
class WormWorldSagaFrench(WormWorldSaga):
    # Same scraper as WormWorldSaga; only the language code is overridden.
    lang = "fr"
2012-06-20 19:58:13 +00:00
class WotNow(_BasicScraper):
    # Scraper settings for the comic at shadowburn.binmode.com/wotnow/.
    url = "http://shadowburn.binmode.com/wotnow/"
    stripUrl = url + "comic.php?comic_id=%s"
    help = "Index format: n (unpadded)"
    # Strip image: an upper-case <IMG> tag with a relative comics/ src.
    imageSearch = compile(
        r'<IMG SRC="(comics/.+?)"')
    # Previous strip: the link wrapping the b_prev.gif button image.
    prevSearch = compile(
        r'<A HREF="(.+?)"><IMG SRC="images/b_prev.gif" ')
2012-12-13 20:05:27 +00:00
# XXX disallowed by robots.txt
class _WorldOfWarcraftEh(_BasicScraper):
    # Scraper settings for woweh.com. No stripUrl pattern is defined for
    # this comic.
    url = 'http://woweh.com/'
    stripUrl = None
    # Strip image: a comics/ path on the site, up to the closing quote.
    imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
    # Previous strip: a "?p=..." link followed by a ="prev attribute value.
    # The quantifiers are non-greedy so the capture stops at the first
    # closing quote; the earlier ".+:?" form was greedy and could run on to
    # a later quote on the same line.
    prevSearch = compile(r'woweh.com/(\?p=.+?)".+?="prev')
class Wulffmorgenthaler(_BasicScraper):
    # Scraper settings for the comic served from wumocomicstrip.com.
    url = "http://wumocomicstrip.com/"
    stripUrl = url + "%s/"
    help = "Index format: yyyy/mm/dd"
    # Strip image: a site-relative src directly below /img/strip/.
    imageSearch = compile(
        tagre('img', 'src', r'(/img/strip/[^/"]+)'))
    # Previous strip: an anchor tag immediately followed by the
    # "<span>Previous" label.
    prevSearch = compile(
        tagre('a', 'href', r'([^"]+)') + '<span>Previous')
2012-06-20 19:58:13 +00:00
class WhiteNoise(_BasicScraper):
    # Scraper settings for the comic served from www.wncomic.com.
    url = "http://www.wncomic.com/archive.php"
    stripUrl = "http://www.wncomic.com/archive_comments.php?strip_id=%s"
    help = "Index format: n"
    # Strip image: a relative istrip_files/strips/ path up to the closing
    # quote.
    imageSearch = compile(
        r'(istrip_files/strips/.+?)"')
    # Previous strip: the link wrapping the top_back.jpg button image.
    prevSearch = compile(
        r'</a><a href="(.+?)"><img src="images/top_back.jpg" ')
class Wondermark(_BasicScraper):
    # Scraper settings for the comic served from wondermark.com.
    url = "http://wondermark.com/"
    stripUrl = url + "%s/"
    help = "Index format: nnn"
    # Strip image: an <img> whose src lies below /c/ on the site.
    imageSearch = compile(
        r'<img src="(http://wondermark.com/c/.+?)"')
    # Previous strip: the anchor carrying rel="prev".
    prevSearch = compile(
        r'<a href="(.+?)" rel="prev">')