dosage/dosagelib/plugins/w.py

106 lines
3.9 KiB
Python
Raw Normal View History

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
2012-11-21 20:57:26 +00:00
# Copyright (C) 2012 Bastian Kleineidam
2012-11-26 06:13:32 +00:00
from re import compile, IGNORECASE
2012-06-20 19:58:13 +00:00
2012-10-11 10:03:12 +00:00
from ..scraper import _BasicScraper
2012-11-26 06:13:32 +00:00
from ..util import tagre
2012-06-20 19:58:13 +00:00
class WayfarersMoon(_BasicScraper):
    """Scraper settings for the comic served from http://www.wayfarersmoon.com/."""

    help = 'Index format: nn'

    # Site root; the per-strip URL template below is built on top of it.
    latestUrl = 'http://www.wayfarersmoon.com/'
    # The %s placeholder is presumably filled with the index described by
    # ``help`` -- confirm against _BasicScraper.
    stripUrl = latestUrl + 'index.php?page=%s'

    # Group 1: src of an <img> whose path starts with "/admin".
    imageSearch = compile(r'<img src="(/admin.+?)"')
    # Group 1: href of an <a> tag that is followed, later on the same line,
    # by "btn_back.gif".
    prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')
class WhiteNinja(_BasicScraper):
    """Scraper settings for the comic served from www.whiteninjacomics.com."""

    help = 'Index format: s (comic name)'

    latestUrl = 'http://www.whiteninjacomics.com/comics.shtml'
    # The %s placeholder is presumably the comic name described by ``help``.
    stripUrl = 'http://www.whiteninjacomics.com/comics/%s.shtml'

    # Group 1: unquoted src under /images/comics/ ending in ".gif"; the
    # negative lookahead rejects file names that begin with "t-".
    imageSearch = compile(
        r'<img src=(/images/comics/(?!t-).+?\.gif) border=0')
    # Group 1: a "/comics/...shtml" path that is followed, later on the same
    # line, by the text "previous".
    prevSearch = compile(r'(/comics/.+?shtml).+?previous')
# NOTE(review): a second class named WhiteNoise is defined further down in
# this file with a different prevSearch.  That later definition rebinds the
# module-level name, so this one is effectively shadowed -- confirm which of
# the two should be kept.
class WhiteNoise(_BasicScraper):
    """Scraper settings for the comic served from www.wncomic.com."""

    help = 'Index format: n'

    latestUrl = 'http://www.wncomic.com/archive.php'
    # The %s placeholder is presumably the numeric strip id (see ``help``).
    stripUrl = 'http://www.wncomic.com/archive_comments.php?strip_id=%s'

    # Group 1: a path starting with "istrip_files/strips/" up to the next
    # double quote.
    imageSearch = compile(r'(istrip_files/strips/.+?)"')
    # Group 1: a quoted value starting with "archive" that appears after the
    # text "First " and before "top_back" on the same line.
    prevSearch = compile(r'First .+?"(archive.+?)".+?top_back')
class WhyTheLongFace(_BasicScraper):
    """Scraper settings for the comic served from www.absurdnotions.org."""

    help = 'Index format: yyyymm'

    latestUrl = 'http://www.absurdnotions.org/wtlf200709.html'
    # The %s placeholder is presumably the yyyymm index described by ``help``.
    stripUrl = 'http://www.absurdnotions.org/wtlf%s.html'

    # Flag read by code outside this block; presumably tells the scraper that
    # one page can hold several images -- confirm against _BasicScraper.
    multipleImagesPerStrip = True

    # Group 1 (case-insensitive): either an absolute URL beginning with
    # "http://www.absurdnotions.org/wtlf" or a relative name of the form
    # "lf<digits><any char><1-4 word chars>".
    imageSearch = compile(
        r'<img src="(http://www.absurdnotions.org/wtlf.+?|lf\d+.\w{1,4})"',
        IGNORECASE)
    # Group 1: the HREF value directly in front of the "nprev.gif" image.
    # This pattern is case-sensitive and expects a space after the src value.
    prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ')
class Wigu(_BasicScraper):
    """Scraper settings for the comic served from http://wigucomics.com/."""

    help = 'Index format: n'

    # Site root; the per-strip URL template below is built on top of it.
    latestUrl = 'http://wigucomics.com/'
    # The %s placeholder is presumably the numeric index described by ``help``.
    stripUrl = latestUrl + 'adventures/index.php?comic=%s'

    # Both patterns are assembled by the project helper ``tagre`` (imported
    # from ..util); it presumably produces a regex for the given tag and
    # attribute -- see its definition for the exact matching rules.
    # Image: <img src=...> under /adventures/comics/.
    imageSearch = compile(
        tagre("img", "src", r'(/adventures/comics/[^"]+)'))
    # Previous strip: <a href=...> to a numbered comic page, with "go back"
    # passed as the ``after`` argument.
    prevSearch = compile(
        tagre("a", "href", r'(/adventures/index\.php\?comic=\d+)',
              after="go back"))
2012-06-20 19:58:13 +00:00
class WotNow(_BasicScraper):
    """Scraper settings for the comic at http://shadowburn.binmode.com/wotnow/."""

    help = 'Index format: n (unpadded)'

    # Comic root; the per-strip URL template below is built on top of it.
    latestUrl = 'http://shadowburn.binmode.com/wotnow/'
    # The %s placeholder is presumably the unpadded number described by
    # ``help``.
    stripUrl = latestUrl + 'comic.php?comic_id=%s'

    # Group 1: relative image path starting with "comics/".  The pattern is
    # case-sensitive and expects upper-case tag and attribute names.
    imageSearch = compile(r'<IMG SRC="(comics/.+?)"')
    # Group 1: the HREF value directly in front of the "images/b_prev.gif"
    # image; a space is expected after the src value.
    prevSearch = compile(r'<A HREF="(.+?)"><IMG SRC="images/b_prev.gif" ')
2012-12-13 20:05:27 +00:00
# XXX disallowed by robots.txt
class _WorldOfWarcraftEh(_BasicScraper):
    """Scraper settings for the comic served from http://woweh.com/.

    The class name carries a leading underscore and the comment above records
    that the site's robots.txt disallows scraping.
    """

    latestUrl = 'http://woweh.com/'
    # No per-strip URL template is defined for this comic.
    stripUrl = None

    # Group 1: the "comics/..." part of an absolute woweh.com image URL, up
    # to the next double quote (the host is matched but not captured).
    imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
    # Group 1: the "?p=..." query of a woweh.com link that is followed, later
    # on the same line, by an attribute whose value starts with "prev".
    # The quantifiers used to be written ".+:?" (greedy ".+" plus an optional
    # colon), which over-captured whenever the tag carried further quoted
    # attributes before ="prev; they are now the lazy ".+?" used by the other
    # patterns in this file.
    prevSearch = compile(r'woweh.com/(\?p=.+?)".+?="prev')
class Wulffmorgenthaler(_BasicScraper):
    """Scraper settings for the comic served from http://wumocomicstrip.com/."""

    help = 'Index format: yyyy/mm/dd'

    # Site root; the per-strip URL template below is built on top of it.
    latestUrl = 'http://wumocomicstrip.com/'
    # The %s placeholder is presumably the yyyy/mm/dd index described by
    # ``help``.
    stripUrl = latestUrl + '%s/'

    # Both patterns start from the project helper ``tagre`` (imported from
    # ..util); it presumably produces a regex for the given tag and attribute
    # -- see its definition for the exact matching rules.
    # Image: <img src=...> whose path is /img/strip/<one path segment>.
    imageSearch = compile(tagre("img", "src", r'(/img/strip/[^/"]+)'))
    # Previous strip: an <a href=...> pattern immediately followed by the
    # literal text "<span>Previous".
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)') + "<span>Previous")
2012-06-20 19:58:13 +00:00
# NOTE(review): this file defines a class named WhiteNoise twice.  Being the
# later definition, this is the one the module-level name refers to once the
# module has been imported; the earlier one differs only in prevSearch --
# confirm which of the two should be kept.
class WhiteNoise(_BasicScraper):
    """Scraper settings for the comic served from www.wncomic.com."""

    help = 'Index format: n'

    latestUrl = 'http://www.wncomic.com/archive.php'
    # The %s placeholder is presumably the numeric strip id (see ``help``).
    stripUrl = 'http://www.wncomic.com/archive_comments.php?strip_id=%s'

    # Group 1: a path starting with "istrip_files/strips/" up to the next
    # double quote.
    imageSearch = compile(r'(istrip_files/strips/.+?)"')
    # Group 1: href of the link that wraps the "images/top_back.jpg" image
    # and directly follows a closing </a>; a space is expected after the src
    # value.
    prevSearch = compile(
        r'</a><a href="(.+?)"><img src="images/top_back.jpg" ')
class WapsiSquare(_BasicScraper):
    """Scraper settings for the comic served from http://wapsisquare.com/."""

    help = 'Index format: strip-name'

    # Site root; the per-strip URL template below is built on top of it.
    latestUrl = 'http://wapsisquare.com/'
    # The %s placeholder is presumably the strip name described by ``help``.
    stripUrl = latestUrl + 'comic/%s'

    # Group 1: absolute image URL under http://wapsisquare.com/comics/.
    imageSearch = compile(
        r'<img src="(http://wapsisquare.com/comics/.+?)"')
    # Group 1: href of an <a> tag that has at least one more character of
    # attributes before ">" and whose link text is exactly "Previous".
    prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
class WeCanSleepTomorrow(_BasicScraper):
    """Scraper settings for the comic at http://wecansleeptomorrow.com/."""

    help = 'Index format: yyyy/mm/dd/stripname'

    # Site root; the per-strip URL template below is built on top of it.
    latestUrl = 'http://wecansleeptomorrow.com/'
    # The %s placeholder is presumably the yyyy/mm/dd/stripname index
    # described by ``help``.
    stripUrl = latestUrl + '%s/'

    # Both patterns are assembled by the project helper ``tagre`` (imported
    # from ..util); it presumably produces a regex for the given tag and
    # attribute -- see its definition for the exact matching rules.
    # Image: <img src=...> with an absolute URL under /comics/.
    imageSearch = compile(
        tagre("img", "src",
              r'(http://wecansleeptomorrow\.com/comics/[^"]+)'))
    # Previous strip: <a href=...> with an absolute URL on the same host,
    # with "prev" passed as the ``after`` argument.
    prevSearch = compile(
        tagre("a", "href", r'(http://wecansleeptomorrow\.com/[^"]+)',
              after="prev"))
class Wondermark(_BasicScraper):
    """Scraper settings for the comic served from http://wondermark.com/."""

    help = 'Index format: nnn'

    # Site root; the per-strip URL template below is built on top of it.
    latestUrl = 'http://wondermark.com/'
    # The %s placeholder is presumably the index described by ``help``.
    stripUrl = latestUrl + '%s/'

    # Group 1: absolute image URL under http://wondermark.com/c/.
    imageSearch = compile(r'<img src="(http://wondermark.com/c/.+?)"')
    # Group 1: href of an <a> tag whose href is directly followed by
    # rel="prev" and the closing ">".
    prevSearch = compile(r'<a href="(.+?)" rel="prev">')