dosage/dosagelib/plugins/w.py

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam

from re import compile, escape, IGNORECASE

from ..scraper import _BasicScraper
from ..util import tagre
from ..helpers import indirectStarter


class WapsiSquare(_BasicScraper):
    # Scraper configuration for the comic hosted at wapsisquare.com.
    help = "Index format: stripname"
    url = "http://wapsisquare.com/"
    # Regex-escaped copy of the site root, embedded in the image pattern.
    rurl = escape(url)
    # Page URL template; the placeholder receives the strip index.
    stripUrl = url + "comic/%s/"
    firstStripUrl = stripUrl % "09092001"
    # Captures the src of images stored below <url>comics/.
    imageSearch = compile(r'<img src="(' + rurl + r'comics/.+?)"')
    # Captures the href of the anchor whose link text is "Previous".
    prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')


class WastedTalent(_BasicScraper):
    # Scraper configuration for www.wastedtalent.ca.
    help = "Index format: stripname"
    url = "http://www.wastedtalent.ca/"
    # Page URL template; the placeholder receives the strip name.
    stripUrl = url + "comic/%s"
    firstStripUrl = stripUrl % "anime-crack"
    # Image pattern; tagre (from ..util) is presumably a tag/attribute
    # regex builder - the value pattern captures the full-size comic URL.
    imageSearch = compile(tagre(
        "img", "src",
        r'(http://www\.wastedtalent\.ca/sites/default/files'
        r'/imagecache/comic_full/comics/\d+/[^"]+)'))
    # Previous-page pattern: a site-relative /comic/ link combined with
    # the "comic_prev" marker passed as tagre's `after` argument.
    prevSearch = compile(tagre(
        "a", "href", r'(/comic/[^"]+)', after="comic_prev"))


class WayfarersMoon(_BasicScraper):
    # Scraper configuration for www.wayfarersmoon.com.
    help = "Index format: nn"
    url = "http://www.wayfarersmoon.com/"
    # Pages are selected through the "page" query parameter.
    stripUrl = url + "index.php?page=%s"
    firstStripUrl = stripUrl % "0"
    # Captures site-relative image paths beginning with /admin.
    imageSearch = compile(r'<img src="(/admin.+?)"')
    # Captures the href of a link that is followed by btn_back.gif.
    prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')


class WebDesignerCOTW(_BasicScraper):
    # Scraper configuration for the "comics of the week" posts on
    # www.webdesignerdepot.com.
    url = 'http://www.webdesignerdepot.com/'
    rurl = escape(url)
    # The start page is looked up indirectly: indirectStarter receives the
    # site root and a pattern for yyyy/mm/<name>/ style links.
    starter = indirectStarter(url, compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+/)' % rurl)))
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1'
    # Several image URL layouts are tried; each entry captures one of the
    # upload path schemes seen on the netdna host.
    imageSearch = (
        compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/\d+/\d+/\d+s?\.[^"]+)')),
        compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/\d+/\d+/Christmas\d+\.[^"]+)')),
        compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics\d+[a-z0-9]*/\d+a?\.[^"]+)')),
        compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics/\d+\.[^"]+)')),
    )
    multipleImagesPerStrip = True
    prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl, before='prev', quote="'"))
    help = 'Index format: yyyy/mm/stripname'
    # The apostrophe is written as an escape: U+2019 does not exist in the
    # iso-8859-1 encoding this file declares, so a literal character would
    # not decode to the intended text.
    description = (
        u"The content revolves around web design, blogging and funny "
        u"situations that we encounter in our daily lives as designers and "
        u"this week we focus on Christmas. These great cartoons are created "
        u"by Jerry King, an award-winning cartoonist who\u2019s one of the "
        u"most published, prolific and versatile cartoonists in the world "
        u"today."
    )
    # Extracts the week number from a page URL; compiled once for namer().
    _weekSearch = compile(r'week-(\d+)')

    def shouldSkipUrl(self, url):
        """Skip non-comic URLs.

        Returns True when 'comics-of-the-week' does not occur in url.
        """
        return 'comics-of-the-week' not in url

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Build an image name of the form '<week>-<image file name>'.

        The week number is taken from the 'week-<n>' part of pageUrl and
        the file name is the last path component of imageUrl. As before,
        a pageUrl without a week number raises AttributeError.
        """
        imagename = imageUrl.rsplit('/', 1)[1]
        week = cls._weekSearch.search(pageUrl).group(1)
        return "%s-%s" % (week, imagename)


class WeCanSleepTomorrow(_BasicScraper):
    # Scraper configuration for wecansleeptomorrow.com.
    help = "Index format: yyyy/mm/dd/stripname"
    description = (
        u'We Can Sleep Tomorrow - Life does not take bathroom breaks. '
        u'A webcomic that updates Mondays and Fridays'
    )
    url = "http://wecansleeptomorrow.com/"
    # Regex-escaped site root shared by both patterns below.
    rurl = escape(url)
    stripUrl = url + "%s/"
    # Captures absolute image URLs below <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(' + rurl + r'comics/[^"]+)'))
    # Captures an absolute on-site link; "prev" is handed to tagre as the
    # `after` argument.
    prevSearch = compile(
        tagre("a", "href", r'(' + rurl + r'[^"]+)', after="prev"))


class Weregeek(_BasicScraper):
    # Scraper configuration for www.weregeek.com.
    description = u'Weregeek'
    url = 'http://www.weregeek.com/'
    rurl = escape(url)
    # The template already appends the trailing slash.
    stripUrl = url + '%s/'
    # The index must not end in '/', otherwise the resulting URL ends in
    # '//' and differs from the single-slash links captured by prevSearch.
    firstStripUrl = stripUrl % '2006/11/27'
    # Captures absolute image URLs below <url>comics/ whose file name
    # starts with three dash-separated numbers.
    imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
    # A dated link (absolute or site-relative) followed, after optional
    # whitespace, by the previous_day.gif image. The whitespace pattern is
    # a raw string so the backslash is not an invalid string escape.
    prevSearch = compile(
        tagre("a", "href", r'((%s)?(/)?\d+/\d+/\d+/)' % rurl) +
        r'\s*' +
        tagre('img', 'src', '[^"]*previous_day.gif'))
    help = 'Index format: yyyy/mm/dd'


class WhiteNinja(_BasicScraper):
    # Scraper configuration for www.whiteninjacomics.com.
    help = "Index format: s (comic name)"
    baseUrl = "http://www.whiteninjacomics.com/"
    # The start URL is the comics.shtml page below the site root.
    url = "%scomics.shtml" % baseUrl
    stripUrl = baseUrl + "comics/%s.shtml"
    # Captures unquoted .gif paths below /images/comics/, excluding file
    # names that start with "t-".
    imageSearch = compile(
        r'<img src=(/images/comics/(?!t-).+?\.gif) border=0')
    # Captures a /comics/...shtml path that is followed by "previous".
    prevSearch = compile(r'(/comics/.+?shtml).+?previous')


class WhiteNoise(_BasicScraper):
    # Scraper configuration for www.wncomic.com.
    help = "Index format: n"
    baseUrl = "http://www.wncomic.com/"
    # The start URL is the archive page below the site root.
    url = "%sarchive.php" % baseUrl
    # Individual strips are addressed through the strip_id parameter.
    stripUrl = baseUrl + "archive_comments.php?strip_id=%s"
    firstStripUrl = stripUrl % "1"
    # Captures relative image paths below istrip_files/strips/.
    imageSearch = compile(r'(istrip_files/strips/.+?)"')
    # Captures the href of the link that wraps images/top_back.jpg.
    prevSearch = compile(
        r'</a><a href="(.+?)"><img src="images/top_back.jpg" ')


class WhyTheLongFace(_BasicScraper):
    # Scraper configuration for the comic hosted on www.absurdnotions.org.
    help = "Index format: yyyymm"
    multipleImagesPerStrip = True
    baseUrl = "http://www.absurdnotions.org/"
    # Regex-escaped site root, embedded in the image pattern.
    rurl = escape(baseUrl)
    # The start URL is a fixed monthly page.
    url = "%swtlf200709.html" % baseUrl
    stripUrl = baseUrl + "wtlf%s.html"
    firstStripUrl = stripUrl % "200306"
    # Case-insensitive; captures either an absolute <root>wtlf... URL or a
    # relative lf<digits> file name.
    imageSearch = compile(
        r'<img src="(' + rurl + r'wtlf.+?|lf\d+.\w{1,4})"', IGNORECASE)
    # Captures the HREF of the link that wraps nprev.gif (upper-case
    # markup, matched case-sensitively).
    prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ')


class Wigu(_BasicScraper):
    # Scraper configuration for wigucomics.com.
    help = "Index format: n"
    description = u"WIGU: A COMIC ON INTERNET"
    url = "http://wigucomics.com/"
    # Strips are addressed through the "comic" query parameter.
    stripUrl = url + "oc/index.php?comic=%s"
    firstStripUrl = stripUrl % "1"
    # Captures site-relative image paths below /oc/comics/.
    imageSearch = compile(
        tagre("img", "src", r'(/oc/comics/[^"]+)'))
    # Captures a numbered comic link; "go back" is handed to tagre as the
    # `after` argument.
    prevSearch = compile(
        tagre("a", "href", r'(/oc/index\.php\?comic=\d+)',
              after="go back"))


class Wonderella(_BasicScraper):
    # Scraper configuration for nonadventures.com.
    help = "Index format: yyyy/mm/dd/name"
    url = "http://nonadventures.com/"
    # Regex-escaped site root shared by both patterns below.
    rurl = escape(url)
    stripUrl = url + "%s/"
    firstStripUrl = (
        stripUrl % "2006/09/09/the-torment-of-a-thousand-yesterdays")
    # The div with id "comic" (either quote style), optional whitespace,
    # then the image whose absolute URL below <url>comics/ is captured.
    imageSearch = compile(r"\s*".join((
        tagre("div", "id", r"comic", quote=r'["\']'),
        tagre("img", "src", r'(' + rurl + r'comics/[^"]+)'),
    )))
    # Captures a dated absolute link; "prev" is handed to tagre as the
    # `after` argument.
    prevSearch = compile(
        tagre("a", "href", r'(' + rurl + r'\d+/\d+/\d+/[^"]+)',
              after="prev"))


class Wondermark(_BasicScraper):
    # Scraper configuration for wondermark.com.
    help = "Index format: nnn"
    url = "http://wondermark.com/"
    stripUrl = url + "%s/"
    firstStripUrl = stripUrl % "001"
    # Captures absolute image URLs below http://wondermark.com/c/.
    imageSearch = compile(r'<img src="(http://wondermark.com/c/.+?)"')
    # Captures the href of the anchor carrying rel="prev".
    prevSearch = compile(r'<a href="(.+?)" rel="prev">')


class WorldOfMrToast(_BasicScraper):
    # Scraper configuration for the Mr. Toast comics on
    # www.theimaginaryworld.com; navigation uses a fixed page list.
    baseUrl = 'http://www.theimaginaryworld.com/'
    url = baseUrl + 'mrTcomicA.html'
    stripUrl = baseUrl + '%s.html'
    imageSearch = compile(tagre("img", "src", r'(comic[^"]+)'))
    # list the archive links since there is no prev/next navigation
    prevurls = (
        url,
        baseUrl + 'mrTcomicW02.html',
        baseUrl + 'mrTcomicW01.html',
        baseUrl + 'mrGcomic03.html',
        baseUrl + 'mrGcomic02.html',
        baseUrl + 'mrGcomic01.html',
        baseUrl + 'mrTcomicT05.html',
        baseUrl + 'mrTcomicT04.html',
        baseUrl + 'mrTcomicT03.html',
        baseUrl + 'mrTcomicT02.html',
        baseUrl + 'mrTcomicT01.html',
        baseUrl + 'mrTcomicIW3.html',
        baseUrl + 'mrTcomicIW2.html',
        baseUrl + 'mrTcomicIW1.html',
    )
    # The last list entry is the oldest page.
    firstStripUrl = prevurls[-1]
    multipleImagesPerStrip = True
    help = 'Index format: none'

    def getPrevUrl(self, url, data, baseUrl):
        """Return the entry that follows url in prevurls.

        data and baseUrl are not used. Returns None when url is the last
        entry, and also when url is not listed at all (the lookup used to
        raise ValueError in that case).
        """
        try:
            return self.prevurls[self.prevurls.index(url) + 1]
        except (ValueError, IndexError):
            # ValueError: url is not in the list; IndexError: url is last.
            return None


class WormWorldSaga(_BasicScraper):
    # Scraper configuration for www.wormworldsaga.com. Each chapter is one
    # page holding several images.
    description = u'The Wormworld Saga Online Graphic Novel by Daniel Lieske - An Epic Fantasy Adventure for all Ages'
    url = 'http://www.wormworldsaga.com/'
    stripUrl = url + 'chapters/%s/index.php'
    # NOTE(review): this is the EN page and is inherited unchanged by the
    # language subclasses - confirm that is intended.
    firstStripUrl = stripUrl % 'chapter01/EN'
    imageSearch = (
        compile(tagre("img", "src", r'(images/CH\d+_\d+\.[^"]+)')),
        compile(tagre("img", "src", r'(panels/CH\d+_[^"]+)')),
    )
    # Number of the newest chapter; starter() begins there.
    latestChapter = 5
    multipleImagesPerStrip = True
    # Finds the zero-padded chapter token inside a chapter URL.
    _chapterSearch = compile(r'chapter(\d{2,})')

    @classmethod
    def starter(cls):
        """Return the index page URL of the latest chapter.

        Uses cls.lang in upper case; lang is not defined in this class, so
        it is presumably supplied by the base class or a subclass.
        """
        return '%schapters/chapter%02d/%s/index.php' % (
            cls.url, cls.latestChapter, cls.lang.upper())

    def getPrevUrl(self, url, data, baseUrl):
        """Find previous URL.

        Replaces the chapterNN token in url with the preceding chapter
        number, for any chapter above 1. The former hard-coded chain only
        covered chapters 04 to 02, so a walk starting at latestChapter (5)
        ended immediately. Returns None for chapter 01 or when url holds
        no chapter token. data and baseUrl are not used.
        """
        found = self._chapterSearch.search(url)
        if found is None:
            return None
        chapter = int(found.group(1))
        if chapter <= 1:
            return None
        return url.replace(found.group(0), 'chapter%02d' % (chapter - 1))


class WormWorldSagaGerman(WormWorldSaga):
    # Only the language code differs from the parent scraper.
    lang = "de"

class WormWorldSagaSpanish(WormWorldSaga):
    # Only the language code differs from the parent scraper.
    lang = "es"

class WormWorldSagaFrench(WormWorldSaga):
    # Only the language code differs from the parent scraper.
    lang = "fr"


class WotNow(_BasicScraper):
    # Scraper configuration for the comic at shadowburn.binmode.com/wotnow/.
    help = "Index format: n (unpadded)"
    url = "http://shadowburn.binmode.com/wotnow/"
    # Strips are addressed through the comic_id query parameter.
    stripUrl = url + "comic.php?comic_id=%s"
    firstStripUrl = stripUrl % "1"
    # Captures relative image paths below comics/ (upper-case markup).
    imageSearch = compile(r'<IMG SRC="(comics/.+?)"')
    # Captures the HREF of the link that wraps images/b_prev.gif.
    prevSearch = compile(
        r'<A HREF="(.+?)"><IMG SRC="images/b_prev.gif" ')


# XXX disallowed by robots.txt
class _WorldOfWarcraftEh(_BasicScraper):
    # Scraper configuration for woweh.com; the class name carries a
    # leading underscore (see the XXX note above).
    url = 'http://woweh.com/'
    # No strip URL template is defined for this comic.
    stripUrl = None
    # Captures the comics/... path of absolute woweh.com image URLs.
    imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
    # Captures the ?p=... query of a link followed by ="prev. The former
    # '.+:?' (greedy match plus optional colon) is read as a typo for the
    # lazy '.+?' used by the other patterns in this file.
    prevSearch = compile(r'woweh.com/(\?p=.+?)".+?="prev')


class Wulffmorgenthaler(_BasicScraper):
    # Scraper configuration for the comic at kindofnormal.com/wumo/.
    help = "Index format: yyyy/mm/dd"
    description = (
        u"Entertainment - Since 2002. Wulff & Morgenthaler's Personal "
        u"humoristic social commentary on life, nostalgia and the World in "
        u"general. Nothing is taboo: They deal with Politics, News, "
        u"Entertainment, Technology, Culture, and Weirdo Beavers"
    )
    url = "http://kindofnormal.com/wumo/"
    # Regex-escaped start URL, embedded in the previous-page pattern.
    rurl = escape(url)
    stripUrl = url + "%s"
    # A box-content div, a link and the image, separated by optional
    # whitespace; only the image URL is captured.
    imageSearch = compile(r"\s*".join((
        tagre("div", "class", r'box-content'),
        tagre("a", "href", r'[^"]+'),
        tagre("img", "src",
              r'(http://kindofnormal\.com/img/wumo/\d+/\d+/[^/"]+)'),
    )))
    # An on-site link immediately followed by the chevron-left icon tag.
    prevSearch = compile(
        tagre("a", "href", r'(' + rurl + r'[^"]+)') +
        tagre("i", "class", r'icon-chevron-left'))
-												Updated copyright for all source files.

											
										
										
											2012-06-20 20:41:04 +00:00
+								# -*- coding: iso-8859-1 -*-
 								# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
-												Rename latestUrl in url

											
										
										
											2013-02-05 18:51:46 +00:00
+								# Copyright (C) 2012-2013 Bastian Kleineidam
-												Fix some comics.

											
										
										
											2012-11-21 20:57:26 +00:00
-												Use re.escape and add some firstStripUrl.

											
										
										
											2013-04-10 16:19:11 +00:00
+								from re import compile, escape, IGNORECASE
-												Initial commit to Github.

											
										
										
											2012-06-20 19:58:13 +00:00
-												A lot of refactoring.

											
										
										
											2012-10-11 10:03:12 +00:00
+								from ..scraper import _BasicScraper
-												Fix some comics.

											
										
										
											2012-11-26 06:13:32 +00:00
+								from ..util import tagre
-												Add WebDesignerCOTW

											
										
										
											2013-04-03 18:30:51 +00:00
+								from ..helpers import indirectStarter
-												Initial commit to Github.

											
										
										
											2012-06-20 19:58:13 +00:00
-												Sort comics.

											
										
										
											2013-03-06 19:21:10 +00:00
+								class WapsiSquare(_BasicScraper):
 								    url = 'http://wapsisquare.com/'
-												Use re.escape and add some firstStripUrl.

											
										
										
											2013-04-10 16:19:11 +00:00
+								    rurl = escape(url)
-												Add firstStripUrls.

											
										
										
											2013-04-10 21:57:09 +00:00
+								    stripUrl = url + 'comic/%s/'
 								    firstStripUrl = stripUrl % '09092001'
-												Use re.escape and add some firstStripUrl.

											
										
										
											2013-04-10 16:19:11 +00:00
+								    imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
-												Sort comics.

											
										
										
											2013-03-06 19:21:10 +00:00
+								    prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
-												Add WebDesignerCOTW

											
										
										
											2013-04-03 18:30:51 +00:00
+								    help = 'Index format: stripname'
-												Initial commit to Github.

											
										
										
											2012-06-20 19:58:13 +00:00
-												Added some comics.

											
										
										
											2013-02-06 21:08:36 +00:00
+								class WastedTalent(_BasicScraper):
 								    url = 'http://www.wastedtalent.ca/'
 								    stripUrl = url + 'comic/%s'
-												Add firstStripUrls.

											
										
										
											2013-04-10 21:57:09 +00:00
+								    firstStripUrl = stripUrl % 'anime-crack'
-												Added some comics.

											
										
										
											2013-02-06 21:08:36 +00:00
+								    imageSearch = compile(tagre("img", "src", r'(http://www\.wastedtalent\.ca/sites/default/files/imagecache/comic_full/comics/\d+/[^"]+)'))
 								    prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', after="comic_prev"))
 								    help = 'Index format: stripname'
-												Sort comics.

											
										
										
											2013-03-06 19:21:10 +00:00
+								class WayfarersMoon(_BasicScraper):
 								    url = 'http://www.wayfarersmoon.com/'
 								    stripUrl = url + 'index.php?page=%s'
-												Add firstStripUrls.

											
										
										
											2013-04-10 21:57:09 +00:00
+								    firstStripUrl = stripUrl % '0'
-												Sort comics.

											
										
										
											2013-03-06 19:21:10 +00:00
+								    imageSearch = compile(r'<img src="(/admin.+?)"')
 								    prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')
 								    help = 'Index format: nn'
-												Add WebDesignerCOTW

											
										
										
											2013-04-03 18:30:51 +00:00
+								class WebDesignerCOTW(_BasicScraper):
 								    url = 'http://www.webdesignerdepot.com/'
-												Use re.escape and add some firstStripUrl.

											
										
										
											2013-04-10 16:19:11 +00:00
+								    rurl = escape(url)
 								    starter = indirectStarter(url, compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+/)' % rurl)))
-												Add WebDesignerCOTW

											
										
										
											2013-04-03 18:30:51 +00:00
+								    stripUrl = url + '%s/'
 								    firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1'
 								    imageSearch = (
 								        compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/\d+/\d+/\d+s?\.[^"]+)')),
 								        compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/\d+/\d+/Christmas\d+\.[^"]+)')),
 								        compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics\d+[a-z0-9]*/\d+a?\.[^"]+)')),
 								        compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics/\d+\.[^"]+)')),
 								    )
 								    multipleImagesPerStrip = True
-												Use re.escape and add some firstStripUrl.

											
										
										
											2013-04-10 16:19:11 +00:00
+								    prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl, before='prev', quote="'"))
-												Add WebDesignerCOTW

											
										
										
											2013-04-03 18:30:51 +00:00
+								    help = 'Index format: yyyy/mm/stripname'
-												Description must be unicode.

											
										
										
											2013-04-29 05:27:59 +00:00
+								    description = u"The content revolves around web design, blogging and funny situations that we encounter in our daily lives as designers and this week we focus on Christmas. These great cartoons are created by Jerry King, an award-winning cartoonist who’s one of the most published, prolific and versatile cartoonists in the world today."
-												Add WebDesignerCOTW

											
										
										
											2013-04-03 18:30:51 +00:00
 								    def shouldSkipUrl(self, url):
 								        """Skip non-comic URLs."""
 								        return 'comics-of-the-week' not in url
 								    @classmethod
 								    def namer(cls, imageUrl, pageUrl):
 								        imagename = imageUrl.rsplit('/', 1)[1]
 								        week = compile(r'week-(\d+)').search(pageUrl).group(1)
 								        return "%s-%s" % (week, imagename)
-												Sort comics.

											
										
										
											2013-03-06 19:21:10 +00:00
+								class WeCanSleepTomorrow(_BasicScraper):
-												Add some descriptions.

											
										
										
											2013-04-14 07:02:14 +00:00
+								    description = u'We Can Sleep Tomorrow - Life does not take bathroom breaks. A webcomic that updates Mondays and Fridays'
-												Sort comics.

											
										
										
											2013-03-06 19:21:10 +00:00
+								    url = 'http://wecansleeptomorrow.com/'
-												Use re.escape and add some firstStripUrl.

											
										
										
											2013-04-10 16:19:11 +00:00
+								    rurl = escape(url)
-												Sort comics.

											
										
										
											2013-03-06 19:21:10 +00:00
+								    stripUrl = url + '%s/'
-												Use re.escape and add some firstStripUrl.

											
										
										
											2013-04-10 16:19:11 +00:00
+								    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
 								    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
-												Sort comics.

											
										
										
											2013-03-06 19:21:10 +00:00
+								    help = 'Index format: yyyy/mm/dd/stripname'
-												added comic Gaia

											
										
										
											2013-12-10 04:08:15 +00:00
+								class Weregeek(_BasicScraper):
 								    description = u'Weregeek'
 								    url = 'http://www.weregeek.com/'
 								    rurl = escape(url)
 								    stripUrl = url + '%s/'
 								    firstStripUrl = stripUrl % '2006/11/27/'
 								    imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
 								    prevSearch = compile(tagre("a", "href", r'((%s)?(/)?\d+/\d+/\d+/)'% rurl)+'\s*'+ tagre('img', 'src', '[^"]*previous_day.gif'))
 								    help = 'Index format: yyyy/mm/dd'
-												Initial commit to Github.

											
										
										
											2012-06-20 19:58:13 +00:00
+								class WhiteNinja(_BasicScraper):
-												s/baseurl/baseUrl/g

											
										
										
											2013-04-13 18:58:00 +00:00
+								    baseUrl = 'http://www.whiteninjacomics.com/'
 								    url = baseUrl + 'comics.shtml'
 								    stripUrl = baseUrl + 'comics/%s.shtml'
-												Initial commit to Github.

											
										
										
											2012-06-20 19:58:13 +00:00
+								    imageSearch = compile(r'<img src=(/images/comics/(?!t-).+?\.gif) border=0')
 								    prevSearch = compile(r'(/comics/.+?shtml).+?previous')
 								    help = 'Index format: s (comic name)'
-												Sort entries.

											
										
										
											2013-04-10 16:36:33 +00:00
+								class WhiteNoise(_BasicScraper):
-												s/baseurl/baseUrl/g

											
										
										
											2013-04-13 18:58:00 +00:00
+								    baseUrl = 'http://www.wncomic.com/'
 								    url = baseUrl + 'archive.php'
 								    stripUrl = baseUrl + 'archive_comments.php?strip_id=%s'
-												Add firstStripUrls.

											
										
										
											2013-04-10 21:57:09 +00:00
+								    firstStripUrl = stripUrl % '1'
-												Sort entries.

											
										
										
											2013-04-10 16:36:33 +00:00
+								    imageSearch = compile(r'(istrip_files/strips/.+?)"')
 								    prevSearch = compile(r'</a><a href="(.+?)"><img src="images/top_back.jpg" ')
 								    help = 'Index format: n'
-												Initial commit to Github.

											
										
										
											2012-06-20 19:58:13 +00:00
+								class WhyTheLongFace(_BasicScraper):
-												s/baseurl/baseUrl/g

											
										
										
											2013-04-13 18:58:00 +00:00
+								    baseUrl = 'http://www.absurdnotions.org/'
 								    rurl = escape(baseUrl)
 								    url = baseUrl + 'wtlf200709.html'
 								    stripUrl = baseUrl + 'wtlf%s.html'
-												Add firstStripUrls.

											
										
										
											2013-04-10 21:57:09 +00:00
+								    firstStripUrl = stripUrl % '200306'
-												Use re.escape and add some firstStripUrl.

											
										
										
											2013-04-10 16:19:11 +00:00
+								    imageSearch = compile(r'<img src="(%swtlf.+?|lf\d+.\w{1,4})"' % rurl, IGNORECASE)
-												Fix comics.

											
										
										
											2012-12-04 06:02:40 +00:00
+								    multipleImagesPerStrip = True
-												Initial commit to Github.

											
										
										
											2012-06-20 19:58:13 +00:00
+								    prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ')
 								    help = 'Index format: yyyymm'
 								class Wigu(_BasicScraper):
-												Add some descriptions.

											
										
										
											2013-04-14 07:02:14 +00:00
+								    description = u'WIGU: A COMIC ON INTERNET'
-												Always have an url attribute in comic scrapers.

											
										
										
											2013-02-04 20:00:26 +00:00
+								    url = 'http://wigucomics.com/'
-												Fix broken comics

											
										
										
											2013-11-12 17:33:14 +00:00
+								    stripUrl = url + 'oc/index.php?comic=%s'
-												Add firstStripUrls.

											
										
										
											2013-04-10 21:57:09 +00:00
+								    firstStripUrl = stripUrl % '1'
-												Fix broken comics

											
										
										
											2013-11-12 17:33:14 +00:00
+								    imageSearch = compile(tagre("img", "src", r'(/oc/comics/[^"]+)'))
 								    prevSearch = compile(tagre("a", "href", r'(/oc/index\.php\?comic=\d+)', after="go back"))
-												Fix some comics.

											
										
										
											2012-11-26 06:13:32 +00:00
+								    help = 'Index format: n'
-												Initial commit to Github.

											
										
										
											2012-06-20 19:58:13 +00:00
-												Added some comics.

											
										
										
											2013-02-06 21:08:36 +00:00
+								class Wonderella(_BasicScraper):
 								    url = 'http://nonadventures.com/'
-												Use re.escape and add some firstStripUrl.

											
										
										
											2013-04-10 16:19:11 +00:00
+								    rurl = escape(url)
-												Added some comics.

											
										
										
											2013-02-06 21:08:36 +00:00
+								    stripUrl = url + '%s/'
-												Add firstStripUrls.

											
										
										
											2013-04-10 21:57:09 +00:00
+								    firstStripUrl = stripUrl % '2006/09/09/the-torment-of-a-thousand-yesterdays'
-												Fix broken comics

											
										
										
											2013-11-12 17:33:14 +00:00
+								    imageSearch = compile(tagre("div", "id", r"comic", quote=r'["\']') + r"\s*" +
 								        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
-												Use re.escape and add some firstStripUrl.

											
										
										
											2013-04-10 16:19:11 +00:00
+								    prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
-												Added some comics.

											
										
										
											2013-02-06 21:08:36 +00:00
+								    help = 'Index format: yyyy/mm/dd/name'
-												Sort entries.

											
										
										
											2013-04-10 16:36:33 +00:00
+								class Wondermark(_BasicScraper):
 								    url = 'http://wondermark.com/'
 								    stripUrl = url + '%s/'
-												Add firstStripUrls.

											
										
										
											2013-04-10 21:57:09 +00:00
+								    firstStripUrl = stripUrl % '001'
-												Sort entries.

											
										
										
											2013-04-10 16:36:33 +00:00
+								    imageSearch = compile(r'<img src="(http://wondermark.com/c/.+?)"')
 								    prevSearch = compile(r'<a href="(.+?)" rel="prev">')
 								    help = 'Index format: nnn'
-												Added some comic strips and cleanup the scraper code.

											
										
										
											2013-03-06 19:00:30 +00:00
+								class WorldOfMrToast(_BasicScraper):
-												s/baseurl/baseUrl/g

											
										
										
											2013-04-13 18:58:00 +00:00
+								    baseUrl = 'http://www.theimaginaryworld.com/'
 								    url = baseUrl + 'mrTcomicA.html'
 								    stripUrl = baseUrl + '%s.html'
-												Added some comic strips and cleanup the scraper code.

											
										
										
											2013-03-06 19:00:30 +00:00
+								    imageSearch = compile(tagre("img", "src", r'(comic[^"]+)'))
 								    # list the archive links since there is no prev/next navigation
 								    prevurls = (
 								        url,
-												s/baseurl/baseUrl/g

											
										
										
											2013-04-13 18:58:00 +00:00
+								        baseUrl + 'mrTcomicW02.html',
 								        baseUrl + 'mrTcomicW01.html',
 								        baseUrl + 'mrGcomic03.html',
 								        baseUrl + 'mrGcomic02.html',
 								        baseUrl + 'mrGcomic01.html',
 								        baseUrl + 'mrTcomicT05.html',
 								        baseUrl + 'mrTcomicT04.html',
 								        baseUrl + 'mrTcomicT03.html',
 								        baseUrl + 'mrTcomicT02.html',
 								        baseUrl + 'mrTcomicT01.html',
 								        baseUrl + 'mrTcomicIW3.html',
 								        baseUrl + 'mrTcomicIW2.html',
 								        baseUrl + 'mrTcomicIW1.html',
-												Added some comic strips and cleanup the scraper code.

											
										
										
											2013-03-06 19:00:30 +00:00
+								    )
 								    firstStripUrl = prevurls[-1]
 								    multipleImagesPerStrip = True
 								    help = 'Index format: none'
 								    def getPrevUrl(self, url, data, baseUrl):
 								        idx = self.prevurls.index(url)
 								        try:
 								            return self.prevurls[idx+1]
 								        except IndexError:
 								            return None
-												Added some comics, fixed some.

											
										
										
											2013-03-21 17:33:16 +00:00
+								class WormWorldSaga(_BasicScraper):
-												Add some descriptions.

											
										
										
											2013-04-14 07:02:14 +00:00
+								    description = u'The Wormworld Saga Online Graphic Novel by Daniel Lieske - An Epic Fantasy Adventure for all Ages'
-												Added some comics, fixed some.

											
										
										
											2013-03-21 17:33:16 +00:00
+								    url = 'http://www.wormworldsaga.com/'
 								    stripUrl = url + 'chapters/%s/index.php'
 								    firstStripUrl = stripUrl % 'chapter01/EN'
 								    imageSearch = (
 								        compile(tagre("img", "src", r'(images/CH\d+_\d+\.[^"]+)')),
 								        compile(tagre("img", "src", r'(panels/CH\d+_[^"]+)')),
 								    )
-												Updated wormworld chapter.

											
										
										
											2013-04-30 04:42:49 +00:00
+								    latestChapter = 5
-												Added some comics, fixed some.

											
										
										
											2013-03-21 17:33:16 +00:00
+								    multipleImagesPerStrip = True
-												Fix WormWorldSaga*

											
										
										
											2013-03-26 16:34:27 +00:00
+								    @classmethod
-												Added some comics, fixed some.

											
										
										
											2013-03-21 17:33:16 +00:00
+								    def starter(cls):
 								        return '%schapters/chapter%02d/%s/index.php' % (
 								            cls.url, cls.latestChapter, cls.lang.upper())
 								    def getPrevUrl(self, url, data, baseUrl):
 								        """Find previous URL."""
 								        if 'chapter04' in url:
 								            return url.replace('chapter04', 'chapter03')
 								        if 'chapter03' in url:
 								            return url.replace('chapter03', 'chapter02')
 								        if 'chapter02' in url:
 								            return url.replace('chapter02', 'chapter01')
 								        return None
 								class WormWorldSagaGerman(WormWorldSaga):
 								    lang = 'de'
 								class WormWorldSagaSpanish(WormWorldSaga):
 								    lang = 'es'
 								class WormWorldSagaFrench(WormWorldSaga):
 								    lang = 'fr'
-												Initial commit to Github.

											
										
										
											2012-06-20 19:58:13 +00:00
+								class WotNow(_BasicScraper):
-												Always have an url attribute in comic scrapers.

											
										
										
											2013-02-04 20:00:26 +00:00
+								    url = 'http://shadowburn.binmode.com/wotnow/'
 								    stripUrl = url + 'comic.php?comic_id=%s'
-												Add firstStripUrls.

											
										
										
											2013-04-10 21:57:09 +00:00
+								    firstStripUrl = stripUrl % '1'
-												Initial commit to Github.

											
										
										
											2012-06-20 19:58:13 +00:00
+								    imageSearch = compile(r'<IMG SRC="(comics/.+?)"')
 								    prevSearch = compile(r'<A HREF="(.+?)"><IMG SRC="images/b_prev.gif" ')
 								    help = 'Index format: n (unpadded)'
-												Various comics are fixed.

											
										
										
											2012-12-13 20:05:27 +00:00
+								# XXX disallowed by robots.txt
 								class _WorldOfWarcraftEh(_BasicScraper):
-												Always have an url attribute in comic scrapers.

											
										
										
											2013-02-04 20:00:26 +00:00
+								    url = 'http://woweh.com/'
-												Updated documentation and fix some comics.

											
										
										
											2012-11-20 17:53:53 +00:00
+								    stripUrl = None
-												Initial commit to Github.

											
										
										
											2012-06-20 19:58:13 +00:00
+								    imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
 								    prevSearch = compile(r'woweh.com/(\?p=.+:?)".+:?="prev')
 								class Wulffmorgenthaler(_BasicScraper):
-												Add some descriptions.

											
										
										
											2013-04-14 07:02:14 +00:00
+								    description = u"Entertainment - Since 2002. Wulff & Morgenthaler's Personal humoristic social commentary on life, nostalgia and the World in general. Nothing is taboo: They deal with Politics, News, Entertainment, Technology, Culture, and Weirdo Beavers"
-												Fix broken comics

											
										
										
											2013-11-12 17:33:14 +00:00
+								    url = 'http://kindofnormal.com/wumo/'
 								    rurl = escape(url)
-												Comic fixes.

											
										
										
											2013-05-25 21:24:33 +00:00
+								    stripUrl = url + '%s'
-												Fix broken comics

											
										
										
											2013-11-12 17:33:14 +00:00
+								    imageSearch = compile(tagre("div", "class", r'box-content') + r"\s*" +
 								        tagre("a", "href", r'[^"]+') + r"\s*" +
 								        tagre("img", "src", r'(http://kindofnormal\.com/img/wumo/\d+/\d+/[^/"]+)'))
 								    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + tagre("i", "class", r'icon-chevron-left'))
-												Fix some comics.

											
										
										
											2012-11-26 06:13:32 +00:00
+								    help = 'Index format: yyyy/mm/dd'