dosage/dosagelib/plugins/b.py

# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2019 Tobias Gruetzmacher

from __future__ import absolute_import, division, print_function

from re import compile, escape

from ..util import tagre
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, xpath_class
from .common import _ComicControlScraper, _WordPressScraper, _WPNaviIn


class BadassMuthas(_BasicScraper):
    # Regex-based scraper definition for badassmuthas.com; strips are
    # addressed by an index appended as the query string.
    url = "http://badassmuthas.com/pages/comic.php"
    stripUrl = url + "?%s"
    firstStripUrl = stripUrl % "1"
    help = "Index format: nnn"
    # Comic image: an <img> whose src starts with /images/comicsissue.
    imageSearch = compile(
        tagre("img", "src", r'(/images/comicsissue[^"]+)'))
    # Previous page: the <a href> pattern concatenated with the pattern
    # for the "Back" button image, so the link must precede that image.
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)') +
        tagre("img", "src", r'/images/comicsbuttonBack\.gif'))


class BadMachinery(_ParserScraper):
    # XPath-based scraper definition for scarygoround.com; strips are
    # addressed through the ?date= query parameter.
    url = "http://scarygoround.com/"
    stripUrl = url + "?date=%s"
    firstStripUrl = stripUrl % "20090918"
    help = "Index format: yyyymmdd"
    imageSearch = '//img[@class="comicimg"]'
    prevSearch = '//a[contains(text(), "Previous")]'
    # Flag read by the base scraper (not shown here); presumably turns on
    # a workaround for malformed HTML -- confirm in ..scraper.
    broken_html_bugfix = True


class BalderDash(_ComicControlScraper):
    # Only the site URL is set here; every other attribute is inherited
    # from _ComicControlScraper (imported from .common).
    url = "http://www.balderdashcomic.com/"


class Bardsworth(_WordPressScraper):
    # Built on _WordPressScraper (imported from .common).  Uses
    # indirectStarter together with `latestSearch`; presumably the starter
    # follows that link from `url` to reach the newest strip -- confirm in
    # ..helpers.
    url = "http://www.bardsworth.com/"
    starter = indirectStarter
    latestSearch = '//a[@rel="bookmark"]'


class Baroquen(_BasicScraper):
    # Regex-based scraper definition for baroquencomics.com.
    url = "http://www.baroquencomics.com/"
    stripUrl = url + "%s/"
    firstStripUrl = stripUrl % "2008/11/05/raise-the-curtains"
    help = "Index format: yyyy/mm/dd/strip-name"
    # Regex-escaped site URL, interpolated into both patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%sComics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))


class Bearmageddon(_WordPressScraper):
    # Built on _WordPressScraper (imported from .common).  `url` points at
    # page 1 and doubles as firstStripUrl; `latestSearch` selects the
    # link carrying the comic-nav-last class.
    url = "http://bearmageddon.com/bearmo/page-1/"
    firstStripUrl = url
    starter = indirectStarter
    latestSearch = "//a[%s]" % xpath_class("comic-nav-last")


class Beetlebum(_BasicScraper):
    # Regex-based scraper definition for blog.beetlebum.de (lang 'de').
    url = "http://blog.beetlebum.de/"
    lang = "de"
    help = "Index format: yyyy/mm/dd/striptitle"
    stripUrl = url + "%s"
    firstStripUrl = stripUrl % "2006/03/10/quiz-fur-ruskiphile"
    multipleImagesPerStrip = True
    starter = indirectStarter
    # Regex-escaped site URL, interpolated into the link patterns below.
    rurl = escape(url)
    imageSearch = compile(tagre(
        "img", "src",
        r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)'))
    # Both link patterns require a yyyy/mm/dd/ path below the site root.
    prevSearch = compile(tagre(
        "a", "href", r'(%s\d{4}/\d{2}/\d{2}/[^"]*)' % rurl, after="prev"))
    latestSearch = compile(tagre(
        "a", "href", r'(%s\d{4}/\d{2}/\d{2}/[^"]+)' % rurl,
        after="bookmark"))

    def namer(self, image_url, page_url):
        """Build a filename from the page URL path and the image name.

        The last four '/'-separated segments of ``page_url`` (trailing
        slashes stripped) are joined with '-', followed by '_' and the
        final path segment of ``image_url``.
        """
        path_parts = page_url.rstrip("/").split("/")
        prefix = "%s-%s-%s-%s" % tuple(path_parts[-4:])
        image_name = image_url.split("/")[-1]
        return prefix + "_" + image_name


class Bethellium(_ParserScraper):
    # XPath-based scraper definition for Bethellium.  `url` is set to the
    # first strip; `latestSearch` selects the last-webcomic-link anchor
    # (used with indirectStarter).
    stripUrl = 'http://dbcomics.darkblueworkshop.com/bethellium/%s/'
    firstStripUrl = stripUrl % 'chapter-1/webcomic-bethellium-chapter-1-cover'
    url = firstStripUrl
    imageSearch = '//div[@class="webcomic-image"]//img'
    prevSearch = '//a[contains(@class, "previous-webcomic-link")]'
    latestSearch = '//a[contains(@class, "last-webcomic-link")]'
    starter = indirectStarter

    def namer(self, imageUrl, pageUrl):
        """Prefix the image filename with the chapter slug of the page.

        The chapter slug is the second-to-last path segment of ``pageUrl``
        (trailing slashes stripped).  Any 'chapter-1' in it that is not
        followed by another digit is expanded to
        'chapter-1-the-magic-city'.  Returns '<chapter>_<image filename>'.
        """
        # Prepend chapter title to page filenames
        chapter = pageUrl.rstrip('/').rsplit('/', 3)[-2]
        # The negative lookahead keeps 'chapter-10' .. 'chapter-19' intact;
        # a plain str.replace would turn them into
        # 'chapter-1-the-magic-city0' etc.
        chapter = compile(r'chapter-1(?!\d)').sub(
            'chapter-1-the-magic-city', chapter)
        page = imageUrl.rsplit('/', 1)[-1]
        return chapter + '_' + page


class BetterDays(_ParserScraper):
    # XPath-based scraper definition for the Better Days archive on
    # jaynaylor.com.
    url = "http://jaynaylor.com/betterdays/"
    stripUrl = url + "archives/%s.html"
    firstStripUrl = stripUrl % "2003/04/post-2"
    help = "Index format: yyyy/mm/<your guess>"
    # Flag read by the scraper framework (not shown here); presumably
    # marks the comic as no longer updated.
    endOfLife = True
    imageSearch = '//img[contains(@src, "/betterdays/comic/")]'
    prevSearch = '//a[contains(text(), "Previous")]'


class BetweenFailures(_BasicScraper):
    # Regex-based scraper definition for betweenfailures.com.  This class
    # sets no firstStripUrl of its own.
    url = "http://betweenfailures.com/"
    stripUrl = url + "comics1/%s"
    help = "Index format: stripname"
    # Regex-escaped site URL, interpolated into both patterns below.
    rurl = escape(url)
    imageSearch = compile(tagre(
        "img", "src",
        r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))
    prevSearch = compile(tagre(
        "a", "href", r'(%scomics1/[^"]+)' % rurl, after="previous"))


class BiggerThanCheeses(_BasicScraper):
    # Regex-based scraper definition for biggercheese.com; strips are
    # addressed by an unpadded number in the ?comic= query parameter.
    url = 'http://www.biggercheese.com/'
    stripUrl = url + 'index.php?comic=%s'
    firstStripUrl = stripUrl % '1'
    # Comic image: a relative src under comics/ followed by an alt
    # attribute.
    imageSearch = compile(r'src="(comics/.+?)" alt')
    # The dot in "index.php" is escaped so it only matches a literal '.'
    # rather than any character.
    prevSearch = compile(r'"(index\.php\?comic=.+?)".+?_back')
    help = 'Index format: n (unpadded)'


class BillyTheDunce(_ParserScraper):
    # XPath-based scraper definition for duncepress.com.
    url = "http://www.duncepress.com/"
    firstStripUrl = url + "2009/06/an-introduction-of-sorts"
    starter = indirectStarter
    # Newest post: the link inside the post-title heading.
    latestSearch = '//h2[@class="post-title"]/a'
    prevSearch = '//a[@rel="prev"]'
    # Selects the <a> inside the entry's first paragraph (not an <img>).
    imageSearch = '//div[@class="entry"]/p[1]/a'


class BlankIt(_ParserScraper):
    # XPath-based scraper definition for blankitcomics.com; navigation
    # links are picked by their comic-nav-* CSS classes.
    url = "http://blankitcomics.com/"
    firstStripUrl = url + "comic/well-what-would-you-do"
    starter = indirectStarter
    imageSearch = '//div[@id="comic"]//img'
    latestSearch = "//a[%s]" % xpath_class("comic-nav-last")
    prevSearch = "//a[%s]" % xpath_class("comic-nav-previous")


class BlondeSunrise(_ParserScraper):
    # XPath-based scraper definition for blondesunrise.com.
    # NOTE(review): `url` uses /comic.php while `firstStripUrl` uses
    # /comic/comic.php -- confirm against the live site which is right.
    url = "https://blondesunrise.com/comic.php?page=latest"
    firstStripUrl = "https://blondesunrise.com/comic/comic.php?page=1"
    # Previous page: an <a> wrapping an image whose src contains
    # "previous".
    prevSearch = '//a[img[contains(@src, "previous")]]'
    imageSearch = '//img[contains(@src, "comic_imgs/")]'


class BloodBound(_WordPressScraper):
    # Built on _WordPressScraper (imported from .common); only the site
    # URL and the first strip are specified here.
    url = "http://bloodboundcomic.com/"
    firstStripUrl = url + "comic/06112006/"


class Bloodline(_WordPressScraper):
    # Built on _WordPressScraper (imported from .common), with a custom
    # image query and file naming.
    url = "http://w0lfmare.xepher.net/"
    stripUrl = url + "comic/%s"
    firstStripUrl = stripUrl % "pg-1-2"
    # Excludes images whose src contains "TWC-vote-image".
    imageSearch = '//div[@id="comic"]//img[not(contains(@src, "TWC-vote-image"))]'

    def namer(self, imageUrl, pageUrl):
        """Name the file after the image, fixing filenames of early comics.

        Takes the last path segment of ``imageUrl`` and replaces every
        'gen-6' in it with 'Bloodline'.  ``pageUrl`` is not used.
        """
        filename = imageUrl.rsplit("/", 1)[-1]
        return filename.replace("gen-6", "Bloodline")


class BloomingFaeries(_WordPressScraper):
    # Built on _WordPressScraper (imported from .common); flagged as
    # adult content.
    url = "http://www.bloomingfaeries.com/"
    firstStripUrl = url + "comic/public/pit-stop/"
    adult = True

    def namer(self, image_url, page_url):
        """Join the last three path segments of the image URL with '_'.

        ``page_url`` is not used.
        """
        tail_segments = image_url.rsplit("/", 3)[1:]
        return "_".join(tail_segments)


class BMovieComic(_BasicScraper):
    # Regex-based scraper definition for bmoviecomic.com; strips are
    # addressed through the ?cid= query parameter.
    url = "http://www.bmoviecomic.com/"
    stripUrl = url + "?cid=%s"
    firstStripUrl = stripUrl % "8"
    help = "Index format: n"
    # Comic image: a quoted relative path under comics/.
    imageSearch = compile(r'"(comics/.+?)"')
    # Previous page: a ?cid= link followed later by the text "Prev".
    prevSearch = compile(r'(\?cid=.+?)".+?Prev')


class BobWhite(_ParserScraper):
    # XPath-based scraper definition for bobwhitecomics.com.
    url = "http://www.bobwhitecomics.com/"
    prevSearch = '//a[@rel="previous"]'
    # Comic image: an <img> directly inside a span carrying the
    # webcomic-object class.
    imageSearch = "//span[%s]/img" % xpath_class("webcomic-object")


class BookOfBiff(_BasicScraper):
    # Regex-based scraper definition for thebookofbiff.com.
    url = "http://thebookofbiff.com/"
    stripUrl = url + "%s/"
    firstStripUrl = stripUrl % "2006/01/02/4"
    help = "Index format: yyyy/mm/dd/stripnum-stripname"
    # Comic image: any src containing a /comics/ path component.
    imageSearch = compile(
        tagre("img", "src", r'([^"]+/comics/[^"]+)'))
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)', after="Previous"))


class BoredAndEvil(_BasicScraper):
    # Regex-based scraper definition for boredandevil.com; strips are
    # addressed through the ?date= query parameter.
    url = 'http://www.boredandevil.com/'
    stripUrl = url + '?date=%s'
    firstStripUrl = stripUrl % '2004-06-07'
    imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)'))
    # Captures an href located between the "First Comic" text and the
    # previous-on.gif button.  The dot before "gif" is escaped so it only
    # matches a literal '.'.
    prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on\.gif')
    # The same pattern is reused to locate the starting strip.
    latestSearch = prevSearch
    starter = indirectStarter
    help = 'Index format: yyyy-mm-dd'


class BratHalla(_WordPressScraper):
    # Only the site URL is set here; every other attribute is inherited
    # from _WordPressScraper (imported from .common).
    url = "http://brat-halla.com/"


class Brink(_BasicScraper):
    # Regex-based scraper definition for paperfangs.com/brink; strips are
    # addressed through the ?p= query parameter.
    url = "http://paperfangs.com/brink/"
    stripUrl = url + "?p=%s"
    firstStripUrl = stripUrl % "5"
    help = "Index format: number"
    # Regex-escaped site URL, interpolated into both patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))


class BroodHollow(_WordPressScraper):
    # Built on _WordPressScraper (imported from .common), with a hook
    # for skipping pages that embed an iframe in the comic container.
    url = "http://broodhollow.chainsawsuit.com/"
    firstStripUrl = url + "page/2012/10/06/book-1-curious-little-thing"

    def shouldSkipUrl(self, url, data):
        """Report whether the page has an <iframe> in its comic container.

        Returns the raw result of ``data.xpath`` (presumably a list of
        matching nodes, truthy when at least one exists).  ``url`` is not
        used.
        """
        embedded_frames = data.xpath('//div[@id="comic"]//iframe')
        return embedded_frames


class Buni(_WordPressScraper):
    # Only the site URL is set here; every other attribute is inherited
    # from _WordPressScraper (imported from .common).
    url = "http://www.bunicomic.com/"


class BusinessCat(_WPNaviIn):
    # Only the site URL is set here; every other attribute is inherited
    # from _WPNaviIn (imported from .common).
    url = "http://www.businesscat.happyjar.com/"


class ButImACatPerson(_WordPressScraper):
    # Built on _WordPressScraper (imported from .common).
    url = 'http://www.bicatperson.com/'
    # Anchored on `url` / `stripUrl` so that both values are absolute
    # URLs rather than bare path fragments.
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'sketches-1'


class ButtercupFestival(_ParserScraper):
    # XPath-based scraper definition for buttercupfestival.com; each
    # strip is a separate .htm page.
    url = "http://www.buttercupfestival.com/"
    stripUrl = url + "%s.htm"
    firstStripUrl = stripUrl % "2-1"
    help = "Index format: 2-number"
    # Previous page: a link whose text is exactly "previous".
    prevSearch = '//a[text()="previous"]'
    imageSearch = "//center/img"


class ButternutSquash(_BasicScraper):
    # Regex-based scraper definition for butternutsquash.net.
    url = "http://www.butternutsquash.net/"
    stripUrl = url + "%s/"
    firstStripUrl = stripUrl % "2003/04/16/meet-da-punks"
    help = "Index format: yyyy/mm/dd/strip-name-author-name"
    # Regex-escaped site URL, interpolated into both patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))


class ButterSafe(_BasicScraper):
    # Regex-based scraper definition for buttersafe.com.
    url = "http://buttersafe.com/"
    stripUrl = url + "%s/"
    firstStripUrl = stripUrl % "2007/04/03/breakfast-sad-turtle"
    help = "Index format: yyyy/mm/dd/stripname"
    # Regex-escaped site URL, interpolated into both patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # NOTE(review): the leading `\d+\d+` simply requires two or more
    # digits for the first path component; kept as-is.
    prevSearch = compile(tagre(
        "a", "href", r'(%s\d+\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
Sort comics alphabetically & PEP8 style fixes. 2016-03-31 21:13:54 +00:00			`# -- coding: utf-8 --`
Fixup copyright years. 2016-10-28 22:21:41 +00:00			`# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs`
Updated copyright. 2014-01-05 15:50:57 +00:00			`# Copyright (C) 2012-2014 Bastian Kleineidam`
Fix a bunch of flake8 issues 2019-11-03 23:16:25 +00:00			`# Copyright (C) 2015-2019 Tobias Gruetzmacher`
Fix some comics. 2012-11-21 20:57:26 +00:00
Sort comics alphabetically & PEP8 style fixes. 2016-03-31 21:13:54 +00:00			`from __future__ import absolute_import, division, print_function`
Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-01 22:14:31 +00:00
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`from re import compile, escape`
Initial commit to Github. 2012-06-20 19:58:13 +00:00
BloomingFaeries: Don't download every page twice. (Also, simplify namer, switch to _ParserScraper) 2016-04-05 21:58:43 +00:00			`from ..util import tagre`
Add Blade Kitten as an example for the new parser. 2014-07-23 18:54:00 +00:00			`from ..scraper import _BasicScraper, _ParserScraper`
Move xpath_class to helpers module. 2017-02-13 21:41:17 +00:00			`from ..helpers import indirectStarter, xpath_class`
Unify more WordPress-based modules. 2017-05-21 23:17:05 +00:00			`from .common import _ComicControlScraper, _WordPressScraper, _WPNaviIn`
Initial commit to Github. 2012-06-20 19:58:13 +00:00

Sort comics. 2013-03-06 19:21:10 +00:00			`class BadassMuthas(_BasicScraper):`
			`url = 'http://badassmuthas.com/pages/comic.php'`
			`stripUrl = url + '?%s'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`firstStripUrl = stripUrl % '1'`
Sort comics. 2013-03-06 19:21:10 +00:00			`imageSearch = compile(tagre("img", "src", r'(/images/comicsissue[^"]+)'))`
Sort comics alphabetically & PEP8 style fixes. 2016-03-31 21:13:54 +00:00			`prevSearch = compile(tagre("a", "href", r'([^"]+)') +`
			`tagre("img", "src", r'/images/comicsbuttonBack\.gif'))`
Sort comics. 2013-03-06 19:21:10 +00:00			`help = 'Index format: nnn'`


Fix some more comic modules. 2016-04-26 22:31:27 +00:00			`class BadMachinery(_ParserScraper):`
Added some comics. 2013-02-06 21:08:36 +00:00			`url = 'http://scarygoround.com/'`
			`stripUrl = url + '?date=%s'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`firstStripUrl = stripUrl % '20090918'`
Fix some more comic modules. 2016-04-26 22:31:27 +00:00			`imageSearch = '//img[@class="comicimg"]'`
			`prevSearch = '//a[contains(text(), "Previous")]'`
Move libxml < 2.9.3 workaround to base class. 2016-05-02 21:22:06 +00:00			`broken_html_bugfix = True`
Added some comics. 2013-02-06 21:08:36 +00:00			`help = 'Index format: yyyymmdd'`


Move ComicControl into common module. - Move all comics using ComicControl into alphabetical files. - Add BalderDash & Picklewhistle 2016-04-03 22:12:53 +00:00			`class BalderDash(_ComicControlScraper):`
			`url = 'http://www.balderdashcomic.com/'`


Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-01 22:14:31 +00:00			`class Bardsworth(_WordPressScraper):`
			`url = 'http://www.bardsworth.com/'`
Read starter parameters from class. This allows to specify starters in a more declarative and dynamic way. 2016-04-12 21:11:39 +00:00			`latestSearch = '//a[@rel="bookmark"]'`
Refactor: Convert starter to simple method. 2016-04-13 18:01:51 +00:00			`starter = indirectStarter`
Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-01 22:14:31 +00:00

Sort comics. 2013-03-06 19:21:10 +00:00			`class Baroquen(_BasicScraper):`
			`url = 'http://www.baroquencomics.com/'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`rurl = escape(url)`
Sort comics. 2013-03-06 19:21:10 +00:00			`stripUrl = url + '%s/'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`firstStripUrl = stripUrl % '2008/11/05/raise-the-curtains'`
			`imageSearch = compile(tagre("img", "src", r'(%sComics/[^"]+)' % rurl))`
			`prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after='prev'))`
Sort comics. 2013-03-06 19:21:10 +00:00			`help = 'Index format: yyyy/mm/dd/strip-name'`


Move more comics to common WordPressScraper. 2016-04-10 21:04:34 +00:00			`class Bearmageddon(_WordPressScraper):`
Some minor fixes to make some modules work again. 2017-11-27 00:04:35 +00:00			`url = 'http://bearmageddon.com/bearmo/page-1/'`
			`firstStripUrl = url`
			`latestSearch = '//a[%s]' % xpath_class('comic-nav-last')`
Fix some modules (a&b). 2016-09-28 23:29:01 +00:00			`starter = indirectStarter`
Added some comic strips and cleanup the scraper code. 2013-03-06 19:00:30 +00:00

Added comic Beetlebum (http://blog.beetlebum.de/). 2015-02-01 00:07:35 +00:00			`class Beetlebum(_BasicScraper):`
			`url = 'http://blog.beetlebum.de/'`
			`rurl = escape(url)`
			`stripUrl = url + '%s'`
			`firstStripUrl = stripUrl % '2006/03/10/quiz-fur-ruskiphile'`
Refactor: Convert starter to simple method. 2016-04-13 18:01:51 +00:00			`starter = indirectStarter`
Added comic Beetlebum (http://blog.beetlebum.de/). 2015-02-01 00:07:35 +00:00			`multipleImagesPerStrip = True`
Fix coding style. 2015-03-22 16:13:53 +00:00			`imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)'))`
Sort comics alphabetically & PEP8 style fixes. 2016-03-31 21:13:54 +00:00			`prevSearch = compile(tagre('a', 'href',`
			`r'(%s\d{4}/\d{2}/\d{2}/[^"]*)' % rurl,`
			`after='prev'))`
Read starter parameters from class. This allows to specify starters in a more declarative and dynamic way. 2016-04-12 21:11:39 +00:00			`latestSearch = compile(tagre('a', 'href',`
			`r'(%s\d{4}/\d{2}/\d{2}/[^"]+)' % rurl,`
			`after='bookmark'))`
Added comic Beetlebum (http://blog.beetlebum.de/). 2015-02-01 00:07:35 +00:00			`help = 'Index format: yyyy/mm/dd/striptitle'`
			`lang = 'de'`
Fix coding style. 2015-03-22 16:13:53 +00:00
Refactor: Make namer a method. When #42 is realized, the naming of files might differ between comic modules, so the namer's logical location is the instance, not the class. 2016-04-21 06:20:49 +00:00			`def namer(self, image_url, page_url):`
			`indexes = tuple(page_url.rstrip('/').split('/')[-4:])`
Added comic Beetlebum (http://blog.beetlebum.de/). 2015-02-01 00:07:35 +00:00			`name = '%s-%s-%s-%s' % indexes`
Refactor: Make namer a method. When #42 is realized, the naming of files might differ between comic modules, so the namer's logical location is the instance, not the class. 2016-04-21 06:20:49 +00:00			`name = name + '_' + image_url.split('/')[-1]`
Added comic Beetlebum (http://blog.beetlebum.de/). 2015-02-01 00:07:35 +00:00			`return name`


Add Bethellium 2019-06-29 22:06:46 +00:00			`class Bethellium(_ParserScraper):`
			`stripUrl = 'http://dbcomics.darkblueworkshop.com/bethellium/%s/'`
			`firstStripUrl = stripUrl % 'chapter-1/webcomic-bethellium-chapter-1-cover'`
			`url = firstStripUrl`
			`imageSearch = '//div[@class="webcomic-image"]//img'`
			`prevSearch = '//a[contains(@class, "previous-webcomic-link")]'`
			`latestSearch = '//a[contains(@class, "last-webcomic-link")]'`
			`starter = indirectStarter`

			`def namer(self, imageUrl, pageUrl):`
			`# Prepend chapter title to page filenames`
			`chapter = pageUrl.rstrip('/').rsplit('/', 3)[-2]`
			`chapter = chapter.replace('chapter-1', 'chapter-1-the-magic-city')`
			`page = imageUrl.rsplit('/', 1)[-1]`
			`return chapter + '_' + page`


Switch BetterDays to parser scraper 2019-06-13 02:22:15 +00:00			`class BetterDays(_ParserScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://jaynaylor.com/betterdays/'`
			`stripUrl = url + 'archives/%s.html'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`firstStripUrl = stripUrl % '2003/04/post-2'`
Switch BetterDays to parser scraper 2019-06-13 02:22:15 +00:00			`imageSearch = '//img[contains(@src, "/betterdays/comic/")]'`
			`prevSearch = '//a[contains(text(), "Previous")]'`
			`endOfLife = True`
Updated documentation and fix some comics. 2012-11-20 17:53:53 +00:00			`help = 'Index format: yyyy/mm/<your guess>'`
Initial commit to Github. 2012-06-20 19:58:13 +00:00

Sort comics. 2013-03-06 19:21:10 +00:00			`class BetweenFailures(_BasicScraper):`
			`url = 'http://betweenfailures.com/'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`rurl = escape(url)`
Fix some comics. 2013-04-28 17:58:38 +00:00			`stripUrl = url + 'comics1/%s'`
			`imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))`
Sort comics alphabetically & PEP8 style fixes. 2016-03-31 21:13:54 +00:00			`prevSearch = compile(tagre("a", "href", r'(%scomics1/[^"]+)' % rurl,`
			`after="previous"))`
Fix some comics. 2013-04-28 17:58:38 +00:00			`help = 'Index format: stripname'`
Sort comics. 2013-03-06 19:21:10 +00:00

Initial commit to Github. 2012-06-20 19:58:13 +00:00			`class BiggerThanCheeses(_BasicScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://www.biggercheese.com/'`
			`stripUrl = url + 'index.php?comic=%s'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`firstStripUrl = stripUrl % '1'`
Initial commit to Github. 2012-06-20 19:58:13 +00:00			`imageSearch = compile(r'src="(comics/.+?)" alt')`
			`prevSearch = compile(r'"(index.php\?comic=.+?)".+?_back')`
			`help = 'Index format: n (unpadded)'`


Fix some more comic modules. 2016-04-26 22:31:27 +00:00			`class BillyTheDunce(_ParserScraper):`
Sort comics. 2013-03-06 19:21:10 +00:00			`url = 'http://www.duncepress.com/'`
Fix some more comic modules. 2016-04-26 22:31:27 +00:00			`firstStripUrl = url + '2009/06/an-introduction-of-sorts'`
			`imageSearch = '//div[@class="entry"]/p[1]/a'`
			`prevSearch = '//a[@rel="prev"]'`
			`latestSearch = '//h2[@class="post-title"]/a'`
			`starter = indirectStarter`
Sort comics. 2013-03-06 19:21:10 +00:00

Fix some more modules. 2017-05-14 22:27:28 +00:00			`class BlankIt(_ParserScraper):`
Sort comics. 2013-03-06 19:21:10 +00:00			`url = 'http://blankitcomics.com/'`
Fix some more modules. 2017-05-14 22:27:28 +00:00			`firstStripUrl = url + 'comic/well-what-would-you-do'`
			`imageSearch = '//div[@id="comic"]//img'`
			`prevSearch = '//a[%s]' % xpath_class('comic-nav-previous')`
			`latestSearch = '//a[%s]' % xpath_class('comic-nav-last')`
			`starter = indirectStarter`
Sort comics. 2013-03-06 19:21:10 +00:00

Add BlondeSunrise (fixes #142) 2019-12-02 23:14:57 +00:00			`class BlondeSunrise(_ParserScraper):`
			`url = 'https://blondesunrise.com/comic.php?page=latest'`
			`firstStripUrl = 'https://blondesunrise.com/comic/comic.php?page=1'`
			`imageSearch = '//img[contains(@src, "comic_imgs/")]'`
			`prevSearch = '//a[img[contains(@src, "previous")]]'`


Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-01 22:14:31 +00:00			`class BloodBound(_WordPressScraper):`
			`url = 'http://bloodboundcomic.com/'`
			`firstStripUrl = 'http://bloodboundcomic.com/comic/06112006/'`


Add Bloodline 2019-06-23 08:35:10 +00:00			`class Bloodline(_WordPressScraper):`
			`url = 'http://w0lfmare.xepher.net/'`
			`stripUrl = url + 'comic/%s'`
			`firstStripUrl = stripUrl % 'pg-1-2'`
			`imageSearch = '//div[@id="comic"]//img[not(contains(@src, "TWC-vote-image"))]'`

			`def namer(self, imageUrl, pageUrl):`
			`# Fix filenames of early comics`
			`return imageUrl.rsplit('/', 1)[-1].replace('gen-6', 'Bloodline')`


Unify more WordPress-based modules. 2017-05-21 23:17:05 +00:00			`class BloomingFaeries(_WordPressScraper):`
added comic Blooming Faeries 2015-09-02 14:13:42 +00:00			`adult = True`
			`url = 'http://www.bloomingfaeries.com/'`
BloomingFaeries: Don't download every page twice. (Also, simplify namer, switch to _ParserScraper) 2016-04-05 21:58:43 +00:00			`firstStripUrl = url + 'comic/public/pit-stop/'`
Sort comics alphabetically & PEP8 style fixes. 2016-03-31 21:13:54 +00:00
Refactor: Make namer a method. When #42 is realized, the naming of files might differ between comic modules, so the namer's logical location is the instance, not the class. 2016-04-21 06:20:49 +00:00			`def namer(self, image_url, page_url):`
BloomingFaeries: Don't download every page twice. (Also, simplify namer, switch to _ParserScraper) 2016-04-05 21:58:43 +00:00			`return "_".join(image_url.rsplit('/', 3)[1:])`
Sort comics alphabetically & PEP8 style fixes. 2016-03-31 21:13:54 +00:00
Initial commit to Github. 2012-06-20 19:58:13 +00:00
			`class BMovieComic(_BasicScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://www.bmoviecomic.com/'`
			`stripUrl = url + '?cid=%s'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`firstStripUrl = stripUrl % '8'`
Initial commit to Github. 2012-06-20 19:58:13 +00:00			`imageSearch = compile(r'"(comics/.+?)"')`
			`prevSearch = compile(r'(\?cid=.+?)".+?Prev')`
			`help = 'Index format: n'`


Restore BobWhite. 2016-06-05 16:31:30 +00:00			`class BobWhite(_ParserScraper):`
			`url = 'http://www.bobwhitecomics.com/'`
			`imageSearch = '//span[%s]/img' % xpath_class('webcomic-object')`
			`prevSearch = '//a[@rel="previous"]'`


Sort comics. 2013-03-06 19:21:10 +00:00			`class BookOfBiff(_BasicScraper):`
Fix broken comics 2013-11-12 17:33:14 +00:00			`url = 'http://thebookofbiff.com/'`
Add firstStripUrls. 2013-04-10 21:57:09 +00:00			`stripUrl = url + '%s/'`
			`firstStripUrl = stripUrl % '2006/01/02/4'`
Sort comics. 2013-03-06 19:21:10 +00:00			`imageSearch = compile(tagre("img", "src", r'([^"]+/comics/[^"]+)'))`
			`prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))`
Add firstStripUrls. 2013-04-10 21:57:09 +00:00			`help = 'Index format: yyyy/mm/dd/stripnum-stripname'`
Sort comics. 2013-03-06 19:21:10 +00:00

Sort entries. 2013-04-10 16:36:33 +00:00			`class BoredAndEvil(_BasicScraper):`
			`url = 'http://www.boredandevil.com/'`
			`stripUrl = url + '?date=%s'`
			`firstStripUrl = stripUrl % '2004-06-07'`
			`imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)'))`
			`prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on.gif')`
Read starter parameters from class. This allows to specify starters in a more declarative and dynamic way. 2016-04-12 21:11:39 +00:00			`latestSearch = prevSearch`
Refactor: Convert starter to simple method. 2016-04-13 18:01:51 +00:00			`starter = indirectStarter`
Sort entries. 2013-04-10 16:36:33 +00:00			`help = 'Index format: yyyy-mm-dd'`


Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-01 22:14:31 +00:00			`class BratHalla(_WordPressScraper):`
			`url = 'http://brat-halla.com/'`


Sort comics alphabetically & PEP8 style fixes. 2016-03-31 21:13:54 +00:00			`class Brink(_BasicScraper):`
			`url = 'http://paperfangs.com/brink/'`
			`rurl = escape(url)`
			`stripUrl = url + '?p=%s'`
			`firstStripUrl = stripUrl % '5'`
			`imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))`
			`prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))`
			`help = 'Index format: number'`


Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-01 22:14:31 +00:00			`class BroodHollow(_WordPressScraper):`
			`url = 'http://broodhollow.chainsawsuit.com/'`
			`firstStripUrl = 'http://broodhollow.chainsawsuit.com/page/2012/10/06/book-1-curious-little-thing'`

Fix some modules (a&b). 2016-09-28 23:29:01 +00:00			`def shouldSkipUrl(self, url, data):`
			`return data.xpath('//div[@id="comic"]//iframe')`

Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-01 22:14:31 +00:00
			`class Buni(_WordPressScraper):`
			`url = 'http://www.bunicomic.com/'`


Move all HijinksEnsue comics into alphabetic files. 2016-05-01 23:25:34 +00:00			`class BusinessCat(_WPNaviIn):`
Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-01 22:14:31 +00:00			`url = 'http://www.businesscat.happyjar.com/'`


Add ButImACatPerson 2019-06-20 05:23:42 +00:00			`class ButImACatPerson(_WordPressScraper):`
			`url = 'http://www.bicatperson.com/'`
			`stripUrl = 'comic/%s/'`
			`firstStripUrl = 'sketches-1'`

Fix a bunch of flake8 issues 2019-11-03 23:16:25 +00:00
Enable some comics based on current policy. 2015-07-17 23:21:29 +00:00			`class ButtercupFestival(_ParserScraper):`
Added some comics. 2013-02-06 21:08:36 +00:00			`url = 'http://www.buttercupfestival.com/'`
Enable some comics based on current policy. 2015-07-17 23:21:29 +00:00			`stripUrl = url + '%s.htm'`
			`firstStripUrl = stripUrl % '2-1'`
			`imageSearch = '//center/img'`
			`prevSearch = '//a[text()="previous"]'`
			`help = 'Index format: 2-number'`
Added some comics. 2013-02-06 21:08:36 +00:00

Initial commit to Github. 2012-06-20 19:58:13 +00:00			`class ButternutSquash(_BasicScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://www.butternutsquash.net/'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`rurl = escape(url)`
			`stripUrl = url + '%s/'`
			`firstStripUrl = stripUrl % '2003/04/16/meet-da-punks'`
			`imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))`
			`prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))`
Initial commit to Github. 2012-06-20 19:58:13 +00:00			`help = 'Index format: yyyy/mm/dd/strip-name-author-name'`
Sort comics alphabetically & PEP8 style fixes. 2016-03-31 21:13:54 +00:00

			`class ButterSafe(_BasicScraper):`
			`url = 'http://buttersafe.com/'`
			`rurl = escape(url)`
			`stripUrl = url + '%s/'`
			`firstStripUrl = stripUrl % '2007/04/03/breakfast-sad-turtle'`
			`imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))`
			`prevSearch = compile(tagre("a", "href", r'(%s\d+\d+/\d+/\d+/[^"]+)' % rurl,`
			`after="prev"))`
			`help = 'Index format: yyyy/mm/dd/stripname'`