Fix some modules.

This commit is contained in:
Tobias Gruetzmacher 2017-02-12 02:16:38 +01:00
parent 20ca5d7fc2
commit ebe98bc8ba
6 changed files with 36 additions and 66 deletions

View file

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
# Copyright (C) 2015-2017 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
@@ -219,20 +219,19 @@ class DreamKeepersPrelude(_ParserScraper):
help = 'Index format: n'
class DresdenCodak(_BasicScraper):
class DresdenCodak(_ParserScraper):
url = 'http://dresdencodak.com/'
rurl = escape(url)
stripUrl = None
startUrl = url + 'cat/comic/'
firstStripUrl = url + '2007/02/08/pom/'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) +
tagre("img", "src", r"%sm_prev2?\.png" % rurl,
quote=""))
latestSearch = compile(tagre("div", "id", "preview") +
tagre("a", "href",
r'(%s\d+/\d+/\d+/[^"]+)' % rurl))
imageSearch = '//section[%s]//img' % xpath_class('entry-content')
prevSearch = '//a[@rel="prev"]'
latestSearch = '//a[%s]' % xpath_class('tc-grid-bg-link')
starter = indirectStarter
# Blog and comic are mixed...
def shouldSkipUrl(self, url, data):
return not data.xpath(self.imageSearch)
class DrFun(_BasicScraper):
baseUrl = 'http://www.ibiblio.org/Dave/'
@@ -287,10 +286,3 @@ class DumbingOfAge(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'(%s\d+/[^"]+)' % rurl, after="prev"))
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
help = 'Index format: yyyy/comic/book-num/seriesname/stripname'
class DungeonsAndDenizens(_WordPressScraper):
url = 'http://dungeond.com/'
firstStripUrl = url + '2005/08/23/08232005/'
endOfLife = True
prevSearch = '//a[%s]' % xpath_class('navi-prev')

View file

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
# Copyright (C) 2015-2017 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
@@ -27,15 +27,6 @@ class KevinAndKell(_BasicScraper):
return self.stripUrl % tuple(map(int, index.split('-')))
class Key(_BasicScraper):
baseUrl = 'http://key.shadilyn.com/'
url = baseUrl + 'latestpage.html'
stripUrl = baseUrl + 'pages/%s.html'
imageSearch = compile(r'"((?:images/.+?)|(?:pages/images/.+?))"')
prevSearch = compile(r'</a><a href="(.+?html)".+?prev')
help = 'Index format: nnn'
class KickInTheHead(_WordPressScraper):
url = 'http://www.kickinthehead.org/'
firstStripUrl = url + '2003/03/20/ipod-envy/'

View file

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
# Copyright (C) 2015-2017 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
@@ -9,7 +9,7 @@ from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper, _ParserScraper
from ..util import tagre
from .common import _ComicControlScraper, _WordPressScraper
from .common import _ComicControlScraper, _WordPressScraper, xpath_class
class MacHall(_BasicScraper):
@@ -71,14 +71,12 @@ class Marilith(_BasicScraper):
help = 'Index format: yyyymmdd'
class MarriedToTheSea(_BasicScraper):
class MarriedToTheSea(_ParserScraper):
url = 'http://www.marriedtothesea.com/'
rurl = escape(url)
stripUrl = url + '%s'
firstStripUrl = stripUrl % '022806'
imageSearch = compile(tagre("img", "src", r'(%s\d+/[^"]+)' % rurl,
before="overflow"))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "&lt;&lt; Yesterday")
imageSearch = '//div[%s]//p/img' % xpath_class('jumbotron')
prevSearch = '//a[contains(text(), "Yesterday")]'
help = 'Index format: mmddyy'
def namer(self, image_url, page_url):
@@ -201,6 +199,7 @@ class MysteriesOfTheArcana(_ParserScraper):
imageSearch = '//div[@id="comic"]//img'
prevSearch = '//a[@class="navprevious"]'
class MonsterUnderTheBed(_WordPressScraper):
adult = True
url = 'http://themonsterunderthebed.net/'
adult = True
url = 'http://themonsterunderthebed.net/'

View file

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
# Copyright (C) 2015-2017 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
@@ -113,12 +113,12 @@ class Optipess(_WordPressScraper):
textOptional = True
class OurHomePlanet(_BasicScraper):
url = 'http://gdk.gd-kun.net/'
stripUrl = url + '%s.html'
class OurHomePlanet(_ParserScraper):
url = 'http://www.ourhomeplanet.net/'
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % '01'
imageSearch = compile(r'<img src="(pages/comic.+?)"')
prevSearch = compile(r'coords="50,18,95,65".+?href="(.+?\.html)".+?alt=')
imageSearch = '//a[@rel="next"]/img'
prevSearch = '//a[@rel="prev"]'
help = 'Index format: n (unpadded)'

View file

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
# Copyright (C) 2015-2017 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
@@ -109,7 +109,7 @@ class PHDComics(_ParserScraper):
url = baseUrl + 'comics.php'
stripUrl = baseUrl + 'comics/archive.php?comicid=%s'
firstStripUrl = stripUrl % '1'
imageSearch = '//img[@id="comic"]'
imageSearch = '//img[@id="comic2"]'
prevSearch = '//a[img[contains(@src, "prev_button")]]'
nextSearch = '//a[img[contains(@src, "next_button")]]'
help = 'Index format: n (unpadded)'
@@ -138,15 +138,6 @@ class PiledHigherAndDeeper(PHDComics):
namer = queryNamer('comicid', use_page_url=True)
class Pimpette(_ParserScraper):
url = 'http://pimpette.ca/'
stripUrl = url + 'index.php?date=%s'
firstStripUrl = stripUrl % '20030905'
imageSearch = '//div[@id="strip"]/img'
prevSearch = '//a[text()="previous"]'
help = 'Index format: yyyymmdd'
class Pixel(_BasicScraper):
url = 'http://pixelcomic.net/'
rurl = escape(url)

View file

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
# Copyright (C) 2015-2017 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
@@ -65,7 +65,7 @@ class SandraOnTheRocks(_BasicScraper):
class ScandinaviaAndTheWorld(_ParserScraper):
url = 'http://satwcomic.com/'
url = 'https://satwcomic.com/'
stripUrl = url + '%s'
firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
starter = indirectStarter
@@ -156,7 +156,7 @@ class SequentialArt(_BasicScraper):
class SexyLosers(_ParserScraper):
adult = True
url = 'http://www.sexylosers.com/'
url = 'https://www.sexylosers.com/'
stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % '003'
imageSearch = '//div[@class="entry-content"]//img'
@@ -240,13 +240,10 @@ class Sithrah(_ParserScraper):
prevSearch = '//a[%s]' % xpath_class('previous-webcomic-link')
class SkinDeep(_BasicScraper):
class SkinDeep(_ParserScraper):
url = 'http://www.skindeepcomic.com/'
stripUrl = url + 'archive/%s/'
imageSearch = compile(r'<span class="webcomic-object[^>]*><img src="([^"]*)"')
prevSearch = compile(tagre("a", "href", r'([^"]+)',
after="previous-webcomic-link"))
help = 'Index format: custom'
imageSearch = '//a[%s]/img' % xpath_class('webcomic-link')
prevSearch = '//a[%s]' % xpath_class('previous-webcomic-link')
class SleeplessDomain(_ComicControlScraper):
@@ -408,9 +405,9 @@ class StarCrossdDestiny(_ParserScraper):
class StationV3(_ParserScraper):
url = 'http://www.stationv3.com/'
stripUrl = url + 'd2/%s.html'
firstStripUrl = stripUrl % '20150628'
imageSearch = '//img[contains(@src,"/comics2/")]'
stripUrl = url + 'd3/%s.html'
firstStripUrl = stripUrl % '20170101'
imageSearch = '//img[contains(@src,"/comics3/")]'
prevSearch = '//a[img[contains(@src,"/previous2")]]'
help = 'Index format: yyyymmdd'