Fix a bunch of comic modules.

This commit is contained in:
Tobias Gruetzmacher 2016-10-31 06:57:47 +01:00
parent 446b81fc45
commit 47e2502ec7
10 changed files with 52 additions and 105 deletions

View file

@@ -167,11 +167,12 @@ class DMFA(_BasicScraper):
 help = 'Index format: nnn (normally, some specials)'
-class DoemainOfOurOwn(_BasicScraper):
+class DoemainOfOurOwn(_ParserScraper):
 url = 'http://www.doemain.com/'
 stripUrl = url + 'index.cgi/%s'
-imageSearch = compile(r"<img border='0' width='\d+' height='\d+' src='(/strips/\d{4}/\d{6}-[^\']+)'")
-prevSearch = compile(r'<a href="(/index\.cgi/\d{4}-\d{2}-\d{2})"><img width="\d+" height="\d+" border="\d+" alt="Previous Strip"')
+imageSearch = '//td/img[contains(@src, "/strips/")]'
+prevSearch = '//a[img[@alt="Previous Strip"]]'
+endOfLife = True
 help = 'Index format: yyyy-mm-dd'
@@ -194,17 +195,11 @@ class DominicDeegan(_BasicScraper):
 help = 'Index format: yyyy-mm-dd'
-class DorkTower(_BasicScraper):
+class DorkTower(_ParserScraper):
 url = 'http://www.dorktower.com/'
-rurl = escape(url)
-stripUrl = url + '%s/'
-firstStripUrl = stripUrl % '1997/01/01/shadis-magazine-strip-1'
-imageSearch = compile(tagre("div", "class", "entry-content") +
-"\s*<p>\s*" +
-tagre("img", "src", r'(%sfiles/[0-9]+/[0-9]+/[^"]*Dork[^"]+\.(?:gif|jpg))' % rurl,
-after=' alt'))
-prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + "Previous")
-help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy'
+firstStripUrl = url + '1997/01/01/shadis-magazine-strip-1/'
+imageSearch = '//div[%s]//a/img' % xpath_class('entry-content')
+prevSearch = '//a[%s][text()="Previous"]' % xpath_class('btn')
 class Dracula(_BasicScraper):

View file

@@ -56,17 +56,10 @@ class EatLiver(_ParserScraper):
 latestSearch = '//a[@rel="bookmark"]'
-class EatThatToast(_BasicScraper):
+class EatThatToast(_WordPressScraper):
 url = 'http://eatthattoast.com/'
-rurl = escape(url)
-stripUrl = url + 'comic/%s'
-firstStripUrl = stripUrl % 'thewizard/'
-imageSearch = compile(tagre("div", "id", r'comic') + "\s*.*\s*" + tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl))
-prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
-after='comic-nav-base comic-nav-previous'))
-textSearch = compile(tagre("div", "id", r'comic') + "\s*.*\s*" +
-tagre("img", "alt", r'([^"]+)'))
-help = 'Index Format: name'
+firstStripUrl = url + 'comic/thewizard/'
+textSearch = _WordPressScraper.imageSearch + '/@alt'
 class EdibleDirt(_BasicScraper):
@@ -225,13 +218,12 @@ class ExtraLife(_BasicScraper):
 help = 'Index format: stripname'
-class ExtraOrdinary(_BasicScraper):
+class ExtraOrdinary(_ParserScraper):
 url = 'http://www.exocomics.com/'
-rurl = escape(url)
 stripUrl = url + '%s'
 firstStripUrl = stripUrl % '01'
-prevSearch = compile(tagre("a", "href", r'(%s\d+)' % rurl, before="prev"))
-imageSearch = compile(tagre("img", "src", r'(%scomics/comics/\d+\.[^"]+)' % rurl))
+prevSearch = '//a[%s]' % xpath_class('prev')
+imageSearch = '//img[%s]' % xpath_class('image-style-main-comic')
 help = 'Index format: number'

View file

@@ -10,7 +10,7 @@ from re import compile, escape
 from ..scraper import _BasicScraper
 from ..util import tagre
 from ..helpers import indirectStarter
-from .common import _ComicControlScraper
+from .common import _ComicControlScraper, xpath_class
 class JackCannon(_BasicScraper):
@@ -53,6 +53,8 @@ class JoeAndMonkey(_BasicScraper):
 class JohnnyWander(_ComicControlScraper):
+imageSearch = ('//ul[%s]/li/@data-src' % xpath_class('cc-showbig'),
+_ComicControlScraper.imageSearch)
 url = 'http://www.johnnywander.com/'

View file

@@ -257,6 +257,7 @@ class Removed(Scraper):
 cls('PensAndTales/FireflyCross'),
 cls('PetiteSymphony/Djandora'),
 cls('PetiteSymphony/Generation17'),
+cls('PunksAndNerds', 'mis'),
 cls('PunksAndNerdsOld'),
 cls('RedsPlanet'),
 cls('SmackJeeves/Aarrevaara'),
@@ -329,6 +330,7 @@ class Removed(Scraper):
 cls('Stubble'),
 cls('SuburbanTribe'),
 cls('TheOuterQuarter'),
+cls('TheParkingLotIsFull'),
 cls('ThunderAndLightning'),
 cls('TinyKittenTeeth'),
 cls('TwoTwoOneFour'),

View file

@@ -203,6 +203,7 @@ class Precocious(_ParserScraper):
 prevSearch = '//a[img[contains(@src, "/back_arrow")]]'
 help = 'Index format: yyyy/mm/dd'
 class PrinceOfSartar(_WordPressScraper):
 url = 'http://www.princeofsartar.com/'
 stripUrl = url + 'comic/%s/'
@@ -219,6 +220,7 @@ class PrinceOfSartar(_WordPressScraper):
 image_ext = image_url.rsplit('.', 1)[1]
 return '%s.%s' % (title, image_ext)
 class PS238(_ParserScraper):
 url = 'http://ps238.nodwick.com/'
 stripUrl = url + 'comic/%s/'
@@ -227,14 +229,6 @@ class PS238(_ParserScraper):
 help = 'Index format: yyyy-mm-dd'
-class PunksAndNerds(_WordPressScraper):
-url = 'http://www.punksandnerds.com/'
-stripUrl = url + '?p=%s'
-firstStripUrl = stripUrl % '15'
-prevSearch = '//a[%s]' % xpath_class('navi-prev')
-help = 'Index format: nnn'
 class PvPonline(_BasicScraper):
 url = 'http://pvponline.com/comic'
 stripUrl = url + '%s'

View file

@@ -9,7 +9,6 @@ from re import compile
 from six.moves.urllib.parse import urljoin
 from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import indirectStarter
-from ..util import tagre
 from .common import _WordPressScraper, xpath_class
@@ -80,8 +79,6 @@ class RomanticallyApocalyptic(_ParserScraper):
 firstStripUrl = stripUrl % '0'
 imageSearch = '//div[%s]/center//img' % xpath_class('comicpanel')
 prevSearch = '//a[@accesskey="p"]'
-latestSearch = '//a[span[%s]]' % xpath_class('glyphicon-fast-forward')
-starter = indirectStarter
 help = 'Index format: n'
 adult = True

View file

@@ -298,33 +298,17 @@ class SluggyFreelance(_BasicScraper):
 help = 'Index format: yymmdd'
-class SMBC(_ParserScraper):
+class SMBC(_ComicControlScraper):
 url = 'http://www.smbc-comics.com/'
-stripUrl = url + 'index.php?id=%s'
-firstStripUrl = stripUrl % '1'
+firstStripUrl = url + 'comic/2002-09-05'
 multipleImagesPerStrip = True
 imageSearch = ['//img[@id="cc-comic"]', '//div[@id="aftercomic"]/img']
-prevSearch = '//a[@class="prev"]'
-help = 'Index format: nnnn'
 textSearch = '//img[@id="cc-comic"]/@title'
 def namer(self, image_url, page_url):
 """Remove random noise from name."""
 return image_url.rsplit('-', 1)[-1]
-def shouldSkipUrl(self, url, data):
-"""Skip promo or missing update pages."""
-return url in (
-self.stripUrl % '2865',
-self.stripUrl % '2653',
-self.stripUrl % '2424',
-self.stripUrl % '2226',
-self.stripUrl % '2069',
-self.stripUrl % '1895',
-self.stripUrl % '1896',
-self.stripUrl % '1589',
-)
 class SnowFlame(_WordPressScraper):
 url = 'http://www.snowflamecomic.com/'
@@ -375,23 +359,22 @@ class Sorcery101(_ParserScraper):
 help = 'Index format: stripname'
-class SpaceTrawler(_WordPressScraper):
-base_url = 'http://spacetrawler.com/'
-url = base_url + '2013/12/24/spacetrawler-379/'
-firstStripUrl = base_url + '2010/01/01/spacetrawler-4/'
-prevSearch = '//a[%s]' % xpath_class('navi-prev')
-endOfLife = True
-class SpaceJunkArlia(_BasicScraper):
-url = 'http://spacejunkarlia.com'
-stripUrl = url + '/index.php?strip_id=%s'
+class SpaceJunkArlia(_ParserScraper):
+url = 'http://spacejunkarlia.com/'
+stripUrl = url + '?strip_id=%s'
 firstStripUrl = stripUrl % '0'
-imageSearch = compile(tagre('img', 'src', r'(comics/[^"]+)'))
-prevSearch = compile(tagre('a', 'href', r'(\?strip_id=\d+)') + '&lt;<')
+imageSearch = '//div[%s]/img' % xpath_class('content')
+prevSearch = '//a[text()="<"]'
 help = 'Index format: number'
+class SpaceTrawler(_ParserScraper):
+url = 'https://www.baldwinpage.com/spacetrawler/'
+firstStripUrl = url + '2010/01/01/spacetrawler-4/'
+imageSearch = '//img[%s]' % xpath_class('size-full')
+prevSearch = '//a[@rel="prev"]'
 class Spamusement(_BasicScraper):
 url = 'http://spamusement.com/'
 rurl = escape(url)
@@ -487,7 +470,7 @@ class StrongFemaleProtagonist(_ParserScraper):
 stripUrl = url + '%s/'
 css = True
 imageSearch = 'article p img'
-prevSearch = 'div.nav-previous > a'
+prevSearch = 'a.page-nav__item--left'
 help = 'Index format: issue-?/page-??'
 def shouldSkipUrl(self, url, data):
@@ -499,7 +482,7 @@ class StrongFemaleProtagonist(_ParserScraper):
 self.stripUrl % 'issue-5/newspaper',
 self.stripUrl % 'issue-5/hiatus-1',
 self.stripUrl % 'issue-5/hiatus-2',
-self.stripUrl % 'ssue-1/no-page',
+self.stripUrl % 'issue-1/no-page',
 )
@@ -532,6 +515,7 @@ class StuffNoOneToldMe(_BasicScraper):
 def shouldSkipUrl(self, url, data):
 """Skip pages without images."""
 return url in (
+self.stripUrl % '2016/05/so-you-would-like-to-share-my-comics', # no comic
 self.stripUrl % '2012/08/self-rant', # no comic
 self.stripUrl % '2012/06/if-you-wonder-where-ive-been', # video
 self.stripUrl % '2011/10/i-didnt-make-this-nor-have-anything-to', # video

View file

@@ -14,14 +14,11 @@ from .common import (_ComicControlScraper, _TumblrScraper, _WordPressScraper,
 xpath_class)
-class TheBrads(_BasicScraper):
-url = 'http://bradcolbow.com/archive/C4/'
-stripUrl = url + '%s/'
-firstStripUrl = stripUrl % 'P125'
-imageSearch = compile(tagre("img", "src", r'(http://s3\.amazonaws\.com/the_brads/the-?brads[-_][^"]+)'))
-prevSearch = compile(tagre("a", "href", r'(http://bradcolbow\.com/archive/C4/[^"]+)', before="prev"))
+class TheBrads(_ParserScraper):
+url = 'http://bradcolbow.com/archive/'
+imageSearch = '//div[%s]//img' % xpath_class('entry')
+prevSearch = '//a[%s]' % xpath_class('prev')
 multipleImagesPerStrip = True
-help = 'Index format: a letter and a number'
 class TheDevilsPanties(_BasicScraper):
@@ -88,17 +85,6 @@ class TheOrderOfTheStick(_BasicScraper):
 return page_url.rsplit('/', 1)[-1][:-5]
-class TheParkingLotIsFull(_BasicScraper):
-baseUrl = 'http://plif.courageunfettered.com/'
-url = baseUrl + 'archive/arch2002.htm'
-stripUrl = baseUrl + 'archive/arch%s.htm'
-firstStripUrl = stripUrl % '1998'
-imageSearch = compile(r'<td align="center"><A TARGET=_parent HREF="(wc\d+\..+?)">')
-multipleImagesPerStrip = True
-prevSearch = compile(r'\d{4} -\s+<A HREF="(arch\d{4}\.htm)">\d{4}')
-help = 'Index format: nnn'
 class TheThinHLine(_TumblrScraper):
 url = 'http://thinhline.tumblr.com/'
 firstStripUrl = url + 'post/4177372348/thl-1-a-cats-got-his-tongue-click-on-the'
@@ -147,13 +133,10 @@ class ThreePanelSoul(_ComicControlScraper):
 class ToonHole(_WordPressScraper):
 url = 'http://toonhole.com/'
-stripUrl = url + '%s/'
-firstStripUrl = stripUrl % '2009/12/toon-hole-coming-soon-2010'
-prevSearch = '//a[@rel="prev"]'
-help = 'Index format: yyyy/mm/stripname'
+firstStripUrl = url + 'comic/toon-hole-coming-soon-2010/'
 def shouldSkipUrl(self, url, data):
-return url in (self.stripUrl % "2013/03/if-game-of-thrones-was-animated",)
+return url in (self.url + "comic/if-game-of-thrones-was-animated/",)
 class TracyAndTristan(_BasicScraper):

View file

@@ -6,8 +6,9 @@
 from __future__ import absolute_import, division, print_function
 from re import compile
-from ..scraper import _BasicScraper
+from ..scraper import _BasicScraper, _ParserScraper
 from ..util import tagre
+from .common import xpath_class
 class VampireCheerleaders(_BasicScraper):
@@ -51,13 +52,10 @@ class VictimsOfTheSystem(_BasicScraper):
 help = 'Index format: nnn-nnn'
-class ViiviJaWagner(_BasicScraper):
+class ViiviJaWagner(_ParserScraper):
 url = 'http://www.hs.fi/viivijawagner/'
-stripUrl = None
-imageSearch = compile(tagre("img", "src", r'(http://hs\d+\.snstatic\.fi/webkuva/sarjis/[^"]+)'))
-prevSearch = compile(tagre("a", "href", r'(/viivijawagner/[^"]+)',
-before="prev-cm"))
-help = 'Index format: none'
+imageSearch = '//div[@id="full-comic"]//img'
+prevSearch = '//a[%s]' % xpath_class('prev-cm')
 lang = 'fi'
 def namer(self, image_url, page_url):

View file

@@ -23,9 +23,9 @@ class ZapComic(_ParserScraper):
 class Zapiro(_ParserScraper):
 url = 'http://mg.co.za/zapiro/'
 starter = bounceStarter
-imageSearch = '//div[@id="cartoon_full_size"]//img'
-prevSearch = '//li[@class="nav_older"]/a'
-nextSearch = '//li[@class="nav_newer"]/a'
+imageSearch = '//img[%s]' % xpath_class('img-fluid')
+prevSearch = '//a[%s]' % xpath_class('left')
+nextSearch = '//a[%s]' % xpath_class('right')
 def namer(self, image_url, page_url):
 parts = page_url.rsplit('/', 1)