Fix a bunch of comic modules.
This commit is contained in:
parent
446b81fc45
commit
47e2502ec7
10 changed files with 52 additions and 105 deletions
|
@ -167,11 +167,12 @@ class DMFA(_BasicScraper):
|
|||
help = 'Index format: nnn (normally, some specials)'
|
||||
|
||||
|
||||
class DoemainOfOurOwn(_BasicScraper):
|
||||
class DoemainOfOurOwn(_ParserScraper):
|
||||
url = 'http://www.doemain.com/'
|
||||
stripUrl = url + 'index.cgi/%s'
|
||||
imageSearch = compile(r"<img border='0' width='\d+' height='\d+' src='(/strips/\d{4}/\d{6}-[^\']+)'")
|
||||
prevSearch = compile(r'<a href="(/index\.cgi/\d{4}-\d{2}-\d{2})"><img width="\d+" height="\d+" border="\d+" alt="Previous Strip"')
|
||||
imageSearch = '//td/img[contains(@src, "/strips/")]'
|
||||
prevSearch = '//a[img[@alt="Previous Strip"]]'
|
||||
endOfLife = True
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
|
@ -194,17 +195,11 @@ class DominicDeegan(_BasicScraper):
|
|||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
class DorkTower(_BasicScraper):
|
||||
class DorkTower(_ParserScraper):
|
||||
url = 'http://www.dorktower.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '1997/01/01/shadis-magazine-strip-1'
|
||||
imageSearch = compile(tagre("div", "class", "entry-content") +
|
||||
"\s*<p>\s*" +
|
||||
tagre("img", "src", r'(%sfiles/[0-9]+/[0-9]+/[^"]*Dork[^"]+\.(?:gif|jpg))' % rurl,
|
||||
after=' alt'))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + "Previous")
|
||||
help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy'
|
||||
firstStripUrl = url + '1997/01/01/shadis-magazine-strip-1/'
|
||||
imageSearch = '//div[%s]//a/img' % xpath_class('entry-content')
|
||||
prevSearch = '//a[%s][text()="Previous"]' % xpath_class('btn')
|
||||
|
||||
|
||||
class Dracula(_BasicScraper):
|
||||
|
|
|
@ -56,17 +56,10 @@ class EatLiver(_ParserScraper):
|
|||
latestSearch = '//a[@rel="bookmark"]'
|
||||
|
||||
|
||||
class EatThatToast(_BasicScraper):
|
||||
class EatThatToast(_WordPressScraper):
|
||||
url = 'http://eatthattoast.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'comic/%s'
|
||||
firstStripUrl = stripUrl % 'thewizard/'
|
||||
imageSearch = compile(tagre("div", "id", r'comic') + "\s*.*\s*" + tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
|
||||
after='comic-nav-base comic-nav-previous'))
|
||||
textSearch = compile(tagre("div", "id", r'comic') + "\s*.*\s*" +
|
||||
tagre("img", "alt", r'([^"]+)'))
|
||||
help = 'Index Format: name'
|
||||
firstStripUrl = url + 'comic/thewizard/'
|
||||
textSearch = _WordPressScraper.imageSearch + '/@alt'
|
||||
|
||||
|
||||
class EdibleDirt(_BasicScraper):
|
||||
|
@ -225,13 +218,12 @@ class ExtraLife(_BasicScraper):
|
|||
help = 'Index format: stripname'
|
||||
|
||||
|
||||
class ExtraOrdinary(_BasicScraper):
|
||||
class ExtraOrdinary(_ParserScraper):
|
||||
url = 'http://www.exocomics.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s'
|
||||
firstStripUrl = stripUrl % '01'
|
||||
prevSearch = compile(tagre("a", "href", r'(%s\d+)' % rurl, before="prev"))
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/comics/\d+\.[^"]+)' % rurl))
|
||||
prevSearch = '//a[%s]' % xpath_class('prev')
|
||||
imageSearch = '//img[%s]' % xpath_class('image-style-main-comic')
|
||||
help = 'Index format: number'
|
||||
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ from re import compile, escape
|
|||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
from ..helpers import indirectStarter
|
||||
from .common import _ComicControlScraper
|
||||
from .common import _ComicControlScraper, xpath_class
|
||||
|
||||
|
||||
class JackCannon(_BasicScraper):
|
||||
|
@ -53,6 +53,8 @@ class JoeAndMonkey(_BasicScraper):
|
|||
|
||||
|
||||
class JohnnyWander(_ComicControlScraper):
|
||||
imageSearch = ('//ul[%s]/li/@data-src' % xpath_class('cc-showbig'),
|
||||
_ComicControlScraper.imageSearch)
|
||||
url = 'http://www.johnnywander.com/'
|
||||
|
||||
|
||||
|
|
|
@ -257,6 +257,7 @@ class Removed(Scraper):
|
|||
cls('PensAndTales/FireflyCross'),
|
||||
cls('PetiteSymphony/Djandora'),
|
||||
cls('PetiteSymphony/Generation17'),
|
||||
cls('PunksAndNerds', 'mis'),
|
||||
cls('PunksAndNerdsOld'),
|
||||
cls('RedsPlanet'),
|
||||
cls('SmackJeeves/Aarrevaara'),
|
||||
|
@ -329,6 +330,7 @@ class Removed(Scraper):
|
|||
cls('Stubble'),
|
||||
cls('SuburbanTribe'),
|
||||
cls('TheOuterQuarter'),
|
||||
cls('TheParkingLotIsFull'),
|
||||
cls('ThunderAndLightning'),
|
||||
cls('TinyKittenTeeth'),
|
||||
cls('TwoTwoOneFour'),
|
||||
|
|
|
@ -203,6 +203,7 @@ class Precocious(_ParserScraper):
|
|||
prevSearch = '//a[img[contains(@src, "/back_arrow")]]'
|
||||
help = 'Index format: yyyy/mm/dd'
|
||||
|
||||
|
||||
class PrinceOfSartar(_WordPressScraper):
|
||||
url = 'http://www.princeofsartar.com/'
|
||||
stripUrl = url + 'comic/%s/'
|
||||
|
@ -219,6 +220,7 @@ class PrinceOfSartar(_WordPressScraper):
|
|||
image_ext = image_url.rsplit('.', 1)[1]
|
||||
return '%s.%s' % (title, image_ext)
|
||||
|
||||
|
||||
class PS238(_ParserScraper):
|
||||
url = 'http://ps238.nodwick.com/'
|
||||
stripUrl = url + 'comic/%s/'
|
||||
|
@ -227,14 +229,6 @@ class PS238(_ParserScraper):
|
|||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
class PunksAndNerds(_WordPressScraper):
|
||||
url = 'http://www.punksandnerds.com/'
|
||||
stripUrl = url + '?p=%s'
|
||||
firstStripUrl = stripUrl % '15'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class PvPonline(_BasicScraper):
|
||||
url = 'http://pvponline.com/comic'
|
||||
stripUrl = url + '%s'
|
||||
|
|
|
@ -9,7 +9,6 @@ from re import compile
|
|||
from six.moves.urllib.parse import urljoin
|
||||
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _WordPressScraper, xpath_class
|
||||
|
||||
|
@ -80,8 +79,6 @@ class RomanticallyApocalyptic(_ParserScraper):
|
|||
firstStripUrl = stripUrl % '0'
|
||||
imageSearch = '//div[%s]/center//img' % xpath_class('comicpanel')
|
||||
prevSearch = '//a[@accesskey="p"]'
|
||||
latestSearch = '//a[span[%s]]' % xpath_class('glyphicon-fast-forward')
|
||||
starter = indirectStarter
|
||||
help = 'Index format: n'
|
||||
adult = True
|
||||
|
||||
|
|
|
@ -298,33 +298,17 @@ class SluggyFreelance(_BasicScraper):
|
|||
help = 'Index format: yymmdd'
|
||||
|
||||
|
||||
class SMBC(_ParserScraper):
|
||||
class SMBC(_ComicControlScraper):
|
||||
url = 'http://www.smbc-comics.com/'
|
||||
stripUrl = url + 'index.php?id=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
firstStripUrl = url + 'comic/2002-09-05'
|
||||
multipleImagesPerStrip = True
|
||||
imageSearch = ['//img[@id="cc-comic"]', '//div[@id="aftercomic"]/img']
|
||||
prevSearch = '//a[@class="prev"]'
|
||||
help = 'Index format: nnnn'
|
||||
textSearch = '//img[@id="cc-comic"]/@title'
|
||||
|
||||
def namer(self, image_url, page_url):
|
||||
"""Remove random noise from name."""
|
||||
return image_url.rsplit('-', 1)[-1]
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
"""Skip promo or missing update pages."""
|
||||
return url in (
|
||||
self.stripUrl % '2865',
|
||||
self.stripUrl % '2653',
|
||||
self.stripUrl % '2424',
|
||||
self.stripUrl % '2226',
|
||||
self.stripUrl % '2069',
|
||||
self.stripUrl % '1895',
|
||||
self.stripUrl % '1896',
|
||||
self.stripUrl % '1589',
|
||||
)
|
||||
|
||||
|
||||
class SnowFlame(_WordPressScraper):
|
||||
url = 'http://www.snowflamecomic.com/'
|
||||
|
@ -375,23 +359,22 @@ class Sorcery101(_ParserScraper):
|
|||
help = 'Index format: stripname'
|
||||
|
||||
|
||||
class SpaceTrawler(_WordPressScraper):
|
||||
base_url = 'http://spacetrawler.com/'
|
||||
url = base_url + '2013/12/24/spacetrawler-379/'
|
||||
firstStripUrl = base_url + '2010/01/01/spacetrawler-4/'
|
||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||
endOfLife = True
|
||||
|
||||
|
||||
class SpaceJunkArlia(_BasicScraper):
|
||||
url = 'http://spacejunkarlia.com'
|
||||
stripUrl = url + '/index.php?strip_id=%s'
|
||||
class SpaceJunkArlia(_ParserScraper):
|
||||
url = 'http://spacejunkarlia.com/'
|
||||
stripUrl = url + '?strip_id=%s'
|
||||
firstStripUrl = stripUrl % '0'
|
||||
imageSearch = compile(tagre('img', 'src', r'(comics/[^"]+)'))
|
||||
prevSearch = compile(tagre('a', 'href', r'(\?strip_id=\d+)') + '<<')
|
||||
imageSearch = '//div[%s]/img' % xpath_class('content')
|
||||
prevSearch = '//a[text()="<"]'
|
||||
help = 'Index format: number'
|
||||
|
||||
|
||||
class SpaceTrawler(_ParserScraper):
|
||||
url = 'https://www.baldwinpage.com/spacetrawler/'
|
||||
firstStripUrl = url + '2010/01/01/spacetrawler-4/'
|
||||
imageSearch = '//img[%s]' % xpath_class('size-full')
|
||||
prevSearch = '//a[@rel="prev"]'
|
||||
|
||||
|
||||
class Spamusement(_BasicScraper):
|
||||
url = 'http://spamusement.com/'
|
||||
rurl = escape(url)
|
||||
|
@ -487,7 +470,7 @@ class StrongFemaleProtagonist(_ParserScraper):
|
|||
stripUrl = url + '%s/'
|
||||
css = True
|
||||
imageSearch = 'article p img'
|
||||
prevSearch = 'div.nav-previous > a'
|
||||
prevSearch = 'a.page-nav__item--left'
|
||||
help = 'Index format: issue-?/page-??'
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
|
@ -499,7 +482,7 @@ class StrongFemaleProtagonist(_ParserScraper):
|
|||
self.stripUrl % 'issue-5/newspaper',
|
||||
self.stripUrl % 'issue-5/hiatus-1',
|
||||
self.stripUrl % 'issue-5/hiatus-2',
|
||||
self.stripUrl % 'ssue-1/no-page',
|
||||
self.stripUrl % 'issue-1/no-page',
|
||||
)
|
||||
|
||||
|
||||
|
@ -532,6 +515,7 @@ class StuffNoOneToldMe(_BasicScraper):
|
|||
def shouldSkipUrl(self, url, data):
|
||||
"""Skip pages without images."""
|
||||
return url in (
|
||||
self.stripUrl % '2016/05/so-you-would-like-to-share-my-comics', # no comic
|
||||
self.stripUrl % '2012/08/self-rant', # no comic
|
||||
self.stripUrl % '2012/06/if-you-wonder-where-ive-been', # video
|
||||
self.stripUrl % '2011/10/i-didnt-make-this-nor-have-anything-to', # video
|
||||
|
|
|
@ -14,14 +14,11 @@ from .common import (_ComicControlScraper, _TumblrScraper, _WordPressScraper,
|
|||
xpath_class)
|
||||
|
||||
|
||||
class TheBrads(_BasicScraper):
|
||||
url = 'http://bradcolbow.com/archive/C4/'
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % 'P125'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://s3\.amazonaws\.com/the_brads/the-?brads[-_][^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://bradcolbow\.com/archive/C4/[^"]+)', before="prev"))
|
||||
class TheBrads(_ParserScraper):
|
||||
url = 'http://bradcolbow.com/archive/'
|
||||
imageSearch = '//div[%s]//img' % xpath_class('entry')
|
||||
prevSearch = '//a[%s]' % xpath_class('prev')
|
||||
multipleImagesPerStrip = True
|
||||
help = 'Index format: a letter and a number'
|
||||
|
||||
|
||||
class TheDevilsPanties(_BasicScraper):
|
||||
|
@ -88,17 +85,6 @@ class TheOrderOfTheStick(_BasicScraper):
|
|||
return page_url.rsplit('/', 1)[-1][:-5]
|
||||
|
||||
|
||||
class TheParkingLotIsFull(_BasicScraper):
|
||||
baseUrl = 'http://plif.courageunfettered.com/'
|
||||
url = baseUrl + 'archive/arch2002.htm'
|
||||
stripUrl = baseUrl + 'archive/arch%s.htm'
|
||||
firstStripUrl = stripUrl % '1998'
|
||||
imageSearch = compile(r'<td align="center"><A TARGET=_parent HREF="(wc\d+\..+?)">')
|
||||
multipleImagesPerStrip = True
|
||||
prevSearch = compile(r'\d{4} -\s+<A HREF="(arch\d{4}\.htm)">\d{4}')
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class TheThinHLine(_TumblrScraper):
|
||||
url = 'http://thinhline.tumblr.com/'
|
||||
firstStripUrl = url + 'post/4177372348/thl-1-a-cats-got-his-tongue-click-on-the'
|
||||
|
@ -147,13 +133,10 @@ class ThreePanelSoul(_ComicControlScraper):
|
|||
|
||||
class ToonHole(_WordPressScraper):
|
||||
url = 'http://toonhole.com/'
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '2009/12/toon-hole-coming-soon-2010'
|
||||
prevSearch = '//a[@rel="prev"]'
|
||||
help = 'Index format: yyyy/mm/stripname'
|
||||
firstStripUrl = url + 'comic/toon-hole-coming-soon-2010/'
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
return url in (self.stripUrl % "2013/03/if-game-of-thrones-was-animated",)
|
||||
return url in (self.url + "comic/if-game-of-thrones-was-animated/",)
|
||||
|
||||
|
||||
class TracyAndTristan(_BasicScraper):
|
||||
|
|
|
@ -6,8 +6,9 @@
|
|||
from __future__ import absolute_import, division, print_function
|
||||
from re import compile
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..util import tagre
|
||||
from .common import xpath_class
|
||||
|
||||
|
||||
class VampireCheerleaders(_BasicScraper):
|
||||
|
@ -51,13 +52,10 @@ class VictimsOfTheSystem(_BasicScraper):
|
|||
help = 'Index format: nnn-nnn'
|
||||
|
||||
|
||||
class ViiviJaWagner(_BasicScraper):
|
||||
class ViiviJaWagner(_ParserScraper):
|
||||
url = 'http://www.hs.fi/viivijawagner/'
|
||||
stripUrl = None
|
||||
imageSearch = compile(tagre("img", "src", r'(http://hs\d+\.snstatic\.fi/webkuva/sarjis/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/viivijawagner/[^"]+)',
|
||||
before="prev-cm"))
|
||||
help = 'Index format: none'
|
||||
imageSearch = '//div[@id="full-comic"]//img'
|
||||
prevSearch = '//a[%s]' % xpath_class('prev-cm')
|
||||
lang = 'fi'
|
||||
|
||||
def namer(self, image_url, page_url):
|
||||
|
|
|
@ -23,9 +23,9 @@ class ZapComic(_ParserScraper):
|
|||
class Zapiro(_ParserScraper):
|
||||
url = 'http://mg.co.za/zapiro/'
|
||||
starter = bounceStarter
|
||||
imageSearch = '//div[@id="cartoon_full_size"]//img'
|
||||
prevSearch = '//li[@class="nav_older"]/a'
|
||||
nextSearch = '//li[@class="nav_newer"]/a'
|
||||
imageSearch = '//img[%s]' % xpath_class('img-fluid')
|
||||
prevSearch = '//a[%s]' % xpath_class('left')
|
||||
nextSearch = '//a[%s]' % xpath_class('right')
|
||||
|
||||
def namer(self, image_url, page_url):
|
||||
parts = page_url.rsplit('/', 1)
|
||||
|
|
Loading…
Reference in a new issue