Fix some more comic modules.
This commit is contained in:
parent
be1a63da0c
commit
a6cf4e7040
10 changed files with 50 additions and 116 deletions
|
@ -186,19 +186,6 @@ class BoredAndEvil(_BasicScraper):
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
class BoyOnAStickAndSlither(_BasicScraper):
|
|
||||||
url = 'http://www.boasas.com/'
|
|
||||||
stripUrl = url + 'page/%s'
|
|
||||||
firstStripUrl = stripUrl % '2'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)'))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(/page/\d+)') +
|
|
||||||
"<span>Next page")
|
|
||||||
help = 'Index format: n (unpadded)'
|
|
||||||
|
|
||||||
def namer(self, image_url, page_url):
|
|
||||||
return page_url.rsplit('/')[-1]
|
|
||||||
|
|
||||||
|
|
||||||
class BratHalla(_WordPressScraper):
|
class BratHalla(_WordPressScraper):
|
||||||
url = 'http://brat-halla.com/'
|
url = 'http://brat-halla.com/'
|
||||||
|
|
||||||
|
|
|
@ -126,8 +126,8 @@ class DieselSweeties(_ParserScraper):
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = '//img[@class="xomic"]'
|
imageSearch = '//img[@class="xomic"]'
|
||||||
prevSearch = '//div[@id="prev"]//a[contains(text(), "previous")]'
|
prevSearch = '//div[@id="prev"]//a[contains(text(), "previous")]'
|
||||||
nextSearch = '//div[@id="prev"]//a[contains(text(), "next")]'
|
latestSearch = prevSearch
|
||||||
starter = bounceStarter
|
starter = indirectStarter
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -46,19 +46,13 @@ class EasilyAmused(_WordPressScraper):
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class EatLiver(_BasicScraper):
|
class EatLiver(_ParserScraper):
|
||||||
url = 'http://www.eatliver.com/'
|
url = 'http://www.eatliver.com/'
|
||||||
rurl = escape(url)
|
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
stripUrl = url + "i.php?n=%s"
|
multipleImagesPerStrip = True
|
||||||
firstStripUrl = stripUrl % '1'
|
imageSearch = '//div[%s]//img' % xpath_class('post-content')
|
||||||
imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl,
|
prevSearch = '//a[@rel="prev"]'
|
||||||
before="image_src"))
|
latestSearch = '//a[@rel="bookmark"]'
|
||||||
prevSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
|
|
||||||
"<< Previous")
|
|
||||||
latestSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
|
|
||||||
tagre("img", "src", r'img/small/[^"]+') +
|
|
||||||
r"</a>\s*<br")
|
|
||||||
|
|
||||||
|
|
||||||
class EatThatToast(_BasicScraper):
|
class EatThatToast(_BasicScraper):
|
||||||
|
@ -185,18 +179,9 @@ class EvilDiva(_BasicScraper):
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
class EvilInc(_BasicScraper):
|
class EvilInc(_WordPressScraper):
|
||||||
url = 'http://evil-inc.com/'
|
url = 'http://evil-inc.com/'
|
||||||
stripUrl = url + 'comic/%s'
|
firstStripUrl = url + 'comic/monday-3/'
|
||||||
firstStripUrl = stripUrl % 'monday-3'
|
|
||||||
imageSearch = compile(
|
|
||||||
tagre("div", "id", "comic") +
|
|
||||||
r'\s*.*\s*' + # filter out the variant href tag
|
|
||||||
tagre("img", "src",
|
|
||||||
r'(http://i\d\.wp\.com/evil-inc\.com/wp-content/uploads/[^"]+)'))
|
|
||||||
prevSearch = compile(tagre("span", "class", "mininav-prev") +
|
|
||||||
tagre("a", "href", r'([^"]+)'))
|
|
||||||
help = 'Index format: stripname'
|
|
||||||
|
|
||||||
|
|
||||||
class Evilish(_ParserScraper):
|
class Evilish(_ParserScraper):
|
||||||
|
|
|
@ -92,9 +92,9 @@ class Flipside(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
class FonFlatter(_ParserScraper):
|
class FonFlatter(_ParserScraper):
|
||||||
url = 'http://www.fonflatter.de/'
|
url = 'https://www.fonflatter.de/'
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '2005/09/20/01-begegnung-mit-batman'
|
firstStripUrl = url + '2005/09/20/01-begegnung-mit-batman/'
|
||||||
lang = 'de'
|
lang = 'de'
|
||||||
imageSearch = r'//img[re:test(@src, "/fred_\d+")]'
|
imageSearch = r'//img[re:test(@src, "/fred_\d+")]'
|
||||||
prevSearch = '//a[@rel="prev"]'
|
prevSearch = '//a[@rel="prev"]'
|
||||||
|
|
|
@ -52,9 +52,11 @@ class LasLindas(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
class LastNerdsOnEarth(_ParserScraper):
|
class LastNerdsOnEarth(_ParserScraper):
|
||||||
url = 'http://www.lastnerdsonearth.com/latest/'
|
baseUrl = 'http://www.lastnerdsonearth.com/'
|
||||||
|
url = baseUrl + 'latest/'
|
||||||
|
firstStripUrl = baseUrl + 'ch1p1'
|
||||||
imageSearch = '//div[@id="content"]/a/img'
|
imageSearch = '//div[@id="content"]/a/img'
|
||||||
prevSearch = '//div[@id="comicnav"]/a[last()-2]'
|
prevSearch = '//div[@id="comicnav"]/a[img[contains(@src, "nav-prev")]]'
|
||||||
|
|
||||||
|
|
||||||
class LeastICouldDo(_BasicScraper):
|
class LeastICouldDo(_BasicScraper):
|
||||||
|
@ -97,7 +99,7 @@ class LoFiJinks(_WPNaviIn):
|
||||||
|
|
||||||
|
|
||||||
class LookingForGroup(_ParserScraper):
|
class LookingForGroup(_ParserScraper):
|
||||||
url = 'http://www.lfgcomic.com/'
|
url = 'http://www.lfg.co/'
|
||||||
stripUrl = url + 'page/%s/'
|
stripUrl = url + 'page/%s/'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
css = True
|
css = True
|
||||||
|
|
|
@ -93,6 +93,7 @@ class Optipess(_WordPressScraper):
|
||||||
firstStripUrl = url + '2008/12/01/jason-friend-of-the-butterflies/'
|
firstStripUrl = url + '2008/12/01/jason-friend-of-the-butterflies/'
|
||||||
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
||||||
textSearch = '//div[@id="comic"]//img/@alt'
|
textSearch = '//div[@id="comic"]//img/@alt'
|
||||||
|
textOptional = True
|
||||||
|
|
||||||
|
|
||||||
class OurHomePlanet(_BasicScraper):
|
class OurHomePlanet(_BasicScraper):
|
||||||
|
|
|
@ -117,7 +117,9 @@ class PHDComics(_ParserScraper):
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
"""Skip pages without images."""
|
"""Skip pages without images."""
|
||||||
return url in (
|
return url in (
|
||||||
self.stripUrl % '1669', # video
|
# video
|
||||||
|
self.stripUrl % '1880',
|
||||||
|
self.stripUrl % '1669',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -125,15 +127,9 @@ class Picklewhistle(_ComicControlScraper):
|
||||||
url = 'http://www.picklewhistle.com/'
|
url = 'http://www.picklewhistle.com/'
|
||||||
|
|
||||||
|
|
||||||
class PicPakDog(_BasicScraper):
|
class PicPakDog(_WordPressScraper):
|
||||||
url = 'http://www.picpak.net/'
|
url = 'http://www.picpak.net/'
|
||||||
rurl = escape(url)
|
firstStripUrl = url + 'comic/dogs-cant-spell/'
|
||||||
stripUrl = url + 'comic/%s/'
|
|
||||||
firstStripUrl = stripUrl % 'dogs-cant-spell'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+-[^"]+\.png)' % rurl))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl,
|
|
||||||
after="nav-prev"))
|
|
||||||
help = 'Index format: stripname'
|
|
||||||
|
|
||||||
|
|
||||||
# Keep, because naming is different to PHDComics...
|
# Keep, because naming is different to PHDComics...
|
||||||
|
@ -199,14 +195,12 @@ class PoorlyDrawnLines(_BasicScraper):
|
||||||
help = 'Index Format: name'
|
help = 'Index Format: name'
|
||||||
|
|
||||||
|
|
||||||
class Precocious(_BasicScraper):
|
class Precocious(_ParserScraper):
|
||||||
url = 'http://www.precociouscomic.com/'
|
url = 'http://www.precociouscomic.com/'
|
||||||
starter = indirectStarter
|
|
||||||
stripUrl = url + 'archive/comic/%s'
|
stripUrl = url + 'archive/comic/%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
|
firstStripUrl = stripUrl % '2009/03/09'
|
||||||
prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png"))
|
imageSearch = '//img[contains(@src, "/comics/")]'
|
||||||
latestSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') +
|
prevSearch = '//a[img[contains(@src, "/back_arrow")]]'
|
||||||
tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
|
|
||||||
help = 'Index format: yyyy/mm/dd'
|
help = 'Index format: yyyy/mm/dd'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -145,7 +145,7 @@ class ThreePanelSoul(_ComicControlScraper):
|
||||||
|
|
||||||
|
|
||||||
class ToonHole(_WordPressScraper):
|
class ToonHole(_WordPressScraper):
|
||||||
url = 'http://www.toonhole.com/'
|
url = 'http://toonhole.com/'
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '2009/12/toon-hole-coming-soon-2010'
|
firstStripUrl = stripUrl % '2009/12/toon-hole-coming-soon-2010'
|
||||||
prevSearch = '//a[@rel="prev"]'
|
prevSearch = '//a[@rel="prev"]'
|
||||||
|
|
|
@ -7,20 +7,15 @@ from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from re import compile, escape, IGNORECASE
|
from re import compile, escape, IGNORECASE
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
from .common import _ComicControlScraper, _WordPressScraper
|
from .common import _ComicControlScraper, _WordPressScraper, xpath_class
|
||||||
|
|
||||||
|
|
||||||
class WapsiSquare(_BasicScraper):
|
class WapsiSquare(_WordPressScraper):
|
||||||
url = 'http://wapsisquare.com/'
|
url = 'http://wapsisquare.com/'
|
||||||
rurl = escape(url)
|
firstStripUrl = url + 'comic/09092001/'
|
||||||
stripUrl = url + 'comic/%s/'
|
|
||||||
firstStripUrl = stripUrl % '09092001'
|
|
||||||
imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
|
|
||||||
prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
|
|
||||||
help = 'Index format: stripname'
|
|
||||||
|
|
||||||
|
|
||||||
class WastedTalent(_BasicScraper):
|
class WastedTalent(_BasicScraper):
|
||||||
|
@ -33,15 +28,6 @@ class WastedTalent(_BasicScraper):
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
|
||||||
class WayfarersMoon(_BasicScraper):
|
|
||||||
url = 'http://www.wayfarersmoon.com/'
|
|
||||||
stripUrl = url + 'index.php?page=%s'
|
|
||||||
firstStripUrl = stripUrl % '0'
|
|
||||||
imageSearch = compile(r'<img src="(/admin.+?)"')
|
|
||||||
prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')
|
|
||||||
help = 'Index format: nn'
|
|
||||||
|
|
||||||
|
|
||||||
class WebDesignerCOTW(_BasicScraper):
|
class WebDesignerCOTW(_BasicScraper):
|
||||||
url = 'http://www.webdesignerdepot.com/'
|
url = 'http://www.webdesignerdepot.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
|
@ -91,23 +77,10 @@ class Weregeek(_BasicScraper):
|
||||||
help = 'Index format: yyyy/mm/dd'
|
help = 'Index format: yyyy/mm/dd'
|
||||||
|
|
||||||
|
|
||||||
class WhiteNinja(_BasicScraper):
|
class WhiteNoise(_WordPressScraper):
|
||||||
baseUrl = 'http://www.whiteninjacomics.com/'
|
url = 'http://whitenoisecomic.com/'
|
||||||
url = baseUrl + 'comics.shtml'
|
firstStripUrl = url + 'comic/book-one/'
|
||||||
stripUrl = baseUrl + 'comics/%s.shtml'
|
prevSearch = '//a[%s]' % xpath_class('previous-webcomic-link')
|
||||||
imageSearch = compile(r'<img src=(/images/comics/(?!t-).+?\.gif) border=0')
|
|
||||||
prevSearch = compile(r'(/comics/.+?shtml).+?previous')
|
|
||||||
help = 'Index format: s (comic name)'
|
|
||||||
|
|
||||||
|
|
||||||
class WhiteNoise(_BasicScraper):
|
|
||||||
baseUrl = 'http://www.wncomic.com/'
|
|
||||||
url = baseUrl + 'archive.php'
|
|
||||||
stripUrl = baseUrl + 'archive_comments.php?strip_id=%s'
|
|
||||||
firstStripUrl = stripUrl % '1'
|
|
||||||
imageSearch = compile(r'(istrip_files/strips/.+?)"')
|
|
||||||
prevSearch = compile(r'</a><a href="(.+?)"><img src="images/top_back.jpg" ')
|
|
||||||
help = 'Index format: n'
|
|
||||||
|
|
||||||
|
|
||||||
class Whomp(_ComicControlScraper):
|
class Whomp(_ComicControlScraper):
|
||||||
|
@ -129,13 +102,13 @@ class WhyTheLongFace(_BasicScraper):
|
||||||
help = 'Index format: yyyymm'
|
help = 'Index format: yyyymm'
|
||||||
|
|
||||||
|
|
||||||
class Wigu(_BasicScraper):
|
class Wigu(_ParserScraper):
|
||||||
url = 'http://wigucomics.com/'
|
stripUrl = 'http://www.wigucomics.com/adventures/index.php?comic=%s'
|
||||||
stripUrl = url + 'oc/index.php?comic=%s'
|
url = stripUrl % '-1'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/oc/comics/[^"]+)'))
|
imageSearch = '//div[@id="comic"]//img[contains(@src, "/comics/")]'
|
||||||
prevSearch = compile(tagre("a", "href", r'(/oc/index\.php\?comic=\d+)',
|
prevSearch = '//a[@alt="go back"]'
|
||||||
after="go back"))
|
endOfLife = True
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
@ -164,7 +137,6 @@ class Wondermark(_BasicScraper):
|
||||||
class WorldOfMrToast(_BasicScraper):
|
class WorldOfMrToast(_BasicScraper):
|
||||||
baseUrl = 'http://www.theimaginaryworld.com/'
|
baseUrl = 'http://www.theimaginaryworld.com/'
|
||||||
url = baseUrl + 'mrTcomicA.html'
|
url = baseUrl + 'mrTcomicA.html'
|
||||||
stripUrl = baseUrl + '%s.html'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(comic[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(comic[^"]+)'))
|
||||||
# list the archive links since there is no prev/next navigation
|
# list the archive links since there is no prev/next navigation
|
||||||
prevurls = (
|
prevurls = (
|
||||||
|
@ -185,9 +157,9 @@ class WorldOfMrToast(_BasicScraper):
|
||||||
)
|
)
|
||||||
firstStripUrl = prevurls[-1]
|
firstStripUrl = prevurls[-1]
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
help = 'Index format: none'
|
endOfLife = True
|
||||||
|
|
||||||
def getPrevUrl(self, url, data, baseUrl):
|
def getPrevUrl(self, url, data):
|
||||||
idx = self.prevurls.index(url)
|
idx = self.prevurls.index(url)
|
||||||
try:
|
try:
|
||||||
return self.prevurls[idx + 1]
|
return self.prevurls[idx + 1]
|
||||||
|
|
|
@ -20,23 +20,16 @@ class ZapComic(_ParserScraper):
|
||||||
prevSearch = 'a.previous-comic-link'
|
prevSearch = 'a.previous-comic-link'
|
||||||
|
|
||||||
|
|
||||||
class Zapiro(_BasicScraper):
|
class Zapiro(_ParserScraper):
|
||||||
url = 'http://www.mg.co.za/zapiro/'
|
url = 'http://mg.co.za/zapiro/'
|
||||||
starter = bounceStarter
|
starter = bounceStarter
|
||||||
stripUrl = 'http://mg.co.za/cartoon/%s'
|
imageSearch = '//div[@id="cartoon_full_size"]//img'
|
||||||
firstStripUrl = stripUrl % 'zapiro_681'
|
prevSearch = '//li[@class="nav_older"]/a'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))
|
nextSearch = '//li[@class="nav_newer"]/a'
|
||||||
prevSearch = compile(tagre("li", "class", r'nav_older') +
|
|
||||||
tagre("a", "href",
|
|
||||||
r'(http://mg\.co\.za/cartoon/[^"]+)'))
|
|
||||||
nextSearch = compile(tagre("li", "class", r'nav_older') +
|
|
||||||
tagre("a", "href",
|
|
||||||
r'(http://mg\.co\.za/cartoon/[^"]+)'))
|
|
||||||
help = 'Index format: yyyy-mm-dd-stripname'
|
|
||||||
|
|
||||||
def namer(self, image_url, page_url):
|
def namer(self, image_url, page_url):
|
||||||
name = image_url.split('/')[-3]
|
parts = page_url.rsplit('/', 1)
|
||||||
return name
|
return parts[1]
|
||||||
|
|
||||||
|
|
||||||
class ZenPencils(_WordPressScraper):
|
class ZenPencils(_WordPressScraper):
|
||||||
|
|
Loading…
Reference in a new issue