Fix some more comic modules.

This commit is contained in:
Tobias Gruetzmacher 2016-05-16 23:16:29 +02:00
parent be1a63da0c
commit a6cf4e7040
10 changed files with 50 additions and 116 deletions

View file

@ -186,19 +186,6 @@ class BoredAndEvil(_BasicScraper):
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
class BoyOnAStickAndSlither(_BasicScraper):
url = 'http://www.boasas.com/'
stripUrl = url + 'page/%s'
firstStripUrl = stripUrl % '2'
imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)'))
prevSearch = compile(tagre("a", "href", r'(/page/\d+)') +
"<span>Next page")
help = 'Index format: n (unpadded)'
def namer(self, image_url, page_url):
return page_url.rsplit('/')[-1]
class BratHalla(_WordPressScraper): class BratHalla(_WordPressScraper):
url = 'http://brat-halla.com/' url = 'http://brat-halla.com/'

View file

@ -126,8 +126,8 @@ class DieselSweeties(_ParserScraper):
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = '//img[@class="xomic"]' imageSearch = '//img[@class="xomic"]'
prevSearch = '//div[@id="prev"]//a[contains(text(), "previous")]' prevSearch = '//div[@id="prev"]//a[contains(text(), "previous")]'
nextSearch = '//div[@id="prev"]//a[contains(text(), "next")]' latestSearch = prevSearch
starter = bounceStarter starter = indirectStarter
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'

View file

@ -46,19 +46,13 @@ class EasilyAmused(_WordPressScraper):
starter = indirectStarter starter = indirectStarter
class EatLiver(_BasicScraper): class EatLiver(_ParserScraper):
url = 'http://www.eatliver.com/' url = 'http://www.eatliver.com/'
rurl = escape(url)
starter = indirectStarter starter = indirectStarter
stripUrl = url + "i.php?n=%s" multipleImagesPerStrip = True
firstStripUrl = stripUrl % '1' imageSearch = '//div[%s]//img' % xpath_class('post-content')
imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl, prevSearch = '//a[@rel="prev"]'
before="image_src")) latestSearch = '//a[@rel="bookmark"]'
prevSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
"&#060;&#060; Previous")
latestSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
tagre("img", "src", r'img/small/[^"]+') +
r"</a>\s*<br")
class EatThatToast(_BasicScraper): class EatThatToast(_BasicScraper):
@ -185,18 +179,9 @@ class EvilDiva(_BasicScraper):
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
class EvilInc(_BasicScraper): class EvilInc(_WordPressScraper):
url = 'http://evil-inc.com/' url = 'http://evil-inc.com/'
stripUrl = url + 'comic/%s' firstStripUrl = url + 'comic/monday-3/'
firstStripUrl = stripUrl % 'monday-3'
imageSearch = compile(
tagre("div", "id", "comic") +
r'\s*.*\s*' + # filter out the variant href tag
tagre("img", "src",
r'(http://i\d\.wp\.com/evil-inc\.com/wp-content/uploads/[^"]+)'))
prevSearch = compile(tagre("span", "class", "mininav-prev") +
tagre("a", "href", r'([^"]+)'))
help = 'Index format: stripname'
class Evilish(_ParserScraper): class Evilish(_ParserScraper):

View file

@ -92,9 +92,9 @@ class Flipside(_BasicScraper):
class FonFlatter(_ParserScraper): class FonFlatter(_ParserScraper):
url = 'http://www.fonflatter.de/' url = 'https://www.fonflatter.de/'
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2005/09/20/01-begegnung-mit-batman' firstStripUrl = url + '2005/09/20/01-begegnung-mit-batman/'
lang = 'de' lang = 'de'
imageSearch = r'//img[re:test(@src, "/fred_\d+")]' imageSearch = r'//img[re:test(@src, "/fred_\d+")]'
prevSearch = '//a[@rel="prev"]' prevSearch = '//a[@rel="prev"]'

View file

@ -52,9 +52,11 @@ class LasLindas(_BasicScraper):
class LastNerdsOnEarth(_ParserScraper): class LastNerdsOnEarth(_ParserScraper):
url = 'http://www.lastnerdsonearth.com/latest/' baseUrl = 'http://www.lastnerdsonearth.com/'
url = baseUrl + 'latest/'
firstStripUrl = baseUrl + 'ch1p1'
imageSearch = '//div[@id="content"]/a/img' imageSearch = '//div[@id="content"]/a/img'
prevSearch = '//div[@id="comicnav"]/a[last()-2]' prevSearch = '//div[@id="comicnav"]/a[img[contains(@src, "nav-prev")]]'
class LeastICouldDo(_BasicScraper): class LeastICouldDo(_BasicScraper):
@ -97,7 +99,7 @@ class LoFiJinks(_WPNaviIn):
class LookingForGroup(_ParserScraper): class LookingForGroup(_ParserScraper):
url = 'http://www.lfgcomic.com/' url = 'http://www.lfg.co/'
stripUrl = url + 'page/%s/' stripUrl = url + 'page/%s/'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
css = True css = True

View file

@ -93,6 +93,7 @@ class Optipess(_WordPressScraper):
firstStripUrl = url + '2008/12/01/jason-friend-of-the-butterflies/' firstStripUrl = url + '2008/12/01/jason-friend-of-the-butterflies/'
prevSearch = '//a[%s]' % xpath_class('navi-prev') prevSearch = '//a[%s]' % xpath_class('navi-prev')
textSearch = '//div[@id="comic"]//img/@alt' textSearch = '//div[@id="comic"]//img/@alt'
textOptional = True
class OurHomePlanet(_BasicScraper): class OurHomePlanet(_BasicScraper):

View file

@ -117,7 +117,9 @@ class PHDComics(_ParserScraper):
def shouldSkipUrl(self, url, data): def shouldSkipUrl(self, url, data):
"""Skip pages without images.""" """Skip pages without images."""
return url in ( return url in (
self.stripUrl % '1669', # video # video
self.stripUrl % '1880',
self.stripUrl % '1669',
) )
@ -125,15 +127,9 @@ class Picklewhistle(_ComicControlScraper):
url = 'http://www.picklewhistle.com/' url = 'http://www.picklewhistle.com/'
class PicPakDog(_BasicScraper): class PicPakDog(_WordPressScraper):
url = 'http://www.picpak.net/' url = 'http://www.picpak.net/'
rurl = escape(url) firstStripUrl = url + 'comic/dogs-cant-spell/'
stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % 'dogs-cant-spell'
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+-[^"]+\.png)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl,
after="nav-prev"))
help = 'Index format: stripname'
# Keep, because naming is different to PHDComics... # Keep, because naming is different to PHDComics...
@ -199,14 +195,12 @@ class PoorlyDrawnLines(_BasicScraper):
help = 'Index Format: name' help = 'Index Format: name'
class Precocious(_BasicScraper): class Precocious(_ParserScraper):
url = 'http://www.precociouscomic.com/' url = 'http://www.precociouscomic.com/'
starter = indirectStarter
stripUrl = url + 'archive/comic/%s' stripUrl = url + 'archive/comic/%s'
imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))')) firstStripUrl = stripUrl % '2009/03/09'
prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png")) imageSearch = '//img[contains(@src, "/comics/")]'
latestSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + prevSearch = '//a[img[contains(@src, "/back_arrow")]]'
tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
help = 'Index format: yyyy/mm/dd' help = 'Index format: yyyy/mm/dd'

View file

@ -145,7 +145,7 @@ class ThreePanelSoul(_ComicControlScraper):
class ToonHole(_WordPressScraper): class ToonHole(_WordPressScraper):
url = 'http://www.toonhole.com/' url = 'http://toonhole.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2009/12/toon-hole-coming-soon-2010' firstStripUrl = stripUrl % '2009/12/toon-hole-coming-soon-2010'
prevSearch = '//a[@rel="prev"]' prevSearch = '//a[@rel="prev"]'

View file

@ -7,20 +7,15 @@ from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper from ..scraper import _BasicScraper, _ParserScraper
from ..util import tagre from ..util import tagre
from ..helpers import indirectStarter from ..helpers import indirectStarter
from .common import _ComicControlScraper, _WordPressScraper from .common import _ComicControlScraper, _WordPressScraper, xpath_class
class WapsiSquare(_BasicScraper): class WapsiSquare(_WordPressScraper):
url = 'http://wapsisquare.com/' url = 'http://wapsisquare.com/'
rurl = escape(url) firstStripUrl = url + 'comic/09092001/'
stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % '09092001'
imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
help = 'Index format: stripname'
class WastedTalent(_BasicScraper): class WastedTalent(_BasicScraper):
@ -33,15 +28,6 @@ class WastedTalent(_BasicScraper):
help = 'Index format: stripname' help = 'Index format: stripname'
class WayfarersMoon(_BasicScraper):
url = 'http://www.wayfarersmoon.com/'
stripUrl = url + 'index.php?page=%s'
firstStripUrl = stripUrl % '0'
imageSearch = compile(r'<img src="(/admin.+?)"')
prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')
help = 'Index format: nn'
class WebDesignerCOTW(_BasicScraper): class WebDesignerCOTW(_BasicScraper):
url = 'http://www.webdesignerdepot.com/' url = 'http://www.webdesignerdepot.com/'
rurl = escape(url) rurl = escape(url)
@ -91,23 +77,10 @@ class Weregeek(_BasicScraper):
help = 'Index format: yyyy/mm/dd' help = 'Index format: yyyy/mm/dd'
class WhiteNinja(_BasicScraper): class WhiteNoise(_WordPressScraper):
baseUrl = 'http://www.whiteninjacomics.com/' url = 'http://whitenoisecomic.com/'
url = baseUrl + 'comics.shtml' firstStripUrl = url + 'comic/book-one/'
stripUrl = baseUrl + 'comics/%s.shtml' prevSearch = '//a[%s]' % xpath_class('previous-webcomic-link')
imageSearch = compile(r'<img src=(/images/comics/(?!t-).+?\.gif) border=0')
prevSearch = compile(r'(/comics/.+?shtml).+?previous')
help = 'Index format: s (comic name)'
class WhiteNoise(_BasicScraper):
baseUrl = 'http://www.wncomic.com/'
url = baseUrl + 'archive.php'
stripUrl = baseUrl + 'archive_comments.php?strip_id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(r'(istrip_files/strips/.+?)"')
prevSearch = compile(r'</a><a href="(.+?)"><img src="images/top_back.jpg" ')
help = 'Index format: n'
class Whomp(_ComicControlScraper): class Whomp(_ComicControlScraper):
@ -129,13 +102,13 @@ class WhyTheLongFace(_BasicScraper):
help = 'Index format: yyyymm' help = 'Index format: yyyymm'
class Wigu(_BasicScraper): class Wigu(_ParserScraper):
url = 'http://wigucomics.com/' stripUrl = 'http://www.wigucomics.com/adventures/index.php?comic=%s'
stripUrl = url + 'oc/index.php?comic=%s' url = stripUrl % '-1'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(/oc/comics/[^"]+)')) imageSearch = '//div[@id="comic"]//img[contains(@src, "/comics/")]'
prevSearch = compile(tagre("a", "href", r'(/oc/index\.php\?comic=\d+)', prevSearch = '//a[@alt="go back"]'
after="go back")) endOfLife = True
help = 'Index format: n' help = 'Index format: n'
@ -164,7 +137,6 @@ class Wondermark(_BasicScraper):
class WorldOfMrToast(_BasicScraper): class WorldOfMrToast(_BasicScraper):
baseUrl = 'http://www.theimaginaryworld.com/' baseUrl = 'http://www.theimaginaryworld.com/'
url = baseUrl + 'mrTcomicA.html' url = baseUrl + 'mrTcomicA.html'
stripUrl = baseUrl + '%s.html'
imageSearch = compile(tagre("img", "src", r'(comic[^"]+)')) imageSearch = compile(tagre("img", "src", r'(comic[^"]+)'))
# list the archive links since there is no prev/next navigation # list the archive links since there is no prev/next navigation
prevurls = ( prevurls = (
@ -185,9 +157,9 @@ class WorldOfMrToast(_BasicScraper):
) )
firstStripUrl = prevurls[-1] firstStripUrl = prevurls[-1]
multipleImagesPerStrip = True multipleImagesPerStrip = True
help = 'Index format: none' endOfLife = True
def getPrevUrl(self, url, data, baseUrl): def getPrevUrl(self, url, data):
idx = self.prevurls.index(url) idx = self.prevurls.index(url)
try: try:
return self.prevurls[idx + 1] return self.prevurls[idx + 1]

View file

@ -20,23 +20,16 @@ class ZapComic(_ParserScraper):
prevSearch = 'a.previous-comic-link' prevSearch = 'a.previous-comic-link'
class Zapiro(_BasicScraper): class Zapiro(_ParserScraper):
url = 'http://www.mg.co.za/zapiro/' url = 'http://mg.co.za/zapiro/'
starter = bounceStarter starter = bounceStarter
stripUrl = 'http://mg.co.za/cartoon/%s' imageSearch = '//div[@id="cartoon_full_size"]//img'
firstStripUrl = stripUrl % 'zapiro_681' prevSearch = '//li[@class="nav_older"]/a'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)')) nextSearch = '//li[@class="nav_newer"]/a'
prevSearch = compile(tagre("li", "class", r'nav_older') +
tagre("a", "href",
r'(http://mg\.co\.za/cartoon/[^"]+)'))
nextSearch = compile(tagre("li", "class", r'nav_older') +
tagre("a", "href",
r'(http://mg\.co\.za/cartoon/[^"]+)'))
help = 'Index format: yyyy-mm-dd-stripname'
def namer(self, image_url, page_url): def namer(self, image_url, page_url):
name = image_url.split('/')[-3] parts = page_url.rsplit('/', 1)
return name return parts[1]
class ZenPencils(_WordPressScraper): class ZenPencils(_WordPressScraper):