Fix some more comic modules.

This commit is contained in:
Tobias Gruetzmacher 2016-05-16 23:16:29 +02:00
parent be1a63da0c
commit a6cf4e7040
10 changed files with 50 additions and 116 deletions

View file

@@ -186,19 +186,6 @@ class BoredAndEvil(_BasicScraper):
help = 'Index format: yyyy-mm-dd'
class BoyOnAStickAndSlither(_BasicScraper):
url = 'http://www.boasas.com/'
stripUrl = url + 'page/%s'
firstStripUrl = stripUrl % '2'
imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)'))
prevSearch = compile(tagre("a", "href", r'(/page/\d+)') +
"<span>Next page")
help = 'Index format: n (unpadded)'
def namer(self, image_url, page_url):
return page_url.rsplit('/')[-1]
class BratHalla(_WordPressScraper):
url = 'http://brat-halla.com/'

View file

@@ -126,8 +126,8 @@ class DieselSweeties(_ParserScraper):
firstStripUrl = stripUrl % '1'
imageSearch = '//img[@class="xomic"]'
prevSearch = '//div[@id="prev"]//a[contains(text(), "previous")]'
nextSearch = '//div[@id="prev"]//a[contains(text(), "next")]'
starter = bounceStarter
latestSearch = prevSearch
starter = indirectStarter
help = 'Index format: n (unpadded)'

View file

@@ -46,19 +46,13 @@ class EasilyAmused(_WordPressScraper):
starter = indirectStarter
class EatLiver(_BasicScraper):
class EatLiver(_ParserScraper):
url = 'http://www.eatliver.com/'
rurl = escape(url)
starter = indirectStarter
stripUrl = url + "i.php?n=%s"
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl,
before="image_src"))
prevSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
"&#060;&#060; Previous")
latestSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
tagre("img", "src", r'img/small/[^"]+') +
r"</a>\s*<br")
multipleImagesPerStrip = True
imageSearch = '//div[%s]//img' % xpath_class('post-content')
prevSearch = '//a[@rel="prev"]'
latestSearch = '//a[@rel="bookmark"]'
class EatThatToast(_BasicScraper):
@@ -185,18 +179,9 @@ class EvilDiva(_BasicScraper):
help = 'Index format: n (unpadded)'
class EvilInc(_BasicScraper):
class EvilInc(_WordPressScraper):
url = 'http://evil-inc.com/'
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'monday-3'
imageSearch = compile(
tagre("div", "id", "comic") +
r'\s*.*\s*' + # filter out the variant href tag
tagre("img", "src",
r'(http://i\d\.wp\.com/evil-inc\.com/wp-content/uploads/[^"]+)'))
prevSearch = compile(tagre("span", "class", "mininav-prev") +
tagre("a", "href", r'([^"]+)'))
help = 'Index format: stripname'
firstStripUrl = url + 'comic/monday-3/'
class Evilish(_ParserScraper):

View file

@@ -92,9 +92,9 @@ class Flipside(_BasicScraper):
class FonFlatter(_ParserScraper):
url = 'http://www.fonflatter.de/'
url = 'https://www.fonflatter.de/'
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2005/09/20/01-begegnung-mit-batman'
firstStripUrl = url + '2005/09/20/01-begegnung-mit-batman/'
lang = 'de'
imageSearch = r'//img[re:test(@src, "/fred_\d+")]'
prevSearch = '//a[@rel="prev"]'

View file

@@ -52,9 +52,11 @@ class LasLindas(_BasicScraper):
class LastNerdsOnEarth(_ParserScraper):
url = 'http://www.lastnerdsonearth.com/latest/'
baseUrl = 'http://www.lastnerdsonearth.com/'
url = baseUrl + 'latest/'
firstStripUrl = baseUrl + 'ch1p1'
imageSearch = '//div[@id="content"]/a/img'
prevSearch = '//div[@id="comicnav"]/a[last()-2]'
prevSearch = '//div[@id="comicnav"]/a[img[contains(@src, "nav-prev")]]'
class LeastICouldDo(_BasicScraper):
@@ -97,7 +99,7 @@ class LoFiJinks(_WPNaviIn):
class LookingForGroup(_ParserScraper):
url = 'http://www.lfgcomic.com/'
url = 'http://www.lfg.co/'
stripUrl = url + 'page/%s/'
firstStripUrl = stripUrl % '1'
css = True

View file

@@ -93,6 +93,7 @@ class Optipess(_WordPressScraper):
firstStripUrl = url + '2008/12/01/jason-friend-of-the-butterflies/'
prevSearch = '//a[%s]' % xpath_class('navi-prev')
textSearch = '//div[@id="comic"]//img/@alt'
textOptional = True
class OurHomePlanet(_BasicScraper):

View file

@@ -117,7 +117,9 @@ class PHDComics(_ParserScraper):
def shouldSkipUrl(self, url, data):
"""Skip pages without images."""
return url in (
self.stripUrl % '1669', # video
# video
self.stripUrl % '1880',
self.stripUrl % '1669',
)
@@ -125,15 +127,9 @@ class Picklewhistle(_ComicControlScraper):
url = 'http://www.picklewhistle.com/'
class PicPakDog(_BasicScraper):
class PicPakDog(_WordPressScraper):
url = 'http://www.picpak.net/'
rurl = escape(url)
stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % 'dogs-cant-spell'
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+-[^"]+\.png)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl,
after="nav-prev"))
help = 'Index format: stripname'
firstStripUrl = url + 'comic/dogs-cant-spell/'
# Keep, because naming is different to PHDComics...
@@ -199,14 +195,12 @@ class PoorlyDrawnLines(_BasicScraper):
help = 'Index Format: name'
class Precocious(_BasicScraper):
class Precocious(_ParserScraper):
url = 'http://www.precociouscomic.com/'
starter = indirectStarter
stripUrl = url + 'archive/comic/%s'
imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png"))
latestSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') +
tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
firstStripUrl = stripUrl % '2009/03/09'
imageSearch = '//img[contains(@src, "/comics/")]'
prevSearch = '//a[img[contains(@src, "/back_arrow")]]'
help = 'Index format: yyyy/mm/dd'

View file

@@ -145,7 +145,7 @@ class ThreePanelSoul(_ComicControlScraper):
class ToonHole(_WordPressScraper):
url = 'http://www.toonhole.com/'
url = 'http://toonhole.com/'
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2009/12/toon-hole-coming-soon-2010'
prevSearch = '//a[@rel="prev"]'

View file

@@ -7,20 +7,15 @@ from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper
from ..scraper import _BasicScraper, _ParserScraper
from ..util import tagre
from ..helpers import indirectStarter
from .common import _ComicControlScraper, _WordPressScraper
from .common import _ComicControlScraper, _WordPressScraper, xpath_class
class WapsiSquare(_BasicScraper):
class WapsiSquare(_WordPressScraper):
url = 'http://wapsisquare.com/'
rurl = escape(url)
stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % '09092001'
imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
help = 'Index format: stripname'
firstStripUrl = url + 'comic/09092001/'
class WastedTalent(_BasicScraper):
@@ -33,15 +28,6 @@ class WastedTalent(_BasicScraper):
help = 'Index format: stripname'
class WayfarersMoon(_BasicScraper):
url = 'http://www.wayfarersmoon.com/'
stripUrl = url + 'index.php?page=%s'
firstStripUrl = stripUrl % '0'
imageSearch = compile(r'<img src="(/admin.+?)"')
prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')
help = 'Index format: nn'
class WebDesignerCOTW(_BasicScraper):
url = 'http://www.webdesignerdepot.com/'
rurl = escape(url)
@@ -91,23 +77,10 @@ class Weregeek(_BasicScraper):
help = 'Index format: yyyy/mm/dd'
class WhiteNinja(_BasicScraper):
baseUrl = 'http://www.whiteninjacomics.com/'
url = baseUrl + 'comics.shtml'
stripUrl = baseUrl + 'comics/%s.shtml'
imageSearch = compile(r'<img src=(/images/comics/(?!t-).+?\.gif) border=0')
prevSearch = compile(r'(/comics/.+?shtml).+?previous')
help = 'Index format: s (comic name)'
class WhiteNoise(_BasicScraper):
baseUrl = 'http://www.wncomic.com/'
url = baseUrl + 'archive.php'
stripUrl = baseUrl + 'archive_comments.php?strip_id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(r'(istrip_files/strips/.+?)"')
prevSearch = compile(r'</a><a href="(.+?)"><img src="images/top_back.jpg" ')
help = 'Index format: n'
class WhiteNoise(_WordPressScraper):
url = 'http://whitenoisecomic.com/'
firstStripUrl = url + 'comic/book-one/'
prevSearch = '//a[%s]' % xpath_class('previous-webcomic-link')
class Whomp(_ComicControlScraper):
@@ -129,13 +102,13 @@ class WhyTheLongFace(_BasicScraper):
help = 'Index format: yyyymm'
class Wigu(_BasicScraper):
url = 'http://wigucomics.com/'
stripUrl = url + 'oc/index.php?comic=%s'
class Wigu(_ParserScraper):
stripUrl = 'http://www.wigucomics.com/adventures/index.php?comic=%s'
url = stripUrl % '-1'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(/oc/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/oc/index\.php\?comic=\d+)',
after="go back"))
imageSearch = '//div[@id="comic"]//img[contains(@src, "/comics/")]'
prevSearch = '//a[@alt="go back"]'
endOfLife = True
help = 'Index format: n'
@@ -164,7 +137,6 @@ class Wondermark(_BasicScraper):
class WorldOfMrToast(_BasicScraper):
baseUrl = 'http://www.theimaginaryworld.com/'
url = baseUrl + 'mrTcomicA.html'
stripUrl = baseUrl + '%s.html'
imageSearch = compile(tagre("img", "src", r'(comic[^"]+)'))
# list the archive links since there is no prev/next navigation
prevurls = (
@@ -185,9 +157,9 @@ class WorldOfMrToast(_BasicScraper):
)
firstStripUrl = prevurls[-1]
multipleImagesPerStrip = True
help = 'Index format: none'
endOfLife = True
def getPrevUrl(self, url, data, baseUrl):
def getPrevUrl(self, url, data):
idx = self.prevurls.index(url)
try:
return self.prevurls[idx + 1]

View file

@@ -20,23 +20,16 @@ class ZapComic(_ParserScraper):
prevSearch = 'a.previous-comic-link'
class Zapiro(_BasicScraper):
url = 'http://www.mg.co.za/zapiro/'
class Zapiro(_ParserScraper):
url = 'http://mg.co.za/zapiro/'
starter = bounceStarter
stripUrl = 'http://mg.co.za/cartoon/%s'
firstStripUrl = stripUrl % 'zapiro_681'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))
prevSearch = compile(tagre("li", "class", r'nav_older') +
tagre("a", "href",
r'(http://mg\.co\.za/cartoon/[^"]+)'))
nextSearch = compile(tagre("li", "class", r'nav_older') +
tagre("a", "href",
r'(http://mg\.co\.za/cartoon/[^"]+)'))
help = 'Index format: yyyy-mm-dd-stripname'
imageSearch = '//div[@id="cartoon_full_size"]//img'
prevSearch = '//li[@class="nav_older"]/a'
nextSearch = '//li[@class="nav_newer"]/a'
def namer(self, image_url, page_url):
name = image_url.split('/')[-3]
return name
parts = page_url.rsplit('/', 1)
return parts[1]
class ZenPencils(_WordPressScraper):