Fix some more modules.

This commit is contained in:
Tobias Gruetzmacher 2017-05-15 00:27:28 +02:00
parent ddd3fb418c
commit b8484cde50
5 changed files with 22 additions and 53 deletions

View file

@@ -237,12 +237,6 @@ class Annyseed(_ParserScraper):
return self.FIX_RE.sub('', image_url) return self.FIX_RE.sub('', image_url)
class AoiHouse(_ParserScraper):
url = 'http://www.aoihouse.net/'
imageSearch = '//div[@id="comic"]/a[2]/img'
prevSearch = '//a[@id="cndprev"]'
class AppleGeeks(_BasicScraper): class AppleGeeks(_BasicScraper):
url = 'http://www.applegeeks.com/' url = 'http://www.applegeeks.com/'
stripUrl = url + 'comics/viewcomic.php?issue=%s' stripUrl = url + 'comics/viewcomic.php?issue=%s'

View file

@@ -122,14 +122,13 @@ class BillyTheDunce(_ParserScraper):
starter = indirectStarter starter = indirectStarter
class BlankIt(_BasicScraper): class BlankIt(_ParserScraper):
url = 'http://blankitcomics.com/' url = 'http://blankitcomics.com/'
stripUrl = url + '%s/' firstStripUrl = url + 'comic/well-what-would-you-do'
firstStripUrl = stripUrl % '0001' imageSearch = '//div[@id="comic"]//img'
imageSearch = compile(tagre("img", "src", prevSearch = '//a[%s]' % xpath_class('comic-nav-previous')
r'(http://blankitcomics\.com/bicomics/[^"]+)')) latestSearch = '//a[%s]' % xpath_class('comic-nav-last')
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"')) starter = indirectStarter
help = 'Index format: stripname'
class BloodBound(_WordPressScraper): class BloodBound(_WordPressScraper):

View file

@@ -70,20 +70,10 @@ class Catalyst(_BasicScraper):
help = 'Index format: number' help = 'Index format: number'
class CatAndGirl(_BasicScraper): class CatAndGirl(_ParserScraper):
url = 'http://catandgirl.com/' url = 'http://catandgirl.com/'
rurl = escape(url) imageSearch = '//div[@id="comic"]//img'
stripUrl = url + '?p=%s' prevSearch = '//a[@rel="prev"]'
firstStripUrl = stripUrl % '1602'
imageSearch = compile(tagre("img", "src", r'(%sarchive/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + r"[^<]+Previous</a>")
help = 'Index format: n (unpadded)'
def shouldSkipUrl(self, url, data):
"""Skip pages without images."""
return url in (
self.stripUrl % '4299',
)
class Catena(_WordPressScraper): class Catena(_WordPressScraper):
@@ -207,27 +197,11 @@ class Concession(_BasicScraper):
help = 'Index format: number' help = 'Index format: number'
class CoolCatStudio(_BasicScraper):
url = 'http://www.coolcatstudio.com/'
rurl = escape(url)
stripUrl = url + 'strips-cat/%s'
firstStripUrl = stripUrl % 'first'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sstrips-cat/[^"]+)' % rurl, before="prev"))
help = 'Index format: ccsyyyymmdd'
class CorydonCafe(_ParserScraper): class CorydonCafe(_ParserScraper):
url = 'http://corydoncafe.com/' url = 'http://corydoncafe.com/'
starter = indirectStarter
stripUrl = url + '%s.php'
imageSearch = "//center[2]//img" imageSearch = "//center[2]//img"
prevSearch = '//a[@title="prev"]' prevSearch = '//a[@title="prev"]'
latestSearch = '//ul//a' multipleImagesPerStrip = True
help = 'Index format: yyyy/stripname'
def namer(self, image_url, page_url):
return page_url.split('/')[-1].split('.')[0]
class CourtingDisaster(_WordPressScraper): class CourtingDisaster(_WordPressScraper):
@@ -279,6 +253,10 @@ class Curtailed(_WordPressScraper):
url = 'http://curtailedcomic.com/' url = 'http://curtailedcomic.com/'
firstStripUrl = url + 'comic/001-sneeze/' firstStripUrl = url + 'comic/001-sneeze/'
def shouldSkipUrl(self, url, data):
"""Skip pages without images."""
return 'comic/sitrep-1' in url
class Curvy(_ParserScraper): class Curvy(_ParserScraper):
url = 'http://www.c.urvy.org/' url = 'http://www.c.urvy.org/'

View file

@@ -10,7 +10,7 @@ from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, bounceStarter, xpath_class from ..helpers import indirectStarter, bounceStarter, xpath_class
from ..util import tagre from ..util import tagre
from .common import _WPNaviIn from .common import _ComicControlScraper, _WPNaviIn
class DamnLol(_ParserScraper): class DamnLol(_ParserScraper):
@@ -38,13 +38,9 @@ class Damonk(_BasicScraper):
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
class DangerouslyChloe(_BasicScraper): class DangerouslyChloe(_ComicControlScraper):
url = 'http://www.dangerouslychloe.com/' url = 'http://www.dangerouslychloe.com/'
stripUrl = url + 'strips-dc/%s' firstStripUrl = url + 'strips-dc/Chapter_1_-_That_damned_girl'
firstStripUrl = stripUrl % 'chapter_1_-_that_damned_girl'
imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]*/strips-dc/[^"]+)', before="cn[id]prevt"))
help = 'Index format: name'
class DarthsAndDroids(_BasicScraper): class DarthsAndDroids(_BasicScraper):
@@ -222,8 +218,9 @@ class DresdenCodak(_ParserScraper):
url = 'http://dresdencodak.com/' url = 'http://dresdencodak.com/'
startUrl = url + 'cat/comic/' startUrl = url + 'cat/comic/'
firstStripUrl = url + '2007/02/08/pom/' firstStripUrl = url + '2007/02/08/pom/'
imageSearch = '//section[%s]//img' % xpath_class('entry-content') imageSearch = '//section[%s]//img[%s]' % (
prevSearch = '//a[@rel="prev"]' xpath_class('entry-content'), xpath_class('aligncenter'))
prevSearch = '//a[img[contains(@src, "prev")]]'
latestSearch = '//a[%s]' % xpath_class('tc-grid-bg-link') latestSearch = '//a[%s]' % xpath_class('tc-grid-bg-link')
starter = indirectStarter starter = indirectStarter
@@ -271,7 +268,7 @@ class Drowtales(_BasicScraper):
stripUrl = url + '?sid=%s' stripUrl = url + '?sid=%s'
firstStripUrl = stripUrl % '4192' firstStripUrl = stripUrl % '4192'
imageSearch = ( imageSearch = (
compile(tagre("img", "src", r'(%smainarchive/[^"]+)' % rurl)), compile(tagre("img", "src", r'((%s)?mainarchive/[^"]+)' % rurl)),
compile(r'background-image:url\((mainarchive/[^\)]+center\.jpg)'), compile(r'background-image:url\((mainarchive/[^\)]+center\.jpg)'),
) )
prevSearch = compile(tagre("a", "href", r'(\?sid=\d+)', before="link_prev_top")) prevSearch = compile(tagre("a", "href", r'(\?sid=\d+)', before="link_prev_top"))

View file

@@ -194,6 +194,7 @@ class Removed(Scraper):
cls('ComicGenesis/IBlameDanny'), cls('ComicGenesis/IBlameDanny'),
cls('ComicGenesis/SueosdelSur'), cls('ComicGenesis/SueosdelSur'),
cls('Commissioned'), cls('Commissioned'),
cls('CoolCatStudio'),
cls('CowboyJedi', 'brk'), cls('CowboyJedi', 'brk'),
cls('Creators/BCinSpanish'), cls('Creators/BCinSpanish'),
cls('Creators/GirlsandSportsinSpanish'), cls('Creators/GirlsandSportsinSpanish'),