Fix some more modules.
This commit is contained in:
parent ddd3fb418c
commit b8484cde50
5 changed files with 22 additions and 53 deletions
|
@@ -237,12 +237,6 @@ class Annyseed(_ParserScraper):
|
|||
return self.FIX_RE.sub('', image_url)
|
||||
|
||||
|
||||
class AoiHouse(_ParserScraper):
|
||||
url = 'http://www.aoihouse.net/'
|
||||
imageSearch = '//div[@id="comic"]/a[2]/img'
|
||||
prevSearch = '//a[@id="cndprev"]'
|
||||
|
||||
|
||||
class AppleGeeks(_BasicScraper):
|
||||
url = 'http://www.applegeeks.com/'
|
||||
stripUrl = url + 'comics/viewcomic.php?issue=%s'
|
||||
|
|
|
@@ -122,14 +122,13 @@ class BillyTheDunce(_ParserScraper):
|
|||
starter = indirectStarter
|
||||
|
||||
|
||||
class BlankIt(_BasicScraper):
|
||||
class BlankIt(_ParserScraper):
|
||||
url = 'http://blankitcomics.com/'
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '0001'
|
||||
imageSearch = compile(tagre("img", "src",
|
||||
r'(http://blankitcomics\.com/bicomics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"'))
|
||||
help = 'Index format: stripname'
|
||||
firstStripUrl = url + 'comic/well-what-would-you-do'
|
||||
imageSearch = '//div[@id="comic"]//img'
|
||||
prevSearch = '//a[%s]' % xpath_class('comic-nav-previous')
|
||||
latestSearch = '//a[%s]' % xpath_class('comic-nav-last')
|
||||
starter = indirectStarter
|
||||
|
||||
|
||||
class BloodBound(_WordPressScraper):
|
||||
|
|
|
@@ -70,20 +70,10 @@ class Catalyst(_BasicScraper):
|
|||
help = 'Index format: number'
|
||||
|
||||
|
||||
class CatAndGirl(_BasicScraper):
|
||||
class CatAndGirl(_ParserScraper):
|
||||
url = 'http://catandgirl.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '?p=%s'
|
||||
firstStripUrl = stripUrl % '1602'
|
||||
imageSearch = compile(tagre("img", "src", r'(%sarchive/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + r"[^<]+Previous</a>")
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
"""Skip pages without images."""
|
||||
return url in (
|
||||
self.stripUrl % '4299',
|
||||
)
|
||||
imageSearch = '//div[@id="comic"]//img'
|
||||
prevSearch = '//a[@rel="prev"]'
|
||||
|
||||
|
||||
class Catena(_WordPressScraper):
|
||||
|
@@ -207,27 +197,11 @@ class Concession(_BasicScraper):
|
|||
help = 'Index format: number'
|
||||
|
||||
|
||||
class CoolCatStudio(_BasicScraper):
|
||||
url = 'http://www.coolcatstudio.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'strips-cat/%s'
|
||||
firstStripUrl = stripUrl % 'first'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%sstrips-cat/[^"]+)' % rurl, before="prev"))
|
||||
help = 'Index format: ccsyyyymmdd'
|
||||
|
||||
|
||||
class CorydonCafe(_ParserScraper):
|
||||
url = 'http://corydoncafe.com/'
|
||||
starter = indirectStarter
|
||||
stripUrl = url + '%s.php'
|
||||
imageSearch = "//center[2]//img"
|
||||
prevSearch = '//a[@title="prev"]'
|
||||
latestSearch = '//ul//a'
|
||||
help = 'Index format: yyyy/stripname'
|
||||
|
||||
def namer(self, image_url, page_url):
|
||||
return page_url.split('/')[-1].split('.')[0]
|
||||
multipleImagesPerStrip = True
|
||||
|
||||
|
||||
class CourtingDisaster(_WordPressScraper):
|
||||
|
@@ -279,6 +253,10 @@ class Curtailed(_WordPressScraper):
|
|||
url = 'http://curtailedcomic.com/'
|
||||
firstStripUrl = url + 'comic/001-sneeze/'
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
"""Skip pages without images."""
|
||||
return 'comic/sitrep-1' in url
|
||||
|
||||
|
||||
class Curvy(_ParserScraper):
|
||||
url = 'http://www.c.urvy.org/'
|
||||
|
|
|
@@ -10,7 +10,7 @@ from re import compile, escape
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter, bounceStarter, xpath_class
|
||||
from ..util import tagre
|
||||
from .common import _WPNaviIn
|
||||
from .common import _ComicControlScraper, _WPNaviIn
|
||||
|
||||
|
||||
class DamnLol(_ParserScraper):
|
||||
|
@@ -38,13 +38,9 @@ class Damonk(_BasicScraper):
|
|||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class DangerouslyChloe(_BasicScraper):
|
||||
class DangerouslyChloe(_ComicControlScraper):
|
||||
url = 'http://www.dangerouslychloe.com/'
|
||||
stripUrl = url + 'strips-dc/%s'
|
||||
firstStripUrl = stripUrl % 'chapter_1_-_that_damned_girl'
|
||||
imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]*/strips-dc/[^"]+)', before="cn[id]prevt"))
|
||||
help = 'Index format: name'
|
||||
firstStripUrl = url + 'strips-dc/Chapter_1_-_That_damned_girl'
|
||||
|
||||
|
||||
class DarthsAndDroids(_BasicScraper):
|
||||
|
@@ -222,8 +218,9 @@ class DresdenCodak(_ParserScraper):
|
|||
url = 'http://dresdencodak.com/'
|
||||
startUrl = url + 'cat/comic/'
|
||||
firstStripUrl = url + '2007/02/08/pom/'
|
||||
imageSearch = '//section[%s]//img' % xpath_class('entry-content')
|
||||
prevSearch = '//a[@rel="prev"]'
|
||||
imageSearch = '//section[%s]//img[%s]' % (
|
||||
xpath_class('entry-content'), xpath_class('aligncenter'))
|
||||
prevSearch = '//a[img[contains(@src, "prev")]]'
|
||||
latestSearch = '//a[%s]' % xpath_class('tc-grid-bg-link')
|
||||
starter = indirectStarter
|
||||
|
||||
|
@@ -271,7 +268,7 @@ class Drowtales(_BasicScraper):
|
|||
stripUrl = url + '?sid=%s'
|
||||
firstStripUrl = stripUrl % '4192'
|
||||
imageSearch = (
|
||||
compile(tagre("img", "src", r'(%smainarchive/[^"]+)' % rurl)),
|
||||
compile(tagre("img", "src", r'((%s)?mainarchive/[^"]+)' % rurl)),
|
||||
compile(r'background-image:url\((mainarchive/[^\)]+center\.jpg)'),
|
||||
)
|
||||
prevSearch = compile(tagre("a", "href", r'(\?sid=\d+)', before="link_prev_top"))
|
||||
|
|
|
@@ -194,6 +194,7 @@ class Removed(Scraper):
|
|||
cls('ComicGenesis/IBlameDanny'),
|
||||
cls('ComicGenesis/SueosdelSur'),
|
||||
cls('Commissioned'),
|
||||
cls('CoolCatStudio'),
|
||||
cls('CowboyJedi', 'brk'),
|
||||
cls('Creators/BCinSpanish'),
|
||||
cls('Creators/GirlsandSportsinSpanish'),
|
||||
|
|
Loading…
Reference in a new issue