Fix some more modules.
This commit is contained in:
parent
ddd3fb418c
commit
b8484cde50
5 changed files with 22 additions and 53 deletions
|
@ -237,12 +237,6 @@ class Annyseed(_ParserScraper):
|
||||||
return self.FIX_RE.sub('', image_url)
|
return self.FIX_RE.sub('', image_url)
|
||||||
|
|
||||||
|
|
||||||
class AoiHouse(_ParserScraper):
|
|
||||||
url = 'http://www.aoihouse.net/'
|
|
||||||
imageSearch = '//div[@id="comic"]/a[2]/img'
|
|
||||||
prevSearch = '//a[@id="cndprev"]'
|
|
||||||
|
|
||||||
|
|
||||||
class AppleGeeks(_BasicScraper):
|
class AppleGeeks(_BasicScraper):
|
||||||
url = 'http://www.applegeeks.com/'
|
url = 'http://www.applegeeks.com/'
|
||||||
stripUrl = url + 'comics/viewcomic.php?issue=%s'
|
stripUrl = url + 'comics/viewcomic.php?issue=%s'
|
||||||
|
|
|
@ -122,14 +122,13 @@ class BillyTheDunce(_ParserScraper):
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class BlankIt(_BasicScraper):
|
class BlankIt(_ParserScraper):
|
||||||
url = 'http://blankitcomics.com/'
|
url = 'http://blankitcomics.com/'
|
||||||
stripUrl = url + '%s/'
|
firstStripUrl = url + 'comic/well-what-would-you-do'
|
||||||
firstStripUrl = stripUrl % '0001'
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
imageSearch = compile(tagre("img", "src",
|
prevSearch = '//a[%s]' % xpath_class('comic-nav-previous')
|
||||||
r'(http://blankitcomics\.com/bicomics/[^"]+)'))
|
latestSearch = '//a[%s]' % xpath_class('comic-nav-last')
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"'))
|
starter = indirectStarter
|
||||||
help = 'Index format: stripname'
|
|
||||||
|
|
||||||
|
|
||||||
class BloodBound(_WordPressScraper):
|
class BloodBound(_WordPressScraper):
|
||||||
|
|
|
@ -70,20 +70,10 @@ class Catalyst(_BasicScraper):
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
class CatAndGirl(_BasicScraper):
|
class CatAndGirl(_ParserScraper):
|
||||||
url = 'http://catandgirl.com/'
|
url = 'http://catandgirl.com/'
|
||||||
rurl = escape(url)
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
stripUrl = url + '?p=%s'
|
prevSearch = '//a[@rel="prev"]'
|
||||||
firstStripUrl = stripUrl % '1602'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(%sarchive/[^"]+)' % rurl))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + r"[^<]+Previous</a>")
|
|
||||||
help = 'Index format: n (unpadded)'
|
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
|
||||||
"""Skip pages without images."""
|
|
||||||
return url in (
|
|
||||||
self.stripUrl % '4299',
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class Catena(_WordPressScraper):
|
class Catena(_WordPressScraper):
|
||||||
|
@ -207,27 +197,11 @@ class Concession(_BasicScraper):
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
class CoolCatStudio(_BasicScraper):
|
|
||||||
url = 'http://www.coolcatstudio.com/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + 'strips-cat/%s'
|
|
||||||
firstStripUrl = stripUrl % 'first'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%sstrips-cat/[^"]+)' % rurl, before="prev"))
|
|
||||||
help = 'Index format: ccsyyyymmdd'
|
|
||||||
|
|
||||||
|
|
||||||
class CorydonCafe(_ParserScraper):
|
class CorydonCafe(_ParserScraper):
|
||||||
url = 'http://corydoncafe.com/'
|
url = 'http://corydoncafe.com/'
|
||||||
starter = indirectStarter
|
|
||||||
stripUrl = url + '%s.php'
|
|
||||||
imageSearch = "//center[2]//img"
|
imageSearch = "//center[2]//img"
|
||||||
prevSearch = '//a[@title="prev"]'
|
prevSearch = '//a[@title="prev"]'
|
||||||
latestSearch = '//ul//a'
|
multipleImagesPerStrip = True
|
||||||
help = 'Index format: yyyy/stripname'
|
|
||||||
|
|
||||||
def namer(self, image_url, page_url):
|
|
||||||
return page_url.split('/')[-1].split('.')[0]
|
|
||||||
|
|
||||||
|
|
||||||
class CourtingDisaster(_WordPressScraper):
|
class CourtingDisaster(_WordPressScraper):
|
||||||
|
@ -279,6 +253,10 @@ class Curtailed(_WordPressScraper):
|
||||||
url = 'http://curtailedcomic.com/'
|
url = 'http://curtailedcomic.com/'
|
||||||
firstStripUrl = url + 'comic/001-sneeze/'
|
firstStripUrl = url + 'comic/001-sneeze/'
|
||||||
|
|
||||||
|
def shouldSkipUrl(self, url, data):
|
||||||
|
"""Skip pages without images."""
|
||||||
|
return 'comic/sitrep-1' in url
|
||||||
|
|
||||||
|
|
||||||
class Curvy(_ParserScraper):
|
class Curvy(_ParserScraper):
|
||||||
url = 'http://www.c.urvy.org/'
|
url = 'http://www.c.urvy.org/'
|
||||||
|
|
|
@ -10,7 +10,7 @@ from re import compile, escape
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import indirectStarter, bounceStarter, xpath_class
|
from ..helpers import indirectStarter, bounceStarter, xpath_class
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from .common import _WPNaviIn
|
from .common import _ComicControlScraper, _WPNaviIn
|
||||||
|
|
||||||
|
|
||||||
class DamnLol(_ParserScraper):
|
class DamnLol(_ParserScraper):
|
||||||
|
@ -38,13 +38,9 @@ class Damonk(_BasicScraper):
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
class DangerouslyChloe(_BasicScraper):
|
class DangerouslyChloe(_ComicControlScraper):
|
||||||
url = 'http://www.dangerouslychloe.com/'
|
url = 'http://www.dangerouslychloe.com/'
|
||||||
stripUrl = url + 'strips-dc/%s'
|
firstStripUrl = url + 'strips-dc/Chapter_1_-_That_damned_girl'
|
||||||
firstStripUrl = stripUrl % 'chapter_1_-_that_damned_girl'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]*/strips-dc/[^"]+)', before="cn[id]prevt"))
|
|
||||||
help = 'Index format: name'
|
|
||||||
|
|
||||||
|
|
||||||
class DarthsAndDroids(_BasicScraper):
|
class DarthsAndDroids(_BasicScraper):
|
||||||
|
@ -222,8 +218,9 @@ class DresdenCodak(_ParserScraper):
|
||||||
url = 'http://dresdencodak.com/'
|
url = 'http://dresdencodak.com/'
|
||||||
startUrl = url + 'cat/comic/'
|
startUrl = url + 'cat/comic/'
|
||||||
firstStripUrl = url + '2007/02/08/pom/'
|
firstStripUrl = url + '2007/02/08/pom/'
|
||||||
imageSearch = '//section[%s]//img' % xpath_class('entry-content')
|
imageSearch = '//section[%s]//img[%s]' % (
|
||||||
prevSearch = '//a[@rel="prev"]'
|
xpath_class('entry-content'), xpath_class('aligncenter'))
|
||||||
|
prevSearch = '//a[img[contains(@src, "prev")]]'
|
||||||
latestSearch = '//a[%s]' % xpath_class('tc-grid-bg-link')
|
latestSearch = '//a[%s]' % xpath_class('tc-grid-bg-link')
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
|
||||||
|
@ -271,7 +268,7 @@ class Drowtales(_BasicScraper):
|
||||||
stripUrl = url + '?sid=%s'
|
stripUrl = url + '?sid=%s'
|
||||||
firstStripUrl = stripUrl % '4192'
|
firstStripUrl = stripUrl % '4192'
|
||||||
imageSearch = (
|
imageSearch = (
|
||||||
compile(tagre("img", "src", r'(%smainarchive/[^"]+)' % rurl)),
|
compile(tagre("img", "src", r'((%s)?mainarchive/[^"]+)' % rurl)),
|
||||||
compile(r'background-image:url\((mainarchive/[^\)]+center\.jpg)'),
|
compile(r'background-image:url\((mainarchive/[^\)]+center\.jpg)'),
|
||||||
)
|
)
|
||||||
prevSearch = compile(tagre("a", "href", r'(\?sid=\d+)', before="link_prev_top"))
|
prevSearch = compile(tagre("a", "href", r'(\?sid=\d+)', before="link_prev_top"))
|
||||||
|
|
|
@ -194,6 +194,7 @@ class Removed(Scraper):
|
||||||
cls('ComicGenesis/IBlameDanny'),
|
cls('ComicGenesis/IBlameDanny'),
|
||||||
cls('ComicGenesis/SueosdelSur'),
|
cls('ComicGenesis/SueosdelSur'),
|
||||||
cls('Commissioned'),
|
cls('Commissioned'),
|
||||||
|
cls('CoolCatStudio'),
|
||||||
cls('CowboyJedi', 'brk'),
|
cls('CowboyJedi', 'brk'),
|
||||||
cls('Creators/BCinSpanish'),
|
cls('Creators/BCinSpanish'),
|
||||||
cls('Creators/GirlsandSportsinSpanish'),
|
cls('Creators/GirlsandSportsinSpanish'),
|
||||||
|
|
Loading…
Reference in a new issue