Mark KeenSpot/GeneCatlow completed :(
This commit is contained in:
parent
f79d3c9309
commit
b79f22fb65
2 changed files with 23 additions and 22 deletions
|
@ -62,13 +62,13 @@ class KeenSpot(_ParserScraper):
|
||||||
cls('ChoppingBlock', 'choppingblock'),
|
cls('ChoppingBlock', 'choppingblock'),
|
||||||
cls('ClichFlamb', 'clicheflambe'),
|
cls('ClichFlamb', 'clicheflambe'),
|
||||||
cls('CountYourSheep', 'countyoursheep'),
|
cls('CountYourSheep', 'countyoursheep'),
|
||||||
cls('CrowScare', 'crowscare', last="20111031"),
|
cls('CrowScare', 'crowscare', last='20111031'),
|
||||||
cls('Dreamless', 'dreamless', last="20100726"),
|
cls('Dreamless', 'dreamless', last='20100726'),
|
||||||
cls('EverythingJake', 'everythingjake'),
|
cls('EverythingJake', 'everythingjake'),
|
||||||
cls('Exposure', 'exposure'),
|
cls('Exposure', 'exposure'),
|
||||||
cls('FallOutToyWorks', 'fallouttoyworks'),
|
cls('FallOutToyWorks', 'fallouttoyworks'),
|
||||||
cls('FriarAndBrimstone', 'friarandbrimstone'),
|
cls('FriarAndBrimstone', 'friarandbrimstone'),
|
||||||
cls('GeneCatlow', 'genecatlow'),
|
cls('GeneCatlow', 'genecatlow', last='20170412'),
|
||||||
cls('GodMode', 'godmode'),
|
cls('GodMode', 'godmode'),
|
||||||
cls('GreenWake', 'greenwake'),
|
cls('GreenWake', 'greenwake'),
|
||||||
cls('HeadTrip', 'headtrip'),
|
cls('HeadTrip', 'headtrip'),
|
||||||
|
@ -85,18 +85,18 @@ class KeenSpot(_ParserScraper):
|
||||||
cls('MarryMe', 'marryme'),
|
cls('MarryMe', 'marryme'),
|
||||||
cls('MedusasDaughter', 'medusasdaughter'),
|
cls('MedusasDaughter', 'medusasdaughter'),
|
||||||
cls('MonsterMassacre', 'monstermassacre'),
|
cls('MonsterMassacre', 'monstermassacre'),
|
||||||
cls('MysticRevolution', 'mysticrevolution', path="?cid=%s"),
|
cls('MysticRevolution', 'mysticrevolution', path='?cid=%s'),
|
||||||
cls('NoPinkPonies', 'nopinkponies'),
|
cls('NoPinkPonies', 'nopinkponies'),
|
||||||
cls('NoRoomForMagic', 'noroomformagic'),
|
cls('NoRoomForMagic', 'noroomformagic'),
|
||||||
cls('OutThere', 'outthere'),
|
cls('OutThere', 'outthere'),
|
||||||
cls('Porcelain', 'porcelain'),
|
cls('Porcelain', 'porcelain'),
|
||||||
cls('PunchAnPie', 'punchanpie', path="daily/%s.html"),
|
cls('PunchAnPie', 'punchanpie', path='daily/%s.html'),
|
||||||
cls('QUILTBAG', 'quiltbag'),
|
cls('QUILTBAG', 'quiltbag'),
|
||||||
cls('RedSpike', 'redspike'),
|
cls('RedSpike', 'redspike'),
|
||||||
cls('RumbleFall', 'rumblefall'),
|
cls('RumbleFall', 'rumblefall'),
|
||||||
cls('SamuraisBlood', 'samuraisblood'),
|
cls('SamuraisBlood', 'samuraisblood'),
|
||||||
cls('Sharky', 'sharky'),
|
cls('Sharky', 'sharky'),
|
||||||
cls('ShockwaveDarkside', 'shockwave', path="2d/%s.html"),
|
cls('ShockwaveDarkside', 'shockwave', path='2d/%s.html'),
|
||||||
cls('SomethingHappens', 'somethinghappens'),
|
cls('SomethingHappens', 'somethinghappens'),
|
||||||
cls('SoreThumbs', 'sorethumbs'),
|
cls('SoreThumbs', 'sorethumbs'),
|
||||||
cls('Striptease', 'striptease'),
|
cls('Striptease', 'striptease'),
|
||||||
|
|
|
@ -16,25 +16,26 @@ from dosagelib.util import check_robotstxt
|
||||||
|
|
||||||
|
|
||||||
class KeenSpotUpdater(ComicListUpdater):
|
class KeenSpotUpdater(ComicListUpdater):
|
||||||
dup_templates = ('Creators/%s', "GoComics/%s", "ComicGenesis/%s")
|
dup_templates = ('Creators/%s', 'GoComics/%s', 'ComicGenesis/%s')
|
||||||
|
|
||||||
# names of comics to exclude
|
# names of comics to exclude
|
||||||
excluded_comics = (
|
excluded_comics = (
|
||||||
# non-standard navigation
|
# non-standard navigation
|
||||||
"BrawlInTheFamily",
|
'BrawlInTheFamily',
|
||||||
"Flipside",
|
'Flipside',
|
||||||
"LastBlood",
|
'LastBlood',
|
||||||
"TheGodChild",
|
'TheGodChild',
|
||||||
"Twokinds",
|
'Twokinds',
|
||||||
"Yirmumah",
|
'Yirmumah',
|
||||||
)
|
)
|
||||||
|
|
||||||
extra = {
|
extra = {
|
||||||
'CrowScare': 'last="20111031"',
|
'CrowScare': "last='20111031'",
|
||||||
'Dreamless': 'last="20100726"',
|
'Dreamless': "last='20100726'",
|
||||||
'MysticRevolution': 'path="?cid=%s"',
|
'GeneCatlow': "last='20170412'",
|
||||||
'PunchAnPie': 'path="daily/%s.html"',
|
'MysticRevolution': "path='?cid=%s'",
|
||||||
'ShockwaveDarkside': 'path="2d/%s.html"',
|
'PunchAnPie': "path='daily/%s.html'",
|
||||||
|
'ShockwaveDarkside': "path='2d/%s.html'",
|
||||||
}
|
}
|
||||||
|
|
||||||
def collect_results(self):
|
def collect_results(self):
|
||||||
|
@ -43,14 +44,14 @@ class KeenSpotUpdater(ComicListUpdater):
|
||||||
|
|
||||||
for comiclink in data.xpath('//td[@id]/a'):
|
for comiclink in data.xpath('//td[@id]/a'):
|
||||||
comicurl = comiclink.attrib['href']
|
comicurl = comiclink.attrib['href']
|
||||||
name = comiclink.xpath("string()")
|
name = comiclink.xpath('string()')
|
||||||
try:
|
try:
|
||||||
if "/d/" not in comicurl:
|
if '/d/' not in comicurl:
|
||||||
check_robotstxt(comicurl + "d/", self.session)
|
check_robotstxt(comicurl + 'd/', self.session)
|
||||||
else:
|
else:
|
||||||
check_robotstxt(comicurl, self.session)
|
check_robotstxt(comicurl, self.session)
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
print("[%s] INFO: robots.txt denied: %s" % (name, e))
|
print('[%s] INFO: robots.txt denied: %s' % (name, e))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
self.add_comic(name, comicurl)
|
self.add_comic(name, comicurl)
|
||||||
|
|
Loading…
Reference in a new issue