Mark KeenSpot/GeneCatlow as completed :(

This commit is contained in:
Techwolf 2019-06-12 19:26:17 -07:00 committed by Tobias Gruetzmacher
parent f79d3c9309
commit b79f22fb65
2 changed files with 23 additions and 22 deletions

View file

@ -62,13 +62,13 @@ class KeenSpot(_ParserScraper):
cls('ChoppingBlock', 'choppingblock'),
cls('ClichFlamb', 'clicheflambe'),
cls('CountYourSheep', 'countyoursheep'),
cls('CrowScare', 'crowscare', last="20111031"),
cls('Dreamless', 'dreamless', last="20100726"),
cls('CrowScare', 'crowscare', last='20111031'),
cls('Dreamless', 'dreamless', last='20100726'),
cls('EverythingJake', 'everythingjake'),
cls('Exposure', 'exposure'),
cls('FallOutToyWorks', 'fallouttoyworks'),
cls('FriarAndBrimstone', 'friarandbrimstone'),
cls('GeneCatlow', 'genecatlow'),
cls('GeneCatlow', 'genecatlow', last='20170412'),
cls('GodMode', 'godmode'),
cls('GreenWake', 'greenwake'),
cls('HeadTrip', 'headtrip'),
@ -85,18 +85,18 @@ class KeenSpot(_ParserScraper):
cls('MarryMe', 'marryme'),
cls('MedusasDaughter', 'medusasdaughter'),
cls('MonsterMassacre', 'monstermassacre'),
cls('MysticRevolution', 'mysticrevolution', path="?cid=%s"),
cls('MysticRevolution', 'mysticrevolution', path='?cid=%s'),
cls('NoPinkPonies', 'nopinkponies'),
cls('NoRoomForMagic', 'noroomformagic'),
cls('OutThere', 'outthere'),
cls('Porcelain', 'porcelain'),
cls('PunchAnPie', 'punchanpie', path="daily/%s.html"),
cls('PunchAnPie', 'punchanpie', path='daily/%s.html'),
cls('QUILTBAG', 'quiltbag'),
cls('RedSpike', 'redspike'),
cls('RumbleFall', 'rumblefall'),
cls('SamuraisBlood', 'samuraisblood'),
cls('Sharky', 'sharky'),
cls('ShockwaveDarkside', 'shockwave', path="2d/%s.html"),
cls('ShockwaveDarkside', 'shockwave', path='2d/%s.html'),
cls('SomethingHappens', 'somethinghappens'),
cls('SoreThumbs', 'sorethumbs'),
cls('Striptease', 'striptease'),

View file

@ -16,25 +16,26 @@ from dosagelib.util import check_robotstxt
class KeenSpotUpdater(ComicListUpdater):
dup_templates = ('Creators/%s', "GoComics/%s", "ComicGenesis/%s")
dup_templates = ('Creators/%s', 'GoComics/%s', 'ComicGenesis/%s')
# names of comics to exclude
excluded_comics = (
# non-standard navigation
"BrawlInTheFamily",
"Flipside",
"LastBlood",
"TheGodChild",
"Twokinds",
"Yirmumah",
'BrawlInTheFamily',
'Flipside',
'LastBlood',
'TheGodChild',
'Twokinds',
'Yirmumah',
)
extra = {
'CrowScare': 'last="20111031"',
'Dreamless': 'last="20100726"',
'MysticRevolution': 'path="?cid=%s"',
'PunchAnPie': 'path="daily/%s.html"',
'ShockwaveDarkside': 'path="2d/%s.html"',
'CrowScare': "last='20111031'",
'Dreamless': "last='20100726'",
'GeneCatlow': "last='20170412'",
'MysticRevolution': "path='?cid=%s'",
'PunchAnPie': "path='daily/%s.html'",
'ShockwaveDarkside': "path='2d/%s.html'",
}
def collect_results(self):
@ -43,14 +44,14 @@ class KeenSpotUpdater(ComicListUpdater):
for comiclink in data.xpath('//td[@id]/a'):
comicurl = comiclink.attrib['href']
name = comiclink.xpath("string()")
name = comiclink.xpath('string()')
try:
if "/d/" not in comicurl:
check_robotstxt(comicurl + "d/", self.session)
if '/d/' not in comicurl:
check_robotstxt(comicurl + 'd/', self.session)
else:
check_robotstxt(comicurl, self.session)
except IOError as e:
print("[%s] INFO: robots.txt denied: %s" % (name, e))
print('[%s] INFO: robots.txt denied: %s' % (name, e))
continue
self.add_comic(name, comicurl)