Mark KeenSpot/GeneCatlow completed :(

Techwolf 2019-06-12 19:26:17 -07:00 committed by Tobias Gruetzmacher
parent f79d3c9309
commit b79f22fb65
2 changed files with 23 additions and 22 deletions

@@ -62,13 +62,13 @@ class KeenSpot(_ParserScraper):
             cls('ChoppingBlock', 'choppingblock'),
             cls('ClichFlamb', 'clicheflambe'),
             cls('CountYourSheep', 'countyoursheep'),
-            cls('CrowScare', 'crowscare', last="20111031"),
-            cls('Dreamless', 'dreamless', last="20100726"),
+            cls('CrowScare', 'crowscare', last='20111031'),
+            cls('Dreamless', 'dreamless', last='20100726'),
             cls('EverythingJake', 'everythingjake'),
             cls('Exposure', 'exposure'),
             cls('FallOutToyWorks', 'fallouttoyworks'),
             cls('FriarAndBrimstone', 'friarandbrimstone'),
-            cls('GeneCatlow', 'genecatlow'),
+            cls('GeneCatlow', 'genecatlow', last='20170412'),
             cls('GodMode', 'godmode'),
             cls('GreenWake', 'greenwake'),
             cls('HeadTrip', 'headtrip'),
@@ -85,18 +85,18 @@ class KeenSpot(_ParserScraper):
             cls('MarryMe', 'marryme'),
             cls('MedusasDaughter', 'medusasdaughter'),
             cls('MonsterMassacre', 'monstermassacre'),
-            cls('MysticRevolution', 'mysticrevolution', path="?cid=%s"),
+            cls('MysticRevolution', 'mysticrevolution', path='?cid=%s'),
             cls('NoPinkPonies', 'nopinkponies'),
             cls('NoRoomForMagic', 'noroomformagic'),
             cls('OutThere', 'outthere'),
             cls('Porcelain', 'porcelain'),
-            cls('PunchAnPie', 'punchanpie', path="daily/%s.html"),
+            cls('PunchAnPie', 'punchanpie', path='daily/%s.html'),
             cls('QUILTBAG', 'quiltbag'),
             cls('RedSpike', 'redspike'),
             cls('RumbleFall', 'rumblefall'),
             cls('SamuraisBlood', 'samuraisblood'),
             cls('Sharky', 'sharky'),
-            cls('ShockwaveDarkside', 'shockwave', path="2d/%s.html"),
+            cls('ShockwaveDarkside', 'shockwave', path='2d/%s.html'),
             cls('SomethingHappens', 'somethinghappens'),
             cls('SoreThumbs', 'sorethumbs'),
             cls('Striptease', 'striptease'),
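Context for the keywords above: `path` overrides the default strip-URL pattern for sites with non-standard archives, and `last` pins a finished comic to its final strip date, which is what this commit does for GeneCatlow (final strip 2017-04-12). A minimal sketch of how the constructor plausibly consumes these arguments; the stub base class, the constructor body, and the `endOfLife` completion flag are assumptions inferred from this diff and the commit message, not code copied from the repository:

    # Stub standing in for dosage's _ParserScraper base class (assumption:
    # the real base takes the scraper name in its constructor).
    class _ParserScraper:
        def __init__(self, name):
            self.name = name

    class KeenSpot(_ParserScraper):
        def __init__(self, name, sub, last=None, path='d/%s.html'):
            super().__init__('KeenSpot/' + name)
            # Every KeenSpot comic lives on its own subdomain.
            self.url = 'http://%s.keenspot.com/' % sub
            self.stripUrl = self.url + path
            if last is not None:
                # Finished comic: pin the entry page to the final strip
                # (e.g. last='20170412' for GeneCatlow) and flag completion.
                self.url = self.stripUrl % last
                self.endOfLife = True

    comic = KeenSpot('GeneCatlow', 'genecatlow', last='20170412')
    print(comic.url)  # -> http://genecatlow.keenspot.com/d/20170412.html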

@@ -16,25 +16,26 @@ from dosagelib.util import check_robotstxt
 class KeenSpotUpdater(ComicListUpdater):
-    dup_templates = ('Creators/%s', "GoComics/%s", "ComicGenesis/%s")
+    dup_templates = ('Creators/%s', 'GoComics/%s', 'ComicGenesis/%s')
 
     # names of comics to exclude
     excluded_comics = (
         # non-standard navigation
-        "BrawlInTheFamily",
-        "Flipside",
-        "LastBlood",
-        "TheGodChild",
-        "Twokinds",
-        "Yirmumah",
+        'BrawlInTheFamily',
+        'Flipside',
+        'LastBlood',
+        'TheGodChild',
+        'Twokinds',
+        'Yirmumah',
     )
 
     extra = {
-        'CrowScare': 'last="20111031"',
-        'Dreamless': 'last="20100726"',
-        'MysticRevolution': 'path="?cid=%s"',
-        'PunchAnPie': 'path="daily/%s.html"',
-        'ShockwaveDarkside': 'path="2d/%s.html"',
+        'CrowScare': "last='20111031'",
+        'Dreamless': "last='20100726'",
+        'GeneCatlow': "last='20170412'",
+        'MysticRevolution': "path='?cid=%s'",
+        'PunchAnPie': "path='daily/%s.html'",
+        'ShockwaveDarkside': "path='2d/%s.html'",
     }
 
     def collect_results(self):
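The `extra` mapping matters because this updater script regenerates the KeenSpot module list: for each crawled comic it emits a `cls(name, sub)` entry, and `extra` supplies per-comic keyword arguments that must survive regeneration, so without the new 'GeneCatlow' entry a rerun would silently drop the `last='20170412'` marker. A hypothetical illustration of that splicing (the function and variable names are assumptions, not the actual ComicListUpdater code):

    def format_entry(name, sub, extra):
        # Append the stored keyword-argument string, if any, to the entry.
        kwargs = extra.get(name)
        if kwargs:
            return "cls('%s', '%s', %s)," % (name, sub, kwargs)
        return "cls('%s', '%s')," % (name, sub)

    extra = {'GeneCatlow': "last='20170412'"}
    print(format_entry('GeneCatlow', 'genecatlow', extra))
    # -> cls('GeneCatlow', 'genecatlow', last='20170412'),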
@@ -43,14 +44,14 @@ class KeenSpotUpdater(ComicListUpdater):
         for comiclink in data.xpath('//td[@id]/a'):
             comicurl = comiclink.attrib['href']
-            name = comiclink.xpath("string()")
+            name = comiclink.xpath('string()')
             try:
-                if "/d/" not in comicurl:
-                    check_robotstxt(comicurl + "d/", self.session)
+                if '/d/' not in comicurl:
+                    check_robotstxt(comicurl + 'd/', self.session)
                 else:
                     check_robotstxt(comicurl, self.session)
             except IOError as e:
-                print("[%s] INFO: robots.txt denied: %s" % (name, e))
+                print('[%s] INFO: robots.txt denied: %s' % (name, e))
                 continue
 
             self.add_comic(name, comicurl)
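As the except clause shows, check_robotstxt() raises IOError when the site's robots.txt disallows the URL, so denied comics are logged and skipped rather than added. A hedged usage sketch of the same call pattern outside the updater; the URL is illustrative, and the signature mirrors the calls in this diff:

    import requests
    from dosagelib.util import check_robotstxt

    session = requests.Session()
    try:
        # Probe the archive path ('d/'), since that is what the scraper crawls.
        check_robotstxt('http://genecatlow.keenspot.com/d/', session)
    except IOError as e:
        print('[GeneCatlow] INFO: robots.txt denied: %s' % e)
    else:
        print('[GeneCatlow] allowed by robots.txt')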