Mark KeenSpot/GeneCatlow completed :(
This commit is contained in:
parent
f79d3c9309
commit
b79f22fb65
2 changed files with 23 additions and 22 deletions
|
@ -62,13 +62,13 @@ class KeenSpot(_ParserScraper):
|
|||
cls('ChoppingBlock', 'choppingblock'),
|
||||
cls('ClichFlamb', 'clicheflambe'),
|
||||
cls('CountYourSheep', 'countyoursheep'),
|
||||
cls('CrowScare', 'crowscare', last="20111031"),
|
||||
cls('Dreamless', 'dreamless', last="20100726"),
|
||||
cls('CrowScare', 'crowscare', last='20111031'),
|
||||
cls('Dreamless', 'dreamless', last='20100726'),
|
||||
cls('EverythingJake', 'everythingjake'),
|
||||
cls('Exposure', 'exposure'),
|
||||
cls('FallOutToyWorks', 'fallouttoyworks'),
|
||||
cls('FriarAndBrimstone', 'friarandbrimstone'),
|
||||
cls('GeneCatlow', 'genecatlow'),
|
||||
cls('GeneCatlow', 'genecatlow', last='20170412'),
|
||||
cls('GodMode', 'godmode'),
|
||||
cls('GreenWake', 'greenwake'),
|
||||
cls('HeadTrip', 'headtrip'),
|
||||
|
@ -85,18 +85,18 @@ class KeenSpot(_ParserScraper):
|
|||
cls('MarryMe', 'marryme'),
|
||||
cls('MedusasDaughter', 'medusasdaughter'),
|
||||
cls('MonsterMassacre', 'monstermassacre'),
|
||||
cls('MysticRevolution', 'mysticrevolution', path="?cid=%s"),
|
||||
cls('MysticRevolution', 'mysticrevolution', path='?cid=%s'),
|
||||
cls('NoPinkPonies', 'nopinkponies'),
|
||||
cls('NoRoomForMagic', 'noroomformagic'),
|
||||
cls('OutThere', 'outthere'),
|
||||
cls('Porcelain', 'porcelain'),
|
||||
cls('PunchAnPie', 'punchanpie', path="daily/%s.html"),
|
||||
cls('PunchAnPie', 'punchanpie', path='daily/%s.html'),
|
||||
cls('QUILTBAG', 'quiltbag'),
|
||||
cls('RedSpike', 'redspike'),
|
||||
cls('RumbleFall', 'rumblefall'),
|
||||
cls('SamuraisBlood', 'samuraisblood'),
|
||||
cls('Sharky', 'sharky'),
|
||||
cls('ShockwaveDarkside', 'shockwave', path="2d/%s.html"),
|
||||
cls('ShockwaveDarkside', 'shockwave', path='2d/%s.html'),
|
||||
cls('SomethingHappens', 'somethinghappens'),
|
||||
cls('SoreThumbs', 'sorethumbs'),
|
||||
cls('Striptease', 'striptease'),
|
||||
|
|
|
@ -16,25 +16,26 @@ from dosagelib.util import check_robotstxt
|
|||
|
||||
|
||||
class KeenSpotUpdater(ComicListUpdater):
|
||||
dup_templates = ('Creators/%s', "GoComics/%s", "ComicGenesis/%s")
|
||||
dup_templates = ('Creators/%s', 'GoComics/%s', 'ComicGenesis/%s')
|
||||
|
||||
# names of comics to exclude
|
||||
excluded_comics = (
|
||||
# non-standard navigation
|
||||
"BrawlInTheFamily",
|
||||
"Flipside",
|
||||
"LastBlood",
|
||||
"TheGodChild",
|
||||
"Twokinds",
|
||||
"Yirmumah",
|
||||
'BrawlInTheFamily',
|
||||
'Flipside',
|
||||
'LastBlood',
|
||||
'TheGodChild',
|
||||
'Twokinds',
|
||||
'Yirmumah',
|
||||
)
|
||||
|
||||
extra = {
|
||||
'CrowScare': 'last="20111031"',
|
||||
'Dreamless': 'last="20100726"',
|
||||
'MysticRevolution': 'path="?cid=%s"',
|
||||
'PunchAnPie': 'path="daily/%s.html"',
|
||||
'ShockwaveDarkside': 'path="2d/%s.html"',
|
||||
'CrowScare': "last='20111031'",
|
||||
'Dreamless': "last='20100726'",
|
||||
'GeneCatlow': "last='20170412'",
|
||||
'MysticRevolution': "path='?cid=%s'",
|
||||
'PunchAnPie': "path='daily/%s.html'",
|
||||
'ShockwaveDarkside': "path='2d/%s.html'",
|
||||
}
|
||||
|
||||
def collect_results(self):
|
||||
|
@ -43,14 +44,14 @@ class KeenSpotUpdater(ComicListUpdater):
|
|||
|
||||
for comiclink in data.xpath('//td[@id]/a'):
|
||||
comicurl = comiclink.attrib['href']
|
||||
name = comiclink.xpath("string()")
|
||||
name = comiclink.xpath('string()')
|
||||
try:
|
||||
if "/d/" not in comicurl:
|
||||
check_robotstxt(comicurl + "d/", self.session)
|
||||
if '/d/' not in comicurl:
|
||||
check_robotstxt(comicurl + 'd/', self.session)
|
||||
else:
|
||||
check_robotstxt(comicurl, self.session)
|
||||
except IOError as e:
|
||||
print("[%s] INFO: robots.txt denied: %s" % (name, e))
|
||||
print('[%s] INFO: robots.txt denied: %s' % (name, e))
|
||||
continue
|
||||
|
||||
self.add_comic(name, comicurl)
|
||||
|
|
Loading…
Reference in a new issue