Fix some modules.

This commit is contained in:
Tobias Gruetzmacher 2017-02-06 00:05:05 +01:00
parent 06e925449e
commit 20ca5d7fc2
5 changed files with 15 additions and 20 deletions

View file

@ -231,6 +231,10 @@ class Annyseed(_ParserScraper):
imageSearch = '//div/img[contains(@src, "Annyseed")]'
prevSearch = '//a[img[@name="Previousbtn"]]'
help = 'Index format: nnn'
FIX_RE = compile(r'Annyseed/Finished%20For%20Print/')
def imageUrlModifier(self, image_url, data):
    """Return image_url with every match of self.FIX_RE removed.

    FIX_RE matches the 'Annyseed/Finished%20For%20Print/' path segment,
    so that segment is cut out of the URL. URLs without it are returned
    unchanged. The data argument is not used.
    """
    return self.FIX_RE.sub('', image_url)
class AoiHouse(_ParserScraper):

View file

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
# Copyright (C) 2015-2017 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
@ -206,9 +206,7 @@ class ComicFury(_ParserScraper):
# BeyondTheOrdinary has a duplicate in SmackJeeves/BeyondTheOrdinary
cls('BibleBelt', 'biblebelt'),
cls('BicycleBoy', 'bicycleboy'),
cls('BigBookOfLameJokes', 'bigbook'),
cls('BilateralComics', 'bilateralcomics'),
cls('BiMorphon', 'bimorphon'),
cls('BionicleTales', 'bionicletales'),
cls('BioSyte', 'biosyte'),
cls('Birdman', 'birdman'),
@ -728,7 +726,6 @@ class ComicFury(_ParserScraper):
cls('MushroomGo', 'mushroomgo'),
cls('MutantElf', 'mutantelf'),
cls('Mutigenx', 'mutigenx'),
cls('MuttInTheMiddle', 'muttinthemiddle'),
cls('MVPL', 'mvpl'),
cls('MyForgottenPast', 'myforgottenpast'),
cls('MyGirlfriendTheSecretAgent', 'mygfthesecagent'),
@ -849,7 +846,6 @@ class ComicFury(_ParserScraper):
cls('RED', 'redthecomic'),
# RedVelvetRequiem has a duplicate in SmackJeeves/RedVelvetRequiem
cls('RegardingDandelions', 'regardingdandelions'),
cls('ReiketsuouNoKimi', 'rnk'),
cls('Remedy', 'remedy'),
cls('RememberBedlam', 'bedlam'),
cls('RequiemsGate', 'requiemsgate'),
@ -943,7 +939,6 @@ class ComicFury(_ParserScraper):
cls('StarSovereignSeriesMuladhara', 'muladhara'),
cls('STARWARSXWingAlliance', 'x-wingalliance'),
cls('STASonicTheAdventure', 'sta'),
cls('SteamSword', 'steamsword'),
cls('StereotyPixs', 'stereotypixs'),
cls('StevenAndTheCrystalGMs', 'crystalgms'),
cls('StickLife', 'sticklife'),
@ -1118,7 +1113,6 @@ class ComicFury(_ParserScraper):
cls('TigerWrestling', 'anybodythere'),
cls('Timezone', 'timezone'),
cls('Tinytown', 'tinytown'),
cls('Tiziana', 'tiziana'),
cls('TM47', 'tm47'),
cls('TohvelinTuhinoita', 'tuhinaloota'),
cls('TOLVA', 'tolva'),

View file

@ -69,9 +69,9 @@ class ComicSherpa(_ParserScraper):
cls('DBCartoons', 'csnvt'),
cls('DevinCraneComicStripGhostwriter', 'csadf'),
cls('DoghouseInYourSoul', 'cstwx'),
cls('DoingTime', 'csvuk'),
cls('DontPickTheFlowers', 'cswfs'),
cls('Dragin', 'cswgz'),
cls('DrWhiskers', 'cswvl'),
cls('DumbQuestionBadAnswer', 'cskro'),
cls('DungeonHordes', 'csnlo'),
cls('DustSpecks', 'csqgq'),

View file

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
# Copyright (C) 2015-2017 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
@ -27,17 +27,10 @@ class EarthsongSaga(_ParserScraper):
'earthsongsaga.com/') for x in urls]
def namer(self, image_url, page_url):
    """Build a file name from the volume/chapter layout of image_url.

    The URL must contain 'images/vol<N>/ch<M>/<name>.<ext>' (matched
    case-insensitively). The result is 'volNN_chMM_<name>', with volume
    and chapter zero-padded to two digits and <name> kept verbatim, so
    non-numeric names such as cover pages need no special case.

    The page_url argument is not used. An image_url that does not follow
    the layout raises AttributeError, because the failed match is None.
    """
    imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(.*)\.\w+$',
                       IGNORECASE).search(image_url)
    return 'vol%02d_ch%02d_%s' % (
        int(imgmatch.group(1)), int(imgmatch.group(2)), imgmatch.group(3))
class EasilyAmused(_WordPressScraper):
@ -209,6 +202,9 @@ class ExtraFabulousComics(_WordPressScraper):
pagepart = compile(r'/comic/([^/]+)/$').search(page_url).group(1)
return '_'.join((pagepart, imagename))
def shouldSkipUrl(self, url, data):
    """Return the iframe nodes found inside the page's comic container.

    Evaluates '//div[@id="comic"]//iframe' on data and returns the result
    as-is, so the value is truthy exactly when at least one match exists.
    The url argument is not used.
    """
    # NOTE(review): assumes data is a parsed page exposing xpath(),
    # presumably an lxml tree -- confirm against the caller.
    return data.xpath('//div[@id="comic"]//iframe')
class ExtraLife(_BasicScraper):
url = 'http://www.myextralife.com/'

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
# Copyright (C) 2015-2017 Tobias Gruetzmacher
"""
Script to get ComicFury comics and save the info in a JSON file for further
processing.
@ -107,6 +107,7 @@ class ComicFuryUpdater(ComicListUpdater):
# images gone
"BaseballCapsAndTiaras",
"BiMorphon",
"CROSSWORLDSNEXUS",
"Fathead",
"GOODBYEREPTILIANS",