Fix some SmackJeeves comics.

This commit is contained in:
Tobias Gruetzmacher 2016-10-30 14:30:45 +01:00
parent b6d99945f6
commit 51ed898f5d
2 changed files with 33 additions and 24 deletions

View file

@@ -17,6 +17,7 @@ class Removed(Scraper):
'unk': 'Comic was removed for an unknown reason.', 'unk': 'Comic was removed for an unknown reason.',
'brk': 'Comic navigation is broken.', 'brk': 'Comic navigation is broken.',
'mov': 'Comic moved to a new hoster and no new module was written.', 'mov': 'Comic moved to a new hoster and no new module was written.',
'mis': 'Pages are missing from the comic.',
} }
def __init__(self, name, reason='del'): def __init__(self, name, reason='del'):
@@ -258,9 +259,11 @@ class Removed(Scraper):
cls('PetiteSymphony/Generation17'), cls('PetiteSymphony/Generation17'),
cls('PunksAndNerdsOld'), cls('PunksAndNerdsOld'),
cls('RedsPlanet'), cls('RedsPlanet'),
cls('SmackJeeves/Aarrevaara'),
cls('SmackJeeves/AchievementStuck'), cls('SmackJeeves/AchievementStuck'),
cls('SmackJeeves/Allthatglitters'), cls('SmackJeeves/Allthatglitters'),
cls('SmackJeeves/AngelBeast'), cls('SmackJeeves/AngelBeast'),
cls('SmackJeeves/BetweenWorlds'),
cls('SmackJeeves/BeyondTemptation'), cls('SmackJeeves/BeyondTemptation'),
cls('SmackJeeves/Bloodyfairytale'), cls('SmackJeeves/Bloodyfairytale'),
cls('SmackJeeves/BLOT'), cls('SmackJeeves/BLOT'),
@@ -286,8 +289,10 @@ class Removed(Scraper):
cls('SmackJeeves/Ianua'), cls('SmackJeeves/Ianua'),
cls('SmackJeeves/ImminentMoose'), cls('SmackJeeves/ImminentMoose'),
cls('SmackJeeves/InthePride'), cls('SmackJeeves/InthePride'),
cls('SmackJeeves/Intoxicated'),
cls('SmackJeeves/Knife'), cls('SmackJeeves/Knife'),
cls('SmackJeeves/Kranburn'), cls('SmackJeeves/Kranburn'),
cls('SmackJeeves/LatchkeyKingdom'),
cls('SmackJeeves/LoveTwister'), cls('SmackJeeves/LoveTwister'),
cls('SmackJeeves/MegaManiacs'), cls('SmackJeeves/MegaManiacs'),
cls('SmackJeeves/MewsDynasty'), cls('SmackJeeves/MewsDynasty'),
@@ -296,13 +301,16 @@ class Removed(Scraper):
cls('SmackJeeves/NihilWandasJourney'), cls('SmackJeeves/NihilWandasJourney'),
cls('SmackJeeves/OddContact'), cls('SmackJeeves/OddContact'),
cls('SmackJeeves/OneFrameGags'), cls('SmackJeeves/OneFrameGags'),
cls('SmackJeeves/Paripety'),
cls('SmackJeeves/Plotlessnesses'), cls('SmackJeeves/Plotlessnesses'),
cls('SmackJeeves/PokemonGleamingCrystal', 'mis'),
cls('SmackJeeves/PRAGUERACE'), cls('SmackJeeves/PRAGUERACE'),
cls('SmackJeeves/PumpkinFlower'), cls('SmackJeeves/PumpkinFlower'),
cls('SmackJeeves/Razor'), cls('SmackJeeves/Razor'),
cls('SmackJeeves/SAKANA'), cls('SmackJeeves/SAKANA'),
cls('SmackJeeves/SerendipityAnEquestrianTale'), cls('SmackJeeves/SerendipityAnEquestrianTale'),
cls('SmackJeeves/ShacklesInstallment02'), cls('SmackJeeves/ShacklesInstallment02'),
cls('SmackJeeves/SoulGuardian'),
cls('SmackJeeves/TechnicolorLondon'), cls('SmackJeeves/TechnicolorLondon'),
cls('SmackJeeves/TeKscloset'), cls('SmackJeeves/TeKscloset'),
cls('SmackJeeves/TheAttackoftheRecoloursSeason1'), cls('SmackJeeves/TheAttackoftheRecoloursSeason1'),

View file

@@ -5,6 +5,8 @@
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
from six.moves.urllib.parse import urlsplit
from ..util import quote from ..util import quote
from ..scraper import _ParserScraper from ..scraper import _ParserScraper
from ..output import out from ..output import out
@@ -19,20 +21,24 @@ class SmackJeeves(_ParserScraper):
prevSearch = ( prevSearch = (
'//a[@class="nav-prev"]' + ONLY_COMICS, '//a[@class="nav-prev"]' + ONLY_COMICS,
'//a[img[@id="sj_nav_prev_img"]]' + ONLY_COMICS,
'//a[img[re:test(@alt, "prev", "i")]]' + ONLY_COMICS, '//a[img[re:test(@alt, "prev", "i")]]' + ONLY_COMICS,
'//a[img[re:test(@src, "/(prev|back)")]]' + ONLY_COMICS, '//a[img[re:test(@src, "/(prev|back|Yun2lVQ)")]]' + ONLY_COMICS,
'//a[re:test(@title, "previous", "i")]' + ONLY_COMICS, '//a[re:test(@title, "previous", "i")]' + ONLY_COMICS,
'//a[re:test(text(), "prev|back", "i")]' + ONLY_COMICS, '//a[re:test(text(), "prev|back|atras", "i")]' + ONLY_COMICS,
'//select[@class="jumpbox"]/preceding::a[1]' + ONLY_COMICS, '//select[@class="jumpbox"]/preceding::a[1]' + ONLY_COMICS,
'//form[@name="jumpbox"]/preceding::a[1]' + ONLY_COMICS,
) )
nextSearch = ( nextSearch = (
'//a[@class="nav-next"]' + ONLY_COMICS, '//a[@class="nav-next"]' + ONLY_COMICS,
'//a[img[@id="sj_nav_next_img"]]' + ONLY_COMICS,
'//a[img[re:test(@alt, "next", "i")]]' + ONLY_COMICS, '//a[img[re:test(@alt, "next", "i")]]' + ONLY_COMICS,
'//a[img[re:test(@src, "/next", "i")]]' + ONLY_COMICS, '//a[img[re:test(@src, "/(next|wJkQA07)", "i")]]' + ONLY_COMICS,
'//a[re:test(@title, "next", "i")]' + ONLY_COMICS, '//a[re:test(@title, "next", "i")]' + ONLY_COMICS,
'//a[re:test(text(), "next", "i")]' + ONLY_COMICS, '//a[re:test(text(), "next|siguiente", "i")]' + ONLY_COMICS,
'//select[@class="jumpbox"]/following::a[1]' + ONLY_COMICS, '//select[@class="jumpbox"]/following::a[1]' + ONLY_COMICS,
'//form[@name="jumpbox"]/following::a[1]' + ONLY_COMICS,
) )
imageSearch = ( imageSearch = (
@@ -57,7 +63,8 @@ class SmackJeeves(_ParserScraper):
"""Get start URL.""" """Get start URL."""
start = self.url start = self.url
if self.adult: if self.adult:
start = 'http://www.smackjeeves.com/mature.php?ref=' + quote(start) host = urlsplit(self.url).hostname
start = 'http://%s/mature.php?ref=%s' % (host, quote(start))
data = self.getPage(start) data = self.getPage(start)
startimg = None startimg = None
if not self.shouldSkipUrl(start, data): if not self.shouldSkipUrl(start, data):
@@ -80,7 +87,8 @@ class SmackJeeves(_ParserScraper):
return "%s_%s" % (name, num) return "%s_%s" % (name, num)
def shouldSkipUrl(self, url, data): def shouldSkipUrl(self, url, data):
return data.xpath('//img[contains(@src, "/images/image_na.gif")]') return (data.xpath('//img[contains(@src, "/images/image_na.gif")]') or
'/851756/175-final-story-15-paradise' in url)
@classmethod @classmethod
def getmodules(cls): def getmodules(cls):
@@ -92,7 +100,7 @@ class SmackJeeves(_ParserScraper):
cls('2Kingdoms', sub='2kingdoms'), cls('2Kingdoms', sub='2kingdoms'),
cls('355Days', sub='355days'), cls('355Days', sub='355days'),
cls('AB', sub='alistairandboggart', adult=True), cls('AB', sub='alistairandboggart', adult=True),
cls('AceOfHearts', sub='aceof-hearts', adult=True), cls('AceOfHearts', sub='aceof-hearts'),
cls('ADoodleADay', sub='adoodleaday'), cls('ADoodleADay', sub='adoodleaday'),
cls('AGirlAndHerShadow', sub='agirlandhershadow'), cls('AGirlAndHerShadow', sub='agirlandhershadow'),
cls('AGirlontheServer', sub='girlontheserver'), cls('AGirlontheServer', sub='girlontheserver'),
@@ -104,7 +112,6 @@ class SmackJeeves(_ParserScraper):
cls('AQuestionOfCharacter', sub='aqoc'), cls('AQuestionOfCharacter', sub='aqoc'),
cls('ASongforElise', sub='asongforelise', adult=True), cls('ASongforElise', sub='asongforelise', adult=True),
cls('AYuriCollab', sub='ayuricollabbitches', adult=True), cls('AYuriCollab', sub='ayuricollabbitches', adult=True),
cls('Aarrevaara', sub='aarrevaara'),
cls('AcidMonday', sub='acidmonday', adult=True), cls('AcidMonday', sub='acidmonday', adult=True),
cls('Adalsysla', sub='adalsysla'), cls('Adalsysla', sub='adalsysla'),
cls('AddictiveScience', sub='addictivescience'), cls('AddictiveScience', sub='addictivescience'),
@@ -153,7 +160,6 @@ class SmackJeeves(_ParserScraper):
cls('Betovering', sub='betovering', adult=True), cls('Betovering', sub='betovering', adult=True),
cls('BettencourtHotel', sub='bettencourt'), cls('BettencourtHotel', sub='bettencourt'),
cls('BetweenLightandDark', sub='bld'), cls('BetweenLightandDark', sub='bld'),
cls('BetweenWorlds', sub='betweenworlds', adult=True),
cls('Betwin', sub='be-twin'), cls('Betwin', sub='be-twin'),
cls('BeyondTheOrdinary', sub='bto'), cls('BeyondTheOrdinary', sub='bto'),
cls('BioRevelation', sub='biorevelation'), cls('BioRevelation', sub='biorevelation'),
@@ -174,7 +180,7 @@ class SmackJeeves(_ParserScraper):
cls('BreachofAgency', sub='breachofagency'), cls('BreachofAgency', sub='breachofagency'),
cls('BreakfastonaCliff', sub='boac'), cls('BreakfastonaCliff', sub='boac'),
cls('Burn', sub='burn'), cls('Burn', sub='burn'),
cls('ByTheBook', sub='bythebook'), cls('ByTheBook', sub='bythebook', adult=True),
cls('CafeAmargo', sub='cafeamargo'), cls('CafeAmargo', sub='cafeamargo'),
cls('CafeSuada', sub='cafesuada'), cls('CafeSuada', sub='cafesuada'),
cls('Cambion', sub='cambion', adult=True), cls('Cambion', sub='cambion', adult=True),
@@ -264,7 +270,7 @@ class SmackJeeves(_ParserScraper):
cls('ExperimentalMegaman', sub='ex90081'), cls('ExperimentalMegaman', sub='ex90081'),
cls('EyesofaDigimon', sub='eoad'), cls('EyesofaDigimon', sub='eoad'),
cls('FailureConfetti', sub='failureconfetti'), cls('FailureConfetti', sub='failureconfetti'),
cls('FairyTaleRejects', host='fairytalerejects.thewebcomic.com', adult=True), cls('FairyTaleRejects', sub='fairytalerejects', adult=True),
cls('FaithlessDigitals', sub='faithlessdigitals'), cls('FaithlessDigitals', sub='faithlessdigitals'),
cls('FalconersDailyStrips', sub='falcdaily'), cls('FalconersDailyStrips', sub='falcdaily'),
cls('FallenAngelslove', sub='fallen-angels-love'), cls('FallenAngelslove', sub='fallen-angels-love'),
@@ -285,7 +291,7 @@ class SmackJeeves(_ParserScraper):
cls('FrobertTheDemon', sub='frobby'), cls('FrobertTheDemon', sub='frobby'),
cls('FromnowonImagirl', sub='fromnowonimagirl'), cls('FromnowonImagirl', sub='fromnowonimagirl'),
cls('FruitloopAndMrDownbeat', sub='fruitbeat'), cls('FruitloopAndMrDownbeat', sub='fruitbeat'),
cls('FullSpectrumTherapy', sub='fst', adult=True), cls('FullSpectrumTherapy', sub='fst'),
cls('GamerCafe', sub='gamercafe'), cls('GamerCafe', sub='gamercafe'),
cls('GamesPeoplePlayUpdatedWeekly', sub='gamespeopleplay'), cls('GamesPeoplePlayUpdatedWeekly', sub='gamespeopleplay'),
cls('GardenofHearts', sub='gardenofhearts'), cls('GardenofHearts', sub='gardenofhearts'),
@@ -330,7 +336,6 @@ class SmackJeeves(_ParserScraper):
cls('Inhuman', sub='inhumancomic'), cls('Inhuman', sub='inhumancomic'),
cls('InsideOuTAYuriTale', sub='insideout-a-yuri-tale'), cls('InsideOuTAYuriTale', sub='insideout-a-yuri-tale'),
cls('InspiredByADream', sub='inspiredbyadream'), cls('InspiredByADream', sub='inspiredbyadream'),
cls('Intoxicated', sub='intoxicated', adult=True),
cls('Itsan8BitWorldBlankWorld', sub='8bitblankworld'), cls('Itsan8BitWorldBlankWorld', sub='8bitblankworld'),
cls('JackiesStory', sub='jackiestory'), cls('JackiesStory', sub='jackiestory'),
cls('Jantar', sub='jantar'), cls('Jantar', sub='jantar'),
@@ -370,7 +375,6 @@ class SmackJeeves(_ParserScraper):
cls('LOKI', sub='loki'), cls('LOKI', sub='loki'),
cls('LastBlockStanding', sub='lastblockstanding'), cls('LastBlockStanding', sub='lastblockstanding'),
cls('LastLivingSouls', sub='lastlivingsouls'), cls('LastLivingSouls', sub='lastlivingsouls'),
cls('LatchkeyKingdom', sub='latchkeykingdom'),
cls('LavenderLegend', sub='lavenderlegend'), cls('LavenderLegend', sub='lavenderlegend'),
cls('LeCirquedObscure', sub='cirquedobscure'), cls('LeCirquedObscure', sub='cirquedobscure'),
cls('LedbyaMadMan', sub='ledbyamadman'), cls('LedbyaMadMan', sub='ledbyamadman'),
@@ -487,7 +491,6 @@ class SmackJeeves(_ParserScraper):
cls('PantsParty', sub='partypants'), cls('PantsParty', sub='partypants'),
cls('PanzerDragonandEnigmaCompleteEdition', sub='panzerdragonandenigma'), cls('PanzerDragonandEnigmaCompleteEdition', sub='panzerdragonandenigma'),
cls('Paradox', sub='paradoxcomic', adult=True), cls('Paradox', sub='paradoxcomic', adult=True),
cls('Paripety', sub='paripety'),
cls('Pause', sub='pause'), cls('Pause', sub='pause'),
cls('PencilviewUpdatesMondayscough', sub='pencilview'), cls('PencilviewUpdatesMondayscough', sub='pencilview'),
cls('Perinto', sub='perinto'), cls('Perinto', sub='perinto'),
@@ -498,11 +501,10 @@ class SmackJeeves(_ParserScraper):
cls('PlasticKings', sub='plastickings'), cls('PlasticKings', sub='plastickings'),
cls('PlatonicBoyfriends', sub='platonicboyfriends'), cls('PlatonicBoyfriends', sub='platonicboyfriends'),
cls('PlayTime', sub='dollysplaytime'), cls('PlayTime', sub='dollysplaytime'),
cls('PleasyBeMyBoytoy', sub='pleasebemyboytoy', adult=True), cls('PleaseBeMyBoytoy', sub='pleasebemyboytoy'),
cls('PokeVenturous', sub='pokeventuras'), cls('PokeVenturous', sub='pokeventuras'),
cls('PokemonBeta', sub='pokemonbeta'), cls('PokemonBeta', sub='pokemonbeta'),
cls('PokemonCrystalDoubleNuzlockeChallenge', sub='miinuzlocke'), cls('PokemonCrystalDoubleNuzlockeChallenge', sub='miinuzlocke'),
cls('PokemonGleamingCrystal', sub='gleamingcrystal'),
cls('PokemonLANDSKY', sub='landsky'), cls('PokemonLANDSKY', sub='landsky'),
cls('PokemonMysteryDungeonTeamCrystal', sub='crystalmysterydungeon'), cls('PokemonMysteryDungeonTeamCrystal', sub='crystalmysterydungeon'),
cls('PokemonParallel', sub='pokemon-parallel'), cls('PokemonParallel', sub='pokemon-parallel'),
@@ -520,7 +522,7 @@ class SmackJeeves(_ParserScraper):
cls('QueerQueen', sub='queerqueen'), cls('QueerQueen', sub='queerqueen'),
cls('RANDOM', sub='randomthecomic'), cls('RANDOM', sub='randomthecomic'),
cls('ROSIER', sub='rosier'), cls('ROSIER', sub='rosier'),
cls('RainbowMansion', sub='rainbow-mansion.thewebcomic.com', adult=True), cls('RainbowMansion', host='rainbow-mansion.thewebcomic.com', adult=True),
cls('RainLGBT', sub='rainlgbt'), cls('RainLGBT', sub='rainlgbt'),
cls('RainxSasori', sub='rainxsasori', adult=True), cls('RainxSasori', sub='rainxsasori', adult=True),
cls('RareCandyTreatment', host='www.rarecandytreatment.com'), cls('RareCandyTreatment', host='www.rarecandytreatment.com'),
@@ -532,11 +534,11 @@ class SmackJeeves(_ParserScraper):
cls('Replica', sub='replica', adult=True), cls('Replica', sub='replica', adult=True),
cls('Respectable', sub='respectable', adult=True), cls('Respectable', sub='respectable', adult=True),
cls('ReturntoEden', sub='rte'), cls('ReturntoEden', sub='rte'),
cls('RiversideExtras', host='www.riversidecomics.co', adult=True), cls('RiversideExtras', host='www.riversidecomics.co'),
cls('RottenApple', sub='rottenapple'), cls('RottenApple', sub='rottenapple'),
cls('RoyalIcing', sub='royalicing'), cls('RoyalIcing', sub='royalicing'),
cls('RuScrewed', host='ru-screwed', adult=True), cls('RuScrewed', sub='ru-screwed'),
cls('RubyNation', host='www.therubynation.com'), cls('RubyNation', sub='therubynation'),
cls('RuderiQuest', sub='ruderi'), cls('RuderiQuest', sub='ruderi'),
cls('RuneSpark', sub='runespark'), cls('RuneSpark', sub='runespark'),
cls('RyuManwebcomicversion', sub='ryuman-web'), cls('RyuManwebcomicversion', sub='ryuman-web'),
@@ -586,7 +588,6 @@ class SmackJeeves(_ParserScraper):
cls('SonicFuture', sub='sonicfuture'), cls('SonicFuture', sub='sonicfuture'),
cls('SonicSchoolRedo', sub='sonicschoolredo'), cls('SonicSchoolRedo', sub='sonicschoolredo'),
cls('SonicUniverseAsk', sub='sonicuniverseask'), cls('SonicUniverseAsk', sub='sonicuniverseask'),
cls('SoulGuardian', sub='soulguardian'),
cls('SouthernCross', host='southerncross.thewebcomic.com'), cls('SouthernCross', host='southerncross.thewebcomic.com'),
cls('SovereignTheMostAmazingComicEver', sub='mostamazingcomicever'), cls('SovereignTheMostAmazingComicEver', sub='mostamazingcomicever'),
cls('SpaghettiAndMeatballs', sub='spaghettiandmeatballs', adult=True), cls('SpaghettiAndMeatballs', sub='spaghettiandmeatballs', adult=True),
@@ -703,15 +704,15 @@ class SmackJeeves(_ParserScraper):
cls('Troublenextdoor', sub='troublenextdoor'), cls('Troublenextdoor', sub='troublenextdoor'),
cls('UglyBoysLove', sub='shounenai'), cls('UglyBoysLove', sub='shounenai'),
cls('Uglygame', sub='uglygame'), cls('Uglygame', sub='uglygame'),
cls('UndertheDeadSkies', host='underthedeadskies.thewebcomic.com', adult=True), cls('UnderTheDeadSkies', host='www.underthedeadskies.com'),
cls('UnicampaLapis', sub='ual'), cls('UnicampaLapis', sub='ual'),
cls('UpDown', sub='updown', adult=True), cls('UpDown', sub='updown', adult=True),
cls('UshalaatWorldsEnd', sub='ushala', adult=True), cls('UshalaatWorldsEnd', sub='ushala', adult=True),
cls('VACANT', sub='vacant'), cls('VACANT', sub='vacant'),
cls('Vacan7', sub='vacan7', adult=True), cls('Vacan7', sub='vacan7', adult=True),
cls('VerloreGeleentheid', host='verlore.thewebcomic.com'), cls('VerloreGeleentheid', host='verlore.thewebcomic.com'),
cls('Void', sub='vtgtahr', adult=True),
cls('VoidMisadventures', sub='voidmisadventures'), cls('VoidMisadventures', sub='voidmisadventures'),
cls('VoidTheGuideToAHealthyRelationship', sub='vtgtahr', adult=True),
cls('VoyageoftheBrokenPromise', sub='voyageofthebrokenpromise', adult=True), cls('VoyageoftheBrokenPromise', sub='voyageofthebrokenpromise', adult=True),
cls('WHATaboutSHADOWS', sub='was'), cls('WHATaboutSHADOWS', sub='was'),
cls('WakeEcho', sub='echo'), cls('WakeEcho', sub='echo'),