Adapt SmackJeeves to new site design (fixes #144)
Some things got lost along the way:
- Since there is no comprehensive comic directory anymore, the automatic update script was removed. New comics need to be added manually.
- Some authors used the opportunity to move from SmackJeeves to somewhere else; some of those got new modules (either standalone or ComicFury).
- A bunch of comics just disappeared...
This commit is contained in:
parent
02c0da24fa
commit
963db7f448
11 changed files with 696 additions and 898 deletions
|
@ -634,6 +634,7 @@ class ComicFury(_ParserScraper):
|
||||||
cls('MadGirl', 'madgirl'),
|
cls('MadGirl', 'madgirl'),
|
||||||
cls('MagicElDesencuentro', 'magiceldesencuentro', 'es'),
|
cls('MagicElDesencuentro', 'magiceldesencuentro', 'es'),
|
||||||
cls('MagicTheScattering', 'magicthescattering'),
|
cls('MagicTheScattering', 'magicthescattering'),
|
||||||
|
cls('Magience', 'magience'),
|
||||||
cls('MAGISAPARASAYOupdatesMonFri', 'mag-isa'),
|
cls('MAGISAPARASAYOupdatesMonFri', 'mag-isa'),
|
||||||
cls('MagnaComica', 'magnacomica'),
|
cls('MagnaComica', 'magnacomica'),
|
||||||
cls('Maluk', 'maluk'),
|
cls('Maluk', 'maluk'),
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2018 Tobias Gruetzmacher
|
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||||
|
|
||||||
from __future__ import absolute_import, division, print_function
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
|
@ -214,6 +214,12 @@ class DorkTower(_ParserScraper):
|
||||||
prevSearch = '//a[%s][text()="Previous"]' % xpath_class('btn')
|
prevSearch = '//a[%s][text()="Previous"]' % xpath_class('btn')
|
||||||
|
|
||||||
|
|
||||||
|
class DoomsdayMyDear(_ParserScraper):
|
||||||
|
url = 'http://doomsdaymydear.com/'
|
||||||
|
imageSearch = '//img[{}]'.format(xpath_class('attachment-full'))
|
||||||
|
prevSearch = '//a[{}]'.format(xpath_class('previous-webcomic-link'))
|
||||||
|
|
||||||
|
|
||||||
class Dracula(_BasicScraper):
|
class Dracula(_BasicScraper):
|
||||||
url = 'http://draculacomic.net/'
|
url = 'http://draculacomic.net/'
|
||||||
stripUrl = url + 'comic.php?comicID=%s'
|
stripUrl = url + 'comic.php?comicID=%s'
|
||||||
|
|
|
@ -109,6 +109,10 @@ class FonFlatter(_ParserScraper):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ForestHill(_WordPressScraper):
|
||||||
|
url = 'https://www.foresthillcomic.org/'
|
||||||
|
|
||||||
|
|
||||||
class ForLackOfABetterComic(_BasicScraper):
|
class ForLackOfABetterComic(_BasicScraper):
|
||||||
url = 'http://forlackofabettercomic.com/'
|
url = 'http://forlackofabettercomic.com/'
|
||||||
rurl = r'http://(?:www\.)?forlackofabettercomic\.com/'
|
rurl = r'http://(?:www\.)?forlackofabettercomic\.com/'
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||||
|
|
||||||
from __future__ import absolute_import, division, print_function
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
|
@ -411,21 +411,27 @@ class Removed(Scraper):
|
||||||
cls('SmackJeeves/AchievementStuck'),
|
cls('SmackJeeves/AchievementStuck'),
|
||||||
cls('SmackJeeves/AGirlAndHerShadow'),
|
cls('SmackJeeves/AGirlAndHerShadow'),
|
||||||
cls('SmackJeeves/Allthatglitters'),
|
cls('SmackJeeves/Allthatglitters'),
|
||||||
|
cls('SmackJeeves/AloversRule'),
|
||||||
cls('SmackJeeves/Anathemacomics'),
|
cls('SmackJeeves/Anathemacomics'),
|
||||||
cls('SmackJeeves/AngelBeast'),
|
cls('SmackJeeves/AngelBeast'),
|
||||||
cls('SmackJeeves/ArchportCityChronicles'),
|
cls('SmackJeeves/ArchportCityChronicles'),
|
||||||
cls('SmackJeeves/AwesomeSauce'),
|
cls('SmackJeeves/AwesomeSauce'),
|
||||||
|
cls('SmackJeeves/BetweenLightandDark'),
|
||||||
cls('SmackJeeves/BetweenWorlds'),
|
cls('SmackJeeves/BetweenWorlds'),
|
||||||
cls('SmackJeeves/BeyondTemptation'),
|
cls('SmackJeeves/BeyondTemptation'),
|
||||||
|
cls('SmackJeeves/BLDShortComics'),
|
||||||
cls('SmackJeeves/Bloodyfairytale'),
|
cls('SmackJeeves/Bloodyfairytale'),
|
||||||
cls('SmackJeeves/BLOT'),
|
cls('SmackJeeves/BLOT'),
|
||||||
cls('SmackJeeves/BlueWell'),
|
cls('SmackJeeves/BlueWell'),
|
||||||
|
cls('SmackJeeves/BreakfastonaCliff'),
|
||||||
cls('SmackJeeves/CafeAmargo'),
|
cls('SmackJeeves/CafeAmargo'),
|
||||||
cls('SmackJeeves/Captor'),
|
cls('SmackJeeves/Captor'),
|
||||||
cls('SmackJeeves/ChaosTheory2005'),
|
cls('SmackJeeves/ChaosTheory2005'),
|
||||||
|
cls('SmackJeeves/CleanCure'),
|
||||||
cls('SmackJeeves/DaddysGirl'),
|
cls('SmackJeeves/DaddysGirl'),
|
||||||
cls('SmackJeeves/Debtsettlement'),
|
cls('SmackJeeves/Debtsettlement'),
|
||||||
cls('SmackJeeves/DebtSettlement2OperationExtinction'),
|
cls('SmackJeeves/DebtSettlement2OperationExtinction'),
|
||||||
|
cls('SmackJeeves/DefyingGravityTheFourGreatGuardians'),
|
||||||
cls('SmackJeeves/Destinationunknown'),
|
cls('SmackJeeves/Destinationunknown'),
|
||||||
cls('SmackJeeves/DevilTrainee'),
|
cls('SmackJeeves/DevilTrainee'),
|
||||||
cls('SmackJeeves/DevilTraineeSpanish'),
|
cls('SmackJeeves/DevilTraineeSpanish'),
|
||||||
|
@ -436,7 +442,9 @@ class Removed(Scraper):
|
||||||
cls('SmackJeeves/EternalKnights'),
|
cls('SmackJeeves/EternalKnights'),
|
||||||
cls('SmackJeeves/EvD'),
|
cls('SmackJeeves/EvD'),
|
||||||
cls('SmackJeeves/FeathersPI'),
|
cls('SmackJeeves/FeathersPI'),
|
||||||
|
cls('SmackJeeves/FemmeSchism'),
|
||||||
cls('SmackJeeves/FireWire'),
|
cls('SmackJeeves/FireWire'),
|
||||||
|
cls('SmackJeeves/FrenzyRedux'),
|
||||||
cls('SmackJeeves/FrogKing'),
|
cls('SmackJeeves/FrogKing'),
|
||||||
cls('SmackJeeves/FuckMyLife'),
|
cls('SmackJeeves/FuckMyLife'),
|
||||||
cls('SmackJeeves/FurtherDowntheRabbitHole'),
|
cls('SmackJeeves/FurtherDowntheRabbitHole'),
|
||||||
|
@ -445,21 +453,33 @@ class Removed(Scraper):
|
||||||
cls('SmackJeeves/GraveImpressions'),
|
cls('SmackJeeves/GraveImpressions'),
|
||||||
cls('SmackJeeves/GreenKirbyandabunchofotherpeopledoinstuff'),
|
cls('SmackJeeves/GreenKirbyandabunchofotherpeopledoinstuff'),
|
||||||
cls('SmackJeeves/Harfang'),
|
cls('SmackJeeves/Harfang'),
|
||||||
|
cls('SmackJeeves/HIPS'),
|
||||||
cls('SmackJeeves/HotChocolate'),
|
cls('SmackJeeves/HotChocolate'),
|
||||||
|
cls('SmackJeeves/Hybristorific'),
|
||||||
cls('SmackJeeves/Ianua'),
|
cls('SmackJeeves/Ianua'),
|
||||||
cls('SmackJeeves/ImminentMoose'),
|
cls('SmackJeeves/ImminentMoose'),
|
||||||
cls('SmackJeeves/InthePride'),
|
cls('SmackJeeves/InthePride'),
|
||||||
cls('SmackJeeves/Intoxicated'),
|
cls('SmackJeeves/Intoxicated'),
|
||||||
|
cls('SmackJeeves/Jantarpol'),
|
||||||
cls('SmackJeeves/Knife'),
|
cls('SmackJeeves/Knife'),
|
||||||
cls('SmackJeeves/Kranburn'),
|
cls('SmackJeeves/Kranburn'),
|
||||||
|
cls('SmackJeeves/KuroNeko'),
|
||||||
|
cls('SmackJeeves/LastLivingSouls'),
|
||||||
cls('SmackJeeves/LatchkeyKingdom'),
|
cls('SmackJeeves/LatchkeyKingdom'),
|
||||||
|
cls('SmackJeeves/LegendsofMobiusBookOne'),
|
||||||
|
cls('SmackJeeves/LiliBleu'),
|
||||||
cls('SmackJeeves/LoveTwister'),
|
cls('SmackJeeves/LoveTwister'),
|
||||||
|
cls('SmackJeeves/MagicalGirlAlice'),
|
||||||
|
cls('SmackJeeves/MasqueradeWTTM'),
|
||||||
|
cls('SmackJeeves/MegaManBattleNetwork7'),
|
||||||
cls('SmackJeeves/MegaManiacs'),
|
cls('SmackJeeves/MegaManiacs'),
|
||||||
cls('SmackJeeves/MerirosvotSeikkailumerella'),
|
cls('SmackJeeves/MerirosvotSeikkailumerella'),
|
||||||
cls('SmackJeeves/MewsDynasty'),
|
cls('SmackJeeves/MewsDynasty'),
|
||||||
|
cls('SmackJeeves/MixupofallMixups'),
|
||||||
cls('SmackJeeves/MomthegamestorerippedusoffAGAIN'),
|
cls('SmackJeeves/MomthegamestorerippedusoffAGAIN'),
|
||||||
cls('SmackJeeves/MoonlitDawnAMythicalTale'),
|
cls('SmackJeeves/MoonlitDawnAMythicalTale'),
|
||||||
cls('SmackJeeves/MyBoyfriendisaMobBoss'),
|
cls('SmackJeeves/MyBoyfriendisaMobBoss'),
|
||||||
|
cls('SmackJeeves/MyTrollLife'),
|
||||||
cls('SmackJeeves/NihilWandasJourney'),
|
cls('SmackJeeves/NihilWandasJourney'),
|
||||||
cls('SmackJeeves/OddContact'),
|
cls('SmackJeeves/OddContact'),
|
||||||
cls('SmackJeeves/OneFrameGags'),
|
cls('SmackJeeves/OneFrameGags'),
|
||||||
|
@ -472,24 +492,37 @@ class Removed(Scraper):
|
||||||
cls('SmackJeeves/PumpkinFlower'),
|
cls('SmackJeeves/PumpkinFlower'),
|
||||||
cls('SmackJeeves/Razor'),
|
cls('SmackJeeves/Razor'),
|
||||||
cls('SmackJeeves/SAKANA'),
|
cls('SmackJeeves/SAKANA'),
|
||||||
|
cls('SmackJeeves/SenoireDelirium'),
|
||||||
cls('SmackJeeves/SerendipityAnEquestrianTale'),
|
cls('SmackJeeves/SerendipityAnEquestrianTale'),
|
||||||
cls('SmackJeeves/ShacklesInstallment02'),
|
cls('SmackJeeves/ShacklesInstallment02'),
|
||||||
|
cls('SmackJeeves/SimonSues'),
|
||||||
|
cls('SmackJeeves/SonicUniverseAsk'),
|
||||||
cls('SmackJeeves/SoulGuardian'),
|
cls('SmackJeeves/SoulGuardian'),
|
||||||
|
cls('SmackJeeves/Spidersilk', 'mov'),
|
||||||
|
cls('SmackJeeves/Symbios'),
|
||||||
cls('SmackJeeves/TechnicolorLondon'),
|
cls('SmackJeeves/TechnicolorLondon'),
|
||||||
cls('SmackJeeves/TeKscloset'),
|
cls('SmackJeeves/TeKscloset'),
|
||||||
cls('SmackJeeves/TheAttackoftheRecoloursSeason1'),
|
cls('SmackJeeves/TheAttackoftheRecoloursSeason1'),
|
||||||
|
cls('SmackJeeves/TheCurtandTonyShow'),
|
||||||
|
cls('SmackJeeves/TheDarkAgeofMobius'),
|
||||||
|
cls('SmackJeeves/TheHobbitbic'),
|
||||||
cls('SmackJeeves/ThehumanBEing'),
|
cls('SmackJeeves/ThehumanBEing'),
|
||||||
|
cls('SmackJeeves/TheKeyToReality'),
|
||||||
|
cls('SmackJeeves/TheLostland'),
|
||||||
cls('SmackJeeves/TheMewExperiment'),
|
cls('SmackJeeves/TheMewExperiment'),
|
||||||
cls('SmackJeeves/TheRandomObscureFairyTaleNoOnesEverReallyHeardOf'),
|
cls('SmackJeeves/TheRandomObscureFairyTaleNoOnesEverReallyHeardOf'),
|
||||||
cls('SmackJeeves/TheSomewhereOther'),
|
cls('SmackJeeves/TheSomewhereOther'),
|
||||||
cls('SmackJeeves/TheWastelands', 'mis'),
|
cls('SmackJeeves/TheWastelands', 'mis'),
|
||||||
|
cls('SmackJeeves/ThinkBeforeYouThink', 'mov'),
|
||||||
cls('SmackJeeves/ThroughTheWonkyEye'),
|
cls('SmackJeeves/ThroughTheWonkyEye'),
|
||||||
cls('SmackJeeves/TitleUnrelated'),
|
cls('SmackJeeves/TitleUnrelated'),
|
||||||
cls('SmackJeeves/TotalPokemonIsland'),
|
cls('SmackJeeves/TotalPokemonIsland'),
|
||||||
cls('SmackJeeves/TrillyAndSilly'),
|
cls('SmackJeeves/TrillyAndSilly'),
|
||||||
|
cls('SmackJeeves/TRIPP'),
|
||||||
cls('SmackJeeves/VampireFetish'),
|
cls('SmackJeeves/VampireFetish'),
|
||||||
cls('SmackJeeves/WolfWolf'),
|
cls('SmackJeeves/WolfWolf'),
|
||||||
cls('SmackJeeves/WonderTheatre'),
|
cls('SmackJeeves/WonderTheatre'),
|
||||||
|
cls('SmackJeeves/YouAreTheReasonForTheEndOfTheWorld'),
|
||||||
cls('SnowFlakes'),
|
cls('SnowFlakes'),
|
||||||
cls('StrawberryDeathCake'),
|
cls('StrawberryDeathCake'),
|
||||||
cls('Stubble'),
|
cls('Stubble'),
|
||||||
|
@ -629,6 +662,11 @@ class Renamed(Scraper):
|
||||||
cls('PetiteSymphony/Djandora', 'ComicsBreak/Djandora'),
|
cls('PetiteSymphony/Djandora', 'ComicsBreak/Djandora'),
|
||||||
cls('PetiteSymphony/Generation17', 'ComicsBreak/Generation17'),
|
cls('PetiteSymphony/Generation17', 'ComicsBreak/Generation17'),
|
||||||
cls('SmackJeeves/CityFolk', 'ComicFury/CityFolk'),
|
cls('SmackJeeves/CityFolk', 'ComicFury/CityFolk'),
|
||||||
|
cls('SmackJeeves/DoomsdayMyDear', 'DoomsdayMyDear'),
|
||||||
|
cls('SmackJeeves/ForestHill', 'ForestHill'),
|
||||||
|
cls('SmackJeeves/Magience', 'ComicFury/Magience'),
|
||||||
|
cls('SmackJeeves/RiversideExtras', 'RiversideExtras'),
|
||||||
|
cls('SmackJeeves/StarTrip', 'StarTrip'),
|
||||||
cls('TracyAndTristan', 'ComicFury/TracyAndTristan'),
|
cls('TracyAndTristan', 'ComicFury/TracyAndTristan'),
|
||||||
cls('Wulffmorgenthaler', 'WuMo'),
|
cls('Wulffmorgenthaler', 'WuMo'),
|
||||||
cls('ZebraGirl', 'ComicFury/ZebraGirl'),
|
cls('ZebraGirl', 'ComicFury/ZebraGirl'),
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||||
|
|
||||||
from __future__ import absolute_import, division, print_function
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
|
@ -108,6 +108,12 @@ class Replay(_ParserScraper):
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|
||||||
|
class RiversideExtras(_ParserScraper):
|
||||||
|
url = 'https://riversidecomics.com/'
|
||||||
|
imageSearch = '//div[{}]//img'.format(xpath_class('webcomic-image'))
|
||||||
|
prevSearch = '//a[{}]'.format(xpath_class('previous-webcomic-link'))
|
||||||
|
|
||||||
|
|
||||||
class RomanticallyApocalyptic(_ParserScraper):
|
class RomanticallyApocalyptic(_ParserScraper):
|
||||||
url = 'http://romanticallyapocalyptic.com/'
|
url = 'http://romanticallyapocalyptic.com/'
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
|
|
|
@ -507,6 +507,10 @@ class StarfireAgency(_WordPressScraper):
|
||||||
return chapter + '_' + filename
|
return chapter + '_' + filename
|
||||||
|
|
||||||
|
|
||||||
|
class StarTrip(_ComicControlScraper):
|
||||||
|
url = 'https://www.startripcomic.com/'
|
||||||
|
|
||||||
|
|
||||||
class StationV3(_ParserScraper):
|
class StationV3(_ParserScraper):
|
||||||
url = 'http://www.stationv3.com/'
|
url = 'http://www.stationv3.com/'
|
||||||
stripUrl = url + 'd3/%s.html'
|
stripUrl = url + 'd3/%s.html'
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||||
"""
|
"""
|
||||||
Script to get ComicFury comics and save the info in a JSON file for further
|
Script to get ComicFury comics and save the info in a JSON file for further
|
||||||
processing.
|
processing.
|
||||||
|
@ -20,7 +20,7 @@ class ComicFuryUpdater(ComicListUpdater):
|
||||||
MIN_COMICS = 90
|
MIN_COMICS = 90
|
||||||
|
|
||||||
dup_templates = ('ComicSherpa/%s', 'Creators/%s', 'GoComics/%s',
|
dup_templates = ('ComicSherpa/%s', 'Creators/%s', 'GoComics/%s',
|
||||||
'KeenSpot/%s', 'SmackJeeves/%s', 'Arcamax/%s')
|
'KeenSpot/%s', 'Arcamax/%s')
|
||||||
|
|
||||||
langmap = {
|
langmap = {
|
||||||
'german': 'de',
|
'german': 'de',
|
||||||
|
@ -158,8 +158,8 @@ class ComicFuryUpdater(ComicListUpdater):
|
||||||
def collect_results(self):
|
def collect_results(self):
|
||||||
"""Parse all search result pages."""
|
"""Parse all search result pages."""
|
||||||
# Sort by page count, so we can abort when we get under some threshold.
|
# Sort by page count, so we can abort when we get under some threshold.
|
||||||
baseUrl = ('http://comicfury.com/search.php?search=1&webcomics=1&' +
|
baseUrl = ('https://comicfury.com/search.php?search=1&webcomics=1&' +
|
||||||
'query=&worder=1&asc=0&incvi=1&incse=1&incnu=1&incla=1&' +
|
'query=&worder=1&asc=0&incvi=2&incnu=2&incla=2&incse=2&' +
|
||||||
'all_ge=1&all_st=1&all_la=1&page=%d')
|
'all_ge=1&all_st=1&all_la=1&page=%d')
|
||||||
last_count = 999
|
last_count = 999
|
||||||
page = 1
|
page = 1
|
||||||
|
|
|
@ -8,7 +8,7 @@ d=$(dirname $0)
|
||||||
if [ $# -ge 1 ]; then
|
if [ $# -ge 1 ]; then
|
||||||
list="$*"
|
list="$*"
|
||||||
else
|
else
|
||||||
list="arcamax comicfury comicgenesis creators gocomics keenspot smackjeeves webcomicfactory comicskingdom"
|
list="arcamax comicfury comicgenesis creators gocomics keenspot webcomicfactory comicskingdom"
|
||||||
fi
|
fi
|
||||||
for script in $list; do
|
for script in $list; do
|
||||||
echo "Executing ${script}.py"
|
echo "Executing ${script}.py"
|
||||||
|
|
|
@ -1,172 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
|
||||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
|
||||||
"""
|
|
||||||
Script to get a list of smackjeeves.com comics and save the info in a JSON file
|
|
||||||
for further processing.
|
|
||||||
"""
|
|
||||||
from __future__ import absolute_import, division, print_function
|
|
||||||
|
|
||||||
import sys
|
|
||||||
from six.moves.urllib.parse import urlsplit
|
|
||||||
|
|
||||||
from scriptutil import ComicListUpdater
|
|
||||||
|
|
||||||
|
|
||||||
class SmackJeevesUpdater(ComicListUpdater):
|
|
||||||
# Absolute minimum number of pages a comic may have (restrict search space)
|
|
||||||
MIN_COMICS = 90
|
|
||||||
|
|
||||||
# names of comics to exclude
|
|
||||||
excluded_comics = (
|
|
||||||
# comic moved/we have a better module
|
|
||||||
"Amya",
|
|
||||||
"Footloose",
|
|
||||||
"TitleUnrelated",
|
|
||||||
|
|
||||||
# does not follow standard layout
|
|
||||||
"300DaysOfSyao",
|
|
||||||
"ADifferentPerspective",
|
|
||||||
"Captor",
|
|
||||||
"ClubLove",
|
|
||||||
"Comatose",
|
|
||||||
"DeSTRESS",
|
|
||||||
"DreamCatcher",
|
|
||||||
"Fumiko",
|
|
||||||
"GART",
|
|
||||||
"GarytheAlchemist",
|
|
||||||
"ItoshiisCrazyNuzlockeAdventures",
|
|
||||||
"JennyHaniver",
|
|
||||||
"KiLAiLO",
|
|
||||||
"LoudEra",
|
|
||||||
"LunarHill",
|
|
||||||
"Mafiagame",
|
|
||||||
"MylifewithFel",
|
|
||||||
"MyLifewithFelENESPANOL",
|
|
||||||
"NegativeZen",
|
|
||||||
"Nemutionpobae",
|
|
||||||
"NightShot",
|
|
||||||
"NormalIsBoring",
|
|
||||||
"OpticalDisarray",
|
|
||||||
"PicturesofYou",
|
|
||||||
"Pornjunkiesstrip",
|
|
||||||
"PrettyUgly",
|
|
||||||
"Project217",
|
|
||||||
"RemmyzRandomz",
|
|
||||||
"Ribon",
|
|
||||||
"RubysWorld",
|
|
||||||
"ShinkaTheLastEevee",
|
|
||||||
"SimplePixel",
|
|
||||||
"SladesMansionofawesomeness",
|
|
||||||
"SpaceSchool",
|
|
||||||
"SushiGummy",
|
|
||||||
"TC2KsPokemobians",
|
|
||||||
"TheAfterSubtract",
|
|
||||||
"ThePokemonArtBox",
|
|
||||||
"THEVOIDWEBCOMIC",
|
|
||||||
"ToDefeatThemAll",
|
|
||||||
"TwoKeys",
|
|
||||||
"Vbcomics",
|
|
||||||
"WerewolfRichard",
|
|
||||||
|
|
||||||
# has no previous comic link
|
|
||||||
"ThreadCrashers",
|
|
||||||
"AchievementStuck",
|
|
||||||
|
|
||||||
# images are 403 forbidden
|
|
||||||
"AngelJunkPileFelix",
|
|
||||||
"AntavioussGenLab",
|
|
||||||
"Harfang",
|
|
||||||
"Okamirai",
|
|
||||||
|
|
||||||
# missing images
|
|
||||||
"AGirlAndHerShadow",
|
|
||||||
"Carciphona",
|
|
||||||
"CatboyattheCon",
|
|
||||||
"ContraandtheSpamDump",
|
|
||||||
"Darkkyosshorts",
|
|
||||||
"DollarStoreCaviar",
|
|
||||||
"EdgeofDecember",
|
|
||||||
"EvD",
|
|
||||||
"HAndJ",
|
|
||||||
"HEARD",
|
|
||||||
"IwillbenapoSpamDump",
|
|
||||||
"KirbysoftheAlternateDimension",
|
|
||||||
"Letsreviewshallwe",
|
|
||||||
"MegaManSpriteExpo",
|
|
||||||
"OmnisSpriteShowcase",
|
|
||||||
"PiecesofBrokenGlass",
|
|
||||||
"PlatonicManagementDilemma",
|
|
||||||
"SecretSanta2011",
|
|
||||||
"SerendipityAnEquestrianTale",
|
|
||||||
"SJArtCollab",
|
|
||||||
"SlightlyDifferent",
|
|
||||||
"TheAttackoftheRecoloursSeason1",
|
|
||||||
"ThroughTheWonkyEye",
|
|
||||||
"TotallyKotor",
|
|
||||||
"WinterMelody",
|
|
||||||
"ZonowTheHedgehog",
|
|
||||||
|
|
||||||
# missing previous link
|
|
||||||
"BambooArmonicKnightsGuild",
|
|
||||||
|
|
||||||
# broken host name
|
|
||||||
"Razor",
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, name):
|
|
||||||
super(SmackJeevesUpdater, self).__init__(name)
|
|
||||||
self.sleep = 1.5
|
|
||||||
|
|
||||||
def handle_url(self, url):
|
|
||||||
"""Parse one search result page."""
|
|
||||||
data = self.get_url(url)
|
|
||||||
|
|
||||||
num = 999
|
|
||||||
for comictag in data.cssselect('a.card'):
|
|
||||||
page_url = comictag.attrib['href']
|
|
||||||
name = comictag.cssselect('div.title')[0].text
|
|
||||||
# search for url in extra page
|
|
||||||
data2 = self.get_url(page_url)
|
|
||||||
|
|
||||||
# find out how many images this comic has
|
|
||||||
mo = data2.cssselect('div.num-pages div.value')
|
|
||||||
num = int(mo[0].text.strip().replace(',', ''))
|
|
||||||
|
|
||||||
mo = data2.cssselect('div.buttons a:last-child')
|
|
||||||
comic_url = mo[0].attrib['href']
|
|
||||||
# search for adult flag
|
|
||||||
adult = data2.cssselect('div.mature')
|
|
||||||
updates = data2.cssselect('div.updates div.value')[0].text_content()
|
|
||||||
self.add_comic(name, (comic_url, len(adult) > 0, updates), num)
|
|
||||||
|
|
||||||
next_url = data.cssselect("a.next")[0].attrib['href']
|
|
||||||
return (next_url, num)
|
|
||||||
|
|
||||||
def collect_results(self):
|
|
||||||
"""Parse all search result pages."""
|
|
||||||
# Sort by number of comics, so we can abort when we get under some
|
|
||||||
# threshold.
|
|
||||||
next_url = "http://www.smackjeeves.com/search.php?last_update=6&sort_by=5"
|
|
||||||
last_count = 999
|
|
||||||
while last_count >= self.MIN_COMICS:
|
|
||||||
print(last_count, file=sys.stderr)
|
|
||||||
next_url, last_count = self.handle_url(next_url)
|
|
||||||
|
|
||||||
def get_entry(self, name, data):
|
|
||||||
sub, top = urlsplit(data[0]).hostname.split('.', 1)
|
|
||||||
if top.lower() == "smackjeeves.com":
|
|
||||||
opt = "sub='%s'" % sub
|
|
||||||
else:
|
|
||||||
opt = "host='%s.%s'" % (sub, top)
|
|
||||||
if data[1]:
|
|
||||||
opt += ", adult=True"
|
|
||||||
if data[2] == 'Completed':
|
|
||||||
opt += ", endOfLife=True"
|
|
||||||
return u"cls('%s', %s)," % (name, opt)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
SmackJeevesUpdater(__file__).run()
|
|
|
@ -10,7 +10,7 @@ d=$(dirname $0)
|
||||||
if [ $# -ge 1 ]; then
|
if [ $# -ge 1 ]; then
|
||||||
list="$*"
|
list="$*"
|
||||||
else
|
else
|
||||||
list="arcamax comicfury comicgenesis creators gocomics keenspot smackjeeves webcomicfactory comicskingdom"
|
list="arcamax comicfury comicgenesis creators gocomics keenspot webcomicfactory comicskingdom"
|
||||||
fi
|
fi
|
||||||
for script in $list; do
|
for script in $list; do
|
||||||
target="${d}/../dosagelib/plugins/${script}.py"
|
target="${d}/../dosagelib/plugins/${script}.py"
|
||||||
|
|
Loading…
Reference in a new issue