Adapt SmackJeeves to new site design (fixes #144)
Some things got lost on the way:
- Since there is no comprehensive comic directory anymore, the automatic update script was removed. New comics need to be added manually.
- Some authors used the opportunity to move from SmackJeeves to somewhere else; some of those got new modules (either standalone or ComicFury).
- A bunch of comics just disappeared...
This commit is contained in:
parent
02c0da24fa
commit
963db7f448
11 changed files with 696 additions and 898 deletions
|
@ -634,6 +634,7 @@ class ComicFury(_ParserScraper):
|
|||
cls('MadGirl', 'madgirl'),
|
||||
cls('MagicElDesencuentro', 'magiceldesencuentro', 'es'),
|
||||
cls('MagicTheScattering', 'magicthescattering'),
|
||||
cls('Magience', 'magience'),
|
||||
cls('MAGISAPARASAYOupdatesMonFri', 'mag-isa'),
|
||||
cls('MagnaComica', 'magnacomica'),
|
||||
cls('Maluk', 'maluk'),
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2018 Tobias Gruetzmacher
|
||||
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
|
@ -214,6 +214,12 @@ class DorkTower(_ParserScraper):
|
|||
prevSearch = '//a[%s][text()="Previous"]' % xpath_class('btn')
|
||||
|
||||
|
||||
class DoomsdayMyDear(_ParserScraper):
|
||||
url = 'http://doomsdaymydear.com/'
|
||||
imageSearch = '//img[{}]'.format(xpath_class('attachment-full'))
|
||||
prevSearch = '//a[{}]'.format(xpath_class('previous-webcomic-link'))
|
||||
|
||||
|
||||
class Dracula(_BasicScraper):
|
||||
url = 'http://draculacomic.net/'
|
||||
stripUrl = url + 'comic.php?comicID=%s'
|
||||
|
|
|
@ -109,6 +109,10 @@ class FonFlatter(_ParserScraper):
|
|||
)
|
||||
|
||||
|
||||
class ForestHill(_WordPressScraper):
|
||||
url = 'https://www.foresthillcomic.org/'
|
||||
|
||||
|
||||
class ForLackOfABetterComic(_BasicScraper):
|
||||
url = 'http://forlackofabettercomic.com/'
|
||||
rurl = r'http://(?:www\.)?forlackofabettercomic\.com/'
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
||||
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
|
@ -411,21 +411,27 @@ class Removed(Scraper):
|
|||
cls('SmackJeeves/AchievementStuck'),
|
||||
cls('SmackJeeves/AGirlAndHerShadow'),
|
||||
cls('SmackJeeves/Allthatglitters'),
|
||||
cls('SmackJeeves/AloversRule'),
|
||||
cls('SmackJeeves/Anathemacomics'),
|
||||
cls('SmackJeeves/AngelBeast'),
|
||||
cls('SmackJeeves/ArchportCityChronicles'),
|
||||
cls('SmackJeeves/AwesomeSauce'),
|
||||
cls('SmackJeeves/BetweenLightandDark'),
|
||||
cls('SmackJeeves/BetweenWorlds'),
|
||||
cls('SmackJeeves/BeyondTemptation'),
|
||||
cls('SmackJeeves/BLDShortComics'),
|
||||
cls('SmackJeeves/Bloodyfairytale'),
|
||||
cls('SmackJeeves/BLOT'),
|
||||
cls('SmackJeeves/BlueWell'),
|
||||
cls('SmackJeeves/BreakfastonaCliff'),
|
||||
cls('SmackJeeves/CafeAmargo'),
|
||||
cls('SmackJeeves/Captor'),
|
||||
cls('SmackJeeves/ChaosTheory2005'),
|
||||
cls('SmackJeeves/CleanCure'),
|
||||
cls('SmackJeeves/DaddysGirl'),
|
||||
cls('SmackJeeves/Debtsettlement'),
|
||||
cls('SmackJeeves/DebtSettlement2OperationExtinction'),
|
||||
cls('SmackJeeves/DefyingGravityTheFourGreatGuardians'),
|
||||
cls('SmackJeeves/Destinationunknown'),
|
||||
cls('SmackJeeves/DevilTrainee'),
|
||||
cls('SmackJeeves/DevilTraineeSpanish'),
|
||||
|
@ -436,7 +442,9 @@ class Removed(Scraper):
|
|||
cls('SmackJeeves/EternalKnights'),
|
||||
cls('SmackJeeves/EvD'),
|
||||
cls('SmackJeeves/FeathersPI'),
|
||||
cls('SmackJeeves/FemmeSchism'),
|
||||
cls('SmackJeeves/FireWire'),
|
||||
cls('SmackJeeves/FrenzyRedux'),
|
||||
cls('SmackJeeves/FrogKing'),
|
||||
cls('SmackJeeves/FuckMyLife'),
|
||||
cls('SmackJeeves/FurtherDowntheRabbitHole'),
|
||||
|
@ -445,21 +453,33 @@ class Removed(Scraper):
|
|||
cls('SmackJeeves/GraveImpressions'),
|
||||
cls('SmackJeeves/GreenKirbyandabunchofotherpeopledoinstuff'),
|
||||
cls('SmackJeeves/Harfang'),
|
||||
cls('SmackJeeves/HIPS'),
|
||||
cls('SmackJeeves/HotChocolate'),
|
||||
cls('SmackJeeves/Hybristorific'),
|
||||
cls('SmackJeeves/Ianua'),
|
||||
cls('SmackJeeves/ImminentMoose'),
|
||||
cls('SmackJeeves/InthePride'),
|
||||
cls('SmackJeeves/Intoxicated'),
|
||||
cls('SmackJeeves/Jantarpol'),
|
||||
cls('SmackJeeves/Knife'),
|
||||
cls('SmackJeeves/Kranburn'),
|
||||
cls('SmackJeeves/KuroNeko'),
|
||||
cls('SmackJeeves/LastLivingSouls'),
|
||||
cls('SmackJeeves/LatchkeyKingdom'),
|
||||
cls('SmackJeeves/LegendsofMobiusBookOne'),
|
||||
cls('SmackJeeves/LiliBleu'),
|
||||
cls('SmackJeeves/LoveTwister'),
|
||||
cls('SmackJeeves/MagicalGirlAlice'),
|
||||
cls('SmackJeeves/MasqueradeWTTM'),
|
||||
cls('SmackJeeves/MegaManBattleNetwork7'),
|
||||
cls('SmackJeeves/MegaManiacs'),
|
||||
cls('SmackJeeves/MerirosvotSeikkailumerella'),
|
||||
cls('SmackJeeves/MewsDynasty'),
|
||||
cls('SmackJeeves/MixupofallMixups'),
|
||||
cls('SmackJeeves/MomthegamestorerippedusoffAGAIN'),
|
||||
cls('SmackJeeves/MoonlitDawnAMythicalTale'),
|
||||
cls('SmackJeeves/MyBoyfriendisaMobBoss'),
|
||||
cls('SmackJeeves/MyTrollLife'),
|
||||
cls('SmackJeeves/NihilWandasJourney'),
|
||||
cls('SmackJeeves/OddContact'),
|
||||
cls('SmackJeeves/OneFrameGags'),
|
||||
|
@ -472,24 +492,37 @@ class Removed(Scraper):
|
|||
cls('SmackJeeves/PumpkinFlower'),
|
||||
cls('SmackJeeves/Razor'),
|
||||
cls('SmackJeeves/SAKANA'),
|
||||
cls('SmackJeeves/SenoireDelirium'),
|
||||
cls('SmackJeeves/SerendipityAnEquestrianTale'),
|
||||
cls('SmackJeeves/ShacklesInstallment02'),
|
||||
cls('SmackJeeves/SimonSues'),
|
||||
cls('SmackJeeves/SonicUniverseAsk'),
|
||||
cls('SmackJeeves/SoulGuardian'),
|
||||
cls('SmackJeeves/Spidersilk', 'mov'),
|
||||
cls('SmackJeeves/Symbios'),
|
||||
cls('SmackJeeves/TechnicolorLondon'),
|
||||
cls('SmackJeeves/TeKscloset'),
|
||||
cls('SmackJeeves/TheAttackoftheRecoloursSeason1'),
|
||||
cls('SmackJeeves/TheCurtandTonyShow'),
|
||||
cls('SmackJeeves/TheDarkAgeofMobius'),
|
||||
cls('SmackJeeves/TheHobbitbic'),
|
||||
cls('SmackJeeves/ThehumanBEing'),
|
||||
cls('SmackJeeves/TheKeyToReality'),
|
||||
cls('SmackJeeves/TheLostland'),
|
||||
cls('SmackJeeves/TheMewExperiment'),
|
||||
cls('SmackJeeves/TheRandomObscureFairyTaleNoOnesEverReallyHeardOf'),
|
||||
cls('SmackJeeves/TheSomewhereOther'),
|
||||
cls('SmackJeeves/TheWastelands', 'mis'),
|
||||
cls('SmackJeeves/ThinkBeforeYouThink', 'mov'),
|
||||
cls('SmackJeeves/ThroughTheWonkyEye'),
|
||||
cls('SmackJeeves/TitleUnrelated'),
|
||||
cls('SmackJeeves/TotalPokemonIsland'),
|
||||
cls('SmackJeeves/TrillyAndSilly'),
|
||||
cls('SmackJeeves/TRIPP'),
|
||||
cls('SmackJeeves/VampireFetish'),
|
||||
cls('SmackJeeves/WolfWolf'),
|
||||
cls('SmackJeeves/WonderTheatre'),
|
||||
cls('SmackJeeves/YouAreTheReasonForTheEndOfTheWorld'),
|
||||
cls('SnowFlakes'),
|
||||
cls('StrawberryDeathCake'),
|
||||
cls('Stubble'),
|
||||
|
@ -629,6 +662,11 @@ class Renamed(Scraper):
|
|||
cls('PetiteSymphony/Djandora', 'ComicsBreak/Djandora'),
|
||||
cls('PetiteSymphony/Generation17', 'ComicsBreak/Generation17'),
|
||||
cls('SmackJeeves/CityFolk', 'ComicFury/CityFolk'),
|
||||
cls('SmackJeeves/DoomsdayMyDear', 'DoomsdayMyDear'),
|
||||
cls('SmackJeeves/ForestHill', 'ForestHill'),
|
||||
cls('SmackJeeves/Magience', 'ComicFury/Magience'),
|
||||
cls('SmackJeeves/RiversideExtras', 'RiversideExtras'),
|
||||
cls('SmackJeeves/StarTrip', 'StarTrip'),
|
||||
cls('TracyAndTristan', 'ComicFury/TracyAndTristan'),
|
||||
cls('Wulffmorgenthaler', 'WuMo'),
|
||||
cls('ZebraGirl', 'ComicFury/ZebraGirl'),
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
||||
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
|
@ -108,6 +108,12 @@ class Replay(_ParserScraper):
|
|||
return name
|
||||
|
||||
|
||||
class RiversideExtras(_ParserScraper):
|
||||
url = 'https://riversidecomics.com/'
|
||||
imageSearch = '//div[{}]//img'.format(xpath_class('webcomic-image'))
|
||||
prevSearch = '//a[{}]'.format(xpath_class('previous-webcomic-link'))
|
||||
|
||||
|
||||
class RomanticallyApocalyptic(_ParserScraper):
|
||||
url = 'http://romanticallyapocalyptic.com/'
|
||||
stripUrl = url + '%s'
|
||||
|
|
|
@ -507,6 +507,10 @@ class StarfireAgency(_WordPressScraper):
|
|||
return chapter + '_' + filename
|
||||
|
||||
|
||||
class StarTrip(_ComicControlScraper):
|
||||
url = 'https://www.startripcomic.com/'
|
||||
|
||||
|
||||
class StationV3(_ParserScraper):
|
||||
url = 'http://www.stationv3.com/'
|
||||
stripUrl = url + 'd3/%s.html'
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,7 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
||||
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||
"""
|
||||
Script to get ComicFury comics and save the info in a JSON file for further
|
||||
processing.
|
||||
|
@ -20,7 +20,7 @@ class ComicFuryUpdater(ComicListUpdater):
|
|||
MIN_COMICS = 90
|
||||
|
||||
dup_templates = ('ComicSherpa/%s', 'Creators/%s', 'GoComics/%s',
|
||||
'KeenSpot/%s', 'SmackJeeves/%s', 'Arcamax/%s')
|
||||
'KeenSpot/%s', 'Arcamax/%s')
|
||||
|
||||
langmap = {
|
||||
'german': 'de',
|
||||
|
@ -158,8 +158,8 @@ class ComicFuryUpdater(ComicListUpdater):
|
|||
def collect_results(self):
|
||||
"""Parse all search result pages."""
|
||||
# Sort by page count, so we can abort when we get under some threshold.
|
||||
baseUrl = ('http://comicfury.com/search.php?search=1&webcomics=1&' +
|
||||
'query=&worder=1&asc=0&incvi=1&incse=1&incnu=1&incla=1&' +
|
||||
baseUrl = ('https://comicfury.com/search.php?search=1&webcomics=1&' +
|
||||
'query=&worder=1&asc=0&incvi=2&incnu=2&incla=2&incse=2&' +
|
||||
'all_ge=1&all_st=1&all_la=1&page=%d')
|
||||
last_count = 999
|
||||
page = 1
|
||||
|
|
|
@ -8,7 +8,7 @@ d=$(dirname $0)
|
|||
if [ $# -ge 1 ]; then
|
||||
list="$*"
|
||||
else
|
||||
list="arcamax comicfury comicgenesis creators gocomics keenspot smackjeeves webcomicfactory comicskingdom"
|
||||
list="arcamax comicfury comicgenesis creators gocomics keenspot webcomicfactory comicskingdom"
|
||||
fi
|
||||
for script in $list; do
|
||||
echo "Executing ${script}.py"
|
||||
|
|
|
@ -1,172 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
||||
"""
|
||||
Script to get a list of smackjeeves.com comics and save the info in a JSON file
|
||||
for further processing.
|
||||
"""
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import sys
|
||||
from six.moves.urllib.parse import urlsplit
|
||||
|
||||
from scriptutil import ComicListUpdater
|
||||
|
||||
|
||||
class SmackJeevesUpdater(ComicListUpdater):
|
||||
# Absolute minimum number of pages a comic may have (restrict search space)
|
||||
MIN_COMICS = 90
|
||||
|
||||
# names of comics to exclude
|
||||
excluded_comics = (
|
||||
# comic moved/we have a better module
|
||||
"Amya",
|
||||
"Footloose",
|
||||
"TitleUnrelated",
|
||||
|
||||
# does not follow standard layout
|
||||
"300DaysOfSyao",
|
||||
"ADifferentPerspective",
|
||||
"Captor",
|
||||
"ClubLove",
|
||||
"Comatose",
|
||||
"DeSTRESS",
|
||||
"DreamCatcher",
|
||||
"Fumiko",
|
||||
"GART",
|
||||
"GarytheAlchemist",
|
||||
"ItoshiisCrazyNuzlockeAdventures",
|
||||
"JennyHaniver",
|
||||
"KiLAiLO",
|
||||
"LoudEra",
|
||||
"LunarHill",
|
||||
"Mafiagame",
|
||||
"MylifewithFel",
|
||||
"MyLifewithFelENESPANOL",
|
||||
"NegativeZen",
|
||||
"Nemutionpobae",
|
||||
"NightShot",
|
||||
"NormalIsBoring",
|
||||
"OpticalDisarray",
|
||||
"PicturesofYou",
|
||||
"Pornjunkiesstrip",
|
||||
"PrettyUgly",
|
||||
"Project217",
|
||||
"RemmyzRandomz",
|
||||
"Ribon",
|
||||
"RubysWorld",
|
||||
"ShinkaTheLastEevee",
|
||||
"SimplePixel",
|
||||
"SladesMansionofawesomeness",
|
||||
"SpaceSchool",
|
||||
"SushiGummy",
|
||||
"TC2KsPokemobians",
|
||||
"TheAfterSubtract",
|
||||
"ThePokemonArtBox",
|
||||
"THEVOIDWEBCOMIC",
|
||||
"ToDefeatThemAll",
|
||||
"TwoKeys",
|
||||
"Vbcomics",
|
||||
"WerewolfRichard",
|
||||
|
||||
# has no previous comic link
|
||||
"ThreadCrashers",
|
||||
"AchievementStuck",
|
||||
|
||||
# images are 403 forbidden
|
||||
"AngelJunkPileFelix",
|
||||
"AntavioussGenLab",
|
||||
"Harfang",
|
||||
"Okamirai",
|
||||
|
||||
# missing images
|
||||
"AGirlAndHerShadow",
|
||||
"Carciphona",
|
||||
"CatboyattheCon",
|
||||
"ContraandtheSpamDump",
|
||||
"Darkkyosshorts",
|
||||
"DollarStoreCaviar",
|
||||
"EdgeofDecember",
|
||||
"EvD",
|
||||
"HAndJ",
|
||||
"HEARD",
|
||||
"IwillbenapoSpamDump",
|
||||
"KirbysoftheAlternateDimension",
|
||||
"Letsreviewshallwe",
|
||||
"MegaManSpriteExpo",
|
||||
"OmnisSpriteShowcase",
|
||||
"PiecesofBrokenGlass",
|
||||
"PlatonicManagementDilemma",
|
||||
"SecretSanta2011",
|
||||
"SerendipityAnEquestrianTale",
|
||||
"SJArtCollab",
|
||||
"SlightlyDifferent",
|
||||
"TheAttackoftheRecoloursSeason1",
|
||||
"ThroughTheWonkyEye",
|
||||
"TotallyKotor",
|
||||
"WinterMelody",
|
||||
"ZonowTheHedgehog",
|
||||
|
||||
# missing previous link
|
||||
"BambooArmonicKnightsGuild",
|
||||
|
||||
# broken host name
|
||||
"Razor",
|
||||
)
|
||||
|
||||
def __init__(self, name):
|
||||
super(SmackJeevesUpdater, self).__init__(name)
|
||||
self.sleep = 1.5
|
||||
|
||||
def handle_url(self, url):
|
||||
"""Parse one search result page."""
|
||||
data = self.get_url(url)
|
||||
|
||||
num = 999
|
||||
for comictag in data.cssselect('a.card'):
|
||||
page_url = comictag.attrib['href']
|
||||
name = comictag.cssselect('div.title')[0].text
|
||||
# search for url in extra page
|
||||
data2 = self.get_url(page_url)
|
||||
|
||||
# find out how many images this comic has
|
||||
mo = data2.cssselect('div.num-pages div.value')
|
||||
num = int(mo[0].text.strip().replace(',', ''))
|
||||
|
||||
mo = data2.cssselect('div.buttons a:last-child')
|
||||
comic_url = mo[0].attrib['href']
|
||||
# search for adult flag
|
||||
adult = data2.cssselect('div.mature')
|
||||
updates = data2.cssselect('div.updates div.value')[0].text_content()
|
||||
self.add_comic(name, (comic_url, len(adult) > 0, updates), num)
|
||||
|
||||
next_url = data.cssselect("a.next")[0].attrib['href']
|
||||
return (next_url, num)
|
||||
|
||||
def collect_results(self):
|
||||
"""Parse all search result pages."""
|
||||
# Sort by number of comics, so we can abort when we get under some
|
||||
# threshold.
|
||||
next_url = "http://www.smackjeeves.com/search.php?last_update=6&sort_by=5"
|
||||
last_count = 999
|
||||
while last_count >= self.MIN_COMICS:
|
||||
print(last_count, file=sys.stderr)
|
||||
next_url, last_count = self.handle_url(next_url)
|
||||
|
||||
def get_entry(self, name, data):
|
||||
sub, top = urlsplit(data[0]).hostname.split('.', 1)
|
||||
if top.lower() == "smackjeeves.com":
|
||||
opt = "sub='%s'" % sub
|
||||
else:
|
||||
opt = "host='%s.%s'" % (sub, top)
|
||||
if data[1]:
|
||||
opt += ", adult=True"
|
||||
if data[2] == 'Completed':
|
||||
opt += ", endOfLife=True"
|
||||
return u"cls('%s', %s)," % (name, opt)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
SmackJeevesUpdater(__file__).run()
|
|
@ -10,7 +10,7 @@ d=$(dirname $0)
|
|||
if [ $# -ge 1 ]; then
|
||||
list="$*"
|
||||
else
|
||||
list="arcamax comicfury comicgenesis creators gocomics keenspot smackjeeves webcomicfactory comicskingdom"
|
||||
list="arcamax comicfury comicgenesis creators gocomics keenspot webcomicfactory comicskingdom"
|
||||
fi
|
||||
for script in $list; do
|
||||
target="${d}/../dosagelib/plugins/${script}.py"
|
||||
|
|
Loading…
Reference in a new issue