Adapt SmackJeeves to new site design (fixes #144)

Some things got lost on the way: - Since there is no comprehensive comic directory anymore, the automatic update script was removed. New comics need to be added manually. - Some authors used the opportunity to move from SmackJeeves somewhere else - some of those got new modules (either standalone or ComicFury). - A bunch of comics just disappeared...
2019-12-26 22:03:18 +01:00 · 2019-12-26 22:03:18 +01:00 · 963db7f448
commit 963db7f448
parent 02c0da24fa
11 changed files with 696 additions and 898 deletions
--- a/dosagelib/plugins/comicfury.py
+++ b/dosagelib/plugins/comicfury.py
@ -634,6 +634,7 @@ class ComicFury(_ParserScraper):
            cls('MadGirl', 'madgirl'),
            cls('MagicElDesencuentro', 'magiceldesencuentro', 'es'),
            cls('MagicTheScattering', 'magicthescattering'),
+            cls('Magience', 'magience'),
            cls('MAGISAPARASAYOupdatesMonFri', 'mag-isa'),
            cls('MagnaComica', 'magnacomica'),
            cls('Maluk', 'maluk'),
--- a/dosagelib/plugins/d.py
+++ b/dosagelib/plugins/d.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2018 Tobias Gruetzmacher
+# Copyright (C) 2015-2019 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -214,6 +214,12 @@ class DorkTower(_ParserScraper):
    prevSearch = '//a[%s][text()="Previous"]' % xpath_class('btn')


+class DoomsdayMyDear(_ParserScraper):
+    url = 'http://doomsdaymydear.com/'
+    imageSearch = '//img[{}]'.format(xpath_class('attachment-full'))
+    prevSearch = '//a[{}]'.format(xpath_class('previous-webcomic-link'))
+
+
 class Dracula(_BasicScraper):
    url = 'http://draculacomic.net/'
    stripUrl = url + 'comic.php?comicID=%s'
--- a/dosagelib/plugins/f.py
+++ b/dosagelib/plugins/f.py
@ -109,6 +109,10 @@ class FonFlatter(_ParserScraper):
        )


+class ForestHill(_WordPressScraper):
+    url = 'https://www.foresthillcomic.org/'
+
+
 class ForLackOfABetterComic(_BasicScraper):
    url = 'http://forlackofabettercomic.com/'
    rurl = r'http://(?:www\.)?forlackofabettercomic\.com/'
--- a/dosagelib/plugins/old.py
+++ b/dosagelib/plugins/old.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2017 Tobias Gruetzmacher
+# Copyright (C) 2015-2019 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -411,21 +411,27 @@ class Removed(Scraper):
            cls('SmackJeeves/AchievementStuck'),
            cls('SmackJeeves/AGirlAndHerShadow'),
            cls('SmackJeeves/Allthatglitters'),
+            cls('SmackJeeves/AloversRule'),
            cls('SmackJeeves/Anathemacomics'),
            cls('SmackJeeves/AngelBeast'),
            cls('SmackJeeves/ArchportCityChronicles'),
            cls('SmackJeeves/AwesomeSauce'),
+            cls('SmackJeeves/BetweenLightandDark'),
            cls('SmackJeeves/BetweenWorlds'),
            cls('SmackJeeves/BeyondTemptation'),
+            cls('SmackJeeves/BLDShortComics'),
            cls('SmackJeeves/Bloodyfairytale'),
            cls('SmackJeeves/BLOT'),
            cls('SmackJeeves/BlueWell'),
+            cls('SmackJeeves/BreakfastonaCliff'),
            cls('SmackJeeves/CafeAmargo'),
            cls('SmackJeeves/Captor'),
            cls('SmackJeeves/ChaosTheory2005'),
+            cls('SmackJeeves/CleanCure'),
            cls('SmackJeeves/DaddysGirl'),
            cls('SmackJeeves/Debtsettlement'),
            cls('SmackJeeves/DebtSettlement2OperationExtinction'),
+            cls('SmackJeeves/DefyingGravityTheFourGreatGuardians'),
            cls('SmackJeeves/Destinationunknown'),
            cls('SmackJeeves/DevilTrainee'),
            cls('SmackJeeves/DevilTraineeSpanish'),
@ -436,7 +442,9 @@ class Removed(Scraper):
            cls('SmackJeeves/EternalKnights'),
            cls('SmackJeeves/EvD'),
            cls('SmackJeeves/FeathersPI'),
+            cls('SmackJeeves/FemmeSchism'),
            cls('SmackJeeves/FireWire'),
+            cls('SmackJeeves/FrenzyRedux'),
            cls('SmackJeeves/FrogKing'),
            cls('SmackJeeves/FuckMyLife'),
            cls('SmackJeeves/FurtherDowntheRabbitHole'),
@ -445,21 +453,33 @@ class Removed(Scraper):
            cls('SmackJeeves/GraveImpressions'),
            cls('SmackJeeves/GreenKirbyandabunchofotherpeopledoinstuff'),
            cls('SmackJeeves/Harfang'),
+            cls('SmackJeeves/HIPS'),
            cls('SmackJeeves/HotChocolate'),
+            cls('SmackJeeves/Hybristorific'),
            cls('SmackJeeves/Ianua'),
            cls('SmackJeeves/ImminentMoose'),
            cls('SmackJeeves/InthePride'),
            cls('SmackJeeves/Intoxicated'),
+            cls('SmackJeeves/Jantarpol'),
            cls('SmackJeeves/Knife'),
            cls('SmackJeeves/Kranburn'),
+            cls('SmackJeeves/KuroNeko'),
+            cls('SmackJeeves/LastLivingSouls'),
            cls('SmackJeeves/LatchkeyKingdom'),
+            cls('SmackJeeves/LegendsofMobiusBookOne'),
+            cls('SmackJeeves/LiliBleu'),
            cls('SmackJeeves/LoveTwister'),
+            cls('SmackJeeves/MagicalGirlAlice'),
+            cls('SmackJeeves/MasqueradeWTTM'),
+            cls('SmackJeeves/MegaManBattleNetwork7'),
            cls('SmackJeeves/MegaManiacs'),
            cls('SmackJeeves/MerirosvotSeikkailumerella'),
            cls('SmackJeeves/MewsDynasty'),
+            cls('SmackJeeves/MixupofallMixups'),
            cls('SmackJeeves/MomthegamestorerippedusoffAGAIN'),
            cls('SmackJeeves/MoonlitDawnAMythicalTale'),
            cls('SmackJeeves/MyBoyfriendisaMobBoss'),
+            cls('SmackJeeves/MyTrollLife'),
            cls('SmackJeeves/NihilWandasJourney'),
            cls('SmackJeeves/OddContact'),
            cls('SmackJeeves/OneFrameGags'),
@ -472,24 +492,37 @@ class Removed(Scraper):
            cls('SmackJeeves/PumpkinFlower'),
            cls('SmackJeeves/Razor'),
            cls('SmackJeeves/SAKANA'),
+            cls('SmackJeeves/SenoireDelirium'),
            cls('SmackJeeves/SerendipityAnEquestrianTale'),
            cls('SmackJeeves/ShacklesInstallment02'),
+            cls('SmackJeeves/SimonSues'),
+            cls('SmackJeeves/SonicUniverseAsk'),
            cls('SmackJeeves/SoulGuardian'),
+            cls('SmackJeeves/Spidersilk', 'mov'),
+            cls('SmackJeeves/Symbios'),
            cls('SmackJeeves/TechnicolorLondon'),
            cls('SmackJeeves/TeKscloset'),
            cls('SmackJeeves/TheAttackoftheRecoloursSeason1'),
+            cls('SmackJeeves/TheCurtandTonyShow'),
+            cls('SmackJeeves/TheDarkAgeofMobius'),
+            cls('SmackJeeves/TheHobbitbic'),
            cls('SmackJeeves/ThehumanBEing'),
+            cls('SmackJeeves/TheKeyToReality'),
+            cls('SmackJeeves/TheLostland'),
            cls('SmackJeeves/TheMewExperiment'),
            cls('SmackJeeves/TheRandomObscureFairyTaleNoOnesEverReallyHeardOf'),
            cls('SmackJeeves/TheSomewhereOther'),
            cls('SmackJeeves/TheWastelands', 'mis'),
+            cls('SmackJeeves/ThinkBeforeYouThink', 'mov'),
            cls('SmackJeeves/ThroughTheWonkyEye'),
            cls('SmackJeeves/TitleUnrelated'),
            cls('SmackJeeves/TotalPokemonIsland'),
            cls('SmackJeeves/TrillyAndSilly'),
+            cls('SmackJeeves/TRIPP'),
            cls('SmackJeeves/VampireFetish'),
            cls('SmackJeeves/WolfWolf'),
            cls('SmackJeeves/WonderTheatre'),
+            cls('SmackJeeves/YouAreTheReasonForTheEndOfTheWorld'),
            cls('SnowFlakes'),
            cls('StrawberryDeathCake'),
            cls('Stubble'),
@ -629,6 +662,11 @@ class Renamed(Scraper):
            cls('PetiteSymphony/Djandora', 'ComicsBreak/Djandora'),
            cls('PetiteSymphony/Generation17', 'ComicsBreak/Generation17'),
            cls('SmackJeeves/CityFolk', 'ComicFury/CityFolk'),
+            cls('SmackJeeves/DoomsdayMyDear', 'DoomsdayMyDear'),
+            cls('SmackJeeves/ForestHill', 'ForestHill'),
+            cls('SmackJeeves/Magience', 'ComicFury/Magience'),
+            cls('SmackJeeves/RiversideExtras', 'RiversideExtras'),
+            cls('SmackJeeves/StarTrip', 'StarTrip'),
            cls('TracyAndTristan', 'ComicFury/TracyAndTristan'),
            cls('Wulffmorgenthaler', 'WuMo'),
            cls('ZebraGirl', 'ComicFury/ZebraGirl'),
--- a/dosagelib/plugins/r.py
+++ b/dosagelib/plugins/r.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2017 Tobias Gruetzmacher
+# Copyright (C) 2015-2019 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -108,6 +108,12 @@ class Replay(_ParserScraper):
        return name


+class RiversideExtras(_ParserScraper):
+    url = 'https://riversidecomics.com/'
+    imageSearch = '//div[{}]//img'.format(xpath_class('webcomic-image'))
+    prevSearch = '//a[{}]'.format(xpath_class('previous-webcomic-link'))
+
+
 class RomanticallyApocalyptic(_ParserScraper):
    url = 'http://romanticallyapocalyptic.com/'
    stripUrl = url + '%s'
--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@ -507,6 +507,10 @@ class StarfireAgency(_WordPressScraper):
        return chapter + '_' + filename


+class StarTrip(_ComicControlScraper):
+    url = 'https://www.startripcomic.com/'
+
+
 class StationV3(_ParserScraper):
    url = 'http://www.stationv3.com/'
    stripUrl = url + 'd3/%s.html'
--- a/dosagelib/plugins/smackjeeves.py
+++ b/dosagelib/plugins/smackjeeves.py
--- a/scripts/comicfury.py
+++ b/scripts/comicfury.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2017 Tobias Gruetzmacher
+# Copyright (C) 2015-2019 Tobias Gruetzmacher
 """
 Script to get ComicFury comics and save the info in a JSON file for further
 processing.
@ -20,7 +20,7 @@ class ComicFuryUpdater(ComicListUpdater):
    MIN_COMICS = 90

    dup_templates = ('ComicSherpa/%s', 'Creators/%s', 'GoComics/%s',
-                     'KeenSpot/%s', 'SmackJeeves/%s', 'Arcamax/%s')
+                     'KeenSpot/%s', 'Arcamax/%s')

    langmap = {
        'german': 'de',
@ -158,8 +158,8 @@ class ComicFuryUpdater(ComicListUpdater):
    def collect_results(self):
        """Parse all search result pages."""
        # Sort by page count, so we can abort when we get under some threshold.
-        baseUrl = ('http://comicfury.com/search.php?search=1&webcomics=1&' +
-                   'query=&worder=1&asc=0&incvi=1&incse=1&incnu=1&incla=1&' +
+        baseUrl = ('https://comicfury.com/search.php?search=1&webcomics=1&' +
+                   'query=&worder=1&asc=0&incvi=2&incnu=2&incla=2&incse=2&' +
                   'all_ge=1&all_st=1&all_la=1&page=%d')
        last_count = 999
        page = 1
--- a/scripts/generate_json.sh
+++ b/scripts/generate_json.sh
@ -8,7 +8,7 @@ d=$(dirname $0)
 if [ $# -ge 1 ]; then
  list="$*"
 else
-  list="arcamax comicfury comicgenesis creators gocomics keenspot smackjeeves webcomicfactory comicskingdom"
+  list="arcamax comicfury comicgenesis creators gocomics keenspot webcomicfactory comicskingdom"
 fi
 for script in $list; do
  echo "Executing ${script}.py"
--- a/scripts/smackjeeves.py
+++ b/scripts/smackjeeves.py
@ -1,172 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2017 Tobias Gruetzmacher
-"""
-Script to get a list of smackjeeves.com comics and save the info in a JSON file
-for further processing.
-"""
-from __future__ import absolute_import, division, print_function
-
-import sys
-from six.moves.urllib.parse import urlsplit
-
-from scriptutil import ComicListUpdater
-
-
-class SmackJeevesUpdater(ComicListUpdater):
-    # Absolute minumum number of pages a comic may have (restrict search space)
-    MIN_COMICS = 90
-
-    # names of comics to exclude
-    excluded_comics = (
-        # comic moved/we have a better module
-        "Amya",
-        "Footloose",
-        "TitleUnrelated",
-
-        # does not follow standard layout
-        "300DaysOfSyao",
-        "ADifferentPerspective",
-        "Captor",
-        "ClubLove",
-        "Comatose",
-        "DeSTRESS",
-        "DreamCatcher",
-        "Fumiko",
-        "GART",
-        "GarytheAlchemist",
-        "ItoshiisCrazyNuzlockeAdventures",
-        "JennyHaniver",
-        "KiLAiLO",
-        "LoudEra",
-        "LunarHill",
-        "Mafiagame",
-        "MylifewithFel",
-        "MyLifewithFelENESPANOL",
-        "NegativeZen",
-        "Nemutionpobae",
-        "NightShot",
-        "NormalIsBoring",
-        "OpticalDisarray",
-        "PicturesofYou",
-        "Pornjunkiesstrip",
-        "PrettyUgly",
-        "Project217",
-        "RemmyzRandomz",
-        "Ribon",
-        "RubysWorld",
-        "ShinkaTheLastEevee",
-        "SimplePixel",
-        "SladesMansionofawesomeness",
-        "SpaceSchool",
-        "SushiGummy",
-        "TC2KsPokemobians",
-        "TheAfterSubtract",
-        "ThePokemonArtBox",
-        "THEVOIDWEBCOMIC",
-        "ToDefeatThemAll",
-        "TwoKeys",
-        "Vbcomics",
-        "WerewolfRichard",
-
-        # has no previous comic link
-        "ThreadCrashers",
-        "AchievementStuck",
-
-        # images are 403 forbidden
-        "AngelJunkPileFelix",
-        "AntavioussGenLab",
-        "Harfang",
-        "Okamirai",
-
-        # missing images
-        "AGirlAndHerShadow",
-        "Carciphona",
-        "CatboyattheCon",
-        "ContraandtheSpamDump",
-        "Darkkyosshorts",
-        "DollarStoreCaviar",
-        "EdgeofDecember",
-        "EvD",
-        "HAndJ",
-        "HEARD",
-        "IwillbenapoSpamDump",
-        "KirbysoftheAlternateDimension",
-        "Letsreviewshallwe",
-        "MegaManSpriteExpo",
-        "OmnisSpriteShowcase",
-        "PiecesofBrokenGlass",
-        "PlatonicManagementDilemma",
-        "SecretSanta2011",
-        "SerendipityAnEquestrianTale",
-        "SJArtCollab",
-        "SlightlyDifferent",
-        "TheAttackoftheRecoloursSeason1",
-        "ThroughTheWonkyEye",
-        "TotallyKotor",
-        "WinterMelody",
-        "ZonowTheHedgehog",
-
-        # missing previous link
-        "BambooArmonicKnightsGuild",
-
-        # broken host name
-        "Razor",
-    )
-
-    def __init__(self, name):
-        super(SmackJeevesUpdater, self).__init__(name)
-        self.sleep = 1.5
-
-    def handle_url(self, url):
-        """Parse one search result page."""
-        data = self.get_url(url)
-
-        num = 999
-        for comictag in data.cssselect('a.card'):
-            page_url = comictag.attrib['href']
-            name = comictag.cssselect('div.title')[0].text
-            # search for url in extra page
-            data2 = self.get_url(page_url)
-
-            # find out how many images this comic has
-            mo = data2.cssselect('div.num-pages div.value')
-            num = int(mo[0].text.strip().replace(',', ''))
-
-            mo = data2.cssselect('div.buttons a:last-child')
-            comic_url = mo[0].attrib['href']
-            # search for adult flag
-            adult = data2.cssselect('div.mature')
-            updates = data2.cssselect('div.updates div.value')[0].text_content()
-            self.add_comic(name, (comic_url, len(adult) > 0, updates), num)
-
-        next_url = data.cssselect("a.next")[0].attrib['href']
-        return (next_url, num)
-
-    def collect_results(self):
-        """Parse all search result pages."""
-        # Sort by number of comics, so we can abort when we get under some
-        # threshold.
-        next_url = "http://www.smackjeeves.com/search.php?last_update=6&sort_by=5"
-        last_count = 999
-        while last_count >= self.MIN_COMICS:
-            print(last_count, file=sys.stderr)
-            next_url, last_count = self.handle_url(next_url)
-
-    def get_entry(self, name, data):
-        sub, top = urlsplit(data[0]).hostname.split('.', 1)
-        if top.lower() == "smackjeeves.com":
-            opt = "sub='%s'" % sub
-        else:
-            opt = "host='%s.%s'" % (sub, top)
-        if data[1]:
-            opt += ", adult=True"
-        if data[2] == 'Completed':
-            opt += ", endOfLife=True"
-        return u"cls('%s', %s)," % (name, opt)
-
-
-if __name__ == '__main__':
-    SmackJeevesUpdater(__file__).run()
--- a/scripts/update_plugins.sh
+++ b/scripts/update_plugins.sh
@ -10,7 +10,7 @@ d=$(dirname $0)
 if [ $# -ge 1 ]; then
  list="$*"
 else
-  list="arcamax comicfury comicgenesis creators gocomics keenspot smackjeeves webcomicfactory comicskingdom"
+  list="arcamax comicfury comicgenesis creators gocomics keenspot webcomicfactory comicskingdom"
 fi
 for script in $list; do
  target="${d}/../dosagelib/plugins/${script}.py"