From df2048cb34579c28556a1e459240d5012984c7a0 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Sun, 5 Jun 2016 21:47:58 +0200 Subject: [PATCH] Keep track of removed and moved comics (fixes #41). I plan on keeping this list for at least ~ 2 releases and then purging older entries... --- dosagelib/cmd.py | 22 +- dosagelib/director.py | 7 +- dosagelib/plugins/e.py | 1 - dosagelib/plugins/kindofnormal.py | 2 - dosagelib/plugins/old.py | 342 ++++++++++++++++++++++++++++++ dosagelib/scraper.py | 11 +- 6 files changed, 364 insertions(+), 21 deletions(-) create mode 100644 dosagelib/plugins/old.py diff --git a/dosagelib/cmd.py b/dosagelib/cmd.py index a91ed8e5d..27e38ac20 100644 --- a/dosagelib/cmd.py +++ b/dosagelib/cmd.py @@ -7,8 +7,9 @@ from __future__ import absolute_import, division, print_function import os import argparse +import six -from . import events, configuration, singleton, director, __version__ +from . import events, configuration, singleton, director, scraper, __version__ from .output import out from .util import internal_error, strlimit @@ -89,6 +90,10 @@ def setup_options(): # multimatch is only used for development, eg. testing if all comics of a scripted plugin are working parser.add_argument('--multimatch', action='store_true', help=argparse.SUPPRESS) + # List all comic modules, even those normally suppressed, because they + # are not "real" (moved & removed) + parser.add_argument('--list-all', action='store_true', + help=argparse.SUPPRESS) parser.add_argument('comic', nargs='*', help='comic module name (including case insensitive substrings)') try: @@ -157,7 +162,7 @@ def display_comic_help(scraperobj): orig_context = out.context out.context = scraperobj.name try: - out.info(u"URL: " + scraperobj.url) + out.info(u"URL: " + six.text_type(scraperobj.url)) out.info(u"Language: " + scraperobj.language()) if scraperobj.adult: out.info(u"Adult comic, use option --adult to fetch.") @@ -225,8 +230,9 @@ def run(options): return display_version(options.verbose) if options.list: return do_list() - if options.singlelist: - return do_list(column_list=False, verbose=options.verbose) + if options.singlelist or options.list_all: + return do_list(column_list=False, verbose=options.verbose, + listall=options.list_all) # after this a list of comic strips is needed if not options.comic: out.warn(u'No comics specified, bailing out!') @@ -238,14 +244,14 @@ def run(options): return director.getComics(options) -def do_list(column_list=True, verbose=False): +def do_list(column_list=True, verbose=False, listall=False): """List available comics.""" with out.pager(): out.info(u'Available comic scrapers:') out.info(u'Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT) out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG) - scrapers = sorted(director.getAllScrapers(listing=True), - key=lambda s: s.name) + scrapers = sorted(scraper.get_scrapers(listall), + key=lambda s: s.name.lower()) if column_list: num, disabled = do_column_list(scrapers) else: @@ -267,7 +273,7 @@ def do_single_list(scrapers, verbose=False): display_comic_help(scraperobj) else: out.info(get_tagged_scraper_name(scraperobj, reasons=disabled)) - return num, disabled + return num + 1, disabled def do_column_list(scrapers): diff --git a/dosagelib/director.py b/dosagelib/director.py index 31b7c28cb..83bd25e93 100644 --- a/dosagelib/director.py +++ b/dosagelib/director.py @@ -190,18 +190,13 @@ def finish(): out.warn("Waiting for download threads to finish.") -def getAllScrapers(listing=False): - """Get all scrapers.""" - return getScrapers(['@@'], listing=listing) - - def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listing=False): """Get scraper objects for the given comics.""" if '@' in comics: # only scrapers whose directory already exists if len(comics) > 1: out.warn(u"using '@' as comic name ignores all other specified comics.") - for scraperobj in scraper.get_scrapers(): + for scraperobj in scraper.get_scrapers(include_removed=True): dirname = getDirname(scraperobj.name) if os.path.isdir(os.path.join(basepath, dirname)): if shouldRunScraper(scraperobj, adult, listing): diff --git a/dosagelib/plugins/e.py b/dosagelib/plugins/e.py index 9fced1df6..6f8bd0cec 100644 --- a/dosagelib/plugins/e.py +++ b/dosagelib/plugins/e.py @@ -152,7 +152,6 @@ class Eryl(_WordPressScraper): firstStripUrl = url latestSearch = WP_LATEST_SEARCH starter = indirectStarter - help = 'This was known as DarkWings in previous Dosage versions' class EverybodyLovesEricRaymond(_BasicScraper): diff --git a/dosagelib/plugins/kindofnormal.py b/dosagelib/plugins/kindofnormal.py index 5bca69019..8d91d4965 100644 --- a/dosagelib/plugins/kindofnormal.py +++ b/dosagelib/plugins/kindofnormal.py @@ -22,6 +22,4 @@ class KindOfNormal(_ParserScraper): cls('MeAndDanielle', 'meanddanielle'), cls('TruthFacts', 'truthfacts'), cls('Wumo', 'wumo'), - # name in previous versions - cls('Wulffmorgenthaler', 'wumo'), ] diff --git a/dosagelib/plugins/old.py b/dosagelib/plugins/old.py new file mode 100644 index 000000000..3e85f41c1 --- /dev/null +++ b/dosagelib/plugins/old.py @@ -0,0 +1,342 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs +# Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher + +from __future__ import absolute_import, division, print_function + +from ..scraper import Scraper + + +class Removed(Scraper): + REASONS = { + 'jsh': 'Site is very JavaScript-heavy, writing a module would be' + + ' very complicated.', + 'del': 'Comic was removed from the web.', + 'block': 'The comic site is blocking us.', + 'unk': 'Comic was removed for an unknown reason.', + } + + def __init__(self, name, reason='del'): + super(Removed, self).__init__(name) + self.reason = reason + + def getDisabledReasons(self): + return {'rem-' + self.reason: self.REASONS[self.reason]} + + @classmethod + def getmodules(cls): + return [ + # Removed in 2.16 + cls('AbleAndBaker'), + cls('AlsoBagels'), + cls('Antics'), + cls('Arcamax/BleekerTheRechargeableDog'), + cls('BackwaterPlanet'), + cls('BigFatWhale'), + cls('Blip'), + cls('BoxerHockey'), + cls('BoyOnAStickAndSlither', 'jsh'), + cls('BrentalFloss'), + cls('BrentalFloss/FlossedInTime'), + cls('BrentalFloss/GuestComics'), + cls('BrightlyWound'), + cls('CheckerboardNightmare'), + cls('ComicFury/30years'), + cls('ComicFury/AAB'), + cls('ComicFury/AdventuresofMaggie'), + cls('ComicFury/Aether'), + cls('ComicFury/Afairtrade'), + cls('ComicFury/Afrodays'), + cls('ComicFury/Albinobros'), + cls('ComicFury/Alexanderandlucas'), + cls('ComicFury/Alittlebitofeverything'), + cls('ComicFury/Americanextremists'), + cls('ComicFury/AmericanNerd'), + cls('ComicFury/Amtheatre'), + cls('ComicFury/Angstcomic'), + cls('ComicFury/Applepine'), + cls('ComicFury/Atm'), + cls('ComicFury/Atomicmonkey'), + cls('ComicFury/Baseballcapsandtiaras'), + cls('ComicFury/BATB'), + cls('ComicFury/BetweenRounds'), + cls('ComicFury/BrokenReality'), + cls('ComicFury/BTTF'), + cls('ComicFury/Cannonadeofhogwash'), + cls('ComicFury/CatHero'), + cls('ComicFury/Chocolava'), + cls('ComicFury/ChristianHumberReloaded'), + cls('ComicFury/Cockeyed'), + cls('ComicFury/CoftheA'), + cls('ComicFury/CompanyMan'), + cls('ComicFury/Complicatedd'), + cls('ComicFury/Conplicated'), + cls('ComicFury/Crowbar'), + cls('ComicFury/Crowbars'), + cls('ComicFury/Curvyboneyosis'), + cls('ComicFury/Dandk'), + cls('ComicFury/Davidandtherobot'), + cls('ComicFury/DenizensAttentionComic'), + cls('ComicFury/Disturbingcomics'), + cls('ComicFury/Docapoc'), + cls('ComicFury/Elfcomic'), + cls('ComicFury/EMT'), + cls('ComicFury/EternityC'), + cls('ComicFury/Fathead'), + cls('ComicFury/Fexpression'), + cls('ComicFury/FireBorn2'), + cls('ComicFury/Foxtales'), + cls('ComicFury/Fpk'), + cls('ComicFury/Ghostassassin'), + cls('ComicFury/Gillimurphy'), + cls('ComicFury/Glomshire'), + cls('ComicFury/Goldrushdynllewcomics'), + cls('ComicFury/Grandline3point5'), + cls('ComicFury/Halloween2012'), + cls('ComicFury/Halloween2013'), + cls('ComicFury/HIRI'), + cls('ComicFury/Hitmen'), + cls('ComicFury/Honeyvenom'), + cls('ComicFury/Insanitycorp'), + cls('ComicFury/Inviziblecomixgroup'), + cls('ComicFury/Isb'), + cls('ComicFury/Its'), + cls('ComicFury/Jenfferscartoonphotomanipulaion'), + cls('ComicFury/Jenffersshow'), + cls('ComicFury/Joysworldcomic'), + cls('ComicFury/Judgedred'), + cls('ComicFury/Jump2'), + cls('ComicFury/Kachingcomic'), + cls('ComicFury/Kazaandgwenna'), + cls('ComicFury/Kevinzombie'), + cls('ComicFury/Kindergardencrisis'), + cls('ComicFury/Kirahitogame'), + cls('ComicFury/Ladyspectra'), + cls('ComicFury/Lastcallcomic'), + cls('ComicFury/Lazy'), + cls('ComicFury/Lena'), + cls('ComicFury/Letitride'), + cls('ComicFury/Lola2'), + cls('ComicFury/LORDDARKE'), + cls('ComicFury/Lp'), + cls('ComicFury/LucidsDream'), + cls('ComicFury/Lvl30psy'), + cls('ComicFury/Maddog'), + cls('ComicFury/Magisa'), + cls('ComicFury/Merelymortal'), + cls('ComicFury/Midnightpeanutbutter'), + cls('ComicFury/Minarga'), + cls('ComicFury/MoizmadComix'), + cls('ComicFury/Moths'), + cls('ComicFury/MyHorribleSite'), + cls('ComicFury/Neighborscomic'), + cls('ComicFury/Nojetpack'), + cls('ComicFury/NoSongs'), + cls('ComicFury/Nostalgiaofeden'), + cls('ComicFury/Ocarinaoftim'), + cls('ComicFury/Pandemonium'), + cls('ComicFury/Paperstreamer'), + cls('ComicFury/Peepsnperks'), + cls('ComicFury/PersonaFTW'), + cls('ComicFury/Pilgrimenespanol'), + cls('ComicFury/Pilgrimsprogress'), + cls('ComicFury/PiratesLife'), + cls('ComicFury/PobrePucho'), + cls('ComicFury/Poussiere'), + cls('ComicFury/Pt'), + cls('ComicFury/Punch'), + cls('ComicFury/Rangerrandom'), + cls('ComicFury/Raspcat'), + cls('ComicFury/RealLifeTrips'), + cls('ComicFury/Romanjack'), + cls('ComicFury/RPS'), + cls('ComicFury/RPT'), + cls('ComicFury/Rvr'), + cls('ComicFury/Sarakleeyo'), + cls('ComicFury/Sawbladersblacknuzlocke'), + cls('ComicFury/Schizmatic'), + cls('ComicFury/Seconds'), + cls('ComicFury/SeeYourFeels'), + cls('ComicFury/Serengetti'), + cls('ComicFury/Shonenpunkremix'), + cls('ComicFury/Sinjetpack'), + cls('ComicFury/Spf1337'), + cls('ComicFury/Sscomic'), + cls('ComicFury/Teenagedragon'), + cls('ComicFury/Theashes'), + cls('ComicFury/TheButterflyEffect'), + cls('ComicFury/Thecrease'), + cls('ComicFury/TwentyQuidAmusements'), + cls('ComicFury/Wowwithatwistdamaclesandkejallcomic'), + cls('ComicGenesis/CryHavoc'), + cls('ComicGenesis/SueosdelSur'), + cls('Commissioned'), + cls('Creators/BCinSpanish'), + cls('Creators/GirlsandSportsinSpanish'), + cls('Creators/RugratsinSpanish'), + cls('CtrlAltDel', 'block'), + cls('CtrlAltDel/Sillies', 'block'), + cls('DeathToTheExtremist'), + cls('Ellerbisms'), + cls('Eriadan'), + cls('FeyWinds'), + cls('FilibusterCartoons'), + cls('FowlLanguage', 'block'), + cls('GoComics/BenSargent'), + cls('GoComics/BillyAndCo'), + cls('GoComics/BlackboardDaze'), + cls('GoComics/Boogerbrain'), + cls('GoComics/BotBrothers'), + cls('GoComics/BrilliantMines'), + cls('GoComics/BuffaloChips'), + cls('GoComics/BuzzaWuzza'), + cls('GoComics/Cortoons'), + cls('GoComics/FrankBlunt'), + cls('GoComics/GoComicsontheRoad'), + cls('GoComics/HIP'), + cls('GoComics/HoodootheUnwiseOwl'), + cls('GoComics/JustPosted'), + cls('GoComics/KatetheGreat'), + cls('GoComics/MidLifewAlan'), + cls('GoComics/Pi'), + cls('GoComics/PigtimesCartoon'), + cls('GoComics/RedMeat'), + cls('GoComics/RicigsToonTrivia'), + cls('GoComics/SantavsDracula'), + cls('GoComics/Scurvyville'), + cls('GoComics/TheAcerbicCaf'), + cls('GoComics/TheAdventuresofTeetyBallerina'), + cls('GoComics/TOWHOMITMAYCONCERN'), + cls('GoComics/ZacharyNixonJohnson'), + cls('GunnerkrigCourt'), + cls('HorribleVille'), + cls('KatzenfutterGeleespritzer'), + cls('KillerKomics'), + cls('Lint'), + cls('LinuxComFridayFunnies'), + cls('NekkoAndJoruba'), + cls('NekoTheKitty'), + cls('NewAdventuresOfBobbin'), + cls('Nnewts'), + cls('OddFish'), + cls('OneQuestion'), + cls('OnTheFasttrack'), + cls('OrnerBoy'), + cls('PensAndTales/Evilish'), + cls('PensAndTales/FireflyCross'), + cls('PetiteSymphony/Djandora'), + cls('PetiteSymphony/Generation17'), + cls('PunksAndNerdsOld'), + cls('RedsPlanet'), + cls('SmackJeeves/AchievementStuck'), + cls('SmackJeeves/Allthatglitters'), + cls('SmackJeeves/AngelBeast'), + cls('SmackJeeves/BeyondTemptation'), + cls('SmackJeeves/Bloodyfairytale'), + cls('SmackJeeves/BLOT'), + cls('SmackJeeves/Captor'), + cls('SmackJeeves/ChaosTheory2005'), + cls('SmackJeeves/Debtsettlement'), + cls('SmackJeeves/DebtSettlement2OperationExtinction'), + cls('SmackJeeves/Destinationunknown'), + cls('SmackJeeves/DevilTrainee'), + cls('SmackJeeves/DevilTraineeSpanish'), + cls('SmackJeeves/ElfenLiedDifferences'), + cls('SmackJeeves/FeathersPI'), + cls('SmackJeeves/FireWire'), + cls('SmackJeeves/FrogKing'), + cls('SmackJeeves/FuckMyLife'), + cls('SmackJeeves/FurtherDowntheRabbitHole'), + cls('SmackJeeves/GATEKEEPER'), + cls('SmackJeeves/GearTheTakedown'), + cls('SmackJeeves/GraveImpressions'), + cls('SmackJeeves/GreenKirbyandabunchofotherpeopledoinstuff'), + cls('SmackJeeves/Harfang'), + cls('SmackJeeves/HotChocolate'), + cls('SmackJeeves/Ianua'), + cls('SmackJeeves/ImminentMoose'), + cls('SmackJeeves/InthePride'), + cls('SmackJeeves/Knife'), + cls('SmackJeeves/Kranburn'), + cls('SmackJeeves/LoveTwister'), + cls('SmackJeeves/MegaManiacs'), + cls('SmackJeeves/MewsDynasty'), + cls('SmackJeeves/MomthegamestorerippedusoffAGAIN'), + cls('SmackJeeves/MoonlitDawnAMythicalTale'), + cls('SmackJeeves/NihilWandasJourney'), + cls('SmackJeeves/OddContact'), + cls('SmackJeeves/OneFrameGags'), + cls('SmackJeeves/Plotlessnesses'), + cls('SmackJeeves/PRAGUERACE'), + cls('SmackJeeves/PumpkinFlower'), + cls('SmackJeeves/Razor'), + cls('SmackJeeves/SAKANA'), + cls('SmackJeeves/SerendipityAnEquestrianTale'), + cls('SmackJeeves/ShacklesInstallment02'), + cls('SmackJeeves/TechnicolorLondon'), + cls('SmackJeeves/TeKscloset'), + cls('SmackJeeves/TheAttackoftheRecoloursSeason1'), + cls('SmackJeeves/ThehumanBEing'), + cls('SmackJeeves/TheMewExperiment'), + cls('SmackJeeves/TheRandomObscureFairyTaleNoOnesEverReallyHeardOf'), + cls('SmackJeeves/TheSomewhereOther'), + cls('SmackJeeves/TitleUnrelated'), + cls('SmackJeeves/TotalPokemonIsland'), + cls('SmackJeeves/TrillyAndSilly'), + cls('SmackJeeves/VampireFetish'), + cls('SmackJeeves/WolfWolf'), + cls('SmackJeeves/WonderTheatre'), + cls('SnowFlakes'), + cls('StrawberryDeathCake'), + cls('Stubble'), + cls('SuburbanTribe'), + cls('TheOuterQuarter'), + cls('ThunderAndLightning'), + cls('TinyKittenTeeth'), + cls('TwoTwoOneFour'), + cls('WayfarersMoon'), + cls('WebcomicsNation/AgnesQuill'), + cls('WebcomicsNation/MyMuse'), + cls('WebcomicsNation/NekkoAndJoruba'), + cls('WhiteNinja'), + cls('WLP/ShadowChasers'), + cls('WotNow'), + ] + + +class Renamed(Scraper): + MSG = 'Comic module was renamed to "%s", please rename the directory.' + count = 0 + + @classmethod + def counter(cls): + cls.count += 1 + return cls.count + + def __init__(self, name, newname): + super(Renamed, self).__init__(name) + self.newname = newname + self.i = self.counter() + + def getDisabledReasons(self): + return {'ren-%i' % self.i: self.MSG % self.newname} + + @classmethod + def getmodules(cls): + return [ + # Renamed in 2.16 + cls('1997', '1977'), + cls('ComicFury/Alya', 'ComicFury/AlyaTheLastChildOfLight'), + cls('ComicFury/Boatcrash', 'ComicFury/BoatcrashChronicles'), + cls('ComicFury/Crimsonpixel', 'ComicFury/CrimsonPixelComics'), + cls('ComicFury/Doublejump', 'ComicFury/DoubleJumpGameComics'), + cls('ComicFury/Elektroanthology', 'ComicFury/ElektrosComicAnthology'), + cls('Creators/ArchieinSpanish', 'Creators/ArchieSpanish'), + cls('Creators/HeathcliffinSpanish', 'Creators/HeathcliffSpanish'), + cls('Creators/TheWizardofIdinSpanish', 'Creators/WizardOfIdSpanish'), + cls('DarkWings', 'Eryl'), + cls('FoulLanguage', 'GoComics/FowlLanguage'), + cls('Wulffmorgenthaler', 'WuMo'), + cls('ZebraGirl', 'ComicFury/ZebraGirl'), + ] diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py index c13c4b038..49b560437 100644 --- a/dosagelib/scraper.py +++ b/dosagelib/scraper.py @@ -535,7 +535,7 @@ def find_scrapers(comic, multiple_allowed=False): raise ValueError("empty comic name") candidates = [] cname = comic.lower() - for scrapers in get_scrapers(): + for scrapers in get_scrapers(include_removed=True): lname = scrapers.name.lower() if lname == cname: # perfect match @@ -543,7 +543,7 @@ def find_scrapers(comic, multiple_allowed=False): return [scrapers] else: candidates.append(scrapers) - elif cname in lname: + elif cname in lname and scrapers.url: candidates.append(scrapers) if len(candidates) > 1 and not multiple_allowed: comics = ", ".join(x.name for x in candidates) @@ -556,7 +556,7 @@ def find_scrapers(comic, multiple_allowed=False): _scrapers = None -def get_scrapers(): +def get_scrapers(include_removed=False): """Find all comic scraper classes in the plugins directory. The result is cached. @return: list of Scraper classes @@ -572,7 +572,10 @@ def get_scrapers(): check_scrapers() out.debug(u"... %d modules loaded from %d classes." % ( len(_scrapers), len(plugins))) - return _scrapers + if include_removed: + return _scrapers + else: + return [x for x in _scrapers if x.url] def check_scrapers():