Keep track of removed and moved comics (fixes #41).

I plan to keep this list for at least two releases and then purge
the older entries.
This commit is contained in:
Tobias Gruetzmacher 2016-06-05 21:47:58 +02:00
parent 9b755a7e6c
commit df2048cb34
6 changed files with 364 additions and 21 deletions

View file

@ -7,8 +7,9 @@ from __future__ import absolute_import, division, print_function
import os
import argparse
import six
from . import events, configuration, singleton, director, __version__
from . import events, configuration, singleton, director, scraper, __version__
from .output import out
from .util import internal_error, strlimit
@ -89,6 +90,10 @@ def setup_options():
# multimatch is only used for development, eg. testing if all comics of a scripted plugin are working
parser.add_argument('--multimatch', action='store_true',
help=argparse.SUPPRESS)
# List all comic modules, even those normally suppressed, because they
# are not "real" (moved & removed)
parser.add_argument('--list-all', action='store_true',
help=argparse.SUPPRESS)
parser.add_argument('comic', nargs='*',
help='comic module name (including case insensitive substrings)')
try:
@ -157,7 +162,7 @@ def display_comic_help(scraperobj):
orig_context = out.context
out.context = scraperobj.name
try:
out.info(u"URL: " + scraperobj.url)
out.info(u"URL: " + six.text_type(scraperobj.url))
out.info(u"Language: " + scraperobj.language())
if scraperobj.adult:
out.info(u"Adult comic, use option --adult to fetch.")
@ -225,8 +230,9 @@ def run(options):
return display_version(options.verbose)
if options.list:
return do_list()
if options.singlelist:
return do_list(column_list=False, verbose=options.verbose)
if options.singlelist or options.list_all:
return do_list(column_list=False, verbose=options.verbose,
listall=options.list_all)
# after this a list of comic strips is needed
if not options.comic:
out.warn(u'No comics specified, bailing out!')
@ -238,14 +244,14 @@ def run(options):
return director.getComics(options)
def do_list(column_list=True, verbose=False):
def do_list(column_list=True, verbose=False, listall=False):
"""List available comics."""
with out.pager():
out.info(u'Available comic scrapers:')
out.info(u'Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
scrapers = sorted(director.getAllScrapers(listing=True),
key=lambda s: s.name)
scrapers = sorted(scraper.get_scrapers(listall),
key=lambda s: s.name.lower())
if column_list:
num, disabled = do_column_list(scrapers)
else:
@ -267,7 +273,7 @@ def do_single_list(scrapers, verbose=False):
display_comic_help(scraperobj)
else:
out.info(get_tagged_scraper_name(scraperobj, reasons=disabled))
return num, disabled
return num + 1, disabled
def do_column_list(scrapers):

View file

@ -190,18 +190,13 @@ def finish():
out.warn("Waiting for download threads to finish.")
def getAllScrapers(listing=False):
    """Return the scrapers selected by the special comic name '@@'.

    Thin convenience wrapper: the ``listing`` flag is forwarded unchanged
    to getScrapers().
    """
    wanted = ['@@']
    return getScrapers(wanted, listing=listing)
def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listing=False):
"""Get scraper objects for the given comics."""
if '@' in comics:
# only scrapers whose directory already exists
if len(comics) > 1:
out.warn(u"using '@' as comic name ignores all other specified comics.")
for scraperobj in scraper.get_scrapers():
for scraperobj in scraper.get_scrapers(include_removed=True):
dirname = getDirname(scraperobj.name)
if os.path.isdir(os.path.join(basepath, dirname)):
if shouldRunScraper(scraperobj, adult, listing):

View file

@ -152,7 +152,6 @@ class Eryl(_WordPressScraper):
firstStripUrl = url
latestSearch = WP_LATEST_SEARCH
starter = indirectStarter
help = 'This was known as DarkWings in previous Dosage versions'
class EverybodyLovesEricRaymond(_BasicScraper):

View file

@ -22,6 +22,4 @@ class KindOfNormal(_ParserScraper):
cls('MeAndDanielle', 'meanddanielle'),
cls('TruthFacts', 'truthfacts'),
cls('Wumo', 'wumo'),
# name in previous versions
cls('Wulffmorgenthaler', 'wumo'),
]

342
dosagelib/plugins/old.py Normal file
View file

@ -0,0 +1,342 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from ..scraper import Scraper
class Removed(Scraper):
    """Stand-in scraper for a comic module that has been removed.

    Every instance carries a short reason code (a key of ``REASONS``) which
    is reported through getDisabledReasons().
    """

    # Reason code -> explanation shown to the user.
    REASONS = {
        'jsh': ('Site is very JavaScript-heavy, writing a module would be'
                ' very complicated.'),
        'del': 'Comic was removed from the web.',
        'block': 'The comic site is blocking us.',
        'unk': 'Comic was removed for an unknown reason.',
    }

    def __init__(self, name, reason='del'):
        """Store the module name and the code explaining its removal.

        The code is not validated here; it is only looked up in ``REASONS``
        when getDisabledReasons() is called.
        """
        super(Removed, self).__init__(name)
        self.reason = reason

    def getDisabledReasons(self):
        """Return a single-entry dict mapping 'rem-<code>' to its text."""
        code = self.reason
        key = 'rem-' + code
        return {key: self.REASONS[code]}

    @classmethod
    def getmodules(cls):
        """Build one instance per removed comic, in the order listed below."""
        # Entries whose reason differs from the constructor default.
        special = {
            'BoyOnAStickAndSlither': 'jsh',
            'CtrlAltDel': 'block',
            'CtrlAltDel/Sillies': 'block',
            'FowlLanguage': 'block',
        }
        names = (
            # Removed in 2.16
            'AbleAndBaker',
            'AlsoBagels',
            'Antics',
            'Arcamax/BleekerTheRechargeableDog',
            'BackwaterPlanet',
            'BigFatWhale',
            'Blip',
            'BoxerHockey',
            'BoyOnAStickAndSlither',
            'BrentalFloss',
            'BrentalFloss/FlossedInTime',
            'BrentalFloss/GuestComics',
            'BrightlyWound',
            'CheckerboardNightmare',
            'ComicFury/30years',
            'ComicFury/AAB',
            'ComicFury/AdventuresofMaggie',
            'ComicFury/Aether',
            'ComicFury/Afairtrade',
            'ComicFury/Afrodays',
            'ComicFury/Albinobros',
            'ComicFury/Alexanderandlucas',
            'ComicFury/Alittlebitofeverything',
            'ComicFury/Americanextremists',
            'ComicFury/AmericanNerd',
            'ComicFury/Amtheatre',
            'ComicFury/Angstcomic',
            'ComicFury/Applepine',
            'ComicFury/Atm',
            'ComicFury/Atomicmonkey',
            'ComicFury/Baseballcapsandtiaras',
            'ComicFury/BATB',
            'ComicFury/BetweenRounds',
            'ComicFury/BrokenReality',
            'ComicFury/BTTF',
            'ComicFury/Cannonadeofhogwash',
            'ComicFury/CatHero',
            'ComicFury/Chocolava',
            'ComicFury/ChristianHumberReloaded',
            'ComicFury/Cockeyed',
            'ComicFury/CoftheA',
            'ComicFury/CompanyMan',
            'ComicFury/Complicatedd',
            'ComicFury/Conplicated',
            'ComicFury/Crowbar',
            'ComicFury/Crowbars',
            'ComicFury/Curvyboneyosis',
            'ComicFury/Dandk',
            'ComicFury/Davidandtherobot',
            'ComicFury/DenizensAttentionComic',
            'ComicFury/Disturbingcomics',
            'ComicFury/Docapoc',
            'ComicFury/Elfcomic',
            'ComicFury/EMT',
            'ComicFury/EternityC',
            'ComicFury/Fathead',
            'ComicFury/Fexpression',
            'ComicFury/FireBorn2',
            'ComicFury/Foxtales',
            'ComicFury/Fpk',
            'ComicFury/Ghostassassin',
            'ComicFury/Gillimurphy',
            'ComicFury/Glomshire',
            'ComicFury/Goldrushdynllewcomics',
            'ComicFury/Grandline3point5',
            'ComicFury/Halloween2012',
            'ComicFury/Halloween2013',
            'ComicFury/HIRI',
            'ComicFury/Hitmen',
            'ComicFury/Honeyvenom',
            'ComicFury/Insanitycorp',
            'ComicFury/Inviziblecomixgroup',
            'ComicFury/Isb',
            'ComicFury/Its',
            'ComicFury/Jenfferscartoonphotomanipulaion',
            'ComicFury/Jenffersshow',
            'ComicFury/Joysworldcomic',
            'ComicFury/Judgedred',
            'ComicFury/Jump2',
            'ComicFury/Kachingcomic',
            'ComicFury/Kazaandgwenna',
            'ComicFury/Kevinzombie',
            'ComicFury/Kindergardencrisis',
            'ComicFury/Kirahitogame',
            'ComicFury/Ladyspectra',
            'ComicFury/Lastcallcomic',
            'ComicFury/Lazy',
            'ComicFury/Lena',
            'ComicFury/Letitride',
            'ComicFury/Lola2',
            'ComicFury/LORDDARKE',
            'ComicFury/Lp',
            'ComicFury/LucidsDream',
            'ComicFury/Lvl30psy',
            'ComicFury/Maddog',
            'ComicFury/Magisa',
            'ComicFury/Merelymortal',
            'ComicFury/Midnightpeanutbutter',
            'ComicFury/Minarga',
            'ComicFury/MoizmadComix',
            'ComicFury/Moths',
            'ComicFury/MyHorribleSite',
            'ComicFury/Neighborscomic',
            'ComicFury/Nojetpack',
            'ComicFury/NoSongs',
            'ComicFury/Nostalgiaofeden',
            'ComicFury/Ocarinaoftim',
            'ComicFury/Pandemonium',
            'ComicFury/Paperstreamer',
            'ComicFury/Peepsnperks',
            'ComicFury/PersonaFTW',
            'ComicFury/Pilgrimenespanol',
            'ComicFury/Pilgrimsprogress',
            'ComicFury/PiratesLife',
            'ComicFury/PobrePucho',
            'ComicFury/Poussiere',
            'ComicFury/Pt',
            'ComicFury/Punch',
            'ComicFury/Rangerrandom',
            'ComicFury/Raspcat',
            'ComicFury/RealLifeTrips',
            'ComicFury/Romanjack',
            'ComicFury/RPS',
            'ComicFury/RPT',
            'ComicFury/Rvr',
            'ComicFury/Sarakleeyo',
            'ComicFury/Sawbladersblacknuzlocke',
            'ComicFury/Schizmatic',
            'ComicFury/Seconds',
            'ComicFury/SeeYourFeels',
            'ComicFury/Serengetti',
            'ComicFury/Shonenpunkremix',
            'ComicFury/Sinjetpack',
            'ComicFury/Spf1337',
            'ComicFury/Sscomic',
            'ComicFury/Teenagedragon',
            'ComicFury/Theashes',
            'ComicFury/TheButterflyEffect',
            'ComicFury/Thecrease',
            'ComicFury/TwentyQuidAmusements',
            'ComicFury/Wowwithatwistdamaclesandkejallcomic',
            'ComicGenesis/CryHavoc',
            'ComicGenesis/SueosdelSur',
            'Commissioned',
            'Creators/BCinSpanish',
            'Creators/GirlsandSportsinSpanish',
            'Creators/RugratsinSpanish',
            'CtrlAltDel',
            'CtrlAltDel/Sillies',
            'DeathToTheExtremist',
            'Ellerbisms',
            'Eriadan',
            'FeyWinds',
            'FilibusterCartoons',
            'FowlLanguage',
            'GoComics/BenSargent',
            'GoComics/BillyAndCo',
            'GoComics/BlackboardDaze',
            'GoComics/Boogerbrain',
            'GoComics/BotBrothers',
            'GoComics/BrilliantMines',
            'GoComics/BuffaloChips',
            'GoComics/BuzzaWuzza',
            'GoComics/Cortoons',
            'GoComics/FrankBlunt',
            'GoComics/GoComicsontheRoad',
            'GoComics/HIP',
            'GoComics/HoodootheUnwiseOwl',
            'GoComics/JustPosted',
            'GoComics/KatetheGreat',
            'GoComics/MidLifewAlan',
            'GoComics/Pi',
            'GoComics/PigtimesCartoon',
            'GoComics/RedMeat',
            'GoComics/RicigsToonTrivia',
            'GoComics/SantavsDracula',
            'GoComics/Scurvyville',
            'GoComics/TheAcerbicCaf',
            'GoComics/TheAdventuresofTeetyBallerina',
            'GoComics/TOWHOMITMAYCONCERN',
            'GoComics/ZacharyNixonJohnson',
            'GunnerkrigCourt',
            'HorribleVille',
            'KatzenfutterGeleespritzer',
            'KillerKomics',
            'Lint',
            'LinuxComFridayFunnies',
            'NekkoAndJoruba',
            'NekoTheKitty',
            'NewAdventuresOfBobbin',
            'Nnewts',
            'OddFish',
            'OneQuestion',
            'OnTheFasttrack',
            'OrnerBoy',
            'PensAndTales/Evilish',
            'PensAndTales/FireflyCross',
            'PetiteSymphony/Djandora',
            'PetiteSymphony/Generation17',
            'PunksAndNerdsOld',
            'RedsPlanet',
            'SmackJeeves/AchievementStuck',
            'SmackJeeves/Allthatglitters',
            'SmackJeeves/AngelBeast',
            'SmackJeeves/BeyondTemptation',
            'SmackJeeves/Bloodyfairytale',
            'SmackJeeves/BLOT',
            'SmackJeeves/Captor',
            'SmackJeeves/ChaosTheory2005',
            'SmackJeeves/Debtsettlement',
            'SmackJeeves/DebtSettlement2OperationExtinction',
            'SmackJeeves/Destinationunknown',
            'SmackJeeves/DevilTrainee',
            'SmackJeeves/DevilTraineeSpanish',
            'SmackJeeves/ElfenLiedDifferences',
            'SmackJeeves/FeathersPI',
            'SmackJeeves/FireWire',
            'SmackJeeves/FrogKing',
            'SmackJeeves/FuckMyLife',
            'SmackJeeves/FurtherDowntheRabbitHole',
            'SmackJeeves/GATEKEEPER',
            'SmackJeeves/GearTheTakedown',
            'SmackJeeves/GraveImpressions',
            'SmackJeeves/GreenKirbyandabunchofotherpeopledoinstuff',
            'SmackJeeves/Harfang',
            'SmackJeeves/HotChocolate',
            'SmackJeeves/Ianua',
            'SmackJeeves/ImminentMoose',
            'SmackJeeves/InthePride',
            'SmackJeeves/Knife',
            'SmackJeeves/Kranburn',
            'SmackJeeves/LoveTwister',
            'SmackJeeves/MegaManiacs',
            'SmackJeeves/MewsDynasty',
            'SmackJeeves/MomthegamestorerippedusoffAGAIN',
            'SmackJeeves/MoonlitDawnAMythicalTale',
            'SmackJeeves/NihilWandasJourney',
            'SmackJeeves/OddContact',
            'SmackJeeves/OneFrameGags',
            'SmackJeeves/Plotlessnesses',
            'SmackJeeves/PRAGUERACE',
            'SmackJeeves/PumpkinFlower',
            'SmackJeeves/Razor',
            'SmackJeeves/SAKANA',
            'SmackJeeves/SerendipityAnEquestrianTale',
            'SmackJeeves/ShacklesInstallment02',
            'SmackJeeves/TechnicolorLondon',
            'SmackJeeves/TeKscloset',
            'SmackJeeves/TheAttackoftheRecoloursSeason1',
            'SmackJeeves/ThehumanBEing',
            'SmackJeeves/TheMewExperiment',
            'SmackJeeves/TheRandomObscureFairyTaleNoOnesEverReallyHeardOf',
            'SmackJeeves/TheSomewhereOther',
            'SmackJeeves/TitleUnrelated',
            'SmackJeeves/TotalPokemonIsland',
            'SmackJeeves/TrillyAndSilly',
            'SmackJeeves/VampireFetish',
            'SmackJeeves/WolfWolf',
            'SmackJeeves/WonderTheatre',
            'SnowFlakes',
            'StrawberryDeathCake',
            'Stubble',
            'SuburbanTribe',
            'TheOuterQuarter',
            'ThunderAndLightning',
            'TinyKittenTeeth',
            'TwoTwoOneFour',
            'WayfarersMoon',
            'WebcomicsNation/AgnesQuill',
            'WebcomicsNation/MyMuse',
            'WebcomicsNation/NekkoAndJoruba',
            'WhiteNinja',
            'WLP/ShadowChasers',
            'WotNow',
        )
        # Names without a special reason rely on the constructor default.
        return [
            cls(name, special[name]) if name in special else cls(name)
            for name in names
        ]
class Renamed(Scraper):
    """Stand-in scraper for a comic module that now has a different name.

    getDisabledReasons() reports the new name together with a request to
    rename the comic's directory.
    """

    MSG = 'Comic module was renamed to "%s", please rename the directory.'
    # Class-wide tally of created instances; gives each reason key a number.
    count = 0

    @classmethod
    def counter(cls):
        """Advance the class-wide tally by one and return the new value."""
        cls.count = cls.count + 1
        return cls.count

    def __init__(self, name, newname):
        """Store the old and new module names and take the next number."""
        super(Renamed, self).__init__(name)
        self.newname = newname
        self.i = self.counter()

    def getDisabledReasons(self):
        """Return a single-entry dict mapping 'ren-<number>' to the message."""
        key = 'ren-%i' % self.i
        message = self.MSG % self.newname
        return {key: message}

    @classmethod
    def getmodules(cls):
        """Build one instance per (old name, new name) pair listed below."""
        renames = (
            # Renamed in 2.16
            ('1997', '1977'),
            ('ComicFury/Alya', 'ComicFury/AlyaTheLastChildOfLight'),
            ('ComicFury/Boatcrash', 'ComicFury/BoatcrashChronicles'),
            ('ComicFury/Crimsonpixel', 'ComicFury/CrimsonPixelComics'),
            ('ComicFury/Doublejump', 'ComicFury/DoubleJumpGameComics'),
            ('ComicFury/Elektroanthology',
             'ComicFury/ElektrosComicAnthology'),
            ('Creators/ArchieinSpanish', 'Creators/ArchieSpanish'),
            ('Creators/HeathcliffinSpanish', 'Creators/HeathcliffSpanish'),
            ('Creators/TheWizardofIdinSpanish', 'Creators/WizardOfIdSpanish'),
            ('DarkWings', 'Eryl'),
            ('FoulLanguage', 'GoComics/FowlLanguage'),
            # NOTE(review): the KindOfNormal plugin appears to register this
            # comic as 'Wumo' - confirm the casing of 'WuMo' here.
            ('Wulffmorgenthaler', 'WuMo'),
            ('ZebraGirl', 'ComicFury/ZebraGirl'),
        )
        return [cls(oldname, newname) for oldname, newname in renames]

View file

@ -535,7 +535,7 @@ def find_scrapers(comic, multiple_allowed=False):
raise ValueError("empty comic name")
candidates = []
cname = comic.lower()
for scrapers in get_scrapers():
for scrapers in get_scrapers(include_removed=True):
lname = scrapers.name.lower()
if lname == cname:
# perfect match
@ -543,7 +543,7 @@ def find_scrapers(comic, multiple_allowed=False):
return [scrapers]
else:
candidates.append(scrapers)
elif cname in lname:
elif cname in lname and scrapers.url:
candidates.append(scrapers)
if len(candidates) > 1 and not multiple_allowed:
comics = ", ".join(x.name for x in candidates)
@ -556,7 +556,7 @@ def find_scrapers(comic, multiple_allowed=False):
_scrapers = None
def get_scrapers():
def get_scrapers(include_removed=False):
"""Find all comic scraper classes in the plugins directory.
The result is cached.
@return: list of Scraper classes
@ -572,7 +572,10 @@ def get_scrapers():
check_scrapers()
out.debug(u"... %d modules loaded from %d classes." % (
len(_scrapers), len(plugins)))
return _scrapers
if include_removed:
return _scrapers
else:
return [x for x in _scrapers if x.url]
def check_scrapers():