Remove DrunkDuck for now.

- It's been disabled for ages - Needs a major rework - I don't want to add that many comics anyways... - This also gets rid of make_scraper :)
2016-06-05 22:20:11 +02:00 · 2016-06-05 22:20:11 +02:00 · 215d597573
commit 215d597573
parent 67d0d38100
3 changed files with 0 additions and 1542 deletions
--- a/dosagelib/plugins/drunkduck.py
+++ b/dosagelib/plugins/drunkduck.py
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@ -589,8 +589,3 @@ def check_scrapers():
            raise ValueError('duplicate scrapers %s and %s found' %
                             (name1, name2))
        d[name] = scraper
-
-
-def make_scraper(classname, scraperType=_BasicScraper, **attributes):
-    """Make a new scraper class with given name and attributes."""
-    return type(classname, (scraperType,), attributes)
--- a/scripts/drunkduck.py
+++ b/scripts/drunkduck.py
@ -1,239 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2016 Tobias Gruetzmacher
-"""
-Script to get a list of drunkduck comics and save the info in a JSON file for
-further processing.
-"""
-from __future__ import absolute_import, division, print_function
-
-import codecs
-import re
-import sys
-import os
-
-import requests
-
-sys.path.append(os.path.join(os.path.dirname(__file__), ".."))  # noqa
-from dosagelib.util import tagre, get_page, unquote, unescape
-from scriptutil import (contains_case_insensitive, capfirst, save_result,
-                        load_result, truncate_name, asciify)
-
-json_file = __file__.replace(".py", ".json")
-
-# names of comics to exclude
-exclude_comics = [
-    "A_Call_to_Destiny__NC_17", # start page requires login
-    "A_Call_to_Destiny_Reloaded", # start page requires login
-    "A_Day_in_the_Life_for_Erik", # broken images
-    "A_Fairly_Twisted_Reality", # start page requires login
-    "Al_and_Scout", # broken images
-    "ANGELOU_____Las_aventuras_de_Nikole", # broken images
-    "Apartment_408_Full_Size", # broken images
-    "Apple_Valley", # broken images
-    "Apt_408_Minis", # broken images
-    "Art_dump", # broken images
-    "Atxs", # broken images
-    "A_Word_Of_Wisdom", # broken images
-    "Bhaddland", # start page requires login
-    "Binary_Souls_Other_Dimensions", # broken images
-    "BK_Shattered_Hate", # broken images
-    "Blonde_Marvel", # broken images
-    "Bouncing_Orbs_of_Beauty", # start page requires login
-    "Brathalla", # broken images
-    "Busty_Solar", # start page requires login
-    "Caggage", # page moved
-    "Chomp", # broken images
-    "Chu_and_Kenny", # broken images
-    "Coga_Suro_2", # broken images
-    "Crack", # broken images
-    "Creepy_Girl_and_Her_Zombie_Dog", # broken images
-    "CuoreVoodoo", # broken images
-    "Dairyaire", # broken images
-    "Dead_Strangers", # broken images
-    "DIS", # broken images
-    "Dot_TXT", # broken images
-    "Dreadnought_Invasion_Six", # broken images
-    "Drunk_Duck_Awards_2011", # no content
-    "Drunk_Duck_Awards_2012", # no content
-    "Emerald_Winter", # broken images
-    "Enter_the_Duck_2", # broken images
-    "Ffff", # broken images
-    "Found_Art", # broken images
-    "Function_Over_Fashion", # broken images
-    "Funday_Morning", # broken images
-    "Greys_journey", # broken images
-    "Head_over_Heart", # broken images
-    "Hurrocks_Fardel", # broken images
-    "I_Fell_in_Love_With_a_Vampire_Catgirl_Part_2_Lovers_at_the_End_of_the_World", # start page requires login
-    "Illusional_Beauty", # broken images
-    "Indigo_Bunting__Vampire", # start page requires login
-    "Irrumator", # start page requires login
-    "Its_A_Boy_Thing", # start page requires login
-    "Inside_OuT", # broken images
-    "Iron_Wolf", # broken images
-    "Journey_to_Raifina", # broken images
-    "KALA_dan", # broken images
-    "Kokuahiru_comics", # start page requires login
-    "Kuro_Shouri", # page moved
-    "Legacy_of_Blaze", # broken images
-    "Live_to_tell", # start page requires login
-    "Locoma", # broken images
-    "London_Underworld", # broken images
-    "Louder_Than_Bombs", # broken images
-    "Lucky_Dawg", # broken images
-    "Lugnor_Riders", # missing
-    "Mario_in_Johto", # broken images
-    "Mary_Sue_Academy", # borken images
-    "Master", # start page requires login
-    "Mastermind_BTRN", # broken images
-    "MAYA_____The_legend_of_Wolf", # broken images
-    "Megaman_Zero", # broken images
-    "Monster_Lover", # start page is broken
-    "Monster_Lover_Destinys_Path", # start page requires login
-    "M_Organ_Art", # start page requires login
-    "Morning_Squirtz", # start page requires login
-    "MOSAIC", # broken images
-    "My_Angel_and_My_Devil", # broken images
-    "Nemution_Jewel", # start page requires login
-    "Nemution_Redux", # start page requires login
-    "New_Pages", # broken images
-    "NIGHTSHADE_THE_MERRY_WIDOW", # start page requires login
-    "Ninja_Shizatch", # broken images
-    "No_Need_for_Bushido", # duplicate
-    "Normalcy_is_for_Wimps", # broken images
-    "MIKYAGU", # broken images
-    "One_Third_Of_Your_Life_Is_Spent_Sleeping_One_Third_Of_Your_Life_Is_Spent_Working_And_Half_Of_One_Third_Is_Spent_Waiting_The_Question_Is_It_Really_Your_Life", # broken images
-    "OTENBA_Files", # start page requires login
-    "Panacea", # start page requires login
-    "Parker_Lot", # broken images
-    "Peter_And_The_Wolf", # start page requires login
-    "Perspectives", # broken images
-    "Pokemon_Sinnoh_Surfer", # broken images
-    "Pokemon_World_Trainers", # broken images
-    "Potpourri_of_Lascivious_Whimsy", # start page requires login
-    "Pr0nCrest", # start page requires login
-    "Punished_girls", # start page requires login
-    "Powerjeff", # broken images
-    "Comicarotica", # start page requires login
-    "Dark_Sisters", # start page requires login
-    "Death_P0rn", # start page requires login
-    "Dreams_in_Synergy", # broken images
-    "GNight_Shade", # start page requires login
-    "GRIND", # start page requires login
-    "HUSS", # start page requires login
-    "Red_Dog_Venue", # start page is broken
-    "Richas_Erotic_Adventures", # start page requires login
-    "Rubber_girls", # start page requires login
-    "Robomeks", # broken images
-    "Robot_Friday", # broken images
-    "SFA", # start page requires login
-    "Shadow_Root", # start page requires login
-    "Shiro_Karasu", # start page requires login
-    "Shelter_of_Wings", # broken images
-    "Some_Notes", # broken images
-    "Sonic_Advanced_Online", # broken images
-    "Sonic_and_tails_corner", # broken images
-    "Sonic_Unreal", # broken images
-    "Space_Farmer", # start page requires login
-    "Splices_of_Life", # broken images
-    "STARSEARCHERS", # broken images
-    "Tales_of_Schlock", # start page requires login
-    "Ted_The_Terrible_Superhero", # broken images
-    "Terra_online_comic", # broken images
-    "The_Auragon_Base", # broken images
-    "The_Bend", # broken images
-    "The_Chronicles_of_Drew", # broken images
-    "The_Devils_Horn", # broken images
-    "The_Dragon_and_the_Lemur", # start page requires login
-    "The_Fighting_Stranger", # broken images
-    "The_Mighty_Omega", # broken images
-    "The_Misadventures_of_Everyone", # start page requires login
-    "The_NEW_Life_Of_TimmY", # broken images
-    "The_SSA", # broken images
-    "Tinas_Story", # start page requires login
-    "Tony_The_Hedgehog", # broken images
-    "Trapped_in_a_Comic", # start page requires login
-    "Twonks_and_Plonkers", # broken images, no real content
-    "U_Chuu_No_Hoshi_Hotoshi_Tsuko", # broken images
-    "Unsound_of_Mind", # broken images
-    "Vampire_Chronicles__Dark_Lust", # start page requires login
-    "WarMage", # start page requires login
-    "Watashi_No_Ame", # broken images
-    "Weave", # broken images
-    "Weirdlings", # template error
-    "Welcome_To_Border_City", # broken images
-    "What_comes_first", # start page requires login
-    "Within_Shadows", # broken images
-    "Xolta", # start page requires login
-    "XTIN__The_Dragons_Dream_World", # start page requires login
-    "X_UP", # start page requires login
-    "Zandars_Saga", # start page requires login
-]
-
-
-def handle_url(url, session, url_matcher, num_matcher, res):
-    """Parse one search result page."""
-    try:
-        data = get_page(url, session).text
-    except IOError as msg:
-        print("ERROR:", msg, file=sys.stderr)
-        return
-    for match in url_matcher.finditer(data):
-        comicurl = unquote(unescape(match.group(1)))
-        path = comicurl[:-1].rsplit('/')[-1]
-        name = capfirst(asciify(path))
-        if contains_case_insensitive(res, name):
-            # we cannot handle two comics that only differ in case
-            print("INFO: skipping possible duplicate", repr(name), file=sys.stderr)
-            continue
-        if name in exclude_comics:
-            continue
-        # find out how many images this comic has
-        end = match.end(1)
-        mo = num_matcher.search(data[end:])
-        if not mo:
-            print("ERROR:", repr(data[end:end + 300]), file=sys.stderr)
-            continue
-        num = int(mo.group(1))
-        res[name] = (path, num)
-
-
-def get_results():
-    """Parse all search result pages."""
-    base = "http://www.theduckwebcomics.com/search/?page=%d&search=&type=0&type=1&last_update="
-    href = re.compile(tagre("a", "href", r'(/[^"]+/)', before="size24 yanone blue"))
-    num = re.compile(r'(\d+) pages?</span>')
-    # store info in a dictionary {name -> number of comics}
-    res = {}
-    # a search for an empty string returned 825 result pages
-    result_pages = 825
-    print("Parsing", result_pages, "search result pages...", file=sys.stderr)
-    session = requests.Session()
-    for i in range(1, result_pages + 1):
-        print(i, file=sys.stderr, end=" ")
-        handle_url(base % i, session, href, num, res)
-    save_result(res, json_file)
-
-
-def print_results(args):
-    """Print all comics that have at least the given number of minimum comic strips."""
-    min_comics, filename = args
-    min_comics = int(min_comics)
-    with codecs.open(filename, 'a', 'utf-8') as fp:
-        for name, entry in sorted(load_result(json_file).items()):
-            if name in exclude_comics:
-                continue
-            path, num = entry
-            if num >= min_comics:
-                fp.write(u"add(%r, %r)\n" % (
-                  str(truncate_name(name)), str(path)))
-
-
-if __name__ == '__main__':
-    if len(sys.argv) > 1:
-        print_results(sys.argv[1:])
-    else:
-        get_results()