diff --git a/dosagelib/plugins/universal.py b/dosagelib/plugins/universal.py deleted file mode 100644 index e886dc08e..000000000 --- a/dosagelib/plugins/universal.py +++ /dev/null @@ -1,137 +0,0 @@ -# -*- coding: iso-8859-1 -*- -# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2013 Bastian Kleineidam -""" -The Universal comics only have some samples, but those samples are always the newest ones. -""" -import datetime -from re import compile, escape -from ..scraper import make_scraper -from ..util import tagre, getPageContent - - -def parse_strdate(strdate): - """Parse date string. XXX this is locale dependant but it should not be.""" - return datetime.datetime.strptime(strdate, "%A, %B %d, %Y") - -_imageSearch = compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[^"]+)') + r'\s+

published') - -def add(name, shortname): - url = 'http://www.universaluclick.com%s' % shortname - classname = 'Universal_%s' % name - - @classmethod - def namer(cls, imageUrl, pageUrl): - """Parse publish date from page content which looks like: - Marmaduke -

published: Sunday, November 11, 2012

- """ - data = getPageContent(pageUrl, cls.session)[0] - ro = compile(tagre("img", "src", escape(imageUrl)) + r'\s+

published: ([^<]+)') - mo = ro.search(data) - if mo: - strdate = mo.group(1) - return parse_strdate(strdate).strftime("%Y%m%d") - - globals()[classname] = make_scraper(classname, - name='Universal/' + name, - url = url, - stripUrl = url + '%s/', - imageSearch = _imageSearch, - multipleImagesPerStrip = True, - prevSearch = None, - help = 'Index format: none', - namer = namer, - ) - -# do not edit anything below since these entries are generated from scripts/update.sh -# DO NOT REMOVE -#add('9ChickweedLane', '/comics/strip/9chickweedlane') -#add('AdamAtHome', '/comics/strip/adamathome') -#add('AlleyOop', '/comics/strip/alley-oop') -#add('ArloandJanis', '/comics/strip/arloandjanis') -#add('BadReporter', '/comics/badreporter') -#add('Baldo', '/comics/strip/baldo') -#add('Betty', '/comics/strip/betty') -#add('BigNate', '/comics/strip/bignate') -#add('Biographic', '/comics/strip/biographic') -add('Brevitystrip', '/comics/strip/brevity') -#add('CalvinandHobbes', '/comics/strip/calvinandhobbes') -#add('Cathy', '/comics/strip/cathy') -#add('Cleats', '/comics/strip/cleats') -#add('ClosetoHome', '/comics/panel/closetohome') -#add('Cornered', '/comics/panel/cornered') -#add('CowandBoyClassics', '/comics/strip/cowandboy') -#add('CuldeSac', '/comics/strip/culdesac') -#add('Doonesbury', '/comics/strip/doonesbury') -#add('Drabble', '/comics/strip/drabble') -#add('FMinus', '/comics/strip/fminus') -#add('ForBetterorForWorse', '/comics/strip/forbetterorforworse') -#add('FoxTrot', '/comics/strip/foxtrot') -#add('FrankAndErnest', '/comics/strip/frankandernest') -#add('Frazz', '/comics/strip/frazz') -#add('FredBasset', '/comics/strip/fredbasset') -#add('FreshlySqueezed', '/comics/strip/freshlysqueezed') -#add('Garfield', '/comics/strip/garfield') -#add('GetFuzzy', '/comics/strip/getfuzzy') -#add('GingerMeggs', '/comics/strip/gingermeggs') -#add('Graffiti', '/comics/panel/graffiti') -#add('GrandAvenue', '/comics/strip/grand-avenue') -#add('HealthCapsules', '/comics/panel/healthcapsules') -#add('HeartoftheCity', '/comics/strip/heartofthecity') -#add('Herman', '/comics/panel/herman') -#add('InkPen', '/comics/strip/inkpen') -#add('IntheBleachers', '/comics/panel/inthebleachers') -#add('IntheSticks', '/comics/strip/inthesticks') -#add('JumpStart', '/comics/strip/jumpstart') -#add('KidCity', '/comics/strip/kidcity') -#add('KidSpot', '/comics/panel/kidspot') -#add('KitNCarlyle', '/comics/panel/kitncarlyle') -#add('LaCucaracha', '/comics/strip/lacucaracha') -#add('Lio', '/comics/strip/lio') -#add('Lola', '/comics/strip/lola') -#add('Luann', '/comics/strip/luann') -add('MagicEye', '/comics/strip/magiceye') -#add('MagicinaMinute', '/comics/strip/magicinaminute') -#add('Marmaduke', '/comics/panel/marmaduke') -add('MerlinsWorldofMarvels', '/comics/strip/merlinsworldofmarvels') -#add('ModeratelyConfused', '/comics/panel/moderately-confused') -#add('Monty', '/comics/strip/monty') -#add('MuttAndJeff', '/comics/strip/muttandjeff') -#add('Nancy', '/comics/strip/nancy') -#add('NonSequitur', '/comics/strip/nonsequitur') -add('NonSequiturPanel', '/comics/panel/non-sequitur-panel') -#add('OfftheMark', '/comics/panel/offthemark') -#add('Overboard', '/comics/strip/overboard') -#add('OvertheHedge', '/comics/strip/overthehedge') -#add('Peanuts', '/comics/strip/peanuts') -#add('PearlsBeforeSwine', '/comics/strip/pearlsbeforeswine') -#add('PoochCafe', '/comics/strip/poochcafe') -add('Portuguese', '/comics/category/portuguese') -#add('PricklyCity', '/comics/strip/pricklycity') -#add('RealLifeAdventures', '/comics/panel/reallifeadventures') -#add('RealityCheck', '/comics/panel/realitycheck') -#add('RedandRover', '/comics/strip/redandrover') -#add('RipHaywire', '/comics/strip/riphaywire') -#add('RipleysBelieveItorNot', '/comics/panel/ripleysbelieveitornot') -#add('RoseisRose', '/comics/strip/roseisrose') -#add('RudyPark', '/comics/strip/rudypark') -#add('Shortcuts', '/comics/strip/shortcuts') -#add('SouptoNutz', '/comics/strip/soup-to-nutz') -#add('StoneSoup', '/comics/strip/stonesoup') -#add('TankMcNamara', '/comics/strip/tankmcnamara') -#add('Tarzan', '/comics/strip/tarzan') -#add('Thatababy', '/comics/strip/thatababy') -#add('TheArgyleSweater', '/comics/panel/theargylesweater') -#add('TheBornLoser', '/comics/strip/the-born-loser') -#add('TheBuckets', '/comics/strip/thebuckets') -#add('TheDinetteSet', '/comics/panel/dinetteset') -#add('TheDuplex', '/comics/strip/duplex') -#add('TheElderberries', '/comics/strip/theelderberries') -#add('TheFlyingMcCoys', '/comics/panel/theflyingmccoys') -#add('TheFuscoBrothers', '/comics/strip/thefuscobrothers') -#add('TheGrizzwells', '/comics/strip/thegrizzwells') -#add('TheKnightLife', '/comics/strip/theknightlife') -#add('TomtheDancingBug', '/comics/strip/tomthedancingbug') -#add('UncleArtsFunland', '/comics/strip/uncleartsfunland') -#add('Ziggy', '/comics/panel/ziggy') diff --git a/scripts/generate_json.sh b/scripts/generate_json.sh index 587a9319e..32e588e3a 100755 --- a/scripts/generate_json.sh +++ b/scripts/generate_json.sh @@ -1,8 +1,9 @@ -#!/bin/sh -e +#!/bin/sh +set -e set -u d=$(dirname $0) -for script in creators gocomics drunkduck universal keenspot smackjeeves arcamax; do +for script in creators gocomics drunkduck keenspot smackjeeves arcamax; do echo "Executing ${script}.py" "${d}/${script}.py" done diff --git a/scripts/mktestscript.sh b/scripts/mktestscript.sh index ea62873fc..7ad9f5e2b 100755 --- a/scripts/mktestscript.sh +++ b/scripts/mktestscript.sh @@ -1,5 +1,6 @@ -#!/bin/sh -e +#!/bin/sh # Copyright (C) 2012 Bastian Kleineidam +set -e set -u # generates a convenience test script from failed tests diff --git a/scripts/universal.py b/scripts/universal.py deleted file mode 100755 index 4585fa78b..000000000 --- a/scripts/universal.py +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env python -# Copyright (C) 2012-2013 Bastian Kleineidam -""" -Script to get universal comics and save the info in a JSON file for further processing. -""" -from __future__ import print_function -import re -import sys -import os -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) -from dosagelib.util import getPageContent, asciify, unescape -from dosagelib.scraper import get_scrapers -from scriptutil import contains_case_insensitive, capfirst, save_result, load_result, truncate_name - -json_file = __file__.replace(".py", ".json") - -#
  • 9 Chickweed Lane -url_matcher = re.compile(r'
  • ([^<]+)') - -# names of comics to exclude -exclude_comics = [ - "BusinessAndFinance", # not a comic - "ComicPanel", # not a comic - "ComicsAZ", # not a comic - "ComicStrip", # not a comic - "Espaol", # not a comic - "Family", # not a comic - "ForKids", # not a comic - "JamesBond", # not a comic - "Men", # not a comic - "NEA", # not a comic - "PeanutsPortuguese", # not found - "Pets", # not a comic - "SundayOnly", # not a comic - "WebExclusive", # not a comic - "Women", # not a comic -] - - -def handle_url(url, res): - """Parse one search result page.""" - print("Parsing", url, file=sys.stderr) - try: - data, baseUrl = getPageContent(url) - except IOError as msg: - print("ERROR:", msg, file=sys.stderr) - return - for match in url_matcher.finditer(data): - shortname = match.group(1) - name = unescape(match.group(2)) - name = asciify(name.replace('&', 'And').replace('@', 'At')) - name = capfirst(name) - if name in exclude_comics: - continue - if contains_case_insensitive(res, name): - # we cannot handle two comics that only differ in case - print("INFO: skipping possible duplicate", name, file=sys.stderr) - continue - res[name] = shortname - - -def get_results(): - """Parse all search result pages.""" - # store info in a dictionary {name -> shortname} - res = {} - handle_url('http://www.universaluclick.com/comics/list', res) - save_result(res, json_file) - - -def has_comic(name): - """Check if comic name already exists.""" - cname = ("Creators/%s" % name).lower() - gname = ("GoComics/%s" % name).lower() - for scraperclass in get_scrapers(): - lname = scraperclass.get_name().lower() - if lname == cname or lname == gname: - return True - return False - - -def print_results(args): - """Print all comics that have at least the given number of minimum comic strips.""" - for name, shortname in sorted(load_result(json_file).items()): - if name in exclude_comics: - continue - if has_comic(name): - prefix = '#' - else: - prefix = '' - print("%sadd(%r, %r)" % (prefix, str(truncate_name(name)), str(shortname))) - - -if __name__ == '__main__': - if len(sys.argv) > 1: - print_results(sys.argv[1:]) - else: - get_results() diff --git a/scripts/update_plugins.sh b/scripts/update_plugins.sh index fdf015e81..8a372fdec 100755 --- a/scripts/update_plugins.sh +++ b/scripts/update_plugins.sh @@ -1,11 +1,12 @@ -#!/bin/sh -e +#!/bin/sh # Copyright (C) 2012-2013 Bastian Kleineidam +set -e set -u mincomics=100 d=$(dirname $0) -for script in creators gocomics drunkduck universal keenspot smackjeeves arcamax; do +for script in creators gocomics drunkduck keenspot smackjeeves arcamax; do target="${d}/../dosagelib/plugins/${script}.py" echo "Upating $target" "${d}/removeafter.py" "$target" "# DO NOT REMOVE"