diff --git a/dosagelib/plugins/creators.py b/dosagelib/plugins/creators.py
index c297f39d6..f609dc6f5 100644
--- a/dosagelib/plugins/creators.py
+++ b/dosagelib/plugins/creators.py
@@ -1,79 +1,143 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
+# Copyright (C) 2015 Tobias Gruetzmacher
from re import compile
-from ..scraper import make_scraper
+from ..scraper import _ParserScraper
from ..util import tagre
-_imageSearch = compile(tagre("a", "href", r'(/comics/\d+/[^"]+)'))
+class _Creators(_ParserScraper):  # shared base for the creators.com comics in this module; subclasses set ``path``
+ url = 'http://www.creators.com/comics/'  # common prefix; starter() and getIndexStripUrl() append ``path`` to it
+ imageSearch = '//td/a[@class="z"]'  # XPath; presumably evaluated by _ParserScraper to find the image link - confirm
+ prevSearch = '//a[contains(@class,"time_l")]'  # XPath; presumably the link to the previous strip - confirm
+ help = 'Index format: n'
-def add(name, path):
- baseUrl = 'http://www.creators.com'
- classname = 'Creators_%s' % name
- globals()[classname] = make_scraper(classname,
- name = 'Creators/' + name,
- url = baseUrl + path + '.html',
- stripUrl = baseUrl + path + '/%s.html',
- lang = 'es' if name.lower().endswith('spanish') else 'en',
- imageSearch = _imageSearch,
- prevSearch = compile(tagre("a", "href", r'(%s/\d+\.html)' % path) +
- tagre("img", "src", r'/img_comics/arrow_l\.gif')),
- help = 'Index format: n',
- )
+ @classmethod
+ def getName(cls):  # 'Creators/' followed by the subclass name, e.g. 'Creators/Archie'
+ return 'Creators/' + cls.__name__
-# do not edit anything below since these entries are generated from scripts/update.sh
+ @classmethod
+ def starter(cls):  # start page of a comic: <url><path>.html
+ return cls.url + cls.path + '.html'
+
+ def getIndexStripUrl(self, index):  # page of a single strip: <url><path>/<index>.html
+ return self.url + self.path + '/%s.html' % index
+
+class _CreatorsEs(_Creators):
+    """Base class for the Spanish-language Creators comics."""
+    lang = 'es'
+
+    def shouldSkipUrl(self, url, data):
+        """Return True for strip pages whose images are known to be 404."""
+        # Trailing comma makes this a tuple; without it ``in`` is a substring test.
+        return url in (self.url + 'heathcliff-spanish/139736.html',)
+
+# Some comics are not listed on the "all" page (too old?)
+class WinnieThePooh(_Creators):
+ path = u'winnie-the-pooh'
+
+class Recess(_Creators):
+ path = u'recess'
+
+class NaturalSelection(_Creators):
+ path = u'natural-selection'
+
+class FlightDeck(_Creators):  # NOTE(review): the generated list below defines FlightDeck again and rebinds this name - confirm this copy is still needed
+ path = u'flight-deck'
+
+# do not edit anything below since these entries are generated from scripts/update_plugins.sh
# DO NOT REMOVE
-# duplicate of gocomics add('Agnes', '/comics/agnes')
-# duplicate of gocomics add('AndyCapp', '/comics/andy-capp')
-add('Archie', '/comics/archie')
-add('ArchieinSpanish', '/comics/archie-spanish')
-# duplicate of gocomics add('AskShagg', '/comics/ask-shagg')
-# duplicate of gocomics add('BC', '/comics/bc')
-add('BCinSpanish', '/comics/bc-spanish')
-# duplicate of gocomics add('BallardStreet', '/comics/ballard-street')
-add('CafeconLeche', '/comics/cafe-con-leche')
-# duplicate of gocomics add('ChuckleBros', '/comics/chuckle-bros')
-# duplicate of gocomics add('DaddysHome', '/comics/daddys-home')
-# duplicate of gocomics add('DiamondLil', '/comics/diamond-lil')
-# duplicate of gocomics add('DogEatDoug', '/comics/dog-eat-doug')
-# duplicate of gocomics add('DogsofCKennel', '/comics/dogs-of-c-kennel')
-add('DonaldDuck', '/comics/donald-duck')
-add('Flare', '/comics/flare')
-add('FlightDeck', '/comics/flight-deck')
-# duplicate of gocomics add('FloandFriends', '/comics/flo-and-friends')
-# duplicate of gocomics add('ForHeavensSake', '/comics/for-heavens-sake')
-# duplicate of gocomics add('FreeRange', '/comics/free-range')
-add('GirlsAndSports', '/comics/girls-and-sports')
-add('GirlsandSportsinSpanish', '/comics/girls-and-sports-spanish')
-# duplicate of gocomics add('Heathcliff', '/comics/heathcliff')
-add('HeathcliffinSpanish', '/comics/heathcliff-spanish')
-# duplicate of gocomics add('HerbandJamaal', '/comics/herb-and-jamaal')
-add('HomeOffice', '/comics/stay-at-home-dad')
-add('HopeAndDeath', '/comics/hope-and-death')
-# duplicate of gocomics add('LibertyMeadows', '/comics/liberty-meadows')
-add('LongStoryShort', '/comics/long-story-short')
-add('MickeyMouse', '/comics/mickey-mouse')
-# duplicate of gocomics add('Momma', '/comics/momma')
-# duplicate of gocomics add('NestHeads', '/comics/nest-heads')
-add('OffCenter', '/comics/off-center')
-# duplicate of gocomics add('OnaClaireDay', '/comics/on-a-claire-day')
-# duplicate of gocomics add('OneBigHappy', '/comics/one-big-happy')
-add('Recess', '/comics/recess')
-# duplicate of gocomics add('Rubes', '/comics/rubes')
-add('Rugrats', '/comics/rugrats')
-add('RugratsinSpanish', '/comics/rugrats-spanish')
-# duplicate of gocomics add('ScaryGary', '/comics/scary-gary')
-# duplicate of gocomics add('SpeedBump', '/comics/speed-bump')
-# duplicate of gocomics add('StrangeBrew', '/comics/strange-brew')
-# duplicate of gocomics add('TheBarn', '/comics/the-barn')
-# duplicate of gocomics add('TheDinetteSet', '/comics/dinette-set')
-# duplicate of gocomics add('TheMeaningofLila', '/comics/meaning-of-lila')
-# duplicate of gocomics add('TheOtherCoast', '/comics/the-other-coast')
-add('TheQuigmans', '/comics/the-quigmans')
-add('TheWizardofIdinSpanish', '/comics/wizard-of-id-spanish')
-# duplicate of gocomics add('ThinLines', '/comics/thin-lines')
-# duplicate of gocomics add('WeePals', '/comics/wee-pals')
-# duplicate of gocomics add('WizardofId', '/comics/wizard-of-id')
-# duplicate of gocomics add('WorkingitOut', '/comics/working-it-out')
-# duplicate of gocomics add('ZackHill', '/comics/zack-hill')
+# Agnes has a duplicate in gocomics
+# AndyCapp has a duplicate in gocomics
+class Archie(_Creators):
+ path = u'archie'
+
+class ArchieinSpanish(_CreatorsEs):
+ path = u'archie-spanish'
+
+# AskShagg has a duplicate in gocomics
+# BC has a duplicate in gocomics
+class BCinSpanish(_CreatorsEs):
+ path = u'bc-spanish'
+
+# BallardStreet has a duplicate in gocomics
+class CafeconLeche(_Creators):
+ path = u'cafe-con-leche'
+
+# ChuckleBros has a duplicate in gocomics
+# DaddysHome has a duplicate in gocomics
+# DiamondLil has a duplicate in gocomics
+# DogEatDoug has a duplicate in gocomics
+# DogsofCKennel has a duplicate in gocomics
+class DonaldDuck(_Creators):
+ path = u'donald-duck'
+
+class Doodles(_Creators):
+ path = u'doodles'
+
+class Flare(_Creators):
+ path = u'flare'
+
+class FlightDeck(_Creators):
+ path = u'flight-deck'
+
+# FloandFriends has a duplicate in gocomics
+# ForHeavensSake has a duplicate in gocomics
+# FreeRange has a duplicate in gocomics
+class GirlsAndSports(_Creators):
+ path = u'girls-and-sports'
+
+class GirlsandSportsinSpanish(_CreatorsEs):
+ path = u'girls-and-sports-spanish'
+
+# Heathcliff has a duplicate in gocomics
+class HeathcliffinSpanish(_CreatorsEs):
+ path = u'heathcliff-spanish'
+
+# HerbandJamaal has a duplicate in gocomics
+class HomeOffice(_Creators):
+ path = u'stay-at-home-dad'
+
+class HopeAndDeath(_Creators):
+ path = u'hope-and-death'
+
+# LibertyMeadows has a duplicate in gocomics
+class LongStoryShort(_Creators):
+ path = u'long-story-short'
+
+class MickeyMouse(_Creators):
+ path = u'mickey-mouse'
+
+# Momma has a duplicate in gocomics
+# NestHeads has a duplicate in gocomics
+class OffCenter(_Creators):
+ path = u'off-center'
+
+# OnaClaireDay has a duplicate in gocomics
+# OneBigHappy has a duplicate in gocomics
+# Rubes has a duplicate in gocomics
+class Rugrats(_Creators):
+ path = u'rugrats'
+
+class RugratsinSpanish(_CreatorsEs):
+ path = u'rugrats-spanish'
+
+# ScaryGary has a duplicate in gocomics
+# SpeedBump has a duplicate in gocomics
+# StrangeBrew has a duplicate in gocomics
+# TheBarn has a duplicate in gocomics
+# TheDinetteSet has a duplicate in gocomics
+# TheMeaningofLila has a duplicate in gocomics
+# TheOtherCoast has a duplicate in gocomics
+class TheQuigmans(_Creators):
+ path = u'the-quigmans'
+
+class TheWizardofIdinSpanish(_CreatorsEs):
+ path = u'wizard-of-id-spanish'
+
+# ThinLines has a duplicate in gocomics
+# WeePals has a duplicate in gocomics
+# WizardofId has a duplicate in gocomics
+# WorkingitOut has a duplicate in gocomics
+# ZackHill has a duplicate in gocomics
diff --git a/scripts/creators.py b/scripts/creators.py
index 88b4759ce..6e7ab816c 100755
--- a/scripts/creators.py
+++ b/scripts/creators.py
@@ -16,7 +16,7 @@ from scriptutil import contains_case_insensitive, capfirst, save_result, load_re
json_file = __file__.replace(".py", ".json")
-url_matcher = re.compile(tagre("a", "href", r'(/comics/[^/]+)\.html') + r'([^<]+)')
+url_matcher = re.compile(tagre("a", "href", r'/comics/([^/]+)\.html') + r'([^<]+)')
# names of comics to exclude
exclude_comics = [
@@ -67,16 +67,15 @@ def print_results(args):
"""Print comics."""
min_comics, filename = args
with codecs.open(filename, 'a', 'utf-8') as fp:
- for name, url in sorted(load_result(json_file).items()):
+ for name, path in sorted(load_result(json_file).items()):
if name in exclude_comics:
continue
+ lang = 'Es' if name.lower().endswith('spanish') else ''
if has_gocomics_comic(name):
- prefix = u'# duplicate of gocomics '
+ fp.write(u'# %s has a duplicate in gocomics\n' % truncate_name(name))
else:
- prefix = u''
- fp.write(u"%sadd(%r, %r)\n" % (
- prefix, str(truncate_name(name)), str(url))
- )
+ fp.write(u"class %s(_Creators%s):\n path = %r\n\n" %
+ (truncate_name(name), lang, path))
if __name__ == '__main__':