Remove make_scraper magic from creators module.

This commit is contained in:
Tobias Gruetzmacher 2015-11-04 23:43:31 +01:00
parent 94470d564c
commit 7f7a69818b
2 changed files with 139 additions and 76 deletions

View file

@ -1,79 +1,143 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015 Tobias Gruetzmacher
from re import compile
from ..scraper import make_scraper
from ..scraper import _ParserScraper
from ..util import tagre
_imageSearch = compile(tagre("a", "href", r'(/comics/\d+/[^"]+)'))
# Shared base for the creators.com comic classes. Concrete comics subclass it
# and supply a `path` attribute, which the URL helpers of this class read.
class _Creators(_ParserScraper):
# Common prefix of every comic URL; the per-comic `path` is appended to it.
url = 'http://www.creators.com/comics/'
# NOTE(review): these two look like XPath expressions for the strip image link
# and the "previous strip" link, presumably evaluated by _ParserScraper -
# confirm against the base class.
imageSearch = '//td/a[@class="z"]'
prevSearch = '//a[contains(@class,"time_l")]'
help = 'Index format: n'
def add(name, path):
baseUrl = 'http://www.creators.com'
classname = 'Creators_%s' % name
globals()[classname] = make_scraper(classname,
name = 'Creators/' + name,
url = baseUrl + path + '.html',
stripUrl = baseUrl + path + '/%s.html',
lang = 'es' if name.lower().endswith('spanish') else 'en',
imageSearch = _imageSearch,
prevSearch = compile(tagre("a", "href", r'(%s/\d+\.html)' % path) +
tagre("img", "src", r'/img_comics/arrow_l\.gif')),
help = 'Index format: n',
# Scraper name: the fixed 'Creators/' prefix followed by the class name, so
# each subclass is named after its own identifier.
@classmethod
def getName(cls):
return 'Creators/' + cls.__name__
# Start address: the class's `url` joined with its `path` and '.html'.
@classmethod
def starter(cls):
return cls.url + cls.path + '.html'
# Address of one strip, shaped as <url><path>/<index>.html.
# `%` binds tighter than `+`, so only the '/%s.html' literal is formatted
# with `index`; `url` and `path` are concatenated unchanged.
def getIndexStripUrl(self, index):
return self.url + self.path + '/%s.html' % index
class _CreatorsEs(_Creators):
    """Variant of the Creators base class whose language is set to 'es'."""
    lang = 'es'

    def shouldSkipUrl(self, url, data):
        """Tell whether `url` is a strip page whose image is known to be 404.

        `url` is compared for equality against each listed page address;
        `data` is not consulted. Returns True when the page should be
        skipped.
        """
        # The trailing comma is what makes this a tuple. Without it the
        # parentheses are plain grouping and `in` degrades to a substring
        # test against a single string (matching '' and any URL fragment).
        return url in (
            self.url + 'heathcliff-spanish/139736.html',
        )
# do not edit anything below since these entries are generated from scripts/update.sh
# Some comics are not listed on the "all" page (too old?)
# These entries sit above the generated-entries marker; each one only sets
# `path`, the comic's segment that the _Creators URL helpers append to `url`.
class WinnieThePooh(_Creators):
path = u'winnie-the-pooh'
class Recess(_Creators):
path = u'recess'
class NaturalSelection(_Creators):
path = u'natural-selection'
# NOTE(review): FlightDeck is defined a second time further down in this
# module with the same path, so one of the two definitions looks redundant -
# confirm and drop one.
class FlightDeck(_Creators):
path = u'flight-deck'
# do not edit anything below since these entries are generated from scripts/update_plugins.sh
# DO NOT REMOVE
# duplicate of gocomics add('Agnes', '/comics/agnes')
# duplicate of gocomics add('AndyCapp', '/comics/andy-capp')
add('Archie', '/comics/archie')
add('ArchieinSpanish', '/comics/archie-spanish')
# duplicate of gocomics add('AskShagg', '/comics/ask-shagg')
# duplicate of gocomics add('BC', '/comics/bc')
add('BCinSpanish', '/comics/bc-spanish')
# duplicate of gocomics add('BallardStreet', '/comics/ballard-street')
add('CafeconLeche', '/comics/cafe-con-leche')
# duplicate of gocomics add('ChuckleBros', '/comics/chuckle-bros')
# duplicate of gocomics add('DaddysHome', '/comics/daddys-home')
# duplicate of gocomics add('DiamondLil', '/comics/diamond-lil')
# duplicate of gocomics add('DogEatDoug', '/comics/dog-eat-doug')
# duplicate of gocomics add('DogsofCKennel', '/comics/dogs-of-c-kennel')
add('DonaldDuck', '/comics/donald-duck')
add('Flare', '/comics/flare')
add('FlightDeck', '/comics/flight-deck')
# duplicate of gocomics add('FloandFriends', '/comics/flo-and-friends')
# duplicate of gocomics add('ForHeavensSake', '/comics/for-heavens-sake')
# duplicate of gocomics add('FreeRange', '/comics/free-range')
add('GirlsAndSports', '/comics/girls-and-sports')
add('GirlsandSportsinSpanish', '/comics/girls-and-sports-spanish')
# duplicate of gocomics add('Heathcliff', '/comics/heathcliff')
add('HeathcliffinSpanish', '/comics/heathcliff-spanish')
# duplicate of gocomics add('HerbandJamaal', '/comics/herb-and-jamaal')
add('HomeOffice', '/comics/stay-at-home-dad')
add('HopeAndDeath', '/comics/hope-and-death')
# duplicate of gocomics add('LibertyMeadows', '/comics/liberty-meadows')
add('LongStoryShort', '/comics/long-story-short')
add('MickeyMouse', '/comics/mickey-mouse')
# duplicate of gocomics add('Momma', '/comics/momma')
# duplicate of gocomics add('NestHeads', '/comics/nest-heads')
add('OffCenter', '/comics/off-center')
# duplicate of gocomics add('OnaClaireDay', '/comics/on-a-claire-day')
# duplicate of gocomics add('OneBigHappy', '/comics/one-big-happy')
add('Recess', '/comics/recess')
# duplicate of gocomics add('Rubes', '/comics/rubes')
add('Rugrats', '/comics/rugrats')
add('RugratsinSpanish', '/comics/rugrats-spanish')
# duplicate of gocomics add('ScaryGary', '/comics/scary-gary')
# duplicate of gocomics add('SpeedBump', '/comics/speed-bump')
# duplicate of gocomics add('StrangeBrew', '/comics/strange-brew')
# duplicate of gocomics add('TheBarn', '/comics/the-barn')
# duplicate of gocomics add('TheDinetteSet', '/comics/dinette-set')
# duplicate of gocomics add('TheMeaningofLila', '/comics/meaning-of-lila')
# duplicate of gocomics add('TheOtherCoast', '/comics/the-other-coast')
add('TheQuigmans', '/comics/the-quigmans')
add('TheWizardofIdinSpanish', '/comics/wizard-of-id-spanish')
# duplicate of gocomics add('ThinLines', '/comics/thin-lines')
# duplicate of gocomics add('WeePals', '/comics/wee-pals')
# duplicate of gocomics add('WizardofId', '/comics/wizard-of-id')
# duplicate of gocomics add('WorkingitOut', '/comics/working-it-out')
# duplicate of gocomics add('ZackHill', '/comics/zack-hill')
# Agnes has a duplicate in gocomics
# AndyCapp has a duplicate in gocomics
class Archie(_Creators):
path = u'archie'
class ArchieinSpanish(_CreatorsEs):
path = u'archie-spanish'
# AskShagg has a duplicate in gocomics
# BC has a duplicate in gocomics
class BCinSpanish(_CreatorsEs):
path = u'bc-spanish'
# BallardStreet has a duplicate in gocomics
class CafeconLeche(_Creators):
path = u'cafe-con-leche'
# ChuckleBros has a duplicate in gocomics
# DaddysHome has a duplicate in gocomics
# DiamondLil has a duplicate in gocomics
# DogEatDoug has a duplicate in gocomics
# DogsofCKennel has a duplicate in gocomics
class DonaldDuck(_Creators):
path = u'donald-duck'
class Doodles(_Creators):
path = u'doodles'
class Flare(_Creators):
path = u'flare'
class FlightDeck(_Creators):
path = u'flight-deck'
# FloandFriends has a duplicate in gocomics
# ForHeavensSake has a duplicate in gocomics
# FreeRange has a duplicate in gocomics
class GirlsAndSports(_Creators):
path = u'girls-and-sports'
class GirlsandSportsinSpanish(_CreatorsEs):
path = u'girls-and-sports-spanish'
# Heathcliff has a duplicate in gocomics
class HeathcliffinSpanish(_CreatorsEs):
path = u'heathcliff-spanish'
# HerbandJamaal has a duplicate in gocomics
class HomeOffice(_Creators):
path = u'stay-at-home-dad'
class HopeAndDeath(_Creators):
path = u'hope-and-death'
# LibertyMeadows has a duplicate in gocomics
class LongStoryShort(_Creators):
path = u'long-story-short'
class MickeyMouse(_Creators):
path = u'mickey-mouse'
# Momma has a duplicate in gocomics
# NestHeads has a duplicate in gocomics
class OffCenter(_Creators):
path = u'off-center'
# OnaClaireDay has a duplicate in gocomics
# OneBigHappy has a duplicate in gocomics
# Rubes has a duplicate in gocomics
class Rugrats(_Creators):
path = u'rugrats'
class RugratsinSpanish(_CreatorsEs):
path = u'rugrats-spanish'
# ScaryGary has a duplicate in gocomics
# SpeedBump has a duplicate in gocomics
# StrangeBrew has a duplicate in gocomics
# TheBarn has a duplicate in gocomics
# TheDinetteSet has a duplicate in gocomics
# TheMeaningofLila has a duplicate in gocomics
# TheOtherCoast has a duplicate in gocomics
class TheQuigmans(_Creators):
path = u'the-quigmans'
class TheWizardofIdinSpanish(_CreatorsEs):
path = u'wizard-of-id-spanish'
# ThinLines has a duplicate in gocomics
# WeePals has a duplicate in gocomics
# WizardofId has a duplicate in gocomics
# WorkingitOut has a duplicate in gocomics
# ZackHill has a duplicate in gocomics

View file

@ -16,7 +16,7 @@ from scriptutil import contains_case_insensitive, capfirst, save_result, load_re
json_file = __file__.replace(".py", ".json")
url_matcher = re.compile(tagre("a", "href", r'(/comics/[^/]+)\.html') + r'<strong>([^<]+)</strong>')
url_matcher = re.compile(tagre("a", "href", r'/comics/([^/]+)\.html') + r'<strong>([^<]+)</strong>')
# names of comics to exclude
exclude_comics = [
@ -67,16 +67,15 @@ def print_results(args):
"""Print comics."""
min_comics, filename = args
with codecs.open(filename, 'a', 'utf-8') as fp:
for name, url in sorted(load_result(json_file).items()):
for name, path in sorted(load_result(json_file).items()):
if name in exclude_comics:
continue
lang = 'Es' if name.lower().endswith('spanish') else ''
if has_gocomics_comic(name):
prefix = u'# duplicate of gocomics '
fp.write(u'# %s has a duplicate in gocomics\n' % truncate_name(name))
else:
prefix = u''
fp.write(u"%sadd(%r, %r)\n" % (
prefix, str(truncate_name(name)), str(url))
)
fp.write(u"class %s(_Creators%s):\n path = %r\n\n" %
(truncate_name(name), lang, path))
if __name__ == '__main__':