Remove make_scraper magic from creators module.
This commit is contained in:
parent
94470d564c
commit
7f7a69818b
2 changed files with 139 additions and 76 deletions
|
@ -1,79 +1,143 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015 Tobias Gruetzmacher
|
||||
|
||||
from re import compile
|
||||
from ..scraper import make_scraper
|
||||
from ..scraper import _ParserScraper
|
||||
from ..util import tagre
|
||||
|
||||
_imageSearch = compile(tagre("a", "href", r'(/comics/\d+/[^"]+)'))
|
||||
class _Creators(_ParserScraper):
|
||||
url = 'http://www.creators.com/comics/'
|
||||
imageSearch = '//td/a[@class="z"]'
|
||||
prevSearch = '//a[contains(@class,"time_l")]'
|
||||
help = 'Index format: n'
|
||||
|
||||
def add(name, path):
|
||||
baseUrl = 'http://www.creators.com'
|
||||
classname = 'Creators_%s' % name
|
||||
globals()[classname] = make_scraper(classname,
|
||||
name = 'Creators/' + name,
|
||||
url = baseUrl + path + '.html',
|
||||
stripUrl = baseUrl + path + '/%s.html',
|
||||
lang = 'es' if name.lower().endswith('spanish') else 'en',
|
||||
imageSearch = _imageSearch,
|
||||
prevSearch = compile(tagre("a", "href", r'(%s/\d+\.html)' % path) +
|
||||
tagre("img", "src", r'/img_comics/arrow_l\.gif')),
|
||||
help = 'Index format: n',
|
||||
@classmethod
|
||||
def getName(cls):
    """Return the comic name: the 'Creators/' prefix followed by the class name."""
    return ''.join(['Creators/', cls.__name__])
|
||||
|
||||
@classmethod
|
||||
def starter(cls):
    """Return the start URL: the class ``url``, then its ``path``, then '.html'."""
    pieces = (cls.url, cls.path, '.html')
    return ''.join(pieces)
|
||||
|
||||
def getIndexStripUrl(self, index):
    """Return the URL of the strip page for ``index``.

    The result is ``url`` + ``path`` + '/<index>.html'.
    """
    base = self.url + self.path
    return base + '/%s.html' % index
|
||||
|
||||
class _CreatorsEs(_Creators):
|
||||
lang = 'es'
|
||||
|
||||
def shouldSkipUrl(self, url, data):
    """Tell whether ``url`` is a page whose images are known to be 404.

    ``data`` is accepted but not used here. Returns True only when
    ``url`` is exactly one of the listed pages.
    """
    # The trailing comma makes this a one-element tuple. Without it the
    # parentheses only group the expression, and ``in`` becomes a substring
    # test against the single URL string (so '' or the bare base URL would
    # also be reported as skippable).
    return url in (
        self.url + 'heathcliff-spanish/139736.html',
    )
|
||||
|
||||
# do not edit anything below since these entries are generated from scripts/update.sh
|
||||
# Some comics are not listed on the "all" page (too old?)
|
||||
class WinnieThePooh(_Creators):
|
||||
path = u'winnie-the-pooh'
|
||||
|
||||
class Recess(_Creators):
|
||||
path = u'recess'
|
||||
|
||||
class NaturalSelection(_Creators):
|
||||
path = u'natural-selection'
|
||||
|
||||
class FlightDeck(_Creators):
|
||||
path = u'flight-deck'
|
||||
|
||||
# do not edit anything below since these entries are generated from scripts/update_plugins.sh
|
||||
# DO NOT REMOVE
|
||||
# duplicate of gocomics add('Agnes', '/comics/agnes')
|
||||
# duplicate of gocomics add('AndyCapp', '/comics/andy-capp')
|
||||
add('Archie', '/comics/archie')
|
||||
add('ArchieinSpanish', '/comics/archie-spanish')
|
||||
# duplicate of gocomics add('AskShagg', '/comics/ask-shagg')
|
||||
# duplicate of gocomics add('BC', '/comics/bc')
|
||||
add('BCinSpanish', '/comics/bc-spanish')
|
||||
# duplicate of gocomics add('BallardStreet', '/comics/ballard-street')
|
||||
add('CafeconLeche', '/comics/cafe-con-leche')
|
||||
# duplicate of gocomics add('ChuckleBros', '/comics/chuckle-bros')
|
||||
# duplicate of gocomics add('DaddysHome', '/comics/daddys-home')
|
||||
# duplicate of gocomics add('DiamondLil', '/comics/diamond-lil')
|
||||
# duplicate of gocomics add('DogEatDoug', '/comics/dog-eat-doug')
|
||||
# duplicate of gocomics add('DogsofCKennel', '/comics/dogs-of-c-kennel')
|
||||
add('DonaldDuck', '/comics/donald-duck')
|
||||
add('Flare', '/comics/flare')
|
||||
add('FlightDeck', '/comics/flight-deck')
|
||||
# duplicate of gocomics add('FloandFriends', '/comics/flo-and-friends')
|
||||
# duplicate of gocomics add('ForHeavensSake', '/comics/for-heavens-sake')
|
||||
# duplicate of gocomics add('FreeRange', '/comics/free-range')
|
||||
add('GirlsAndSports', '/comics/girls-and-sports')
|
||||
add('GirlsandSportsinSpanish', '/comics/girls-and-sports-spanish')
|
||||
# duplicate of gocomics add('Heathcliff', '/comics/heathcliff')
|
||||
add('HeathcliffinSpanish', '/comics/heathcliff-spanish')
|
||||
# duplicate of gocomics add('HerbandJamaal', '/comics/herb-and-jamaal')
|
||||
add('HomeOffice', '/comics/stay-at-home-dad')
|
||||
add('HopeAndDeath', '/comics/hope-and-death')
|
||||
# duplicate of gocomics add('LibertyMeadows', '/comics/liberty-meadows')
|
||||
add('LongStoryShort', '/comics/long-story-short')
|
||||
add('MickeyMouse', '/comics/mickey-mouse')
|
||||
# duplicate of gocomics add('Momma', '/comics/momma')
|
||||
# duplicate of gocomics add('NestHeads', '/comics/nest-heads')
|
||||
add('OffCenter', '/comics/off-center')
|
||||
# duplicate of gocomics add('OnaClaireDay', '/comics/on-a-claire-day')
|
||||
# duplicate of gocomics add('OneBigHappy', '/comics/one-big-happy')
|
||||
add('Recess', '/comics/recess')
|
||||
# duplicate of gocomics add('Rubes', '/comics/rubes')
|
||||
add('Rugrats', '/comics/rugrats')
|
||||
add('RugratsinSpanish', '/comics/rugrats-spanish')
|
||||
# duplicate of gocomics add('ScaryGary', '/comics/scary-gary')
|
||||
# duplicate of gocomics add('SpeedBump', '/comics/speed-bump')
|
||||
# duplicate of gocomics add('StrangeBrew', '/comics/strange-brew')
|
||||
# duplicate of gocomics add('TheBarn', '/comics/the-barn')
|
||||
# duplicate of gocomics add('TheDinetteSet', '/comics/dinette-set')
|
||||
# duplicate of gocomics add('TheMeaningofLila', '/comics/meaning-of-lila')
|
||||
# duplicate of gocomics add('TheOtherCoast', '/comics/the-other-coast')
|
||||
add('TheQuigmans', '/comics/the-quigmans')
|
||||
add('TheWizardofIdinSpanish', '/comics/wizard-of-id-spanish')
|
||||
# duplicate of gocomics add('ThinLines', '/comics/thin-lines')
|
||||
# duplicate of gocomics add('WeePals', '/comics/wee-pals')
|
||||
# duplicate of gocomics add('WizardofId', '/comics/wizard-of-id')
|
||||
# duplicate of gocomics add('WorkingitOut', '/comics/working-it-out')
|
||||
# duplicate of gocomics add('ZackHill', '/comics/zack-hill')
|
||||
# Agnes has a duplicate in gocomics
|
||||
# AndyCapp has a duplicate in gocomics
|
||||
class Archie(_Creators):
|
||||
path = u'archie'
|
||||
|
||||
class ArchieinSpanish(_CreatorsEs):
|
||||
path = u'archie-spanish'
|
||||
|
||||
# AskShagg has a duplicate in gocomics
|
||||
# BC has a duplicate in gocomics
|
||||
class BCinSpanish(_CreatorsEs):
|
||||
path = u'bc-spanish'
|
||||
|
||||
# BallardStreet has a duplicate in gocomics
|
||||
class CafeconLeche(_Creators):
|
||||
path = u'cafe-con-leche'
|
||||
|
||||
# ChuckleBros has a duplicate in gocomics
|
||||
# DaddysHome has a duplicate in gocomics
|
||||
# DiamondLil has a duplicate in gocomics
|
||||
# DogEatDoug has a duplicate in gocomics
|
||||
# DogsofCKennel has a duplicate in gocomics
|
||||
class DonaldDuck(_Creators):
|
||||
path = u'donald-duck'
|
||||
|
||||
class Doodles(_Creators):
|
||||
path = u'doodles'
|
||||
|
||||
class Flare(_Creators):
|
||||
path = u'flare'
|
||||
|
||||
class FlightDeck(_Creators):
|
||||
path = u'flight-deck'
|
||||
|
||||
# FloandFriends has a duplicate in gocomics
|
||||
# ForHeavensSake has a duplicate in gocomics
|
||||
# FreeRange has a duplicate in gocomics
|
||||
class GirlsAndSports(_Creators):
|
||||
path = u'girls-and-sports'
|
||||
|
||||
class GirlsandSportsinSpanish(_CreatorsEs):
|
||||
path = u'girls-and-sports-spanish'
|
||||
|
||||
# Heathcliff has a duplicate in gocomics
|
||||
class HeathcliffinSpanish(_CreatorsEs):
|
||||
path = u'heathcliff-spanish'
|
||||
|
||||
# HerbandJamaal has a duplicate in gocomics
|
||||
class HomeOffice(_Creators):
|
||||
path = u'stay-at-home-dad'
|
||||
|
||||
class HopeAndDeath(_Creators):
|
||||
path = u'hope-and-death'
|
||||
|
||||
# LibertyMeadows has a duplicate in gocomics
|
||||
class LongStoryShort(_Creators):
|
||||
path = u'long-story-short'
|
||||
|
||||
class MickeyMouse(_Creators):
|
||||
path = u'mickey-mouse'
|
||||
|
||||
# Momma has a duplicate in gocomics
|
||||
# NestHeads has a duplicate in gocomics
|
||||
class OffCenter(_Creators):
|
||||
path = u'off-center'
|
||||
|
||||
# OnaClaireDay has a duplicate in gocomics
|
||||
# OneBigHappy has a duplicate in gocomics
|
||||
# Rubes has a duplicate in gocomics
|
||||
class Rugrats(_Creators):
|
||||
path = u'rugrats'
|
||||
|
||||
class RugratsinSpanish(_CreatorsEs):
|
||||
path = u'rugrats-spanish'
|
||||
|
||||
# ScaryGary has a duplicate in gocomics
|
||||
# SpeedBump has a duplicate in gocomics
|
||||
# StrangeBrew has a duplicate in gocomics
|
||||
# TheBarn has a duplicate in gocomics
|
||||
# TheDinetteSet has a duplicate in gocomics
|
||||
# TheMeaningofLila has a duplicate in gocomics
|
||||
# TheOtherCoast has a duplicate in gocomics
|
||||
class TheQuigmans(_Creators):
|
||||
path = u'the-quigmans'
|
||||
|
||||
class TheWizardofIdinSpanish(_CreatorsEs):
|
||||
path = u'wizard-of-id-spanish'
|
||||
|
||||
# ThinLines has a duplicate in gocomics
|
||||
# WeePals has a duplicate in gocomics
|
||||
# WizardofId has a duplicate in gocomics
|
||||
# WorkingitOut has a duplicate in gocomics
|
||||
# ZackHill has a duplicate in gocomics
|
||||
|
|
|
@ -16,7 +16,7 @@ from scriptutil import contains_case_insensitive, capfirst, save_result, load_re
|
|||
|
||||
json_file = __file__.replace(".py", ".json")
|
||||
|
||||
url_matcher = re.compile(tagre("a", "href", r'(/comics/[^/]+)\.html') + r'<strong>([^<]+)</strong>')
|
||||
url_matcher = re.compile(tagre("a", "href", r'/comics/([^/]+)\.html') + r'<strong>([^<]+)</strong>')
|
||||
|
||||
# names of comics to exclude
|
||||
exclude_comics = [
|
||||
|
@ -67,16 +67,15 @@ def print_results(args):
|
|||
"""Print comics."""
|
||||
min_comics, filename = args
|
||||
with codecs.open(filename, 'a', 'utf-8') as fp:
|
||||
for name, url in sorted(load_result(json_file).items()):
|
||||
for name, path in sorted(load_result(json_file).items()):
|
||||
if name in exclude_comics:
|
||||
continue
|
||||
lang = 'Es' if name.lower().endswith('spanish') else ''
|
||||
if has_gocomics_comic(name):
|
||||
prefix = u'# duplicate of gocomics '
|
||||
fp.write(u'# %s has a duplicate in gocomics\n' % truncate_name(name))
|
||||
else:
|
||||
prefix = u''
|
||||
fp.write(u"%sadd(%r, %r)\n" % (
|
||||
prefix, str(truncate_name(name)), str(url))
|
||||
)
|
||||
fp.write(u"class %s(_Creators%s):\n path = %r\n\n" %
|
||||
(truncate_name(name), lang, path))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Reference in a new issue