Remove make_scraper magic from creators module.
This commit is contained in:
parent
94470d564c
commit
7f7a69818b
2 changed files with 139 additions and 76 deletions
|
@ -1,79 +1,143 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: iso-8859-1 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015 Tobias Gruetzmacher
|
||||||
|
|
||||||
from re import compile
|
from re import compile
|
||||||
from ..scraper import make_scraper
|
from ..scraper import _ParserScraper
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
|
|
||||||
_imageSearch = compile(tagre("a", "href", r'(/comics/\d+/[^"]+)'))
|
class _Creators(_ParserScraper):
|
||||||
|
url = 'http://www.creators.com/comics/'
|
||||||
|
imageSearch = '//td/a[@class="z"]'
|
||||||
|
prevSearch = '//a[contains(@class,"time_l")]'
|
||||||
|
help = 'Index format: n'
|
||||||
|
|
||||||
def add(name, path):
|
@classmethod
|
||||||
baseUrl = 'http://www.creators.com'
|
def getName(cls):
|
||||||
classname = 'Creators_%s' % name
|
return 'Creators/' + cls.__name__
|
||||||
globals()[classname] = make_scraper(classname,
|
|
||||||
name = 'Creators/' + name,
|
|
||||||
url = baseUrl + path + '.html',
|
|
||||||
stripUrl = baseUrl + path + '/%s.html',
|
|
||||||
lang = 'es' if name.lower().endswith('spanish') else 'en',
|
|
||||||
imageSearch = _imageSearch,
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s/\d+\.html)' % path) +
|
|
||||||
tagre("img", "src", r'/img_comics/arrow_l\.gif')),
|
|
||||||
help = 'Index format: n',
|
|
||||||
)
|
|
||||||
|
|
||||||
# do not edit anything below since these entries are generated from scripts/update.sh
|
@classmethod
|
||||||
|
def starter(cls):
|
||||||
|
return cls.url + cls.path + '.html'
|
||||||
|
|
||||||
|
def getIndexStripUrl(self, index):
|
||||||
|
return self.url + self.path + '/%s.html' % index
|
||||||
|
|
||||||
|
class _CreatorsEs(_Creators):
    """Base class for the Spanish-language ('es') Creators comics."""
    lang = 'es'

    def shouldSkipUrl(self, url, data):
        """Images are 404...

        Return True when *url* is one of the strip pages listed below;
        *data* is not consulted.
        """
        # The trailing comma is required: without it the parentheses are
        # plain grouping and ``in`` performs a substring test against a
        # single string, which also matches any fragment of that URL.
        return url in (
            self.url + 'heathcliff-spanish/139736.html',
        )
|
||||||
|
|
||||||
|
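# Some comics are not listed on the "all" page (too old?), so they are defined by hand here.
# NOTE(review): FlightDeck is also defined in the generated section below; the later definition rebinds the name, so one of the two is redundant.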
|
||||||
|
class WinnieThePooh(_Creators):
|
||||||
|
path = u'winnie-the-pooh'
|
||||||
|
|
||||||
|
class Recess(_Creators):
|
||||||
|
path = u'recess'
|
||||||
|
|
||||||
|
class NaturalSelection(_Creators):
|
||||||
|
path = u'natural-selection'
|
||||||
|
|
||||||
|
class FlightDeck(_Creators):
|
||||||
|
path = u'flight-deck'
|
||||||
|
|
||||||
|
# do not edit anything below since these entries are generated from scripts/update_plugins.sh
|
||||||
# DO NOT REMOVE
|
# DO NOT REMOVE
|
||||||
# duplicate of gocomics add('Agnes', '/comics/agnes')
|
# Agnes has a duplicate in gocomics
|
||||||
# duplicate of gocomics add('AndyCapp', '/comics/andy-capp')
|
# AndyCapp has a duplicate in gocomics
|
||||||
add('Archie', '/comics/archie')
|
class Archie(_Creators):
|
||||||
add('ArchieinSpanish', '/comics/archie-spanish')
|
path = u'archie'
|
||||||
# duplicate of gocomics add('AskShagg', '/comics/ask-shagg')
|
|
||||||
# duplicate of gocomics add('BC', '/comics/bc')
|
class ArchieinSpanish(_CreatorsEs):
|
||||||
add('BCinSpanish', '/comics/bc-spanish')
|
path = u'archie-spanish'
|
||||||
# duplicate of gocomics add('BallardStreet', '/comics/ballard-street')
|
|
||||||
add('CafeconLeche', '/comics/cafe-con-leche')
|
# AskShagg has a duplicate in gocomics
|
||||||
# duplicate of gocomics add('ChuckleBros', '/comics/chuckle-bros')
|
# BC has a duplicate in gocomics
|
||||||
# duplicate of gocomics add('DaddysHome', '/comics/daddys-home')
|
class BCinSpanish(_CreatorsEs):
|
||||||
# duplicate of gocomics add('DiamondLil', '/comics/diamond-lil')
|
path = u'bc-spanish'
|
||||||
# duplicate of gocomics add('DogEatDoug', '/comics/dog-eat-doug')
|
|
||||||
# duplicate of gocomics add('DogsofCKennel', '/comics/dogs-of-c-kennel')
|
# BallardStreet has a duplicate in gocomics
|
||||||
add('DonaldDuck', '/comics/donald-duck')
|
class CafeconLeche(_Creators):
|
||||||
add('Flare', '/comics/flare')
|
path = u'cafe-con-leche'
|
||||||
add('FlightDeck', '/comics/flight-deck')
|
|
||||||
# duplicate of gocomics add('FloandFriends', '/comics/flo-and-friends')
|
# ChuckleBros has a duplicate in gocomics
|
||||||
# duplicate of gocomics add('ForHeavensSake', '/comics/for-heavens-sake')
|
# DaddysHome has a duplicate in gocomics
|
||||||
# duplicate of gocomics add('FreeRange', '/comics/free-range')
|
# DiamondLil has a duplicate in gocomics
|
||||||
add('GirlsAndSports', '/comics/girls-and-sports')
|
# DogEatDoug has a duplicate in gocomics
|
||||||
add('GirlsandSportsinSpanish', '/comics/girls-and-sports-spanish')
|
# DogsofCKennel has a duplicate in gocomics
|
||||||
# duplicate of gocomics add('Heathcliff', '/comics/heathcliff')
|
class DonaldDuck(_Creators):
|
||||||
add('HeathcliffinSpanish', '/comics/heathcliff-spanish')
|
path = u'donald-duck'
|
||||||
# duplicate of gocomics add('HerbandJamaal', '/comics/herb-and-jamaal')
|
|
||||||
add('HomeOffice', '/comics/stay-at-home-dad')
|
class Doodles(_Creators):
|
||||||
add('HopeAndDeath', '/comics/hope-and-death')
|
path = u'doodles'
|
||||||
# duplicate of gocomics add('LibertyMeadows', '/comics/liberty-meadows')
|
|
||||||
add('LongStoryShort', '/comics/long-story-short')
|
class Flare(_Creators):
|
||||||
add('MickeyMouse', '/comics/mickey-mouse')
|
path = u'flare'
|
||||||
# duplicate of gocomics add('Momma', '/comics/momma')
|
|
||||||
# duplicate of gocomics add('NestHeads', '/comics/nest-heads')
|
class FlightDeck(_Creators):
|
||||||
add('OffCenter', '/comics/off-center')
|
path = u'flight-deck'
|
||||||
# duplicate of gocomics add('OnaClaireDay', '/comics/on-a-claire-day')
|
|
||||||
# duplicate of gocomics add('OneBigHappy', '/comics/one-big-happy')
|
# FloandFriends has a duplicate in gocomics
|
||||||
add('Recess', '/comics/recess')
|
# ForHeavensSake has a duplicate in gocomics
|
||||||
# duplicate of gocomics add('Rubes', '/comics/rubes')
|
# FreeRange has a duplicate in gocomics
|
||||||
add('Rugrats', '/comics/rugrats')
|
class GirlsAndSports(_Creators):
|
||||||
add('RugratsinSpanish', '/comics/rugrats-spanish')
|
path = u'girls-and-sports'
|
||||||
# duplicate of gocomics add('ScaryGary', '/comics/scary-gary')
|
|
||||||
# duplicate of gocomics add('SpeedBump', '/comics/speed-bump')
|
class GirlsandSportsinSpanish(_CreatorsEs):
|
||||||
# duplicate of gocomics add('StrangeBrew', '/comics/strange-brew')
|
path = u'girls-and-sports-spanish'
|
||||||
# duplicate of gocomics add('TheBarn', '/comics/the-barn')
|
|
||||||
# duplicate of gocomics add('TheDinetteSet', '/comics/dinette-set')
|
# Heathcliff has a duplicate in gocomics
|
||||||
# duplicate of gocomics add('TheMeaningofLila', '/comics/meaning-of-lila')
|
class HeathcliffinSpanish(_CreatorsEs):
|
||||||
# duplicate of gocomics add('TheOtherCoast', '/comics/the-other-coast')
|
path = u'heathcliff-spanish'
|
||||||
add('TheQuigmans', '/comics/the-quigmans')
|
|
||||||
add('TheWizardofIdinSpanish', '/comics/wizard-of-id-spanish')
|
# HerbandJamaal has a duplicate in gocomics
|
||||||
# duplicate of gocomics add('ThinLines', '/comics/thin-lines')
|
class HomeOffice(_Creators):
|
||||||
# duplicate of gocomics add('WeePals', '/comics/wee-pals')
|
path = u'stay-at-home-dad'
|
||||||
# duplicate of gocomics add('WizardofId', '/comics/wizard-of-id')
|
|
||||||
# duplicate of gocomics add('WorkingitOut', '/comics/working-it-out')
|
class HopeAndDeath(_Creators):
|
||||||
# duplicate of gocomics add('ZackHill', '/comics/zack-hill')
|
path = u'hope-and-death'
|
||||||
|
|
||||||
|
# LibertyMeadows has a duplicate in gocomics
|
||||||
|
class LongStoryShort(_Creators):
|
||||||
|
path = u'long-story-short'
|
||||||
|
|
||||||
|
class MickeyMouse(_Creators):
|
||||||
|
path = u'mickey-mouse'
|
||||||
|
|
||||||
|
# Momma has a duplicate in gocomics
|
||||||
|
# NestHeads has a duplicate in gocomics
|
||||||
|
class OffCenter(_Creators):
|
||||||
|
path = u'off-center'
|
||||||
|
|
||||||
|
# OnaClaireDay has a duplicate in gocomics
|
||||||
|
# OneBigHappy has a duplicate in gocomics
|
||||||
|
# Rubes has a duplicate in gocomics
|
||||||
|
class Rugrats(_Creators):
|
||||||
|
path = u'rugrats'
|
||||||
|
|
||||||
|
class RugratsinSpanish(_CreatorsEs):
|
||||||
|
path = u'rugrats-spanish'
|
||||||
|
|
||||||
|
# ScaryGary has a duplicate in gocomics
|
||||||
|
# SpeedBump has a duplicate in gocomics
|
||||||
|
# StrangeBrew has a duplicate in gocomics
|
||||||
|
# TheBarn has a duplicate in gocomics
|
||||||
|
# TheDinetteSet has a duplicate in gocomics
|
||||||
|
# TheMeaningofLila has a duplicate in gocomics
|
||||||
|
# TheOtherCoast has a duplicate in gocomics
|
||||||
|
class TheQuigmans(_Creators):
|
||||||
|
path = u'the-quigmans'
|
||||||
|
|
||||||
|
class TheWizardofIdinSpanish(_CreatorsEs):
|
||||||
|
path = u'wizard-of-id-spanish'
|
||||||
|
|
||||||
|
# ThinLines has a duplicate in gocomics
|
||||||
|
# WeePals has a duplicate in gocomics
|
||||||
|
# WizardofId has a duplicate in gocomics
|
||||||
|
# WorkingitOut has a duplicate in gocomics
|
||||||
|
# ZackHill has a duplicate in gocomics
|
||||||
|
|
|
@ -16,7 +16,7 @@ from scriptutil import contains_case_insensitive, capfirst, save_result, load_re
|
||||||
|
|
||||||
json_file = __file__.replace(".py", ".json")
|
json_file = __file__.replace(".py", ".json")
|
||||||
|
|
||||||
url_matcher = re.compile(tagre("a", "href", r'(/comics/[^/]+)\.html') + r'<strong>([^<]+)</strong>')
|
url_matcher = re.compile(tagre("a", "href", r'/comics/([^/]+)\.html') + r'<strong>([^<]+)</strong>')
|
||||||
|
|
||||||
# names of comics to exclude
|
# names of comics to exclude
|
||||||
exclude_comics = [
|
exclude_comics = [
|
||||||
|
@ -67,16 +67,15 @@ def print_results(args):
|
||||||
"""Print comics."""
|
"""Print comics."""
|
||||||
min_comics, filename = args
|
min_comics, filename = args
|
||||||
with codecs.open(filename, 'a', 'utf-8') as fp:
|
with codecs.open(filename, 'a', 'utf-8') as fp:
|
||||||
for name, url in sorted(load_result(json_file).items()):
|
for name, path in sorted(load_result(json_file).items()):
|
||||||
if name in exclude_comics:
|
if name in exclude_comics:
|
||||||
continue
|
continue
|
||||||
|
lang = 'Es' if name.lower().endswith('spanish') else ''
|
||||||
if has_gocomics_comic(name):
|
if has_gocomics_comic(name):
|
||||||
prefix = u'# duplicate of gocomics '
|
fp.write(u'# %s has a duplicate in gocomics\n' % truncate_name(name))
|
||||||
else:
|
else:
|
||||||
prefix = u''
|
fp.write(u"class %s(_Creators%s):\n path = %r\n\n" %
|
||||||
fp.write(u"%sadd(%r, %r)\n" % (
|
(truncate_name(name), lang, path))
|
||||||
prefix, str(truncate_name(name)), str(url))
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in a new issue