2012-06-20 20:41:04 +00:00
|
|
|
# -*- coding: iso-8859-1 -*-
|
|
|
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
2012-06-20 19:58:13 +00:00
|
|
|
from re import compile, IGNORECASE, sub
|
|
|
|
|
2012-10-11 10:03:12 +00:00
|
|
|
from ..scraper import _BasicScraper
|
|
|
|
from ..util import fetchUrl
|
2012-06-20 19:58:13 +00:00
|
|
|
|
|
|
|
|
|
|
|
class _UClickScraper(_BasicScraper):
    """Common base for scrapers of comics hosted on uclick.com.

    Subclasses are expected to define ``shortName``, the identifier that is
    substituted into ``baseUrl`` to build the comic's start URL.
    """

    # Index page listing the available comics; scanned by fetchSubmodules().
    homepage = 'http://content.uclick.com/a2z.html'
    # URL template; the '%s' slot receives a comic's short name.
    baseUrl = 'http://www.uclick.com/client/zzz/%s/'
    # Template for a single strip: the latest URL followed by a '%s/' slot.
    # NOTE(review): latestUrl is not defined in this class; presumably it is
    # provided by _BasicScraper -- confirm.
    stripUrl = property(lambda self: self.latestUrl + '%s/')
    # Captures the absolute URL of the strip's GIF image.
    imageSearch = compile(r'<img[^>]+src="(http://synd.imgsrv.uclick.com/comics/\w+/\d{4}/[^"]+\.gif)"', IGNORECASE)
    # Captures the site-relative link labelled "Previous date".
    prevSearch = compile(r'<a href="(/client/zzz/\w+/\d{4}/\d{2}/\d{2}/)">Previous date', IGNORECASE)
    help = 'Index format: yyyy/mm/dd'

    @classmethod
    def starter(cls):
        """Return the start URL: ``baseUrl`` filled in with ``cls.shortName``."""
        return cls.baseUrl % (cls.shortName,)

    @classmethod
    def fetchSubmodules(cls):
        """Return normalized names of the comics linked from ``homepage``.

        A comic is included when its short name is not in the exclusion
        list and probing its page with ``fetchUrl`` and ``imageSearch``
        yields a truthy result.
        """
        exclusions = (
            'index',
        )

        # XXX refactor this mess
        submoduleSearch = compile(r'(<A HREF="http://content.uclick.com/content/\w+.html">[^>]+?</a>)', IGNORECASE)
        partsMatch = compile(r'<A HREF="http://content.uclick.com/content/(\w+?).html">([^>]+?)</a>', IGNORECASE)
        # NOTE(review): fetchManyMatches is not among the names imported at
        # the top of the visible file (only fetchUrl is); confirm it is in
        # scope, otherwise this call raises NameError.
        # Presumably one list of matches is returned per pattern, hence [0].
        matches = fetchManyMatches(cls.homepage, (submoduleSearch,))[0]
        # Split every matched anchor into (short name, display name).
        possibles = [partsMatch.match(match).groups() for match in matches]

        def normalizeName(name):
            """Turn a display name into a title-cased alphanumeric string."""
            # Replace HTML acute entities (e.g. '&eacute;') by the bare letter.
            name = sub(r'&(.)acute;', r'\1', name).title()
            return ''.join([c for c in name if c.isalnum()])

        def fetchSubmodule(module):
            """Probe a comic page; return fetchUrl's result, or False on error."""
            try:
                return fetchUrl(cls.baseUrl % module, cls.imageSearch)
            except Exception:
                # Best effort: a comic whose page cannot be probed is skipped.
                # Catching Exception instead of using a bare except lets
                # KeyboardInterrupt and SystemExit abort the scan.
                return False

        return [normalizeName(name) for part, name in possibles if part not in exclusions and fetchSubmodule(part)]
|
|
|
|
|
|
|
|
|
|
|
|
def uclick(name, shortName):
    """Create the scraper class for one uclick comic.

    The returned class derives from _UClickScraper, is named
    'UClick_<name>', and carries two class attributes: ``name`` set to
    'UClick/<name>' and ``shortName`` set to the given short name.
    """
    className = 'UClick_%s' % name
    attributes = {
        'name': 'UClick/' + name,
        'shortName': shortName,
    }
    return type(className, (_UClickScraper,), attributes)
|
|
|
|
|
|
|
|
# Maps each comic's name to the short name that uclick() stores on the
# generated scraper class.
comics = {
    '5thWave': 'fw',
    '9To5': 'tmntf',
    'AdamHome': 'ad',
    'Agnes': 'cragn',
    'AlcarazLalo': 'la',
    'AlcarazLaloSpanish': 'spla',
    'AndersonNick': 'wpnan',
    'AndyCapp': 'crcap',
    'AnimalCrackers': 'tmani',
    'Annie': 'tmann',
    'AsayChuck': 'crcas',
    'AskShagg': 'crask',
    'AuthTony': 'ta',
    'BadReporter': 'bad',
    'Baldo': 'ba',
    'BaldoSpanish': 'be',
    'BallardStreet': 'crbal',
    'BarkEaterLake': 'bark',
    'BarstowDonna': 'dba',
    'BC': 'crbc',
    'BCSpanish': 'crbcs',
    'BeattieBruce': 'crbbe',
    'BennetClay': 'wpcbe',
    'BensonLisa': 'wplbe',
    'BensonSteve': 'crsbe',
    'BigTop': 'bt',
    'Biographic': 'biov',
    'Bleeker': 'blk',
    'BobTheSquirrel': 'bob',
    'BoilingPoint': 'boil',
    'BokChip': 'crcbo',
    'BoNanas': 'bon',
    'Boomerangs': 'boom',
    'BoondocksThe': 'bo',
    'BottomLiners': 'tmbot',
    'BoundAndGagged': 'tmbou',
    'Brainwaves': 'bwv',
    'BreenSteve': 'crsbr',
    'BrendaStarr': 'tmbre',
    'BrewsterRockit': 'tmrkt',
    'BrittChris': 'crcbr',
    'BroomHilda': 'tmbro',
    'Candorville': 'cand',
    'CarlsonStuart': 'sc',
    'CatalinoKen': 'crkca',
    'Cathy': 'ca',
    'CathySpanish': 'spca',
    'CEstLaVie': 'clv',
    'CityThe': 'derf',
    'ClearBlueWater': 'cbw',
    'Cleats': 'cle',
    'CloseToHome': 'cl',
    'CombsPaul': 'tmcmb',
    'CompuToon': 'tmcom',
    'Condorito': 'cond',
    'ConradPaul': 'tmpco',
    'Cornered': 'co',
    'CulDeSac': 'cds',
    'DanzigerJeff': 'jd',
    'DaviesMatt': 'tmmda',
    'DeepCover': 'deep',
    'DeeringJohn': 'crjde',
    'DickTracy': 'tmdic',
    'DinetteSetThe': 'crdin',
    'DogEatDoug': 'crdog',
    'DonWright': 'tmdow',
    'Doodles': 'tmdoo',
    'Doonesbury': 'db',
    'DuplexThe': 'dp',
    'Eek': 'eek',
    'ElderberriesThe': 'eld',
    'FacesInTheNews': 'kw',
    'FlightDeck': 'crfd',
    'FloAndFriends': 'crflo',
    'FlyingMccoysThe': 'fmc',
    'ForBetterOrForWorse': 'fb',
    'ForHeavenSSake': 'crfhs',
    'FoxtrotClassics': 'ftcl',
    'Foxtrot': 'ft',
    'FoxtrotSpanish': 'spft',
    'FrankAndErnest': 'fa',
    'FredBassetSpanish': 'spfba',
    'FredBasset': 'tmfba',
    'FrogApplause': 'frog',
    'FuscoBrothersThe': 'fu',
    'Garfield': 'ga',
    'GarfieldSpanish': 'gh',
    'GasolineAlley': 'tmgas',
    'GaturroSpanish': 'spgat',
    'GilThorp': 'tmgil',
    'GingerMeggs': 'gin',
    'GingerMeggsSpanish': 'spgin',
    'GirlsAndSports': 'crgis',
    'GorrellBob': 'crbgo',
    'GoTeamBob': 'gtb',
    'HammondBruce': 'hb',
    'HandelsmanWalt': 'tmwha',
    'HeartOfTheCity': 'hc',
    'Heathcliff': 'crhea',
    'HeathcliffSpanish': 'crhes',
    'HerbAndJamaal': 'crher',
    'HigginsJack': 'jh',
    'HomeAndAway': 'wphaa',
    'HorseyDavid': 'tmdho',
    'Housebroken': 'tmhou',
    'HubertAndAbby': 'haa',
    'IdiotBox': 'ibox',
    'ImagineThis': 'imt',
    'InkPen': 'ink',
    'InTheBleachers': 'bl',
    'ItsAllAboutYou': 'wpiay',
    'JamesBondSpanish': 'spjb',
    'JonesClay': 'crcjo',
    'KallaugherKevin': 'cwkal',
    'KChroniclesThe': 'kk',
    'KelleySteve': 'crske',
    'Kudzu': 'tmkud',
    'LaCucaracha': 'lc',
    'LegendOfBill': 'lob',
    'LibertyMeadows': 'crlib',
    'Lio': 'lio',
    'LittleDogLost': 'wpldl',
    'LocherDick': 'tmdlo',
    'LooseParts': 'tmloo',
    'LostSheep': 'lost',
    'LoweChan': 'tmclo',
    'LuckovichMike': 'crmlu',
    'LuckyCow': 'luc',
    'MarkstienGary': 'crgma',
    'MarletteDoug': 'tmdma',
    'MccoyGlenn': 'gm',
    'MeaningOfLilaThe': 'crlil',
    'MeehanStreak': 'tmmee',
    'MiddletonsThe': 'tmmid',
    'MinimumSecurity': 'ms',
    'ModestyBlaiseSpanish': 'spmb',
    'Momma': 'crmom',
    'MorinJim': 'cwjmo',
    'MuttJeffSpanish': 'spmut',
    'MythTickle': 'myth',
    'NAoQV': 'naqv',
    'NaturalSelection': 'crns',
    'NestHeads': 'cpnst',
    'Neurotica': 'neu',
    'NonSequitur': 'nq',
    'OhmanJack': 'tmjoh',
    'OliphantPat': 'po',
    'OnAClaireDay': 'crocd',
    'OneBigHappy': 'crobh',
    'OtherCoastThe': 'crtoc',
    'OutOfTheGenePool': 'wpgen',
    'Overboard': 'ob',
    'OverboardSpanish': 'spob',
    'PepeSpanish': 'sppep',
    'PettJoel': 'jp',
    'Pibgorn': 'pib',
    'Pickles': 'wppic',
    'Pluggers': 'tmplu',
    'PoochCafe': 'poc',
    'PoochCafeSpanish': 'sppoc',
    'PopCulture': 'pop',
    'PowellDwane': 'crdpo',
    'Preteena': 'pr',
    'PricklyCity': 'prc',
    'QuigmansThe': 'tmqui',
    'RallComic': 'tr',
    'RamirezMicheal': 'crmrm',
    'RamseyMarshall': 'crmra',
    'RealLifeAdventures': 'rl',
    'RedAndRover': 'wpred',
    'RedMeat': 'red',
    'ReynoldsUnwrapped': 'rw',
    'RonaldinhoGaucho': 'ron',
    'RonaldinhoGauchoSpanish': 'spron',
    'Rubes': 'crrub',
    'SackSteve': 'tmssa',
    'SargentBen': 'bs',
    'SargentBenSpanish': 'spbs',
    'SendHelp': 'send',
    'ShenemanDrew': 'tmdsh',
    'SherffiusDrew': 'crjsh',
    'Shoecabbage': 'shcab',
    'Shoe': 'tmsho',
    'SigmundSpanish': 'spsig',
    'Slowpoke': 'slow',
    'SmallWorld': 'small',
    'SpaceIsThePlace': 'sitp',
    'SpeedBump': 'crspe',
    'StanisScott': 'crsst',
    'StateOfTheUnion': 'crsou',
    'StayskalWayne': 'tmwst',
    'StoneSoup': 'ss',
    'StrangeBrew': 'crstr',
    'SummersDana': 'tmdsu',
    'SuttonImpact': 'stn',
    'Sylvia': 'tmsyl',
    'SzepPaul': 'crpsz',
    'TankMcnamara': 'tm',
    'TeenageMutantNinjaTurtles': 'tmnt',
    'TelnaesAnn': 'tmate',
    'TheArgyleSweater': 'tas',
    'ThePinkPanther': 'tmpnk',
    'TheWizardOfId': 'crwiz',
    'TheWizardOfIdSpanish': 'crwis',
    'ThInk': 'think',
    'ThompsonMike': 'crmth',
    'ThroughThickAndThin': 'cpthk',
    'TinySepuku': 'tiny',
    'Toby': 'toby',
    'TolesTom': 'tt',
    'TomTheDancingBug': 'td',
    'TooMuchCoffeeMan': 'tmcm',
    'Trevor': 'trev',
    'TutelandiaSpanish': 'sptut',
    'VarvelGary': 'crgva',
    'WassermanDan': 'tmdwa',
    'WatchYourHead': 'wpwyh',
    'Waylay': 'min',
    'WeePals': 'crwee',
    'WinnieThePooh': 'crwin',
    'WitOfTheWorld': 'cwwit',
    'WorkingItOut': 'crwio',
    # NOTE(review): same short name ('tmdow') as 'DonWright' above --
    # possibly a duplicate entry; confirm before changing.
    'WriteDon': 'tmdow',
    'YennySpanish': 'spyen',
    'Yenny': 'yen',
    'ZackHill': 'crzhi',
    'ZiggySpanish': 'spzi',
    'Ziggy': 'zi',
}

# Publish one generated scraper class per table entry as a module-level
# name, keyed by the comic's name.  A generator expression is used so that
# no loop variables are left behind in the module namespace.
globals().update(
    (comicName, uclick(comicName, shortName))
    for comicName, shortName in comics.items()
)
|