dosage/dosagelib/plugins/uc.py
2012-06-20 21:58:13 +02:00

279 lines
7.8 KiB
Python

from re import compile, IGNORECASE, sub
from ..helpers import _BasicScraper
from ..util import fetchManyMatches, fetchUrl
class _UClickScraper(_BasicScraper):
    """Common scraper definition shared by every uclick-hosted comic.

    Concrete comics are subclasses that supply ``name`` and ``shortName``
    (see the ``uclick`` factory in this module); ``shortName`` is the code
    substituted into ``baseUrl``.
    """

    # Index page listing all comics; scraped by fetchSubmodules().
    homepage = 'http://content.uclick.com/a2z.html'
    # URL template for one comic; the single %s slot takes the comic's code.
    baseUrl = 'http://www.uclick.com/client/zzz/%s/'
    # latestUrl with a trailing '%s/' slot appended. latestUrl is not defined
    # in this class -- presumably provided by _BasicScraper; confirm there.
    imageUrl = property(lambda self: self.latestUrl + '%s/')
    # Captures the URL of the comic's .gif image from a page.
    imageSearch = compile(r'<img[^>]+src="(http://synd.imgsrv.uclick.com/comics/\w+/\d{4}/[^"]+\.gif)"', IGNORECASE)
    # Captures the relative link labelled "Previous date".
    prevSearch = compile(r'<a href="(/client/zzz/\w+/\d{4}/\d{2}/\d{2}/)">Previous date', IGNORECASE)
    help = 'Index format: yyyy/mm/dd'

    @classmethod
    def starter(cls):
        """Return the start URL: ``baseUrl`` filled in with ``cls.shortName``."""
        return cls.baseUrl % (cls.shortName,)

    @classmethod
    def fetchSubmodules(cls):
        """Return normalized names of the comics linked from ``homepage``.

        Every content link found on the homepage is split into its URL code
        and its link text. Codes listed in ``exclusions`` are dropped, and a
        comic is kept only when fetching its ``baseUrl`` page with
        ``imageSearch`` yields a truthy result.
        """
        # Codes that appear as content links but must not become comics.
        exclusions = (
            'index',
        )
        # First pattern grabs each whole anchor tag; the second splits an
        # anchor into (code, link text).
        submoduleSearch = compile(r'(<A HREF="http://content.uclick.com/content/\w+.html">[^>]+?</a>)', IGNORECASE)
        partsMatch = compile(r'<A HREF="http://content.uclick.com/content/(\w+?).html">([^>]+?)</a>', IGNORECASE)
        matches = fetchManyMatches(cls.homepage, (submoduleSearch,))[0]
        possibles = [partsMatch.match(match).groups() for match in matches]

        def normalizeName(name):
            """Turn link text into an alphanumeric, title-cased identifier."""
            # '&eacute;' -> 'e' etc.: keep the base letter of acute entities.
            name = sub(r'&(.)acute;', r'\1', name).title()
            return ''.join([c for c in name if c.isalnum()])

        def fetchSubmodule(module):
            """Probe the comic page for ``module``; return False on failure."""
            try:
                return fetchUrl(cls.baseUrl % module, cls.imageSearch)
            except Exception:
                # Best-effort probe: any ordinary error means "not available".
                # Deliberately not a bare ``except:`` so that Ctrl-C
                # (KeyboardInterrupt) and SystemExit still abort the scan.
                return False

        return [normalizeName(name) for part, name in possibles if part not in exclusions and fetchSubmodule(part)]
def uclick(name, shortName):
    """Create the scraper class for one uclick comic.

    The returned class derives from ``_UClickScraper``, is called
    ``UClick_<name>``, and carries two class attributes: ``name`` set to
    ``'UClick/' + name`` and ``shortName`` set to the given code.
    """
    className = 'UClick_%s' % name
    attributes = {
        'name': 'UClick/' + name,
        'shortName': shortName,
    }
    return type(className, (_UClickScraper,), attributes)
# Table of uclick comics: maps the comic name to its uclick code.
# Each (name, code) pair is passed to uclick(name, shortName) at the bottom of
# this module, so the key becomes part of the generated class name and its
# 'UClick/<name>' label, and the value becomes the class's shortName (the part
# substituted into _UClickScraper.baseUrl).
# NOTE(review): the keys look like output of fetchSubmodules()'s normalizeName
# (e.g. 'ForHeavenSSake') -- presumably generated from the site index; confirm
# before renaming any entry, since the keys are the public comic names.
comics = {
'5thWave': 'fw',
'9To5': 'tmntf',
'AdamHome': 'ad',
'Agnes': 'cragn',
'AlcarazLalo': 'la',
'AlcarazLaloSpanish': 'spla',
'AndersonNick': 'wpnan',
'AndyCapp': 'crcap',
'AnimalCrackers': 'tmani',
'Annie': 'tmann',
'AsayChuck': 'crcas',
'AskShagg': 'crask',
'AuthTony': 'ta',
'BadReporter': 'bad',
'Baldo': 'ba',
'BaldoSpanish': 'be',
'BallardStreet': 'crbal',
'BarkEaterLake': 'bark',
'BarstowDonna': 'dba',
'BC': 'crbc',
'BCSpanish': 'crbcs',
'BeattieBruce': 'crbbe',
'BennetClay': 'wpcbe',
'BensonLisa': 'wplbe',
'BensonSteve': 'crsbe',
'BigTop': 'bt',
'Biographic': 'biov',
'Bleeker': 'blk',
'BobTheSquirrel': 'bob',
'BoilingPoint': 'boil',
'BokChip': 'crcbo',
'BoNanas': 'bon',
'Boomerangs': 'boom',
'BoondocksThe': 'bo',
'BottomLiners': 'tmbot',
'BoundAndGagged': 'tmbou',
'Brainwaves': 'bwv',
'BreenSteve': 'crsbr',
'BrendaStarr': 'tmbre',
'BrewsterRockit': 'tmrkt',
'BrittChris': 'crcbr',
'BroomHilda': 'tmbro',
'Candorville': 'cand',
'CarlsonStuart': 'sc',
'CatalinoKen': 'crkca',
'Cathy': 'ca',
'CathySpanish': 'spca',
'CEstLaVie': 'clv',
'CityThe': 'derf',
'ClearBlueWater': 'cbw',
'Cleats': 'cle',
'CloseToHome': 'cl',
'CombsPaul': 'tmcmb',
'CompuToon': 'tmcom',
'Condorito': 'cond',
'ConradPaul': 'tmpco',
'Cornered': 'co',
'CulDeSac': 'cds',
'DanzigerJeff': 'jd',
'DaviesMatt': 'tmmda',
'DeepCover': 'deep',
'DeeringJohn': 'crjde',
'DickTracy': 'tmdic',
'DinetteSetThe': 'crdin',
'DogEatDoug': 'crdog',
'DonWright': 'tmdow',
'Doodles': 'tmdoo',
'Doonesbury': 'db',
'DuplexThe': 'dp',
'Eek': 'eek',
'ElderberriesThe': 'eld',
'FacesInTheNews': 'kw',
'FlightDeck': 'crfd',
'FloAndFriends': 'crflo',
'FlyingMccoysThe': 'fmc',
'ForBetterOrForWorse': 'fb',
'ForHeavenSSake': 'crfhs',
'FoxtrotClassics': 'ftcl',
'Foxtrot': 'ft',
'FoxtrotSpanish': 'spft',
'FrankAndErnest': 'fa',
'FredBassetSpanish': 'spfba',
'FredBasset': 'tmfba',
'FrogApplause': 'frog',
'FuscoBrothersThe': 'fu',
'Garfield': 'ga',
'GarfieldSpanish': 'gh',
'GasolineAlley': 'tmgas',
'GaturroSpanish': 'spgat',
'GilThorp': 'tmgil',
'GingerMeggs': 'gin',
'GingerMeggsSpanish': 'spgin',
'GirlsAndSports': 'crgis',
'GorrellBob': 'crbgo',
'GoTeamBob': 'gtb',
'HammondBruce': 'hb',
'HandelsmanWalt': 'tmwha',
'HeartOfTheCity': 'hc',
'Heathcliff': 'crhea',
'HeathcliffSpanish': 'crhes',
'HerbAndJamaal': 'crher',
'HigginsJack': 'jh',
'HomeAndAway': 'wphaa',
'HorseyDavid': 'tmdho',
'Housebroken': 'tmhou',
'HubertAndAbby': 'haa',
'IdiotBox': 'ibox',
'ImagineThis': 'imt',
'InkPen': 'ink',
'InTheBleachers': 'bl',
'ItsAllAboutYou': 'wpiay',
'JamesBondSpanish': 'spjb',
'JonesClay': 'crcjo',
'KallaugherKevin': 'cwkal',
'KChroniclesThe': 'kk',
'KelleySteve': 'crske',
'Kudzu': 'tmkud',
'LaCucaracha': 'lc',
'LegendOfBill': 'lob',
'LibertyMeadows': 'crlib',
'Lio': 'lio',
'LittleDogLost': 'wpldl',
'LocherDick': 'tmdlo',
'LooseParts': 'tmloo',
'LostSheep': 'lost',
'LoweChan': 'tmclo',
'LuckovichMike': 'crmlu',
'LuckyCow': 'luc',
'MarkstienGary': 'crgma',
'MarletteDoug': 'tmdma',
'MccoyGlenn': 'gm',
'MeaningOfLilaThe': 'crlil',
'MeehanStreak': 'tmmee',
'MiddletonsThe': 'tmmid',
'MinimumSecurity': 'ms',
'ModestyBlaiseSpanish': 'spmb',
'Momma': 'crmom',
'MorinJim': 'cwjmo',
'MuttJeffSpanish': 'spmut',
'MythTickle': 'myth',
'NAoQV': 'naqv',
'NaturalSelection': 'crns',
'NestHeads': 'cpnst',
'Neurotica': 'neu',
'NonSequitur': 'nq',
'OhmanJack': 'tmjoh',
'OliphantPat': 'po',
'OnAClaireDay': 'crocd',
'OneBigHappy': 'crobh',
'OtherCoastThe': 'crtoc',
'OutOfTheGenePool': 'wpgen',
'Overboard': 'ob',
'OverboardSpanish': 'spob',
'PepeSpanish': 'sppep',
'PettJoel': 'jp',
'Pibgorn': 'pib',
'Pickles': 'wppic',
'Pluggers': 'tmplu',
'PoochCafe': 'poc',
'PoochCafeSpanish': 'sppoc',
'PopCulture': 'pop',
'PowellDwane': 'crdpo',
'Preteena': 'pr',
'PricklyCity': 'prc',
'QuigmansThe': 'tmqui',
'RallComic': 'tr',
'RamirezMicheal': 'crmrm',
'RamseyMarshall': 'crmra',
'RealLifeAdventures': 'rl',
'RedAndRover': 'wpred',
'RedMeat': 'red',
'ReynoldsUnwrapped': 'rw',
'RonaldinhoGaucho': 'ron',
'RonaldinhoGauchoSpanish': 'spron',
'Rubes': 'crrub',
'SackSteve': 'tmssa',
'SargentBen': 'bs',
'SargentBenSpanish': 'spbs',
'SendHelp': 'send',
'ShenemanDrew': 'tmdsh',
'SherffiusDrew': 'crjsh',
'Shoecabbage': 'shcab',
'Shoe': 'tmsho',
'SigmundSpanish': 'spsig',
'Slowpoke': 'slow',
'SmallWorld': 'small',
'SpaceIsThePlace': 'sitp',
'SpeedBump': 'crspe',
'StanisScott': 'crsst',
'StateOfTheUnion': 'crsou',
'StayskalWayne': 'tmwst',
'StoneSoup': 'ss',
'StrangeBrew': 'crstr',
'SummersDana': 'tmdsu',
'SuttonImpact': 'stn',
'Sylvia': 'tmsyl',
'SzepPaul': 'crpsz',
'TankMcnamara': 'tm',
'TeenageMutantNinjaTurtles': 'tmnt',
'TelnaesAnn': 'tmate',
'TheArgyleSweater': 'tas',
'ThePinkPanther': 'tmpnk',
'TheWizardOfId': 'crwiz',
'TheWizardOfIdSpanish': 'crwis',
'ThInk': 'think',
'ThompsonMike': 'crmth',
'ThroughThickAndThin': 'cpthk',
'TinySepuku': 'tiny',
'Toby': 'toby',
'TolesTom': 'tt',
'TomTheDancingBug': 'td',
'TooMuchCoffeeMan': 'tmcm',
'Trevor': 'trev',
'TutelandiaSpanish': 'sptut',
'VarvelGary': 'crgva',
'WassermanDan': 'tmdwa',
'WatchYourHead': 'wpwyh',
'Waylay': 'min',
'WeePals': 'crwee',
'WinnieThePooh': 'crwin',
'WitOfTheWorld': 'cwwit',
'WorkingItOut': 'crwio',
# NOTE(review): same code as 'DonWright' above ('tmdow') -- looks like a
# duplicate entry for the same comic under a misspelled name; confirm.
'WriteDon': 'tmdow',
'YennySpanish': 'spyen',
'Yenny': 'yen',
'ZackHill': 'crzhi',
'ZiggySpanish': 'spzi',
'Ziggy': 'zi',
}
# Build one scraper class per entry of `comics` and publish each as a
# module-level name equal to its dict key. The classes are collected into a
# dict first, so the module namespace is only touched once all are built.
globals().update(dict(
    (comicName, uclick(comicName, code))
    for comicName, code in comics.items()
))