From 1cc7d39047733a179db841a84edf913745922a32 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Thu, 7 Mar 2013 23:08:17 +0100 Subject: [PATCH] Fix some comics. --- dosagelib/plugins/c.py | 13 ++++++++++--- dosagelib/plugins/comicfury.py | 5 ----- dosagelib/plugins/e.py | 8 -------- dosagelib/plugins/g.py | 6 +++--- dosagelib/plugins/gocomics.py | 6 ++---- dosagelib/plugins/k.py | 9 --------- dosagelib/plugins/y.py | 8 -------- scripts/comicfury.py | 5 +++++ scripts/gocomics.py | 4 +++- 9 files changed, 23 insertions(+), 41 deletions(-) diff --git a/dosagelib/plugins/c.py b/dosagelib/plugins/c.py index 30a322a63..360b690a2 100644 --- a/dosagelib/plugins/c.py +++ b/dosagelib/plugins/c.py @@ -38,11 +38,17 @@ class CaseyAndAndy(_BasicScraper): class CaribbeanBlue(_BasicScraper): url = 'http://cblue.katbox.net/' stripUrl = url + 'comic/%s/' - #http://cblue.katbox.net/wp-content/uploads/cb270en.png?6949c1 - imageSearch = compile(tagre("img", "src", r'(http://cblue\.katbox\.net/wp-content/uploads/cb[^"]+)')) + imageSearch = compile(tagre("img", "src", r'(http://cblue\.katbox\.net/wp-content/uploads/sites/\d+/\d+/\d+/cb[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://cblue\.katbox\.net/comic/[^"]+)', after="previous")) help = 'Index format: nnn-stripname' + def shouldSkipUrl(self, url): + """Skip pages without images.""" + return url in ( + "http://cblue.katbox.net/comic/filler-stall-them/", + "http://cblue.katbox.net/comic/filler-kimi-figurine-now-available/", + ) + class Catalyst(_BasicScraper): baseUrl = "http://catalyst.spiderforest.com/" @@ -301,7 +307,8 @@ class CompanyY(_BasicScraper): class CorydonCafe(_BasicScraper): url = 'http://corydoncafe.com/' - starter = bounceStarter(url, compile(tagre("a", "href", r"(http://corydoncafe\.com/\d+/[^']+)", after="next", quote="'"))) + starter = indirectStarter(url, + compile(tagre("a", "href", r'(\./\d+/[^"]+)'))) stripUrl = url + '%s.php' imageSearch = compile(tagre("img", "src", r"(\./[^']+)", quote="'")) prevSearch = compile(tagre("a", "href", r"(http://corydoncafe\.com/\d+/[^']+)", after="prev", quote="'")) diff --git a/dosagelib/plugins/comicfury.py b/dosagelib/plugins/comicfury.py index 446daa70f..d02cccea4 100644 --- a/dosagelib/plugins/comicfury.py +++ b/dosagelib/plugins/comicfury.py @@ -147,7 +147,6 @@ add('Frontier2170', 'http://frontier2170.thecomicseries.com/', u"Life as an inde add('Fullmetalbrothers', 'http://fullmetalbrothers.thecomicseries.com/', u'') #add('Fusion', 'http://fusion.thecomicseries.com/', u"What's inside the pages of this little book isn't so much a comic as it is a viewport into another dimension where you can follow Savunn's hectic life as a Cambodian teen (and superhero fan-gal) as she tries her best to make her mark as her world's newest superhero, Fusion! As reader, you will serve as her confidant as you watch the reality show of her life unfold with each turn of the page.") add('Fuzzballandscuzzball', 'http://fuzzballandscuzzball.thecomicseries.com/', u'Fuzzball and Scuzzball are brothers who live in the gritty urban hell that is Happy City, New Jersey. They founded their own detective agency, and seldom have clients. These are their antics.') -add('Fuzzylittleninjas', 'http://fuzzylittleninjas.thecomicseries.com/', u'Welcome everyone, Fuzzy Lil Ninjas Proudly Presents Shades of Gray. Updates to the Web comic on every Sunday. Please Click on the Latest Tab to see the most Recent Comic, Enjoy.') add('Galbertofbruges', 'http://galbertofbruges.thecomicseries.com/', u'A comic based on the book "The Murder of Charles the Good, Count of Flanders"') add('GalleryOfFreaks', 'http://GalleryOfFreaks.thecomicseries.com/', u'') add('Glomshire', 'http://Glomshire.thecomicseries.com/', u"The misadventures of the inhabitants of Glomshire, a medieval(ish)land of undetermined origin. LEGO\xae is a trademark of the LEGO Group, which does not sponsor, authorize or endorse this comic (and we don't blame them!)") @@ -285,7 +284,6 @@ add('Spades', 'http://Spades.thecomicseries.com/', u"DISCONTINUED Spades is crea add('Spf1337', 'http://spf1337.thecomicseries.com/', u'A webcomic about the little things in life that let you get through the hard times. With groan-inducing puns and hilarious antics, SPF is here to entertain you. Or is it the other way around...?') add('Spooncomic', 'http://spooncomic.thecomicseries.com/', u'A evil wizard turned Charles\' whole town turned into a kitchen (that\'s right). Cursed with the body of a SPOON, he has embarked on a journey to find the wizard and along the way, he will meet other "cursed" ones.') add('Sscomic', 'http://ss-comic.thecomicseries.com/', u'') -add('Stardustthecat', 'http://stardustthecat.thecomicseries.com/', u'Stardust the Cat is a comic about a cat and a mouse who watch TV and occasionally have adventures.') add('Starraccoon', 'http://starraccoon.thecomicseries.com/', u"it's bugs bunny meets spiderman an alien raccoon escapes to earth after a evil warlord destroys his planet now he's on run from the goverment the men in black alien bounty hunters and the warlord himself") #add('Stickfodder', 'http://stickfodder.thecomicseries.com/', u"Stick figures at their finest and not so finest. Jokes, violence and harsh language? HELL YEAH! It's just fun for the entire family.") add('StrangeAttractors', 'http://StrangeAttractors.thecomicseries.com/', u'Strange Attractors is a humorous post-modern retro-science fiction comic. Sophie, the curator of the vast Museum of Lost Things, becomes embroiled in an epoch spanning war between magic and science, in which the comics she loves are actually coded histories, and she\u2019s become a magnet for all sorts of cosmic weirdness.') @@ -332,14 +330,11 @@ add('Tussenkatersenspraakwater', 'http://Tussenkatersenspraakwater.thecomicserie add('Unfortunatecircumstances', 'http://unfortunatecircumstances.thecomicseries.com/', u"This is my webcomic. A stream of consciousness piece of dribble that doesn't have any kind of cohesive form or topic. It's just me playing around. Have fun!") add('Unreliable', 'http://unreliable.thecomicseries.com/', u'Non-sequitur autobiographical comic-type things that will hopefully help me with my discipline.') add('Usbcomic', 'http://usbcomic.thecomicseries.com/', u'Warning: Not a pokemon sprite comic!!! Set in the year 2100 where the latest trend is artefially intelligent USB memory sticks. These can be used to help around the house, solve problems or even face off against each-other in (usually friendly) challenges. Our story follows the adventure of 16 year old Josh young; a teenager from edge-tech city, in the southwest of the UK (built in 2050, about 10 miles from Bristol).') -add('V4', 'http://v4.thecomicseries.com/', u'Enemies are coming to wipe out anyone and everyone in their path. Leaders of various territories are banding together to try and survive the coming battles. Underworld creatures and sorcerers battle for power near the end of the world.') -add('Verboten', 'http://verboten.thecomicseries.com/', u'The universe has always been an enigmatic place to human kind. It is said that eons ago, the only thing that existed was a dark, foreboding void. In time, however the Gods created the stars, the planets, and above all, life. This was their masterpiece work. Each God contributed a portion of their power to the creation of human kind....but....the Gods were betrayed by one of their own; The God of Time. After cursing humanity, and nearly driving the rest of the Gods to extinction, he was eventually sealed, and his powerful weapon was banished to the furthest reaches of the universe. But...there are other fearsome creatures that reside in the darkness. Evil things. Living nightmares that seep into this world from where our dimension meets another...') add('Violentblue', 'http://violentblue.thecomicseries.com/', u'A retrogamer, A comic fan, a punk Grrrl and a mean squirrel deal with life, pop culture, God and each other.') add('VisualDiary', 'http://VisualDiary.thecomicseries.com/', u'Two sides to an aspiring artist. Multiple personality disorder? Perhaps. Laughs? Definitely. "The Early Years" part of the comic that updates on Wednesday will more story based than the gags on the normal updates. Updates: Mon, Tues, Thurs, Fri - regular randomness Wednesday - "Early Years" Saturday - Art day') add('Wakethesleepers', 'http://wakethesleepers.com/', u'An ancient, deadly curse has inflicted Locke Rinannis. He now must race against time as he struggles to find a cure before his life is consumed by it. Along the way, he makes allies, confronts powerful enemies, and unearths the mystery of a lore long forgotten... (Updates every Friday)') add('Wayofthemetagamer', 'http://wayofthemetagamer.thecomicseries.com/', u"Bob buys the Dungeons and Dragons book off eBay. Hilarity ensues. Plot follows. Also tropes. Lots and lots of tropes. Also no fourth wall. Actually, it's mostly about the lack of fourth wall. We probably should have mentioned that bit right at the top. Actually, yeah, move that to the top- you're writing this LIVE?") add('Whenfoxesfly', 'http://whenfoxesfly.thecomicseries.com/', u'Starting back in 2005 (with a different host), "When Foxes Fly" is an anthro spoof of superheroes and villains. One could classify it as a Christian comic, but others might say it would more accurately be described as a webcomic presented from a Christian perspective. In any event, it follows the adventures of Filbert (a flying fox) and Rusty as they discover that childlike Faith in Eternal Truth is more powerful than any superhero or villain.') -add('WindRiders', 'http://WindRiders.thecomicseries.com/', u'The world has changed. A new military project to fight terrorism; troups of warriors able to fly due to a revolutionary technolgy, raised since childhood to become fighters, killers. A dangerous technology. This world is not ours anymore...') add('Winstonsworld', 'http://winstonsworld.thecomicseries.com/', u'Updated daily, M-F Winston is a curious little boy with a big imagination. Come join him as he learns about sexism, racism, deism, communism, materialism, and joy!') add('Woodsofevil', 'http://woodsofevil.thecomicseries.com/', u'Is back.') add('Wordstoliveby', 'http://wordstoliveby.thecomicseries.com/', u"You will most likely never see anything I have drawn here because I am an awful drawer. I will however do my best to make sure this comic includes wit, sarcasm, bad language, humor, random bits of helpful (and not so helpful) information. If I can't manage that? Ah well...fuck you you judgmental jerk.") diff --git a/dosagelib/plugins/e.py b/dosagelib/plugins/e.py index d2f747f07..3189d13c9 100644 --- a/dosagelib/plugins/e.py +++ b/dosagelib/plugins/e.py @@ -137,14 +137,6 @@ class Exiern(_BasicScraper): help = 'Index format: yyyy/mm/dd/stripname' -class ExiernDarkReflections(_BasicScraper): - url = 'http://darkreflections.exiern.com/' - stripUrl = url + 'index.php?strip_id=%s' - imageSearch = compile(r'"(istrip.+?)"') - prevSearch = compile(r'First.+?(/index.+?)".+?prev') - help = 'Index format: n' - - class ExploitationNow(_BasicScraper): url = 'http://www.exploitationnow.com/' stripUrl = url + '%s' diff --git a/dosagelib/plugins/g.py b/dosagelib/plugins/g.py index 76a790160..ad36a275a 100644 --- a/dosagelib/plugins/g.py +++ b/dosagelib/plugins/g.py @@ -131,9 +131,9 @@ class Gunshow(_BasicScraper): class GUComics(_BasicScraper): - url = 'http://www.gucomics.com/comic/' - stripUrl = url + '?cdate=%s' + url = 'http://www.gucomics.com/' + stripUrl = url + '%s' imageSearch = compile(tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(/comic/\?cdate=\d+)') + + prevSearch = compile(tagre("a", "href", r'(/\d+)') + tagre("img", "src", r'/images/nav/prev\.png')) help = 'Index format: yyyymmdd' diff --git a/dosagelib/plugins/gocomics.py b/dosagelib/plugins/gocomics.py index 480be978a..bae853882 100644 --- a/dosagelib/plugins/gocomics.py +++ b/dosagelib/plugins/gocomics.py @@ -4,7 +4,7 @@ from re import compile from ..scraper import make_scraper -from ..util import tagre, quote +from ..util import tagre from ..helpers import bounceStarter _imageSearch = compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)')) @@ -25,7 +25,7 @@ def add(name, shortname): url = url, starter = bounceStarter(url, _nextSearch), name='GoComics/' + name, - stripUrl=baseUrl + quote(shortname) + '/%s', + stripUrl=baseUrl + shortname + '/%s', imageSearch = _imageSearch, prevSearch = _prevSearch, help='Index format: yyyy/mm/dd', @@ -194,7 +194,6 @@ add('DudeandDude', '/dudedude') add('DumbQuestionBadAnswer', '/dumb-question-bad-answer') add('DustSpecks', '/dust-specks') add('EGGMEN', '/eggmen') -add('EclecticCartoons', '/eclectic-cartoons') add('Eddie', '/eddie') add('Eek', '/eek') add('EmmyLou', '/emmy-lou') @@ -601,7 +600,6 @@ add('ViewsLatinAmerica', '/viewslatinamerica') add('ViewsMidEast', '/viewsmideast') add('ViewsoftheWorld', '/viewsoftheworld') add('ViiviAndWagner', '/viivi-and-wagner') -add('VoicesInTheDark', '/voices-in-the-dark') add('WTDuck', '/wtduck') add('WaltHandelsman', '/walthandelsman') add('WatchYourHead', '/watchyourhead') diff --git a/dosagelib/plugins/k.py b/dosagelib/plugins/k.py index e235e46e3..7f4b4d8c3 100644 --- a/dosagelib/plugins/k.py +++ b/dosagelib/plugins/k.py @@ -25,15 +25,6 @@ class Key(_BasicScraper): help = 'Index format: nnn' -class KhaosKomix(_BasicScraper): - adult = True - url = 'http://www.khaoskomix.com/' - stripUrl = url + 'komix/%s' - imageSearch = compile(tagre("img", "src", r'(http://www\.khaoskomix\.com/komiximg/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://www\.khaoskomix\.com/komix/[^"]+)', after="Prev")) - help = 'Index format: stripname' - - class KillerKomics(_BasicScraper): url = 'http://www.killerkomics.com/web-comics/index_ang.cfm' stripUrl = 'http://www.killerkomics.com/web-comics/%s.cfm' diff --git a/dosagelib/plugins/y.py b/dosagelib/plugins/y.py index 85d1a9432..2ddebf4b2 100644 --- a/dosagelib/plugins/y.py +++ b/dosagelib/plugins/y.py @@ -14,11 +14,3 @@ class YAFGC(_BasicScraper): prevSearch = compile(tagre("a", "href", r'(http://yafgc\.net/\?id=\d+)') + tagre("img", "src", r'/img/navbar/go_to_previous\.gif')) help = 'Index format: n' - - -class YouSayItFirst(_BasicScraper): - url = 'http://www.yousayitfirst.com/' - stripUrl = url + 'comics/index.php?date=%s' - imageSearch = compile(tagre("img", "src", r"(http://www\.yousayitfirst\.com/comics/[^>']+)", quote="'?")) - prevSearch = compile(tagre("a", "href", r'(http://www\.yousayitfirst\.com/comics/index\.php\?date=\d+)', quote="'") + "Previous") - help = 'Index format: yyyymmdd' diff --git a/scripts/comicfury.py b/scripts/comicfury.py index 0103b6ace..deb4f83f7 100755 --- a/scripts/comicfury.py +++ b/scripts/comicfury.py @@ -26,6 +26,7 @@ exclude_comics = [ "6tsc", # unsuitable navigation "Archininja", # unsuitable navigation "BoozerandStoner", # unsuitable navigation + "Fuzzylittleninjas", # unsuitable navigation "Kaze", # unsuitable navigation "Sweetcheeriosandorangejuice", # unsuitable navigation "Coolstorybro", # unsuitable navigation @@ -173,6 +174,7 @@ exclude_comics = [ "Slightlyeccentric", # unsuitable navigation "Smbhax", # unsuitable navigation "SpiritSquire1", # unsuitable navigation + "Stardustthecat", # unsuitable navigation "Sticklife", # unsuitable navigation "StickMisadventures", # unsuitable navigation "StrangerThanFiction", # unsuitable navigation @@ -191,11 +193,14 @@ exclude_comics = [ "Townburgcity", # unsuitable navigation "Tuhinaloota", # unsuitable navigation "UFPA", # unsuitable navigation + "V4", # unsuitable navigation + "Verboten", # unsuitable navigation "Warg", # unsuitable navigation "Warrior27", # unsuitable navigation "Wastedpotential", # unsuitable navigation "Wcf", # unsuitable navigation "Whoseline", # unsuitable navigation + "WindRiders", # unsuitable navigation "WitchesTeaParty", # unsuitable navigation "Woohooligan", # unsuitable navigation "XWingAlliance", # unsuitable navigation diff --git a/scripts/gocomics.py b/scripts/gocomics.py index ab794037c..ab89d12c1 100755 --- a/scripts/gocomics.py +++ b/scripts/gocomics.py @@ -20,7 +20,6 @@ url_matcher = re.compile(tagre("a", "href", r'(/[^"]+)', after="alpha_list") + r # names of comics to exclude exclude_comics = [ - "FrikkFrakkAndFrank", # too few comics "Apocalypseharry", # too few comics "BatkidandBatrat", # too few comics "BETWEENTHELINES", # comic unavailable @@ -31,7 +30,9 @@ exclude_comics = [ "DellAndSteve", # too few comics "Dilbert", # redirect "DutchnPals", # too few comics + "EclecticCartoons", # missing images "FlexandTone", # too few comics + "FrikkFrakkAndFrank", # too few comics "InkeeDoodles", # comic unavailable "MaggiesComics", # too few comics "OfMiceandMud", # too few comics @@ -43,6 +44,7 @@ exclude_comics = [ "Slowpoke", # comic moved "SparComics", # comic unavailable "SurvivingSingle", # comic unavailable + "VoicesInTheDark", # too few comics "WhatTheFrak", # too few comics "ZeekyZebraandCompany", # too few comics ]