Fix some comics.

This commit is contained in:
Bastian Kleineidam 2013-03-07 23:08:17 +01:00
parent 0215ae82af
commit 1cc7d39047
9 changed files with 23 additions and 41 deletions

View file

@ -38,11 +38,17 @@ class CaseyAndAndy(_BasicScraper):
# Scraper definition for the comic served from http://cblue.katbox.net/.
# NOTE(review): this text is a flattened commit view (hunk "-38,11 +38,17"):
# indentation is stripped and removed/added lines carry no +/- markers.
class CaribbeanBlue(_BasicScraper):
# Site root; reused as the prefix of stripUrl.
url = 'http://cblue.katbox.net/'
# Per-strip page URL; presumably %s receives the index described by `help`.
stripUrl = url + 'comic/%s/'
# Example image URL: http://cblue.katbox.net/wp-content/uploads/cb270en.png?6949c1
# Captures <img src> values directly under wp-content/uploads/ whose file
# name starts with "cb".
# NOTE(review): presumably the pre-change line of this hunk, superseded by
# the assignment that follows - confirm against the raw diff.
imageSearch = compile(tagre("img", "src", r'(http://cblue\.katbox\.net/wp-content/uploads/cb[^"]+)'))
# Captures <img src> values under wp-content/uploads/sites/ followed by three
# numeric path segments and a file name starting with "cb".
imageSearch = compile(tagre("img", "src", r'(http://cblue\.katbox\.net/wp-content/uploads/sites/\d+/\d+/\d+/cb[^"]+)'))
# Captures <a href> values pointing below /comic/; after="previous" is handed
# to tagre - presumably it ties the match to the "previous" link; verify
# against tagre's definition.
prevSearch = compile(tagre("a", "href", r'(http://cblue\.katbox\.net/comic/[^"]+)', after="previous"))
help = 'Index format: nnn-stripname'
def shouldSkipUrl(self, url):
"""Skip pages without images."""
# True only when url is exactly one of the two filler pages listed here.
return url in (
"http://cblue.katbox.net/comic/filler-stall-them/",
"http://cblue.katbox.net/comic/filler-kimi-figurine-now-available/",
)
class Catalyst(_BasicScraper):
baseUrl = "http://catalyst.spiderforest.com/"
@ -301,7 +307,8 @@ class CompanyY(_BasicScraper):
class CorydonCafe(_BasicScraper):
url = 'http://corydoncafe.com/'
starter = bounceStarter(url, compile(tagre("a", "href", r"(http://corydoncafe\.com/\d+/[^']+)", after="next", quote="'")))
starter = indirectStarter(url,
compile(tagre("a", "href", r'(\./\d+/[^"]+)')))
stripUrl = url + '%s.php'
imageSearch = compile(tagre("img", "src", r"(\./[^']+)", quote="'"))
prevSearch = compile(tagre("a", "href", r"(http://corydoncafe\.com/\d+/[^']+)", after="prev", quote="'"))

View file

@ -147,7 +147,6 @@ add('Frontier2170', 'http://frontier2170.thecomicseries.com/', u"Life as an inde
add('Fullmetalbrothers', 'http://fullmetalbrothers.thecomicseries.com/', u'')
#add('Fusion', 'http://fusion.thecomicseries.com/', u"What's inside the pages of this little book isn't so much a comic as it is a viewport into another dimension where you can follow Savunn's hectic life as a Cambodian teen (and superhero fan-gal) as she tries her best to make her mark as her world's newest superhero, Fusion! As reader, you will serve as her confidant as you watch the reality show of her life unfold with each turn of the page.")
add('Fuzzballandscuzzball', 'http://fuzzballandscuzzball.thecomicseries.com/', u'Fuzzball and Scuzzball are brothers who live in the gritty urban hell that is Happy City, New Jersey. They founded their own detective agency, and seldom have clients. These are their antics.')
add('Fuzzylittleninjas', 'http://fuzzylittleninjas.thecomicseries.com/', u'Welcome everyone, Fuzzy Lil Ninjas Proudly Presents Shades of Gray. Updates to the Web comic on every Sunday. Please Click on the Latest Tab to see the most Recent Comic, Enjoy.')
add('Galbertofbruges', 'http://galbertofbruges.thecomicseries.com/', u'A comic based on the book "The Murder of Charles the Good, Count of Flanders"')
add('GalleryOfFreaks', 'http://GalleryOfFreaks.thecomicseries.com/', u'')
add('Glomshire', 'http://Glomshire.thecomicseries.com/', u"The misadventures of the inhabitants of Glomshire, a medieval(ish)land of undetermined origin. LEGO\xae is a trademark of the LEGO Group, which does not sponsor, authorize or endorse this comic (and we don't blame them!)")
@ -285,7 +284,6 @@ add('Spades', 'http://Spades.thecomicseries.com/', u"DISCONTINUED Spades is crea
add('Spf1337', 'http://spf1337.thecomicseries.com/', u'A webcomic about the little things in life that let you get through the hard times. With groan-inducing puns and hilarious antics, SPF is here to entertain you. Or is it the other way around...?')
add('Spooncomic', 'http://spooncomic.thecomicseries.com/', u'A evil wizard turned Charles\' whole town turned into a kitchen (that\'s right). Cursed with the body of a SPOON, he has embarked on a journey to find the wizard and along the way, he will meet other "cursed" ones.')
add('Sscomic', 'http://ss-comic.thecomicseries.com/', u'')
add('Stardustthecat', 'http://stardustthecat.thecomicseries.com/', u'Stardust the Cat is a comic about a cat and a mouse who watch TV and occasionally have adventures.')
add('Starraccoon', 'http://starraccoon.thecomicseries.com/', u"it's bugs bunny meets spiderman an alien raccoon escapes to earth after a evil warlord destroys his planet now he's on run from the goverment the men in black alien bounty hunters and the warlord himself")
#add('Stickfodder', 'http://stickfodder.thecomicseries.com/', u"Stick figures at their finest and not so finest. Jokes, violence and harsh language? HELL YEAH! It's just fun for the entire family.")
add('StrangeAttractors', 'http://StrangeAttractors.thecomicseries.com/', u'Strange Attractors is a humorous post-modern retro-science fiction comic. Sophie, the curator of the vast Museum of Lost Things, becomes embroiled in an epoch spanning war between magic and science, in which the comics she loves are actually coded histories, and she\u2019s become a magnet for all sorts of cosmic weirdness.')
@ -332,14 +330,11 @@ add('Tussenkatersenspraakwater', 'http://Tussenkatersenspraakwater.thecomicserie
add('Unfortunatecircumstances', 'http://unfortunatecircumstances.thecomicseries.com/', u"This is my webcomic. A stream of consciousness piece of dribble that doesn't have any kind of cohesive form or topic. It's just me playing around. Have fun!")
add('Unreliable', 'http://unreliable.thecomicseries.com/', u'Non-sequitur autobiographical comic-type things that will hopefully help me with my discipline.')
add('Usbcomic', 'http://usbcomic.thecomicseries.com/', u'Warning: Not a pokemon sprite comic!!! Set in the year 2100 where the latest trend is artefially intelligent USB memory sticks. These can be used to help around the house, solve problems or even face off against each-other in (usually friendly) challenges. Our story follows the adventure of 16 year old Josh young; a teenager from edge-tech city, in the southwest of the UK (built in 2050, about 10 miles from Bristol).')
add('V4', 'http://v4.thecomicseries.com/', u'Enemies are coming to wipe out anyone and everyone in their path. Leaders of various territories are banding together to try and survive the coming battles. Underworld creatures and sorcerers battle for power near the end of the world.')
add('Verboten', 'http://verboten.thecomicseries.com/', u'The universe has always been an enigmatic place to human kind. It is said that eons ago, the only thing that existed was a dark, foreboding void. In time, however the Gods created the stars, the planets, and above all, life. This was their masterpiece work. Each God contributed a portion of their power to the creation of human kind....but....the Gods were betrayed by one of their own; The God of Time. After cursing humanity, and nearly driving the rest of the Gods to extinction, he was eventually sealed, and his powerful weapon was banished to the furthest reaches of the universe. But...there are other fearsome creatures that reside in the darkness. Evil things. Living nightmares that seep into this world from where our dimension meets another...')
add('Violentblue', 'http://violentblue.thecomicseries.com/', u'A retrogamer, A comic fan, a punk Grrrl and a mean squirrel deal with life, pop culture, God and each other.')
add('VisualDiary', 'http://VisualDiary.thecomicseries.com/', u'Two sides to an aspiring artist. Multiple personality disorder? Perhaps. Laughs? Definitely. "The Early Years" part of the comic that updates on Wednesday will more story based than the gags on the normal updates. Updates: Mon, Tues, Thurs, Fri - regular randomness Wednesday - "Early Years" Saturday - Art day')
add('Wakethesleepers', 'http://wakethesleepers.com/', u'An ancient, deadly curse has inflicted Locke Rinannis. He now must race against time as he struggles to find a cure before his life is consumed by it. Along the way, he makes allies, confronts powerful enemies, and unearths the mystery of a lore long forgotten... (Updates every Friday)')
add('Wayofthemetagamer', 'http://wayofthemetagamer.thecomicseries.com/', u"Bob buys the Dungeons and Dragons book off eBay. Hilarity ensues. Plot follows. Also tropes. Lots and lots of tropes. Also no fourth wall. Actually, it's mostly about the lack of fourth wall. We probably should have mentioned that bit right at the top. Actually, yeah, move that to the top- you're writing this LIVE?")
add('Whenfoxesfly', 'http://whenfoxesfly.thecomicseries.com/', u'Starting back in 2005 (with a different host), "When Foxes Fly" is an anthro spoof of superheroes and villains. One could classify it as a Christian comic, but others might say it would more accurately be described as a webcomic presented from a Christian perspective. In any event, it follows the adventures of Filbert (a flying fox) and Rusty as they discover that childlike Faith in Eternal Truth is more powerful than any superhero or villain.')
add('WindRiders', 'http://WindRiders.thecomicseries.com/', u'The world has changed. A new military project to fight terrorism; troups of warriors able to fly due to a revolutionary technolgy, raised since childhood to become fighters, killers. A dangerous technology. This world is not ours anymore...')
add('Winstonsworld', 'http://winstonsworld.thecomicseries.com/', u'Updated daily, M-F Winston is a curious little boy with a big imagination. Come join him as he learns about sexism, racism, deism, communism, materialism, and joy!')
add('Woodsofevil', 'http://woodsofevil.thecomicseries.com/', u'Is back.')
add('Wordstoliveby', 'http://wordstoliveby.thecomicseries.com/', u"You will most likely never see anything I have drawn here because I am an awful drawer. I will however do my best to make sure this comic includes wit, sarcasm, bad language, humor, random bits of helpful (and not so helpful) information. If I can't manage that? Ah well...fuck you you judgmental jerk.")

View file

@ -137,14 +137,6 @@ class Exiern(_BasicScraper):
help = 'Index format: yyyy/mm/dd/stripname'
# Scraper definition for the comic served from
# http://darkreflections.exiern.com/.
# NOTE(review): the enclosing hunk header ("-137,14 +137,6") shrinks by eight
# lines, which is consistent with this class being what the commit removes -
# confirm against the raw diff.
class ExiernDarkReflections(_BasicScraper):
# Site root; reused as the prefix of stripUrl.
url = 'http://darkreflections.exiern.com/'
# Strip pages are addressed through the strip_id query parameter;
# presumably %s receives the index described by `help`.
stripUrl = url + 'index.php?strip_id=%s'
# Captures a double-quoted value that begins with "istrip" (lazy match up to
# the closing quote).
imageSearch = compile(r'"(istrip.+?)"')
# Captures a path starting with "/index" that occurs after the text "First"
# and is followed, after its closing double quote, by the text "prev".
prevSearch = compile(r'First.+?(/index.+?)".+?prev')
help = 'Index format: n'
class ExploitationNow(_BasicScraper):
url = 'http://www.exploitationnow.com/'
stripUrl = url + '%s'

View file

@ -131,9 +131,9 @@ class Gunshow(_BasicScraper):
class GUComics(_BasicScraper):
url = 'http://www.gucomics.com/comic/'
stripUrl = url + '?cdate=%s'
url = 'http://www.gucomics.com/'
stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/comic/\?cdate=\d+)') +
prevSearch = compile(tagre("a", "href", r'(/\d+)') +
tagre("img", "src", r'/images/nav/prev\.png'))
help = 'Index format: yyyymmdd'

View file

@ -4,7 +4,7 @@
from re import compile
from ..scraper import make_scraper
from ..util import tagre, quote
from ..util import tagre
from ..helpers import bounceStarter
_imageSearch = compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)'))
@ -25,7 +25,7 @@ def add(name, shortname):
url = url,
starter = bounceStarter(url, _nextSearch),
name='GoComics/' + name,
stripUrl=baseUrl + quote(shortname) + '/%s',
stripUrl=baseUrl + shortname + '/%s',
imageSearch = _imageSearch,
prevSearch = _prevSearch,
help='Index format: yyyy/mm/dd',
@ -194,7 +194,6 @@ add('DudeandDude', '/dudedude')
add('DumbQuestionBadAnswer', '/dumb-question-bad-answer')
add('DustSpecks', '/dust-specks')
add('EGGMEN', '/eggmen')
add('EclecticCartoons', '/eclectic-cartoons')
add('Eddie', '/eddie')
add('Eek', '/eek')
add('EmmyLou', '/emmy-lou')
@ -601,7 +600,6 @@ add('ViewsLatinAmerica', '/viewslatinamerica')
add('ViewsMidEast', '/viewsmideast')
add('ViewsoftheWorld', '/viewsoftheworld')
add('ViiviAndWagner', '/viivi-and-wagner')
add('VoicesInTheDark', '/voices-in-the-dark')
add('WTDuck', '/wtduck')
add('WaltHandelsman', '/walthandelsman')
add('WatchYourHead', '/watchyourhead')

View file

@ -25,15 +25,6 @@ class Key(_BasicScraper):
help = 'Index format: nnn'
# Scraper definition for the comic served from http://www.khaoskomix.com/.
# NOTE(review): the enclosing hunk header ("-25,15 +25,6") shrinks by nine
# lines, which is consistent with this class being what the commit removes -
# confirm against the raw diff.
class KhaosKomix(_BasicScraper):
# Marks this scraper as adult; how the flag is consumed is not visible here.
adult = True
# Site root; reused as the prefix of stripUrl.
url = 'http://www.khaoskomix.com/'
# Per-strip page URL; presumably %s receives the index described by `help`.
stripUrl = url + 'komix/%s'
# Captures absolute <img src> values below /komiximg/.
imageSearch = compile(tagre("img", "src", r'(http://www\.khaoskomix\.com/komiximg/[^"]+)'))
# Captures absolute <a href> values below /komix/; after="Prev" is handed to
# tagre - presumably it ties the match to the "Prev" link; verify against
# tagre's definition.
prevSearch = compile(tagre("a", "href", r'(http://www\.khaoskomix\.com/komix/[^"]+)', after="Prev"))
help = 'Index format: stripname'
class KillerKomics(_BasicScraper):
url = 'http://www.killerkomics.com/web-comics/index_ang.cfm'
stripUrl = 'http://www.killerkomics.com/web-comics/%s.cfm'

View file

@ -14,11 +14,3 @@ class YAFGC(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'(http://yafgc\.net/\?id=\d+)') +
tagre("img", "src", r'/img/navbar/go_to_previous\.gif'))
help = 'Index format: n'
# Scraper definition for the comic served from http://www.yousayitfirst.com/.
# NOTE(review): the enclosing hunk header ("-14,11 +14,3") shrinks by eight
# lines, which is consistent with this class being what the commit removes -
# confirm against the raw diff.
class YouSayItFirst(_BasicScraper):
# Site root; reused as the prefix of stripUrl.
url = 'http://www.yousayitfirst.com/'
# Strip pages are addressed through the date query parameter; presumably %s
# receives the yyyymmdd index described by `help`.
stripUrl = url + 'comics/index.php?date=%s'
# Captures absolute <img src> values below /comics/, stopping at ">" or a
# single quote; quote="'?" is handed to tagre - presumably it makes the
# attribute's single quote optional; verify against tagre's definition.
imageSearch = compile(tagre("img", "src", r"(http://www\.yousayitfirst\.com/comics/[^>']+)", quote="'?"))
# Captures <a href> values of the form comics/index.php?date=<digits>; the
# literal text "Previous" is appended to the pattern, so it must directly
# follow whatever the tagre fragment matches.
prevSearch = compile(tagre("a", "href", r'(http://www\.yousayitfirst\.com/comics/index\.php\?date=\d+)', quote="'") + "Previous")
help = 'Index format: yyyymmdd'

View file

@ -26,6 +26,7 @@ exclude_comics = [
"6tsc", # unsuitable navigation
"Archininja", # unsuitable navigation
"BoozerandStoner", # unsuitable navigation
"Fuzzylittleninjas", # unsuitable navigation
"Kaze", # unsuitable navigation
"Sweetcheeriosandorangejuice", # unsuitable navigation
"Coolstorybro", # unsuitable navigation
@ -173,6 +174,7 @@ exclude_comics = [
"Slightlyeccentric", # unsuitable navigation
"Smbhax", # unsuitable navigation
"SpiritSquire1", # unsuitable navigation
"Stardustthecat", # unsuitable navigation
"Sticklife", # unsuitable navigation
"StickMisadventures", # unsuitable navigation
"StrangerThanFiction", # unsuitable navigation
@ -191,11 +193,14 @@ exclude_comics = [
"Townburgcity", # unsuitable navigation
"Tuhinaloota", # unsuitable navigation
"UFPA", # unsuitable navigation
"V4", # unsuitable navigation
"Verboten", # unsuitable navigation
"Warg", # unsuitable navigation
"Warrior27", # unsuitable navigation
"Wastedpotential", # unsuitable navigation
"Wcf", # unsuitable navigation
"Whoseline", # unsuitable navigation
"WindRiders", # unsuitable navigation
"WitchesTeaParty", # unsuitable navigation
"Woohooligan", # unsuitable navigation
"XWingAlliance", # unsuitable navigation

View file

@ -20,7 +20,6 @@ url_matcher = re.compile(tagre("a", "href", r'(/[^"]+)', after="alpha_list") + r
# names of comics to exclude
exclude_comics = [
"FrikkFrakkAndFrank", # too few comics
"Apocalypseharry", # too few comics
"BatkidandBatrat", # too few comics
"BETWEENTHELINES", # comic unavailable
@ -31,7 +30,9 @@ exclude_comics = [
"DellAndSteve", # too few comics
"Dilbert", # redirect
"DutchnPals", # too few comics
"EclecticCartoons", # missing images
"FlexandTone", # too few comics
"FrikkFrakkAndFrank", # too few comics
"InkeeDoodles", # comic unavailable
"MaggiesComics", # too few comics
"OfMiceandMud", # too few comics
@ -43,6 +44,7 @@ exclude_comics = [
"Slowpoke", # comic moved
"SparComics", # comic unavailable
"SurvivingSingle", # comic unavailable
"VoicesInTheDark", # too few comics
"WhatTheFrak", # too few comics
"ZeekyZebraandCompany", # too few comics
]