Add some descriptions.

This commit is contained in:
Bastian Kleineidam 2013-04-14 09:02:14 +02:00
parent 4850626fc2
commit e37a80fdc1
27 changed files with 291 additions and 0 deletions

View file

@ -1,5 +1,9 @@
Dosage 2.1 (released xx.4.2013)
Features:
- comics: Add description from webpages. Descriptions are displayed
with dosage -m.
Fixes:
- cmdline: Fix output encoding errors on comic listing.
Closes: GH bug #24

View file

@ -9,6 +9,7 @@ from ..helpers import regexNamer, bounceStarter, indirectStarter
class AbleAndBaker(_BasicScraper):
description = u"Able and Baker: Hatin' and Dictatin'"
url = 'http://www.jimburgessdesign.com/comics/index.php'
stripUrl = url + '?comic=%s'
firstStripUrl = stripUrl % '1'
@ -96,6 +97,7 @@ class AGirlAndHerFed(_BasicScraper):
class AhoiPolloi(_BasicScraper):
description = u'ahoi polloi - ein f\xfcllhorn voller f\xfchlh\xf6rner'
url = 'http://ahoipolloi.blogger.de/'
stripUrl = url + '?day=%s'
firstStripUrl = stripUrl % '20060306'
@ -120,6 +122,7 @@ class AirForceBlues(_BasicScraper):
class ALessonIsLearned(_BasicScraper):
description = u'A Lesson Is Learned But The Damage Is Irreversible'
url = 'http://www.alessonislearned.com/'
prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")+r"[^>]+previous")
starter = indirectStarter(url, prevSearch)
@ -130,6 +133,7 @@ class ALessonIsLearned(_BasicScraper):
class Alice(_BasicScraper):
description = u'The little webcomic with the BIG imagination'
url = 'http://alice.alicecomics.com/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -139,6 +143,7 @@ class Alice(_BasicScraper):
class AlienLovesPredator(_BasicScraper):
description = u'Abe (the Alien) and Preston (the Predator) represent in NYC'
url = 'http://alienlovespredator.com/'
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2004/10/12/unavoidable-delay'
@ -148,6 +153,7 @@ class AlienLovesPredator(_BasicScraper):
class AlienShores(_BasicScraper):
description = u'A webcomic about four guys forming a band. They find that being a band is more than just playing the music.'
baseUrl = 'http://alienshores.com/'
rurl = escape(baseUrl)
url = baseUrl + 'alienshores_band/'
@ -168,6 +174,7 @@ class ALLCAPS(_BasicScraper):
class AllTheGrowingThings(_BasicScraper):
description = u'All The Growing Things - A Tale of Gardens, monsters, and old ladies'
url = 'http://growingthings.typodmary.com/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -178,6 +185,7 @@ class AllTheGrowingThings(_BasicScraper):
class AlphaLuna(_BasicScraper):
description = u'Luna, a young girl discovers what lies in her soul: a werewolf beast and a destiny. An adventure manga story for werecreatures fans.'
url = 'http://www.alphaluna.net/'
stripUrl = url + 'issue-%s/'
firstStripUrl = stripUrl % '1/cover'
@ -195,6 +203,7 @@ class AlphaLunaSpanish(AlphaLuna):
class AlsoBagels(_BasicScraper):
description = u'Also, Bagels - A Comic of Inept Redundancy'
url = 'http://alsobagels.com/'
rurl = escape(url)
stripUrl = url + 'index.php/comic/%s/'
@ -231,6 +240,7 @@ class AmazingSuperPowers(_BasicScraper):
class Amya(_BasicScraper):
description = u'A Graphic Novel'
url = 'http://www.amyachronicles.com/'
rurl = escape(url)
stripUrl = url + 'archives/%s'
@ -241,6 +251,7 @@ class Amya(_BasicScraper):
class Angband(_BasicScraper):
description = u'Angband - Tales From The Pit'
url = 'http://angband.calamarain.net/'
stripUrl = url + 'view.php?date=%s'
firstStripUrl = stripUrl % '2005-12-30'
@ -297,6 +308,7 @@ class ASofterWorld(_BasicScraper):
class AstronomyPOTD(_BasicScraper):
description = u'A different astronomy and space science related image is featured each day, along with a brief explanation.'
baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/'
url = baseUrl + 'astropix.html'
starter = bounceStarter(url,

View file

@ -18,6 +18,7 @@ class BackwaterPlanet(_BasicScraper):
class BadassMuthas(_BasicScraper):
description = u'Nobody wants to work for a living. Get yourself some super-powers and come ill with us. Full color update every Friday.'
url = 'http://badassmuthas.com/pages/comic.php'
stripUrl = url + '?%s'
firstStripUrl = stripUrl % '1'
@ -36,6 +37,7 @@ class BadMachinery(_BasicScraper):
class Bardsworth(_BasicScraper):
description = u'Bardsworth - Magic, Mischief, and Cookies'
url = 'http://www.bardsworth.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'
@ -84,6 +86,7 @@ class BetweenFailures(_BasicScraper):
class BigFatWhale(_BasicScraper):
description = u'A weekly comic strip for those who are not dumb.'
url = 'http://www.bigfatwhale.com/'
stripUrl = url + 'archives/bfw_%s.htm'
imageSearch = compile(tagre("img", "src", r'(archives/bfw_[^"]+|bfw_[^"]+)'))
@ -101,6 +104,7 @@ class BiggerThanCheeses(_BasicScraper):
class BillyTheDunce(_BasicScraper):
description = u"Billy the Dunce: A webcomic about some genius kids, some supernatural creatures, and one dumb kid who's stuck with them. Like Goonies, but with more Lovecraft."
url = 'http://www.duncepress.com/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -111,6 +115,7 @@ class BillyTheDunce(_BasicScraper):
class BizarreUprising(_BasicScraper):
description = u"Bizarre Uprising - Manga that's not just good, it's good for you!"
url = 'http://www.bizarreuprising.com/'
stripUrl = url + 'view/%s'
firstStripUrl = stripUrl % '1/awakening-splash'
@ -120,6 +125,7 @@ class BizarreUprising(_BasicScraper):
class BlankIt(_BasicScraper):
description = u'An absurd, insane, and delightful webcomic from Aric McKeown and Lem Pew.'
url = 'http://blankitcomics.com/'
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '0001'
@ -143,6 +149,7 @@ class Blip(_BasicScraper):
class BloodBound(_BasicScraper):
description = u'Demonic Vampire Hotness'
url = 'http://bloodboundcomic.com/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -162,6 +169,7 @@ class BlueCrashKit(_BasicScraper):
class BMovieComic(_BasicScraper):
description = u"A group of unlikely heroes tackles monsters, mutants and aliens from Hollywood's past and present. See what happens. Or they'll say you haven't seen it."
url = 'http://www.bmoviecomic.com/'
stripUrl = url + '?cid=%s'
firstStripUrl = stripUrl % '8'
@ -215,6 +223,7 @@ class BoxerHockey(_BasicScraper):
class BoyOnAStickAndSlither(_BasicScraper):
description = u'A comic about killer bees, time travel, ethics and despair.'
url = 'http://www.boasas.com/'
stripUrl = url + 'page/%s'
firstStripUrl = stripUrl % '2'
@ -228,6 +237,7 @@ class BoyOnAStickAndSlither(_BasicScraper):
class BratHalla(_BasicScraper):
description = u'Norse mythology webcomic where young Thor, Loki, Balder, Hod and more face off against grade school and make an old man out of their immortal dad Odin'
url = 'http://brat-halla.com/'
stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % '1-balder-dash'
@ -237,6 +247,7 @@ class BratHalla(_BasicScraper):
class BrentalFloss(_BasicScraper):
description = u'brentalfloss the comic :: Off To The Races'
url = 'http://brentalflossthecomic.com/'
stripUrl = url + '?id=%s'
fristStripUrl = stripUrl % '1'
@ -286,6 +297,7 @@ class _BringBackRoomies(_BasicScraper):
class Brink(_BasicScraper):
description = u"BRINK - You're not as crazy as you think you are"
url = 'http://paperfangs.com/brink/'
rurl = escape(url)
stripUrl = url + '?p=%s'
@ -296,6 +308,7 @@ class Brink(_BasicScraper):
class BrightlyWound(_BasicScraper):
description = u'A webcomic of physics, astronomy, math, and grammar.'
baseUrl = 'http://www.brightlywound.com/'
url = baseUrl + '?comic=137'
stripUrl = baseUrl + '?comic=%s'
@ -306,6 +319,7 @@ class BrightlyWound(_BasicScraper):
class BroodHollow(_BasicScraper):
description = u'Broodhollow - A MWF cosmic horror adventure comic by Kris Straub'
url = 'http://broodhollow.chainsawsuit.com/'
rurl = escape(url)
stripUrl = url + '%s/'

View file

@ -41,6 +41,7 @@ class CaseyAndAndy(_BasicScraper):
class CasuallyKayla(_BasicScraper):
description = u'Casually Kayla: Keeping it as Casual as possible'
url = 'http://casuallykayla.com/'
stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '89'
@ -81,6 +82,7 @@ class Catena(_BasicScraper):
class CatsAndCameras(_BasicScraper):
description = u'Just when you thought it was safe to go to the photographer'
url = 'http://catsncameras.com/cnc/'
rurl = escape(url)
stripUrl = url + '?p=%s'
@ -91,6 +93,7 @@ class CatsAndCameras(_BasicScraper):
class ChainsawSuit(_BasicScraper):
description = u'internet humor, fresh-cut'
url = 'http://chainsawsuit.com/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -101,6 +104,7 @@ class ChainsawSuit(_BasicScraper):
class Champ2010(_BasicScraper):
description = u'Champ2010 - an almost daily journal comic from jed collins who is not drinking this year. webcomic'
baseUrl = 'http://jedcollins.com/champ2010/'
rurl = escape(baseUrl)
# the latest URL is hard coded since the comic is discontinued
@ -113,6 +117,7 @@ class Champ2010(_BasicScraper):
class ChannelAte(_BasicScraper):
description = u'Comics and Cartoons by Ryan Hudson'
url = 'http://www.channelate.com/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -122,6 +127,7 @@ class ChannelAte(_BasicScraper):
class ChasingTheSunset(_BasicScraper):
description = u'Chasing the Sunset | Fantasy Webcomic | Elves, Pixies and a blue dragon with orange stripes.'
url = 'http://www.fantasycomic.com/'
stripUrl = url + 'index.php?p=c%s'
firstStripUrl = stripUrl % '1'
@ -131,6 +137,7 @@ class ChasingTheSunset(_BasicScraper):
class CheckerboardNightmare(_BasicScraper):
description = u'Checkerboard Nightmare by Kristofer Straub - A Webcomics Institution'
url = 'http://www.checkerboardnightmare.com/'
stripUrl = url + 'd/%s.shtml'
firstStripUrl = stripUrl % '20001110'
@ -257,6 +264,7 @@ class CoolCatStudio(_BasicScraper):
class CorydonCafe(_BasicScraper):
description = u'Corydon Cafe humorous online comic archive of abstruse awesomeness created by a starving artist'
url = 'http://corydoncafe.com/'
starter = indirectStarter(url,
compile(tagre("a", "href", r'(\./\d+/[^"]+)')))
@ -271,6 +279,7 @@ class CorydonCafe(_BasicScraper):
class CourtingDisaster(_BasicScraper):
description = u'Courting Disaster by Brad Guigar - A Daily Webcomic'
url = 'http://www.courting-disaster.com/'
stripUrl = url + 'archive/%s.html'
firstStripUrl = stripUrl % '20050112'
@ -280,6 +289,7 @@ class CourtingDisaster(_BasicScraper):
class CowboyJedi(_BasicScraper):
description = u'A Long Time Ago In A Webcomic Updated Weekly...'
url = 'http://www.cowboyjedi.com/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -290,6 +300,7 @@ class CowboyJedi(_BasicScraper):
class CrapIDrewOnMyLunchBreak(_BasicScraper):
description = u'A semi-biographical web comic about the struggles and occasional humour of daily life, pets, friends, and more. Currently completing the missing archive comics with your help.'
url = 'http://crap.jinwicked.com/'
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2003/07/30/jin-and-josh-decide-to-move'
@ -313,6 +324,7 @@ class CtrlAltDelSillies(CtrlAltDel):
class CrimsonDark(_BasicScraper):
description = u'A Sci-Fi webcomic set in space in the distant future.'
url = 'http://www.davidcsimon.com/crimsondark/'
stripUrl = url + 'index.php?view=comic&strip_id=%s'
firstStripUrl = stripUrl % '1'
@ -345,6 +357,7 @@ class CucumberQuest(_BasicScraper):
class Curvy(_BasicScraper):
description = u'An erotic sci-fi adventure comic for adults.'
url = 'http://www.c.urvy.org/'
stripUrl = url + '?date=%s'
firstStripUrl = stripUrl % '20080329'

View file

@ -71,6 +71,7 @@ class DangerouslyChloe(_BasicScraper):
class DarkWings(_BasicScraper):
description = u"Dark Wings - You Can't Reach Heaven on Broken Wings"
url = 'http://www.flowerlarkstudios.com/dark-wings/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -121,6 +122,7 @@ class DeepFried(_BasicScraper):
class DemolitionSquad(_BasicScraper):
description = u'Demolitionsquad.de ist die erste deutsche Videospiel-Webcomic-Seite nach amerikanischen Vorbild und noch viel mehr als das. Auf Demolitionsquad.de findet der wissbegierige, spielebegeisterte Nutzer Comicstrips zu aktuellen Videospielen die ihm die Wartezeit auf den kommenden Top-Titel weiter ves\xfcssen.'
url = 'http://www.demolitionsquad.de/'
starter = indirectStarter(url,
compile(tagre("a", "href", r'(no_cache/comicstrips/einzelansicht/archive/[^"]+)')))
@ -219,6 +221,7 @@ class DominicDeegan(_BasicScraper):
class DorkTower(_BasicScraper):
description = u'The Place for All Things Dork'
url = 'http://www.dorktower.com/'
rurl = escape(url)
stripUrl = url + '%s/'

View file

@ -66,6 +66,7 @@ class ElfOnlyInn(_BasicScraper):
class ElGoonishShive(_BasicScraper):
description = u'Fantasy sci-fi comic about a group of teenagers and the bizarre, strange and supernatural circumstances of their lives.'
name = 'KeenSpot/ElGoonishShive'
url = 'http://www.egscomics.com/'
stripUrl = url + '?date=%s'
@ -84,6 +85,7 @@ class ElGoonishShiveNP(_BasicScraper):
class Ellerbisms(_BasicScraper):
description = u'Ellerbisms - A diary comic by Marc Ellerby'
url = 'http://www.ellerbisms.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'
@ -141,6 +143,7 @@ class EvilDiva(_BasicScraper):
class EvilInc(_BasicScraper):
description = u'Evil Inc. by Brad Guigar - Daily Super-Villain Webcomic and Comics Blog'
url = 'http://www.evil-comic.com/'
stripUrl = url + 'archive/%s.html'
firstStripUrl = stripUrl % '20050530'
@ -150,6 +153,7 @@ class EvilInc(_BasicScraper):
class Exiern(_BasicScraper):
description = u'Barbarian Typhan-Knee defeated the wizard...and became Tiffany!'
url = 'http://www.exiern.com/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -160,6 +164,7 @@ class Exiern(_BasicScraper):
class ExploitationNow(_BasicScraper):
description = u'Exploitation Now - That somewhat naughty webcomic classic by Michael Poe'
url = 'http://www.exploitationnow.com/'
rurl = escape(url)
stripUrl = url + '%s'

View file

@ -69,6 +69,7 @@ class FeyWinds(_BasicScraper):
class FilibusterCartoons(_BasicScraper):
description = u'Political cartoons from Canada!'
url = 'http://www.filibustercartoons.com/'
rurl = escape(url)
stripUrl = url + 'index.php/%s'
@ -99,6 +100,7 @@ class FlakyPastry(_BasicScraper):
class Flemcomics(_BasicScraper):
description = u'FLEM Comics - More fun than a sack of dead kittens.'
url = 'http://www.flemcomics.com/'
stripUrl = url + 'd/%s.html'
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
@ -118,6 +120,7 @@ class Flipside(_BasicScraper):
class FonFlatter(_BasicScraper):
description = u'Fred, eine stets zuversichtliche, einfallsreiche, zumeist aber alberne und etwas naive Fledermaus, flattert bereits seit 2005 t\xe4glich durch das weltweite Netz'
url = 'http://www.fonflatter.de/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -137,6 +140,7 @@ class FonFlatter(_BasicScraper):
class Footloose(_BasicScraper):
description = u'Footloose, a Webcomic By alice Nuttall and Emily Brady'
url = 'http://footloosecomic.com/footloose/today.php'
stripUrl = 'http://footloosecomic.com/footloose/pages.php?page=%s'
firstStripUrl = stripUrl % '1'
@ -177,6 +181,7 @@ class FredoAndPidjin(_BasicScraper):
class FullFrontalNerdity(_BasicScraper):
description = u'Full Frontal Nerdity by Aaron Williams - Comics about liking all things gaming, technological, mythological, and internetological.'
url = 'http://ffn.nodwick.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'

View file

@ -10,6 +10,7 @@ from ..util import tagre
class Galaxion(_BasicScraper):
description = u'Galaxion - Life. Love. Hyperspace.'
url = 'http://galaxioncomics.com/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -20,6 +21,7 @@ class Galaxion(_BasicScraper):
class Garanos(_BasicScraper):
description = u'Garanos - A dramatic fantasy webcomic with a dash of adventure, gothic horror, and romance for flavor.'
baseUrl = 'http://garanos.alexheberling.com/'
rurl = escape(baseUrl)
url = baseUrl + 'pages/page-1/'
@ -42,6 +44,7 @@ class GastroPhobia(_BasicScraper):
class Geeks(_BasicScraper):
description = u'Geeks Trying To Be Funny'
url = 'http://sevenfloorsdown.com/geeks/'
stripUrl = url + 'archives/%s'
firstStripUrl = stripUrl % '10'
@ -74,6 +77,7 @@ class GirlsWithSlingshots(_BasicScraper):
class GlassHalfEmpty(_BasicScraper):
description = u'A Glass Half Empty cartoon by Dan Markowitz'
url = 'http://www.defectivity.com/ghe/index.php'
stripUrl = url + '?strip_id=%s'
firstStripUrl = stripUrl % '0'
@ -83,6 +87,7 @@ class GlassHalfEmpty(_BasicScraper):
class GleefulNihilism(_BasicScraper):
description = u'pointless comics with a sideways grin'
url = 'http://gleefulnihilism.com/'
rurl = escape(url)
stripUrl = url + 'comics/%s/'
@ -127,6 +132,7 @@ class GoneWithTheBlastwave(_BasicScraper):
class GrrlPower(_BasicScraper):
description = u'Grrl Power - A webcomic about superheroines.'
url = 'http://www.grrlpowercomic.com/'
rurl = escape(url)
stripUrl = url + 'archives/%s'

View file

@ -1,3 +1,4 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, escape
@ -48,6 +49,7 @@ class HarkAVagrant(_BasicScraper):
class HijinksEnsue(_BasicScraper):
description = u'HijiNKS ENSUE is a geek pop culture webcomic that makes fun of the latest news in tv, movies, Sci-Fi, technology and the Internet'
url = 'http://hijinksensue.com/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -58,6 +60,7 @@ class HijinksEnsue(_BasicScraper):
class Hipsters(_BasicScraper):
description = u'a weekly webcomic series by Adrian vom Baur - Hipsters vs. Vampires - Hipsters vs. Dinosaurs - Hipsters vs. Robots'
url = 'http://www.hipsters-comic.com/'
rurl = escape(url)
stripUrl = url + '%s/'

View file

@ -8,6 +8,7 @@ from ..util import tagre
class IAmArg(_BasicScraper):
description = u'An Internet comic of non sequitur Geekiness. Updates Monday, Wednesday and Friday'
url = 'http://iamarg.com/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -37,6 +38,7 @@ class IDreamOfAJeanieBottle(_BasicScraper):
class InsideOut(_BasicScraper):
description = u'Inside Out Comic - A new kind of Weird'
url = 'http://www.insideoutcomic.com/'
stripUrl = url + 'html/%s.html'
firstStripUrl = stripUrl % '1_snake_suicide'

View file

@ -8,6 +8,7 @@ from ..util import tagre
class JackCannon(_BasicScraper):
description = u'The Fancy Adventures of Jack Cannon \u2013 A Webcomic -'
url = 'http://fancyadventures.com/'
rurl = escape(url)
stripUrl = url + '%s/'

View file

@ -9,6 +9,7 @@ from ..helpers import indirectStarter
class KatzenfutterGeleespritzer(_BasicScraper):
description = u'Webcomic mit Geschichten aus dem Alltag eines geeky Cartoonisten und Comiczeichners'
url = 'http://www.katzenfuttergeleespritzer.de/'
rurl = escape(url)
stripUrl = url + 'comic/%s/'
@ -23,6 +24,7 @@ class KatzenfutterGeleespritzer(_BasicScraper):
class KevinAndKell(_BasicScraper):
description = u'Kevin & Kell by Bill Holbrook'
url = 'http://www.kevinandkell.com/'
stripUrl = url + '%s/kk%s%s.html'
firstStripUrl = stripUrl % ('1995', '09', '03')
@ -35,6 +37,7 @@ class KevinAndKell(_BasicScraper):
class Key(_BasicScraper):
description = u'Key - Fantasy Adventure Webcomic'
baseUrl = 'http://key.shadilyn.com/'
url = baseUrl + 'latestpage.html'
stripUrl = baseUrl + 'pages/%s.html'
@ -54,6 +57,7 @@ class KickInTheHead(_BasicScraper):
class KillerKomics(_BasicScraper):
description = u'Web comic - Lorsque tu repasses une chemise, fait attention pour ne pas oublier un endroit... - Web comic drole avec bonhommes allumettes, webcomic avec beaucoup de sang et violence. Pour ceux qui veulent rire \xe0 la violence.'
baseUrl = 'http://www.killerkomics.com/web-comics/'
url = baseUrl + 'index_ang.cfm'
stripUrl = baseUrl + '%s.cfm'
@ -92,6 +96,7 @@ class Kukuburi(_BasicScraper):
class KuroShouri(_BasicScraper):
description = u'a webcomic by Kikirini and DJ-Anarchy'
url = 'http://kuroshouri.com/'
rurl = escape(url)
stripUrl = url + '?webcomic_post=%s'

View file

@ -19,6 +19,7 @@ class LasLindas(_BasicScraper):
class LeastICouldDo(_BasicScraper):
description = u'A daily webcomic series about the life of Rayne Summers. Created by Ryan Sohmer and Lar deSouza.'
url = 'http://www.leasticoulddo.com/'
rurl = escape(url)
stripUrl = url + 'comic/%s'
@ -41,6 +42,7 @@ class Lint(_BasicScraper):
class LittleGamers(_BasicScraper):
description = u'The comic everyone knows, but no one reads'
url = 'http://www.little-gamers.com/'
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2000/12/01/99'
@ -50,6 +52,7 @@ class LittleGamers(_BasicScraper):
class LoadingArtist(_BasicScraper):
description = u'A webcomic by Gregor Czaykowski'
url = 'http://www.loadingartist.com/'
rurl = escape(url)
stripUrl = url + '%s/'

View file

@ -35,6 +35,7 @@ class MagickChicks(_BasicScraper):
class ManlyGuysDoingManlyThings(_BasicScraper):
description = u'Manly Guys Doing Manly Things \xbb Updated Mondays or whenever I feel like it'
url = 'http://thepunchlineismachismo.com/'
rurl = escape(url)
stripUrl = url + 'archives/comic/%s'
@ -54,6 +55,7 @@ class Marilith(_BasicScraper):
class MarryMe(_BasicScraper):
description = u'MARRY ME: an online graphic novel by Bobby Crosby and Remy "Eisu" Mokhtar'
url = 'http://marryme.keenspot.com/'
stripUrl = url + 'd/%s.html'
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
@ -99,6 +101,7 @@ class MenageA3(_BasicScraper):
class Melonpool(_BasicScraper):
description = u"Star Trek Meets Gilligan's Island"
url = 'http://www.melonpool.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'

View file

@ -23,6 +23,7 @@ class Namesake(_BasicScraper):
class NamirDeiter(_BasicScraper):
description = u'Namir Deiter, by Isabel Marks'
url = 'http://www.namirdeiter.com/'
rurl = escape(url)
stripUrl = url + 'comics/index.php?date=%s'
@ -43,6 +44,7 @@ class Nedroid(_BasicScraper):
class NeoEarth(_BasicScraper):
description = u'Neo-Earth - Web comic by Bryan King - Updated Every Monday'
url = 'http://www.neo-earth.com/NE/'
stripUrl = url + 'index.php?date=%s'
firstStripUrl = stripUrl % '2007-03-23'
@ -121,6 +123,7 @@ class Nnewts(_BasicScraper):
class Nodwick(_BasicScraper):
description = u'Welcome to Nodwick.com - A fantasy farce for the whole party!'
url = 'http://comic.nodwick.com/'
rurl = escape(url)
stripUrl = url + "?p=%s"
@ -130,6 +133,7 @@ class Nodwick(_BasicScraper):
class NobodyScores(_BasicScraper):
description = u'Nobody Scores! A little comic about inevitable disaster'
url = 'http://nobodyscores.loosenutstudio.com/'
rurl = escape(url)
stripUrl = url + 'index.php?id=%s'
@ -151,6 +155,7 @@ class NoNeedForBushido(_BasicScraper):
class Nukees(_BasicScraper):
description = u'Nukees, The Atomic Comic Strip'
url = 'http://www.nukees.com/'
stripUrl = url + 'd/%s'
firstStripUrl = stripUrl % '19970121'

View file

@ -9,6 +9,7 @@ from ..scraper import _BasicScraper
class NineteenNinetySeven(_BasicScraper):
description = u'1977 the Comic - Sex, Drugs and Rock and Roll Just Not in That Order'
name = '1997'
url = 'http://www.1977thecomic.com/'
stripUrl = url + '%s/'

View file

@ -110,6 +110,7 @@ class OurHomePlanet(_BasicScraper):
class OverCompensating(_BasicScraper):
description = u'OVERCOMPENSATING: The Journal Comic With a Seething Disdain for Reality.'
url = 'http://www.overcompensating.com/'
stripUrl = url + 'posts/%s.html'
firstStripUrl = stripUrl % '20040929'

View file

@ -19,6 +19,7 @@ class PandyLand(_BasicScraper):
class ParadigmShift(_BasicScraper):
description = u'A Paranormal Graphic Novel by Dirk I. Tiede'
url = 'http://www.paradigmshiftmanga.com/'
starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after="next-comic-link")))
stripUrl = url + 'ps/%s.html'
@ -40,6 +41,7 @@ class ParallelUniversum(_BasicScraper):
class PartiallyClips(_BasicScraper):
description = u'PartiallyClips - The true stories behind your favorite clip art.'
url = 'http://partiallyclips.com/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -97,6 +99,7 @@ class PennyArcade(_BasicScraper):
class PeppermintSaga(_BasicScraper):
description = u'Sexy Fucking Fantasy Adventure Webcomic - NSFW'
url = 'http://www.pepsaga.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'
@ -104,6 +107,7 @@ class PeppermintSaga(_BasicScraper):
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
help = 'Index format: number'
adult = True
class PHDComics(_BasicScraper):
@ -118,6 +122,7 @@ class PHDComics(_BasicScraper):
class PicPakDog(_BasicScraper):
description = u'A comic by Kim Belding'
url = 'http://www.picpak.net/'
rurl = escape(url)
stripUrl = url + 'comic/%s/'

View file

@ -50,6 +50,7 @@ class RedMeat(_BasicScraper):
class RedString(_BasicScraper):
description = u'A web comics about love and growing up. Art by Gina Biggs.'
url = 'http://www.redstring.strawberrycomics.com/'
stripUrl = url + 'index.php?id=%s'
firstStripUrl = stripUrl % '434'

View file

@ -42,6 +42,7 @@ class SandraAndWoo(_BasicScraper):
class SandraAndWooGerman(_BasicScraper):
description = u'Sandra und Woo: ein Webcomic \xfcber Freundschaft, das Leben und die Kunst (keine) Eichh\xf6rnchen zu essen; mit dem M\xe4dchen Sandra und ihrem Waschb\xe4ren Woo in den Hauptrollen'
url = 'http://www.sandraandwoo.com/woode/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -142,6 +143,7 @@ class SexyLosers(_BasicScraper):
class ShadowGirls(_BasicScraper):
description = u"It's like H.P. Lovecraft meets the Gilmore Girls!"
url = 'http://www.shadowgirlscomic.com/'
stripUrl = url + 'comics/%s'
firstStripUrl = stripUrl % 'book-1/chapter-1-broken-dreams/welcome'
@ -279,6 +281,7 @@ class SnowFlakes(_BasicScraper):
class SnowFlame(_BasicScraper):
description = u'The fan-comic series featuring "The Man Powered by Cocaine"'
url = 'http://www.snowflamecomic.com/'
rurl = escape(url)
stripUrl = url + '?comic=snowflame-%s-%s'
@ -336,6 +339,7 @@ class SpaceTrawler(_BasicScraper):
class Spamusement(_BasicScraper):
description = u'Spamusement! Poorly-drawn cartoons inspired by actual spam subject lines!'
url = 'http://spamusement.com/'
rurl = escape(url)
stripUrl = url + 'index.php/comics/view/%s'
@ -346,6 +350,7 @@ class Spamusement(_BasicScraper):
class SpareParts(_BasicScraper):
description = u'Spare Parts by Terrence and Isabel Marks!'
baseUrl = 'http://www.sparepartscomics.com/'
url = baseUrl + 'comics/?date=20080328'
stripUrl = baseUrl + 'comics/index.php?date=%s'
@ -506,6 +511,7 @@ class _StrangeCandy(_BasicScraper):
class SupernormalStep(_BasicScraper):
description = u'Supernormal Step - Magic, Face Punching, and a Robot or Two'
url = 'http://supernormalstep.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'

View file

@ -9,6 +9,7 @@ from ..util import tagre
class TheBrads(_BasicScraper):
description = u'ArchiveFirst World Problems Comic - By Brad Colbow'
url = 'http://bradcolbow.com/archive/C4/'
stripUrl = url + '%s/'
firstStripUrl = stripUrl % 'P125'
@ -19,6 +20,7 @@ class TheBrads(_BasicScraper):
class TheDevilsPanties(_BasicScraper):
description = u"It's not Satanic Porn"
url = 'http://thedevilspanties.com/'
stripUrl = url + 'archives/%s'
firstStripUrl = stripUrl % '300'
@ -128,6 +130,7 @@ class _TwoLumps(_BasicScraper):
class TwoTwoOneFour(_BasicScraper):
description = u'Artwork, comics, graphic novels, music, articles, and various silliness by Troy McQuinn'
url = 'http://www.nitrocosm.com/go/2214_classic/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -138,6 +141,7 @@ class TwoTwoOneFour(_BasicScraper):
class TheWhiteboard(_BasicScraper):
description = u'The Whiteboard, a somewhat paintball-related webcomic by "Doc" Nickel'
url = 'http://www.the-whiteboard.com/'
stripUrl = url + 'auto%s.html'
imageSearch = compile(r'<img SRC="(autotwb\d{1,4}.+?|autowb\d{1,4}.+?)">', IGNORECASE)
@ -146,6 +150,7 @@ class TheWhiteboard(_BasicScraper):
class HMHigh(_BasicScraper):
description = u'Welcome to the website of professional artist and illustrator, Angel Smith and of Fallen Angel Media Ltd; An independent art, design & publishing house and event management company based in Bristol, England'
name = 'TheFallenAngel/HMHigh'
baseUrl = 'http://www.thefallenangel.co.uk/'
url = baseUrl + 'hmhigh/'

View file

@ -20,6 +20,7 @@ class Undertow(_BasicScraper):
class UnicornJelly(_BasicScraper):
description = u'UNICORN JELLY anime manga comic strip by Jennifer Diane Reitz'
baseUrl = 'http://unicornjelly.com/'
url = baseUrl + 'uni666.html'
stripUrl = baseUrl + 'uni%s.html'

View file

@ -66,6 +66,7 @@ class WebDesignerCOTW(_BasicScraper):
class WeCanSleepTomorrow(_BasicScraper):
description = u'We Can Sleep Tomorrow - Life does not take bathroom breaks. A webcomic that updates Mondays and Fridays'
url = 'http://wecansleeptomorrow.com/'
rurl = escape(url)
stripUrl = url + '%s/'
@ -106,6 +107,7 @@ class WhyTheLongFace(_BasicScraper):
class Wigu(_BasicScraper):
description = u'WIGU: A COMIC ON INTERNET'
url = 'http://wigucomics.com/'
stripUrl = url + 'adventures/index.php?comic=%s'
firstStripUrl = stripUrl % '1'
@ -168,6 +170,7 @@ class WorldOfMrToast(_BasicScraper):
class WormWorldSaga(_BasicScraper):
description = u'The Wormworld Saga Online Graphic Novel by Daniel Lieske - An Epic Fantasy Adventure for all Ages'
url = 'http://www.wormworldsaga.com/'
stripUrl = url + 'chapters/%s/index.php'
firstStripUrl = stripUrl % 'chapter01/EN'
@ -222,6 +225,7 @@ class _WorldOfWarcraftEh(_BasicScraper):
class Wulffmorgenthaler(_BasicScraper):
description = u"Entertainment - Since 2002. Wulff & Morgenthaler's Personal humoristic social commentary on life, nostalgia and the World in general. Nothing is taboo: They deal with Politics, News, Entertainment, Technology, Culture, and Weirdo Beavers"
url = 'http://wumocomicstrip.com/'
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(/img/strip/[^/"]+)'))

View file

@ -8,6 +8,7 @@ from ..util import tagre
class YAFGC(_BasicScraper):
description = u'Yet Another Fantasy Gamer Comic'
url = 'http://yafgc.net/'
stripUrl = url + '?id=%s'
firstStripUrl = stripUrl % '1'

File diff suppressed because one or more lines are too long

99
scripts/mkdescription.py Executable file
View file

@ -0,0 +1,99 @@
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2013 Bastian Kleineidam
from __future__ import print_function
import sys
import os
# for dosage import
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from dosagelib.scraper import get_scraperclasses
from scriptutil import save_result, load_result
from bs4 import BeautifulSoup
import requests
# User-Agent: Iceweasel (Firefox) 15.02 (Debian)
# Sent as the User-Agent header of the page request made in get_description().
UserAgent = "Mozilla/5.0 (X11; Linux x86_64; rv:15.0) Gecko/20120724 Debian Iceweasel/15.02"
# File the collected descriptions are loaded from and saved to by main().
# Derived from this script's own path by replacing ".py" with ".json"
# (str.replace substitutes every occurrence of ".py" in the path).
json_file = __file__.replace(".py", ".json")
def get_scraper_url(scraperclass):
    """Return the ``baseUrl`` attribute of a scraper class when it has one,
    otherwise its ``url`` attribute."""
    if not hasattr(scraperclass, 'baseUrl'):
        return scraperclass.url
    return scraperclass.baseUrl
def classname(clazz):
    """Return the ``__name__`` of the given class; used as sort key and
    result key in main()."""
    name = clazz.__name__
    return name
def elem_text(elem, sep=u" "):
    """Join the ``stripped_strings`` of a BeautifulSoup element with ``sep``
    and return the resulting text."""
    fragments = elem.stripped_strings
    return sep.join(fragments)
def get_description(url, lang):
    """Get URL description from meta information.

    The page is fetched with the script's User-Agent and searched, in this
    order, for
      1. <meta property="og:description" content="...">
      2. <meta name="description" content="...">
      3. the text of the <title> element.
    The first non-empty value wins.

    @param url: address of the page to fetch
    @param lang: accepted for the caller's convenience; currently unused
    @return: the description text; None when the request failed or the page
      offers no description; u"" when the server answered with a non-OK
      HTTP status
    """
    headers = {'User-Agent': UserAgent}
    try:
        req = requests.get(url, headers=headers)
    except Exception as msg:
        # Deliberately broad: one unreachable site must not abort the run
        # over all scrapers. The error is reported and the site skipped.
        print("Error: %s" % msg)
        return None
    if req.status_code != requests.codes.ok:
        print("WARN: HTTP %d" % req.status_code)
        return u""
    doc = BeautifulSoup(req.text)
    # Attributes are looked up with Tag.get(): the "in" operator on a Tag
    # tests its child nodes, not its attributes, and Tag[...] raises
    # KeyError for a missing attribute.
    elem = doc.find("meta", dict(property="og:description"))
    if elem and elem.get("content"):
        return elem["content"]
    for elem in doc.find_all("meta", dict(name="description")):
        if elem.get("content"):
            return elem["content"]
    elem = doc.find('title')
    if elem:
        return elem_text(elem)
    return None
def main(args):
    """Collect descriptions for scrapers that lack one and store them.

    Previously stored results are loaded from json_file when it exists.
    If args[0] is given, classes sorting before that class name are
    skipped and processing starts at it. Classes with an underscore in
    their name, with an existing description, or already present in the
    stored results are skipped. Each newly found description is saved to
    json_file right away.

    @return: 0
    """
    result = load_result(json_file) if os.path.isfile(json_file) else {}
    tofind = args[0] if args else None
    for scraperclass in sorted(get_scraperclasses(), key=classname):
        key = classname(scraperclass)
        if tofind and key != tofind:
            continue
        # start class reached (or none requested): handle all that follow
        tofind = None
        if '_' in key:
            continue
        print(key)
        if scraperclass.description or key in result:
            continue
        url = get_scraper_url(scraperclass)
        print(url)
        description = get_description(url, scraperclass.lang)
        if not description:
            print("No description found")
            continue
        print(description)
        # store result
        entry = dict(description=description,
                     module=scraperclass.__module__, url=url)
        result[key] = entry
        save_result(result, json_file)
    return 0
# Run as a script: hand the command line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))

82
scripts/update_description.py Executable file
View file

@ -0,0 +1,82 @@
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2013 Bastian Kleineidam
from __future__ import print_function
import sys
import os
import re
import codecs
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from scriptutil import load_result, save_result, format_description
from dosagelib.scraper import get_scraperclasses
# Path of the stored description results: this script's own path with the
# file name "update_description.py" replaced by "mkdescription.json".
json_file = __file__.replace("update_description.py", "mkdescription.json")
def has_description(classname):
    """Return the ``description`` attribute of the scraper class named
    ``classname`` (truthy when a description is already set).

    @raise ValueError: if no scraper class has that name
    """
    candidates = (cls for cls in get_scraperclasses()
                  if cls.__name__ == classname)
    for scraperclass in candidates:
        # only the first class with a matching name is considered
        return scraperclass.description
    raise ValueError("Unknown scraper class %s" % classname)
# Matches a source encoding declaration such as "# -*- coding: iso-8859-1 -*-".
# Codec names may contain letters, digits, "-", "_" and "." (PEP 263).
EncodingMatch = re.compile(r'# -\*- coding: ([-_.a-zA-Z0-9]+) -\*-')


def get_encoding(filename):
    """Get the declared source encoding of a .py file.

    Python honors an encoding declaration only on the first or second
    line of a file (the second line leaves room for a shebang), so
    exactly those two lines are examined.

    @param filename: path of the Python source file
    @return: the encoding name from the declaration
    @raise ValueError: if neither of the first two lines declares one
    """
    with open(filename, 'r') as f:
        for lineno, line in enumerate(f):
            if lineno >= 2:
                # declarations further down are ignored by Python as well
                break
            mo = EncodingMatch.search(line)
            if mo:
                return mo.group(1)
    raise ValueError("No encoding line at %s" % filename)
def answer(classname, info):
    """Show the stored description of a class and ask for confirmation.

    @return: True if the reply starts with "y" or "Y", else False
    """
    text = info['description'].strip()
    print()
    question = u'%s: %s [y/N]? ' % (classname, text)
    # the prompt is handed to raw_input() as UTF-8 encoded bytes
    reply = raw_input(question.encode('utf-8'))
    return reply.lower().startswith('y')
def main(args):
    """Write confirmed descriptions from json_file into the plugin sources.

    For every stored entry whose class has no description yet, whose name
    contains no underscore and which was not rejected before, the user is
    asked to confirm the text. A rejection is recorded as answer "no" and
    saved. On confirmation the module's source file is rewritten through a
    temporary "<filename>_" copy that then replaces the original.

    @param args: command line arguments (unused)
    @return: 0
    """
    result = load_result(json_file) if os.path.isfile(json_file) else {}
    for classname, info in sorted(result.items()):
        if has_description(classname) or '_' in classname:
            continue
        if info.get('answer') == 'no':
            continue
        if answer(classname, info):
            # module path -> source file path, relative to the working dir
            filename = info['module'].replace('.', os.sep) + ".py"
            encoding = get_encoding(filename)
            with codecs.open(filename, 'r', encoding) as f:
                with codecs.open(filename+"_", 'w', encoding) as out:
                    write_description(f, out, classname, info)
            os.rename(filename+"_", filename)
        else:
            # remember the rejection so the question is not asked again
            info['answer'] = 'no'
            save_result(result, json_file)
    return 0
def write_description(f, out, classname, info):
    """Add description to class.

    Every line of ``f`` is copied to ``out`` unchanged. Directly after each
    line starting with "class <classname>(_BasicScraper):" an additional
    "description = ..." assignment is written. The assignment gets the
    same leading whitespace as the line following the class header, so it
    lines up with the rest of the class body; four spaces are used when
    that line is blank, unindented or missing.

    Only classes deriving directly from _BasicScraper are matched; for any
    other class the source is copied without changes.

    @param f: iterable of the source lines to read
    @param out: object with a write() method receiving the new source
    @param classname: name of the scraper class to annotate
    @param info: dictionary whose 'description' entry is the raw text
    @return: 0
    """
    header = 'class %s(_BasicScraper):' % classname
    default_indent = u'    '
    pending = False
    description = None
    for line in f:
        if pending:
            # first line after the class header: reuse its indentation
            indent = re.match(r'[ \t]*', line).group() or default_indent
            out.write(u'%sdescription = %r\n' % (indent, description))
            pending = False
        out.write(line)
        if line.startswith(header):
            description = format_description(info['description'])
            pending = True
    if pending:
        # class header was the last line of the input
        out.write(u'%sdescription = %r\n' % (default_indent, description))
    return 0
# Run as a script: hand the command line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))