Add some descriptions.
This commit is contained in:
parent
4850626fc2
commit
e37a80fdc1
27 changed files with 291 additions and 0 deletions
|
@ -1,5 +1,9 @@
|
|||
Dosage 2.1 (released xx.4.2013)
|
||||
|
||||
Features:
|
||||
- comics: Add descriptions taken from the comic webpages. Descriptions are displayed
|
||||
with dosage -m.
|
||||
|
||||
Fixes:
|
||||
- cmdline: Fix output encoding errors on comic listing.
|
||||
Closes: GH bug #24
|
||||
|
|
|
@ -9,6 +9,7 @@ from ..helpers import regexNamer, bounceStarter, indirectStarter
|
|||
|
||||
|
||||
class AbleAndBaker(_BasicScraper):
|
||||
description = u"Able and Baker: Hatin' and Dictatin'"
|
||||
url = 'http://www.jimburgessdesign.com/comics/index.php'
|
||||
stripUrl = url + '?comic=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
|
@ -96,6 +97,7 @@ class AGirlAndHerFed(_BasicScraper):
|
|||
|
||||
|
||||
class AhoiPolloi(_BasicScraper):
|
||||
description = u'ahoi polloi - ein f\xfcllhorn voller f\xfchlh\xf6rner'
|
||||
url = 'http://ahoipolloi.blogger.de/'
|
||||
stripUrl = url + '?day=%s'
|
||||
firstStripUrl = stripUrl % '20060306'
|
||||
|
@ -120,6 +122,7 @@ class AirForceBlues(_BasicScraper):
|
|||
|
||||
|
||||
class ALessonIsLearned(_BasicScraper):
|
||||
description = u'A Lesson Is Learned But The Damage Is Irreversible'
|
||||
url = 'http://www.alessonislearned.com/'
|
||||
prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")+r"[^>]+previous")
|
||||
starter = indirectStarter(url, prevSearch)
|
||||
|
@ -130,6 +133,7 @@ class ALessonIsLearned(_BasicScraper):
|
|||
|
||||
|
||||
class Alice(_BasicScraper):
|
||||
description = u'The little webcomic with the BIG imagination'
|
||||
url = 'http://alice.alicecomics.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -139,6 +143,7 @@ class Alice(_BasicScraper):
|
|||
|
||||
|
||||
class AlienLovesPredator(_BasicScraper):
|
||||
description = u'Abe (the Alien) and Preston (the Predator) represent in NYC'
|
||||
url = 'http://alienlovespredator.com/'
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '2004/10/12/unavoidable-delay'
|
||||
|
@ -148,6 +153,7 @@ class AlienLovesPredator(_BasicScraper):
|
|||
|
||||
|
||||
class AlienShores(_BasicScraper):
|
||||
description = u'A webcomic about four guys forming a band. They find that being a band is more than just playing the music.'
|
||||
baseUrl = 'http://alienshores.com/'
|
||||
rurl = escape(baseUrl)
|
||||
url = baseUrl + 'alienshores_band/'
|
||||
|
@ -168,6 +174,7 @@ class ALLCAPS(_BasicScraper):
|
|||
|
||||
|
||||
class AllTheGrowingThings(_BasicScraper):
|
||||
description = u'All The Growing Things - A Tale of Gardens, monsters, and old ladies'
|
||||
url = 'http://growingthings.typodmary.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -178,6 +185,7 @@ class AllTheGrowingThings(_BasicScraper):
|
|||
|
||||
|
||||
class AlphaLuna(_BasicScraper):
|
||||
description = u'Luna, a young girl discovers what lies in her soul: a werewolf beast and a destiny. An adventure manga story for werecreatures fans.'
|
||||
url = 'http://www.alphaluna.net/'
|
||||
stripUrl = url + 'issue-%s/'
|
||||
firstStripUrl = stripUrl % '1/cover'
|
||||
|
@ -195,6 +203,7 @@ class AlphaLunaSpanish(AlphaLuna):
|
|||
|
||||
|
||||
class AlsoBagels(_BasicScraper):
|
||||
description = u'Also, Bagels - A Comic of Inept Redundancy'
|
||||
url = 'http://alsobagels.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'index.php/comic/%s/'
|
||||
|
@ -231,6 +240,7 @@ class AmazingSuperPowers(_BasicScraper):
|
|||
|
||||
|
||||
class Amya(_BasicScraper):
|
||||
description = u'A Graphic Novel'
|
||||
url = 'http://www.amyachronicles.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'archives/%s'
|
||||
|
@ -241,6 +251,7 @@ class Amya(_BasicScraper):
|
|||
|
||||
|
||||
class Angband(_BasicScraper):
|
||||
description = u'Angband - Tales From The Pit'
|
||||
url = 'http://angband.calamarain.net/'
|
||||
stripUrl = url + 'view.php?date=%s'
|
||||
firstStripUrl = stripUrl % '2005-12-30'
|
||||
|
@ -297,6 +308,7 @@ class ASofterWorld(_BasicScraper):
|
|||
|
||||
|
||||
class AstronomyPOTD(_BasicScraper):
|
||||
description = u'A different astronomy and space science related image is featured each day, along with a brief explanation.'
|
||||
baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/'
|
||||
url = baseUrl + 'astropix.html'
|
||||
starter = bounceStarter(url,
|
||||
|
|
|
@ -18,6 +18,7 @@ class BackwaterPlanet(_BasicScraper):
|
|||
|
||||
|
||||
class BadassMuthas(_BasicScraper):
|
||||
description = u'Nobody wants to work for a living. Get yourself some super-powers and come ill with us. Full color update every Friday.'
|
||||
url = 'http://badassmuthas.com/pages/comic.php'
|
||||
stripUrl = url + '?%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
|
@ -36,6 +37,7 @@ class BadMachinery(_BasicScraper):
|
|||
|
||||
|
||||
class Bardsworth(_BasicScraper):
|
||||
description = u'Bardsworth - Magic, Mischief, and Cookies'
|
||||
url = 'http://www.bardsworth.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '?p=%s'
|
||||
|
@ -84,6 +86,7 @@ class BetweenFailures(_BasicScraper):
|
|||
|
||||
|
||||
class BigFatWhale(_BasicScraper):
|
||||
description = u'A weekly comic strip for those who are not dumb.'
|
||||
url = 'http://www.bigfatwhale.com/'
|
||||
stripUrl = url + 'archives/bfw_%s.htm'
|
||||
imageSearch = compile(tagre("img", "src", r'(archives/bfw_[^"]+|bfw_[^"]+)'))
|
||||
|
@ -101,6 +104,7 @@ class BiggerThanCheeses(_BasicScraper):
|
|||
|
||||
|
||||
class BillyTheDunce(_BasicScraper):
|
||||
description = u"Billy the Dunce: A webcomic about some genius kids, some supernatural creatures, and one dumb kid who's stuck with them. Like Goonies, but with more Lovecraft."
|
||||
url = 'http://www.duncepress.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -111,6 +115,7 @@ class BillyTheDunce(_BasicScraper):
|
|||
|
||||
|
||||
class BizarreUprising(_BasicScraper):
|
||||
description = u"Bizarre Uprising - Manga that's not just good, it's good for you!"
|
||||
url = 'http://www.bizarreuprising.com/'
|
||||
stripUrl = url + 'view/%s'
|
||||
firstStripUrl = stripUrl % '1/awakening-splash'
|
||||
|
@ -120,6 +125,7 @@ class BizarreUprising(_BasicScraper):
|
|||
|
||||
|
||||
class BlankIt(_BasicScraper):
|
||||
description = u'An absurd, insane, and delightful webcomic from Aric McKeown and Lem Pew.'
|
||||
url = 'http://blankitcomics.com/'
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '0001'
|
||||
|
@ -143,6 +149,7 @@ class Blip(_BasicScraper):
|
|||
|
||||
|
||||
class BloodBound(_BasicScraper):
|
||||
description = u'Demonic Vampire Hotness'
|
||||
url = 'http://bloodboundcomic.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -162,6 +169,7 @@ class BlueCrashKit(_BasicScraper):
|
|||
|
||||
|
||||
class BMovieComic(_BasicScraper):
|
||||
description = u"A group of unlikely heroes tackles monsters, mutants and aliens from Hollywood's past and present. See what happens. Or they'll say you haven't seen it."
|
||||
url = 'http://www.bmoviecomic.com/'
|
||||
stripUrl = url + '?cid=%s'
|
||||
firstStripUrl = stripUrl % '8'
|
||||
|
@ -215,6 +223,7 @@ class BoxerHockey(_BasicScraper):
|
|||
|
||||
|
||||
class BoyOnAStickAndSlither(_BasicScraper):
|
||||
description = u'A comic about killer bees, time travel, ethics and despair.'
|
||||
url = 'http://www.boasas.com/'
|
||||
stripUrl = url + 'page/%s'
|
||||
firstStripUrl = stripUrl % '2'
|
||||
|
@ -228,6 +237,7 @@ class BoyOnAStickAndSlither(_BasicScraper):
|
|||
|
||||
|
||||
class BratHalla(_BasicScraper):
|
||||
description = u'Norse mythology webcomic where young Thor, Loki, Balder, Hod and more face off against grade school and make an old man out of their immortal dad Odin'
|
||||
url = 'http://brat-halla.com/'
|
||||
stripUrl = url + 'comic/%s/'
|
||||
firstStripUrl = stripUrl % '1-balder-dash'
|
||||
|
@ -237,6 +247,7 @@ class BratHalla(_BasicScraper):
|
|||
|
||||
|
||||
class BrentalFloss(_BasicScraper):
|
||||
description = u'brentalfloss the comic :: Off To The Races'
|
||||
url = 'http://brentalflossthecomic.com/'
|
||||
stripUrl = url + '?id=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
|
@ -286,6 +297,7 @@ class _BringBackRoomies(_BasicScraper):
|
|||
|
||||
|
||||
class Brink(_BasicScraper):
|
||||
description = u"BRINK - You're not as crazy as you think you are"
|
||||
url = 'http://paperfangs.com/brink/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '?p=%s'
|
||||
|
@ -296,6 +308,7 @@ class Brink(_BasicScraper):
|
|||
|
||||
|
||||
class BrightlyWound(_BasicScraper):
|
||||
description = u'A webcomic of physics, astronomy, math, and grammar.'
|
||||
baseUrl = 'http://www.brightlywound.com/'
|
||||
url = baseUrl + '?comic=137'
|
||||
stripUrl = baseUrl + '?comic=%s'
|
||||
|
@ -306,6 +319,7 @@ class BrightlyWound(_BasicScraper):
|
|||
|
||||
|
||||
class BroodHollow(_BasicScraper):
|
||||
description = u'Broodhollow - A MWF cosmic horror adventure comic by Kris Straub'
|
||||
url = 'http://broodhollow.chainsawsuit.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
|
|
@ -41,6 +41,7 @@ class CaseyAndAndy(_BasicScraper):
|
|||
|
||||
|
||||
class CasuallyKayla(_BasicScraper):
|
||||
description = u'Casually Kayla: Keeping it as Casual as possible'
|
||||
url = 'http://casuallykayla.com/'
|
||||
stripUrl = url + '?p=%s'
|
||||
firstStripUrl = stripUrl % '89'
|
||||
|
@ -81,6 +82,7 @@ class Catena(_BasicScraper):
|
|||
|
||||
|
||||
class CatsAndCameras(_BasicScraper):
|
||||
description = u'Just when you thought it was safe to go to the photographer'
|
||||
url = 'http://catsncameras.com/cnc/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '?p=%s'
|
||||
|
@ -91,6 +93,7 @@ class CatsAndCameras(_BasicScraper):
|
|||
|
||||
|
||||
class ChainsawSuit(_BasicScraper):
|
||||
description = u'internet humor, fresh-cut'
|
||||
url = 'http://chainsawsuit.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -101,6 +104,7 @@ class ChainsawSuit(_BasicScraper):
|
|||
|
||||
|
||||
class Champ2010(_BasicScraper):
|
||||
description = u'Champ2010 - an almost daily journal comic from jed collins who is not drinking this year. webcomic'
|
||||
baseUrl = 'http://jedcollins.com/champ2010/'
|
||||
rurl = escape(baseUrl)
|
||||
# the latest URL is hard coded since the comic is discontinued
|
||||
|
@ -113,6 +117,7 @@ class Champ2010(_BasicScraper):
|
|||
|
||||
|
||||
class ChannelAte(_BasicScraper):
|
||||
description = u'Comics and Cartoons by Ryan Hudson'
|
||||
url = 'http://www.channelate.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -122,6 +127,7 @@ class ChannelAte(_BasicScraper):
|
|||
|
||||
|
||||
class ChasingTheSunset(_BasicScraper):
|
||||
description = u'Chasing the Sunset | Fantasy Webcomic | Elves, Pixies and a blue dragon with orange stripes.'
|
||||
url = 'http://www.fantasycomic.com/'
|
||||
stripUrl = url + 'index.php?p=c%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
|
@ -131,6 +137,7 @@ class ChasingTheSunset(_BasicScraper):
|
|||
|
||||
|
||||
class CheckerboardNightmare(_BasicScraper):
|
||||
description = u'Checkerboard Nightmare by Kristofer Straub - A Webcomics Institution'
|
||||
url = 'http://www.checkerboardnightmare.com/'
|
||||
stripUrl = url + 'd/%s.shtml'
|
||||
firstStripUrl = stripUrl % '20001110'
|
||||
|
@ -257,6 +264,7 @@ class CoolCatStudio(_BasicScraper):
|
|||
|
||||
|
||||
class CorydonCafe(_BasicScraper):
|
||||
description = u'Corydon Cafe humorous online comic archive of abstruse awesomeness created by a starving artist'
|
||||
url = 'http://corydoncafe.com/'
|
||||
starter = indirectStarter(url,
|
||||
compile(tagre("a", "href", r'(\./\d+/[^"]+)')))
|
||||
|
@ -271,6 +279,7 @@ class CorydonCafe(_BasicScraper):
|
|||
|
||||
|
||||
class CourtingDisaster(_BasicScraper):
|
||||
description = u'Courting Disaster by Brad Guigar - A Daily Webcomic'
|
||||
url = 'http://www.courting-disaster.com/'
|
||||
stripUrl = url + 'archive/%s.html'
|
||||
firstStripUrl = stripUrl % '20050112'
|
||||
|
@ -280,6 +289,7 @@ class CourtingDisaster(_BasicScraper):
|
|||
|
||||
|
||||
class CowboyJedi(_BasicScraper):
|
||||
description = u'A Long Time Ago In A Webcomic Updated Weekly...'
|
||||
url = 'http://www.cowboyjedi.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -290,6 +300,7 @@ class CowboyJedi(_BasicScraper):
|
|||
|
||||
|
||||
class CrapIDrewOnMyLunchBreak(_BasicScraper):
|
||||
description = u'A semi-biographical web comic about the struggles and occasional humour of daily life, pets, friends, and more. Currently completing the missing archive comics with your help.'
|
||||
url = 'http://crap.jinwicked.com/'
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '2003/07/30/jin-and-josh-decide-to-move'
|
||||
|
@ -313,6 +324,7 @@ class CtrlAltDelSillies(CtrlAltDel):
|
|||
|
||||
|
||||
class CrimsonDark(_BasicScraper):
|
||||
description = u'A Sci-Fi webcomic set in space in the distant future.'
|
||||
url = 'http://www.davidcsimon.com/crimsondark/'
|
||||
stripUrl = url + 'index.php?view=comic&strip_id=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
|
@ -345,6 +357,7 @@ class CucumberQuest(_BasicScraper):
|
|||
|
||||
|
||||
class Curvy(_BasicScraper):
|
||||
description = u'An erotic sci-fi adventure comic for adults.'
|
||||
url = 'http://www.c.urvy.org/'
|
||||
stripUrl = url + '?date=%s'
|
||||
firstStripUrl = stripUrl % '20080329'
|
||||
|
|
|
@ -71,6 +71,7 @@ class DangerouslyChloe(_BasicScraper):
|
|||
|
||||
|
||||
class DarkWings(_BasicScraper):
|
||||
description = u"Dark Wings - You Can't Reach Heaven on Broken Wings"
|
||||
url = 'http://www.flowerlarkstudios.com/dark-wings/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -121,6 +122,7 @@ class DeepFried(_BasicScraper):
|
|||
|
||||
|
||||
class DemolitionSquad(_BasicScraper):
|
||||
description = u'Demolitionsquad.de ist die erste deutsche Videospiel-Webcomic-Seite nach amerikanischen Vorbild und noch viel mehr als das. Auf Demolitionsquad.de findet der wissbegierige, spielebegeisterte Nutzer Comicstrips zu aktuellen Videospielen die ihm die Wartezeit auf den kommenden Top-Titel weiter ves\xfcssen.'
|
||||
url = 'http://www.demolitionsquad.de/'
|
||||
starter = indirectStarter(url,
|
||||
compile(tagre("a", "href", r'(no_cache/comicstrips/einzelansicht/archive/[^"]+)')))
|
||||
|
@ -219,6 +221,7 @@ class DominicDeegan(_BasicScraper):
|
|||
|
||||
|
||||
class DorkTower(_BasicScraper):
|
||||
description = u'The Place for All Things Dork'
|
||||
url = 'http://www.dorktower.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
|
|
@ -66,6 +66,7 @@ class ElfOnlyInn(_BasicScraper):
|
|||
|
||||
|
||||
class ElGoonishShive(_BasicScraper):
|
||||
description = u'Fantasy sci-fi comic about a group of teenagers and the bizarre, strange and supernatural circumstances of their lives.'
|
||||
name = 'KeenSpot/ElGoonishShive'
|
||||
url = 'http://www.egscomics.com/'
|
||||
stripUrl = url + '?date=%s'
|
||||
|
@ -84,6 +85,7 @@ class ElGoonishShiveNP(_BasicScraper):
|
|||
|
||||
|
||||
class Ellerbisms(_BasicScraper):
|
||||
description = u'Ellerbisms - A diary comic by Marc Ellerby'
|
||||
url = 'http://www.ellerbisms.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '?p=%s'
|
||||
|
@ -141,6 +143,7 @@ class EvilDiva(_BasicScraper):
|
|||
|
||||
|
||||
class EvilInc(_BasicScraper):
|
||||
description = u'Evil Inc. by Brad Guigar - Daily Super-Villain Webcomic and Comics Blog'
|
||||
url = 'http://www.evil-comic.com/'
|
||||
stripUrl = url + 'archive/%s.html'
|
||||
firstStripUrl = stripUrl % '20050530'
|
||||
|
@ -150,6 +153,7 @@ class EvilInc(_BasicScraper):
|
|||
|
||||
|
||||
class Exiern(_BasicScraper):
|
||||
description = u'Barbarian Typhan-Knee defeated the wizard...and became Tiffany!'
|
||||
url = 'http://www.exiern.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -160,6 +164,7 @@ class Exiern(_BasicScraper):
|
|||
|
||||
|
||||
class ExploitationNow(_BasicScraper):
|
||||
description = u'Exploitation Now - That somewhat naughty webcomic classic by Michael Poe'
|
||||
url = 'http://www.exploitationnow.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s'
|
||||
|
|
|
@ -69,6 +69,7 @@ class FeyWinds(_BasicScraper):
|
|||
|
||||
|
||||
class FilibusterCartoons(_BasicScraper):
|
||||
description = u'Political cartoons from Canada!'
|
||||
url = 'http://www.filibustercartoons.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'index.php/%s'
|
||||
|
@ -99,6 +100,7 @@ class FlakyPastry(_BasicScraper):
|
|||
|
||||
|
||||
class Flemcomics(_BasicScraper):
|
||||
description = u'FLEM Comics - More fun than a sack of dead kittens.'
|
||||
url = 'http://www.flemcomics.com/'
|
||||
stripUrl = url + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
||||
|
@ -118,6 +120,7 @@ class Flipside(_BasicScraper):
|
|||
|
||||
|
||||
class FonFlatter(_BasicScraper):
|
||||
description = u'Fred, eine stets zuversichtliche, einfallsreiche, zumeist aber alberne und etwas naive Fledermaus, flattert bereits seit 2005 t\xe4glich durch das weltweite Netz'
|
||||
url = 'http://www.fonflatter.de/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -137,6 +140,7 @@ class FonFlatter(_BasicScraper):
|
|||
|
||||
|
||||
class Footloose(_BasicScraper):
|
||||
description = u'Footloose, a Webcomic By alice Nuttall and Emily Brady'
|
||||
url = 'http://footloosecomic.com/footloose/today.php'
|
||||
stripUrl = 'http://footloosecomic.com/footloose/pages.php?page=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
|
@ -177,6 +181,7 @@ class FredoAndPidjin(_BasicScraper):
|
|||
|
||||
|
||||
class FullFrontalNerdity(_BasicScraper):
|
||||
description = u'Full Frontal Nerdity by Aaron Williams - Comics about liking all things gaming, technological, mythological, and internetological.'
|
||||
url = 'http://ffn.nodwick.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '?p=%s'
|
||||
|
|
|
@ -10,6 +10,7 @@ from ..util import tagre
|
|||
|
||||
|
||||
class Galaxion(_BasicScraper):
|
||||
description = u'Galaxion - Life. Love. Hyperspace.'
|
||||
url = 'http://galaxioncomics.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -20,6 +21,7 @@ class Galaxion(_BasicScraper):
|
|||
|
||||
|
||||
class Garanos(_BasicScraper):
|
||||
description = u'Garanos - A dramatic fantasy webcomic with a dash of adventure, gothic horror, and romance for flavor.'
|
||||
baseUrl = 'http://garanos.alexheberling.com/'
|
||||
rurl = escape(baseUrl)
|
||||
url = baseUrl + 'pages/page-1/'
|
||||
|
@ -42,6 +44,7 @@ class GastroPhobia(_BasicScraper):
|
|||
|
||||
|
||||
class Geeks(_BasicScraper):
|
||||
description = u'Geeks Trying To Be Funny'
|
||||
url = 'http://sevenfloorsdown.com/geeks/'
|
||||
stripUrl = url + 'archives/%s'
|
||||
firstStripUrl = stripUrl % '10'
|
||||
|
@ -74,6 +77,7 @@ class GirlsWithSlingshots(_BasicScraper):
|
|||
|
||||
|
||||
class GlassHalfEmpty(_BasicScraper):
|
||||
description = u'A Glass Half Empty cartoon by Dan Markowitz'
|
||||
url = 'http://www.defectivity.com/ghe/index.php'
|
||||
stripUrl = url + '?strip_id=%s'
|
||||
firstStripUrl = stripUrl % '0'
|
||||
|
@ -83,6 +87,7 @@ class GlassHalfEmpty(_BasicScraper):
|
|||
|
||||
|
||||
class GleefulNihilism(_BasicScraper):
|
||||
description = u'pointless comics with a sideways grin'
|
||||
url = 'http://gleefulnihilism.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'comics/%s/'
|
||||
|
@ -127,6 +132,7 @@ class GoneWithTheBlastwave(_BasicScraper):
|
|||
|
||||
|
||||
class GrrlPower(_BasicScraper):
|
||||
description = u'Grrl Power - A webcomic about superheroines.'
|
||||
url = 'http://www.grrlpowercomic.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'archives/%s'
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2012-2013 Bastian Kleineidam
|
||||
|
||||
from re import compile, escape
|
||||
|
@ -48,6 +49,7 @@ class HarkAVagrant(_BasicScraper):
|
|||
|
||||
|
||||
class HijinksEnsue(_BasicScraper):
|
||||
description = u'HijiNKS ENSUE is a geek pop culture webcomic that makes fun of the latest news in tv, movies, Sci-Fi, technology and the Internet'
|
||||
url = 'http://hijinksensue.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -58,6 +60,7 @@ class HijinksEnsue(_BasicScraper):
|
|||
|
||||
|
||||
class Hipsters(_BasicScraper):
|
||||
description = u'a weekly webcomic series by Adrian vom Baur - Hipsters vs. Vampires - Hipsters vs. Dinosaurs - Hipsters vs. Robots'
|
||||
url = 'http://www.hipsters-comic.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
|
|
@ -8,6 +8,7 @@ from ..util import tagre
|
|||
|
||||
|
||||
class IAmArg(_BasicScraper):
|
||||
description = u'An Internet comic of non sequitur Geekiness. Updates Monday, Wednesday and Friday'
|
||||
url = 'http://iamarg.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -37,6 +38,7 @@ class IDreamOfAJeanieBottle(_BasicScraper):
|
|||
|
||||
|
||||
class InsideOut(_BasicScraper):
|
||||
description = u'Inside Out Comic - A new kind of Weird'
|
||||
url = 'http://www.insideoutcomic.com/'
|
||||
stripUrl = url + 'html/%s.html'
|
||||
firstStripUrl = stripUrl % '1_snake_suicide'
|
||||
|
|
|
@ -8,6 +8,7 @@ from ..util import tagre
|
|||
|
||||
|
||||
class JackCannon(_BasicScraper):
|
||||
description = u'The Fancy Adventures of Jack Cannon \u2013 A Webcomic -'
|
||||
url = 'http://fancyadventures.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
|
|
@ -9,6 +9,7 @@ from ..helpers import indirectStarter
|
|||
|
||||
|
||||
class KatzenfutterGeleespritzer(_BasicScraper):
|
||||
description = u'Webcomic mit Geschichten aus dem Alltag eines geeky Cartoonisten und Comiczeichners'
|
||||
url = 'http://www.katzenfuttergeleespritzer.de/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'comic/%s/'
|
||||
|
@ -23,6 +24,7 @@ class KatzenfutterGeleespritzer(_BasicScraper):
|
|||
|
||||
|
||||
class KevinAndKell(_BasicScraper):
|
||||
description = u'Kevin & Kell by Bill Holbrook'
|
||||
url = 'http://www.kevinandkell.com/'
|
||||
stripUrl = url + '%s/kk%s%s.html'
|
||||
firstStripUrl = stripUrl % ('1995', '09', '03')
|
||||
|
@ -35,6 +37,7 @@ class KevinAndKell(_BasicScraper):
|
|||
|
||||
|
||||
class Key(_BasicScraper):
|
||||
description = u'Key - Fantasy Adventure Webcomic'
|
||||
baseUrl = 'http://key.shadilyn.com/'
|
||||
url = baseUrl + 'latestpage.html'
|
||||
stripUrl = baseUrl + 'pages/%s.html'
|
||||
|
@ -54,6 +57,7 @@ class KickInTheHead(_BasicScraper):
|
|||
|
||||
|
||||
class KillerKomics(_BasicScraper):
|
||||
description = u'Web comic - Lorsque tu repasses une chemise, fait attention pour ne pas oublier un endroit... - Web comic drole avec bonhommes allumettes, webcomic avec beaucoup de sang et violence. Pour ceux qui veulent rire \xe0 la violence.'
|
||||
baseUrl = 'http://www.killerkomics.com/web-comics/'
|
||||
url = baseUrl + 'index_ang.cfm'
|
||||
stripUrl = baseUrl + '%s.cfm'
|
||||
|
@ -92,6 +96,7 @@ class Kukuburi(_BasicScraper):
|
|||
|
||||
|
||||
class KuroShouri(_BasicScraper):
|
||||
description = u'a webcomic by Kikirini and DJ-Anarchy'
|
||||
url = 'http://kuroshouri.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '?webcomic_post=%s'
|
||||
|
|
|
@ -19,6 +19,7 @@ class LasLindas(_BasicScraper):
|
|||
|
||||
|
||||
class LeastICouldDo(_BasicScraper):
|
||||
description = u'A daily webcomic series about the life of Rayne Summers. Created by Ryan Sohmer and Lar deSouza.'
|
||||
url = 'http://www.leasticoulddo.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'comic/%s'
|
||||
|
@ -41,6 +42,7 @@ class Lint(_BasicScraper):
|
|||
|
||||
|
||||
class LittleGamers(_BasicScraper):
|
||||
description = u'The comic everyone knows, but no one reads'
|
||||
url = 'http://www.little-gamers.com/'
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '2000/12/01/99'
|
||||
|
@ -50,6 +52,7 @@ class LittleGamers(_BasicScraper):
|
|||
|
||||
|
||||
class LoadingArtist(_BasicScraper):
|
||||
description = u'A webcomic by Gregor Czaykowski'
|
||||
url = 'http://www.loadingartist.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
|
|
@ -35,6 +35,7 @@ class MagickChicks(_BasicScraper):
|
|||
|
||||
|
||||
class ManlyGuysDoingManlyThings(_BasicScraper):
|
||||
description = u'Manly Guys Doing Manly Things \xbb Updated Mondays or whenever I feel like it'
|
||||
url = 'http://thepunchlineismachismo.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'archives/comic/%s'
|
||||
|
@ -54,6 +55,7 @@ class Marilith(_BasicScraper):
|
|||
|
||||
|
||||
class MarryMe(_BasicScraper):
|
||||
description = u'MARRY ME: an online graphic novel by Bobby Crosby and Remy "Eisu" Mokhtar'
|
||||
url = 'http://marryme.keenspot.com/'
|
||||
stripUrl = url + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
||||
|
@ -99,6 +101,7 @@ class MenageA3(_BasicScraper):
|
|||
|
||||
|
||||
class Melonpool(_BasicScraper):
|
||||
description = u"Star Trek Meets Gilligan's Island"
|
||||
url = 'http://www.melonpool.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '?p=%s'
|
||||
|
|
|
@ -23,6 +23,7 @@ class Namesake(_BasicScraper):
|
|||
|
||||
|
||||
class NamirDeiter(_BasicScraper):
|
||||
description = u'Namir Deiter, by Isabel Marks'
|
||||
url = 'http://www.namirdeiter.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'comics/index.php?date=%s'
|
||||
|
@ -43,6 +44,7 @@ class Nedroid(_BasicScraper):
|
|||
|
||||
|
||||
class NeoEarth(_BasicScraper):
|
||||
description = u'Neo-Earth - Web comic by Bryan King - Updated Every Monday'
|
||||
url = 'http://www.neo-earth.com/NE/'
|
||||
stripUrl = url + 'index.php?date=%s'
|
||||
firstStripUrl = stripUrl % '2007-03-23'
|
||||
|
@ -121,6 +123,7 @@ class Nnewts(_BasicScraper):
|
|||
|
||||
|
||||
class Nodwick(_BasicScraper):
|
||||
description = u'Welcome to Nodwick.com - A fantasy farce for the whole party!'
|
||||
url = 'http://comic.nodwick.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + "?p=%s"
|
||||
|
@ -130,6 +133,7 @@ class Nodwick(_BasicScraper):
|
|||
|
||||
|
||||
class NobodyScores(_BasicScraper):
|
||||
description = u'Nobody Scores! A little comic about inevitable disaster'
|
||||
url = 'http://nobodyscores.loosenutstudio.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'index.php?id=%s'
|
||||
|
@ -151,6 +155,7 @@ class NoNeedForBushido(_BasicScraper):
|
|||
|
||||
|
||||
class Nukees(_BasicScraper):
|
||||
description = u'Nukees, The Atomic Comic Strip'
|
||||
url = 'http://www.nukees.com/'
|
||||
stripUrl = url + 'd/%s'
|
||||
firstStripUrl = stripUrl % '19970121'
|
||||
|
|
|
@ -9,6 +9,7 @@ from ..scraper import _BasicScraper
|
|||
|
||||
|
||||
class NineteenNinetySeven(_BasicScraper):
|
||||
description = u'1977 the Comic - Sex, Drugs and Rock and Roll Just Not in That Order'
|
||||
name = '1997'
|
||||
url = 'http://www.1977thecomic.com/'
|
||||
stripUrl = url + '%s/'
|
||||
|
|
|
@ -110,6 +110,7 @@ class OurHomePlanet(_BasicScraper):
|
|||
|
||||
|
||||
class OverCompensating(_BasicScraper):
|
||||
description = u'OVERCOMPENSATING: The Journal Comic With a Seething Disdain for Reality.'
|
||||
url = 'http://www.overcompensating.com/'
|
||||
stripUrl = url + 'posts/%s.html'
|
||||
firstStripUrl = stripUrl % '20040929'
|
||||
|
|
|
@ -19,6 +19,7 @@ class PandyLand(_BasicScraper):
|
|||
|
||||
|
||||
class ParadigmShift(_BasicScraper):
|
||||
description = u'A Paranormal Graphic Novel by Dirk I. Tiede'
|
||||
url = 'http://www.paradigmshiftmanga.com/'
|
||||
starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after="next-comic-link")))
|
||||
stripUrl = url + 'ps/%s.html'
|
||||
|
@ -40,6 +41,7 @@ class ParallelUniversum(_BasicScraper):
|
|||
|
||||
|
||||
class PartiallyClips(_BasicScraper):
|
||||
description = u'PartiallyClips - The true stories behind your favorite clip art.'
|
||||
url = 'http://partiallyclips.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -97,6 +99,7 @@ class PennyArcade(_BasicScraper):
|
|||
|
||||
|
||||
class PeppermintSaga(_BasicScraper):
|
||||
description = u'Sexy Fucking Fantasy Adventure Webcomic - NSFW'
|
||||
url = 'http://www.pepsaga.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '?p=%s'
|
||||
|
@ -104,6 +107,7 @@ class PeppermintSaga(_BasicScraper):
|
|||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
|
||||
help = 'Index format: number'
|
||||
adult = True
|
||||
|
||||
|
||||
class PHDComics(_BasicScraper):
|
||||
|
@ -118,6 +122,7 @@ class PHDComics(_BasicScraper):
|
|||
|
||||
|
||||
class PicPakDog(_BasicScraper):
|
||||
description = u'A comic by Kim Belding'
|
||||
url = 'http://www.picpak.net/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'comic/%s/'
|
||||
|
|
|
@ -50,6 +50,7 @@ class RedMeat(_BasicScraper):
|
|||
|
||||
|
||||
class RedString(_BasicScraper):
|
||||
description = u'A web comics about love and growing up. Art by Gina Biggs.'
|
||||
url = 'http://www.redstring.strawberrycomics.com/'
|
||||
stripUrl = url + 'index.php?id=%s'
|
||||
firstStripUrl = stripUrl % '434'
|
||||
|
|
|
@ -42,6 +42,7 @@ class SandraAndWoo(_BasicScraper):
|
|||
|
||||
|
||||
class SandraAndWooGerman(_BasicScraper):
|
||||
description = u'Sandra und Woo: ein Webcomic \xfcber Freundschaft, das Leben und die Kunst (keine) Eichh\xf6rnchen zu essen; mit dem M\xe4dchen Sandra und ihrem Waschb\xe4ren Woo in den Hauptrollen'
|
||||
url = 'http://www.sandraandwoo.com/woode/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -142,6 +143,7 @@ class SexyLosers(_BasicScraper):
|
|||
|
||||
|
||||
class ShadowGirls(_BasicScraper):
|
||||
description = u"It's like H.P. Lovecraft meets the Gilmore Girls!"
|
||||
url = 'http://www.shadowgirlscomic.com/'
|
||||
stripUrl = url + 'comics/%s'
|
||||
firstStripUrl = stripUrl % 'book-1/chapter-1-broken-dreams/welcome'
|
||||
|
@ -279,6 +281,7 @@ class SnowFlakes(_BasicScraper):
|
|||
|
||||
|
||||
class SnowFlame(_BasicScraper):
|
||||
description = u'The fan-comic series featuring "The Man Powered by Cocaine"'
|
||||
url = 'http://www.snowflamecomic.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '?comic=snowflame-%s-%s'
|
||||
|
@ -336,6 +339,7 @@ class SpaceTrawler(_BasicScraper):
|
|||
|
||||
|
||||
class Spamusement(_BasicScraper):
|
||||
description = u'Spamusement! Poorly-drawn cartoons inspired by actual spam subject lines!'
|
||||
url = 'http://spamusement.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'index.php/comics/view/%s'
|
||||
|
@ -346,6 +350,7 @@ class Spamusement(_BasicScraper):
|
|||
|
||||
|
||||
class SpareParts(_BasicScraper):
|
||||
description = u'Spare Parts by Terrence and Isabel Marks!'
|
||||
baseUrl = 'http://www.sparepartscomics.com/'
|
||||
url = baseUrl + 'comics/?date=20080328'
|
||||
stripUrl = baseUrl + 'comics/index.php?date=%s'
|
||||
|
@ -506,6 +511,7 @@ class _StrangeCandy(_BasicScraper):
|
|||
|
||||
|
||||
class SupernormalStep(_BasicScraper):
|
||||
description = u'Supernormal Step - Magic, Face Punching, and a Robot or Two'
|
||||
url = 'http://supernormalstep.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '?p=%s'
|
||||
|
|
|
@ -9,6 +9,7 @@ from ..util import tagre
|
|||
|
||||
|
||||
class TheBrads(_BasicScraper):
|
||||
description = u'ArchiveFirst World Problems Comic - By Brad Colbow'
|
||||
url = 'http://bradcolbow.com/archive/C4/'
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % 'P125'
|
||||
|
@ -19,6 +20,7 @@ class TheBrads(_BasicScraper):
|
|||
|
||||
|
||||
class TheDevilsPanties(_BasicScraper):
|
||||
description = u"It's not Satanic Porn"
|
||||
url = 'http://thedevilspanties.com/'
|
||||
stripUrl = url + 'archives/%s'
|
||||
firstStripUrl = stripUrl % '300'
|
||||
|
@ -128,6 +130,7 @@ class _TwoLumps(_BasicScraper):
|
|||
|
||||
|
||||
class TwoTwoOneFour(_BasicScraper):
|
||||
description = u'Artwork, comics, graphic novels, music, articles, and various silliness by Troy McQuinn'
|
||||
url = 'http://www.nitrocosm.com/go/2214_classic/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -138,6 +141,7 @@ class TwoTwoOneFour(_BasicScraper):
|
|||
|
||||
|
||||
class TheWhiteboard(_BasicScraper):
|
||||
description = u'The Whiteboard, a somewhat paintball-related webcomic by "Doc" Nickel'
|
||||
url = 'http://www.the-whiteboard.com/'
|
||||
stripUrl = url + 'auto%s.html'
|
||||
imageSearch = compile(r'<img SRC="(autotwb\d{1,4}.+?|autowb\d{1,4}.+?)">', IGNORECASE)
|
||||
|
@ -146,6 +150,7 @@ class TheWhiteboard(_BasicScraper):
|
|||
|
||||
|
||||
class HMHigh(_BasicScraper):
|
||||
description = u'Welcome to the website of professional artist and illustrator, Angel Smith and of Fallen Angel Media Ltd; An independent art, design & publishing house and event management company based in Bristol, England'
|
||||
name = 'TheFallenAngel/HMHigh'
|
||||
baseUrl = 'http://www.thefallenangel.co.uk/'
|
||||
url = baseUrl + 'hmhigh/'
|
||||
|
|
|
@ -20,6 +20,7 @@ class Undertow(_BasicScraper):
|
|||
|
||||
|
||||
class UnicornJelly(_BasicScraper):
|
||||
description = u'UNICORN JELLY anime manga comic strip by Jennifer Diane Reitz'
|
||||
baseUrl = 'http://unicornjelly.com/'
|
||||
url = baseUrl + 'uni666.html'
|
||||
stripUrl = baseUrl + 'uni%s.html'
|
||||
|
|
|
@ -66,6 +66,7 @@ class WebDesignerCOTW(_BasicScraper):
|
|||
|
||||
|
||||
class WeCanSleepTomorrow(_BasicScraper):
|
||||
description = u'We Can Sleep Tomorrow - Life does not take bathroom breaks. A webcomic that updates Mondays and Fridays'
|
||||
url = 'http://wecansleeptomorrow.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -106,6 +107,7 @@ class WhyTheLongFace(_BasicScraper):
|
|||
|
||||
|
||||
class Wigu(_BasicScraper):
|
||||
description = u'WIGU: A COMIC ON INTERNET'
|
||||
url = 'http://wigucomics.com/'
|
||||
stripUrl = url + 'adventures/index.php?comic=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
|
@ -168,6 +170,7 @@ class WorldOfMrToast(_BasicScraper):
|
|||
|
||||
|
||||
class WormWorldSaga(_BasicScraper):
|
||||
description = u'The Wormworld Saga Online Graphic Novel by Daniel Lieske - An Epic Fantasy Adventure for all Ages'
|
||||
url = 'http://www.wormworldsaga.com/'
|
||||
stripUrl = url + 'chapters/%s/index.php'
|
||||
firstStripUrl = stripUrl % 'chapter01/EN'
|
||||
|
@ -222,6 +225,7 @@ class _WorldOfWarcraftEh(_BasicScraper):
|
|||
|
||||
|
||||
class Wulffmorgenthaler(_BasicScraper):
|
||||
description = u"Entertainment - Since 2002. Wulff & Morgenthaler's Personal humoristic social commentary on life, nostalgia and the World in general. Nothing is taboo: They deal with Politics, News, Entertainment, Technology, Culture, and Weirdo Beavers"
|
||||
url = 'http://wumocomicstrip.com/'
|
||||
stripUrl = url + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(/img/strip/[^/"]+)'))
|
||||
|
|
|
@ -8,6 +8,7 @@ from ..util import tagre
|
|||
|
||||
|
||||
class YAFGC(_BasicScraper):
|
||||
description = u'Yet Another Fantasy Gamer Comic'
|
||||
url = 'http://yafgc.net/'
|
||||
stripUrl = url + '?id=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
|
|
1
scripts/mkdescription.json
Normal file
1
scripts/mkdescription.json
Normal file
File diff suppressed because one or more lines are too long
99
scripts/mkdescription.py
Executable file
99
scripts/mkdescription.py
Executable file
|
@ -0,0 +1,99 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2013 Bastian Kleineidam
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import os
|
||||
# for dosage import
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
|
||||
from dosagelib.scraper import get_scraperclasses
|
||||
from scriptutil import save_result, load_result
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
|
||||
# User-Agent: Iceweasel (Firefox) 15.02 (Debian)
UserAgent = "Mozilla/5.0 (X11; Linux x86_64; rv:15.0) Gecko/20120724 Debian Iceweasel/15.02"


# JSON result cache stored next to this script: same path, with the script's
# extension swapped for ".json". splitext() only touches the final extension,
# so a ".py" occurring elsewhere in the path is left alone and a compiled
# "mkdescription.pyc" still maps to "mkdescription.json".
json_file = os.path.splitext(__file__)[0] + ".json"
|
||||
|
||||
|
||||
def get_scraper_url(scraperclass):
    """Return the scraper's ``baseUrl`` when it defines one, else its ``url``."""
    try:
        return scraperclass.baseUrl
    except AttributeError:
        # No dedicated base URL on this scraper: use the starter URL.
        return scraperclass.url
|
||||
|
||||
|
||||
def classname(clazz):
    """Return the ``__name__`` of the class *clazz*."""
    name = clazz.__name__
    return name
|
||||
|
||||
|
||||
def elem_text(elem, sep=u" "):
    """Join the ``stripped_strings`` of a BeautifulSoup element with *sep*."""
    fragments = elem.stripped_strings
    return sep.join(fragments)
|
||||
|
||||
|
||||
def get_description(url, lang):
    """Fetch *url* and extract a short description from its HTML.

    Sources are tried in this order: the ``og:description`` meta tag, the
    first ``description`` meta tag with a non-empty ``content`` attribute,
    and finally the text of the ``<title>`` element.

    Args:
        url: Address of the page to fetch.
        lang: Accepted for the caller's convenience; not used here.

    Returns:
        The description text. ``u""`` when the server answers with a non-OK
        HTTP status. ``None`` when the request itself fails or the page
        offers none of the sources above.
    """
    headers = {'User-Agent': UserAgent}
    try:
        req = requests.get(url, headers=headers)
    except Exception as msg:
        # Best effort: report the failure and let the caller carry on.
        print("Error: %s" % msg)
        return None
    if req.status_code != requests.codes.ok:
        print("WARN: HTTP %d" % req.status_code)
        return u""
    doc = BeautifulSoup(req.text)
    elem = doc.find("meta", dict(property="og:description"))
    # Tag.get() yields None for a missing attribute, so a tag without
    # "content" falls through to the next source instead of raising KeyError.
    if elem is not None and elem.get("content"):
        return elem["content"]
    for elem in doc.find_all("meta", dict(name="description")):
        # Attributes must be looked up with get(): the "in" operator on a
        # Tag tests its child nodes, which a <meta> element never has.
        if elem.get("content"):
            return elem["content"]
    elem = doc.find('title')
    if elem is not None:
        return elem_text(elem)
    return None
|
||||
|
||||
|
||||
def main(args):
    """Collect descriptions for scrapers that lack one and cache them as JSON.

    Previously saved results are loaded from ``json_file`` when that file
    exists. Scraper classes are visited in class-name order. If ``args[0]``
    is given, every class before the one with that name is skipped.
    A class is also skipped when its name contains an underscore, when it
    already has a description, or when it is already in the cache. For the
    rest, the description is fetched from the scraper's URL, and the cache
    is saved after every hit.

    Returns 0.
    """
    result = load_result(json_file) if os.path.isfile(json_file) else {}
    start_at = args[0] if args else None
    for scraper in sorted(get_scraperclasses(), key=classname):
        name = classname(scraper)
        if start_at:
            if name != start_at:
                continue
            # Starting point reached: process this and all following classes.
            start_at = None
        if '_' in name:
            continue
        print(name)
        if scraper.description or name in result:
            continue
        url = get_scraper_url(scraper)
        print(url)
        description = get_description(url, scraper.lang)
        if not description:
            print("No description found")
            continue
        print(description)
        # store result
        result[name] = dict(description=description,
                            module=scraper.__module__, url=url)
        save_result(result, json_file)
    return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run with the command line arguments; main()'s result is the exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
|
82
scripts/update_description.py
Executable file
82
scripts/update_description.py
Executable file
|
@ -0,0 +1,82 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2013 Bastian Kleineidam
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import codecs
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
|
||||
from scriptutil import load_result, save_result, format_description
|
||||
from dosagelib.scraper import get_scraperclasses
|
||||
|
||||
|
||||
# Path of "mkdescription.json" in this script's own directory. It is built
# from the directory rather than by substituting the script's file name,
# which yields a wrong path whenever __file__ does not end in exactly
# "update_description.py" (a compiled ".pyc" would give ".jsonc").
json_file = os.path.join(os.path.dirname(__file__), "mkdescription.json")
|
||||
|
||||
|
||||
def has_description(classname):
    """Return the description of the scraper class named *classname*.

    The value is used by callers as a truth test: an empty description
    means the scraper still lacks one.

    Raises:
        ValueError: no scraper class has that name.
    """
    matches = (candidate for candidate in get_scraperclasses()
               if candidate.__name__ == classname)
    for match in matches:
        return match.description
    raise ValueError("Unknown scraper class %s" % classname)
|
||||
|
||||
|
||||
# Matches an Emacs-style declaration such as "# -*- coding: iso-8859-1 -*-".
# Codec names may contain "_" and "." as well as "-" (e.g. "utf_8").
EncodingMatch = re.compile(r'# -\*- coding: ([-_.a-zA-Z0-9]+) -\*-')


def get_encoding(filename):
    """Return the source encoding declared in a .py file.

    The declaration is accepted on the first or second line only, so a file
    may start with a shebang line followed by the coding line.

    Args:
        filename: Path of the Python source file to inspect.

    Returns:
        The declared encoding name, e.g. ``"iso-8859-1"``.

    Raises:
        ValueError: neither of the first two lines declares an encoding.
    """
    with open(filename, 'r') as f:
        for _ in range(2):
            # readline() returns "" at end of file, which simply fails to match.
            mo = EncodingMatch.search(f.readline())
            if mo:
                return mo.group(1)
    raise ValueError("No encoding line at %s" % filename)
|
||||
|
||||
|
||||
def answer(classname, info):
    """Show a scraper's candidate description and ask the user to accept it.

    Prints an empty line, then prompts with the class name and the
    whitespace-stripped ``info['description']``.

    Returns True when the reply starts with "y" (in either case).
    """
    candidate = info['description'].strip()
    print()
    question = u'%s: %s [y/N]? ' % (classname, candidate)
    reply = raw_input(question.encode('utf-8'))
    return reply.lower().startswith('y')
|
||||
|
||||
|
||||
def main(args):
    """Interactively write cached descriptions into the scraper modules.

    Cached entries are loaded from ``json_file`` when that file exists and
    visited in class-name order. An entry is skipped when its scraper
    already has a description, when the class name contains an underscore,
    or when it was rejected earlier (``answer`` is ``'no'``). The user is
    asked about every remaining entry. A rejection is recorded in the cache
    and saved. An accepted description is written into a copy of the
    module's source file, which then replaces the original.

    Returns 0.
    """
    result = load_result(json_file) if os.path.isfile(json_file) else {}
    for classname, info in sorted(result.items()):
        if has_description(classname) or '_' in classname:
            continue
        if info.get('answer') == 'no':
            continue
        if answer(classname, info):
            source_path = info['module'].replace('.', os.sep) + ".py"
            encoding = get_encoding(source_path)
            # Write to a sibling file first, then move it over the original.
            with codecs.open(source_path, 'r', encoding) as f:
                with codecs.open(source_path+"_", 'w', encoding) as out:
                    write_description(f, out, classname, info)
            os.rename(source_path+"_", source_path)
        else:
            # Remember the rejection so the question is not asked again.
            info['answer'] = 'no'
            save_result(result, json_file)
    return 0
|
||||
|
||||
|
||||
def write_description(f, out, classname, info):
    """Copy *f* to *out*, adding a description attribute to one class.

    Every input line is written unchanged. Directly after a line starting
    with ``class <classname>(_BasicScraper):`` an extra
    ``description = ...`` line is written, holding the formatted
    ``info['description']``.

    Args:
        f: Iterable of source lines (the opened module file).
        out: Writable file object receiving the modified source.
        classname: Name of the scraper class to annotate.
        info: Cache entry; its ``'description'`` value is inserted.

    Returns:
        0
    """
    header = 'class %s(_BasicScraper):' % classname
    for line in f:
        out.write(line)
        if line.startswith(header):
            description = format_description(info['description'])
            # The new attribute is the first statement of the class body, so
            # it needs the 4-space body indentation to keep the file parsable.
            out.write(u'    description = %r\n' % description)
    return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run with the command line arguments; main()'s result is the exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
|
Loading…
Reference in a new issue