From 7f78bea1af6a5a80f7c21f9dd6edc692509b0947 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Mon, 4 Feb 2013 21:00:26 +0100 Subject: [PATCH] Always have an url attribute in comic scrapers. --- dosagelib/plugins/a.py | 109 ++++++++++++------------ dosagelib/plugins/arcamax.py | 6 +- dosagelib/plugins/b.py | 94 ++++++++++---------- dosagelib/plugins/c.py | 127 ++++++++++++++-------------- dosagelib/plugins/clonemanga.py | 2 +- dosagelib/plugins/creators.py | 2 +- dosagelib/plugins/d.py | 68 +++++++-------- dosagelib/plugins/drunkduck.py | 1 + dosagelib/plugins/e.py | 73 ++++++++-------- dosagelib/plugins/f.py | 39 ++++----- dosagelib/plugins/g.py | 59 ++++++------- dosagelib/plugins/gocomics.py | 4 +- dosagelib/plugins/h.py | 4 +- dosagelib/plugins/i.py | 20 ++--- dosagelib/plugins/j.py | 20 ++--- dosagelib/plugins/k.py | 26 +++--- dosagelib/plugins/keenspot.py | 2 +- dosagelib/plugins/l.py | 18 ++-- dosagelib/plugins/m.py | 38 ++++----- dosagelib/plugins/n.py | 45 +++++----- dosagelib/plugins/nuklearpower.py | 4 +- dosagelib/plugins/num.py | 4 +- dosagelib/plugins/o.py | 32 +++---- dosagelib/plugins/p.py | 73 ++++++++-------- dosagelib/plugins/pensandtales.py | 2 +- dosagelib/plugins/petitesymphony.py | 6 +- dosagelib/plugins/q.py | 8 +- dosagelib/plugins/r.py | 19 +++-- dosagelib/plugins/s.py | 107 +++++++++++------------ dosagelib/plugins/smackjeeves.py | 1 + dosagelib/plugins/snafu.py | 2 +- dosagelib/plugins/t.py | 46 +++++----- dosagelib/plugins/u.py | 12 +-- dosagelib/plugins/universal.py | 6 +- dosagelib/plugins/v.py | 26 +++--- dosagelib/plugins/w.py | 38 ++++----- dosagelib/plugins/webcomiceu.py | 6 +- dosagelib/plugins/webcomicnation.py | 2 +- dosagelib/plugins/wlpcomics.py | 1 + dosagelib/plugins/x.py | 6 +- dosagelib/plugins/y.py | 8 +- dosagelib/plugins/z.py | 16 ++-- dosagelib/scraper.py | 2 +- scripts/mktestpage.py | 6 +- 44 files changed, 602 insertions(+), 588 deletions(-) diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index f793eabf3..ef40da93a 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -9,40 +9,40 @@ from ..helpers import regexNamer, bounceStarter, indirectStarter class ALessonIsLearned(_BasicScraper): - baseUrl = 'http://www.alessonislearned.com/' + url = 'http://www.alessonislearned.com/' prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")+r"[^>]+previous") - starter = indirectStarter(baseUrl, prevSearch) - stripUrl = baseUrl + 'index.php?comic=%s' + starter = indirectStarter(url, prevSearch) + stripUrl = url + 'index.php?comic=%s' imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)")) help = 'Index format: nnn' class ASofterWorld(_BasicScraper): - latestUrl = 'http://www.asofterworld.com/' - stripUrl = latestUrl + 'index.php?id=%s' + url = 'http://www.asofterworld.com/' + stripUrl = url + 'index.php?id=%s' imageSearch = compile(tagre("img", "src", r'(http://www\.asofterworld\.com/clean/[^"]+)')) prevSearch = compile(tagre("a", "href", "(index\.php\?id=\d+)")+'< back') help = 'Index format: n (unpadded)' class AbleAndBaker(_BasicScraper): - latestUrl = 'http://www.jimburgessdesign.com/comics/index.php' - stripUrl = latestUrl + '?comic=%s' + url = 'http://www.jimburgessdesign.com/comics/index.php' + stripUrl = url + '?comic=%s' imageSearch = compile(tagre('img', 'src', r'(comics/.+)')) prevSearch = compile(tagre('a', 'href', r'(.+\d+)') + '.+?previous.gif') help = 'Index format: nnn' class AbominableCharlesChristopher(_BasicScraper): - latestUrl = 'http://www.abominable.cc/' - stripUrl = latestUrl + '%s' + url = 'http://www.abominable.cc/' + stripUrl = url + '%s' imageSearch = compile(tagre("img", "src", r'(http://www\.abominable\.cc/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'([^"]+)')+"[^<]+Previous") help = 'Index format: yyyy/mm/dd/comicname' class AbsurdNotions(_BasicScraper): - latestUrl = 'http://www.absurdnotions.org/page129.html' + url = 'http://www.absurdnotions.org/page129.html' stripUrl = 'http://www.absurdnotions.org/page%s.html' imageSearch = compile(tagre('img', 'src', r'(an[^"]+)')) multipleImagesPerStrip = True @@ -51,9 +51,10 @@ class AbsurdNotions(_BasicScraper): class AbstruseGoose(_BasicScraper): - starter = bounceStarter('http://abstrusegoose.com/', + url = 'http://abstrusegoose.com/' + starter = bounceStarter(url, compile(tagre('a', 'href', r'(http://abstrusegoose\.com/\d+)')+"Next »")) - stripUrl = 'http://abstrusegoose.com/%s' + stripUrl = url + '%s' imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)')) prevSearch = compile(tagre('a', 'href', r'(http://abstrusegoose\.com/\d+)') + r'« Previous') help = 'Index format: n (unpadded)' @@ -66,71 +67,70 @@ class AbstruseGoose(_BasicScraper): class AcademyVale(_BasicScraper): - latestUrl = 'http://www.imagerie.com/vale/' - stripUrl = latestUrl + 'avarch.cgi?%s' + url = 'http://www.imagerie.com/vale/' + stripUrl = url + 'avarch.cgi?%s' imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)')) prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + tagre('img', 'src', 'AVNavBack\.gif')) help = 'Index format: nnn' class Alice(_BasicScraper): - latestUrl = 'http://alice.alicecomics.com/' - stripUrl = latestUrl + '%s/' + url = 'http://alice.alicecomics.com/' + stripUrl = url + '%s/' imageSearch = compile(tagre("img", "src", r'(http://alice\.alicecomics\.com/wp-content/webcomic/alicecomics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://alice\.alicecomics\.com/archive/[^"]+)', after="previous")) help = 'Index format: name' class AlienLovesPredator(_BasicScraper): - latestUrl = 'http://alienlovespredator.com/' - stripUrl = latestUrl + '%s' + url = 'http://alienlovespredator.com/' + stripUrl = url + '%s' imageSearch = compile(tagre("img", "src", r'([^"]+)', after='border="1" alt="" width="750"')) prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev")) help = 'Index format: yyyy/mm/dd/name/' class Altermeta(_BasicScraper): - latestUrl = 'http://altermeta.net/' - stripUrl = latestUrl + 'archive.php?comic=%s' + url = 'http://altermeta.net/' + stripUrl = url + 'archive.php?comic=%s' imageSearch = compile(r'') prevSearch = compile(r'Back') class AmazingSuperPowers(_BasicScraper): - latestUrl = 'http://www.amazingsuperpowers.com/' - stripUrl = latestUrl + '%s/' + url = 'http://www.amazingsuperpowers.com/' + stripUrl = url + '%s/' imageSearch = compile(tagre("img", "src", r'(http://www\.amazingsuperpowers\.com/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://www\.amazingsuperpowers\.com/[^"]+)', after="prev")) help = 'Index format: yyyy/mm/name' class Angels2200(_BasicScraper): - latestUrl = 'http://www.janahoffmann.com/angels/' - stripUrl = latestUrl + '%s' + url = 'http://www.janahoffmann.com/angels/' + stripUrl = url + '%s' imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^']+)", quote="'")) prevSearch = compile(tagre("a", "href", r'([^"]+)')+"« Previous") help = 'Index format: yyyy/mm/dd/part--comic-' class AppleGeeks(_BasicScraper): - latestUrl = 'http://www.applegeeks.com/' - stripUrl = latestUrl + 'comics/viewcomic.php?issue=%s' + url = 'http://www.applegeeks.com/' + stripUrl = url + 'comics/viewcomic.php?issue=%s' imageSearch = compile(tagre("img", "src", r'((?:/comics/)?issue\d+\.jpg)')) prevSearch = compile(r'
Previous Comic
\s*

', MULTILINE) help = 'Index format: n (unpadded)' class Achewood(_BasicScraper): - latestUrl = 'http://www.achewood.com/' - stripUrl = latestUrl + 'index.php?date=%s' + url = 'http://www.achewood.com/' + stripUrl = url + 'index.php?date=%s' imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)')) prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous")) help = 'Index format: mmddyyyy' @@ -138,8 +138,8 @@ class Achewood(_BasicScraper): class AstronomyPOTD(_BasicScraper): - starter = bounceStarter( - 'http://antwrp.gsfc.nasa.gov/apod/astropix.html', + url = 'http://antwrp.gsfc.nasa.gov/apod/astropix.html' + starter = bounceStarter(url, compile(r'>')) stripUrl = 'http://antwrp.gsfc.nasa.gov/apod/ap%s.html' imageSearch = compile(r'") help = 'Index format: yyyy/mm/strip-name' class ASkeweredParadise(_BasicScraper): - latestUrl = 'http://aspcomics.net/' - stripUrl = latestUrl + 'comic/%s' + url = 'http://aspcomics.net/' + stripUrl = url + 'comic/%s' imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+')) prevSearch = compile(tagre("a", "href", "(/comic/\d+)")+r"[^>]+Previous") help = 'Index format: nnn' class AGirlAndHerFed(_BasicScraper): - starter = bounceStarter('http://www.agirlandherfed.com/', + url = 'http://www.agirlandherfed.com/' + starter = bounceStarter(url, compile(r'[^>]+Back')) - stripUrl = 'http://www.agirlandherfed.com/1.%s.html' + stripUrl = url + '1.%s.html' imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)')) prevSearch = compile(r'[^>]+Back') help = 'Index format: nnn' class AetheriaEpics(_BasicScraper): - latestUrl = 'http://aetheria-epics.schala.net/' - stripUrl = latestUrl + '%s.html' + url = 'http://aetheria-epics.schala.net/' + stripUrl = url + '%s.html' imageSearch = compile(tagre("img", "src", r'(\d{5}\.jpg)')) prevSearch = compile(tagre("a", "href", r'(\d{5}\.html)') + "Previous") help = 'Index format: nnn' class AirForceBlues(_BasicScraper): - latestUrl = 'http://www.afblues.com/' - stripUrl = latestUrl + 'wordpress/%s' + url = 'http://www.afblues.com/' + stripUrl = url + 'wordpress/%s' imageSearch = compile(tagre("img", "src", r'(http://www\.afblues\.com/wordpress/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'([^"]+)', after='Previous')) help = 'Index format: yyyy/mm/dd/name/' class AlienShores(_BasicScraper): - latestUrl = 'http://alienshores.com/alienshores_band/' - stripUrl = latestUrl + '%s' + url = 'http://alienshores.com/alienshores_band/' + stripUrl = url + '%s' imageSearch = compile(tagre("img", "src", r'(http://alienshores\.com/alienshores_band/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://alienshores\.com/[^"]+)', after="prev")) help = 'Index format: yyyy/mm/dd/p/' class AllTheGrowingThings(_BasicScraper): - latestUrl = 'http://growingthings.typodmary.com/' - stripUrl = latestUrl + '%s/' + url = 'http://growingthings.typodmary.com/' + stripUrl = url + '%s/' imageSearch = compile(tagre("img", "src", r'(http://growingthings\.typodmary\.com/files/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://growingthings\.typodmary\.com/[^"]+)', after="prev")) help = 'Index format: yyyy/mm/dd/strip-name' class Amya(_BasicScraper): - latestUrl = 'http://www.amyachronicles.com/' - stripUrl = latestUrl + 'archives/%s' + url = 'http://www.amyachronicles.com/' + stripUrl = url + 'archives/%s' imageSearch = compile(tagre("img", "src", r'(http://www\.amyachronicles\.com/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://www\.amyachronicles\.com/archives/\d+)', after="Previous")) help = 'Index format: n' class Angband(_BasicScraper): - latestUrl = 'http://angband.calamarain.net/' - stripUrl = latestUrl + 'view.php?date=%s' + url = 'http://angband.calamarain.net/' + stripUrl = url + 'view.php?date=%s' imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)')) prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)')+"Previous") help = 'Index format: yyyy-mm-dd' class AlsoBagels(_BasicScraper): - latestUrl = 'http://alsobagels.com/' - stripUrl = latestUrl + 'index.php/comic/%s/' + url = 'http://alsobagels.com/' + stripUrl = url + 'index.php/comic/%s/' imageSearch = compile(tagre("img", "src", r'(http://alsobagels\.com/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://alsobagels\.com/index\.php/comic/[^"]+)', after="Previous")) help = 'Index format: strip-name' class Annyseed(_BasicScraper): - latestUrl = 'http://www.colourofivy.com/annyseed_webcomic_latest.htm' + url = 'http://www.colourofivy.com/annyseed_webcomic_latest.htm' stripUrl = 'http://www.colourofivy.com/annyseed_webcomic%s.htm' imageSearch = compile(tagre("img", "src", r'(Annyseed[^"]+)')) prevSearch = compile(r']+)', quote="")) prevSearch = compile(tagre("a", "href", r'([^"]+)') + '« Previous') help = 'Index format: yyyy/mm/' class BiggerThanCheeses(_BasicScraper): - latestUrl = 'http://www.biggercheese.com/' - stripUrl = latestUrl + 'index.php?comic=%s' + url = 'http://www.biggercheese.com/' + stripUrl = url + 'index.php?comic=%s' imageSearch = compile(r'src="(comics/.+?)" alt') prevSearch = compile(r'"(index.php\?comic=.+?)".+?_back') help = 'Index format: n (unpadded)' class BizarreUprising(_BasicScraper): - latestUrl = 'http://www.bizarreuprising.com/' - stripUrl = latestUrl + 'view/%s' + url = 'http://www.bizarreuprising.com/' + stripUrl = url + 'view/%s' imageSearch = compile(tagre("img", "src", r'(comic/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(view/\d+/[^"]+)') + tagre("img", "src", r'images/b_prev\.gif')) help = 'Index format: n/name' class Blip(_BasicScraper): - latestUrl = 'http://blipcomic.com/' - stripUrl = latestUrl + 'index.php?strip_id=%s' + url = 'http://blipcomic.com/' + stripUrl = url + 'index.php?strip_id=%s' imageSearch = compile(r'(istrip_files/strips/.+?)"') prevSearch = compile(r'First.+?"(index.php\?strip_id=.+?)".+?prev') help = 'Index format: n' class BlueCrashKit(_BasicScraper): - latestUrl = 'http://www.bluecrashkit.com/cheese/' - stripUrl = latestUrl + 'node/%s' + url = 'http://www.bluecrashkit.com/cheese/' + stripUrl = url + 'node/%s' imageSearch = compile(r'(/cheese/files/comics/.+?)"') prevSearch = compile(r'(/cheese/node/.+?)".+?previous') help = 'Index format: non' class BMovieComic(_BasicScraper): - latestUrl = 'http://www.bmoviecomic.com/' - stripUrl = latestUrl + '?cid=%s' + url = 'http://www.bmoviecomic.com/' + stripUrl = url + '?cid=%s' imageSearch = compile(r'"(comics/.+?)"') prevSearch = compile(r'(\?cid=.+?)".+?Prev') help = 'Index format: n' @@ -70,33 +70,33 @@ class BMovieComic(_BasicScraper): ### mainline -c BratHalla:360-backup-dad-unstable-plans/ ### to get earlier comics class BratHalla(_BasicScraper): - latestUrl = 'http://brat-halla.com/' - stripUrl = latestUrl + 'comic/%s' + url = 'http://brat-halla.com/' + stripUrl = url + 'comic/%s' imageSearch = compile(r"(/comics/.+?)' target='_blank") prevSearch = compile(r'headernav2".+?"(http.+?)"') help = 'Index format: non' class Brink(_BasicScraper): - latestUrl = 'http://paperfangs.com/brink/' - stripUrl = latestUrl + '?p=%s' + url = 'http://paperfangs.com/brink/' + stripUrl = url + '?p=%s' imageSearch = compile(tagre("img", "src", r'(http://paperfangs\.com/brink/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://paperfangs\.com/brink/[^"]+)', after="prev")) help = 'Index format: n' class BoredAndEvil(_BasicScraper): - baseUrl = 'http://www.boredandevil.com/' - stripUrl = baseUrl + '?date=%s' + url = 'http://www.boredandevil.com/' + stripUrl = url + '?date=%s' imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)')) prevSearch = compile(r'First Comic.+Next page") help = 'Index format: n (unpadded)' @@ -107,40 +107,40 @@ class BoyOnAStickAndSlither(_BasicScraper): class ButternutSquash(_BasicScraper): - latestUrl = 'http://www.butternutsquash.net/' - stripUrl = latestUrl + '%s' + url = 'http://www.butternutsquash.net/' + stripUrl = url + '%s' imageSearch = compile(tagre("img", "src", r'(http://www\.butternutsquash\.net/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://www\.butternutsquash\.net/[^"]+)', after="prev")) help = 'Index format: yyyy/mm/dd/strip-name-author-name' class BlankIt(_BasicScraper): - latestUrl = 'http://blankitcomics.com/' - stripUrl = latestUrl + '%s' + url = 'http://blankitcomics.com/' + stripUrl = url + '%s' imageSearch = compile(tagre("img", "src", r'(http://blankitcomics\.com/bicomics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"')) help = 'Index format: yyyy/mm/dd/name' class BobWhite(_BasicScraper): - latestUrl = 'http://www.bobwhitecomics.com/' - stripUrl = latestUrl + '?webcomic_post=%s' + url = 'http://www.bobwhitecomics.com/' + stripUrl = url + '?webcomic_post=%s' imageSearch = compile(tagre("img", "src", r"(http://www\.bobwhitecomics\.com/wp/wp-content/webcomic/untitled/\d+.jpg)")) prevSearch = compile(tagre("a", "href", "(http://www\.bobwhitecomics\.com/\?webcomic_post=\d+)")+r'[^"]+Previous') help = 'Index format: yyyymmdd' class BigFatWhale(_BasicScraper): - latestUrl = 'http://www.bigfatwhale.com/' - stripUrl = latestUrl + 'archives/bfw_%s.htm' + url = 'http://www.bigfatwhale.com/' + stripUrl = url + 'archives/bfw_%s.htm' imageSearch = compile(tagre("img", "src", r'(archives/bfw_[^"]+|bfw_[^"]+)')) prevSearch = compile(r' HREF="(.+?)" TARGET="_top" TITLE="Previous Cartoon"') help = 'Index format: nnn' class BadassMuthas(_BasicScraper): - latestUrl = 'http://badassmuthas.com/pages/comic.php' - stripUrl = latestUrl + '?%s' + url = 'http://badassmuthas.com/pages/comic.php' + stripUrl = url + '?%s' imageSearch = compile(tagre("img", "src", r'(/images/comicsissue[^"]+)')) prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'/images/comicsbuttonBack\.gif')) help = 'Index format: nnn' @@ -148,7 +148,7 @@ class BadassMuthas(_BasicScraper): class BrightlyWound(_BasicScraper): baseUrl = 'http://www.brightlywound.com/' - latestUrl = baseUrl + '?comic=137' + url = baseUrl + '?comic=137' stripUrl = baseUrl + '?comic=%s' imageSearch = compile(tagre("img", "src", r"(comic/[^']+)", quote="'")) prevSearch = compile(r'

') help = 'Index format: yyyy-mm-dd' class BloodBound(_BasicScraper): - latestUrl = 'http://bloodboundcomic.com/' - stripUrl = latestUrl + '%s' + url = 'http://bloodboundcomic.com/' + stripUrl = url + '%s' imageSearch = compile(tagre("img", "src", r'(http://bloodboundcomic\.com/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://bloodboundcomic\.com/[^"]+)', after="prev")) help = 'Index format: yyyy/mm/name' class BookOfBiff(_BasicScraper): - latestUrl = 'http://www.thebookofbiff.com/' - stripUrl = latestUrl + '%s' + url = 'http://www.thebookofbiff.com/' + stripUrl = url + '%s' imageSearch = compile(tagre("img", "src", r'([^"]+/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous")) help = 'Index format: yyyy/mm/dd/stripnum-strip-name' class BillyTheDunce(_BasicScraper): - latestUrl = 'http://www.duncepress.com/' - stripUrl = latestUrl + '%s/' + url = 'http://www.duncepress.com/' + stripUrl = url + '%s/' imageSearch = compile(tagre("img", "src", r'(http://www\.duncepress\.com/comics/[^"]+)')) prevSearch = compile(r'