diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index 703c40d1b..42733b20e 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -2,7 +2,7 @@ # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2013 Bastian Kleineidam -from re import compile, MULTILINE +from re import compile, escape, MULTILINE from ..util import tagre from ..scraper import _BasicScraper from ..helpers import regexNamer, bounceStarter, indirectStarter @@ -11,14 +11,17 @@ from ..helpers import regexNamer, bounceStarter, indirectStarter class AbleAndBaker(_BasicScraper): url = 'http://www.jimburgessdesign.com/comics/index.php' stripUrl = url + '?comic=%s' + firstStripUrl = stripUrl % '1' imageSearch = compile(tagre('img', 'src', r'(comics/.+)')) prevSearch = compile(tagre('a', 'href', r'(.+\d+)') + '.+?previous.gif') help = 'Index format: nnn' class AbsurdNotions(_BasicScraper): - url = 'http://www.absurdnotions.org/page129.html' - stripUrl = 'http://www.absurdnotions.org/page%s.html' + baseurl = 'http://www.absurdnotions.org/' + url = baseurl + 'page129.html' + stripUrl = baseurl + 'page%s.html' + firstStripUrl = stripUrl % '1' imageSearch = compile(tagre('img', 'src', r'(an[^"]+)')) multipleImagesPerStrip = True prevSearch = compile(tagre('a', 'href', r'([^"]+)') + tagre('img', 'src', 'nprev\.gif')) @@ -27,11 +30,12 @@ class AbsurdNotions(_BasicScraper): class AbstruseGoose(_BasicScraper): url = 'http://abstrusegoose.com/' - starter = bounceStarter(url, - compile(tagre('a', 'href', r'(http://abstrusegoose\.com/\d+)')+"Next »")) + rurl = escape(url) + starter = bounceStarter(url, compile(tagre('a', 'href', r'(%s\d+)' % rurl)+"Next »")) stripUrl = url + '%s' - imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)')) - prevSearch = compile(tagre('a', 'href', r'(http://abstrusegoose\.com/\d+)') + r'« Previous') + firstStripUrl = stripUrl % '1' + imageSearch = compile(tagre('img', 'src', r'(%sstrips/[^<>"]+)' % rurl)) + prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'« Previous') help = 'Index format: n (unpadded)' @classmethod @@ -44,6 +48,7 @@ class AbstruseGoose(_BasicScraper): class AcademyVale(_BasicScraper): url = 'http://www.imagerie.com/vale/' stripUrl = url + 'avarch.cgi?%s' + firstStripUrl = stripUrl % '001' imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)')) prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + tagre('img', 'src', 'AVNavBack\.gif')) help = 'Index format: nnn' @@ -52,7 +57,7 @@ class AcademyVale(_BasicScraper): class AhoiPolloi(_BasicScraper): url = 'http://ahoipolloi.blogger.de/' stripUrl = url + '?day=%s' - firstStripUrl = stripUrl % '20060305' + firstStripUrl = stripUrl % '20060306' multipleImagesPerStrip = True lang = 'de' imageSearch = compile(tagre('img', 'src', r'(/static/antville/ahoipolloi/images/[^"]+)')) @@ -69,29 +74,33 @@ class ALessonIsLearned(_BasicScraper): prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")+r"[^>]+previous") starter = indirectStarter(url, prevSearch) stripUrl = url + 'index.php?comic=%s' + firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)")) help = 'Index format: nnn' class Alice(_BasicScraper): url = 'http://alice.alicecomics.com/' + rurl = escape(url) stripUrl = url + '%s/' - imageSearch = compile(tagre("img", "src", r'(http://alice\.alicecomics\.com/wp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://alice\.alicecomics\.com/alicecomics/[^"]+)', after="previous")) + imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%salicecomics/[^"]+)' % rurl, after="previous")) help = 'Index format: name' class AlienLovesPredator(_BasicScraper): url = 'http://alienlovespredator.com/' - stripUrl = url + '%s' + stripUrl = url + '%s/' + firstStripUrl = stripUrl % '2004/10/12/unavoidable-delay' imageSearch = compile(tagre("img", "src", r'([^"]+)', after='border="1" alt="" width="750"')) prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev")) - help = 'Index format: yyyy/mm/dd/name/' + help = 'Index format: yyyy/mm/dd/name' class AlphaLuna(_BasicScraper): url = 'http://www.alphaluna.net/' stripUrl = url + 'issue-%s/' + firstStripUrl = stripUrl % '1/cover' imageSearch = compile(tagre("a", "href", r'[^"]*/(?:issue-|support/upcoming)[^"]+') + tagre("img", "src", r'([^"]*/PAGINAS/[^"]+)')) prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "alt", "Prev")) help = 'Index format: issue/page (e.g. 4/05)' @@ -102,11 +111,13 @@ class AlphaLunaSpanish(AlphaLuna): lang = 'es' url = 'http://alphaluna.net/spanish/' stripUrl = url + 'issue-%s/' + firstStripUrl = stripUrl % '1/portada' class Altermeta(_BasicScraper): url = 'http://altermeta.net/' stripUrl = url + 'archive.php?comic=%s' + firstStripUrl = stripUrl % '0' imageSearch = compile(r'') prevSearch = compile(r'Back') class AmazingSuperPowers(_BasicScraper): url = 'http://www.amazingsuperpowers.com/' + rurl = escape(url) stripUrl = url + '%s/' - imageSearch = compile(tagre("img", "src", r'(http://www\.amazingsuperpowers\.com/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://www\.amazingsuperpowers\.com/[^"]+)', after="prev")) + firstStripUrl = stripUrl % '2007/09/heredity' + imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) help = 'Index format: yyyy/mm/name' @@ -136,15 +150,18 @@ class Angels2200(_BasicScraper): class Antics(_BasicScraper): url = 'http://www.anticscomic.com/' + rurl = escape(url) stripUrl = url + '?p=%s' - imageSearch = compile(tagre("img", "src", r'(http://www\.anticscomic\.com/comics/\d+-\d+-\d+[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://www\.anticscomic\.com/\?p=\d+)', after='prev')) + firstStripUrl = stripUrl % '3' + imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after='prev')) help = 'Index format: number' class AppleGeeks(_BasicScraper): url = 'http://www.applegeeks.com/' stripUrl = url + 'comics/viewcomic.php?issue=%s' + firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", r'((?:/comics/)?issue\d+\.jpg)')) prevSearch = compile(r'
Previous Comic
\s*

', MULTILINE) help = 'Index format: n (unpadded)' @@ -153,6 +170,7 @@ class AppleGeeks(_BasicScraper): class Achewood(_BasicScraper): url = 'http://www.achewood.com/' stripUrl = url + 'index.php?date=%s' + firstStripUrl = stripUrl % '00000000' imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)')) prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous")) help = 'Index format: mmddyyyy' @@ -162,6 +180,7 @@ class Achewood(_BasicScraper): class ASofterWorld(_BasicScraper): url = 'http://www.asofterworld.com/' stripUrl = url + 'index.php?id=%s' + firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("p", "id", "thecomic") + r'\s*' + tagre("img", "src", r'(http://www\.asofterworld\.com/clean/[^"]+)')) prevSearch = compile(tagre("a", "href", "(index\.php\?id=\d+)")+'< back') @@ -169,10 +188,12 @@ class ASofterWorld(_BasicScraper): class AstronomyPOTD(_BasicScraper): - url = 'http://antwrp.gsfc.nasa.gov/apod/astropix.html' + baseurl = 'http://antwrp.gsfc.nasa.gov/apod/' + url = baseurl + 'astropix.html' starter = bounceStarter(url, compile(tagre("a", "href", r'(ap\d{6}\.html)') + ">")) - stripUrl = 'http://antwrp.gsfc.nasa.gov/apod/ap%s.html' + stripUrl = baseurl + 'ap%s.html' + firstStripUrl = stripUrl % '061012' imageSearch = compile(tagre("a", "href", r'(image/\d{4}/[^"]+)')) multipleImagesPerStrip = True prevSearch = compile(tagre("a", "href", r'(ap\d{6}\.html)') + "<") @@ -202,7 +223,8 @@ class AfterStrife(_BasicScraper): class ALLCAPS(_BasicScraper): url = 'http://www.allcapscomix.com/' - stripUrl = url + '%s' + stripUrl = url + '%s/' + firstStripUrl = stripUrl % '2008/08/welcome-to-all-caps' imageSearch = compile(tagre("img", "src", r'(http://www\.allcapscomix\.com/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'([^"]+)')+r"[^<]+Previous") help = 'Index format: yyyy/mm/strip-name' @@ -211,6 +233,7 @@ class ALLCAPS(_BasicScraper): class ASkeweredParadise(_BasicScraper): url = 'http://aspcomics.net/' stripUrl = url + 'comic/%s' + firstStripUrl = stripUrl % '001' imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+')) prevSearch = compile(tagre("a", "href", "(/comic/\d+)")+r"[^>]+Previous") help = 'Index format: nnn' @@ -221,6 +244,7 @@ class AGirlAndHerFed(_BasicScraper): starter = bounceStarter(url, compile(r'[^>]+Back')) stripUrl = url + '1.%s.html' + firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)')) prevSearch = compile(r'[^>]+Back') help = 'Index format: nnn' @@ -229,6 +253,7 @@ class AGirlAndHerFed(_BasicScraper): class AetheriaEpics(_BasicScraper): url = 'http://aetheria-epics.schala.net/' stripUrl = url + '%s.html' + firstStripUrl = stripUrl % '00001' imageSearch = compile(tagre("img", "src", r'(\d{5}\.jpg)')) prevSearch = compile(tagre("a", "href", r'(\d{5}\.html)') + "Previous") help = 'Index format: nnn' @@ -236,10 +261,11 @@ class AetheriaEpics(_BasicScraper): class AirForceBlues(_BasicScraper): url = 'http://www.afblues.com/' - stripUrl = url + 'wordpress/%s' + stripUrl = url + 'wordpress/%s/' + firstStripUrl = stripUrl % '1997/09/07/need-a-clue-do-ya' imageSearch = compile(tagre("img", "src", r'(http://www\.afblues\.com/wordpress/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'([^"]+)', after='Previous')) - help = 'Index format: yyyy/mm/dd/name/' + help = 'Index format: yyyy/mm/dd/stripname' class AlienShores(_BasicScraper): @@ -252,23 +278,27 @@ class AlienShores(_BasicScraper): class AllTheGrowingThings(_BasicScraper): url = 'http://growingthings.typodmary.com/' + rurl = escape(url) stripUrl = url + '%s/' - imageSearch = compile(tagre("img", "src", r'(http://growingthings\.typodmary\.com/files/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://growingthings\.typodmary\.com/[^"]+)', after="prev")) + firstStripUrl = stripUrl % '2009/04/21/all-the-growing-things' + imageSearch = compile(tagre("img", "src", r'(%sfiles/comics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) help = 'Index format: yyyy/mm/dd/strip-name' class Amya(_BasicScraper): url = 'http://www.amyachronicles.com/' + rurl = escape(url) stripUrl = url + 'archives/%s' - imageSearch = compile(tagre("img", "src", r'(http://www\.amyachronicles\.com/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://www\.amyachronicles\.com/archives/\d+)', after="Previous")) + imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="Previous")) help = 'Index format: n' class Angband(_BasicScraper): url = 'http://angband.calamarain.net/' stripUrl = url + 'view.php?date=%s' + firstStripUrl = stripUrl % '2005-12-30' imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)')) prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)')+"Previous") help = 'Index format: yyyy-mm-dd' @@ -276,9 +306,10 @@ class Angband(_BasicScraper): class AlsoBagels(_BasicScraper): url = 'http://alsobagels.com/' + rurl = escape(url) stripUrl = url + 'index.php/comic/%s/' - imageSearch = compile(tagre("img", "src", r'(http://alsobagels\.com/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://alsobagels\.com/index\.php/comic/[^"]+)', after="Previous")) + imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%sindex\.php/comic/[^"]+)' % rurl, after="Previous")) help = 'Index format: strip-name' @@ -292,10 +323,12 @@ class Annyseed(_BasicScraper): class AxeCop(_BasicScraper): url = 'http://axecop.com/' - starter = indirectStarter(url, compile(tagre("a", "href", r'(http://axecop\.com/index\.php/acepisodes/read/episode_\d+/)'))) + rurl = escape(url) + starter = indirectStarter(url, + compile(tagre("a", "href", r'(%sindex\.php/acepisodes/read/episode_\d+/)' % rurl))) stripUrl = url + 'index.php/acepisodes/read/%s/' firstStripUrl = stripUrl % 'episode_0' - imageSearch = compile(tagre("img", "src", r'(http://axecop\.com/images/uploads/(?:axecop|AXE-COP|acmarried|nightmonster)[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://axecop\.com/index\.php/acepisodes/read/[^"]+)') + + imageSearch = compile(tagre("img", "src", r'(%simages/uploads/(?:axecop|AXE-COP|acmarried|nightmonster)[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%sindex\.php/acepisodes/read/[^"]+)' % rurl) + tagre("img", "src", r'http://axecop\.com/acimages/buttons/page_left\.png')) help = 'Index format: stripname' diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py index dd75c4b55..6093e18de 100644 --- a/dosagelib/plugins/b.py +++ b/dosagelib/plugins/b.py @@ -2,7 +2,7 @@ # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2013 Bastian Kleineidam -from re import compile +from re import compile, escape from ..util import tagre from ..scraper import _BasicScraper @@ -20,6 +20,7 @@ class BackwaterPlanet(_BasicScraper): class BadassMuthas(_BasicScraper): url = 'http://badassmuthas.com/pages/comic.php' stripUrl = url + '?%s' + firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", r'(/images/comicsissue[^"]+)')) prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'/images/comicsbuttonBack\.gif')) help = 'Index format: nnn' @@ -28,6 +29,7 @@ class BadassMuthas(_BasicScraper): class BadMachinery(_BasicScraper): url = 'http://scarygoround.com/' stripUrl = url + '?date=%s' + firstStripUrl = stripUrl % '20090918' imageSearch = compile(tagre("img", "src", r'(strips/\d+[^"]+)')) prevSearch = compile(tagre("a", "href", r'(\?date=\d+)') + 'Previous') help = 'Index format: yyyymmdd' @@ -35,32 +37,38 @@ class BadMachinery(_BasicScraper): class Bardsworth(_BasicScraper): url = 'http://www.bardsworth.com/' + rurl = escape(url) stripUrl = url + '?p=%s' - imageSearch = compile(tagre("img", "src", r'(http://www\.bardsworth\.com/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://www\.bardsworth\.com/[^"]+)', after="prev")) + firstStripUrl = stripUrl % '750' + imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) help = 'Index format: nnn' class Baroquen(_BasicScraper): url = 'http://www.baroquencomics.com/' + rurl = escape(url) stripUrl = url + '%s/' - imageSearch = compile(tagre("img", "src", r'(http://www\.baroquencomics\.com/Comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://www\.baroquencomics\.com/[^"]+)', after='prev')) + firstStripUrl = stripUrl % '2008/11/05/raise-the-curtains' + imageSearch = compile(tagre("img", "src", r'(%sComics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after='prev')) help = 'Index format: yyyy/mm/dd/strip-name' class Bearmageddon(_BasicScraper): url = 'http://bearmageddon.com/' + rurl = escape(url) stripUrl = url + '%s/' firstStripUrl = stripUrl % '2011/08/01/page-1' - imageSearch = compile(tagre("img", "src", r'(http://bearmageddon\.com/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://bearmageddon\.com/\d+/\d+/\d+/[^"]+)', after='navi-prev')) + imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after='navi-prev')) help = 'Index format: yyyy/mm/dd/stripname' class BetterDays(_BasicScraper): url = 'http://jaynaylor.com/betterdays/' stripUrl = url + 'archives/%s.html' + firstStripUrl = stripUrl % '2003/04/post-2' imageSearch = compile(tagre("img", "src", r'(/betterdays/comic/[^>]+)', quote="")) prevSearch = compile(tagre("a", "href", r'([^"]+)') + '« Previous') help = 'Index format: yyyy/mm/' @@ -68,9 +76,10 @@ class BetterDays(_BasicScraper): class BetweenFailures(_BasicScraper): url = 'http://betweenfailures.com/' + rurl = escape(url) stripUrl = url + 'archives/archive/%s' - imageSearch = compile(tagre("img", "src", r'(http://betweenfailures\.com/wp-content/webcomic/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://betweenfailures\.com/archives/archive/[^"]+)', after="previous")) + imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%sarchives/archive/[^"]+)' % rurl, after="previous")) help = 'Index format: stripnum-strip-name' @@ -85,6 +94,7 @@ class BigFatWhale(_BasicScraper): class BiggerThanCheeses(_BasicScraper): url = 'http://www.biggercheese.com/' stripUrl = url + 'index.php?comic=%s' + firstStripUrl = stripUrl % '1' imageSearch = compile(r'src="(comics/.+?)" alt') prevSearch = compile(r'"(index.php\?comic=.+?)".+?_back') help = 'Index format: n (unpadded)' @@ -92,15 +102,18 @@ class BiggerThanCheeses(_BasicScraper): class BillyTheDunce(_BasicScraper): url = 'http://www.duncepress.com/' + rurl = escape(url) stripUrl = url + '%s/' - imageSearch = compile(tagre("img", "src", r'(http://www\.duncepress\.com/comics/[^"]+)')) - prevSearch = compile(r'