diff --git a/dosagelib/plugins/n.py b/dosagelib/plugins/n.py
index cc6aeaea4..a65c93363 100644
--- a/dosagelib/plugins/n.py
+++ b/dosagelib/plugins/n.py
@@ -7,7 +7,7 @@ from __future__ import absolute_import, division, print_function
from re import compile, escape
-from ..scraper import _BasicScraper
+from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter
from ..util import tagre
from .common import (_ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH,
@@ -18,13 +18,14 @@ class Namesake(_ComicControlScraper):
url = 'http://namesakecomic.com/'
-class NamirDeiter(_BasicScraper):
- url = 'http://www.namirdeiter.com/'
- rurl = escape(url)
- stripUrl = url + 'comics/index.php?date=%s'
- firstStripUrl = stripUrl % '19991128'
- imageSearch = compile(tagre("img", "src", r"'?(%scomics/\d+\.jpg)'?" % rurl, quote=""))
- prevSearch = compile(tagre("a", "href", r'(%scomics/index\.php\?date=\d+)' % rurl, quote="'") + "Previous")
+class NamirDeiter(_ParserScraper):
+ baseUrl = 'http://www.namirdeiter.com/comics/'
+ stripUrl = baseUrl + 'index.php?date=%s'
+ url = stripUrl % '20150410'
+ firstStripUrl = baseUrl
+ imageSearch = '//a/img'
+ prevSearch = '//a[text()="Previous"]'
+ endOfLife = True
help = 'Index format: yyyymmdd'
@@ -89,8 +90,8 @@ class Nicky510(_WordPressScraper):
class Nimona(_BasicScraper):
url = 'http://gingerhaze.com/nimona/'
- stripUrl = url + '%s/'
- firstStripUrl = stripUrl % "comic/page-1"
+ stripUrl = url + 'comic/%s'
+ firstStripUrl = stripUrl % "page-1"
imageSearch = compile(tagre("img", "src", r'(http://gingerhaze\.com/sites/default/files/nimona-pages/.+?)'))
prevSearch = compile(r'.*]*prev_button\.gif')
- nextSearch = compile(r'.*]*next_button\.gif')
- help = 'Index format: n (unpadded)'
namer = queryNamer('comicid', use_page_url=True)
@@ -232,11 +212,9 @@ class Precocious(_BasicScraper):
class PS238(_ParserScraper):
url = 'http://ps238.nodwick.com/'
- stripUrl = url + '/comic/%s/'
- starter = bounceStarter
+ stripUrl = url + 'comic/%s/'
imageSearch = '//div[@id="comic"]//img'
prevSearch = '//a[@class="comic-nav-base comic-nav-previous"]'
- nextSearch = '//a[@class="comic-nav-base comic-nav-next"]'
help = 'Index format: yyyy-mm-dd'
diff --git a/dosagelib/plugins/r.py b/dosagelib/plugins/r.py
index 300952c5d..871a18e73 100644
--- a/dosagelib/plugins/r.py
+++ b/dosagelib/plugins/r.py
@@ -5,11 +5,16 @@
from __future__ import absolute_import, division, print_function
-from re import compile, escape
+from re import compile
+try:
+ from urllib.parse import urljoin
+except ImportError:
+ from urlparse import urljoin
from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import bounceStarter
+from ..helpers import indirectStarter
from ..util import tagre
+from .common import _WordPressScraper, xpath_class
class RadioactivePanda(_BasicScraper):
@@ -20,23 +25,23 @@ class RadioactivePanda(_BasicScraper):
help = 'Index format: n (no padding)'
-class RalfTheDestroyer(_ParserScraper):
+class RalfTheDestroyer(_WordPressScraper):
url = 'http://ralfthedestroyer.com/'
- stripUrl = url + '%s/'
- css = True
- imageSearch = '#comic-1 > a:first-child img'
- prevSearch = 'td.comic_navi_left > a:nth-of-type(2)'
- help = 'Index format: stripname'
-class RealLife(_BasicScraper):
+class RealLife(_WordPressScraper):
url = 'http://reallifecomics.com/'
- rurl = escape(url)
stripUrl = url + 'comic.php?comic=%s'
- firstStripUrl = stripUrl % '991115'
- imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl))
- prevSearch = compile(tagre("a", "href", r'((?:%s)?comic\.php\?comic=[^"]+)' % rurl, after="nav-previous"))
- help = 'Index format: monthname-dd-yyyy)'
+ firstStripUrl = stripUrl % 'title-1'
+ help = 'Index format: monthname-dd-yyyy'
+
+    def getPrevUrl(self, url, data):
+        # "Parse" JavaScript: target is the text between the first two quotes
+        prevtag = data.find_class('comic-nav-previous')
+        onclick = prevtag[0].get('onclick') if prevtag else None
+        parts = (onclick or '').split("'")
+        # No nav element, no handler or no quoted target: no previous strip
+        return urljoin(url, parts[1]) if len(parts) > 1 else None
class RealmOfAtland(_BasicScraper):
@@ -48,26 +53,14 @@ class RealmOfAtland(_BasicScraper):
help = 'Index format: nnn'
-class RedMeat(_BasicScraper):
- baseUrl = 'http://www.redmeat.com/redmeat/'
- url = baseUrl + 'current/index.html'
- starter = bounceStarter
- stripUrl = baseUrl + '%s/index.html'
- firstStripUrl = stripUrl % '1996-06-10'
- imageSearch = compile(tagre("img", "src", r'(http://www\.redmeat\.com/imager/b/redmeat/[^"]*\.png)'))
- prevSearch = compile(tagre("a", "href", r'(http://www\.redmeat\.com/[^"]*)', after="prev"))
- nextSearch = compile(tagre("a", "href", r'(http://www\.redmeat\.com/[^"]*)', after="next"))
- help = 'Index format: yyyy-mm-dd'
+class RedMeat(_ParserScraper):
+ url = 'http://www.redmeat.com/max-cannon/FreshMeat'
+ imageSearch = '//div[@class="comicStrip"]//img'
+ prevSearch = '//a[@class="prev"]'
-
-class RedsPlanet(_BasicScraper):
- url = 'http://www.redsplanet.com/comic/'
- rurl = escape(url)
- stripUrl = url + 'rp/%s/'
- firstStripUrl = stripUrl % 'pro/prologue-01'
- imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+_[^"/]+)' % rurl))
- prevSearch = compile(tagre("a", "href", r'(%srp/[^"/]+/[^"/]+/)' % rurl))
- help = 'Index format: chapter/stripname'
+    def namer(self, image_url, page_url):
+        # File name is the last two path segments of the image URL, joined
+        return '_'.join(image_url.rsplit('/', 2)[1:])
class RedString(_BasicScraper):
@@ -79,30 +72,30 @@ class RedString(_BasicScraper):
help = 'Index format: nnn'
-class RomanticallyApocalyptic(_BasicScraper):
+class RomanticallyApocalyptic(_ParserScraper):
url = 'http://romanticallyapocalyptic.com/'
- rurl = escape(url)
- stripUrl = url + '%s/'
- firstStripUrl = stripUrl % '1'
- imageSearch = compile(tagre("img", "src", r'(%sart/\d+[^"]+)' % rurl))
- prevSearch = compile(tagre("a", "href", r'(%s\d+[^"]+)' % rurl) + "\s*" +
- tagre('span', 'class', 'spritePrevious'))
+ stripUrl = url + '%s'
+ firstStripUrl = stripUrl % '0'
+ imageSearch = '//div[%s]/center//img' % xpath_class('comicpanel')
+ prevSearch = '//a[@accesskey="p"]'
+ latestSearch = '//a[span[%s]]' % xpath_class('glyphicon-fast-forward')
+ starter = indirectStarter
help = 'Index format: n'
adult = True
-class Roza(_BasicScraper):
+class Roza(_ParserScraper):
url = 'http://www.junglestudio.com/roza/index.php'
stripUrl = url + '?date=%s'
firstStripUrl = stripUrl % '2007-05-01'
- imageSearch = compile(r'[^>].+?navtable_01.gif')
+ imageSearch = '//img[contains(@src, "pages/")]'
+ prevSearch = '//a[img[contains(@src, "navtable_01.gif")]]'
help = 'Index format: yyyy-mm-dd'
class Ruthe(_BasicScraper):
url = 'http://ruthe.de/'
- stripUrl = url + 'cartoon/%s/datum/ASC'
+ stripUrl = url + 'cartoon/%s/datum/asc/'
firstStripUrl = stripUrl % '1'
lang = 'de'
imageSearch = compile(tagre("img", "src", r'(/?cartoons/strip_\d+[^"]+)'))
diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py
index 5247a678a..513642694 100644
--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@@ -46,13 +46,13 @@ class SailorsunOrg(_WordPressScraper):
url = 'http://sailorsun.org/'
-class SamAndFuzzy(_BasicScraper):
+class SamAndFuzzy(_ParserScraper):
url = 'http://www.samandfuzzy.com/'
- stripUrl = 'http://samandfuzzy.com/%s'
+ stripUrl = url + '%s'
firstStripUrl = stripUrl % '1'
- imageSearch = compile(r'(/comics/.+?)" alt')
- prevSearch = compile(r'">