'
class Annyseed(_ParserScraper):
    # Scraper settings for Annyseed, served from mirrorwoodcomics.com.
    help = 'Index format: nnn'

    # Page addresses; both are built from the site root.
    baseUrl = 'http://www.mirrorwoodcomics.com/'
    url = baseUrl + 'AnnyseedLatest.htm'
    # %s is filled with the strip index (see help for the format).
    stripUrl = baseUrl + 'Annyseed%s.htm'

    # Search expressions: an <img> directly under a <div> whose src
    # contains "Annyseed", and the <a> wrapping the image named
    # "Previousbtn".
    imageSearch = '//div/img[contains(@src, "Annyseed")]'
    prevSearch = '//a[img[@name="Previousbtn"]]'
class AoiHouse(_ParserScraper):
    # Scraper settings for aoihouse.net.
    url = 'http://www.aoihouse.net/'

    # The <a> element with id "cndprev".
    prevSearch = '//a[@id="cndprev"]'
    # The <img> inside the second <a> child of the div with id "comic".
    imageSearch = '//div[@id="comic"]/a[2]/img'
class AppleGeeks(_BasicScraper):
    # Scraper settings for applegeeks.com; strips are addressed by an
    # unpadded issue number in the query string.
    url = 'http://www.applegeeks.com/'
    stripUrl = url + 'comics/viewcomic.php?issue=%s'
    firstStripUrl = stripUrl % '1'

    # Captures the src of an issueNNN.jpg image, with or without a leading
    # /comics/ path component.
    imageSearch = compile(tagre("img", "src", r'((?:/comics/)?issue\d+\.jpg)'))

    # The pattern text previously continued onto the next physical line
    # inside a single-quoted literal, which Python rejects as an
    # unterminated string. The line break is now written as the regex
    # escape \n, so the literal fits on one line and matches the same text.
    # NOTE(review): this pattern contains no capture group and no link
    # markup, unlike the other prevSearch patterns nearby. It looks as if
    # HTML tags were lost from the literal; confirm against the upstream
    # definition before relying on it.
    prevSearch = compile(r'Previous Comic\n\s*', MULTILINE)

    help = 'Index format: n (unpadded)'
class ARedTailsDream(_BasicScraper):
    # Scraper settings for the comic pages under minnasundberg.fi/comic/.
    help = 'Index format: nn'

    # Page addresses, all derived from the site root.
    baseUrl = 'http://www.minnasundberg.fi/'
    url = baseUrl + 'comic/recent.php'
    stripUrl = baseUrl + 'comic/page%s.php'
    firstStripUrl = stripUrl % '00'

    # Captures an image src that starts with "chapter" and contains "/eng".
    imageSearch = compile(tagre('img', 'src', r'(chapter.+?/eng[^"]*)'))
    # Captures a pageNN.php href whose link is followed by an image with
    # "aprev" in its src.
    prevSearch = compile(
        tagre('a', 'href', r'(page\d+\.php)')
        + tagre("img", "src", r'.*?aprev.*?')
    )
class Ashes(_WordPressScraper):
    # Scraper settings for Ashes on flowerlarkstudios.com.
    # The configured url doubles as the first strip's address.
    url = 'http://www.flowerlarkstudios.com/comic/prologue/10232009/'
    firstStripUrl = url

    # Both names below are defined elsewhere in the project.
    # NOTE(review): presumably the starter follows latestSearch from `url`
    # to reach the newest strip; confirm against indirectStarter.
    starter = indirectStarter
    latestSearch = WP_LATEST_SEARCH
class ASkeweredParadise(_BasicScraper):
    # Scraper settings for aspcomics.net; strips are addressed by a
    # three-digit index under /comic/.
    url = 'http://aspcomics.net/'
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % '001'

    # Captures the aspNNN.jpg address under sites/default/files; the
    # trailing [^"]+ requires further characters in the attribute after
    # ".jpg" and leaves them out of the captured group.
    imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+'))

    # The href pattern is now a raw string: "\d" in a normal string literal
    # is an invalid escape sequence that newer Python versions warn about.
    # The resulting pattern text is unchanged.
    prevSearch = compile(
        tagre("a", "href", r"(/comic/\d+)")
        + r"[^>]+Previous"
    )

    help = 'Index format: nnn'
class ASofterWorld(_ParserScraper):
    # Scraper settings for asofterworld.com; strips are addressed by an
    # unpadded numeric id in the query string.
    help = 'Index format: n (unpadded)'

    url = 'http://www.asofterworld.com/'
    stripUrl = url + 'index.php?id=%s'
    firstStripUrl = stripUrl % '1'

    # Any <img> below the div with id "comicimg", and the <a> directly
    # under the div with id "previous".
    imageSearch = '//div[@id="comicimg"]//img'
    prevSearch = '//div[@id="previous"]/a'
class AstronomyPOTD(_ParserScraper):
    # Scraper settings for the pages under apod.nasa.gov/apod/.
    help = 'Index format: yymmdd'
    starter = bounceStarter  # defined elsewhere in the project
    multipleImagesPerStrip = True

    # Page addresses, all derived from the site root.
    baseUrl = 'http://apod.nasa.gov/apod/'
    url = baseUrl + 'astropix.html'
    stripUrl = baseUrl + 'ap%s.html'
    firstStripUrl = stripUrl % '061012'

    # Images wrapped in a link, plus the links whose text is "<" / ">".
    imageSearch = '//a/img'
    prevSearch = '//a[text()="<"]'
    nextSearch = '//a[text()=">"]'

    def shouldSkipUrl(self, url, data):
        """Tell whether a page should be skipped because it has no images.

        Returns whatever ``data.xpath('//iframe')`` yields; a page with an
        iframe is treated as a video page.
        """
        # NOTE(review): presumably a list of matches, so an empty result is
        # falsy -- confirm against the type of `data`.
        return data.xpath('//iframe')

    @classmethod
    def namer(cls, image_url, page_url):
        """Build a file name from the page and image addresses.

        The result is "<page>-<image>": <page> is the last path segment of
        page_url up to its first dot with the first two characters dropped,
        and <image> is the last path segment of image_url up to its first
        dot.
        """
        page_stem = page_url.rsplit('/', 1)[-1].partition('.')[0]
        image_stem = image_url.rsplit('/', 1)[-1].partition('.')[0]
        return '%s-%s' % (page_stem[2:], image_stem)
class AxeCop(_WordPressScraper):
url = 'http://axecop.com/comic/season-two/'