Fix comics in module a.py.

This commit is contained in:
Tobias Gruetzmacher 2016-04-07 23:21:31 +02:00
parent 0033a8046b
commit bb5b6ffcec

View file

@ -28,9 +28,9 @@ class AbstruseGoose(_BasicScraper):
textSearch = compile(tagre("img", "title", r'([^"]+)')) textSearch = compile(tagre("img", "title", r'([^"]+)'))
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, image_url, page_url):
index = int(pageUrl.rstrip('/').split('/')[-1]) index = int(page_url.rstrip('/').split('/')[-1])
name = imageUrl.split('/')[-1].split('.')[0] name = image_url.split('/')[-1].split('.')[0]
return 'c%03d-%s' % (index, name) return 'c%03d-%s' % (index, name)
@ -89,7 +89,7 @@ class AGirlAndHerFed(_BasicScraper):
class AhoiPolloi(_ParserScraper): class AhoiPolloi(_ParserScraper):
url = 'http://ahoipolloi.blogger.de/' url = 'https://ahoipolloi.blogger.de/'
stripUrl = url + '?day=%s' stripUrl = url + '?day=%s'
firstStripUrl = stripUrl % '20060306' firstStripUrl = stripUrl % '20060306'
multipleImagesPerStrip = True multipleImagesPerStrip = True
@ -108,13 +108,9 @@ class AhoyEarth(_ParserScraper):
help = 'Index format: ddmmyyyy' help = 'Index format: ddmmyyyy'
class AirForceBlues(_BasicScraper): class AirForceBlues(_WordPressScraper):
url = 'http://www.afblues.com/' url = 'http://farvatoons.com/'
stripUrl = url + 'wordpress/%s/' firstStripUrl = url + 'comic/in-texas-there-are-texans/'
firstStripUrl = stripUrl % '1997/09/07/need-a-clue-do-ya'
imageSearch = compile(tagre("img", "src", r'(http://www\.afblues\.com/wordpress/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='Previous'))
help = 'Index format: yyyy/mm/dd/stripname'
class ALessonIsLearned(_BasicScraper): class ALessonIsLearned(_BasicScraper):
@ -144,14 +140,9 @@ class AlienLovesPredator(_BasicScraper):
help = 'Index format: yyyy/mm/dd/name' help = 'Index format: yyyy/mm/dd/name'
class AlienShores(_BasicScraper): class AlienShores(_WordPressScraper):
baseUrl = 'http://alienshores.com/' url = 'http://alienshores.com/alienshores_band/'
rurl = escape(baseUrl) firstStripUrl = url + 'AScomic/updated-cover/'
url = baseUrl + 'alienshores_band/'
stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(%salienshores_band/wp-content/uploads/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/p<nn>/'
class AllTheGrowingThings(_BasicScraper): class AllTheGrowingThings(_BasicScraper):
@ -181,15 +172,6 @@ class AlphaLunaSpanish(AlphaLuna):
firstStripUrl = stripUrl % '1/portada' firstStripUrl = stripUrl % '1/portada'
class AlsoBagels(_BasicScraper):
url = 'http://alsobagels.com/'
rurl = escape(url)
stripUrl = url + 'index.php/comic/%s/'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sindex\.php/comic/[^"]+)' % rurl, after="Previous"))
help = 'Index format: strip-name'
class Altermeta(_BasicScraper): class Altermeta(_BasicScraper):
url = 'http://altermeta.net/' url = 'http://altermeta.net/'
rurl = escape(url) rurl = escape(url)
@ -246,28 +228,15 @@ class Angels2200(_BasicScraper):
help = 'Index format: yyyy/mm/dd/part-<n>-comic-<n>' help = 'Index format: yyyy/mm/dd/part-<n>-comic-<n>'
class Annyseed(_BasicScraper): class Annyseed(_ParserScraper):
baseUrl = 'http://www.colourofivy.com/' baseUrl = 'http://www.mirrorwoodcomics.com/'
rurl = escape(baseUrl) url = baseUrl + 'AnnyseedLatest.htm'
url = baseUrl + 'annyseed_webcomic_latest.htm' stripUrl = baseUrl + 'Annyseed%s.htm'
stripUrl = baseUrl + 'annyseed_webcomic%s.htm' imageSearch = '//div/img[contains(@src, "Annyseed")]'
imageSearch = compile(tagre("img", "src", r'(Annyseed[^"]+)')) prevSearch = '//a[img[@name="Previousbtn"]]'
prevSearch = compile(r'<a href="(%s[^"]+)"><img src="Last.gif"' % rurl)
help = 'Index format: nnn' help = 'Index format: nnn'
class Antics(_BasicScraper):
url = 'http://www.anticscomic.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '3'
imageSearch = compile(tagre("img", "src",
r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
after='prev'))
help = 'Index format: number'
class AoiHouse(_ParserScraper): class AoiHouse(_ParserScraper):
url = 'http://www.aoihouse.net/' url = 'http://www.aoihouse.net/'
imageSearch = '//div[@id="comic"]/a[2]/img' imageSearch = '//div[@id="comic"]/a[2]/img'
@ -319,31 +288,25 @@ class ASofterWorld(_ParserScraper):
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
class AstronomyPOTD(_BasicScraper): class AstronomyPOTD(_ParserScraper):
baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/' baseUrl = 'http://apod.nasa.gov/apod/'
url = baseUrl + 'astropix.html' url = baseUrl + 'astropix.html'
starter = bounceStarter( starter = bounceStarter(url, '//a[text()=">"]')
url, compile(tagre("a", "href", r'(ap\d{6}\.html)') + "&gt;</a>"))
stripUrl = baseUrl + 'ap%s.html' stripUrl = baseUrl + 'ap%s.html'
firstStripUrl = stripUrl % '061012' firstStripUrl = stripUrl % '061012'
imageSearch = compile(tagre("a", "href", r'(image/\d{4}/[^"]+)')) imageSearch = '//a/img'
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(tagre("a", "href", r'(ap\d{6}\.html)') + "&lt;</a>") prevSearch = '//a[text()="<"]'
help = 'Index format: yymmdd' help = 'Index format: yymmdd'
def shouldSkipUrl(self, url, data): def shouldSkipUrl(self, url, data):
"""Skip pages without images.""" """Skip pages without images."""
return url in ( return data.xpath('//iframe') # videos
self.stripUrl % '130217', # video
self.stripUrl % '130218', # video
self.stripUrl % '130226', # video
self.stripUrl % '130424', # video
)
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, image_url, page_url):
return '%s-%s' % (pageUrl.split('/')[-1].split('.')[0][2:], return '%s-%s' % (page_url.split('/')[-1].split('.')[0][2:],
imageUrl.split('/')[-1].split('.')[0]) image_url.split('/')[-1].split('.')[0])
class AxeCop(_WordPressScraper): class AxeCop(_WordPressScraper):