Fix comics in module a.py.
This commit is contained in:
parent
0033a8046b
commit
bb5b6ffcec
1 changed file with 25 additions and 62 deletions
|
@@ -28,9 +28,9 @@ class AbstruseGoose(_BasicScraper):
|
|||
textSearch = compile(tagre("img", "title", r'([^"]+)'))
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
index = int(pageUrl.rstrip('/').split('/')[-1])
|
||||
name = imageUrl.split('/')[-1].split('.')[0]
|
||||
def namer(cls, image_url, page_url):
|
||||
index = int(page_url.rstrip('/').split('/')[-1])
|
||||
name = image_url.split('/')[-1].split('.')[0]
|
||||
return 'c%03d-%s' % (index, name)
|
||||
|
||||
|
||||
|
@@ -89,7 +89,7 @@ class AGirlAndHerFed(_BasicScraper):
|
|||
|
||||
|
||||
class AhoiPolloi(_ParserScraper):
|
||||
url = 'http://ahoipolloi.blogger.de/'
|
||||
url = 'https://ahoipolloi.blogger.de/'
|
||||
stripUrl = url + '?day=%s'
|
||||
firstStripUrl = stripUrl % '20060306'
|
||||
multipleImagesPerStrip = True
|
||||
|
@@ -108,13 +108,9 @@ class AhoyEarth(_ParserScraper):
|
|||
help = 'Index format: ddmmyyyy'
|
||||
|
||||
|
||||
class AirForceBlues(_BasicScraper):
|
||||
url = 'http://www.afblues.com/'
|
||||
stripUrl = url + 'wordpress/%s/'
|
||||
firstStripUrl = stripUrl % '1997/09/07/need-a-clue-do-ya'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.afblues\.com/wordpress/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='Previous'))
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
class AirForceBlues(_WordPressScraper):
|
||||
url = 'http://farvatoons.com/'
|
||||
firstStripUrl = url + 'comic/in-texas-there-are-texans/'
|
||||
|
||||
|
||||
class ALessonIsLearned(_BasicScraper):
|
||||
|
@@ -144,14 +140,9 @@ class AlienLovesPredator(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/name'
|
||||
|
||||
|
||||
class AlienShores(_BasicScraper):
|
||||
baseUrl = 'http://alienshores.com/'
|
||||
rurl = escape(baseUrl)
|
||||
url = baseUrl + 'alienshores_band/'
|
||||
stripUrl = url + '%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(%salienshores_band/wp-content/uploads/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
||||
help = 'Index format: yyyy/mm/dd/p<nn>/'
|
||||
class AlienShores(_WordPressScraper):
|
||||
url = 'http://alienshores.com/alienshores_band/'
|
||||
firstStripUrl = url + 'AScomic/updated-cover/'
|
||||
|
||||
|
||||
class AllTheGrowingThings(_BasicScraper):
|
||||
|
@@ -181,15 +172,6 @@ class AlphaLunaSpanish(AlphaLuna):
|
|||
firstStripUrl = stripUrl % '1/portada'
|
||||
|
||||
|
||||
class AlsoBagels(_BasicScraper):
|
||||
url = 'http://alsobagels.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'index.php/comic/%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%sindex\.php/comic/[^"]+)' % rurl, after="Previous"))
|
||||
help = 'Index format: strip-name'
|
||||
|
||||
|
||||
class Altermeta(_BasicScraper):
|
||||
url = 'http://altermeta.net/'
|
||||
rurl = escape(url)
|
||||
|
@@ -246,28 +228,15 @@ class Angels2200(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/part-<n>-comic-<n>'
|
||||
|
||||
|
||||
class Annyseed(_BasicScraper):
|
||||
baseUrl = 'http://www.colourofivy.com/'
|
||||
rurl = escape(baseUrl)
|
||||
url = baseUrl + 'annyseed_webcomic_latest.htm'
|
||||
stripUrl = baseUrl + 'annyseed_webcomic%s.htm'
|
||||
imageSearch = compile(tagre("img", "src", r'(Annyseed[^"]+)'))
|
||||
prevSearch = compile(r'<a href="(%s[^"]+)"><img src="Last.gif"' % rurl)
|
||||
class Annyseed(_ParserScraper):
|
||||
baseUrl = 'http://www.mirrorwoodcomics.com/'
|
||||
url = baseUrl + 'AnnyseedLatest.htm'
|
||||
stripUrl = baseUrl + 'Annyseed%s.htm'
|
||||
imageSearch = '//div/img[contains(@src, "Annyseed")]'
|
||||
prevSearch = '//a[img[@name="Previousbtn"]]'
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class Antics(_BasicScraper):
|
||||
url = 'http://www.anticscomic.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '?p=%s'
|
||||
firstStripUrl = stripUrl % '3'
|
||||
imageSearch = compile(tagre("img", "src",
|
||||
r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
|
||||
after='prev'))
|
||||
help = 'Index format: number'
|
||||
|
||||
|
||||
class AoiHouse(_ParserScraper):
|
||||
url = 'http://www.aoihouse.net/'
|
||||
imageSearch = '//div[@id="comic"]/a[2]/img'
|
||||
|
@@ -319,31 +288,25 @@ class ASofterWorld(_ParserScraper):
|
|||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
class AstronomyPOTD(_BasicScraper):
|
||||
baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/'
|
||||
class AstronomyPOTD(_ParserScraper):
|
||||
baseUrl = 'http://apod.nasa.gov/apod/'
|
||||
url = baseUrl + 'astropix.html'
|
||||
starter = bounceStarter(
|
||||
url, compile(tagre("a", "href", r'(ap\d{6}\.html)') + "></a>"))
|
||||
starter = bounceStarter(url, '//a[text()=">"]')
|
||||
stripUrl = baseUrl + 'ap%s.html'
|
||||
firstStripUrl = stripUrl % '061012'
|
||||
imageSearch = compile(tagre("a", "href", r'(image/\d{4}/[^"]+)'))
|
||||
imageSearch = '//a/img'
|
||||
multipleImagesPerStrip = True
|
||||
prevSearch = compile(tagre("a", "href", r'(ap\d{6}\.html)') + "<</a>")
|
||||
prevSearch = '//a[text()="<"]'
|
||||
help = 'Index format: yymmdd'
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
"""Skip pages without images."""
|
||||
return url in (
|
||||
self.stripUrl % '130217', # video
|
||||
self.stripUrl % '130218', # video
|
||||
self.stripUrl % '130226', # video
|
||||
self.stripUrl % '130424', # video
|
||||
)
|
||||
return data.xpath('//iframe') # videos
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
return '%s-%s' % (pageUrl.split('/')[-1].split('.')[0][2:],
|
||||
imageUrl.split('/')[-1].split('.')[0])
|
||||
def namer(cls, image_url, page_url):
|
||||
return '%s-%s' % (page_url.split('/')[-1].split('.')[0][2:],
|
||||
image_url.split('/')[-1].split('.')[0])
|
||||
|
||||
|
||||
class AxeCop(_WordPressScraper):
|
||||
|
|
Loading…
Reference in a new issue