Fix comics in module a.py.

2016-04-07 23:21:31 +02:00 · 2016-04-07 23:21:31 +02:00 · bb5b6ffcec
commit bb5b6ffcec
parent 0033a8046b
1 changed file with 25 additions and 62 deletions
--- a/dosagelib/plugins/a.py
+++ b/dosagelib/plugins/a.py
@@ -28,9 +28,9 @@ class AbstruseGoose(_BasicScraper):
    textSearch = compile(tagre("img", "title", r'([^"]+)'))

    @classmethod
-    def namer(cls, imageUrl, pageUrl):
-        index = int(pageUrl.rstrip('/').split('/')[-1])
-        name = imageUrl.split('/')[-1].split('.')[0]
+    def namer(cls, image_url, page_url):
+        index = int(page_url.rstrip('/').split('/')[-1])
+        name = image_url.split('/')[-1].split('.')[0]
        return 'c%03d-%s' % (index, name)


@@ -89,7 +89,7 @@ class AGirlAndHerFed(_BasicScraper):


 class AhoiPolloi(_ParserScraper):
-    url = 'http://ahoipolloi.blogger.de/'
+    url = 'https://ahoipolloi.blogger.de/'
    stripUrl = url + '?day=%s'
    firstStripUrl = stripUrl % '20060306'
    multipleImagesPerStrip = True
@@ -108,13 +108,9 @@ class AhoyEarth(_ParserScraper):
    help = 'Index format: ddmmyyyy'


-class AirForceBlues(_BasicScraper):
-    url = 'http://www.afblues.com/'
-    stripUrl = url + 'wordpress/%s/'
-    firstStripUrl = stripUrl % '1997/09/07/need-a-clue-do-ya'
-    imageSearch = compile(tagre("img", "src", r'(http://www\.afblues\.com/wordpress/comics/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'([^"]+)', after='Previous'))
-    help = 'Index format: yyyy/mm/dd/stripname'
+class AirForceBlues(_WordPressScraper):
+    url = 'http://farvatoons.com/'
+    firstStripUrl = url + 'comic/in-texas-there-are-texans/'


 class ALessonIsLearned(_BasicScraper):
@@ -144,14 +140,9 @@ class AlienLovesPredator(_BasicScraper):
    help = 'Index format: yyyy/mm/dd/name'


-class AlienShores(_BasicScraper):
-    baseUrl = 'http://alienshores.com/'
-    rurl = escape(baseUrl)
-    url = baseUrl + 'alienshores_band/'
-    stripUrl = url + '%s'
-    imageSearch = compile(tagre("img", "src", r'(%salienshores_band/wp-content/uploads/[^"]+)' % rurl))
-    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
-    help = 'Index format: yyyy/mm/dd/p<nn>/'
+class AlienShores(_WordPressScraper):
+    url = 'http://alienshores.com/alienshores_band/'
+    firstStripUrl = url + 'AScomic/updated-cover/'


 class AllTheGrowingThings(_BasicScraper):
@@ -181,15 +172,6 @@ class AlphaLunaSpanish(AlphaLuna):
    firstStripUrl = stripUrl % '1/portada'


-class AlsoBagels(_BasicScraper):
-    url = 'http://alsobagels.com/'
-    rurl = escape(url)
-    stripUrl = url + 'index.php/comic/%s/'
-    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
-    prevSearch = compile(tagre("a", "href", r'(%sindex\.php/comic/[^"]+)' % rurl, after="Previous"))
-    help = 'Index format: strip-name'
-
-
 class Altermeta(_BasicScraper):
    url = 'http://altermeta.net/'
    rurl = escape(url)
@@ -246,28 +228,15 @@ class Angels2200(_BasicScraper):
    help = 'Index format: yyyy/mm/dd/part-<n>-comic-<n>'


-class Annyseed(_BasicScraper):
-    baseUrl = 'http://www.colourofivy.com/'
-    rurl = escape(baseUrl)
-    url = baseUrl + 'annyseed_webcomic_latest.htm'
-    stripUrl = baseUrl + 'annyseed_webcomic%s.htm'
-    imageSearch = compile(tagre("img", "src", r'(Annyseed[^"]+)'))
-    prevSearch = compile(r'<a href="(%s[^"]+)"><img src="Last.gif"' % rurl)
+class Annyseed(_ParserScraper):
+    baseUrl = 'http://www.mirrorwoodcomics.com/'
+    url = baseUrl + 'AnnyseedLatest.htm'
+    stripUrl = baseUrl + 'Annyseed%s.htm'
+    imageSearch = '//div/img[contains(@src, "Annyseed")]'
+    prevSearch = '//a[img[@name="Previousbtn"]]'
    help = 'Index format: nnn'


-class Antics(_BasicScraper):
-    url = 'http://www.anticscomic.com/'
-    rurl = escape(url)
-    stripUrl = url + '?p=%s'
-    firstStripUrl = stripUrl % '3'
-    imageSearch = compile(tagre("img", "src",
-                                r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
-    prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
-                               after='prev'))
-    help = 'Index format: number'
-
-
 class AoiHouse(_ParserScraper):
    url = 'http://www.aoihouse.net/'
    imageSearch = '//div[@id="comic"]/a[2]/img'
@@ -319,31 +288,25 @@ class ASofterWorld(_ParserScraper):
    help = 'Index format: n (unpadded)'


-class AstronomyPOTD(_BasicScraper):
-    baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/'
+class AstronomyPOTD(_ParserScraper):
+    baseUrl = 'http://apod.nasa.gov/apod/'
    url = baseUrl + 'astropix.html'
-    starter = bounceStarter(
-        url, compile(tagre("a", "href", r'(ap\d{6}\.html)') + "&gt;</a>"))
+    starter = bounceStarter(url, '//a[text()=">"]')
    stripUrl = baseUrl + 'ap%s.html'
    firstStripUrl = stripUrl % '061012'
-    imageSearch = compile(tagre("a", "href", r'(image/\d{4}/[^"]+)'))
+    imageSearch = '//a/img'
    multipleImagesPerStrip = True
-    prevSearch = compile(tagre("a", "href", r'(ap\d{6}\.html)') + "&lt;</a>")
+    prevSearch = '//a[text()="<"]'
    help = 'Index format: yymmdd'

    def shouldSkipUrl(self, url, data):
        """Skip pages without images."""
-        return url in (
-            self.stripUrl % '130217',  # video
-            self.stripUrl % '130218',  # video
-            self.stripUrl % '130226',  # video
-            self.stripUrl % '130424',  # video
-        )
+        return data.xpath('//iframe')  # videos

    @classmethod
-    def namer(cls, imageUrl, pageUrl):
-        return '%s-%s' % (pageUrl.split('/')[-1].split('.')[0][2:],
-                          imageUrl.split('/')[-1].split('.')[0])
+    def namer(cls, image_url, page_url):
+        return '%s-%s' % (page_url.split('/')[-1].split('.')[0][2:],
+                          image_url.split('/')[-1].split('.')[0])


 class AxeCop(_WordPressScraper):