From 45df462a472f3203d51b728fc9c4bf6dd0dc185d Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Sun, 2 Dec 2012 18:35:06 +0100 Subject: [PATCH] Fix some comics. --- dosagelib/comic.py | 21 +-- dosagelib/plugins/a.py | 24 +-- dosagelib/plugins/b.py | 26 ++- dosagelib/plugins/c.py | 7 +- dosagelib/plugins/drunkduck.py | 36 ++-- dosagelib/plugins/gocomics.py | 9 +- dosagelib/plugins/keenspot.py | 158 +++++++++--------- dosagelib/plugins/n.py | 2 +- dosagelib/plugins/s.py | 4 +- dosagelib/plugins/smackjeeves.py | 10 +- dosagelib/plugins/snafu.py | 2 +- dosagelib/plugins/v.py | 20 +-- dosagelib/scraper.py | 9 + dosagelib/util.py | 35 ++-- scripts/drunkduck.py | 7 + scripts/gocomics.py | 1 + scripts/keenspot.py | 23 ++- scripts/mktestpage.py | 5 +- scripts/mktestscript.sh | 3 +- .../{update_plugins.py => update_plugins.sh} | 1 - 20 files changed, 222 insertions(+), 181 deletions(-) rename scripts/{update_plugins.py => update_plugins.sh} (99%) diff --git a/dosagelib/comic.py b/dosagelib/comic.py index 63b4f0666..5cc5ed90e 100644 --- a/dosagelib/comic.py +++ b/dosagelib/comic.py @@ -3,12 +3,11 @@ # Copyright (C) 2012 Bastian Kleineidam import os -import locale import rfc822 import time from .output import out -from .util import urlopen, saneDataSize, normaliseURL, unquote +from .util import urlopen, normaliseURL, unquote, strsize from .events import getHandler class FetchComicError(IOError): @@ -94,29 +93,21 @@ class ComicImage(object): fn = os.path.join(comicDir, filename) if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize: self.touch(fn) - out.write('Skipping existing file "%s".' % (fn,), 1) + out.write('Skipping existing file "%s".' % fn, 1) return fn, False try: - out.write('Writing comic to file %s...' % (fn,), 3) + out.write('Writing comic to file %s...' % fn, 3) with open(fn, 'wb') as comicOut: - startTime = time.time() comicOut.write(self.urlobj.content) - endTime = time.time() self.touch(fn) - except: + except Exception: if os.path.isfile(fn): os.remove(fn) raise else: - size = os.path.getsize(fn) - bytes = locale.format('%d', size, True) - if endTime != startTime: - speed = saneDataSize(size / (endTime - startTime)) - else: - speed = '???' - attrs = dict(fn=fn, bytes=bytes, speed=speed) - out.write('Saved "%(fn)s" (%(bytes)s bytes, %(speed)s/sec).' % attrs, 1) + size = strsize(os.path.getsize(fn)) + out.write("Saved %s (%s)." % (fn, size), 1) getHandler().comicDownloaded(self.name, fn) return fn, True diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index 7ea4e6040..559a5d223 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -5,14 +5,15 @@ from re import compile, MULTILINE from ..util import tagre from ..scraper import _BasicScraper -from ..helpers import regexNamer, bounceStarter +from ..helpers import regexNamer, bounceStarter, indirectStarter class ALessonIsLearned(_BasicScraper): - latestUrl = 'http://www.alessonislearned.com/' - stripUrl = latestUrl + 'index.php?comic=%s' - imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)")) + baseUrl = 'http://www.alessonislearned.com/' prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")+r"[^>]+previous") + starter = indirectStarter(baseUrl, prevSearch) + stripUrl = baseUrl + 'index.php?comic=%s' + imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)")) help = 'Index format: nnn' @@ -44,6 +45,7 @@ class AbsurdNotions(_BasicScraper): latestUrl = 'http://www.absurdnotions.org/page129.html' stripUrl = 'http://www.absurdnotions.org/page%s.html' imageSearch = compile(tagre('img', 'src', r'(an[^"]+)')) + multipleImagesPerStrip = True prevSearch = compile(tagre('a', 'href', r'([^"]+)') + tagre('img', 'src', 'nprev\.gif')) help = 'Index format: n (unpadded)' @@ -82,7 +84,7 @@ class Alice(_BasicScraper): class AlienLovesPredator(_BasicScraper): latestUrl = 'http://alienlovespredator.com/' stripUrl = latestUrl + '%s' - imageSearch = compile(tagre("img", "src", r'(http://alienlovespredator\.com/strips/strip_\d\.jpg)')) + imageSearch = compile(tagre("img", "src", r'([^"]+)', after='border="1" alt="" width="750"')) prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev")) help = 'Index format: yyyy/mm/dd/name/' @@ -105,7 +107,7 @@ class AltermetaOld(Altermeta): class Angels2200(_BasicScraper): latestUrl = 'http://www.janahoffmann.com/angels/' stripUrl = latestUrl + '%s' - imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^'\"]+)")) + imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^']+)", quote="'")) prevSearch = compile(tagre("a", "href", r'([^"]+)')+"« Previous") help = 'Index format: yyyy/mm/dd/part--comic-' @@ -113,7 +115,7 @@ class Angels2200(_BasicScraper): class AppleGeeks(_BasicScraper): latestUrl = 'http://www.applegeeks.com/' stripUrl = latestUrl + 'comics/viewcomic.php?issue=%s' - imageSearch = compile(tagre("img", "src", r'"(strips/\d+?\..+?)"')) + imageSearch = compile(tagre("img", "src", r'((?:/comics/)?issue\d+\.jpg)')) prevSearch = compile(r'
Previous Comic
\s*

', MULTILINE) help = 'Index format: n (unpadded)' @@ -124,7 +126,7 @@ class Achewood(_BasicScraper): imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)')) prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous")) help = 'Index format: mmddyyyy' - namer = regexNamer(compile(r'date%3D(\d{8})')) + namer = regexNamer(compile(r'date=(\d+)')) class AstronomyPOTD(_BasicScraper): @@ -132,7 +134,7 @@ class AstronomyPOTD(_BasicScraper): 'http://antwrp.gsfc.nasa.gov/apod/astropix.html', compile(r'>')) stripUrl = 'http://antwrp.gsfc.nasa.gov/apod/ap%s.html' - imageSearch = compile(r'') + imageSearch = compile(r'<') help = 'Index format: yymmdd' @@ -246,6 +248,6 @@ class AlsoBagels(_BasicScraper): class Annyseed(_BasicScraper): latestUrl = 'http://www.colourofivy.com/annyseed_webcomic_latest.htm' stripUrl = 'http://www.colourofivy.com/annyseed_webcomic%s.htm' - imageSearch = compile(r']+)', quote="")) prevSearch = compile(tagre("a", "href", r'([^"]+)') + '« Previous') help = 'Index format: yyyy/mm/' @@ -93,17 +86,18 @@ class Brink(_BasicScraper): class BoredAndEvil(_BasicScraper): - latestUrl = 'http://www.boredandevil.com/' - stripUrl = latestUrl + '?date=%s' + baseUrl = 'http://www.boredandevil.com/' + stripUrl = baseUrl + '?date=%s' imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)')) prevSearch = compile(r'First Comic.+Next page") help = 'Index format: n (unpadded)' @@ -149,9 +143,10 @@ class BadassMuthas(_BasicScraper): class BrightlyWound(_BasicScraper): - latestUrl = 'http://www.brightlywound.com/' - stripUrl = latestUrl + '?comic=%s' - imageSearch = compile(tagre("img", "src", r"(comic/[^']+)")) + baseUrl = 'http://www.brightlywound.com/' + latestUrl = baseUrl + '?comic=137' + stripUrl = baseUrl + '?comic=%s' + imageSearch = compile(tagre("img", "src", r"(comic/[^']+)", quote="'")) prevSearch = compile(r'

') + multipleImagesPerStrip = True help = 'Index format: yyyy/mm/dd/nnn-stripname' @@ -124,8 +125,8 @@ class CrapIDrewOnMyLunchBreak(_BasicScraper): class CtrlAltDel(_BasicScraper): latestUrl = 'http://www.cad-comic.com/cad/' stripUrl = latestUrl + '%s' - imageSearch = compile(r']*alt="< Previous"') +_imageSearch = compile(tagre("img", "src", r'(http://(?:www|img2)\.smackjeeves\.com/images/uploaded/comics/[^"]+)')) +_linkSearch = tagre("a", "href", r'([^"]*/comics/\d+/[^"]*)') +_prevSearch = compile(_linkSearch + '(?:]*alt="< Previous"|< Back)') +_nextSearch = compile(_linkSearch + '(?:]*alt="Next >"|Next >)') def add(name): classname = 'SmackJeeves/' + name @@ -20,8 +22,8 @@ def add(name): return pageUrl.split('/')[-2] globals()[classname] = make_scraper(classname, - starter=bounceStarter(baseUrl, compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + ']*alt="Next >"')), - stripUrl = baseUrl, + starter=bounceStarter(baseUrl, _nextSearch), + stripUrl = baseUrl + '%s/', imageSearch = _imageSearch, prevSearch = _prevSearch, help = 'Index format: nnnn (some increasing number)', diff --git a/dosagelib/plugins/snafu.py b/dosagelib/plugins/snafu.py index a8d20ac39..9d8f127e3 100644 --- a/dosagelib/plugins/snafu.py +++ b/dosagelib/plugins/snafu.py @@ -16,7 +16,7 @@ def add(name, host): globals()[classname] = make_scraper(classname, name='SnafuComics/%s' % name, latestUrl = baseUrl, - stripUrl = baseUrl + 'index.php?strip_id=%s', + stripUrl = baseUrl + '?comic_id=%s', imageSearch = _imageSearch, prevSearch = _prevSearch, help = 'Index format: n (unpadded)', diff --git a/dosagelib/plugins/v.py b/dosagelib/plugins/v.py index 9043b8f41..500a3c97d 100644 --- a/dosagelib/plugins/v.py +++ b/dosagelib/plugins/v.py @@ -8,27 +8,25 @@ from ..scraper import _BasicScraper from ..util import tagre -# XXX make dynamic -class _VGCats(_BasicScraper): +class VGCats(_BasicScraper): latestUrl = 'http://www.vgcats.com/comics/' - imageSearch = compile(r' "$script" -egrep -v "^\. " testresults.txt | egrep "^F " | cut -b "3-" | awk '{ print "make test TESTOUTPUT=/dev/null TESTS=" $0; }' >> "$script" +egrep -v "^\. " testresults.txt | egrep "^F " | cut -b "3-" | sort | awk '{ print "make test TESTOUTPUT=/dev/null TESTS=" $0; }' >> "$script" chmod 755 "$script" - diff --git a/scripts/update_plugins.py b/scripts/update_plugins.sh similarity index 99% rename from scripts/update_plugins.py rename to scripts/update_plugins.sh index 1a155d0fc..cb6debe7d 100755 --- a/scripts/update_plugins.py +++ b/scripts/update_plugins.sh @@ -11,4 +11,3 @@ for script in creators gocomics drunkduck universal keenspot; do "${d}/removeafter.py" "$target" "# DO NOT REMOVE" "${d}/${script}.py" $mincomics >> "$target" done -