From 0556ffd30a6616c1ee515b18fece9d9d9bfedb59 Mon Sep 17 00:00:00 2001
From: Bastian Kleineidam <calvin@debian.org>
Date: Mon, 26 Nov 2012 18:44:31 +0100
Subject: [PATCH] Fix comics, improve tests, use python-requests.

---
 doc/README.txt                   |  11 +-
 doc/changelog.txt                |   1 +
 dosagelib/comic.py               |  26 +--
 dosagelib/plugins/d.py           |   2 +-
 dosagelib/plugins/drunkduck.py   |  21 ++-
 dosagelib/plugins/fallenangel.py |  57 ++----
 dosagelib/plugins/gocomics.py    |  70 ++++----
 dosagelib/plugins/keenspot.py    |   6 +-
 dosagelib/plugins/num.py         |   1 +
 dosagelib/plugins/s.py           |   2 +-
 dosagelib/plugins/uc.py          | 296 ++++---------------------------
 dosagelib/plugins/y.py           |   1 +
 dosagelib/scraper.py             |   9 +-
 dosagelib/util.py                |  44 ++---
 requirements.txt                 |   2 +
 tests/test_comics.py             |  45 +++--
 16 files changed, 191 insertions(+), 403 deletions(-)
 create mode 100644 requirements.txt

diff --git a/doc/README.txt b/doc/README.txt
index 6e48f11c5..5041c56f2 100644
--- a/doc/README.txt
+++ b/doc/README.txt
@@ -40,10 +40,11 @@ manual page.
 
 Dependencies
 -------------
-Dosage requires Python version 2.7 or higher, which can be downloaded
-from http://www.python.org.
-No external Python modules are required - only the Python Standard Library
-that gets installed with Python.
+Dosage requires Python version 2.7 or higher, which can be downloaded
+from http://www.python.org/
+
+The python-requests module must also be installed; it can be downloaded
+from http://docs.python-requests.org/en/latest/
 
 Installation
 -------------
@@ -59,7 +60,7 @@ or if you do not have root permissions:
 
 Technical Description
 ----------------------
-Dosage is written entirely in Python and relies on regular expressions to
+Dosage is written in Python and relies on regular expressions to
 do most of the grunt work.
 
 For each webcomic Dosage has a plugin module, found in the "plugins"
diff --git a/doc/changelog.txt b/doc/changelog.txt
index 5cb01c80a..81b1608e2 100644
--- a/doc/changelog.txt
+++ b/doc/changelog.txt
@@ -4,6 +4,7 @@ Features:
 - cmdline: Added proper return codes for error conditions.
 - comics: Added more robust regular expressions for HTML tags.
   They match case insensitive and ignore whitespaces now.
+- comics: Use the python-requests module for HTTP requests.
 
 Changes:
 - installation: Added support for dynamic configuration values.
diff --git a/dosagelib/comic.py b/dosagelib/comic.py
index 70e12fec4..b2e21a892 100644
--- a/dosagelib/comic.py
+++ b/dosagelib/comic.py
@@ -2,7 +2,6 @@
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012 Bastian Kleineidam
 
-import urllib2
 import os
 import locale
 import rfc822
@@ -55,18 +54,24 @@ class ComicImage(object):
         """Connect to host and get meta information."""
         try:
             self.urlobj = urlopen(self.url, referrer=self.referrer)
-        except urllib2.HTTPError as he:
-            raise FetchComicError('Unable to retrieve URL.', self.url, he.code)
+        except IOError as he:
+            raise FetchComicError('Unable to retrieve URL.', self.url, getattr(he, 'code', None))  # plain IOError has no .code
 
-        if self.urlobj.info().getmaintype() != 'image' and \
-           self.urlobj.info().gettype() not in ('application/octet-stream', 'application/x-shockwave-flash'):
+        content_type = self.urlobj.headers.get('content-type', '')  # header may be absent
+        content_type = content_type.split(';', 1)[0].strip().lower()  # drop parameters such as charset
+        if '/' in content_type:
+            maintype, subtype = content_type.split('/', 1)
+        else:
+            maintype = content_type
+            subtype = None
+        if maintype != 'image' and content_type not in ('application/octet-stream', 'application/x-shockwave-flash'):
             raise FetchComicError('No suitable image found to retrieve.', self.url)
 
         # Always use mime type for file extension if it is sane.
-        if self.urlobj.info().getmaintype() == 'image':
-            self.ext = '.' + self.urlobj.info().getsubtype().replace('jpeg', 'jpg')
-        self.contentLength = int(self.urlobj.info().get('content-length', 0))
-        self.lastModified = self.urlobj.info().get('last-modified')
+        if maintype == 'image':
+            self.ext = '.' + subtype.replace('jpeg', 'jpg')
+        self.contentLength = int(self.urlobj.headers.get('content-length', 0))
+        self.lastModified = self.urlobj.headers.get('last-modified')
         out.write('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength), 2)
 
     def touch(self, filename):
@@ -88,7 +93,6 @@ class ComicImage(object):
 
         fn = os.path.join(comicDir, filename)
         if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize:
-            self.urlobj.close()
             self.touch(fn)
             out.write('Skipping existing file "%s".' % (fn,), 1)
             return fn, False
@@ -97,7 +101,7 @@ class ComicImage(object):
             out.write('Writing comic to file %s...' % (fn,), 3)
             with open(fn, 'wb') as comicOut:
                 startTime = time.time()
-                comicOut.write(self.urlobj.read())
+                comicOut.write(self.urlobj.content)
                 endTime = time.time()
             self.touch(fn)
         except:
@@ -114,7 +118,5 @@ class ComicImage(object):
             attrs = dict(fn=fn, bytes=bytes, speed=speed)
             out.write('Saved "%(fn)s" (%(bytes)s bytes, %(speed)s/sec).' % attrs, 1)
             getHandler().comicDownloaded(self.name, fn)
-        finally:
-            self.urlobj.close()
 
         return fn, True
diff --git a/dosagelib/plugins/d.py b/dosagelib/plugins/d.py
index 175c9b0e3..685a6dde9 100644
--- a/dosagelib/plugins/d.py
+++ b/dosagelib/plugins/d.py
@@ -13,7 +13,7 @@ from ..util import tagre, getQueryParams
 class DMFA(_BasicScraper):
     latestUrl = 'http://www.missmab.com/'
     stripUrl = latestUrl + 'Comics/Vol_%s.php'
-    imageSearch = compile(tagre("img", "src", r'(Comics/|Vol)[^"]+)'))
+    imageSearch = compile(tagre("img", "src", r'((?:Comics/|Vol)[^"]+)'))
     prevSearch = compile(tagre("a", "href", r'([^"])+')+
       tagre("img", "src", r'(?:../)?Images/comicprev.gif'))
     help = 'Index format: nnn (normally, some specials)'
diff --git a/dosagelib/plugins/drunkduck.py b/dosagelib/plugins/drunkduck.py
index a8ca15c49..492da01bd 100644
--- a/dosagelib/plugins/drunkduck.py
+++ b/dosagelib/plugins/drunkduck.py
@@ -4,22 +4,29 @@
 
 from re import compile
 from ..scraper import make_scraper
-from ..helpers import bounceStarter, queryNamer
+from ..helpers import bounceStarter
 from ..util import tagre
 
 
 def add(name):
     classname = 'DrunkDuck_%s' % name
     url = 'http://www.drunkduck.com/%s/' % name
-    linkSearch = tagre("a", "href", r"(/[^/]*/index\.php\?p=\d+)", quote="'", after="The %s page")
+    linkSearch = tagre("a", "href", r"(/%s/\d+/)" % name)
+
+    @classmethod
+    def namer(cls, imageUrl, pageUrl):
+        index = int(pageUrl.rstrip('/').split('/')[-1])
+        ext = imageUrl.rsplit('.')[-1]
+        return '%d.%s' % (index, ext)
+
     globals()[classname] = make_scraper(classname,
         name = 'DrunkDuck/' + name,
-        starter = bounceStarter(url, compile(linkSearch % 'next')),
-        stripUrl = url + 'index.php?p=%s' % name,
-        imageSearch = compile(tagre("img", "src", r"(http://[a-z0-9]*\.drunkduck\.com/[^/]*/pages/[^'/]+)", quote="'")),
-        prevSearch= compile(linkSearch % 'previous'),
+        starter = bounceStarter(url, compile(linkSearch + tagre("img", "class", "arrow_next"))),
+        stripUrl = url + '%s/',
+        imageSearch = compile(tagre("img", "src", r'(http://media\.drunkduck\.com\.s3\.amazonaws\.com:80/[^"]+)', before="page-image")),
+        prevSearch= compile(linkSearch + tagre("img", "class", "arrow_prev")),
         help = 'Index format: n (unpadded)',
-        namer = queryNamer('p', usePageUrl=True),
+        namer = namer,
     )
 
 comics = (
diff --git a/dosagelib/plugins/fallenangel.py b/dosagelib/plugins/fallenangel.py
index d0fceea5f..b9d3fb933 100644
--- a/dosagelib/plugins/fallenangel.py
+++ b/dosagelib/plugins/fallenangel.py
@@ -1,47 +1,26 @@
 # -*- coding: iso-8859-1 -*-
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012 Bastian Kleineidam
-from ..scraper import _BasicScraper
+
+from re import compile
+from ..scraper import make_scraper
+from ..util import asciify
 
 
-def fallenangel(name, shortname):
-    pass # XXX
-
-class _TheFallenAngel(_BasicScraper):
-    imageSearch = compile(r'SRC="(http://www.thefallenangel.co.uk/\w+comics/.+?)"')
-    prevSearch = compile(r' <a href="(http://www.thefallenangel.co.uk/.+?)"><img[^>]+?src="http://www.thefallenangel.co.uk/images/previousday.jpg"')
-    help = 'Index format: yyyymmdd'
-
-    @property
-    def baseUrl(self):
-        return 'http://www.thefallenangel.co.uk/cgi-bin/%sautokeen/autokeenlite.cgi' % (self.shortName,)
-
-
-    @property
-    def stripUrl(self):
-        return self.baseUrl + '?date=%s'
-
-
-    def starter(self):
-        return self.baseUrl
-
-
-
-class HighMaintenance(_TheFallenAngel):
-    name = 'TheFallenAngel/HighMaintenance'
-    shortName = 'hm'
-
-
-
-class FAWK(_TheFallenAngel):
-    name = 'TheFallenAngel/FAWK'
-    shortName = 'fawk'
-
-
-
-class MalloryChan(_TheFallenAngel):
-    name = 'TheFallenAngel/MalloryChan'
-    shortName = 'mallorychan'
+def add(name, shortname):
+    latestUrl = 'http://www.thefallenangel.co.uk/cgi-bin/%sautokeen/autokeenlite.cgi' % shortname
+    classname = asciify(name)
+    globals()[classname] = make_scraper(classname,
+        latestUrl = latestUrl,
+        stripUrl = latestUrl + '?date=%s',
+        name='FallenAngel/' + name,
+        imageSearch = compile(r'SRC="(http://www.thefallenangel.co.uk/\w+comics/.+?)"'),
+        prevSearch = compile(r' <a href="(http://www.thefallenangel.co.uk/.+?)"><img[^>]+?src="http://www.thefallenangel.co.uk/images/previousday.jpg"'),
+        help = 'Index format: yyyymmdd',
+    )
 
 
+add('HighMaintenance', 'hm')
+add('FAWK', 'fawk')
+add('MalloryChan', 'mallorychan')
 
diff --git a/dosagelib/plugins/gocomics.py b/dosagelib/plugins/gocomics.py
index 7a861e309..dd6dacee2 100644
--- a/dosagelib/plugins/gocomics.py
+++ b/dosagelib/plugins/gocomics.py
@@ -29,30 +29,30 @@ def add(name, repl=''):
 
 
 # http://www.gocomics.com/features
-# note that comics from creators.com are not repeated here
+# Duplicate comics from creators.com are commented out
 add('2 Cows and a Chicken')
 add('9 Chickweed Lane')
 add('9 to 5')
 add('The Academia Waltz')
 add('Adam at Home')
-add('Agnes')
+#add('Agnes')
 add('Alley Oop', repl='-')
 add('Andertoons')
-add('Andy Capp')
+#add('Andy Capp')
 add('Angry Little Girls', repl='-')
 add('Animal Crackers')
 add('Annie')
 add('The Argyle Sweater')
 add('Arlo and Janis')
-add('Ask Shagg')
-add('BC')
+#add('Ask Shagg')
+#add('BC')
 add('Back in the Day')
 add('Bad Reporter')
 add('Baldo')
-add('Ballard Street')
+#add('Ballard Street')
 add('Banana Triangle', repl='-')
 add('Barkeater Lake')
-add('The Barn')
+#add('The Barn')
 add('Barney and Clyde')
 add('Basic Instructions')
 add('Beardo')
@@ -81,13 +81,13 @@ add('Brewster Rockit')
 add('Broom Hilda')
 add('The Buckets')
 add('Buni')
-add('Cafe con Leche')
+#add('Cafe con Leche')
 add('Calvin and Hobbes')
 add('Candorville')
 add('Cathy')
 add('Cest la Vie')
 add('Cheap Thrills Cuisine', repl='-')
-add('Chuckle Bros')
+#add('Chuckle Bros')
 add('Citizen Dog')
 add('The City')
 add('Cleats')
@@ -99,15 +99,15 @@ add('Cow and Boy')
 add('CowTown')
 add('Crumb')
 add('Cul de Sac')
-add('Daddys Home')
+#add('Daddys Home')
 add('Dark Side of the Horse')
 add('Deep Cover')
-add('Diamond Lil')
+#add('Diamond Lil')
 add('Dick Tracy')
-add('The Dinette Set')
+#add('The Dinette Set')
 add('Dixie Drive', repl='-')
-add('Dog Eat Doug')
-add('Dogs of C Kennel')
+#add('Dog Eat Doug')
+#add('Dogs of C Kennel')
 add('Domestic Abuse')
 add('Doonesbury')
 add('The Doozies')
@@ -122,18 +122,18 @@ add('F Minus')
 add('Family Tree')
 add('Farcus')
 add('Fat Cats', repl='-')
-add('Flo and Friends')
+#add('Flo and Friends')
 add('The Flying McCoys')
 add('Foolish Mortals', repl='-')
 add('For Better or For Worse')
-add('For Heavens Sake')
+#add('For Heavens Sake')
 add('Fort Knox')
 add('FoxTrot')
 add('FoxTrot Classics')
 add('Frank and Ernest')
 add('Frazz')
 add('Fred Basset')
-add('Free Range')
+#add('Free Range')
 add('Freshly Squeezed')
 add('Frog Applause')
 add('The Fusco Brothers')
@@ -154,9 +154,9 @@ add('Haiku Ewe')
 add('Ham Shears')
 add('Health Capsules')
 add('Heart of the City')
-add('Heathcliff')
+#add('Heathcliff')
 add('Heavenly Nostrils')
-add('Herb and Jamaal')
+#add('Herb and Jamaal')
 add('Herman')
 add('Home and Away')
 add('HUBRIS!')
@@ -184,7 +184,7 @@ add('La Cucaracha')
 add('Last Kiss')
 add('The LeftyBosco Picture Show')
 add('Legend of Bill')
-add('Liberty Meadows')
+#add('Liberty Meadows')
 add('Lil Abner')
 add('Lio')
 add('Little Dog Lost')
@@ -201,7 +201,7 @@ add('Maintaining')
 add('Marias Day')
 add('Marmaduke')
 add('McArroni')
-add('The Meaning of Lila')
+#add('The Meaning of Lila')
 add('Medium Large')
 add('Meg Classics')
 add('The Middletons')
@@ -209,7 +209,7 @@ add('Mike du Jour')
 add('Minimum Security')
 add('Moderately Confused')
 add('Molly and the Bear')
-add('Momma')
+#add('Momma')
 add('Monty')
 add('Motley Classics')
 add('Mr. Gigi and the Squid')
@@ -217,7 +217,7 @@ add('Mutt and Jeff')
 add('My Cage')
 add('MythTickle')
 add('Nancy')
-add('Nest Heads')
+#add('Nest Heads')
 add('NEUROTICA')
 add('New Adventures of Queen Victoria')
 add('Non Sequitur')
@@ -225,10 +225,10 @@ add('The Norm Classics')
 add('Nothing is Not Something')
 add('Off the Mark')
 add('Ollie and Quentin')
-add('On A Claire Day')
-add('One Big Happy')
+#add('On A Claire Day')
+#add('One Big Happy')
 add('Ordinary Bill')
-add('The Other Coast')
+#add('The Other Coast')
 add('Out of the Gene Pool Re-Runs')
 add('Over the Hedge')
 add('Overboard')
@@ -254,10 +254,10 @@ add('Reply All')
 add('Rip Haywire')
 add('Ripleys Believe It or Not')
 add('Rose is Rose')
-add('Rubes')
+#add('Rubes')
 add('Rudy Park')
 add('Savage Chickens')
-add('Scary Gary')
+#add('Scary Gary')
 add('Shirley and Son Classics')
 add('Shoe')
 add('Shoecabbage')
@@ -266,11 +266,11 @@ add('Skin Horse')
 add('Skippy')
 add('Slowpoke')
 add('Soup to Nutz')
-add('Speed Bump')
+#add('Speed Bump')
 add('Spot the Frog')
 add('Starslip')
 add('Stone Soup')
-add('Strange Brew')
+#add('Strange Brew')
 add('The Sunshine Club')
 add('Sylvia')
 add('Tank McNamara')
@@ -280,7 +280,7 @@ add('Tales of TerraTopia')
 add('That is Priceless')
 add('Thats Life')
 add('Thatababy')
-add('Thin Lines')
+#add('Thin Lines')
 add('Tiny Sepuku')
 add('TOBY')
 add('Todays Dogg')
@@ -293,12 +293,12 @@ add('Unstrange Phenomena')
 add('U.S. Acres')
 add('Viivi and Wagner')
 add('Watch Your Head')
-add('Wee Pals')
-add('Wizard of Id')
+#add('Wee Pals')
+#add('Wizard of Id')
 add('Working Daze')
-add('Working It Out')
+#add('Working It Out')
 add('W.T. Duck')
-add('Zack Hill')
+#add('Zack Hill')
 add('Ziggy')
 
 # http://www.gocomics.com/explore/editorial_list
diff --git a/dosagelib/plugins/keenspot.py b/dosagelib/plugins/keenspot.py
index 7bb18117a..e0e8686b8 100644
--- a/dosagelib/plugins/keenspot.py
+++ b/dosagelib/plugins/keenspot.py
@@ -18,9 +18,9 @@ def add(name, urls):
         name='KeenSpot/' + name,
         latestUrl=latestUrl,
         stripUrl=baseUrl + 'd/%s.html',
-        imageSearch = compile(tagre("img", "src", r'([^"]*comics/[^"]+)')),
-        prevSearch = compile(tagre("a", "href", r'"([^"]*d/\d{8}\.html)') +
-           '(?:<img[^>]+?(?:name="previous_day"|alt="Previous"|src="[^"]*back[^"]*")|Previous comic)'),
+        imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)')),
+        prevSearch = compile(tagre("a", "href", r'([^"]*/d/\d{8}\.html)') +
+           '(?:Previous comic|'+tagre("img", "alt", "Previous comic")+')'),
         help = 'Index format: yyyymmdd',
     )
 
diff --git a/dosagelib/plugins/num.py b/dosagelib/plugins/num.py
index 4adb53102..7504bad25 100644
--- a/dosagelib/plugins/num.py
+++ b/dosagelib/plugins/num.py
@@ -11,6 +11,7 @@ from ..scraper import _BasicScraper
 class NineteenNinetySeven(_BasicScraper):
     name = '1997'
     latestUrl = 'http://www.1977thecomic.com/'
+    stripUrl = latestUrl + '%s'
     imageSearch = compile(tagre("img", "src", r'(http://www\.1977thecomic\.com/comics-1977/[^"]+)'))
     prevSearch = compile(tagre("a", "href", r'([^"]+)')+"Previous")
     help = 'Index format: yyyy/mm/dd/strip-name'
diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py
index fbf5235a3..0cbd3f107 100644
--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@@ -61,7 +61,7 @@ class Sheldon(_BasicScraper):
     latestUrl = 'http://www.sheldoncomics.com/'
     stripUrl = latestUrl + 'archive/%s.html'
     imageSearch = compile(tagre("img", "src", r'(/strips/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'/archive/\d+\.html)', after="sidenav-prev"))
+    prevSearch = compile(tagre("a", "href", r'(/archive/\d+\.html)', after="sidenav-prev"))
     help = 'Index format: yymmdd'
 
 
diff --git a/dosagelib/plugins/uc.py b/dosagelib/plugins/uc.py
index 00d1ec553..22c16b243 100644
--- a/dosagelib/plugins/uc.py
+++ b/dosagelib/plugins/uc.py
@@ -1,280 +1,54 @@
 # -*- coding: iso-8859-1 -*-
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012 Bastian Kleineidam
-
-from re import compile, sub
+"""
+The Universal comics only have some samples, but those samples are always the newest ones.
+"""
+import datetime
+from re import compile, escape
 from ..scraper import make_scraper
-from ..util import fetchUrl, tagre
+from ..util import tagre, asciify, getPageContent
 
 
-def add(name, shortName):
-    homepage = 'http://content.uclick.com/a2z.html'
-    baseUrl = 'http://www.uclick.com/client/zzz/%s/'
-    latestUrl = baseUrl % shortName
-    classname = 'UClick_%s' % name
+def parse_strdate(strdate):
+    """Parse a date string such as "Sunday, November 11, 2012". XXX this is locale dependent but it should not be."""
+    return datetime.datetime.strptime(strdate, "%A, %B %d, %Y")
+
+
+def add(name, category):
+    shortname = name.replace(' ', '').lower()
+    latestUrl = 'http://www.universaluclick.com/comics/%s/%s' % (category, shortname)
+    classname = 'UClick_%s' % asciify(name)
 
     @classmethod
-    def fetchSubmodules(cls):
-        exclusions = ('index',)
-        # XXX refactor this mess
-        submoduleSearch = compile(tagre("a", "href", r'(http://content\.uclick\.com/content/\w+\.html)'))
-        partsMatch = compile(tagre("a", "href", r'http://content\.uclick\.com/content/(\w+?)\.html'))
-        matches = fetchManyMatches(cls.homepage, (submoduleSearch,))[0]
-        possibles = [partsMatch.match(match).groups() for match in matches]
-
-        def normalizeName(name):
-            name = sub(r'&(.)acute;', r'\1', name).title()
-            return ''.join([c for c in name if c.isalnum()])
-
-        def fetchSubmodule(module):
-            try:
-                return fetchUrl(cls.baseUrl % module, cls.imageSearch)
-            except Exception:
-                # XXX log error
-                return False
-
-        return [normalizeName(name) for part, name in possibles if part not in exclusions and fetchSubmodule(part)]
+    def namer(cls, imageUrl, pageUrl):
+        """Parse publish date from page content which looks like:
+         <img alt="Marmaduke" src="http://assets.amuniversal.com/07e7f270fa08012ff506001dd8b71c47" />
+         <h4>published: Sunday, November 11, 2012</h4>
+        """
+        data = getPageContent(pageUrl)[0]
+        ro = compile(tagre("img", "src", escape(imageUrl)) + r'\s+<h4>published: ([^<]+)')
+        mo = ro.search(data)
+        if mo:
+            strdate = mo.group(1)
+            return parse_strdate(strdate).strftime("%Y%m%d")
 
     globals()[classname] = make_scraper(classname,
         name='UClick/' + name,
         latestUrl = latestUrl,
         stripUrl = latestUrl + '%s/',
-        imageSearch = compile(tagre("img", "src", r'(http://synd\.imgsrv\.uclick\.com/comics/\w+/\d{4}/[^"]+\.gif)')),
-        prevSearch = compile(tagre("a", "href", r'(/client/zzz/\w+/\d{4}/\d{2}/\d{2}/)') + 'Previous date'),
-        help = 'Index format: yyyy/mm/dd',
+        imageSearch = compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[^"]+)') + r'\s+<h4>published'),
+        multipleImagesPerStrip = True,
+        prevSearch = None,
+        help = 'Index format: none',
+        namer = namer,
     )
 
 
+# List is from http://www.universaluclick.com/comics/list
 comics = {
-    '5thWave': 'fw',
-    '9To5': 'tmntf',
-    'AdamHome': 'ad',
-    'Agnes': 'cragn',
-    'AlcarazLalo': 'la',
-    'AlcarazLaloSpanish': 'spla',
-    'AndersonNick': 'wpnan',
-    'AndyCapp': 'crcap',
-    'AnimalCrackers': 'tmani',
-    'Annie': 'tmann',
-    'AsayChuck': 'crcas',
-    'AskShagg': 'crask',
-    'AuthTony': 'ta',
-    'BadReporter': 'bad',
-    'Baldo': 'ba',
-    'BaldoSpanish': 'be',
-    'BallardStreet': 'crbal',
-    'BarkEaterLake': 'bark',
-    'BarstowDonna': 'dba',
-    'BC': 'crbc',
-    'BCSpanish': 'crbcs',
-    'BeattieBruce': 'crbbe',
-    'BennetClay': 'wpcbe',
-    'BensonLisa': 'wplbe',
-    'BensonSteve': 'crsbe',
-    'BigTop': 'bt',
-    'Biographic': 'biov',
-    'Bleeker': 'blk',
-    'BobTheSquirrel': 'bob',
-    'BoilingPoint': 'boil',
-    'BokChip': 'crcbo',
-    'BoNanas': 'bon',
-    'Boomerangs': 'boom',
-    'BoondocksThe': 'bo',
-    'BottomLiners': 'tmbot',
-    'BoundAndGagged': 'tmbou',
-    'Brainwaves': 'bwv',
-    'BreenSteve': 'crsbr',
-    'BrendaStarr': 'tmbre',
-    'BrewsterRockit': 'tmrkt',
-    'BrittChris': 'crcbr',
-    'BroomHilda': 'tmbro',
-    'Candorville': 'cand',
-    'CarlsonStuart': 'sc',
-    'CatalinoKen': 'crkca',
-    'Cathy': 'ca',
-    'CathySpanish': 'spca',
-    'CEstLaVie': 'clv',
-    'CityThe': 'derf',
-    'ClearBlueWater': 'cbw',
-    'Cleats': 'cle',
-    'CloseToHome': 'cl',
-    'CombsPaul': 'tmcmb',
-    'CompuToon': 'tmcom',
-    'Condorito': 'cond',
-    'ConradPaul': 'tmpco',
-    'Cornered': 'co',
-    'CulDeSac': 'cds',
-    'DanzigerJeff': 'jd',
-    'DaviesMatt': 'tmmda',
-    'DeepCover': 'deep',
-    'DeeringJohn': 'crjde',
-    'DickTracy': 'tmdic',
-    'DinetteSetThe': 'crdin',
-    'DogEatDoug': 'crdog',
-    'DonWright': 'tmdow',
-    'Doodles': 'tmdoo',
-    'Doonesbury': 'db',
-    'DuplexThe': 'dp',
-    'Eek': 'eek',
-    'ElderberriesThe': 'eld',
-    'FacesInTheNews': 'kw',
-    'FlightDeck': 'crfd',
-    'FloAndFriends': 'crflo',
-    'FlyingMccoysThe': 'fmc',
-    'ForBetterOrForWorse': 'fb',
-    'ForHeavenSSake': 'crfhs',
-    'FoxtrotClassics': 'ftcl',
-    'Foxtrot': 'ft',
-    'FoxtrotSpanish': 'spft',
-    'FrankAndErnest': 'fa',
-    'FredBassetSpanish': 'spfba',
-    'FredBasset': 'tmfba',
-    'FrogApplause': 'frog',
-    'FuscoBrothersThe': 'fu',
-    'Garfield': 'ga',
-    'GarfieldSpanish': 'gh',
-    'GasolineAlley': 'tmgas',
-    'GaturroSpanish': 'spgat',
-    'GilThorp': 'tmgil',
-    'GingerMeggs': 'gin',
-    'GingerMeggsSpanish': 'spgin',
-    'GirlsAndSports': 'crgis',
-    'GorrellBob': 'crbgo',
-    'GoTeamBob': 'gtb',
-    'HammondBruce': 'hb',
-    'HandelsmanWalt': 'tmwha',
-    'HeartOfTheCity': 'hc',
-    'Heathcliff': 'crhea',
-    'HeathcliffSpanish': 'crhes',
-    'HerbAndJamaal': 'crher',
-    'HigginsJack': 'jh',
-    'HomeAndAway': 'wphaa',
-    'HorseyDavid': 'tmdho',
-    'Housebroken': 'tmhou',
-    'HubertAndAbby': 'haa',
-    'IdiotBox': 'ibox',
-    'ImagineThis': 'imt',
-    'InkPen': 'ink',
-    'InTheBleachers': 'bl',
-    'ItsAllAboutYou': 'wpiay',
-    'JamesBondSpanish': 'spjb',
-    'JonesClay': 'crcjo',
-    'KallaugherKevin': 'cwkal',
-    'KChroniclesThe': 'kk',
-    'KelleySteve': 'crske',
-    'Kudzu': 'tmkud',
-    'LaCucaracha': 'lc',
-    'LegendOfBill': 'lob',
-    'LibertyMeadows': 'crlib',
-    'Lio': 'lio',
-    'LittleDogLost': 'wpldl',
-    'LocherDick': 'tmdlo',
-    'LooseParts': 'tmloo',
-    'LostSheep': 'lost',
-    'LoweChan': 'tmclo',
-    'LuckovichMike': 'crmlu',
-    'LuckyCow': 'luc',
-    'MarkstienGary': 'crgma',
-    'MarletteDoug': 'tmdma',
-    'MccoyGlenn': 'gm',
-    'MeaningOfLilaThe': 'crlil',
-    'MeehanStreak': 'tmmee',
-    'MiddletonsThe': 'tmmid',
-    'MinimumSecurity': 'ms',
-    'ModestyBlaiseSpanish': 'spmb',
-    'Momma': 'crmom',
-    'MorinJim': 'cwjmo',
-    'MuttJeffSpanish': 'spmut',
-    'MythTickle': 'myth',
-    'NAoQV': 'naqv',
-    'NaturalSelection': 'crns',
-    'NestHeads': 'cpnst',
-    'Neurotica': 'neu',
-    'NonSequitur': 'nq',
-    'OhmanJack': 'tmjoh',
-    'OliphantPat': 'po',
-    'OnAClaireDay': 'crocd',
-    'OneBigHappy': 'crobh',
-    'OtherCoastThe': 'crtoc',
-    'OutOfTheGenePool': 'wpgen',
-    'Overboard': 'ob',
-    'OverboardSpanish': 'spob',
-    'PepeSpanish': 'sppep',
-    'PettJoel': 'jp',
-    'Pibgorn': 'pib',
-    'Pickles': 'wppic',
-    'Pluggers': 'tmplu',
-    'PoochCafe': 'poc',
-    'PoochCafeSpanish': 'sppoc',
-    'PopCulture': 'pop',
-    'PowellDwane': 'crdpo',
-    'Preteena': 'pr',
-    'PricklyCity': 'prc',
-    'QuigmansThe': 'tmqui',
-    'RallComic': 'tr',
-    'RamirezMicheal': 'crmrm',
-    'RamseyMarshall': 'crmra',
-    'RealLifeAdventures': 'rl',
-    'RedAndRover': 'wpred',
-    'RedMeat': 'red',
-    'ReynoldsUnwrapped': 'rw',
-    'RonaldinhoGaucho': 'ron',
-    'RonaldinhoGauchoSpanish': 'spron',
-    'Rubes': 'crrub',
-    'SackSteve': 'tmssa',
-    'SargentBen': 'bs',
-    'SargentBenSpanish': 'spbs',
-    'SendHelp': 'send',
-    'ShenemanDrew': 'tmdsh',
-    'SherffiusDrew': 'crjsh',
-    'Shoecabbage': 'shcab',
-    'Shoe': 'tmsho',
-    'SigmundSpanish': 'spsig',
-    'Slowpoke': 'slow',
-    'SmallWorld': 'small',
-    'SpaceIsThePlace': 'sitp',
-    'SpeedBump': 'crspe',
-    'StanisScott': 'crsst',
-    'StateOfTheUnion': 'crsou',
-    'StayskalWayne': 'tmwst',
-    'StoneSoup': 'ss',
-    'StrangeBrew': 'crstr',
-    'SummersDana': 'tmdsu',
-    'SuttonImpact': 'stn',
-    'Sylvia': 'tmsyl',
-    'SzepPaul': 'crpsz',
-    'TankMcnamara': 'tm',
-    'TeenageMutantNinjaTurtles': 'tmnt',
-    'TelnaesAnn': 'tmate',
-    'TheArgyleSweater': 'tas',
-    'ThePinkPanther': 'tmpnk',
-    'TheWizardOfId': 'crwiz',
-    'TheWizardOfIdSpanish': 'crwis',
-    'ThInk': 'think',
-    'ThompsonMike': 'crmth',
-    'ThroughThickAndThin': 'cpthk',
-    'TinySepuku': 'tiny',
-    'Toby': 'toby',
-    'TolesTom': 'tt',
-    'TomTheDancingBug': 'td',
-    'TooMuchCoffeeMan': 'tmcm',
-    'Trevor': 'trev',
-    'TutelandiaSpanish': 'sptut',
-    'VarvelGary': 'crgva',
-    'WassermanDan': 'tmdwa',
-    'WatchYourHead': 'wpwyh',
-    'Waylay': 'min',
-    'WeePals': 'crwee',
-    'WinnieThePooh': 'crwin',
-    'WitOfTheWorld': 'cwwit',
-    'WorkingItOut': 'crwio',
-    'WriteDon': 'tmdow',
-    'YennySpanish': 'spyen',
-    'Yenny': 'yen',
-    'ZackHill': 'crzhi',
-    'ZiggySpanish': 'spzi',
-    'Ziggy': 'zi',
+    '9 Chickweed Lane': 'strip',
 }
 
-for name, shortname in comics.items():
-    add(name, shortname)
+for name, category in comics.items():
+    add(name, category)
diff --git a/dosagelib/plugins/y.py b/dosagelib/plugins/y.py
index 4f7833d61..d00ee05ab 100644
--- a/dosagelib/plugins/y.py
+++ b/dosagelib/plugins/y.py
@@ -2,6 +2,7 @@
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012 Bastian Kleineidam
 
+from re import compile
 from ..scraper import _BasicScraper
 from ..util import tagre
 
diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py
index e02bb9963..144a861f2 100644
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -22,6 +22,9 @@ class _BasicScraper(object):
     @cvar prevSearch: A compiled regex that will locate the URL for the
         previous strip when applied to a strip page.
     '''
+    # if more than one image per URL is expected
+    multipleImagesPerStrip = False
+    # usually the index format help
     help = 'Sorry, no help for this comic yet.'
 
     def __init__(self, indexes=None):
@@ -44,7 +47,9 @@ class _BasicScraper(object):
 
     def getStrip(self, url):
         """Get comic strip for given URL."""
-        imageUrls = fetchUrls(url, self.imageSearch)
+        imageUrls = fetchUrls(url, self.imageSearch)[0]
+        if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
+            raise ValueError("found %d images with %s" % (len(imageUrls), self.imageSearch.pattern))
         return self.getComicStrip(url, imageUrls)
 
     def getComicStrip(self, url, imageUrls):
@@ -140,11 +145,13 @@ def get_scrapers():
     """
     global _scrapers
     if _scrapers is None:
+        out.write("Loading comic modules...")
         modules = loader.get_modules()
         plugins = loader.get_plugins(modules, _BasicScraper)
         _scrapers = list(plugins)
         _scrapers.sort(key=lambda s: s.get_name())
         check_scrapers()
+        out.write("... %d modules loaded." % len(_scrapers))
     return _scrapers
 
 
diff --git a/dosagelib/util.py b/dosagelib/util.py
index 49d8f5803..c12dd0c00 100644
--- a/dosagelib/util.py
+++ b/dosagelib/util.py
@@ -4,6 +4,7 @@
 from __future__ import division, print_function
 
 import urllib2, urlparse
+import requests
 import sys
 import os
 import cgi
@@ -42,10 +43,6 @@ def tagre(tag, attribute, value, quote='"', before="", after=""):
     @return: the generated regular expression suitable for re.compile()
     @rtype: string
     """
-    if before:
-        before += "[^>]*"
-    if after:
-        after += "[^>]*"
     attrs = dict(
         tag=case_insensitive_re(tag),
         attribute=case_insensitive_re(attribute),
@@ -54,7 +51,7 @@ def tagre(tag, attribute, value, quote='"', before="", after=""):
         before=before,
         after=after,
     )
-    return r'<\s*%(tag)s\s+(?:[^>]*%(before)s\s+)?%(attribute)s\s*=\s*%(quote)s%(value)s%(quote)s[^>]*%(after)s>' % attrs
+    return r'<\s*%(tag)s\s+(?:[^>]*%(before)s[^>]*\s+)?%(attribute)s\s*=\s*%(quote)s%(value)s%(quote)s[^>]*%(after)s[^>]*>' % attrs
 
 
 def case_insensitive_re(name):
@@ -74,7 +71,7 @@ baseSearch = re.compile(tagre("base", "href", '([^"]*)'))
 def getPageContent(url):
     # read page data
     page = urlopen(url)
-    data = page.read(MAX_FILESIZE)
+    data = page.text
     # determine base URL
     baseUrl = None
     match = baseSearch.search(data)
@@ -105,7 +102,7 @@ def fetchUrls(url, imageSearch, prevSearch=None):
         imageUrl = match.group(1)
         if not imageUrl:
             raise ValueError("Match empty image URL at %s with pattern %s" % (url, imageSearch.pattern))
-        out.write('matched image URL %r' % imageUrl, 2)
+        out.write('matched image URL %r with pattern %s' % (imageUrl, imageSearch.pattern), 2)
         imageUrls.add(normaliseURL(urlparse.urljoin(baseUrl, imageUrl)))
     if not imageUrls:
         out.write("warning: no images found at %s with pattern %s" % (url, imageSearch.pattern))
@@ -178,22 +175,18 @@ def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5):
     out.write('Open URL %s' % url, 2)
     assert retries >= 0, 'invalid retry value %r' % retries
     assert retry_wait_seconds > 0, 'invalid retry seconds value %r' % retry_wait_seconds
-    req = urllib2.Request(url)
+    headers = {'User-Agent': UserAgent}
+    config = {"max_retries": retries}
     if referrer:
-        req.add_header('Referer', referrer)
-    req.add_header('User-Agent', UserAgent)
-    tries = 0
-    while True:
-        try:
-            return urllib2.urlopen(req)
-        except IOError as err:
-            msg = 'URL retrieval of %s failed: %s' % (url, err)
-            out.write(msg)
-            out.write('waiting %d seconds and retrying (%d)' % (retry_wait_seconds, tries), 2)
-            time.sleep(retry_wait_seconds)
-            tries += 1
-            if tries >= retries:
-                raise IOError(msg)
+        headers['Referer'] = referrer
+    try:
+        req = requests.get(url, headers=headers, config=config)
+        req.raise_for_status()
+        return req
+    except requests.exceptions.RequestException as err:
+        msg = 'URL retrieval of %s failed: %s' % (url, err)
+        out.write(msg)
+        raise IOError(msg)
 
 
 def get_columns (fp):
@@ -259,11 +252,9 @@ def internal_error(out=sys.stderr, etype=None, evalue=None, tb=None):
     print("""********** Oops, I did it again. *************
 
 You have found an internal error in %(app)s. Please write a bug report
-at %(url)s and include the following information:
-- your commandline arguments and any configuration file in ~/.dosage/
-- the system information below
+at %(url)s and include at least the information below:
 
-Not disclosing some of the information above due to privacy reasons is ok.
+Not disclosing some of the information below due to privacy reasons is ok.
 I will try to help you nonetheless, but you have to give me something
 I can work with ;) .
 """ % dict(app=AppName, url=SupportUrl), file=out)
@@ -308,6 +299,7 @@ def print_app_info(out=sys.stderr):
                     {"version": sys.version, "platform": sys.platform}, file=out)
     stime = strtime(time.time())
     print("Local time:", stime, file=out)
+    print("sys.argv", sys.argv, file=out)
 
 
 def strtime(t):
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..3288e9274
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+requests
+
diff --git a/tests/test_comics.py b/tests/test_comics.py
index 8b2f353d9..c50bcf60c 100644
--- a/tests/test_comics.py
+++ b/tests/test_comics.py
@@ -4,6 +4,7 @@
 import tempfile
 import shutil
 import re
+import os
 from itertools import islice
 from unittest import TestCase
 from dosagelib import scraper
@@ -16,6 +17,16 @@ class _ComicTester(TestCase):
     def setUp(self):
         self.name = self.scraperclass.get_name()
         self.url = self.scraperclass.starter()
+        # create a temporary directory for images
+        self.tmpdir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.tmpdir)
+
+    def get_saved_images(self):
+        """Get saved images."""
+        dirs = tuple(self.name.split('/'))
+        return os.listdir(os.path.join(self.tmpdir, *dirs))
 
     def test_comic(self):
         # Test a scraper. It must be able to traverse backward for
@@ -23,7 +34,8 @@ class _ComicTester(TestCase):
         # on at least 4 pages.
         scraperobj = self.scraperclass()
         num = empty = 0
-        for strip in islice(scraperobj.getAllStrips(), 0, 5):
+        max_strips = 5
+        for strip in islice(scraperobj.getAllStrips(), 0, max_strips):
             images = 0
             for image in strip.getImages():
                 images += 1
@@ -35,6 +47,15 @@ class _ComicTester(TestCase):
             num += 1
         if self.scraperclass.prevSearch:
             self.check(num >= 4, 'traversal failed after %d strips, check the prevSearch pattern.' % num)
+            # check that exactly max_strips images are saved, or at least max_strips if a strip may have multiple images
+            saved_images = self.get_saved_images()
+            num_images = len(saved_images)
+            if self.scraperclass.multipleImagesPerStrip:
+                self.check(num_images >= max_strips, 
+                  'saved %d %s instead of at least %d images in %s' % (num_images, saved_images, max_strips, self.tmpdir))
+            else:
+                self.check(num_images == max_strips, 
+                  'saved %d %s instead of %d images in %s' % (num_images, saved_images, max_strips, self.tmpdir))
         self.check(empty == 0, 'failed to find images on %d pages, check the imageSearch pattern.' % empty)
 
     def check_stripurl(self, strip):
@@ -50,28 +71,28 @@ class _ComicTester(TestCase):
         self.check(mo is not None, 'strip URL %r does not match stripUrl pattern %s' % (strip.stripUrl, urlmatch))
 
     def save(self, image):
-        # create a temporary directory
-        tmpdir = tempfile.mkdtemp()
         try:
-            image.save(tmpdir)
+            image.save(self.tmpdir)
         except Exception as msg:
-            self.check(False, 'could not save %s to %s: %s' % (image.url, tmpdir, msg))
-        finally:
-            shutil.rmtree(tmpdir)
+            self.check(False, 'could not save %s to %s: %s' % (image.url, self.tmpdir, msg))
 
     def check(self, condition, msg):
         self.assertTrue(condition, "%s %s %s" % (self.name, self.url, msg))
 
 
+def make_comic_tester(name, **kwargs):
+    """Create and return a _ComicTester class with given name and attributes."""
+    return type(name, (_ComicTester,), kwargs)
+
+
 def generate_comic_testers():
     """For each comic scraper, create a test class."""
+    g = globals()
     # Limit number of scraper tests for now
-    max_scrapers = 100
+    max_scrapers = 10000
     for scraperclass in islice(scraper.get_scrapers(), 0, max_scrapers):
         name = 'Test'+scraperclass.__name__
-        globals()[name] = type(name,
-            (_ComicTester,),
-            dict(scraperclass=scraperclass)
-        )
+        g[name] = make_comic_tester(name, scraperclass=scraperclass)
+
 
 generate_comic_testers()