From 88e28f3923687ccff4791de611a7476a63105af5 Mon Sep 17 00:00:00 2001
From: Bastian Kleineidam <bastian.kleineidam@web.de>
Date: Fri, 8 Mar 2013 22:33:05 +0100
Subject: [PATCH] Fix some comics and add language tag.

---
 dosage                           |  43 ++++---
 dosagelib/languages.py           | 189 +++++++++++++++++++++++++++++++
 dosagelib/plugins/a.py           |   2 +
 dosagelib/plugins/b.py           |   2 +-
 dosagelib/plugins/creators.py    |   1 +
 dosagelib/plugins/drunkduck.py   |   1 +
 dosagelib/plugins/n.py           |   1 +
 dosagelib/plugins/smackjeeves.py |   1 +
 dosagelib/plugins/z.py           |  20 +++-
 dosagelib/scraper.py             |   7 +-
 dosagelib/util.py                |   6 +
 scripts/mklanguages.py           |  29 +++++
 12 files changed, 273 insertions(+), 29 deletions(-)
 create mode 100644 dosagelib/languages.py
 create mode 100755 scripts/mklanguages.py

diff --git a/dosage b/dosage
index a221f34c2..5b5fe4bcb 100755
--- a/dosage
+++ b/dosage
@@ -18,7 +18,7 @@ from collections import OrderedDict
 
 from dosagelib import events, scraper
 from dosagelib.output import out
-from dosagelib.util import internal_error, getDirname, strlimit
+from dosagelib.util import internal_error, getDirname, strlimit, getLangName
 from dosagelib.ansicolor import get_columns
 from dosagelib.configuration import App, Freeware, Copyright, SupportUrl
 
@@ -118,7 +118,7 @@ def saveComicStrip(strip, basepath):
             filename, saved = image.save(basepath)
             if saved:
                 allskipped = False
-        except IOError as msg:
+        except Exception as msg:
             out.error('Could not save image at %s to %s: %s' % (image.referrer, image.filename, msg))
             errors += 1
     return errors, allskipped
@@ -126,21 +126,19 @@ def saveComicStrip(strip, basepath):
 
 def displayHelp(comics):
     """Print help for comic strips."""
-    try:
-        for scraperobj in getScrapers(comics):
-            displayComicHelp(scraperobj)
-    except Exception as msg:
-        out.error(msg)
-        return 1
+    for scraperobj in getScrapers(comics):
+        displayComicHelp(scraperobj)
     return 0
 
 
 def displayComicHelp(scraperobj):
     """Print description and help for a comic."""
-    out.context = scraperobj.getName()
+    out.context = getScraperName(scraperobj)
     try:
         if scraperobj.description:
             out.info("Description: " + scraperobj.description)
+        if scraperobj.lang:
+            out.info("Language: " + getLangName(scraperobj.lang))
         if scraperobj.help:
             for line in scraperobj.help.splitlines():
                 out.info(line)
@@ -157,9 +155,6 @@ def getComics(options):
     try:
         for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
             errors += getStrips(scraperobj, options)
-    except Exception as msg:
-        out.error(msg)
-        errors += 1
     finally:
         out.context = ''
         events.getHandler().end()
@@ -199,7 +194,7 @@ def run(options):
     if options.list:
         return doList()
     if options.singlelist:
-        return doList(columnList=False)
+        return doList(columnList=False, verbose=options.verbose)
     # after this a list of comic strips is needed
     if not options.comic:
         out.warn('No comics specified, bailing out!')
@@ -209,26 +204,26 @@ def run(options):
     return getComics(options)
 
 
-def doList(columnList=True):
+def doList(columnList=True, verbose=False):
     """List available comics."""
     out.info('Available comic scrapers:')
     out.info('Comics marked with [A] require age confirmation with the --adult option.')
     scrapers = sorted(getScrapers(['@@']), key=lambda s: s.getName())
-    try:
-        if columnList:
-            num = doColumnList(scrapers)
-        else:
-            num = doSingleList(scrapers)
-        out.info('%d supported comics.' % num)
-    except IOError:
-        pass
+    if columnList:
+        num = doColumnList(scrapers)
+    else:
+        num = doSingleList(scrapers, verbose=verbose)
+    out.info('%d supported comics.' % num)
     return 0
 
 
-def doSingleList(scrapers):
+def doSingleList(scrapers, verbose=False):
     """Get list of scraper names, one per line."""
     for num, scraperobj in enumerate(scrapers):
-        print(getScraperName(scraperobj))
+        if verbose:
+            displayComicHelp(scraperobj)
+        else:
+            print(getScraperName(scraperobj))
     return num
 
 
diff --git a/dosagelib/languages.py b/dosagelib/languages.py
new file mode 100644
index 000000000..81173588a
--- /dev/null
+++ b/dosagelib/languages.py
@@ -0,0 +1,189 @@
+# -*- coding: utf-8 -*-
+# ISO 639-1 language codes from pycountry
+Iso2Language = {
+    u'aa': u'Afar',
+    u'ab': u'Abkhazian',
+    u'af': u'Afrikaans',
+    u'ak': u'Akan',
+    u'sq': u'Albanian',
+    u'am': u'Amharic',
+    u'ar': u'Arabic',
+    u'an': u'Aragonese',
+    u'hy': u'Armenian',
+    u'as': u'Assamese',
+    u'av': u'Avaric',
+    u'ae': u'Avestan',
+    u'ay': u'Aymara',
+    u'az': u'Azerbaijani',
+    u'ba': u'Bashkir',
+    u'bm': u'Bambara',
+    u'eu': u'Basque',
+    u'be': u'Belarusian',
+    u'bn': u'Bengali',
+    u'bh': u'Bihari languages',
+    u'bi': u'Bislama',
+    u'bs': u'Bosnian',
+    u'br': u'Breton',
+    u'bg': u'Bulgarian',
+    u'my': u'Burmese',
+    u'ca': u'Catalan; Valencian',
+    u'ch': u'Chamorro',
+    u'ce': u'Chechen',
+    u'zh': u'Chinese',
+    u'cu': u'Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic',
+    u'cv': u'Chuvash',
+    u'kw': u'Cornish',
+    u'co': u'Corsican',
+    u'cr': u'Cree',
+    u'cs': u'Czech',
+    u'da': u'Danish',
+    u'dv': u'Divehi; Dhivehi; Maldivian',
+    u'nl': u'Dutch; Flemish',
+    u'dz': u'Dzongkha',
+    u'en': u'English',
+    u'eo': u'Esperanto',
+    u'et': u'Estonian',
+    u'ee': u'Ewe',
+    u'fo': u'Faroese',
+    u'fj': u'Fijian',
+    u'fi': u'Finnish',
+    u'fr': u'French',
+    u'fy': u'Western Frisian',
+    u'ff': u'Fulah',
+    u'ka': u'Georgian',
+    u'de': u'German',
+    u'gd': u'Gaelic; Scottish Gaelic',
+    u'ga': u'Irish',
+    u'gl': u'Galician',
+    u'gv': u'Manx',
+    u'el': u'Greek, Modern (1453-)',
+    u'gn': u'Guarani',
+    u'gu': u'Gujarati',
+    u'ht': u'Haitian; Haitian Creole',
+    u'ha': u'Hausa',
+    u'he': u'Hebrew',
+    u'hz': u'Herero',
+    u'hi': u'Hindi',
+    u'ho': u'Hiri Motu',
+    u'hr': u'Croatian',
+    u'hu': u'Hungarian',
+    u'ig': u'Igbo',
+    u'is': u'Icelandic',
+    u'io': u'Ido',
+    u'ii': u'Sichuan Yi; Nuosu',
+    u'iu': u'Inuktitut',
+    u'ie': u'Interlingue; Occidental',
+    u'ia': u'Interlingua (International Auxiliary Language Association)',
+    u'id': u'Indonesian',
+    u'ik': u'Inupiaq',
+    u'it': u'Italian',
+    u'jv': u'Javanese',
+    u'ja': u'Japanese',
+    u'kl': u'Kalaallisut; Greenlandic',
+    u'kn': u'Kannada',
+    u'ks': u'Kashmiri',
+    u'kr': u'Kanuri',
+    u'kk': u'Kazakh',
+    u'km': u'Central Khmer',
+    u'ki': u'Kikuyu; Gikuyu',
+    u'rw': u'Kinyarwanda',
+    u'ky': u'Kirghiz; Kyrgyz',
+    u'kv': u'Komi',
+    u'kg': u'Kongo',
+    u'ko': u'Korean',
+    u'kj': u'Kuanyama; Kwanyama',
+    u'ku': u'Kurdish',
+    u'lo': u'Lao',
+    u'la': u'Latin',
+    u'lv': u'Latvian',
+    u'li': u'Limburgan; Limburger; Limburgish',
+    u'ln': u'Lingala',
+    u'lt': u'Lithuanian',
+    u'lb': u'Luxembourgish; Letzeburgesch',
+    u'lu': u'Luba-Katanga',
+    u'lg': u'Ganda',
+    u'mk': u'Macedonian',
+    u'mh': u'Marshallese',
+    u'ml': u'Malayalam',
+    u'mi': u'Maori',
+    u'mr': u'Marathi',
+    u'ms': u'Malay',
+    u'mg': u'Malagasy',
+    u'mt': u'Maltese',
+    u'mo': u'Moldavian; Moldovan',
+    u'mn': u'Mongolian',
+    u'na': u'Nauru',
+    u'nv': u'Navajo; Navaho',
+    u'nr': u'Ndebele, South; South Ndebele',
+    u'nd': u'Ndebele, North; North Ndebele',
+    u'ng': u'Ndonga',
+    u'ne': u'Nepali',
+    u'nn': u'Norwegian Nynorsk; Nynorsk, Norwegian',
+    u'nb': u'Bokm\xe5l, Norwegian; Norwegian Bokm\xe5l',
+    u'no': u'Norwegian',
+    u'ny': u'Chichewa; Chewa; Nyanja',
+    u'oc': u'Occitan (post 1500)',
+    u'oj': u'Ojibwa',
+    u'or': u'Oriya',
+    u'om': u'Oromo',
+    u'os': u'Ossetian; Ossetic',
+    u'pa': u'Panjabi; Punjabi',
+    u'fa': u'Persian',
+    u'pi': u'Pali',
+    u'pl': u'Polish',
+    u'pt': u'Portuguese',
+    u'ps': u'Pushto; Pashto',
+    u'qu': u'Quechua',
+    u'rm': u'Romansh',
+    u'ro': u'Romanian',
+    u'rn': u'Rundi',
+    u'ru': u'Russian',
+    u'sg': u'Sango',
+    u'sa': u'Sanskrit',
+    u'si': u'Sinhala; Sinhalese',
+    u'sk': u'Slovak',
+    u'sl': u'Slovenian',
+    u'se': u'Northern Sami',
+    u'sm': u'Samoan',
+    u'sn': u'Shona',
+    u'sd': u'Sindhi',
+    u'so': u'Somali',
+    u'st': u'Sotho, Southern',
+    u'es': u'Spanish; Castilian',
+    u'sc': u'Sardinian',
+    u'sr': u'Serbian',
+    u'ss': u'Swati',
+    u'su': u'Sundanese',
+    u'sw': u'Swahili',
+    u'sv': u'Swedish',
+    u'ty': u'Tahitian',
+    u'ta': u'Tamil',
+    u'tt': u'Tatar',
+    u'te': u'Telugu',
+    u'tg': u'Tajik',
+    u'tl': u'Tagalog',
+    u'th': u'Thai',
+    u'bo': u'Tibetan',
+    u'ti': u'Tigrinya',
+    u'to': u'Tonga (Tonga Islands)',
+    u'tn': u'Tswana',
+    u'ts': u'Tsonga',
+    u'tk': u'Turkmen',
+    u'tr': u'Turkish',
+    u'tw': u'Twi',
+    u'ug': u'Uighur; Uyghur',
+    u'uk': u'Ukrainian',
+    u'ur': u'Urdu',
+    u'uz': u'Uzbek',
+    u've': u'Venda',
+    u'vi': u'Vietnamese',
+    u'vo': u'Volap\xfck',
+    u'cy': u'Welsh',
+    u'wa': u'Walloon',
+    u'wo': u'Wolof',
+    u'xh': u'Xhosa',
+    u'yi': u'Yiddish',
+    u'yo': u'Yoruba',
+    u'za': u'Zhuang; Chuang',
+    u'zu': u'Zulu',
+}
diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py
index 137bc9a76..2fcdc8ea3 100644
--- a/dosagelib/plugins/a.py
+++ b/dosagelib/plugins/a.py
@@ -54,6 +54,7 @@ class AhoiPolloi(_BasicScraper):
     stripUrl = url + '?day=%s'
     firstStripUrl = stripUrl % '20060305'
     multipleImagesPerStrip = True
+    lang = 'de'
     imageSearch = compile(tagre('img', 'src', r'(/static/antville/ahoipolloi/images/[^"]+)'))
     prevSearch = compile(tagre('a', 'href', r'(http://ahoipolloi\.blogger\.de/\?day=\d+)'))
     help = 'Index format: yyyymmdd'
@@ -98,6 +99,7 @@ class AlphaLuna(_BasicScraper):
 
 class AlphaLunaSpanish(AlphaLuna):
     name = 'AlphaLuna/Spanish'
+    lang = 'es'
     url = 'http://alphaluna.net/spanish/'
     stripUrl = url + 'issue-%s/'
 
diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py
index 4362ebe54..0ee1d58bb 100644
--- a/dosagelib/plugins/b.py
+++ b/dosagelib/plugins/b.py
@@ -172,7 +172,7 @@ class BratHalla(_BasicScraper):
 
 
 class BrentalFloss(_BasicScraper):
-    url = 'http://www.brentalflossthecomic.com/'
+    url = 'http://brentalflossthecomic.com/'
     stripUrl = url + '?id=%s'
     imageSearch = compile(tagre("img", "src", r'([^"]*/img/comic/[^"]*)'))
     prevSearch = compile(tagre("a", "href", r'([^"]*)') + "Prev")
diff --git a/dosagelib/plugins/creators.py b/dosagelib/plugins/creators.py
index a081f943a..057b14bd7 100644
--- a/dosagelib/plugins/creators.py
+++ b/dosagelib/plugins/creators.py
@@ -15,6 +15,7 @@ def add(name, path):
         name = 'Creators/' + name,
         url = baseurl + path + '.html',
         stripUrl = baseurl + path + '/%s.html',
+        lang = 'es' if name.lower().endswith('spanish') else 'en',
         imageSearch = _imageSearch,
         prevSearch = compile(tagre("a", "href", r'(%s/\d+\.html)' % path) +
           tagre("img", "src", r'/img_comics/arrow_l\.gif')),
diff --git a/dosagelib/plugins/drunkduck.py b/dosagelib/plugins/drunkduck.py
index b4774ad0d..ff6d2741a 100644
--- a/dosagelib/plugins/drunkduck.py
+++ b/dosagelib/plugins/drunkduck.py
@@ -43,6 +43,7 @@ def add(name, path):
         stripUrl = _url + '%s/',
         imageSearch = _imageSearch,
         prevSearch = _prevSearch,
+        lang = 'es' if name.lower().endswith('spanish') else 'en',
         help = 'Index format: n (unpadded)',
         namer = _namer,
     )
diff --git a/dosagelib/plugins/n.py b/dosagelib/plugins/n.py
index d0f7163df..835429a43 100644
--- a/dosagelib/plugins/n.py
+++ b/dosagelib/plugins/n.py
@@ -90,6 +90,7 @@ class NekoTheKitty(_BasicScraper):
 class NichtLustig(_BasicScraper):
     url = 'http://www.nichtlustig.de/main.html'
     stripUrl = 'http://static.nichtlustig.de/toondb/%s.html'
+    lang = 'de'
     imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
     prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
     help = 'Index format: yymmdd'
diff --git a/dosagelib/plugins/smackjeeves.py b/dosagelib/plugins/smackjeeves.py
index 92f70aa1f..ba1283b70 100644
--- a/dosagelib/plugins/smackjeeves.py
+++ b/dosagelib/plugins/smackjeeves.py
@@ -57,6 +57,7 @@ def add(name, url, description, adult, bounce):
         prevSearch = _prevSearch,
         prevUrlMatchesStripUrl = not adult,
         description = description,
+        lang = 'es' if name.lower().endswith('spanish') else 'en',
         help = 'Index format: nnnn (some increasing number)',
         namer = namer,
     )
diff --git a/dosagelib/plugins/z.py b/dosagelib/plugins/z.py
index e9bfe4234..a15d297ba 100644
--- a/dosagelib/plugins/z.py
+++ b/dosagelib/plugins/z.py
@@ -50,11 +50,27 @@ class ZombieHunters(_BasicScraper):
 class Zwarwald(_BasicScraper):
     url = "http://www.zwarwald.de/"
     stripUrl = url + 'index.php/page/%s/'
-    imageSearch = compile(tagre("img", "src", r'(http://www\.zwarwald\.de/images/\d+/\d+/[^"]+)'))
+    # anything before page 495 seems to be flash
+    firstStripUrl = stripUrl % '495'
+    lang = 'de'
+    imageSearch = compile(tagre("img", "src", r'(http://(?:www\.zwarwald\.de|wp1163540\.wp190\.webpack\.hosteurope\.de/wordpress)/images/\d+/\d+/[^"]+)'))
     prevSearch = compile(tagre("a", "href", r'(http://www\.zwarwald\.de/index\.php/page/\d+/)') +
         tagre("img", "src", r'http://zwarwald\.de/images/prev\.jpg', quote="'"))
     help = 'Index format: number'
     waitSeconds = 1
 
     def shouldSkipUrl(self, url):
-        return url in (self.stripUrl % "112",)
+        """Some pages have flash content."""
+        return url in (
+            self.stripUrl % "112",
+            self.stripUrl % "222",
+            self.stripUrl % "223",
+            self.stripUrl % "246",
+            self.stripUrl % "368",
+            self.stripUrl % '495',
+        )
+
+    @classmethod
+    def namer(cls, imageUrl, pageUrl):
+        prefix, year, month, name = imageUrl.rsplit('/', 3)
+        return "%s_%s_%s" % (year, month, name)
diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py
index 2aabbdb67..a8c44d7b6 100644
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -33,6 +33,9 @@ class _BasicScraper(object):
     # a description of the comic contents
     description = ''
 
+    # language of the comic (two-letter ISO 639-1 code)
+    lang = 'en'
+
     # compiled regular expression that will locate the URL for the previous strip in a page
     prevSearch = None
 
@@ -42,7 +45,7 @@ class _BasicScraper(object):
     # usually the index format help
     help = ''
 
-    # wait time before downloading any pages or images
+    # wait time between downloading comic strips
     waitSeconds = 0
 
     # HTTP session storing cookies
@@ -137,7 +140,7 @@ class _BasicScraper(object):
                 out.warn("Already seen previous URL %r" % prevUrl)
                 break
             url = prevUrl
-            if self.waitSeconds:
+            if url and self.waitSeconds:
                 time.sleep(self.waitSeconds)
 
     def getPrevUrl(self, url, data, baseUrl):
diff --git a/dosagelib/util.py b/dosagelib/util.py
index ab42886a7..80209c223 100644
--- a/dosagelib/util.py
+++ b/dosagelib/util.py
@@ -17,6 +17,7 @@ from htmlentitydefs import name2codepoint
 from .decorators import memoized
 from .output import out
 from .configuration import UserAgent, AppName, App, SupportUrl
+from .languages import Iso2Language
 
 # Maximum content size for HTML pages
 MaxContentBytes = 1024 * 1024 * 2 # 2 MB
@@ -462,3 +463,8 @@ def strlimit (s, length=72):
     if length == 0:
         return ""
     return "%s..." % s[:length]
+
+
+def getLangName(code):
+    """Get name of language specified by ISO 639-1 code."""
+    return Iso2Language[code]
diff --git a/scripts/mklanguages.py b/scripts/mklanguages.py
new file mode 100755
index 000000000..1fd676c18
--- /dev/null
+++ b/scripts/mklanguages.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+# update languages.py from pycountry
+import os
+import codecs
+import pycountry
+
+basepath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  # repo root, independent of cwd
+
+def main():
+    """Regenerate dosagelib/languages.py (UTF-8 encoded) from pycountry data."""
+    fn = os.path.join(basepath, 'dosagelib', 'languages.py')
+    encoding = 'utf-8'
+    with codecs.open(fn, 'w', encoding) as f:
+        f.write('# -*- coding: %s -*-%s' % (encoding, os.linesep))
+        f.write('# ISO 639-1 language codes from pycountry%s' % os.linesep)
+        write_languages(f)
+
+
+def write_languages(f):
+    """Emit the Iso2Language dict literal, one alpha2 -> name entry per line."""
+    eol = os.linesep
+    f.write("Iso2Language = {%s" % eol)
+    for entry in (lng for lng in pycountry.languages if hasattr(lng, 'alpha2')):
+        f.write("    %r: %r,%s" % (entry.alpha2, entry.name, eol))
+    f.write("}%s" % eol)
+
+
+if __name__ == '__main__':
+    main()