Fix some comics and add language tag.
This commit is contained in:
parent
b368f125bc
commit
88e28f3923
12 changed files with 273 additions and 29 deletions
43
dosage
43
dosage
|
@ -18,7 +18,7 @@ from collections import OrderedDict
|
||||||
|
|
||||||
from dosagelib import events, scraper
|
from dosagelib import events, scraper
|
||||||
from dosagelib.output import out
|
from dosagelib.output import out
|
||||||
from dosagelib.util import internal_error, getDirname, strlimit
|
from dosagelib.util import internal_error, getDirname, strlimit, getLangName
|
||||||
from dosagelib.ansicolor import get_columns
|
from dosagelib.ansicolor import get_columns
|
||||||
from dosagelib.configuration import App, Freeware, Copyright, SupportUrl
|
from dosagelib.configuration import App, Freeware, Copyright, SupportUrl
|
||||||
|
|
||||||
|
@ -118,7 +118,7 @@ def saveComicStrip(strip, basepath):
|
||||||
filename, saved = image.save(basepath)
|
filename, saved = image.save(basepath)
|
||||||
if saved:
|
if saved:
|
||||||
allskipped = False
|
allskipped = False
|
||||||
except IOError as msg:
|
except Exception as msg:
|
||||||
out.error('Could not save image at %s to %s: %s' % (image.referrer, image.filename, msg))
|
out.error('Could not save image at %s to %s: %s' % (image.referrer, image.filename, msg))
|
||||||
errors += 1
|
errors += 1
|
||||||
return errors, allskipped
|
return errors, allskipped
|
||||||
|
@ -126,21 +126,19 @@ def saveComicStrip(strip, basepath):
|
||||||
|
|
||||||
def displayHelp(comics):
|
def displayHelp(comics):
|
||||||
"""Print help for comic strips."""
|
"""Print help for comic strips."""
|
||||||
try:
|
for scraperobj in getScrapers(comics):
|
||||||
for scraperobj in getScrapers(comics):
|
displayComicHelp(scraperobj)
|
||||||
displayComicHelp(scraperobj)
|
|
||||||
except Exception as msg:
|
|
||||||
out.error(msg)
|
|
||||||
return 1
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def displayComicHelp(scraperobj):
|
def displayComicHelp(scraperobj):
|
||||||
"""Print description and help for a comic."""
|
"""Print description and help for a comic."""
|
||||||
out.context = scraperobj.getName()
|
out.context = getScraperName(scraperobj)
|
||||||
try:
|
try:
|
||||||
if scraperobj.description:
|
if scraperobj.description:
|
||||||
out.info("Description: " + scraperobj.description)
|
out.info("Description: " + scraperobj.description)
|
||||||
|
if scraperobj.lang:
|
||||||
|
out.info("Language: " + getLangName(scraperobj.lang))
|
||||||
if scraperobj.help:
|
if scraperobj.help:
|
||||||
for line in scraperobj.help.splitlines():
|
for line in scraperobj.help.splitlines():
|
||||||
out.info(line)
|
out.info(line)
|
||||||
|
@ -157,9 +155,6 @@ def getComics(options):
|
||||||
try:
|
try:
|
||||||
for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
|
for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
|
||||||
errors += getStrips(scraperobj, options)
|
errors += getStrips(scraperobj, options)
|
||||||
except Exception as msg:
|
|
||||||
out.error(msg)
|
|
||||||
errors += 1
|
|
||||||
finally:
|
finally:
|
||||||
out.context = ''
|
out.context = ''
|
||||||
events.getHandler().end()
|
events.getHandler().end()
|
||||||
|
@ -199,7 +194,7 @@ def run(options):
|
||||||
if options.list:
|
if options.list:
|
||||||
return doList()
|
return doList()
|
||||||
if options.singlelist:
|
if options.singlelist:
|
||||||
return doList(columnList=False)
|
return doList(columnList=False, verbose=options.verbose)
|
||||||
# after this a list of comic strips is needed
|
# after this a list of comic strips is needed
|
||||||
if not options.comic:
|
if not options.comic:
|
||||||
out.warn('No comics specified, bailing out!')
|
out.warn('No comics specified, bailing out!')
|
||||||
|
@ -209,26 +204,26 @@ def run(options):
|
||||||
return getComics(options)
|
return getComics(options)
|
||||||
|
|
||||||
|
|
||||||
def doList(columnList=True):
|
def doList(columnList=True, verbose=False):
|
||||||
"""List available comics."""
|
"""List available comics."""
|
||||||
out.info('Available comic scrapers:')
|
out.info('Available comic scrapers:')
|
||||||
out.info('Comics marked with [A] require age confirmation with the --adult option.')
|
out.info('Comics marked with [A] require age confirmation with the --adult option.')
|
||||||
scrapers = sorted(getScrapers(['@@']), key=lambda s: s.getName())
|
scrapers = sorted(getScrapers(['@@']), key=lambda s: s.getName())
|
||||||
try:
|
if columnList:
|
||||||
if columnList:
|
num = doColumnList(scrapers)
|
||||||
num = doColumnList(scrapers)
|
else:
|
||||||
else:
|
num = doSingleList(scrapers, verbose=verbose)
|
||||||
num = doSingleList(scrapers)
|
out.info('%d supported comics.' % num)
|
||||||
out.info('%d supported comics.' % num)
|
|
||||||
except IOError:
|
|
||||||
pass
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def doSingleList(scrapers, verbose=False):
    """Print the available scrapers, one per line.

    @param scrapers: iterable of scraper objects to list
    @param verbose: if true, print the full comic help for each scraper
        (via displayComicHelp) instead of only its name
    @return: the number of scrapers that were listed (0 for an empty iterable)
    """
    # Pre-bind the counter so an empty iterable yields 0 instead of raising
    # UnboundLocalError at the return statement.
    num = 0
    # Count from 1: the caller reports the result as "%d supported comics",
    # so the last zero-based index would be one too low.
    for num, scraperobj in enumerate(scrapers, 1):
        if verbose:
            displayComicHelp(scraperobj)
        else:
            print(getScraperName(scraperobj))
    return num
|
||||||
|
|
||||||
|
|
||||||
|
|
189
dosagelib/languages.py
Normal file
189
dosagelib/languages.py
Normal file
|
@ -0,0 +1,189 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# ISO 639-1 language codes from pycountry
|
||||||
|
Iso2Language = {
|
||||||
|
u'aa': u'Afar',
|
||||||
|
u'ab': u'Abkhazian',
|
||||||
|
u'af': u'Afrikaans',
|
||||||
|
u'ak': u'Akan',
|
||||||
|
u'sq': u'Albanian',
|
||||||
|
u'am': u'Amharic',
|
||||||
|
u'ar': u'Arabic',
|
||||||
|
u'an': u'Aragonese',
|
||||||
|
u'hy': u'Armenian',
|
||||||
|
u'as': u'Assamese',
|
||||||
|
u'av': u'Avaric',
|
||||||
|
u'ae': u'Avestan',
|
||||||
|
u'ay': u'Aymara',
|
||||||
|
u'az': u'Azerbaijani',
|
||||||
|
u'ba': u'Bashkir',
|
||||||
|
u'bm': u'Bambara',
|
||||||
|
u'eu': u'Basque',
|
||||||
|
u'be': u'Belarusian',
|
||||||
|
u'bn': u'Bengali',
|
||||||
|
u'bh': u'Bihari languages',
|
||||||
|
u'bi': u'Bislama',
|
||||||
|
u'bs': u'Bosnian',
|
||||||
|
u'br': u'Breton',
|
||||||
|
u'bg': u'Bulgarian',
|
||||||
|
u'my': u'Burmese',
|
||||||
|
u'ca': u'Catalan; Valencian',
|
||||||
|
u'ch': u'Chamorro',
|
||||||
|
u'ce': u'Chechen',
|
||||||
|
u'zh': u'Chinese',
|
||||||
|
u'cu': u'Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic',
|
||||||
|
u'cv': u'Chuvash',
|
||||||
|
u'kw': u'Cornish',
|
||||||
|
u'co': u'Corsican',
|
||||||
|
u'cr': u'Cree',
|
||||||
|
u'cs': u'Czech',
|
||||||
|
u'da': u'Danish',
|
||||||
|
u'dv': u'Divehi; Dhivehi; Maldivian',
|
||||||
|
u'nl': u'Dutch; Flemish',
|
||||||
|
u'dz': u'Dzongkha',
|
||||||
|
u'en': u'English',
|
||||||
|
u'eo': u'Esperanto',
|
||||||
|
u'et': u'Estonian',
|
||||||
|
u'ee': u'Ewe',
|
||||||
|
u'fo': u'Faroese',
|
||||||
|
u'fj': u'Fijian',
|
||||||
|
u'fi': u'Finnish',
|
||||||
|
u'fr': u'French',
|
||||||
|
u'fy': u'Western Frisian',
|
||||||
|
u'ff': u'Fulah',
|
||||||
|
u'ka': u'Georgian',
|
||||||
|
u'de': u'German',
|
||||||
|
u'gd': u'Gaelic; Scottish Gaelic',
|
||||||
|
u'ga': u'Irish',
|
||||||
|
u'gl': u'Galician',
|
||||||
|
u'gv': u'Manx',
|
||||||
|
u'el': u'Greek, Modern (1453-)',
|
||||||
|
u'gn': u'Guarani',
|
||||||
|
u'gu': u'Gujarati',
|
||||||
|
u'ht': u'Haitian; Haitian Creole',
|
||||||
|
u'ha': u'Hausa',
|
||||||
|
u'he': u'Hebrew',
|
||||||
|
u'hz': u'Herero',
|
||||||
|
u'hi': u'Hindi',
|
||||||
|
u'ho': u'Hiri Motu',
|
||||||
|
u'hr': u'Croatian',
|
||||||
|
u'hu': u'Hungarian',
|
||||||
|
u'ig': u'Igbo',
|
||||||
|
u'is': u'Icelandic',
|
||||||
|
u'io': u'Ido',
|
||||||
|
u'ii': u'Sichuan Yi; Nuosu',
|
||||||
|
u'iu': u'Inuktitut',
|
||||||
|
u'ie': u'Interlingue; Occidental',
|
||||||
|
u'ia': u'Interlingua (International Auxiliary Language Association)',
|
||||||
|
u'id': u'Indonesian',
|
||||||
|
u'ik': u'Inupiaq',
|
||||||
|
u'it': u'Italian',
|
||||||
|
u'jv': u'Javanese',
|
||||||
|
u'ja': u'Japanese',
|
||||||
|
u'kl': u'Kalaallisut; Greenlandic',
|
||||||
|
u'kn': u'Kannada',
|
||||||
|
u'ks': u'Kashmiri',
|
||||||
|
u'kr': u'Kanuri',
|
||||||
|
u'kk': u'Kazakh',
|
||||||
|
u'km': u'Central Khmer',
|
||||||
|
u'ki': u'Kikuyu; Gikuyu',
|
||||||
|
u'rw': u'Kinyarwanda',
|
||||||
|
u'ky': u'Kirghiz; Kyrgyz',
|
||||||
|
u'kv': u'Komi',
|
||||||
|
u'kg': u'Kongo',
|
||||||
|
u'ko': u'Korean',
|
||||||
|
u'kj': u'Kuanyama; Kwanyama',
|
||||||
|
u'ku': u'Kurdish',
|
||||||
|
u'lo': u'Lao',
|
||||||
|
u'la': u'Latin',
|
||||||
|
u'lv': u'Latvian',
|
||||||
|
u'li': u'Limburgan; Limburger; Limburgish',
|
||||||
|
u'ln': u'Lingala',
|
||||||
|
u'lt': u'Lithuanian',
|
||||||
|
u'lb': u'Luxembourgish; Letzeburgesch',
|
||||||
|
u'lu': u'Luba-Katanga',
|
||||||
|
u'lg': u'Ganda',
|
||||||
|
u'mk': u'Macedonian',
|
||||||
|
u'mh': u'Marshallese',
|
||||||
|
u'ml': u'Malayalam',
|
||||||
|
u'mi': u'Maori',
|
||||||
|
u'mr': u'Marathi',
|
||||||
|
u'ms': u'Malay',
|
||||||
|
u'mg': u'Malagasy',
|
||||||
|
u'mt': u'Maltese',
|
||||||
|
u'mo': u'Moldavian; Moldovan',
|
||||||
|
u'mn': u'Mongolian',
|
||||||
|
u'na': u'Nauru',
|
||||||
|
u'nv': u'Navajo; Navaho',
|
||||||
|
u'nr': u'Ndebele, South; South Ndebele',
|
||||||
|
u'nd': u'Ndebele, North; North Ndebele',
|
||||||
|
u'ng': u'Ndonga',
|
||||||
|
u'ne': u'Nepali',
|
||||||
|
u'nn': u'Norwegian Nynorsk; Nynorsk, Norwegian',
|
||||||
|
u'nb': u'Bokm\xe5l, Norwegian; Norwegian Bokm\xe5l',
|
||||||
|
u'no': u'Norwegian',
|
||||||
|
u'ny': u'Chichewa; Chewa; Nyanja',
|
||||||
|
u'oc': u'Occitan (post 1500)',
|
||||||
|
u'oj': u'Ojibwa',
|
||||||
|
u'or': u'Oriya',
|
||||||
|
u'om': u'Oromo',
|
||||||
|
u'os': u'Ossetian; Ossetic',
|
||||||
|
u'pa': u'Panjabi; Punjabi',
|
||||||
|
u'fa': u'Persian',
|
||||||
|
u'pi': u'Pali',
|
||||||
|
u'pl': u'Polish',
|
||||||
|
u'pt': u'Portuguese',
|
||||||
|
u'ps': u'Pushto; Pashto',
|
||||||
|
u'qu': u'Quechua',
|
||||||
|
u'rm': u'Romansh',
|
||||||
|
u'ro': u'Romanian',
|
||||||
|
u'rn': u'Rundi',
|
||||||
|
u'ru': u'Russian',
|
||||||
|
u'sg': u'Sango',
|
||||||
|
u'sa': u'Sanskrit',
|
||||||
|
u'si': u'Sinhala; Sinhalese',
|
||||||
|
u'sk': u'Slovak',
|
||||||
|
u'sl': u'Slovenian',
|
||||||
|
u'se': u'Northern Sami',
|
||||||
|
u'sm': u'Samoan',
|
||||||
|
u'sn': u'Shona',
|
||||||
|
u'sd': u'Sindhi',
|
||||||
|
u'so': u'Somali',
|
||||||
|
u'st': u'Sotho, Southern',
|
||||||
|
u'es': u'Spanish; Castilian',
|
||||||
|
u'sc': u'Sardinian',
|
||||||
|
u'sr': u'Serbian',
|
||||||
|
u'ss': u'Swati',
|
||||||
|
u'su': u'Sundanese',
|
||||||
|
u'sw': u'Swahili',
|
||||||
|
u'sv': u'Swedish',
|
||||||
|
u'ty': u'Tahitian',
|
||||||
|
u'ta': u'Tamil',
|
||||||
|
u'tt': u'Tatar',
|
||||||
|
u'te': u'Telugu',
|
||||||
|
u'tg': u'Tajik',
|
||||||
|
u'tl': u'Tagalog',
|
||||||
|
u'th': u'Thai',
|
||||||
|
u'bo': u'Tibetan',
|
||||||
|
u'ti': u'Tigrinya',
|
||||||
|
u'to': u'Tonga (Tonga Islands)',
|
||||||
|
u'tn': u'Tswana',
|
||||||
|
u'ts': u'Tsonga',
|
||||||
|
u'tk': u'Turkmen',
|
||||||
|
u'tr': u'Turkish',
|
||||||
|
u'tw': u'Twi',
|
||||||
|
u'ug': u'Uighur; Uyghur',
|
||||||
|
u'uk': u'Ukrainian',
|
||||||
|
u'ur': u'Urdu',
|
||||||
|
u'uz': u'Uzbek',
|
||||||
|
u've': u'Venda',
|
||||||
|
u'vi': u'Vietnamese',
|
||||||
|
u'vo': u'Volap\xfck',
|
||||||
|
u'cy': u'Welsh',
|
||||||
|
u'wa': u'Walloon',
|
||||||
|
u'wo': u'Wolof',
|
||||||
|
u'xh': u'Xhosa',
|
||||||
|
u'yi': u'Yiddish',
|
||||||
|
u'yo': u'Yoruba',
|
||||||
|
u'za': u'Zhuang; Chuang',
|
||||||
|
u'zu': u'Zulu',
|
||||||
|
}
|
|
@ -54,6 +54,7 @@ class AhoiPolloi(_BasicScraper):
|
||||||
stripUrl = url + '?day=%s'
|
stripUrl = url + '?day=%s'
|
||||||
firstStripUrl = stripUrl % '20060305'
|
firstStripUrl = stripUrl % '20060305'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
|
lang = 'de'
|
||||||
imageSearch = compile(tagre('img', 'src', r'(/static/antville/ahoipolloi/images/[^"]+)'))
|
imageSearch = compile(tagre('img', 'src', r'(/static/antville/ahoipolloi/images/[^"]+)'))
|
||||||
prevSearch = compile(tagre('a', 'href', r'(http://ahoipolloi\.blogger\.de/\?day=\d+)'))
|
prevSearch = compile(tagre('a', 'href', r'(http://ahoipolloi\.blogger\.de/\?day=\d+)'))
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
@ -98,6 +99,7 @@ class AlphaLuna(_BasicScraper):
|
||||||
|
|
||||||
class AlphaLunaSpanish(AlphaLuna):
|
class AlphaLunaSpanish(AlphaLuna):
|
||||||
name = 'AlphaLuna/Spanish'
|
name = 'AlphaLuna/Spanish'
|
||||||
|
lang = 'es'
|
||||||
url = 'http://alphaluna.net/spanish/'
|
url = 'http://alphaluna.net/spanish/'
|
||||||
stripUrl = url + 'issue-%s/'
|
stripUrl = url + 'issue-%s/'
|
||||||
|
|
||||||
|
|
|
@ -172,7 +172,7 @@ class BratHalla(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
class BrentalFloss(_BasicScraper):
|
class BrentalFloss(_BasicScraper):
|
||||||
url = 'http://www.brentalflossthecomic.com/'
|
url = 'http://brentalflossthecomic.com/'
|
||||||
stripUrl = url + '?id=%s'
|
stripUrl = url + '?id=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'([^"]*/img/comic/[^"]*)'))
|
imageSearch = compile(tagre("img", "src", r'([^"]*/img/comic/[^"]*)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]*)') + "Prev")
|
prevSearch = compile(tagre("a", "href", r'([^"]*)') + "Prev")
|
||||||
|
|
|
@ -15,6 +15,7 @@ def add(name, path):
|
||||||
name = 'Creators/' + name,
|
name = 'Creators/' + name,
|
||||||
url = baseurl + path + '.html',
|
url = baseurl + path + '.html',
|
||||||
stripUrl = baseurl + path + '/%s.html',
|
stripUrl = baseurl + path + '/%s.html',
|
||||||
|
lang = 'es' if name.lower().endswith('spanish') else 'en',
|
||||||
imageSearch = _imageSearch,
|
imageSearch = _imageSearch,
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s/\d+\.html)' % path) +
|
prevSearch = compile(tagre("a", "href", r'(%s/\d+\.html)' % path) +
|
||||||
tagre("img", "src", r'/img_comics/arrow_l\.gif')),
|
tagre("img", "src", r'/img_comics/arrow_l\.gif')),
|
||||||
|
|
|
@ -43,6 +43,7 @@ def add(name, path):
|
||||||
stripUrl = _url + '%s/',
|
stripUrl = _url + '%s/',
|
||||||
imageSearch = _imageSearch,
|
imageSearch = _imageSearch,
|
||||||
prevSearch = _prevSearch,
|
prevSearch = _prevSearch,
|
||||||
|
lang = 'es' if name.lower().endswith('spanish') else 'en',
|
||||||
help = 'Index format: n (unpadded)',
|
help = 'Index format: n (unpadded)',
|
||||||
namer = _namer,
|
namer = _namer,
|
||||||
)
|
)
|
||||||
|
|
|
@ -90,6 +90,7 @@ class NekoTheKitty(_BasicScraper):
|
||||||
class NichtLustig(_BasicScraper):
|
class NichtLustig(_BasicScraper):
|
||||||
url = 'http://www.nichtlustig.de/main.html'
|
url = 'http://www.nichtlustig.de/main.html'
|
||||||
stripUrl = 'http://static.nichtlustig.de/toondb/%s.html'
|
stripUrl = 'http://static.nichtlustig.de/toondb/%s.html'
|
||||||
|
lang = 'de'
|
||||||
imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
|
imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
|
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
|
||||||
help = 'Index format: yymmdd'
|
help = 'Index format: yymmdd'
|
||||||
|
|
|
@ -57,6 +57,7 @@ def add(name, url, description, adult, bounce):
|
||||||
prevSearch = _prevSearch,
|
prevSearch = _prevSearch,
|
||||||
prevUrlMatchesStripUrl = not adult,
|
prevUrlMatchesStripUrl = not adult,
|
||||||
description = description,
|
description = description,
|
||||||
|
lang = 'es' if name.lower().endswith('spanish') else 'en',
|
||||||
help = 'Index format: nnnn (some increasing number)',
|
help = 'Index format: nnnn (some increasing number)',
|
||||||
namer = namer,
|
namer = namer,
|
||||||
)
|
)
|
||||||
|
|
|
@ -50,11 +50,27 @@ class ZombieHunters(_BasicScraper):
|
||||||
class Zwarwald(_BasicScraper):
|
class Zwarwald(_BasicScraper):
|
||||||
url = "http://www.zwarwald.de/"
|
url = "http://www.zwarwald.de/"
|
||||||
stripUrl = url + 'index.php/page/%s/'
|
stripUrl = url + 'index.php/page/%s/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.zwarwald\.de/images/\d+/\d+/[^"]+)'))
|
# anything before page 495 seems to be flash
|
||||||
|
firstStripUrl = stripUrl % '495'
|
||||||
|
lang = 'de'
|
||||||
|
imageSearch = compile(tagre("img", "src", r'(http://(?:www\.zwarwald\.de|wp1163540.wp190.webpack.hosteurope.de/wordpress)/images/\d+/\d+/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://www\.zwarwald\.de/index\.php/page/\d+/)') +
|
prevSearch = compile(tagre("a", "href", r'(http://www\.zwarwald\.de/index\.php/page/\d+/)') +
|
||||||
tagre("img", "src", r'http://zwarwald\.de/images/prev\.jpg', quote="'"))
|
tagre("img", "src", r'http://zwarwald\.de/images/prev\.jpg', quote="'"))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
waitSeconds = 1
|
waitSeconds = 1
|
||||||
|
|
||||||
def shouldSkipUrl(self, url):
|
def shouldSkipUrl(self, url):
|
||||||
return url in (self.stripUrl % "112",)
|
"""Some pages have flash content."""
|
||||||
|
return url in (
|
||||||
|
self.stripUrl % "112",
|
||||||
|
self.stripUrl % "222",
|
||||||
|
self.stripUrl % "223",
|
||||||
|
self.stripUrl % "246",
|
||||||
|
self.stripUrl % "368",
|
||||||
|
self.stripUrl % '495',
|
||||||
|
)
|
||||||
|
|
||||||
|
@classmethod
def namer(cls, imageUrl, pageUrl):
    """Build the image file name from the tail of the image URL.

    The last three '/'-separated components of imageUrl (bound here as
    year, month and name) are joined with underscores. pageUrl is not used.
    """
    # Split off at most three components from the right; everything before
    # them stays in the first element and is discarded.
    pieces = imageUrl.rsplit('/', 3)
    year, month, name = pieces[1:]
    return "%s_%s_%s" % (year, month, name)
|
||||||
|
|
|
@ -33,6 +33,9 @@ class _BasicScraper(object):
|
||||||
# a description of the comic contents
|
# a description of the comic contents
|
||||||
description = ''
|
description = ''
|
||||||
|
|
||||||
|
# language of the comic (two-letter ISO 639-1 code)
|
||||||
|
lang = 'en'
|
||||||
|
|
||||||
# compiled regular expression that will locate the URL for the previous strip in a page
|
# compiled regular expression that will locate the URL for the previous strip in a page
|
||||||
prevSearch = None
|
prevSearch = None
|
||||||
|
|
||||||
|
@ -42,7 +45,7 @@ class _BasicScraper(object):
|
||||||
# usually the index format help
|
# usually the index format help
|
||||||
help = ''
|
help = ''
|
||||||
|
|
||||||
# wait time before downloading any pages or images
|
# wait time between downloading comic strips
|
||||||
waitSeconds = 0
|
waitSeconds = 0
|
||||||
|
|
||||||
# HTTP session storing cookies
|
# HTTP session storing cookies
|
||||||
|
@ -137,7 +140,7 @@ class _BasicScraper(object):
|
||||||
out.warn("Already seen previous URL %r" % prevUrl)
|
out.warn("Already seen previous URL %r" % prevUrl)
|
||||||
break
|
break
|
||||||
url = prevUrl
|
url = prevUrl
|
||||||
if self.waitSeconds:
|
if url and self.waitSeconds:
|
||||||
time.sleep(self.waitSeconds)
|
time.sleep(self.waitSeconds)
|
||||||
|
|
||||||
def getPrevUrl(self, url, data, baseUrl):
|
def getPrevUrl(self, url, data, baseUrl):
|
||||||
|
|
|
@ -17,6 +17,7 @@ from htmlentitydefs import name2codepoint
|
||||||
from .decorators import memoized
|
from .decorators import memoized
|
||||||
from .output import out
|
from .output import out
|
||||||
from .configuration import UserAgent, AppName, App, SupportUrl
|
from .configuration import UserAgent, AppName, App, SupportUrl
|
||||||
|
from .languages import Iso2Language
|
||||||
|
|
||||||
# Maximum content size for HTML pages
|
# Maximum content size for HTML pages
|
||||||
MaxContentBytes = 1024 * 1024 * 2 # 2 MB
|
MaxContentBytes = 1024 * 1024 * 2 # 2 MB
|
||||||
|
@ -462,3 +463,8 @@ def strlimit (s, length=72):
|
||||||
if length == 0:
|
if length == 0:
|
||||||
return ""
|
return ""
|
||||||
return "%s..." % s[:length]
|
return "%s..." % s[:length]
|
||||||
|
|
||||||
|
|
||||||
|
def getLangName(code):
    """Return the language name for a two-letter ISO 639-1 code.

    The code is looked up in the Iso2Language table; a code missing from
    the table raises KeyError.
    """
    name = Iso2Language[code]
    return name
|
||||||
|
|
29
scripts/mklanguages.py
Executable file
29
scripts/mklanguages.py
Executable file
|
@ -0,0 +1,29 @@
|
||||||
|
#!/usr/bin/python
|
||||||
|
# update languages.py from pycountry
|
||||||
|
import os
|
||||||
|
import codecs
|
||||||
|
import pycountry
|
||||||
|
|
||||||
|
# Project root: the parent of the directory that contains this script.
# abspath() is needed because __file__ can be a bare relative name (e.g. when
# the script is started from inside its own directory); dirname() would then
# return '' and the output path would be resolved against the current directory.
basepath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
def main():
    """Update language information in dosagelib/languages.py.

    Overwrites the file below basepath with a coding line, a header comment
    and the Iso2Language table emitted by write_languages().
    """
    fn = os.path.join(basepath, 'dosagelib', 'languages.py')
    encoding = 'utf-8'
    with codecs.open(fn, 'w', encoding) as f:
        f.write('# -*- coding: %s -*-%s' % (encoding, os.linesep))
        # Two-letter language codes are defined by ISO 639-1; the header
        # previously named the standard as "693-1".
        f.write('# ISO 639-1 language codes from pycountry%s' % os.linesep)
        write_languages(f)
|
||||||
|
|
||||||
|
|
||||||
|
def write_languages(f):
    """Emit the Iso2Language dictionary literal to the open file f.

    One entry is written per pycountry language that has an alpha2
    attribute, mapping that code to the language name; languages without
    the attribute are left out.
    """
    eol = os.linesep
    f.write("Iso2Language = {%s" % eol)
    for lang in pycountry.languages:
        if not hasattr(lang, 'alpha2'):
            continue
        entry = (lang.alpha2, lang.name, eol)
        f.write(" %r: %r,%s" % entry)
    f.write("}%s" % eol)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
Loading…
Reference in a new issue