dosage/dosage

285 lines
9.8 KiB
Text
Raw Normal View History

2012-06-20 19:58:13 +00:00
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
2012-06-20 19:58:13 +00:00
# Dosage, the webcomic downloader
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from __future__ import print_function
2012-06-20 19:58:13 +00:00
import sys
import os
import optparse
from dosagelib import events, scraper
from dosagelib.output import out
2012-12-12 16:41:29 +00:00
from dosagelib.util import internal_error, getDirname, strlimit
from dosagelib.ansicolor import get_columns
from dosagelib.configuration import App, Freeware, Copyright, SupportUrl
2012-06-20 19:58:13 +00:00
def setupOptions():
    """Construct the command line option parser.

    Registers all dosage options on a fresh optparse parser. When the
    optional optcomplete module can be imported, it is asked to set up
    autocompletion for the parser; a missing module is silently ignored.

    @return: new option parser
    @rtype: optparse.OptionParser
    """
    usage = 'usage: %prog [options] comicModule [comicModule ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-v', '--verbose', action='count', dest='verbose',
        default=0,
        help='provides verbose output, use multiple times for more verbosity')
    parser.add_option('-n', '--numstrips', action='store', dest='numstrips',
        type='int', default=0,
        help='traverse and retrieve the given number of comic strips; '
             'use --all to retrieve all comic strips')
    parser.add_option('-a', '--all', action='store_true', dest='all',
        default=None,
        help='traverse and retrieve all comic strips')
    parser.add_option('-b', '--basepath', action='store', dest='basepath',
        default='Comics', metavar='PATH',
        help='set the path to create individual comic directories in, '
             'default is Comics')
    # The help text must name the option exactly as registered above
    # (--basepath), otherwise users are pointed at a non-existent option.
    parser.add_option('--baseurl', action='store', dest='baseurl',
        default=None, metavar='PATH',
        help='the base URL of your comics directory (for RSS, HTML, etc.); '
             'this should correspond to --basepath')
    # -l and --singlelist share one destination; the stored constant
    # (1 or 2) tells the two list layouts apart.
    parser.add_option('-l', '--list', action='store_const', const=1,
        dest='list',
        help='list available comic modules')
    parser.add_option('--singlelist', action='store_const', const=2,
        dest='list',
        help='list available comic modules in a single list')
    parser.add_option('-V', '--version', action='store_true', dest='version',
        help='display the version number')
    parser.add_option('-m', '--modulehelp', action='store_true',
        dest='modhelp',
        help='display help for comic modules')
    parser.add_option('-t', '--timestamps', action='store_true',
        dest='timestamps', default=False,
        help='print timestamps for all output at any info level')
    parser.add_option('-o', '--output', action='store', dest='output',
        choices=events.getHandlers(),
        help='output formatting for downloaded comics')
    parser.add_option('--adult', action='store_true', dest='adult',
        default=False,
        help='confirms that you are old enough to view adult content')
    try:
        import optcomplete
        optcomplete.autocomplete(parser)
    except ImportError:
        # optcomplete is an optional dependency; run without completion
        pass
    return parser
2012-09-26 14:47:39 +00:00
def displayVersion():
    """Print version and licensing information to standard output.

    Prints the App, Copyright and Freeware values from
    dosagelib.configuration, one per line, followed by a pointer to the
    support URL.

    @return: exit code, always 0
    @rtype: int
    """
    for banner in (App, Copyright, Freeware):
        print(banner)
    print("For support see", SupportUrl)
    return 0
2012-09-26 14:47:39 +00:00
2012-10-11 10:03:12 +00:00
def setOutputInfo(options):
    """Configure the global output object from the parsed options.

    @param options: parsed command line options; the verbose and
      timestamps attributes are read
    """
    verbosity = options.verbose
    # reset to the base level first, then raise it by the verbosity count
    out.level = 0
    out.level += verbosity
    out.timestamps = options.timestamps
2012-06-20 19:58:13 +00:00
2012-10-11 16:08:18 +00:00
def saveComicStrip(strip, basepath):
    """Store all images belonging to one comic strip.

    Each image returned by strip.getImages() is saved via its save()
    method. An IOError during saving is reported through the output
    object and counted; the remaining images are still processed.

    @param strip: comic strip object providing getImages()
    @param basepath: base path handed to each image's save() method
    @return: tuple (number of images that raised IOError, True if no
      image reported that it was actually saved)
    @rtype: tuple (int, bool)
    """
    failures = 0
    anysaved = False
    for image in strip.getImages():
        try:
            # save() returns (filename, saved); only the flag is needed
            _, saved = image.save(basepath)
        except IOError as msg:
            out.error('Could not save %s: %s' % (image.filename, msg))
            failures += 1
        else:
            if saved:
                anysaved = True
    return failures, not anysaved
2012-10-11 10:03:12 +00:00
2012-12-12 16:41:29 +00:00
def displayHelp(comics):
    """Show description and help text for the requested comics.

    @param comics: comic names, passed on to getScrapers()
    @return: 0 when all help was printed, 1 when a ValueError was
      raised while resolving or printing the comics
    @rtype: int
    """
    status = 0
    try:
        for scraperobj in getScrapers(comics):
            displayComicHelp(scraperobj)
    except ValueError as msg:
        out.error(msg)
        status = 1
    return status
2012-12-07 23:45:18 +00:00
2012-12-12 16:41:29 +00:00
def displayComicHelp(scraperobj):
    """Output the description and the help text of one comic scraper.

    The output context is set to the scraper name while printing and is
    cleared again afterwards, even if printing fails.

    @param scraperobj: scraper providing get_name() and the description
      and help attributes; empty attributes are skipped
    """
    out.context = scraperobj.get_name()
    try:
        # description first, then help; each is printed line by line
        for attr in ('description', 'help'):
            text = getattr(scraperobj, attr)
            if text:
                for line in text.splitlines():
                    out.info(line)
    finally:
        out.context = ''
2012-10-11 10:03:12 +00:00
def getComics(options, comics):
    """Download the strips of all given comics.

    Installs the requested event handler (if any), signals start to the
    active handler, then fetches the strips of every selected scraper.
    A ValueError ends the retrieval and counts as one error.

    @param options: parsed command line options (output, basepath,
      baseurl and adult are read here)
    @param comics: comic names, passed on to getScrapers()
    @return: number of errors encountered
    @rtype: int
    """
    if options.output:
        events.installHandler(options.output, options.basepath, options.baseurl)
    events.getHandler().start()
    failed = 0
    try:
        selected = getScrapers(comics, options.basepath, options.adult)
        for scraperobj in selected:
            failed += getStrips(scraperobj, options)
    except ValueError as msg:
        out.error(msg)
        failed += 1
    finally:
        out.context = ''
        # NOTE(review): indentation was lost in the reviewed copy; end()
        # is assumed to belong to this finally clause - confirm.
        events.getHandler().end()
    return failed
def getStrips(scraperobj, options):
    """Retrieve and save the strips of a single scraper.

    Depending on the options, all strips, a limited number of strips or
    only the current strips are requested from the scraper. The output
    context is set to the scraper name and left set on return.

    @param scraperobj: scraper to fetch strips from
    @param options: parsed command line options (all, numstrips and
      basepath are read here)
    @return: number of errors encountered
    @rtype: int
    """
    out.context = scraperobj.get_name()
    if options.all:
        strips = scraperobj.getAllStrips()
    elif options.numstrips:
        strips = scraperobj.getAllStrips(options.numstrips)
    else:
        strips = scraperobj.getCurrentStrips()
    failed = 0
    try:
        for position, strip in enumerate(strips):
            strip_errors, allskipped = saveComicStrip(strip, options.basepath)
            failed += strip_errors
            # stop when indexed retrieval skipped all images for one
            # comic strip (except the first one)
            if position > 0 and allskipped and scraperobj.indexes:
                out.info("Stop retrieval because image file already exists")
                break
    except (ValueError, IOError) as msg:
        out.error(msg)
        failed += 1
    return failed
def run(options, comics):
    """Dispatch to the command selected by the options.

    Precedence: version display, comic listing, module help, and
    finally comic retrieval. Help and retrieval need at least one comic.

    @param options: parsed command line options
    @param comics: collection of comic names given on the command line
    @return: exit code of the executed command; 1 if no comics were given
    @rtype: int
    """
    setOutputInfo(options)
    if options.version:
        return displayVersion()
    if options.list:
        # the value 1 selects the column layout, any other the single list
        use_columns = options.list == 1
        return doList(use_columns)
    if not comics:
        out.warn('No comics specified, bailing out!')
        return 1
    if options.modhelp:
        return displayHelp(comics)
    return getComics(options, comics)
2012-10-11 10:03:12 +00:00
def doList(columnList):
    """Print the names of all available comic scrapers.

    @param columnList: if true, print several names per line, else one
      name per line
    @return: exit code, always 0
    @rtype: int
    """
    out.info('Available comic scrapers:')
    # '@@' selects every known scraper
    scrapers = getScrapers(['@@'])
    lister = doColumnList if columnList else doSingleList
    num = lister(scrapers)
    out.info('%d supported comics.' % num)
    out.info('comics marked with [A] require age confirmation with the --adult option.')
    return 0
def doSingleList(scrapers):
    """Print the scraper names, one per line.

    @param scrapers: iterable of scraper objects
    @return: number of names printed; 0 for an empty iterable
    @rtype: int
    """
    # Count explicitly: the last enumerate() index would be one less than
    # the number of entries and would be undefined for an empty iterable.
    num = 0
    for scraperobj in scrapers:
        print(getScraperName(scraperobj))
        num += 1
    return num
2012-10-11 10:03:12 +00:00
def doColumnList(scrapers):
    """Print the scraper names with multiple names per line.

    The column width is the length of the longest (possibly shortened)
    name; as many columns as fit into the screen width are printed, but
    at least one.

    @param scrapers: iterable of scraper objects
    @return: number of names printed; 0 for an empty iterable
    @rtype: int
    """
    screenWidth = get_columns(sys.stdout)
    # limit name length so at least two columns are there
    # (floor division keeps the limit an integer under true division)
    limit = (screenWidth // 2) - 8
    names = [getScraperName(scraperobj, limit=limit) for scraperobj in scrapers]
    num = len(names)
    if not names:
        # nothing to print; max() below would raise ValueError
        return num
    maxlen = max(len(name) for name in names)
    namesPerLine = max(screenWidth // (maxlen + 1), 1)
    # NOTE(review): columns are padded to maxlen while the width budget
    # uses maxlen + 1, so the longest name gets no separator - confirm
    # whether that is intended.
    for start in range(0, num, namesPerLine):
        row = names[start:start + namesPerLine]
        print(''.join(name.ljust(maxlen) for name in row))
    return num
2012-10-11 10:03:12 +00:00
2012-12-12 16:41:29 +00:00
def getScraperName(scraperobj, limit=None):
    """Build the display name of a comic scraper.

    @param scraperobj: scraper providing get_name() and the adult flag
    @param limit: if not None, the name is passed through strlimit()
      with this limit before the adult marker is appended
    @return: scraper name, followed by " [A]" for adult comics
    """
    marker = ""
    if scraperobj.adult:
        marker = " [A]"
    name = scraperobj.get_name()
    if limit is None:
        return name + marker
    return strlimit(name, limit) + marker
2012-12-08 20:29:41 +00:00
def getScrapers(comics, basepath=None, adult=True):
    """Get scraper objects for the given comics.

    This is a generator with three selection modes:
      - '@' in comics: every scraper whose comic directory already
        exists below basepath
      - '@@' in comics: every known scraper
      - otherwise: the named scrapers; a name may carry indexes in the
        form "name:index1,index2", which are passed to the scraper

    Adult scrapers are skipped with a warning unless adult is true.

    @param comics: collection of comic names or the special names above
    @param basepath: base directory of the comic directories; required
      for the '@' mode
    @param adult: if false, scrapers flagged as adult are skipped
    @return: generator of scraper objects
    @raise ValueError: if '@' is used without a basepath. Lookup of an
      unknown comic name presumably raises ValueError as well (depends
      on scraper.get_scraper - verify there).
    """
    if '@' in comics:
        # only scrapers whose directory already exists
        if len(comics) > 1:
            out.warn("using '@' as comic name ignores all other specified comics.")
        if basepath is None:
            # os.path.join() would fail on None with an unrelated error;
            # raise the exception type the callers already handle
            raise ValueError("using '@' as comic name requires a base path")
        for scraperclass in scraper.get_scrapers():
            if not adult and scraperclass.adult:
                warn_adult(scraperclass)
                continue
            dirname = getDirname(scraperclass.get_name())
            if os.path.isdir(os.path.join(basepath, dirname)):
                yield scraperclass()
    elif '@@' in comics:
        # all scrapers
        for scraperclass in scraper.get_scrapers():
            if not adult and scraperclass.adult:
                warn_adult(scraperclass)
                continue
            yield scraperclass()
    else:
        # get only selected comic scrapers
        # store them in a list to catch naming errors early
        scrapers = []
        for comic in comics:
            if ':' in comic:
                name, index = comic.split(':', 1)
                indexes = index.split(',')
            else:
                name = comic
                indexes = None
            scraperclass = scraper.get_scraper(name)
            if not adult and scraperclass.adult:
                warn_adult(scraperclass)
                continue
            scrapers.append(scraperclass(indexes=indexes))
        for s in scrapers:
            yield s
2012-06-20 19:58:13 +00:00
2012-12-12 16:41:29 +00:00
def warn_adult(scraperclass):
    """Warn that an adult comic is skipped.

    @param scraperclass: scraper class providing get_name()
    """
    name = scraperclass.get_name()
    out.warn("skipping adult comic %s; use the --adult option to confirm your age" % name)
2012-06-20 19:58:13 +00:00
def main():
    """Program entry point: parse the command line and run the command.

    @return: exit code of the executed command, 1 if interrupted from
      the keyboard, 2 after any other exception (which is reported
      through internal_error())
    @rtype: int
    """
    if sys.argv[0].endswith("mainline"):
        out.warn("the 'mainline' program is deprecated, please use the new 'dosage' program")
    try:
        parser = setupOptions()
        options, args = parser.parse_args()
        # eliminate duplicate comic names
        return run(options, set(args))
    except KeyboardInterrupt:
        print("Aborted.")
        return 1
    except Exception:
        internal_error()
        return 2
def profile():
    """Run scraper.get_scrapers() under cProfile.

    The statistics are written to the file dosage.prof.
    """
    import cProfile
    statement = "scraper.get_scrapers()"
    statsfile = "dosage.prof"
    cProfile.run(statement, statsfile)
def viewprof():
    """Print the statistics stored in dosage.prof.

    Directory names are stripped, entries are sorted by cumulative time
    and the output is limited to 100 lines.
    """
    import pstats
    stats = pstats.Stats("dosage.prof")
    stripped = stats.strip_dirs()
    ordered = stripped.sort_stats("cumulative")
    ordered.print_stats(100)
2012-06-20 19:58:13 +00:00
if __name__ == '__main__':
    # hand the result of main() to the shell as process exit status
    exitcode = main()
    sys.exit(exitcode)
    # developer switches, kept disabled:
    #profile()
    #viewprof()