Document some functions.

This commit is contained in:
Bastian Kleineidam 2012-09-26 16:47:39 +02:00
parent 4a53639e79
commit cc2a8df98f
9 changed files with 120 additions and 18 deletions

41
dosage
View file

@ -28,6 +28,10 @@ from dosagelib.util import is_tty, get_columns, internal_error
from dosagelib.configuration import App, Freeware, Copyright
def setupOptions():
"""Construct option parser.
@return: new option parser
@rtype: optparse.OptionParser
"""
usage = 'usage: %prog [options] comicModule [comicModule ...]'
parser = optparse.OptionParser(usage=usage)
parser.add_option('-v', '--verbose', action='count', dest='verbose', default=0, help='provides verbose output, use multiple times for more verbosity')
@ -44,30 +48,44 @@ def setupOptions():
parser.add_option('-p', '--progress', action='store_true', dest='progress', default=False, help='display progress bar while downloading comics')
return parser
def displayVersion():
"""Display application name, version, copyright and license."""
print App
print Copyright
print Freeware
class Dosage(object):
"""Main program executing comic commands."""
def __init__(self, settings):
"""Store settings and initialize internal variables."""
self.settings = settings
self.errors = 0
def setOutputInfo(self):
"""Set global output level and timestamp option."""
out.level = 0
out.level += self.settings['verbose']
out.timestamps = self.settings['timestamps']
def saveComic(self, comic):
"""Save one comic strip in an output file.

Reads the target directory from settings['basepath'] and the optional
settings['progress'] flag (False when absent) and passes both to
comic.save().
@param comic: object providing save(basepath, progress)
@return: the second element of the pair returned by comic.save();
  the first element is unpacked as fn and not used
"""
basepath = self.settings['basepath']
progress = self.settings.get('progress', False)
fn, saved = comic.save(basepath, progress)
return saved
def saveComics(self, comics):
"""Save a list of comics.

saveComic() is called for every entry, even after an earlier entry
has already reported success, because the call is evaluated before
the "or".
@param comics: iterable of comic objects accepted by saveComic()
@return: the last truthy saveComic() result, or False if there was
  none (including when comics is empty)
"""
saved = False
for comic in comics:
saved = self.saveComic(comic) or saved
return saved
def safeOp(self, fp, *args, **kwargs):
"""Run a function and catch and report any errors."""
try:
fp(*args, **kwargs)
except Exception:
@ -79,10 +97,12 @@ class Dosage(object):
out.writelines(traceback.format_exception_only(type, value))
def getCurrent(self):
"""Retrieve and save all current comic strips."""
out.write('Retrieving the current strip...')
self.saveComics(self.module.getCurrentComics())
def getIndex(self, index):
"""Retrieve comics with given index."""
out.write('Retrieving index "%s"....' % (index,))
try:
self.module.setStrip(index)
@ -91,12 +111,14 @@ class Dosage(object):
out.write('No indexed retrieval support.')
def catchup(self):
"""Save comics batch by batch while iterating over self.module.

Iteration stops at the first batch for which saveComics() reports
that nothing was saved, unless settings['catchup'] is 2 or higher;
in that case it continues until self.module is exhausted.
"""
out.write('Catching up...')
for comics in self.module:
if not self.saveComics(comics) and self.settings['catchup'] < 2:
break
def catchupIndex(self, index):
"""Retrieve and save all comics from the given index."""
out.write('Catching up from index "%s"...' % (index,))
self.module.setStrip(index)
for comics in self.module:
@ -104,15 +126,18 @@ class Dosage(object):
break
def getScrapers(self):
"""Get list of scraper objects."""
return scraper.items()
def getExistingComics(self):
"""Generate the scrapers that already have a comic directory.

A scraper from getScrapers() is yielded when settings['basepath']
contains a directory named after scraper.get_name(), with every '/'
in that name replaced by os.sep.
"""
for scraper in self.getScrapers():
dirname = scraper.get_name().replace('/', os.sep)
if os.path.isdir(os.path.join(self.settings['basepath'], dirname)):
yield scraper
def doList(self, columnList):
"""List available comics."""
out.write('Available comic scrapers:')
scrapers = self.getScrapers()
if len(scrapers) > 0:
@ -123,9 +148,11 @@ class Dosage(object):
out.write('%d supported comics.' % len(scrapers))
def doSingleList(self, scrapers):
"""Get list of scraper names, one per line."""
print '\n'.join(scraper.get_name() for scraper in scrapers)
def doColumnList(self, scrapers):
"""Get list of scraper names with multiple names per line."""
screenWidth = get_columns()
names = [scraper.get_name() for scraper in scrapers]
maxlen = max([len(name) for name in names])
@ -135,6 +162,7 @@ class Dosage(object):
del names[:namesPerLine]
def doCatchup(self):
"""Catch up on comics."""
for comic in self.useComics():
if self.indices:
self.safeOp(self.catchupIndex, self.indices[0])
@ -142,6 +170,7 @@ class Dosage(object):
self.safeOp(self.catchup)
def doCurrent(self):
"""Get current comics."""
for comic in self.useComics():
if self.indices:
for index in self.indices:
@ -150,16 +179,19 @@ class Dosage(object):
self.safeOp(self.getCurrent)
def doHelp(self):
"""Print help for comic strips."""
for scraper in self.useComics():
for line in scraper.getHelp().splitlines():
out.write("Help: "+line)
def setupComic(self, scraper):
"""Set up the internal comic module from the given scraper.

Calls scraper() and stores the result in self.module, then sets
out.context to scraper.get_name().
@param scraper: callable that also provides get_name()
@return: the object stored in self.module
"""
self.module = scraper()
out.context = scraper.get_name()
return self.module
def useComics(self):
"""Set all comic modules for the defined comics."""
for comic in self.comics:
c = comic.split(':', 2)
if len(c) > 1:
@ -177,12 +209,8 @@ class Dosage(object):
else:
yield self.setupComic(scraper.get(moduleName))
def displayVersion(self):
print App
print Copyright
print Freeware
def run(self, comics):
"""Execute comic commands."""
self.setOutputInfo()
self.comics = comics
@ -191,7 +219,7 @@ class Dosage(object):
events.handler.start()
if self.settings['version']:
self.displayVersion()
displayVersion()
elif self.settings['list']:
self.doList(self.settings['list'] == 1)
elif len(comics) <= 0:
@ -206,6 +234,7 @@ class Dosage(object):
events.handler.end()
def main():
"""Parse options and execute commands."""
try:
parser = setupOptions()
options, args = parser.parse_args()

View file

@ -14,12 +14,26 @@ from .util import urlopen, saneDataSize, normaliseURL
from .progress import progressBar, OperationComplete
from .events import handler
class FetchComicError(IOError): pass
class FetchComicError(IOError):
"""Exception for comic fetching errors."""
pass
class Comic(object):
"""Download and save a single comic."""
def __init__(self, moduleName, url, referrer=None, filename=None):
"""Set URL and filename."""
self.moduleName = moduleName
url = normaliseURL(url)
self.url = normaliseURL(url)
self.referrer = referrer
if filename is None:
filename = url.split('/')[-1]
self.filename, self.ext = os.path.splitext(filename)
self.filename = self.filename.replace(os.sep, '_')
self.ext = self.ext.replace(os.sep, '_')
def connect(self):
"""Connect to host and get meta information."""
out.write('Getting headers for %s...' % (url,), 2)
try:
self.urlobj = urlopen(url, referrer=referrer)
@ -30,9 +44,6 @@ class Comic(object):
self.urlobj.info().gettype() not in ('application/octet-stream', 'application/x-shockwave-flash'):
raise FetchComicError, ('No suitable image found to retrieve.', url)
self.filename, self.ext = os.path.splitext(url.split('/')[-1])
self.filename = filename or self.filename
self.filename = self.filename.replace(os.sep, '_')
# Always use mime type for file extension if it is sane.
if self.urlobj.info().getmaintype() == 'image':
self.ext = '.' + self.urlobj.info().getsubtype()
@ -41,6 +52,7 @@ class Comic(object):
out.write('... filename = "%s", ext = "%s", contentLength = %d' % (self.filename, self.ext, self.contentLength), 2)
def touch(self, filename):
"""Set last modified date on filename."""
if self.lastModified:
tt = rfc822.parsedate(self.lastModified)
if tt:
@ -48,6 +60,8 @@ class Comic(object):
os.utime(filename, (mtime, mtime))
def save(self, basepath, showProgress=False):
"""Save comic URL to filename on disk."""
self.connect()
comicName, comicExt = self.filename, self.ext
comicSize = self.contentLength
comicDir = os.path.join(basepath, self.moduleName.replace('/', os.sep))

View file

@ -1,3 +1,6 @@
"""
Define basic configuration data like version or application name.
"""
import _Dosage_configdata as configdata
Version = configdata.version

View file

@ -7,7 +7,11 @@ import urllib
import util
class EventHandler(object):
"""Base class for writing events to files. The currently defined events are
start(), comicDownloaded() and end()."""
def __init__(self, basepath, baseurl):
"""Initialize base path and url."""
self.basepath = basepath
self.baseurl = baseurl or self.getBaseUrl()
@ -21,35 +25,46 @@ class EventHandler(object):
return 'file:///' + url + '/'
def getUrlFromFilename(self, filename):
"""Construct URL from filename."""
components = util.splitpath(util.getRelativePath(self.basepath, filename))
url = '/'.join([urllib.quote(component, '') for component in components])
return self.baseurl + url
def start(self):
"""Emit a start event. Should be overridden in subclass."""
pass
def comicDownloaded(self, comic, filename):
"""Emit a comic downloaded event. Should be overridden in subclass."""
pass
def end(self):
"""Emit an end event. Should be overridden in subclass."""
pass
class TextEventHandler(EventHandler):
"""Output nothing. XXX why?"""
pass
class RSSEventHandler(EventHandler):
"""Output in RSS format."""
def RFC822Date(self, indate):
"""Format date in rfc822 format. XXX move to util module.

@param indate: value handed to time.gmtime(); presumably seconds
  since the epoch - TODO confirm against callers
@return: the time converted by time.gmtime() and formatted as
  '%a, %d %b %Y %H:%M:%S GMT'
"""
return time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(indate))
def getFilename(self):
"""Return RSS filename."""
return os.path.abspath(os.path.join(self.basepath, 'dailydose.rss'))
def start(self):
"""Log start event."""
today = time.time()
yesterday = today - 86400
today = time.localtime(today)
yesterday = time.localtime(yesterday)
# XXX replace with conf var
link = 'https://github.com/wummel/dosage'
self.rssfn = self.getFilename()
@ -62,6 +77,7 @@ class RSSEventHandler(EventHandler):
self.rss = rss.Feed('Daily Dosage', link, 'Comics for %s' % time.strftime('%Y/%m/%d', today))
def comicDownloaded(self, comic, filename):
"""Write RSS entry for downloaded comic."""
url = self.getUrlFromFilename(filename)
args = (
'%s - %s' % (comic, os.path.basename(filename)),
@ -77,16 +93,22 @@ class RSSEventHandler(EventHandler):
self.rss.insertHead(*args)
def end(self):
"""Write RSS data to file."""
self.rss.write(self.rssfn)
class HtmlEventHandler(EventHandler):
"""Output in HTML format."""
def fnFromDate(self, date):
"""Get filename from date."""
fn = time.strftime('comics-%Y%m%d.html', date)
fn = os.path.join(self.basepath, 'html', fn)
fn = os.path.abspath(fn)
return fn
def start(self):
"""Start HTML output."""
today = time.time()
yesterday = today - 86400
tomorrow = today + 86400
@ -117,12 +139,14 @@ class HtmlEventHandler(EventHandler):
self.lastComic = None
def comicDownloaded(self, comic, filename):
"""Write HTML entry for downloaded comic."""
if self.lastComic != comic:
self.newComic(comic)
url = self.getUrlFromFilename(filename)
self.html.write(' <li><a href="%s">%s</a></li>\n' % (url, os.path.basename(filename)))
def newComic(self, comic):
"""Start new comic list in HTML."""
if self.lastComic is not None:
self.html.write(' </ul>\n')
self.lastComic = comic
@ -131,6 +155,7 @@ class HtmlEventHandler(EventHandler):
''' % (comic,))
def end(self):
"""End HTML output."""
if self.lastComic is not None:
self.html.write(' </ul>\n')
self.html.write('''</ul>
@ -146,11 +171,11 @@ handlers = {
}
def getHandlers():
l = handlers.keys()
l.sort()
return l
"""Get sorted handler names."""
return sorted(handlers.keys())
def installHandler(name=None, basepath=None, baseurl=None):
"""Install a global handler with given name."""
global handler
if name is None:
name = 'text'

View file

@ -25,18 +25,22 @@ class _BasicScraper(object):
help = 'Sorry, no help for this comic yet.'
def __init__(self):
"""Initialize internal variables."""
self.currentUrl = None
self.urls = set()
def getReferrer(self, imageUrl, pageUrl):
"""Return referrer for HTTP connection."""
return self.referrer or pageUrl or self.getLatestUrl()
def getComic(self, url, pageUrl):
"""Get comic downloader for given URL and page."""
if not url:
return None
return Comic(self.get_name(), url, filename=self.getFilename(url, pageUrl), referrer=self.getReferrer(url, pageUrl))
def getCurrentComics(self):
"""Get list of current comics."""
self.currentUrl = self.getLatestUrl()
comics = self.getNextComics()
if not comics:
@ -44,6 +48,7 @@ class _BasicScraper(object):
return comics
def getNextComics(self):
"""Get all next comics."""
comics = []
while not comics and self.currentUrl and self.currentUrl not in self.urls:
comicUrlGroups, prevUrl = fetchManyUrls(self.currentUrl, [self.imageSearch, self.prevSearch])
@ -61,16 +66,17 @@ class _BasicScraper(object):
return comics
def setStrip(self, index):
"""Set current comic strip URL."""
self.currentUrl = self.imageUrl % index
def getHelp(self):
"""Return help text for this scraper."""
return self.help
def __iter__(self):
"""Iterate through the strips, starting from the current one and going backward."""
if not self.currentUrl:
self.currentUrl = self.getLatestUrl()
comics = True
while comics:
comics = self.getNextComics()
@ -79,26 +85,32 @@ class _BasicScraper(object):
@classmethod
def get_name(cls):
"""Get scraper name.

@return: the 'name' attribute of the class if it has one, otherwise
  the class's __name__
"""
if hasattr(cls, 'name'):
return cls.name
return cls.__name__
@classmethod
def starter(cls):
"""Get starter URL from where to scrape comic strips."""
return cls.latestUrl
@classmethod
def namer(cls, imageUrl, pageUrl):
"""Return filename for given image and page URL."""
return None
def getFilename(self, imageUrl, pageUrl):
"""Return filename for given image and page URL."""
return self.namer(imageUrl, pageUrl)
def getLatestUrl(self):
"""Get starter URL from where to scrape comic strips."""
return self.starter()
def queryNamer(paramName, usePageUrl=False):
"""Get name from URL query part."""
@staticmethod
def _namer(imageUrl, pageUrl):
url = (imageUrl, pageUrl)[usePageUrl]
@ -107,6 +119,7 @@ def queryNamer(paramName, usePageUrl=False):
def regexNamer(regex):
"""Get name from regular expression."""
@staticmethod
def _namer(imageUrl, pageUrl):
return regex.search(imageUrl).group(1)
@ -114,6 +127,7 @@ def regexNamer(regex):
def constStarter(latestUrl):
"""Start from constant URL."""
@staticmethod
def _starter():
return latestUrl
@ -121,6 +135,7 @@ def constStarter(latestUrl):
def bounceStarter(latestUrl, nextSearch):
"""Get start URL by "bouncing" back and forth one time."""
@classmethod
def _starter(cls):
url = fetchUrl(latestUrl, cls.prevSearch)
@ -131,6 +146,7 @@ def bounceStarter(latestUrl, nextSearch):
def indirectStarter(baseUrl, latestSearch):
"""Get start URL by indirection."""
@staticmethod
def _starter():
return fetchUrl(baseUrl, latestSearch)
@ -156,6 +172,7 @@ class IndirectLatestMixin(object):
__latestUrl = None
def getLatestUrl(self):
"""Get latest comic URL."""
if not self.__latestUrl:
self.__latestUrl = fetchUrl(self.baseUrl, self.latestSearch)
if hasattr(self, "nextSearch"):
@ -170,7 +187,7 @@ class IndirectLatestMixin(object):
class _PHPScraper(_BasicScraper):
"""
I implement IScraper for comics using phpComic/CUSP.
Scraper for comics using phpComic/CUSP.
This provides an easy way to define scrapers for webcomics using phpComic.
"""
@ -181,4 +198,5 @@ class _PHPScraper(_BasicScraper):
@classmethod
def starter(cls):
"""Get starter URL."""
return cls.basePath + cls.latestUrl

View file

@ -4,12 +4,16 @@
import time
class Output(object):
"""Print output with context, indentation and optional timestamps."""
def __init__(self):
"""Initialize context and indentation."""
self.context = ''
self.level = 0
self.timestamps = False
def write(self, s, level=0):
"""Write message with indentation, context and optional timestamp."""
if level > self.level:
return
if self.level > 1 or self.timestamps:
@ -19,6 +23,7 @@ class Output(object):
print '%s%s> %s' % (timestamp, self.context, s)
def writelines(self, lines, level=0):
"""Write multiple messages.

Each entry of lines has its trailing newlines removed and is then
split at embedded newlines; every resulting piece is passed to
write() separately with the given level.
@param lines: iterable of strings
@param level: level forwarded unchanged to write()
"""
for line in lines:
for line in line.rstrip('\n').split('\n'):
self.write(line.rstrip('\n'), level=level)

View file

@ -8,6 +8,7 @@ import time
from . import util
class Guess(object):
def __init__(self, weight):
self.weight = weight
self.guess = 0
@ -19,6 +20,7 @@ class Guess(object):
def distance(self, value):
return (self.guess - value) ** 2
class FortuneTeller(object):
weights = (0.2, 0.3, 0.4)
@ -34,7 +36,9 @@ class FortuneTeller(object):
def predict(self):
return max([(guess.best, guess) for guess in self.guesses])[1].guess
class OperationComplete(Exception): pass
class OperationComplete(Exception):
pass
def drawBar(fill, total, caption):
screenWidth = util.getWindowSize()
@ -49,6 +53,7 @@ def drawBar(fill, total, caption):
sys.stdout.write(mask % ('=' * fillWidth, '-' * emptyWidth))
sys.stdout.flush()
def drawBounceBar(pos, caption):
screenWidth = util.getWindowSize()
mask = '[%%s<=>%%s] %s' % (caption,)
@ -61,6 +66,7 @@ def drawBounceBar(pos, caption):
sys.stdout.write(mask % (' ' * leftWidth, ' ' * rightWidth))
sys.stdout.flush()
def progressBar(fn):
completed = bps = 0
count = 0

View file

@ -22,7 +22,8 @@ has_curses = has_module("curses")
has_fcntl = has_module('fcntl')
has_termios = has_module('termios')
class NoMatchError(Exception): pass
class NoMatchError(Exception):
pass
def getMatchValues(matches):
return set([match.group(1) for match in matches])

View file

@ -105,6 +105,7 @@ class MyInstallLib (install_lib, object):
return self.get_conf_output()
def get_conf_output (self):
"""Get filename for distribution configuration file."""
return self.distribution.get_conf_filename(self.install_lib)
def get_outputs (self):