diff --git a/dosage b/dosage index 523139174..046b6fbad 100755 --- a/dosage +++ b/dosage @@ -28,6 +28,10 @@ from dosagelib.util import is_tty, get_columns, internal_error from dosagelib.configuration import App, Freeware, Copyright def setupOptions(): + """Construct option parser. + @return: new option parser + @rtype: optparse.OptionParser + """ usage = 'usage: %prog [options] comicModule [comicModule ...]' parser = optparse.OptionParser(usage=usage) parser.add_option('-v', '--verbose', action='count', dest='verbose', default=0, help='provides verbose output, use multiple times for more verbosity') @@ -44,30 +48,44 @@ def setupOptions(): parser.add_option('-p', '--progress', action='store_true', dest='progress', default=False, help='display progress bar while downloading comics') return parser + +def displayVersion(): + """Display application name, version, copyright and license.""" + print App + print Copyright + print Freeware + + class Dosage(object): + """Main program executing comic commands.""" def __init__(self, settings): + """Store settings and initialize internal variables.""" self.settings = settings self.errors = 0 def setOutputInfo(self): + """Set global output level and timestamp option.""" out.level = 0 out.level += self.settings['verbose'] out.timestamps = self.settings['timestamps'] def saveComic(self, comic): + """Save one comic strip in an output file.""" basepath = self.settings['basepath'] progress = self.settings.get('progress', False) fn, saved = comic.save(basepath, progress) return saved def saveComics(self, comics): + """Save a list of comics.""" saved = False for comic in comics: saved = self.saveComic(comic) or saved return saved def safeOp(self, fp, *args, **kwargs): + """Run a function and catch and report any errors.""" try: fp(*args, **kwargs) except Exception: @@ -79,10 +97,12 @@ class Dosage(object): out.writelines(traceback.format_exception_only(type, value)) def getCurrent(self): + """Retrieve and save all current comic 
strips.""" out.write('Retrieving the current strip...') self.saveComics(self.module.getCurrentComics()) def getIndex(self, index): + """Retrieve comics with given index.""" out.write('Retrieving index "%s"....' % (index,)) try: self.module.setStrip(index) @@ -91,12 +111,14 @@ class Dosage(object): out.write('No indexed retrieval support.') def catchup(self): + """Save all comics until the current date.""" out.write('Catching up...') for comics in self.module: if not self.saveComics(comics) and self.settings['catchup'] < 2: break def catchupIndex(self, index): + """Retrieve and save all comics from the given index.""" out.write('Catching up from index "%s"...' % (index,)) self.module.setStrip(index) for comics in self.module: @@ -104,15 +126,18 @@ class Dosage(object): break def getScrapers(self): + """Get list of scraper objects.""" return scraper.items() def getExistingComics(self): + """Get all existing comic scrapers.""" for scraper in self.getScrapers(): dirname = scraper.get_name().replace('/', os.sep) if os.path.isdir(os.path.join(self.settings['basepath'], dirname)): yield scraper def doList(self, columnList): + """List available comics.""" out.write('Available comic scrapers:') scrapers = self.getScrapers() if len(scrapers) > 0: @@ -123,9 +148,11 @@ class Dosage(object): out.write('%d supported comics.' 
% len(scrapers)) def doSingleList(self, scrapers): + """Print scraper names, one per line.""" print '\n'.join(scraper.get_name() for scraper in scrapers) def doColumnList(self, scrapers): + """Get list of scraper names with multiple names per line.""" screenWidth = get_columns() names = [scraper.get_name() for scraper in scrapers] maxlen = max([len(name) for name in names]) @@ -135,6 +162,7 @@ class Dosage(object): del names[:namesPerLine] def doCatchup(self): + """Catch up on comics.""" for comic in self.useComics(): if self.indices: self.safeOp(self.catchupIndex, self.indices[0]) @@ -142,6 +170,7 @@ class Dosage(object): self.safeOp(self.catchup) def doCurrent(self): + """Get current comics.""" for comic in self.useComics(): if self.indices: for index in self.indices: @@ -150,16 +179,19 @@ class Dosage(object): self.safeOp(self.getCurrent) def doHelp(self): + """Print help for comic strips.""" for scraper in self.useComics(): for line in scraper.getHelp().splitlines(): out.write("Help: "+line) def setupComic(self, scraper): + """Set up the internal comic module from given scraper.""" self.module = scraper() out.context = scraper.get_name() return self.module def useComics(self): + """Set all comic modules for the defined comics.""" for comic in self.comics: c = comic.split(':', 2) if len(c) > 1: @@ -177,12 +209,8 @@ class Dosage(object): else: yield self.setupComic(scraper.get(moduleName)) - def displayVersion(self): - print App - print Copyright - print Freeware - def run(self, comics): + """Execute comic commands.""" self.setOutputInfo() self.comics = comics @@ -191,7 +219,7 @@ class Dosage(object): events.handler.start() if self.settings['version']: - self.displayVersion() + displayVersion() elif self.settings['list']: self.doList(self.settings['list'] == 1) elif len(comics) <= 0: @@ -206,6 +234,7 @@ class Dosage(object): events.handler.end() def main(): + """Parse options and execute commands.""" try: parser = setupOptions() options, args = 
parser.parse_args() diff --git a/dosagelib/comic.py b/dosagelib/comic.py index 522d246d3..d513cf23d 100644 --- a/dosagelib/comic.py +++ b/dosagelib/comic.py @@ -14,12 +14,26 @@ from .util import urlopen, saneDataSize, normaliseURL from .progress import progressBar, OperationComplete from .events import handler -class FetchComicError(IOError): pass +class FetchComicError(IOError): + """Exception for comic fetching errors.""" + pass class Comic(object): + """Download and save a single comic.""" + def __init__(self, moduleName, url, referrer=None, filename=None): + """Set URL and filename.""" self.moduleName = moduleName - url = normaliseURL(url) + self.url = normaliseURL(url) + self.referrer = referrer + if filename is None: + filename = url.split('/')[-1] + self.filename, self.ext = os.path.splitext(filename) + self.filename = self.filename.replace(os.sep, '_') + self.ext = self.ext.replace(os.sep, '_') + + def connect(self): + """Connect to host and get meta information.""" out.write('Getting headers for %s...' % (url,), 2) try: self.urlobj = urlopen(url, referrer=referrer) @@ -30,9 +44,6 @@ class Comic(object): self.urlobj.info().gettype() not in ('application/octet-stream', 'application/x-shockwave-flash'): raise FetchComicError, ('No suitable image found to retrieve.', url) - self.filename, self.ext = os.path.splitext(url.split('/')[-1]) - self.filename = filename or self.filename - self.filename = self.filename.replace(os.sep, '_') # Always use mime type for file extension if it is sane. if self.urlobj.info().getmaintype() == 'image': self.ext = '.' + self.urlobj.info().getsubtype() @@ -41,6 +52,7 @@ class Comic(object): out.write('... 
filename = "%s", ext = "%s", contentLength = %d' % (self.filename, self.ext, self.contentLength), 2) def touch(self, filename): + """Set last modified date on filename.""" if self.lastModified: tt = rfc822.parsedate(self.lastModified) if tt: @@ -48,6 +60,8 @@ class Comic(object): os.utime(filename, (mtime, mtime)) def save(self, basepath, showProgress=False): + """Save comic URL to filename on disk.""" + self.connect() comicName, comicExt = self.filename, self.ext comicSize = self.contentLength comicDir = os.path.join(basepath, self.moduleName.replace('/', os.sep)) diff --git a/dosagelib/configuration.py b/dosagelib/configuration.py index 505f5fad6..8e2033666 100644 --- a/dosagelib/configuration.py +++ b/dosagelib/configuration.py @@ -1,3 +1,6 @@ +""" +Define basic configuration data like version or application name. +""" import _Dosage_configdata as configdata Version = configdata.version diff --git a/dosagelib/events.py b/dosagelib/events.py index c2ac82e9d..c6848f07f 100644 --- a/dosagelib/events.py +++ b/dosagelib/events.py @@ -7,7 +7,11 @@ import urllib import util class EventHandler(object): + """Base class for writing events to files. The currently defined events are + start(), comicDownloaded() and end().""" + def __init__(self, basepath, baseurl): + """Initialize base path and url.""" self.basepath = basepath self.baseurl = baseurl or self.getBaseUrl() @@ -21,35 +25,46 @@ class EventHandler(object): return 'file:///' + url + '/' def getUrlFromFilename(self, filename): + """Construct URL from filename.""" components = util.splitpath(util.getRelativePath(self.basepath, filename)) url = '/'.join([urllib.quote(component, '') for component in components]) return self.baseurl + url def start(self): + """Emit a start event. Should be overridden in subclass.""" pass def comicDownloaded(self, comic, filename): + """Emit a comic downloaded event. Should be overridden in subclass.""" pass def end(self): + """Emit an end event. 
Should be overridden in subclass.""" pass class TextEventHandler(EventHandler): + """Output nothing. XXX why?""" pass class RSSEventHandler(EventHandler): + """Output in RSS format.""" + def RFC822Date(self, indate): + """Format date in rfc822 format. XXX move to util module.""" return time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(indate)) def getFilename(self): + """Return RSS filename.""" return os.path.abspath(os.path.join(self.basepath, 'dailydose.rss')) def start(self): + """Log start event.""" today = time.time() yesterday = today - 86400 today = time.localtime(today) yesterday = time.localtime(yesterday) + # XXX replace with conf var link = 'https://github.com/wummel/dosage' self.rssfn = self.getFilename() @@ -62,6 +77,7 @@ class RSSEventHandler(EventHandler): self.rss = rss.Feed('Daily Dosage', link, 'Comics for %s' % time.strftime('%Y/%m/%d', today)) def comicDownloaded(self, comic, filename): + """Write RSS entry for downloaded comic.""" url = self.getUrlFromFilename(filename) args = ( '%s - %s' % (comic, os.path.basename(filename)), @@ -77,16 +93,22 @@ class RSSEventHandler(EventHandler): self.rss.insertHead(*args) def end(self): + """Write RSS data to file.""" self.rss.write(self.rssfn) + class HtmlEventHandler(EventHandler): + """Output in HTML format.""" + def fnFromDate(self, date): + """Get filename from date.""" fn = time.strftime('comics-%Y%m%d.html', date) fn = os.path.join(self.basepath, 'html', fn) fn = os.path.abspath(fn) return fn def start(self): + """Start HTML output.""" today = time.time() yesterday = today - 86400 tomorrow = today + 86400 @@ -117,12 +139,14 @@ class HtmlEventHandler(EventHandler): self.lastComic = None def comicDownloaded(self, comic, filename): + """Write HTML entry for downloaded comic.""" if self.lastComic != comic: self.newComic(comic) url = self.getUrlFromFilename(filename) self.html.write('
  • %s
  • \n' % (url, os.path.basename(filename))) def newComic(self, comic): + """Start new comic list in HTML.""" if self.lastComic is not None: self.html.write(' \n') self.lastComic = comic @@ -131,6 +155,7 @@ class HtmlEventHandler(EventHandler): ''' % (comic,)) def end(self): + """End HTML output.""" if self.lastComic is not None: self.html.write(' \n') self.html.write(''' @@ -146,11 +171,11 @@ handlers = { } def getHandlers(): - l = handlers.keys() - l.sort() - return l + """Get sorted handler names.""" + return sorted(handlers.keys()) def installHandler(name=None, basepath=None, baseurl=None): + """Install a global handler with given name.""" global handler if name is None: name = 'text' diff --git a/dosagelib/helpers.py b/dosagelib/helpers.py index 1741a1e28..eb4be5dab 100644 --- a/dosagelib/helpers.py +++ b/dosagelib/helpers.py @@ -25,18 +25,22 @@ class _BasicScraper(object): help = 'Sorry, no help for this comic yet.' def __init__(self): + """Initialize internal variables.""" self.currentUrl = None self.urls = set() def getReferrer(self, imageUrl, pageUrl): + """Return referrer for HTTP connection.""" return self.referrer or pageUrl or self.getLatestUrl() def getComic(self, url, pageUrl): + """Get comic downloader for given URL and page.""" if not url: return None return Comic(self.get_name(), url, filename=self.getFilename(url, pageUrl), referrer=self.getReferrer(url, pageUrl)) def getCurrentComics(self): + """Get list of current comics.""" self.currentUrl = self.getLatestUrl() comics = self.getNextComics() if not comics: @@ -44,6 +48,7 @@ class _BasicScraper(object): return comics def getNextComics(self): + """Get all next comics.""" comics = [] while not comics and self.currentUrl and self.currentUrl not in self.urls: comicUrlGroups, prevUrl = fetchManyUrls(self.currentUrl, [self.imageSearch, self.prevSearch]) @@ -61,16 +66,17 @@ class _BasicScraper(object): return comics def setStrip(self, index): + """Set current comic strip URL.""" self.currentUrl = 
self.imageUrl % index def getHelp(self): + """Return help text for this scraper.""" return self.help def __iter__(self): """Iterate through the strips, starting from the current one and going backward.""" if not self.currentUrl: self.currentUrl = self.getLatestUrl() - comics = True while comics: comics = self.getNextComics() @@ -79,26 +85,32 @@ class _BasicScraper(object): @classmethod def get_name(cls): + """Get scraper name.""" if hasattr(cls, 'name'): return cls.name return cls.__name__ @classmethod def starter(cls): + """Get starter URL from where to scrape comic strips.""" return cls.latestUrl @classmethod def namer(cls, imageUrl, pageUrl): + """Return filename for given image and page URL.""" return None def getFilename(self, imageUrl, pageUrl): + """Return filename for given image and page URL.""" return self.namer(imageUrl, pageUrl) def getLatestUrl(self): + """Get starter URL from where to scrape comic strips.""" return self.starter() def queryNamer(paramName, usePageUrl=False): + """Get name from URL query part.""" @staticmethod def _namer(imageUrl, pageUrl): url = (imageUrl, pageUrl)[usePageUrl] @@ -107,6 +119,7 @@ def queryNamer(paramName, usePageUrl=False): def regexNamer(regex): + """Get name from regular expression.""" @staticmethod def _namer(imageUrl, pageUrl): return regex.search(imageUrl).group(1) @@ -114,6 +127,7 @@ def regexNamer(regex): def constStarter(latestUrl): + """Start from constant URL.""" @staticmethod def _starter(): return latestUrl @@ -121,6 +135,7 @@ def constStarter(latestUrl): def bounceStarter(latestUrl, nextSearch): + """Get start URL by "bouncing" back and forth one time.""" @classmethod def _starter(cls): url = fetchUrl(latestUrl, cls.prevSearch) @@ -131,6 +146,7 @@ def bounceStarter(latestUrl, nextSearch): def indirectStarter(baseUrl, latestSearch): + """Get start URL by indirection.""" @staticmethod def _starter(): return fetchUrl(baseUrl, latestSearch) @@ -156,6 +172,7 @@ class IndirectLatestMixin(object): __latestUrl = 
None def getLatestUrl(self): + """Get latest comic URL.""" if not self.__latestUrl: self.__latestUrl = fetchUrl(self.baseUrl, self.latestSearch) if hasattr(self, "nextSearch"): @@ -170,7 +187,7 @@ class IndirectLatestMixin(object): class _PHPScraper(_BasicScraper): """ - I implement IScraper for comics using phpComic/CUSP. + Scraper for comics using phpComic/CUSP. This provides an easy way to define scrapers for webcomics using phpComic. """ @@ -181,4 +198,5 @@ class _PHPScraper(_BasicScraper): @classmethod def starter(cls): + """Get starter URL.""" return cls.basePath + cls.latestUrl diff --git a/dosagelib/output.py b/dosagelib/output.py index 24443c47a..79d6c31ed 100644 --- a/dosagelib/output.py +++ b/dosagelib/output.py @@ -4,12 +4,16 @@ import time class Output(object): + """Print output with context, indentation and optional timestamps.""" + def __init__(self): + """Initialize context and indentation.""" self.context = '' self.level = 0 self.timestamps = False def write(self, s, level=0): + """Write message with indentation, context and optional timestamp.""" if level > self.level: return if self.level > 1 or self.timestamps: @@ -19,6 +23,7 @@ class Output(object): print '%s%s> %s' % (timestamp, self.context, s) def writelines(self, lines, level=0): + """Write multiple messages.""" for line in lines: for line in line.rstrip('\n').split('\n'): self.write(line.rstrip('\n'), level=level) diff --git a/dosagelib/progress.py b/dosagelib/progress.py index 1c90cef79..d068380ca 100644 --- a/dosagelib/progress.py +++ b/dosagelib/progress.py @@ -8,6 +8,7 @@ import time from . 
import util class Guess(object): + def __init__(self, weight): self.weight = weight self.guess = 0 @@ -19,6 +20,7 @@ class Guess(object): def distance(self, value): return (self.guess - value) ** 2 + class FortuneTeller(object): weights = (0.2, 0.3, 0.4) @@ -34,7 +36,9 @@ class FortuneTeller(object): def predict(self): return max([(guess.best, guess) for guess in self.guesses])[1].guess -class OperationComplete(Exception): pass + +class OperationComplete(Exception): + pass def drawBar(fill, total, caption): screenWidth = util.getWindowSize() @@ -49,6 +53,7 @@ def drawBar(fill, total, caption): sys.stdout.write(mask % ('=' * fillWidth, '-' * emptyWidth)) sys.stdout.flush() + def drawBounceBar(pos, caption): screenWidth = util.getWindowSize() mask = '[%%s<=>%%s] %s' % (caption,) @@ -61,6 +66,7 @@ def drawBounceBar(pos, caption): sys.stdout.write(mask % (' ' * leftWidth, ' ' * rightWidth)) sys.stdout.flush() + def progressBar(fn): completed = bps = 0 count = 0 diff --git a/dosagelib/util.py b/dosagelib/util.py index 67df1e0d3..8c3f6768d 100644 --- a/dosagelib/util.py +++ b/dosagelib/util.py @@ -22,7 +22,8 @@ has_curses = has_module("curses") has_fcntl = has_module('fcntl') has_termios = has_module('termios') -class NoMatchError(Exception): pass +class NoMatchError(Exception): + pass def getMatchValues(matches): return set([match.group(1) for match in matches]) diff --git a/setup.py b/setup.py index 1a5a097ee..b194a4dc0 100644 --- a/setup.py +++ b/setup.py @@ -105,6 +105,7 @@ class MyInstallLib (install_lib, object): return self.get_conf_output() def get_conf_output (self): + """Get filename for distribution configuration file.""" return self.distribution.get_conf_filename(self.install_lib) def get_outputs (self):