Document some functions.
This commit is contained in:
parent
4a53639e79
commit
cc2a8df98f
9 changed files with 120 additions and 18 deletions
41
dosage
41
dosage
|
@ -28,6 +28,10 @@ from dosagelib.util import is_tty, get_columns, internal_error
|
||||||
from dosagelib.configuration import App, Freeware, Copyright
|
from dosagelib.configuration import App, Freeware, Copyright
|
||||||
|
|
||||||
def setupOptions():
|
def setupOptions():
|
||||||
|
"""Construct option parser.
|
||||||
|
@return: new option parser
|
||||||
|
@rtype: optparse.OptionParser
|
||||||
|
"""
|
||||||
usage = 'usage: %prog [options] comicModule [comicModule ...]'
|
usage = 'usage: %prog [options] comicModule [comicModule ...]'
|
||||||
parser = optparse.OptionParser(usage=usage)
|
parser = optparse.OptionParser(usage=usage)
|
||||||
parser.add_option('-v', '--verbose', action='count', dest='verbose', default=0, help='provides verbose output, use multiple times for more verbosity')
|
parser.add_option('-v', '--verbose', action='count', dest='verbose', default=0, help='provides verbose output, use multiple times for more verbosity')
|
||||||
|
@ -44,30 +48,44 @@ def setupOptions():
|
||||||
parser.add_option('-p', '--progress', action='store_true', dest='progress', default=False, help='display progress bar while downloading comics')
|
parser.add_option('-p', '--progress', action='store_true', dest='progress', default=False, help='display progress bar while downloading comics')
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
def displayVersion():
|
||||||
|
"""Display application name, version, copyright and license."""
|
||||||
|
print App
|
||||||
|
print Copyright
|
||||||
|
print Freeware
|
||||||
|
|
||||||
|
|
||||||
class Dosage(object):
|
class Dosage(object):
|
||||||
|
"""Main program executing comic commands."""
|
||||||
|
|
||||||
def __init__(self, settings):
|
def __init__(self, settings):
|
||||||
|
"""Store settings and initialize internal variables."""
|
||||||
self.settings = settings
|
self.settings = settings
|
||||||
self.errors = 0
|
self.errors = 0
|
||||||
|
|
||||||
def setOutputInfo(self):
|
def setOutputInfo(self):
|
||||||
|
"""Set global output level and timestamp option."""
|
||||||
out.level = 0
|
out.level = 0
|
||||||
out.level += self.settings['verbose']
|
out.level += self.settings['verbose']
|
||||||
out.timestamps = self.settings['timestamps']
|
out.timestamps = self.settings['timestamps']
|
||||||
|
|
||||||
def saveComic(self, comic):
|
def saveComic(self, comic):
|
||||||
|
"""Save one comic strip in an output file."""
|
||||||
basepath = self.settings['basepath']
|
basepath = self.settings['basepath']
|
||||||
progress = self.settings.get('progress', False)
|
progress = self.settings.get('progress', False)
|
||||||
fn, saved = comic.save(basepath, progress)
|
fn, saved = comic.save(basepath, progress)
|
||||||
return saved
|
return saved
|
||||||
|
|
||||||
def saveComics(self, comics):
|
def saveComics(self, comics):
|
||||||
|
"""Save a list of comics."""
|
||||||
saved = False
|
saved = False
|
||||||
for comic in comics:
|
for comic in comics:
|
||||||
saved = self.saveComic(comic) or saved
|
saved = self.saveComic(comic) or saved
|
||||||
return saved
|
return saved
|
||||||
|
|
||||||
def safeOp(self, fp, *args, **kwargs):
|
def safeOp(self, fp, *args, **kwargs):
|
||||||
|
"""Run a function and catch and report any errors."""
|
||||||
try:
|
try:
|
||||||
fp(*args, **kwargs)
|
fp(*args, **kwargs)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
@ -79,10 +97,12 @@ class Dosage(object):
|
||||||
out.writelines(traceback.format_exception_only(type, value))
|
out.writelines(traceback.format_exception_only(type, value))
|
||||||
|
|
||||||
def getCurrent(self):
|
def getCurrent(self):
|
||||||
|
"""Retrieve and save all current comic strips."""
|
||||||
out.write('Retrieving the current strip...')
|
out.write('Retrieving the current strip...')
|
||||||
self.saveComics(self.module.getCurrentComics())
|
self.saveComics(self.module.getCurrentComics())
|
||||||
|
|
||||||
def getIndex(self, index):
|
def getIndex(self, index):
|
||||||
|
"""Retrieve comics with given index."""
|
||||||
out.write('Retrieving index "%s"....' % (index,))
|
out.write('Retrieving index "%s"....' % (index,))
|
||||||
try:
|
try:
|
||||||
self.module.setStrip(index)
|
self.module.setStrip(index)
|
||||||
|
@ -91,12 +111,14 @@ class Dosage(object):
|
||||||
out.write('No indexed retrieval support.')
|
out.write('No indexed retrieval support.')
|
||||||
|
|
||||||
def catchup(self):
|
def catchup(self):
|
||||||
|
"""Save all comics until the current date."""
|
||||||
out.write('Catching up...')
|
out.write('Catching up...')
|
||||||
for comics in self.module:
|
for comics in self.module:
|
||||||
if not self.saveComics(comics) and self.settings['catchup'] < 2:
|
if not self.saveComics(comics) and self.settings['catchup'] < 2:
|
||||||
break
|
break
|
||||||
|
|
||||||
def catchupIndex(self, index):
|
def catchupIndex(self, index):
|
||||||
|
"""Retrieve and save all comics from the given index."""
|
||||||
out.write('Catching up from index "%s"...' % (index,))
|
out.write('Catching up from index "%s"...' % (index,))
|
||||||
self.module.setStrip(index)
|
self.module.setStrip(index)
|
||||||
for comics in self.module:
|
for comics in self.module:
|
||||||
|
@ -104,15 +126,18 @@ class Dosage(object):
|
||||||
break
|
break
|
||||||
|
|
||||||
def getScrapers(self):
|
def getScrapers(self):
|
||||||
|
"""Get list of scraper objects."""
|
||||||
return scraper.items()
|
return scraper.items()
|
||||||
|
|
||||||
def getExistingComics(self):
|
def getExistingComics(self):
|
||||||
|
"""Get all existing comic scrapers."""
|
||||||
for scraper in self.getScrapers():
|
for scraper in self.getScrapers():
|
||||||
dirname = scraper.get_name().replace('/', os.sep)
|
dirname = scraper.get_name().replace('/', os.sep)
|
||||||
if os.path.isdir(os.path.join(self.settings['basepath'], dirname)):
|
if os.path.isdir(os.path.join(self.settings['basepath'], dirname)):
|
||||||
yield scraper
|
yield scraper
|
||||||
|
|
||||||
def doList(self, columnList):
|
def doList(self, columnList):
|
||||||
|
"""List available comics."""
|
||||||
out.write('Available comic scrapers:')
|
out.write('Available comic scrapers:')
|
||||||
scrapers = self.getScrapers()
|
scrapers = self.getScrapers()
|
||||||
if len(scrapers) > 0:
|
if len(scrapers) > 0:
|
||||||
|
@ -123,9 +148,11 @@ class Dosage(object):
|
||||||
out.write('%d supported comics.' % len(scrapers))
|
out.write('%d supported comics.' % len(scrapers))
|
||||||
|
|
||||||
def doSingleList(self, scrapers):
|
def doSingleList(self, scrapers):
|
||||||
|
"""Print list of scraper names, one per line."""
|
||||||
print '\n'.join(scraper.get_name() for scraper in scrapers)
|
print '\n'.join(scraper.get_name() for scraper in scrapers)
|
||||||
|
|
||||||
def doColumnList(self, scrapers):
|
def doColumnList(self, scrapers):
|
||||||
|
"""Get list of scraper names with multiple names per line."""
|
||||||
screenWidth = get_columns()
|
screenWidth = get_columns()
|
||||||
names = [scraper.get_name() for scraper in scrapers]
|
names = [scraper.get_name() for scraper in scrapers]
|
||||||
maxlen = max([len(name) for name in names])
|
maxlen = max([len(name) for name in names])
|
||||||
|
@ -135,6 +162,7 @@ class Dosage(object):
|
||||||
del names[:namesPerLine]
|
del names[:namesPerLine]
|
||||||
|
|
||||||
def doCatchup(self):
|
def doCatchup(self):
|
||||||
|
"""Catchup comics."""
|
||||||
for comic in self.useComics():
|
for comic in self.useComics():
|
||||||
if self.indices:
|
if self.indices:
|
||||||
self.safeOp(self.catchupIndex, self.indices[0])
|
self.safeOp(self.catchupIndex, self.indices[0])
|
||||||
|
@ -142,6 +170,7 @@ class Dosage(object):
|
||||||
self.safeOp(self.catchup)
|
self.safeOp(self.catchup)
|
||||||
|
|
||||||
def doCurrent(self):
|
def doCurrent(self):
|
||||||
|
"""Get current comics."""
|
||||||
for comic in self.useComics():
|
for comic in self.useComics():
|
||||||
if self.indices:
|
if self.indices:
|
||||||
for index in self.indices:
|
for index in self.indices:
|
||||||
|
@ -150,16 +179,19 @@ class Dosage(object):
|
||||||
self.safeOp(self.getCurrent)
|
self.safeOp(self.getCurrent)
|
||||||
|
|
||||||
def doHelp(self):
|
def doHelp(self):
|
||||||
|
"""Print help for comic strips."""
|
||||||
for scraper in self.useComics():
|
for scraper in self.useComics():
|
||||||
for line in scraper.getHelp().splitlines():
|
for line in scraper.getHelp().splitlines():
|
||||||
out.write("Help: "+line)
|
out.write("Help: "+line)
|
||||||
|
|
||||||
def setupComic(self, scraper):
|
def setupComic(self, scraper):
|
||||||
|
"""Setup the internal comic module from given scraper."""
|
||||||
self.module = scraper()
|
self.module = scraper()
|
||||||
out.context = scraper.get_name()
|
out.context = scraper.get_name()
|
||||||
return self.module
|
return self.module
|
||||||
|
|
||||||
def useComics(self):
|
def useComics(self):
|
||||||
|
"""Set all comic modules for the defined comics."""
|
||||||
for comic in self.comics:
|
for comic in self.comics:
|
||||||
c = comic.split(':', 2)
|
c = comic.split(':', 2)
|
||||||
if len(c) > 1:
|
if len(c) > 1:
|
||||||
|
@ -177,12 +209,8 @@ class Dosage(object):
|
||||||
else:
|
else:
|
||||||
yield self.setupComic(scraper.get(moduleName))
|
yield self.setupComic(scraper.get(moduleName))
|
||||||
|
|
||||||
def displayVersion(self):
|
|
||||||
print App
|
|
||||||
print Copyright
|
|
||||||
print Freeware
|
|
||||||
|
|
||||||
def run(self, comics):
|
def run(self, comics):
|
||||||
|
"""Execute comic commands."""
|
||||||
self.setOutputInfo()
|
self.setOutputInfo()
|
||||||
self.comics = comics
|
self.comics = comics
|
||||||
|
|
||||||
|
@ -191,7 +219,7 @@ class Dosage(object):
|
||||||
events.handler.start()
|
events.handler.start()
|
||||||
|
|
||||||
if self.settings['version']:
|
if self.settings['version']:
|
||||||
self.displayVersion()
|
displayVersion()
|
||||||
elif self.settings['list']:
|
elif self.settings['list']:
|
||||||
self.doList(self.settings['list'] == 1)
|
self.doList(self.settings['list'] == 1)
|
||||||
elif len(comics) <= 0:
|
elif len(comics) <= 0:
|
||||||
|
@ -206,6 +234,7 @@ class Dosage(object):
|
||||||
events.handler.end()
|
events.handler.end()
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
"""Parse options and execute commands."""
|
||||||
try:
|
try:
|
||||||
parser = setupOptions()
|
parser = setupOptions()
|
||||||
options, args = parser.parse_args()
|
options, args = parser.parse_args()
|
||||||
|
|
|
@ -14,12 +14,26 @@ from .util import urlopen, saneDataSize, normaliseURL
|
||||||
from .progress import progressBar, OperationComplete
|
from .progress import progressBar, OperationComplete
|
||||||
from .events import handler
|
from .events import handler
|
||||||
|
|
||||||
class FetchComicError(IOError): pass
|
class FetchComicError(IOError):
|
||||||
|
"""Exception for comic fetching errors."""
|
||||||
|
pass
|
||||||
|
|
||||||
class Comic(object):
|
class Comic(object):
|
||||||
|
"""Download and save a single comic."""
|
||||||
|
|
||||||
def __init__(self, moduleName, url, referrer=None, filename=None):
|
def __init__(self, moduleName, url, referrer=None, filename=None):
|
||||||
|
"""Set URL and filename."""
|
||||||
self.moduleName = moduleName
|
self.moduleName = moduleName
|
||||||
url = normaliseURL(url)
|
self.url = normaliseURL(url)
|
||||||
|
self.referrer = referrer
|
||||||
|
if filename is None:
|
||||||
|
filename = url.split('/')[-1]
|
||||||
|
self.filename, self.ext = os.path.splitext(filename)
|
||||||
|
self.filename = self.filename.replace(os.sep, '_')
|
||||||
|
self.ext = self.ext.replace(os.sep, '_')
|
||||||
|
|
||||||
|
def connect(self):
|
||||||
|
"""Connect to host and get meta information."""
|
||||||
out.write('Getting headers for %s...' % (url,), 2)
|
out.write('Getting headers for %s...' % (url,), 2)
|
||||||
try:
|
try:
|
||||||
self.urlobj = urlopen(url, referrer=referrer)
|
self.urlobj = urlopen(url, referrer=referrer)
|
||||||
|
@ -30,9 +44,6 @@ class Comic(object):
|
||||||
self.urlobj.info().gettype() not in ('application/octet-stream', 'application/x-shockwave-flash'):
|
self.urlobj.info().gettype() not in ('application/octet-stream', 'application/x-shockwave-flash'):
|
||||||
raise FetchComicError, ('No suitable image found to retrieve.', url)
|
raise FetchComicError, ('No suitable image found to retrieve.', url)
|
||||||
|
|
||||||
self.filename, self.ext = os.path.splitext(url.split('/')[-1])
|
|
||||||
self.filename = filename or self.filename
|
|
||||||
self.filename = self.filename.replace(os.sep, '_')
|
|
||||||
# Always use mime type for file extension if it is sane.
|
# Always use mime type for file extension if it is sane.
|
||||||
if self.urlobj.info().getmaintype() == 'image':
|
if self.urlobj.info().getmaintype() == 'image':
|
||||||
self.ext = '.' + self.urlobj.info().getsubtype()
|
self.ext = '.' + self.urlobj.info().getsubtype()
|
||||||
|
@ -41,6 +52,7 @@ class Comic(object):
|
||||||
out.write('... filename = "%s", ext = "%s", contentLength = %d' % (self.filename, self.ext, self.contentLength), 2)
|
out.write('... filename = "%s", ext = "%s", contentLength = %d' % (self.filename, self.ext, self.contentLength), 2)
|
||||||
|
|
||||||
def touch(self, filename):
|
def touch(self, filename):
|
||||||
|
"""Set last modified date on filename."""
|
||||||
if self.lastModified:
|
if self.lastModified:
|
||||||
tt = rfc822.parsedate(self.lastModified)
|
tt = rfc822.parsedate(self.lastModified)
|
||||||
if tt:
|
if tt:
|
||||||
|
@ -48,6 +60,8 @@ class Comic(object):
|
||||||
os.utime(filename, (mtime, mtime))
|
os.utime(filename, (mtime, mtime))
|
||||||
|
|
||||||
def save(self, basepath, showProgress=False):
|
def save(self, basepath, showProgress=False):
|
||||||
|
"""Save comic URL to filename on disk."""
|
||||||
|
self.connect()
|
||||||
comicName, comicExt = self.filename, self.ext
|
comicName, comicExt = self.filename, self.ext
|
||||||
comicSize = self.contentLength
|
comicSize = self.contentLength
|
||||||
comicDir = os.path.join(basepath, self.moduleName.replace('/', os.sep))
|
comicDir = os.path.join(basepath, self.moduleName.replace('/', os.sep))
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
"""
|
||||||
|
Define basic configuration data like version or application name.
|
||||||
|
"""
|
||||||
import _Dosage_configdata as configdata
|
import _Dosage_configdata as configdata
|
||||||
|
|
||||||
Version = configdata.version
|
Version = configdata.version
|
||||||
|
|
|
@ -7,7 +7,11 @@ import urllib
|
||||||
import util
|
import util
|
||||||
|
|
||||||
class EventHandler(object):
|
class EventHandler(object):
|
||||||
|
"""Base class for writing events to files. The currently defined events are
|
||||||
|
start(), comicDownloaded() and end()."""
|
||||||
|
|
||||||
def __init__(self, basepath, baseurl):
|
def __init__(self, basepath, baseurl):
|
||||||
|
"""Initialize base path and url."""
|
||||||
self.basepath = basepath
|
self.basepath = basepath
|
||||||
self.baseurl = baseurl or self.getBaseUrl()
|
self.baseurl = baseurl or self.getBaseUrl()
|
||||||
|
|
||||||
|
@ -21,35 +25,46 @@ class EventHandler(object):
|
||||||
return 'file:///' + url + '/'
|
return 'file:///' + url + '/'
|
||||||
|
|
||||||
def getUrlFromFilename(self, filename):
|
def getUrlFromFilename(self, filename):
|
||||||
|
"""Construct URL from filename."""
|
||||||
components = util.splitpath(util.getRelativePath(self.basepath, filename))
|
components = util.splitpath(util.getRelativePath(self.basepath, filename))
|
||||||
url = '/'.join([urllib.quote(component, '') for component in components])
|
url = '/'.join([urllib.quote(component, '') for component in components])
|
||||||
return self.baseurl + url
|
return self.baseurl + url
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
|
"""Emit a start event. Should be overridden in subclass."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def comicDownloaded(self, comic, filename):
|
def comicDownloaded(self, comic, filename):
|
||||||
|
"""Emit a comic downloaded event. Should be overridden in subclass."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def end(self):
|
def end(self):
|
||||||
|
"""Emit an end event. Should be overridden in subclass."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class TextEventHandler(EventHandler):
|
class TextEventHandler(EventHandler):
|
||||||
|
"""Output nothing. XXX why?"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class RSSEventHandler(EventHandler):
|
class RSSEventHandler(EventHandler):
|
||||||
|
"""Output in RSS format."""
|
||||||
|
|
||||||
def RFC822Date(self, indate):
|
def RFC822Date(self, indate):
|
||||||
|
"""Format date in rfc822 format. XXX move to util module."""
|
||||||
return time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(indate))
|
return time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(indate))
|
||||||
|
|
||||||
def getFilename(self):
|
def getFilename(self):
|
||||||
|
"""Return RSS filename."""
|
||||||
return os.path.abspath(os.path.join(self.basepath, 'dailydose.rss'))
|
return os.path.abspath(os.path.join(self.basepath, 'dailydose.rss'))
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
|
"""Log start event."""
|
||||||
today = time.time()
|
today = time.time()
|
||||||
yesterday = today - 86400
|
yesterday = today - 86400
|
||||||
today = time.localtime(today)
|
today = time.localtime(today)
|
||||||
yesterday = time.localtime(yesterday)
|
yesterday = time.localtime(yesterday)
|
||||||
|
|
||||||
|
# XXX replace with conf var
|
||||||
link = 'https://github.com/wummel/dosage'
|
link = 'https://github.com/wummel/dosage'
|
||||||
|
|
||||||
self.rssfn = self.getFilename()
|
self.rssfn = self.getFilename()
|
||||||
|
@ -62,6 +77,7 @@ class RSSEventHandler(EventHandler):
|
||||||
self.rss = rss.Feed('Daily Dosage', link, 'Comics for %s' % time.strftime('%Y/%m/%d', today))
|
self.rss = rss.Feed('Daily Dosage', link, 'Comics for %s' % time.strftime('%Y/%m/%d', today))
|
||||||
|
|
||||||
def comicDownloaded(self, comic, filename):
|
def comicDownloaded(self, comic, filename):
|
||||||
|
"""Write RSS entry for downloaded comic."""
|
||||||
url = self.getUrlFromFilename(filename)
|
url = self.getUrlFromFilename(filename)
|
||||||
args = (
|
args = (
|
||||||
'%s - %s' % (comic, os.path.basename(filename)),
|
'%s - %s' % (comic, os.path.basename(filename)),
|
||||||
|
@ -77,16 +93,22 @@ class RSSEventHandler(EventHandler):
|
||||||
self.rss.insertHead(*args)
|
self.rss.insertHead(*args)
|
||||||
|
|
||||||
def end(self):
|
def end(self):
|
||||||
|
"""Write RSS data to file."""
|
||||||
self.rss.write(self.rssfn)
|
self.rss.write(self.rssfn)
|
||||||
|
|
||||||
|
|
||||||
class HtmlEventHandler(EventHandler):
|
class HtmlEventHandler(EventHandler):
|
||||||
|
"""Output in HTML format."""
|
||||||
|
|
||||||
def fnFromDate(self, date):
|
def fnFromDate(self, date):
|
||||||
|
"""Get filename from date."""
|
||||||
fn = time.strftime('comics-%Y%m%d.html', date)
|
fn = time.strftime('comics-%Y%m%d.html', date)
|
||||||
fn = os.path.join(self.basepath, 'html', fn)
|
fn = os.path.join(self.basepath, 'html', fn)
|
||||||
fn = os.path.abspath(fn)
|
fn = os.path.abspath(fn)
|
||||||
return fn
|
return fn
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
|
"""Start HTML output."""
|
||||||
today = time.time()
|
today = time.time()
|
||||||
yesterday = today - 86400
|
yesterday = today - 86400
|
||||||
tomorrow = today + 86400
|
tomorrow = today + 86400
|
||||||
|
@ -117,12 +139,14 @@ class HtmlEventHandler(EventHandler):
|
||||||
self.lastComic = None
|
self.lastComic = None
|
||||||
|
|
||||||
def comicDownloaded(self, comic, filename):
|
def comicDownloaded(self, comic, filename):
|
||||||
|
"""Write HTML entry for downloaded comic."""
|
||||||
if self.lastComic != comic:
|
if self.lastComic != comic:
|
||||||
self.newComic(comic)
|
self.newComic(comic)
|
||||||
url = self.getUrlFromFilename(filename)
|
url = self.getUrlFromFilename(filename)
|
||||||
self.html.write(' <li><a href="%s">%s</a></li>\n' % (url, os.path.basename(filename)))
|
self.html.write(' <li><a href="%s">%s</a></li>\n' % (url, os.path.basename(filename)))
|
||||||
|
|
||||||
def newComic(self, comic):
|
def newComic(self, comic):
|
||||||
|
"""Start new comic list in HTML."""
|
||||||
if self.lastComic is not None:
|
if self.lastComic is not None:
|
||||||
self.html.write(' </ul>\n')
|
self.html.write(' </ul>\n')
|
||||||
self.lastComic = comic
|
self.lastComic = comic
|
||||||
|
@ -131,6 +155,7 @@ class HtmlEventHandler(EventHandler):
|
||||||
''' % (comic,))
|
''' % (comic,))
|
||||||
|
|
||||||
def end(self):
|
def end(self):
|
||||||
|
"""End HTML output."""
|
||||||
if self.lastComic is not None:
|
if self.lastComic is not None:
|
||||||
self.html.write(' </ul>\n')
|
self.html.write(' </ul>\n')
|
||||||
self.html.write('''</ul>
|
self.html.write('''</ul>
|
||||||
|
@ -146,11 +171,11 @@ handlers = {
|
||||||
}
|
}
|
||||||
|
|
||||||
def getHandlers():
|
def getHandlers():
|
||||||
l = handlers.keys()
|
"""Get sorted handler names."""
|
||||||
l.sort()
|
return sorted(handlers.keys())
|
||||||
return l
|
|
||||||
|
|
||||||
def installHandler(name=None, basepath=None, baseurl=None):
|
def installHandler(name=None, basepath=None, baseurl=None):
|
||||||
|
"""Install a global handler with given name."""
|
||||||
global handler
|
global handler
|
||||||
if name is None:
|
if name is None:
|
||||||
name = 'text'
|
name = 'text'
|
||||||
|
|
|
@ -25,18 +25,22 @@ class _BasicScraper(object):
|
||||||
help = 'Sorry, no help for this comic yet.'
|
help = 'Sorry, no help for this comic yet.'
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
"""Initialize internal variables."""
|
||||||
self.currentUrl = None
|
self.currentUrl = None
|
||||||
self.urls = set()
|
self.urls = set()
|
||||||
|
|
||||||
def getReferrer(self, imageUrl, pageUrl):
|
def getReferrer(self, imageUrl, pageUrl):
|
||||||
|
"""Return referrer for HTTP connection."""
|
||||||
return self.referrer or pageUrl or self.getLatestUrl()
|
return self.referrer or pageUrl or self.getLatestUrl()
|
||||||
|
|
||||||
def getComic(self, url, pageUrl):
|
def getComic(self, url, pageUrl):
|
||||||
|
"""Get comic downloader for given URL and page."""
|
||||||
if not url:
|
if not url:
|
||||||
return None
|
return None
|
||||||
return Comic(self.get_name(), url, filename=self.getFilename(url, pageUrl), referrer=self.getReferrer(url, pageUrl))
|
return Comic(self.get_name(), url, filename=self.getFilename(url, pageUrl), referrer=self.getReferrer(url, pageUrl))
|
||||||
|
|
||||||
def getCurrentComics(self):
|
def getCurrentComics(self):
|
||||||
|
"""Get list of current comics."""
|
||||||
self.currentUrl = self.getLatestUrl()
|
self.currentUrl = self.getLatestUrl()
|
||||||
comics = self.getNextComics()
|
comics = self.getNextComics()
|
||||||
if not comics:
|
if not comics:
|
||||||
|
@ -44,6 +48,7 @@ class _BasicScraper(object):
|
||||||
return comics
|
return comics
|
||||||
|
|
||||||
def getNextComics(self):
|
def getNextComics(self):
|
||||||
|
"""Get all next comics."""
|
||||||
comics = []
|
comics = []
|
||||||
while not comics and self.currentUrl and self.currentUrl not in self.urls:
|
while not comics and self.currentUrl and self.currentUrl not in self.urls:
|
||||||
comicUrlGroups, prevUrl = fetchManyUrls(self.currentUrl, [self.imageSearch, self.prevSearch])
|
comicUrlGroups, prevUrl = fetchManyUrls(self.currentUrl, [self.imageSearch, self.prevSearch])
|
||||||
|
@ -61,16 +66,17 @@ class _BasicScraper(object):
|
||||||
return comics
|
return comics
|
||||||
|
|
||||||
def setStrip(self, index):
|
def setStrip(self, index):
|
||||||
|
"""Set current comic strip URL."""
|
||||||
self.currentUrl = self.imageUrl % index
|
self.currentUrl = self.imageUrl % index
|
||||||
|
|
||||||
def getHelp(self):
|
def getHelp(self):
|
||||||
|
"""Return help text for this scraper."""
|
||||||
return self.help
|
return self.help
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
"""Iterate through the strips, starting from the current one and going backward."""
|
"""Iterate through the strips, starting from the current one and going backward."""
|
||||||
if not self.currentUrl:
|
if not self.currentUrl:
|
||||||
self.currentUrl = self.getLatestUrl()
|
self.currentUrl = self.getLatestUrl()
|
||||||
|
|
||||||
comics = True
|
comics = True
|
||||||
while comics:
|
while comics:
|
||||||
comics = self.getNextComics()
|
comics = self.getNextComics()
|
||||||
|
@ -79,26 +85,32 @@ class _BasicScraper(object):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_name(cls):
|
def get_name(cls):
|
||||||
|
"""Get scraper name."""
|
||||||
if hasattr(cls, 'name'):
|
if hasattr(cls, 'name'):
|
||||||
return cls.name
|
return cls.name
|
||||||
return cls.__name__
|
return cls.__name__
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def starter(cls):
|
def starter(cls):
|
||||||
|
"""Get starter URL from where to scrape comic strips."""
|
||||||
return cls.latestUrl
|
return cls.latestUrl
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
"""Return filename for given image and page URL."""
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def getFilename(self, imageUrl, pageUrl):
|
def getFilename(self, imageUrl, pageUrl):
|
||||||
|
"""Return filename for given image and page URL."""
|
||||||
return self.namer(imageUrl, pageUrl)
|
return self.namer(imageUrl, pageUrl)
|
||||||
|
|
||||||
def getLatestUrl(self):
|
def getLatestUrl(self):
|
||||||
|
"""Get starter URL from where to scrape comic strips."""
|
||||||
return self.starter()
|
return self.starter()
|
||||||
|
|
||||||
|
|
||||||
def queryNamer(paramName, usePageUrl=False):
|
def queryNamer(paramName, usePageUrl=False):
|
||||||
|
"""Get name from URL query part."""
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _namer(imageUrl, pageUrl):
|
def _namer(imageUrl, pageUrl):
|
||||||
url = (imageUrl, pageUrl)[usePageUrl]
|
url = (imageUrl, pageUrl)[usePageUrl]
|
||||||
|
@ -107,6 +119,7 @@ def queryNamer(paramName, usePageUrl=False):
|
||||||
|
|
||||||
|
|
||||||
def regexNamer(regex):
|
def regexNamer(regex):
|
||||||
|
"""Get name from regular expression."""
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _namer(imageUrl, pageUrl):
|
def _namer(imageUrl, pageUrl):
|
||||||
return regex.search(imageUrl).group(1)
|
return regex.search(imageUrl).group(1)
|
||||||
|
@ -114,6 +127,7 @@ def regexNamer(regex):
|
||||||
|
|
||||||
|
|
||||||
def constStarter(latestUrl):
|
def constStarter(latestUrl):
|
||||||
|
"""Start from constant URL."""
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _starter():
|
def _starter():
|
||||||
return latestUrl
|
return latestUrl
|
||||||
|
@ -121,6 +135,7 @@ def constStarter(latestUrl):
|
||||||
|
|
||||||
|
|
||||||
def bounceStarter(latestUrl, nextSearch):
|
def bounceStarter(latestUrl, nextSearch):
|
||||||
|
"""Get start URL by "bouncing" back and forth one time."""
|
||||||
@classmethod
|
@classmethod
|
||||||
def _starter(cls):
|
def _starter(cls):
|
||||||
url = fetchUrl(latestUrl, cls.prevSearch)
|
url = fetchUrl(latestUrl, cls.prevSearch)
|
||||||
|
@ -131,6 +146,7 @@ def bounceStarter(latestUrl, nextSearch):
|
||||||
|
|
||||||
|
|
||||||
def indirectStarter(baseUrl, latestSearch):
|
def indirectStarter(baseUrl, latestSearch):
|
||||||
|
"""Get start URL by indirection."""
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _starter():
|
def _starter():
|
||||||
return fetchUrl(baseUrl, latestSearch)
|
return fetchUrl(baseUrl, latestSearch)
|
||||||
|
@ -156,6 +172,7 @@ class IndirectLatestMixin(object):
|
||||||
__latestUrl = None
|
__latestUrl = None
|
||||||
|
|
||||||
def getLatestUrl(self):
|
def getLatestUrl(self):
|
||||||
|
"""Get latest comic URL."""
|
||||||
if not self.__latestUrl:
|
if not self.__latestUrl:
|
||||||
self.__latestUrl = fetchUrl(self.baseUrl, self.latestSearch)
|
self.__latestUrl = fetchUrl(self.baseUrl, self.latestSearch)
|
||||||
if hasattr(self, "nextSearch"):
|
if hasattr(self, "nextSearch"):
|
||||||
|
@ -170,7 +187,7 @@ class IndirectLatestMixin(object):
|
||||||
|
|
||||||
class _PHPScraper(_BasicScraper):
|
class _PHPScraper(_BasicScraper):
|
||||||
"""
|
"""
|
||||||
I implement IScraper for comics using phpComic/CUSP.
|
Scraper for comics using phpComic/CUSP.
|
||||||
|
|
||||||
This provides an easy way to define scrapers for webcomics using phpComic.
|
This provides an easy way to define scrapers for webcomics using phpComic.
|
||||||
"""
|
"""
|
||||||
|
@ -181,4 +198,5 @@ class _PHPScraper(_BasicScraper):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def starter(cls):
|
def starter(cls):
|
||||||
|
"""Get starter URL."""
|
||||||
return cls.basePath + cls.latestUrl
|
return cls.basePath + cls.latestUrl
|
||||||
|
|
|
@ -4,12 +4,16 @@
|
||||||
import time
|
import time
|
||||||
|
|
||||||
class Output(object):
|
class Output(object):
|
||||||
|
"""Print output with context, indentation and optional timestamps."""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
"""Initialize context and indentation."""
|
||||||
self.context = ''
|
self.context = ''
|
||||||
self.level = 0
|
self.level = 0
|
||||||
self.timestamps = False
|
self.timestamps = False
|
||||||
|
|
||||||
def write(self, s, level=0):
|
def write(self, s, level=0):
|
||||||
|
"""Write message with indentation, context and optional timestamp."""
|
||||||
if level > self.level:
|
if level > self.level:
|
||||||
return
|
return
|
||||||
if self.level > 1 or self.timestamps:
|
if self.level > 1 or self.timestamps:
|
||||||
|
@ -19,6 +23,7 @@ class Output(object):
|
||||||
print '%s%s> %s' % (timestamp, self.context, s)
|
print '%s%s> %s' % (timestamp, self.context, s)
|
||||||
|
|
||||||
def writelines(self, lines, level=0):
|
def writelines(self, lines, level=0):
|
||||||
|
"""Write multiple messages."""
|
||||||
for line in lines:
|
for line in lines:
|
||||||
for line in line.rstrip('\n').split('\n'):
|
for line in line.rstrip('\n').split('\n'):
|
||||||
self.write(line.rstrip('\n'), level=level)
|
self.write(line.rstrip('\n'), level=level)
|
||||||
|
|
|
@ -8,6 +8,7 @@ import time
|
||||||
from . import util
|
from . import util
|
||||||
|
|
||||||
class Guess(object):
|
class Guess(object):
|
||||||
|
|
||||||
def __init__(self, weight):
|
def __init__(self, weight):
|
||||||
self.weight = weight
|
self.weight = weight
|
||||||
self.guess = 0
|
self.guess = 0
|
||||||
|
@ -19,6 +20,7 @@ class Guess(object):
|
||||||
def distance(self, value):
|
def distance(self, value):
|
||||||
return (self.guess - value) ** 2
|
return (self.guess - value) ** 2
|
||||||
|
|
||||||
|
|
||||||
class FortuneTeller(object):
|
class FortuneTeller(object):
|
||||||
weights = (0.2, 0.3, 0.4)
|
weights = (0.2, 0.3, 0.4)
|
||||||
|
|
||||||
|
@ -34,7 +36,9 @@ class FortuneTeller(object):
|
||||||
def predict(self):
|
def predict(self):
|
||||||
return max([(guess.best, guess) for guess in self.guesses])[1].guess
|
return max([(guess.best, guess) for guess in self.guesses])[1].guess
|
||||||
|
|
||||||
class OperationComplete(Exception): pass
|
|
||||||
|
class OperationComplete(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
def drawBar(fill, total, caption):
|
def drawBar(fill, total, caption):
|
||||||
screenWidth = util.getWindowSize()
|
screenWidth = util.getWindowSize()
|
||||||
|
@ -49,6 +53,7 @@ def drawBar(fill, total, caption):
|
||||||
sys.stdout.write(mask % ('=' * fillWidth, '-' * emptyWidth))
|
sys.stdout.write(mask % ('=' * fillWidth, '-' * emptyWidth))
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
|
||||||
def drawBounceBar(pos, caption):
|
def drawBounceBar(pos, caption):
|
||||||
screenWidth = util.getWindowSize()
|
screenWidth = util.getWindowSize()
|
||||||
mask = '[%%s<=>%%s] %s' % (caption,)
|
mask = '[%%s<=>%%s] %s' % (caption,)
|
||||||
|
@ -61,6 +66,7 @@ def drawBounceBar(pos, caption):
|
||||||
sys.stdout.write(mask % (' ' * leftWidth, ' ' * rightWidth))
|
sys.stdout.write(mask % (' ' * leftWidth, ' ' * rightWidth))
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
|
||||||
def progressBar(fn):
|
def progressBar(fn):
|
||||||
completed = bps = 0
|
completed = bps = 0
|
||||||
count = 0
|
count = 0
|
||||||
|
|
|
@ -22,7 +22,8 @@ has_curses = has_module("curses")
|
||||||
has_fcntl = has_module('fcntl')
|
has_fcntl = has_module('fcntl')
|
||||||
has_termios = has_module('termios')
|
has_termios = has_module('termios')
|
||||||
|
|
||||||
class NoMatchError(Exception): pass
|
class NoMatchError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
def getMatchValues(matches):
|
def getMatchValues(matches):
|
||||||
return set([match.group(1) for match in matches])
|
return set([match.group(1) for match in matches])
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -105,6 +105,7 @@ class MyInstallLib (install_lib, object):
|
||||||
return self.get_conf_output()
|
return self.get_conf_output()
|
||||||
|
|
||||||
def get_conf_output (self):
|
def get_conf_output (self):
|
||||||
|
"""Get filename for distribution configuration file."""
|
||||||
return self.distribution.get_conf_filename(self.install_lib)
|
return self.distribution.get_conf_filename(self.install_lib)
|
||||||
|
|
||||||
def get_outputs (self):
|
def get_outputs (self):
|
||||||
|
|
Loading…
Reference in a new issue