From 54eaadf4fceb05da06d873f59b91fb31f86e2ed6 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Tue, 20 Nov 2012 18:53:53 +0100 Subject: [PATCH] Updated documentation and fix some comics. --- Makefile | 4 +- doc/README.txt | 10 -- doc/changelog.txt | 3 +- doc/dosage.1 | 86 +++++++---------- doc/dosage.1.html | 157 +++++++++--------------------- dosage | 20 ++-- dosagelib/comic.py | 6 +- dosagelib/fileutil.py | 3 +- dosagelib/loader.py | 4 +- dosagelib/output.py | 3 +- dosagelib/plugins/a.py | 139 +++++++++----------------- dosagelib/plugins/b.py | 204 ++++++++++++--------------------------- dosagelib/plugins/c.py | 79 ++++++++------- dosagelib/plugins/d.py | 28 +++--- dosagelib/plugins/e.py | 38 +++++--- dosagelib/plugins/f.py | 18 ++-- dosagelib/plugins/g.py | 40 +++++--- dosagelib/plugins/h.py | 10 +- dosagelib/plugins/i.py | 16 ++- dosagelib/plugins/j.py | 6 +- dosagelib/plugins/k.py | 17 +++- dosagelib/plugins/l.py | 16 +-- dosagelib/plugins/m.py | 33 ++++--- dosagelib/plugins/n.py | 26 ++--- dosagelib/plugins/num.py | 1 - dosagelib/plugins/o.py | 10 +- dosagelib/plugins/p.py | 22 ++--- dosagelib/plugins/q.py | 4 +- dosagelib/plugins/r.py | 10 +- dosagelib/plugins/s.py | 65 +++++++++---- dosagelib/plugins/t.py | 18 ++-- dosagelib/plugins/u.py | 14 ++- dosagelib/plugins/uc.py | 19 ++-- dosagelib/plugins/v.py | 4 +- dosagelib/plugins/w.py | 22 ++--- dosagelib/plugins/x.py | 2 +- dosagelib/plugins/y.py | 4 +- dosagelib/plugins/z.py | 2 +- dosagelib/scraper.py | 14 --- dosagelib/util.py | 28 +++--- tests/test_comics.py | 10 +- 41 files changed, 541 insertions(+), 674 deletions(-) diff --git a/Makefile b/Makefile index d39e2d8f5..6ce9ca561 100644 --- a/Makefile +++ b/Makefile @@ -6,13 +6,11 @@ ARCHIVE:=dosage-$(VERSION).tar.gz PY_FILES_DIRS := dosage dosagelib tests *.py PY2APPOPTS ?= NUMPROCESSORS:=$(shell grep -c processor /proc/cpuinfo) -MAXFAILEDTESTS:=10 # Pytest options: -# - stop after MAXFAILEDTESTS failed errors # - use multiple processors # - write 
test results in file # - run all tests found in the "tests" subdirectory -PYTESTOPTS:=--maxfail=$(MAXFAILEDTESTS) -n $(NUMPROCESSORS) --resultlog=testresults.txt --tb=short +PYTESTOPTS:=-n $(NUMPROCESSORS) --resultlog=testresults.txt --tb=short CHMODMINUSMINUS:=-- # directory or file with tests to run TESTS ?= tests diff --git a/doc/README.txt b/doc/README.txt index c9c1edc1c..6e48f11c5 100644 --- a/doc/README.txt +++ b/doc/README.txt @@ -38,16 +38,6 @@ strip of all of them: For advanced options and features execute `dosage -h` or look at the dosage manual page. -Offensive comics ------------------ -There are some comics supported by Dosage that may be offensive to readers or -to others that have access to the downloaded images. -SexyLosers is one module that has been discussed. Dosage offers a mechanism -to disable such modules. Modules listed in "/etc/dosage/disabled" and -"~/.dosage/disabled" will be disabled. These files should contain only one -module name per line. Note: Under Windows "~" will also expand to the user's -home directory, usually "C:\Documents and Settings\UserName". - Dependencies ------------- Dosage requires Python version 2.7 or higher, which can be downloaded diff --git a/doc/changelog.txt b/doc/changelog.txt index 95b13616d..5cb01c80a 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -10,11 +10,12 @@ Changes: - installation: Require and use Python 2.7 - comics: Removed the twisted and zope dependencies by adding an internal plugin search mechanism. +- comics: Remove the disable mechanism. - testing: Refactored the test comic routine in proper unit tests. - cmdline: Improved terminal feature detection. Fixes: -- comics: Fix a lot of comics. +- comics: Fix a lot of comics; however there are still some that won't work. - comics: Don't add empty URLs to the list of found URLs. 
diff --git a/doc/dosage.1 b/doc/dosage.1 index ebb9ba07d..a86fb15b3 100644 --- a/doc/dosage.1 +++ b/doc/dosage.1 @@ -2,30 +2,27 @@ .SH NAME dosage \- comic strip downloader .SH SYNOPSIS -.B dosage -.RI [ options ] -.I module -.RI [ module .\|.\|.] +\fBdosage\fP [\fIoptions\fP] \fImodule\fP... .SH DESCRIPTION .B dosage -is an application designed to keep a local \(oqmirror\(cq of specific +is an application designed to keep a local mirror of specific web comics and other picture\-based content, such as -\(oqPicture Of The Day\(cq sites, with a variety of options +\fIPicture Of The Day\fP sites, with a variety of options for updating and maintaining collections. .SH OPTIONS .TP -.BI \-b " PATH" "\fR,\fP \-\^\-basepath=" PATH -Specifies a base path to put comic subdirectories. The default is \(oqComics\(cq. +\fB\-b\fP \fIPATH\fP, \fB\-\-basepath=\fP\fIPATH\fP +Specifies a base path to put comic subdirectories. The default is \fBComics\fP. .TP -.BI \-\^\-baseurl= PATH +\fB\-\-baseurl=\fP\fIPATH\fP Specifies the base URL for output events. The default is a local file URI. .TP -.BR \-a ", " \-\^\-all +\fB\-a\fP, \fB\-\-all\fP Traverses all available strips backwards from the current one. This can be useful you want a full collection of a new comic strip, or update an existing one where files are missing. . -Catchups can start at a specific image by using the index syntax, see +Catchups can start at a specific strip by using the index syntax, see the .B INDEX SYNTAX and @@ -35,34 +32,32 @@ and want only to download the missing files. To make this task easy, the traversal ends at the first existing image file when starting from an index (excluding the index itself). .TP -.BR \-h ", " \-\^\-help +\fB\-h\fP, \fB\-\-help\fP Output brief help information. .TP -.BR \-l ", " \-\^\-list +\fB\-l\fP, \fB\-\-list\fP List available comic modules in multi\-column fashion. .TP -.BR \-\^\-singlelist +\fB\-\-singlelist\fP List available comic modules in single-column fashion. 
.TP -.BI \-m " MODULE" "\fR,\fP \-\^\-modulehelp=" MODULE -Output module-specific help for -.IR MODULE . +\fB\-m\fP \fIMODULE\fP, \fB\-\-modulehelp=\fP\fIMODULE\fP +Output module-specific help for \fIMODULE\fP. .TP -.BI \-o " OUTPUT" "\fR,\fP \-\^\-output=" OUTPUT -.I OUTPUT -may be any one of the following: +\fB\-o\fP \fIOUTPUT\fP, \fB\-\-output=\fP\fIOUTPUT\fP +\fIOUTPUT\fP may be any one of the following: .PP .RS .BR "html " \- Writes out an HTML file linking to the strips actually downloaded in the current run, named by date (ala dailystrips). The files can be found in the -\'html' directory of your Comics directory. +\fBhtml\fP directory of your \fBComics\fP directory. .RE .PP .RS .BR "rss " \- Writes out an RSS feed detailing what strips were downloaded in the last 24 -hours. The feed can be found in Comics/dailydose.xml. +hours. The feed can be found in \fBComics/dailydose.xml\fP. .RE .PP .RS @@ -71,13 +66,13 @@ Writes an RSS feed with all of the strips downloaded during the run, for use with your favourite RSS aggregator. .RE .TP -.BR \-t ", " \-\^\-timestamps +\fB\-t\fP, \fB\-\-timestamps\fP Print timestamps for all output at any level. .TP -.BR \-v ", " \-\^\-verbose +\fB\-v\fP, \fB\-\-verbose\fP Increase the output level by one with each occurence. .TP -.BR \-V ", " \-\^\-version +\fB\-V\fP, \fB\-\-version\fP Display the version number. .I module At least one valid @@ -90,32 +85,24 @@ arguments can be specified on the command line. Module names are case insensitive, and it is sufficient to specify a unique substring of the module name. .SH INDEX SYNTAX -One can indicate the start of a list of -.B comma seperated -indices using a -.RB \(oq : "\(cq." +Instead of starting at the latest comic strip, an index lets dosage start +at a certain strip. The index can be specified by appending a colon \fB:\fP +and the index name after the module. Multiple comma-separated indices can +also be specified.
.PP -The index format is documented when using the \fB\-\-modulehelp\fP option. -.SH OFFENSIVE COMICS -Some users may find certain comics offensive and wish to disable them. -Modules listed in -.B /etc/dosage/disabled -and -.B ~/.dosage/disabled -will be disabled. These files should contain only one module name per line. +The index name itself usually is the part of the comic strip URL that identifies +a strip, e.g. a number or a date. The expected format is documented when using +the \fB\-\-modulehelp\fP option. .SH SPECIAL SYNTAX .TP .B @ -This expands to mean all the comics currently in your \(oqComics\(cq +This expands to mean all the comics currently in your \fBComics\fP directory. All other specified comic module names will be ignored. .TP .B @@ This expands to mean all the comics available to Dosage. .PP -.B INDEX SYNTAX -can not be used with -.B SPECIAL SYNTAX -. +\fBINDEX SYNTAX\fP can not be used with \fBSPECIAL SYNTAX\fP. .SH EXAMPLES Retrieve all Mega Tokyo comics: .RS @@ -127,7 +114,7 @@ Retrieve the current comic of Cyanide and Happiness: .B dosage cyanideandhappiness .RE .PP -Retrieve the current strip of all comics in your \(oqComics\(cq directory: +Retrieve the current strip of all comics in your \fBComics\fP directory: .RS .B dosage @ .RE @@ -149,7 +136,7 @@ the beginning until an existing file is found: .SH ENVIRONMENT .IP HTTP_PROXY .B mainline -will use the specified HTTP proxy whenever possible. +will use the specified HTTP proxy when downloading URL contents. .SH NOTES Should retrieval fail on any given strip .B mainline @@ -172,15 +159,8 @@ the program run was aborted with Ctrl-C .PP Else the return value is zero. .SH BUGS -See -.I http://trac.slipgate.za.net/dosage -for a list of current development tasks and suggestions. - -.SH FILES -.IP "\fB/etc/dosage/disabled\fR" -Disables comic modules on a global scale. -.IP "\fB~/.dosage/disabled\fR" -Disables comic modules on a local scale.
+Users can report or view bugs, patches or feature suggestions at +.I https://github.com/wummel/dosage/issues .SH AUTHORS Jonathan Jacobs .br diff --git a/doc/dosage.1.html b/doc/dosage.1.html index e677043f8..a2ace63d6 100644 --- a/doc/dosage.1.html +++ b/doc/dosage.1.html @@ -13,42 +13,29 @@ dosage - comic strip downloader  

SYNOPSIS

-dosage - -[options] - -module - -[module...] - +dosage [options] module...  

DESCRIPTION

dosage -is an application designed to keep a local 'mirror' of specific +is an application designed to keep a local mirror of specific web comics and other picture-based content, such as -'Picture Of The Day' sites, with a variety of options +Picture Of The Day sites, with a variety of options for updating and maintaining collections.  

OPTIONS

-
-b PATH, --basepath=PATH - -
-Specifies a base path to put comic subdirectories. The default is 'Comics'. -
--baseurl=PATH - -
+
-b PATH, --basepath=PATH
+Specifies a base path to put comic subdirectories. The default is Comics. +
--baseurl=PATH
Specifies the base URL for output events. The default is a local file URI. -
-a, --all - -
+
-a, --all
Traverses all available strips backwards from the current one. This can be useful you want a full collection of a new comic strip, or update an existing one where files are missing. -Catchups can start at a specific image by using the index syntax, see +Catchups can start at a specific strip by using the index syntax, see the INDEX SYNTAX @@ -59,30 +46,16 @@ sections for more information. This is useful when you missed some days and want only to download the missing files. To make this task easy, the traversal ends at the first existing image file when starting from an index (excluding the index itself). -
-h, --help - -
+
-h, --help
Output brief help information. -
-l, --list - -
+
-l, --list
List available comic modules in multi-column fashion. -
--singlelist - -
+
--singlelist
List available comic modules in single-column fashion. -
-m MODULE, --modulehelp=MODULE - -
-Output module-specific help for -MODULE. - -
-o OUTPUT, --output=OUTPUT - -
-OUTPUT - -may be any one of the following: +
-m MODULE, --modulehelp=MODULE
+Output module-specific help for MODULE. +
-o OUTPUT, --output=OUTPUT
+OUTPUT may be any one of the following:

@@ -91,7 +64,7 @@ may be any one of the following: Writes out an HTML file linking to the strips actually downloaded in the current run, named by date (ala dailystrips). The files can be found in the -'html' directory of your Comics directory. +html directory of your Comics directory.

@@ -100,7 +73,7 @@ current run, named by date (ala dailystrips). The files can be found in the rss - Writes out an RSS feed detailing what strips were downloaded in the last 24 -hours. The feed can be found in Comics/dailydose.xml. +hours. The feed can be found in Comics/dailydose.xml.

@@ -113,17 +86,11 @@ with your favourite RSS aggregator.

-
-t, --timestamps - -
+
-t, --timestamps
Print timestamps for all output at any level. -
-v, --verbose - -
+
-v, --verbose
Increase the output level by one with each occurence. -
-V, --version - -
+
-V, --version
Display the version number. module @@ -143,34 +110,23 @@ unique substring of the module name.  

INDEX SYNTAX

-One can indicate the start of a list of -comma seperated - -indices using a -':'. - +Instead of starting at the latest comic strip, an index lets dosage start +at a certain strip. The index can be specified by appending a colon : +and the index name after the module. Multiple comma-separated indices can +also be specified.

-The index format is documented when using the --modulehelp option. +The index name itself usually is the part of the comic strip URL that identifies +a strip, e.g. a number or a date. The expected format is documented when using +the --modulehelp option.   -

OFFENSIVE COMICS

- -Some users may find certain comics offensive and wish to disable them. -Modules listed in -/etc/dosage/disabled - -and -~/.dosage/disabled - -will be disabled. These files should contain only one module name per line. - 

SPECIAL SYNTAX

@
-This expands to mean all the comics currently in your 'Comics' +This expands to mean all the comics currently in your Comics directory. All other specified comic module names will be ignored.
@@ @@ -179,12 +135,8 @@ This expands to mean all the comics available to Dosage.

-INDEX SYNTAX - -can not be used with -SPECIAL SYNTAX - -  +INDEX SYNTAX can not be used with SPECIAL SYNTAX. + 

EXAMPLES

Retrieve all Mega Tokyo comics: @@ -203,7 +155,7 @@ Retrieve the current comic of Cyanide and Happiness:

-Retrieve the current strip of all comics in your 'Comics' directory: +Retrieve the current strip of all comics in your Comics directory:

dosage @ @@ -232,16 +184,16 @@ the beginning until an existing file is found:
-  + 

ENVIRONMENT

HTTP_PROXY
mainline -will use the specified HTTP proxy whenever possible. +will use the specified HTTP proxy when downloading URL contents.
-  + 

NOTES

Should retrieval fail on any given strip @@ -258,7 +210,7 @@ At the time of writing, a complete Dosage collection weighs in at around 3.0GB. -  + 

RETURN VALUE

The return value greater than zero when @@ -273,24 +225,13 @@ the program run was aborted with Ctrl-C

Else the return value is zero. -  + 

BUGS

-See -http://trac.slipgate.za.net/dosage +Users can report or view bugs, patches or feature suggestions at +https://github.com/wummel/dosage/issues -for a list of current development tasks and suggestions. -

-  -

FILES

- -
-
/etc/dosage/disabled
-Disables comic modules on a global scale. -
~/.dosage/disabled
-Disables comic modules on a local scale. -
-  + 

AUTHORS

Jonathan Jacobs <korpse@slipgate.za.net> @@ -300,7 +241,7 @@ Tristan Seligmann <mithrandi@slipg
Bastian Kleineidam <
calvin@users.sourceforge.net> -  + 

COPYRIGHT

Copyright © 2004-2005 Tristan Seligmann and Jonathan Jacobs @@ -317,16 +258,14 @@ Copyright © 2012 Bastian Kleineidam
DESCRIPTION
OPTIONS
INDEX SYNTAX
-
OFFENSIVE COMICS
-
SPECIAL SYNTAX
-
EXAMPLES
-
ENVIRONMENT
-
NOTES
-
RETURN VALUE
-
BUGS
-
FILES
-
AUTHORS
-
COPYRIGHT
+
SPECIAL SYNTAX
+
EXAMPLES
+
ENVIRONMENT
+
NOTES
+
RETURN VALUE
+
BUGS
+
AUTHORS
+
COPYRIGHT

This document was created by diff --git a/dosage b/dosage index a2805b3a0..5cbaf8219 100755 --- a/dosage +++ b/dosage @@ -17,6 +17,7 @@ # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +from __future__ import print_function import sys import os import optparse @@ -24,7 +25,7 @@ import optparse from dosagelib import events, scraper from dosagelib.output import out from dosagelib.util import get_columns, internal_error -from dosagelib.configuration import App, Freeware, Copyright +from dosagelib.configuration import App, Freeware, Copyright, SupportUrl def setupOptions(): """Construct option parser. @@ -48,9 +49,10 @@ def setupOptions(): def displayVersion(): """Display application name, version, copyright and license.""" - print App - print Copyright - print Freeware + print(App) + print(Copyright) + print(Freeware) + print("For support see", SupportUrl) return 0 @@ -70,7 +72,7 @@ def saveComicStrip(strip, basepath): filename, saved = image.save(basepath) if saved: allskipped = False - except IOError, msg: + except IOError as msg: out.write('Error saving %s: %s' % (image.filename, msg)) errors += 1 return errors, allskipped @@ -123,7 +125,7 @@ def run(options, comics): if options.modhelp: return displayHelp(comics, options.basepath) return getComics(options, comics) - except ValueError, msg: + except ValueError as msg: out.write("Error: %s" % msg) return 1 @@ -143,7 +145,7 @@ def doList(columnList): def doSingleList(scrapers): """Get list of scraper names, one per line.""" for num, scraperobj in enumerate(scrapers): - print scraperobj.get_name() + print(scraperobj.get_name()) return num @@ -155,7 +157,7 @@ def doColumnList(scrapers): maxlen = max([len(name) for name in names]) namesPerLine = int(screenWidth / (maxlen + 1)) while names: - print ''.join([name.ljust(maxlen) for name in names[:namesPerLine]]) + 
print(''.join([name.ljust(maxlen) for name in names[:namesPerLine]])) del names[:namesPerLine] return num @@ -192,7 +194,7 @@ def main(): options, args = parser.parse_args() res = run(options, args) except KeyboardInterrupt: - print "Aborted." + print("Aborted.") res = 1 except Exception: internal_error() diff --git a/dosagelib/comic.py b/dosagelib/comic.py index 563fb705e..82e3666e2 100644 --- a/dosagelib/comic.py +++ b/dosagelib/comic.py @@ -53,12 +53,12 @@ class ComicImage(object): """Connect to host and get meta information.""" try: self.urlobj = urlopen(self.url, referrer=self.referrer) - except urllib2.HTTPError, he: - raise FetchComicError, ('Unable to retrieve URL.', self.url, he.code) + except urllib2.HTTPError as he: + raise FetchComicError('Unable to retrieve URL.', self.url, he.code) if self.urlobj.info().getmaintype() != 'image' and \ self.urlobj.info().gettype() not in ('application/octet-stream', 'application/x-shockwave-flash'): - raise FetchComicError, ('No suitable image found to retrieve.', self.url) + raise FetchComicError('No suitable image found to retrieve.', self.url) # Always use mime type for file extension if it is sane. if self.urlobj.info().getmaintype() == 'image': diff --git a/dosagelib/fileutil.py b/dosagelib/fileutil.py index 121dd67b7..da98a133f 100644 --- a/dosagelib/fileutil.py +++ b/dosagelib/fileutil.py @@ -3,6 +3,7 @@ """ File and path utilities. """ +import importlib def has_module (name): """Test if given module can be imported. 
@@ -10,7 +11,7 @@ def has_module (name): @rtype: bool """ try: - exec "import %s as _bla" % name + importlib.import_module(name) return True except (OSError, ImportError): # some modules (for example HTMLtidy) raise OSError diff --git a/dosagelib/loader.py b/dosagelib/loader.py index bedfaac77..22fb07935 100644 --- a/dosagelib/loader.py +++ b/dosagelib/loader.py @@ -18,12 +18,12 @@ def get_modules(folder='plugins'): try: name ="..%s.%s" % (folder, modname) yield importlib.import_module(name, __name__) - except StandardError, msg: + except ImportError as msg: print "ERROR: could not load module %s: %s" % (modname, msg) def get_importable_modules(folder): - """Find all module files in the given folder that end witn '.py' and + """Find all module files in the given folder that end with '.py' and don't start with an underscore. @return module names @rtype: iterator of string diff --git a/dosagelib/output.py b/dosagelib/output.py index 79d6c31ed..f875c24b4 100644 --- a/dosagelib/output.py +++ b/dosagelib/output.py @@ -1,6 +1,7 @@ # -*- coding: iso-8859-1 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012 Bastian Kleineidam +from __future__ import print_function import time class Output(object): @@ -20,7 +21,7 @@ class Output(object): timestamp = time.strftime('%H:%M:%S ') else: timestamp = '' - print '%s%s> %s' % (timestamp, self.context, s) + print('%s%s> %s' % (timestamp, self.context, s)) def writelines(self, lines, level=0): """Write multiple messages.""" diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index 96bc727df..ba5358474 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -3,12 +3,12 @@ from re import compile, MULTILINE from ..util import tagre from ..scraper import _BasicScraper -from ..helpers import regexNamer, bounceStarter, indirectStarter +from ..helpers import regexNamer, bounceStarter class ALessonIsLearned(_BasicScraper): latestUrl = 'http://www.alessonislearned.com/' - stripUrl = 
'http://www.alessonislearned.com/lesson%s.html' + stripUrl = latestUrl + 'index.php?comic=%s' imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)")) prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")+r"[^>]+previous") help = 'Index format: nnn' @@ -16,7 +16,7 @@ class ALessonIsLearned(_BasicScraper): class ASofterWorld(_BasicScraper): latestUrl = 'http://www.asofterworld.com/' - stripUrl = 'http://www.asofterworld.com/index.php?id=%s' + stripUrl = latestUrl + 'index.php?id=%s' imageSearch = compile(tagre("img", "src", r'(http://www\.asofterworld\.com/clean/[^"]+)')) prevSearch = compile(tagre("a", "href", "(index\.php\?id=\d+)")+'< back') help = 'Index format: n (unpadded)' @@ -24,15 +24,15 @@ class ASofterWorld(_BasicScraper): class AbleAndBaker(_BasicScraper): latestUrl = 'http://www.jimburgessdesign.com/comics/index.php' - stripUrl = 'http://www.jimburgessdesign.com/comics/index.php?comic=%s' + stripUrl = latestUrl + '?comic=%s' imageSearch = compile(tagre('img', 'src', r'(comics/.+)')) prevSearch = compile(tagre('a', 'href', r'(.+\d+)') + '.+?previous.gif') help = 'Index format: nnn' class AbominableCharlesChristopher(_BasicScraper): - latestUrl = 'http://abominable.cc/' - stripUrl = 'http://abominable.cc/%s' + latestUrl = 'http://www.abominable.cc/' + stripUrl = latestUrl + '%s' imageSearch = compile(tagre("img", "src", r'(http://www\.abominable\.cc/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'([^"]+)')+"[^<]+Previous") help = 'Index format: yyyy/mm/dd/comicname' @@ -49,7 +49,7 @@ class AbsurdNotions(_BasicScraper): class AbstruseGoose(_BasicScraper): starter = bounceStarter('http://abstrusegoose.com/', compile(tagre('a', 'href', r'(http://abstrusegoose\.com/\d+)')+"Next »")) - stripUrl = 'http://abstrusegoose.com/c%s.html' + stripUrl = 'http://abstrusegoose.com/%s' imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)')) prevSearch = compile(tagre('a', 'href', 
r'(http://abstrusegoose\.com/\d+)') + r'« Previous') help = 'Index format: n (unpadded)' @@ -62,57 +62,37 @@ class AbstruseGoose(_BasicScraper): class AcademyVale(_BasicScraper): - latestUrl = 'http://imagerie.com/vale/' - stripUrl = 'http://imagerie.com/vale/avarch.cgi?%s' + latestUrl = 'http://www.imagerie.com/vale/' + stripUrl = latestUrl + 'avarch.cgi?%s' imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)')) - prevSearch = compile(tagre('a', 'href', r'(avarch[^"]+)') + tagre('img', 'src', 'AVNavBack\.gif')) + prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + tagre('img', 'src', 'AVNavBack\.gif')) help = 'Index format: nnn' class Alice(_BasicScraper): latestUrl = 'http://alice.alicecomics.com/' - stripUrl = 'http://alice.alicecomics.com/wp-content/webcomic/alicecomics/%s.jpg' + stripUrl = latestUrl + '%s/' imageSearch = compile(tagre("img", "src", r'(http://alice\.alicecomics\.com/wp-content/webcomic/alicecomics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://alice.alicecomics.com/archive/[^"]+)', after="previous")) - help = 'Index format: yyyy-mm-dd' + prevSearch = compile(tagre("a", "href", r'(http://alice\.alicecomics\.com/archive/[^"]+)', after="previous")) + help = 'Index format: name' class AlienLovesPredator(_BasicScraper): - stripUrl = 'http://alienlovespredator.com/%s' - imageSearch = compile(r']+>(
\n|\n|
\n)
 
', MULTILINE) - prevSearch = compile(r'LATEST')) - + latestUrl = 'http://alienlovespredator.com/' + stripUrl = latestUrl + '%s' + imageSearch = compile(tagre("img", "src", r'(http://alienlovespredator\.com/strips/strip_\d\.jpg)')) + prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev")) + help = 'Index format: yyyy/mm/dd/name/' class Altermeta(_BasicScraper): latestUrl = 'http://altermeta.net/' - stripUrl = 'http://altermeta.net/archive.php?comic=%s&view=showfiller' + stripUrl = latestUrl + 'archive.php?comic=%s' imageSearch = compile(r'') prevSearch = compile(r'Back') - class Angels2200(_BasicScraper): latestUrl = 'http://www.janahoffmann.com/angels/' stripUrl = latestUrl + '%s' - imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^']+)")) + imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^'\"]+)")) prevSearch = compile(tagre("a", "href", r'([^"]+)')+"« Previous") help = 'Index format: yyyy/mm/dd/part--comic-' - class AppleGeeks(_BasicScraper): latestUrl = 'http://www.applegeeks.com/' - stripUrl = 'http://www.applegeeks.com/comics/viewcomic.php?issue=%s' + stripUrl = latestUrl + 'comics/viewcomic.php?issue=%s' imageSearch = compile(tagre("img", "src", r'"(strips/\d+?\..+?)"')) prevSearch = compile(r'
Previous Comic
\s*

', MULTILINE) help = 'Index format: n (unpadded)' @@ -140,14 +118,13 @@ class AppleGeeks(_BasicScraper): class Achewood(_BasicScraper): latestUrl = 'http://www.achewood.com/' - stripUrl = 'http://www.achewood.com/index.php?date=%s' + stripUrl = latestUrl + 'index.php?date=%s' imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)')) prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous")) help = 'Index format: mmddyyyy' namer = regexNamer(compile(r'date%3D(\d{8})')) - class AstronomyPOTD(_BasicScraper): starter = bounceStarter( 'http://antwrp.gsfc.nasa.gov/apod/astropix.html', @@ -163,7 +140,6 @@ class AstronomyPOTD(_BasicScraper): imageUrl.split('/')[-1].split('.')[0]) - class AfterStrife(_BasicScraper): latestUrl = 'http://afterstrife.com/?p=262' stripUrl = 'http://afterstrife.com/?p=%s' @@ -172,29 +148,26 @@ class AfterStrife(_BasicScraper): help = 'Index format: nnn' - class ALLCAPS(_BasicScraper): latestUrl = 'http://www.allcapscomix.com/' - stripUrl = 'http://www.allcapscomix.com/%s' + stripUrl = latestUrl + '%s' imageSearch = compile(tagre("img", "src", r'(http://www\.allcapscomix\.com/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'([^"]+)')+r"[^<]+Previous") help = 'Index format: yyyy/mm/strip-name' - class ASkeweredParadise(_BasicScraper): latestUrl = 'http://aspcomics.net/' - stripUrl = 'http://aspcomics.net/archindex.php?strip_id=%s' + stripUrl = latestUrl + 'comic/%s' imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+')) prevSearch = compile(tagre("a", "href", "(/comic/\d+)")+r"[^>]+Previous") help = 'Index format: nnn' - class AGirlAndHerFed(_BasicScraper): starter = bounceStarter('http://www.agirlandherfed.com/', compile(r'[^>]+Back')) - stripUrl = 'http://www.agirlandherfed.com/img/strip/%s' + stripUrl = 'http://www.agirlandherfed.com/1.%s.html' imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)')) prevSearch = 
compile(r'[^>]+Back') help = 'Index format: nnn' @@ -204,88 +177,70 @@ class AGirlAndHerFed(_BasicScraper): return pageUrl.split('?')[-1] - class AetheriaEpics(_BasicScraper): latestUrl = 'http://aetheria-epics.schala.net/' - stripUrl = 'http://aetheria-epics.schala.net/%s.html' - imageSearch = compile(r'') + stripUrl = latestUrl + '%s.html' + imageSearch = compile(tagre("img", "src", r'(\d{5}\.jpg)')) + prevSearch = compile(tagre("a", "href", r'(\d{5}\.html)') + "Previous") help = 'Index format: nnn' - -class Adrift(_BasicScraper): - latestUrl = 'http://www.adriftcomic.com/' - stripUrl = 'http://www.adriftcomic.com/page%s.html' - imageSearch = compile(r'') - prevSearch = compile(r'« Previous') - help = 'Index format: nnn' - + stripUrl = latestUrl + 'wordpress/%s' + imageSearch = compile(tagre("img", "src", r'(http://www\.afblues\.com/wordpress/comics/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'([^"]+)', after='Previous')) + help = 'Index format: yyyy/mm/dd/name/' class AlienShores(_BasicScraper): latestUrl = 'http://alienshores.com/alienshores_band/' - stripUrl = 'http://alienshores.com/alienshores_band/?p=%s' - imageSearch = compile(r'>') - help = 'Index format: nnn' - + stripUrl = latestUrl + '%s' + imageSearch = compile(tagre("img", "src", r'(http://alienshores\.com/alienshores_band/comics/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(http://alienshores\.com/[^"]+)', after="prev")) + help = 'Index format: yyyy/mm/dd/p/' class AllTheGrowingThings(_BasicScraper): - latestUrl = 'http://typodmary.com/growingthings/' - stripUrl = 'http://typodmary.com/growingthings/%s/' - imageSearch = compile(r'') prevSearch = compile(r'« Previous') help = 'Index format: yyyy/mm/dd/strip-name' - class AlsoBagels(_BasicScraper): - latestUrl = 'http://www.alsobagels.com/' - stripUrl = 'http://alsobagels.com/index.php/comic/%s/' - imageSearch = compile(r'') + latestUrl = 'http://alsobagels.com/' + stripUrl = latestUrl + 'index.php/comic/%s/' + imageSearch = 
compile(tagre("img", "src", r'(http://alsobagels\.com/comics/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(http://alsobagels\.com/index\.php/comic/[^"]+)', after="Previous")) help = 'Index format: strip-name' - class Annyseed(_BasicScraper): latestUrl = 'http://www.colourofivy.com/annyseed_webcomic_latest.htm' stripUrl = 'http://www.colourofivy.com/annyseed_webcomic%s.htm' diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py index 797d03de7..2e778f38e 100644 --- a/dosagelib/plugins/b.py +++ b/dosagelib/plugins/b.py @@ -8,57 +8,47 @@ from ..scraper import _BasicScraper class BadlyDrawnKitties(_BasicScraper): latestUrl = 'http://www.badlydrawnkitties.com/' - stripUrl = 'http://www.badlydrawnkitties.com/new/%s.html' - imageSearch = compile(r'') + stripUrl = latestUrl + '%s.html' + imageSearch = compile(tagre("img", "src", r'(/new/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(/[^"]+)') + tagre("img", "src", r'/images/previous\.gif')) - help = 'Index format: n (unpadded)' + help = 'Index format: n/nn (unpadded)' class Bardsworth(_BasicScraper): latestUrl = 'http://www.bardsworth.com/' - stripUrl = 'http://www.bardsworth.com/archive.php?p=s%' - imageSearch = compile(r'(strips/.+?)"') - prevSearch = compile(r'"(http.+?)".+?/prev') + stripUrl = latestUrl + '?p=%s' + imageSearch = compile(tagre("img", "src", r'(http://www\.bardsworth\.com/comics/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(http://www\.bardsworth\.com/[^"]+)', after="prev")) help = 'Index format: nnn' class BetterDays(_BasicScraper): - latestUrl = 'http://www.jaynaylor.com/betterdays/' - stripUrl = 'http://www.jaynaylor.com/betterdays/archives/%s' - imageSearch = compile(r'') - prevSearch = compile(r'« Previous') - help = 'Index format: yyyy/mm/.html' - - -class BetterYouThanMe(_BasicScraper): - latestUrl = 'http://betteryouthanme.net/' - stripUrl = 'http://betteryouthanme.net/archive.php?date=%s.gif' - imageSearch = compile(r'"(comics/.+?)"') - prevSearch = 
compile(r'"(archive.php\?date=.+?)">.+?previous') - help = 'Index format: yyyymmdd' + latestUrl = 'http://jaynaylor.com/betterdays/' + stripUrl = latestUrl + 'archives/%s.html' + imageSearch = compile(tagre("img", "src", r'(/betterdays/comic/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'([^"]+)') + '« Previous') + help = 'Index format: yyyy/mm/' class BiggerThanCheeses(_BasicScraper): - latestUrl = 'http://www.biggercheese.com' - stripUrl = 'http://www.biggercheese.com/index.php?comic=%s' + latestUrl = 'http://www.biggercheese.com/' + stripUrl = latestUrl + 'index.php?comic=%s' imageSearch = compile(r'src="(comics/.+?)" alt') prevSearch = compile(r'"(index.php\?comic=.+?)".+?_back') help = 'Index format: n (unpadded)' - class BizarreUprising(_BasicScraper): latestUrl = 'http://www.bizarreuprising.com/' - stripUrl = 'http://www.bizarreuprising.com/view/%s' - imageSearch = compile(r'Next page") help = 'Index format: n (unpadded)' - class ButternutSquash(_BasicScraper): latestUrl = 'http://www.butternutsquash.net/' - stripUrl = 'http://www.butternutsquash.net/%s' + stripUrl = latestUrl + '%s' imageSearch = compile(tagre("img", "src", r'(http://www\.butternutsquash\.net/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://www\.butternutsquash\.net/[^"]+)', after="prev")) help = 'Index format: yyyy/mm/dd/strip-name-author-name' - -def blankLabel(name, baseUrl): - return type('BlankLabel_%s' % name, - (_BasicScraper,), - dict( - name='BlankLabel/' + name, - latestUrl=baseUrl, - stripUrl=baseUrl+'d/%s.html', - imageSearch=compile(tagre("img", "src", r'(/comic[s|/][^"]+)')), - prevSearch=compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif"), - #prevSearch=compile(r'(?:"([^"]*(?:/d/[^"\r\n]*)|(?:/strip/.+?))")(?:(?:.{43}starshift_back.gif)|(?:.+?cxn_previous)|(?:.{43}previous)|(?:[^<>]*>[^<>]*<[^<>]*previous)|(?:.*?back_button)|(?:.*?comicnav-previous))'), - help='Index format: yyyymmdd') - ) - - 
-checkerboard = blankLabel('CheckerboardNightmare', 'http://www.checkerboardnightmare.com/') -courtingDisaster = blankLabel('CourtingDisaster', 'http://www.courting-disaster.com/') -evilInc = blankLabel('EvilInc', 'http://www.evil-comic.com/') -greystoneInn = blankLabel('GreystoneInn', 'http://www.greystoneinn.net/') -itsWalky = blankLabel('ItsWalky', 'http://www.itswalky.com/') -# one strip name starts with %20 -#krazyLarry = blankLabel('KrazyLarry', 'http://www.krazylarry.com/') -melonpool = blankLabel('Melonpool', 'http://www.melonpool.com/') -# strip names = index.php -#realLife = blankLabel('RealLife', 'http://www.reallifecomics.com/') -schlockMercenary = blankLabel('SchlockMercenary', 'http://www.schlockmercenary.com/') -# hosted on ComicsDotCom -#sheldon = blankLabel('Sheldon', 'http://www.sheldoncomics.com/') -shortpacked = blankLabel('Shortpacked', 'http://www.shortpacked.com/') -starslipCrisis = blankLabel('StarslipCrisis', 'http://www.starslipcrisis.com/') -uglyHill = blankLabel('UglyHill', 'http://www.uglyhill.com/') - - - -class BeePower(_BasicScraper): - latestUrl = 'http://comicswithoutviolence.com/d/20080713.html' - stripUrl = 'http://comicswithoutviolence.com/d/%s.html' - imageSearch = compile(r'src="(/comics/.+?)"') - prevSearch = compile(r'(\d+\.html)">]+?src="/images/previous_day.png"') - help = 'Index format: yyyy/mm/dd' - - - class BlankIt(_BasicScraper): latestUrl = 'http://blankitcomics.com/' - stripUrl = 'http://blankitcomics.com/%s' - imageSearch = compile(r'') + stripUrl = latestUrl + '%s' + imageSearch = compile(tagre("img", "src", r'(http://blankitcomics\.com/bicomics/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"')) help = 'Index format: yyyy/mm/dd/name' - class BobWhite(_BasicScraper): latestUrl = 'http://www.bobwhitecomics.com/' - stripUrl = 'http://www.bobwhitecomics.com/?webcomic_post=%s' + stripUrl = latestUrl + '?webcomic_post=%s' imageSearch = compile(tagre("img", "src", 
r"(http://www\.bobwhitecomics\.com/wp/wp-content/webcomic/untitled/\d+.jpg)")) prevSearch = compile(tagre("a", "href", "(http://www\.bobwhitecomics\.com/\?webcomic_post=\d+)")+r'[^"]+Previous') help = 'Index format: yyyymmdd' - class BigFatWhale(_BasicScraper): latestUrl = 'http://www.bigfatwhale.com/' - stripUrl = 'http://www.bigfatwhale.com/archives/bfw_%s.htm' - imageSearch = compile(r']+?>Previous') - help = 'Index format: (sometimes chapternumber/)-yyyy-mm-dd/stripname' - - - class BrightlyWound(_BasicScraper): latestUrl = 'http://www.brightlywound.com/' - stripUrl = 'http://www.brightlywound.com/?comic=%s' - imageSearch = compile(r'') + latestUrl = 'http://robhamm.com/bluecrashkit/' + stripUrl = latestUrl + 'comics/blue-crash-kit/%s' + imageSearch = compile(tagre("img", "src", r'(http://robhamm\.com/bluecrashkit/sites/default/files/comics/[^"]+)')) + prevSearch = compile(r'