Initial commit to Github.
This commit is contained in:
commit
f91fb80a39
55 changed files with 10801 additions and 0 deletions
12
.gitignore
vendored
Normal file
12
.gitignore
vendored
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
*.orig
|
||||||
|
*.pyc
|
||||||
|
*.pyo
|
||||||
|
/Comics
|
||||||
|
/build
|
||||||
|
/dist
|
||||||
|
/.achievements
|
||||||
|
/MANIFEST
|
||||||
|
/todo
|
||||||
|
/Changelog.patool*
|
||||||
|
/_Dosage_configdata.py
|
||||||
|
/comics.test
|
20
COPYING
Normal file
20
COPYING
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
Copyright © 2004-2008 Jonathan Jacobs and Tristan Seligmann
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||||
|
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||||
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||||
|
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
4
MANIFEST.in
Normal file
4
MANIFEST.in
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
include MANIFEST.in
|
||||||
|
include COPYING doc/*.txt
|
||||||
|
include Makefile
|
||||||
|
recursive-include tests *.py
|
88
Makefile
Normal file
88
Makefile
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
# Developer-only Makefile: packaging, release, lint and test helpers.

# --- Python toolchain -------------------------------------------------------
# Interpreter version and the matching executable name.
PYVER := 2.7
PYTHON := python$(PYVER)
# Path printed by `which nosetests`, looked up once while the Makefile is parsed.
NOSETESTS := $(shell which nosetests)

# --- Release naming ---------------------------------------------------------
# Version string as printed by `setup.py --version`, and the tarball named after it.
VERSION := $(shell $(PYTHON) setup.py --version)
ARCHIVE := dosage-$(VERSION).tar.gz

# --- Source locations and misc options --------------------------------------
# Paths handed to pyflakes; the `*.py` glob is left for the shell to expand.
PY_FILES_DIRS := dosage dosagelib tests *.py
# Empty unless already set; not referenced by any rule visible in this file.
PY2APPOPTS ?=
# End-of-options marker placed before the file operands of chmod.
CHMODMINUSMINUS := --

# --- Test configuration -----------------------------------------------------
# Number of lines in /proc/cpuinfo containing "processor"; passed to
# nosetests as its --processes value.
NUMPROCESSORS := $(shell grep -c processor /proc/cpuinfo)
# which test modules to run
TESTS ?= tests/
# set test options, eg. to "--nologcapture"
TESTOPTS =

# Default goal: no prerequisites and no recipe, so a bare `make` does nothing.
all:
|
||||||
|
|
||||||
|
|
||||||
|
# Normalise permissions in the working tree: first a recursive symbolic chmod
# on everything the shell glob `*` matches, then every directory below `.` is
# set to mode 755. The leading `-` lets make continue when the first chmod
# reports an error.
chmod:
	-chmod -R a+rX,u+w,go-w $(CHMODMINUSMINUS) *
	find . -type d -exec chmod 755 '{}' ';'

.PHONY: chmod
|
||||||
|
|
||||||
|
# Export HEAD as a gzip-compressed tarball one directory above the checkout,
# then write a SHA-1 checksum file and an ASCII-armoured detached GPG signature
# for it. An existing .sha1 or .asc file is left untouched.
dist:
	git archive --format=tar --prefix=dosage-$(VERSION)/ HEAD | gzip -9 > ../$(ARCHIVE)
	test -f ../$(ARCHIVE).sha1 || sha1sum ../$(ARCHIVE) > ../$(ARCHIVE).sha1
	test -f ../$(ARCHIVE).asc || gpg --detach-sign --armor ../$(ARCHIVE)

.PHONY: dist
|
||||||
|
|
||||||
|
# Render the manual page as HTML: take the man2html output from its second
# line onwards, blank everything from "Time:" to the end of a line, and
# rewrite every "/:" to "/".
doc/dosage.1.html: doc/dosage.1
	man2html -r $< | tail -n +2 | sed -e 's/Time:.*//g' -e 's@/:@/@g' > $@
|
||||||
|
|
||||||
|
# Cut a release: prerequisites are distclean, releasecheck and dist (listed in
# that order); the recipe then creates the git tag v<VERSION>.
release: distclean releasecheck dist
	git tag v$(VERSION)
# Disabled publishing steps, kept for reference:
#	@echo "Register at Python Package Index..."
#	$(PYTHON) setup.py register
#	freecode-submit < dosage.freecode

.PHONY: release
|
||||||
|
|
||||||
|
|
||||||
|
# Gate for the release target: depends on check and test, then fails while
# doc/changelog.txt still contains a placeholder release date (any of "xx.",
# "xxxx" or ".xx", matched case-insensitively).
# `grep -E` is used instead of the obsolescent `egrep` alias, which recent GNU
# grep releases warn about; `-q` replaces the redirect to /dev/null.
.PHONY: releasecheck
releasecheck: check test
	@if grep -Eiq "xx\.|xxxx|\.xx" doc/changelog.txt; then \
	  echo "Could not release: edit doc/changelog.txt release date"; false; \
	fi
# Disabled freecode version check, kept for reference:
#	@if ! grep "Version: $(VERSION)" dosage.freecode > /dev/null; then \
#	  echo "Could not release: edit dosage.freecode version"; false; \
#	fi
|
||||||
|
|
||||||
|
# NOTE: the checkers invoked here are mostly local scripts from the original
# author's private system, so other developers have no need to run this target.
# The svn eol-style checker only runs when a .svn directory is present.
check:
	if [ -d .svn ]; then check-nosvneolstyle -v; fi
	check-copyright
	check-pofiles -v
	py-tabdaddy
	py-unittest2-compat tests/

.PHONY: check
|
||||||
|
|
||||||
|
# Run pyflakes over the paths listed in PY_FILES_DIRS.
pyflakes:
	pyflakes $(PY_FILES_DIRS)

.PHONY: pyflakes
|
||||||
|
|
||||||
|
# Run sloccount on the dosage and dosagelib paths and print only its
# "Total Physical Source Lines of Code" line; `@` hides the command itself.
count:
	@sloccount dosage dosagelib | grep "Total Physical Source Lines of Code"

.PHONY: count
|
||||||
|
|
||||||
|
# Delete compiled Python files (*.pyc, *.pyo) anywhere below the current
# directory, then remove the build/ and dist/ output directories.
clean:
	find . \( -name '*.pyc' -o -name '*.pyo' \) -delete
	rm -rf build dist

.PHONY: clean
|
||||||
|
|
||||||
|
# Everything `clean` removes, plus the egg-info directory, the generated
# _Dosage_configdata.py module and the MANIFEST file.
# The declaration must be spelled `.PHONY` with the leading dot: a plain
# `PHONY: distclean` only defines an ordinary target called PHONY and leaves
# distclean non-phony, so a file named "distclean" would stop it from running.
.PHONY: distclean
distclean: clean
	rm -rf build dist Dosage.egg-info
	rm -f _Dosage_configdata.py MANIFEST
|
||||||
|
|
||||||
|
# Run nosetests under $(PYTHON) in verbose mode with $(NUMPROCESSORS) worker
# processes, using "^test_.*" as the -m match pattern. TESTOPTS adds extra
# options; TESTS selects the test paths.
test:
	$(PYTHON) $(NOSETESTS) -v --processes=$(NUMPROCESSORS) \
	  -m "^test_.*" $(TESTOPTS) $(TESTS)

.PHONY: test
|
||||||
|
|
||||||
|
# Invoke git-buildpackage with ../build-area/ as the export directory, `master`
# as the upstream branch and `debian` as the packaging branch; --git-ignore-new
# is passed as well.
deb:
	git-buildpackage \
	  --git-export-dir=../build-area/ \
	  --git-upstream-branch=master \
	  --git-debian-branch=debian \
	  --git-ignore-new

.PHONY: deb
|
||||||
|
|
||||||
|
# Run ./dosage verbosely with the `@@` argument, capturing stdout and stderr
# in comics.log.
# Declared phony because the target creates no file called "comics"; without
# the declaration an existing file or directory of that name (for example the
# "Comics" directory on a case-insensitive filesystem) would make the target
# look up to date and the recipe would never run.
.PHONY: comics
comics:
	./dosage -v @@ > comics.log 2>&1
|
1
README.md
Symbolic link
1
README.md
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
doc/README.txt
|
93
doc/README.txt
Normal file
93
doc/README.txt
Normal file
|
@ -0,0 +1,93 @@
|
||||||
|
Dosage
|
||||||
|
=======
|
||||||
|
|
||||||
|
Dosage is a powerful webcomic downloader and archiver.
|
||||||
|
|
||||||
|
Introduction
|
||||||
|
-------------
|
||||||
|
Dosage is designed to keep a local copy of specific webcomics
|
||||||
|
and other picture-based content such as Picture of the Day sites.
|
||||||
|
With the dosage commandline script you can get the latest strip of
|
||||||
|
a webcomic, or catch up to the last strip downloaded, or download a
|
||||||
|
strip for a particular date/index (except if the webcomic's site layout
|
||||||
|
makes this impossible).
|
||||||
|
|
||||||
|
Notice
|
||||||
|
-------
|
||||||
|
This software is in no way intended to publicly "broadcast" comic strips,
|
||||||
|
it is purely for personal use. Please be aware that by making these strips
|
||||||
|
publicly available (without the explicit permission of the author) you
|
||||||
|
may be infringing upon various copyrights.
|
||||||
|
|
||||||
|
Usage
|
||||||
|
------
|
||||||
|
List available comics (over 4000 at the moment):
|
||||||
|
`$ dosage -l`
|
||||||
|
|
||||||
|
Get the latest comic of for example CalvinAndHobbes and save it in the "Comics"
|
||||||
|
directory:
|
||||||
|
`$ dosage CalvinAndHobbes`
|
||||||
|
|
||||||
|
If you already have downloaded several comics and want to get the latest
|
||||||
|
strip of all of them:
|
||||||
|
`$ dosage @`
|
||||||
|
|
||||||
|
For advanced options and features execute dosage -h or look at the dosage
|
||||||
|
manual page.
|
||||||
|
|
||||||
|
Offensive comics
|
||||||
|
-----------------
|
||||||
|
There are some comics supported by Dosage that may be offensive to readers or
|
||||||
|
to others that have access to the downloaded images.
|
||||||
|
SexyLosers is one module that has been discussed. Dosage offers a mechanism
|
||||||
|
to disable such modules. Modules listed in "/etc/dosage/disabled" and
|
||||||
|
"~/.dosage/disabled" will be disabled. These files should contain only one
|
||||||
|
module name per line. Note: Under Windows "~" will also expand to the user's
|
||||||
|
home directory, usually "C:\Documents and Settings\UserName".
|
||||||
|
|
||||||
|
Dependencies
|
||||||
|
-------------
|
||||||
|
Dosage requires Python version 2.5 or higher, which can be downloaded
|
||||||
|
from http://www.python.org.
|
||||||
|
No external Python modules are required - only the Python Standard Library
|
||||||
|
that gets installed with Python.
|
||||||
|
|
||||||
|
Installation
|
||||||
|
-------------
|
||||||
|
You can invoke Dosage directly from the source code as "./dosage". Alternatively,
|
||||||
|
you can install Dosage using python distutils by invoking setup.py in
|
||||||
|
the root of the distribution. For example:
|
||||||
|
|
||||||
|
`python setup.py install`
|
||||||
|
|
||||||
|
or if you do not have root permissions:
|
||||||
|
|
||||||
|
`python setup.py install --home=$HOME`
|
||||||
|
|
||||||
|
Technical Description
|
||||||
|
----------------------
|
||||||
|
Dosage is written entirely in Python and relies on regular expressions to
|
||||||
|
do most of the grunt work.
|
||||||
|
|
||||||
|
For each webcomic Dosage has a plugin module, found in the "plugins"
|
||||||
|
subdirectory of the dosagelib directory. Each module is a subclass of
|
||||||
|
the _BasicComic class and specifies where to download its comic images.
|
||||||
|
Some comic syndicates (ucomics for example) have a standard layout for all
|
||||||
|
comics. For such cases there are general base classes derived from _BasicComic
|
||||||
|
which help define the plugins for all comics of this syndicate.
|
||||||
|
|
||||||
|
Extending Dosage
|
||||||
|
-----------------
|
||||||
|
In order to add a new webcomic, a new module class has to be created in one of the
|
||||||
|
*.py files in the dosagelib/plugins subdirectory. Look at the existing
|
||||||
|
module classes for examples.
|
||||||
|
|
||||||
|
Reporting Bugs
|
||||||
|
---------------
|
||||||
|
You can report bugs, patches or requests at the Github issue tracker at
|
||||||
|
https://github.com/wummel/dosage/issues
|
||||||
|
|
||||||
|
Dosage currently supports a large number of comics and that number grows on
|
||||||
|
a regular basis. If you feel that there are comics that Dosage does not
|
||||||
|
currently support but should support, please feel free to request them.
|
||||||
|
|
647
doc/changelog.txt
Normal file
647
doc/changelog.txt
Normal file
|
@ -0,0 +1,647 @@
|
||||||
|
Dosage 1.7 (released xx.xx.2012)
|
||||||
|
|
||||||
|
Features:
|
||||||
|
- cmdline: Added proper return codes for error conditions.
|
||||||
|
- comics: Added more robust regular expressions for HTML tags.
|
||||||
|
They now match case-insensitively and ignore whitespace.
|
||||||
|
|
||||||
|
Changes:
|
||||||
|
- installation: Added support for dynamic configuration values.
|
||||||
|
- comics: Removed the twisted and zope dependencies by adding
|
||||||
|
an internal plugin search mechanism.
|
||||||
|
- testing: Refactored the test comic routine into proper unit tests.
|
||||||
|
|
||||||
|
Fixes:
|
||||||
|
- comics: Adjusted Xkcd href values.
|
||||||
|
- comics: Don't add empty URLs to the list of found URLs.
|
||||||
|
|
||||||
|
|
||||||
|
Dosage v.1.6.0:
|
||||||
|
* The "Not Dead Yet" release.
|
||||||
|
* Added / Fixed / etc. comics:
|
||||||
|
- Too many to list, really.
|
||||||
|
* New dependencies:
|
||||||
|
- Twisted
|
||||||
|
- zope.interface (not zope)
|
||||||
|
* Revamped plugin system, the first step on the road to Twisted.
|
||||||
|
|
||||||
|
Dosage v.1.5.8:
|
||||||
|
* Added comics:
|
||||||
|
- BonoboConspiracy
|
||||||
|
- ChasingTheSunset
|
||||||
|
- Comedity
|
||||||
|
- GoneWithTheBlastwave
|
||||||
|
- KeenSpot/* -- a *LOT* of KeenSpot submodules
|
||||||
|
- NichtLustig
|
||||||
|
- OtenbaFiles
|
||||||
|
- Wulffmorgenthaler
|
||||||
|
- Y
|
||||||
|
* Fixed comics:
|
||||||
|
- AbstractGender
|
||||||
|
- AlienLovesPredator
|
||||||
|
- AppleGeeks
|
||||||
|
- EarthsongSaga
|
||||||
|
- NewWorld
|
||||||
|
- WhiteNinja
|
||||||
|
* Moved comics:
|
||||||
|
- KeenSpot/CatLegend (previously CatLegend)
|
||||||
|
- All KeenSpot/* comic subnames no longer have "The" prefixes.
|
||||||
|
- UClick (replaces UComics and UComicsEspanol)
|
||||||
|
* Removed comics:
|
||||||
|
- KeenSpot/TheDevilsPanties (duplicate of KeenSpot/DevilsPanties)
|
||||||
|
|
||||||
|
Dosage v.1.5.7:
|
||||||
|
* Important SmackJeeves module fix. Catchup used to loop around from the
|
||||||
|
first strip to the last one, thus potentially hammering the SmackJeeves
|
||||||
|
servers with floods of requests from neverending catchups.
|
||||||
|
* Added comics:
|
||||||
|
- AbleAndBaker
|
||||||
|
- AcademyVale
|
||||||
|
- Aikida
|
||||||
|
- Angels2200
|
||||||
|
- BetterDays
|
||||||
|
- BlankLabel (virtual module)
|
||||||
|
- BoredAndEvil
|
||||||
|
- Catharsis
|
||||||
|
- ChuckAndElmo
|
||||||
|
- CloneManga/PennyTribute
|
||||||
|
- CourtingDisaster
|
||||||
|
- DeathToTheExtremist
|
||||||
|
- DogComplex
|
||||||
|
- DownToEarth
|
||||||
|
- Dracula
|
||||||
|
- DragonTails
|
||||||
|
- DrFun
|
||||||
|
- DungeonCrawlInc
|
||||||
|
- ExtraLife
|
||||||
|
- FalconTwin
|
||||||
|
- FightCastOrEvade
|
||||||
|
- Flipside
|
||||||
|
- Housd
|
||||||
|
- JerkCity
|
||||||
|
- JoeAndMonkey
|
||||||
|
- KeenSpot/SuicideForHire
|
||||||
|
- LasLindas
|
||||||
|
- Nekobox
|
||||||
|
- Nervillsaga
|
||||||
|
- NewAdventures
|
||||||
|
- NewAdventuresOfBobbin
|
||||||
|
- Nihilism
|
||||||
|
- Nukees
|
||||||
|
- OkayPants
|
||||||
|
- PartiallyClips
|
||||||
|
- PensAndTales
|
||||||
|
- RWWR
|
||||||
|
- WebcomicsNation (virtual module)
|
||||||
|
- Yirmumah
|
||||||
|
* Fixed comics:
|
||||||
|
- Asif
|
||||||
|
- CatLegend
|
||||||
|
- CloneManga/NanasEverydayLife
|
||||||
|
- CloneManga/PaperEleven
|
||||||
|
- DrunkDuck (various comics no longer present)
|
||||||
|
- EarthsongSaga
|
||||||
|
- ErrantStory
|
||||||
|
- InkTank
|
||||||
|
- KeenSpot/<various> (ComicGenesis migration)
|
||||||
|
- KiagiSwordscat
|
||||||
|
- Qwantz
|
||||||
|
- SGVY
|
||||||
|
- SmackJeeves
|
||||||
|
- Smamusement
|
||||||
|
- SnafuComics
|
||||||
|
- UComicsEspanol
|
||||||
|
* Moved comics:
|
||||||
|
- Stubble (previously KeenSpot/Stubble)
|
||||||
|
|
||||||
|
Dosage v.1.5.6:
|
||||||
|
* Added comics:
|
||||||
|
- CandyCartoon
|
||||||
|
- CloneManga/Kanami
|
||||||
|
- Drowtales
|
||||||
|
- KeenSpot/FoxTails
|
||||||
|
- Krakow
|
||||||
|
- SmackJeeves (virtual module)
|
||||||
|
* Fixed comics:
|
||||||
|
- CrapIDrewOnMyLunchBreak
|
||||||
|
- CtrlAltDel
|
||||||
|
- DMFA
|
||||||
|
- EarthsongSaga
|
||||||
|
- EverybodyLovesEricRaymond
|
||||||
|
- GirlsWithSlingshots
|
||||||
|
- KeenSpot
|
||||||
|
- KeenSpot/WapsiSquare
|
||||||
|
- NewWorld
|
||||||
|
- PennyArcade
|
||||||
|
- PiledHigherAndDeeper
|
||||||
|
- QuestionableContent
|
||||||
|
- SluggyFreelance
|
||||||
|
- SnafuComics
|
||||||
|
- Sokora
|
||||||
|
- UComicsEspanol (updated submodules)
|
||||||
|
- UComics (updated submodules)
|
||||||
|
* Moved comics:
|
||||||
|
- CatLegend (previously KeenSpot/CatLegend)
|
||||||
|
- DominicDeegan (previously KeenSpot/DominicDeegan)
|
||||||
|
- KeenSpot/TriquetraCats (previously DrunkDuck/TriquetraCats)
|
||||||
|
- NekoTheKitty (previously KeenSpot/NekoTheKitty)
|
||||||
|
- TheNoob (previously KeenSpot/TheNoob)
|
||||||
|
|
||||||
|
Dosage v.1.5.5:
|
||||||
|
* Added comics:
|
||||||
|
- AbstractGender
|
||||||
|
- AnimeArcadia
|
||||||
|
- CaptainSNES
|
||||||
|
- DrunkDuck/Holy_Zen
|
||||||
|
- EarthsongSaga
|
||||||
|
- NinthElsewhere (9th Elsewhere)
|
||||||
|
- PebbleVersion
|
||||||
|
- SGVY (Sparkling Generation Valkyrie Yuuki)
|
||||||
|
- SuccubusJustice
|
||||||
|
- ErrantStory (previously KeenSpot/ErrantStory)
|
||||||
|
* Fixed comics:
|
||||||
|
- DrunkDuck
|
||||||
|
- PvPonline
|
||||||
|
- SluggyFreelance
|
||||||
|
|
||||||
|
Dosage v.1.5.4:
|
||||||
|
* Added comics:
|
||||||
|
- Andiwear
|
||||||
|
- DrunkDuck (virtual)
|
||||||
|
- EverybodyLovesEricRaymond
|
||||||
|
- FantasyRealms
|
||||||
|
- KeenSpot/2WayMirror
|
||||||
|
- KeenSpot/ANT
|
||||||
|
- KeenSpot/AngelTheDemoness
|
||||||
|
- KeenSpot/Apotheosis
|
||||||
|
- KeenSpot/Aquatica
|
||||||
|
- KeenSpot/BadlyDrawnKitties
|
||||||
|
- KeenSpot/BobAndFred
|
||||||
|
- KeenSpot/BrunoTheBandit
|
||||||
|
- KeenSpot/CatLegend
|
||||||
|
- KeenSpot/EdibleDirt
|
||||||
|
- KeenSpot/FelicityFlint
|
||||||
|
- KeenSpot/Flem
|
||||||
|
- KeenSpot/GreenAvenger
|
||||||
|
- KeenSpot/LangLang
|
||||||
|
- KeenSpot/Picatrix
|
||||||
|
- KeenSpot/ScandalSheet
|
||||||
|
- KeenSpot/Shifters
|
||||||
|
- KeenSpot/SoapOnARope
|
||||||
|
- KeenSpot/SuburbanJungle
|
||||||
|
- KeenSpot/TheClassMenagerie
|
||||||
|
- KeenSpot/TheDevilsPanties
|
||||||
|
- KeenSpot/ToddAndPenguin
|
||||||
|
- KeenSpot/TwoLumps
|
||||||
|
- KeenSpot/Wereworld
|
||||||
|
- KeenSpot/YouDamnKid
|
||||||
|
- SokoraRefugees
|
||||||
|
* Fixed comics:
|
||||||
|
- AbsurdNotions
|
||||||
|
- CloneManga
|
||||||
|
- PastelDefender
|
||||||
|
- PennyArcade
|
||||||
|
- SluggyFreelance
|
||||||
|
|
||||||
|
Dosage v.1.5.3:
|
||||||
|
* Fixed a bug that caused RSS output to crash if the file already existed,
|
||||||
|
but had no items.
|
||||||
|
* Added comics:
|
||||||
|
- CatAndGirl
|
||||||
|
- CloneManga
|
||||||
|
- Commissioned
|
||||||
|
- JoyOfTech
|
||||||
|
- KeenSpot/AlphaLuna
|
||||||
|
- KeenSpot/Lowroad75
|
||||||
|
- KeenSpot/Werechild
|
||||||
|
- TheWotch
|
||||||
|
- TonjaSteele
|
||||||
|
* Fixed comics:
|
||||||
|
- DieselSweeties
|
||||||
|
- LittleGamers
|
||||||
|
- PennyArcade
|
||||||
|
- StarCrossdDestiny
|
||||||
|
- VGCats
|
||||||
|
|
||||||
|
Dosage v.1.5.2:
|
||||||
|
* Removed some debugging cruft that slipped through in the last release.
|
||||||
|
* Added comics:
|
||||||
|
- KeenSpot/TheNoob
|
||||||
|
- PiledHigherAndDeeper
|
||||||
|
* Fixed comics:
|
||||||
|
- ALessonIsLearned
|
||||||
|
- Misfile
|
||||||
|
- RealLife
|
||||||
|
- UComics
|
||||||
|
- UComicsEspanol
|
||||||
|
|
||||||
|
Dosage v.1.5.1:
|
||||||
|
* Output event modules now generate proper URLs. You can now pass a base URL
|
||||||
|
with --base-url, which should correspond to --base-path. If not passed,
|
||||||
|
Dosage will try to generate a working file:/// URL, but this may not work in
|
||||||
|
some circumstances.
|
||||||
|
* RSS output tweaked.
|
||||||
|
* --list now outputs in columns; pass --single-list to get the old
|
||||||
|
behaviour (thanks TobiX).
|
||||||
|
* Added comics:
|
||||||
|
- AbsurdNotions (contributed by TobiX)
|
||||||
|
- Altermeta (contributed by TobiX)
|
||||||
|
- AModestDestiny (contributed by TobiX)
|
||||||
|
- BadBlood
|
||||||
|
- BetterYouThanMe
|
||||||
|
- Bhag (contributed by Shrimp)
|
||||||
|
- ChroniclesOfGaras (contributed by Shrimp)
|
||||||
|
- CrapIDrewOnMyLunchBreak (contributed by Shrimp)
|
||||||
|
- EternalVenture (contributed by Shrimp)
|
||||||
|
- Evercrest (contributed by TobiX)
|
||||||
|
- Frump (contributed by Shrimp)
|
||||||
|
- GUComics (contributed by TobiX)
|
||||||
|
- KeenSpot/BoomerExpress (contributed by TobiX)
|
||||||
|
- KevinAndKell (contributed by TobiX)
|
||||||
|
- LethalDosesClassic (contributed by TobiX)
|
||||||
|
- LethalDoses (contributed by TobiX)
|
||||||
|
- ListeningTo11975MHz (contributed by TobiX)
|
||||||
|
- Marilith
|
||||||
|
- MinesBigger (contributed by Shrimp)
|
||||||
|
- MyPrivateLittleHell (contributed by TobiX)
|
||||||
|
- MyWarWithCulture
|
||||||
|
- NeoGreenwood (contributed by Shrimp)
|
||||||
|
- NuklearPower (contributed by Shrimp)
|
||||||
|
- PerkiGoth (contributed by TobiX)
|
||||||
|
- PreludesEnd (contributed by Shrimp)
|
||||||
|
- ShadowInTheMirror (contributed by Shrimp)
|
||||||
|
- UComicsEspanol
|
||||||
|
- WhyTheLongFace (contributed by TobiX)
|
||||||
|
- Winter (contributed by TobiX)
|
||||||
|
* Fixed comics:
|
||||||
|
- Creators
|
||||||
|
- PennyArcade
|
||||||
|
- UComics (removed comics no longer supported and moved Spanish comics
|
||||||
|
to UComicsEspanol)
|
||||||
|
- UnicornJelly
|
||||||
|
|
||||||
|
Dosage v.1.5.0:
|
||||||
|
* Added an RSS output event. (contributed by Colin Alston)
|
||||||
|
* Dosage now sends a more descriptive User-Agent HTTP header.
|
||||||
|
* Dosage will now continue downloading strips until no new strips are
|
||||||
|
downloaded, this fixed problems with comics that had multiple strips per
|
||||||
|
page or comics that employed "precache" methods.
|
||||||
|
* Specific modules can now be disabled by specifying them in
|
||||||
|
/etc/dosage/disabled (global) and ~/.dosage/disabled (local).
|
||||||
|
* Fixed problem with division by zero error often occurring under Windows.
|
||||||
|
* Added comics:
|
||||||
|
- AlienLovesPredator (contributed by Shrimp)
|
||||||
|
- AllGrownUp (contributed by Shrimp)
|
||||||
|
- AsylumOn5thStreet (contributed by Shrimp)
|
||||||
|
- BizarreUprising (contributed by Shrimp)
|
||||||
|
- Creators/Archie
|
||||||
|
- Creators/AskShagg
|
||||||
|
- Creators/ForHeavensSake
|
||||||
|
- Creators/Rugrats
|
||||||
|
- Creators/StateOfTheUnion
|
||||||
|
- Creators/TheDinetteSet
|
||||||
|
- Creators/TheMeaningOfLila
|
||||||
|
- Creators/WeePals
|
||||||
|
- Creators/ZackHill
|
||||||
|
- DMFA (contributed by TobiX)
|
||||||
|
- DoctorRoboto (contributed by Shrimp)
|
||||||
|
- DoemainOfOurOwn
|
||||||
|
- EntertainDome (contributed by Shrimp)
|
||||||
|
- FauxPas (contributed by TobiX)
|
||||||
|
- IrregularWebcomic (contributed by TobiX)
|
||||||
|
- JamesFrancis/gonzo
|
||||||
|
- JamesFrancis/psycindom0
|
||||||
|
- JamesFrancis/psycindom1
|
||||||
|
- JamesFrancis/psycindom2
|
||||||
|
- KeenSpot/AlienDice
|
||||||
|
- KeenSpot/Avalon
|
||||||
|
- KeenSpot/CountYourSheep
|
||||||
|
- KeenSpot/DexLives (contributed by TobiX)
|
||||||
|
- KeenSpot/DominicDeegan
|
||||||
|
- KeenSpot/ElGoonishShive
|
||||||
|
- KeenSpot/ElfLife
|
||||||
|
- KeenSpot/ErrantStory
|
||||||
|
- KeenSpot/EverythingJake
|
||||||
|
- KeenSpot/FriendlyHostility
|
||||||
|
- KeenSpot/FunnyFarm
|
||||||
|
- KeenSpot/GamingGuardians
|
||||||
|
- KeenSpot/GeneCatlow
|
||||||
|
- KeenSpot/GoblinHollow (contributed by TobiX)
|
||||||
|
- KeenSpot/GreystoneInn
|
||||||
|
- KeenSpot/InAPerfectWorld (contributed by TobiX)
|
||||||
|
- KeenSpot/JoeAverage (contributed by TobiX)
|
||||||
|
- KeenSpot/MariposaRevelation (contributed by TobiX)
|
||||||
|
- KeenSpot/NaughtFramed
|
||||||
|
- KeenSpot/NekoTheKitty (contributed by TobiX)
|
||||||
|
- KeenSpot/NipAndTuck (contributed by TobiX)
|
||||||
|
- KeenSpot/OneOverZero (contributed by TobiX)
|
||||||
|
- KeenSpot/PastelDefender
|
||||||
|
- KeenSpot/RoadWaffles
|
||||||
|
- KeenSpot/Scatterplot
|
||||||
|
- KeenSpot/SchlockMercenary
|
||||||
|
- KeenSpot/TalesOfTheQuestor (contributed by TobiX)
|
||||||
|
- KeenSpot/UberSoft
|
||||||
|
- KeenSpot/UnicornJelly
|
||||||
|
- KeenSpot/WorldOfFenninRo (contributed by TobiX)
|
||||||
|
- KeenSpot/ZebraGirl
|
||||||
|
- LessThanKate (contributed by Shrimp)
|
||||||
|
- OurHomePlanet (contributed by Shrimp)
|
||||||
|
- Spamusement
|
||||||
|
- Sternstaub (contributed by Shrimp)
|
||||||
|
- TheLounge (contributed by Shrimp)
|
||||||
|
- TheOrderOfTheStick
|
||||||
|
- UComics/animatedoliphant
|
||||||
|
- UComics/anntelnaes
|
||||||
|
- UComics/askcaptainribman
|
||||||
|
- UComics/baldoespanol
|
||||||
|
- UComics/barbarabrandon
|
||||||
|
- UComics/bensargent
|
||||||
|
- UComics/billdeore
|
||||||
|
- UComics/brewsterrockit
|
||||||
|
- UComics/brucehammond
|
||||||
|
- UComics/calvinandhobbesespanol
|
||||||
|
- UComics/cathyespanol
|
||||||
|
- UComics/chanlowe
|
||||||
|
- UComics/condorito
|
||||||
|
- UComics/danasummers
|
||||||
|
- UComics/danwasserman
|
||||||
|
- UComics/davidhorsey
|
||||||
|
- UComics/dicklocher
|
||||||
|
- UComics/dickwright
|
||||||
|
- UComics/donwright
|
||||||
|
- UComics/dougmarlette
|
||||||
|
- UComics/drewsheneman
|
||||||
|
- UComics/facesinthenews
|
||||||
|
- UComics/foxtrotespanol
|
||||||
|
- UComics/fredbassetespanol
|
||||||
|
- UComics/garfieldespanol
|
||||||
|
- UComics/garyvarvel
|
||||||
|
- UComics/gaturro
|
||||||
|
- UComics/glennmccoy
|
||||||
|
- UComics/hubertandabby
|
||||||
|
- UComics/jackhiggins
|
||||||
|
- UComics/jackohman
|
||||||
|
- UComics/jeffdanziger
|
||||||
|
- UComics/laloalcaraz
|
||||||
|
- UComics/mattdavies
|
||||||
|
- UComics/modestyblaise
|
||||||
|
- UComics/muttandjeffespanol
|
||||||
|
- UComics/neurotica
|
||||||
|
- UComics/overboardespanol
|
||||||
|
- UComics/patoliphant
|
||||||
|
- UComics/paulconrad
|
||||||
|
- UComics/pepe
|
||||||
|
- UComics/poochcafeespanol
|
||||||
|
- UComics/pricklycity
|
||||||
|
- UComics/sigmund
|
||||||
|
- UComics/smallworld
|
||||||
|
- UComics/stevesack
|
||||||
|
- UComics/stuartcarlson
|
||||||
|
- UComics/tedrall
|
||||||
|
- UComics/thebigpicture
|
||||||
|
- UComics/theelderberries
|
||||||
|
- UComics/thefifthwave
|
||||||
|
- UComics/thefuscobrothers
|
||||||
|
- UComics/themiddletons
|
||||||
|
- UComics/thequigmans
|
||||||
|
- UComics/tomtoles
|
||||||
|
- UComics/tonyauth
|
||||||
|
- UComics/tutelandia
|
||||||
|
- UComics/walthandelsman
|
||||||
|
- UComics/waynestayskal
|
||||||
|
- UComics/ziggyespanol
|
||||||
|
- WiguTV
|
||||||
|
* Fixed comics:
|
||||||
|
- Dominion
|
||||||
|
- KeenSpot/GeneralProtectionFault (contributed by TobiX)
|
||||||
|
- SluggyFreelance
|
||||||
|
- UserFriendly
|
||||||
|
- VGCats (contributed by TobiX)
|
||||||
|
- Wigu
|
||||||
|
|
||||||
|
Dosage v.1.4.0:
|
||||||
|
* A manual page for 'mainline' is now included.
|
||||||
|
* Events output; currently the only useful handler is 'html', which
|
||||||
|
outputs an HTML page with all of the downloaded comics. These
|
||||||
|
files are named by date, and have links to the previous and next
|
||||||
|
days (similar to dailystrips).
|
||||||
|
* Added comics:
|
||||||
|
- MadamAndEve (contributed by Anthony Caetano)
|
||||||
|
- SnafuComics/Grim
|
||||||
|
- SnafuComics/KOF
|
||||||
|
- SnafuComics/PowerPuffGirls
|
||||||
|
- SnafuComics/Snafu
|
||||||
|
- SnafuComics/Tin
|
||||||
|
- TheParkingLotIsFull
|
||||||
|
- Zapiro (contributed by Anthony Caetano)
|
||||||
|
* Fixed comics:
|
||||||
|
- UserFriendly (naming fix)
|
||||||
|
|
||||||
|
Dosage v.1.3.0:
|
||||||
|
* Progress bar has been improved; specifically for gauging downloads of
|
||||||
|
unknown size
|
||||||
|
* All relevant images are now downloaded where necessary; thanks bruce :)
|
||||||
|
* Incomplete downloads are discarded
|
||||||
|
* Removed junview
|
||||||
|
* Main script is now 'mainline' (used to be 'dosage')
|
||||||
|
* Added comics:
|
||||||
|
- AstronomyPOTD
|
||||||
|
- CounterCulture
|
||||||
|
- Dominion
|
||||||
|
- Fallen
|
||||||
|
- Freefall
|
||||||
|
- GenrezvousPoint
|
||||||
|
- KeenSpot/Blindworks
|
||||||
|
- KeenSpot/BoyMeetsBoy
|
||||||
|
- KeenSpot/Scrued
|
||||||
|
- KeenSpot/Stubble
|
||||||
|
- KeenSpot/TAVision
|
||||||
|
- KeenSpot/TangsWeeklyComic
|
||||||
|
- KingFeatures
|
||||||
|
- OhMyGods
|
||||||
|
- RedMeat
|
||||||
|
- WotNow
|
||||||
|
* Fixed comics:
|
||||||
|
- MegaTokyo
|
||||||
|
- SomethingPositive (naming fix)
|
||||||
|
- TheFray (now a virtual module)
|
||||||
|
|
||||||
|
Dosage v.1.2.0:
|
||||||
|
* Progress bar is now disabled if the window size cannot be determined
|
||||||
|
* Source was restructured; the dosage script is now located in the bin/
|
||||||
|
directory.
|
||||||
|
* Added comics:
|
||||||
|
- BiggerThanCheeses
|
||||||
|
- BrickShitHouse
|
||||||
|
- ChugworthAcademy
|
||||||
|
- DandyAndCompany
|
||||||
|
- Girly
|
||||||
|
- HighPingBastard
|
||||||
|
- Jack
|
||||||
|
- KeenSpot/ChoppingBlock
|
||||||
|
- KeenSpot/SaturdayMorningBreakfastCereal
|
||||||
|
- KeenSpot/StrangeCandy
|
||||||
|
- KeenSpot/WapsiSquare
|
||||||
|
- KiagiSwordscat
|
||||||
|
- MakeWithTheFunny
|
||||||
|
- Pixel
|
||||||
|
- PockyBot
|
||||||
|
- SamAndFuzzy
|
||||||
|
- Spoonies
|
||||||
|
|
||||||
|
Dosage v.1.1.0:
|
||||||
|
* A download progress bar is now available on Linux (and probably other
|
||||||
|
UNIX-like systems)
|
||||||
|
* Timestamps are now updated even if the strip is not redownloaded
|
||||||
|
* Added comics:
|
||||||
|
- ALessonIsLearned
|
||||||
|
- ASofterWorld
|
||||||
|
- BoyOnAStickAndSlither
|
||||||
|
- Chisuji
|
||||||
|
- ExploitationNow
|
||||||
|
- KeenSpot/Ghastly
|
||||||
|
- KeenSpot/Saturnalia
|
||||||
|
- Loserz
|
||||||
|
- Qwantz
|
||||||
|
- StarCrossdDestiny
|
||||||
|
* Fixed comics:
|
||||||
|
- LittleGamers
|
||||||
|
|
||||||
|
Dosage v.1.0.1:
|
||||||
|
* Fix embarrassing typo in 1.0.0 which rendered it completely unusable
|
||||||
|
(albeit a trivial fix).
|
||||||
|
|
||||||
|
Dosage v.1.0.0:
|
||||||
|
* 1.0 release, yay!
|
||||||
|
* Set modified time on downloaded images based on Last-Modified header:
|
||||||
|
Patch provided by gopalv82@yahoo.com, thanks :)
|
||||||
|
* Fixed --basepath on Windows:
|
||||||
|
Passing a path that included a drive letter didn't work.
|
||||||
|
* Added comics:
|
||||||
|
- TwoTwoOneFour
|
||||||
|
* Fixed comics:
|
||||||
|
- SluggyFreelance
|
||||||
|
|
||||||
|
Dosage v.0.3.2:
|
||||||
|
* Added comics:
|
||||||
|
- FreakCentral
|
||||||
|
- KeenSpot/AntiHeroForHire
|
||||||
|
- KeenSpot/ElfOnlyInn
|
||||||
|
- KeenSpot/GeneralProtectionFault
|
||||||
|
- KeenSpot/LimitedSpace
|
||||||
|
- KeenSpot/LostAndFound
|
||||||
|
- KeenSpot/Zortic
|
||||||
|
- RabidMonkeys
|
||||||
|
- SluggyFreelance
|
||||||
|
- SpellsAndWhistles
|
||||||
|
- SuburbanTribe
|
||||||
|
- TheFray
|
||||||
|
|
||||||
|
Dosage v.0.3.1:
|
||||||
|
* Removed external helper scripts
|
||||||
|
* Filesize displayed for downloaded files
|
||||||
|
* Various documentation changes
|
||||||
|
* Added --timestamps:
|
||||||
|
Displays timestamps before every message.
|
||||||
|
* Added comics:
|
||||||
|
- SomethingPositive
|
||||||
|
- UnderPower
|
||||||
|
- UserFriendly
|
||||||
|
- KeenSpot/QueenOfWands
|
||||||
|
- CombustibleOrange
|
||||||
|
- InkTank/*
|
||||||
|
- QuestionableContent
|
||||||
|
* Fixed comics:
|
||||||
|
- ComicsDotCom/flightdeck
|
||||||
|
- ComicsDotCom/peanuts
|
||||||
|
- ButternutSquash
|
||||||
|
- LifeOfConvenience
|
||||||
|
|
||||||
|
Dosage v.0.3.0:
|
||||||
|
* Removed filename override:
|
||||||
|
Since the comic modules now generally have sane names, this is no
|
||||||
|
longer of much use.
|
||||||
|
* Better feedback:
|
||||||
|
The various info levels (up to 3 now) provide much more informative
|
||||||
|
output.
|
||||||
|
* Comic wildcards:
|
||||||
|
@ expands to every comic already present in the basepath, and @@
|
||||||
|
expands to every single comic supported by Dosage.
|
||||||
|
* Added Comics:
|
||||||
|
- AppleGeeks
|
||||||
|
- ButternutSquash
|
||||||
|
- Comet7
|
||||||
|
- ComicsDotCom
|
||||||
|
Lots of submodules, most of them are untested.
|
||||||
|
- CtrlAltDel
|
||||||
|
- EightBitTheater
|
||||||
|
- FragileGravity
|
||||||
|
- KeenSpot/24fps
|
||||||
|
- KeenSpot/Alice
|
||||||
|
- KeenSpot/DeltaVenture
|
||||||
|
- KeenSpot/ItsWalky
|
||||||
|
- KeenSpot/PurplePussy
|
||||||
|
- KeenSpot/TheShadows
|
||||||
|
- LaurasComics
|
||||||
|
- MacHall
|
||||||
|
- Supafine
|
||||||
|
- VGCats
|
||||||
|
- WhiteNinja
|
||||||
|
* Fixed comics:
|
||||||
|
- KeenSpot/CollegeRoomiesFromHell
|
||||||
|
- KeenSpot/Wigu (renamed to Wigu)
|
||||||
|
- UComics/{mullets, nonsequitur, tomthedancingbug}
|
||||||
|
- PennyArcade
|
||||||
|
Switch back to the "low" resolution comics; some of the "high"
|
||||||
|
resolution comics are broken, and the "low" ones seem to be
|
||||||
|
identical anyway.
|
||||||
|
* Junview:
|
||||||
|
Lots of fixes / enhancements, still fairly alpha.
|
||||||
|
|
||||||
|
Dosage v.0.2.0:
|
||||||
|
* Virtual comic modules
|
||||||
|
* URL retrying:
|
||||||
|
Also, if you specify multiple comics, and one of them errors out
|
||||||
|
for some reason, Dosage will continue with the others.
|
||||||
|
* Indexed catchup:
|
||||||
|
You can now start a catchup from a specific index.
|
||||||
|
* Added comics:
|
||||||
|
- FilibusterCartoons
|
||||||
|
- GlueMeat
|
||||||
|
- RPGWorld
|
||||||
|
- RealLife
|
||||||
|
- UComics (see --list, there are around 70 submodules)
|
||||||
|
* Fixed comics:
|
||||||
|
- BasilFlint
|
||||||
|
- DieselSweeties
|
||||||
|
- SexyLosers
|
||||||
|
Generate nice filenames now.
|
||||||
|
* Comic help:
|
||||||
|
You can now pass --module-help to see module-specific help for
|
||||||
|
comic modules.
|
||||||
|
* Junview:
|
||||||
|
Image viewer written in wxPython, pretty alpha at this stage,
|
||||||
|
but feel free to play around with it if you're brave.
|
||||||
|
|
||||||
|
Dosage v.0.1.0:
|
||||||
|
* Various documentation updates
|
||||||
|
* Added comics:
|
||||||
|
- LittleGamers
|
||||||
|
- ClanOfTheCats
|
||||||
|
- DieselSweeties
|
||||||
|
- PvPonline
|
||||||
|
- RadioactivePanda
|
||||||
|
- ScaryGoRound
|
||||||
|
* Fixed comics:
|
||||||
|
- PennyArcade
|
||||||
|
The comic "bounces" when you get to the first strip, the
|
||||||
|
"previous" link points to the second comic. Work around this by
|
||||||
|
checking for the first comic.
|
||||||
|
- SexyLosers
|
||||||
|
SexyLosers seems to have implemented referrer checking recently,
|
||||||
|
this is handled by the new referrer passing support.
|
||||||
|
* Fix indexed mode up a bit:
|
||||||
|
The documentation has better examples now.
|
||||||
|
|
||||||
|
Dosage v.0.0.1:
|
||||||
|
* Initial public release
|
185
doc/dosage.1
Normal file
185
doc/dosage.1
Normal file
|
@ -0,0 +1,185 @@
|
||||||
|
.TH MAINLINE 1
|
||||||
|
.SH NAME
|
||||||
|
mainline \- command line interface to Dosage
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.B mainline
|
||||||
|
.RI [ options ]
|
||||||
|
.I module
|
||||||
|
.RI [ module .\|.\|.]
|
||||||
|
.SH DESCRIPTION
|
||||||
|
.B mainline
|
||||||
|
is a command line interface to Dosage. Dosage is an application designed
|
||||||
|
to keep a local \(oqmirror\(cq of specific web comics and other picture\-based
|
||||||
|
content, such as \(oqPicture Of The Day\(cq sites, with a variety of options
|
||||||
|
for updating and maintaining collections.
|
||||||
|
.SH OPTIONS
|
||||||
|
.TP
|
||||||
|
.BI \-b " PATH" "\fR,\fP \-\^\-base\-path=" PATH
|
||||||
|
Specifies a base path to put comic subdirectories. The default is \(oqComics\(cq.
|
||||||
|
.TP
|
||||||
|
.BI \-\^\-base\-url= PATH
|
||||||
|
Specifies the base URL for output events. The default is a local file URI.
|
||||||
|
.TP
|
||||||
|
.BR \-c ", " \-\^\-catch-up
|
||||||
|
Traverses all available strips until an (identical) existing one is found.
|
||||||
|
This can be useful if your collection was previously up to date,
|
||||||
|
but you've missed a few days worth of strips. Alternatively you can specify
|
||||||
|
.B \-c
|
||||||
|
twice for a \(oqfull catchup\(cq, which will not stop until all comics
|
||||||
|
have been traversed. Catchups can be \(oqresumed\(cq by using the index syntax, see
|
||||||
|
the
|
||||||
|
.B INDEX SYNTAX
|
||||||
|
and
|
||||||
|
.B SPECIAL SYNTAX
|
||||||
|
sections for more information.
|
||||||
|
.TP
|
||||||
|
.BR \-h ", " \-\^\-help
|
||||||
|
Output brief help information.
|
||||||
|
.TP
|
||||||
|
.BR \-l ", " \-\^\-list
|
||||||
|
List available comic modules in multi\-column fashion.
|
||||||
|
.TP
|
||||||
|
.BR \-\^\-single\-list
|
||||||
|
List available comic modules in single-column fashion.
|
||||||
|
.TP
|
||||||
|
.BI \-m " MODULE" "\fR,\fP \-\^\-module-help=" MODULE
|
||||||
|
Output module-specific help for
|
||||||
|
.IR MODULE .
|
||||||
|
.TP
|
||||||
|
.BI \-o " OUTPUT" "\fR,\fP \-\^\-output=" OUTPUT
|
||||||
|
.I OUTPUT
|
||||||
|
may be any one of the following:
|
||||||
|
.PP
|
||||||
|
.RS
|
||||||
|
.BR "text " \-
|
||||||
|
Provides no additional output and is the default value.
|
||||||
|
.RE
|
||||||
|
.PP
|
||||||
|
.RS
|
||||||
|
.BR "html " \-
|
||||||
|
Writes out an HTML file linking to the strips actually downloaded in the
|
||||||
|
current run, named by date (ala dailystrips). The files can be found in the
|
||||||
|
\'html' directory of your Comics directory.
|
||||||
|
.RE
|
||||||
|
.PP
|
||||||
|
.RS
|
||||||
|
.BR "rss " \-
|
||||||
|
Writes out an RSS feed detailing what strips were downloaded in the last 24
|
||||||
|
hours. The feed can be found in Comics/dailydose.xml.
|
||||||
|
.RE
|
||||||
|
.PP
|
||||||
|
.RS
|
||||||
|
.BR "rss " \-
|
||||||
|
Writes an RSS feed with all of the strips downloaded during the run, for use
|
||||||
|
with your favourite RSS aggregator.
|
||||||
|
.RE
|
||||||
|
.TP
|
||||||
|
.BR \-p ", " \-\^\-progress
|
||||||
|
Display a progress bar while downloading comics.
|
||||||
|
.TP
|
||||||
|
.BR \-t ", " \-\^\-timestamps
|
||||||
|
Print timestamps for all output at any level.
|
||||||
|
.TP
|
||||||
|
.BR \-v ", " \-\^\-verbose
|
||||||
|
Increase the output level by one with each occurrence.
|
||||||
|
.TP
|
||||||
|
.BR \-V ", " \-\^\-version
|
||||||
|
Display the version number.
|
||||||
|
.I module
|
||||||
|
At least one valid
|
||||||
|
.I module
|
||||||
|
must be specified. A list of valid modules can be found by passing the
|
||||||
|
.B \-l
|
||||||
|
option. Multiple
|
||||||
|
.I module
|
||||||
|
arguments can be specified on the command line.
|
||||||
|
.SH INDEX SYNTAX
|
||||||
|
One can indicate the start of a list of
|
||||||
|
.B comma separated
|
||||||
|
indices using a
|
||||||
|
.RB \(oq : "\(cq."
|
||||||
|
.PP
|
||||||
|
If
|
||||||
|
.I \-c
|
||||||
|
is specified with index syntax then \(oqresume\(cq mode is activated,
|
||||||
|
where a \(oqcatchup\(cq will start at the given index.
|
||||||
|
.PP
|
||||||
|
Refer to
|
||||||
|
.B EXAMPLES
|
||||||
|
for samples.
|
||||||
|
.SH OFFENSIVE COMICS
|
||||||
|
Some users may find certain comics offensive and wish to disable them.
|
||||||
|
Modules listed in
|
||||||
|
.B /etc/dosage/disabled
|
||||||
|
and
|
||||||
|
.B ~/.dosage/disabled
|
||||||
|
will be disabled. These files should contain only one module name per line.
|
||||||
|
.SH SPECIAL SYNTAX
|
||||||
|
.TP
|
||||||
|
.B @
|
||||||
|
This expands to mean all the comics currently in your \(oqComics\(cq
|
||||||
|
directory.
|
||||||
|
.TP
|
||||||
|
.B @@
|
||||||
|
This expands to mean all the comics available to Dosage.
|
||||||
|
.PP
|
||||||
|
.B INDEX SYNTAX
|
||||||
|
can be used with
|
||||||
|
.B SPECIAL SYNTAX
|
||||||
|
but this is unlikely to be useful.
|
||||||
|
.SH EXAMPLES
|
||||||
|
Retrieve the latest Mega Tokyo comic:
|
||||||
|
.RS
|
||||||
|
.B mainline MegaTokyo
|
||||||
|
.RE
|
||||||
|
.PP
|
||||||
|
Retrieve every strip from every comic that there is a module for:
|
||||||
|
.RS
|
||||||
|
.B mainline \-c @@
|
||||||
|
.RE
|
||||||
|
.PP
|
||||||
|
Retrieve all Penny Arcade strips from (and including) a given index to
|
||||||
|
the beginning regardless of whether they already exist or not:
|
||||||
|
.RS
|
||||||
|
.B mainline \-c PennyArcade:2004\-07\-22
|
||||||
|
.RE
|
||||||
|
.SH ENVIRONMENT
|
||||||
|
.IP HTTP_PROXY
|
||||||
|
.B mainline
|
||||||
|
will use the specified HTTP proxy whenever possible.
|
||||||
|
.SH NOTES
|
||||||
|
Should retrieval fail on any given strip
|
||||||
|
.B mainline
|
||||||
|
will attempt to retry. However the retry information is only outputted
|
||||||
|
in the
|
||||||
|
.B second
|
||||||
|
and successive output levels.
|
||||||
|
.PP
|
||||||
|
At the time of writing, a
|
||||||
|
.B complete
|
||||||
|
Dosage collection weighs in at around 3.0GB.
|
||||||
|
.SH RETURN VALUE
|
||||||
|
The return value is 2 when
|
||||||
|
.IP \(bu
|
||||||
|
a program error occurred.
|
||||||
|
.PP
|
||||||
|
The return value is 1 when
|
||||||
|
.IP \(bu
|
||||||
|
comics could not be found or downloaded
|
||||||
|
.IP \(bu
|
||||||
|
the program run was aborted with Ctrl-C
|
||||||
|
.PP
|
||||||
|
Else the return value is zero.
|
||||||
|
.SH BUGS
|
||||||
|
See
|
||||||
|
.I http://trac.slipgate.za.net/dosage
|
||||||
|
for a list of current development tasks and suggestions.
|
||||||
|
.SH FILES
|
||||||
|
.IP "\fB/etc/dosage/disabled\fR"
|
||||||
|
Disables comic modules on a global scale.
|
||||||
|
.IP "\fB~/.dosage/disabled\fR"
|
||||||
|
Disables comic modules on a local scale.
|
||||||
|
.SH AUTHORS
|
||||||
|
.BR mainline " and " Dosage
|
||||||
|
were written by Jonathan Jacobs <korpse@slipgate.za.net> and Tristan Seligmann
|
||||||
|
<mithrandi@slipgate.za.net>. This manual page was written by Jonathan Jacobs.
|
329
doc/dosage.1.html
Normal file
329
doc/dosage.1.html
Normal file
|
@ -0,0 +1,329 @@
|
||||||
|
|
||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||||
|
<HTML><HEAD><TITLE>Man page of MAINLINE</TITLE>
|
||||||
|
</HEAD><BODY>
|
||||||
|
<H1>MAINLINE</H1>
|
||||||
|
Section: User Commands (1)<BR><A HREF="#index">Index</A>
|
||||||
|
<A HREF="../index.html">Return to Main Contents</A><HR>
|
||||||
|
|
||||||
|
<A NAME="lbAB"> </A>
|
||||||
|
<H2>NAME</H2>
|
||||||
|
|
||||||
|
mainline - command line interface to Dosage
|
||||||
|
<A NAME="lbAC"> </A>
|
||||||
|
<H2>SYNOPSIS</H2>
|
||||||
|
|
||||||
|
<B>mainline</B>
|
||||||
|
|
||||||
|
[<I>options</I>]
|
||||||
|
|
||||||
|
<I>module</I>
|
||||||
|
|
||||||
|
[<I>module</I>...]
|
||||||
|
|
||||||
|
<A NAME="lbAD"> </A>
|
||||||
|
<H2>DESCRIPTION</H2>
|
||||||
|
|
||||||
|
<B>mainline</B>
|
||||||
|
|
||||||
|
is a command line interface to Dosage. Dosage is an application designed
|
||||||
|
to keep a local 'mirror' of specific web comics and other picture-based
|
||||||
|
content, such as 'Picture Of The Day' sites, with a variety of options
|
||||||
|
for updating and maintaining collections.
|
||||||
|
<A NAME="lbAE"> </A>
|
||||||
|
<H2>OPTIONS</H2>
|
||||||
|
|
||||||
|
<DL COMPACT>
|
||||||
|
<DT><B>-b</B><I> PATH</I><B></B>, --base-path=<I>PATH</I>
|
||||||
|
|
||||||
|
<DD>
|
||||||
|
Specifies a base path to put comic subdirectories. The default is 'Comics'.
|
||||||
|
<DT><B>--base-url=</B><I>PATH</I>
|
||||||
|
|
||||||
|
<DD>
|
||||||
|
Specifies the base URL for output events. The default is a local file URI.
|
||||||
|
<DT><B>-c</B>, <B>--catch-up</B>
|
||||||
|
|
||||||
|
<DD>
|
||||||
|
Traverses all available strips until an (identical) existing one is found.
|
||||||
|
This can be useful if your collection was previously up to date,
|
||||||
|
but you've missed a few days worth of strips. Alternatively you can specify
|
||||||
|
<B>-c</B>
|
||||||
|
|
||||||
|
twice for a 'full catchup', which will not stop until all comics
|
||||||
|
have been traversed. Catchups can be 'resumed' by using the index syntax, see
|
||||||
|
the
|
||||||
|
<B>INDEX SYNTAX</B>
|
||||||
|
|
||||||
|
and
|
||||||
|
<B>SPECIAL SYNTAX</B>
|
||||||
|
|
||||||
|
sections for more information.
|
||||||
|
<DT><B>-h</B>, <B>--help</B>
|
||||||
|
|
||||||
|
<DD>
|
||||||
|
Output brief help information.
|
||||||
|
<DT><B>-l</B>, <B>--list</B>
|
||||||
|
|
||||||
|
<DD>
|
||||||
|
List available comic modules in multi-column fashion.
|
||||||
|
<DT><B>--single-list</B>
|
||||||
|
|
||||||
|
<DD>
|
||||||
|
List available comic modules in single-column fashion.
|
||||||
|
<DT><B>-m</B><I> MODULE</I><B></B>, --module-help=<I>MODULE</I>
|
||||||
|
|
||||||
|
<DD>
|
||||||
|
Output module-specific help for
|
||||||
|
<I>MODULE</I>.
|
||||||
|
|
||||||
|
<DT><B>-o</B><I> OUTPUT</I><B></B>, --output=<I>OUTPUT</I>
|
||||||
|
|
||||||
|
<DD>
|
||||||
|
<I>OUTPUT</I>
|
||||||
|
|
||||||
|
may be any one of the following:
|
||||||
|
</DL>
|
||||||
|
<P>
|
||||||
|
|
||||||
|
<DL COMPACT><DT><DD>
|
||||||
|
<B>text </B>-
|
||||||
|
|
||||||
|
Provides no additional output and is the default value.
|
||||||
|
</DL>
|
||||||
|
|
||||||
|
<P>
|
||||||
|
|
||||||
|
<DL COMPACT><DT><DD>
|
||||||
|
<B>html </B>-
|
||||||
|
|
||||||
|
Writes out an HTML file linking to the strips actually downloaded in the
|
||||||
|
current run, named by date (ala dailystrips). The files can be found in the
|
||||||
|
'html' directory of your Comics directory.
|
||||||
|
</DL>
|
||||||
|
|
||||||
|
<P>
|
||||||
|
|
||||||
|
<DL COMPACT><DT><DD>
|
||||||
|
<B>rss </B>-
|
||||||
|
|
||||||
|
Writes out an RSS feed detailing what strips were downloaded in the last 24
|
||||||
|
hours. The feed can be found in Comics/dailydose.xml.
|
||||||
|
</DL>
|
||||||
|
|
||||||
|
<P>
|
||||||
|
|
||||||
|
<DL COMPACT><DT><DD>
|
||||||
|
<B>rss </B>-
|
||||||
|
|
||||||
|
Writes an RSS feed with all of the strips downloaded during the run, for use
|
||||||
|
with your favourite RSS aggregator.
|
||||||
|
</DL>
|
||||||
|
|
||||||
|
<DL COMPACT>
|
||||||
|
<DT><B>-p</B>, <B>--progress</B>
|
||||||
|
|
||||||
|
<DD>
|
||||||
|
Display a progress bar while downloading comics.
|
||||||
|
<DT><B>-t</B>, <B>--timestamps</B>
|
||||||
|
|
||||||
|
<DD>
|
||||||
|
Print timestamps for all output at any level.
|
||||||
|
<DT><B>-v</B>, <B>--verbose</B>
|
||||||
|
|
||||||
|
<DD>
|
||||||
|
Increase the output level by one with each occurrence.
|
||||||
|
<DT><B>-V</B>, <B>--version</B>
|
||||||
|
|
||||||
|
<DD>
|
||||||
|
Display the version number.
|
||||||
|
<I>module</I>
|
||||||
|
|
||||||
|
At least one valid
|
||||||
|
<I>module</I>
|
||||||
|
|
||||||
|
must be specified. A list of valid modules can be found by passing the
|
||||||
|
<B>-l</B>
|
||||||
|
|
||||||
|
option. Multiple
|
||||||
|
<I>module</I>
|
||||||
|
|
||||||
|
arguments can be specified on the command line.
|
||||||
|
</DL>
|
||||||
|
<A NAME="lbAF"> </A>
|
||||||
|
<H2>INDEX SYNTAX</H2>
|
||||||
|
|
||||||
|
One can indicate the start of a list of
|
||||||
|
<B>comma separated</B>
|
||||||
|
|
||||||
|
indices using a
|
||||||
|
'<B>:</B>'.
|
||||||
|
|
||||||
|
<P>
|
||||||
|
|
||||||
|
If
|
||||||
|
<I>-c</I>
|
||||||
|
|
||||||
|
is specified with index syntax then 'resume' mode is activated,
|
||||||
|
where a 'catchup' will start at the given index.
|
||||||
|
<P>
|
||||||
|
|
||||||
|
Refer to
|
||||||
|
<B>EXAMPLES</B>
|
||||||
|
|
||||||
|
for samples.
|
||||||
|
<A NAME="lbAG"> </A>
|
||||||
|
<H2>OFFENSIVE COMICS</H2>
|
||||||
|
|
||||||
|
Some users may find certain comics offensive and wish to disable them.
|
||||||
|
Modules listed in
|
||||||
|
<B>/etc/dosage/disabled</B>
|
||||||
|
|
||||||
|
and
|
||||||
|
<B>~/.dosage/disabled</B>
|
||||||
|
|
||||||
|
will be disabled. These files should contain only one module name per line.
|
||||||
|
<A NAME="lbAH"> </A>
|
||||||
|
<H2>SPECIAL SYNTAX</H2>
|
||||||
|
|
||||||
|
<DL COMPACT>
|
||||||
|
<DT><B>@</B>
|
||||||
|
|
||||||
|
<DD>
|
||||||
|
This expands to mean all the comics currently in your 'Comics'
|
||||||
|
directory.
|
||||||
|
<DT><B>@@</B>
|
||||||
|
|
||||||
|
<DD>
|
||||||
|
This expands to mean all the comics available to Dosage.
|
||||||
|
</DL>
|
||||||
|
<P>
|
||||||
|
|
||||||
|
<B>INDEX SYNTAX</B>
|
||||||
|
|
||||||
|
can be used with
|
||||||
|
<B>SPECIAL SYNTAX</B>
|
||||||
|
|
||||||
|
but this is unlikely to be useful.
|
||||||
|
<A NAME="lbAI"> </A>
|
||||||
|
<H2>EXAMPLES</H2>
|
||||||
|
|
||||||
|
Retrieve the latest Mega Tokyo comic:
|
||||||
|
<DL COMPACT><DT><DD>
|
||||||
|
<B>mainline MegaTokyo</B>
|
||||||
|
|
||||||
|
</DL>
|
||||||
|
|
||||||
|
<P>
|
||||||
|
|
||||||
|
Retrieve every strip from every comic that there is a module for:
|
||||||
|
<DL COMPACT><DT><DD>
|
||||||
|
<B>mainline -c @@</B>
|
||||||
|
|
||||||
|
</DL>
|
||||||
|
|
||||||
|
<P>
|
||||||
|
|
||||||
|
Retrieve all Penny Arcade strips from (and including) a given index to
|
||||||
|
the beginning regardless of whether they already exist or not:
|
||||||
|
<DL COMPACT><DT><DD>
|
||||||
|
<B>mainline -c PennyArcade:2004-07-22</B>
|
||||||
|
|
||||||
|
</DL>
|
||||||
|
|
||||||
|
<A NAME="lbAJ"> </A>
|
||||||
|
<H2>ENVIRONMENT</H2>
|
||||||
|
|
||||||
|
<DL COMPACT>
|
||||||
|
<DT>HTTP_PROXY<DD>
|
||||||
|
<B>mainline</B>
|
||||||
|
|
||||||
|
will use the specified HTTP proxy whenever possible.
|
||||||
|
</DL>
|
||||||
|
<A NAME="lbAK"> </A>
|
||||||
|
<H2>NOTES</H2>
|
||||||
|
|
||||||
|
Should retrieval fail on any given strip
|
||||||
|
<B>mainline</B>
|
||||||
|
|
||||||
|
will attempt to retry. However the retry information is only outputted
|
||||||
|
in the
|
||||||
|
<B>second</B>
|
||||||
|
|
||||||
|
and successive output levels.
|
||||||
|
<P>
|
||||||
|
|
||||||
|
At the time of writing, a
|
||||||
|
<B>complete</B>
|
||||||
|
|
||||||
|
Dosage collection weighs in at around 3.0GB.
|
||||||
|
<A NAME="lbAL"> </A>
|
||||||
|
<H2>RETURN VALUE</H2>
|
||||||
|
|
||||||
|
The return value is 2 when
|
||||||
|
<DL COMPACT>
|
||||||
|
<DT>•<DD>
|
||||||
|
a program error occurred.
|
||||||
|
</DL>
|
||||||
|
<P>
|
||||||
|
|
||||||
|
The return value is 1 when
|
||||||
|
<DL COMPACT>
|
||||||
|
<DT>•<DD>
|
||||||
|
comics could not be found or downloaded
|
||||||
|
<DT>•<DD>
|
||||||
|
the program run was aborted with Ctrl-C
|
||||||
|
</DL>
|
||||||
|
<P>
|
||||||
|
|
||||||
|
Else the return value is zero.
|
||||||
|
<A NAME="lbAM"> </A>
|
||||||
|
<H2>BUGS</H2>
|
||||||
|
|
||||||
|
See
|
||||||
|
<I><A HREF="http://trac.slipgate.za.net/dosage">http://trac.slipgate.za.net/dosage</A></I>
|
||||||
|
|
||||||
|
for a list of current development tasks and suggestions.
|
||||||
|
<A NAME="lbAN"> </A>
|
||||||
|
<H2>FILES</H2>
|
||||||
|
|
||||||
|
<DL COMPACT>
|
||||||
|
<DT><B>/etc/dosage/disabled</B><DD>
|
||||||
|
Disables comic modules on a global scale.
|
||||||
|
<DT><B>~/.dosage/disabled</B><DD>
|
||||||
|
Disables comic modules on a local scale.
|
||||||
|
</DL>
|
||||||
|
<A NAME="lbAO"> </A>
|
||||||
|
<H2>AUTHORS</H2>
|
||||||
|
|
||||||
|
<B>mainline</B> and <B>Dosage</B>
|
||||||
|
|
||||||
|
were written by Jonathan Jacobs <<A HREF="mailto:korpse@slipgate.za.net">korpse@slipgate.za.net</A>> and Tristan Seligmann
|
||||||
|
<<A HREF="mailto:mithrandi@slipgate.za.net">mithrandi@slipgate.za.net</A>>. This manual page was written by Jonathan Jacobs.
|
||||||
|
<P>
|
||||||
|
|
||||||
|
<HR>
|
||||||
|
<A NAME="index"> </A><H2>Index</H2>
|
||||||
|
<DL>
|
||||||
|
<DT><A HREF="#lbAB">NAME</A><DD>
|
||||||
|
<DT><A HREF="#lbAC">SYNOPSIS</A><DD>
|
||||||
|
<DT><A HREF="#lbAD">DESCRIPTION</A><DD>
|
||||||
|
<DT><A HREF="#lbAE">OPTIONS</A><DD>
|
||||||
|
<DT><A HREF="#lbAF">INDEX SYNTAX</A><DD>
|
||||||
|
<DT><A HREF="#lbAG">OFFENSIVE COMICS</A><DD>
|
||||||
|
<DT><A HREF="#lbAH">SPECIAL SYNTAX</A><DD>
|
||||||
|
<DT><A HREF="#lbAI">EXAMPLES</A><DD>
|
||||||
|
<DT><A HREF="#lbAJ">ENVIRONMENT</A><DD>
|
||||||
|
<DT><A HREF="#lbAK">NOTES</A><DD>
|
||||||
|
<DT><A HREF="#lbAL">RETURN VALUE</A><DD>
|
||||||
|
<DT><A HREF="#lbAM">BUGS</A><DD>
|
||||||
|
<DT><A HREF="#lbAN">FILES</A><DD>
|
||||||
|
<DT><A HREF="#lbAO">AUTHORS</A><DD>
|
||||||
|
</DL>
|
||||||
|
<HR>
|
||||||
|
This document was created by
|
||||||
|
<A HREF="/cgi-bin/man/man2html">man2html</A>,
|
||||||
|
using the manual pages.<BR>
|
||||||
|
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
240
dosage
Executable file
240
dosage
Executable file
|
@ -0,0 +1,240 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Dosage, the webcomic downloader
|
||||||
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of version 2 of the GNU General Public License as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import optparse
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
from dosagelib import events, scraper
|
||||||
|
from dosagelib.output import out
|
||||||
|
from dosagelib.util import getWindowSize, internal_error
|
||||||
|
from dosagelib.configuration import App, Freeware, Copyright
|
||||||
|
|
||||||
|
def setupOptions():
    """
    Build the command line parser for the dosage script.

    The -p/--progress option is only registered when getWindowSize() does
    not raise NotImplementedError; callers must therefore not assume that
    a 'progress' key exists in the parsed options.

    @return: the configured option parser
    @rtype: optparse.OptionParser
    """
    usage = 'usage: %prog [options] comicModule [comicModule ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-v', '--verbose', action='count', dest='verbose', default=0, help='provides verbose output, use multiple times for more verbosity')
    parser.add_option('-c', '--catch-up', action='count', dest='catchup', default=None, help='traverse and retrieve all available comics up until the strip that already exists locally, use twice to retrieve until all strips exist locally')
    # Help text previously read "invidivual".
    parser.add_option('-b', '--base-path', action='store', dest='basepath', default='Comics', help='set the path to create individual comic directories in, default is Comics', metavar='PATH')
    parser.add_option('--base-url', action='store', dest='baseurl', default=None, help='the base URL of your comics directory (for RSS, HTML, etc.); this should correspond to --base-path', metavar='PATH')
    # -l and --single-list share one destination; the constant selects the layout.
    parser.add_option('-l', '--list', action='store_const', const=1, dest='list', help='list available comic modules')
    parser.add_option('--single-list', action='store_const', const=2, dest='list', help='list available comic modules in a single list')
    parser.add_option('-V', '--version', action='store_true', dest='version', help='display the version number')
    parser.add_option('-m', '--module-help', action='store_true', dest='modhelp', help='display help for comic modules')
    parser.add_option('-t', '--timestamps', action='store_true', dest='timestamps', default=False, help='print timestamps for all output at any info level')
    parser.add_option('-o', '--output', action='store', dest='output', choices=events.getHandlers(), help='output formatting for downloaded comics')

    try:
        getWindowSize()
    except NotImplementedError:
        # The window size cannot be queried here, so no progress option is offered.
        pass
    else:
        parser.add_option('-p', '--progress', action='store_true', dest='progress', default=False, help='display progress bar while downloading comics')
    return parser
|
||||||
|
|
||||||
|
class Dosage(object):
    """
    Command line driver: interprets the parsed options and runs the
    requested action (version, listing, help, catch-up or current strip)
    for each comic named on the command line.

    @ivar errors: number of operations that raised inside L{safeOp}.
    @ivar settings: option dictionary handed to L{run}.
    @ivar comics: comic arguments handed to L{run}.
    @ivar module: scraper instance currently being processed, set by
        L{setupComic}.
    @ivar indices: index strings parsed from the current comic argument,
        or None; set by L{useComics}.
    """

    def __init__(self):
        self.errors = 0

    def setOutputInfo(self):
        """Copy verbosity and timestamp settings onto the shared output object."""
        out.level = 0
        out.level += self.settings['verbose']
        out.timestamps = self.settings['timestamps']

    def saveComic(self, comic):
        """
        Save one comic below the configured base path.

        @return: the 'saved' flag returned by comic.save()
        """
        basepath = self.settings['basepath']
        # 'progress' is absent when the option was never registered.
        progress = self.settings.get('progress', False)
        fn, saved = comic.save(basepath, progress)
        return saved

    def saveComics(self, comics):
        """
        Save every comic in the iterable.

        @return: True if at least one of them was reported as saved
        """
        saved = False
        for comic in comics:
            # Call saveComic first so every comic is attempted.
            saved = self.saveComic(comic) or saved
        return saved

    def safeOp(self, fp, *args, **kwargs):
        """
        Call fp(*args, **kwargs); on any Exception count it in
        self.errors and write the traceback to the output instead of
        propagating, so remaining comics are still processed.
        """
        try:
            fp(*args, **kwargs)
        except Exception:
            self.errors += 1
            # Named excType so the builtin type() is not shadowed.
            excType, value, tb = sys.exc_info()
            out.write('Traceback (most recent call last):', 1)
            out.writelines(traceback.format_stack(), 1)
            # Skip the first frame, which is this wrapper itself.
            out.writelines(traceback.format_tb(tb)[1:], 1)
            out.writelines(traceback.format_exception_only(excType, value))

    def getCurrent(self):
        """Fetch and save the current strip(s) of self.module."""
        out.write('Retrieving the current strip...')
        self.saveComics(self.module.getCurrentComics())

    def getIndex(self, index):
        """Fetch and save the strip(s) at the given index of self.module."""
        out.write('Retrieving index "%s"....' % (index,))
        try:
            self.module.setStrip(index)
            self.saveComics(self.module.getNextComics())
        except NotImplementedError:
            out.write('No indexed retrieval support.')

    def catchup(self):
        """
        Iterate over self.module, saving strips until an iteration saves
        nothing; with a 'catchup' count of 2 or more, never stop early.
        """
        out.write('Catching up...')
        for comics in self.module:
            if not self.saveComics(comics) and self.settings['catchup'] < 2:
                break

    def catchupIndex(self, index):
        """Like L{catchup}, but position the module at index first."""
        out.write('Catching up from index "%s"...' % (index,))
        self.module.setStrip(index)
        for comics in self.module:
            if not self.saveComics(comics) and self.settings['catchup'] < 2:
                break

    def getScrapers(self):
        """Return whatever scraper.items() provides."""
        return scraper.items()

    def getExistingComics(self):
        """Yield each scraper whose directory already exists under the base path."""
        for candidate in self.getScrapers():
            dirname = candidate.get_name().replace('/', os.sep)
            if os.path.isdir(os.path.join(self.settings['basepath'], dirname)):
                yield candidate

    def doList(self, columnList):
        """
        Print the available scrapers.

        @param columnList: true for multi-column output, false for one
            name per line
        """
        out.write('Available comic scrapers:')
        scrapers = self.getScrapers()
        if columnList:
            self.doColumnList(scrapers)
        else:
            self.doSingleList(scrapers)
        out.write('%d supported comics.' % len(scrapers))

    def doSingleList(self, scrapers):
        """Print one scraper name per line."""
        print('\n'.join(s.get_name() for s in scrapers))

    def doColumnList(self, scrapers):
        """
        Print scraper names in as many equal-width columns as fit the
        window width (80 when the width cannot be determined).
        """
        try:
            screenWidth = getWindowSize()
        except NotImplementedError:
            screenWidth = 80

        if len(scrapers) == 0:
            return

        names = [s.get_name() for s in scrapers]
        # Pad to one more than the longest name so adjacent columns never
        # touch; the per-line count was already computed with this width.
        colWidth = max([len(name) for name in names]) + 1
        # At least one name per line: a count of zero used to make the
        # loop spin forever when a name was wider than the window.
        namesPerLine = max(1, int(screenWidth / colWidth))

        for start in range(0, len(names), namesPerLine):
            row = names[start:start + namesPerLine]
            print(''.join([name.ljust(colWidth) for name in row]))

    def doCatchup(self):
        """Run a catch-up for every selected comic, resuming from the first index if one was given."""
        # useComics() sets self.module / self.indices before each yield.
        for _module in self.useComics():
            if self.indices:
                self.safeOp(self.catchupIndex, self.indices[0])
            else:
                self.safeOp(self.catchup)

    def doCurrent(self):
        """Fetch the given indices, or the current strip, of every selected comic."""
        for _module in self.useComics():
            if self.indices:
                for index in self.indices:
                    self.safeOp(self.getIndex, index)
            else:
                self.safeOp(self.getCurrent)

    def doHelp(self):
        """Write the help text of every selected comic, line by line."""
        for module in self.useComics():
            for line in module.getHelp().splitlines():
                out.write("Help: "+line)

    def setupComic(self, scraper):
        """Instantiate the scraper, make it the current module and output context, and return it."""
        self.module = scraper()
        out.context = scraper.get_name()
        return self.module

    def useComics(self):
        """
        Generator over the scraper instances selected on the command line.

        Each argument has the form NAME[:INDEX[,INDEX...]]. '@' selects
        the comics that already have a directory, '@@' selects all
        scrapers. self.indices is updated before each instance is yielded.
        """
        for comic in self.comics:
            parts = comic.split(':', 2)
            if len(parts) > 1:
                self.indices = parts[1].split(',')
            else:
                self.indices = None

            moduleName = parts[0]
            if moduleName == '@':
                for s in self.getExistingComics():
                    yield self.setupComic(s)
            elif moduleName == '@@':
                for s in self.getScrapers():
                    yield self.setupComic(s)
            else:
                yield self.setupComic(scraper.get(moduleName))

    def displayVersion(self):
        """Print the application name, copyright and license notices."""
        print(App)
        print(Copyright)
        print(Freeware)

    def run(self, settings, comics):
        """
        Execute the action selected by the options.

        @param settings: dictionary of parsed command line options
        @param comics: list of comic arguments from the command line
        """
        self.settings = settings
        self.setOutputInfo()
        self.comics = comics

        om = self.settings['output']
        events.installHandler(om, self.settings['basepath'], self.settings['baseurl'])
        events.handler.start()

        # First matching option wins; version and list need no comics.
        if self.settings['version']:
            self.displayVersion()
        elif self.settings['list']:
            self.doList(self.settings['list'] == 1)
        elif len(comics) <= 0:
            out.write('Warning: No comics specified, bailing out!')
        elif self.settings['modhelp']:
            self.doHelp()
        elif self.settings['catchup']:
            self.doCatchup()
        else:
            self.doCurrent()

        events.handler.end()
|
||||||
|
|
||||||
|
def main():
    """
    Parse the command line, run Dosage and translate the outcome into a
    process exit status.

    @return: 0 on success, 1 if any operation failed or the run was
        interrupted from the keyboard, 2 on an unexpected exception
    @rtype: int
    """
    try:
        optionParser = setupOptions()
        options, args = optionParser.parse_args()
        app = Dosage()
        app.run(options.__dict__, args)
    except KeyboardInterrupt:
        print("Aborted.")
        return 1
    except Exception:
        internal_error()
        return 2

    if app.errors:
        return 1
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
sys.exit(main())
|
39
dosagelib/__init__.py
Normal file
39
dosagelib/__init__.py
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
"""
|
||||||
|
Automated webcomic downloader. Dosage traverses webcomic websites in
|
||||||
|
order to download each strip of the comic. The intended use is for
|
||||||
|
mirroring the strips locally for ease of viewing; redistribution of the
|
||||||
|
downloaded strips may violate copyright, and is not advisable unless you
|
||||||
|
have communicated with all of the relevant copyright holders, described
|
||||||
|
your intentions, and received permission to distribute.
|
||||||
|
|
||||||
|
The primary dosage interface is currently the 'mainline' script, which
|
||||||
|
is just a thin wrapper that invokes L{dosage.mainline}. Comic modules
|
||||||
|
for each webcomic are located in L{dosage.modules}; most of these make
|
||||||
|
use of the helper base classes and mixins in L{dosage.modules.helpers},
|
||||||
|
thus making their individual implementations trivial.
|
||||||
|
|
||||||
|
@group Core modules: comic, events, output, progress, rss, util,
|
||||||
|
version
|
||||||
|
@group Interface modules: mainline
|
||||||
|
@group Comic modules: modules
|
||||||
|
|
||||||
|
@sort: modules.helpers
|
||||||
|
|
||||||
|
@author: U{Dosage development team <dosage@lists.slipgate.za.net>}
|
||||||
|
@requires: Python 2.5+
|
||||||
|
@see: U{The dosage webpage <http://slipgate.za.net/dosage>}
|
||||||
|
@see: U{The dosage Trac site <http://trac.slipgate.za.net/dosage>}
|
||||||
|
|
||||||
|
@newfield contributor: Contributor, Contributors (Alphabetical Order)
|
||||||
|
@contributor: U{Jonathan Jacobs <mailto:korpse@slipgate.za.net>}
|
||||||
|
@contributor: U{Tristan Seligmann <mailto:mithrandi@mithrandi.za.net>}
|
||||||
|
|
||||||
|
@var __license__: The license governing the use and distribution of
|
||||||
|
dosage.
|
||||||
|
"""
|
||||||
|
__docformat__ = 'epytext en'
|
||||||
|
import sys
|
||||||
|
# Refuse to run on interpreters older than Python 2.5. An interpreter that
# lacks sys.version_info altogether is also too old. The previous condition,
# "not (hasattr(...) or version < ...)", could never be true when
# sys.version_info exists, so the guard never fired.
if (not hasattr(sys, 'version_info') or
        sys.version_info < (2, 5, 0, 'final', 0)):
    raise SystemExit("This program requires Python 2.5 or later.")
|
||||||
|
|
101
dosagelib/comic.py
Normal file
101
dosagelib/comic.py
Normal file
|
@ -0,0 +1,101 @@
|
||||||
|
import urllib2
|
||||||
|
import os
|
||||||
|
import locale
|
||||||
|
import rfc822
|
||||||
|
import time
|
||||||
|
import shutil
|
||||||
|
locale.setlocale(locale.LC_ALL, '')
|
||||||
|
|
||||||
|
from .output import out
|
||||||
|
from .util import urlopen, saneDataSize, normaliseURL
|
||||||
|
from .progress import progressBar, OperationComplete
|
||||||
|
from .events import handler
|
||||||
|
|
||||||
|
# Raised by Comic.__init__ when the strip URL cannot be retrieved or the
# response is not an acceptable image type.
class FetchComicError(IOError): pass
|
||||||
|
|
||||||
|
class Comic(object):
    """One comic image that can be downloaded and stored on disk.

    The constructor opens the image URL and inspects the response headers;
    L{save} then writes the body below a per-comic directory.
    """

    def __init__(self, moduleName, url, referrer=None, filename=None):
        """Open C{url} and work out the local file name and extension.

        @param moduleName: comic name; also used as the directory below the
            base path (a '/' in the name becomes a sub-directory).
        @param url: URL of the image; passed through normaliseURL() first.
        @param referrer: forwarded to urlopen().
        @param filename: file name (without extension) to use instead of the
            last path component of the URL.
        @raise FetchComicError: if the server answers with an HTTP error, or
            the content is neither an image nor one of the accepted binary
            MIME types.
        """
        self.moduleName = moduleName
        url = normaliseURL(url)
        out.write('Getting headers for %s...' % (url,), 2)
        try:
            self.urlobj = urlopen(url, referrer=referrer)
        except urllib2.HTTPError as he:
            raise FetchComicError('Unable to retrieve URL.', url, he.code)

        info = self.urlobj.info()
        isImage = info.getmaintype() == 'image'
        acceptedTypes = ('application/octet-stream', 'application/x-shockwave-flash')
        if not isImage and info.gettype() not in acceptedTypes:
            # Do not leak the open connection when rejecting the response.
            self.urlobj.close()
            raise FetchComicError('No suitable image found to retrieve.', url)

        self.filename, self.ext = os.path.splitext(url.split('/')[-1])
        self.filename = filename or self.filename
        self.filename = self.filename.replace(os.sep, '_')
        # Always use mime type for file extension if it is sane.
        if isImage:
            self.ext = '.' + info.getsubtype()
        self.contentLength = int(info.get('content-length', 0))
        self.lastModified = info.get('last-modified')
        out.write('... filename = "%s", ext = "%s", contentLength = %d' % (self.filename, self.ext, self.contentLength), 2)

    def touch(self, filename):
        """Set atime/mtime of C{filename} to the Last-Modified header value.

        Nothing happens when the header is missing or cannot be parsed.
        """
        if not self.lastModified:
            return
        # parsedate_tz/mktime_tz honour the zone given in the header;
        # time.mktime() would wrongly interpret the GMT date as local time.
        parsed = rfc822.parsedate_tz(self.lastModified)
        if parsed:
            mtime = rfc822.mktime_tz(parsed)
            os.utime(filename, (mtime, mtime))

    def save(self, basepath, showProgress=False):
        """Download the comic into C{basepath}/<moduleName>/.

        @param basepath: root directory for all comics.
        @param showProgress: if true, read in 8192-byte chunks through
            progressBar(); otherwise read the whole body at once.
        @return: tuple C{(path, saved)}; C{saved} is False when a file of at
            least the announced content length already existed.
        """
        comicSize = self.contentLength
        comicDir = os.path.join(basepath, self.moduleName.replace('/', os.sep))
        if not os.path.isdir(comicDir):
            os.makedirs(comicDir)

        fn = os.path.join(comicDir, '%s%s' % (self.filename, self.ext))
        if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize:
            self.urlobj.close()
            self.touch(fn)
            out.write('Skipping existing file "%s".' % (fn,), 1)
            return fn, False

        # Computed before the try block so the cleanup code can rely on it.
        tmpFn = os.path.join(comicDir, '__%s%s' % (self.filename, self.ext))
        try:
            out.write('Writing comic to temporary file %s...' % (tmpFn,), 3)
            comicOut = open(tmpFn, 'wb')
            try:
                startTime = time.time()
                if showProgress:
                    def pollData():
                        data = self.urlobj.read(8192)
                        if not data:
                            raise OperationComplete
                        comicOut.write(data)
                        return len(data), self.contentLength
                    progressBar(pollData)
                else:
                    comicOut.write(self.urlobj.read())
                endTime = time.time()
            finally:
                comicOut.close()
            out.write('Copying temporary file (%s) to %s...' % (tmpFn, fn), 3)
            shutil.copy2(tmpFn, fn)
            self.touch(fn)

            size = os.path.getsize(fn)
            sizeText = locale.format('%d', size, True)
            if endTime != startTime:
                speed = saneDataSize(size / (endTime - startTime))
            else:
                speed = '???'
            attrs = dict(fn=fn, bytes=sizeText, speed=speed)
            out.write('Saved "%(fn)s" (%(bytes)s bytes, %(speed)s/sec).' % attrs, 1)
            handler.comicDownloaded(self.moduleName, fn)
        finally:
            # Release the connection on success and on failure alike.
            self.urlobj.close()
            try:
                out.write('Removing temporary file %s...' % (tmpFn,), 3)
                os.remove(tmpFn)
            except EnvironmentError:
                # Best effort: the temporary file may never have been created.
                pass

        return fn, True
|
19
dosagelib/configuration.py
Normal file
19
dosagelib/configuration.py
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
import _Dosage_configdata as configdata
|
||||||
|
|
||||||
|
# Application metadata; every value is derived from the configdata module.
Version = configdata.version
ReleaseDate = configdata.release_date
AppName = configdata.name
App = AppName+u" "+Version
Author = configdata.author
# HTML variant of the author name: spaces become non-breaking space entities.
# Entities are spelled out in ASCII because this file declares no source
# encoding.
HtmlAuthor = Author.replace(u' ', u'&nbsp;')
Copyright = u"Copyright (C) 2004-2008 "+Author
HtmlCopyright = u"Copyright &copy; 2004-2008 "+HtmlAuthor
Url = configdata.url
SupportUrl = Url + u"/issues"
Email = configdata.author_email
UserAgent = u"%s/%s (+%s)" % (AppName, Version, Url)
# NOTE(review): the text below points at a file named LICENSE, while the
# repository ships the license text as COPYING -- confirm which is intended.
Freeware = AppName+u""" comes with ABSOLUTELY NO WARRANTY!
This is free software, and you are welcome to redistribute it
under certain conditions. Look at the file `LICENSE' within this
distribution."""
|
||||||
|
|
159
dosagelib/events.py
Normal file
159
dosagelib/events.py
Normal file
|
@ -0,0 +1,159 @@
|
||||||
|
import os.path
|
||||||
|
import time
|
||||||
|
import rss
|
||||||
|
import urllib
|
||||||
|
import util
|
||||||
|
|
||||||
|
class EventHandler(object):
    """Base class for download event handlers.

    Subclasses override start(), comicDownloaded() and end(); the versions
    defined here do nothing.
    """

    def __init__(self, basepath, baseurl):
        """Remember the base directory and base URL.

        When C{baseurl} is empty, a file: URL is derived from C{basepath}.
        """
        self.basepath = basepath
        self.baseurl = baseurl or self.getBaseUrl()

    def _quoteComponents(self, path):
        """Split C{path} into components, URL-quote each and join with '/'."""
        quoted = []
        for part in util.splitpath(path):
            quoted.append(urllib.quote(part, ''))
        return '/'.join(quoted)

    def getBaseUrl(self):
        '''Return a file: URL that probably points to the basedir.

        This is used as a halfway sane default when the base URL is not
        provided; not perfect, but should work in most cases.'''
        absolute = os.path.abspath(self.basepath)
        return 'file:///' + self._quoteComponents(absolute) + '/'

    def getUrlFromFilename(self, filename):
        """Return the URL for C{filename}: base URL plus its quoted path
        relative to the base directory."""
        relative = util.getRelativePath(self.basepath, filename)
        return self.baseurl + self._quoteComponents(relative)

    def start(self):
        """Hook called before downloading; no-op here."""

    def comicDownloaded(self, comic, filename):
        """Hook called for each saved comic file; no-op here."""

    def end(self):
        """Hook called after downloading; no-op here."""
|
||||||
|
|
||||||
|
class TextEventHandler(EventHandler):
    """Handler registered under the name 'text'; inherits the no-op hooks."""
|
||||||
|
|
||||||
|
class RSSEventHandler(EventHandler):
    """Event handler that lists downloaded comics in the RSS file
    'dailydose.rss' below the base path."""

    def RFC822Date(self, indate):
        """Format the epoch timestamp C{indate} as an RFC 822 date in GMT.

        email.utils.formatdate() always emits English day and month names;
        strftime('%a'/'%b') would follow the process locale and could
        produce dates that feed readers reject.
        """
        from email.utils import formatdate
        return formatdate(indate, usegmt=True)

    def getFilename(self):
        """Return the absolute path of the feed file."""
        return os.path.abspath(os.path.join(self.basepath, 'dailydose.rss'))

    def start(self):
        """Load the existing feed, or create a new one if the file is absent."""
        now = time.time()
        today = time.localtime(now)
        yesterday = time.localtime(now - 86400)

        link = 'https://github.com/wummel/dosage'

        self.rssfn = self.getFilename()

        if os.path.exists(self.rssfn):
            self.newfile = False
            # `yesterday` is handed to the parser; its use is defined in rss.
            self.rss = rss.parseFeed(self.rssfn, yesterday)
        else:
            self.newfile = True
            self.rss = rss.Feed('Daily Dosage', link, 'Comics for %s' % time.strftime('%Y/%m/%d', today))

    def comicDownloaded(self, comic, filename):
        """Add an item for the saved file C{filename} of C{comic}."""
        url = self.getUrlFromFilename(filename)
        args = (
            '%s - %s' % (comic, os.path.basename(filename)),
            url,
            '<a href="%s">View Comic</a>' % (url,),
            self.RFC822Date(time.time())
        )

        if self.newfile:
            # Only the first item of a freshly created feed is appended;
            # every later item is inserted at the head.
            self.newfile = False
            self.rss.addItem(*args)
        else:
            self.rss.insertHead(*args)

    def end(self):
        """Write the feed to its file."""
        self.rss.write(self.rssfn)
|
||||||
|
|
||||||
|
class HtmlEventHandler(EventHandler):
    """Event handler that writes one HTML index page per day, named
    'html/comics-YYYYMMDD.html' below the base path."""

    def fnFromDate(self, date):
        """Return the absolute page path for the time tuple C{date}."""
        fn = time.strftime('comics-%Y%m%d.html', date)
        fn = os.path.join(self.basepath, 'html', fn)
        fn = os.path.abspath(fn)
        return fn

    def start(self):
        """Create today's page and write its header with previous/next links.

        @raise AssertionError: if the page for today already exists.
        """
        import datetime
        # Calendar arithmetic instead of +/- 86400 seconds: on days with a
        # DST change, a fixed offset can land on the same or a skipped date.
        day = datetime.date.today()
        oneDay = datetime.timedelta(days=1)
        today = day.timetuple()
        yesterday = (day - oneDay).timetuple()
        tomorrow = (day + oneDay).timetuple()

        fn = self.fnFromDate(today)
        # Raised explicitly so the guard also works under "python -O".
        if os.path.exists(fn):
            raise AssertionError('Comic page for today already exists!')

        d = os.path.dirname(fn)
        if not os.path.isdir(d):
            os.makedirs(d)

        yesterdayUrl = self.getUrlFromFilename(self.fnFromDate(yesterday))
        tomorrowUrl = self.getUrlFromFilename(self.fnFromDate(tomorrow))

        self.html = file(fn, 'w')
        self.html.write('''<html>
<head>
<title>Comics for %s</title>
</head>
<body>
<a href="%s">Previous Day</a> | <a href="%s">Next Day</a>
<ul>
''' % (time.strftime('%Y/%m/%d', today), yesterdayUrl, tomorrowUrl))

        self.lastComic = None

    def comicDownloaded(self, comic, filename):
        """Write a list entry linking to C{filename}; a new comic name first
        opens a new sub-list."""
        if self.lastComic != comic:
            self.newComic(comic)
        url = self.getUrlFromFilename(filename)
        self.html.write(' <li><a href="%s">%s</a></li>\n' % (url, os.path.basename(filename)))

    def newComic(self, comic):
        """Close the previous comic's sub-list, if any, and open one for
        C{comic}."""
        if self.lastComic is not None:
            self.html.write(' </ul>\n')
        self.lastComic = comic
        self.html.write(''' <li>%s</li>
<ul>
''' % (comic,))

    def end(self):
        """Close any open sub-list, write the page footer and close the file."""
        if self.lastComic is not None:
            self.html.write(' </ul>\n')
        self.html.write('''</ul>
</body>
</html>''')
        self.html.close()
|
||||||
|
|
||||||
|
|
||||||
|
# Maps the handler name to the class implementing it.
handlers = {
    'html': HtmlEventHandler,
    'rss': RSSEventHandler,
    'text': TextEventHandler,
}

def getHandlers():
    """Return the available handler names as a sorted list."""
    return sorted(handlers.keys())
|
||||||
|
|
||||||
|
def installHandler(name=None, basepath=None, baseurl=None):
    """Instantiate the named handler and store it in the module global
    C{handler}.

    @param name: key into C{handlers}; 'text' when None.
    @param basepath: base directory; '.' when None.
    @param baseurl: passed to the handler unchanged.
    """
    global handler
    chosenName = name
    if chosenName is None:
        chosenName = 'text'
    chosenPath = basepath
    if chosenPath is None:
        chosenPath = '.'
    handlerClass = handlers[chosenName]
    handler = handlerClass(chosenPath, baseurl)
|
||||||
|
|
||||||
|
installHandler()
|
181
dosagelib/helpers.py
Normal file
181
dosagelib/helpers.py
Normal file
|
@ -0,0 +1,181 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .util import fetchUrl, fetchManyUrls, getQueryParams
|
||||||
|
from .comic import Comic
|
||||||
|
|
||||||
|
class _BasicScraper(object):
|
||||||
|
'''Base class with scrape functions for comics.
|
||||||
|
|
||||||
|
@type latestUrl: C{string}
|
||||||
|
@cvar latestUrl: The URL for the latest comic strip.
|
||||||
|
@type imageUrl: C{string}
|
||||||
|
@cvar imageUrl: A string that is interpolated with the strip index
|
||||||
|
to yield the URL for a particular strip.
|
||||||
|
@type imageSearch: C{regex}
|
||||||
|
@cvar imageSearch: A compiled regex that will locate the strip image URL
|
||||||
|
when applied to the strip page.
|
||||||
|
@type prevSearch: C{regex}
|
||||||
|
@cvar prevSearch: A compiled regex that will locate the URL for the
|
||||||
|
previous strip when applied to a strip page.
|
||||||
|
'''
|
||||||
|
referrer = None
|
||||||
|
help = 'Sorry, no help for this comic yet.'
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.currentUrl = None
|
||||||
|
self.urls = set()
|
||||||
|
|
||||||
|
def getReferrer(self, imageUrl, pageUrl):
|
||||||
|
return self.referrer or pageUrl or self.getLatestUrl()
|
||||||
|
|
||||||
|
def getComic(self, url, pageUrl):
|
||||||
|
if not url:
|
||||||
|
return None
|
||||||
|
return Comic(self.get_name(), url, filename=self.getFilename(url, pageUrl), referrer=self.getReferrer(url, pageUrl))
|
||||||
|
|
||||||
|
def getCurrentComics(self):
|
||||||
|
self.currentUrl = self.getLatestUrl()
|
||||||
|
comics = self.getNextComics()
|
||||||
|
if not comics:
|
||||||
|
raise ValueError("Could not find current comic.")
|
||||||
|
return comics
|
||||||
|
|
||||||
|
def getNextComics(self):
|
||||||
|
comics = []
|
||||||
|
while not comics and self.currentUrl and self.currentUrl not in self.urls:
|
||||||
|
comicUrlGroups, prevUrl = fetchManyUrls(self.currentUrl, [self.imageSearch, self.prevSearch])
|
||||||
|
|
||||||
|
if prevUrl:
|
||||||
|
prevUrl = prevUrl[0]
|
||||||
|
else:
|
||||||
|
prevUrl = None
|
||||||
|
|
||||||
|
for comicUrl in comicUrlGroups:
|
||||||
|
comics.append(self.getComic(comicUrl, self.currentUrl))
|
||||||
|
|
||||||
|
self.urls.update([self.currentUrl])
|
||||||
|
self.currentUrl = (prevUrl, None)[prevUrl in self.urls]
|
||||||
|
return comics
|
||||||
|
|
||||||
|
def setStrip(self, index):
|
||||||
|
self.currentUrl = self.imageUrl % index
|
||||||
|
|
||||||
|
def getHelp(self):
|
||||||
|
return self.help
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
"""Iterate through the strips, starting from the current one and going backward."""
|
||||||
|
if not self.currentUrl:
|
||||||
|
self.currentUrl = self.getLatestUrl()
|
||||||
|
|
||||||
|
comics = True
|
||||||
|
while comics:
|
||||||
|
comics = self.getNextComics()
|
||||||
|
if comics:
|
||||||
|
yield comics
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_name(cls):
|
||||||
|
if hasattr(cls, 'name'):
|
||||||
|
return cls.name
|
||||||
|
return cls.__name__
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def starter(cls):
|
||||||
|
return cls.latestUrl
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
return None
|
||||||
|
|
||||||
|
def getFilename(self, imageUrl, pageUrl):
|
||||||
|
return self.namer(imageUrl, pageUrl)
|
||||||
|
|
||||||
|
def getLatestUrl(self):
|
||||||
|
return self.starter()
|
||||||
|
|
||||||
|
|
||||||
|
def queryNamer(paramName, usePageUrl=False):
    """Return a static namer that uses the first value of query parameter
    C{paramName}, read from the page URL if C{usePageUrl} else the image URL."""
    def _namer(imageUrl, pageUrl):
        source = (imageUrl, pageUrl)[usePageUrl]
        params = getQueryParams(source)
        return params[paramName][0]
    return staticmethod(_namer)
|
||||||
|
|
||||||
|
|
||||||
|
def regexNamer(regex):
    """Return a static namer that uses group 1 of C{regex} applied to the
    image URL.

    The namer returns None when the regex does not match, instead of failing
    with AttributeError; Comic then falls back to the name taken from the URL.
    """
    @staticmethod
    def _namer(imageUrl, pageUrl):
        match = regex.search(imageUrl)
        if match is None:
            return None
        return match.group(1)
    return _namer
|
||||||
|
|
||||||
|
|
||||||
|
def constStarter(latestUrl):
    """Return a static starter that always yields C{latestUrl}."""
    def _starter():
        return latestUrl
    return staticmethod(_starter)
|
||||||
|
|
||||||
|
|
||||||
|
def bounceStarter(latestUrl, nextSearch):
    """Return a class-method starter that "bounces" off the previous page.

    It follows the class's C{prevSearch} link from C{latestUrl}, then the
    C{nextSearch} link from that page, and returns the resulting URL. If the
    first lookup yields nothing, that empty result is returned.
    """
    def _starter(cls):
        previous = fetchUrl(latestUrl, cls.prevSearch)
        if not previous:
            return previous
        return fetchUrl(previous, nextSearch)
    return classmethod(_starter)
|
||||||
|
|
||||||
|
|
||||||
|
def indirectStarter(baseUrl, latestSearch):
    """Return a static starter that looks up the latest URL on C{baseUrl}
    using the regex C{latestSearch}."""
    def _starter():
        latest = fetchUrl(baseUrl, latestSearch)
        return latest
    return staticmethod(_starter)
|
||||||
|
|
||||||
|
|
||||||
|
class IndirectLatestMixin(object):
    '''
    Mixin for comics that link to the latest comic from a base page of
    some kind. This also supports comics which don't link to the last comic
    from the base page, but to the beginning of the latest chapter or similar
    schemes. It simulates going forward until it can't find a 'next' link as
    specified by the 'nextSearch' regex.

    @type baseUrl: C{string}
    @cvar baseUrl: the URL where the link to the latest comic is found.
    @type latestSearch: C{regex}
    @cvar latestSearch: a compiled regex for finding the 'latest' URL.
    @type nextSearch: C{regex}
    @cvar nextSearch: a compiled regex for finding the 'next' URL.
    '''

    # Cached result of the lookup; filled on first use.
    __latestUrl = None

    def getLatestUrl(self):
        """Return the latest URL, resolving and caching it on first call."""
        if self.__latestUrl:
            return self.__latestUrl
        self.__latestUrl = fetchUrl(self.baseUrl, self.latestSearch)
        if hasattr(self, "nextSearch"):
            # Follow 'next' links until a page has none.
            while True:
                following = fetchUrl(self.__latestUrl, self.nextSearch)
                if not following:
                    break
                self.__latestUrl = following
        return self.__latestUrl

    latestUrl = property(getLatestUrl)
|
||||||
|
|
||||||
|
|
||||||
|
class _PHPScraper(_BasicScraper):
    """
    I implement IScraper for comics using phpComic/CUSP.

    This provides an easy way to define scrapers for webcomics using phpComic.
    """
    help = 'Index format: yymmdd'

    @property
    def imageUrl(self):
        """Page URL template, built from the C{basePath} attribute."""
        return self.basePath + 'daily.php?date=%s'

    @property
    def imageSearch(self):
        """Image regex, compiled from the C{basePath} attribute on each access."""
        pattern = r'<img alt=[^>]+ src="(%scomics/\d{6}\..+?)">' % (self.basePath,)
        return re.compile(pattern)

    @classmethod
    def starter(cls):
        """Return C{basePath} joined with C{latestUrl}."""
        return cls.basePath + cls.latestUrl
|
23
dosagelib/output.py
Normal file
23
dosagelib/output.py
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
import time
|
||||||
|
|
||||||
|
class Output(object):
    """Simple leveled message printer writing to standard output."""

    def __init__(self):
        # Prefix printed before the '>' of every message.
        self.context = ''
        # Messages with a level above this value are suppressed.
        self.level = 0
        # Force timestamps even at low verbosity.
        self.timestamps = False

    def write(self, s, level=0):
        """Print C{s} unless C{level} exceeds the configured level.

        A 'HH:MM:SS ' timestamp is prepended when the configured level is
        above 1 or C{timestamps} is set.
        """
        if level > self.level:
            return
        if self.level > 1 or self.timestamps:
            timestamp = time.strftime('%H:%M:%S ')
        else:
            timestamp = ''
        # Parenthesised single argument: valid as a print statement and as a
        # print() call.
        print('%s%s> %s' % (timestamp, self.context, s))

    def writelines(self, lines, level=0):
        """Write every text line contained in the strings of C{lines}.

        Trailing newlines of each string are dropped; embedded newlines
        split the string into separate messages.
        """
        for chunk in lines:
            for line in chunk.rstrip('\n').split('\n'):
                self.write(line, level=level)
|
||||||
|
|
||||||
|
out = Output()
|
1
dosagelib/plugins/__init__.py
Normal file
1
dosagelib/plugins/__init__.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
|
338
dosagelib/plugins/a.py
Normal file
338
dosagelib/plugins/a.py
Normal file
|
@ -0,0 +1,338 @@
|
||||||
|
from re import compile, MULTILINE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, regexNamer, bounceStarter, indirectStarter
|
||||||
|
|
||||||
|
|
||||||
|
class ALessonIsLearned(_BasicScraper):
    """Scraper settings for http://www.alessonislearned.com/."""
    help = 'Index format: nnn'
    # Page URLs
    latestUrl = 'http://www.alessonislearned.com/'
    imageUrl = 'http://www.alessonislearned.com/lesson%s.html'
    # Page patterns
    prevSearch = compile(r"<a href='(index.php\?comic=.+?)'.+?previous")
    imageSearch = compile(r'<img src="(cmx/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class ASofterWorld(_BasicScraper):
    """Scraper settings for http://www.asofterworld.com/."""
    help = 'Index format: n (unpadded)'
    # Page URLs
    latestUrl = 'http://www.asofterworld.com/'
    imageUrl = 'http://www.asofterworld.com/index.php?id=%s'
    # Page patterns
    prevSearch = compile(r'"([^"]+)">back')
    imageSearch = compile(r'<img src="(http://www.asofterworld.com/clean/[^"]+)"')
|
||||||
|
|
||||||
|
|
||||||
|
class AbleAndBaker(_BasicScraper):
    """Scraper settings for http://www.jimburgessdesign.com/comics/."""
    help = 'Index format: nnn'
    # Page URLs
    latestUrl = 'http://www.jimburgessdesign.com/comics/index.php'
    imageUrl = 'http://www.jimburgessdesign.com/comics/index.php?comic=%s'
    # Page patterns
    prevSearch = compile(r'<a href="(.+\d+?)".+previous.gif')
    imageSearch = compile(r'<img src="(comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class AbominableCharlesChristopher(_BasicScraper):
    """Scraper settings for http://abominable.cc/."""
    help = 'Index format: yyyy/mm/dd/comicname'
    # Page URLs
    latestUrl = 'http://abominable.cc/'
    imageUrl = 'http://abominable.cc/%s'
    # Page patterns
    prevSearch = compile(r'cc(/.+?)".+?prev')
    imageSearch = compile(r'cc(/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class AbstractGender(_BasicScraper):
    """Scraper settings for http://www.abstractgender.com/."""
    help = 'Index format: n (unpadded)'
    # Page URLs
    latestUrl = 'http://www.abstractgender.com/'
    imageUrl = 'http://www.abstractgender.com/?comic=%s'
    # Page patterns
    prevSearch = compile(r'<a\W+href="(\?comic=\d+)"><img[^>]+id="comic_menu_prev"')
    imageSearch = compile(r'<img[^>]+src="(comics/\d+\.\w+)"')
|
||||||
|
|
||||||
|
|
||||||
|
class AbsurdNotions(_BasicScraper):
    """Scraper settings for http://www.absurdnotions.org/."""
    help = 'Index format: n (unpadded)'
    # Page URLs
    latestUrl = 'http://www.absurdnotions.org/page129.html'
    imageUrl = 'http://www.absurdnotions.org/page%s.html'
    # Page patterns
    prevSearch = compile(r'HREF="([^"]+)"><IMG SRC="nprev\.gif"')
    imageSearch = compile(r'<IMG SRC="(an[^"]+)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AbstruseGoose(_BasicScraper):
    """Scraper settings for http://abstrusegoose.com/.

    The navigation patterns spell the guillemets as HTML entities so that
    the source stays pure ASCII (this file declares no source encoding).
    """
    starter = bounceStarter('http://abstrusegoose.com/',
        compile(r'<a href = "(http://abstrusegoose.com/\d+)">Next &raquo;</a>'))
    imageUrl = 'http://abstrusegoose.com/c%s.html'
    imageSearch = compile(r'<img[^<]+src="(http://abstrusegoose.com/strips/[^<>"]+)"')
    prevSearch = compile(r'<a href = "(http://abstrusegoose.com/\d+)">&laquo; Previous</a>')
    help = 'Index format: n (unpadded)'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Name the file 'cNNN-<image base name>', where NNN is the number
        that ends the page URL, zero-padded to three digits."""
        index = int(pageUrl.rstrip('/').split('/')[-1])
        name = imageUrl.split('/')[-1].split('.')[0]
        return 'c%03d-%s' % (index, name)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AcademyVale(_BasicScraper):
    """Scraper settings for http://imagerie.com/vale/."""
    help = 'Index format: nnn'
    # Page URLs
    latestUrl = 'http://imagerie.com/vale/'
    imageUrl = 'http://imagerie.com/vale/avarch.cgi?%s'
    # Page patterns
    prevSearch = compile(r'HREF=(avarch.*?)><IMG SRC="AVNavBack.gif"')
    imageSearch = compile(r'<IMG.+?SRC="(avale\d{4}-\d{2}\..*?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Alice(_BasicScraper):
    """Scraper settings for http://alice.alicecomics.com/."""
    help = 'Index format: non'
    # Page URLs
    latestUrl = 'http://alice.alicecomics.com/'
    imageUrl = 'http://alice.alicecomics.com/%s'
    # Page patterns
    prevSearch = compile(r' .+"?com(/.+?)" rel="prev')
    imageSearch = compile(r'(/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AlienLovesPredator(_BasicScraper):
    """Scraper settings for http://alienlovespredator.com/."""
    help = 'Index format: nnn'
    starter = bounceStarter(
        'http://alienlovespredator.com/index.php',
        compile(r'<a href="(.+?)"><img src="/images/nav_next.jpg"'))
    imageUrl = 'http://alienlovespredator.com/%s'
    prevSearch = compile(r'<a href="(.+?)"><img src="/images/nav_previous.jpg"')
    imageSearch = compile(r'<img src="(.+?)"[^>]+>(<center>\n|\n|</center>\n)<div style="height: 2px;"> </div>', MULTILINE)

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Join the four path components before the last one of the page URL
        with dashes."""
        parts = pageUrl.split('/')
        return '%s-%s-%s-%s' % (parts[-5], parts[-4], parts[-3], parts[-2])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AnarchySD(_BasicScraper):
    """Scraper settings for http://www.anarchycomic.com/; the latest page is
    looked up through the 'LATEST' link on page1.php."""
    help = 'Index format: n (unpadded)'
    starter = indirectStarter('http://www.anarchycomic.com/page1.php',
        compile(r'<a href="(page\d+\.php)" class="style15">LATEST'))
    imageUrl = 'http://www.anarchycomic.com/page%s.php'
    prevSearch = compile(r'<a href="(page\d+\.php)">PREVIOUS PAGE')
    imageSearch = compile(r'<img.+src="../(images/page\d+\..+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Altermeta(_BasicScraper):
    """Scraper settings for http://altermeta.net/."""
    help = 'Index format: n (unpadded)'
    # Page URLs
    latestUrl = 'http://altermeta.net/'
    imageUrl = 'http://altermeta.net/archive.php?comic=%s&view=showfiller'
    # Page patterns
    prevSearch = compile(r'<a href="([^"]+)"><img src="http://altermeta\.net/template/default/images/sasha/back\.png')
    imageSearch = compile(r'<img src="(comics/[^"]+)" />')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AltermetaOld(Altermeta):
    """Variant of Altermeta for http://altermeta.net/oldarchive/; overrides
    the name, the URLs and the 'previous' pattern."""
    name = 'Altermeta/Old'
    # Page URLs
    imageUrl = 'http://altermeta.net/oldarchive/archive.php?comic=%s'
    latestUrl = 'http://altermeta.net/oldarchive/index.php'
    # Page pattern
    prevSearch = compile(r'<a href="([^"]+)">Back')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Angels2200(_BasicScraper):
    """Scraper settings for http://www.janahoffmann.com/angels/.

    No imageUrl is defined here, so setStrip() cannot be used on this class.
    """
    latestUrl = 'http://www.janahoffmann.com/angels/'
    imageSearch = compile(r"<img src='(http://www.janahoffmann.com/angels/comics/[^']+)'>")
    # The guillemet is spelled as an HTML entity to keep the source ASCII
    # (this file declares no source encoding).
    prevSearch = compile(r'<a href="([^"]+)">&laquo; Previous</a>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AppleGeeks(_BasicScraper):
    """Scraper settings for http://www.applegeeks.com/."""
    help = 'Index format: n (unpadded)'
    # Page URLs
    latestUrl = 'http://www.applegeeks.com/'
    imageUrl = 'http://www.applegeeks.com/comics/viewcomic.php?issue=%s'
    # Page patterns
    prevSearch = compile(r'<div class="caption">Previous Comic</div>\s*<p><a href="([^"]+)">', MULTILINE)
    imageSearch = compile(r'<img src="((?:/comics/)?issue\d+?\..+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class AppleGeeksLite(_BasicScraper):
    """Scraper settings for http://www.applegeeks.com/lite/."""
    help = 'Index format: yyyy-mm-dd'
    # Page URLs
    latestUrl = 'http://www.applegeeks.com/lite/'
    imageUrl = 'http://applegeeks.com/lite/index.php?aglitecomic=%s'
    # Page patterns
    prevSearch = compile(r'<a href="(index.php\?aglitecomic=.+?)".+?back')
    imageSearch = compile(r'<img src="(strips/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Achewood(_BasicScraper):
    """Scraper settings for http://www.achewood.com/; files are named after
    the eight-digit date found in the image URL."""
    help = 'Index format: mmddyyyy'
    namer = regexNamer(compile(r'date%3D(\d{8})'))
    # Page URLs
    latestUrl = 'http://www.achewood.com/'
    imageUrl = 'http://www.achewood.com/index.php?date=%s'
    # Page patterns
    prevSearch = compile(r'<a href="(index\.php\?date=\d{8})" class="dateNav" title="Previous comic"')
    imageSearch = compile(r'<img src="(http://m.assetbar.com/achewood/autaux.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AstronomyPOTD(_BasicScraper):
    """Scraper settings for http://antwrp.gsfc.nasa.gov/apod/.

    The navigation links are matched by their entity-encoded link text
    ('&gt;' for next, '&lt;' for previous), as it appears in raw HTML.
    """
    starter = bounceStarter(
        'http://antwrp.gsfc.nasa.gov/apod/astropix.html',
        compile(r'<a href="(ap\d{6}\.html)">&gt;</a>'))
    imageUrl = 'http://antwrp.gsfc.nasa.gov/apod/ap%s.html'
    imageSearch = compile(r'<a href="(image/\d{4}/.+\..+?)">')
    prevSearch = compile(r'<a href="(ap\d{6}\.html)">&lt;</a>')
    help = 'Index format: yymmdd'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return '<page base name minus its first two characters>-<image
        base name>'."""
        pageName = pageUrl.split('/')[-1].split('.')[0]
        imageName = imageUrl.split('/')[-1].split('.')[0]
        return '%s-%s' % (pageName[2:], imageName)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AfterStrife(_BasicScraper):
    """Scraper settings for http://afterstrife.com/."""
    help = 'Index format: nnn'
    # Page URLs
    latestUrl = 'http://afterstrife.com/?p=262'
    imageUrl = 'http://afterstrife.com/?p=%s'
    # Page patterns
    prevSearch = compile(r'<a href="(.+?)" class="navi navi-prev"')
    imageSearch = compile(r'<img src="(http://afterstrife.com/strips/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AnUnrehearsedRiot(_BasicScraper):
    """Scraper settings for http://unrehearsedriot.com/."""
    help = 'Index format: yyyy/mm/dd/strip-name'
    # Page URLs
    latestUrl = 'http://unrehearsedriot.com/'
    imageUrl = 'http://unrehearsedriot.com/%s'
    # Page patterns
    prevSearch = compile(r'<a href="(http://unrehearsedriot.com/.+?)" class="navi navi-prev"')
    imageSearch = compile(r'<img src="(http://unrehearsedriot.com/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ALLCAPS(_BasicScraper):
    """Scraper settings for http://www.allcapscomix.com/."""
    latestUrl = 'http://www.allcapscomix.com/'
    imageUrl = 'http://www.allcapscomix.com/%s'
    imageSearch = compile(r'<img src="(http://www.allcapscomix.com/comics/.+?)"')
    # The left-pointing triangle is written as a numeric character reference
    # so the source stays ASCII (this file declares no source encoding).
    # NOTE(review): assumes the page encodes it as &#9668; -- confirm against
    # the live markup.
    prevSearch = compile(r'href="(.+?)">(&#9668; Previous|<span class="prev">)')
    help = 'Index format: yyyy/mm/strip-name'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ASkeweredParadise(_BasicScraper):
    """Scraper settings for http://aspcomics.net/."""
    help = 'Index format: nnn'
    # Page URLs
    latestUrl = 'http://aspcomics.net/'
    imageUrl = 'http://aspcomics.net/archindex.php?strip_id=%s'
    # Page patterns
    prevSearch = compile(r'</a><a href="(.+?)"><img src="images/previous_day.jpg"')
    imageSearch = compile(r'<img src="(istrip_files/strips/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AGirlAndHerFed(_BasicScraper):
    """Scraper settings for http://www.agirlandherfed.com/."""
    help = 'Index format: nnn'
    starter = bounceStarter(
        'http://www.agirlandherfed.com/',
        compile(r' href="(/comic/\?\d+)" class="navigationActive">Next</a>\]'))
    imageUrl = 'http://www.agirlandherfed.com/comic/?%s'
    prevSearch = compile(r' href="(/comic/\?\d+)" class="navigationActive">Previous</a>\]')
    imageSearch = compile(r'<img src="(/images/comics/.+?)"')

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return the part of the page URL after its last '?'."""
        pieces = pageUrl.split('?')
        return pieces[-1]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AetheriaEpics(_BasicScraper):
    """Scraper settings for http://aetheria-epics.schala.net/."""
    help = 'Index format: nnn'
    # Page URLs
    latestUrl = 'http://aetheria-epics.schala.net/'
    imageUrl = 'http://aetheria-epics.schala.net/%s.html'
    # Page patterns
    prevSearch = compile(r'<a href="(\d{5}.html)"><img src="prev.jpg"\/>')
    imageSearch = compile(r'<td><img src="(\d{5}.\w{3,4})"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Adrift(_BasicScraper):
    """Scraper settings for http://www.adriftcomic.com/."""
    help = 'Index format: nnn'
    # Page URLs
    latestUrl = 'http://www.adriftcomic.com/'
    imageUrl = 'http://www.adriftcomic.com/page%s.html'
    # Page patterns
    prevSearch = compile(r'<A HREF="(.+?)"><IMG SRC="AdriftBackLink.gif"')
    imageSearch = compile(r'<IMG SRC="(Adrift_Web_Page\d+.jpg)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AirForceBlues(_BasicScraper):
    """Scraper settings for http://www.afblues.com/."""
    latestUrl = 'http://www.afblues.com/'
    imageUrl = 'http://www.afblues.com/?p=%s'
    imageSearch = compile(r'<img src=\'(http://www.afblues.com/comics/.+?)\'>')
    # The guillemet is spelled as an HTML entity to keep the source ASCII
    # (this file declares no source encoding).
    prevSearch = compile(r'<a href="(http://www.afblues.com/.+?)">&laquo; Previous')
    help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AlienShores(_BasicScraper):
    """Scraper settings for http://alienshores.com/alienshores_band/."""
    help = 'Index format: nnn'
    # Page URLs
    latestUrl = 'http://alienshores.com/alienshores_band/'
    imageUrl = 'http://alienshores.com/alienshores_band/?p=%s'
    # Page patterns
    prevSearch = compile(r'<a href="(http://alienshores.com/.+?)" rel="prev">')
    imageSearch = compile(r'><img src="(http://alienshores.com/alienshores_band/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AllKindsOfBees(_BasicScraper):
    """Scraper settings for http://www.allkindsofbees.com/."""
    help = 'Index format: nnn'
    # Page URLs
    latestUrl = 'http://www.allkindsofbees.com/'
    imageUrl = 'http://www.allkindsofbees.com/?p=%s'
    # Page patterns
    prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.allkindsofbees.com/.+?)">')
    imageSearch = compile(r'<img src="(http://www.allkindsofbees.com/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AllTheGrowingThings(_BasicScraper):
    """Scraper settings for http://typodmary.com/growingthings/."""
    help = 'Index format: yyyy/mm/dd/strip-name'
    # Page URLs
    latestUrl = 'http://typodmary.com/growingthings/'
    imageUrl = 'http://typodmary.com/growingthings/%s/'
    # Page patterns
    prevSearch = compile(r'<div class="nav-previous"><a href="(http://typodmary.com/growingthings/.+?)"')
    imageSearch = compile(r'<img src="(http://typodmary.com/growingthings/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Amya(_BasicScraper):
    """Scraper settings for http://www.amyachronicles.com/."""
    help = 'Index format: nnn'
    # Page URLs
    latestUrl = 'http://www.amyachronicles.com/'
    imageUrl = 'http://www.amyachronicles.com/archives/%s'
    # Page patterns
    prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.amyachronicles.com/archives/.+?)"')
    imageSearch = compile(r'<img src="(http://www.amyachronicles.com/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Angband(_BasicScraper):
    """Scraper settings for http://angband.calamarain.net/."""
    help = 'Index format: yyyy-mm-dd'
    # Page URLs
    latestUrl = 'http://angband.calamarain.net/index.php'
    imageUrl = 'http://angband.calamarain.net/view.php?date=%s'
    # Page patterns
    prevSearch = compile(r'<a href="(view.php\?date\=.+?)">Previous</a>')
    imageSearch = compile(r'<img src="(comics/Strip.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ArcticBlast(_BasicScraper):
    """Scraper settings for http://www.arcticblastcomic.com/."""
    help = 'Index format: nnn'
    # Page URLs
    latestUrl = 'http://www.arcticblastcomic.com/'
    imageUrl = 'http://www.arcticblastcomic.com/?p=%s'
    # Page patterns
    prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.arcticblastcomic.com/.+?)"')
    imageSearch = compile(r'<img src="(http://www.arcticblastcomic.com/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ActionAthena(_BasicScraper):
    """Scraper settings for http://actionathena.com/."""
    latestUrl = 'http://actionathena.com/'
    # NOTE(review): the literal '2' before %s looks odd next to the
    # 'yyyy/mm/dd/strip-name' index format below -- confirm it is intended.
    imageUrl = 'http://actionathena.com/2%s'
    imageSearch = compile(r'<img src=\'(http://actionathena.com/comics/.+?)\'>')
    # The guillemet is spelled as an HTML entity to keep the source ASCII
    # (this file declares no source encoding).
    prevSearch = compile(r'<a href="(http://actionathena.com/.+?)">&laquo; Previous</a>')
    help = 'Index format: yyyy/mm/dd/strip-name'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AlsoBagels(_BasicScraper):
    """Scraper settings for http://www.alsobagels.com/."""
    help = 'Index format: strip-name'
    # Page URLs
    latestUrl = 'http://www.alsobagels.com/'
    imageUrl = 'http://alsobagels.com/index.php/comic/%s/'
    # Page patterns
    prevSearch = compile(r'<div class="nav-previous"><a href="(http://alsobagels.com/index.php/comic/.+?)">')
    imageSearch = compile(r'<img src="(http://alsobagels.com/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Annyseed(_BasicScraper):
    """Scraper settings for the Annyseed pages on http://www.colourofivy.com/."""
    help = 'Index format: nnn'
    # Page URLs
    latestUrl = 'http://www.colourofivy.com/annyseed_webcomic_latest.htm'
    imageUrl = 'http://www.colourofivy.com/annyseed_webcomic%s.htm'
    # Page patterns
    prevSearch = compile(r'<a href="(http://www.colourofivy.com/.+?)"><img src="Last.gif"')
    imageSearch = compile(r'<td width="570" height="887" valign="top"><img src="(.+?)"')
|
317
dosagelib/plugins/b.py
Normal file
317
dosagelib/plugins/b.py
Normal file
|
@ -0,0 +1,317 @@
|
||||||
|
from re import compile
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper
|
||||||
|
|
||||||
|
|
||||||
|
class BadlyDrawnKitties(_BasicScraper):
    # Settings for badlydrawnkitties.com; both patterns capture
    # site-relative paths under /new/.
    latestUrl = 'http://www.badlydrawnkitties.com/'
    imageUrl = 'http://www.badlydrawnkitties.com/new/%s.html'
    help = 'Index format: n (unpadded)'
    imageSearch = compile(r'<img src="(/new/.+?)">')
    prevSearch = compile(r'"(/new/.+?)".+?previous.gif')
|
||||||
|
|
||||||
|
|
||||||
|
class Bardsworth(_BasicScraper):
    # Settings for bardsworth.com.
    latestUrl = 'http://www.bardsworth.com/'
    # Fixed: the template previously ended in 's%', which is not a valid
    # conversion -- applying '%' to it raises ValueError instead of
    # substituting the index.
    imageUrl = 'http://www.bardsworth.com/archive.php?p=%s'
    help = 'Index format: nnn'
    # Captures the relative "strips/..." path up to the closing quote.
    imageSearch = compile(r'(strips/.+?)"')
    # Captures a quoted absolute URL that is followed by "/prev".
    prevSearch = compile(r'"(http.+?)".+?/prev')
|
||||||
|
|
||||||
|
|
||||||
|
class BetterDays(_BasicScraper):
    # Settings for Better Days on jaynaylor.com.
    latestUrl = 'http://www.jaynaylor.com/betterdays/'
    imageUrl = 'http://www.jaynaylor.com/betterdays/archives/%s'
    help = 'Index format: yyyy/mm/<your guess>.html'
    # The src attribute is unquoted on this site.
    imageSearch = compile(r'<img src=(/betterdays/comic/.+?)>')
    # Note the greedy ".+" in the captured href.
    prevSearch = compile(r'<a href="(.+)">« Previous')
|
||||||
|
|
||||||
|
|
||||||
|
class BetterYouThanMe(_BasicScraper):
    # Settings for betteryouthanme.net.
    latestUrl = 'http://betteryouthanme.net/'
    # The template appends ".gif" after the substituted index.
    imageUrl = 'http://betteryouthanme.net/archive.php?date=%s.gif'
    help = 'Index format: yyyymmdd'
    imageSearch = compile(r'"(comics/.+?)"')
    prevSearch = compile(r'"(archive.php\?date=.+?)">.+?previous')
|
||||||
|
|
||||||
|
|
||||||
|
class BiggerThanCheeses(_BasicScraper):
    # Settings for biggercheese.com; both patterns capture relative paths.
    latestUrl = 'http://www.biggercheese.com'
    imageUrl = 'http://www.biggercheese.com/index.php?comic=%s'
    help = 'Index format: n (unpadded)'
    imageSearch = compile(r'src="(comics/.+?)" alt')
    prevSearch = compile(r'"(index.php\?comic=.+?)".+?_back')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BizarreUprising(_BasicScraper):
    # Settings for bizarreuprising.com; both patterns capture relative paths.
    latestUrl = 'http://www.bizarreuprising.com/'
    imageUrl = 'http://www.bizarreuprising.com/view/%s'
    help = 'Index format: n/name'
    imageSearch = compile(r'<img src="(comic/[^"]+)"')
    prevSearch = compile(
        r'<a href="(view/\d+/[^"]+)"><img src="images/b_prev\.gif"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Blip(_BasicScraper):
    # Settings for blipcomic.com.
    latestUrl = 'http://blipcomic.com/'
    imageUrl = 'http://blipcomic.com/index.php?strip_id=%s'
    help = 'Index format: n'
    imageSearch = compile(r'(istrip_files/strips/.+?)"')
    # Matches the strip link that sits between the text "First" and "prev".
    prevSearch = compile(r'First.+?"(index.php\?strip_id=.+?)".+?prev')
|
||||||
|
|
||||||
|
|
||||||
|
class BlueCrashKit(_BasicScraper):
    # Settings for Blue Crash Kit at bluecrashkit.com/cheese.
    # NOTE(review): a second class named BlueCrashKit (robhamm.com) is
    # defined further down this module and rebinds the name, so this
    # definition looks unreachable by name -- confirm and rename or remove.
    latestUrl = 'http://www.bluecrashkit.com/cheese/'
    imageUrl = 'http://www.bluecrashkit.com/cheese/node/%s'
    help = 'Index format: non'
    imageSearch = compile(r'(/cheese/files/comics/.+?)"')
    prevSearch = compile(r'(/cheese/node/.+?)".+?previous')
|
||||||
|
|
||||||
|
|
||||||
|
class BMovieComic(_BasicScraper):
    # Settings for bmoviecomic.com.
    latestUrl = 'http://www.bmoviecomic.com/'
    imageUrl = 'http://www.bmoviecomic.com/?cid=%s'
    help = 'Index format: n'
    imageSearch = compile(r'"(comics/.+?)"')
    # Captures only the "?cid=..." query part of the link.
    prevSearch = compile(r'(\?cid=.+?)".+?Prev')
|
||||||
|
|
||||||
|
|
||||||
|
# BratHalla has no 'previous' link at comic 360.  To get the earlier
# comics, start from that strip explicitly:
#   mainline -c BratHalla:360-backup-dad-unstable-plans/
class BratHalla(_BasicScraper):
    latestUrl = 'http://brat-halla.com/'
    imageUrl = 'http://brat-halla.com/comic/%s'
    help = 'Index format: non'
    # The image link is single-quoted and carries target='_blank'.
    imageSearch = compile(r"(/comics/.+?)' target='_blank")
    # Takes the first quoted http URL that follows the "headernav2" marker.
    prevSearch = compile(r'headernav2".+?"(http.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class Brink(_BasicScraper):
    # Settings for Brink on paperfangs.com.
    latestUrl = 'http://paperfangs.com/brink/'
    imageUrl = 'http://paperfangs.com/brink/?p=%s'
    help = 'Index format: non'
    # The leading slash is matched but left out of the captured path.
    imageSearch = compile(r'/(comics/.+?)"')
    # Captures only what follows "/brink/" in the previous-page link.
    prevSearch = compile(r'previous.+?/brink/(.+?)".+?Previous')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BonoboConspiracy(_BasicScraper):
    # Settings for Bonobo Conspiracy on ansuz.sooke.bc.ca.
    latestUrl = 'http://ansuz.sooke.bc.ca/bonobo-conspiracy/'
    imageUrl = 'http://ansuz.sooke.bc.ca/bonobo-conspiracy/%s'
    help = 'Index format: nnn'
    # The page markup is upper-case, hence the upper-case tag names.
    imageSearch = compile(r'<P.+?<IMG SRC="(.+?)" ALT')
    # Captures the "?i=..." query part of the previous-page link.
    prevSearch = compile(r'ansuz.+?/(\?i=.+?)".+?Previous')
|
||||||
|
|
||||||
|
|
||||||
|
class BoredAndEvil(_BasicScraper):
    # Settings for boredandevil.com.
    latestUrl = 'http://www.boredandevil.com/'
    imageUrl = 'http://www.boredandevil.com/archive.php?date=%s'
    help = 'Index format: yyyy-mm-dd'
    imageSearch = compile(r'<img src="(strips/.+?)"')
    # Greedy ".+" on both sides of the captured href.
    prevSearch = compile(
        r'First Comic.+<a href="(.+?)".+previous-on.gif')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BoyOnAStickAndSlither(_BasicScraper):
    # Settings for boasas.com.
    latestUrl = 'http://www.boasas.com/'
    imageUrl = 'http://www.boasas.com/?c=%s'
    help = 'Index format: n (unpadded)'
    imageSearch = compile(r'"(boasas/\d+\..+?)"')
    # The previous-page link wraps the left-arrow image.
    prevSearch = compile(
        r'<a href="(.+?)"><img src="images/left_20.png"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ButternutSquash(_BasicScraper):
    # Settings for butternutsquash.net (v3 site layout).
    latestUrl = 'http://www.butternutsquash.net/'
    imageUrl = 'http://www.butternutsquash.net/v3/%s'
    help = 'Index format: yyyy/mm/dd/strip-name-author-name'
    imageSearch = compile(
        r'<img src="(http://www.butternutsquash.net/v3/comics/.+?)"')
    # Group 1 is the link; group 2 matches either of two link markups.
    prevSearch = compile(
        r'<a href="(http://www.butternutsquash.net/v3/.+?)">(<span class="prev">◄|⇐ Previous</a>)')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Bhag(_BasicScraper):
    # Settings for bhag.sackofjustice.com.
    latestUrl = 'http://bhag.sackofjustice.com/'
    # Fixed: the template had no '%s' placeholder, so substituting an index
    # with '%' raised TypeError ("not all arguments converted").  The
    # index now goes after "date=", matching the links prevSearch captures.
    imageUrl = 'http://bhag.sackofjustice.com/daily.php?date=%s'
    help = 'Index format: yymmdd'
    # The leading slash is matched but left out of the captured path.
    imageSearch = compile(r'/(comics/.+?)">')
    # Captures the "daily.php?date=..." part of the previous-page link.
    prevSearch = compile(
        r'first.+?/(daily.php\?date=.+?)".+?previous')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def blankLabel(name, baseUrl):
    """Create a _BasicScraper subclass for one Blank Label comic.

    name    -- comic name; becomes part of the class name
               ('BlankLabel_<name>') and the scraper name
               ('BlankLabel/<name>').
    baseUrl -- root URL of the comic's site.  It is used as latestUrl and
               as the prefix of imageUrl, so it should end with '/'
               (every caller in this module passes it that way).

    Returns the newly created class.
    """
    attrs = dict(
        name='BlankLabel/' + name,
        latestUrl=baseUrl,
        # Fixed: this used to be the bare relative path 'd/%s.html', which
        # names no host at all; anchor it on the comic's own site.
        imageUrl=baseUrl + 'd/%s.html',
        # NOTE(review): inside the character class '|' is a literal, so
        # this also accepts "/comic|" -- probably meant [s/]; left as is.
        imageSearch=compile(r'"(/comic[s|/].+?)"'),
        # One quoted /d/ or /strip/ link followed by any of the
        # "previous" markers listed in the alternation.
        prevSearch=compile(r'(?:"([^"]*(?:/d/[^"\r\n]*)|(?:/strip/.+?))")(?:(?:.{43}starshift_back.gif)|(?:.+?cxn_previous)|(?:.{43}previous)|(?:[^<>]*>[^<>]*<[^<>]*previous)|(?:.*?back_button)|(?:.*?comicnav-previous))'),
        help='Index format: yyyymmdd',
    )
    return type('BlankLabel_%s' % name, (_BasicScraper,), attrs)
|
||||||
|
|
||||||
|
|
||||||
|
# Scraper classes produced by blankLabel(), one per comic.
checkerboardNightmare = blankLabel(
    'CheckerboardNightmare', 'http://www.checkerboardnightmare.com/')
courtingDisaster = blankLabel(
    'CourtingDisaster', 'http://www.courting-disaster.com/')
evilInc = blankLabel(
    'EvilInc', 'http://www.evil-comic.com/')
greystoneInn = blankLabel(
    'GreystoneInn', 'http://www.greystoneinn.net/')
itsWalky = blankLabel(
    'ItsWalky', 'http://www.itswalky.com/')
# Disabled: one strip name starts with %20.
#krazyLarry = blankLabel('KrazyLarry', 'http://www.krazylarry.com/')
melonpool = blankLabel(
    'Melonpool', 'http://www.melonpool.com/')
# Disabled: strip names = index.php.
#realLife = blankLabel('RealLife', 'http://www.reallifecomics.com/')
schlockMercenary = blankLabel(
    'SchlockMercenary', 'http://www.schlockmercenary.com/')
# Disabled: hosted on ComicsDotCom.
#sheldon = blankLabel('Sheldon', 'http://www.sheldoncomics.com/')
shortpacked = blankLabel(
    'Shortpacked', 'http://www.shortpacked.com/')
starslipCrisis = blankLabel(
    'StarslipCrisis', 'http://www.starslipcrisis.com/')
uglyHill = blankLabel(
    'UglyHill', 'http://www.uglyhill.com/')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BeePower(_BasicScraper):
    # Settings for Bee Power on comicswithoutviolence.com.
    # latestUrl is pinned to one specific dated page, not the site root.
    latestUrl = 'http://comicswithoutviolence.com/d/20080713.html'
    imageUrl = 'http://comicswithoutviolence.com/d/%s.html'
    # Fixed: the help text advertised 'yyyy/mm/dd', but pages are named
    # d/<8 digits>.html (see latestUrl), so the index has no slashes.
    help = 'Index format: yyyymmdd'
    imageSearch = compile(r'src="(/comics/.+?)"')
    # Captures the "<digits>.html" file name in front of the
    # previous-day button.
    prevSearch = compile(
        r'(\d+\.html)"><img[^>]+?src="/images/previous_day.png"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Bellen(_BasicScraper):
    # Settings for Bellen on boxbrown.com.
    latestUrl = 'http://boxbrown.com/'
    imageUrl = 'http://boxbrown.com/?p=%s'
    help = 'Index format: nnn'
    imageSearch = compile(
        r'<img src="(http://boxbrown.com/comics/[^"]+)"')
    prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BlankIt(_BasicScraper):
    # Settings for blankitcomics.com.
    latestUrl = 'http://blankitcomics.com/'
    imageUrl = 'http://blankitcomics.com/%s'
    help = 'Index format: yyyy/mm/dd/name'
    imageSearch = compile(
        r'<img src="(http://blankitcomics.com/bicomics/.+?)"')
    prevSearch = compile(r'<a href="([^"]+)" rel="prev">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BobWhite(_BasicScraper):
    # Settings for bobwhitecomics.com.
    latestUrl = 'http://www.bobwhitecomics.com/'
    imageUrl = 'http://www.bobwhitecomics.com/%s.shtml'
    help = 'Index format: yyyymmdd'
    imageSearch = compile(r'src="(/comics/.+?)"')
    # The previous-page link wraps the prev.jpg button.
    prevSearch = compile(
        r'"><a href="(.+?)"[^>]+?><img[^>]+?src="/images/prev.jpg">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BigFatWhale(_BasicScraper):
    # Settings for bigfatwhale.com.
    latestUrl = 'http://www.bigfatwhale.com/'
    imageUrl = 'http://www.bigfatwhale.com/archives/bfw_%s.htm'
    help = 'Index format: nnn'
    # Accepts the image path with or without the "archives/" prefix.
    imageSearch = compile(r'<img src="(archives/bfw_.+?|bfw_.+?)"')
    # The page markup is upper-case, hence the upper-case attribute names.
    prevSearch = compile(
        r' HREF="(.+?)" TARGET="_top" TITLE="Previous Cartoon"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BadassMuthas(_BasicScraper):
    # Settings for badassmuthas.com.
    latestUrl = 'http://badassmuthas.com/pages/comic.php'
    imageUrl = 'http://badassmuthas.com/pages/comic.php?%s'
    help = 'Index format: nnn'
    imageSearch = compile(r'<img src="(/images/comicsissue.+?)"')
    # The pattern ends with a space after the closing quote.
    prevSearch = compile(
        r'<a href="(.+?)"><img src="/images/comicsbuttonBack.gif" ')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Boozeathon4Billion(_BasicScraper):
    # Settings for boozeathon4billion.com.
    latestUrl = 'http://boozeathon4billion.com/'
    imageUrl = 'http://boozeathon4billion.com/comics/%s'
    help = 'Index format: (sometimes chapternumber/)-yyyy-mm-dd/stripname'
    imageSearch = compile(
        r'<img src="(http://boozeathon4billion.com/comics/.+?)"')
    prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BrightlyWound(_BasicScraper):
    # Settings for brightlywound.com; the site's markup uses single
    # quotes, hence the escaped quotes inside both patterns.
    latestUrl = 'http://www.brightlywound.com/'
    imageUrl = 'http://www.brightlywound.com/?comic=%s'
    help = 'Index format: nnn'
    imageSearch = compile(r'<img src=\'(comic/.+?)\'')
    prevSearch = compile(
        r'<div id=\'navback\'><a href=\'(\?comic\=\d+)\'><img src=\'images/previous.png\'')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BlueCrashKit(_BasicScraper):
    # Settings for Blue Crash Kit on robhamm.com.
    # NOTE(review): an earlier class in this module is also named
    # BlueCrashKit (bluecrashkit.com/cheese); this definition rebinds the
    # name -- confirm that replacing the earlier one is intended.
    latestUrl = 'http://robhamm.com/bluecrashkit'
    imageUrl = 'http://robhamm.com/comics/blue-crash-kit/%s'
    help = 'Index format: yyyy-mm-dd'
    imageSearch = compile(
        r'src="(http://robhamm.com/sites/default/files/comics/.+?)"')
    prevSearch = compile(r'<li class="previous"><a href="(.+?)">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BloodBound(_BasicScraper):
    # Settings for bloodboundcomic.com; both patterns begin with a
    # literal space.
    latestUrl = 'http://www.bloodboundcomic.com/'
    imageUrl = 'http://www.bloodboundcomic.com/d/%s.html'
    help = 'Index format: yyyymmdd'
    imageSearch = compile(r' src="(/comics/.+?)"')
    prevSearch = compile(
        r' <a href="(/d/.+?)"><img[^>]+?src="/images/previous_day.jpg"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BookOfBiff(_BasicScraper):
    # Settings for thebookofbiff.com.
    latestUrl = 'http://www.thebookofbiff.com/'
    imageUrl = 'http://www.thebookofbiff.com/%s'
    help = 'Index format: yyyy/mm/dd/stripnum-strip-name'
    imageSearch = compile(
        r'<img src="(http://www.thebookofbiff.com/comics/.+?)"')
    prevSearch = compile(
        r'<a href="(http://www.thebookofbiff.com/.+?)">◄ Previous</a>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BillyTheDunce(_BasicScraper):
    # Settings for Billy the Dunce on duncepress.com.
    latestUrl = 'http://www.duncepress.com/'
    imageUrl = 'http://www.duncepress.com/%s/'
    help = 'Index format: yyyy/mm/strip-name'
    imageSearch = compile(
        r'<img src="(http://www.duncepress.com/comics/.+?)"')
    prevSearch = compile(
        r'<div class="nav-previous"><a href="(http://www.duncepress.com/[^"]+)" rel="prev">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BackwaterPlanet(_BasicScraper):
    # Settings for backwaterplanet.com.
    latestUrl = 'http://www.backwaterplanet.com/current.htm'
    imageUrl = 'http://www.backwaterplanet.com/archive/bwp%s.htm'
    help = 'Index format: yymmdd'
    imageSearch = compile(r'<img src="(/images/comic/bwp.+?)">')
    # Group 1 is the link; group 2 accepts the button image path with or
    # without a leading slash.
    prevSearch = compile(
        r'<a href="(/archive/bwp.+?)"><img src="(images/Previous.jpg|/images/Previous.jpg)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Baroquen(_BasicScraper):
    # Settings for baroquencomics.com.
    latestUrl = 'http://www.baroquencomics.com/'
    # Fixed: this was the hard-coded address of one strip
    # ('.../2010/01/04/the-man-from-omi/') with no '%s', so substituting an
    # index raised TypeError.  It is now a template that takes the
    # yyyy/mm/dd/strip-name index the help text describes.
    imageUrl = 'http://www.baroquencomics.com/%s'
    help = 'Index format: yyyy/mm/dd/strip-name'
    # "Comics" is capitalised in the image path on this site.
    imageSearch = compile(
        r'<img src="(http://www.baroquencomics.com/Comics/.+?)"')
    prevSearch = compile(
        r'<a href="(http://www.baroquencomics.com/.+?)" rel="prev">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BetweenFailures(_BasicScraper):
    # Settings for betweenfailures.com.
    latestUrl = 'http://betweenfailures.com/'
    imageUrl = 'http://betweenfailures.com/%s'
    help = 'Index format: yyyy/mm/dd/stripnum-strip-name'
    # The image tag on this site is single-quoted.
    imageSearch = compile(
        r'<img src=\'(http://betweenfailures.com/comics/.+?)\'>')
    prevSearch = compile(
        r'<a href="(http://betweenfailures.com/.+?)">« Previous</a>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BillyTheBeaker(_BasicScraper):
    # Settings for Billy the Beaker on billy.defectivejunk.com.
    latestUrl = 'http://billy.defectivejunk.com/'
    imageUrl = 'http://billy.defectivejunk.com/index.php?strip=%s'
    help = 'Index format: nnn'
    imageSearch = compile(r'<img src="(bub\d+_\d+.+?)"')
    # The pattern begins with a literal space.
    prevSearch = compile(
        r' <a href="(index.php\?strip\=.+?)" title="Previous strip">')
|
495
dosagelib/plugins/c.py
Normal file
495
dosagelib/plugins/c.py
Normal file
|
@ -0,0 +1,495 @@
|
||||||
|
from re import compile
|
||||||
|
|
||||||
|
from ..helpers import (
|
||||||
|
_BasicScraper, constStarter, bounceStarter, indirectStarter)
|
||||||
|
from ..util import getQueryParams
|
||||||
|
|
||||||
|
|
||||||
|
class CalvinAndHobbes(_BasicScraper):
    # Settings for Calvin and Hobbes on gocomics.com; the images are
    # matched on picayune.uclick.com.
    latestUrl = 'http://www.gocomics.com/calvinandhobbes/'
    imageUrl = 'http://www.gocomics.com/calvinandhobbes/%s'
    help = 'Index format: yyyy/mm/dd'
    imageSearch = compile(
        r'src="(http://picayune\.uclick\.com/comics/ch/[^"]+\.gif)"')
    prevSearch = compile(
        r'href="(.*?)"\s+onclick="[^"]*">Previous day</a>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CandyCartoon(_BasicScraper):
    # Settings for candycartoon.com.
    latestUrl = 'http://www.candycartoon.com/'
    imageUrl = 'http://www.candycartoon.com/archives/%s.html'
    help = 'Index format: nnnnnn'
    imageSearch = compile(
        r'<img alt="[^"]*" src="(http://www\.candycartoon\.com/archives/[^"]+)"')
    # Only six-digit archive pages are accepted as the previous link.
    prevSearch = compile(
        r'<a href="(http://www\.candycartoon\.com/archives/\d{6}\.html)">prev')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CaptainSNES(_BasicScraper):
    # Settings for captainsnes.com.  The URLs use the bare domain while
    # both patterns match the "www." host.
    latestUrl = 'http://captainsnes.com/'
    imageUrl = 'http://captainsnes.com/?date=%s'
    help = 'Index format: yyyymmdd'
    imageSearch = compile(
        r'<img src=\'(http://www.captainsnes.com/comics/.+?)\'')
    # Captures only the part of the link after the host name.
    prevSearch = compile(
        r'<a href="http://www.captainsnes.com/(.+?)"><span class="prev">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CaribbeanBlue(_BasicScraper):
    # Settings for Caribbean Blue on cblue.katbox.net.
    latestUrl = 'http://cblue.katbox.net/'
    imageUrl = 'http://cblue.katbox.net/index.php?strip_id=%s'
    help = 'Index format: n (unpadded)'
    imageSearch = compile(r'="(.+?strips/.+?)"')
    prevSearch = compile(
        r'<a href="(.+?)"><img src="images/navigation_back.png"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Catena(_BasicScraper):
    # Settings for Catena on catenamanor.com.
    latestUrl = 'http://catenamanor.com/'
    imageUrl = 'http://catenamanor.com/index.php?comic=%s'
    help = 'Index format: n (unpadded)'
    imageSearch = compile(r'(comics/catena/.+?)"')
    # Takes the quoted value between the "First" link and "Previous".
    prevSearch = compile(r'First</a>.+?"(.+?)".+?Previous')
|
||||||
|
|
||||||
|
|
||||||
|
class Catharsis(_BasicScraper):
    # Settings for catharsiscomic.com.
    latestUrl = 'http://catharsiscomic.com/'
    imageUrl = 'http://catharsiscomic.com/archive.php?strip=%s'
    help = 'Index format: yymmdd-<your guess>.html'
    imageSearch = compile(r'<img src="(strips/.+?)"')
    # Greedy ".+" between the captured href and the "Previous" marker.
    prevSearch = compile(r'<a href="(.+?)".+"Previous')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ChasingTheSunset(_BasicScraper):
    # Settings for Chasing the Sunset on fantasycomic.com.
    latestUrl = 'http://www.fantasycomic.com/'
    # The template puts a literal 'c' in front of the substituted index.
    imageUrl = 'http://www.fantasycomic.com/index.php?p=c%s'
    help = 'Index format: n'
    imageSearch = compile(r'(/cmsimg/.+?)".+?comic-img')
    # Group 1 is the link; group 2 matches either of two button images.
    prevSearch = compile(
        r'<a href="(.+?)" title="" ><img src="(images/eye-prev.png|images/cn-prev.png)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Chisuji(_BasicScraper):
    # Settings for chisuji.com.
    latestUrl = 'http://www.chisuji.com/'
    imageUrl = 'http://www.chisuji.com/%s'
    help = 'Index format: yyyy/mm/dd/strip-name'
    imageSearch = compile(
        r'<img src="(http://www.chisuji.com/comics/.+?)"')
    prevSearch = compile(
        r'<div class="nav-previous"><a href="(http://www.chisuji.com/.+?)">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ChugworthAcademy(_BasicScraper):
    # Settings for chugworth.com.
    latestUrl = 'http://chugworth.com/'
    imageUrl = 'http://chugworth.com/?p=%s'
    help = 'Index format: n (unpadded)'
    imageSearch = compile(r'<img src="(.+?)" alt="Comic')
    # Accepts post ids of one to four digits.
    prevSearch = compile(
        r'<a href="(http://chugworth.com/\?p=\d{1,4})"[^>]+?title="Previous">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ChugworthAcademyArchive(_BasicScraper):
    # Settings for the chugworth.com archive section.
    # latestUrl is pinned to strip_id=422 rather than a front page.
    latestUrl = 'http://chugworth.com/archive/?strip_id=422'
    imageUrl = 'http://chugworth.com/archive/?strip_id=%s'
    help = 'Index format: nnn'
    # The src attribute is unquoted on this site.
    imageSearch = compile(r'<img src=(comics/\d+.+?.\w{1,4})')
    prevSearch = compile(
        r'<a href=\'(.+?)\'><img src=\'images/previous.gif')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CigarroAndCerveja(_BasicScraper):
    # Settings for Cigarro and Cerveja on cigarro.ca.
    latestUrl = 'http://www.cigarro.ca/'
    imageUrl = 'http://www.cigarro.ca/?p=%s'
    help = 'Index format: non'
    # The image path ends at a single quote on this site.
    imageSearch = compile(r"(/comics/.+?)'")
    prevSearch = compile(r'(/\?p=.+?)">&laq')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CombustibleOrange(_BasicScraper):
    # Settings for combustibleorange.com.
    latestUrl = 'http://www.combustibleorange.com/'
    imageUrl = 'http://www.combustibleorange.com/index.php?current=%s'
    help = 'Index format: n (unpadded)'
    imageSearch = compile(r'<img src="(/images/comics/\d+?\.gif)"')
    # The previous-page link wraps the image "button-last.gif".
    prevSearch = compile(
        r'><a href="(.+?)"><img src="images/button-last.gif" border="0">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Comedity(_BasicScraper):
    # Settings for comedity.com.
    latestUrl = 'http://www.comedity.com/'
    imageUrl = 'http://www.comedity.com/index.php?strip_id=%s'
    help = 'Index format: n (no padding)'
    imageSearch = compile(r'<img src="(Comedity_files/.+?)"')
    # The captured link may or may not start with a slash.
    prevSearch = compile(
        r'<a href="(/?index.php\?strip_id=\d+?)"> *<img alt=\"Prior Strip')
|
||||||
|
|
||||||
|
|
||||||
|
class Comet7(_BasicScraper):
    # Settings for comet7.com.
    latestUrl = 'http://www.comet7.com/'
    imageUrl = 'http://www.comet7.com/archive_page.php?id=%s'
    help = 'Index format: n (unpadded)'
    imageSearch = compile(r'"(.*?/strips/.*?)"')
    prevSearch = compile(r'"(.*?)".*?previous_stripf')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Commissioned(_BasicScraper):
    # Settings for commissionedcomic.com.
    latestUrl = 'http://www.commissionedcomic.com/'
    imageUrl = 'http://www.commissionedcomic.com/index.php?strip=%s'
    help = 'Index format: n'
    imageSearch = compile(
        r'<img src="(http://www.commissionedcomic.com/comics/.+?)"')
    prevSearch = compile(r'<a href="(.+?)">‹</a>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CoolCatStudio(_BasicScraper):
    # Settings for coolcatstudio.com.
    latestUrl = 'http://www.coolcatstudio.com/'
    imageUrl = 'http://www.coolcatstudio.com/index.php?p=%s'
    help = 'Index format: n'
    imageSearch = compile(r'(/comics/.+?)"')
    # The previous-page link is single-quoted on this site.
    prevSearch = compile(r"href='(.+?)'>PREV")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CourtingDisaster(_BasicScraper):
    # Settings for courting-disaster.com.
    latestUrl = 'http://www.courting-disaster.com/'
    imageUrl = 'http://www.courting-disaster.com/archive/%s.html'
    help = 'Index format: yyyymmdd'
    imageSearch = compile(r'(/comics/.+?)"')
    # Matches the link that directly follows a closing </a> and wraps
    # previous.gif.
    prevSearch = compile(
        r'</a><a href="(.+?)"><img src="/images/previous.gif"[^>]+?>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CrapIDrewOnMyLunchBreak(_BasicScraper):
    # Settings for crap.jinwicked.com.
    latestUrl = 'http://crap.jinwicked.com/'
    imageUrl = 'http://crap.jinwicked.com/%s'
    help = 'Index format: yyyy/mm/dd/name'
    imageSearch = compile(
        r'<img src="(http://crap.jinwicked.com/comics/.+?)"')
    # The back button image is served from comics.jinwicked.com.
    prevSearch = compile(
        r'<a href="(.+?)"><img src="http://comics.jinwicked.com/images/navigation_back.png"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CtrlAltDel(_BasicScraper):
    # Settings for cad-comic.com.  imageUrl is a property derived from
    # latestUrl rather than a fixed string, so a subclass only needs to
    # set latestUrl.
    latestUrl = 'http://www.cad-comic.com/cad/'
    help = 'Index format: yyyymmdd'
    # Image and link paths both contain an eight-digit segment.
    imageSearch = compile(r'<img src="(/comics/\w+/\d{8}\..+?)"')
    prevSearch = compile(r'<a href="(/\w+/\d{8})" class="nav-back')

    @property
    def imageUrl(self):
        # latestUrl followed by a literal '%s' placeholder.
        return ''.join((self.latestUrl, '%s'))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CtrlAltDelSillies(CtrlAltDel):
    # Variant of CtrlAltDel pointed at the /sillies/ section; everything
    # not set here is inherited from CtrlAltDel.
    latestUrl = 'http://www.cad-comic.com/sillies/'
    name = 'CtrlAltDel/Sillies'
|
||||||
|
|
||||||
|
class Curvy(_BasicScraper):
    # Settings for Curvy on c.urvy.org.
    latestUrl = 'http://www.c.urvy.org/'
    imageUrl = 'http://www.c.urvy.org/?date=%s'
    help = 'Index format: yyyymmdd'
    imageSearch = compile(r'(/c/.+?)"')
    prevSearch = compile(r'(/\?date=.+?)"><< Previous page')
|
||||||
|
|
||||||
|
|
||||||
|
def cloneManga(name, shortName, lastStrip=None):
    """Create a _BasicScraper subclass for one manga.clone-army.org comic.

    name      -- comic name; used in the class name ('CloneManga_<name>')
                 and the scraper name ('CloneManga/<name>').
    shortName -- file stem of the comic's PHP page on the site.
    lastStrip -- optional page index.  When given, the starter is a
                 constStarter on that page; otherwise a bounceStarter on
                 the base page using the "next.gif" link pattern.

    Returns the newly created class.
    """
    baseUrl = 'http://manga.clone-army.org/%s.php' % (shortName,)
    imageUrl = baseUrl + '?page=%s'

    if lastStrip is not None:
        starter = constStarter(imageUrl % (lastStrip,))
    else:
        nextSearch = compile(r'<a href="([^"]+)"><img src="next\.gif"')
        starter = bounceStarter(baseUrl, nextSearch)

    def namer(self, imageUrl, pageUrl):
        # Name is the page number from the query string, zero-padded to
        # three digits.
        pageNumber = int(getQueryParams(pageUrl)['page'][0])
        return '%03d' % (pageNumber,)

    attrs = dict(
        name='CloneManga/' + name,
        starter=starter,
        imageUrl=imageUrl,
        imageSearch=compile(r'<img src="(http://manga\.clone-army\.org/[^"]+)"'),
        prevSearch=compile(r'<a href="([^"]+)"><img src="previous\.gif"'),
        help='Index format: n',
        namer=namer,
    )
    return type('CloneManga_%s' % name, (_BasicScraper,), attrs)
|
||||||
|
|
||||||
|
|
||||||
|
# Scraper classes produced by cloneManga(); 'nana' and 'pxi' are given an
# explicit last strip, the others are not.
anm = cloneManga('AprilAndMay', 'anm')
kanami = cloneManga('Kanami', 'kanami')
momoka = cloneManga('MomokaCorner', 'momoka')
nana = cloneManga('NanasEverydayLife', 'nana', lastStrip='78')
pxi = cloneManga('PaperEleven', 'pxi', lastStrip='311')
t42r = cloneManga('Tomoyo42sRoom', 't42r')
penny = cloneManga('PennyTribute', 'penny')
|
||||||
|
|
||||||
|
|
||||||
|
class CatAndGirl(_BasicScraper):
    # Settings for catandgirl.com.
    latestUrl = 'http://catandgirl.com/'
    imageUrl = 'http://catandgirl.com/?p=%s'
    help = 'Index format: n (unpadded)'
    imageSearch = compile(
        r'<img src="(http://catandgirl.com/archive/.+?)"')
    # The link must be preceded by whitespace.
    prevSearch = compile(r'\s+<a href="(.+?)">◄ Previous</a>')
|
||||||
|
|
||||||
|
|
||||||
|
def comicsDotCom(name, section):
    """Create a _BasicScraper subclass for one comic hosted on comics.com.

    name    -- the comic's directory name on the site; also used in the
               class name ('ComicsDotCom_<name>') and the scraper name
               ('ComicsDotCom/<name>').
    section -- the site section the comic lives under (the callers in this
               module pass 'comics', 'creators' or 'wash').

    Returns the newly created class.
    """
    baseUrl = 'http://www.comics.com/%s/%s/archive/' % (section, name)

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # File name is the last path component of the page URL, cut at its
        # first dot.
        htmlname = pageUrl.split('/')[-1]
        return htmlname.split('.')[0]

    attrs = dict(
        name='ComicsDotCom/' + name,
        starter=indirectStarter(baseUrl, compile(r'<A HREF="(/[\w/]+?/archive/\w+?-\d{8}\.html)">(?:<IMG SRC="/[\w/]+?/images/arrow_right.gif|(?:<font[^>]*?>)?Next Day)')),
        # Fixed: the template used the literal text 'name-%s.html', so
        # every comic pointed at ".../archive/name-<date>.html".  The
        # archive pages matched by the patterns here are
        # "<word>-<8 digits>.html", so the comic's own name is used.
        imageUrl=baseUrl + name + '-%s.html',
        imageSearch=compile(r'SRC="(/[\w/]+?/archive/images/\w+?\d+\..+?)"'),
        prevSearch=compile(r'<A HREF="(/[\w/]+?/archive/\w+?-\d{8}\.html)">(?:<IMG SRC="/[\w/]+?/images/arrow_left.gif|(?:<font[^>]*?>)?Previous Day)'),
        help='Index format: yyyymmdd',
        namer=namer,
    )
    return type('ComicsDotCom_%s' % name, (_BasicScraper,), attrs)
|
||||||
|
|
||||||
|
|
||||||
|
# Scraper classes produced by comicsDotCom(name, section).  Each module
# attribute is named after the comic it is created for.
acaseinpoint = comicsDotCom('acaseinpoint', 'comics')
agnes = comicsDotCom('agnes', 'creators')
alleyoop = comicsDotCom('alleyoop', 'comics')
andycapp = comicsDotCom('andycapp', 'creators')
arlonjanis = comicsDotCom('arlonjanis', 'comics')
ballardst = comicsDotCom('ballardst', 'creators')
barkeaterlake = comicsDotCom('barkeaterlake', 'comics')
bc = comicsDotCom('bc', 'creators')
ben = comicsDotCom('ben', 'comics')
betty = comicsDotCom('betty', 'comics')
bignate = comicsDotCom('bignate', 'comics')
bonanas = comicsDotCom('bonanas', 'wash')
bornloser = comicsDotCom('bornloser', 'comics')
buckets = comicsDotCom('buckets', 'comics')
candorville = comicsDotCom('candorville', 'wash')
cheapthrills = comicsDotCom('cheapthrills', 'wash')
chickweed = comicsDotCom('chickweed', 'comics')
committed = comicsDotCom('committed', 'comics')
dilbert = comicsDotCom('dilbert', 'comics')
drabble = comicsDotCom('drabble', 'comics')
fatcats = comicsDotCom('fatcats', 'comics')
ferdnand = comicsDotCom('ferdnand', 'comics')
flightdeck = comicsDotCom('flightdeck', 'creators')
floandfriends = comicsDotCom('floandfriends', 'creators')
franknernest = comicsDotCom('franknernest', 'comics')
frazz = comicsDotCom('frazz', 'comics')
geech = comicsDotCom('geech', 'comics')
genepool = comicsDotCom('genepool', 'wash')
getfuzzy = comicsDotCom('getfuzzy', 'comics')
gofish = comicsDotCom('gofish', 'comics')
graffiti = comicsDotCom('graffiti', 'comics')
grandave = comicsDotCom('grandave', 'comics')
grizzwells = comicsDotCom('grizzwells', 'comics')
heathcliff = comicsDotCom('heathcliff', 'creators')
hedge = comicsDotCom('hedge', 'comics')
herbnjamaal = comicsDotCom('herbnjamaal', 'creators')
herman = comicsDotCom('herman', 'comics')
humblestumble = comicsDotCom('humblestumble', 'comics')
janesworld = comicsDotCom('janesworld', 'comics')
jumpstart = comicsDotCom('jumpstart', 'comics')
kitncarlyle = comicsDotCom('kitncarlyle', 'comics')
liberty = comicsDotCom('liberty', 'creators')
lilabner = comicsDotCom('lilabner', 'comics')
luann = comicsDotCom('luann', 'comics')
marmaduke = comicsDotCom('marmaduke', 'comics')
meg = comicsDotCom('meg', 'comics')
moderatelyconfused = comicsDotCom('moderatelyconfused', 'comics')
momma = comicsDotCom('momma', 'creators')
monty = comicsDotCom('monty', 'comics')
motley = comicsDotCom('motley', 'comics')
nancy = comicsDotCom('nancy', 'comics')
naturalselection = comicsDotCom('naturalselection', 'creators')
offthemark = comicsDotCom('offthemark', 'comics')
onebighappy = comicsDotCom('onebighappy', 'creators')
othercoast = comicsDotCom('othercoast', 'creators')
pcnpixel = comicsDotCom('pcnpixel', 'wash')
peanuts = comicsDotCom('peanuts', 'comics')
pearls = comicsDotCom('pearls', 'comics')
pibgorn = comicsDotCom('pibgorn', 'comics')
pickles = comicsDotCom('pickles', 'wash')
raisingduncan = comicsDotCom('raisingduncan', 'comics')
reality = comicsDotCom('reality', 'comics')
redandrover = comicsDotCom('redandrover', 'wash')
ripleys = comicsDotCom('ripleys', 'comics')
roseisrose = comicsDotCom('roseisrose', 'comics')
rubes = comicsDotCom('rubes', 'creators')
rudypark = comicsDotCom('rudypark', 'comics')
shirleynson = comicsDotCom('shirleynson', 'comics')
soup2nutz = comicsDotCom('soup2nutz', 'comics')
speedbump = comicsDotCom('speedbump', 'creators')
spotthefrog = comicsDotCom('spotthefrog', 'comics')
strangebrew = comicsDotCom('strangebrew', 'creators')
sunshineclub = comicsDotCom('sunshineclub', 'comics')
tarzan = comicsDotCom('tarzan', 'comics')
thatslife = comicsDotCom('thatslife', 'wash')
wizardofid = comicsDotCom('wizardofid', 'creators')
workingdaze = comicsDotCom('workingdaze', 'comics')
workingitout = comicsDotCom('workingitout', 'creators')
|
||||||
|
|
||||||
|
|
||||||
|
def creators(name, shortname):
    """Create a _BasicScraper subclass for one comic on creators.com.

    name      -- comic name; used in the class name ('Creators_<name>')
                 and the scraper name ('Creators/<name>').
    shortname -- value of the ComicName query parameter for this comic.

    imageUrl is None and the help text reads 'Indexing unsupported'.
    Returns the newly created class.
    """
    latestUrl = 'http://www.creators.com/comics_show.cfm?ComicName=%s' % (shortname,)
    attrs = dict(
        name='Creators/' + name,
        latestUrl=latestUrl,
        imageUrl=None,
        imageSearch=compile(r'<img alt="[^"]+" src="(\d{4}/.+?/.+?\..+?)">'),
        prevSearch=compile(r'<a href="(comics_show\.cfm\?next=\d+&ComicName=.+?)" Title="Previous Comic"'),
        help='Indexing unsupported',
    )
    return type('Creators_%s' % name, (_BasicScraper,), attrs)
|
||||||
|
|
||||||
|
|
||||||
|
# Scraper classes produced by creators(name, shortname).  Each module
# attribute is named after the comic's short name.
arc = creators(name='Archie', shortname='arc')
shg = creators(name='AskShagg', shortname='shg')
hev = creators(name='ForHeavensSake', shortname='hev')
rug = creators(name='Rugrats', shortname='rug')
sou = creators(name='StateOfTheUnion', shortname='sou')
din = creators(name='TheDinetteSet', shortname='din')
lil = creators(name='TheMeaningOfLila', shortname='lil')
wee = creators(name='WeePals', shortname='wee')
zhi = creators(name='ZackHill', shortname='zhi')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CyanideAndHappiness(_BasicScraper):
    # Settings for Cyanide and Happiness on explosm.net.
    latestUrl = 'http://www.explosm.net/comics'
    imageUrl = 'http://www.explosm.net/comics/%s'
    help = 'Index format: n (unpadded)'
    # The image is identified by its fixed alt text.
    imageSearch = compile(
        r'<img alt="Cyanide and Happiness, a daily webcomic" src="(http:\/\/www\.explosm\.net/db/files/Comics/\w+/\S+\.\w+)"')
    prevSearch = compile(r'<a href="(/comics/\d+/?)">< Previous</a>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CrimsonDark(_BasicScraper):
    # Attribute-only scraper definition for http://www.davidcsimon.com/crimsondark/.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://www.davidcsimon.com/crimsondark/'
    imageUrl = 'http://www.davidcsimon.com/crimsondark/index.php?view=comic&strip_id=%s'
    prevSearch = compile(r'<a href=[\'"](/crimsondark/index\.php\?view=comic&strip_id=\d+)[\'"]><img src=[\'"]themes/cdtheme/images/active_prev.png[\'"]')
    imageSearch = compile(r'src="(.+?strips/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CrimesOfCybeleCity(_BasicScraper):
    # Attribute-only scraper definition; starts at pulledpunches.com, while
    # the page and image patterns point at beaglespace.com.
    help = 'Index format: nn'
    latestUrl = 'http://www.pulledpunches.com/crimes/'
    imageUrl = 'http://www.beaglespace.com/pulledpunches/crimes/?p=%s'
    prevSearch = compile(r'<a href="(http://www\.beaglespace\.com/pulledpunches/crimes/\?p=\d+)"><img src="back1\.gif"')
    imageSearch = compile(r'<img src="(http://www\.beaglespace\.com/pulledpunches/crimes/comics/[^"]+)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CatsAndCameras(_BasicScraper):
    # Attribute-only scraper definition for http://catsncameras.com/cnc/.
    latestUrl = 'http://catsncameras.com/cnc/'
    # Fixed: the scheme was misspelled 'hhttp://', which made every
    # index-based URL built from this template invalid.
    imageUrl = 'http://catsncameras.com/cnc/?p=%s'
    imageSearch = compile(r'<img src="(http://catsncameras.com/cnc/comics/.+?)"')
    prevSearch = compile(r'<div class="nav-previous"><a href="(http://catsncameras.com/cnc/.+?)">')
    help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CowboyJedi(_BasicScraper):
    # Attribute-only scraper definition for http://www.cowboyjedi.com/.
    help = 'Index format: yyyy/mm/dd/strip-name'
    latestUrl = 'http://www.cowboyjedi.com/'
    imageUrl = 'http://www.cowboyjedi.com/%s'
    prevSearch = compile(r'<a href="(http://www.cowboyjedi.com/.+?)" class="navi navi-prev"')
    imageSearch = compile(r'<img src="(http://www.cowboyjedi.com/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CasuallyKayla(_BasicScraper):
    # Attribute-only scraper definition for http://casuallykayla.com/.
    help = 'Index format: nnn'
    latestUrl = 'http://casuallykayla.com/'
    imageUrl = 'http://casuallykayla.com/?p=%s'
    prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
    imageSearch = compile(r'<img src="(http://casuallykayla.com/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Collar6(_BasicScraper):
    # Attribute-only scraper definition for http://collar6.com/.
    help = 'Index format: yyyy/namednumber'
    latestUrl = 'http://collar6.com/'
    imageUrl = 'http://collar6.com/%s'
    prevSearch = compile(r' href="(http://collar6.com/\d+/\S+)">◄ Previous')
    imageSearch = compile(r'src="(http://collar6.com/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Chester5000XYV(_BasicScraper):
    # Attribute-only scraper definition for http://jessfink.com/Chester5000XYV/.
    help = 'Index format: nnn'
    latestUrl = 'http://jessfink.com/Chester5000XYV/'
    imageUrl = 'http://jessfink.com/Chester5000XYV/?p=%s'
    prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
    imageSearch = compile(r'<img src="(http://jessfink.com/Chester5000XYV/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CalamitiesOfNature(_BasicScraper):
    # Attribute-only scraper definition for http://www.calamitiesofnature.com/.
    help = 'Index format: nnn'
    latestUrl = 'http://www.calamitiesofnature.com/'
    imageUrl = 'http://www.calamitiesofnature.com/archive/?c=%s'
    prevSearch = compile(r'<a id="previous" href="(http://www.calamitiesofnature.com/archive/\?c\=\d+)">')
    # Accepts either a relative or an absolute archive image path.
    imageSearch = compile(r'<IMG SRC="(archive/\d+.+?|http://www.calamitiesofnature.com/archive/\d+.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Champ2010(_BasicScraper):
    # Attribute-only scraper definition for http://www.jedcollins.com/champ2010/.
    help = 'Index format: nnn'
    latestUrl = 'http://www.jedcollins.com/champ2010/'
    imageUrl = 'http://jedcollins.com/champ2010/?p=%s'
    prevSearch = compile(r'<div class="nav-previous"><a href="(http://jedcollins.com/champ2010/.+?)"')
    imageSearch = compile(r'<img src="(http://jedcollins.com/champ2010/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Chucklebrain(_BasicScraper):
    # Attribute-only scraper definition for http://www.chucklebrain.com/main.php.
    help = 'Index format: nnn'
    latestUrl = 'http://www.chucklebrain.com/main.php'
    imageUrl = 'http://www.chucklebrain.com/main.php?img=%s'
    prevSearch = compile(r'<a href=\'(/main.php\?img\=\d+)\'><img src=\'/images/previous.jpg\'')
    imageSearch = compile(r'<img src="(/images/strip.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CompanyY(_BasicScraper):
    # Attribute-only scraper definition for http://company-y.com/.
    help = 'Index format: yyyy/mm/dd/strip-name'
    latestUrl = 'http://company-y.com/'
    imageUrl = 'http://company-y.com/%s/'
    prevSearch = compile(r'<div class="nav-previous"><a href="(http://company-y.com/.+?)"')
    imageSearch = compile(r'<img src="(http://company-y.com/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CorydonCafe(_BasicScraper):
    # Scraper definition for http://corydoncafe.com/; the start page is
    # resolved through bounceStarter rather than a fixed latestUrl.
    help = 'Index format: nnn'
    imageUrl = 'http://corydoncafe.com/comic-%s.html'
    starter = bounceStarter('http://corydoncafe.com/', compile(r' href="(\./comic-\d+.html)">Next></a>'))
    prevSearch = compile(r' href="(\./comic-\d+.html)"><Previous</a>')
    imageSearch = compile(r'<img src=\'(\./comics/.+?)\' ')

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Name = last path segment of the page URL, cut at its first dot.
        page_file = pageUrl.split('/')[-1]
        return page_file.split('.')[0]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CraftedFables(_BasicScraper):
    # Attribute-only scraper definition; starts at craftedfables.com, while
    # the page and image patterns point at caf-fiends.net.
    help = 'Index format: nnn'
    latestUrl = 'http://www.craftedfables.com/'
    imageUrl = 'http://www.caf-fiends.net/craftedfables/?p=%s'
    prevSearch = compile(r'<a href="(http://www.caf-fiends.net/craftedfables/.+?)"><span class="prev">')
    imageSearch = compile(r'<img src="(http://www.caf-fiends.net/craftedfables/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Currhue(_BasicScraper):
    # Attribute-only scraper definition for http://www.currhue.com/.
    help = 'Index format: nnn'
    latestUrl = 'http://www.currhue.com/'
    imageUrl = 'http://www.currhue.com/?p=%s'
    prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.currhue.com/.+?)"')
    imageSearch = compile(r'<img src="(http://www.currhue.com/comics/.+?)"')
|
182
dosagelib/plugins/d.py
Normal file
182
dosagelib/plugins/d.py
Normal file
|
@ -0,0 +1,182 @@
|
||||||
|
from re import compile, IGNORECASE, MULTILINE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, bounceStarter, indirectStarter
|
||||||
|
from ..util import getQueryParams
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class DMFA(_BasicScraper):
    # Attribute-only scraper definition for http://www.missmab.com/.
    help = 'Index format: nnn (normally, some specials)'
    latestUrl = 'http://www.missmab.com/'
    imageUrl = 'http://missmab.com/Comics/Vol_%s.php'
    prevSearch = compile(r'<A HREF="(.+?)"><IMG SRC="(Images/comicprev.gif|../Images/comicprev.gif)" ', MULTILINE | IGNORECASE)
    imageSearch = compile(r'<IMG SRC="(Comics/.+?|Vol.+?)">', IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class DandyAndCompany(_BasicScraper):
    # Attribute-only scraper definition for http://www.dandyandcompany.com/.
    help = 'Index format: yyyy/mm/dd'
    latestUrl = 'http://www.dandyandcompany.com/'
    imageUrl = 'http://www.dandyandcompany.com/%s'
    prevSearch = compile(r'<a href="(.*)" class="prev"')
    imageSearch = compile(r'<img src="(.*?/strips/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class DarkWings(_BasicScraper):
    # Attribute-only scraper definition for http://www.flowerlarkstudios.com/dark-wings/.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://www.flowerlarkstudios.com/dark-wings/'
    imageUrl = 'http://www.flowerlarkstudios.com/dark-wings/archive.php?day=%s'
    prevSearch = compile(r"first_day.+?/(archive.+?)'.+?previous_day")
    imageSearch = compile(r'(comics/.+?)" W')
|
||||||
|
|
||||||
|
|
||||||
|
class DeathToTheExtremist(_BasicScraper):
    # Attribute-only scraper definition for http://www.dtecomic.com/.
    help = 'Index format: nnn'
    latestUrl = 'http://www.dtecomic.com/'
    imageUrl = 'http://www.dtecomic.com/?n=%s'
    prevSearch = compile(r'</a> <a href="(\?n=.*?)"><.+?/aprev.gif"')
    imageSearch = compile(r'"(comics/.*?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class DeepFried(_BasicScraper):
    # Attribute-only scraper definition for http://www.whatisdeepfried.com/.
    help = 'Index format: non'
    latestUrl = 'http://www.whatisdeepfried.com/'
    imageUrl = 'http://www.whatisdeepfried.com/%s'
    prevSearch = compile(r'"(http://www.whatisdeepfried.com/.+?)"><span class="prev">')
    imageSearch = compile(r'(http://www.whatisdeepfried.com/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class DoemainOfOurOwn(_BasicScraper):
    # Attribute-only scraper definition for http://www.doemain.com/.
    help = 'Index format: yyyy-mm-dd'
    latestUrl = 'http://www.doemain.com/'
    imageUrl = 'http://www.doemain.com/index.cgi/%s'
    prevSearch = compile(r'<a href="(/index\.cgi/\d{4}-\d{2}-\d{2})"><img width="\d+" height="\d+" border="\d+" alt="Previous Strip"')
    imageSearch = compile(r"<img border='0' width='\d+' height='\d+' src='(/strips/\d{4}/\d{6}-[^\']+)'")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class DrFun(_BasicScraper):
    # Attribute-only scraper definition; latestUrl is pinned to one specific
    # archive page (ar00502.htm) on www.ibiblio.org.
    help = 'Index format: nnnnn'
    latestUrl = 'http://www.ibiblio.org/Dave/ar00502.htm'
    imageUrl = 'http://www.ibiblio.org/Dave/ar%s.htm'
    prevSearch = compile(r'<A HREF="(.+?)">Previous Week,')
    imageSearch = compile(r'<A HREF= "(Dr-Fun/df\d{6}/df.+?)">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Dracula(_BasicScraper):
    # Attribute-only scraper definition for http://draculacomic.net/.
    help = 'Index format: nnn'
    latestUrl = 'http://draculacomic.net/'
    imageUrl = 'http://draculacomic.net/comic.php?comicID=%s'
    prevSearch = compile(r' <a class="archivelink" href="(.+?)">« Prev</a>')
    imageSearch = compile(r'<img src="(comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class DragonTails(_BasicScraper):
    # Attribute-only scraper definition for http://www.dragon-tails.com/.
    help = 'Index format: yyyy-mm-dd'
    latestUrl = 'http://www.dragon-tails.com/'
    imageUrl = 'http://www.dragon-tails.com/archive.php?date=%s'
    prevSearch = compile(r'"(archive.+?)">.+n_2')
    imageSearch = compile(r'"(newcomic/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class DreamKeepersPrelude(_BasicScraper):
    # Attribute-only scraper definition for http://www.dreamkeeperscomic.com/Prelude.php.
    help = 'Index format: n'
    latestUrl = 'http://www.dreamkeeperscomic.com/Prelude.php'
    imageUrl = 'http://www.dreamkeeperscomic.com/Prelude.php?pg=%s'
    prevSearch = compile(r'(Prelude.php\?pg=.+?)"')
    imageSearch = compile(r'(images/PreludeNew/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class Drowtales(_BasicScraper):
    # Attribute-only scraper definition for http://www.drowtales.com/mainarchive.php.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://www.drowtales.com/mainarchive.php'
    imageUrl = 'http://www.drowtales.com/mainarchive.php?location=%s'
    prevSearch = compile(r'<a href="mainarchive.php(\?location=\d+)"><img src="[^"]*previousday\.gif"')
    imageSearch = compile(r'src=".(/tmpmanga/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class DungeonCrawlInc(_BasicScraper):
    # Attribute-only scraper definition for http://www.dungeoncrawlinc.com/latest.html.
    help = 'Index format: nnn.html'
    latestUrl = 'http://www.dungeoncrawlinc.com/latest.html'
    imageUrl = 'http://www.dungeoncrawlinc.com/comic%s'
    prevSearch = compile(r'<a href="(.+?)">.+?back')
    imageSearch = compile(r'src="(.+?/DCI_.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class DieselSweeties(_BasicScraper):
    # Scraper definition for http://www.dieselsweeties.com/.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://www.dieselsweeties.com/'
    imageUrl = 'http://www.dieselsweeties.com/archive/%s'
    prevSearch = compile(r'href="(/archive/.+?)">(<img src="http://www.dieselsweeties.com/ximages/blackbackarrow160.png|previous webcomic)')
    imageSearch = compile(r'src="(/hstrips/.+?)"')

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Take the numeric stem of the image file name and format it as
        # 'sw' followed by the number, zero-padded to at least two digits.
        image_file = imageUrl.split('/')[-1]
        number = int(image_file.split('.')[0])
        return 'sw%02d' % (number,)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class DominicDeegan(_BasicScraper):
    # Scraper definition for http://www.dominic-deegan.com/.
    help = 'Index format: yyyy-mm-dd'
    latestUrl = 'http://www.dominic-deegan.com/'
    imageUrl = 'http://www.dominic-deegan.com/view.php?date=%s'
    prevSearch = compile(r'"(view.php\?date=.+?)".+?prev21')
    imageSearch = compile(r'<img src="(.+?save-as=.+?)" alt')

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Name = first 'save-as' query value of the image URL with its
        # last dot-separated extension removed.
        save_as = getQueryParams(imageUrl)['save-as'][0]
        return save_as.rsplit('.', 1)[0]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class DorkTower(_BasicScraper):
    # Attribute-only scraper definition for http://www.dorktower.com/;
    # imageUrl is None, so no index URL template is defined.
    help = 'Index format: None'
    latestUrl = 'http://www.dorktower.com/'
    imageUrl = None
    prevSearch = compile(r'<a href="(/previous\.php\?[^"]+)"')
    imageSearch = compile(r'<img src="(http://www\.dorktower\.com/images/comics/[^"]+)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class DresdenCodak(_BasicScraper):
    # Attribute-only scraper definition for http://dresdencodak.com/; defines
    # both latestUrl and an indirectStarter, and no index URL template.
    latestUrl = 'http://dresdencodak.com/'
    starter = indirectStarter('http://dresdencodak.com/', compile(r'<div id="preview"><a href="http://dresdencodak.com/(\d+/\d+/\d+/.*?)">'))
    imageUrl = None
    prevSearch = compile(r'<a href="http://dresdencodak.com(/.*?)"><img src=http://dresdencodak.com/m_prev.png>')
    imageSearch = compile(r'<img src="http://dresdencodak.com(/comics/.*?\.jpg)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class DonkBirds(_BasicScraper):
    # Attribute-only scraper definition for http://www.donkbirds.com/.
    help = 'Index format: yyyy-mm-dd'
    latestUrl = 'http://www.donkbirds.com/'
    imageUrl = 'http://www.donkbirds.com/index.php?date=%s'
    prevSearch = compile(r'<a href="(.+?)">Previous</a>')
    imageSearch = compile(r'<img src="(strips/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class DrawnByDrunks(_BasicScraper):
    # Scraper definition for http://www.drawnbydrunks.co.uk/; the start page
    # is resolved through bounceStarter rather than a fixed latestUrl.
    help = 'Index format: nnn'
    imageUrl = 'http://www.drawnbydrunks.co.uk/?p=%s'
    starter = bounceStarter('http://www.drawnbydrunks.co.uk/', compile(r'<div class="nav-last"><a href="(.+?)">'))
    prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
    imageSearch = compile(r'<img src="(http://www.drawnbydrunks.co.uk/comics/.+?)"')

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Name = whatever follows the last '=' in the page URL.
        return pageUrl.rsplit('=', 1)[-1]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class DeathCord(_BasicScraper):
    # Attribute-only scraper definition for http://deathchord.com/index.php.
    help = 'Index format: nnn'
    latestUrl = 'http://deathchord.com/index.php'
    imageUrl = 'http://deathchord.com/__.php?comicID=%s'
    prevSearch = compile(r'</a>?.+?<a href="(http://deathchord.com/.+?)"><img[^>]+?alt="Previous" />')
    imageSearch = compile(r'<img src="(http://deathchord.com/kill/\d+.+?)"')
|
2276
dosagelib/plugins/drunkduck.py
Normal file
2276
dosagelib/plugins/drunkduck.py
Normal file
File diff suppressed because it is too large
Load diff
182
dosagelib/plugins/e.py
Normal file
182
dosagelib/plugins/e.py
Normal file
|
@ -0,0 +1,182 @@
|
||||||
|
from re import compile, IGNORECASE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, indirectStarter
|
||||||
|
|
||||||
|
|
||||||
|
class EerieCuties(_BasicScraper):
    # Attribute-only scraper definition for http://www.eeriecuties.com/.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://www.eeriecuties.com/'
    imageUrl = 'http://www.eeriecuties.com/d/%s.html'
    prevSearch = compile(r'(/d/.+?.html).+?/previous_day.gif')
    imageSearch = compile(r'(/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class EdgeTheDevilhunter(_BasicScraper):
    # Attribute-only scraper definition for http://www.edgethedevilhunter.com/,
    # registered under an explicit 'KeenSpot/' name.
    name = 'KeenSpot/EdgeTheDevilhunter'
    help = 'Index format: mmddyyyy or name'
    latestUrl = 'http://www.edgethedevilhunter.com/'
    imageUrl = 'http://www.edgethedevilhunter.com/comics/%s'
    prevSearch = compile(r'(http://www.edgethedevilhunter.com/comics/.+?)"><span class="prev')
    imageSearch = compile(r'(http://www.edgethedevilhunter.com/comics/.+?)" alt')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Eriadan(_BasicScraper):
    # Scraper definition for http://www.shockdom.com/eriadan/; the start
    # page is resolved through indirectStarter.
    help = 'Index format: nnn (unpadded)'
    imageUrl = 'http://www.shockdom.com/eriadan/?p=%s'
    starter = indirectStarter('http://www.shockdom.com/eriadan/', compile(r'<ul class="latest2">[^<]+?<li class="list-title"><a href="(http://www\.shockdom.com/eriadan/\?p=.+?)"'))
    prevSearch = compile(r"<link rel='prev' title='.+?' href='http://www\.shockdom\.com/eriadan/(\?p=.+?)'")
    imageSearch = compile(r'title="[^"]+?" src="http://www\.shockdom\.com/eriadan/(wp-content/uploads/.+?)"')

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Name = the number after 'p=' in the page URL, as a plain decimal.
        found = compile(r'p=(\d+)').search(pageUrl)
        post_id = int(found.group(1))
        return '%d' % (post_id)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ElGoonishShive(_BasicScraper):
    # Attribute-only scraper definition for http://www.egscomics.com/,
    # registered under an explicit 'KeenSpot/' name.
    name = 'KeenSpot/ElGoonishShive'
    help = 'Index format: yyyy-mm-dd'
    latestUrl = 'http://www.egscomics.com/'
    imageUrl = 'http://www.egscomics.com/?date=%s'
    prevSearch = compile(r"<a href='(/\?date=.+?)'.+?arrow_prev.gif")
    imageSearch = compile(r"'(comics/.+?)'")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ElGoonishShiveNP(_BasicScraper):
    # Attribute-only scraper definition for http://www.egscomics.com/egsnp/,
    # registered under an explicit 'KeenSpot/' name.
    name = 'KeenSpot/ElGoonishShiveNP'
    help = 'Index format: yyyy-mm-dd'
    latestUrl = 'http://www.egscomics.com/egsnp/'
    imageUrl = 'http://www.egscomics.com/egsnp/?date=%s'
    prevSearch = compile(r'<a href=\'(.+?)\'[^>]+?onmouseover=\'\$\("navimg(6|2)"\)')
    imageSearch = compile(r'<div class=\'comic2\'><img src=\'(comics/\d{4}/\d{2}.+?)\'')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ElsieHooper(_BasicScraper):
    # Attribute-only scraper definition for http://www.elsiehooper.com/todaysserial.htm.
    help = 'Index format: nnn'
    latestUrl = 'http://www.elsiehooper.com/todaysserial.htm'
    imageUrl = 'http://www.elsiehooper.com/comics/comic%s.htm'
    # Accepts the previous-button <IMG> with or without a leading height attribute.
    prevSearch = compile(r'<A href="(.+?)"><IMG (height=27 src="/images/previous.gif"|src="/images/previous.gif")', IGNORECASE)
    imageSearch = compile(r'<img src="(/comics_/.+?)">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class EmergencyExit(_BasicScraper):
    # Attribute-only scraper definition for http://www.eecomics.net/;
    # imageUrl is an empty string, so no index URL template is defined.
    help = 'God help us now!'
    latestUrl = 'http://www.eecomics.net/'
    imageUrl = ''
    prevSearch = compile(r'START.+?"(.+?)"')
    imageSearch = compile(r'"(comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ErrantStory(_BasicScraper):
    # Attribute-only scraper definition for http://www.errantstory.com/.
    help = 'Index format: yyyy-mm-dd'
    latestUrl = 'http://www.errantstory.com/'
    imageUrl = 'http://www.errantstory.com/archive.php?date=%s'
    prevSearch = compile(r'><a href="(.+?)"><Previous</a>')
    imageSearch = compile(r'<img[^>]+?src="([^"]*?comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class EternalVenture(_BasicScraper):
    # Attribute-only scraper definition; starts at pulledpunches.com, while
    # the page and image patterns point at beaglespace.com.
    help = 'Index format: nn'
    latestUrl = 'http://www.pulledpunches.com/venture/'
    imageUrl = 'http://www.beaglespace.com/pulledpunches/venture/?p=%s'
    prevSearch = compile(r'id="prev"><a href="(http://www.beaglespace.com/pulledpunches/venture/.+?)" ')
    imageSearch = compile(r'<img src="(http://www.beaglespace.com/pulledpunches/venture/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Evercrest(_BasicScraper):
    # Attribute-only scraper definition; latestUrl is pinned to one specific
    # archive page (20030308) on www.evercrest.com.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://www.evercrest.com/archives/20030308'
    imageUrl = 'http://www.evercrest.com/archives/%s'
    prevSearch = compile(r'<a.+?href="(http://www.evercrest.com/archives/\d+)">< Previous')
    imageSearch = compile(r'<img.+?src="([^"]*/(images/oldstrips|archives/i)/[^"]*)"')
|
||||||
|
|
||||||
|
|
||||||
|
class EverybodyLovesEricRaymond(_BasicScraper):
    # Attribute-only scraper definition for http://geekz.co.uk/lovesraymond/.
    help = 'Index format: name-of-old-comic'
    latestUrl = 'http://geekz.co.uk/lovesraymond/'
    imageUrl = 'http://geekz.co.uk/lovesraymond/archive/%s'
    prevSearch = compile(r'« <a href="(http://geekz.co.uk/lovesraymond/archive/[^/"]*)">')
    # The host prefix is optional in the pattern; the '/images' segment is required.
    imageSearch = compile(r'<img src="((?:http://geekz.co.uk)?/lovesraymond/wp-content(?:/images)/ep\d+\w?\.jpg)"', IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
class EvilDiva(_BasicScraper):
    # Attribute-only scraper definition for http://www.evildivacomics.com/.
    help = 'Index format: cpn (unpadded)'
    latestUrl = 'http://www.evildivacomics.com/'
    imageUrl = 'http://www.evildivacomics.com/%s.html'
    prevSearch = compile(r'http.+?com/(.+?)".+?"prev')
    imageSearch = compile(r'(/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Exiern(_BasicScraper):
    # Attribute-only scraper definition for http://www.exiern.com/.
    help = 'Index format: ChapterName-StripName'
    latestUrl = 'http://www.exiern.com/'
    imageUrl = 'http://www.exiern.com/comic/%s'
    prevSearch = compile(r'<a href="(http://www.exiern.com/.+?)" class="navi navi-prev"')
    imageSearch = compile(r'<img src="(http://www.exiern.com/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ExiernDarkReflections(_BasicScraper):
    # Attribute-only scraper definition for http://darkreflections.exiern.com/.
    help = 'Index format: n'
    latestUrl = 'http://darkreflections.exiern.com/'
    imageUrl = 'http://darkreflections.exiern.com/index.php?strip_id=%s'
    prevSearch = compile(r'First.+?(/index.+?)".+?prev')
    imageSearch = compile(r'"(istrip.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ExtraLife(_BasicScraper):
    # Attribute-only scraper definition for http://www.myextralife.com/.
    help = 'Index format: mmddyyyy'
    latestUrl = 'http://www.myextralife.com/'
    imageUrl = 'http://www.myextralife.com/comic/%s/'
    prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.myextralife.com/comic/.+?)"')
    imageSearch = compile(r'<img src="(http://www.myextralife.com/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class EyeOfRamalach(_BasicScraper):
    # Attribute-only scraper definition for http://theeye.katbox.net/.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://theeye.katbox.net/'
    imageUrl = 'http://theeye.katbox.net/index.php?strip_id=%s'
    prevSearch = compile(r'(index.php\?strip_id=.+?)".+?navigation_back')
    imageSearch = compile(r'="(.+?strips/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class EarthsongSaga(_BasicScraper):
    # Scraper definition for http://www.earthsongsaga.com/; defines both
    # latestUrl and an indirectStarter, and no index URL template.
    latestUrl = 'http://www.earthsongsaga.com/'
    starter = indirectStarter('http://www.earthsongsaga.com/',
                              compile(r'a href="(.+?)".+?current-page.jpg'))
    imageUrl = None
    prevSearch = compile(r'<a href="([^"]+\.html)"[^>]*><img src="(?:(?:\.\.)?/)?images/testing/prev')
    imageSearch = compile(r'<img src="((?:\.\./)?images/vol\d+/ch\d+/\d+\.\w+)"')

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Pull volume, chapter and page numbers out of the image path and
        # format each zero-padded to at least two digits.
        imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(\d+)\.\w+$', IGNORECASE).search(imageUrl)
        volume, chapter, page = (int(part) for part in imgmatch.groups())
        return 'vol%02d_ch%02d_%02d' % (volume, chapter, page)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ExploitationNow(_BasicScraper):
    # Attribute-only scraper definition for http://exploitationnow.com/.
    help = 'Index format: yyyy-mm-dd'
    latestUrl = 'http://exploitationnow.com/'
    imageUrl = 'http://exploitationnow.com/comic.php?date=%s'
    prevSearch = compile(r' <a href="(.+?)" title="\[Back\]">')
    imageSearch = compile(r'src="(comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Ellerbisms(_BasicScraper):
    # Attribute-only scraper definition for http://www.ellerbisms.com/.
    help = 'Index format: nnn'
    latestUrl = 'http://www.ellerbisms.com/'
    imageUrl = 'http://www.ellerbisms.com/?p=%s'
    prevSearch = compile(r'<a href="(http://www.ellerbisms.com/.+?)"><span class="prev">')
    imageSearch = compile(r'<img src="(http://www.ellerbisms.com/comics/.+?)"')
|
145
dosagelib/plugins/f.py
Normal file
145
dosagelib/plugins/f.py
Normal file
|
@ -0,0 +1,145 @@
|
||||||
|
from re import compile, IGNORECASE, MULTILINE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, indirectStarter
|
||||||
|
|
||||||
|
|
||||||
|
class FalconTwin(_BasicScraper):
    # Attribute-only scraper definition for http://www.falcontwin.com/.
    help = 'Index format: nnn'
    latestUrl = 'http://www.falcontwin.com/'
    imageUrl = 'http://www.falcontwin.com/index.html?strip=%s'
    prevSearch = compile(r'"prev"><a href="(index.+?)"')
    imageSearch = compile(r'"(strips/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class FauxPas(_BasicScraper):
    # Attribute-only scraper definition for http://www.ozfoxes.net/cgi/pl-fp1.cgi.
    help = 'Index format: nnn'
    latestUrl = 'http://www.ozfoxes.net/cgi/pl-fp1.cgi'
    imageUrl = 'http://www.ozfoxes.net/cgi/pl-fp1.cgi?%s'
    prevSearch = compile(r'<a href="(pl-fp1\.cgi\?\d+)">Previous Strip')
    imageSearch = compile(r'<img .*src="(.*fp/fp.*(png|jpg|gif))"')
|
||||||
|
|
||||||
|
|
||||||
|
class FeyWinds(_BasicScraper):
    # Attribute-only scraper definition for http://kitsune.rydia.net/; the
    # start page is resolved through indirectStarter.
    help = 'Index format: n (unpadded)'
    imageUrl = 'http://kitsune.rydia.net/comic/page.php?id=%s'
    starter = indirectStarter('http://kitsune.rydia.net/index.html',
                              compile(r'(comic/page.php\?id.+?)"'))
    prevSearch = compile(r"(page.php\?id=.+?)'.+?navprevious.png")
    imageSearch = compile(r"(../comic/pages//.+?)'")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class FightCastOrEvade(_BasicScraper):
    # Attribute-only scraper definition for http://www.fightcastorevade.net/.
    help = 'Index format: yyyymmdd.html'
    latestUrl = 'http://www.fightcastorevade.net/'
    imageUrl = 'http://www.fightcastorevade.net/d/%s'
    prevSearch = compile(r'"(.+?/d/.+?)".+?previous')
    imageSearch = compile(r'<img src="(http://www.fightcastorevade.net/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class FilibusterCartoons(_BasicScraper):
    # Attribute-only scraper definition for http://www.filibustercartoons.com/.
    help = 'Index format: yyyy/mm/dd/name'
    latestUrl = 'http://www.filibustercartoons.com/'
    imageUrl = 'http://www.filibustercartoons.com/index.php/%s'
    prevSearch = compile(r'<a href="(.+?)"><img src=\'(.+?/arrow-left.gif)\'')
    imageSearch = compile(r'<img src="(http://www.filibustercartoons.com/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class FlakyPastry(_BasicScraper):
    # Attribute-only scraper definition for
    # http://flakypastry.runningwithpencils.com/index.php.
    latestUrl = 'http://flakypastry.runningwithpencils.com/index.php'
    # Fixed: this is a plain URL template, not a regular expression, so the
    # '?' must not be escaped. The previous '\?' was an invalid string escape
    # that left a literal backslash in every generated URL.
    imageUrl = 'http://flakypastry.runningwithpencils.com/comic.php?strip_id=%s'
    imageSearch = compile(r'<img src="(comics/.+?)"')
    prevSearch = compile(r'<a href="(.+?)".+?btn_back')
    help = 'Index format: nnnn'
|
||||||
|
|
||||||
|
|
||||||
|
class Flipside(_BasicScraper):
    # Attribute-only scraper definition for http://www.flipsidecomics.com/comic.php.
    help = 'Index format: nnnn'
    latestUrl = 'http://www.flipsidecomics.com/comic.php'
    imageUrl = 'http://www.flipsidecomics.com/comic.php?i=%s'
    prevSearch = compile(r'<A HREF="(comic.php\?i=\d+?)"><')
    imageSearch = compile(r'<IMG SRC="(comic/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class Footloose(_BasicScraper):
    # Attribute-only scraper definition for http://footloosecomic.com/footloose/today.php.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://footloosecomic.com/footloose/today.php'
    imageUrl = 'http://footloosecomic.com/footloose/pages.php?page=%s'
    prevSearch = compile(r'(?:first.+?[^>]).+?(/footloose/.+?)".+?(?:prev)')
    # Alternative pattern kept from the original, disabled:
    # prevSearch = compile(r'(?:first.+?[^>]).+?(/footloose/.+?html).+?(?:prev|Prev)')
    imageSearch = compile(r'<img src="/footloose/(.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class FragileGravity(_BasicScraper):
    # Attribute-only scraper definition for http://www.fragilegravity.com/.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://www.fragilegravity.com/'
    imageUrl = 'http://www.fragilegravity.com/core.php?archive=%s'
    # The pattern spans a line break between the href and the onMouseover attribute.
    prevSearch = compile(r'<A HREF="(.+?)"\nonMouseover="window.status=\'Previous Strip', MULTILINE | IGNORECASE)
    imageSearch = compile(r'<IMG SRC="(strips/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Freefall(_BasicScraper):
    # Attribute-only scraper definition for http://freefall.purrsia.com/default.htm;
    # the imageUrl template carries two placeholders.
    help = 'Index format: nnnn/nnnnn'
    latestUrl = 'http://freefall.purrsia.com/default.htm'
    imageUrl = 'http://freefall.purrsia.com/ff%s/fc%s.htm'
    prevSearch = compile(r'<A HREF="(/ff\d+/.+?.htm)">Previous</A>')
    imageSearch = compile(r'<img src="(/ff\d+/.+?.\w{3,4})"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class FantasyRealms(_BasicScraper):
    # Attribute-only scraper definition for http://www.fantasyrealmsonline.com/;
    # the start page is resolved through indirectStarter.
    help = 'Index format: nnn'
    imageUrl = 'http://www.fantasyrealmsonline.com/manga/%s.php'
    starter = indirectStarter('http://www.fantasyrealmsonline.com/',
                              compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE))
    prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
    imageSearch = compile(r'<img src="(\d{1,4}.\w{3,4})" width="540"', IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class FullFrontalNerdity(_BasicScraper):
    # Attribute-only scraper definition for
    # http://nodwick.humor.gamespy.com/ffn/index.php; imageUrl is None, so
    # no index URL template is defined.
    latestUrl = 'http://nodwick.humor.gamespy.com/ffn/index.php'
    imageUrl = None
    prevSearch = compile(r'<a href="(index.php\?date=[0-9-]*)"><img src="back.jpg"', IGNORECASE)
    imageSearch = compile(r'<img src="(http://nodwick.humor.gamespy.com/ffn/strips/[^"]*)"', IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
class FunInJammies(_BasicScraper):
    # Attribute-only scraper definition for http://www.funinjammies.com/.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://www.funinjammies.com/'
    imageUrl = 'http://www.funinjammies.com/comic.php?issue=%s'
    prevSearch = compile(r'(/comic.php.+?)" id.+?prev')
    imageSearch = compile(r'(/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Fallen(_BasicScraper):
    # Scraper definition for http://www.fallencomic.com/; the start page is
    # resolved through indirectStarter, and page URLs take three placeholders
    # (part, comic number, part).
    help = 'Index format: nn-m (comicNumber-partNumber)'
    imageUrl = 'http://www.fallencomic.com/pages/part%s/%s-p%s.htm'
    starter = indirectStarter('http://www.fallencomic.com/fal-page.htm',
                              compile(r'\(NEW \d{2}/\d{2}/\d{2}\)\s*\n*\s*<a href="(pages/part\d+/\d+-p\d+\.htm)">\d+</a>', MULTILINE))
    prevSearch = compile(r'<A HREF="(.+?)"><FONT FACE="Courier">Back', IGNORECASE)
    imageSearch = compile(r'<IMG SRC="(page/.+?)"', IGNORECASE)

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # num: text before the first '-' in the page file name.
        # part: text between the last '-' in the URL and the following dot.
        page_file = pageUrl.split('/')[-1]
        num = page_file.split('-')[0]
        tail = pageUrl.split('-')[-1]
        part = tail.split('.')[0]
        return '%s-%s' % (part, num)

    def setStrip(self, index):
        # index is expected as '<number>-<part>'; any other number of
        # '-'-separated fields raises ValueError from the unpacking.
        number, part = index.split('-')
        self.currentUrl = self.imageUrl % (part, number, part)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class FoxTails(_BasicScraper):
    # Attribute-only scraper definition for http://www.magickitsune.com/strips/current.html.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://www.magickitsune.com/strips/current.html'
    imageUrl = 'http://www.magickitsune.com/strips/%s'
    prevSearch = compile(r'(?<=first.gif)*(?<=</td>)*<a.*href=\'(.+?)\'.+?<img.+?src=\'../img/prev.gif\'>', IGNORECASE)
    imageSearch = compile(r'<img src=(img/.+?)[ |>]', IGNORECASE)
|
140
dosagelib/plugins/g.py
Normal file
140
dosagelib/plugins/g.py
Normal file
|
@ -0,0 +1,140 @@
|
||||||
|
from re import compile
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, indirectStarter
|
||||||
|
|
||||||
|
|
||||||
|
class Galaxion(_BasicScraper):
    # Attribute-only scraper definition for http://galaxioncomics.com/.
    help = 'Index format: non'
    latestUrl = 'http://galaxioncomics.com/'
    imageUrl = 'http://galaxioncomics.com/?p=%s'
    prevSearch = compile(r'\| <a href="http://galaxioncomics.com/(\?p=.+?)".+?vious.gif')
    imageSearch = compile(r'(wordpress/comics/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class Garanos(_BasicScraper):
    # Attribute-only scraper definition for http://www.garanos.com/.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://www.garanos.com/'
    imageUrl = 'http://www.garanos.com/pages/page-%s'
    # NOTE(review): 'page-...' matches exactly three characters after the
    # dash; confirm that is intended given the unpadded index format.
    prevSearch = compile(r'<a href="(http://www.garanos.com/pages/page-.../)">◄ Previous<')
    imageSearch = compile(r'<img src=.+?(/pages/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class GUComics(_BasicScraper):
    # Attribute-only scraper definition for http://www.gucomics.com/comic/.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://www.gucomics.com/comic/'
    imageUrl = 'http://www.gucomics.com/comic/?cdate=%s'
    prevSearch = compile(r'<A href="(/comic/\?cdate=\d+)"><IMG src="/images/cnav_prev')
    imageSearch = compile(r'<IMG src="(/comics/\d{4}/gu_.*?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class GenrezvousPoint(_BasicScraper):
|
||||||
|
latestUrl = 'http://genrezvouspoint.com/'
|
||||||
|
imageUrl = 'http://genrezvouspoint.com/index.php?comicID=%s'
|
||||||
|
imageSearch = compile(r'<img src=\'(comics/.+?)\'')
|
||||||
|
prevSearch = compile(r' <a[^>]+?href="(.+?)">PREVIOUS</a>')
|
||||||
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class GirlGenius(_BasicScraper):
    """Scraper definition for Girl Genius (girlgeniusonline.com).

    The strip index is the comic date in ``yyyymmdd`` form, passed as the
    ``date`` query parameter.
    """
    latestUrl = 'http://girlgeniusonline.com/comic.php'
    # URL template, not a regular expression: the '?' must not be escaped,
    # otherwise a literal backslash ends up in the requested URL.
    imageUrl = 'http://www.girlgeniusonline.com/comic.php?date=%s'
    imageSearch = compile(r"(/ggmain/strips/.+?)'")
    prevSearch = compile(r"</a> <a href=.+?(/comic.php\?date=.+?)'.+?Previous")
    help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class GirlsWithSlingshots(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.daniellecorsetto.com/gws.html'
|
||||||
|
imageUrl = 'http://www.daniellecorsetto.com/GWS%s.html'
|
||||||
|
imageSearch = compile(r'<img src="(images/gws/GWS\d{3}.jpg)"')
|
||||||
|
prevSearch = compile(r'(archive.php\?today=\d{3}&comic=\d{3})"[^>]*><img[^>]+src="images/gwsmenu/back_off.jpg"')
|
||||||
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Girly(_BasicScraper):
|
||||||
|
latestUrl = 'http://girlyyy.com/'
|
||||||
|
imageUrl = 'http://girlyyy.com/go/%s'
|
||||||
|
imageSearch = compile(r'<img src="(http://girlyyy.com/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'<a href="(.+?)"> < prev')
|
||||||
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Goats(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.goats.com/'
|
||||||
|
imageUrl = 'http://www.goats.com/archive/%s.html'
|
||||||
|
imageSearch = compile(r'<img.+?src="(/comix/.+?)"')
|
||||||
|
prevSearch = compile(r'<a href="(/archive/\d{6}.html)" class="button" title="go back">')
|
||||||
|
help = 'Index format: yymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class GoneWithTheBlastwave(_BasicScraper):
|
||||||
|
starter = indirectStarter('http://www.blastwave-comic.com/index.php?p=comic&nro=1',
|
||||||
|
compile(r'href="(index.php\?p=comic&nro=\d+)"><img src="images/page/default/latest'))
|
||||||
|
imageUrl = 'http://www.blastwave-comic.com/index.php?p=comic&nro=%s'
|
||||||
|
imageSearch = compile(r'<img.+src=".+(/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'href="(index.php\?p=comic&nro=\d+)"><img src="images/page/default/previous')
|
||||||
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
return '%02d' % int(compile(r'nro=(\d+)').search(pageUrl).group(1))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class GunnerkrigCourt(_BasicScraper):
    """Scraper definition for Gunnerkrigg Court (www.gunnerkrigg.com).

    The strip index is the numeric ``comicID`` query parameter.
    """
    latestUrl = 'http://www.gunnerkrigg.com/index2.php'
    # URL template, not a regular expression: the '?' must not be escaped,
    # otherwise a literal backslash ends up in the requested URL.
    imageUrl = 'http://www.gunnerkrigg.com/archive_page.php?comicID=%s'
    imageSearch = compile(r'<img src="(.+?//comics/.+?)"')
    prevSearch = compile(r'<.+?(/archive_page.php\?comicID=.+?)".+?prev')
    help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Gunshow(_BasicScraper):
    """Scraper definition for Gunshow (gunshowcomic.com).

    Strip pages live at ``/d/<index>.html`` where the index is a run of
    digits (see ``prevSearch``).
    """
    latestUrl = 'http://gunshowcomic.com/'
    imageUrl = 'http://gunshowcomic.com/d/%s.html'
    imageSearch = compile(r'src="(/comics/.+?)"')
    prevSearch = compile(r'(/d/\d+\.html)"><img[^>]+?src="/images/previous_day')
    # The index is substituted into '/d/%s.html' and the site's own links are
    # digits only, so the index contains no slashes.
    help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class GleefulNihilism(_BasicScraper):
    """Scraper definition for Gleeful Nihilism (gleefulnihilism.com).

    The strip index is the ``yyyy/mm/dd/strip-name`` path below ``/comics/``.
    """
    latestUrl = 'http://gleefulnihilism.com/'
    # The template previously embedded one specific strip's path
    # ('2009/12/01/just-one-of-the-perks/') in front of the placeholder, so
    # any index produced a nonsensical URL. The index itself carries the
    # date and strip name.
    imageUrl = 'http://gleefulnihilism.com/comics/%s'
    imageSearch = compile(r'<img src="(http://gleefulnihilism.com/comics/.+?)"')
    prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
    help = 'Index format: yyyy/mm/dd/strip-name'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class GastroPhobia(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.gastrophobia.com/'
|
||||||
|
imageUrl = 'http://www.gastrophobia.com/index.php?date=%s'
|
||||||
|
imageSearch = compile(r'<img src="(http://gastrophobia.com/comix/[^"]+)"[^>]*>(?!<br>)')
|
||||||
|
prevSearch = compile(r'<a href="(.+?)"><img src="pix/prev.gif" ')
|
||||||
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Geeks(_BasicScraper):
|
||||||
|
latestUrl = 'http://sevenfloorsdown.com/geeks/'
|
||||||
|
imageUrl = 'http://sevenfloorsdown.com/geeks/archives/%s'
|
||||||
|
imageSearch = compile(r'<img src=\'(http://sevenfloorsdown.com/geeks/comics/.+?)\'')
|
||||||
|
prevSearch = compile(r'<a href="(.+?)">« Previous')
|
||||||
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class GlassHalfEmpty(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.defectivity.com/ghe/index.php'
|
||||||
|
imageUrl = 'http://www.defectivity.com/ghe/index.php?strip_id=%s'
|
||||||
|
imageSearch = compile(r'src="(comics/.+?)"')
|
||||||
|
prevSearch = compile(r'</a><a href="(.+?)"><img src="\.\./images/onback\.jpg"')
|
||||||
|
help = 'Index format: nnn'
|
65
dosagelib/plugins/h.py
Normal file
65
dosagelib/plugins/h.py
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
from re import compile
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper
|
||||||
|
|
||||||
|
|
||||||
|
class HappyMedium(_BasicScraper):
|
||||||
|
latestUrl = 'http://happymedium.fast-bee.com/'
|
||||||
|
imageUrl = 'http://happymedium.fast-bee.com/%s'
|
||||||
|
imageSearch = compile(r'(/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'com(/.+?)".+?"prev">◄')
|
||||||
|
help = 'Index format: yyyy/mm/chapter-n-page-n'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Heliothaumic(_BasicScraper):
|
||||||
|
latestUrl = 'http://thaumic.net/'
|
||||||
|
imageUrl = 'http://thaumic.net/%s'
|
||||||
|
imageSearch = compile(r'<img src="(http://thaumic.net/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'<div class="nav-previous"><a href="(http://thaumic.net/.+?)">')
|
||||||
|
help = 'Index format: yyyy/mm/dd/n(unpadded)-comicname'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Housd(_BasicScraper):
|
||||||
|
latestUrl = 'http://housd.net/archive_page.php?comicID=1284'
|
||||||
|
imageUrl = 'http://housd.net/archive_page.php?comicID=%s'
|
||||||
|
imageSearch = compile(r'"(.+?/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'"(h.+?comicID=.+?)".+?prev')
|
||||||
|
help = 'Index format: nnnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class HateSong(_BasicScraper):
|
||||||
|
latestUrl = 'http://hatesong.com/'
|
||||||
|
imageUrl = 'http://hatesong.com/%s/'
|
||||||
|
imageSearch = compile(r'src="(http://www.hatesong.com/strips/.+?)"')
|
||||||
|
prevSearch = compile(r'<div class="headernav"><a href="(http://hatesong.com/\d{4}/\d{2}/\d{2})')
|
||||||
|
help = 'Index format: yyyy/mm/dd'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class HorribleVille(_BasicScraper):
    """Scraper definition for HorribleVille (horribleville.com).

    Strip pages live at ``/d/<index>.html``; ``latestUrl`` shows the index
    form (``20090517``).
    """
    latestUrl = 'http://horribleville.com/d/20090517.html'
    imageUrl = 'http://horribleville.com/d/%s.html'
    imageSearch = compile(r'src="(/comics/.+?)"')
    prevSearch = compile(r'(\d+\.html)"><img[^>]+?src="/images/previous_day.png"')
    # Pages are named like '20090517.html', i.e. the index has no slashes.
    help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class HelpDesk(_BasicScraper):
    """Scraper definition for Help Desk (www.ubersoft.net).

    The strip index is the ``yyyy/mm/name`` path below ``/comic/hd/``.
    """
    latestUrl = 'http://www.ubersoft.net/'
    # A single placeholder: the index already contains the slashes
    # ('yyyy/mm/name'). The former three-placeholder template could not be
    # filled from one index string, and this class has no setStrip override
    # that splits it.
    # NOTE(review): assumes the base class formats imageUrl with the index
    # as one value, as the setStrip overrides elsewhere in these plugins
    # suggest - confirm against _BasicScraper.
    imageUrl = 'http://www.ubersoft.net/comic/hd/%s'
    imageSearch = compile(r'src="(http://www.ubersoft.net/files/comics/hd/hd\d{8}.png)')
    prevSearch = compile(r'<a href="(/comic/.+?)">(.+?)previous</a>')
    help = 'Index format: yyyy/mm/name'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class HardGraft(_BasicScraper):
|
||||||
|
latestUrl = 'http://hard-graft.net/'
|
||||||
|
imageUrl = 'http://hard-graft.net/?p=%s'
|
||||||
|
imageSearch = compile(r'<img src="(http://hard-graft.net/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)"')
|
||||||
|
help = 'Index format: nnn'
|
75
dosagelib/plugins/i.py
Normal file
75
dosagelib/plugins/i.py
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
from re import compile, IGNORECASE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper
|
||||||
|
|
||||||
|
|
||||||
|
class IDreamOfAJeanieBottle(_BasicScraper):
    """Scraper definition for I Dream of a Jeanie Bottle (jeaniebottle.com).

    The strip index is the unpadded numeric ``comicID`` query parameter.
    """
    latestUrl = 'http://jeaniebottle.com/'
    # The placeholder was missing, so the index could never be substituted
    # into the URL (%-formatting a template without a conversion raises
    # TypeError).
    imageUrl = 'http://jeaniebottle.com/review.php?comicID=%s'
    imageSearch = compile(r'(/comics/.+?)"')
    prevSearch = compile(r'First".+?(review.php.+?)".+?prev_a.gif')
    help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
class IrregularWebcomic(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.irregularwebcomic.net/'
|
||||||
|
imageUrl = 'http://www.irregularwebcomic.net/cgi-bin/comic.pl?comic=%s'
|
||||||
|
imageSearch = compile(r'<img .*src="(.*comics/.*(png|jpg|gif))".*>')
|
||||||
|
prevSearch = compile(r'<a href="(/\d+\.html|/cgi-bin/comic\.pl\?comic=\d+)">Previous ')
|
||||||
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
class InsideOut(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.insideoutcomic.com/'
|
||||||
|
imageUrl = 'http://www.insideoutcomic.com/html/%s.html'
|
||||||
|
imageSearch = compile(r'Picture12LYR.+?C="(.+?/assets/images/.+?)"')
|
||||||
|
prevSearch = compile(r'Picture7LYR.+?F="(.+?/html/.+?)"')
|
||||||
|
help = 'Index format: n_comic_name'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class InkTank(_BasicScraper):
|
||||||
|
shortName = 'inktank'
|
||||||
|
|
||||||
|
def starter(self):
|
||||||
|
return self.baseUrl + self.shortName + '/'
|
||||||
|
|
||||||
|
|
||||||
|
def inkTank(name, shortName):
    """Build a scraper class for one of the comics hosted on inktank.com.

    ``name`` is the comic's name, used for the generated class name
    (``InkTank_<name>``) and the scraper name (``InkTank/<name>``).
    ``shortName`` is the comic's directory on www.inktank.com.

    Returns a new ``_BasicScraper`` subclass.
    """
    @classmethod
    def _namer(cls, imageUrl, pageUrl):
        # Image files are named like 'aa-bb-cc.ext' (see imageSearch); the
        # result is '20cc-aa-bb', i.e. the last two-digit group expanded to
        # a four-digit year and moved to the front.
        return '20%s-%s' % (imageUrl[-6:-4], imageUrl[-12:-7])

    baseUrl = 'http://www.inktank.com/%s/' % (shortName,)
    return type('InkTank_%s' % name,
        (_BasicScraper,),
        dict(
            name='InkTank/' + name,
            latestUrl=baseUrl,
            imageUrl=baseUrl + 'd/%s.html',
            imageSearch=compile(r'<IMG SRC="(/images/[^/]+/cartoons/\d{2}-\d{2}-\d{2}.+?)"'),
            prevSearch=compile(r'<A HREF="(/[^/]+/index.cfm\?nav=\d+?)"><IMG SRC="/images/nav_last.gif"'),
            # _namer was defined above but never attached to the generated
            # class, leaving it dead code; register it under the 'namer'
            # attribute name used by the other scrapers in these plugins.
            namer=_namer,
            help='Index format: n (unpadded)')
    )
|
||||||
|
|
||||||
|
|
||||||
|
at = inkTank('AngstTechnology', 'AT')
|
||||||
|
ww = inkTank('WeakEndWarriors', 'WW')
|
||||||
|
swo = inkTank('SorryWereOpen', 'SWO')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class IlmanNaista(_BasicScraper):
|
||||||
|
latestUrl = 'http://kvantti.tky.fi/in/archive_end.shtml'
|
||||||
|
imageUrl = 'http://kvantti.tky.fi/in/%s.shtml'
|
||||||
|
imageSearch = compile(r'<img src="(kuvat/in_.+?)"', IGNORECASE)
|
||||||
|
prevSearch = compile(r'<a href="(\d+.shtml)"><img width="90" height="45" src="deco/edellinen.png" alt="Edellinen"/></a>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ICantDrawFeet(_BasicScraper):
|
||||||
|
latestUrl = 'http://icantdrawfeet.com/'
|
||||||
|
imageUrl = 'http://icantdrawfeet.com/%s'
|
||||||
|
imageSearch = compile(r'src="(http://icantdrawfeet.com/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'<a href="(http://icantdrawfeet.com/.+?)"><img src="http://icantdrawfeet.com/pageimages/prev.png"')
|
||||||
|
help = 'Index format: yyyy/mm/dd/stripname'
|
39
dosagelib/plugins/j.py
Normal file
39
dosagelib/plugins/j.py
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
from re import compile, MULTILINE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Jack(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.pholph.com/'
|
||||||
|
imageUrl = 'http://www.pholph.com/strip.php?id=5&sid=%s'
|
||||||
|
imageSearch = compile(r'<img src="(./artwork/.+?/Jack.+?)"')
|
||||||
|
prevSearch = compile(r'\|<a href="(.+?)">Previous Strip</a>')
|
||||||
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class JerkCity(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.jerkcity.com/'
|
||||||
|
imageUrl = 'http://www.jerkcity.com/jerkcity%s'
|
||||||
|
imageSearch = compile(r'"jerkcity.+?">.+?"(/jerkcity.+?)"')
|
||||||
|
prevSearch = compile(r'"(jerkcity.+?)">.+?"/jerkcity.+?"')
|
||||||
|
help = 'Index format: unknown'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class JoeAndMonkey(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.joeandmonkey.com/'
|
||||||
|
imageUrl = 'http://www.joeandmonkey.com/%s'
|
||||||
|
imageSearch = compile(r'"(/comic/[^"]+)"')
|
||||||
|
prevSearch = compile(r"<a href='(/\d+)'>Previous")
|
||||||
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class JoyOfTech(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.geekculture.com/joyoftech/index.html'
|
||||||
|
imageUrl = 'http://www.geekculture.com/joyoftech/joyarchives/%s.html'
|
||||||
|
imageSearch = compile(r'<img src="(joyimages/.+?|../joyimages/.+?)" alt="The Joy')
|
||||||
|
prevSearch = compile(r'<a href="((?:joyarchives/)?\w+\.\w{3,4})">(?:<font[^>]*>)?<img[^>]*><br>[\s\n]*Previous Joy', MULTILINE)
|
||||||
|
help = 'Index format: nnn'
|
62
dosagelib/plugins/k.py
Normal file
62
dosagelib/plugins/k.py
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
from re import compile, IGNORECASE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class KernelPanic(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.ubersoft.net/kpanic/'
|
||||||
|
imageUrl = 'http://www.ubersoft.net/kpanic/d/%s'
|
||||||
|
imageSearch = compile(r'src="(.+?/kp/kp.+?)" ')
|
||||||
|
prevSearch = compile(r'<li class="previous"><a href="(.+?)">')
|
||||||
|
help = 'Index format: yyyymmdd.html'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
return imageUrl.split('/')[-1].split('.')[0]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Key(_BasicScraper):
|
||||||
|
latestUrl = 'http://key.shadilyn.com/latestpage.html'
|
||||||
|
imageUrl = 'http://key.shadilyn.com/pages/%s.html'
|
||||||
|
imageSearch = compile(r'"((?:images/.+?)|(?:pages/images/.+?))"')
|
||||||
|
prevSearch = compile(r'</a><a href="(.+?html)".+?prev')
|
||||||
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Krakow(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.krakowstudios.com/'
|
||||||
|
imageUrl = 'http://www.krakowstudios.com/archive.php?date=%s'
|
||||||
|
imageSearch = compile(r'<img src="(comics/.+?)"')
|
||||||
|
prevSearch = compile(r'<a href="(archive\.php\?date=.+?)"><img border=0 name=previous_day')
|
||||||
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
class Kukuburi(_BasicScraper):
    """Scraper definition for Kukuburi (www.kukuburi.com)."""
    latestUrl = 'http://www.kukuburi.com/current/'
    # Previously pointed at thaumic.net, a different comic's host, so any
    # indexed request went to the wrong site.
    # NOTE(review): the exact per-strip path layout on kukuburi.com is not
    # visible here - confirm.
    imageUrl = 'http://www.kukuburi.com/%s'
    imageSearch = compile(r'img src="(http://www.kukuburi.com/../comics/.+?)"')
    prevSearch = compile(r'nav-previous.+?"(http.+?)"')
    help = 'Index format: non'
|
||||||
|
|
||||||
|
|
||||||
|
class KevinAndKell(_BasicScraper):
    """Scraper definition for Kevin and Kell (www.kevinandkell.com).

    Strip pages are addressed as ``<year>/kk<month><day>.html``; the
    user-facing index is ``yyyy-mm-dd``.
    """
    latestUrl = 'http://www.kevinandkell.com/'
    imageUrl = 'http://www.kevinandkell.com/%s/kk%s%s.html'
    imageSearch = compile(r'<img.+?src="(/?(\d+/)?strips/kk\d+.gif)"', IGNORECASE)
    prevSearch = compile(r'<a.+?href="(/?(\.\./)?\d+/kk\d+\.html)"[^>]*><span>Previous Strip', IGNORECASE)
    help = 'Index format: yyyy-mm-dd'

    def setStrip(self, index):
        """Point the scraper at the strip for ``index`` (``yyyy-mm-dd``).

        Raises ValueError if ``index`` does not consist of three
        dash-separated integers.
        """
        year, month, day = map(int, index.split('-'))
        # Month and day are zero-padded to two digits. Formatting the bare
        # ints dropped the padding, so e.g. January 11th and November 1st
        # both produced 'kk111.html'.
        self.currentUrl = self.imageUrl % (year, '%02d' % month, '%02d' % day)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class KillerKomics(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.killerkomics.com/web-comics/index_ang.cfm'
|
||||||
|
imageUrl = 'http://www.killerkomics.com/web-comics/%s.cfm'
|
||||||
|
imageSearch = compile(r'<img src="(http://www.killerkomics.com/FichiersUpload/Comics/.+?)"')
|
||||||
|
prevSearch = compile(r'<div id="precedent"><a href="(.+?)"')
|
||||||
|
help = 'Index format: strip-name'
|
1525
dosagelib/plugins/keenspot.py
Normal file
1525
dosagelib/plugins/keenspot.py
Normal file
File diff suppressed because it is too large
Load diff
89
dosagelib/plugins/l.py
Normal file
89
dosagelib/plugins/l.py
Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
from re import compile
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, indirectStarter
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class LasLindas(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.katbox.net/laslindas/'
|
||||||
|
imageUrl = 'http://www.katbox.net/laslindas/index.php?strip_id=%s'
|
||||||
|
imageSearch = compile(r'"(istrip_files/strips/.+?)"')
|
||||||
|
prevSearch = compile(r'<a href="(.+?)"><[^>]+?alt="Back"')
|
||||||
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class LastBlood(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.lastblood.net/main/'
|
||||||
|
imageUrl = 'http://www.lastblood.net/main/%s'
|
||||||
|
imageSearch = compile(r'(/comicfolder/.+?)" alt')
|
||||||
|
prevSearch = compile(r'Previous Comic:</small><br />« <a href="(.+?)">')
|
||||||
|
help = 'Index format: yyyy/mm/dd/(page number and name)'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class LesbianPiratesFromOuterSpace(_BasicScraper):
|
||||||
|
latestUrl = 'http://rosalarian.com/lesbianpirates/'
|
||||||
|
imageUrl = 'http://rosalarian.com/lesbianpirates/?p=%s'
|
||||||
|
imageSearch = compile(r'(/lesbianpirates/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'/(\?p=.+?)">«')
|
||||||
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Lint(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.purnicellin.com/lint/'
|
||||||
|
imageUrl = 'http://www.purnicellin.com/lint/%s'
|
||||||
|
imageSearch = compile(r'<img src="(http://www.purnicellin.com/lint/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'\| <a href="([^"]+)" rel="prev">')
|
||||||
|
help = 'Index format: yyyy/mm/dd/num-name'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class LookingForGroup(_BasicScraper):
    """Scraper definition for Looking For Group (www.lfgcomic.com).

    The latest strip is found indirectly via the front page's feature
    preview link; strips are addressed by numeric page id.
    """
    latestUrl = 'http://www.lfgcomic.com/page/latest'
    imageUrl = 'http://www.lfgcomic.com/page/%s'
    imageSearch = compile(r'<img src="(http://newcdn.lfgcomic.com/uploads/comics/.+?)"')
    prevSearch = compile(r'<a href="(/page/\d+)" id="navtop-prev"')
    starter = indirectStarter('http://www.lfgcomic.com/', compile(r'<a href="(/page/\d+)" id="feature-preview"'))
    # Extracts the numeric page id from a page URL; used by namer().
    nameSearch = compile(r'/page/(\d+)')
    help = 'Index format: nnn'

    # Declared as a classmethod like the other namer overrides in these
    # plugins; it only reads class-level state and remains callable on
    # instances as before.
    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return the page number from ``pageUrl`` as the strip's name.

        ``imageUrl`` is unused. Raises AttributeError if ``pageUrl``
        contains no ``/page/<digits>`` segment.
        """
        return cls.nameSearch.search(pageUrl).group(1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Loserz(_BasicScraper):
|
||||||
|
latestUrl = 'http://bukucomics.com/loserz/'
|
||||||
|
imageUrl = 'http://bukucomics.com/loserz/go/%s'
|
||||||
|
imageSearch = compile(r'<img src="(http://bukucomics.com/loserz/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'<a href="(.+?)"> < ')
|
||||||
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class LittleGamers(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.little-gamers.com/'
|
||||||
|
imageUrl = 'http://www.little-gamers.com/%s'
|
||||||
|
imageSearch = compile(r'<img src="(http://www.little-gamers.com/comics/[^"]+)"')
|
||||||
|
prevSearch = compile(r'href="(.+?)"><img id="comic-nav-prev"')
|
||||||
|
help = 'Index format: yyyy/mm/dd/name'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class LegoRobot(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.legorobotcomics.com/'
|
||||||
|
imageUrl = 'http://www.legorobotcomics.com/?id=%s'
|
||||||
|
imageSearch = compile(r'id="the_comic" src="(comics/.+?)"')
|
||||||
|
prevSearch = compile(r'(\?id=\d+)"><img src="images/back.png"')
|
||||||
|
help = 'Index format: nnnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class LeastICouldDo(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.leasticoulddo.com/'
|
||||||
|
imageUrl = 'http://www.leasticoulddo.com/comic/%s'
|
||||||
|
imageSearch = compile(r'<img src="(http://cdn.leasticoulddo.com/comics/\d{8}.\w{1,4})" />')
|
||||||
|
prevSearch = compile(r'<a href="(/comic/\d{8})">Previous</a>')
|
||||||
|
help = 'Index format: yyyymmdd'
|
107
dosagelib/plugins/m.py
Normal file
107
dosagelib/plugins/m.py
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
from re import compile, IGNORECASE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, queryNamer
|
||||||
|
|
||||||
|
|
||||||
|
class MadamAndEve(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.madamandeve.co.za/week_of_cartns.php'
|
||||||
|
imageUrl = 'http://www.madamandeve.co.za/week_of_cartns.php'
|
||||||
|
imageSearch = compile(r'<IMG BORDER="0" SRC="(cartoons/me\d{6}\.(gif|jpg))">')
|
||||||
|
prevSearch = compile(r'<a href="(weekend_cartoon.php)"')
|
||||||
|
help = 'Index format: (none)'
|
||||||
|
|
||||||
|
|
||||||
|
class MagicHigh(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.doomnstuff.com/magichigh/index.php'
|
||||||
|
imageUrl = 'http://www.doomnstuff.com/magichigh/index.php?strip_id=%s'
|
||||||
|
imageSearch = compile(r'(istrip_files/strips/.+?)"')
|
||||||
|
prevSearch = compile(r'First .+?"(/magichigh.+?)".+?top_back')
|
||||||
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Marilith(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.marilith.com/'
|
||||||
|
imageUrl = 'http://www.marilith.com/archive.php?date=%s'
|
||||||
|
imageSearch = compile(r'<img src="(comics/.+?)" border')
|
||||||
|
prevSearch = compile(r'<a href="(archive\.php\?date=.+?)"><img border=0 name=previous_day')
|
||||||
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class MarryMe(_BasicScraper):
|
||||||
|
latestUrl = 'http://marrymemovie.com/main/'
|
||||||
|
imageUrl = 'http://marrymemovie.com/main/%s'
|
||||||
|
imageSearch = compile(r'(/comicfolder/.+?)"')
|
||||||
|
prevSearch = compile(r'Previous Comic:</small><br />« <a href="(.+?)">')
|
||||||
|
help = 'Index format: good luck !'
|
||||||
|
|
||||||
|
|
||||||
|
class Meek(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.meekcomic.com/'
|
||||||
|
imageUrl = 'http://www.meekcomic.com/%s'
|
||||||
|
imageSearch = compile(r'meekcomic.com(/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'\s.+?(http://www.meekcomic.com/.+?)".+?Previous<')
|
||||||
|
help = 'Index format: yyyy/mm/dd/ch-p/'
|
||||||
|
|
||||||
|
|
||||||
|
class MegaTokyo(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.megatokyo.com/'
|
||||||
|
imageUrl = 'http://www.megatokyo.com/strip/%s'
|
||||||
|
imageSearch = compile(r'"(strips/.+?)"', IGNORECASE)
|
||||||
|
prevSearch = compile(r'"(./strip/\d+?)">Prev')
|
||||||
|
help = 'Index format: nnnn'
|
||||||
|
|
||||||
|
|
||||||
|
class MyPrivateLittleHell(_BasicScraper):
|
||||||
|
latestUrl = 'http://mutt.purrsia.com/mplh/'
|
||||||
|
imageUrl = 'http://mutt.purrsia.com/mplh/?date=%s'
|
||||||
|
imageSearch = compile(r'<img.+?src="(comics/.+?)"')
|
||||||
|
prevSearch = compile(r'<a.+?href="(\?date=\d+/\d+/\d+)">Prev</a>')
|
||||||
|
help = 'Index format: mm/dd/yyyy'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class MacHall(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.machall.com/'
|
||||||
|
imageUrl = 'http://www.machall.com/view.php?date=%s'
|
||||||
|
imageSearch = compile(r'<img src="(comics/.+?)"')
|
||||||
|
prevSearch = compile(r'<a href="(.+?)"><img[^>]+?src=\'drop_shadow/previous.gif\'>')
|
||||||
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Misfile(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.misfile.com/'
|
||||||
|
imageUrl = 'http://www.misfile.com/?page=%s'
|
||||||
|
imageSearch = compile(r'<img src="(overlay\.php\?pageCalled=\d+)">')
|
||||||
|
prevSearch = compile(r'<a href="(\?page=\d+)"><img src="/images/back\.gif"')
|
||||||
|
help = 'Index format: n (unpadded)'
|
||||||
|
namer = queryNamer('pageCalled')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class MysteriesOfTheArcana(_BasicScraper):
    """Scraper definition for Mysteries of the Arcana (mysteriesofthearcana.com).

    The strip index is the unpadded numeric ``cid`` query parameter.
    """
    latestUrl = 'http://mysteriesofthearcana.com/'
    # The placeholder was missing, so the index could never be substituted
    # into the URL (%-formatting a template without a conversion raises
    # TypeError).
    imageUrl = 'http://mysteriesofthearcana.com/index.php?action=comics&cid=%s'
    imageSearch = compile(r'(image.php\?type=com&i=.+?)"')
    prevSearch = compile(r'(index.php\?action=comics&cid=.+?)".+?show_prev1')
    help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class MysticRevolution(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.mysticrev.com/index.php'
|
||||||
|
imageUrl = 'http://www.mysticrev.com/index.php?cid=%s'
|
||||||
|
imageSearch = compile(r'(comics/.+?)"')
|
||||||
|
prevSearch = compile(r'(\?cid=.+?)".+?prev.gif')
|
||||||
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class MontyAndWooly(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.montyandwoolley.co.uk/'
|
||||||
|
imageUrl = 'http://montyandwoolley.co.uk/%s'
|
||||||
|
imageSearch = compile(r'<img src="(http://montyandwoolley.co.uk/.+?)"')
|
||||||
|
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
|
||||||
|
help = 'Index format: yyyy/mm/dd/strip-name'
|
171
dosagelib/plugins/n.py
Normal file
171
dosagelib/plugins/n.py
Normal file
|
@ -0,0 +1,171 @@
|
||||||
|
from re import compile, IGNORECASE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, indirectStarter, _PHPScraper
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NamirDeiter(_BasicScraper):
    """Scraper definition for Namir Deiter (www.namirdeiter.com).

    The strip index is the comic date in ``yyyymmdd`` form, passed as the
    ``date`` query parameter.
    """
    latestUrl = 'http://www.namirdeiter.com/'
    imageUrl = 'http://www.namirdeiter.com/comics/index.php?date=%s'
    # Inside a character class '|' is a literal pipe, not alternation, so
    # the terminating class is just the two quote characters.
    imageSearch = compile(r'<img.+?(/comics/\d{8}.+?)[\'"]')
    prevSearch = compile(r'(/comics/index.php\?date=.+?|http://www.namirdeiter.com/comics/index.php\?date=.+?)[\'"].+?previous')
    help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NeoEarth(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.neo-earth.com/NE/'
|
||||||
|
imageUrl = 'http://www.neo-earth.com/NE/index.php?date=%s'
|
||||||
|
imageSearch = compile(r'<img src="(strips/.+?)"')
|
||||||
|
prevSearch = compile(r'<a href="(.+?)">Previous</a>')
|
||||||
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Nervillsaga(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.nervillsaga.com/'
|
||||||
|
imageUrl = 'http://www.nervillsaga.com/index.php?s=%s'
|
||||||
|
imageSearch = compile(r'"(pic/.+?)"')
|
||||||
|
prevSearch = compile(r'"(.+?)">Previous')
|
||||||
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NewAdventuresOfBobbin(_BasicScraper):
|
||||||
|
latestUrl = 'http://bobbin-comic.com/'
|
||||||
|
imageUrl = 'http://www.bobbin-comic.com/wordpress/?p=%s'
|
||||||
|
imageSearch = compile(r'<img src="(http://www.bobbin-comic.com/wordpress/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
|
||||||
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NewWorld(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.tfsnewworld.com/'
|
||||||
|
imageUrl = 'http://www.tfsnewworld.com/%s'
|
||||||
|
imageSearch = compile(r'<img src="(http://www.tfsnewworld.com/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
|
||||||
|
help = 'Index format: yyyy/mm/dd/stripn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Nicky510(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.nicky510.com/'
|
||||||
|
imageUrl = 'http://www.nicky510.com/%s'
|
||||||
|
imageSearch = compile(r'(http://www.nicky510.com/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'<a href="(http://www.nicky510.com/.+?)" class="navi navi-prev"')
|
||||||
|
help = 'Index format: yyyy/mm/dd/stripname/'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NoNeedForBushido(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.noneedforbushido.com/latest/'
|
||||||
|
imageUrl = 'http://www.noneedforbushido.com/%s'
|
||||||
|
imageSearch = compile(r'<div class="comics"><img src="([^"]+)"')
|
||||||
|
prevSearch = compile(r'<a href="([^"]+)" title="[^"]*" class="previous-comic-link')
|
||||||
|
help = 'Index format: yyyy/comic/nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Nukees(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.nukees.com/'
|
||||||
|
imageUrl = 'http://www.nukees.com/d/%s'
|
||||||
|
imageSearch = compile(r'"comic".+?"(/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'"(/d/.+?)".+?previous')
|
||||||
|
help = 'Index format: yyyymmdd.html'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class _NuklearPower(_BasicScraper):
|
||||||
|
imageSearch = compile(r'<img src="(http://www.nuklearpower.com/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'><a href="(.+?)">Previous</a>')
|
||||||
|
help = 'Index format: yyyy/mm/dd/name'
|
||||||
|
|
||||||
|
@property
|
||||||
|
def baseUrl(self):
|
||||||
|
return 'http://www.nuklearpower.com/%s/' % (self.shortName,)
|
||||||
|
|
||||||
|
def starter(self):
|
||||||
|
return self.baseUrl
|
||||||
|
|
||||||
|
@property
|
||||||
|
def imageUrl(self):
|
||||||
|
return self.baseUrl + '%s'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NP8BitTheater(_NuklearPower):
|
||||||
|
name = 'NuklearPower/8BitTheater'
|
||||||
|
shortName = '8-bit-theater'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NPWarbot(_NuklearPower):
|
||||||
|
name = 'NuklearPower/Warbot'
|
||||||
|
shortName = 'warbot'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NPHIKYM(_NuklearPower):
|
||||||
|
name = 'NuklearPower/HowIKilledYourMaster'
|
||||||
|
shortName = 'hikym'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NPAtomicRobo(_NuklearPower):
|
||||||
|
name = 'NuklearPower/AtomicRobo'
|
||||||
|
shortName = 'atomic-robo'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NekoTheKitty(_PHPScraper):
|
||||||
|
basePath = 'http://www.nekothekitty.net/cusp/'
|
||||||
|
latestUrl = 'latest.php'
|
||||||
|
prevSearch = compile(r"<a href=\"(http://www\.nekothekitty\.net/cusp/daily\.php\?date=\d+)\"><img[^>]+alt='Previous Comic'")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NichtLustig(_BasicScraper):
|
||||||
|
imageUrl = 'http://www.nichtlustig.de/toondb/%s.html'
|
||||||
|
imageSearch = compile(r'<img src="([^"]+)" id="cartoon"', IGNORECASE)
|
||||||
|
prevSearch = compile(r'<a href="(\d+\.html)"[^<>]*><img[^<>]*id="pfeil_links', IGNORECASE)
|
||||||
|
help = 'Index format: yymmdd'
|
||||||
|
starter = indirectStarter('http://www.nichtlustig.de/main.html',
|
||||||
|
compile(r'<a href="([^"]*toondb/\d+\.html)"', IGNORECASE))
|
||||||
|
|
||||||
|
|
||||||
|
class NinthElsewhere(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.9thelsewhere.com/icenter.html'
|
||||||
|
imageUrl = 'http://www.9thelsewhere.com/%s/9e%s_%s.html'
|
||||||
|
imageSearch = compile(r'<img src="([^"]*9e\d+_\d+\.jpg)"')
|
||||||
|
prevSearch = compile(r'<a href="([^"]+\.html)">\s*PREV')
|
||||||
|
help = 'Index format: year-chapter-page'
|
||||||
|
|
||||||
|
def setStrip(self, index):
|
||||||
|
self.currentUrl = self.imageUrl % tuple(map(int, index.split('-')))
|
||||||
|
|
||||||
|
|
||||||
|
class Nodwick(_BasicScraper):
|
||||||
|
imageUrl = None
|
||||||
|
imageSearch = compile(r'<img src="(http://nodwick.humor.gamespy.com/gamespyarchive/strips/[^"]*)"', IGNORECASE)
|
||||||
|
prevSearch = compile(r'<a href="(index.php\?date=[0-9-]*)"><img src="back.jpg"', IGNORECASE)
|
||||||
|
starter = indirectStarter('http://nodwick.humor.gamespy.com/gamespyarchive/index.php', prevSearch)
|
||||||
|
help = 'Index format: None'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NekkoAndJoruba(_BasicScraper):
|
||||||
|
latestUrl = 'http://www.nekkoandjoruba.com/'
|
||||||
|
imageUrl = 'http://www.nekkoandjoruba.com/?p=%s'
|
||||||
|
imageSearch = compile(r'<img src="(http://www.nekkoandjoruba.com/comics/.+?)"')
|
||||||
|
prevSearch = compile(r'<a href="(.+?)">‹</a>')
|
||||||
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NobodyScores(_BasicScraper):
    # Scraper settings for nobodyscores.loosenutstudio.com.
    help = 'Index format: nnn'
    latestUrl = 'http://nobodyscores.loosenutstudio.com/'
    imageUrl = 'http://nobodyscores.loosenutstudio.com/index.php?id=%s'
    # Group 1: absolute URL of an image below /comix/.
    imageSearch = compile(r'><img src="(http://nobodyscores.loosenutstudio.com/comix/.+?)"')
    # Group 1: href of the link labelled "the one before ".
    prevSearch = compile(r'<a href="(http://nobodyscores.loosenutstudio.com/index.php.+?)">the one before </a>')
|
23
dosagelib/plugins/num.py
Normal file
23
dosagelib/plugins/num.py
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
from re import compile
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NineteenNinetySeven(_BasicScraper):
    # Scraper settings for www.1977thecomic.com.
    # NOTE(review): the class name and `name` say 1997 while every URL says
    # 1977 -- confirm which is intended before renaming anything.
    name = '1997'
    help = 'Index format: yyyy/mm/dd/strip-name'
    latestUrl = 'http://www.1977thecomic.com/'
    imageUrl = 'http://www.1977thecomic.com/%s'
    # Group 1: absolute URL of an image below /comics-1977/.
    imageSearch = compile(r'<img src="(http://www.1977thecomic.com/comics-1977/.+?)"')
    # Group 1: href of the link wrapping <span class="prev">.
    prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class EightHalfByEleven(_BasicScraper):
    # Scraper settings for the webcomic hosted at www.lucastds.com/webcomic/.
    name = '8HalfByEleven'
    help = 'Index format: nnn'
    latestUrl = 'http://www.lucastds.com/webcomic/'
    imageUrl = 'http://www.lucastds.com/webcomic/index.php?strip_id=%s'
    # Group 1: relative path of an image below istrip_files/strips/.
    imageSearch = compile(r'<img src="(istrip_files/strips/.+?)"')
    # Group 1: href of the link wrapping previous_a.png.
    prevSearch = compile(r'</a><a href="(/webcomic/.+?)"><img[^>]+?src="themes/tedzsee/images/previous_a.png">')
|
84
dosagelib/plugins/o.py
Normal file
84
dosagelib/plugins/o.py
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
from re import compile, IGNORECASE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, indirectStarter
|
||||||
|
|
||||||
|
|
||||||
|
class OctopusPie(_BasicScraper):
    # Scraper settings for www.octopuspie.com.
    help = 'Index format: yyyy-mm-dd/nnn-strip-name'
    imageUrl = 'http://www.octopuspie.com/%s'
    # indirectStarter receives a fixed strip page plus a pattern for the
    # "latest comic" link found on it.
    starter = indirectStarter(
        'http://www.octopuspie.com/2007-05-14/001-pea-wiggle/',
        compile(r'<a href="(http://www.octopuspie.com/.+?)"><b>latest comic</b>', IGNORECASE))
    # Group 1: absolute URL of an image below /strippy/.
    imageSearch = compile(r'<img src="(http://www.octopuspie.com/strippy/.+?)"')
    # Group 1: href of the <link rel='prev'> element.
    prevSearch = compile(r'<link rel=\'prev\'[^>]+?href=\'(http://www.octopuspie.com/.+?)\'')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class OddFish(_BasicScraper):
    # Scraper settings for www.odd-fish.net.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://www.odd-fish.net/'
    imageUrl = 'http://www.odd-fish.net/viewing.php?&comic_id=%s'
    # Group 1: relative path images/<1-4 digits><any char><3-4 word chars>.
    imageSearch = compile(r'<img src="(images/\d{1,4}.\w{3,4})" ')
    # Group 1: href of the link wrapping older.gif.
    # NOTE(review): the button host is odd-fishing.net while the other URLs
    # use odd-fish.net -- confirm this matches the page markup.
    prevSearch = compile(r'<a href="(.+?)"><img src="http://www.odd-fishing.net/i/older.gif" ')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class OhMyGods(_BasicScraper):
    # Scraper settings for ohmygods.co.uk.
    help = 'Index format: yyyy-mm-dd'
    latestUrl = 'http://ohmygods.co.uk/'
    imageUrl = 'http://ohmygods.co.uk/strips/%s'
    # Group 1: /system/files/ path of the image inside <p class="omgs-strip">.
    imageSearch = compile(r'<p class="omgs-strip"><img src="(/system/files/.+?)"')
    # Group 1: /strips/ href inside the custom_pager_prev list item.
    prevSearch = compile(r'<li class="custom_pager_prev"><a href="(/strips/.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class OnTheEdge(_BasicScraper):
    # Scraper settings for ontheedgecomics.com.
    help = 'Index format: nnn (unpadded)'
    latestUrl = 'http://www.ontheedgecomics.com/'
    imageUrl = 'http://ontheedgecomics.com/comic/ote%s'
    # Group 1: absolute URL of an image below /comics/.
    imageSearch = compile(r'<img src="(http://ontheedgecomics.com/comics/.+?)"')
    # Group 1: href of the link marked rel="prev".
    prevSearch = compile(r'<a href="([^"]+)" rel="prev">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class OneQuestion(_BasicScraper):
    # Scraper settings for onequestioncomic.com.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://onequestioncomic.com/'
    imageUrl = 'http://onequestioncomic.com/comics/%s/'
    # Group 1: text starting at "istrip_files" up to the next double quote.
    imageSearch = compile(r'(istrip_files.+?)"')
    # Group 1: quoted comic.php link located between "First" and
    # "previous.png".
    prevSearch = compile(r'First.+?"(comic.php.+?)".+?previous.png')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class OurHomePlanet(_BasicScraper):
    # Scraper settings for gdk.gd-kun.net.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://gdk.gd-kun.net/'
    imageUrl = 'http://gdk.gd-kun.net/%s.html'
    # Group 1: relative image path starting with pages/comic.
    imageSearch = compile(r'<img src="(pages/comic.+?)"')
    # Group 1: .html href following the coords="50,18,95,65" attribute.
    prevSearch = compile(r'coords="50,18,95,65".+?href="(.+?\.html)".+?alt=')
|
||||||
|
|
||||||
|
|
||||||
|
class OkCancel(_BasicScraper):
    # Scraper settings for www.ok-cancel.com.
    # NOTE(review): help advertises yyyymmdd, but prevSearch only accepts
    # page names of 1-4 digits -- confirm the real index format.
    help = 'Index format: yyyymmdd'
    imageUrl = 'http://www.ok-cancel.com/comic/%s.html'
    # Group 1: absolute URL of strips/okcancel<8 digits>.gif.
    imageSearch = compile(r'src="(http://www.ok-cancel.com/strips/okcancel\d{8}.gif)"', IGNORECASE)
    # Group 1: href of the link inside <div class="previous">.
    prevSearch = compile(r'<div class="previous"><a href="(http://www.ok-cancel.com/comic/\d{1,4}.html)">', IGNORECASE)
    # The "previous" pattern is also handed to indirectStarter.
    starter = indirectStarter('http://www.ok-cancel.com/', prevSearch)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Oglaf(_BasicScraper):
    # Scraper settings for oglaf.com.
    help = 'Index format: nn'
    imageUrl = 'http://oglaf.com/%s.html'
    # indirectStarter receives the front page plus a pattern for the link
    # wrapping over18.gif.
    starter = indirectStarter(
        'http://oglaf.com/',
        compile(r'<a href="(.+?)"><img src="over18.gif"', IGNORECASE))
    # Group 1: src of the image declared with width 760 and height 596.
    imageSearch = compile(r'/><img src="(.+?)"[^>]+?width="760" height="596"', IGNORECASE)
    # Group 1: href of the link wrapping prev.gif.
    prevSearch = compile(r'<a href="(.+?)"[^>]+?><img src="prev.gif"', IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class OverCompensating(_BasicScraper):
    # Scraper settings for www.overcompensating.com.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://www.overcompensating.com/'
    imageUrl = 'http://www.overcompensating.com/posts/%s.html'
    # Group 1: /comics/ path of the strip image.
    imageSearch = compile(r'<img src="(/comics/.+?)"')
    # Group 1: href of the link whose text is " <- ".
    prevSearch = compile(r'"><a href="(.+?)"[^>]+?> \<\- </a>')
|
172
dosagelib/plugins/p.py
Normal file
172
dosagelib/plugins/p.py
Normal file
|
@ -0,0 +1,172 @@
|
||||||
|
from re import compile, IGNORECASE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, bounceStarter, queryNamer
|
||||||
|
|
||||||
|
|
||||||
|
class PartiallyClips(_BasicScraper):
    # Scraper settings for www.partiallyclips.com.
    help = 'Index format: nnnn'
    latestUrl = 'http://www.partiallyclips.com/'
    imageUrl = 'http://www.partiallyclips.com/index.php?id=%s'
    # Group 1: quoted absolute URL below /storage/.
    imageSearch = compile(r'"(http://www.partiallyclips.com/storage/.+?)"')
    # Group 1: quoted index.php?id= link that is followed by "prev".
    prevSearch = compile(r'"(index.php\?id=.+?)".+?prev')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class PastelDefender(_BasicScraper):
    # Scraper settings for www.pasteldefender.com.
    help = 'Index format: nnn'
    latestUrl = 'http://www.pasteldefender.com/coverbackcover.html'
    imageUrl = 'http://www.pasteldefender.com/%s.html'
    # Group 1: images/ path of the image declared with WIDTH="742".
    imageSearch = compile(r'<IMG SRC="(images/.+?)" WIDTH="742"')
    # Group 1: HREF of the link wrapping images/back.gif.
    prevSearch = compile(r'<A HREF="([^"]+)"><IMG SRC="images/back\.gif"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class PebbleVersion(_BasicScraper):
    # Scraper settings for www.pebbleversion.com.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://www.pebbleversion.com/'
    imageUrl = 'http://www.pebbleversion.com/Archives/Strip%s.html'
    # Group 1: image path below ComicStrips/, with or without a leading ../
    imageSearch = compile(r'<img src="(ComicStrips/.+?|../ComicStrips/.+?)"')
    # Group 1: an Archives/Strip... or Strip... href; the lookaheads reject
    # links labelled "First Comic" and, for the second alternative, demand a
    # "Previous Comic" label.
    prevSearch = compile(r'<a href="((?!.+?">First Comic)Archives/Strip.+?|(?=.+?">Previous Comic)(?!.+?">First Comic)Strip.+?)"')
|
||||||
|
|
||||||
|
|
||||||
|
class PennyAndAggie(_BasicScraper):
    # Scraper settings for www.pennyandaggie.com.
    latestUrl = 'http://www.pennyandaggie.com/index.php'
    # URL template, not a regular expression: the '?' must not be escaped.
    # The previous value 'index.php\?p=%s' kept the backslash in the string
    # and therefore produced addresses containing a literal '\'.
    imageUrl = 'http://www.pennyandaggie.com/index.php?p=%s'
    # Group 1: /comics/ part of a src attribute.
    imageSearch = compile(r'src=".+?(/comics/.+?)"')
    # Group 1: single-quoted index.php?p= href that is followed by "prev".
    prevSearch = compile(r"</a><a href='(index.php\?p=.+?)'.+?prev")
    help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class PennyArcade(_BasicScraper):
    # Scraper settings for www.penny-arcade.com.
    help = 'Index format: yyyy/mm/dd'
    imageUrl = 'http://www.penny-arcade.com/comic/%s/'
    # bounceStarter receives the comic front page plus a pattern for the
    # "Next" link.
    starter = bounceStarter(
        'http://www.penny-arcade.com/comic/',
        compile(r'<a href="(/comic/[^"]+)">Next</a>'))
    # Group 1: art.penny-arcade.com photo URL; the lookbehind skips an <img>
    # that directly follows a comment opener.
    imageSearch = compile(r'(?<!<!--)<img src="(http://art\.penny-arcade\.com/photos/[^"]+)"')
    # Group 1: href of the "Back" link.
    prevSearch = compile(r'<a href="(/comic/[^"]+)">Back</a>')

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # The three '/'-separated pieces before the last one are read as
        # year, month and day and rendered as a zero-padded yyyymmdd string.
        year, month, day = [int(piece) for piece in pageUrl.split('/')[-4:-1]]
        return '%04d%02d%02d' % (year, month, day)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class PeppermintSaga(_BasicScraper):
    # Scraper settings for www.pepsaga.com.
    help = 'Index format: non'
    latestUrl = 'http://www.pepsaga.com/'
    imageUrl = 'http://www.pepsaga.com/comics/%s/'
    # Group 1: http URL containing /comics/ inside a src attribute.
    imageSearch = compile(r'src=.+?(http.+?/comics/.+?)"')
    # Group 1: href of the link that immediately follows the "First" link.
    prevSearch = compile(r'First</a><a href="(http://www.pepsaga.com/comics/.+?/)"')
|
||||||
|
|
||||||
|
|
||||||
|
class PerkiGoth(_BasicScraper):
    # Scraper settings for mutt.purrsia.com.
    help = 'Index format: mm/dd/yyyy'
    latestUrl = 'http://mutt.purrsia.com/main.php'
    imageUrl = 'http://mutt.purrsia.com/main.php?date=%s'
    # Group 1: relative comics/ path of the strip image.
    imageSearch = compile(r'<img.+?src="(comics/.+?)"')
    # Group 1: ?date=n/n/n href of the "Prev" link.
    prevSearch = compile(r'<a.+?href="(\?date=\d+/\d+/\d+)">Prev</a>')
|
||||||
|
|
||||||
|
|
||||||
|
class Pixel(_BasicScraper):
    # Scraper settings for www.chrisdlugosz.net/pixel/.
    help = 'Index format: nnn'
    latestUrl = 'http://www.chrisdlugosz.net/pixel/'
    imageUrl = 'http://www.chrisdlugosz.net/pixel/%s.shtml'
    # Group 1: <digits>.png image followed by two <BR> tags.
    imageSearch = compile(r'<IMG SRC="(\d+\.png)" ALT=""><BR><BR>')
    # Group 1: <digits>.shtml HREF of the link wrapping _prev.png.
    prevSearch = compile(r'<A HREF="(\d+\.shtml)"><IMG SRC="_prev.png" BORDER=0 ALT=""></A>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class PiledHigherAndDeeper(_BasicScraper):
    # Scraper settings for www.phdcomics.com.
    help = 'Index format: n (unpadded)'
    imageUrl = 'http://www.phdcomics.com/comics/archive.php?comicid=%s'
    # bounceStarter receives the archive page plus a pattern for the link
    # wrapping next_button.gif.
    starter = bounceStarter(
        'http://www.phdcomics.com/comics/archive.php',
        compile(r'<a href=(archive\.php\?comicid=\d+)><img height=52 width=49 src=images/next_button\.gif border=0 align=middle>'))
    # Group 1: unquoted URL of comics/archive/phd<digits>[s].gif.
    imageSearch = compile(r'<img src=(http://www\.phdcomics\.com/comics/archive/phd\d+s?\.gif)')
    # Group 1: unquoted href of the link wrapping prev_button.gif.
    prevSearch = compile(r'<a href=(archive\.php\?comicid=\d+)><img height=52 width=49 src=images/prev_button\.gif border=0 align=middle>')
    # queryNamer is configured with the 'comicid' key and usePageUrl=True.
    namer = queryNamer('comicid', usePageUrl=True)
|
||||||
|
|
||||||
|
|
||||||
|
class Precocious(_BasicScraper):
    # Scraper settings for www.precociouscomic.com.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://www.precociouscomic.com/'
    imageUrl = 'http://www.precociouscomic.com/comic.php?page=%s'
    # Group 1: archive/strips/ path up to the next double quote.
    imageSearch = compile(r'(archive/strips/.+?)"')
    # Group 1: comic.php?page= link labelled "Previous", found after "First".
    prevSearch = compile(r'First.+?(comic.php\?page=.+?)">Previous<')
|
||||||
|
|
||||||
|
|
||||||
|
class PvPonline(_BasicScraper):
    # Scraper settings for www.pvponline.com.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://www.pvponline.com/'
    imageUrl = None
    # Group 1: absolute URL of comics/pvp<8 digits>.<extension>.
    imageSearch = compile(r'<img src="(http://www.pvponline.com/comics/pvp\d{8}\..+?)"', IGNORECASE)
    # Group 1: href of the link whose text starts with the "Previous" label.
    prevSearch = compile(r'<a href="(http://www.pvponline.com/[^"]+)"[^>]*>‹ Previous', IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def pensAndTales(name, baseUrl):
    # Build and return a scraper class for one comic hosted on a
    # pensandtales.com sub-site.
    #
    # name:    comic name; used in the generated class name
    #          ('PensAndTales_<name>') and in its `name` attribute
    #          ('PensAndTales/<name>').
    # baseUrl: front page of the comic; also the prefix of the per-strip URL.
    attrs = dict(
        name='PensAndTales/' + name,
        latestUrl=baseUrl,
        # Fixed: the template lacked the '%s' placeholder, so formatting it
        # with an index raised "not all arguments converted".
        imageUrl=baseUrl + '?date=%s',
        # Group 1: src of an image whose path contains comics/.
        imageSearch=compile(r'<img[^>]+?src="([^"]*?comics/.+?)"', IGNORECASE),
        # Group 1: ?date=<digits> href of the "Previous Comic" link, which is
        # either plain text or the alt text of an image.
        # Fixed: '(:?' was a typo for the non-capturing '(?:' and created a
        # stray second capture group.
        prevSearch=compile(r'<a href="([^"]*?\?date=\d+)">(?:<img[^>]+?alt=")?Previous Comic', IGNORECASE),
        help='Index format: yyyymmdd',
    )
    return type('PensAndTales_%s' % name, (_BasicScraper,), attrs)
|
||||||
|
|
||||||
|
|
||||||
|
# Scraper classes for the individual pensandtales.com comics.  Entries that
# are commented out are kept together with the reason they were disabled.

# XXX: using custom Wordpress layout
# th = pensAndTales('TreasureHunters', 'http://th.pensandtales.com/')

# XXX: comic broken, no content
# strangekith = pensAndTales('Strangekith', 'http://strangekith.pensandtales.com/')

# XXX: comic broken
# fireflycross = pensAndTales('FireflyCross', 'http://fireflycross.pensandtales.com/')

thosedestined = pensAndTales('ThoseDestined', 'http://thosedestined.pensandtales.com/')
evilish = pensAndTales('Evilish', 'http://evilish.pensandtales.com/')
redallover = pensAndTales('RedAllOver', 'http://redallover.pensandtales.com/')
stickyevil = pensAndTales('StickyEvil', 'http://stickyevil.pensandtales.com/')

# XXX: moved / layout changed
# ynt = pensAndTales('YamiNoTainai', 'http://ynt.pensandtales.com/')

earthbound = pensAndTales('Earthbound', 'http://earthbound.pensandtales.com/')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ProperBarn(_BasicScraper):
    # Scraper settings for www.nitrocosm.com/go/gag/.
    help = 'Index format: nnn'
    latestUrl = 'http://www.nitrocosm.com/go/gag/'
    imageUrl = 'http://www.nitrocosm.com/go/gag/%s/'
    # Group 1: src of the image with class "gallery_display".
    imageSearch = compile(r'<img class="gallery_display" src="([^"]+)"')
    # Group 1: href of the link wrapping the nav_btn_previous button.
    prevSearch = compile(r'<a href="([^"]+)"[^>]*><button type="submit" class="nav_btn_previous">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class PunksAndNerds(_BasicScraper):
    # Scraper settings for www.punksandnerds.com.
    help = 'Index format: nnn'
    latestUrl = 'http://www.punksandnerds.com/'
    # NOTE(review): the template ends with '/' right after the id value --
    # confirm the site expects that.
    imageUrl = 'http://www.punksandnerds.com/?id=%s/'
    # Group 1: absolute URL of an image below /img/comic/.
    imageSearch = compile(r'<img src="(http://www.punksandnerds.com/img/comic/.+?)"')
    # Group 1: href of the table-cell link wrapping backcomic.gif.
    prevSearch = compile(r'<td><a href="(.+?)"[^>]+?><img src="backcomic.gif"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class PunksAndNerdsOld(_BasicScraper):
    # Scraper settings for original.punksandnerds.com.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://original.punksandnerds.com/'
    imageUrl = 'http://original.punksandnerds.com/d/%s.html'
    # Group 1: /comics/ path in a src attribute.
    imageSearch = compile(r' src="(/comics/.+?)"')
    # Group 1: href of the link wrapping /previouscomic.gif.
    prevSearch = compile(r'><strong><a href="(.+?)"[^>]+?><img[^>]+?src="/previouscomic.gif">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class PlanescapeSurvival(_BasicScraper):
    # Scraper settings for planescapecomic.com.
    help = 'Index format: nnn'
    latestUrl = 'http://planescapecomic.com/'
    imageUrl = 'http://planescapecomic.com/%s.html'
    # Group 1: relative comics/ path in a src attribute.
    imageSearch = compile(r'src="(comics/.+?)"')
    # Group 1: href of the link wrapping the image with alt="Previous".
    prevSearch = compile(r'<a href="(.+?)"><img alt="Previous" ')

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Last path component of the page URL, cut at its first dot.
        lastComponent = pageUrl.split('/')[-1]
        stem = lastComponent.split('.')[0]
        return stem
|
21
dosagelib/plugins/q.py
Normal file
21
dosagelib/plugins/q.py
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
from re import compile
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class QuestionableContent(_BasicScraper):
    # Scraper settings for www.questionablecontent.net.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://www.questionablecontent.net/'
    imageUrl = 'http://www.questionablecontent.net/view.php?comic=%s'
    # Group 1: comics/<digits>.png path.
    imageSearch = compile(r'/(comics/\d+\.png)"')
    # Group 1: view.php?comic= href of the "Previous" link.
    prevSearch = compile(r'<a href="(view.php\?comic=\d+)">Previous')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Qwantz(_BasicScraper):
    # Scraper settings for www.qwantz.com.
    help = 'Index format: n'
    latestUrl = 'http://www.qwantz.com/index.php'
    imageUrl = 'http://www.qwantz.com/index.php?comic=%s'
    # Group 1: absolute /comics/ URL of the image with class "comic".
    imageSearch = compile(r'<img src="(http://www.qwantz.com/comics/.+?)" class="comic"')
    # Group 1: href of the link labelled with an arrow and "previous".
    prevSearch = compile(r'"><a href="(.+?)">← previous</a>')
|
65
dosagelib/plugins/r.py
Normal file
65
dosagelib/plugins/r.py
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
from re import compile
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, bounceStarter
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class RadioactivePanda(_BasicScraper):
    # Scraper settings for www.radioactivepanda.com.
    help = 'Index format: n (no padding)'
    latestUrl = 'http://www.radioactivepanda.com/'
    imageUrl = 'http://www.radioactivepanda.com/comic/%s'
    # Group 1: /Assets/ path of an image followed by "comicimg".
    imageSearch = compile(r'<img src="(/Assets/.*?)".+?"comicimg"')
    # Group 1: /comic/ href followed by "previous_btn".
    prevSearch = compile(r'<a href="(/comic/.*?)".+?previous_btn')
|
||||||
|
|
||||||
|
|
||||||
|
class Rascals(_BasicScraper):
    # Scraper settings for the Rascals comic on petitesymphony.com.
    help = 'Index format: non'
    latestUrl = 'http://petitesymphony.com/rascals'
    imageUrl = 'http://petitesymphony.com/comic/rascals/%s'
    # Group 1: absolute URL below /comics/ up to the next double quote.
    imageSearch = compile(r'(http://petitesymphony.com/comics/.+?)"')
    # Group 1: http href following the KR-nav-previous marker; the quote
    # characters are matched with '.' so either quoting style fits.
    prevSearch = compile(r"KR-nav-previous.><a href=.(http.+?).>")
|
||||||
|
|
||||||
|
|
||||||
|
class RealLife(_BasicScraper):
    # Scraper settings for www.reallifecomics.com.
    latestUrl = 'http://www.reallifecomics.com/'
    # Fixed typo: the path segment was spelled 'achive', while the links
    # matched by prevSearch below live under /archive/.
    imageUrl = 'http://www.reallifecomics.com/archive/%s.html'
    # Group 1: quoted /comics/ path.
    imageSearch = compile(r'"(/comics/.+?)"')
    # Group 1: quoted /archive/ link followed by "nav_previous".
    prevSearch = compile(r'"(/archive/.+?)".+?nav_previous')
    # Fixed: removed a stray closing parenthesis from the help text.
    help = 'Index format: yymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class RedString(_BasicScraper):
    # Scraper settings for www.redstring.strawberrycomics.com.
    help = 'Index format: nnn'
    latestUrl = 'http://www.redstring.strawberrycomics.com/'
    imageUrl = 'http://www.redstring.strawberrycomics.com/?p=%s'
    # Group 1: absolute URL of an image below /comics/.
    imageSearch = compile(r'<img src="(http://www.redstring.strawberrycomics.com/comics/.+?)"')
    # Group 1: href of the "Previous Comic" link.
    prevSearch = compile(r'<a href="(.+?)">Previous Comic</a>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Roza(_BasicScraper):
    # Scraper settings for www.junglestudio.com/roza/.
    latestUrl = 'http://www.junglestudio.com/roza/index.php'
    # URL template, not a regular expression: the '?' must not be escaped.
    # The previous value 'index.php\?date=%s' kept the backslash in the
    # string and therefore produced addresses containing a literal '\'.
    imageUrl = 'http://www.junglestudio.com/roza/index.php?date=%s'
    # Group 1: relative pages/ path of the strip image.
    imageSearch = compile(r'<img src="(pages/.+?)"')
    # Group 1: index.php?date= href followed by navtable_01.gif.
    prevSearch = compile(r'<a href="(index.php\?date=.+?)">[^>].+?navtable_01.gif')
    help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
class RedMeat(_BasicScraper):
    # Scraper settings for www.redmeat.com.
    help = 'Index format: yyyy-mm-dd'
    imageUrl = 'http://www.redmeat.com/redmeat/%s/index.html'
    # bounceStarter receives the "current" page plus a pattern for the
    # "next" link.
    starter = bounceStarter(
        'http://www.redmeat.com/redmeat/current/index.html',
        compile(r'<a href="(\.\./\d{4}-\d{2}-\d{2}/index\.html)">next</a>'))
    # Group 1: the fixed file name index-1.gif.
    imageSearch = compile(r'<img src="(index-1\.gif)" width="\d+" height="\d+" [^>]*>')
    # Group 1: ../yyyy-mm-dd/index.html href of the "previous" link.
    prevSearch = compile(r'<a href="(\.\./\d{4}-\d{2}-\d{2}/index\.html)">previous</a>')

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Second-to-last '/'-separated piece of the image URL.
        pieces = imageUrl.split('/')
        return pieces[-2]
|
||||||
|
|
||||||
|
class RunningWild(_BasicScraper):
    # Scraper settings for runningwild.katbox.net.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://runningwild.katbox.net/'
    imageUrl = 'http://runningwild.katbox.net/index.php?strip_id=%s'
    # Group 1: quoted attribute value containing strips/.
    imageSearch = compile(r'="(.+?strips/.+?)"')
    # Group 1: index.php?strip_id= link followed by "navigation_back".
    prevSearch = compile(r'(index.php\?strip_id=.+?)".+?navigation_back')
|
341
dosagelib/plugins/s.py
Normal file
341
dosagelib/plugins/s.py
Normal file
|
@ -0,0 +1,341 @@
|
||||||
|
from re import compile, MULTILINE, IGNORECASE, sub
|
||||||
|
from os.path import splitext
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, bounceStarter, indirectStarter
|
||||||
|
|
||||||
|
|
||||||
|
class SailorsunOrg(_BasicScraper):
    # Scraper settings for www.sailorsun.org.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://www.sailorsun.org/'
    imageUrl = 'http://www.sailorsun.org/browse.php?comicID=%s'
    # Group 1: comics/ path up to the next double quote.
    imageSearch = compile(r'(comics/.+?)"')
    # Group 1: browse.php link followed by /prev.gif.
    prevSearch = compile(r'/(browse.php.+?)".+?/prev.gif')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SamAndFuzzy(_BasicScraper):
    # Scraper settings for samandfuzzy.com.
    help = 'Index format: nnnn'
    latestUrl = 'http://www.samandfuzzy.com/'
    imageUrl = 'http://samandfuzzy.com/%s'
    # Group 1: /comics/ path directly followed by an alt attribute.
    imageSearch = compile(r'(/comics/.+?)" alt')
    # Group 1: href of the link wrapping imgint/nav_prev.gif.
    prevSearch = compile(r'"><a href="(.+?)"><img src="imgint/nav_prev.gif"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SarahZero(_BasicScraper):
    # Scraper settings for www.sarahzero.com.
    help = 'Index format: nnnn'
    latestUrl = 'http://www.sarahzero.com/'
    imageUrl = 'http://www.sarahzero.com/sz_%s.html'
    # Group 1: image path starting with z_spreads or z_temp and followed by
    # an empty alt attribute.
    imageSearch = compile(r'<img src="(z_(?:(?:spreads)|(?:temp)).+?)" alt=""')
    # Group 1: sz_ href of the element whose onmouseout handler restores the
    # sz_05_nav image.
    prevSearch = compile(r'onmouseout="changeImages\(\'sz_05_nav\',\'z_site/sz_05_nav.gif\'\);return true" href="(sz_.+?)">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ScaryGoRound(_BasicScraper):
    # Scraper settings for www.scarygoround.com.
    # NOTE(review): help says "n (unpadded)" although the URL takes a ?date=
    # value and strip files are named with 8 digits -- confirm the format.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://www.scarygoround.com/'
    imageUrl = 'http://www.scarygoround.com/?date=%s'
    # Group 1: strips/<8 digits>.<3 chars> image path.
    imageSearch = compile(r'<img src="(strips/\d{8}\..{3})"')
    # Group 1: href of the link wrapping site-images/previous.png.
    prevSearch = compile(r'f><a href="(.+?)"><img src="site-images/previous.png"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SchoolBites(_BasicScraper):
    # Scraper settings for www.schoolbites.net.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://www.schoolbites.net/'
    imageUrl = 'http://www.schoolbites.net/d/%s.html'
    # Group 1: /comics/ path up to the next double quote.
    imageSearch = compile(r'(/comics/.+?)"')
    # Group 1: /d/....html link located between "first_day" and
    # "/previous_day.gif".
    prevSearch = compile(r'first_day.+?(/d/.+?.html).+?/previous_day.gif')
|
||||||
|
|
||||||
|
|
||||||
|
class SinFest(_BasicScraper):
    # Scraper settings for www.sinfest.net.
    name = 'KeenSpot/SinFest'
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://www.sinfest.net/'
    imageUrl = 'http://www.sinfest.net/archive_page.php?comicID=%s'
    # Group 1: /comikaze/comics/ part of an image src.
    imageSearch = compile(r'<img src=".+?(/comikaze/comics/.+?)"')
    # Group 1: /archive_page.php?comicID= link followed by "prev_a".
    prevSearch = compile(r'(/archive_page.php\?comicID=.+?)".+?prev_a')
|
||||||
|
|
||||||
|
|
||||||
|
class SlightlyDamned(_BasicScraper):
    # Scraper settings for raizap.com/sdamned/.
    latestUrl = 'http://raizap.com/sdamned/index.php'
    # URL template, not a regular expression: the '?' must not be escaped.
    # The previous value 'pages.php\?comicID=%s' kept the backslash in the
    # string and therefore produced addresses containing a literal '\'.
    imageUrl = 'http://raizap.com/sdamned/pages.php?comicID=%s'
    # Group 1: quoted value containing comics2/.
    imageSearch = compile(r'"(.+?comics2/.+?)"')
    # Group 1: pages.php?comicID= link followed by "back1".
    prevSearch = compile(r'</a>.+?(pages.php\?comicID=.+?)".+?back1')
    help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SluggyFreelance(_BasicScraper):
    # Scraper settings for www.sluggy.com.
    help = 'Index format: yymmdd'
    latestUrl = 'http://www.sluggy.com/'
    imageUrl = 'http://www.sluggy.com/comics/archives/daily/%s'
    # Group 1: /images/comics/ path of the strip image.
    imageSearch = compile(r'<img src="(/images/comics/.+?)"')
    # Group 1: href of the link wrapping the ui-icon-seek-prev span.
    prevSearch = compile(r'<a href="(.+?)"[^>]+?><span class="ui-icon ui-icon-seek-prev">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SodiumEyes(_BasicScraper):
    # Scraper settings for sodiumeyes.com.
    help = 'Index format: nnn'
    imageUrl = 'http://sodiumeyes.com/%s'
    # indirectStarter receives the front page plus a pattern for links whose
    # path starts with four digits.
    starter = indirectStarter(
        'http://sodiumeyes.com/',
        compile(r'<a href="http://sodiumeyes.com/(\d\d\d\d.+?/)">'))
    # Group 1: /comic/ path up to the next double quote.
    imageSearch = compile(r'(/comic/.+?)"')
    # Group 1: site-relative path of the link followed by "comic-prev".
    prevSearch = compile(r'"http://sodiumeyes.com/(.+?/)"><.+?comic-prev')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SpareParts(_BasicScraper):
    # Scraper settings for www.sparepartscomics.com.
    latestUrl = 'http://www.sparepartscomics.com/'
    # Fixed two defects in this URL template:
    #  * '\\?' put a literal backslash into the address; a template needs no
    #    regex escaping.
    #  * 's%' was an inverted '%s', an incomplete format specifier that makes
    #    %-formatting raise instead of inserting the index.
    imageUrl = 'http://www.sparepartscomics.com/comics/?date=%s'
    # Group 1: /comics/2... path ended by a double quote or a space.
    imageSearch = compile(r'(/comics/2.+?)[" ]')
    # Group 1: /comics/ or index.php? link followed by "Prev".
    prevSearch = compile(r'(/comics/.+?|index.php\?.+?)".+?Prev')
    help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Stubble(_BasicScraper):
    # Scraper settings for www.stubblecomics.com.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://www.stubblecomics.com/d/20051230.html'
    imageUrl = 'http://www.stubblecomics.com/d/%s.html'
    # Group 1: quoted /comics/ path.
    imageSearch = compile(r'"(/comics/.*?)"')
    # Group 1: quoted value followed by "backarrow".
    prevSearch = compile(r'"(.*?)".*?backarrow')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class StrawberryDeathCake(_BasicScraper):
    # Scraper settings for rainchildstudios.com/strawberry/.
    help = 'Index format: n (good luck)'
    latestUrl = 'http://rainchildstudios.com/strawberry/'
    imageUrl = 'http://rainchildstudios.com/strawberry/?p=%s'
    # Group 1: comics/ path up to the next double quote.
    imageSearch = compile(r'/(comics/.+?)"')
    # Group 1: ?p= query of a strawberry/ link followed by the "prev" span.
    prevSearch = compile(r'strawberry/(\?p=.+?)".+?span class="prev"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SuburbanTribe(_BasicScraper):
    # Scraper settings for www.pixelwhip.com.
    latestUrl = 'http://www.pixelwhip.com/'
    # Fixed: the query string read '?p%s' (no '='), so the index was glued
    # onto the parameter name.  The other '?p=' templates in this file all
    # use '?p=%s'.
    imageUrl = 'http://www.pixelwhip.com/?p=%s'
    # Group 1: absolute URL of an image below /comics/.
    imageSearch = compile(r'<img src="(http://www.pixelwhip.com/comics/.+?)"')
    # Group 1: href of the rel="prev" link inside the nav-previous div.
    prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
    help = 'Index format: nnnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SuccubusJustice(_BasicScraper):
    # Scraper settings for www.succubus-justice.com.
    help = 'Index format: nnn'
    latestUrl = 'http://www.succubus-justice.com/Com%20main%20frame.htm'
    # '%%20' yields a literal '%20' once the index has been substituted.
    imageUrl = 'http://www.succubus-justice.com/%s%%20frame.htm'
    # Group 1: /<digits><any char><3-4 word chars> image path inside a
    # centered paragraph.
    imageSearch = compile(r'<p align="center"><img src="(/\d+.\w{3,4})"')
    # Group 1: .htm href (with or without leading '/') of the link wrapping
    # 124.gif.
    prevSearch = compile(r'<a href="(/[\w%]+\.htm|[\w%]+\.htm)"[^>]+?><img src="124.gif"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Supafine(_BasicScraper):
    # Scraper settings for www.supafine.com (classic comics).
    help = 'Index format: nnn'
    latestUrl = 'http://www.supafine.com/comics/classic.php'
    imageUrl = 'http://www.supafine.com/comics/classic.php?comicID=%s'
    # Group 1: absolute URL of an image below /comics/.
    imageSearch = compile(r'<img src="(http://www.supafine.com/comics/.+?)"')
    # Group 1: classic.php? href of the link wrapping previous.gif.
    prevSearch = compile(r'<a href="(http://www.supafine.com/comics/classic.php\?.+?)"><img src="http://supafine.com/comikaze/images/previous.gif" ')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SomethingPositive(_BasicScraper):
    # Scraper settings for www.somethingpositive.net.
    help = 'Index format: mmddyyyy'
    latestUrl = 'http://www.somethingpositive.net/'
    imageUrl = 'http://www.somethingpositive.net/sp%s.shtml'
    # Group 1: /arch/sp<digits>... or /sp<digits>... image path.
    imageSearch = compile(r'<img src="(/arch/sp\d+.\w{3,4}|/sp\d+.\w{3,4})"')
    # Group 1: sp<8 digits>.shtml href; the second group lists the three
    # markup variants accepted for the "previous" label.
    prevSearch = compile(r'<a \n?href="(sp\d{8}\.shtml)">(<font size=1\nface=".+?"\nSTYLE=".+?">Previous|<img src="images2/previous|<img src="images/previous.gif")', MULTILINE | IGNORECASE)

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Last path component of the page URL, cut at its first dot.
        lastComponent = pageUrl.split('/')[-1]
        stem = lastComponent.split('.')[0]
        return stem
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SexyLosers(_BasicScraper):
    # Scraper settings for www.sexylosers.com.
    help = 'Index format: nnn'
    imageUrl = 'http://www.sexylosers.com/%s.html'
    # indirectStarter receives the front page plus a pattern for the
    # "Latest SL Comic (#n)" link.
    starter = indirectStarter(
        'http://www.sexylosers.com/',
        compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>', IGNORECASE))
    # Group 1: comics/ image path; whitespace around '=' is tolerated.
    imageSearch = compile(r'<img src\s*=\s*"\s*(comics/[\w\.]+?)"', IGNORECASE)
    # Group 1: /<3 digits>.<ext> href of the link rendered as "<<".
    prevSearch = compile(r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<', IGNORECASE)

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Join the page stem and the image stem with a dash; a stem is the
        # last path component cut at its first dot.
        stems = [address.split('/')[-1].split('.')[0]
                 for address in (pageUrl, imageUrl)]
        return stems[0] + '-' + stems[1]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def smackJeeves(names):
    # Create one scraper class per Smack Jeeves short name and return them in
    # a dict keyed by that short name.
    class _SJScraper(_BasicScraper):
        help = 'Index format: nnnn (some increasing number)'
        # Group 1: URL of an image below images/uploaded/comics/.
        imageSearch = compile(r'<img src="(http://www\.smackjeeves\.com/images/uploaded/comics/[^"]*)"', IGNORECASE)
        # Group 1: /comics/<digits>/... href of the "< Previous" image link.
        prevSearch = compile(r'<a href="(/comics/\d+/[^"]*)"><img[^>]*alt="< Previous"', IGNORECASE)

        # NOTE(review): makeScraper() below sets name, baseUrl and starter but
        # no shortName, so this lookup fails unless _BasicScraper supplies
        # one; the result also carries no '%s' placeholder -- confirm intent.
        @property
        def imageUrl(self):
            return self.baseUrl + self.shortName

        @classmethod
        def namer(cls, imageUrl, pageUrl):
            # Second-to-last '/'-separated piece of the page URL.
            return pageUrl.split('/')[-2]

    def makeScraper(shortName):
        # Derive the per-comic class from the shared scraper base.
        baseUrl = 'http://%s.smackjeeves.com/comics/' % shortName
        className = 'SmackJeeves_%s' % shortName
        nextSearch = compile(r'<a href="(/comics/\d+/[^"]*)"><img[^>]*alt="Next >"', IGNORECASE)
        attrs = dict(
            name='SmackJeeves/' + shortName,
            baseUrl=baseUrl,
            starter=bounceStarter(baseUrl, nextSearch))
        return type(className, (_SJScraper,), attrs)

    scrapers = {}
    for shortName in names:
        scrapers[shortName] = makeScraper(shortName)
    return scrapers


# Publish the generated classes as module-level names.
globals().update(smackJeeves([
    '20galaxies',
    'axe13',
    'beartholomew',
    'bliss',
    'durian',
    'heard',
    'mpmcomic',
    'nlmo-project',
    'paranoidloyd',
    'thatdreamagain',
    'wowcomics',
]))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class StarCrossdDestiny(_BasicScraper):
    # Scraper settings for www.starcrossd.net.
    help = 'Index format: nnnnnnnn'
    latestUrl = 'http://www.starcrossd.net/comic.html'
    imageUrl = 'http://www.starcrossd.net/archives/%s.html'
    # Group 1: image URL below ch1/, strips/ or book2/.
    imageSearch = compile(r'<img src="(http://www\.starcrossd\.net/(?:ch1|strips|book2)/[^"]+)">')
    # Group 1: archives/<digits>.html href (optionally below ch1/) of the
    # link whose text starts with "prev".
    prevSearch = compile(r'<a href="(http://www\.starcrossd\.net/(?:ch1/)?archives/\d+\.html)"[^>]*"[^"]*"[^>]*>prev', IGNORECASE)

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Produces "<directory>-<file name without extension>" from the last
        # two path components after normalising the directory part.
        if 'ch1' not in imageUrl:
            # At first all images were stored in a strips/ directory but that
            # was changed with the introduction of book2
            imageUrl = sub('(?:strips)|(?:images)', 'book1', imageUrl)
        elif 'strips' in imageUrl:
            imageUrl = imageUrl.replace('strips/', '')
        directory, filename = imageUrl.split('/')[-2:]
        return directory + '-' + splitext(filename)[0]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SGVY(_BasicScraper):
    # Scraper settings for www.sgvy.com.
    help = 'Index format: edda-issue-page'
    # NOTE(review): the starter pattern below matches links under archives/,
    # while this template has no archives/ segment -- confirm the path.
    imageUrl = 'http://www.sgvy.com/Edda%s/Issue%s/Page%s.html'
    # indirectStarter receives the front page plus a pattern for archive
    # links (an Edda issue page or a Cover page).
    starter = indirectStarter(
        'http://www.sgvy.com/',
        compile(r'<a href="(archives/(?:Edda\d+|Holiday)/(?:Issue\d+/Page\d+|Cover)\.html)">'))
    # Group 1: relative images/sgvy/sgvy-... path of the "comic" image.
    imageSearch = compile(r'"comic" src="((?:\.\./)+images/sgvy/sgvy-[-\w\d]+\.\w+)"')
    # Group 1: relative href of the "Prev" link.
    prevSearch = compile(r'<a href="((?:\.\./)+(?:Edda\d+|Holiday)/(?:Issue\d+/Page\d+|Cover)\.html)">Prev</a>')

    def setStrip(self, index):
        # Split the dash-separated index into integers and use them to fill
        # the three placeholders of imageUrl.
        numbers = [int(piece) for piece in index.split('-')]
        self.currentUrl = self.imageUrl % tuple(numbers)
|
||||||
|
|
||||||
|
|
||||||
|
class Spamusement(_BasicScraper):
    # Scraper settings for spamusement.com.
    help = 'Index format: n (unpadded)'
    imageUrl = 'http://spamusement.com/index.php/comics/view/%s'
    # Group 1: absolute URL of gfx/<digits>.<extension>.
    imageSearch = compile(r'<img src="(http://spamusement.com/gfx/\d+\..+?)"', IGNORECASE)
    # Group 1: href of a link pointing at comics/view/...
    prevSearch = compile(r'<a href="(http://spamusement.com/index.php/comics/view/.+?)">', IGNORECASE)
    # The same link pattern is also handed to indirectStarter.
    starter = indirectStarter('http://spamusement.com/', prevSearch)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def snafuComics():
    # Create one scraper class per snafu-comics.com sub-site and return them
    # in a dict keyed by comic name.
    class _SnafuComics(_BasicScraper):
        help = 'Index format: n (unpadded)'
        # Group 1: comics/<6 digits>_<name>.<ext> path of an unquoted src.
        imageSearch = compile(r'<img src=http://\w+\.snafu-comics\.com/(comics/\d{6}_\w*\.\w{3,4})')
        # Group 1: ?comic_id= href of the "Previous" link.
        prevSearch = compile(r'<a href="(\?comic_id=\d+)">Previous</a>')

        # NOTE(review): this template uses strip_id while prevSearch matches
        # comic_id links -- confirm which parameter the site expects.
        @property
        def imageUrl(self):
            return self.latestUrl + 'index.php?strip_id=%s'

    # Comic name -> host name prefix on snafu-comics.com.
    comics = {
        'Grim': 'grim',
        'KOF': 'kof',
        'PowerPuffGirls': 'ppg',
        'Snafu': 'www',
        'Tin': 'tin',
        'TW': 'tw',
        'Sugar': 'sugar',
        'SF': 'sf',
        'Titan': 'titan',
        'EA': 'ea',
        'Zim': 'zim',
        'Soul': 'soul',
        'FT': 'ft',
        'Bunnywith': 'bunnywith',
        'Braindead': 'braindead',
    }

    url = 'http://%s.snafu-comics.com/'
    scrapers = {}
    for name, host in comics.items():
        attrs = dict(name='SnafuComics/' + name, latestUrl=url % host)
        scrapers[name] = type('SnafuComics_%s' % name, (_SnafuComics,), attrs)
    return scrapers


# Publish the generated classes as module-level names.
globals().update(snafuComics())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SosiaalisestiRajoittuneet(_BasicScraper):
    # Scraper settings for sosiaalisestirajoittuneet.fi.
    # NOTE(review): unlike the other scrapers here, this class defines no
    # `help` text -- confirm whether one is required.
    latestUrl = 'http://sosiaalisestirajoittuneet.fi/index_nocomment.php'
    imageUrl = 'http://sosiaalisestirajoittuneet.fi/index_nocomment.php?date=%s'
    # Group 1: href of the link wrapping active_edellinen.gif.
    prevSearch = compile(r'<a href="(index_nocomment\.php\?date=\d+)"><img\s+src="images/active_edellinen\.gif"', MULTILINE)
    # Group 1: strips/web/<digits>.jpg image path.
    imageSearch = compile(r'<img src="(strips/web/\d+.jpg)" alt=".*?" />')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class StrangeCandy(_BasicScraper):
    # Scraper settings for www.strangecandy.net.
    # NOTE(review): 'yyyyddmm' is presumably meant to be 'yyyymmdd' -- confirm
    # against the site's page names.
    help = 'Index format: yyyyddmm'
    latestUrl = 'http://www.strangecandy.net/'
    imageUrl = 'http://www.strangecandy.net/d/%s.html'
    # Group 1: absolute URL of comics/<8 digits>.<ext>.
    imageSearch = compile(r'src="(http://www.strangecandy.net/comics/\d{8}.\w{1,4})"')
    # Group 1: d/<8 digits>.html href of the link wrapping previous_day.gif.
    prevSearch = compile(r'<a href="(http://www.strangecandy.net/d/\d{8}.html)"><img[^>]+?src="http://www.strangecandy.net/images/previous_day.gif"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SMBC(_BasicScraper):
    # Settings for smbc-comics.com.
    help = 'Index format: nnnn'
    latestUrl = 'http://www.smbc-comics.com/'
    imageUrl = latestUrl + 'index.php?db=comics&id=%s'
    # Image tag uses single quotes; the file name contains an 8-digit run.
    imageSearch = compile(r'<img src=\'(.+?\d{8}.\w{1,4})\'>')
    # The previous link is located via the coordinates of its image-map area.
    prevSearch = compile(
        r'131,13,216,84"\n\s+href="(.+?)#comic"\n>',
        MULTILINE)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SomethingLikeLife(_BasicScraper):
    # Settings for pulledpunches.com; strips are addressed as ?p=<index>.
    help = 'Index format: nn'
    latestUrl = 'http://www.pulledpunches.com/'
    imageUrl = latestUrl + '?p=%s'
    # Captures the absolute URL of the image below /comics/.
    imageSearch = compile(r'<img src="(http://www.pulledpunches.com/comics/[^"]*)"')
    # Captures the link wrapped around the back1.gif button.
    prevSearch = compile(r'</a> <a href="(http://www.pulledpunches.com/\?p=[^"]*)"><img src="back1.gif"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class StickEmUpComics(_BasicScraper):
    # Settings for stickemupcomics.com.
    help = 'Index format: yyyy/mm/dd/stripname'
    latestUrl = 'http://stickemupcomics.com/'
    imageUrl = latestUrl + '%s'
    # Captures the absolute URL of the image below /comics/.
    imageSearch = compile(r'<img src="(http://stickemupcomics.com/comics/.+?)"')
    # Captures the href of the anchor containing the "prev" span.
    prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SexDemonBag(_BasicScraper):
    # Settings for sexdemonbag.com; strips are addressed as ?p=<index>.
    help = 'Index format: nnn'
    latestUrl = 'http://www.sexdemonbag.com/'
    imageUrl = latestUrl + '?p=%s'
    # Captures the absolute URL of the image below /comics/.
    imageSearch = compile(r'<img src="(http://www.sexdemonbag.com/comics/.+?)"')
    # Captures the first link inside the "nav-previous" div.
    prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
|
141
dosagelib/plugins/t.py
Normal file
141
dosagelib/plugins/t.py
Normal file
|
@ -0,0 +1,141 @@
|
||||||
|
from re import compile, IGNORECASE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, indirectStarter
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TalesOfPylea(_BasicScraper):
    # Settings for talesofpylea.com.
    help = 'Index format: nnn'
    latestUrl = 'http://talesofpylea.com/'
    imageUrl = latestUrl + '%s/'
    # Captures the relative image path below istrip_files/strips/.
    imageSearch = compile(r'<img src="(istrip_files/strips/.+?)"')
    # Captures the href of the "Back" link (note the leading space in the pattern).
    prevSearch = compile(r' <a href="(.+?)">Back</a>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TheNoob(_BasicScraper):
    # Settings for thenoobcomic.com; strips are addressed as index.php?pos=<index>.
    latestUrl = 'http://www.thenoobcomic.com/index.php'
    # Fixed: the template used to end in a bare '%' ('...?pos=%'). That is an
    # incomplete conversion, so `imageUrl % index` raised ValueError instead of
    # producing a URL.
    imageUrl = 'http://www.thenoobcomic.com/index.php?pos=%s'
    # Captures the site-relative image path below /headquarters/comics/.
    imageSearch = compile(r'<img src="(/headquarters/comics/.+?)"')
    # Captures the href of the "comic_nav_previous_button" anchor.
    prevSearch = compile(r'<a class="comic_nav_previous_button" href="(.+?)"></a>')
    help = 'Index format: nnnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TheOrderOfTheStick(_BasicScraper):
    # Settings for The Order of the Stick on giantitp.com.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://www.giantitp.com/'
    # NOTE(review): this template points below /comics/images/, while the
    # navigation regexes here match /comics/ootsNNNN.html pages - confirm that
    # this is the intended index URL.
    imageUrl = latestUrl + 'comics/images/%s'
    # Captures the site-relative image path (upper-case IMG tag).
    imageSearch = compile(r'<IMG src="(/comics/images/.+?)">')
    # Captures the page linked from the ComicNav_Back button.
    prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
    # The start page is found indirectly: the first ootsNNNN.html link on the home page.
    starter = indirectStarter(
        'http://www.giantitp.com/',
        compile(r'<A href="(/comics/oots\d{4}\.html)"'))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TheParkingLotIsFull(_BasicScraper):
    # Settings for plif.courageunfettered.com; navigation walks the per-year archive pages.
    help = 'Index format: nnn'
    latestUrl = 'http://plif.courageunfettered.com/archive/arch2002.htm'
    imageUrl = 'http://plif.courageunfettered.com/archive/wc%s.gif'
    # Captures the wc<digits>.<ext> file linked from a centered table cell.
    imageSearch = compile(r'<td align="center"><A TARGET=_parent HREF="(wc\d+\..+?)">')
    # Captures the arch<year>.htm link that follows a "-" separator.
    prevSearch = compile(r'-\s*\n\s*<A HREF="(arch\d{4}\.htm)">\d{4}</A>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TheWotch(_BasicScraper):
    # Settings for thewotch.com; strips are addressed as ?epDate=<date>.
    help = 'Index format: yyyy-mm-dd'
    latestUrl = 'http://www.thewotch.com/'
    imageUrl = latestUrl + '?epDate=%s'
    # Captures the relative image path below comics/ (single-quoted attribute).
    imageSearch = compile(r"<img.+?src='(comics/.+?)'")
    # Captures the target of the <link rel='Previous'> element.
    prevSearch = compile(r"<link rel='Previous' href='(\?epDate=\d+-\d+-\d+)'")
|
||||||
|
|
||||||
|
|
||||||
|
class Thorn(_BasicScraper):
    # Settings for the Thorn comic hosted on mimisgrotto.com.
    help = 'Index format: nnn'
    latestUrl = 'http://www.mimisgrotto.com/thorn/index.html'
    imageUrl = 'http://www.mimisgrotto.com/thorn/%s.html'
    # Captures any quoted path starting with strips/.
    imageSearch = compile(r'"(strips/.+?)"')
    # Captures the three-digit page name preceding the "Prev" link text.
    prevSearch = compile(r'(\d[\d][\d].html)">Prev')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TwoTwoOneFour(_BasicScraper):
    # Settings for the "2214 classic" gallery on nitrocosm.com.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://www.nitrocosm.com/go/2214_classic/'
    imageUrl = latestUrl + '%s/'
    # Captures the src of the image with class "gallery_display".
    imageSearch = compile(r'<img class="gallery_display" src="([^"]+)"')
    # Captures the link wrapped around the "nav_btn_previous" button.
    prevSearch = compile(r'<a href="([^"]+)"[^>]*><button type="submit" class="nav_btn_previous">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TheWhiteboard(_BasicScraper):
    # Settings for the-whiteboard.com; pages are named auto<index>.html.
    help = 'Index format: twb or wb + n wg. twb1000'
    latestUrl = 'http://www.the-whiteboard.com/'
    imageUrl = latestUrl + 'auto%s.html'
    # Captures an autotwb<n>... or autowb<n>... image name, case-insensitively.
    imageSearch = compile(
        r'<img SRC="(autotwb\d{1,4}.+?|autowb\d{1,4}.+?)">',
        IGNORECASE)
    # Captures the href of the "previous" text link (note the leading space).
    prevSearch = compile(
        r' <a href="(.+?)">previous</a>',
        IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class _TheFallenAngel(_BasicScraper):
    # Shared settings for comics served by the autokeenlite.cgi script on
    # thefallenangel.co.uk. Subclasses provide `shortName`, which selects the
    # per-comic cgi-bin directory.
    help = 'Index format: yyyymmdd'
    # Captures the absolute image URL below any "<word>comics/" directory.
    imageSearch = compile(r'SRC="(http://www.thefallenangel.co.uk/\w+comics/.+?)"')
    # Captures the link wrapped around the previousday.jpg button.
    prevSearch = compile(r' <a href="(http://www.thefallenangel.co.uk/.+?)"><img[^>]+?src="http://www.thefallenangel.co.uk/images/previousday.jpg"')

    @property
    def baseUrl(self):
        # CGI endpoint of this comic, derived from its shortName.
        template = 'http://www.thefallenangel.co.uk/cgi-bin/%sautokeen/autokeenlite.cgi'
        return template % (self.shortName,)

    @property
    def imageUrl(self):
        # Endpoint plus a date query; the %s placeholder is left for the caller.
        endpoint = self.baseUrl
        return endpoint + '?date=%s'

    def starter(self):
        # Scraping starts at the bare CGI endpoint.
        return self.baseUrl
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class HighMaintenance(_TheFallenAngel):
    # `shortName` is the cgi-bin prefix consumed by _TheFallenAngel.baseUrl.
    shortName = 'hm'
    name = 'TheFallenAngel/HighMaintenance'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class FAWK(_TheFallenAngel):
    # `shortName` is the cgi-bin prefix consumed by _TheFallenAngel.baseUrl.
    shortName = 'fawk'
    name = 'TheFallenAngel/FAWK'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class MalloryChan(_TheFallenAngel):
    # `shortName` is the cgi-bin prefix consumed by _TheFallenAngel.baseUrl.
    shortName = 'mallorychan'
    name = 'TheFallenAngel/MalloryChan'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class HMHigh(_BasicScraper):
    # Settings for the /hmhigh/ comic on thefallenangel.co.uk. Unlike the
    # _TheFallenAngel subclasses it is addressed with ?id=<index>.
    name = 'TheFallenAngel/HMHigh'
    help = 'Index format: nnn'
    latestUrl = 'http://www.thefallenangel.co.uk/hmhigh/'
    imageUrl = latestUrl + '?id=%s'
    # Captures the absolute image URL below /hmhigh/img/comic/.
    imageSearch = compile(r'<img src="(http://www.thefallenangel.co.uk/hmhigh/img/comic/.+?)"')
    # Captures the href of the titled "Prev" link (note the leading space).
    prevSearch = compile(r' <a href="(http://www.thefallenangel.co.uk/.+?)" title=".+?">Prev</a>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TheOuterQuarter(_BasicScraper):
    # Settings for theouterquarter.com.
    help = 'Index format: nnn'
    latestUrl = 'http://theouterquarter.com/'
    imageUrl = latestUrl + 'comic/%s'
    # Captures the absolute URL of the image below /comics/.
    imageSearch = compile(r'<img src="(http://theouterquarter.com/comics/.+?)"')
    # Captures the rel="prev" link inside the "nav-previous" div.
    prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TheHorrificAdventuresOfFranky(_BasicScraper):
    # Settings for boneyardfranky.com; strips are addressed as ?p=<index>.
    help = 'Index format: nnn'
    latestUrl = 'http://www.boneyardfranky.com/'
    imageUrl = latestUrl + '?p=%s'
    # Captures the absolute URL of the image below /comics/.
    imageSearch = compile(r'<img src="(http://www.boneyardfranky.com/comics/.+?)"')
    # Captures the first link inside the "nav-previous" div.
    prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
|
62
dosagelib/plugins/u.py
Normal file
62
dosagelib/plugins/u.py
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
from re import compile, IGNORECASE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, bounceStarter, indirectStarter
|
||||||
|
from ..util import getQueryParams
|
||||||
|
|
||||||
|
|
||||||
|
class UnderPower(_BasicScraper):
    # Settings for underpower.non-essential.com.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://underpower.non-essential.com/'
    imageUrl = latestUrl + 'index.php?comic=%s'
    # Captures the relative image path comics/<8 digits>.<ext>.
    imageSearch = compile(r'<img src="(comics/\d{8}\..+?)"')
    # Captures the link wrapped around the previous-comic.gif button.
    prevSearch = compile(r'<a href="(/index.php\?comic=\d{8})"><img src="images/previous-comic\.gif"')
|
||||||
|
|
||||||
|
|
||||||
|
class Undertow(_BasicScraper):
    # Settings for undertow.dreamshards.org. There is no latestUrl; the start
    # page is discovered through `starter` instead.
    help = 'Index format: good luck !'
    imageUrl = 'http://undertow.dreamshards.org/%s'
    # Captures the src of the first <img> on the page.
    imageSearch = compile(r'<img src="(.+?)"')
    # Captures an href that is followed (on the same line) by "teynpoint".
    prevSearch = compile(r'href="(.+?)".+?teynpoint')
    # Follow the home page link labelled "Most recent page".
    starter = indirectStarter(
        'http://undertow.dreamshards.org/',
        compile(r'href="(.+?)".+?Most recent page'))
|
||||||
|
|
||||||
|
|
||||||
|
class UnicornJelly(_BasicScraper):
    # Settings for unicornjelly.com; traversal starts at a fixed page (uni666.html).
    help = 'Index format: nnn'
    latestUrl = 'http://unicornjelly.com/uni666.html'
    imageUrl = 'http://unicornjelly.com/uni%s.html'
    # Captures the image that directly follows a closing </TABLE>, optionally
    # wrapped in a FONT tag.
    imageSearch = compile(r'</TABLE>(?:<FONT COLOR="BLACK">)?<IMG SRC="(images/[^"]+)" WIDTH=')
    # Captures the page linked from the back00.gif button; page names may carry
    # a b/c/s suffix.
    prevSearch = compile(r'<A HREF="(uni\d{3}[bcs]?\.html)">(<FONT COLOR="BLACK">)?<IMG SRC="images/back00\.gif"')
|
||||||
|
|
||||||
|
|
||||||
|
class UserFriendly(_BasicScraper):
    # Settings for the "classic" mode of ars.userfriendly.org.
    help = 'Index format: yyyymmdd'
    imageUrl = 'http://ars.userfriendly.org/cartoons/?id=%s&mode=classic'
    # Captures the absolute URL of the archived .gif.
    imageSearch = compile(r'<img border="0" src="(http://www.userfriendly.org/cartoons/archives/\d{2}\w{3}/.+?\.gif)"')
    # Captures the image-map area whose alt text is "Previous Cartoon".
    prevSearch = compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="Previous Cartoon">')
    # Start page is resolved by bounceStarter from the classic landing page,
    # using the image-map area with an empty alt text.
    starter = bounceStarter(
        'http://ars.userfriendly.org/cartoons/?mode=classic',
        compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="">'))

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Name the file "uf" + the page's id query value minus its first two
        # characters.
        stripId = getQueryParams(pageUrl)['id'][0]
        return 'uf%s' % (stripId[2:],)
|
||||||
|
|
||||||
|
|
||||||
|
class UndeadFriend(_BasicScraper):
    # Settings for undeadfriend.com; archive pages live under /d/<index>.html.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://www.undeadfriend.com/'
    imageUrl = latestUrl + 'd/%s.html'
    # Captures the absolute URL of the image below /comics/, case-insensitively.
    imageSearch = compile(
        r'src="(http://www\.undeadfriend\.com/comics/.+?)"',
        IGNORECASE)
    # Captures the archive page linked from the "previous_day" button.
    prevSearch = compile(
        r'<a.+?href="(http://www\.undeadfriend\.com/d/\d+?\.html)"><img border="0" name="previous_day" alt="Previous comic" src="http://www\.undeadfriend\.com/images/previous_day\.jpg',
        IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
class UnspeakableVault(_BasicScraper):
    # Settings for the Unspeakable Vault pages on macguff.fr. There is no
    # latestUrl; the start page is discovered through `starter`.
    help = 'Index format: nn or nnn'
    imageUrl = 'http://www.macguff.fr/goomi/unspeakable/WEBIMAGES/CARTOON/vault%s.html'
    # Captures a quoted path starting with WEBIMAGES/CARTOON/.
    imageSearch = compile(r'(WEBIMAGES/CARTOON/.+?)"')
    # Captures the href that follows the text "PREVIOUS".
    prevSearch = compile(r'PREVIOUS.+?" href="(.+?)"')
    # Follow the first absolute /goomi/unspeakable/ link on the home page.
    starter = indirectStarter(
        'http://www.macguff.fr/goomi/unspeakable/home.html',
        compile(r'http://www.macguff.fr/goomi/unspeakable/(.+?)"'))

    @classmethod
    def namer(cls, imageUrl, imageSearch):
        # Result is "<stem of second argument>-<stem of first argument>", where
        # a stem is the last path segment up to its first dot.
        secondStem = imageSearch.split('/')[-1].split('.')[0]
        firstStem = imageUrl.split('/')[-1].split('.')[0]
        return '%s-%s' % (secondStem, firstStem)
|
279
dosagelib/plugins/uc.py
Normal file
279
dosagelib/plugins/uc.py
Normal file
|
@ -0,0 +1,279 @@
|
||||||
|
from re import compile, IGNORECASE, sub
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper
|
||||||
|
from ..util import fetchManyMatches, fetchUrl
|
||||||
|
|
||||||
|
|
||||||
|
class _UClickScraper(_BasicScraper):
    # Shared base for the uclick.com scrapers. Concrete classes supply
    # `shortName`, the path component substituted into `baseUrl`.
    homepage = 'http://content.uclick.com/a2z.html'
    baseUrl = 'http://www.uclick.com/client/zzz/%s/'
    # Dated page template: latestUrl followed by a "<index>/" component.
    imageUrl = property(lambda self: self.latestUrl + '%s/')
    # Captures the absolute .gif URL on synd.imgsrv.uclick.com.
    imageSearch = compile(r'<img[^>]+src="(http://synd.imgsrv.uclick.com/comics/\w+/\d{4}/[^"]+\.gif)"', IGNORECASE)
    # Captures the yyyy/mm/dd page linked as "Previous date".
    prevSearch = compile(r'<a href="(/client/zzz/\w+/\d{4}/\d{2}/\d{2}/)">Previous date', IGNORECASE)
    help = 'Index format: yyyy/mm/dd'

    @classmethod
    def starter(cls):
        # Start at the comic's base page.
        return cls.baseUrl % (cls.shortName,)

    @classmethod
    def fetchSubmodules(cls):
        # Scan the A-to-Z homepage for content links and return the normalized
        # display names of those whose uclick page yields an image match.
        exclusions = (
            'index',
        )

        submoduleSearch = compile(r'(<A HREF="http://content.uclick.com/content/\w+.html">[^>]+?</a>)', IGNORECASE)
        partsMatch = compile(r'<A HREF="http://content.uclick.com/content/(\w+?).html">([^>]+?)</a>', IGNORECASE)
        matches = fetchManyMatches(cls.homepage, (submoduleSearch,))[0]
        # Each entry is a (path component, link text) pair.
        possibles = [partsMatch.match(match).groups() for match in matches]

        def normalizeName(name):
            # Replace "&Xacute;" entities with the plain letter, title-case the
            # text, then drop every non-alphanumeric character.
            name = sub(r'&(.)acute;', r'\1', name).title()
            return ''.join([c for c in name if c.isalnum()])

        def fetchSubmodule(module):
            # Best-effort probe: any fetch/match error means "not available".
            # Fixed: this was a bare `except:`, which also swallowed
            # KeyboardInterrupt and SystemExit, so the probing loop could not
            # be interrupted cleanly.
            try:
                return fetchUrl(cls.baseUrl % module, cls.imageSearch)
            except Exception:
                return False

        return [normalizeName(name) for part, name in possibles if part not in exclusions and fetchSubmodule(part)]
|
||||||
|
|
||||||
|
|
||||||
|
def uclick(name, shortName):
    # Build a _UClickScraper subclass named "UClick_<name>" whose `name`
    # attribute is "UClick/<name>" and whose `shortName` is the given code.
    attributes = {
        'name': 'UClick/' + name,
        'shortName': shortName,
    }
    className = 'UClick_%s' % name
    return type(className, (_UClickScraper,), attributes)
|
||||||
|
|
||||||
|
comics = {
|
||||||
|
'5thWave': 'fw',
|
||||||
|
'9To5': 'tmntf',
|
||||||
|
'AdamHome': 'ad',
|
||||||
|
'Agnes': 'cragn',
|
||||||
|
'AlcarazLalo': 'la',
|
||||||
|
'AlcarazLaloSpanish': 'spla',
|
||||||
|
'AndersonNick': 'wpnan',
|
||||||
|
'AndyCapp': 'crcap',
|
||||||
|
'AnimalCrackers': 'tmani',
|
||||||
|
'Annie': 'tmann',
|
||||||
|
'AsayChuck': 'crcas',
|
||||||
|
'AskShagg': 'crask',
|
||||||
|
'AuthTony': 'ta',
|
||||||
|
'BadReporter': 'bad',
|
||||||
|
'Baldo': 'ba',
|
||||||
|
'BaldoSpanish': 'be',
|
||||||
|
'BallardStreet': 'crbal',
|
||||||
|
'BarkEaterLake': 'bark',
|
||||||
|
'BarstowDonna': 'dba',
|
||||||
|
'BC': 'crbc',
|
||||||
|
'BCSpanish': 'crbcs',
|
||||||
|
'BeattieBruce': 'crbbe',
|
||||||
|
'BennetClay': 'wpcbe',
|
||||||
|
'BensonLisa': 'wplbe',
|
||||||
|
'BensonSteve': 'crsbe',
|
||||||
|
'BigTop': 'bt',
|
||||||
|
'Biographic': 'biov',
|
||||||
|
'Bleeker': 'blk',
|
||||||
|
'BobTheSquirrel': 'bob',
|
||||||
|
'BoilingPoint': 'boil',
|
||||||
|
'BokChip': 'crcbo',
|
||||||
|
'BoNanas': 'bon',
|
||||||
|
'Boomerangs': 'boom',
|
||||||
|
'BoondocksThe': 'bo',
|
||||||
|
'BottomLiners': 'tmbot',
|
||||||
|
'BoundAndGagged': 'tmbou',
|
||||||
|
'Brainwaves': 'bwv',
|
||||||
|
'BreenSteve': 'crsbr',
|
||||||
|
'BrendaStarr': 'tmbre',
|
||||||
|
'BrewsterRockit': 'tmrkt',
|
||||||
|
'BrittChris': 'crcbr',
|
||||||
|
'BroomHilda': 'tmbro',
|
||||||
|
'Candorville': 'cand',
|
||||||
|
'CarlsonStuart': 'sc',
|
||||||
|
'CatalinoKen': 'crkca',
|
||||||
|
'Cathy': 'ca',
|
||||||
|
'CathySpanish': 'spca',
|
||||||
|
'CEstLaVie': 'clv',
|
||||||
|
'CityThe': 'derf',
|
||||||
|
'ClearBlueWater': 'cbw',
|
||||||
|
'Cleats': 'cle',
|
||||||
|
'CloseToHome': 'cl',
|
||||||
|
'CombsPaul': 'tmcmb',
|
||||||
|
'CompuToon': 'tmcom',
|
||||||
|
'Condorito': 'cond',
|
||||||
|
'ConradPaul': 'tmpco',
|
||||||
|
'Cornered': 'co',
|
||||||
|
'CulDeSac': 'cds',
|
||||||
|
'DanzigerJeff': 'jd',
|
||||||
|
'DaviesMatt': 'tmmda',
|
||||||
|
'DeepCover': 'deep',
|
||||||
|
'DeeringJohn': 'crjde',
|
||||||
|
'DickTracy': 'tmdic',
|
||||||
|
'DinetteSetThe': 'crdin',
|
||||||
|
'DogEatDoug': 'crdog',
|
||||||
|
'DonWright': 'tmdow',
|
||||||
|
'Doodles': 'tmdoo',
|
||||||
|
'Doonesbury': 'db',
|
||||||
|
'DuplexThe': 'dp',
|
||||||
|
'Eek': 'eek',
|
||||||
|
'ElderberriesThe': 'eld',
|
||||||
|
'FacesInTheNews': 'kw',
|
||||||
|
'FlightDeck': 'crfd',
|
||||||
|
'FloAndFriends': 'crflo',
|
||||||
|
'FlyingMccoysThe': 'fmc',
|
||||||
|
'ForBetterOrForWorse': 'fb',
|
||||||
|
'ForHeavenSSake': 'crfhs',
|
||||||
|
'FoxtrotClassics': 'ftcl',
|
||||||
|
'Foxtrot': 'ft',
|
||||||
|
'FoxtrotSpanish': 'spft',
|
||||||
|
'FrankAndErnest': 'fa',
|
||||||
|
'FredBassetSpanish': 'spfba',
|
||||||
|
'FredBasset': 'tmfba',
|
||||||
|
'FrogApplause': 'frog',
|
||||||
|
'FuscoBrothersThe': 'fu',
|
||||||
|
'Garfield': 'ga',
|
||||||
|
'GarfieldSpanish': 'gh',
|
||||||
|
'GasolineAlley': 'tmgas',
|
||||||
|
'GaturroSpanish': 'spgat',
|
||||||
|
'GilThorp': 'tmgil',
|
||||||
|
'GingerMeggs': 'gin',
|
||||||
|
'GingerMeggsSpanish': 'spgin',
|
||||||
|
'GirlsAndSports': 'crgis',
|
||||||
|
'GorrellBob': 'crbgo',
|
||||||
|
'GoTeamBob': 'gtb',
|
||||||
|
'HammondBruce': 'hb',
|
||||||
|
'HandelsmanWalt': 'tmwha',
|
||||||
|
'HeartOfTheCity': 'hc',
|
||||||
|
'Heathcliff': 'crhea',
|
||||||
|
'HeathcliffSpanish': 'crhes',
|
||||||
|
'HerbAndJamaal': 'crher',
|
||||||
|
'HigginsJack': 'jh',
|
||||||
|
'HomeAndAway': 'wphaa',
|
||||||
|
'HorseyDavid': 'tmdho',
|
||||||
|
'Housebroken': 'tmhou',
|
||||||
|
'HubertAndAbby': 'haa',
|
||||||
|
'IdiotBox': 'ibox',
|
||||||
|
'ImagineThis': 'imt',
|
||||||
|
'InkPen': 'ink',
|
||||||
|
'InTheBleachers': 'bl',
|
||||||
|
'ItsAllAboutYou': 'wpiay',
|
||||||
|
'JamesBondSpanish': 'spjb',
|
||||||
|
'JonesClay': 'crcjo',
|
||||||
|
'KallaugherKevin': 'cwkal',
|
||||||
|
'KChroniclesThe': 'kk',
|
||||||
|
'KelleySteve': 'crske',
|
||||||
|
'Kudzu': 'tmkud',
|
||||||
|
'LaCucaracha': 'lc',
|
||||||
|
'LegendOfBill': 'lob',
|
||||||
|
'LibertyMeadows': 'crlib',
|
||||||
|
'Lio': 'lio',
|
||||||
|
'LittleDogLost': 'wpldl',
|
||||||
|
'LocherDick': 'tmdlo',
|
||||||
|
'LooseParts': 'tmloo',
|
||||||
|
'LostSheep': 'lost',
|
||||||
|
'LoweChan': 'tmclo',
|
||||||
|
'LuckovichMike': 'crmlu',
|
||||||
|
'LuckyCow': 'luc',
|
||||||
|
'MarkstienGary': 'crgma',
|
||||||
|
'MarletteDoug': 'tmdma',
|
||||||
|
'MccoyGlenn': 'gm',
|
||||||
|
'MeaningOfLilaThe': 'crlil',
|
||||||
|
'MeehanStreak': 'tmmee',
|
||||||
|
'MiddletonsThe': 'tmmid',
|
||||||
|
'MinimumSecurity': 'ms',
|
||||||
|
'ModestyBlaiseSpanish': 'spmb',
|
||||||
|
'Momma': 'crmom',
|
||||||
|
'MorinJim': 'cwjmo',
|
||||||
|
'MuttJeffSpanish': 'spmut',
|
||||||
|
'MythTickle': 'myth',
|
||||||
|
'NAoQV': 'naqv',
|
||||||
|
'NaturalSelection': 'crns',
|
||||||
|
'NestHeads': 'cpnst',
|
||||||
|
'Neurotica': 'neu',
|
||||||
|
'NonSequitur': 'nq',
|
||||||
|
'OhmanJack': 'tmjoh',
|
||||||
|
'OliphantPat': 'po',
|
||||||
|
'OnAClaireDay': 'crocd',
|
||||||
|
'OneBigHappy': 'crobh',
|
||||||
|
'OtherCoastThe': 'crtoc',
|
||||||
|
'OutOfTheGenePool': 'wpgen',
|
||||||
|
'Overboard': 'ob',
|
||||||
|
'OverboardSpanish': 'spob',
|
||||||
|
'PepeSpanish': 'sppep',
|
||||||
|
'PettJoel': 'jp',
|
||||||
|
'Pibgorn': 'pib',
|
||||||
|
'Pickles': 'wppic',
|
||||||
|
'Pluggers': 'tmplu',
|
||||||
|
'PoochCafe': 'poc',
|
||||||
|
'PoochCafeSpanish': 'sppoc',
|
||||||
|
'PopCulture': 'pop',
|
||||||
|
'PowellDwane': 'crdpo',
|
||||||
|
'Preteena': 'pr',
|
||||||
|
'PricklyCity': 'prc',
|
||||||
|
'QuigmansThe': 'tmqui',
|
||||||
|
'RallComic': 'tr',
|
||||||
|
'RamirezMicheal': 'crmrm',
|
||||||
|
'RamseyMarshall': 'crmra',
|
||||||
|
'RealLifeAdventures': 'rl',
|
||||||
|
'RedAndRover': 'wpred',
|
||||||
|
'RedMeat': 'red',
|
||||||
|
'ReynoldsUnwrapped': 'rw',
|
||||||
|
'RonaldinhoGaucho': 'ron',
|
||||||
|
'RonaldinhoGauchoSpanish': 'spron',
|
||||||
|
'Rubes': 'crrub',
|
||||||
|
'SackSteve': 'tmssa',
|
||||||
|
'SargentBen': 'bs',
|
||||||
|
'SargentBenSpanish': 'spbs',
|
||||||
|
'SendHelp': 'send',
|
||||||
|
'ShenemanDrew': 'tmdsh',
|
||||||
|
'SherffiusDrew': 'crjsh',
|
||||||
|
'Shoecabbage': 'shcab',
|
||||||
|
'Shoe': 'tmsho',
|
||||||
|
'SigmundSpanish': 'spsig',
|
||||||
|
'Slowpoke': 'slow',
|
||||||
|
'SmallWorld': 'small',
|
||||||
|
'SpaceIsThePlace': 'sitp',
|
||||||
|
'SpeedBump': 'crspe',
|
||||||
|
'StanisScott': 'crsst',
|
||||||
|
'StateOfTheUnion': 'crsou',
|
||||||
|
'StayskalWayne': 'tmwst',
|
||||||
|
'StoneSoup': 'ss',
|
||||||
|
'StrangeBrew': 'crstr',
|
||||||
|
'SummersDana': 'tmdsu',
|
||||||
|
'SuttonImpact': 'stn',
|
||||||
|
'Sylvia': 'tmsyl',
|
||||||
|
'SzepPaul': 'crpsz',
|
||||||
|
'TankMcnamara': 'tm',
|
||||||
|
'TeenageMutantNinjaTurtles': 'tmnt',
|
||||||
|
'TelnaesAnn': 'tmate',
|
||||||
|
'TheArgyleSweater': 'tas',
|
||||||
|
'ThePinkPanther': 'tmpnk',
|
||||||
|
'TheWizardOfId': 'crwiz',
|
||||||
|
'TheWizardOfIdSpanish': 'crwis',
|
||||||
|
'ThInk': 'think',
|
||||||
|
'ThompsonMike': 'crmth',
|
||||||
|
'ThroughThickAndThin': 'cpthk',
|
||||||
|
'TinySepuku': 'tiny',
|
||||||
|
'Toby': 'toby',
|
||||||
|
'TolesTom': 'tt',
|
||||||
|
'TomTheDancingBug': 'td',
|
||||||
|
'TooMuchCoffeeMan': 'tmcm',
|
||||||
|
'Trevor': 'trev',
|
||||||
|
'TutelandiaSpanish': 'sptut',
|
||||||
|
'VarvelGary': 'crgva',
|
||||||
|
'WassermanDan': 'tmdwa',
|
||||||
|
'WatchYourHead': 'wpwyh',
|
||||||
|
'Waylay': 'min',
|
||||||
|
'WeePals': 'crwee',
|
||||||
|
'WinnieThePooh': 'crwin',
|
||||||
|
'WitOfTheWorld': 'cwwit',
|
||||||
|
'WorkingItOut': 'crwio',
|
||||||
|
'WriteDon': 'tmdow',
|
||||||
|
'YennySpanish': 'spyen',
|
||||||
|
'Yenny': 'yen',
|
||||||
|
'ZackHill': 'crzhi',
|
||||||
|
'ZiggySpanish': 'spzi',
|
||||||
|
'Ziggy': 'zi',
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create one scraper class per `comics` entry and publish it at module level
# under the entry's key.
globals().update(dict((title, uclick(title, code)) for title, code in comics.items()))
|
36
dosagelib/plugins/v.py
Normal file
36
dosagelib/plugins/v.py
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
from re import compile, IGNORECASE, MULTILINE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class _VGCats(_BasicScraper):
    # Shared settings for the vgcats.com comics; subclasses override latestUrl
    # (and set name) to select another series.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://www.vgcats.com/comics/'
    # Captures the relative image path images/<6 digits>.<ext>.
    imageSearch = compile(r'<img src="(images/\d{6}\..+?)"')
    # Captures the link wrapped around the back.gif button.
    prevSearch = compile(r'<a href="(\?strip_id=\d+)"><img src="back.gif" border="0"')

    @property
    def imageUrl(self):
        # Built from the (possibly overridden) latestUrl; the %s placeholder
        # is left for the caller.
        seriesRoot = self.latestUrl
        return seriesRoot + '?strip_id=%s'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Super(_VGCats):
    # The /super/ series; everything else is inherited from _VGCats.
    latestUrl = 'http://www.vgcats.com/super/'
    name = 'VGCats/Super'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Adventure(_VGCats):
    # The /ffxi/ series; everything else is inherited from _VGCats.
    latestUrl = 'http://www.vgcats.com/ffxi/'
    name = 'VGCats/Adventure'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ViiviJaWagner(_BasicScraper):
    # Settings for the Viivi ja Wagner strip on hs.fi.
    help = 'Index format: shrugs!'
    latestUrl = 'http://www.hs.fi/viivijawagner/'
    imageUrl = latestUrl + '%s'
    # Captures the src of the image whose id is "strip<digits>".
    imageSearch = compile(
        r'<img id="strip\d+"\s+src="([^"]+)"',
        IGNORECASE)
    # Captures the link whose text is "Edellinen" followed by the
    # viivitaakse.gif arrow.
    prevSearch = compile(
        r'<a href="(.+?)"[^>]+?>\nEdellinen \n<img src="http://www.hs.fi/static/hs/img/viivitaakse.gif"',
        MULTILINE | IGNORECASE)
|
216
dosagelib/plugins/w.py
Normal file
216
dosagelib/plugins/w.py
Normal file
|
@ -0,0 +1,216 @@
|
||||||
|
from re import compile, IGNORECASE, DOTALL
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, queryNamer, bounceStarter
|
||||||
|
|
||||||
|
|
||||||
|
class WayfarersMoon(_BasicScraper):
    # Settings for wayfarersmoon.com; strips are addressed as index.php?page=<index>.
    latestUrl = 'http://www.wayfarersmoon.com/'
    # Fixed: the template used to contain '\?'. In a plain (non-raw) string
    # that is not an escape sequence, so the generated URL carried a literal
    # backslash before the query string.
    imageUrl = 'http://www.wayfarersmoon.com/index.php?page=%s'
    # Captures the site-relative image path starting with /admin.
    imageSearch = compile(r'<img src="(/admin.+?)"')
    # Captures an href that is followed (on the same line) by btn_back.gif.
    prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')
    help = 'Index format: nn'
|
||||||
|
|
||||||
|
|
||||||
|
class WhiteNinja(_BasicScraper):
    # Settings for whiteninjacomics.com.
    help = 'Index format: s (comic name)'
    latestUrl = 'http://www.whiteninjacomics.com/comics.shtml'
    imageUrl = 'http://www.whiteninjacomics.com/comics/%s.shtml'
    # Captures the unquoted .gif path, skipping files whose name starts with "t-".
    imageSearch = compile(r'<img src=(/images/comics/(?!t-).+?\.gif) border=0')
    # Captures a /comics/...shtml path that is followed by "previous".
    prevSearch = compile(r'(/comics/.+?shtml).+?previous')
|
||||||
|
|
||||||
|
|
||||||
|
class WhiteNoise(_BasicScraper):
    # Settings for wncomic.com.
    # NOTE(review): this module defines a second `class WhiteNoise` further
    # down with a different prevSearch; being later, that definition is the
    # one the module-level name ends up bound to.
    help = 'Index format: n'
    latestUrl = 'http://www.wncomic.com/archive.php'
    imageUrl = 'http://www.wncomic.com/archive_comments.php?strip_id=%s'
    # Captures a quoted path starting with istrip_files/strips/.
    imageSearch = compile(r'(istrip_files/strips/.+?)"')
    # Captures the archive link between the "First" link and "top_back".
    prevSearch = compile(r'First .+?"(archive.+?)".+?top_back')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class WhyTheLongFace(_BasicScraper):
    # Settings for the "Why the Long Face" pages on absurdnotions.org;
    # traversal starts at a fixed page (wtlf200709.html).
    help = 'Index format: yyyymm'
    latestUrl = 'http://www.absurdnotions.org/wtlf200709.html'
    imageUrl = 'http://www.absurdnotions.org/wtlf%s.html'
    # Captures either an absolute wtlf... URL or a relative lf<digits>.<ext> name.
    imageSearch = compile(
        r'<img src="(http://www.absurdnotions.org/wtlf.+?|lf\d+.\w{1,4})"',
        IGNORECASE)
    # Captures the link wrapped around the nprev.gif button (case-sensitive).
    prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Wigu(_BasicScraper):
    # Settings for wigu.com; strips are addressed as ?date=<index>.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://www.wigu.com/wigu/'
    imageUrl = latestUrl + '?date=%s'
    # Captures the relative image path strips/<8 digits>.<ext>.
    imageSearch = compile(r'<img src="(strips/\d{8}\..+?)" alt=""')
    # Captures the href of the "< PREV COMIC" link.
    prevSearch = compile(r'<a href="(.+?)"[^>]+?>< PREV COMIC</a> ')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class WiguTV(_BasicScraper):
    # Settings for jjrowland.com; archive pages live under /archive/<index>.html.
    help = 'Index format: yyyymmdd'
    latestUrl = 'http://jjrowland.com/'
    imageUrl = latestUrl + 'archive/%s.html'
    # Captures any quoted site-relative path starting with /comics/.
    imageSearch = compile(r'"(/comics/.+?)"')
    # Captures the first /archive/ link whose tag is followed by a space.
    prevSearch = compile(r'<a href="(/archive/.+?)"[^>]+?> ')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class WotNow(_BasicScraper):
    # Settings for the Wot Now comic on shadowburn.binmode.com.
    help = 'Index format: n (unpadded)'
    latestUrl = 'http://shadowburn.binmode.com/wotnow/'
    imageUrl = latestUrl + 'comic.php?comic_id=%s'
    # Captures the relative image path below comics/ (upper-case tags).
    imageSearch = compile(r'<IMG SRC="(comics/.+?)"')
    # Captures the link wrapped around the b_prev.gif button.
    prevSearch = compile(r'<A HREF="(.+?)"><IMG SRC="images/b_prev.gif" ')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class WorldOfWarcraftEh(_BasicScraper):
    # Settings for woweh.com; strips are addressed as ?p=<index>.
    latestUrl = 'http://woweh.com/'
    # Fixed: the template used to be 'http://woweh.com/?p=' with no %s, so
    # substituting an index raised TypeError ("not all arguments converted")
    # instead of producing a page URL.
    imageUrl = 'http://woweh.com/?p=%s'
    # Captures the comics/... path of an absolute woweh.com image URL.
    imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
    # Captures the ?p=... query of the link marked "prev".
    # NOTE(review): '.+:?' is a greedy '.+' plus an optional colon; it may
    # have been meant as the non-greedy '.+?'. Left unchanged - confirm
    # against the live markup.
    prevSearch = compile(r'woweh.com/(\?p=.+:?)".+:?="prev')
    help = 'Index format: non'
|
||||||
|
|
||||||
|
|
||||||
|
class Wulffmorgenthaler(_BasicScraper):
    # Settings for wulffmorgenthaler.com; strips are identified by GUID.
    help = 'Index format: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx (GUID)'
    latestUrl = 'http://www.wulffmorgenthaler.com/'
    imageUrl = latestUrl + 'Default.aspx?id=%s'
    # Captures the striphandler.ashx?stripid=<GUID> source of the strip image.
    imageSearch = compile(r'img id="ctl00_content_Strip1_imgStrip".+?class="strip" src="(striphandler\.ashx\?stripid=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"')
    # Captures the /default.aspx?id=<GUID> target of the aPrev anchor.
    prevSearch = compile(r'<a href="(/default\.aspx\?id=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})" id="ctl00_content_Strip1_aPrev">')
    # File names are produced by queryNamer keyed on 'stripid'.
    namer = queryNamer('stripid')
|
||||||
|
|
||||||
|
|
||||||
|
def webcomicsNation():
    # Build the webcomicsnation.com scrapers. Returns a dict mapping each
    # comic name to a generated class "WebcomicsNation_<name>" that carries
    # `name` and `latestUrl` attributes.
    class _WebcomicsNation(_BasicScraper):
        # Captures the member image that follows a "strip" anchor; DOTALL lets
        # the match span line breaks.
        imageSearch = compile(r'<a name="strip\d*?">.*?<img[^>]+?src="([^"]*?memberimages/.+?)"', IGNORECASE + DOTALL)
        # Captures any link whose URL contains whichbutton=prev.
        prevSearch = compile(r'href="([^"]*?whichbutton=prev[^"]*?)"', IGNORECASE)
        help = 'Index format: nnnn (non-contiguous)'

        @property
        def imageUrl(self):
            # Fixed: this used to read self.baseUrl, but the classes generated
            # below only define latestUrl. The sibling factories that derive
            # imageUrl the same way (e.g. _VGCats) use latestUrl too.
            return self.latestUrl + '?view=archive&chapter=%s'

    # Comic name -> path below http://www.webcomicsnation.com/.
    comics = {
        'AgnesQuill': 'daveroman/agnes/',
        'Elvenbaath': 'tdotodot2k/elvenbaath/',
        'IrrationalFears': 'uvernon/irrationalfears/',
        'KismetHuntersMoon': 'laylalawlor/huntersmoon/',
        'SaikoAndLavender': 'gc/saiko/',
        'MyMuse': 'gc/muse/',
        'NekkoAndJoruba': 'nekkoandjoruba/nekkoandjoruba/',
        'JaxEpoch': 'johngreen/quicken/',
        'QuantumRockOfAges': 'DreamchildNYC/quantum/',
        'ClownSamurai': 'qsamurai/clownsamurai/',
    }

    return dict((name, type('WebcomicsNation_%s' % name,
                            (_WebcomicsNation,),
                            dict(name='WebcomicsNation/' + name,
                                 latestUrl='http://www.webcomicsnation.com/' + subpath)))
                for name, subpath in comics.items())
|
||||||
|
|
||||||
|
|
||||||
|
# Publish every class in the dict returned by webcomicsNation() as a module-level name.
globals().update(webcomicsNation())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class WhiteNoise(_BasicScraper):
    # Settings for wncomic.com.
    # NOTE(review): an earlier `class WhiteNoise` in this module (same URLs,
    # different prevSearch) is replaced by this definition, since both bind
    # the same module-level name.
    help = 'Index format: n'
    latestUrl = 'http://www.wncomic.com/archive.php'
    imageUrl = 'http://www.wncomic.com/archive_comments.php?strip_id=%s'
    # Captures a quoted path starting with istrip_files/strips/.
    imageSearch = compile(r'(istrip_files/strips/.+?)"')
    # Captures the link wrapped around the top_back.jpg button.
    prevSearch = compile(r'</a><a href="(.+?)"><img src="images/top_back.jpg" ')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class WapsiSquare(_BasicScraper):
    # Settings for wapsisquare.com.
    help = 'Index format: strip-name'
    latestUrl = 'http://wapsisquare.com/'
    imageUrl = latestUrl + 'comic/%s'
    # Captures the absolute URL of the image below /comics/.
    imageSearch = compile(r'<img src="(http://wapsisquare.com/comics/.+?)"')
    # Captures the href of the "Previous" text link.
    prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class WrongWay(_BasicScraper):
    # Settings for wrongwaycomics.com; pages are named <index>.html.
    help = 'Index format: nnn'
    latestUrl = 'http://www.wrongwaycomics.com/'
    imageUrl = latestUrl + '%s.html'
    # Captures the relative image path below comics/.
    imageSearch = compile(r'<img src="(comics/.+?)"')
    # Captures the comicNav link that carries the previousLinkIn() hover handler.
    prevSearch = compile(r' <a class="comicNav" href="(.+?)" onmouseover="previousLinkIn\(\)"')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class WeCanSleepTomorrow(_BasicScraper):
    # Settings for wecansleeptomorrow.com.
    latestUrl = 'http://wecansleeptomorrow.com/'
    # Fixed: this used to be the URL of one specific strip
    # ('.../2009/12/07/smothered/') with no %s, so an index in the documented
    # yyyy/mm/dd/stripname format could not be substituted. The trailing
    # slash of the old URL is kept.
    imageUrl = 'http://wecansleeptomorrow.com/%s/'
    # Captures the absolute URL of the image below /comics/.
    imageSearch = compile(r'<img src="(http://wecansleeptomorrow.com/comics/.+?)"')
    # Captures the first link inside the "nav-previous" div.
    prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
    help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class _WLP(_BasicScraper):
    # Shared settings for the wlpcomics.com comics. Subclasses provide `path`,
    # the directory below the site root (e.g. 'adult/...' or 'general/...').
    help = 'Index format: nnn'
    # Captures an absolute image URL below /adult/ or /general/.
    imageSearch = compile(
        r'SRC="(http://www.wlpcomics.com/adult/.+?|http://www.wlpcomics.com/general/.+?)"',
        IGNORECASE)
    # Captures the page name of the "Previous Page" link.
    prevSearch = compile(
        r'</a> <A HREF="(\w+.html)">Previous Page</a>',
        IGNORECASE)

    @property
    def baseUrl(self):
        # Site root joined with this comic's path.
        template = 'http://www.wlpcomics.com/%s'
        return template % (self.path,)

    @property
    def imageUrl(self):
        # Page template below baseUrl; the %s placeholder is left for the caller.
        comicRoot = self.baseUrl
        return comicRoot + '%s.html'

    def namer(self, imageUrl, pageUrl):
        # Name the file after the page: last path segment up to its first dot.
        pageFile = pageUrl.rsplit('/', 1)[-1]
        return pageFile.split('.', 1)[0]

    def starter(self):
        # XXX: ergh
        # baseUrl is only known per instance, so the bounce starter is built
        # here, bound to this instance through __get__, and called right away.
        nextSearch = compile(r'</a> <A HREF="(\w+.html)">Next Page</a>', IGNORECASE)
        bouncer = bounceStarter(self.baseUrl, nextSearch)
        bound = bouncer.__get__(self, type(self))
        return bound()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ChichiChan(_WLP):
    # `path` is the site directory consumed by _WLP.baseUrl.
    path = 'adult/chichi/'
    name = 'WLP/ChichiChan'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ChocolateMilkMaid(_WLP):
    # `path` is the site directory consumed by _WLP.baseUrl.
    path = 'adult/cm/'
    name = 'WLP/ChocolateMilkMaid'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class MaidAttack(_WLP):
    # `path` is the site directory consumed by _WLP.baseUrl.
    path = 'general/maidattack/'
    name = 'WLP/MaidAttack'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ShadowChasers(_WLP):
    # `path` is the site directory consumed by _WLP.baseUrl.
    path = 'general/shadowchasers/'
    name = 'WLP/ShadowChasers'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Stellar(_WLP):
    # `path` is the site directory consumed by _WLP.baseUrl.
    path = 'adult/stellar/'
    name = 'WLP/Stellar'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Wondermark(_BasicScraper):
    # Settings for wondermark.com (latestUrl has no trailing slash).
    help = 'Index format: nnn'
    latestUrl = 'http://wondermark.com'
    imageUrl = latestUrl + '/%s/'
    # Captures the absolute URL of the image below /c/.
    imageSearch = compile(r'<img src="(http://wondermark.com/c/.+?)"')
    # Captures the href of the rel="prev" link.
    prevSearch = compile(r'<a href="(.+?)" rel="prev">')
|
25
dosagelib/plugins/x.py
Normal file
25
dosagelib/plugins/x.py
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
from re import compile
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper, bounceStarter
|
||||||
|
|
||||||
|
class xkcd(_BasicScraper):
    # Start on the front page and bounce through its rel="next" link.
    starter = bounceStarter('http://xkcd.com/', compile(r'<a rel="next" href="(/?\d+/?)"[^>]*>Next'))
    imageUrl = 'http://xkcd.com/c%s.html'
    # Group 1 is the absolute image URL; host dots are escaped so they
    # only match a literal '.'.
    imageSearch = compile(r'<img[^<]+src="(http://imgs\.xkcd\.com/comics/[^<>"]+)"')
    # NOTE(review): the '<' before 'Prev' may be a decoded '&lt;' entity
    # in the extracted text -- confirm against the repository file.
    prevSearch = compile(r'<a rel="prev" href="(/?\d+/?)"[^>]*>< Prev')
    help = 'Index format: n (unpadded)'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Build 'cNNN-<image basename>' from the page number and image URL.

        The last path segment of pageUrl (trailing '/' ignored) must be an
        integer, otherwise int() raises ValueError.
        """
        index = int(pageUrl.rstrip('/').split('/')[-1])
        # image file name without directory and without extension
        name = imageUrl.split('/')[-1].split('.')[0]
        return 'c%03d-%s' % (index, name)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class xkcdSpanish(_BasicScraper):
    latestUrl = 'http://es.xkcd.com/xkcd-es/'
    # Page URL template; %s receives the strip name (see help).
    imageUrl = 'http://es.xkcd.com/xkcd-es/strips/%s/'
    # Group 1 is the site-relative image path.
    imageSearch = compile(r'src="(/site_media/strips/.+?)"')
    # Group 1 is the absolute URL of the previous strip; host dots are
    # escaped so they only match a literal '.'.
    prevSearch = compile(r'<a rel="prev" href="(http://es\.xkcd\.com/xkcd-es/strips/.+?)">Anterior</a>')
    help = 'Index format: stripname'
|
27
dosagelib/plugins/y.py
Normal file
27
dosagelib/plugins/y.py
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
from re import compile, MULTILINE
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper
|
||||||
|
|
||||||
|
|
||||||
|
class YAFGC(_BasicScraper):
    latestUrl = 'http://yafgc.shipsinker.com/'
    # Page URL template; %s receives the strip id (see help).
    imageUrl = 'http://yafgc.shipsinker.com/index.php?strip_id=%s'
    # Group 1 is the relative image name starting with 'istrip_'.
    imageSearch = compile(r'(istrip_.+?)"')
    # Group 1 is the link whose tag is followed, after a CRLF line break,
    # by the prev.gif button. The dot in 'prev.gif' is escaped so it only
    # matches a literal '.'. MULTILINE is kept as in the original although
    # the pattern contains no ^ or $ anchors.
    prevSearch = compile(r'(/.+?)">\r\n.+?prev\.gif', MULTILINE)
    help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
class YouSayItFirst(_BasicScraper):
    latestUrl = 'http://www.yousayitfirst.com/'
    # NOTE(review): the page template points at a different host than
    # latestUrl -- confirm this is intentional.
    imageUrl = 'http://www.soapylemon.com/comics/index.php?date=%s'
    # Group 1 is the absolute JPEG URL. The extension dot is escaped so
    # that e.g. 'xjpg' is no longer accepted as '.jpg'.
    imageSearch = compile(r'(http://.+?comics/.+?\.jpg)[^<]')
    # Group 1 is the site-relative link to an earlier date.
    prevSearch = compile(r'(/comics/index.php\?date=.+?)".+?P')
    help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
class Yirmumah(_BasicScraper):
    # Archive page doubles as the "latest" page.
    latestUrl = 'http://yirmumah.net/archives.php'
    # Page URL template; %s receives the date (see help).
    imageUrl = 'http://yirmumah.net/archives.php?date=%s'
    # Group 1 is the relative image path named after an 8-digit date.
    imageSearch = compile(r'<img src="(strips/\d{8}\..*?)"')
    # Group 1 is the relative '?date=...' query of the previous strip.
    prevSearch = compile(r'<a href="(\?date=\d{8})">.*Previous')
    help = 'Index format: yyyymmdd'
|
18
dosagelib/plugins/z.py
Normal file
18
dosagelib/plugins/z.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
from re import compile
|
||||||
|
|
||||||
|
from ..helpers import _BasicScraper
|
||||||
|
|
||||||
|
|
||||||
|
class Zapiro(_BasicScraper):
    # Only a listing page is defined; there is no per-strip URL template
    # and no index help text for this comic.
    latestUrl = 'http://www.mg.co.za/zapiro/all'
    # Group 1 is the relative image path below 'cartoons/'.
    imageSearch = compile(r'<img src="(cartoons/[^"]+)"')
    # NOTE(review): the trailing '>' may be a decoded '&gt;' entity in the
    # extracted text -- confirm against the repository file.
    prevSearch = compile(r'<a href="([^"]+)">>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ZombieHunters(_BasicScraper):
    latestUrl = 'http://www.thezombiehunters.com/'
    # Page URL template; %s receives the strip id (see help).
    imageUrl = 'http://www.thezombiehunters.com/index.php?strip_id=%s'
    # Group 1 is any quoted value that contains 'strips/'.
    imageSearch = compile(r'"(.+?strips/.+?)"')
    # Group 1 is the href of the link wrapping the 'prevcomic' image.
    prevSearch = compile(r'</a><a href="(.+?)"><img id="prevcomic" ')
    help = 'Index format: n(unpadded)'
|
91
dosagelib/progress.py
Normal file
91
dosagelib/progress.py
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
from __future__ import division
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
from . import util
|
||||||
|
|
||||||
|
class Guess(object):
    """Exponentially weighted running estimate of a stream of values."""

    def __init__(self, weight):
        # weight: share of each new sample in the updated estimate
        self.best = 0
        self.guess = 0
        self.weight = weight

    def feed(self, value):
        """Blend value into the running estimate."""
        new_share = self.weight * value
        old_share = (1 - self.weight) * self.guess
        self.guess = new_share + old_share

    def distance(self, value):
        """Return the squared difference between the estimate and value."""
        delta = self.guess - value
        return delta ** 2
|
||||||
|
|
||||||
|
class FortuneTeller(object):
    """Predict the next value of a series from several Guess estimators.

    One estimator is kept per entry in weights. Each fed value credits
    the estimator whose current estimate was closest, and predict()
    returns the estimate of the most-credited estimator.
    """
    weights = (0.2, 0.3, 0.4)

    def __init__(self):
        # A real list: the estimators are iterated on every feed().
        self.guesses = [Guess(weight) for weight in self.weights]

    def feed(self, value):
        """Credit the closest estimator, then update all of them."""
        # key= keeps ties from falling through to comparing Guess objects;
        # on a tie the first estimator in weights order wins.
        best = min(self.guesses, key=lambda guess: guess.distance(value))
        best.best += 1
        for guess in self.guesses:
            guess.feed(value)

    def predict(self):
        """Return the estimate of the estimator credited most often."""
        return max(self.guesses, key=lambda guess: guess.best).guess
|
||||||
|
|
||||||
|
class OperationComplete(Exception):
    """Raised by a progress callback to end the progressBar() loop."""
|
||||||
|
|
||||||
|
def drawBar(fill, total, caption):
    """Redraw a '[===>---] (xx.xx%) caption' bar on the current line.

    The bar is sized from util.getWindowSize(). total must be non-zero
    because the fill ratio is computed as fill / total.
    """
    columns = util.getWindowSize()
    fraction = fill / total
    # First formatting pass fills in percentage and caption and leaves two
    # '%s' slots (and an escaped '%%') for the bar segments.
    template = '[%%s>%%s] (%.2f%%%%) %s' % (fraction * 100, caption)

    # +6 compensates for the template's placeholder characters.
    width = columns - len(template) + 6
    done = int(width * fraction) - 1
    remaining = width - done - 1

    write = sys.stdout.write
    write('\r')
    write(template % ('=' * done, '-' * remaining))
    sys.stdout.flush()
|
||||||
|
|
||||||
|
def drawBounceBar(pos, caption):
    """Redraw an indeterminate '[   <=>   ] caption' bar on the current line.

    pos is an ever-increasing counter; the marker position is pos modulo
    the bar width derived from util.getWindowSize().
    """
    columns = util.getWindowSize()
    template = '[%%s<=>%%s] %s' % (caption,)

    # +4 compensates for the two '%s' placeholders in the template.
    width = columns - len(template) + 4
    left = pos % width - 1
    right = width - left - 1

    write = sys.stdout.write
    write('\r')
    write(template % (' ' * left, ' ' * right))
    sys.stdout.flush()
|
||||||
|
|
||||||
|
def progressBar(fn):
    """Drive a console progress display until fn raises OperationComplete.

    fn is called repeatedly and must return a (progress, total) pair:
    the amount done since the previous call and the expected overall
    amount (0 when unknown). Roughly every 0.2 seconds the transfer rate
    estimate is updated and the bar is redrawn: a bouncing bar while
    total is 0, a percentage bar otherwise.
    """
    completed = bps = 0
    count = 0
    # Pre-bind total: fn may raise OperationComplete on its very first
    # call, and the handler below reads it.
    total = 0
    ft = FortuneTeller()
    currentTime = lastTime = time.time()
    try:
        while 1:
            inc = 0
            # Accumulate progress for one display interval.
            while currentTime - lastTime < 0.2:
                progress, total = fn()
                inc += progress
                currentTime = time.time()

            ft.feed(inc / (currentTime - lastTime))
            lastTime = currentTime

            completed += inc
            bps = ft.predict()

            if total == 0:
                drawBounceBar(count, '%s/sec' % util.saneDataSize(bps))
                count += 1
            else:
                drawBar(completed, max(total, completed), '%s/sec' % util.saneDataSize(bps))
    except OperationComplete:
        caption = '%s/sec' % util.saneDataSize(bps)
        final = max(total, completed)
        # With nothing to scale against (unknown size, finished within the
        # first interval) a percentage bar would divide by zero, so fall
        # back to the bounce bar.
        if count > 0 or final == 0:
            drawBounceBar(count, caption)
        else:
            drawBar(final, final, caption)
        # Finish the '\r'-redrawn line.
        sys.stdout.write('\n')
|
80
dosagelib/rss.py
Normal file
80
dosagelib/rss.py
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
# TODO: Not sure if this RSS output is "valid", should be though.
|
||||||
|
# Might also be nice categorise Comics under one Item
|
||||||
|
|
||||||
|
import xml.dom.minidom
|
||||||
|
import time
|
||||||
|
|
||||||
|
class Feed(object):
    """Builder for an RSS 2.0 document with one channel, on xml.dom.minidom."""

    def __init__(self, title, link, description, lang='en-us'):
        """Create the <rss version="2.0"> root, its channel and channel metadata."""
        self.rss = xml.dom.minidom.Document()

        rss_root = self.rss.appendChild(self.rss.createElement('rss'))
        rss_root.setAttribute('version', '2.0')

        self.channel = rss_root.appendChild(self.rss.createElement('channel'))

        self.addElement(self.channel, 'title', title)
        self.addElement(self.channel, 'link', link)
        self.addElement(self.channel, 'language', lang)
        self.addElement(self.channel, 'description', description)

    @staticmethod
    def RFC822Date(data):
        """Format a time tuple as used for pubDate values.

        Declared static because the function takes no instance; without
        the decorator it could not be called on a Feed object.
        """
        return time.strftime('%a, %d %b %Y %H:%M:%S GMT', data)

    def addElement(self, parent, tag, value):
        """Append <tag>value</tag> to parent and return the new text node."""
        return parent.appendChild(self.rss.createElement(tag)).appendChild(self.rss.createTextNode(value))

    def _createItem(self, title, link, description, date):
        """Build a detached <item> element with the four standard children."""
        item = self.rss.createElement('item')

        self.addElement(item, 'title', title)
        self.addElement(item, 'link', link)
        self.addElement(item, 'description', description)
        self.addElement(item, 'pubDate', date)

        return item

    def insertHead(self, title, link, description, date):
        """Insert a new item before all existing items."""
        item = self._createItem(title, link, description, date)

        elems = self.rss.getElementsByTagName('item')
        if elems:
            self.channel.insertBefore(item, elems[0])
        else:
            self.channel.appendChild(item)

    def addItem(self, title, link, description, date):
        """Append a new item after all existing items."""
        self.channel.appendChild(self._createItem(title, link, description, date))

    def write(self, path):
        """Write the serialized document to path."""
        # 'with' closes the file even when serializing or writing fails.
        with open(path, 'w') as handle:
            handle.write(self.getXML())

    def getXML(self):
        """Return the document serialized by minidom's toxml()."""
        return self.rss.toxml()
|
||||||
|
|
||||||
|
def parseFeed(filename, yesterday):
    """Load an RSS file and return a new Feed holding only recent items.

    @param filename: file name or file object accepted by minidom.parse()
    @param yesterday: time tuple; only items whose pubDate is strictly
      later are copied
    @return: a Feed with the channel title, link and description of the
      parsed file and the surviving items in document order
    """
    dom = xml.dom.minidom.parse(filename)

    def getText(node, tag):
        # An empty element such as <description></description> has no
        # text child; report it as '' instead of raising IndexError.
        children = node.getElementsByTagName(tag)[0].childNodes
        if not children:
            return ''
        return children[0].data

    def getNode(tag):
        return dom.getElementsByTagName(tag)

    content = getNode('channel')[0] # Only one channel node

    feedTitle = getText(content, 'title')
    feedLink = getText(content, 'link')
    feedDesc = getText(content, 'description')

    feed = Feed(feedTitle, feedLink, feedDesc)

    for item in getNode('item'):
        itemDate = time.strptime(getText(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S GMT')
        if (itemDate > yesterday): # If newer than yesterday
            feed.addItem(getText(item, 'title'),
                         getText(item, 'link'),
                         getText(item, 'description'),
                         getText(item, 'pubDate'))
    return feed
|
141
dosagelib/scraper.py
Normal file
141
dosagelib/scraper.py
Normal file
|
@ -0,0 +1,141 @@
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from .helpers import _BasicScraper
|
||||||
|
|
||||||
|
disabled = []
|
||||||
|
def init_disabled():
    """Append the names listed in ~/.dosage/disabled to the disabled list.

    The file holds one name per line. Blank lines and lines whose first
    non-blank character is '#' are ignored; surrounding whitespace is
    stripped from names. A missing file is silently ignored.
    """
    filename = os.path.expanduser('~/.dosage/disabled')
    if not os.path.isfile(filename):
        return
    with open(filename) as f:
        for line in f:
            # Strip before testing: a raw line still carries its newline,
            # so an empty line would otherwise be stored as ''.
            name = line.strip()
            if name and not name.startswith('#'):
                disabled.append(name)
|
||||||
|
init_disabled()
|
||||||
|
|
||||||
|
class DisabledComicError(ValueError):
    """ValueError subclass for errors concerning disabled comics."""
|
||||||
|
|
||||||
|
|
||||||
|
def get(comicName):
    """Return the scraper class matching the given comic name.

    Matching is case insensitive. An exact name match wins immediately;
    otherwise the name must be a substring of exactly one scraper name.
    @param comicName: full or partial comic name
    @ptype comicName: string
    @return: the matching scraper class
    @raise ValueError: if no scraper or more than one scraper matches
    """
    # Lower-case the query once instead of once per scraper.
    cname = comicName.lower()
    candidates = []
    for scraper in get_scrapers():
        lname = scraper.get_name().lower()
        if lname == cname:
            # perfect match
            return scraper
        if cname in lname:
            candidates.append(scraper)
    if len(candidates) == 1:
        return candidates[0]
    if candidates:
        comics = ", ".join(x.get_name() for x in candidates)
        raise ValueError('Multiple comics %s found.' % comics)
    raise ValueError('Comic %r not found.' % comicName)
|
||||||
|
|
||||||
|
|
||||||
|
def items():
    """Return whatever get_scrapers() returns (alias)."""
    scrapers = get_scrapers()
    return scrapers
|
||||||
|
|
||||||
|
|
||||||
|
_scrapers = None
def get_scrapers():
    """Find all comic scraper classes in the plugins directory.
    The result is cached, sorted by scraper name.
    @return: list of _BasicScraper classes
    @rtype: list of _BasicScraper
    @raise ValueError: propagated from check_scrapers() on duplicate names
    """
    global _scrapers
    if _scrapers is None:
        _scrapers = list(get_all_plugins(get_modules()))
        _scrapers.sort(key=lambda s: s.get_name())
        try:
            check_scrapers()
        except Exception:
            # Drop the unvalidated cache so that the next call checks
            # again instead of silently returning the rejected list.
            _scrapers = None
            raise
    return _scrapers
|
||||||
|
|
||||||
|
|
||||||
|
def check_scrapers():
    """Verify that no two cached scrapers share a name, ignoring case.

    Reads the module-level _scrapers list.
    @raise ValueError: naming the two clashing scrapers
    """
    seen = {}
    for scraper in _scrapers:
        key = scraper.get_name().lower()
        if key not in seen:
            seen[key] = scraper
            continue
        name1 = scraper.get_name()
        name2 = seen[key].get_name()
        raise ValueError('Duplicate scrapers %s and %s found' % (name1, name2))
|
||||||
|
|
||||||
|
|
||||||
|
def get_modules():
|
||||||
|
"""Find all valid modules in the plugins directory. A valid module
|
||||||
|
must have a .py extension, and is importable.
|
||||||
|
@return: all loaded valid modules
|
||||||
|
@rtype: iterator of module
|
||||||
|
"""
|
||||||
|
# load from the plugins folder
|
||||||
|
folder = os.path.join(os.path.dirname(__file__), 'plugins')
|
||||||
|
for filename in get_importable_modules(folder):
|
||||||
|
try:
|
||||||
|
module = load_module(filename)
|
||||||
|
if module is not None:
|
||||||
|
yield module
|
||||||
|
except StandardError, msg:
|
||||||
|
print "ERROR", msg
|
||||||
|
|
||||||
|
|
||||||
|
def get_importable_modules(folder):
    """Find all module files in the given folder that end with '.py' and
    don't start with an underscore.
    @return: module filenames, each joined with the folder
    @rtype: iterator of string
    """
    for entry in os.listdir(folder):
        if entry.startswith('_'):
            continue
        if not entry.endswith('.py'):
            continue
        yield os.path.join(folder, entry)
|
||||||
|
|
||||||
|
|
||||||
|
def load_module(filename):
    """Import the plugin module named after the given file and return it.
    The module name is 'dosagelib.plugins.' plus the file's base name
    without extension. No exception is caught here; an ImportError (or
    any other error raised while importing) propagates to the caller.
    @return: the loaded module
    @rtype: module
    """
    stem, _extension = os.path.splitext(os.path.basename(filename))
    modulename = "dosagelib.plugins.%s" % stem
    # __import__ returns the top-level package, so fetch the submodule
    # from sys.modules.
    __import__(modulename)
    return sys.modules[modulename]
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_plugins(modules):
    """Chain the scrapers found by get_plugins() over all modules.
    @param modules: the modules to search
    @ptype modules: iterator of modules
    @return: found scrapers
    @rtype: iterator of class objects
    """
    for current in modules:
        found = get_plugins(current)
        for plugin in found:
            yield plugin
|
||||||
|
|
||||||
|
|
||||||
|
def get_plugins(module):
    """Yield every attribute of the module that is a subclass of
    _BasicScraper.
    If the module defines __all__, only those entries will be searched,
    otherwise all objects not starting with '_' will be searched.
    """
    try:
        candidates = module.__all__
    except AttributeError:
        candidates = [key for key in vars(module) if not key.startswith('_')]
    missing = object()
    for key in candidates:
        # names listed in __all__ but absent from the module are skipped
        obj = getattr(module, key, missing)
        if obj is missing:
            continue
        try:
            is_scraper = issubclass(obj, _BasicScraper)
        except TypeError:
            # not a class at all
            continue
        if is_scraper:
            yield obj
|
310
dosagelib/util.py
Normal file
310
dosagelib/util.py
Normal file
|
@ -0,0 +1,310 @@
|
||||||
|
from __future__ import division
|
||||||
|
|
||||||
|
import urllib2, urlparse
|
||||||
|
import sys
|
||||||
|
import struct
|
||||||
|
import array
|
||||||
|
import os
|
||||||
|
import cgi
|
||||||
|
import re
|
||||||
|
import traceback
|
||||||
|
import time
|
||||||
|
from htmlentitydefs import name2codepoint
|
||||||
|
from math import log, floor
|
||||||
|
|
||||||
|
from .output import out
|
||||||
|
from .configuration import UserAgent, AppName, App, SupportUrl
|
||||||
|
|
||||||
|
class NoMatchError(Exception):
    """Exception type for a failed pattern match; not raised in this module's visible code."""
|
||||||
|
|
||||||
|
def getMatchValues(matches):
    """Return the set of group(1) values of the given match objects."""
    values = set()
    for match in matches:
        values.add(match.group(1))
    return values
|
||||||
|
|
||||||
|
def fetchManyMatches(url, regexes):
    '''Fetch url and collect all matches of every regular expression.

    Returns a list with one set per regular expression, in the same
    order; each set holds the group(1) values of all matches of that
    expression and is empty when it matched nowhere.'''
    out.write('Matching regex(es) %r multiple times against %s...' % ([rex.pattern for rex in regexes], url), 2)
    page = urlopen(url)
    data = page.read()

    matches = [getMatchValues(regex.finditer(data)) for regex in regexes]
    # The list has one entry per regex even when nothing matched, so test
    # the sets themselves to decide which message to log.
    if any(matches):
        out.write('...found %r' % (matches,), 2)
    else:
        out.write('...not found!', 2)

    return list(matches)
|
||||||
|
|
||||||
|
def fetchMatches(url, regexes):
    '''Fetch url and return group(1) of the first match of each regex.

    Regexes that do not match contribute nothing, so the result may be
    shorter than regexes and positions do not line up with it.'''
    out.write('Matching regex(es) %r against %s...' % ([rex.pattern for rex in regexes], url), 2)
    data = urlopen(url).read()

    hits = (regex.search(data) for regex in regexes)
    matches = [hit.group(1) for hit in hits if hit]

    if not matches:
        out.write('...not found!', 2)
    else:
        out.write('...found %r' % (matches,), 2)

    return matches
|
||||||
|
|
||||||
|
def fetchMatch(url, regex):
    '''Return group(1) of the first match of regex in the page at url,
    or None when it does not match.'''
    found = fetchMatches(url, (regex,))
    if not found:
        return None
    return found[0]
|
||||||
|
|
||||||
|
def fetchUrl(url, regex):
    '''Return the first match of regex in the page at url, resolved
    against url, or None when there is no (non-empty) match.'''
    found = fetchMatch(url, regex)
    if not found:
        return None
    return urlparse.urljoin(url, found)
|
||||||
|
|
||||||
|
# Matches <base href="..."> with or without whitespace or '/' before '>'.
baseSearch = re.compile(r'<base\s+href="([^"]*)"\s*/?>', re.IGNORECASE)
def fetchUrls(url, regexes):
    '''Fetch url and return the first match of each regex as absolute URL.

    Matches are resolved against the page's <base href> when it has a
    non-empty one, otherwise against url. Regexes that do not match
    contribute nothing, so the result may be shorter than regexes and is
    empty when none matched.'''
    regexes = list(regexes)
    out.write('Matching regex(es) %r against %s...' % ([rex.pattern for rex in regexes], url), 2)
    data = urlopen(url).read()

    # Look up the base separately from the caller's patterns: in a
    # compacted match list a missing <base> cannot be told apart from the
    # first pattern's match.
    baseMatch = baseSearch.search(data)
    baseUrl = (baseMatch and baseMatch.group(1)) or url

    hits = (rex.search(data) for rex in regexes)
    matches = [hit.group(1) for hit in hits if hit]
    if matches:
        out.write('...found %r' % (matches,), 2)
    else:
        out.write('...not found!', 2)

    return [urlparse.urljoin(baseUrl, match) for match in matches]
|
||||||
|
|
||||||
|
def fetchManyUrls(url, regexes):
    '''Fetch url and return, per regex, all of its matches as absolute URLs.

    Returns one list per regex, in the same order. Matches are resolved
    against the page's <base href> when it has a non-empty one, otherwise
    against url.'''
    matchGroups = fetchManyMatches(url, [baseSearch] + list(regexes))
    # fetchManyMatches returns sets, which cannot be indexed; sort so the
    # choice is deterministic should a page carry several <base> tags.
    baseCandidates = sorted(matchGroups.pop(0))
    baseUrl = (baseCandidates and baseCandidates[0]) or url

    return [[urlparse.urljoin(baseUrl, match) for match in matchGroup]
            for matchGroup in matchGroups]
|
||||||
|
|
||||||
|
def _unescape(text):
|
||||||
|
"""
|
||||||
|
Replace HTML entities and character references.
|
||||||
|
"""
|
||||||
|
def _fixup(m):
|
||||||
|
text = m.group(0)
|
||||||
|
if text[:2] == "&#":
|
||||||
|
# character reference
|
||||||
|
try:
|
||||||
|
if text[:3] == "&#x":
|
||||||
|
text = unichr(int(text[3:-1], 16))
|
||||||
|
else:
|
||||||
|
text = unichr(int(text[2:-1]))
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
# named entity
|
||||||
|
try:
|
||||||
|
text = unichr(name2codepoint[text[1:-1]])
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
if isinstance(text, unicode):
|
||||||
|
text = text.encode('utf-8')
|
||||||
|
text = urllib2.quote(text, safe=';/?:@&=+$,')
|
||||||
|
return text
|
||||||
|
return re.sub("&#?\w+;", _fixup, text)
|
||||||
|
|
||||||
|
def normaliseURL(url):
    """
    Removes any leading empty segments to avoid breaking urllib2; also replaces
    HTML entities and character references.

    Spaces are replaced with '%20' and the path always ends up with
    exactly one leading '/'.
    """
    # XXX: brutal hack
    url = _unescape(url).replace(' ', '%20')

    parts = list(urlparse.urlparse(url))
    path = parts[2].replace(' ', '%20')
    # dropping the leading empty segments is the same as dropping the
    # leading slashes
    parts[2] = '/' + path.lstrip('/')
    return urlparse.urlunparse(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def urlopen(url, referrer=None, retries=5):
    """Open url with urllib2, retrying on IOError with exponential back-off.

    @param url: the URL to open; passed through normaliseURL() first
    @param referrer: optional referring URL, sent as both 'Referer' and
      'Referrer' header
    @param retries: maximum number of attempts (at least one is made)
    @return: the object returned by urllib2.urlopen()
    @raise IOError: the error of the last attempt when all attempts fail
    """
    # Work around urllib2 brokenness
    url = normaliseURL(url)
    req = urllib2.Request(url)
    if referrer:
        req.add_header('Referrer', referrer)
        req.add_header('Referer', referrer)
    req.add_header('User-Agent', UserAgent)

    tries = 0
    while 1:
        try:
            return urllib2.urlopen(req)
        except IOError:
            # Give up right away after the last attempt instead of
            # sleeping through one more back-off period first.
            if tries + 1 >= retries:
                raise
            out.write('URL retrieval failed, sleeping %d seconds and retrying (%d)' % (2**tries, tries), 2)
            time.sleep(2**tries)
            tries += 1
|
||||||
|
|
||||||
|
def getWindowSize():
    """Return the ws_col field of the TIOCGWINSZ ioctl on sys.stderr.

    @raise NotImplementedError: if fcntl/termios cannot be imported or
      the ioctl fails with IOError
    """
    try:
        from fcntl import ioctl
        from termios import TIOCGWINSZ
    except ImportError:
        raise NotImplementedError
    # four unsigned shorts: ws_row, ws_col, ws_xpixel, ws_ypixel
    layout = 'HHHH'
    buf = array.array('b', ' ' * struct.calcsize(layout))
    try:
        # mutate_flag=True: the result is written into buf
        ioctl(sys.stderr, TIOCGWINSZ, buf, True)
    except IOError:
        raise NotImplementedError
    ws_row, ws_col, ws_xpixel, ws_ypixel = struct.unpack(layout, buf.tostring())
    return ws_col
|
||||||
|
|
||||||
|
# Unit names for successive powers of 1024.
suffixes = ('B', 'kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')

def saneDataSize(size):
    """Format a byte count with three decimals and a 1024-based unit.

    A size of exactly 0 is reported as 'unk B'. The unit is chosen from
    the magnitude of size and clamped to the known suffixes.
    """
    if size == 0:
        return 'unk B'
    exponent = int(floor(log(abs(size), 1024)))
    # clamp into the range of available suffixes
    exponent = max(min(exponent, len(suffixes) - 1), 0)
    scaled = float(size) / (1024 ** exponent)
    return '%0.3f %s' % (scaled, suffixes[exponent])
|
||||||
|
|
||||||
|
def splitpath(path):
    """Return the components of path as a list, outermost first.

    Components are peeled off with os.path.split() until the tail is
    empty, so a root prefix is dropped and a path ending in a separator
    yields an empty list.
    """
    parts = []
    rest, name = os.path.split(path)
    while name:
        parts.append(name)
        rest, name = os.path.split(rest)
    # collected innermost first
    parts.reverse()
    return parts
|
||||||
|
|
||||||
|
def getRelativePath(basepath, path):
    """Return path expressed relative to the directory basepath.

    Both arguments are made absolute first. Unlike a component-by-
    component comparison this also handles path being an ancestor of
    basepath ('..', '../..', ...) and path being equal to basepath ('.').
    """
    return os.path.relpath(os.path.abspath(path), os.path.abspath(basepath))
|
||||||
|
|
||||||
|
def getQueryParams(url):
    """Return the query parameters of url as a dict of value lists.

    Uses urlparse.parse_qs, the replacement for the deprecated
    cgi.parse_qs.
    """
    query = urlparse.urlsplit(url)[3]
    out.write('Extracting query parameters from %r (%r)...' % (url, query), 3)
    return urlparse.parse_qs(query)
|
||||||
|
|
||||||
|
|
||||||
|
def internal_error(out=sys.stderr, etype=None, evalue=None, tb=None):
    """Print internal error message (output defaults to stderr).

    Exception type, value and traceback default to the corresponding
    entries of sys.exc_info(). After the traceback the application,
    proxy and locale information is printed.
    Note: the out parameter hides the module-level out object here.
    """
    print >> out, os.linesep
    print >> out, """********** Oops, I did it again. *************

You have found an internal error in %(app)s. Please write a bug report
at %(url)s and include the following information:
- your commandline arguments and any configuration file in ~/.dosage/
- the system information below

Not disclosing some of the information above due to privacy reasons is ok.
I will try to help you nonetheless, but you have to give me something
I can work with ;) .
""" % {'app': AppName, 'url': SupportUrl}
    # fill in whatever the caller did not supply
    current_type, current_value, current_tb = sys.exc_info()
    if etype is None:
        etype = current_type
    if evalue is None:
        evalue = current_value
    if tb is None:
        tb = current_tb
    print >> out, etype, evalue
    traceback.print_exception(etype, evalue, tb, None, out)
    for report in (print_app_info, print_proxy_info, print_locale_info):
        report(out=out)
    print >> out, os.linesep, \
            "******** %s internal error, over and out ********" % AppName
|
||||||
|
|
||||||
|
|
||||||
|
def print_env_info(key, out=sys.stderr):
    """Print 'key = <repr of value>' if the environment variable is set."""
    value = os.getenv(key)
    if value is None:
        return
    print >> out, key, "=", repr(value)
|
||||||
|
|
||||||
|
|
||||||
|
def print_proxy_info(out=sys.stderr):
    """Print the http_proxy environment variable, if it is set."""
    proxy_key = "http_proxy"
    print_env_info(proxy_key, out=out)
|
||||||
|
|
||||||
|
|
||||||
|
def print_locale_info(out=sys.stderr):
    """Print those locale related environment variables that are set."""
    locale_keys = ("LANGUAGE", "LC_ALL", "LC_CTYPE", "LANG")
    for locale_key in locale_keys:
        print_env_info(locale_key, out=out)
|
||||||
|
|
||||||
|
|
||||||
|
def print_app_info(out=sys.stderr):
    """Print system and application info (output defaults to stderr).

    Printed are the App string, the Python version and platform, and the
    current local time as formatted by strtime().
    """
    print >> out, "System info:"
    print >> out, App
    python_info = {"version": sys.version, "platform": sys.platform}
    print >> out, "Python %(version)s on %(platform)s" % python_info
    now = time.time()
    print >> out, "Local time:", strtime(now)
|
||||||
|
|
||||||
|
|
||||||
|
def strtime(t):
    """Return the local time for timestamp t as 'YYYY-MM-DD HH:MM:SS'
    followed by the string from strtimezone()."""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t))
    zone = strtimezone()
    return stamp + zone
|
||||||
|
|
||||||
|
|
||||||
|
def strtimezone():
    """Return the current local UTC offset as '+HHMM' or '-HHMM'.
    (%z gives this on some platforms, but is not supported on all.)
    """
    # time.daylight only says that the zone defines a DST rule; whether
    # DST applies right now is given by tm_isdst.
    if time.daylight and time.localtime().tm_isdst > 0:
        zone = time.altzone
    else:
        zone = time.timezone
    # time.timezone/altzone count seconds *west* of UTC, so the sign flips.
    sign = '-' if zone > 0 else '+'
    hours, minutes = divmod(abs(zone) // 60, 60)
    return "%s%02d%02d" % (sign, hours, minutes)
|
||||||
|
|
||||||
|
|
||||||
|
def tagre(tag, attribute, value):
    """Build a regular expression for an HTML tag with a double-quoted
    attribute value. Tag and attribute names are matched case
    insensitively, whitespace and any attributes before the wanted one
    are skipped, and the value is placed in a match group. The value is
    inserted as is, so it may itself be a regular expression.
    @param tag: the tag name
    @ptype tag: string
    @param attribute: the attribute name
    @ptype attribute: string
    @param value: the attribute value
    @ptype value: string
    @return: the generated regular expression suitable for re.compile()
    @rtype: string
    """
    tag_re = case_insensitive_re(tag)
    attribute_re = case_insensitive_re(attribute)
    pattern = r'<\s*%s[^>]*\s+%s\s*=\s*"(%s)"'
    return pattern % (tag_re, attribute_re, value)
|
||||||
|
|
||||||
|
def case_insensitive_re(name):
    """Reformat the given name to a case insensitive regular expression string
    without using re.IGNORECASE. This way selective strings can be made case
    insensitive.
    Characters with distinct lower and upper case forms become a two
    character class; all other characters are matched literally and are
    escaped with re.escape(), so that e.g. '^' or ']' cannot corrupt the
    expression.
    @param name: the name to make case insensitive
    @ptype name: string
    @return: the case insensitive regex
    @rtype: string
    """
    parts = []
    for c in name:
        lower, upper = c.lower(), c.upper()
        if lower == upper:
            parts.append(re.escape(c))
        else:
            parts.append("[%s%s]" % (lower, upper))
    return "".join(parts)
|
||||||
|
|
189
setup.py
Normal file
189
setup.py
Normal file
|
@ -0,0 +1,189 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Dosage, the webcomic downloader
|
||||||
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of version 2 of the GNU General Public License as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
from distutils.core import setup, Distribution
|
||||||
|
from distutils.command.install_lib import install_lib
|
||||||
|
from distutils import util
|
||||||
|
from distutils.file_util import write_file
|
||||||
|
|
||||||
|
AppVersion = '1.7'
|
||||||
|
AppName = 'Dosage'
|
||||||
|
|
||||||
|
def normpath (path):
    """Return path normalized by os.path.normpath()."""
    normalized = os.path.normpath(path)
    return normalized
|
||||||
|
|
||||||
|
|
||||||
|
def cnormpath (path):
    """Norm a path name to platform specific notation and make it absolute.

    Relative paths are taken relative to sys.prefix.
    """
    path = os.path.normpath(path)
    if os.name == 'nt':
        # replace slashes with backslashes
        path = path.replace("/", "\\")
    if os.path.isabs(path):
        return path
    return os.path.normpath(os.path.join(sys.prefix, path))
|
||||||
|
|
||||||
|
|
||||||
|
# Matches the '(released <date>)' note; group 1 is the date text.
release_ro = re.compile(r"\(released (.+)\)")
def get_release_date ():
    """Parse and return release date as string from doc/changelog.txt.

    Only the first line of the file is inspected. Returns "unknown" if
    that line carries no '(released ...)' note.
    """
    fname = os.path.join("doc", "changelog.txt")
    release_date = "unknown"
    with open(fname) as fd:
        # the release date is on the first line
        line = fd.readline()
        mo = release_ro.search(line)
        if mo:
            # group(1) is the date string; groups() would return a tuple
            release_date = mo.group(1)
    return release_date
|
||||||
|
|
||||||
|
|
||||||
|
class MyInstallLib (install_lib, object):
    """Custom library installation which also installs a generated
    configuration module."""

    def install (self):
        """Install the library files plus the generated config file.

        Returns the list of installed files.
        """
        # The base install() returns None instead of a list when the
        # build directory does not exist.
        outs = super(MyInstallLib, self).install() or []
        infile = self.create_conf_file()
        outfile = os.path.join(self.install_dir, os.path.basename(infile))
        self.copy_file(infile, outfile)
        outs.append(outfile)
        return outs

    def create_conf_file (self):
        """Create the configuration file and return its file name.

        Also sets self.install_lib as a side effect.
        """
        cmd_obj = self.distribution.get_command_obj("install")
        cmd_obj.ensure_finalized()
        # we have to write a configuration file because we need the
        # <install_data> directory (and other stuff like author, url, ...)
        # all paths are made absolute by cnormpath()
        data = []
        for d in ['purelib', 'platlib', 'lib', 'headers', 'scripts', 'data']:
            attr = 'install_%s' % d
            if cmd_obj.root:
                # cut off root path prefix
                cutoff = len(cmd_obj.root)
                # don't strip the path separator
                if cmd_obj.root.endswith(os.sep):
                    cutoff -= 1
                val = getattr(cmd_obj, attr)[cutoff:]
            else:
                val = getattr(cmd_obj, attr)
            if attr == 'install_data':
                cdir = os.path.join(val, "share", "dosage")
                data.append('config_dir = %r' % cnormpath(cdir))
            elif attr == 'install_lib':
                if cmd_obj.root:
                    # re-attach the root-less library path below the root
                    _drive, tail = os.path.splitdrive(val)
                    if tail.startswith(os.sep):
                        tail = tail[1:]
                    self.install_lib = os.path.join(cmd_obj.root, tail)
                else:
                    self.install_lib = val
            data.append("%s = %r" % (attr, cnormpath(val)))
        self.distribution.create_conf_file(data, directory=self.install_lib)
        return self.get_conf_output()

    def get_conf_output (self):
        """Return the config file name inside self.install_lib."""
        return self.distribution.get_conf_filename(self.install_lib)

    def get_outputs (self):
        """Add the generated config file to the list of outputs."""
        outs = super(MyInstallLib, self).get_outputs()
        outs.append(self.get_conf_output())
        return outs
|
||||||
|
|
||||||
|
|
||||||
|
class MyDistribution (Distribution, object):
    """Custom distribution class generating config file."""

    def __init__ (self, attrs):
        """Set console and windows scripts."""
        super(MyDistribution, self).__init__(attrs)
        self.console = ['dosage']

    def run_commands (self):
        """Generate config file and run commands."""
        cwd = os.getcwd()
        # local configuration: everything points at the current directory
        data = [
            'config_dir = %r' % os.path.join(cwd, "config"),
            "install_data = %r" % cwd,
            "install_scripts = %r" % cwd,
        ]
        self.create_conf_file(data)
        super(MyDistribution, self).run_commands()

    def get_conf_filename (self, directory):
        """Get name for config file."""
        basename = "_%s_configdata.py" % self.get_name()
        return os.path.join(directory, basename)

    def create_conf_file (self, data, directory=None):
        """Create local config file from given data (list of lines) in
        the directory (or current directory if not given).
        The given list is extended in place with two header lines, the
        distribution metadata and the release date."""
        # prepend the header; the coding line ends up first
        data[0:0] = [
            "# -*- coding: iso-8859-1 -*-",
            "# this file is automatically created by setup.py",
        ]
        if directory is None:
            directory = os.getcwd()
        filename = self.get_conf_filename(directory)
        # add metadata
        metanames = ("name", "version", "author", "author_email",
                     "maintainer", "maintainer_email", "url",
                     "license", "description", "long_description",
                     "keywords", "platforms", "fullname", "contact",
                     "contact_email")
        for name in metanames:
            getter = getattr(self.metadata, "get_" + name)
            val = getter()
            if isinstance(val, str):
                val = unicode(val)
            data.append("%s = %r" % (name, val))
        data.append('release_date = "%s"' % get_release_date())
        # write the config file
        message = "creating %s" % filename
        util.execute(write_file, (filename, data),
                     message, self.verbose >= 1, self.dry_run)
|
||||||
|
|
||||||
|
|
||||||
|
# Keyword arguments handed to setup(); kept in a module-level mapping so
# the custom distribution and install_lib classes are wired in one place.
args = {
    'name': AppName,
    'version': AppVersion,
    'description': 'a powerful webcomic downloader and archiver',
    'author': 'Tristan Seligmann, Jonathan Jacobs, Bastian Kleineidam',
    'author_email': 'calvin@users.sourceforge.net',
    'maintainer': 'Bastian Kleineidam',
    'maintainer_email': 'calvin@users.sourceforge.net',
    'license': 'MIT',
    'url': 'https://github.com/wummel/dosage',
    'packages': ('dosagelib', 'dosagelib.plugins'),
    'scripts': ('dosage',),
    'distclass': MyDistribution,
    'cmdclass': {'install_lib': MyInstallLib},
}


if __name__ == '__main__':
    setup(**args)
|
||||||
|
|
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
48
tests/test_comics.py
Normal file
48
tests/test_comics.py
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
from itertools import izip
|
||||||
|
from unittest import TestCase
|
||||||
|
from dosagelib import scraper
|
||||||
|
|
||||||
|
|
||||||
|
class _ComicTester(TestCase):
|
||||||
|
"""Basic comic test class."""
|
||||||
|
scraperclass=None
|
||||||
|
|
||||||
|
def test_comic(self):
|
||||||
|
# Test a scraper. It must be able to traverse backward for
|
||||||
|
# at least 5 pages from the start, and find strip images
|
||||||
|
# on at least 4 pages.
|
||||||
|
module = self.scraperclass()
|
||||||
|
num = empty = 0
|
||||||
|
for n, comics in izip(xrange(5), module):
|
||||||
|
if len(comics) == 0:
|
||||||
|
empty += 1
|
||||||
|
for comic in comics:
|
||||||
|
self.save(comic)
|
||||||
|
num += 1
|
||||||
|
self.assertTrue(num >= 4, 'Traversal failed after %d strips.' % num)
|
||||||
|
self.assertTrue(empty <= 1, 'Failed to find images on %d pages.' % empty)
|
||||||
|
|
||||||
|
def save(self, comic):
|
||||||
|
# create a temporary directory
|
||||||
|
tmpdir = tempfile.mkdtemp()
|
||||||
|
try:
|
||||||
|
filename, saved = comic.save(tmpdir)
|
||||||
|
self.assertTrue(saved, 'Could not save comic %s to %s' % (comic, tmpdir))
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(tmpdir)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_comic_testers():
    """Create one _ComicTester subclass per comic scraper.

    Each generated class is named 'Test' plus the scraper class name and
    is stored in this module's globals. This currently generates over
    4000 test classes (one for each comic), so this takes a while."""
    namespace = globals()
    for scraperclass in scraper.items():
        classname = 'Test'+scraperclass.__name__
        attributes = {'scraperclass': scraperclass}
        namespace[classname] = type(classname, (_ComicTester,), attributes)


generate_comic_testers()
|
82
tests/test_util.py
Normal file
82
tests/test_util.py
Normal file
|
@ -0,0 +1,82 @@
|
||||||
|
import re
|
||||||
|
from unittest import TestCase
|
||||||
|
|
||||||
|
from dosagelib.util import saneDataSize, normaliseURL, _unescape, tagre
|
||||||
|
|
||||||
|
class SizeFormattingTest(TestCase):
    """
    Unit tests for L{saneDataSize}.
    """
    def check(self, size, expectedOutput):
        # Verify the formatting of a size and of its negation: the negative
        # value must render exactly like the positive one with a leading
        # minus sign.
        for sign, prefix in ((1, ''), (-1, '-')):
            self.assertEqual(saneDataSize(sign * size), prefix + expectedOutput)

    def test_verySmallSize(self):
        # Sizes smaller than a single byte should be formatted as bytes; this
        # case is fairly pathological, so the output is somewhat nonsensical.
        self.check(0.1, '0.100 B')

    def test_normalSizes(self):
        # Sizes should be formatted in the largest unit for which the size will
        # not be less than a single unit.
        # Each entry is (mantissa, power of two, expected output).
        cases = (
            (1, 0, '1.000 B'),
            (2.075, 10, '2.075 kB'),
            (5.88, 20, '5.880 MB'),
            (13.34, 30, '13.340 GB'),
            (445.348, 40, '445.348 TB'),
            (34.25, 50, '34.250 PB'),
            (3.14, 60, '3.140 EB'),
            (57.892, 70, '57.892 ZB'),
            (999.99, 80, '999.990 YB'),
        )
        for mantissa, exponent, expected in cases:
            self.check(mantissa * 2 ** exponent, expected)

    def test_veryLargeSize(self):
        # Sizes larger than 1024 yottabytes should be formatted as yottabytes.
        self.check(5567254 * 2 ** 80, '5567254.000 YB')
|
||||||
|
|
||||||
|
|
||||||
|
class URLTest(TestCase):
    """
    Tests for URL utility functions.
    """
    def test_unescape(self):
        # Test HTML replacement: the inputs carry HTML entities, which
        # _unescape is expected to resolve and URL-quote where needed.
        # NOTE(review): the entity spellings are reconstructed from the
        # expected outputs -- confirm against dosagelib.util._unescape.
        self.assertEqual(_unescape('foo&amp;bar'), 'foo&bar')
        self.assertEqual(_unescape('foo&#160;bar'), 'foo%C2%A0bar')
        self.assertEqual(_unescape('&quot;foo&quot;'), '%22foo%22')

    def test_normalisation(self):
        # Test URL normalisation: the doubled slash in the path is
        # collapsed and the HTML entity is resolved.
        self.assertEqual(normaliseURL('http://example.com//bar/baz&amp;baz'),
            'http://example.com/bar/baz&baz')
|
||||||
|
|
||||||
|
|
||||||
|
class RegexTest(TestCase):
    """
    Tests for the tag regular expression built by tagre.
    """

    ValuePrefix = '/bla/'
    # Each entry is (tag template, attribute value, whether it should match).
    TagTests = (
        ('<img src="%s">', ValuePrefix+'foo', True),
        ('< img src = "%s" >', ValuePrefix, True),
        ('<img class="prev" src="%s">', ValuePrefix+'...', True),
        ('<img origsrc="%s">', ValuePrefix, False),
        ('<Img src="%s">', ValuePrefix, True),
        ('<img SrC="%s">', ValuePrefix, True),
        ('<img src="%s">', ValuePrefix[:-1], False),
    )

    def test_regex(self):
        # Run every entry of TagTests against one compiled img/src pattern.
        pattern = tagre("img", "src", self.ValuePrefix+".*")
        matcher = re.compile(pattern)
        for case in self.TagTests:
            self.match_tag(matcher, *case)

    def match_tag(self, matcher, tag, value, domatch=True):
        # Fill the value into the tag template and check whether the matcher
        # accepts it; on an expected match, group 1 must equal the value.
        match = matcher.match(tag % value)
        if not domatch:
            self.assertFalse(match)
            return
        self.assertTrue(match)
        self.assertEqual(match.group(1), value)
|
||||||
|
|
Loading…
Reference in a new issue