Initial commit to Github.

This commit is contained in:
Bastian Kleineidam 2012-06-20 21:58:13 +02:00
commit f91fb80a39
55 changed files with 10801 additions and 0 deletions

12
.gitignore vendored Normal file

@ -0,0 +1,12 @@
*.orig
*.pyc
*.pyo
/Comics
/build
/dist
/.achievements
/MANIFEST
/todo
/Changelog.patool*
/_Dosage_configdata.py
/comics.test

20
COPYING Normal file

@ -0,0 +1,20 @@
Copyright © 2004-2008 Jonathan Jacobs and Tristan Seligmann
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

4
MANIFEST.in Normal file

@ -0,0 +1,4 @@
include MANIFEST.in
include COPYING doc/*.txt
include Makefile
recursive-include tests *.py

88
Makefile Normal file

@ -0,0 +1,88 @@
# This Makefile is only used by developers.
PYVER:=2.7
PYTHON:=python$(PYVER)
VERSION:=$(shell $(PYTHON) setup.py --version)
ARCHIVE:=dosage-$(VERSION).tar.gz
PY_FILES_DIRS := dosage dosagelib tests *.py
PY2APPOPTS ?=
NOSETESTS:=$(shell which nosetests)
NUMPROCESSORS:=$(shell grep -c processor /proc/cpuinfo)
CHMODMINUSMINUS:=--
# which test modules to run
TESTS ?= tests/
# set test options, e.g. to "--nologcapture"
TESTOPTS=
all:
.PHONY: chmod
chmod:
-chmod -R a+rX,u+w,go-w $(CHMODMINUSMINUS) *
find . -type d -exec chmod 755 {} \;
.PHONY: dist
dist:
git archive --format=tar --prefix=dosage-$(VERSION)/ HEAD | gzip -9 > ../$(ARCHIVE)
[ -f ../$(ARCHIVE).sha1 ] || sha1sum ../$(ARCHIVE) > ../$(ARCHIVE).sha1
[ -f ../$(ARCHIVE).asc ] || gpg --detach-sign --armor ../$(ARCHIVE)
doc/dosage.1.html: doc/dosage.1
man2html -r $< | tail -n +2 | sed 's/Time:.*//g' | sed 's@/:@/@g' > $@
.PHONY: release
release: distclean releasecheck dist
git tag v$(VERSION)
# @echo "Register at Python Package Index..."
# $(PYTHON) setup.py register
# freecode-submit < dosage.freecode
.PHONY: releasecheck
releasecheck: check test
@if egrep -i "xx\.|xxxx|\.xx" doc/changelog.txt > /dev/null; then \
echo "Could not release: edit doc/changelog.txt release date"; false; \
fi
# @if ! grep "Version: $(VERSION)" dosage.freecode > /dev/null; then \
# echo "Could not release: edit dosage.freecode version"; false; \
# fi
# The check programs used here are mostly local scripts on my private system.
# So for other developers there is no need to execute this target.
.PHONY: check
check:
[ ! -d .svn ] || check-nosvneolstyle -v
check-copyright
check-pofiles -v
py-tabdaddy
py-unittest2-compat tests/
.PHONY: pyflakes
pyflakes:
pyflakes $(PY_FILES_DIRS)
.PHONY: count
count:
@sloccount dosage dosagelib | grep "Total Physical Source Lines of Code"
.PHONY: clean
clean:
find . -name \*.pyc -delete
find . -name \*.pyo -delete
rm -rf build dist
.PHONY: distclean
distclean: clean
rm -rf build dist Dosage.egg-info
rm -f _Dosage_configdata.py MANIFEST
.PHONY: test
test:
$(PYTHON) $(NOSETESTS) -v --processes=$(NUMPROCESSORS) -m "^test_.*" $(TESTOPTS) $(TESTS)
.PHONY: deb
deb:
git-buildpackage --git-export-dir=../build-area/ --git-upstream-branch=master --git-debian-branch=debian --git-ignore-new
comics:
./dosage -v @@ > comics.log 2>&1

1
README.md Symbolic link

@ -0,0 +1 @@
doc/README.txt

93
doc/README.txt Normal file

@ -0,0 +1,93 @@
Dosage
=======
Dosage is a powerful webcomic downloader and archiver.
Introduction
-------------
Dosage is designed to keep a local copy of specific webcomics
and other picture-based content such as Picture of the Day sites.
With the dosage command line script you can get the latest strip of a
webcomic, catch up to the last strip downloaded, or download a
strip for a particular date/index (unless the webcomic's site layout
makes this impossible).
Notice
-------
This software is in no way intended to publicly "broadcast" comic strips;
it is purely for personal use. Please be aware that by making these strips
publicly available (without the explicit permission of the author) you
may be infringing upon various copyrights.
Usage
------
List available comics (over 4000 at the moment):
`$ dosage -l`
Get the latest comic of, for example, CalvinAndHobbes and save it in the "Comics"
directory:
`$ dosage CalvinAndHobbes`
If you already have downloaded several comics and want to get the latest
strip of all of them:
`$ dosage @`
For advanced options and features execute dosage -h or look at the dosage
manual page.
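For instance, you can catch up a single comic starting from a specific index
(mirroring the example given in the manual page):
`$ dosage -c PennyArcade:2004-07-22`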
Offensive comics
-----------------
There are some comics supported by Dosage that may be offensive to readers or
to others that have access to the downloaded images.
SexyLosers is one module that has been discussed. Dosage offers a mechanism
to disable such modules. Modules listed in "/etc/dosage/disabled" and
"~/.dosage/disabled" will be disabled. These files should contain only one
module name per line. Note: Under Windows "~" will also expand to the user's
home directory, usually "C:\Documents and Settings\UserName".
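For example, to disable the SexyLosers module for the current user you could
append its name to the per-user file (a sketch using a POSIX shell):
`$ echo SexyLosers >> ~/.dosage/disabled`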
Dependencies
-------------
Dosage requires Python version 2.5 or higher, which can be downloaded
from http://www.python.org.
No external Python modules are required - only the Python Standard Library
that gets installed with Python.
Installation
-------------
You can invoke Dosage directly from the source code as "./dosage". Alternatively,
you can install Dosage using python distutils by invoking setup.py in
the root of the distribution. For example:
`python setup.py install`
or if you do not have root permissions:
`python setup.py install --home=$HOME`
Technical Description
----------------------
Dosage is written entirely in Python and relies on regular expressions to
do most of the grunt work.
For each webcomic Dosage has a plugin module, found in the "plugins"
subdirectory of the dosagelib directory. Each module is a subclass of
the _BasicScraper class and specifies where to download its comic images.
Some comic syndicates (ucomics for example) have a standard layout for all
comics. For such cases there are general base classes derived from _BasicScraper
which help define the plugins for all comics of this syndicate.
Extending Dosage
-----------------
In order to add a new webcomic, a new module class has to be created in one of the
*.py files in the dosagelib/plugins subdirectory. Look at the existing
module classes for examples.
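As a minimal sketch (modelled on the classes in dosagelib/plugins/a.py; the
comic name and URLs below are invented), such a module class usually only
defines a few URLs and compiled regular expressions:

    from re import compile
    from ..helpers import _BasicScraper

    class ExampleComic(_BasicScraper):
        latestUrl = 'http://www.example.com/'
        imageUrl = 'http://www.example.com/comic/%s'
        imageSearch = compile(r'<img src="(/strips/[^"]+)"')
        prevSearch = compile(r'<a href="([^"]+)">Previous</a>')
        help = 'Index format: nnn'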
Reporting Bugs
---------------
You can report bugs, patches or requests at the Github issue tracker at
https://github.com/wummel/dosage/issues
Dosage currently supports a large number of comics and that number grows on
a regular basis. If you feel that there are comics that Dosage does not
currently support but should support, please feel free to request them.

647
doc/changelog.txt Normal file

@ -0,0 +1,647 @@
Dosage 1.7 (released xx.xx.2012)
Features:
- cmdline: Added proper return codes for error conditions.
- comics: Added more robust regular expressions for HTML tags.
They match case insensitive and ignore whitespaces now.
Changes:
- installation: Added support for dynamic configuration values.
- comics: Removed the twisted and zope dependencies by adding
an internal plugin search mechanism.
- testing: Refactored the test comic routine in proper unit tests.
Fixes:
- comics: Adjusted Xkcd href values.
- comics: Don't add empty URLs to the list of found URLs.
Dosage v.1.6.0:
* The "Not Dead Yet" release.
* Added / Fixed / etc. comics:
- Too many to list, really.
* New dependencies:
- Twisted
- zope.interface (not zope)
* Revamped plugin system, the first step on the road to Twisted.
Dosage v.1.5.8:
* Added comics:
- BonoboConspiracy
- ChasingTheSunset
- Comedity
- GoneWithTheBlastwave
- KeenSpot/* -- a *LOT* of KeenSpot submodules
- NichtLustig
- OtenbaFiles
- Wulffmorgenthaler
- Y
* Fixed comics:
- AbstractGender
- AlienLovesPredator
- AppleGeeks
- EarthsongSaga
- NewWorld
- WhiteNinja
* Moved comics:
- KeenSpot/CatLegend (previously CatLegend)
- All KeenSpot/* comic subnames no longer have "The" prefixes.
- UClick (replaces UComics and UComicsEspanol)
* Removed comics:
- KeenSpot/TheDevilsPanties (duplicate of KeenSpot/DevilsPanties)
Dosage v.1.5.7:
* Important SmackJeeves module fix. Catchup used to loop around from the
first strip to the last one, thus potentially hammering the SmackJeeves
servers with floods of requests from neverending catchups.
* Added comics:
- AbleAndBaker
- AcademyVale
- Aikida
- Angels2200
- BetterDays
- BlankLabel (virtual module)
- BoredAndEvil
- Catharsis
- ChuckAndElmo
- CloneManga/PennyTribute
- CourtingDisaster
- DeathToTheExtremist
- DogComplex
- DownToEarth
- Dracula
- DragonTails
- DrFun
- DungeonCrawlInc
- ExtraLife
- FalconTwin
- FightCastOrEvade
- Flipside
- Housd
- JerkCity
- JoeAndMonkey
- KeenSpot/SuicideForHire
- LasLindas
- Nekobox
- Nervillsaga
- NewAdventures
- NewAdventuresOfBobbin
- Nihilism
- Nukees
- OkayPants
- PartiallyClips
- PensAndTales
- RWWR
- WebcomicsNation (virtual module)
- Yirmumah
* Fixed comics:
- Asif
- CatLegend
- CloneManga/NanasEverydayLife
- CloneManga/PaperEleven
- DrunkDuck (various comics no longer present)
- EarthsongSaga
- ErrantStory
- InkTank
- KeenSpot/<various> (ComicGenesis migration)
- KiagiSwordscat
- Qwantz
- SGVY
- SmackJeeves
- Smamusement
- SnafuComics
- UComicsEspanol
* Moved comics:
- Stubble (previously KeenSpot/Stubble)
Dosage v.1.5.6:
* Added comics:
- CandyCartoon
- CloneManga/Kanami
- Drowtales
- KeenSpot/FoxTails
- Krakow
- SmackJeeves (virtual module)
* Fixed comics:
- CrapIDrewOnMyLunchBreak
- CtrlAltDel
- DMFA
- EarthsongSaga
- EverybodyLovesEricRaymond
- GirlsWithSlingshots
- KeenSpot
- KeenSpot/WapsiSquare
- NewWorld
- PennyArcade
- PiledHigherAndDeeper
- QuestionableContent
- SluggyFreelance
- SnafuComics
- Sokora
- UComicsEspanol (updated submodules)
- UComics (updated submodules)
* Moved comics:
- CatLegend (previously KeenSpot/CatLegend)
- DominicDeegan (previously KeenSpot/DominicDeegan)
- KeenSpot/TriquetraCats (previously DrunkDuck/TriquetraCats)
- NekoTheKitty (previously KeenSpot/NekoTheKitty)
- TheNoob (previously KeenSpot/TheNoob)
Dosage v.1.5.5:
* Added comics:
- AbstractGender
- AnimeArcadia
- CaptainSNES
- DrunkDuck/Holy_Zen
- EarthsongSaga
- NinthElsewhere (9th Elsewhere)
- PebbleVersion
- SGVY (Sparkling Generation Valkyrie Yuuki)
- SuccubusJustice
- ErrantStory (previously KeenSpot/ErrantStory)
* Fixed comics:
- DrunkDuck
- PvPonline
- SluggyFreelance
Dosage v.1.5.4:
* Added comics:
- Andiwear
- DrunkDuck (virtual)
- EverybodyLovesEricRaymond
- FantasyRealms
- KeenSpot/2WayMirror
- KeenSpot/ANT
- KeenSpot/AngelTheDemoness
- KeenSpot/Apotheosis
- KeenSpot/Aquatica
- KeenSpot/BadlyDrawnKitties
- KeenSpot/BobAndFred
- KeenSpot/BrunoTheBandit
- KeenSpot/CatLegend
- KeenSpot/EdibleDirt
- KeenSpot/FelicityFlint
- KeenSpot/Flem
- KeenSpot/GreenAvenger
- KeenSpot/LangLang
- KeenSpot/Picatrix
- KeenSpot/ScandalSheet
- KeenSpot/Shifters
- KeenSpot/SoapOnARope
- KeenSpot/SuburbanJungle
- KeenSpot/TheClassMenagerie
- KeenSpot/TheDevilsPanties
- KeenSpot/ToddAndPenguin
- KeenSpot/TwoLumps
- KeenSpot/Wereworld
- KeenSpot/YouDamnKid
- SokoraRefugees
* Fixed comics:
- AbsurdNotions
- CloneManga
- PastelDefender
- PennyArcade
- SluggyFreelance
Dosage v.1.5.3:
* Fixed a bug that caused RSS output to crash if the file already existed,
but had no items.
* Added comics:
- CatAndGirl
- CloneManga
- Commissioned
- JoyOfTech
- KeenSpot/AlphaLuna
- KeenSpot/Lowroad75
- KeenSpot/Werechild
- TheWotch
- TonjaSteele
* Fixed comics:
- DieselSweeties
- LittleGamers
- PennyArcade
- StarCrossdDestiny
- VGCats
Dosage v.1.5.2:
* Removed some debugging cruft that slipped through in the last release.
* Added comics:
- KeenSpot/TheNoob
- PiledHigherAndDeeper
* Fixed comics:
- ALessonIsLearned
- Misfile
- RealLife
- UComics
- UComicsEspanol
Dosage v.1.5.1:
* Output event modules now generate proper URLs. You can now pass a base URL
with --base-url, which should correspond to --base-path. If not passed,
Dosage will try to generate a working file:/// URL, but this may not work in
some circumstances.
* RSS output tweaked.
* --list now outputs in columns; pass --single-list to get the old
behaviour (thanks TobiX).
* Added comics:
- AbsurdNotions (contributed by TobiX)
- Altermeta (contributed by TobiX)
- AModestDestiny (contributed by TobiX)
- BadBlood
- BetterYouThanMe
- Bhag (contributed by Shrimp)
- ChroniclesOfGaras (contributed by Shrimp)
- CrapIDrewOnMyLunchBreak (contributed by Shrimp)
- EternalVenture (contributed by Shrimp)
- Evercrest (contributed by TobiX)
- Frump (contributed by Shrimp)
- GUComics (contributed by TobiX)
- KeenSpot/BoomerExpress (contributed by TobiX)
- KevinAndKell (contributed by TobiX)
- LethalDosesClassic (contributed by TobiX)
- LethalDoses (contributed by TobiX)
- ListeningTo11975MHz (contributed by TobiX)
- Marilith
- MinesBigger (contributed by Shrimp)
- MyPrivateLittleHell (contributed by TobiX)
- MyWarWithCulture
- NeoGreenwood (contributed by Shrimp)
- NuklearPower (contributed by Shrimp)
- PerkiGoth (contributed by TobiX)
- PreludesEnd (contributed by Shrimp)
- ShadowInTheMirror (contributed by Shrimp)
- UComicsEspanol
- WhyTheLongFace (contributed by TobiX)
- Winter (contributed by TobiX)
* Fixed comics:
- Creators
- PennyArcade
- UComics (removed comics no longer supported and moved Spanish comics
to UComicsEspanol)
- UnicornJelly
Dosage v.1.5.0:
* Added an RSS output event. (contributed by Colin Alston)
* Dosage now sends a more descriptive User-Agent HTTP header.
* Dosage will now continue downloading strips until no new strips are
downloaded; this fixed problems with comics that had multiple strips per
page or comics that employed "precache" methods.
* Specific modules can now be disabled by specifying them in
/etc/dosage/disabled (global) and ~/.dosage/disabled (local).
* Fixed a division by zero error that often occurred under Windows.
* Added comics:
- AlienLovesPredator (contributed by Shrimp)
- AllGrownUp (contributed by Shrimp)
- AsylumOn5thStreet (contributed by Shrimp)
- BizarreUprising (contributed by Shrimp)
- Creators/Archie
- Creators/AskShagg
- Creators/ForHeavensSake
- Creators/Rugrats
- Creators/StateOfTheUnion
- Creators/TheDinetteSet
- Creators/TheMeaningOfLila
- Creators/WeePals
- Creators/ZackHill
- DMFA (contributed by TobiX)
- DoctorRoboto (contributed by Shrimp)
- DoemainOfOurOwn
- EntertainDome (contributed by Shrimp)
- FauxPas (contributed by TobiX)
- IrregularWebcomic (contributed by TobiX)
- JamesFrancis/gonzo
- JamesFrancis/psycindom0
- JamesFrancis/psycindom1
- JamesFrancis/psycindom2
- KeenSpot/AlienDice
- KeenSpot/Avalon
- KeenSpot/CountYourSheep
- KeenSpot/DexLives (contributed by TobiX)
- KeenSpot/DominicDeegan
- KeenSpot/ElGoonishShive
- KeenSpot/ElfLife
- KeenSpot/ErrantStory
- KeenSpot/EverythingJake
- KeenSpot/FriendlyHostility
- KeenSpot/FunnyFarm
- KeenSpot/GamingGuardians
- KeenSpot/GeneCatlow
- KeenSpot/GoblinHollow (contributed by TobiX)
- KeenSpot/GreystoneInn
- KeenSpot/InAPerfectWorld (contributed by TobiX)
- KeenSpot/JoeAverage (contributed by TobiX)
- KeenSpot/MariposaRevelation (contributed by TobiX)
- KeenSpot/NaughtFramed
- KeenSpot/NekoTheKitty (contributed by TobiX)
- KeenSpot/NipAndTuck (contributed by TobiX)
- KeenSpot/OneOverZero (contributed by TobiX)
- KeenSpot/PastelDefender
- KeenSpot/RoadWaffles
- KeenSpot/Scatterplot
- KeenSpot/SchlockMercenary
- KeenSpot/TalesOfTheQuestor (contributed by TobiX)
- KeenSpot/UberSoft
- KeenSpot/UnicornJelly
- KeenSpot/WorldOfFenninRo (contributed by TobiX)
- KeenSpot/ZebraGirl
- LessThanKate (contributed by Shrimp)
- OurHomePlanet (contributed by Shrimp)
- Spamusement
- Sternstaub (contributed by Shrimp)
- TheLounge (contributed by Shrimp)
- TheOrderOfTheStick
- UComics/animatedoliphant
- UComics/anntelnaes
- UComics/askcaptainribman
- UComics/baldoespanol
- UComics/barbarabrandon
- UComics/bensargent
- UComics/billdeore
- UComics/brewsterrockit
- UComics/brucehammond
- UComics/calvinandhobbesespanol
- UComics/cathyespanol
- UComics/chanlowe
- UComics/condorito
- UComics/danasummers
- UComics/danwasserman
- UComics/davidhorsey
- UComics/dicklocher
- UComics/dickwright
- UComics/donwright
- UComics/dougmarlette
- UComics/drewsheneman
- UComics/facesinthenews
- UComics/foxtrotespanol
- UComics/fredbassetespanol
- UComics/garfieldespanol
- UComics/garyvarvel
- UComics/gaturro
- UComics/glennmccoy
- UComics/hubertandabby
- UComics/jackhiggins
- UComics/jackohman
- UComics/jeffdanziger
- UComics/laloalcaraz
- UComics/mattdavies
- UComics/modestyblaise
- UComics/muttandjeffespanol
- UComics/neurotica
- UComics/overboardespanol
- UComics/patoliphant
- UComics/paulconrad
- UComics/pepe
- UComics/poochcafeespanol
- UComics/pricklycity
- UComics/sigmund
- UComics/smallworld
- UComics/stevesack
- UComics/stuartcarlson
- UComics/tedrall
- UComics/thebigpicture
- UComics/theelderberries
- UComics/thefifthwave
- UComics/thefuscobrothers
- UComics/themiddletons
- UComics/thequigmans
- UComics/tomtoles
- UComics/tonyauth
- UComics/tutelandia
- UComics/walthandelsman
- UComics/waynestayskal
- UComics/ziggyespanol
- WiguTV
* Fixed comics:
- Dominion
- KeenSpot/GeneralProtectionFault (contributed by TobiX)
- SluggyFreelance
- UserFriendly
- VGCats (contributed by TobiX)
- Wigu
Dosage v.1.4.0:
* A manual page for 'mainline' is now included.
* Events output; currently the only useful handler is 'html', which
outputs an HTML page with all of the downloaded comics. These
files are named by date, and have links to the previous and next
days (similar to dailystrips).
* Added comics:
- MadamAndEve (contributed by Anthony Caetano)
- SnafuComics/Grim
- SnafuComics/KOF
- SnafuComics/PowerPuffGirls
- SnafuComics/Snafu
- SnafuComics/Tin
- TheParkingLotIsFull
- Zapiro (contributed by Anthony Caetano)
* Fixed comics:
- UserFriendly (naming fix)
Dosage v.1.3.0:
* Progress bar has been improved; specifically for gauging downloads of
unknown size
* All relevant images are now downloaded where necessary; thanks bruce :)
* Incomplete downloads are discarded
* Removed junview
* Main script is now 'mainline' (used to be 'dosage')
* Added comics:
- AstronomyPOTD
- CounterCulture
- Dominion
- Fallen
- Freefall
- GenrezvousPoint
- KeenSpot/Blindworks
- KeenSpot/BoyMeetsBoy
- KeenSpot/Scrued
- KeenSpot/Stubble
- KeenSpot/TAVision
- KeenSpot/TangsWeeklyComic
- KingFeatures
- OhMyGods
- RedMeat
- WotNow
* Fixed comics:
- MegaTokyo
- SomethingPositive (naming fix)
- TheFray (now a virtual module)
Dosage v.1.2.0:
* Progress bar is now disabled if the window size cannot be determined
* Source was restructured; the dosage script is now located in the bin/
directory.
* Added comics:
- BiggerThanCheeses
- BrickShitHouse
- ChugworthAcademy
- DandyAndCompany
- Girly
- HighPingBastard
- Jack
- KeenSpot/ChoppingBlock
- KeenSpot/SaturdayMorningBreakfastCereal
- KeenSpot/StrangeCandy
- KeenSpot/WapsiSquare
- KiagiSwordscat
- MakeWithTheFunny
- Pixel
- PockyBot
- SamAndFuzzy
- Spoonies
Dosage v.1.1.0:
* A download progress bar is now available on Linux (and probably other
UNIX-like systems)
* Timestamps are now updated even if the strip is not redownloaded
* Added comics:
- ALessonIsLearned
- ASofterWorld
- BoyOnAStickAndSlither
- Chisuji
- ExploitationNow
- KeenSpot/Ghastly
- KeenSpot/Saturnalia
- Loserz
- Qwantz
- StarCrossdDestiny
* Fixed comics:
- LittleGamers
Dosage v.1.0.1:
* Fix embarrassing typo in 1.0.0 which rendered it completely unusable
(albeit a trivial fix).
Dosage v.1.0.0:
* 1.0 release, yay!
* Set modified time on downloaded images based on Last-Modified header:
Patch provided by gopalv82@yahoo.com, thanks :)
* Fixed --basepath on Windows:
Passing a path that included a drive letter didn't work.
* Added comics:
- TwoTwoOneFour
* Fixed comics:
- SluggyFreelance
Dosage v.0.3.2:
* Added comics:
- FreakCentral
- KeenSpot/AntiHeroForHire
- KeenSpot/ElfOnlyInn
- KeenSpot/GeneralProtectionFault
- KeenSpot/LimitedSpace
- KeenSpot/LostAndFound
- KeenSpot/Zortic
- RabidMonkeys
- SluggyFreelance
- SpellsAndWhistles
- SuburbanTribe
- TheFray
Dosage v.0.3.1:
* Removed external helper scripts
* Filesize displayed for downloaded files
* Various documentation changes
* Added --timestamps:
Displays timestamps before every message.
* Added comics:
- SomethingPositive
- UnderPower
- UserFriendly
- KeenSpot/QueenOfWands
- CombustibleOrange
- InkTank/*
- QuestionableContent
* Fixed comics:
- ComicsDotCom/flightdeck
- ComicsDotCom/peanuts
- ButternutSquash
- LifeOfConvenience
Dosage v.0.3.0:
* Removed filename override:
Since the comic modules now generally have sane names, this is no
longer of much use.
* Better feedback:
The various info levels (up to 3 now) provide much more informative
output.
* Comic wildcards:
@ expands to every comic already present in the basepath, and @@
expands to every single comic supported by Dosage.
* Added Comics:
- AppleGeeks
- ButternutSquash
- Comet7
- ComicsDotCom
Lots of submodules, most of them are untested.
- CtrlAltDel
- EightBitTheater
- FragileGravity
- KeenSpot/24fps
- KeenSpot/Alice
- KeenSpot/DeltaVenture
- KeenSpot/ItsWalky
- KeenSpot/PurplePussy
- KeenSpot/TheShadows
- LaurasComics
- MacHall
- Supafine
- VGCats
- WhiteNinja
* Fixed comics:
- KeenSpot/CollegeRoomiesFromHell
- KeenSpot/Wigu (renamed to Wigu)
- UComics/{mullets, nonsequitur, tomthedancingbug}
- PennyArcade
Switch back to the "low" resolution comics; some of the "high"
resolution comics are broken, and the "low" ones seem to be
identical anyway.
* Junview:
Lots of fixes / enhancements, still fairly alpha.
Dosage v.0.2.0:
* Virtual comic modules
* URL retrying:
Also, if you specify multiple comics, and one of them errors out
for some reason, Dosage will continue with the others.
* Indexed catchup:
You can now start a catchup from a specific index.
* Added comics:
- FilibusterCartoons
- GlueMeat
- RPGWorld
- RealLife
- UComics (see --list, there are around 70 submodules)
* Fixed comics:
- BasilFlint
- DiselSweeties
- SexyLosers
Generate nice filenames now.
* Comic help:
You can now pass --module-help to see module-specific help for
comic modules.
* Junview:
Image viewer written in wxPython, pretty alpha at this stage,
but feel free to play around with it if you're brave.
Dosage v.0.1.0:
* Various documentation updates
* Added comics:
- LittleGamers
- ClanOfTheCats
- DieselSweeties
- PvPonline
- RadioactivePanda
- ScaryGoRound
* Fixed comics:
- PennyArcade
The comic "bounces" when you get to the first strip, the
"previous" link points to the second comic. Work around this by
checking for the first comic.
- SexyLosers
SexyLosers seems to have implemented referrer checking recently,
this is handled by the new referrer passing support.
* Fix indexed mode up a bit:
The documentation has better examples now.
Dosage v.0.0.1:
* Initial public release

185
doc/dosage.1 Normal file

@ -0,0 +1,185 @@
.TH MAINLINE 1
.SH NAME
mainline \- command line interface to Dosage
.SH SYNOPSIS
.B mainline
.RI [ options ]
.I module
.RI [ module .\|.\|.]
.SH DESCRIPTION
.B mainline
is a command line interface to Dosage. Dosage is an application designed
to keep a local \(oqmirror\(cq of specific web comics and other picture\-based
content, such as \(oqPicture Of The Day\(cq sites, with a variety of options
for updating and maintaining collections.
.SH OPTIONS
.TP
.BI \-b " PATH" "\fR,\fP \-\^\-base\-path=" PATH
Specifies a base path to put comic subdirectories. The default is \(oqComics\(cq.
.TP
.BI \-\^\-base\-url= PATH
Specifies the base URL for output events. The default is a local file URI.
.TP
.BR \-c ", " \-\^\-catch-up
Traverses all available strips until an (identical) existing one is found.
This can be useful if your collection was previously up to date,
but you've missed a few days worth of strips. Alternatively you can specify
.B \-c
twice for a \(oqfull catchup\(cq, which will not stop until all comics
have been traversed. Catchups can be \(oqresumed\(cq by using the index syntax; see
the
.B INDEX SYNTAX
and
.B SPECIAL SYNTAX
sections for more information.
.TP
.BR \-h ", " \-\^\-help
Output brief help information.
.TP
.BR \-l ", " \-\^\-list
List available comic modules in multi\-column fashion.
.TP
.BR \-\^\-single\-list
List available comic modules in single-column fashion.
.TP
.BI \-m " MODULE" "\fR,\fP \-\^\-module-help=" MODULE
Output module-specific help for
.IR MODULE .
.TP
.BI \-o " OUTPUT" "\fR,\fP \-\^\-output=" OUTPUT
.I OUTPUT
may be any one of the following:
.PP
.RS
.BR "text " \-
Provides no additional output and is the default value.
.RE
.PP
.RS
.BR "html " \-
Writes out an HTML file linking to the strips actually downloaded in the
current run, named by date (ala dailystrips). The files can be found in the
\(oqhtml\(cq directory of your Comics directory.
.RE
.PP
.RS
.BR "rss " \-
Writes out an RSS feed detailing what strips were downloaded in the last 24
hours. The feed can be found in Comics/dailydose.xml.
.RE
.PP
.RS
.BR "rss " \-
Writes an RSS feed with all of the strips downloaded during the run, for use
with your favourite RSS aggregator.
.RE
.TP
.BR \-p ", " \-\^\-progress
Display a progress bar while downloading comics.
.TP
.BR \-t ", " \-\^\-timestamps
Print timestamps for all output at any level.
.TP
.BR \-v ", " \-\^\-verbose
Increase the output level by one with each occurrence.
.TP
.BR \-V ", " \-\^\-version
Display the version number.
.I module
At least one valid
.I module
must be specified. A list of valid modules can be found by passing the
.B \-l
option. Multiple
.I module
arguments can be specified on the command line.
.SH INDEX SYNTAX
One can indicate the start of a list of
.B comma separated
indices using a
.RB \(oq : "\(cq."
.PP
If
.I \-c
is specified with index syntax then \(oqresume\(cq mode is activated,
where a \(oqcatchup\(cq will start at the given index.
.PP
Refer to
.B EXAMPLES
for samples.
.SH OFFENSIVE COMICS
Some users may find certain comics offensive and wish to disable them.
Modules listed in
.B /etc/dosage/disabled
and
.B ~/.dosage/disabled
will be disabled. These files should contain only one module name per line.
.SH SPECIAL SYNTAX
.TP
.B @
This expands to mean all the comics currently in your \(oqComics\(cq
directory.
.TP
.B @@
This expands to mean all the comics available to Dosage.
.PP
.B INDEX SYNTAX
can be used with
.B SPECIAL SYNTAX
but this is unlikely to be useful.
.SH EXAMPLES
Retrieve the latest Mega Tokyo comic:
.RS
.B mainline MegaTokyo
.RE
.PP
Retrieve every strip from every comic that there is a module for:
.RS
.B mainline \-c @@
.RE
.PP
Retrieve all Penny Arcade strips from (and including) a given index to
the beginning regardless of whether they already exist or not:
.RS
.B mainline \-c PennyArcade:2004\-07\-22
.RE
.SH ENVIRONMENT
.IP HTTP_PROXY
.B mainline
will use the specified HTTP proxy whenever possible.
.SH NOTES
Should retrieval fail on any given strip
.B mainline
will attempt to retry. However, the retry information is only output
in the
.B second
and successive output levels.
.PP
At the time of writing, a
.B complete
Dosage collection weighs in at around 3.0GB.
.SH RETURN VALUE
The return value is 2 when
.IP \(bu
a program error occurred.
.PP
The return value is 1 when
.IP \(bu
comics could not be found or downloaded
.IP \(bu
the program run was aborted with Ctrl-C
.PP
Else the return value is zero.
.SH BUGS
See
.I http://trac.slipgate.za.net/dosage
for a list of current development tasks and suggestions.
.SH FILES
.IP "\fB/etc/dosage/disabled\fR"
Disables comic modules on a global scale.
.IP "\fB~/.dosage/disabled\fR"
Disables comic modules on a local scale.
.SH AUTHORS
.BR mainline " and " Dosage
were written by Jonathan Jacobs <korpse@slipgate.za.net> and Tristan Seligmann
<mithrandi@slipgate.za.net>. This manual page was written by Jonathan Jacobs.

329
doc/dosage.1.html Normal file

@ -0,0 +1,329 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<HTML><HEAD><TITLE>Man page of MAINLINE</TITLE>
</HEAD><BODY>
<H1>MAINLINE</H1>
Section: User Commands (1)<BR><A HREF="#index">Index</A>
<A HREF="../index.html">Return to Main Contents</A><HR>
<A NAME="lbAB">&nbsp;</A>
<H2>NAME</H2>
mainline - command line interface to Dosage
<A NAME="lbAC">&nbsp;</A>
<H2>SYNOPSIS</H2>
<B>mainline</B>
[<I>options</I>]
<I>module</I>
[<I>module</I>...]
<A NAME="lbAD">&nbsp;</A>
<H2>DESCRIPTION</H2>
<B>mainline</B>
is a command line interface to Dosage. Dosage is an application designed
to keep a local 'mirror' of specific web comics and other picture-based
content, such as 'Picture Of The Day' sites, with a variety of options
for updating and maintaining collections.
<A NAME="lbAE">&nbsp;</A>
<H2>OPTIONS</H2>
<DL COMPACT>
<DT><B>-b</B><I> PATH</I><B></B>, --base-path=<I>PATH</I>
<DD>
Specifies a base path to put comic subdirectories. The default is 'Comics'.
<DT><B>--base-url=</B><I>PATH</I>
<DD>
Specifies the base URL for output events. The default is a local file URI.
<DT><B>-c</B>, <B>--catch-up</B>
<DD>
Traverses all available strips until an (identical) existing one is found.
This can be useful if your collection was previously up to date,
but you've missed a few days worth of strips. Alternatively you can specify
<B>-c</B>
twice for a 'full catchup', which will not stop until all comics
have been traversed. Catchups can be 'resumed' by using the index syntax; see
the
<B>INDEX SYNTAX</B>
and
<B>SPECIAL SYNTAX</B>
sections for more information.
<DT><B>-h</B>, <B>--help</B>
<DD>
Output brief help information.
<DT><B>-l</B>, <B>--list</B>
<DD>
List available comic modules in multi-column fashion.
<DT><B>--single-list</B>
<DD>
List available comic modules in single-column fashion.
<DT><B>-m</B><I> MODULE</I><B></B>, --module-help=<I>MODULE</I>
<DD>
Output module-specific help for
<I>MODULE</I>.
<DT><B>-o</B><I> OUTPUT</I><B></B>, --output=<I>OUTPUT</I>
<DD>
<I>OUTPUT</I>
may be any one of the following:
</DL>
<P>
<DL COMPACT><DT><DD>
<B>text </B>-
Provides no additional output and is the default value.
</DL>
<P>
<DL COMPACT><DT><DD>
<B>html </B>-
Writes out an HTML file linking to the strips actually downloaded in the
current run, named by date (ala dailystrips). The files can be found in the
'html' directory of your Comics directory.
</DL>
<P>
<DL COMPACT><DT><DD>
<B>rss </B>-
Writes out an RSS feed detailing what strips were downloaded in the last 24
hours. The feed can be found in Comics/dailydose.xml.
</DL>
<P>
<DL COMPACT><DT><DD>
<B>rss </B>-
Writes an RSS feed with all of the strips downloaded during the run, for use
with your favourite RSS aggregator.
</DL>
<DL COMPACT>
<DT><B>-p</B>, <B>--progress</B>
<DD>
Display a progress bar while downloading comics.
<DT><B>-t</B>, <B>--timestamps</B>
<DD>
Print timestamps for all output at any level.
<DT><B>-v</B>, <B>--verbose</B>
<DD>
Increase the output level by one with each occurrence.
<DT><B>-V</B>, <B>--version</B>
<DD>
Display the version number.
<I>module</I>
At least one valid
<I>module</I>
must be specified. A list of valid modules can be found by passing the
<B>-l</B>
option. Multiple
<I>module</I>
arguments can be specified on the command line.
</DL>
<A NAME="lbAF">&nbsp;</A>
<H2>INDEX SYNTAX</H2>
One can indicate the start of a list of
<B>comma separated</B>
indices using a
'<B>:</B>'.
<P>
If
<I>-c</I>
is specified with index syntax then 'resume' mode is activated,
where a 'catchup' will start at the given index.
<P>
Refer to
<B>EXAMPLES</B>
for samples.
<A NAME="lbAG">&nbsp;</A>
<H2>OFFENSIVE COMICS</H2>
Some users may find certain comics offensive and wish to disable them.
Modules listed in
<B>/etc/dosage/disabled</B>
and
<B>~/.dosage/disabled</B>
will be disabled. These files should contain only one module name per line.
<A NAME="lbAH">&nbsp;</A>
<H2>SPECIAL SYNTAX</H2>
<DL COMPACT>
<DT><B>@</B>
<DD>
This expands to mean all the comics currently in your 'Comics'
directory.
<DT><B>@@</B>
<DD>
This expands to mean all the comics available to Dosage.
</DL>
<P>
<B>INDEX SYNTAX</B>
can be used with
<B>SPECIAL SYNTAX</B>
but this is unlikely to be useful.
<A NAME="lbAI">&nbsp;</A>
<H2>EXAMPLES</H2>
Retrieve the latest Mega Tokyo comic:
<DL COMPACT><DT><DD>
<B>mainline MegaTokyo</B>
</DL>
<P>
Retrieve every strip from every comic that there is a module for:
<DL COMPACT><DT><DD>
<B>mainline -c @@</B>
</DL>
<P>
Retrieve all Penny Arcade strips from (and including) a given index to
the beginning regardless of whether they already exist or not:
<DL COMPACT><DT><DD>
<B>mainline -c PennyArcade:2004-07-22</B>
</DL>
<A NAME="lbAJ">&nbsp;</A>
<H2>ENVIRONMENT</H2>
<DL COMPACT>
<DT>HTTP_PROXY<DD>
<B>mainline</B>
will use the specified HTTP proxy whenever possible.
</DL>
<A NAME="lbAK">&nbsp;</A>
<H2>NOTES</H2>
Should retrieval fail on any given strip
<B>mainline</B>
will attempt to retry. However, the retry information is only output
in the
<B>second</B>
and successive output levels.
<P>
At the time of writing, a
<B>complete</B>
Dosage collection weighs in at around 3.0GB.
<A NAME="lbAL">&nbsp;</A>
<H2>RETURN VALUE</H2>
The return value is 2 when
<DL COMPACT>
<DT>&bull;<DD>
a program error occurred.
</DL>
<P>
The return value is 1 when
<DL COMPACT>
<DT>&bull;<DD>
comics could not be found or downloaded
<DT>&bull;<DD>
the program run was aborted with Ctrl-C
</DL>
<P>
Else the return value is zero.
<A NAME="lbAM">&nbsp;</A>
<H2>BUGS</H2>
See
<I><A HREF="http://trac.slipgate.za.net/dosage">http://trac.slipgate.za.net/dosage</A></I>
for a list of current development tasks and suggestions.
<A NAME="lbAN">&nbsp;</A>
<H2>FILES</H2>
<DL COMPACT>
<DT><B>/etc/dosage/disabled</B><DD>
Disables comic modules on a global scale.
<DT><B>~/.dosage/disabled</B><DD>
Disables comic modules on a local scale.
</DL>
<A NAME="lbAO">&nbsp;</A>
<H2>AUTHORS</H2>
<B>mainline</B> and <B>Dosage</B>
were written by Jonathan Jacobs &lt;<A HREF="mailto:korpse@slipgate.za.net">korpse@slipgate.za.net</A>&gt; and Tristan Seligmann
&lt;<A HREF="mailto:mithrandi@slipgate.za.net">mithrandi@slipgate.za.net</A>&gt;. This manual page was written by Jonathan Jacobs.
<P>
<HR>
<A NAME="index">&nbsp;</A><H2>Index</H2>
<DL>
<DT><A HREF="#lbAB">NAME</A><DD>
<DT><A HREF="#lbAC">SYNOPSIS</A><DD>
<DT><A HREF="#lbAD">DESCRIPTION</A><DD>
<DT><A HREF="#lbAE">OPTIONS</A><DD>
<DT><A HREF="#lbAF">INDEX SYNTAX</A><DD>
<DT><A HREF="#lbAG">OFFENSIVE COMICS</A><DD>
<DT><A HREF="#lbAH">SPECIAL SYNTAX</A><DD>
<DT><A HREF="#lbAI">EXAMPLES</A><DD>
<DT><A HREF="#lbAJ">ENVIRONMENT</A><DD>
<DT><A HREF="#lbAK">NOTES</A><DD>
<DT><A HREF="#lbAL">RETURN VALUE</A><DD>
<DT><A HREF="#lbAM">BUGS</A><DD>
<DT><A HREF="#lbAN">FILES</A><DD>
<DT><A HREF="#lbAO">AUTHORS</A><DD>
</DL>
<HR>
This document was created by
<A HREF="/cgi-bin/man/man2html">man2html</A>,
using the manual pages.<BR>
</BODY>
</HTML>

240
dosage Executable file

@ -0,0 +1,240 @@
#!/usr/bin/env python
# Dosage, the webcomic downloader
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import sys
import os
import optparse
import traceback
from dosagelib import events, scraper
from dosagelib.output import out
from dosagelib.util import getWindowSize, internal_error
from dosagelib.configuration import App, Freeware, Copyright
def setupOptions():
usage = 'usage: %prog [options] comicModule [comicModule ...]'
parser = optparse.OptionParser(usage=usage)
parser.add_option('-v', '--verbose', action='count', dest='verbose', default=0, help='provides verbose output, use multiple times for more verbosity')
parser.add_option('-c', '--catch-up', action='count', dest='catchup', default=None, help='traverse and retrieve all available comics up until the strip that already exists locally, use twice to retrieve until all strips exist locally')
parser.add_option('-b', '--base-path', action='store', dest='basepath', default='Comics', help='set the path to create individual comic directories in, default is Comics', metavar='PATH')
parser.add_option('--base-url', action='store', dest='baseurl', default=None, help='the base URL of your comics directory (for RSS, HTML, etc.); this should correspond to --base-path', metavar='PATH')
parser.add_option('-l', '--list', action='store_const', const=1, dest='list', help='list available comic modules')
parser.add_option('--single-list', action='store_const', const=2, dest='list', help='list available comic modules in a single list')
parser.add_option('-V', '--version', action='store_true', dest='version', help='display the version number')
parser.add_option('-m', '--module-help', action='store_true', dest='modhelp', help='display help for comic modules')
parser.add_option('-t', '--timestamps', action='store_true', dest='timestamps', default=False, help='print timestamps for all output at any info level')
parser.add_option('-o', '--output', action='store', dest='output', choices=events.getHandlers(), help='output formatting for downloaded comics')
try:
getWindowSize()
except NotImplementedError:
progress = False
else:
progress = True
if progress:
parser.add_option('-p', '--progress', action='store_true', dest='progress', default=False, help='display progress bar while downloading comics')
return parser
class Dosage(object):
def __init__(self):
self.errors = 0
def setOutputInfo(self):
out.level = 0
out.level += self.settings['verbose']
out.timestamps = self.settings['timestamps']
def saveComic(self, comic):
basepath = self.settings['basepath']
progress = self.settings.get('progress', False)
fn, saved = comic.save(basepath, progress)
return saved
def saveComics(self, comics):
saved = False
for comic in comics:
saved = self.saveComic(comic) or saved
return saved
def safeOp(self, fp, *args, **kwargs):
try:
fp(*args, **kwargs)
except Exception:
self.errors += 1
type, value, tb = sys.exc_info()
out.write('Traceback (most recent call last):', 1)
out.writelines(traceback.format_stack(), 1)
out.writelines(traceback.format_tb(tb)[1:], 1)
out.writelines(traceback.format_exception_only(type, value))
def getCurrent(self):
out.write('Retrieving the current strip...')
self.saveComics(self.module.getCurrentComics())
def getIndex(self, index):
out.write('Retrieving index "%s"....' % (index,))
try:
self.module.setStrip(index)
self.saveComics(self.module.getNextComics())
except NotImplementedError:
out.write('No indexed retrieval support.')
def catchup(self):
out.write('Catching up...')
for comics in self.module:
if not self.saveComics(comics) and self.settings['catchup'] < 2:
break
def catchupIndex(self, index):
out.write('Catching up from index "%s"...' % (index,))
self.module.setStrip(index)
for comics in self.module:
if not self.saveComics(comics) and self.settings['catchup'] < 2:
break
def getScrapers(self):
return scraper.items()
def getExistingComics(self):
for scraper in self.getScrapers():
dirname = scraper.get_name().replace('/', os.sep)
if os.path.isdir(os.path.join(self.settings['basepath'], dirname)):
yield scraper
def doList(self, columnList):
out.write('Available comic scrapers:')
scrapers = self.getScrapers()
if columnList:
self.doColumnList(scrapers)
else:
self.doSingleList(scrapers)
out.write('%d supported comics.' % len(scrapers))
def doSingleList(self, scrapers):
print '\n'.join(scraper.get_name() for scraper in scrapers)
def doColumnList(self, scrapers):
try:
screenWidth = getWindowSize()
except NotImplementedError:
screenWidth = 80
if len(scrapers) == 0:
return
names = [scraper.get_name() for scraper in scrapers]
maxlen = max([len(name) for name in names])
namesPerLine = int(screenWidth / (maxlen + 1))
while names:
print ''.join([name.ljust(maxlen) for name in names[:namesPerLine]])
del names[:namesPerLine]
def doCatchup(self):
for comic in self.useComics():
if self.indices:
self.safeOp(self.catchupIndex, self.indices[0])
else:
self.safeOp(self.catchup)
def doCurrent(self):
for comic in self.useComics():
if self.indices:
for index in self.indices:
self.safeOp(self.getIndex, index)
else:
self.safeOp(self.getCurrent)
def doHelp(self):
for scraper in self.useComics():
for line in scraper.getHelp().splitlines():
out.write("Help: "+line)
def setupComic(self, scraper):
self.module = scraper()
out.context = scraper.get_name()
return self.module
def useComics(self):
for comic in self.comics:
c = comic.split(':', 2)
if len(c) > 1:
self.indices = c[1].split(',')
else:
self.indices = None
moduleName = c[0]
if moduleName == '@':
for s in self.getExistingComics():
yield self.setupComic(s)
elif moduleName == '@@':
for s in self.getScrapers():
yield self.setupComic(s)
else:
yield self.setupComic(scraper.get(moduleName))
def displayVersion(self):
print App
print Copyright
print Freeware
def run(self, settings, comics):
self.settings = settings
self.setOutputInfo()
self.comics = comics
om = self.settings['output']
events.installHandler(om, self.settings['basepath'], self.settings['baseurl'])
events.handler.start()
if self.settings['version']:
self.displayVersion()
elif self.settings['list']:
self.doList(self.settings['list'] == 1)
elif len(comics) <= 0:
out.write('Warning: No comics specified, bailing out!')
elif self.settings['modhelp']:
self.doHelp()
elif self.settings['catchup']:
self.doCatchup()
else:
self.doCurrent()
events.handler.end()
def main():
try:
parser = setupOptions()
options, args = parser.parse_args()
d = Dosage()
d.run(options.__dict__, args)
if d.errors:
res = 1
else:
res = 0
except KeyboardInterrupt:
print "Aborted."
res = 1
except Exception:
internal_error()
res = 2
return res
if __name__ == '__main__':
sys.exit(main())

39
dosagelib/__init__.py Normal file

@ -0,0 +1,39 @@
"""
Automated webcomic downloader. Dosage traverses webcomic websites in
order to download each strip of the comic. The intended use is for
mirroring the strips locally for ease of viewing; redistribution of the
downloaded strips may violate copyright, and is not advisable unless you
have communicated with all of the relevant copyright holders, described
your intentions, and received permission to distribute.
The primary dosage interface is currently the 'mainline' script, which
is just a thin wrapper that invokes L{dosage.mainline}. Comic modules
for each webcomic are located in L{dosage.modules}; most of these make
use of the helper base classes and mixins in L{dosage.modules.helpers},
thus making their individual implementations trivial.
@group Core modules: comic, events, output, progress, rss, util,
version
@group Interface modules: mainline
@group Comic modules: modules
@sort: modules.helpers
@author: U{Dosage development team <dosage@lists.slipgate.za.net>}
@requires: Python 2.5+
@see: U{The dosage webpage <http://slipgate.za.net/dosage>}
@see: U{The dosage Trac site <http://trac.slipgate.za.net/dosage>}
@newfield contributor: Contributor, Contributors (Alphabetical Order)
@contributor: U{Jonathan Jacobs <mailto:korpse@slipgate.za.net>}
@contributor: U{Tristan Seligmann <mailto:mithrandi@mithrandi.za.net>}
@var __license__: The license governing the use and distribution of
dosage.
"""
__docformat__ = 'epytext en'
import sys
if (not hasattr(sys, 'version_info') or
        sys.version_info < (2, 5, 0, 'final', 0)):
    raise SystemExit("This program requires Python 2.5 or later.")

101
dosagelib/comic.py Normal file

@ -0,0 +1,101 @@
import urllib2
import os
import locale
import rfc822
import time
import shutil
locale.setlocale(locale.LC_ALL, '')
from .output import out
from .util import urlopen, saneDataSize, normaliseURL
from .progress import progressBar, OperationComplete
from .events import handler
class FetchComicError(IOError): pass
class Comic(object):
def __init__(self, moduleName, url, referrer=None, filename=None):
self.moduleName = moduleName
url = normaliseURL(url)
out.write('Getting headers for %s...' % (url,), 2)
try:
self.urlobj = urlopen(url, referrer=referrer)
except urllib2.HTTPError, he:
raise FetchComicError, ('Unable to retrieve URL.', url, he.code)
if self.urlobj.info().getmaintype() != 'image' and \
self.urlobj.info().gettype() not in ('application/octet-stream', 'application/x-shockwave-flash'):
raise FetchComicError, ('No suitable image found to retrieve.', url)
self.filename, self.ext = os.path.splitext(url.split('/')[-1])
self.filename = filename or self.filename
self.filename = self.filename.replace(os.sep, '_')
# Always use mime type for file extension if it is sane.
if self.urlobj.info().getmaintype() == 'image':
self.ext = '.' + self.urlobj.info().getsubtype()
self.contentLength = int(self.urlobj.info().get('content-length', 0))
self.lastModified = self.urlobj.info().get('last-modified')
out.write('... filename = "%s", ext = "%s", contentLength = %d' % (self.filename, self.ext, self.contentLength), 2)
def touch(self, filename):
if self.lastModified:
tt = rfc822.parsedate(self.lastModified)
if tt:
mtime = time.mktime(tt)
os.utime(filename, (mtime, mtime))
def save(self, basepath, showProgress=False):
comicName, comicExt = self.filename, self.ext
comicSize = self.contentLength
comicDir = os.path.join(basepath, self.moduleName.replace('/', os.sep))
if not os.path.isdir(comicDir):
os.makedirs(comicDir)
fn = os.path.join(comicDir, '%s%s' % (self.filename, self.ext))
if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize:
self.urlobj.close()
self.touch(fn)
out.write('Skipping existing file "%s".' % (fn,), 1)
return fn, False
try:
tmpFn = os.path.join(comicDir, '__%s%s' % (self.filename, self.ext))
out.write('Writing comic to temporary file %s...' % (tmpFn,), 3)
comicOut = file(tmpFn, 'wb')
try:
startTime = time.time()
if showProgress:
def pollData():
data = self.urlobj.read(8192)
if not data:
raise OperationComplete
comicOut.write(data)
return len(data), self.contentLength
progressBar(pollData)
else:
comicOut.write(self.urlobj.read())
endTime = time.time()
finally:
comicOut.close()
out.write('Copying temporary file (%s) to %s...' % (tmpFn, fn), 3)
shutil.copy2(tmpFn, fn)
self.touch(fn)
size = os.path.getsize(fn)
bytes = locale.format('%d', size, True)
if endTime != startTime:
speed = saneDataSize(size / (endTime - startTime))
else:
speed = '???'
attrs = dict(fn=fn, bytes=bytes, speed=speed)
out.write('Saved "%(fn)s" (%(bytes)s bytes, %(speed)s/sec).' % attrs, 1)
handler.comicDownloaded(self.moduleName, fn)
self.urlobj.close()
finally:
try:
out.write('Removing temporary file %s...' % (tmpFn,), 3)
os.remove(tmpFn)
except:
pass
return fn, True

19
dosagelib/configuration.py Normal file

@ -0,0 +1,19 @@
import _Dosage_configdata as configdata
Version = configdata.version
ReleaseDate = configdata.release_date
AppName = configdata.name
App = AppName+u" "+Version
Author = configdata.author
HtmlAuthor = Author.replace(u' ', u'&nbsp;')
Copyright = u"Copyright (C) 2004-2008 "+Author
HtmlCopyright = u"Copyright &copy; 2004-2008 "+HtmlAuthor
Url = configdata.url
SupportUrl = Url + u"/issues"
Email = configdata.author_email
UserAgent = u"%s/%s (+%s)" % (AppName, Version, Url)
Freeware = AppName+u""" comes with ABSOLUTELY NO WARRANTY!
This is free software, and you are welcome to redistribute it
under certain conditions. Look at the file `LICENSE' within this
distribution."""

159
dosagelib/events.py Normal file

@ -0,0 +1,159 @@
import os.path
import time
import rss
import urllib
import util
class EventHandler(object):
def __init__(self, basepath, baseurl):
self.basepath = basepath
self.baseurl = baseurl or self.getBaseUrl()
def getBaseUrl(self):
'''Return a file: URL that probably points to the basedir.
This is used as a halfway sane default when the base URL is not
provided; not perfect, but should work in most cases.'''
components = util.splitpath(os.path.abspath(self.basepath))
url = '/'.join([urllib.quote(component, '') for component in components])
return 'file:///' + url + '/'
def getUrlFromFilename(self, filename):
components = util.splitpath(util.getRelativePath(self.basepath, filename))
url = '/'.join([urllib.quote(component, '') for component in components])
return self.baseurl + url
def start(self):
pass
def comicDownloaded(self, comic, filename):
pass
def end(self):
pass
class TextEventHandler(EventHandler):
pass
class RSSEventHandler(EventHandler):
def RFC822Date(self, indate):
return time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(indate))
def getFilename(self):
return os.path.abspath(os.path.join(self.basepath, 'dailydose.rss'))
def start(self):
today = time.time()
yesterday = today - 86400
today = time.localtime(today)
yesterday = time.localtime(yesterday)
link = 'https://github.com/wummel/dosage'
self.rssfn = self.getFilename()
if os.path.exists(self.rssfn):
self.newfile = False
self.rss = rss.parseFeed(self.rssfn, yesterday)
else:
self.newfile = True
self.rss = rss.Feed('Daily Dosage', link, 'Comics for %s' % time.strftime('%Y/%m/%d', today))
def comicDownloaded(self, comic, filename):
url = self.getUrlFromFilename(filename)
args = (
'%s - %s' % (comic, os.path.basename(filename)),
url,
'<a href="%s">View Comic</a>' % (url,),
self.RFC822Date(time.time())
)
if self.newfile:
self.newfile = False
self.rss.addItem(*args)
else:
self.rss.insertHead(*args)
def end(self):
self.rss.write(self.rssfn)
class HtmlEventHandler(EventHandler):
def fnFromDate(self, date):
fn = time.strftime('comics-%Y%m%d.html', date)
fn = os.path.join(self.basepath, 'html', fn)
fn = os.path.abspath(fn)
return fn
def start(self):
today = time.time()
yesterday = today - 86400
tomorrow = today + 86400
today = time.localtime(today)
yesterday = time.localtime(yesterday)
tomorrow = time.localtime(tomorrow)
fn = self.fnFromDate(today)
assert not os.path.exists(fn), 'Comic page for today already exists!'
d = os.path.dirname(fn)
if not os.path.isdir(d):
os.makedirs(d)
yesterdayUrl = self.getUrlFromFilename(self.fnFromDate(yesterday))
tomorrowUrl = self.getUrlFromFilename(self.fnFromDate(tomorrow))
self.html = file(fn, 'w')
self.html.write('''<html>
<head>
<title>Comics for %s</title>
</head>
<body>
<a href="%s">Previous Day</a> | <a href="%s">Next Day</a>
<ul>
''' % (time.strftime('%Y/%m/%d', today), yesterdayUrl, tomorrowUrl))
self.lastComic = None
def comicDownloaded(self, comic, filename):
if self.lastComic != comic:
self.newComic(comic)
url = self.getUrlFromFilename(filename)
self.html.write(' <li><a href="%s">%s</a></li>\n' % (url, os.path.basename(filename)))
def newComic(self, comic):
if self.lastComic is not None:
self.html.write(' </ul>\n')
self.lastComic = comic
self.html.write(''' <li>%s</li>
<ul>
''' % (comic,))
def end(self):
if self.lastComic is not None:
self.html.write(' </ul>\n')
self.html.write('''</ul>
</body>
</html>''')
self.html.close()
handlers = {
'text': TextEventHandler,
'html': HtmlEventHandler,
'rss': RSSEventHandler,
}
def getHandlers():
l = handlers.keys()
l.sort()
return l
def installHandler(name=None, basepath=None, baseurl=None):
global handler
if name is None:
name = 'text'
if basepath is None:
basepath = '.'
handler = handlers[name](basepath, baseurl)
installHandler()

181
dosagelib/helpers.py Normal file

@ -0,0 +1,181 @@
import re
from .util import fetchUrl, fetchManyUrls, getQueryParams
from .comic import Comic
class _BasicScraper(object):
'''Base class with scrape functions for comics.
@type latestUrl: C{string}
@cvar latestUrl: The URL for the latest comic strip.
@type imageUrl: C{string}
@cvar imageUrl: A string that is interpolated with the strip index
to yield the URL for a particular strip.
@type imageSearch: C{regex}
@cvar imageSearch: A compiled regex that will locate the strip image URL
when applied to the strip page.
@type prevSearch: C{regex}
@cvar prevSearch: A compiled regex that will locate the URL for the
previous strip when applied to a strip page.
'''
referrer = None
help = 'Sorry, no help for this comic yet.'
def __init__(self):
self.currentUrl = None
self.urls = set()
def getReferrer(self, imageUrl, pageUrl):
return self.referrer or pageUrl or self.getLatestUrl()
def getComic(self, url, pageUrl):
if not url:
return None
return Comic(self.get_name(), url, filename=self.getFilename(url, pageUrl), referrer=self.getReferrer(url, pageUrl))
def getCurrentComics(self):
self.currentUrl = self.getLatestUrl()
comics = self.getNextComics()
if not comics:
raise ValueError("Could not find current comic.")
return comics
def getNextComics(self):
comics = []
while not comics and self.currentUrl and self.currentUrl not in self.urls:
comicUrlGroups, prevUrl = fetchManyUrls(self.currentUrl, [self.imageSearch, self.prevSearch])
if prevUrl:
prevUrl = prevUrl[0]
else:
prevUrl = None
for comicUrl in comicUrlGroups:
comics.append(self.getComic(comicUrl, self.currentUrl))
self.urls.update([self.currentUrl])
self.currentUrl = (prevUrl, None)[prevUrl in self.urls]
return comics
def setStrip(self, index):
self.currentUrl = self.imageUrl % index
def getHelp(self):
return self.help
def __iter__(self):
"""Iterate through the strips, starting from the current one and going backward."""
if not self.currentUrl:
self.currentUrl = self.getLatestUrl()
comics = True
while comics:
comics = self.getNextComics()
if comics:
yield comics
@classmethod
def get_name(cls):
if hasattr(cls, 'name'):
return cls.name
return cls.__name__
@classmethod
def starter(cls):
return cls.latestUrl
@classmethod
def namer(cls, imageUrl, pageUrl):
return None
def getFilename(self, imageUrl, pageUrl):
return self.namer(imageUrl, pageUrl)
def getLatestUrl(self):
return self.starter()
def queryNamer(paramName, usePageUrl=False):
@staticmethod
def _namer(imageUrl, pageUrl):
url = (imageUrl, pageUrl)[usePageUrl]
return getQueryParams(url)[paramName][0]
return _namer
def regexNamer(regex):
@staticmethod
def _namer(imageUrl, pageUrl):
return regex.search(imageUrl).group(1)
return _namer
def constStarter(latestUrl):
@staticmethod
def _starter():
return latestUrl
return _starter
def bounceStarter(latestUrl, nextSearch):
@classmethod
def _starter(cls):
url = fetchUrl(latestUrl, cls.prevSearch)
if url:
url = fetchUrl(url, nextSearch)
return url
return _starter
def indirectStarter(baseUrl, latestSearch):
@staticmethod
def _starter():
return fetchUrl(baseUrl, latestSearch)
return _starter
class IndirectLatestMixin(object):
'''
Mixin for comics that link to the latest comic from a base page of
some kind. This also supports comics which don't link to the last comic
from the base page, but the beginning of the latest chapter or similar
schemes. It simulates going forward until it can't find a 'next' link as
specified by the 'nextSearch' regex.
@type baseUrl: C{string}
@cvar baseUrl: the URL where the link to the latest comic is found.
@type latestSearch: C{regex}
@cvar latestSearch: a compiled regex for finding the 'latest' URL.
@type nextSearch: C{regex}
@cvar nextSearch: a compiled regex for finding the 'next' URL.
'''
__latestUrl = None
def getLatestUrl(self):
if not self.__latestUrl:
self.__latestUrl = fetchUrl(self.baseUrl, self.latestSearch)
if hasattr(self, "nextSearch"):
nextUrl = fetchUrl(self.__latestUrl, self.nextSearch)
while nextUrl:
self.__latestUrl = nextUrl
nextUrl = fetchUrl(self.__latestUrl, self.nextSearch)
return self.__latestUrl
latestUrl = property(getLatestUrl)
class _PHPScraper(_BasicScraper):
"""
I implement IScraper for comics using phpComic/CUSP.
This provides an easy way to define scrapers for webcomics using phpComic.
"""
imageUrl = property(lambda self: self.basePath + 'daily.php?date=%s')
imageSearch = property(lambda self: re.compile(r'<img alt=[^>]+ src="(%scomics/\d{6}\..+?)">' % (self.basePath,)))
help = 'Index format: yymmdd'
@classmethod
def starter(cls):
return cls.basePath + cls.latestUrl

23
dosagelib/output.py Normal file

@ -0,0 +1,23 @@
import time
class Output(object):
def __init__(self):
self.context = ''
self.level = 0
self.timestamps = False
def write(self, s, level=0):
if level > self.level:
return
if self.level > 1 or self.timestamps:
timestamp = time.strftime('%H:%M:%S ')
else:
timestamp = ''
print '%s%s> %s' % (timestamp, self.context, s)
def writelines(self, lines, level=0):
for line in lines:
for part in line.rstrip('\n').split('\n'):
self.write(part, level=level)
out = Output()
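# Typical use of the module-wide instance (illustrative values):
#
#   from dosagelib.output import out
#   out.context = 'SomeComic'
#   out.write('retrieving strip', level=0)   # always shown
#   out.write('parsing page', level=1)       # only shown when out.level >= 1
#
# Setting out.timestamps = True (or out.level > 1) prefixes each line with an
# HH:MM:SS timestamp.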

View file

@ -0,0 +1 @@

338
dosagelib/plugins/a.py Normal file
View file

@ -0,0 +1,338 @@
from re import compile, MULTILINE
from ..helpers import _BasicScraper, regexNamer, bounceStarter, indirectStarter
class ALessonIsLearned(_BasicScraper):
latestUrl = 'http://www.alessonislearned.com/'
imageUrl = 'http://www.alessonislearned.com/lesson%s.html'
imageSearch = compile(r'<img src="(cmx/.+?)"')
prevSearch = compile(r"<a href='(index.php\?comic=.+?)'.+?previous")
help = 'Index format: nnn'
class ASofterWorld(_BasicScraper):
latestUrl = 'http://www.asofterworld.com/'
imageUrl = 'http://www.asofterworld.com/index.php?id=%s'
imageSearch = compile(r'<img src="(http://www.asofterworld.com/clean/[^"]+)"')
prevSearch = compile(r'"([^"]+)">back')
help = 'Index format: n (unpadded)'
class AbleAndBaker(_BasicScraper):
latestUrl = 'http://www.jimburgessdesign.com/comics/index.php'
imageUrl = 'http://www.jimburgessdesign.com/comics/index.php?comic=%s'
imageSearch = compile(r'<img src="(comics/.+?)"')
prevSearch = compile(r'<a href="(.+\d+?)".+previous.gif')
help = 'Index format: nnn'
class AbominableCharlesChristopher(_BasicScraper):
latestUrl = 'http://abominable.cc/'
imageUrl = 'http://abominable.cc/%s'
imageSearch = compile(r'cc(/comics/.+?)"')
prevSearch = compile(r'cc(/.+?)".+?prev')
help = 'Index format: yyyy/mm/dd/comicname'
class AbstractGender(_BasicScraper):
latestUrl = 'http://www.abstractgender.com/'
imageUrl = 'http://www.abstractgender.com/?comic=%s'
imageSearch = compile(r'<img[^>]+src="(comics/\d+\.\w+)"')
prevSearch = compile(r'<a\W+href="(\?comic=\d+)"><img[^>]+id="comic_menu_prev"')
help = 'Index format: n (unpadded)'
class AbsurdNotions(_BasicScraper):
latestUrl = 'http://www.absurdnotions.org/page129.html'
imageUrl = 'http://www.absurdnotions.org/page%s.html'
imageSearch = compile(r'<IMG SRC="(an[^"]+)"')
prevSearch = compile(r'HREF="([^"]+)"><IMG SRC="nprev\.gif"')
help = 'Index format: n (unpadded)'
class AbstruseGoose(_BasicScraper):
starter = bounceStarter('http://abstrusegoose.com/',
compile(r'<a href = "(http://abstrusegoose.com/\d+)">Next &raquo;</a>'))
imageUrl = 'http://abstrusegoose.com/c%s.html'
imageSearch = compile(r'<img[^<]+src="(http://abstrusegoose.com/strips/[^<>"]+)"')
prevSearch = compile(r'<a href = "(http://abstrusegoose.com/\d+)">&laquo; Previous</a>')
help = 'Index format: n (unpadded)'
@classmethod
def namer(cls, imageUrl, pageUrl):
index = int(pageUrl.rstrip('/').split('/')[-1])
name = imageUrl.split('/')[-1].split('.')[0]
return 'c%03d-%s' % (index, name)
class AcademyVale(_BasicScraper):
latestUrl = 'http://imagerie.com/vale/'
imageUrl = 'http://imagerie.com/vale/avarch.cgi?%s'
imageSearch = compile(r'<IMG.+?SRC="(avale\d{4}-\d{2}\..*?)"')
prevSearch = compile(r'HREF=(avarch.*?)><IMG SRC="AVNavBack.gif"')
help = 'Index format: nnn'
class Alice(_BasicScraper):
latestUrl = 'http://alice.alicecomics.com/'
imageUrl = 'http://alice.alicecomics.com/%s'
imageSearch = compile(r'(/comics/.+?)"')
prevSearch = compile(r' .+"?com(/.+?)" rel="prev')
help = 'Index format: non'
class AlienLovesPredator(_BasicScraper):
imageUrl = 'http://alienlovespredator.com/%s'
imageSearch = compile(r'<img src="(.+?)"[^>]+>(<center>\n|\n|</center>\n)<div style="height: 2px;">&nbsp;</div>', MULTILINE)
prevSearch = compile(r'<a href="(.+?)"><img src="/images/nav_previous.jpg"')
help = 'Index format: nnn'
starter = bounceStarter('http://alienlovespredator.com/index.php', compile(r'<a href="(.+?)"><img src="/images/nav_next.jpg"'))
@classmethod
def namer(cls, imageUrl, pageUrl):
vol = pageUrl.split('/')[-5]
num = pageUrl.split('/')[-4]
ccc = pageUrl.split('/')[-3]
ddd = pageUrl.split('/')[-2]
return '%s-%s-%s-%s' % (vol, num, ccc, ddd)
class AnarchySD(_BasicScraper):
imageUrl = 'http://www.anarchycomic.com/page%s.php'
imageSearch = compile(r'<img.+src="../(images/page\d+\..+?)"')
prevSearch = compile(r'<a href="(page\d+\.php)">PREVIOUS PAGE')
help = 'Index format: n (unpadded)'
starter = indirectStarter(
'http://www.anarchycomic.com/page1.php',
compile(r'<a href="(page\d+\.php)" class="style15">LATEST'))
class Altermeta(_BasicScraper):
latestUrl = 'http://altermeta.net/'
imageUrl = 'http://altermeta.net/archive.php?comic=%s&view=showfiller'
imageSearch = compile(r'<img src="(comics/[^"]+)" />')
prevSearch = compile(r'<a href="([^"]+)"><img src="http://altermeta\.net/template/default/images/sasha/back\.png')
help = 'Index format: n (unpadded)'
class AltermetaOld(Altermeta):
name = 'Altermeta/Old'
latestUrl = 'http://altermeta.net/oldarchive/index.php'
imageUrl = 'http://altermeta.net/oldarchive/archive.php?comic=%s'
prevSearch = compile(r'<a href="([^"]+)">Back')
class Angels2200(_BasicScraper):
latestUrl = 'http://www.janahoffmann.com/angels/'
imageSearch = compile(r"<img src='(http://www.janahoffmann.com/angels/comics/[^']+)'>")
prevSearch = compile(r'<a href="([^"]+)">&laquo; Previous</a>')
class AppleGeeks(_BasicScraper):
latestUrl = 'http://www.applegeeks.com/'
imageUrl = 'http://www.applegeeks.com/comics/viewcomic.php?issue=%s'
imageSearch = compile(r'<img src="((?:/comics/)?issue\d+?\..+?)"')
prevSearch = compile(r'<div class="caption">Previous Comic</div>\s*<p><a href="([^"]+)">', MULTILINE)
help = 'Index format: n (unpadded)'
class AppleGeeksLite(_BasicScraper):
latestUrl = 'http://www.applegeeks.com/lite/'
imageUrl = 'http://applegeeks.com/lite/index.php?aglitecomic=%s'
imageSearch = compile(r'<img src="(strips/.+?)"')
prevSearch = compile(r'<a href="(index.php\?aglitecomic=.+?)".+?back')
help = 'Index format: yyyy-mm-dd'
class Achewood(_BasicScraper):
latestUrl = 'http://www.achewood.com/'
imageUrl = 'http://www.achewood.com/index.php?date=%s'
imageSearch = compile(r'<img src="(http://m.assetbar.com/achewood/autaux.+?)"')
prevSearch = compile(r'<a href="(index\.php\?date=\d{8})" class="dateNav" title="Previous comic"')
help = 'Index format: mmddyyyy'
namer = regexNamer(compile(r'date%3D(\d{8})'))
class AstronomyPOTD(_BasicScraper):
starter = bounceStarter(
'http://antwrp.gsfc.nasa.gov/apod/astropix.html',
compile(r'<a href="(ap\d{6}\.html)">&gt;</a>'))
imageUrl = 'http://antwrp.gsfc.nasa.gov/apod/ap%s.html'
imageSearch = compile(r'<a href="(image/\d{4}/.+\..+?)">')
prevSearch = compile(r'<a href="(ap\d{6}\.html)">&lt;</a>')
help = 'Index format: yymmdd'
@classmethod
def namer(cls, imageUrl, pageUrl):
return '%s-%s' % (pageUrl.split('/')[-1].split('.')[0][2:],
imageUrl.split('/')[-1].split('.')[0])
class AfterStrife(_BasicScraper):
latestUrl = 'http://afterstrife.com/?p=262'
imageUrl = 'http://afterstrife.com/?p=%s'
imageSearch = compile(r'<img src="(http://afterstrife.com/strips/.+?)"')
prevSearch = compile(r'<a href="(.+?)" class="navi navi-prev"')
help = 'Index format: nnn'
class AnUnrehearsedRiot(_BasicScraper):
latestUrl = 'http://unrehearsedriot.com/'
imageUrl = 'http://unrehearsedriot.com/%s'
imageSearch = compile(r'<img src="(http://unrehearsedriot.com/comics/.+?)"')
prevSearch = compile(r'<a href="(http://unrehearsedriot.com/.+?)" class="navi navi-prev"')
help = 'Index format: yyyy/mm/dd/strip-name'
class ALLCAPS(_BasicScraper):
latestUrl = 'http://www.allcapscomix.com/'
imageUrl = 'http://www.allcapscomix.com/%s'
imageSearch = compile(r'<img src="(http://www.allcapscomix.com/comics/.+?)"')
prevSearch = compile(r'href="(.+?)">(&#9668; Previous|<span class="prev">)')
help = 'Index format: yyyy/mm/strip-name'
class ASkeweredParadise(_BasicScraper):
latestUrl = 'http://aspcomics.net/'
imageUrl = 'http://aspcomics.net/archindex.php?strip_id=%s'
imageSearch = compile(r'<img src="(istrip_files/strips/.+?)"')
prevSearch = compile(r'</a><a href="(.+?)"><img src="images/previous_day.jpg"')
help = 'Index format: nnn'
class AGirlAndHerFed(_BasicScraper):
starter = bounceStarter('http://www.agirlandherfed.com/',
compile(r' href="(/comic/\?\d+)" class="navigationActive">Next</a>\]'))
imageUrl = 'http://www.agirlandherfed.com/comic/?%s'
imageSearch = compile(r'<img src="(/images/comics/.+?)"')
prevSearch = compile(r' href="(/comic/\?\d+)" class="navigationActive">Previous</a>\]')
help = 'Index format: nnn'
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.split('?')[-1]
class AetheriaEpics(_BasicScraper):
latestUrl = 'http://aetheria-epics.schala.net/'
imageUrl = 'http://aetheria-epics.schala.net/%s.html'
imageSearch = compile(r'<td><img src="(\d{5}.\w{3,4})"')
prevSearch = compile(r'<a href="(\d{5}.html)"><img src="prev.jpg"\/>')
help = 'Index format: nnn'
class Adrift(_BasicScraper):
latestUrl = 'http://www.adriftcomic.com/'
imageUrl = 'http://www.adriftcomic.com/page%s.html'
imageSearch = compile(r'<IMG SRC="(Adrift_Web_Page\d+.jpg)"')
prevSearch = compile(r'<A HREF="(.+?)"><IMG SRC="AdriftBackLink.gif"')
help = 'Index format: nnn'
class AirForceBlues(_BasicScraper):
latestUrl = 'http://www.afblues.com/'
imageUrl = 'http://www.afblues.com/?p=%s'
imageSearch = compile(r'<img src=\'(http://www.afblues.com/comics/.+?)\'>')
prevSearch = compile(r'<a href="(http://www.afblues.com/.+?)">&laquo; Previous')
help = 'Index format: nnn'
class AlienShores(_BasicScraper):
latestUrl = 'http://alienshores.com/alienshores_band/'
imageUrl = 'http://alienshores.com/alienshores_band/?p=%s'
imageSearch = compile(r'><img src="(http://alienshores.com/alienshores_band/comics/.+?)"')
prevSearch = compile(r'<a href="(http://alienshores.com/.+?)" rel="prev">')
help = 'Index format: nnn'
class AllKindsOfBees(_BasicScraper):
latestUrl = 'http://www.allkindsofbees.com/'
imageUrl = 'http://www.allkindsofbees.com/?p=%s'
imageSearch = compile(r'<img src="(http://www.allkindsofbees.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.allkindsofbees.com/.+?)">')
help = 'Index format: nnn'
class AllTheGrowingThings(_BasicScraper):
latestUrl = 'http://typodmary.com/growingthings/'
imageUrl = 'http://typodmary.com/growingthings/%s/'
imageSearch = compile(r'<img src="(http://typodmary.com/growingthings/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://typodmary.com/growingthings/.+?)"')
help = 'Index format: yyyy/mm/dd/strip-name'
class Amya(_BasicScraper):
latestUrl = 'http://www.amyachronicles.com/'
imageUrl = 'http://www.amyachronicles.com/archives/%s'
imageSearch = compile(r'<img src="(http://www.amyachronicles.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.amyachronicles.com/archives/.+?)"')
help = 'Index format: nnn'
class Angband(_BasicScraper):
latestUrl = 'http://angband.calamarain.net/index.php'
imageUrl = 'http://angband.calamarain.net/view.php?date=%s'
imageSearch = compile(r'<img src="(comics/Strip.+?)"')
prevSearch = compile(r'<a href="(view.php\?date\=.+?)">Previous</a>')
help = 'Index format: yyyy-mm-dd'
class ArcticBlast(_BasicScraper):
latestUrl = 'http://www.arcticblastcomic.com/'
imageUrl = 'http://www.arcticblastcomic.com/?p=%s'
imageSearch = compile(r'<img src="(http://www.arcticblastcomic.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.arcticblastcomic.com/.+?)"')
help = 'Index format: nnn'
class ActionAthena(_BasicScraper):
latestUrl = 'http://actionathena.com/'
imageUrl = 'http://actionathena.com/2%s'
imageSearch = compile(r'<img src=\'(http://actionathena.com/comics/.+?)\'>')
prevSearch = compile(r'<a href="(http://actionathena.com/.+?)">&laquo; Previous</a>')
help = 'Index format: yyyy/mm/dd/strip-name'
class AlsoBagels(_BasicScraper):
latestUrl = 'http://www.alsobagels.com/'
imageUrl = 'http://alsobagels.com/index.php/comic/%s/'
imageSearch = compile(r'<img src="(http://alsobagels.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://alsobagels.com/index.php/comic/.+?)">')
help = 'Index format: strip-name'
class Annyseed(_BasicScraper):
latestUrl = 'http://www.colourofivy.com/annyseed_webcomic_latest.htm'
imageUrl = 'http://www.colourofivy.com/annyseed_webcomic%s.htm'
imageSearch = compile(r'<td width="570" height="887" valign="top"><img src="(.+?)"')
prevSearch = compile(r'<a href="(http://www.colourofivy.com/.+?)"><img src="Last.gif"')
help = 'Index format: nnn'

317
dosagelib/plugins/b.py Normal file
View file

@ -0,0 +1,317 @@
from re import compile
from ..helpers import _BasicScraper
class BadlyDrawnKitties(_BasicScraper):
latestUrl = 'http://www.badlydrawnkitties.com/'
imageUrl = 'http://www.badlydrawnkitties.com/new/%s.html'
imageSearch = compile(r'<img src="(/new/.+?)">')
prevSearch = compile(r'"(/new/.+?)".+?previous.gif')
help = 'Index format: n (unpadded)'
class Bardsworth(_BasicScraper):
latestUrl = 'http://www.bardsworth.com/'
imageUrl = 'http://www.bardsworth.com/archive.php?p=%s'
imageSearch = compile(r'(strips/.+?)"')
prevSearch = compile(r'"(http.+?)".+?/prev')
help = 'Index format: nnn'
class BetterDays(_BasicScraper):
latestUrl = 'http://www.jaynaylor.com/betterdays/'
imageUrl = 'http://www.jaynaylor.com/betterdays/archives/%s'
imageSearch = compile(r'<img src=(/betterdays/comic/.+?)>')
prevSearch = compile(r'<a href="(.+)">&laquo; Previous')
help = 'Index format: yyyy/mm/<your guess>.html'
class BetterYouThanMe(_BasicScraper):
latestUrl = 'http://betteryouthanme.net/'
imageUrl = 'http://betteryouthanme.net/archive.php?date=%s.gif'
imageSearch = compile(r'"(comics/.+?)"')
prevSearch = compile(r'"(archive.php\?date=.+?)">.+?previous')
help = 'Index format: yyyymmdd'
class BiggerThanCheeses(_BasicScraper):
latestUrl = 'http://www.biggercheese.com'
imageUrl = 'http://www.biggercheese.com/index.php?comic=%s'
imageSearch = compile(r'src="(comics/.+?)" alt')
prevSearch = compile(r'"(index.php\?comic=.+?)".+?_back')
help = 'Index format: n (unpadded)'
class BizarreUprising(_BasicScraper):
latestUrl = 'http://www.bizarreuprising.com/'
imageUrl = 'http://www.bizarreuprising.com/view/%s'
imageSearch = compile(r'<img src="(comic/[^"]+)"')
prevSearch = compile(r'<a href="(view/\d+/[^"]+)"><img src="images/b_prev\.gif"')
help = 'Index format: n/name'
class Blip(_BasicScraper):
latestUrl = 'http://blipcomic.com/'
imageUrl = 'http://blipcomic.com/index.php?strip_id=%s'
imageSearch = compile(r'(istrip_files/strips/.+?)"')
prevSearch = compile(r'First.+?"(index.php\?strip_id=.+?)".+?prev')
help = 'Index format: n'
class BlueCrashKit(_BasicScraper):
latestUrl = 'http://www.bluecrashkit.com/cheese/'
imageUrl = 'http://www.bluecrashkit.com/cheese/node/%s'
imageSearch = compile(r'(/cheese/files/comics/.+?)"')
prevSearch = compile(r'(/cheese/node/.+?)".+?previous')
help = 'Index format: non'
class BMovieComic(_BasicScraper):
latestUrl = 'http://www.bmoviecomic.com/'
imageUrl = 'http://www.bmoviecomic.com/?cid=%s'
imageSearch = compile(r'"(comics/.+?)"')
prevSearch = compile(r'(\?cid=.+?)".+?Prev')
help = 'Index format: n'
### With BratHalla there is no 'previous' link at comic 360
### You will need to use
### mainline -c BratHalla:360-backup-dad-unstable-plans/
### to get earlier comics
class BratHalla(_BasicScraper):
latestUrl = 'http://brat-halla.com/'
imageUrl = 'http://brat-halla.com/comic/%s'
imageSearch = compile(r"(/comics/.+?)' target='_blank")
prevSearch = compile(r'headernav2".+?"(http.+?)"')
help = 'Index format: non'
class Brink(_BasicScraper):
latestUrl = 'http://paperfangs.com/brink/'
imageUrl = 'http://paperfangs.com/brink/?p=%s'
imageSearch = compile(r'/(comics/.+?)"')
prevSearch = compile(r'previous.+?/brink/(.+?)".+?Previous')
help = 'Index format: non'
class BonoboConspiracy(_BasicScraper):
latestUrl = 'http://ansuz.sooke.bc.ca/bonobo-conspiracy/'
imageUrl = 'http://ansuz.sooke.bc.ca/bonobo-conspiracy/%s'
imageSearch = compile(r'<P.+?<IMG SRC="(.+?)" ALT')
prevSearch = compile(r'ansuz.+?/(\?i=.+?)".+?Previous')
help = 'Index format: nnn'
class BoredAndEvil(_BasicScraper):
latestUrl = 'http://www.boredandevil.com/'
imageUrl = 'http://www.boredandevil.com/archive.php?date=%s'
imageSearch = compile(r'<img src="(strips/.+?)"')
prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on.gif')
help = 'Index format: yyyy-mm-dd'
class BoyOnAStickAndSlither(_BasicScraper):
latestUrl = 'http://www.boasas.com/'
imageUrl = 'http://www.boasas.com/?c=%s'
imageSearch = compile(r'"(boasas/\d+\..+?)"')
prevSearch = compile(r'<a href="(.+?)"><img src="images/left_20.png"')
help = 'Index format: n (unpadded)'
class ButternutSquash(_BasicScraper):
latestUrl = 'http://www.butternutsquash.net/'
imageUrl = 'http://www.butternutsquash.net/v3/%s'
imageSearch = compile(r'<img src="(http://www.butternutsquash.net/v3/comics/.+?)"')
prevSearch = compile(r'<a href="(http://www.butternutsquash.net/v3/.+?)">(<span class="prev">&#9668;|&#8656; Previous</a>)')
help = 'Index format: yyyy/mm/dd/strip-name-author-name'
class Bhag(_BasicScraper):
latestUrl = 'http://bhag.sackofjustice.com/'
imageUrl = 'http://bhag.sackofjustice.com/daily.php?date=%s'
imageSearch = compile(r'/(comics/.+?)">')
prevSearch = compile(r'first.+?/(daily.php\?date=.+?)".+?previous')
help = 'Index format: yymmdd'
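# Factory for the Blank Label Comics family: the member sites share the same
# archive layout and differ only in domain, so one template builds a scraper
# class per site. The prevSearch alternation covers the different "previous"
# button markups used across those sites.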
def blankLabel(name, baseUrl):
return type('BlankLabel_%s' % name,
(_BasicScraper,),
dict(
name='BlankLabel/' + name,
latestUrl=baseUrl,
imageUrl='d/%s.html',
imageSearch=compile(r'"(/comic[s|/].+?)"'),
prevSearch=compile(r'(?:"([^"]*(?:/d/[^"\r\n]*)|(?:/strip/.+?))")(?:(?:.{43}starshift_back.gif)|(?:.+?cxn_previous)|(?:.{43}previous)|(?:[^<>]*>[^<>]*<[^<>]*previous)|(?:.*?back_button)|(?:.*?comicnav-previous))'),
help='Index format: yyyymmdd')
)
checkerboardNightmare = blankLabel('CheckerboardNightmare', 'http://www.checkerboardnightmare.com/')
courtingDisaster = blankLabel('CourtingDisaster', 'http://www.courting-disaster.com/')
evilInc = blankLabel('EvilInc', 'http://www.evil-comic.com/')
greystoneInn = blankLabel('GreystoneInn', 'http://www.greystoneinn.net/')
itsWalky = blankLabel('ItsWalky', 'http://www.itswalky.com/')
# one strip name starts with %20
#krazyLarry = blankLabel('KrazyLarry', 'http://www.krazylarry.com/')
melonpool = blankLabel('Melonpool', 'http://www.melonpool.com/')
# strip names = index.php
#realLife = blankLabel('RealLife', 'http://www.reallifecomics.com/')
schlockMercenary = blankLabel('SchlockMercenary', 'http://www.schlockmercenary.com/')
# hosted on ComicsDotCom
#sheldon = blankLabel('Sheldon', 'http://www.sheldoncomics.com/')
shortpacked = blankLabel('Shortpacked', 'http://www.shortpacked.com/')
starslipCrisis = blankLabel('StarslipCrisis', 'http://www.starslipcrisis.com/')
uglyHill = blankLabel('UglyHill', 'http://www.uglyhill.com/')
class BeePower(_BasicScraper):
latestUrl = 'http://comicswithoutviolence.com/d/20080713.html'
imageUrl = 'http://comicswithoutviolence.com/d/%s.html'
imageSearch = compile(r'src="(/comics/.+?)"')
prevSearch = compile(r'(\d+\.html)"><img[^>]+?src="/images/previous_day.png"')
help = 'Index format: yyyy/mm/dd'
class Bellen(_BasicScraper):
latestUrl = 'http://boxbrown.com/'
imageUrl = 'http://boxbrown.com/?p=%s'
imageSearch = compile(r'<img src="(http://boxbrown.com/comics/[^"]+)"')
prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
help = 'Index format: nnn'
class BlankIt(_BasicScraper):
latestUrl = 'http://blankitcomics.com/'
imageUrl = 'http://blankitcomics.com/%s'
imageSearch = compile(r'<img src="(http://blankitcomics.com/bicomics/.+?)"')
prevSearch = compile(r'<a href="([^"]+)" rel="prev">')
help = 'Index format: yyyy/mm/dd/name'
class BobWhite(_BasicScraper):
latestUrl = 'http://www.bobwhitecomics.com/'
imageUrl = 'http://www.bobwhitecomics.com/%s.shtml'
imageSearch = compile(r'src="(/comics/.+?)"')
prevSearch = compile(r'"><a href="(.+?)"[^>]+?><img[^>]+?src="/images/prev.jpg">')
help = 'Index format: yyyymmdd'
class BigFatWhale(_BasicScraper):
latestUrl = 'http://www.bigfatwhale.com/'
imageUrl = 'http://www.bigfatwhale.com/archives/bfw_%s.htm'
imageSearch = compile(r'<img src="(archives/bfw_.+?|bfw_.+?)"')
prevSearch = compile(r' HREF="(.+?)" TARGET="_top" TITLE="Previous Cartoon"')
help = 'Index format: nnn'
class BadassMuthas(_BasicScraper):
latestUrl = 'http://badassmuthas.com/pages/comic.php'
imageUrl = 'http://badassmuthas.com/pages/comic.php?%s'
imageSearch = compile(r'<img src="(/images/comicsissue.+?)"')
prevSearch = compile(r'<a href="(.+?)"><img src="/images/comicsbuttonBack.gif" ')
help = 'Index format: nnn'
class Boozeathon4Billion(_BasicScraper):
latestUrl = 'http://boozeathon4billion.com/'
imageUrl = 'http://boozeathon4billion.com/comics/%s'
imageSearch = compile(r'<img src="(http://boozeathon4billion.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
help = 'Index format: (sometimes chapternumber/)-yyyy-mm-dd/stripname'
class BrightlyWound(_BasicScraper):
latestUrl = 'http://www.brightlywound.com/'
imageUrl = 'http://www.brightlywound.com/?comic=%s'
imageSearch = compile(r'<img src=\'(comic/.+?)\'')
prevSearch = compile(r'<div id=\'navback\'><a href=\'(\?comic\=\d+)\'><img src=\'images/previous.png\'')
help = 'Index format: nnn'
class BlueCrashKit(_BasicScraper):
latestUrl = 'http://robhamm.com/bluecrashkit'
imageUrl = 'http://robhamm.com/comics/blue-crash-kit/%s'
imageSearch = compile(r'src="(http://robhamm.com/sites/default/files/comics/.+?)"')
prevSearch = compile(r'<li class="previous"><a href="(.+?)">')
help = 'Index format: yyyy-mm-dd'
class BloodBound(_BasicScraper):
latestUrl = 'http://www.bloodboundcomic.com/'
imageUrl = 'http://www.bloodboundcomic.com/d/%s.html'
imageSearch = compile(r' src="(/comics/.+?)"')
prevSearch = compile(r' <a href="(/d/.+?)"><img[^>]+?src="/images/previous_day.jpg"')
help = 'Index format: yyyymmdd'
class BookOfBiff(_BasicScraper):
latestUrl = 'http://www.thebookofbiff.com/'
imageUrl = 'http://www.thebookofbiff.com/%s'
imageSearch = compile(r'<img src="(http://www.thebookofbiff.com/comics/.+?)"')
prevSearch = compile(r'<a href="(http://www.thebookofbiff.com/.+?)">&#9668; Previous</a>')
help = 'Index format: yyyy/mm/dd/stripnum-strip-name'
class BillyTheDunce(_BasicScraper):
latestUrl = 'http://www.duncepress.com/'
imageUrl = 'http://www.duncepress.com/%s/'
imageSearch = compile(r'<img src="(http://www.duncepress.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.duncepress.com/[^"]+)" rel="prev">')
help = 'Index format: yyyy/mm/strip-name'
class BackwaterPlanet(_BasicScraper):
latestUrl = 'http://www.backwaterplanet.com/current.htm'
imageUrl = 'http://www.backwaterplanet.com/archive/bwp%s.htm'
imageSearch = compile(r'<img src="(/images/comic/bwp.+?)">')
prevSearch = compile(r'<a href="(/archive/bwp.+?)"><img src="(images/Previous.jpg|/images/Previous.jpg)"')
help = 'Index format: yymmdd'
class Baroquen(_BasicScraper):
latestUrl = 'http://www.baroquencomics.com/'
imageUrl = 'http://www.baroquencomics.com/%s/'
imageSearch = compile(r'<img src="(http://www.baroquencomics.com/Comics/.+?)"')
prevSearch = compile(r'<a href="(http://www.baroquencomics.com/.+?)" rel="prev">')
help = 'Index format: yyyy/mm/dd/strip-name'
class BetweenFailures(_BasicScraper):
latestUrl = 'http://betweenfailures.com/'
imageUrl = 'http://betweenfailures.com/%s'
imageSearch = compile(r'<img src=\'(http://betweenfailures.com/comics/.+?)\'>')
prevSearch = compile(r'<a href="(http://betweenfailures.com/.+?)">&laquo; Previous</a>')
help = 'Index format: yyyy/mm/dd/stripnum-strip-name'
class BillyTheBeaker(_BasicScraper):
latestUrl = 'http://billy.defectivejunk.com/'
imageUrl = 'http://billy.defectivejunk.com/index.php?strip=%s'
imageSearch = compile(r'<img src="(bub\d+_\d+.+?)"')
prevSearch = compile(r' <a href="(index.php\?strip\=.+?)" title="Previous strip">')
help = 'Index format: nnn'

495
dosagelib/plugins/c.py Normal file
View file

@ -0,0 +1,495 @@
from re import compile
from ..helpers import (
_BasicScraper, constStarter, bounceStarter, indirectStarter)
from ..util import getQueryParams
class CalvinAndHobbes(_BasicScraper):
latestUrl = 'http://www.gocomics.com/calvinandhobbes/'
imageUrl = 'http://www.gocomics.com/calvinandhobbes/%s'
imageSearch = compile(r'src="(http://picayune\.uclick\.com/comics/ch/[^"]+\.gif)"')
prevSearch = compile(r'href="(.*?)"\s+onclick="[^"]*">Previous day</a>')
help = 'Index format: yyyy/mm/dd'
class CandyCartoon(_BasicScraper):
latestUrl = 'http://www.candycartoon.com/'
imageUrl = 'http://www.candycartoon.com/archives/%s.html'
imageSearch = compile(r'<img alt="[^"]*" src="(http://www\.candycartoon\.com/archives/[^"]+)"')
prevSearch = compile(r'<a href="(http://www\.candycartoon\.com/archives/\d{6}\.html)">prev')
help = 'Index format: nnnnnn'
class CaptainSNES(_BasicScraper):
latestUrl = 'http://captainsnes.com/'
imageUrl = 'http://captainsnes.com/?date=%s'
imageSearch = compile(r'<img src=\'(http://www.captainsnes.com/comics/.+?)\'')
prevSearch = compile(r'<a href="http://www.captainsnes.com/(.+?)"><span class="prev">')
help = 'Index format: yyyymmdd'
class CaribbeanBlue(_BasicScraper):
latestUrl = 'http://cblue.katbox.net/'
imageUrl = 'http://cblue.katbox.net/index.php?strip_id=%s'
imageSearch = compile(r'="(.+?strips/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><img src="images/navigation_back.png"')
help = 'Index format: n (unpadded)'
class Catena(_BasicScraper):
latestUrl = 'http://catenamanor.com/'
imageUrl = 'http://catenamanor.com/index.php?comic=%s'
imageSearch = compile(r'(comics/catena/.+?)"')
prevSearch = compile(r'First</a>.+?"(.+?)".+?Previous')
help = 'Index format: n (unpadded)'
class Catharsis(_BasicScraper):
latestUrl = 'http://catharsiscomic.com/'
imageUrl = 'http://catharsiscomic.com/archive.php?strip=%s'
imageSearch = compile(r'<img src="(strips/.+?)"')
prevSearch = compile(r'<a href="(.+?)".+"Previous')
help = 'Index format: yymmdd-<your guess>.html'
class ChasingTheSunset(_BasicScraper):
latestUrl = 'http://www.fantasycomic.com/'
imageUrl = 'http://www.fantasycomic.com/index.php?p=c%s'
imageSearch = compile(r'(/cmsimg/.+?)".+?comic-img')
prevSearch = compile(r'<a href="(.+?)" title="" ><img src="(images/eye-prev.png|images/cn-prev.png)"')
help = 'Index format: n'
class Chisuji(_BasicScraper):
latestUrl = 'http://www.chisuji.com/'
imageUrl = 'http://www.chisuji.com/%s'
imageSearch = compile(r'<img src="(http://www.chisuji.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.chisuji.com/.+?)">')
help = 'Index format: yyyy/mm/dd/strip-name'
class ChugworthAcademy(_BasicScraper):
latestUrl = 'http://chugworth.com/'
imageUrl = 'http://chugworth.com/?p=%s'
imageSearch = compile(r'<img src="(.+?)" alt="Comic')
prevSearch = compile(r'<a href="(http://chugworth.com/\?p=\d{1,4})"[^>]+?title="Previous">')
help = 'Index format: n (unpadded)'
class ChugworthAcademyArchive(_BasicScraper):
latestUrl = 'http://chugworth.com/archive/?strip_id=422'
imageUrl = 'http://chugworth.com/archive/?strip_id=%s'
imageSearch = compile(r'<img src=(comics/\d+.+?.\w{1,4})')
prevSearch = compile(r'<a href=\'(.+?)\'><img src=\'images/previous.gif')
help = 'Index format: nnn'
class CigarroAndCerveja(_BasicScraper):
latestUrl = 'http://www.cigarro.ca/'
imageUrl = 'http://www.cigarro.ca/?p=%s'
imageSearch = compile(r"(/comics/.+?)'")
prevSearch = compile(r'(/\?p=.+?)">&laq')
help = 'Index format: non'
class CombustibleOrange(_BasicScraper):
latestUrl = 'http://www.combustibleorange.com/'
imageUrl = 'http://www.combustibleorange.com/index.php?current=%s'
imageSearch = compile(r'<img src="(/images/comics/\d+?\.gif)"')
prevSearch = compile(r'><a href="(.+?)"><img src="images/button-last.gif" border="0">')
help = 'Index format: n (unpadded)'
class Comedity(_BasicScraper):
latestUrl = 'http://www.comedity.com/'
imageUrl = 'http://www.comedity.com/index.php?strip_id=%s'
imageSearch = compile(r'<img src="(Comedity_files/.+?)"')
prevSearch = compile(r'<a href="(/?index.php\?strip_id=\d+?)"> *<img alt=\"Prior Strip')
help = 'Index format: n (no padding)'
class Comet7(_BasicScraper):
latestUrl = 'http://www.comet7.com/'
imageUrl = 'http://www.comet7.com/archive_page.php?id=%s'
imageSearch = compile(r'"(.*?/strips/.*?)"')
prevSearch = compile(r'"(.*?)".*?previous_stripf')
help = 'Index format: n (unpadded)'
class Commissioned(_BasicScraper):
latestUrl = 'http://www.commissionedcomic.com/'
imageUrl = 'http://www.commissionedcomic.com/index.php?strip=%s'
imageSearch = compile(r'<img src="(http://www.commissionedcomic.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)">&lsaquo;</a>')
help = 'Index format: n'
class CoolCatStudio(_BasicScraper):
latestUrl = 'http://www.coolcatstudio.com/'
imageUrl = 'http://www.coolcatstudio.com/index.php?p=%s'
imageSearch = compile(r'(/comics/.+?)"')
prevSearch = compile(r"href='(.+?)'>PREV")
help = 'Index format: n'
class CourtingDisaster(_BasicScraper):
latestUrl = 'http://www.courting-disaster.com/'
imageUrl = 'http://www.courting-disaster.com/archive/%s.html'
imageSearch = compile(r'(/comics/.+?)"')
prevSearch = compile(r'</a><a href="(.+?)"><img src="/images/previous.gif"[^>]+?>')
help = 'Index format: yyyymmdd'
class CrapIDrewOnMyLunchBreak(_BasicScraper):
latestUrl = 'http://crap.jinwicked.com/'
imageUrl = 'http://crap.jinwicked.com/%s'
imageSearch = compile(r'<img src="(http://crap.jinwicked.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><img src="http://comics.jinwicked.com/images/navigation_back.png"')
help = 'Index format: yyyy/mm/dd/name'
class CtrlAltDel(_BasicScraper):
latestUrl = 'http://www.cad-comic.com/cad/'
imageSearch = compile(r'<img src="(/comics/\w+/\d{8}\..+?)"')
prevSearch = compile(r'<a href="(/\w+/\d{8})" class="nav-back')
help = 'Index format: yyyymmdd'
@property
def imageUrl(self):
return self.latestUrl + '%s'
class CtrlAltDelSillies(CtrlAltDel):
name = 'CtrlAltDel/Sillies'
latestUrl = 'http://www.cad-comic.com/sillies/'
class Curvy(_BasicScraper):
latestUrl = 'http://www.c.urvy.org/'
imageUrl = 'http://www.c.urvy.org/?date=%s'
imageSearch = compile(r'(/c/.+?)"')
prevSearch = compile(r'(/\?date=.+?)">&lt;&lt; Previous page')
help = 'Index format: yyyymmdd'
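# Factory for the comics hosted at manga.clone-army.org: finished series get a
# constStarter pointing at their last page (lastStrip), while ongoing ones
# bounce to the newest page via the site's "next" arrow.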
def cloneManga(name, shortName, lastStrip=None):
baseUrl = 'http://manga.clone-army.org/%s.php' % (shortName,)
imageUrl = baseUrl + '?page=%s'
if lastStrip is None:
starter = bounceStarter(baseUrl, compile(r'<a href="([^"]+)"><img src="next\.gif"'))
else:
starter = constStarter(imageUrl % (lastStrip,))
def namer(self, imageUrl, pageUrl):
return '%03d' % (int(getQueryParams(pageUrl)['page'][0]),)
return type('CloneManga_%s' % name,
(_BasicScraper,),
dict(
name='CloneManga/' + name,
starter=starter,
imageUrl=imageUrl,
imageSearch=compile(r'<img src="(http://manga\.clone-army\.org/[^"]+)"'),
prevSearch=compile(r'<a href="([^"]+)"><img src="previous\.gif"'),
help='Index format: n',
namer=namer)
)
anm = cloneManga('AprilAndMay', 'anm')
kanami = cloneManga('Kanami', 'kanami')
momoka = cloneManga('MomokaCorner', 'momoka')
nana = cloneManga('NanasEverydayLife', 'nana', '78')
pxi = cloneManga('PaperEleven', 'pxi', '311')
t42r = cloneManga('Tomoyo42sRoom', 't42r')
penny = cloneManga('PennyTribute', 'penny')
class CatAndGirl(_BasicScraper):
latestUrl = 'http://catandgirl.com/'
imageUrl = 'http://catandgirl.com/?p=%s'
imageSearch = compile(r'<img src="(http://catandgirl.com/archive/.+?)"')
prevSearch = compile(r'\s+<a href="(.+?)">&#9668; Previous</a>')
help = 'Index format: n (unpadded)'
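# Factory for strips syndicated on comics.com: each strip lives under a
# section ('comics', 'creators' or 'wash') and uses dated archive pages, so
# only the slug and section vary between the generated scraper classes.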
def comicsDotCom(name, section):
baseUrl = 'http://www.comics.com/%s/%s/archive/' % (section, name)
@classmethod
def namer(cls, imageUrl, pageUrl):
htmlname = pageUrl.split('/')[-1]
filename = htmlname.split('.')[0]
return filename
return type('ComicsDotCom_%s' % name,
(_BasicScraper,),
dict(
name='ComicsDotCom/' + name,
starter=indirectStarter(baseUrl, compile(r'<A HREF="(/[\w/]+?/archive/\w+?-\d{8}\.html)">(?:<IMG SRC="/[\w/]+?/images/arrow_right.gif|(?:<font[^>]*?>)?Next Day)')),
imageUrl=baseUrl + name + '-%s.html',
imageSearch=compile(r'SRC="(/[\w/]+?/archive/images/\w+?\d+\..+?)"'),
prevSearch=compile(r'<A HREF="(/[\w/]+?/archive/\w+?-\d{8}\.html)">(?:<IMG SRC="/[\w/]+?/images/arrow_left.gif|(?:<font[^>]*?>)?Previous Day)'),
help='Index format: yyyymmdd',
namer=namer)
)
acaseinpoint = comicsDotCom('acaseinpoint', 'comics')
agnes = comicsDotCom('agnes', 'creators')
alleyoop = comicsDotCom('alleyoop', 'comics')
andycapp = comicsDotCom('andycapp', 'creators')
arlonjanis = comicsDotCom('arlonjanis', 'comics')
ballardst = comicsDotCom('ballardst', 'creators')
barkeaterlake = comicsDotCom('barkeaterlake', 'comics')
bc = comicsDotCom('bc', 'creators')
ben = comicsDotCom('ben', 'comics')
betty = comicsDotCom('betty', 'comics')
bignate = comicsDotCom('bignate', 'comics')
bonanas = comicsDotCom('bonanas', 'wash')
bornloser = comicsDotCom('bornloser', 'comics')
buckets = comicsDotCom('buckets', 'comics')
candorville = comicsDotCom('candorville', 'wash')
cheapthrills = comicsDotCom('cheapthrills', 'wash')
chickweed = comicsDotCom('chickweed', 'comics')
committed = comicsDotCom('committed', 'comics')
dilbert = comicsDotCom('dilbert', 'comics')
drabble = comicsDotCom('drabble', 'comics')
fatcats = comicsDotCom('fatcats', 'comics')
ferdnand = comicsDotCom('ferdnand', 'comics')
flightdeck = comicsDotCom('flightdeck', 'creators')
floandfriends = comicsDotCom('floandfriends', 'creators')
franknernest = comicsDotCom('franknernest', 'comics')
frazz = comicsDotCom('frazz', 'comics')
geech = comicsDotCom('geech', 'comics')
genepool = comicsDotCom('genepool', 'wash')
getfuzzy = comicsDotCom('getfuzzy', 'comics')
gofish = comicsDotCom('gofish', 'comics')
graffiti = comicsDotCom('graffiti', 'comics')
grandave = comicsDotCom('grandave', 'comics')
grizzwells = comicsDotCom('grizzwells', 'comics')
heathcliff = comicsDotCom('heathcliff', 'creators')
hedge = comicsDotCom('hedge', 'comics')
herbnjamaal = comicsDotCom('herbnjamaal', 'creators')
herman = comicsDotCom('herman', 'comics')
humblestumble = comicsDotCom('humblestumble', 'comics')
janesworld = comicsDotCom('janesworld', 'comics')
jumpstart = comicsDotCom('jumpstart', 'comics')
kitncarlyle = comicsDotCom('kitncarlyle', 'comics')
liberty = comicsDotCom('liberty', 'creators')
lilabner = comicsDotCom('lilabner', 'comics')
luann = comicsDotCom('luann', 'comics')
marmaduke = comicsDotCom('marmaduke', 'comics')
meg = comicsDotCom('meg', 'comics')
moderatelyconfused = comicsDotCom('moderatelyconfused', 'comics')
momma = comicsDotCom('momma', 'creators')
monty = comicsDotCom('monty', 'comics')
motley = comicsDotCom('motley', 'comics')
nancy = comicsDotCom('nancy', 'comics')
naturalselection = comicsDotCom('naturalselection', 'creators')
offthemark = comicsDotCom('offthemark', 'comics')
onebighappy = comicsDotCom('onebighappy', 'creators')
othercoast = comicsDotCom('othercoast', 'creators')
pcnpixel = comicsDotCom('pcnpixel', 'wash')
peanuts = comicsDotCom('peanuts', 'comics')
pearls = comicsDotCom('pearls', 'comics')
pibgorn = comicsDotCom('pibgorn', 'comics')
pickles = comicsDotCom('pickles', 'wash')
raisingduncan = comicsDotCom('raisingduncan', 'comics')
reality = comicsDotCom('reality', 'comics')
redandrover = comicsDotCom('redandrover', 'wash')
ripleys = comicsDotCom('ripleys', 'comics')
roseisrose = comicsDotCom('roseisrose', 'comics')
rubes = comicsDotCom('rubes', 'creators')
rudypark = comicsDotCom('rudypark', 'comics')
shirleynson = comicsDotCom('shirleynson', 'comics')
soup2nutz = comicsDotCom('soup2nutz', 'comics')
speedbump = comicsDotCom('speedbump', 'creators')
spotthefrog = comicsDotCom('spotthefrog', 'comics')
strangebrew = comicsDotCom('strangebrew', 'creators')
sunshineclub = comicsDotCom('sunshineclub', 'comics')
tarzan = comicsDotCom('tarzan', 'comics')
thatslife = comicsDotCom('thatslife', 'wash')
wizardofid = comicsDotCom('wizardofid', 'creators')
workingdaze = comicsDotCom('workingdaze', 'comics')
workingitout = comicsDotCom('workingitout', 'creators')
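# Factory for strips hosted on creators.com; the archive there offers no
# usable index (imageUrl is None), hence 'Indexing unsupported'.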
def creators(name, shortname):
return type('Creators_%s' % name,
(_BasicScraper,),
dict(
name='Creators/' + name,
latestUrl='http://www.creators.com/comics_show.cfm?ComicName=%s' % (shortname,),
imageUrl=None,
imageSearch=compile(r'<img alt="[^"]+" src="(\d{4}/.+?/.+?\..+?)">'),
prevSearch=compile(r'<a href="(comics_show\.cfm\?next=\d+&ComicName=.+?)" Title="Previous Comic"'),
help='Indexing unsupported')
)
arc = creators('Archie', 'arc')
shg = creators('AskShagg', 'shg')
hev = creators('ForHeavensSake', 'hev')
rug = creators('Rugrats', 'rug')
sou = creators('StateOfTheUnion', 'sou')
din = creators('TheDinetteSet', 'din')
lil = creators('TheMeaningOfLila', 'lil')
wee = creators('WeePals', 'wee')
zhi = creators('ZackHill', 'zhi')
class CyanideAndHappiness(_BasicScraper):
latestUrl = 'http://www.explosm.net/comics'
imageUrl = 'http://www.explosm.net/comics/%s'
imageSearch = compile(r'<img alt="Cyanide and Happiness, a daily webcomic" src="(http:\/\/www\.explosm\.net/db/files/Comics/\w+/\S+\.\w+)"')
prevSearch = compile(r'<a href="(/comics/\d+/?)">< Previous</a>')
help = 'Index format: n (unpadded)'
class CrimsonDark(_BasicScraper):
latestUrl = 'http://www.davidcsimon.com/crimsondark/'
imageUrl = 'http://www.davidcsimon.com/crimsondark/index.php?view=comic&strip_id=%s'
imageSearch = compile(r'src="(.+?strips/.+?)"')
prevSearch = compile(r'<a href=[\'"](/crimsondark/index\.php\?view=comic&amp;strip_id=\d+)[\'"]><img src=[\'"]themes/cdtheme/images/active_prev.png[\'"]')
help = 'Index format: n (unpadded)'
class CrimesOfCybeleCity(_BasicScraper):
latestUrl = 'http://www.pulledpunches.com/crimes/'
imageUrl = 'http://www.beaglespace.com/pulledpunches/crimes/?p=%s'
imageSearch = compile(r'<img src="(http://www\.beaglespace\.com/pulledpunches/crimes/comics/[^"]+)"')
prevSearch = compile(r'<a href="(http://www\.beaglespace\.com/pulledpunches/crimes/\?p=\d+)"><img src="back1\.gif"')
help = 'Index format: nn'
class CatsAndCameras(_BasicScraper):
latestUrl = 'http://catsncameras.com/cnc/'
imageUrl = 'http://catsncameras.com/cnc/?p=%s'
imageSearch = compile(r'<img src="(http://catsncameras.com/cnc/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://catsncameras.com/cnc/.+?)">')
help = 'Index format: nnn'
class CowboyJedi(_BasicScraper):
latestUrl = 'http://www.cowboyjedi.com/'
imageUrl = 'http://www.cowboyjedi.com/%s'
imageSearch = compile(r'<img src="(http://www.cowboyjedi.com/comics/.+?)"')
prevSearch = compile(r'<a href="(http://www.cowboyjedi.com/.+?)" class="navi navi-prev"')
help = 'Index format: yyyy/mm/dd/strip-name'
class CasuallyKayla(_BasicScraper):
latestUrl = 'http://casuallykayla.com/'
imageUrl = 'http://casuallykayla.com/?p=%s'
imageSearch = compile(r'<img src="(http://casuallykayla.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
help = 'Index format: nnn'
class Collar6(_BasicScraper):
latestUrl = 'http://collar6.com/'
imageUrl = 'http://collar6.com/%s'
imageSearch = compile(r'src="(http://collar6.com/comics/.+?)"')
prevSearch = compile(r' href="(http://collar6.com/\d+/\S+)">&#9668; Previous')
help = 'Index format: yyyy/namednumber'
class Chester5000XYV(_BasicScraper):
latestUrl = 'http://jessfink.com/Chester5000XYV/'
imageUrl = 'http://jessfink.com/Chester5000XYV/?p=%s'
imageSearch = compile(r'<img src="(http://jessfink.com/Chester5000XYV/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
help = 'Index format: nnn'
class CalamitiesOfNature(_BasicScraper):
latestUrl = 'http://www.calamitiesofnature.com/'
imageUrl = 'http://www.calamitiesofnature.com/archive/?c=%s'
imageSearch = compile(r'<IMG SRC="(archive/\d+.+?|http://www.calamitiesofnature.com/archive/\d+.+?)"')
prevSearch = compile(r'<a id="previous" href="(http://www.calamitiesofnature.com/archive/\?c\=\d+)">')
help = 'Index format: nnn'
class Champ2010(_BasicScraper):
latestUrl = 'http://www.jedcollins.com/champ2010/'
imageUrl = 'http://jedcollins.com/champ2010/?p=%s'
imageSearch = compile(r'<img src="(http://jedcollins.com/champ2010/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://jedcollins.com/champ2010/.+?)"')
help = 'Index format: nnn'
class Chucklebrain(_BasicScraper):
latestUrl = 'http://www.chucklebrain.com/main.php'
imageUrl = 'http://www.chucklebrain.com/main.php?img=%s'
imageSearch = compile(r'<img src="(/images/strip.+?)"')
prevSearch = compile(r'<a href=\'(/main.php\?img\=\d+)\'><img src=\'/images/previous.jpg\'')
help = 'Index format: nnn'
class CompanyY(_BasicScraper):
latestUrl = 'http://company-y.com/'
imageUrl = 'http://company-y.com/%s/'
imageSearch = compile(r'<img src="(http://company-y.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://company-y.com/.+?)"')
help = 'Index format: yyyy/mm/dd/strip-name'
class CorydonCafe(_BasicScraper):
starter = bounceStarter('http://corydoncafe.com/', compile(r' href="(\./comic-\d+.html)">Next&gt;</a>'))
imageUrl = 'http://corydoncafe.com/comic-%s.html'
imageSearch = compile(r'<img src=\'(\./comics/.+?)\' ')
prevSearch = compile(r' href="(\./comic-\d+.html)">&lt;Previous</a>')
help = 'Index format: nnn'
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.split('/')[-1].split('.')[0]
class CraftedFables(_BasicScraper):
latestUrl = 'http://www.craftedfables.com/'
imageUrl = 'http://www.caf-fiends.net/craftedfables/?p=%s'
imageSearch = compile(r'<img src="(http://www.caf-fiends.net/craftedfables/comics/.+?)"')
prevSearch = compile(r'<a href="(http://www.caf-fiends.net/craftedfables/.+?)"><span class="prev">')
help = 'Index format: nnn'
class Currhue(_BasicScraper):
latestUrl = 'http://www.currhue.com/'
imageUrl = 'http://www.currhue.com/?p=%s'
imageSearch = compile(r'<img src="(http://www.currhue.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.currhue.com/.+?)"')
help = 'Index format: nnn'

182
dosagelib/plugins/d.py Normal file
View file

@ -0,0 +1,182 @@
from re import compile, IGNORECASE, MULTILINE
from ..helpers import _BasicScraper, bounceStarter, indirectStarter
from ..util import getQueryParams
class DMFA(_BasicScraper):
latestUrl = 'http://www.missmab.com/'
imageUrl = 'http://missmab.com/Comics/Vol_%s.php'
imageSearch = compile(r'<IMG SRC="(Comics/.+?|Vol.+?)">', IGNORECASE)
prevSearch = compile(r'<A HREF="(.+?)"><IMG SRC="(Images/comicprev.gif|../Images/comicprev.gif)" ', MULTILINE | IGNORECASE)
help = 'Index format: nnn (normally, some specials)'
class DandyAndCompany(_BasicScraper):
latestUrl = 'http://www.dandyandcompany.com/'
imageUrl = 'http://www.dandyandcompany.com/%s'
imageSearch = compile(r'<img src="(.*?/strips/.+?)"')
prevSearch = compile(r'<a href="(.*)" class="prev"')
help = 'Index format: yyyy/mm/dd'
class DarkWings(_BasicScraper):
latestUrl = 'http://www.flowerlarkstudios.com/dark-wings/'
imageUrl = 'http://www.flowerlarkstudios.com/dark-wings/archive.php?day=%s'
imageSearch = compile(r'(comics/.+?)" W')
prevSearch = compile(r"first_day.+?/(archive.+?)'.+?previous_day")
help = 'Index format: yyyymmdd'
class DeathToTheExtremist(_BasicScraper):
latestUrl = 'http://www.dtecomic.com/'
imageUrl = 'http://www.dtecomic.com/?n=%s'
imageSearch = compile(r'"(comics/.*?)"')
prevSearch = compile(r'</a> <a href="(\?n=.*?)"><.+?/aprev.gif"')
help = 'Index format: nnn'
class DeepFried(_BasicScraper):
latestUrl = 'http://www.whatisdeepfried.com/'
imageUrl = 'http://www.whatisdeepfried.com/%s'
imageSearch = compile(r'(http://www.whatisdeepfried.com/comics/.+?)"')
prevSearch = compile(r'"(http://www.whatisdeepfried.com/.+?)"><span class="prev">')
help = 'Index format: non'
class DoemainOfOurOwn(_BasicScraper):
latestUrl = 'http://www.doemain.com/'
imageUrl = 'http://www.doemain.com/index.cgi/%s'
imageSearch = compile(r"<img border='0' width='\d+' height='\d+' src='(/strips/\d{4}/\d{6}-[^\']+)'")
prevSearch = compile(r'<a href="(/index\.cgi/\d{4}-\d{2}-\d{2})"><img width="\d+" height="\d+" border="\d+" alt="Previous Strip"')
help = 'Index format: yyyy-mm-dd'
class DrFun(_BasicScraper):
latestUrl = 'http://www.ibiblio.org/Dave/ar00502.htm'
imageUrl = 'http://www.ibiblio.org/Dave/ar%s.htm'
imageSearch = compile(r'<A HREF= "(Dr-Fun/df\d{6}/df.+?)">')
prevSearch = compile(r'<A HREF="(.+?)">Previous Week,')
help = 'Index format: nnnnn'
class Dracula(_BasicScraper):
latestUrl = 'http://draculacomic.net/'
imageUrl = 'http://draculacomic.net/comic.php?comicID=%s'
imageSearch = compile(r'<img src="(comics/.+?)"')
prevSearch = compile(r'&nbsp;<a class="archivelink" href="(.+?)">&laquo; Prev</a>')
help = 'Index format: nnn'
class DragonTails(_BasicScraper):
latestUrl = 'http://www.dragon-tails.com/'
imageUrl = 'http://www.dragon-tails.com/archive.php?date=%s'
imageSearch = compile(r'"(newcomic/.+?)"')
prevSearch = compile(r'"(archive.+?)">.+n_2')
help = 'Index format: yyyy-mm-dd'
class DreamKeepersPrelude(_BasicScraper):
latestUrl = 'http://www.dreamkeeperscomic.com/Prelude.php'
imageUrl = 'http://www.dreamkeeperscomic.com/Prelude.php?pg=%s'
imageSearch = compile(r'(images/PreludeNew/.+?)"')
prevSearch = compile(r'(Prelude.php\?pg=.+?)"')
help = 'Index format: n'
class Drowtales(_BasicScraper):
latestUrl = 'http://www.drowtales.com/mainarchive.php'
imageUrl = 'http://www.drowtales.com/mainarchive.php?location=%s'
imageSearch = compile(r'src=".(/tmpmanga/.+?)"')
prevSearch = compile(r'<a href="mainarchive.php(\?location=\d+)"><img src="[^"]*previousday\.gif"')
help = 'Index format: yyyymmdd'
class DungeonCrawlInc(_BasicScraper):
latestUrl = 'http://www.dungeoncrawlinc.com/latest.html'
imageUrl = 'http://www.dungeoncrawlinc.com/comic%s'
imageSearch = compile(r'src="(.+?/DCI_.+?)"')
prevSearch = compile(r'<a href="(.+?)">.+?back')
help = 'Index format: nnn.html'
class DieselSweeties(_BasicScraper):
latestUrl = 'http://www.dieselsweeties.com/'
imageUrl = 'http://www.dieselsweeties.com/archive/%s'
imageSearch = compile(r'src="(/hstrips/.+?)"')
prevSearch = compile(r'href="(/archive/.+?)">(<img src="http://www.dieselsweeties.com/ximages/blackbackarrow160.png|previous webcomic)')
help = 'Index format: n (unpadded)'
@classmethod
def namer(cls, imageUrl, pageUrl):
index = int(imageUrl.split('/')[-1].split('.')[0])
return 'sw%02d' % (index,)
class DominicDeegan(_BasicScraper):
latestUrl = 'http://www.dominic-deegan.com/'
imageUrl = 'http://www.dominic-deegan.com/view.php?date=%s'
imageSearch = compile(r'<img src="(.+?save-as=.+?)" alt')
prevSearch = compile(r'"(view.php\?date=.+?)".+?prev21')
help = 'Index format: yyyy-mm-dd'
@classmethod
def namer(cls, imageUrl, pageUrl):
return getQueryParams(imageUrl)['save-as'][0].rsplit('.', 1)[0]
class DorkTower(_BasicScraper):
latestUrl = 'http://www.dorktower.com/'
imageUrl = None
imageSearch = compile(r'<img src="(http://www\.dorktower\.com/images/comics/[^"]+)"')
prevSearch = compile(r'<a href="(/previous\.php\?[^"]+)"')
help = 'Index format: None'
class DresdenCodak(_BasicScraper):
latestUrl = 'http://dresdencodak.com/'
imageUrl = None
imageSearch = compile(r'<img src="http://dresdencodak.com(/comics/.*?\.jpg)"')
prevSearch = compile(r'<a href="http://dresdencodak.com(/.*?)"><img src=http://dresdencodak.com/m_prev.png>')
starter = indirectStarter('http://dresdencodak.com/', compile(r'<div id="preview"><a href="http://dresdencodak.com/(\d+/\d+/\d+/.*?)">'))
class DonkBirds(_BasicScraper):
latestUrl = 'http://www.donkbirds.com/'
imageUrl = 'http://www.donkbirds.com/index.php?date=%s'
imageSearch = compile(r'<img src="(strips/.+?)"')
prevSearch = compile(r'<a href="(.+?)">Previous</a>')
help = 'Index format: yyyy-mm-dd'
class DrawnByDrunks(_BasicScraper):
starter = bounceStarter('http://www.drawnbydrunks.co.uk/', compile(r'<div class="nav-last"><a href="(.+?)">'))
imageUrl = 'http://www.drawnbydrunks.co.uk/?p=%s'
imageSearch = compile(r'<img src="(http://www.drawnbydrunks.co.uk/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
help = 'Index format: nnn'
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.split('=')[-1]
class DeathCord(_BasicScraper):
latestUrl = 'http://deathchord.com/index.php'
imageUrl = 'http://deathchord.com/__.php?comicID=%s'
imageSearch = compile(r'<img src="(http://deathchord.com/kill/\d+.+?)"')
prevSearch = compile(r'</a>?.+?<a href="(http://deathchord.com/.+?)"><img[^>]+?alt="Previous" />')
help = 'Index format: nnn'

File diff suppressed because it is too large

182
dosagelib/plugins/e.py Normal file
View file

@ -0,0 +1,182 @@
from re import compile, IGNORECASE
from ..helpers import _BasicScraper, indirectStarter
class EerieCuties(_BasicScraper):
latestUrl = 'http://www.eeriecuties.com/'
imageUrl = 'http://www.eeriecuties.com/d/%s.html'
imageSearch = compile(r'(/comics/.+?)"')
prevSearch = compile(r'(/d/.+?.html).+?/previous_day.gif')
help = 'Index format: yyyymmdd'
class EdgeTheDevilhunter(_BasicScraper):
name = 'KeenSpot/EdgeTheDevilhunter'
latestUrl = 'http://www.edgethedevilhunter.com/'
imageUrl = 'http://www.edgethedevilhunter.com/comics/%s'
imageSearch = compile(r'(http://www.edgethedevilhunter.com/comics/.+?)" alt')
prevSearch = compile(r'(http://www.edgethedevilhunter.com/comics/.+?)"><span class="prev')
help = 'Index format: mmddyyyy or name'
class Eriadan(_BasicScraper):
imageUrl = 'http://www.shockdom.com/eriadan/?p=%s'
imageSearch = compile(r'title="[^"]+?" src="http://www\.shockdom\.com/eriadan/(wp-content/uploads/.+?)"')
prevSearch = compile(r"<link rel='prev' title='.+?' href='http://www\.shockdom\.com/eriadan/(\?p=.+?)'")
starter = indirectStarter('http://www.shockdom.com/eriadan/', compile(r'<ul class="latest2">[^<]+?<li class="list-title"><a href="(http://www\.shockdom.com/eriadan/\?p=.+?)"'))
help = 'Index format: nnn (unpadded)'
@classmethod
def namer(cls, imageUrl, pageUrl):
return '%d' % (int(compile(r'p=(\d+)').search(pageUrl).group(1)))
class ElGoonishShive(_BasicScraper):
name = 'KeenSpot/ElGoonishShive'
latestUrl = 'http://www.egscomics.com/'
imageUrl = 'http://www.egscomics.com/?date=%s'
imageSearch = compile(r"'(comics/.+?)'")
prevSearch = compile(r"<a href='(/\?date=.+?)'.+?arrow_prev.gif")
help = 'Index format: yyyy-mm-dd'
class ElGoonishShiveNP(_BasicScraper):
name = 'KeenSpot/ElGoonishShiveNP'
latestUrl = 'http://www.egscomics.com/egsnp/'
imageUrl = 'http://www.egscomics.com/egsnp/?date=%s'
imageSearch = compile(r'<div class=\'comic2\'><img src=\'(comics/\d{4}/\d{2}.+?)\'')
prevSearch = compile(r'<a href=\'(.+?)\'[^>]+?onmouseover=\'\$\("navimg(6|2)"\)')
help = 'Index format: yyyy-mm-dd'
class ElsieHooper(_BasicScraper):
latestUrl = 'http://www.elsiehooper.com/todaysserial.htm'
imageUrl = 'http://www.elsiehooper.com/comics/comic%s.htm'
imageSearch = compile(r'<img src="(/comics_/.+?)">')
prevSearch = compile(r'<A href="(.+?)"><IMG (height=27 src="/images/previous.gif"|src="/images/previous.gif")', IGNORECASE)
help = 'Index format: nnn'
class EmergencyExit(_BasicScraper):
latestUrl = 'http://www.eecomics.net/'
imageUrl = ''
imageSearch = compile(r'"(comics/.+?)"')
prevSearch = compile(r'START.+?"(.+?)"')
help = 'God help us now!'
class ErrantStory(_BasicScraper):
latestUrl = 'http://www.errantstory.com/'
imageUrl = 'http://www.errantstory.com/archive.php?date=%s'
imageSearch = compile(r'<img[^>]+?src="([^"]*?comics/.+?)"')
prevSearch = compile(r'><a href="(.+?)">&lt;Previous</a>')
help = 'Index format: yyyy-mm-dd'
class EternalVenture(_BasicScraper):
latestUrl = 'http://www.pulledpunches.com/venture/'
imageUrl = 'http://www.beaglespace.com/pulledpunches/venture/?p=%s'
imageSearch = compile(r'<img src="(http://www.beaglespace.com/pulledpunches/venture/comics/.+?)"')
prevSearch = compile(r'id="prev"><a href="(http://www.beaglespace.com/pulledpunches/venture/.+?)" ')
help = 'Index format: nn'
class Evercrest(_BasicScraper):
latestUrl = 'http://www.evercrest.com/archives/20030308'
imageUrl = 'http://www.evercrest.com/archives/%s'
imageSearch = compile(r'<img.+?src="([^"]*/(images/oldstrips|archives/i)/[^"]*)"')
prevSearch = compile(r'<a.+?href="(http://www.evercrest.com/archives/\d+)">&lt; Previous')
help = 'Index format: yyyymmdd'
class EverybodyLovesEricRaymond(_BasicScraper):
latestUrl = 'http://geekz.co.uk/lovesraymond/'
imageUrl = 'http://geekz.co.uk/lovesraymond/archive/%s'
imageSearch = compile(r'<img src="((?:http://geekz.co.uk)?/lovesraymond/wp-content(?:/images)/ep\d+\w?\.jpg)"', IGNORECASE)
prevSearch = compile(r'&laquo; <a href="(http://geekz.co.uk/lovesraymond/archive/[^/"]*)">')
help = 'Index format: name-of-old-comic'
class EvilDiva(_BasicScraper):
latestUrl = 'http://www.evildivacomics.com/'
imageUrl = 'http://www.evildivacomics.com/%s.html'
imageSearch = compile(r'(/comics/.+?)"')
prevSearch = compile(r'http.+?com/(.+?)".+?"prev')
help = 'Index format: cpn (unpadded)'
class Exiern(_BasicScraper):
latestUrl = 'http://www.exiern.com/'
imageUrl = 'http://www.exiern.com/comic/%s'
imageSearch = compile(r'<img src="(http://www.exiern.com/comics/.+?)"')
prevSearch = compile(r'<a href="(http://www.exiern.com/.+?)" class="navi navi-prev"')
help = 'Index format: ChapterName-StripName'
class ExiernDarkReflections(_BasicScraper):
latestUrl = 'http://darkreflections.exiern.com/'
imageUrl = 'http://darkreflections.exiern.com/index.php?strip_id=%s'
imageSearch = compile(r'"(istrip.+?)"')
prevSearch = compile(r'First.+?(/index.+?)".+?prev')
help = 'Index format: n'
class ExtraLife(_BasicScraper):
latestUrl = 'http://www.myextralife.com/'
imageUrl = 'http://www.myextralife.com/comic/%s/'
imageSearch = compile(r'<img src="(http://www.myextralife.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.myextralife.com/comic/.+?)"')
help = 'Index format: mmddyyyy'
class EyeOfRamalach(_BasicScraper):
latestUrl = 'http://theeye.katbox.net/'
imageUrl = 'http://theeye.katbox.net/index.php?strip_id=%s'
imageSearch = compile(r'="(.+?strips/.+?)"')
prevSearch = compile(r'(index.php\?strip_id=.+?)".+?navigation_back')
help = 'Index format: n (unpadded)'
class EarthsongSaga(_BasicScraper):
latestUrl = 'http://www.earthsongsaga.com/'
imageUrl = None
imageSearch = compile(r'<img src="((?:\.\./)?images/vol\d+/ch\d+/\d+\.\w+)"')
prevSearch = compile(r'<a href="([^"]+\.html)"[^>]*><img src="(?:(?:\.\.)?/)?images/testing/prev')
starter = indirectStarter('http://www.earthsongsaga.com/',
compile(r'a href="(.+?)".+?current-page.jpg'))
@classmethod
def namer(cls, imageUrl, pageUrl):
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(\d+)\.\w+$', IGNORECASE).search(imageUrl)
return 'vol%02d_ch%02d_%02d' % (int(imgmatch.group(1)), int(imgmatch.group(2)), int(imgmatch.group(3)))
class ExploitationNow(_BasicScraper):
latestUrl = 'http://exploitationnow.com/'
imageUrl = 'http://exploitationnow.com/comic.php?date=%s'
imageSearch = compile(r'src="(comics/.+?)"')
prevSearch = compile(r' <a href="(.+?)" title="\[Back\]">')
help = 'Index format: yyyy-mm-dd'
class Ellerbisms(_BasicScraper):
latestUrl = 'http://www.ellerbisms.com/'
imageUrl = 'http://www.ellerbisms.com/?p=%s'
imageSearch = compile(r'<img src="(http://www.ellerbisms.com/comics/.+?)"')
prevSearch = compile(r'<a href="(http://www.ellerbisms.com/.+?)"><span class="prev">')
help = 'Index format: nnn'

145
dosagelib/plugins/f.py Normal file
View file

@ -0,0 +1,145 @@
from re import compile, IGNORECASE, MULTILINE
from ..helpers import _BasicScraper, indirectStarter
class FalconTwin(_BasicScraper):
latestUrl = 'http://www.falcontwin.com/'
imageUrl = 'http://www.falcontwin.com/index.html?strip=%s'
imageSearch = compile(r'"(strips/.+?)"')
prevSearch = compile(r'"prev"><a href="(index.+?)"')
help = 'Index format: nnn'
class FauxPas(_BasicScraper):
latestUrl = 'http://www.ozfoxes.net/cgi/pl-fp1.cgi'
imageUrl = 'http://www.ozfoxes.net/cgi/pl-fp1.cgi?%s'
imageSearch = compile(r'<img .*src="(.*fp/fp.*(png|jpg|gif))"')
prevSearch = compile(r'<a href="(pl-fp1\.cgi\?\d+)">Previous Strip')
help = 'Index format: nnn'
class FeyWinds(_BasicScraper):
imageUrl = 'http://kitsune.rydia.net/comic/page.php?id=%s'
imageSearch = compile(r"(../comic/pages//.+?)'")
prevSearch = compile(r"(page.php\?id=.+?)'.+?navprevious.png")
help = 'Index format: n (unpadded)'
starter = indirectStarter('http://kitsune.rydia.net/index.html',
compile(r'(comic/page.php\?id.+?)"'))
class FightCastOrEvade(_BasicScraper):
latestUrl = 'http://www.fightcastorevade.net/'
imageUrl = 'http://www.fightcastorevade.net/d/%s'
imageSearch = compile(r'<img src="(http://www.fightcastorevade.net/comics/.+?)"')
prevSearch = compile(r'"(.+?/d/.+?)".+?previous')
help = 'Index format: yyyymmdd.html'
class FilibusterCartoons(_BasicScraper):
latestUrl = 'http://www.filibustercartoons.com/'
imageUrl = 'http://www.filibustercartoons.com/index.php/%s'
imageSearch = compile(r'<img src="(http://www.filibustercartoons.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><img src=\'(.+?/arrow-left.gif)\'')
help = 'Index format: yyyy/mm/dd/name'
class FlakyPastry(_BasicScraper):
latestUrl = 'http://flakypastry.runningwithpencils.com/index.php'
imageUrl = 'http://flakypastry.runningwithpencils.com/comic.php?strip_id=%s'
imageSearch = compile(r'<img src="(comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)".+?btn_back')
help = 'Index format: nnnn'
class Flipside(_BasicScraper):
latestUrl = 'http://www.flipsidecomics.com/comic.php'
imageUrl = 'http://www.flipsidecomics.com/comic.php?i=%s'
imageSearch = compile(r'<IMG SRC="(comic/.+?)"')
prevSearch = compile(r'<A HREF="(comic.php\?i=\d+?)">&lt')
help = 'Index format: nnnn'
class Footloose(_BasicScraper):
latestUrl = 'http://footloosecomic.com/footloose/today.php'
imageUrl = 'http://footloosecomic.com/footloose/pages.php?page=%s'
imageSearch = compile(r'<img src="/footloose/(.+?)"')
prevSearch = compile(r'(?:first.+?[^>]).+?(/footloose/.+?)".+?(?:prev)')
# prevSearch = compile(r'(?:first.+?[^>]).+?(/footloose/.+?html).+?(?:prev|Prev)')
help = 'Index format: n (unpadded)'
class FragileGravity(_BasicScraper):
latestUrl = 'http://www.fragilegravity.com/'
imageUrl = 'http://www.fragilegravity.com/core.php?archive=%s'
imageSearch = compile(r'<IMG SRC="(strips/.+?)"')
prevSearch = compile(r'<A HREF="(.+?)"\nonMouseover="window.status=\'Previous Strip', MULTILINE | IGNORECASE)
help = 'Index format: yyyymmdd'
class Freefall(_BasicScraper):
latestUrl = 'http://freefall.purrsia.com/default.htm'
imageUrl = 'http://freefall.purrsia.com/ff%s/fc%s.htm'
imageSearch = compile(r'<img src="(/ff\d+/.+?.\w{3,4})"')
prevSearch = compile(r'<A HREF="(/ff\d+/.+?.htm)">Previous</A>')
help = 'Index format: nnnn/nnnnn'
class FantasyRealms(_BasicScraper):
imageUrl = 'http://www.fantasyrealmsonline.com/manga/%s.php'
imageSearch = compile(r'<img src="(\d{1,4}.\w{3,4})" width="540"', IGNORECASE)
prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
help = 'Index format: nnn'
starter = indirectStarter('http://www.fantasyrealmsonline.com/',
compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE))
class FullFrontalNerdity(_BasicScraper):
latestUrl = 'http://nodwick.humor.gamespy.com/ffn/index.php'
imageUrl = None
imageSearch = compile(r'<img src="(http://nodwick.humor.gamespy.com/ffn/strips/[^"]*)"', IGNORECASE)
prevSearch = compile(r'<a href="(index.php\?date=[0-9-]*)"><img src="back.jpg"', IGNORECASE)
class FunInJammies(_BasicScraper):
latestUrl = 'http://www.funinjammies.com/'
imageUrl = 'http://www.funinjammies.com/comic.php?issue=%s'
imageSearch = compile(r'(/comics/.+?)"')
prevSearch = compile(r'(/comic.php.+?)" id.+?prev')
help = 'Index format: n (unpadded)'
class Fallen(_BasicScraper):
imageUrl = 'http://www.fallencomic.com/pages/part%s/%s-p%s.htm'
imageSearch = compile(r'<IMG SRC="(page/.+?)"', IGNORECASE)
prevSearch = compile(r'<A HREF="(.+?)"><FONT FACE="Courier">Back', IGNORECASE)
help = 'Index format: nn-m (comicNumber-partNumber)'
starter = indirectStarter('http://www.fallencomic.com/fal-page.htm',
compile(r'\(NEW \d{2}/\d{2}/\d{2}\)\s*\n*\s*<a href="(pages/part\d+/\d+-p\d+\.htm)">\d+</a>', MULTILINE))
@classmethod
def namer(cls, imageUrl, pageUrl):
num = pageUrl.split('/')[-1].split('-')[0]
part = pageUrl.split('-')[-1].split('.')[0]
return '%s-%s' % (part, num)
def setStrip(self, index):
index, part = index.split('-')
self.currentUrl = self.imageUrl % (part, index, part)
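# Worked example (sketch): for a page URL ending in pages/part2/13-p2.htm the
# namer above yields num='13' and part='p2', so the file is saved as 'p2-13';
# conversely setStrip('13-2') rebuilds .../pages/part2/13-p2.htm.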
class FoxTails(_BasicScraper):
latestUrl = 'http://www.magickitsune.com/strips/current.html'
imageUrl = 'http://www.magickitsune.com/strips/%s'
imageSearch = compile(r'<img src=(img/.+?)[ |>]', IGNORECASE)
prevSearch = compile(r'(?<=first.gif)*(?<=</td>)*<a.*href=\'(.+?)\'.+?<img.+?src=\'../img/prev.gif\'>', IGNORECASE)
help = 'Index format: yyyymmdd'

140
dosagelib/plugins/g.py Normal file
View file

@ -0,0 +1,140 @@
from re import compile
from ..helpers import _BasicScraper, indirectStarter
class Galaxion(_BasicScraper):
latestUrl = 'http://galaxioncomics.com/'
imageUrl = 'http://galaxioncomics.com/?p=%s'
imageSearch = compile(r'(wordpress/comics/.+?)"')
prevSearch = compile(r'\| <a href="http://galaxioncomics.com/(\?p=.+?)".+?vious.gif')
help = 'Index format: non'
class Garanos(_BasicScraper):
latestUrl = 'http://www.garanos.com/'
imageUrl = 'http://www.garanos.com/pages/page-%s'
imageSearch = compile(r'<img src=.+?(/pages/.+?)"')
prevSearch = compile(r'<a href="(http://www.garanos.com/pages/page-.../)">&#9668; Previous<')
help = 'Index format: n (unpadded)'
class GUComics(_BasicScraper):
latestUrl = 'http://www.gucomics.com/comic/'
imageUrl = 'http://www.gucomics.com/comic/?cdate=%s'
imageSearch = compile(r'<IMG src="(/comics/\d{4}/gu_.*?)"')
prevSearch = compile(r'<A href="(/comic/\?cdate=\d+)"><IMG src="/images/cnav_prev')
help = 'Index format: yyyymmdd'
class GenrezvousPoint(_BasicScraper):
latestUrl = 'http://genrezvouspoint.com/'
imageUrl = 'http://genrezvouspoint.com/index.php?comicID=%s'
imageSearch = compile(r'<img src=\'(comics/.+?)\'')
prevSearch = compile(r' <a[^>]+?href="(.+?)">PREVIOUS</a>')
help = 'Index format: nnn'
class GirlGenius(_BasicScraper):
latestUrl = 'http://girlgeniusonline.com/comic.php'
imageUrl = 'http://www.girlgeniusonline.com/comic.php?date=%s'
imageSearch = compile(r"(/ggmain/strips/.+?)'")
prevSearch = compile(r"</a> <a href=.+?(/comic.php\?date=.+?)'.+?Previous")
help = 'Index format: yyyymmdd'
class GirlsWithSlingshots(_BasicScraper):
latestUrl = 'http://www.daniellecorsetto.com/gws.html'
imageUrl = 'http://www.daniellecorsetto.com/GWS%s.html'
imageSearch = compile(r'<img src="(images/gws/GWS\d{3}.jpg)"')
prevSearch = compile(r'(archive.php\?today=\d{3}&comic=\d{3})"[^>]*><img[^>]+src="images/gwsmenu/back_off.jpg"')
help = 'Index format: nnn'
class Girly(_BasicScraper):
latestUrl = 'http://girlyyy.com/'
imageUrl = 'http://girlyyy.com/go/%s'
imageSearch = compile(r'<img src="(http://girlyyy.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"> &nbsp;&lt;&nbsp;prev')
help = 'Index format: nnn'
class Goats(_BasicScraper):
latestUrl = 'http://www.goats.com/'
imageUrl = 'http://www.goats.com/archive/%s.html'
imageSearch = compile(r'<img.+?src="(/comix/.+?)"')
prevSearch = compile(r'<a href="(/archive/\d{6}.html)" class="button" title="go back">')
help = 'Index format: yymmdd'
class GoneWithTheBlastwave(_BasicScraper):
starter = indirectStarter('http://www.blastwave-comic.com/index.php?p=comic&nro=1',
compile(r'href="(index.php\?p=comic&amp;nro=\d+)"><img src="images/page/default/latest'))
imageUrl = 'http://www.blastwave-comic.com/index.php?p=comic&nro=%s'
imageSearch = compile(r'<img.+src=".+(/comics/.+?)"')
prevSearch = compile(r'href="(index.php\?p=comic&amp;nro=\d+)"><img src="images/page/default/previous')
help = 'Index format: n'
@classmethod
def namer(cls, imageUrl, pageUrl):
return '%02d' % int(compile(r'nro=(\d+)').search(pageUrl).group(1))
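# Example: a page URL containing nro=7 is saved as '07'.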
class GunnerkrigCourt(_BasicScraper):
latestUrl = 'http://www.gunnerkrigg.com/index2.php'
imageUrl = 'http://www.gunnerkrigg.com/archive_page.php?comicID=%s'
imageSearch = compile(r'<img src="(.+?//comics/.+?)"')
prevSearch = compile(r'<.+?(/archive_page.php\?comicID=.+?)".+?prev')
help = 'Index format: n'
class Gunshow(_BasicScraper):
latestUrl = 'http://gunshowcomic.com/'
imageUrl = 'http://gunshowcomic.com/d/%s.html'
imageSearch = compile(r'src="(/comics/.+?)"')
prevSearch = compile(r'(/d/\d+\.html)"><img[^>]+?src="/images/previous_day')
help = 'Index format: yyyy/mm/dd'
class GleefulNihilism(_BasicScraper):
latestUrl = 'http://gleefulnihilism.com/'
imageUrl = 'http://gleefulnihilism.com/comics/2009/12/01/just-one-of-the-perks/%s'
imageSearch = compile(r'<img src="(http://gleefulnihilism.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
help = 'Index format: yyyy/mm/dd/strip-name'
class GastroPhobia(_BasicScraper):
latestUrl = 'http://www.gastrophobia.com/'
imageUrl = 'http://www.gastrophobia.com/index.php?date=%s'
imageSearch = compile(r'<img src="(http://gastrophobia.com/comix/[^"]+)"[^>]*>(?!<br>)')
prevSearch = compile(r'<a href="(.+?)"><img src="pix/prev.gif" ')
help = 'Index format: yyyy-mm-dd'
class Geeks(_BasicScraper):
latestUrl = 'http://sevenfloorsdown.com/geeks/'
imageUrl = 'http://sevenfloorsdown.com/geeks/archives/%s'
imageSearch = compile(r'<img src=\'(http://sevenfloorsdown.com/geeks/comics/.+?)\'')
prevSearch = compile(r'<a href="(.+?)">&laquo; Previous')
help = 'Index format: nnn'
class GlassHalfEmpty(_BasicScraper):
latestUrl = 'http://www.defectivity.com/ghe/index.php'
imageUrl = 'http://www.defectivity.com/ghe/index.php?strip_id=%s'
imageSearch = compile(r'src="(comics/.+?)"')
prevSearch = compile(r'</a><a href="(.+?)"><img src="\.\./images/onback\.jpg"')
help = 'Index format: nnn'

65
dosagelib/plugins/h.py Normal file
View file

@ -0,0 +1,65 @@
from re import compile
from ..helpers import _BasicScraper
class HappyMedium(_BasicScraper):
latestUrl = 'http://happymedium.fast-bee.com/'
imageUrl = 'http://happymedium.fast-bee.com/%s'
imageSearch = compile(r'(/comics/.+?)"')
prevSearch = compile(r'com(/.+?)".+?"prev">&#9668')
help = 'Index format: yyyy/mm/chapter-n-page-n'
class Heliothaumic(_BasicScraper):
latestUrl = 'http://thaumic.net/'
imageUrl = 'http://thaumic.net/%s'
imageSearch = compile(r'<img src="(http://thaumic.net/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://thaumic.net/.+?)">')
help = 'Index format: yyyy/mm/dd/n(unpadded)-comicname'
class Housd(_BasicScraper):
latestUrl = 'http://housd.net/archive_page.php?comicID=1284'
imageUrl = 'http://housd.net/archive_page.php?comicID=%s'
imageSearch = compile(r'"(.+?/comics/.+?)"')
prevSearch = compile(r'"(h.+?comicID=.+?)".+?prev')
help = 'Index format: nnnn'
class HateSong(_BasicScraper):
latestUrl = 'http://hatesong.com/'
imageUrl = 'http://hatesong.com/%s/'
imageSearch = compile(r'src="(http://www.hatesong.com/strips/.+?)"')
prevSearch = compile(r'<div class="headernav"><a href="(http://hatesong.com/\d{4}/\d{2}/\d{2})')
help = 'Index format: yyyy/mm/dd'
class HorribleVille(_BasicScraper):
latestUrl = 'http://horribleville.com/d/20090517.html'
imageUrl = 'http://horribleville.com/d/%s.html'
imageSearch = compile(r'src="(/comics/.+?)"')
prevSearch = compile(r'(\d+\.html)"><img[^>]+?src="/images/previous_day.png"')
help = 'Index format: yyyy/mm/dd'
class HelpDesk(_BasicScraper):
latestUrl = 'http://www.ubersoft.net/'
imageUrl = 'http://www.ubersoft.net/comic/hd/%s/%s/%s'
imageSearch = compile(r'src="(http://www.ubersoft.net/files/comics/hd/hd\d{8}.png)')
prevSearch = compile(r'<a href="(/comic/.+?)">(.+?)previous</a>')
help = 'Index format: yyyy/mm/name'
class HardGraft(_BasicScraper):
latestUrl = 'http://hard-graft.net/'
imageUrl = 'http://hard-graft.net/?p=%s'
imageSearch = compile(r'<img src="(http://hard-graft.net/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)"')
help = 'Index format: nnn'

75
dosagelib/plugins/i.py Normal file
View file

@ -0,0 +1,75 @@
from re import compile, IGNORECASE
from ..helpers import _BasicScraper
class IDreamOfAJeanieBottle(_BasicScraper):
latestUrl = 'http://jeaniebottle.com/'
imageUrl = 'http://jeaniebottle.com/review.php?comicID=%s'
imageSearch = compile(r'(/comics/.+?)"')
prevSearch = compile(r'First".+?(review.php.+?)".+?prev_a.gif')
help = 'Index format: n (unpadded)'
class IrregularWebcomic(_BasicScraper):
latestUrl = 'http://www.irregularwebcomic.net/'
imageUrl = 'http://www.irregularwebcomic.net/cgi-bin/comic.pl?comic=%s'
imageSearch = compile(r'<img .*src="(.*comics/.*(png|jpg|gif))".*>')
prevSearch = compile(r'<a href="(/\d+\.html|/cgi-bin/comic\.pl\?comic=\d+)">Previous ')
help = 'Index format: nnn'
class InsideOut(_BasicScraper):
latestUrl = 'http://www.insideoutcomic.com/'
imageUrl = 'http://www.insideoutcomic.com/html/%s.html'
imageSearch = compile(r'Picture12LYR.+?C="(.+?/assets/images/.+?)"')
prevSearch = compile(r'Picture7LYR.+?F="(.+?/html/.+?)"')
help = 'Index format: n_comic_name'
class InkTank(_BasicScraper):
shortName = 'inktank'
def starter(self):
return self.baseUrl + self.shortName + '/'
def inkTank(name, shortName):
@classmethod
def _namer(cls, imageUrl, pageUrl):
return '20%s-%s' % (imageUrl[-6:-4], imageUrl[-12:-7])
baseUrl = 'http://www.inktank.com/%s/' % (shortName,)
return type('InkTank_%s' % name,
(_BasicScraper,),
dict(
name='InkTank/' + name,
latestUrl=baseUrl,
imageUrl=baseUrl + 'd/%s.html',
imageSearch=compile(r'<IMG SRC="(/images/[^/]+/cartoons/\d{2}-\d{2}-\d{2}.+?)"'),
prevSearch=compile(r'<A HREF="(/[^/]+/index.cfm\?nav=\d+?)"><IMG SRC="/images/nav_last.gif"'),
help='Index format: n (unpadded)')
)
at = inkTank('AngstTechnology', 'AT')
ww = inkTank('WeakEndWarriors', 'WW')
swo = inkTank('SorryWereOpen', 'SWO')
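# The inkTank() factory above creates one scraper class per strip at import
# time; inkTank('AngstTechnology', 'AT') is roughly equivalent to this
# hand-written class (sketch; assumes mm-dd-yy file names as matched by
# imageSearch):
#
# class InkTank_AngstTechnology(_BasicScraper):
#     name = 'InkTank/AngstTechnology'
#     latestUrl = 'http://www.inktank.com/AT/'
#     imageUrl = 'http://www.inktank.com/AT/d/%s.html'
#     # _namer turns .../cartoons/12-31-05.gif into '2005-12-31'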
class IlmanNaista(_BasicScraper):
latestUrl = 'http://kvantti.tky.fi/in/archive_end.shtml'
imageUrl = 'http://kvantti.tky.fi/in/%s.shtml'
imageSearch = compile(r'<img src="(kuvat/in_.+?)"', IGNORECASE)
prevSearch = compile(r'<a href="(\d+.shtml)"><img width="90" height="45" src="deco/edellinen.png" alt="Edellinen"/></a>')
class ICantDrawFeet(_BasicScraper):
latestUrl = 'http://icantdrawfeet.com/'
imageUrl = 'http://icantdrawfeet.com/%s'
imageSearch = compile(r'src="(http://icantdrawfeet.com/comics/.+?)"')
prevSearch = compile(r'<a href="(http://icantdrawfeet.com/.+?)"><img src="http://icantdrawfeet.com/pageimages/prev.png"')
help = 'Index format: yyyy/mm/dd/stripname'

39
dosagelib/plugins/j.py Normal file
View file

@ -0,0 +1,39 @@
from re import compile, MULTILINE
from ..helpers import _BasicScraper
class Jack(_BasicScraper):
latestUrl = 'http://www.pholph.com/'
imageUrl = 'http://www.pholph.com/strip.php?id=5&sid=%s'
imageSearch = compile(r'<img src="(./artwork/.+?/Jack.+?)"')
prevSearch = compile(r'\|<a href="(.+?)">Previous Strip</a>')
help = 'Index format: n (unpadded)'
class JerkCity(_BasicScraper):
latestUrl = 'http://www.jerkcity.com/'
imageUrl = 'http://www.jerkcity.com/jerkcity%s'
imageSearch = compile(r'"jerkcity.+?">.+?"(/jerkcity.+?)"')
prevSearch = compile(r'"(jerkcity.+?)">.+?"/jerkcity.+?"')
help = 'Index format: unknown'
class JoeAndMonkey(_BasicScraper):
latestUrl = 'http://www.joeandmonkey.com/'
imageUrl = 'http://www.joeandmonkey.com/%s'
imageSearch = compile(r'"(/comic/[^"]+)"')
prevSearch = compile(r"<a href='(/\d+)'>Previous")
help = 'Index format: nnn'
class JoyOfTech(_BasicScraper):
latestUrl = 'http://www.geekculture.com/joyoftech/index.html'
imageUrl = 'http://www.geekculture.com/joyoftech/joyarchives/%s.html'
imageSearch = compile(r'<img src="(joyimages/.+?|../joyimages/.+?)" alt="The Joy')
prevSearch = compile(r'<a href="((?:joyarchives/)?\w+\.\w{3,4})">(?:<font[^>]*>)?<img[^>]*><br>[\s\n]*Previous Joy', MULTILINE)
help = 'Index format: nnn'

62
dosagelib/plugins/k.py Normal file
View file

@ -0,0 +1,62 @@
from re import compile, IGNORECASE
from ..helpers import _BasicScraper
class KernelPanic(_BasicScraper):
latestUrl = 'http://www.ubersoft.net/kpanic/'
imageUrl = 'http://www.ubersoft.net/kpanic/d/%s'
imageSearch = compile(r'src="(.+?/kp/kp.+?)" ')
prevSearch = compile(r'<li class="previous"><a href="(.+?)">')
help = 'Index format: yyyymmdd.html'
@classmethod
def namer(cls, imageUrl, pageUrl):
return imageUrl.split('/')[-1].split('.')[0]
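# Example: an image URL ending in kp/kp20090101.png is saved as 'kp20090101'.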
class Key(_BasicScraper):
latestUrl = 'http://key.shadilyn.com/latestpage.html'
imageUrl = 'http://key.shadilyn.com/pages/%s.html'
imageSearch = compile(r'"((?:images/.+?)|(?:pages/images/.+?))"')
prevSearch = compile(r'</a><a href="(.+?html)".+?prev')
help = 'Index format: nnn'
class Krakow(_BasicScraper):
latestUrl = 'http://www.krakowstudios.com/'
imageUrl = 'http://www.krakowstudios.com/archive.php?date=%s'
imageSearch = compile(r'<img src="(comics/.+?)"')
prevSearch = compile(r'<a href="(archive\.php\?date=.+?)"><img border=0 name=previous_day')
help = 'Index format: yyyymmdd'
class Kukuburi(_BasicScraper):
latestUrl = 'http://www.kukuburi.com/current/'
imageUrl = 'http://thaumic.net/%s'
imageSearch = compile(r'img src="(http://www.kukuburi.com/../comics/.+?)"')
prevSearch = compile(r'nav-previous.+?"(http.+?)"')
help = 'Index format: non'
class KevinAndKell(_BasicScraper):
latestUrl = 'http://www.kevinandkell.com/'
imageUrl = 'http://www.kevinandkell.com/%s/kk%s%s.html'
imageSearch = compile(r'<img.+?src="(/?(\d+/)?strips/kk\d+.gif)"', IGNORECASE)
prevSearch = compile(r'<a.+?href="(/?(\.\./)?\d+/kk\d+\.html)"[^>]*><span>Previous Strip', IGNORECASE)
help = 'Index format: yyyy-mm-dd'
def setStrip(self, index):
self.currentUrl = self.imageUrl % tuple(map(int, index.split('-')))
class KillerKomics(_BasicScraper):
latestUrl = 'http://www.killerkomics.com/web-comics/index_ang.cfm'
imageUrl = 'http://www.killerkomics.com/web-comics/%s.cfm'
imageSearch = compile(r'<img src="(http://www.killerkomics.com/FichiersUpload/Comics/.+?)"')
prevSearch = compile(r'<div id="precedent"><a href="(.+?)"')
help = 'Index format: strip-name'

File diff suppressed because it is too large

89
dosagelib/plugins/l.py Normal file
View file

@ -0,0 +1,89 @@
from re import compile
from ..helpers import _BasicScraper, indirectStarter
class LasLindas(_BasicScraper):
latestUrl = 'http://www.katbox.net/laslindas/'
imageUrl = 'http://www.katbox.net/laslindas/index.php?strip_id=%s'
imageSearch = compile(r'"(istrip_files/strips/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><[^>]+?alt="Back"')
help = 'Index format: n (unpadded)'
class LastBlood(_BasicScraper):
latestUrl = 'http://www.lastblood.net/main/'
imageUrl = 'http://www.lastblood.net/main/%s'
imageSearch = compile(r'(/comicfolder/.+?)" alt')
prevSearch = compile(r'Previous Comic:</small><br />&laquo; <a href="(.+?)">')
help = 'Index format: yyyy/mm/dd/(page number and name)'
class LesbianPiratesFromOuterSpace(_BasicScraper):
latestUrl = 'http://rosalarian.com/lesbianpirates/'
imageUrl = 'http://rosalarian.com/lesbianpirates/?p=%s'
imageSearch = compile(r'(/lesbianpirates/comics/.+?)"')
prevSearch = compile(r'/(\?p=.+?)">&laquo')
help = 'Index format: n'
class Lint(_BasicScraper):
latestUrl = 'http://www.purnicellin.com/lint/'
imageUrl = 'http://www.purnicellin.com/lint/%s'
imageSearch = compile(r'<img src="(http://www.purnicellin.com/lint/comics/.+?)"')
prevSearch = compile(r'\| <a href="([^"]+)" rel="prev">')
help = 'Index format: yyyy/mm/dd/num-name'
class LookingForGroup(_BasicScraper):
latestUrl = 'http://www.lfgcomic.com/page/latest'
imageUrl = 'http://www.lfgcomic.com/page/%s'
imageSearch = compile(r'<img src="(http://newcdn.lfgcomic.com/uploads/comics/.+?)"')
prevSearch = compile(r'<a href="(/page/\d+)" id="navtop-prev"')
starter = indirectStarter('http://www.lfgcomic.com/', compile(r'<a href="(/page/\d+)" id="feature-preview"'))
nameSearch = compile(r'/page/(\d+)')
help = 'Index format: nnn'
def namer(self, imageUrl, pageUrl):
return self.nameSearch.search(pageUrl).group(1)
class Loserz(_BasicScraper):
latestUrl = 'http://bukucomics.com/loserz/'
imageUrl = 'http://bukucomics.com/loserz/go/%s'
imageSearch = compile(r'<img src="(http://bukucomics.com/loserz/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"> &nbsp;&lt;&nbsp;')
help = 'Index format: n (unpadded)'
class LittleGamers(_BasicScraper):
latestUrl = 'http://www.little-gamers.com/'
imageUrl = 'http://www.little-gamers.com/%s'
imageSearch = compile(r'<img src="(http://www.little-gamers.com/comics/[^"]+)"')
prevSearch = compile(r'href="(.+?)"><img id="comic-nav-prev"')
help = 'Index format: yyyy/mm/dd/name'
class LegoRobot(_BasicScraper):
latestUrl = 'http://www.legorobotcomics.com/'
imageUrl = 'http://www.legorobotcomics.com/?id=%s'
imageSearch = compile(r'id="the_comic" src="(comics/.+?)"')
prevSearch = compile(r'(\?id=\d+)"><img src="images/back.png"')
help = 'Index format: nnnn'
class LeastICouldDo(_BasicScraper):
latestUrl = 'http://www.leasticoulddo.com/'
imageUrl = 'http://www.leasticoulddo.com/comic/%s'
imageSearch = compile(r'<img src="(http://cdn.leasticoulddo.com/comics/\d{8}.\w{1,4})" />')
prevSearch = compile(r'<a href="(/comic/\d{8})">Previous</a>')
help = 'Index format: yyyymmdd'

107
dosagelib/plugins/m.py Normal file
View file

@ -0,0 +1,107 @@
from re import compile, IGNORECASE
from ..helpers import _BasicScraper, queryNamer
class MadamAndEve(_BasicScraper):
latestUrl = 'http://www.madamandeve.co.za/week_of_cartns.php'
imageUrl = 'http://www.madamandeve.co.za/week_of_cartns.php'
imageSearch = compile(r'<IMG BORDER="0" SRC="(cartoons/me\d{6}\.(gif|jpg))">')
prevSearch = compile(r'<a href="(weekend_cartoon.php)"')
help = 'Index format: (none)'
class MagicHigh(_BasicScraper):
latestUrl = 'http://www.doomnstuff.com/magichigh/index.php'
imageUrl = 'http://www.doomnstuff.com/magichigh/index.php?strip_id=%s'
imageSearch = compile(r'(istrip_files/strips/.+?)"')
prevSearch = compile(r'First .+?"(/magichigh.+?)".+?top_back')
help = 'Index format: n'
class Marilith(_BasicScraper):
latestUrl = 'http://www.marilith.com/'
imageUrl = 'http://www.marilith.com/archive.php?date=%s'
imageSearch = compile(r'<img src="(comics/.+?)" border')
prevSearch = compile(r'<a href="(archive\.php\?date=.+?)"><img border=0 name=previous_day')
help = 'Index format: yyyymmdd'
class MarryMe(_BasicScraper):
latestUrl = 'http://marrymemovie.com/main/'
imageUrl = 'http://marrymemovie.com/main/%s'
imageSearch = compile(r'(/comicfolder/.+?)"')
prevSearch = compile(r'Previous Comic:</small><br />&#171; <a href="(.+?)">')
help = 'Index format: good luck !'
class Meek(_BasicScraper):
latestUrl = 'http://www.meekcomic.com/'
imageUrl = 'http://www.meekcomic.com/%s'
imageSearch = compile(r'meekcomic.com(/comics/.+?)"')
prevSearch = compile(r'\s.+?(http://www.meekcomic.com/.+?)".+?Previous<')
help = 'Index format: yyyy/mm/dd/ch-p/'
class MegaTokyo(_BasicScraper):
latestUrl = 'http://www.megatokyo.com/'
imageUrl = 'http://www.megatokyo.com/strip/%s'
imageSearch = compile(r'"(strips/.+?)"', IGNORECASE)
prevSearch = compile(r'"(./strip/\d+?)">Prev')
help = 'Index format: nnnn'
class MyPrivateLittleHell(_BasicScraper):
latestUrl = 'http://mutt.purrsia.com/mplh/'
imageUrl = 'http://mutt.purrsia.com/mplh/?date=%s'
imageSearch = compile(r'<img.+?src="(comics/.+?)"')
prevSearch = compile(r'<a.+?href="(\?date=\d+/\d+/\d+)">Prev</a>')
help = 'Index format: mm/dd/yyyy'
class MacHall(_BasicScraper):
latestUrl = 'http://www.machall.com/'
imageUrl = 'http://www.machall.com/view.php?date=%s'
imageSearch = compile(r'<img src="(comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><img[^>]+?src=\'drop_shadow/previous.gif\'>')
help = 'Index format: yyyy-mm-dd'
class Misfile(_BasicScraper):
latestUrl = 'http://www.misfile.com/'
imageUrl = 'http://www.misfile.com/?page=%s'
imageSearch = compile(r'<img src="(overlay\.php\?pageCalled=\d+)">')
prevSearch = compile(r'<a href="(\?page=\d+)"><img src="/images/back\.gif"')
help = 'Index format: n (unpadded)'
namer = queryNamer('pageCalled')
class MysteriesOfTheArcana(_BasicScraper):
latestUrl = 'http://mysteriesofthearcana.com/'
imageUrl = 'http://mysteriesofthearcana.com/index.php?action=comics&cid=%s'
imageSearch = compile(r'(image.php\?type=com&i=.+?)"')
prevSearch = compile(r'(index.php\?action=comics&cid=.+?)".+?show_prev1')
help = 'Index format: n (unpadded)'
class MysticRevolution(_BasicScraper):
latestUrl = 'http://www.mysticrev.com/index.php'
imageUrl = 'http://www.mysticrev.com/index.php?cid=%s'
imageSearch = compile(r'(comics/.+?)"')
prevSearch = compile(r'(\?cid=.+?)".+?prev.gif')
help = 'Index format: n (unpadded)'
class MontyAndWooly(_BasicScraper):
latestUrl = 'http://www.montyandwoolley.co.uk/'
imageUrl = 'http://montyandwoolley.co.uk/%s'
imageSearch = compile(r'<img src="(http://montyandwoolley.co.uk/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
help = 'Index format: yyyy/mm/dd/strip-name'

171
dosagelib/plugins/n.py Normal file
View file

@ -0,0 +1,171 @@
from re import compile, IGNORECASE
from ..helpers import _BasicScraper, indirectStarter, _PHPScraper
class NamirDeiter(_BasicScraper):
latestUrl = 'http://www.namirdeiter.com/'
imageUrl = 'http://www.namirdeiter.com/comics/index.php?date=%s'
imageSearch = compile(r'<img.+?(/comics/\d{8}.+?)[\'|\"]')
prevSearch = compile(r'(/comics/index.php\?date=.+?|http://www.namirdeiter.com/comics/index.php\?date=.+?)[\'|\"].+?previous')
help = 'Index format: yyyymmdd'
class NeoEarth(_BasicScraper):
latestUrl = 'http://www.neo-earth.com/NE/'
imageUrl = 'http://www.neo-earth.com/NE/index.php?date=%s'
imageSearch = compile(r'<img src="(strips/.+?)"')
prevSearch = compile(r'<a href="(.+?)">Previous</a>')
help = 'Index format: yyyy-mm-dd'
class Nervillsaga(_BasicScraper):
latestUrl = 'http://www.nervillsaga.com/'
imageUrl = 'http://www.nervillsaga.com/index.php?s=%s'
imageSearch = compile(r'"(pic/.+?)"')
prevSearch = compile(r'"(.+?)">Previous')
help = 'Index format: nnn'
class NewAdventuresOfBobbin(_BasicScraper):
latestUrl = 'http://bobbin-comic.com/'
imageUrl = 'http://www.bobbin-comic.com/wordpress/?p=%s'
imageSearch = compile(r'<img src="(http://www.bobbin-comic.com/wordpress/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
help = 'Index format: n'
class NewWorld(_BasicScraper):
latestUrl = 'http://www.tfsnewworld.com/'
imageUrl = 'http://www.tfsnewworld.com/%s'
imageSearch = compile(r'<img src="(http://www.tfsnewworld.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
help = 'Index format: yyyy/mm/dd/stripn'
class Nicky510(_BasicScraper):
latestUrl = 'http://www.nicky510.com/'
imageUrl = 'http://www.nicky510.com/%s'
imageSearch = compile(r'(http://www.nicky510.com/comics/.+?)"')
prevSearch = compile(r'<a href="(http://www.nicky510.com/.+?)" class="navi navi-prev"')
help = 'Index format: yyyy/mm/dd/stripname/'
class NoNeedForBushido(_BasicScraper):
latestUrl = 'http://www.noneedforbushido.com/latest/'
imageUrl = 'http://www.noneedforbushido.com/%s'
imageSearch = compile(r'<div class="comics"><img src="([^"]+)"')
prevSearch = compile(r'<a href="([^"]+)" title="[^"]*" class="previous-comic-link')
help = 'Index format: yyyy/comic/nnn'
class Nukees(_BasicScraper):
latestUrl = 'http://www.nukees.com/'
imageUrl = 'http://www.nukees.com/d/%s'
imageSearch = compile(r'"comic".+?"(/comics/.+?)"')
prevSearch = compile(r'"(/d/.+?)".+?previous')
help = 'Index format: yyyymmdd.html'
class _NuklearPower(_BasicScraper):
imageSearch = compile(r'<img src="(http://www.nuklearpower.com/comics/.+?)"')
prevSearch = compile(r'><a href="(.+?)">Previous</a>')
help = 'Index format: yyyy/mm/dd/name'
@property
def baseUrl(self):
return 'http://www.nuklearpower.com/%s/' % (self.shortName,)
def starter(self):
return self.baseUrl
@property
def imageUrl(self):
return self.baseUrl + '%s'
class NP8BitTheater(_NuklearPower):
name = 'NuklearPower/8BitTheater'
shortName = '8-bit-theater'
class NPWarbot(_NuklearPower):
name = 'NuklearPower/Warbot'
shortName = 'warbot'
class NPHIKYM(_NuklearPower):
name = 'NuklearPower/HowIKilledYourMaster'
shortName = 'hikym'
class NPAtomicRobo(_NuklearPower):
name = 'NuklearPower/AtomicRobo'
shortName = 'atomic-robo'
class NekoTheKitty(_PHPScraper):
basePath = 'http://www.nekothekitty.net/cusp/'
latestUrl = 'latest.php'
prevSearch = compile(r"<a href=\"(http://www\.nekothekitty\.net/cusp/daily\.php\?date=\d+)\"><img[^>]+alt='Previous Comic'")
class NichtLustig(_BasicScraper):
imageUrl = 'http://www.nichtlustig.de/toondb/%s.html'
imageSearch = compile(r'<img src="([^"]+)" id="cartoon"', IGNORECASE)
prevSearch = compile(r'<a href="(\d+\.html)"[^<>]*><img[^<>]*id="pfeil_links', IGNORECASE)
help = 'Index format: yymmdd'
starter = indirectStarter('http://www.nichtlustig.de/main.html',
compile(r'<a href="([^"]*toondb/\d+\.html)"', IGNORECASE))
class NinthElsewhere(_BasicScraper):
latestUrl = 'http://www.9thelsewhere.com/icenter.html'
imageUrl = 'http://www.9thelsewhere.com/%s/9e%s_%s.html'
imageSearch = compile(r'<img src="([^"]*9e\d+_\d+\.jpg)"')
prevSearch = compile(r'<a href="([^"]+\.html)">\s*PREV')
help = 'Index format: year-chapter-page'
def setStrip(self, index):
self.currentUrl = self.imageUrl % tuple(map(int, index.split('-')))
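# Example: setStrip('2005-3-12') sets currentUrl to
# http://www.9thelsewhere.com/2005/9e3_12.html.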
class Nodwick(_BasicScraper):
imageUrl = None
imageSearch = compile(r'<img src="(http://nodwick.humor.gamespy.com/gamespyarchive/strips/[^"]*)"', IGNORECASE)
prevSearch = compile(r'<a href="(index.php\?date=[0-9-]*)"><img src="back.jpg"', IGNORECASE)
starter = indirectStarter('http://nodwick.humor.gamespy.com/gamespyarchive/index.php', prevSearch)
help = 'Index format: None'
class NekkoAndJoruba(_BasicScraper):
latestUrl = 'http://www.nekkoandjoruba.com/'
imageUrl = 'http://www.nekkoandjoruba.com/?p=%s'
imageSearch = compile(r'<img src="(http://www.nekkoandjoruba.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)">&lsaquo;</a>')
help = 'Index format: nnn'
class NobodyScores(_BasicScraper):
latestUrl = 'http://nobodyscores.loosenutstudio.com/'
imageUrl = 'http://nobodyscores.loosenutstudio.com/index.php?id=%s'
imageSearch = compile(r'><img src="(http://nobodyscores.loosenutstudio.com/comix/.+?)"')
prevSearch = compile(r'<a href="(http://nobodyscores.loosenutstudio.com/index.php.+?)">the one before </a>')
help = 'Index format: nnn'

23
dosagelib/plugins/num.py Normal file
View file

@ -0,0 +1,23 @@
from re import compile
from ..helpers import _BasicScraper
class NineteenSeventySeven(_BasicScraper):
name = '1977'
latestUrl = 'http://www.1977thecomic.com/'
imageUrl = 'http://www.1977thecomic.com/%s'
imageSearch = compile(r'<img src="(http://www.1977thecomic.com/comics-1977/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
help = 'Index format: yyyy/mm/dd/strip-name'
class EightHalfByEleven(_BasicScraper):
name = '8HalfByEleven'
latestUrl = 'http://www.lucastds.com/webcomic/'
imageUrl = 'http://www.lucastds.com/webcomic/index.php?strip_id=%s'
imageSearch = compile(r'<img src="(istrip_files/strips/.+?)"')
prevSearch = compile(r'</a><a href="(/webcomic/.+?)"><img[^>]+?src="themes/tedzsee/images/previous_a.png">')
help = 'Index format: nnn'

84
dosagelib/plugins/o.py Normal file
View file

@ -0,0 +1,84 @@
from re import compile, IGNORECASE
from ..helpers import _BasicScraper, indirectStarter
class OctopusPie(_BasicScraper):
starter = indirectStarter('http://www.octopuspie.com/2007-05-14/001-pea-wiggle/',
compile(r'<a href="(http://www.octopuspie.com/.+?)"><b>latest comic</b>', IGNORECASE))
imageUrl = 'http://www.octopuspie.com/%s'
imageSearch = compile(r'<img src="(http://www.octopuspie.com/strippy/.+?)"')
prevSearch = compile(r'<link rel=\'prev\'[^>]+?href=\'(http://www.octopuspie.com/.+?)\'')
help = 'Index format: yyyy-mm-dd/nnn-strip-name'
class OddFish(_BasicScraper):
latestUrl = 'http://www.odd-fish.net/'
imageUrl = 'http://www.odd-fish.net/viewing.php?&comic_id=%s'
imageSearch = compile(r'<img src="(images/\d{1,4}.\w{3,4})" ')
prevSearch = compile(r'<a href="(.+?)"><img src="http://www.odd-fishing.net/i/older.gif" ')
help = 'Index format: n (unpadded)'
class OhMyGods(_BasicScraper):
latestUrl = 'http://ohmygods.co.uk/'
imageUrl = 'http://ohmygods.co.uk/strips/%s'
imageSearch = compile(r'<p class="omgs-strip"><img src="(/system/files/.+?)"')
prevSearch = compile(r'<li class="custom_pager_prev"><a href="(/strips/.+?)"')
help = 'Index format: yyyy-mm-dd'
class OnTheEdge(_BasicScraper):
latestUrl = 'http://www.ontheedgecomics.com/'
imageUrl = 'http://ontheedgecomics.com/comic/ote%s'
imageSearch = compile(r'<img src="(http://ontheedgecomics.com/comics/.+?)"')
prevSearch = compile(r'<a href="([^"]+)" rel="prev">')
help = 'Index format: nnn (unpadded)'
class OneQuestion(_BasicScraper):
latestUrl = 'http://onequestioncomic.com/'
imageUrl = 'http://onequestioncomic.com/comics/%s/'
imageSearch = compile(r'(istrip_files.+?)"')
prevSearch = compile(r'First.+?"(comic.php.+?)".+?previous.png')
help = 'Index format: n (unpadded)'
class OurHomePlanet(_BasicScraper):
latestUrl = 'http://gdk.gd-kun.net/'
imageUrl = 'http://gdk.gd-kun.net/%s.html'
imageSearch = compile(r'<img src="(pages/comic.+?)"')
prevSearch = compile(r'coords="50,18,95,65".+?href="(.+?\.html)".+?alt=')
help = 'Index format: n (unpadded)'
class OkCancel(_BasicScraper):
imageUrl = 'http://www.ok-cancel.com/comic/%s.html'
imageSearch = compile(r'src="(http://www.ok-cancel.com/strips/okcancel\d{8}.gif)"', IGNORECASE)
prevSearch = compile(r'<div class="previous"><a href="(http://www.ok-cancel.com/comic/\d{1,4}.html)">', IGNORECASE)
starter = indirectStarter('http://www.ok-cancel.com/', prevSearch)
help = 'Index format: yyyymmdd'
class Oglaf(_BasicScraper):
starter = indirectStarter('http://oglaf.com/',
compile(r'<a href="(.+?)"><img src="over18.gif"', IGNORECASE))
imageUrl = 'http://oglaf.com/%s.html'
imageSearch = compile(r'/><img src="(.+?)"[^>]+?width="760" height="596"', IGNORECASE)
prevSearch = compile(r'<a href="(.+?)"[^>]+?><img src="prev.gif"', IGNORECASE)
help = 'Index format: nn'
class OverCompensating(_BasicScraper):
latestUrl = 'http://www.overcompensating.com/'
imageUrl = 'http://www.overcompensating.com/posts/%s.html'
imageSearch = compile(r'<img src="(/comics/.+?)"')
prevSearch = compile(r'"><a href="(.+?)"[^>]+?>&nbsp;\<\- &nbsp;</a>')
help = 'Index format: yyyymmdd'

172
dosagelib/plugins/p.py Normal file
View file

@ -0,0 +1,172 @@
from re import compile, IGNORECASE
from ..helpers import _BasicScraper, bounceStarter, queryNamer
class PartiallyClips(_BasicScraper):
latestUrl = 'http://www.partiallyclips.com/'
imageUrl = 'http://www.partiallyclips.com/index.php?id=%s'
imageSearch = compile(r'"(http://www.partiallyclips.com/storage/.+?)"')
prevSearch = compile(r'"(index.php\?id=.+?)".+?prev')
help = 'Index format: nnnn'
class PastelDefender(_BasicScraper):
latestUrl = 'http://www.pasteldefender.com/coverbackcover.html'
imageUrl = 'http://www.pasteldefender.com/%s.html'
imageSearch = compile(r'<IMG SRC="(images/.+?)" WIDTH="742"')
prevSearch = compile(r'<A HREF="([^"]+)"><IMG SRC="images/back\.gif"')
help = 'Index format: nnn'
class PebbleVersion(_BasicScraper):
latestUrl = 'http://www.pebbleversion.com/'
imageUrl = 'http://www.pebbleversion.com/Archives/Strip%s.html'
imageSearch = compile(r'<img src="(ComicStrips/.+?|../ComicStrips/.+?)"')
prevSearch = compile(r'<a href="((?!.+?">First Comic)Archives/Strip.+?|(?=.+?">Previous Comic)(?!.+?">First Comic)Strip.+?)"')
help = 'Index format: n (unpadded)'
class PennyAndAggie(_BasicScraper):
latestUrl = 'http://www.pennyandaggie.com/index.php'
imageUrl = 'http://www.pennyandaggie.com/index.php?p=%s'
imageSearch = compile(r'src=".+?(/comics/.+?)"')
prevSearch = compile(r"</a><a href='(index.php\?p=.+?)'.+?prev")
help = 'Index format: n (unpadded)'
class PennyArcade(_BasicScraper):
starter = bounceStarter('http://www.penny-arcade.com/comic/',
compile(r'<a href="(/comic/[^"]+)">Next</a>'))
imageUrl = 'http://www.penny-arcade.com/comic/%s/'
imageSearch = compile(r'(?<!<!--)<img src="(http://art\.penny-arcade\.com/photos/[^"]+)"')
prevSearch = compile(r'<a href="(/comic/[^"]+)">Back</a>')
help = 'Index format: yyyy/mm/dd'
@classmethod
def namer(cls, imageUrl, pageUrl):
yyyy, mm, dd = pageUrl.split('/')[-4:-1]
return '%04d%02d%02d' % (int(yyyy), int(mm), int(dd))
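# Example: a page URL ending in /comic/2012/06/20/ (trailing slash included)
# is saved as '20120620'.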
class PeppermintSaga(_BasicScraper):
latestUrl = 'http://www.pepsaga.com/'
imageUrl = 'http://www.pepsaga.com/comics/%s/'
imageSearch = compile(r'src=.+?(http.+?/comics/.+?)"')
prevSearch = compile(r'First</a><a href="(http://www.pepsaga.com/comics/.+?/)"')
help = 'Index format: non'
class PerkiGoth(_BasicScraper):
latestUrl = 'http://mutt.purrsia.com/main.php'
imageUrl = 'http://mutt.purrsia.com/main.php?date=%s'
imageSearch = compile(r'<img.+?src="(comics/.+?)"')
prevSearch = compile(r'<a.+?href="(\?date=\d+/\d+/\d+)">Prev</a>')
help = 'Index format: mm/dd/yyyy'
class Pixel(_BasicScraper):
latestUrl = 'http://www.chrisdlugosz.net/pixel/'
imageUrl = 'http://www.chrisdlugosz.net/pixel/%s.shtml'
imageSearch = compile(r'<IMG SRC="(\d+\.png)" ALT=""><BR><BR>')
prevSearch = compile(r'<A HREF="(\d+\.shtml)"><IMG SRC="_prev.png" BORDER=0 ALT=""></A>')
help = 'Index format: nnn'
class PiledHigherAndDeeper(_BasicScraper):
starter = bounceStarter('http://www.phdcomics.com/comics/archive.php', compile(r'<a href=(archive\.php\?comicid=\d+)><img height=52 width=49 src=images/next_button\.gif border=0 align=middle>'))
imageUrl = 'http://www.phdcomics.com/comics/archive.php?comicid=%s'
imageSearch = compile(r'<img src=(http://www\.phdcomics\.com/comics/archive/phd\d+s?\.gif)')
prevSearch = compile(r'<a href=(archive\.php\?comicid=\d+)><img height=52 width=49 src=images/prev_button\.gif border=0 align=middle>')
help = 'Index format: n (unpadded)'
namer = queryNamer('comicid', usePageUrl=True)
class Precocious(_BasicScraper):
latestUrl = 'http://www.precociouscomic.com/'
imageUrl = 'http://www.precociouscomic.com/comic.php?page=%s'
imageSearch = compile(r'(archive/strips/.+?)"')
prevSearch = compile(r'First.+?(comic.php\?page=.+?)">Previous<')
help = 'Index format: n (unpadded)'
class PvPonline(_BasicScraper):
latestUrl = 'http://www.pvponline.com/'
imageUrl = None
imageSearch = compile(r'<img src="(http://www.pvponline.com/comics/pvp\d{8}\..+?)"', IGNORECASE)
prevSearch = compile(r'<a href="(http://www.pvponline.com/[^"]+)"[^>]*>&lsaquo; Previous', IGNORECASE)
help = 'Index format: yyyymmdd'
def pensAndTales(name, baseUrl):
return type('PensAndTales_%s' % name,
(_BasicScraper,),
dict(
name='PensAndTales/' + name,
latestUrl=baseUrl,
imageUrl=baseUrl + '?date=%s',
imageSearch=compile(r'<img[^>]+?src="([^"]*?comics/.+?)"', IGNORECASE),
prevSearch=compile(r'<a href="([^"]*?\?date=\d+)">(:?<img[^>]+?alt=")?Previous Comic', IGNORECASE),
help='Index format: yyyymmdd')
)
# XXX: using custom Wordpress layout
# th = pensAndTales('TreasureHunters', 'http://th.pensandtales.com/')
# XXX: comic broken, no content
# strangekith = pensAndTales('Strangekith', 'http://strangekith.pensandtales.com/')
# XXX: comic broken
# fireflycross = pensAndTales('FireflyCross', 'http://fireflycross.pensandtales.com/')
thosedestined = pensAndTales('ThoseDestined', 'http://thosedestined.pensandtales.com/')
evilish = pensAndTales('Evilish', 'http://evilish.pensandtales.com/')
redallover = pensAndTales('RedAllOver', 'http://redallover.pensandtales.com/')
stickyevil = pensAndTales('StickyEvil', 'http://stickyevil.pensandtales.com/')
# XXX: moved / layout changed
#ynt = pensAndTales('YamiNoTainai', 'http://ynt.pensandtales.com/')
earthbound = pensAndTales('Earthbound', 'http://earthbound.pensandtales.com/')
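# pensAndTales() builds one scraper class per hosted comic; the 'Evilish'
# entry above is roughly a hand-written class with name='PensAndTales/Evilish',
# latestUrl='http://evilish.pensandtales.com/' and
# imageUrl='http://evilish.pensandtales.com/?date=%s' (sketch).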
class ProperBarn(_BasicScraper):
latestUrl = 'http://www.nitrocosm.com/go/gag/'
imageUrl = 'http://www.nitrocosm.com/go/gag/%s/'
imageSearch = compile(r'<img class="gallery_display" src="([^"]+)"')
prevSearch = compile(r'<a href="([^"]+)"[^>]*><button type="submit" class="nav_btn_previous">')
help = 'Index format: nnn'
class PunksAndNerds(_BasicScraper):
latestUrl = 'http://www.punksandnerds.com/'
imageUrl = 'http://www.punksandnerds.com/?id=%s/'
imageSearch = compile(r'<img src="(http://www.punksandnerds.com/img/comic/.+?)"')
prevSearch = compile(r'<td><a href="(.+?)"[^>]+?><img src="backcomic.gif"')
help = 'Index format: nnn'
class PunksAndNerdsOld(_BasicScraper):
latestUrl = 'http://original.punksandnerds.com/'
imageUrl = 'http://original.punksandnerds.com/d/%s.html'
imageSearch = compile(r' src="(/comics/.+?)"')
prevSearch = compile(r'><strong><a href="(.+?)"[^>]+?><img[^>]+?src="/previouscomic.gif">')
help = 'Index format: yyyymmdd'
class PlanescapeSurvival(_BasicScraper):
latestUrl = 'http://planescapecomic.com/'
imageUrl = 'http://planescapecomic.com/%s.html'
imageSearch = compile(r'src="(comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><img alt="Previous" ')
help = 'Index format: nnn'
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.split('/')[-1].split('.')[0]
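# Example: a page URL ending in /042.html is saved as '042'.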

21
dosagelib/plugins/q.py Normal file
View file

@ -0,0 +1,21 @@
from re import compile
from ..helpers import _BasicScraper
class QuestionableContent(_BasicScraper):
latestUrl = 'http://www.questionablecontent.net/'
imageUrl = 'http://www.questionablecontent.net/view.php?comic=%s'
imageSearch = compile(r'/(comics/\d+\.png)"')
prevSearch = compile(r'<a href="(view.php\?comic=\d+)">Previous')
help = 'Index format: n (unpadded)'
class Qwantz(_BasicScraper):
latestUrl = 'http://www.qwantz.com/index.php'
imageUrl = 'http://www.qwantz.com/index.php?comic=%s'
imageSearch = compile(r'<img src="(http://www.qwantz.com/comics/.+?)" class="comic"')
prevSearch = compile(r'"><a href="(.+?)">&larr; previous</a>')
help = 'Index format: n'

65
dosagelib/plugins/r.py Normal file
View file

@ -0,0 +1,65 @@
from re import compile
from ..helpers import _BasicScraper, bounceStarter
class RadioactivePanda(_BasicScraper):
latestUrl = 'http://www.radioactivepanda.com/'
imageUrl = 'http://www.radioactivepanda.com/comic/%s'
imageSearch = compile(r'<img src="(/Assets/.*?)".+?"comicimg"')
prevSearch = compile(r'<a href="(/comic/.*?)".+?previous_btn')
help = 'Index format: n (no padding)'
class Rascals(_BasicScraper):
latestUrl = 'http://petitesymphony.com/rascals'
imageUrl = 'http://petitesymphony.com/comic/rascals/%s'
imageSearch = compile(r'(http://petitesymphony.com/comics/.+?)"')
prevSearch = compile(r"KR-nav-previous.><a href=.(http.+?).>")
help = 'Index format: non'
class RealLife(_BasicScraper):
latestUrl = 'http://www.reallifecomics.com/'
imageUrl = 'http://www.reallifecomics.com/archive/%s.html'
imageSearch = compile(r'"(/comics/.+?)"')
prevSearch = compile(r'"(/archive/.+?)".+?nav_previous')
help = 'Index format: yymmdd'
class RedString(_BasicScraper):
latestUrl = 'http://www.redstring.strawberrycomics.com/'
imageUrl = 'http://www.redstring.strawberrycomics.com/?p=%s'
imageSearch = compile(r'<img src="(http://www.redstring.strawberrycomics.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)">Previous Comic</a>')
help = 'Index format: nnn'
class Roza(_BasicScraper):
latestUrl = 'http://www.junglestudio.com/roza/index.php'
imageUrl = 'http://www.junglestudio.com/roza/index.php?date=%s'
imageSearch = compile(r'<img src="(pages/.+?)"')
prevSearch = compile(r'<a href="(index.php\?date=.+?)">[^>].+?navtable_01.gif')
help = 'Index format: yyyy-mm-dd'
class RedMeat(_BasicScraper):
starter = bounceStarter('http://www.redmeat.com/redmeat/current/index.html', compile(r'<a href="(\.\./\d{4}-\d{2}-\d{2}/index\.html)">next</a>'))
imageUrl = 'http://www.redmeat.com/redmeat/%s/index.html'
imageSearch = compile(r'<img src="(index-1\.gif)" width="\d+" height="\d+" [^>]*>')
prevSearch = compile(r'<a href="(\.\./\d{4}-\d{2}-\d{2}/index\.html)">previous</a>')
help = 'Index format: yyyy-mm-dd'
@classmethod
def namer(cls, imageUrl, pageUrl):
return imageUrl.split('/')[-2]
class RunningWild(_BasicScraper):
latestUrl = 'http://runningwild.katbox.net/'
imageUrl = 'http://runningwild.katbox.net/index.php?strip_id=%s'
imageSearch = compile(r'="(.+?strips/.+?)"')
prevSearch = compile(r'(index.php\?strip_id=.+?)".+?navigation_back')
help = 'Index format: n (unpadded)'

341
dosagelib/plugins/s.py Normal file
View file

@ -0,0 +1,341 @@
from re import compile, MULTILINE, IGNORECASE, sub
from os.path import splitext
from ..helpers import _BasicScraper, bounceStarter, indirectStarter
class SailorsunOrg(_BasicScraper):
latestUrl = 'http://www.sailorsun.org/'
imageUrl = 'http://www.sailorsun.org/browse.php?comicID=%s'
imageSearch = compile(r'(comics/.+?)"')
prevSearch = compile(r'/(browse.php.+?)".+?/prev.gif')
help = 'Index format: n (unpadded)'
class SamAndFuzzy(_BasicScraper):
latestUrl = 'http://www.samandfuzzy.com/'
imageUrl = 'http://samandfuzzy.com/%s'
imageSearch = compile(r'(/comics/.+?)" alt')
prevSearch = compile(r'"><a href="(.+?)"><img src="imgint/nav_prev.gif"')
help = 'Index format: nnnn'
class SarahZero(_BasicScraper):
latestUrl = 'http://www.sarahzero.com/'
imageUrl = 'http://www.sarahzero.com/sz_%s.html'
imageSearch = compile(r'<img src="(z_(?:(?:spreads)|(?:temp)).+?)" alt=""')
prevSearch = compile(r'onmouseout="changeImages\(\'sz_05_nav\',\'z_site/sz_05_nav.gif\'\);return true" href="(sz_.+?)">')
help = 'Index format: nnnn'
class ScaryGoRound(_BasicScraper):
latestUrl = 'http://www.scarygoround.com/'
imageUrl = 'http://www.scarygoround.com/?date=%s'
imageSearch = compile(r'<img src="(strips/\d{8}\..{3})"')
prevSearch = compile(r'f><a href="(.+?)"><img src="site-images/previous.png"')
help = 'Index format: n (unpadded)'
class SchoolBites(_BasicScraper):
latestUrl = 'http://www.schoolbites.net/'
imageUrl = 'http://www.schoolbites.net/d/%s.html'
imageSearch = compile(r'(/comics/.+?)"')
prevSearch = compile(r'first_day.+?(/d/.+?.html).+?/previous_day.gif')
help = 'Index format: yyyymmdd'
class SinFest(_BasicScraper):
name = 'KeenSpot/SinFest'
latestUrl = 'http://www.sinfest.net/'
imageUrl = 'http://www.sinfest.net/archive_page.php?comicID=%s'
imageSearch = compile(r'<img src=".+?(/comikaze/comics/.+?)"')
prevSearch = compile(r'(/archive_page.php\?comicID=.+?)".+?prev_a')
help = 'Index format: n (unpadded)'
class SlightlyDamned(_BasicScraper):
latestUrl = 'http://raizap.com/sdamned/index.php'
imageUrl = 'http://raizap.com/sdamned/pages.php?comicID=%s'
imageSearch = compile(r'"(.+?comics2/.+?)"')
prevSearch = compile(r'</a>.+?(pages.php\?comicID=.+?)".+?back1')
help = 'Index format: n (unpadded)'
class SluggyFreelance(_BasicScraper):
latestUrl = 'http://www.sluggy.com/'
imageUrl = 'http://www.sluggy.com/comics/archives/daily/%s'
imageSearch = compile(r'<img src="(/images/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"[^>]+?><span class="ui-icon ui-icon-seek-prev">')
help = 'Index format: yymmdd'
class SodiumEyes(_BasicScraper):
imageUrl = 'http://sodiumeyes.com/%s'
imageSearch = compile(r'(/comic/.+?)"')
prevSearch = compile(r'"http://sodiumeyes.com/(.+?/)"><.+?comic-prev')
help = 'Index format: nnn'
starter = indirectStarter('http://sodiumeyes.com/',
compile(r'<a href="http://sodiumeyes.com/(\d\d\d\d.+?/)">'))
class SpareParts(_BasicScraper):
latestUrl = 'http://www.sparepartscomics.com/'
imageUrl = 'http://www.sparepartscomics.com/comics/?date=%s'
imageSearch = compile(r'(/comics/2.+?)[" ]')
prevSearch = compile(r'(/comics/.+?|index.php\?.+?)".+?Prev')
help = 'Index format: yyyymmdd'
class Stubble(_BasicScraper):
latestUrl = 'http://www.stubblecomics.com/d/20051230.html'
imageUrl = 'http://www.stubblecomics.com/d/%s.html'
imageSearch = compile(r'"(/comics/.*?)"')
prevSearch = compile(r'"(.*?)".*?backarrow')
help = 'Index format: yyyymmdd'
class StrawberryDeathCake(_BasicScraper):
latestUrl = 'http://rainchildstudios.com/strawberry/'
imageUrl = 'http://rainchildstudios.com/strawberry/?p=%s'
imageSearch = compile(r'/(comics/.+?)"')
prevSearch = compile(r'strawberry/(\?p=.+?)".+?span class="prev"')
help = 'Index format: n (good luck)'
class SuburbanTribe(_BasicScraper):
latestUrl = 'http://www.pixelwhip.com/'
imageUrl = 'http://www.pixelwhip.com/?p=%s'
imageSearch = compile(r'<img src="(http://www.pixelwhip.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
help = 'Index format: nnnn'
class SuccubusJustice(_BasicScraper):
latestUrl = 'http://www.succubus-justice.com/Com%20main%20frame.htm'
imageUrl = 'http://www.succubus-justice.com/%s%%20frame.htm'
imageSearch = compile(r'<p align="center"><img src="(/\d+.\w{3,4})"')
prevSearch = compile(r'<a href="(/[\w%]+\.htm|[\w%]+\.htm)"[^>]+?><img src="124.gif"')
help = 'Index format: nnn'
class Supafine(_BasicScraper):
latestUrl = 'http://www.supafine.com/comics/classic.php'
imageUrl = 'http://www.supafine.com/comics/classic.php?comicID=%s'
imageSearch = compile(r'<img src="(http://www.supafine.com/comics/.+?)"')
prevSearch = compile(r'<a href="(http://www.supafine.com/comics/classic.php\?.+?)"><img src="http://supafine.com/comikaze/images/previous.gif" ')
help = 'Index format: nnn'
class SomethingPositive(_BasicScraper):
latestUrl = 'http://www.somethingpositive.net/'
imageUrl = 'http://www.somethingpositive.net/sp%s.shtml'
imageSearch = compile(r'<img src="(/arch/sp\d+.\w{3,4}|/sp\d+.\w{3,4})"')
prevSearch = compile(r'<a \n?href="(sp\d{8}\.shtml)">(<font size=1\nface=".+?"\nSTYLE=".+?">Previous|<img src="images2/previous|<img src="images/previous.gif")', MULTILINE | IGNORECASE)
help = 'Index format: mmddyyyy'
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.split('/')[-1].split('.')[0]
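# Example: the page sp05022003.shtml is saved as 'sp05022003'.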
class SexyLosers(_BasicScraper):
imageUrl = 'http://www.sexylosers.com/%s.html'
imageSearch = compile(r'<img src\s*=\s*"\s*(comics/[\w\.]+?)"', IGNORECASE)
prevSearch = compile(r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<', IGNORECASE)
help = 'Index format: nnn'
starter = indirectStarter('http://www.sexylosers.com/',
compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>', IGNORECASE))
@classmethod
def namer(cls, imageUrl, pageUrl):
index = pageUrl.split('/')[-1].split('.')[0]
title = imageUrl.split('/')[-1].split('.')[0]
return index + '-' + title
def smackJeeves(names):
class _SJScraper(_BasicScraper):
imageUrl = property(lambda self: self.baseUrl + self.shortName)
imageSearch = compile(r'<img src="(http://www\.smackjeeves\.com/images/uploaded/comics/[^"]*)"', IGNORECASE)
prevSearch = compile(r'<a href="(/comics/\d+/[^"]*)"><img[^>]*alt="< Previous"', IGNORECASE)
help = 'Index format: nnnn (some increasing number)'
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.split('/')[-2]
def makeScraper(shortName):
baseUrl = 'http://%s.smackjeeves.com/comics/' % shortName
return type('SmackJeeves_%s' % shortName,
(_SJScraper,),
dict(
name='SmackJeeves/' + shortName,
baseUrl=baseUrl,
starter=bounceStarter(baseUrl, compile(r'<a href="(/comics/\d+/[^"]*)"><img[^>]*alt="Next >"', IGNORECASE)))
)
return dict((name, makeScraper(name)) for name in names)
globals().update(smackJeeves([
'20galaxies',
'axe13',
'beartholomew',
'bliss',
'durian',
'heard',
'mpmcomic',
'nlmo-project',
'paranoidloyd',
'thatdreamagain',
'wowcomics',
]))
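# globals().update(...) injects the generated SmackJeeves_* classes into this
# module's namespace so they are found like ordinary top-level scraper classes
# (the exact discovery mechanism lives outside this file).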
class StarCrossdDestiny(_BasicScraper):
latestUrl = 'http://www.starcrossd.net/comic.html'
imageUrl = 'http://www.starcrossd.net/archives/%s.html'
imageSearch = compile(r'<img src="(http://www\.starcrossd\.net/(?:ch1|strips|book2)/[^"]+)">')
prevSearch = compile(r'<a href="(http://www\.starcrossd\.net/(?:ch1/)?archives/\d+\.html)"[^>]*"[^"]*"[^>]*>prev', IGNORECASE)
help = 'Index format: nnnnnnnn'
@classmethod
def namer(cls, imageUrl, pageUrl):
if imageUrl.find('ch1') == -1:
# At first all images were stored in a strips/ directory but that was changed with the introduction of book2
imageUrl = sub('(?:strips)|(?:images)','book1',imageUrl)
elif not imageUrl.find('strips') == -1:
imageUrl = imageUrl.replace('strips/','')
directory, filename = imageUrl.split('/')[-2:]
filename, extension = splitext(filename)
return directory + '-' + filename
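# Example: .../strips/00000123.jpg is rewritten to .../book1/00000123.jpg and
# saved as 'book1-00000123'; ch1 images keep their directory prefix ('ch1-...').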
class SGVY(_BasicScraper):
imageUrl = 'http://www.sgvy.com/Edda%s/Issue%s/Page%s.html'
imageSearch = compile(r'"comic" src="((?:\.\./)+images/sgvy/sgvy-[-\w\d]+\.\w+)"')
prevSearch = compile(r'<a href="((?:\.\./)+(?:Edda\d+|Holiday)/(?:Issue\d+/Page\d+|Cover)\.html)">Prev</a>')
help = 'Index format: edda-issue-page'
starter = indirectStarter('http://www.sgvy.com/', compile(r'<a href="(archives/(?:Edda\d+|Holiday)/(?:Issue\d+/Page\d+|Cover)\.html)">'))
def setStrip(self, index):
self.currentUrl = self.imageUrl % tuple(map(int, index.split('-')))
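# Example: setStrip('4-2-10') sets currentUrl to
# http://www.sgvy.com/Edda4/Issue2/Page10.html.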
class Spamusement(_BasicScraper):
imageUrl = 'http://spamusement.com/index.php/comics/view/%s'
imageSearch = compile(r'<img src="(http://spamusement.com/gfx/\d+\..+?)"', IGNORECASE)
prevSearch = compile(r'<a href="(http://spamusement.com/index.php/comics/view/.+?)">', IGNORECASE)
help = 'Index format: n (unpadded)'
starter = indirectStarter('http://spamusement.com/', prevSearch)
def snafuComics():
class _SnafuComics(_BasicScraper):
imageSearch = compile(r'<img src=http://\w+\.snafu-comics\.com/(comics/\d{6}_\w*\.\w{3,4})')
prevSearch = compile(r'<a href="(\?comic_id=\d+)">Previous</a>')
help = 'Index format: n (unpadded)'
@property
def imageUrl(self):
return self.latestUrl + 'index.php?strip_id=%s'
comics = {
'Grim': 'grim',
'KOF': 'kof',
'PowerPuffGirls': 'ppg',
'Snafu': 'www',
'Tin': 'tin',
'TW': 'tw',
'Sugar': 'sugar',
'SF': 'sf',
'Titan': 'titan',
'EA': 'ea',
'Zim': 'zim',
'Soul': 'soul',
'FT': 'ft',
'Bunnywith': 'bunnywith',
'Braindead': 'braindead',
}
url = 'http://%s.snafu-comics.com/'
return dict((name, type('SnafuComics_%s' % name,
(_SnafuComics,),
dict(name='SnafuComics/' + name,
latestUrl=url % host)))
for name, host in comics.items())
globals().update(snafuComics())
class SosiaalisestiRajoittuneet(_BasicScraper):
latestUrl = 'http://sosiaalisestirajoittuneet.fi/index_nocomment.php'
imageUrl = 'http://sosiaalisestirajoittuneet.fi/index_nocomment.php?date=%s'
imageSearch = compile(r'<img src="(strips/web/\d+.jpg)" alt=".*?" />')
prevSearch = compile(r'<a href="(index_nocomment\.php\?date=\d+)"><img\s+src="images/active_edellinen\.gif"', MULTILINE)
class StrangeCandy(_BasicScraper):
latestUrl = 'http://www.strangecandy.net/'
imageUrl = 'http://www.strangecandy.net/d/%s.html'
imageSearch = compile(r'src="(http://www.strangecandy.net/comics/\d{8}.\w{1,4})"')
prevSearch = compile(r'<a href="(http://www.strangecandy.net/d/\d{8}.html)"><img[^>]+?src="http://www.strangecandy.net/images/previous_day.gif"')
help = 'Index format: yyyymmdd'
class SMBC(_BasicScraper):
latestUrl = 'http://www.smbc-comics.com/'
imageUrl = 'http://www.smbc-comics.com/index.php?db=comics&id=%s'
imageSearch = compile(r'<img src=\'(.+?\d{8}.\w{1,4})\'>')
prevSearch = compile(r'131,13,216,84"\n\s+href="(.+?)#comic"\n>', MULTILINE)
help = 'Index format: nnnn'
class SomethingLikeLife(_BasicScraper):
latestUrl = 'http://www.pulledpunches.com/'
imageUrl = 'http://www.pulledpunches.com/?p=%s'
imageSearch = compile(r'<img src="(http://www.pulledpunches.com/comics/[^"]*)"')
prevSearch = compile(r'</a> <a href="(http://www.pulledpunches.com/\?p=[^"]*)"><img src="back1.gif"')
help = 'Index format: nn'
class StickEmUpComics(_BasicScraper):
latestUrl = 'http://stickemupcomics.com/'
imageUrl = 'http://stickemupcomics.com/%s'
imageSearch = compile(r'<img src="(http://stickemupcomics.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
help = 'Index format: yyyy/mm/dd/stripname'
class SexDemonBag(_BasicScraper):
latestUrl = 'http://www.sexdemonbag.com/'
imageUrl = 'http://www.sexdemonbag.com/?p=%s'
imageSearch = compile(r'<img src="(http://www.sexdemonbag.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
help = 'Index format: nnn'

141
dosagelib/plugins/t.py Normal file
View file

@ -0,0 +1,141 @@
from re import compile, IGNORECASE
from ..helpers import _BasicScraper, indirectStarter
class TalesOfPylea(_BasicScraper):
latestUrl = 'http://talesofpylea.com/'
imageUrl = 'http://talesofpylea.com/%s/'
imageSearch = compile(r'<img src="(istrip_files/strips/.+?)"')
prevSearch = compile(r' <a href="(.+?)">Back</a>')
help = 'Index format: nnn'
class TheNoob(_BasicScraper):
latestUrl = 'http://www.thenoobcomic.com/index.php'
imageUrl = 'http://www.thenoobcomic.com/index.php?pos=%s'
imageSearch = compile(r'<img src="(/headquarters/comics/.+?)"')
prevSearch = compile(r'<a class="comic_nav_previous_button" href="(.+?)"></a>')
help = 'Index format: nnnn'
class TheOrderOfTheStick(_BasicScraper):
latestUrl = 'http://www.giantitp.com/'
imageUrl = 'http://www.giantitp.com/comics/images/%s'
imageSearch = compile(r'<IMG src="(/comics/images/.+?)">')
prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
help = 'Index format: n (unpadded)'
starter = indirectStarter('http://www.giantitp.com/', compile(r'<A href="(/comics/oots\d{4}\.html)"'))
class TheParkingLotIsFull(_BasicScraper):
latestUrl = 'http://plif.courageunfettered.com/archive/arch2002.htm'
imageUrl = 'http://plif.courageunfettered.com/archive/wc%s.gif'
imageSearch = compile(r'<td align="center"><A TARGET=_parent HREF="(wc\d+\..+?)">')
prevSearch = compile(r'-\s*\n\s*<A HREF="(arch\d{4}\.htm)">\d{4}</A>')
help = 'Index format: nnn'
class TheWotch(_BasicScraper):
latestUrl = 'http://www.thewotch.com/'
imageUrl = 'http://www.thewotch.com/?epDate=%s'
imageSearch = compile(r"<img.+?src='(comics/.+?)'")
prevSearch = compile(r"<link rel='Previous' href='(\?epDate=\d+-\d+-\d+)'")
help = 'Index format: yyyy-mm-dd'
class Thorn(_BasicScraper):
latestUrl = 'http://www.mimisgrotto.com/thorn/index.html'
imageUrl = 'http://www.mimisgrotto.com/thorn/%s.html'
imageSearch = compile(r'"(strips/.+?)"')
prevSearch = compile(r'(\d[\d][\d].html)">Prev')
help = 'Index format: nnn'
class TwoTwoOneFour(_BasicScraper):
latestUrl = 'http://www.nitrocosm.com/go/2214_classic/'
imageUrl = 'http://www.nitrocosm.com/go/2214_classic/%s/'
imageSearch = compile(r'<img class="gallery_display" src="([^"]+)"')
prevSearch = compile(r'<a href="([^"]+)"[^>]*><button type="submit" class="nav_btn_previous">')
help = 'Index format: n (unpadded)'
class TheWhiteboard(_BasicScraper):
latestUrl = 'http://www.the-whiteboard.com/'
imageUrl = 'http://www.the-whiteboard.com/auto%s.html'
imageSearch = compile(r'<img SRC="(autotwb\d{1,4}.+?|autowb\d{1,4}.+?)">', IGNORECASE)
prevSearch = compile(r'&nbsp<a href="(.+?)">previous</a>', IGNORECASE)
help = 'Index format: twb or wb + n, e.g. twb1000'
class _TheFallenAngel(_BasicScraper):
imageSearch = compile(r'SRC="(http://www.thefallenangel.co.uk/\w+comics/.+?)"')
prevSearch = compile(r' <a href="(http://www.thefallenangel.co.uk/.+?)"><img[^>]+?src="http://www.thefallenangel.co.uk/images/previousday.jpg"')
help = 'Index format: yyyymmdd'
@property
def baseUrl(self):
return 'http://www.thefallenangel.co.uk/cgi-bin/%sautokeen/autokeenlite.cgi' % (self.shortName,)
@property
def imageUrl(self):
return self.baseUrl + '?date=%s'
def starter(self):
return self.baseUrl
class HighMaintenance(_TheFallenAngel):
name = 'TheFallenAngel/HighMaintenance'
shortName = 'hm'
class FAWK(_TheFallenAngel):
name = 'TheFallenAngel/FAWK'
shortName = 'fawk'
class MalloryChan(_TheFallenAngel):
name = 'TheFallenAngel/MalloryChan'
shortName = 'mallorychan'
class HMHigh(_BasicScraper):
name = 'TheFallenAngel/HMHigh'
latestUrl = 'http://www.thefallenangel.co.uk/hmhigh/'
imageUrl = 'http://www.thefallenangel.co.uk/hmhigh/?id=%s'
imageSearch = compile(r'<img src="(http://www.thefallenangel.co.uk/hmhigh/img/comic/.+?)"')
prevSearch = compile(r' <a href="(http://www.thefallenangel.co.uk/.+?)" title=".+?">Prev</a>')
help = 'Index format: nnn'
class TheOuterQuarter(_BasicScraper):
latestUrl = 'http://theouterquarter.com/'
imageUrl = 'http://theouterquarter.com/comic/%s'
imageSearch = compile(r'<img src="(http://theouterquarter.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
help = 'Index format: nnn'
class TheHorrificAdventuresOfFranky(_BasicScraper):
latestUrl = 'http://www.boneyardfranky.com/'
imageUrl = 'http://www.boneyardfranky.com/?p=%s'
imageSearch = compile(r'<img src="(http://www.boneyardfranky.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
help = 'Index format: nnn'

62
dosagelib/plugins/u.py Normal file
View file

@@ -0,0 +1,62 @@
from re import compile, IGNORECASE
from ..helpers import _BasicScraper, bounceStarter, indirectStarter
from ..util import getQueryParams
class UnderPower(_BasicScraper):
latestUrl = 'http://underpower.non-essential.com/'
imageUrl = 'http://underpower.non-essential.com/index.php?comic=%s'
imageSearch = compile(r'<img src="(comics/\d{8}\..+?)"')
prevSearch = compile(r'<a href="(/index.php\?comic=\d{8})"><img src="images/previous-comic\.gif"')
help = 'Index format: yyyymmdd'
class Undertow(_BasicScraper):
imageUrl = 'http://undertow.dreamshards.org/%s'
imageSearch = compile(r'<img src="(.+?)"')
prevSearch = compile(r'href="(.+?)".+?teynpoint')
help = 'Index format: good luck !'
starter = indirectStarter('http://undertow.dreamshards.org/',
compile(r'href="(.+?)".+?Most recent page'))
class UnicornJelly(_BasicScraper):
latestUrl = 'http://unicornjelly.com/uni666.html'
imageUrl = 'http://unicornjelly.com/uni%s.html'
imageSearch = compile(r'</TABLE>(?:<FONT COLOR="BLACK">)?<IMG SRC="(images/[^"]+)" WIDTH=')
prevSearch = compile(r'<A HREF="(uni\d{3}[bcs]?\.html)">(<FONT COLOR="BLACK">)?<IMG SRC="images/back00\.gif"')
help = 'Index format: nnn'
class UserFriendly(_BasicScraper):
starter = bounceStarter('http://ars.userfriendly.org/cartoons/?mode=classic', compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="">'))
imageUrl = 'http://ars.userfriendly.org/cartoons/?id=%s&mode=classic'
imageSearch = compile(r'<img border="0" src="(http://www.userfriendly.org/cartoons/archives/\d{2}\w{3}/.+?\.gif)"')
prevSearch = compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="Previous Cartoon">')
help = 'Index format: yyyymmdd'
@classmethod
def namer(cls, imageUrl, pageUrl):
return 'uf%s' % (getQueryParams(pageUrl)['id'][0][2:],)
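# Illustrative note: getQueryParams() returns a dict of value lists, so for a page
# URL such as http://ars.userfriendly.org/cartoons/?id=20120620&mode=classic (the
# date is only an example) ['id'][0] is '20120620' and the [2:] slice drops the
# century, giving the filename 'uf120620'.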
class UndeadFriend(_BasicScraper):
latestUrl = 'http://www.undeadfriend.com/'
imageUrl = 'http://www.undeadfriend.com/d/%s.html'
imageSearch = compile(r'src="(http://www\.undeadfriend\.com/comics/.+?)"', IGNORECASE)
prevSearch = compile(r'<a.+?href="(http://www\.undeadfriend\.com/d/\d+?\.html)"><img border="0" name="previous_day" alt="Previous comic" src="http://www\.undeadfriend\.com/images/previous_day\.jpg', IGNORECASE)
help = 'Index format: yyyymmdd'
class UnspeakableVault(_BasicScraper):
imageUrl = 'http://www.macguff.fr/goomi/unspeakable/WEBIMAGES/CARTOON/vault%s.html'
imageSearch = compile(r'(WEBIMAGES/CARTOON/.+?)"')
prevSearch = compile(r'PREVIOUS.+?" href="(.+?)"')
help = 'Index format: nn or nnn'
starter = indirectStarter('http://www.macguff.fr/goomi/unspeakable/home.html',
compile(r'http://www.macguff.fr/goomi/unspeakable/(.+?)"'))
@classmethod
def namer(cls, imageUrl, imageSearch):
return '%s-%s' % (imageSearch.split('/')[-1].split('.')[0], imageUrl.split('/')[-1].split('.')[0])

279
dosagelib/plugins/uc.py Normal file
View file

@@ -0,0 +1,279 @@
from re import compile, IGNORECASE, sub
from ..helpers import _BasicScraper
from ..util import fetchManyMatches, fetchUrl
class _UClickScraper(_BasicScraper):
homepage = 'http://content.uclick.com/a2z.html'
baseUrl = 'http://www.uclick.com/client/zzz/%s/'
imageUrl = property(lambda self: self.latestUrl + '%s/')
imageSearch = compile(r'<img[^>]+src="(http://synd.imgsrv.uclick.com/comics/\w+/\d{4}/[^"]+\.gif)"', IGNORECASE)
prevSearch = compile(r'<a href="(/client/zzz/\w+/\d{4}/\d{2}/\d{2}/)">Previous date', IGNORECASE)
help = 'Index format: yyyy/mm/dd'
@classmethod
def starter(cls):
return cls.baseUrl % (cls.shortName,)
@classmethod
def fetchSubmodules(cls):
exclusions = (
'index',
)
submoduleSearch = compile(r'(<A HREF="http://content.uclick.com/content/\w+.html">[^>]+?</a>)', IGNORECASE)
partsMatch = compile(r'<A HREF="http://content.uclick.com/content/(\w+?).html">([^>]+?)</a>', IGNORECASE)
matches = fetchManyMatches(cls.homepage, (submoduleSearch,))[0]
possibles = [partsMatch.match(match).groups() for match in matches]
def normalizeName(name):
name = sub(r'&(.)acute;', r'\1', name).title()
return ''.join([c for c in name if c.isalnum()])
def fetchSubmodule(module):
try:
return fetchUrl(cls.baseUrl % module, cls.imageSearch)
except:
return False
return [normalizeName(name) for part, name in possibles if part not in exclusions and fetchSubmodule(part)]
def uclick(name, shortName):
return type('UClick_%s' % name,
(_UClickScraper,),
dict(name='UClick/' + name, shortName=shortName))
comics = {
'5thWave': 'fw',
'9To5': 'tmntf',
'AdamHome': 'ad',
'Agnes': 'cragn',
'AlcarazLalo': 'la',
'AlcarazLaloSpanish': 'spla',
'AndersonNick': 'wpnan',
'AndyCapp': 'crcap',
'AnimalCrackers': 'tmani',
'Annie': 'tmann',
'AsayChuck': 'crcas',
'AskShagg': 'crask',
'AuthTony': 'ta',
'BadReporter': 'bad',
'Baldo': 'ba',
'BaldoSpanish': 'be',
'BallardStreet': 'crbal',
'BarkEaterLake': 'bark',
'BarstowDonna': 'dba',
'BC': 'crbc',
'BCSpanish': 'crbcs',
'BeattieBruce': 'crbbe',
'BennetClay': 'wpcbe',
'BensonLisa': 'wplbe',
'BensonSteve': 'crsbe',
'BigTop': 'bt',
'Biographic': 'biov',
'Bleeker': 'blk',
'BobTheSquirrel': 'bob',
'BoilingPoint': 'boil',
'BokChip': 'crcbo',
'BoNanas': 'bon',
'Boomerangs': 'boom',
'BoondocksThe': 'bo',
'BottomLiners': 'tmbot',
'BoundAndGagged': 'tmbou',
'Brainwaves': 'bwv',
'BreenSteve': 'crsbr',
'BrendaStarr': 'tmbre',
'BrewsterRockit': 'tmrkt',
'BrittChris': 'crcbr',
'BroomHilda': 'tmbro',
'Candorville': 'cand',
'CarlsonStuart': 'sc',
'CatalinoKen': 'crkca',
'Cathy': 'ca',
'CathySpanish': 'spca',
'CEstLaVie': 'clv',
'CityThe': 'derf',
'ClearBlueWater': 'cbw',
'Cleats': 'cle',
'CloseToHome': 'cl',
'CombsPaul': 'tmcmb',
'CompuToon': 'tmcom',
'Condorito': 'cond',
'ConradPaul': 'tmpco',
'Cornered': 'co',
'CulDeSac': 'cds',
'DanzigerJeff': 'jd',
'DaviesMatt': 'tmmda',
'DeepCover': 'deep',
'DeeringJohn': 'crjde',
'DickTracy': 'tmdic',
'DinetteSetThe': 'crdin',
'DogEatDoug': 'crdog',
'DonWright': 'tmdow',
'Doodles': 'tmdoo',
'Doonesbury': 'db',
'DuplexThe': 'dp',
'Eek': 'eek',
'ElderberriesThe': 'eld',
'FacesInTheNews': 'kw',
'FlightDeck': 'crfd',
'FloAndFriends': 'crflo',
'FlyingMccoysThe': 'fmc',
'ForBetterOrForWorse': 'fb',
'ForHeavenSSake': 'crfhs',
'FoxtrotClassics': 'ftcl',
'Foxtrot': 'ft',
'FoxtrotSpanish': 'spft',
'FrankAndErnest': 'fa',
'FredBassetSpanish': 'spfba',
'FredBasset': 'tmfba',
'FrogApplause': 'frog',
'FuscoBrothersThe': 'fu',
'Garfield': 'ga',
'GarfieldSpanish': 'gh',
'GasolineAlley': 'tmgas',
'GaturroSpanish': 'spgat',
'GilThorp': 'tmgil',
'GingerMeggs': 'gin',
'GingerMeggsSpanish': 'spgin',
'GirlsAndSports': 'crgis',
'GorrellBob': 'crbgo',
'GoTeamBob': 'gtb',
'HammondBruce': 'hb',
'HandelsmanWalt': 'tmwha',
'HeartOfTheCity': 'hc',
'Heathcliff': 'crhea',
'HeathcliffSpanish': 'crhes',
'HerbAndJamaal': 'crher',
'HigginsJack': 'jh',
'HomeAndAway': 'wphaa',
'HorseyDavid': 'tmdho',
'Housebroken': 'tmhou',
'HubertAndAbby': 'haa',
'IdiotBox': 'ibox',
'ImagineThis': 'imt',
'InkPen': 'ink',
'InTheBleachers': 'bl',
'ItsAllAboutYou': 'wpiay',
'JamesBondSpanish': 'spjb',
'JonesClay': 'crcjo',
'KallaugherKevin': 'cwkal',
'KChroniclesThe': 'kk',
'KelleySteve': 'crske',
'Kudzu': 'tmkud',
'LaCucaracha': 'lc',
'LegendOfBill': 'lob',
'LibertyMeadows': 'crlib',
'Lio': 'lio',
'LittleDogLost': 'wpldl',
'LocherDick': 'tmdlo',
'LooseParts': 'tmloo',
'LostSheep': 'lost',
'LoweChan': 'tmclo',
'LuckovichMike': 'crmlu',
'LuckyCow': 'luc',
'MarkstienGary': 'crgma',
'MarletteDoug': 'tmdma',
'MccoyGlenn': 'gm',
'MeaningOfLilaThe': 'crlil',
'MeehanStreak': 'tmmee',
'MiddletonsThe': 'tmmid',
'MinimumSecurity': 'ms',
'ModestyBlaiseSpanish': 'spmb',
'Momma': 'crmom',
'MorinJim': 'cwjmo',
'MuttJeffSpanish': 'spmut',
'MythTickle': 'myth',
'NAoQV': 'naqv',
'NaturalSelection': 'crns',
'NestHeads': 'cpnst',
'Neurotica': 'neu',
'NonSequitur': 'nq',
'OhmanJack': 'tmjoh',
'OliphantPat': 'po',
'OnAClaireDay': 'crocd',
'OneBigHappy': 'crobh',
'OtherCoastThe': 'crtoc',
'OutOfTheGenePool': 'wpgen',
'Overboard': 'ob',
'OverboardSpanish': 'spob',
'PepeSpanish': 'sppep',
'PettJoel': 'jp',
'Pibgorn': 'pib',
'Pickles': 'wppic',
'Pluggers': 'tmplu',
'PoochCafe': 'poc',
'PoochCafeSpanish': 'sppoc',
'PopCulture': 'pop',
'PowellDwane': 'crdpo',
'Preteena': 'pr',
'PricklyCity': 'prc',
'QuigmansThe': 'tmqui',
'RallComic': 'tr',
'RamirezMicheal': 'crmrm',
'RamseyMarshall': 'crmra',
'RealLifeAdventures': 'rl',
'RedAndRover': 'wpred',
'RedMeat': 'red',
'ReynoldsUnwrapped': 'rw',
'RonaldinhoGaucho': 'ron',
'RonaldinhoGauchoSpanish': 'spron',
'Rubes': 'crrub',
'SackSteve': 'tmssa',
'SargentBen': 'bs',
'SargentBenSpanish': 'spbs',
'SendHelp': 'send',
'ShenemanDrew': 'tmdsh',
'SherffiusDrew': 'crjsh',
'Shoecabbage': 'shcab',
'Shoe': 'tmsho',
'SigmundSpanish': 'spsig',
'Slowpoke': 'slow',
'SmallWorld': 'small',
'SpaceIsThePlace': 'sitp',
'SpeedBump': 'crspe',
'StanisScott': 'crsst',
'StateOfTheUnion': 'crsou',
'StayskalWayne': 'tmwst',
'StoneSoup': 'ss',
'StrangeBrew': 'crstr',
'SummersDana': 'tmdsu',
'SuttonImpact': 'stn',
'Sylvia': 'tmsyl',
'SzepPaul': 'crpsz',
'TankMcnamara': 'tm',
'TeenageMutantNinjaTurtles': 'tmnt',
'TelnaesAnn': 'tmate',
'TheArgyleSweater': 'tas',
'ThePinkPanther': 'tmpnk',
'TheWizardOfId': 'crwiz',
'TheWizardOfIdSpanish': 'crwis',
'ThInk': 'think',
'ThompsonMike': 'crmth',
'ThroughThickAndThin': 'cpthk',
'TinySepuku': 'tiny',
'Toby': 'toby',
'TolesTom': 'tt',
'TomTheDancingBug': 'td',
'TooMuchCoffeeMan': 'tmcm',
'Trevor': 'trev',
'TutelandiaSpanish': 'sptut',
'VarvelGary': 'crgva',
'WassermanDan': 'tmdwa',
'WatchYourHead': 'wpwyh',
'Waylay': 'min',
'WeePals': 'crwee',
'WinnieThePooh': 'crwin',
'WitOfTheWorld': 'cwwit',
'WorkingItOut': 'crwio',
'WriteDon': 'tmdow',
'YennySpanish': 'spyen',
'Yenny': 'yen',
'ZackHill': 'crzhi',
'ZiggySpanish': 'spzi',
'Ziggy': 'zi',
}
globals().update(dict((item[0], uclick(*item)) for item in comics.items()))
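# How the factory above works (illustrative): for an entry such as 'Garfield': 'ga',
# uclick() calls type() to build a new class UClick_Garfield deriving from
# _UClickScraper with name='UClick/Garfield' and shortName='ga'; the final
# globals().update(...) then publishes each generated class under its plain key
# (e.g. Garfield) as a module-level name so the plugin loader can discover it.
# UClick_Garfield.starter() returns 'http://www.uclick.com/client/zzz/ga/'.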

36
dosagelib/plugins/v.py Normal file
View file

@@ -0,0 +1,36 @@
from re import compile, IGNORECASE, MULTILINE
from ..helpers import _BasicScraper
class _VGCats(_BasicScraper):
latestUrl = 'http://www.vgcats.com/comics/'
imageSearch = compile(r'<img src="(images/\d{6}\..+?)"')
prevSearch = compile(r'<a href="(\?strip_id=\d+)"><img src="back.gif" border="0"')
help = 'Index format: n (unpadded)'
@property
def imageUrl(self):
return self.latestUrl + '?strip_id=%s'
class Super(_VGCats):
name = 'VGCats/Super'
latestUrl = 'http://www.vgcats.com/super/'
class Adventure(_VGCats):
name = 'VGCats/Adventure'
latestUrl = 'http://www.vgcats.com/ffxi/'
class ViiviJaWagner(_BasicScraper):
latestUrl = 'http://www.hs.fi/viivijawagner/'
imageUrl = 'http://www.hs.fi/viivijawagner/%s'
imageSearch = compile(r'<img id="strip\d+"\s+src="([^"]+)"', IGNORECASE)
prevSearch = compile(r'<a href="(.+?)"[^>]+?>\nEdellinen&nbsp;\n<img src="http://www.hs.fi/static/hs/img/viivitaakse.gif"', MULTILINE | IGNORECASE)
help = 'Index format: shrugs!'

216
dosagelib/plugins/w.py Normal file
View file

@@ -0,0 +1,216 @@
from re import compile, IGNORECASE, DOTALL
from ..helpers import _BasicScraper, queryNamer, bounceStarter
class WayfarersMoon(_BasicScraper):
latestUrl = 'http://www.wayfarersmoon.com/'
imageUrl = 'http://www.wayfarersmoon.com/index.php?page=%s'
imageSearch = compile(r'<img src="(/admin.+?)"')
prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')
help = 'Index format: nn'
class WhiteNinja(_BasicScraper):
latestUrl = 'http://www.whiteninjacomics.com/comics.shtml'
imageUrl = 'http://www.whiteninjacomics.com/comics/%s.shtml'
imageSearch = compile(r'<img src=(/images/comics/(?!t-).+?\.gif) border=0')
prevSearch = compile(r'(/comics/.+?shtml).+?previous')
help = 'Index format: s (comic name)'
class WhiteNoise(_BasicScraper):
latestUrl = 'http://www.wncomic.com/archive.php'
imageUrl = 'http://www.wncomic.com/archive_comments.php?strip_id=%s'
imageSearch = compile(r'(istrip_files/strips/.+?)"')
prevSearch = compile(r'First .+?"(archive.+?)".+?top_back')
help = 'Index format: n'
class WhyTheLongFace(_BasicScraper):
latestUrl = 'http://www.absurdnotions.org/wtlf200709.html'
imageUrl = 'http://www.absurdnotions.org/wtlf%s.html'
imageSearch = compile(r'<img src="(http://www.absurdnotions.org/wtlf.+?|lf\d+.\w{1,4})"', IGNORECASE)
prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ')
help = 'Index format: yyyymm'
class Wigu(_BasicScraper):
latestUrl = 'http://www.wigu.com/wigu/'
imageUrl = 'http://www.wigu.com/wigu/?date=%s'
imageSearch = compile(r'<img src="(strips/\d{8}\..+?)" alt=""')
prevSearch = compile(r'<a href="(.+?)"[^>]+?>< PREV COMIC</a> ')
help = 'Index format: yyyymmdd'
class WiguTV(_BasicScraper):
latestUrl = 'http://jjrowland.com/'
imageUrl = 'http://jjrowland.com/archive/%s.html'
imageSearch = compile(r'"(/comics/.+?)"')
prevSearch = compile(r'<a href="(/archive/.+?)"[^>]+?>&nbsp;')
help = 'Index format: yyyymmdd'
class WotNow(_BasicScraper):
latestUrl = 'http://shadowburn.binmode.com/wotnow/'
imageUrl = 'http://shadowburn.binmode.com/wotnow/comic.php?comic_id=%s'
imageSearch = compile(r'<IMG SRC="(comics/.+?)"')
prevSearch = compile(r'<A HREF="(.+?)"><IMG SRC="images/b_prev.gif" ')
help = 'Index format: n (unpadded)'
class WorldOfWarcraftEh(_BasicScraper):
latestUrl = 'http://woweh.com/'
imageUrl = 'http://woweh.com/?p=%s'
imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
prevSearch = compile(r'woweh.com/(\?p=.+:?)".+:?="prev')
help = 'Index format: non'
class Wulffmorgenthaler(_BasicScraper):
latestUrl = 'http://www.wulffmorgenthaler.com/'
imageUrl = 'http://www.wulffmorgenthaler.com/Default.aspx?id=%s'
imageSearch = compile(r'img id="ctl00_content_Strip1_imgStrip".+?class="strip" src="(striphandler\.ashx\?stripid=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"')
prevSearch = compile(r'<a href="(/default\.aspx\?id=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})" id="ctl00_content_Strip1_aPrev">')
help = 'Index format: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx (GUID)'
namer = queryNamer('stripid')
def webcomicsNation():
class _WebcomicsNation(_BasicScraper):
imageSearch = compile(r'<a name="strip\d*?">.*?<img[^>]+?src="([^"]*?memberimages/.+?)"', IGNORECASE + DOTALL)
prevSearch = compile(r'href="([^"]*?whichbutton=prev[^"]*?)"', IGNORECASE)
help = 'Index format: nnnn (non-contiguous)'
@property
def imageUrl(self):
return self.baseUrl + '?view=archive&amp;chapter=%s'
comics = {
'AgnesQuill': 'daveroman/agnes/',
'Elvenbaath': 'tdotodot2k/elvenbaath/',
'IrrationalFears': 'uvernon/irrationalfears/',
'KismetHuntersMoon': 'laylalawlor/huntersmoon/',
'SaikoAndLavender': 'gc/saiko/',
'MyMuse': 'gc/muse/',
'NekkoAndJoruba': 'nekkoandjoruba/nekkoandjoruba/',
'JaxEpoch': 'johngreen/quicken/',
'QuantumRockOfAges': 'DreamchildNYC/quantum/',
'ClownSamurai' : 'qsamurai/clownsamurai/',
}
return dict((name, type('WebcomicsNation_%s' % name,
(_WebcomicsNation,),
dict(name='WebcomicsNation/' + name,
latestUrl='http://www.webcomicsnation.com/' + subpath)))
for name, subpath in comics.items())
globals().update(webcomicsNation())
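# Same dynamic-class trick as in the UClick plugin: webcomicsNation() builds one
# _WebcomicsNation subclass per entry (e.g. AgnesQuill with latestUrl
# 'http://www.webcomicsnation.com/daveroman/agnes/') and globals().update()
# exposes the generated classes for the plugin loader.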
class WhiteNoise(_BasicScraper):
latestUrl = 'http://www.wncomic.com/archive.php'
imageUrl = 'http://www.wncomic.com/archive_comments.php?strip_id=%s'
imageSearch = compile(r'(istrip_files/strips/.+?)"')
prevSearch = compile(r'</a><a href="(.+?)"><img src="images/top_back.jpg" ')
help = 'Index format: n'
class WapsiSquare(_BasicScraper):
latestUrl = 'http://wapsisquare.com/'
imageUrl = 'http://wapsisquare.com/comic/%s'
imageSearch = compile(r'<img src="(http://wapsisquare.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
help = 'Index format: strip-name'
class WrongWay(_BasicScraper):
latestUrl = 'http://www.wrongwaycomics.com/'
imageUrl = 'http://www.wrongwaycomics.com/%s.html'
imageSearch = compile(r'<img src="(comics/.+?)"')
prevSearch = compile(r' <a class="comicNav" href="(.+?)" onmouseover="previousLinkIn\(\)"')
help = 'Index format: nnn'
class WeCanSleepTomorrow(_BasicScraper):
latestUrl = 'http://wecansleeptomorrow.com/'
imageUrl = 'http://wecansleeptomorrow.com/%s/'
imageSearch = compile(r'<img src="(http://wecansleeptomorrow.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
help = 'Index format: yyyy/mm/dd/stripname'
class _WLP(_BasicScraper):
imageSearch=compile(r'SRC="(http://www.wlpcomics.com/adult/.+?|http://www.wlpcomics.com/general/.+?)"', IGNORECASE)
prevSearch=compile(r'</a> <A HREF="(\w+.html)">Previous Page</a>', IGNORECASE)
help='Index format: nnn'
@property
def baseUrl(self):
return 'http://www.wlpcomics.com/%s' % (self.path,)
@property
def imageUrl(self):
return self.baseUrl + '%s.html'
def namer(self, imageUrl, pageUrl):
return pageUrl.split('/')[-1].split('.')[0]
def starter(self):
# XXX: ergh
meth = bounceStarter(self.baseUrl, compile(r'</a> <A HREF="(\w+.html)">Next Page</a>', IGNORECASE))
return meth.__get__(self, type(self))()
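# Note on the starter above: bounceStarter() (from ..helpers, not shown here)
# apparently returns a descriptor meant to be attached at class-creation time;
# because baseUrl is a per-subclass property, the starter has to be built at call
# time and is bound by hand via __get__ before being invoked. Hence the "XXX: ergh".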
class ChichiChan(_WLP):
name = 'WLP/ChichiChan'
path = 'adult/chichi/'
class ChocolateMilkMaid(_WLP):
name = 'WLP/ChocolateMilkMaid'
path = 'adult/cm/'
class MaidAttack(_WLP):
name = 'WLP/MaidAttack'
path = 'general/maidattack/'
class ShadowChasers(_WLP):
name = 'WLP/ShadowChasers'
path = 'general/shadowchasers/'
class Stellar(_WLP):
name = 'WLP/Stellar'
path = 'adult/stellar/'
class Wondermark(_BasicScraper):
latestUrl = 'http://wondermark.com'
imageUrl = 'http://wondermark.com/%s/'
imageSearch = compile(r'<img src="(http://wondermark.com/c/.+?)"')
prevSearch = compile(r'<a href="(.+?)" rel="prev">')
help = 'Index format: nnn'

25
dosagelib/plugins/x.py Normal file
View file

@@ -0,0 +1,25 @@
from re import compile
from ..helpers import _BasicScraper, bounceStarter
class xkcd(_BasicScraper):
starter = bounceStarter('http://xkcd.com/', compile(r'<a rel="next" href="(/?\d+/?)"[^>]*>Next'))
imageUrl = 'http://xkcd.com/c%s.html'
imageSearch = compile(r'<img[^<]+src="(http://imgs.xkcd.com/comics/[^<>"]+)"')
prevSearch = compile(r'<a rel="prev" href="(/?\d+/?)"[^>]*>&lt; Prev')
help = 'Index format: n (unpadded)'
@classmethod
def namer(cls, imageUrl, pageUrl):
index = int(pageUrl.rstrip('/').split('/')[-1])
name = imageUrl.split('/')[-1].split('.')[0]
return 'c%03d-%s' % (index, name)
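# Example (values hypothetical): for pageUrl 'http://xkcd.com/614/' and
# imageUrl 'http://imgs.xkcd.com/comics/woodpecker.png' the namer returns
# 'c614-woodpecker', so saved files sort by strip number yet keep the original name.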
class xkcdSpanish(_BasicScraper):
latestUrl = 'http://es.xkcd.com/xkcd-es/'
imageUrl = 'http://es.xkcd.com/xkcd-es/strips/%s/'
imageSearch = compile(r'src="(/site_media/strips/.+?)"')
prevSearch = compile(r'<a rel="prev" href="(http://es.xkcd.com/xkcd-es/strips/.+?)">Anterior</a>')
help = 'Index format: stripname'

27
dosagelib/plugins/y.py Normal file
View file

@@ -0,0 +1,27 @@
from re import compile, MULTILINE
from ..helpers import _BasicScraper
class YAFGC(_BasicScraper):
latestUrl = 'http://yafgc.shipsinker.com/'
imageUrl = 'http://yafgc.shipsinker.com/index.php?strip_id=%s'
imageSearch = compile(r'(istrip_.+?)"')
prevSearch = compile(r'(/.+?)">\r\n.+?prev.gif', MULTILINE)
help = 'Index format: n'
class YouSayItFirst(_BasicScraper):
latestUrl = 'http://www.yousayitfirst.com/'
imageUrl = 'http://www.soapylemon.com/comics/index.php?date=%s'
imageSearch = compile(r'(http://.+?comics/.+?.jpg)[^<]')
prevSearch = compile(r'(/comics/index.php\?date=.+?)".+?P')
help = 'Index format: yyyymmdd'
class Yirmumah(_BasicScraper):
latestUrl = 'http://yirmumah.net/archives.php'
imageUrl = 'http://yirmumah.net/archives.php?date=%s'
imageSearch = compile(r'<img src="(strips/\d{8}\..*?)"')
prevSearch = compile(r'<a href="(\?date=\d{8})">.*Previous')
help = 'Index format: yyyymmdd'

18
dosagelib/plugins/z.py Normal file
View file

@@ -0,0 +1,18 @@
from re import compile
from ..helpers import _BasicScraper
class Zapiro(_BasicScraper):
latestUrl = 'http://www.mg.co.za/zapiro/all'
imageSearch = compile(r'<img src="(cartoons/[^"]+)"')
prevSearch = compile(r'<a href="([^"]+)">&gt;')
class ZombieHunters(_BasicScraper):
latestUrl = 'http://www.thezombiehunters.com/'
imageUrl = 'http://www.thezombiehunters.com/index.php?strip_id=%s'
imageSearch = compile(r'"(.+?strips/.+?)"')
prevSearch = compile(r'</a><a href="(.+?)"><img id="prevcomic" ')
help = 'Index format: n (unpadded)'

91
dosagelib/progress.py Normal file
View file

@@ -0,0 +1,91 @@
from __future__ import division
import sys
import time
from . import util
class Guess(object):
def __init__(self, weight):
self.weight = weight
self.guess = 0
self.best = 0
def feed(self, value):
self.guess = self.weight * value + (1 - self.weight) * self.guess
def distance(self, value):
return (self.guess - value) ** 2
class FortuneTeller(object):
weights = (0.2, 0.3, 0.4)
def __init__(self):
self.guesses = map(Guess, self.weights)
def feed(self, value):
best = min([(guess.distance(value), guess) for guess in self.guesses])[1]
best.best += 1
for guess in self.guesses:
guess.feed(value)
def predict(self):
return max([(guess.best, guess) for guess in self.guesses])[1].guess
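# Rough idea of the two classes above: Guess.feed() is an exponential moving
# average, guess = weight * value + (1 - weight) * guess, so e.g. with weight 0.4
# and an initial guess of 0, feeding 100 twice yields 40.0 and then 64.0.
# FortuneTeller keeps three such averages (weights 0.2/0.3/0.4), credits whichever
# one was closest to each new sample, and predict() reports the estimate of the
# average that has been closest most often.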
class OperationComplete(Exception): pass
def drawBar(fill, total, caption):
screenWidth = util.getWindowSize()
ratio = fill / total
mask = '[%%s>%%s] (%.2f%%%%) %s' % (ratio * 100, caption)
barWidth = screenWidth - len(mask) + 6
fillWidth = int(barWidth * ratio) - 1
emptyWidth = barWidth - fillWidth - 1
sys.stdout.write('\r')
sys.stdout.write(mask % ('=' * fillWidth, '-' * emptyWidth))
sys.stdout.flush()
def drawBounceBar(pos, caption):
screenWidth = util.getWindowSize()
mask = '[%%s<=>%%s] %s' % (caption,)
barWidth = screenWidth - len(mask) + 4
leftWidth = pos % barWidth - 1
rightWidth = barWidth - leftWidth - 1
sys.stdout.write('\r')
sys.stdout.write(mask % (' ' * leftWidth, ' ' * rightWidth))
sys.stdout.flush()
def progressBar(fn):
completed = bps = 0
count = 0
ft = FortuneTeller()
currentTime = lastTime = time.time()
try:
while 1:
inc = 0
while currentTime - lastTime < 0.2:
progress, total = fn()
inc += progress
currentTime = time.time()
ft.feed(inc / (currentTime - lastTime))
lastTime = currentTime
completed += inc
bps = ft.predict()
if total == 0:
drawBounceBar(count, '%s/sec' % util.saneDataSize(bps))
count += 1
else:
drawBar(completed, max(total, completed), '%s/sec' % util.saneDataSize(bps))
except OperationComplete:
if count > 0:
drawBounceBar(count, '%s/sec' % util.saneDataSize(bps))
else:
drawBar(max(total, completed), max(total, completed), '%s/sec' % util.saneDataSize(bps))
print ''
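# Expected protocol for fn, inferred from the loop above (names in the sketch are
# hypothetical): fn() is polled repeatedly, returns a tuple of (bytes transferred
# since the last call, total bytes or 0 if unknown) and raises OperationComplete
# when the transfer is done; a zero total selects the bouncing bar.
#
#   def poll():
#       chunk = response.read(4096)
#       if not chunk:
#           raise OperationComplete()
#       outfile.write(chunk)
#       return len(chunk), content_length
#
#   progressBar(poll)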

80
dosagelib/rss.py Normal file
View file

@@ -0,0 +1,80 @@
# TODO: Not sure if this RSS output is "valid"; it should be, though.
# Might also be nice to categorise Comics under one Item
import xml.dom.minidom
import time
class Feed(object):
def __init__(self, title, link, description, lang='en-us'):
self.rss = xml.dom.minidom.Document()
rss_root = self.rss.appendChild(self.rss.createElement('rss'))
rss_root.setAttribute('version', '2.0')
self.channel = rss_root.appendChild(self.rss.createElement('channel'))
self.addElement(self.channel, 'title', title)
self.addElement(self.channel, 'link', link)
self.addElement(self.channel, 'language', lang)
self.addElement(self.channel, 'description', description)
def RFC822Date(data):
return time.strftime('%a, %d %b %Y %H:%M:%S GMT', data)
def addElement(self, parent, tag, value):
return parent.appendChild(self.rss.createElement(tag)).appendChild(self.rss.createTextNode(value))
def insertHead(self, title, link, description, date):
item = self.rss.createElement('item')
self.addElement(item, 'title', title)
self.addElement(item, 'link', link)
self.addElement(item, 'description', description)
self.addElement(item, 'pubDate', date)
elems = self.rss.getElementsByTagName('item')
if elems:
self.channel.insertBefore(item, elems[0])
else:
self.channel.appendChild(item)
def addItem(self, title, link, description, date):
item = self.rss.createElement('item')
self.addElement(item, 'title', title)
self.addElement(item, 'link', link)
self.addElement(item, 'description', description)
self.addElement(item, 'pubDate', date)
self.channel.appendChild(item)
def write(self, path):
file = open(path, 'w')
file.write(self.getXML())
file.close()
def getXML(self):
return self.rss.toxml()
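# A minimal usage sketch (values are illustrative; dates must already be RFC 822
# strings in the '%a, %d %b %Y %H:%M:%S GMT' form that parseFeed() expects):
#   feed = Feed('Daily Dosage', 'http://localhost/comics/', 'Comic updates')
#   feed.insertHead('SomeComic', 'http://localhost/comics/somecomic/001.png',
#                   '<img src="001.png"/>', 'Wed, 20 Jun 2012 12:00:00 GMT')
#   feed.write('dailydose.rss')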
def parseFeed(filename, yesterday):
dom = xml.dom.minidom.parse(filename)
getText = lambda node, tag: node.getElementsByTagName(tag)[0].childNodes[0].data
getNode = lambda tag: dom.getElementsByTagName(tag)
content = getNode('channel')[0] # Only one channel node
feedTitle = getText(content, 'title')
feedLink = getText(content, 'link')
feedDesc = getText(content, 'description')
feed = Feed(feedTitle, feedLink, feedDesc)
for item in getNode('item'):
itemDate = time.strptime(getText(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S GMT')
if (itemDate > yesterday): # If newer than yesterday
feed.addItem(getText(item, 'title'),
getText(item, 'link'),
getText(item, 'description'),
getText(item, 'pubDate'))
return feed

141
dosagelib/scraper.py Normal file
View file

@@ -0,0 +1,141 @@
import os
import sys
from .helpers import _BasicScraper
disabled = []
def init_disabled():
filename = os.path.expanduser('~/.dosage/disabled')
if not os.path.isfile(filename):
return
with open(filename) as f:
for line in f:
if line and not line.startswith('#'):
disabled.append(line.rstrip())
init_disabled()
class DisabledComicError(ValueError):
pass
def get(comicName):
"""Returns a comic module object."""
candidates = []
for scraper in get_scrapers():
lname = scraper.get_name().lower()
cname = comicName.lower()
if lname == cname:
# perfect match
return scraper
if cname in lname:
candidates.append(scraper)
if len(candidates) == 1:
return candidates[0]
elif candidates:
comics = ", ".join(x.get_name() for x in candidates)
raise ValueError('Multiple comics %s found.' % comics)
else:
raise ValueError('Comic %r not found.' % comicName)
def items():
return get_scrapers()
_scrapers = None
def get_scrapers():
"""Find all comic scraper classes in the plugins directory.
The result is cached.
@return: list of _BasicScraper classes
@rtype: list of _BasicScraper
"""
global _scrapers
if _scrapers is None:
_scrapers = list(get_all_plugins(get_modules()))
_scrapers.sort(key=lambda s: s.get_name())
check_scrapers()
return _scrapers
def check_scrapers():
d = {}
for s in _scrapers:
name = s.get_name().lower()
if name in d:
name1 = s.get_name()
name2 = d[name].get_name()
raise ValueError('Duplicate scrapers %s and %s found' % (name1, name2))
d[name] = s
def get_modules():
"""Find all valid modules in the plugins directory. A valid module
must have a .py extension, and is importable.
@return: all loaded valid modules
@rtype: iterator of module
"""
# load from the plugins folder
folder = os.path.join(os.path.dirname(__file__), 'plugins')
for filename in get_importable_modules(folder):
try:
module = load_module(filename)
if module is not None:
yield module
except StandardError, msg:
print "ERROR", msg
def get_importable_modules(folder):
"""Find all module files in the given folder that end witn '.py' and
don't start with an underscore.
@return module filenames
@rtype: iterator of string
"""
for fname in os.listdir(folder):
if fname.endswith('.py') and not fname.startswith('_'):
yield os.path.join(folder, fname)
def load_module(filename):
"""Load and return the module given by the filename.
Exceptions other than ImportError are not caught.
@return: loaded module or None on import errors
@rtype: module or None
"""
name = os.path.splitext(os.path.basename(filename))[0]
modulename = "dosagelib.plugins.%s" % name
__import__(modulename)
return sys.modules[modulename]
def get_all_plugins(modules):
"""Find all scrapers in all modules.
@param modules: the modules to search
@ptype modules: iterator of modules
@return: found scrapers
@rtype: iterator of class objects
"""
for module in modules:
for plugin in get_plugins(module):
yield plugin
def get_plugins(module):
"""Return all subclasses of _BasicScraper in the module.
If the module defines __all__, only those entries will be searched,
otherwise all objects not starting with '_' will be searched.
"""
try:
names = module.__all__
except AttributeError:
names = [x for x in vars(module) if not x.startswith('_')]
for name in names:
try:
obj = getattr(module, name)
except AttributeError:
continue
try:
if issubclass(obj, _BasicScraper):
yield obj
except TypeError:
continue
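# Typical lookup flow (a sketch): get_scrapers() imports every plugin module once,
# collects and caches all _BasicScraper subclasses and rejects duplicate names;
# get() then resolves a user-supplied name, preferring an exact match over a
# single substring match.
#   from dosagelib import scraper
#   scraperclass = scraper.get('xkcd')
#   comic = scraperclass()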

310
dosagelib/util.py Normal file
View file

@@ -0,0 +1,310 @@
from __future__ import division
import urllib2, urlparse
import sys
import struct
import array
import os
import cgi
import re
import traceback
import time
from htmlentitydefs import name2codepoint
from math import log, floor
from .output import out
from .configuration import UserAgent, AppName, App, SupportUrl
class NoMatchError(Exception): pass
def getMatchValues(matches):
return set([match.group(1) for match in matches])
def fetchManyMatches(url, regexes):
'''Returns a list containing lists of matches for each regular expression, in the same order.'''
out.write('Matching regex(es) %r multiple times against %s...' % ([rex.pattern for rex in regexes], url), 2)
page = urlopen(url)
data = page.read()
matches = [getMatchValues(regex.finditer(data)) for regex in regexes]
if matches:
out.write('...found %r' % (matches,), 2)
else:
out.write('...not found!', 2)
return list(matches)
def fetchMatches(url, regexes):
out.write('Matching regex(es) %r against %s...' % ([rex.pattern for rex in regexes], url), 2)
page = urlopen(url)
data = page.read()
matches = []
for regex in regexes:
match = regex.search(data)
if match:
matches.append(match.group(1))
if matches:
out.write('...found %r' % (matches,), 2)
else:
out.write('...not found!', 2)
return matches
def fetchMatch(url, regex):
matches = fetchMatches(url, (regex,))
if matches:
return matches[0]
return None
def fetchUrl(url, regex):
match = fetchMatch(url, regex)
if match:
return urlparse.urljoin(url, match)
return None
baseSearch = re.compile(r'<base\s+href="([^"]*)"\s+/?>', re.IGNORECASE)
def fetchUrls(url, regexes):
matches = fetchMatches(url, [baseSearch] + list(regexes))
baseUrl = matches.pop(0) or url
return [urlparse.urljoin(baseUrl, match) for match in matches]
def fetchManyUrls(url, regexes):
matchGroups = fetchManyMatches(url, [baseSearch] + list(regexes))
baseUrl = matchGroups.pop(0) or [url]
baseUrl = baseUrl[0]
xformedGroups = []
for matchGroup in matchGroups:
xformedGroups.append([urlparse.urljoin(baseUrl, match) for match in matchGroup])
return xformedGroups
def _unescape(text):
"""
Replace HTML entities and character references.
"""
def _fixup(m):
text = m.group(0)
if text[:2] == "&#":
# character reference
try:
if text[:3] == "&#x":
text = unichr(int(text[3:-1], 16))
else:
text = unichr(int(text[2:-1]))
except ValueError:
pass
else:
# named entity
try:
text = unichr(name2codepoint[text[1:-1]])
except KeyError:
pass
if isinstance(text, unicode):
text = text.encode('utf-8')
text = urllib2.quote(text, safe=';/?:@&=+$,')
return text
return re.sub("&#?\w+;", _fixup, text)
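# Examples (from tests/test_util.py below): _unescape('foo&amp;bar') == 'foo&bar'
# and _unescape('foo&#160;bar') == 'foo%C2%A0bar' (non-ASCII results are UTF-8
# encoded and percent-quoted).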
def normaliseURL(url):
"""
Removes any leading empty segments to avoid breaking urllib2; also replaces
HTML entities and character references.
"""
# XXX: brutal hack
url = _unescape(url)
url = url.replace(' ', '%20')
pu = list(urlparse.urlparse(url))
segments = pu[2].replace(' ', '%20').split('/')
while segments and segments[0] == '':
del segments[0]
pu[2] = '/' + '/'.join(segments)
return urlparse.urlunparse(pu)
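# Example (taken from tests/test_util.py below):
# normaliseURL('http://example.com//bar/baz&amp;baz') == 'http://example.com/bar/baz&baz'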
def urlopen(url, referrer=None, retries=5):
# Work around urllib2 brokenness
url = normaliseURL(url)
req = urllib2.Request(url)
if referrer:
req.add_header('Referrer', referrer)
req.add_header('Referer', referrer)
req.add_header('User-Agent', UserAgent)
tries = 0
while 1:
try:
urlobj = urllib2.urlopen(req)
break
except IOError:
out.write('URL retrieval failed, sleeping %d seconds and retrying (%d)' % (2**tries, tries), 2)
time.sleep(2**tries)
tries += 1
if tries >= retries:
raise
return urlobj
def getWindowSize():
try:
from fcntl import ioctl
from termios import TIOCGWINSZ
except ImportError:
raise NotImplementedError
st = 'HHHH'
names = 'ws_row', 'ws_col', 'ws_xpixel', 'ws_ypixel'
buf = array.array('b', ' ' * struct.calcsize(st))
try:
ioctl(sys.stderr, TIOCGWINSZ, buf, True)
except IOError:
raise NotImplementedError
winsize = dict(zip(names, struct.unpack(st, buf.tostring())))
return winsize['ws_col']
suffixes = ('B', 'kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')
def saneDataSize(size):
if size == 0:
return 'unk B'
index = int(floor(log(abs(size), 1024)))
index = min(index, len(suffixes) - 1)
index = max(index, 0)
factor = 1024 ** index
return '%0.3f %s' % (float(size) / factor, suffixes[index])
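# Examples (see tests/test_util.py below): saneDataSize(0) == 'unk B',
# saneDataSize(2.075 * 2 ** 10) == '2.075 kB', and negative sizes keep their sign.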
def splitpath(path):
c = []
head, tail = os.path.split(path)
while tail:
c.insert(0, tail)
head, tail = os.path.split(head)
return c
def getRelativePath(basepath, path):
basepath = splitpath(os.path.abspath(basepath))
path = splitpath(os.path.abspath(path))
afterCommon = False
for c in basepath:
if afterCommon or path[0] != c:
path.insert(0, os.path.pardir)
afterCommon = True
else:
del path[0]
return os.path.join(*path)
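# Example (POSIX paths, chosen only for illustration):
# getRelativePath('/a/b', '/a/c/d') == '../c/d'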
def getQueryParams(url):
query = urlparse.urlsplit(url)[3]
out.write('Extracting query parameters from %r (%r)...' % (url, query), 3)
return cgi.parse_qs(query)
def internal_error(out=sys.stderr, etype=None, evalue=None, tb=None):
"""Print internal error message (output defaults to stderr)."""
print >> out, os.linesep
print >> out, """********** Oops, I did it again. *************
You have found an internal error in %(app)s. Please write a bug report
at %(url)s and include the following information:
- your commandline arguments and any configuration file in ~/.dosage/
- the system information below
Not disclosing some of the information above due to privacy reasons is ok.
I will try to help you nonetheless, but you have to give me something
I can work with ;) .
""" % dict(app=AppName, url=SupportUrl)
if etype is None:
etype = sys.exc_info()[0]
if evalue is None:
evalue = sys.exc_info()[1]
print >> out, etype, evalue
if tb is None:
tb = sys.exc_info()[2]
traceback.print_exception(etype, evalue, tb, None, out)
print_app_info(out=out)
print_proxy_info(out=out)
print_locale_info(out=out)
print >> out, os.linesep, \
"******** %s internal error, over and out ********" % AppName
def print_env_info(key, out=sys.stderr):
"""If given environment key is defined, print it out."""
value = os.getenv(key)
if value is not None:
print >> out, key, "=", repr(value)
def print_proxy_info(out=sys.stderr):
"""Print proxy info."""
print_env_info("http_proxy", out=out)
def print_locale_info(out=sys.stderr):
"""Print locale info."""
for key in ("LANGUAGE", "LC_ALL", "LC_CTYPE", "LANG"):
print_env_info(key, out=out)
def print_app_info(out=sys.stderr):
"""Print system and application info (output defaults to stderr)."""
print >> out, "System info:"
print >> out, App
print >> out, "Python %(version)s on %(platform)s" % \
{"version": sys.version, "platform": sys.platform}
stime = strtime(time.time())
print >> out, "Local time:", stime
def strtime(t):
"""Return ISO 8601 formatted time."""
return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t)) + \
strtimezone()
def strtimezone():
"""Return timezone info, %z on some platforms, but not supported on all.
"""
if time.daylight:
zone = time.altzone
else:
zone = time.timezone
return "%+04d" % (-zone//3600)
def tagre(tag, attribute, value):
"""Return a regular expression matching the given HTML tag, attribute
and value. It matches the tag and attribute names case insensitive,
and skips arbitrary whitespace and leading HTML attributes.
Also, it adds a match group for the value.
@param tag: the tag name
@ptype tag: string
@param attribute: the attribute name
@ptype attribute: string
@param value: the attribute value
@ptype value: string
@return: the generated regular expression suitable for re.compile()
@rtype: string
"""
attrs = dict(
tag=case_insensitive_re(tag),
attribute=case_insensitive_re(attribute),
value=value,
)
return r'<\s*%(tag)s[^>]*\s+%(attribute)s\s*=\s*"(%(value)s)"' % attrs
def case_insensitive_re(name):
"""Reformat the given name to a case insensitive regular expression string
without using re.IGNORECASE. This way selective strings can be made case
insensitive.
@param name: the name to make case insensitive
@ptype name: string
@return: the case insenstive regex
@rtype: string
"""
return "".join("[%s%s]" % (c.lower(), c.upper()) for c in name)

189
setup.py Normal file
View file

@@ -0,0 +1,189 @@
#!/usr/bin/env python
# Dosage, the webcomic downloader
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import os
import sys
import re
from distutils.core import setup, Distribution
from distutils.command.install_lib import install_lib
from distutils import util
from distutils.file_util import write_file
AppVersion = '1.7'
AppName = 'Dosage'
def normpath (path):
"""Norm a path name to platform specific notation."""
return os.path.normpath(path)
def cnormpath (path):
"""Norm a path name to platform specific notation and make it absolute."""
path = normpath(path)
if os.name == 'nt':
# replace slashes with backslashes
path = path.replace("/", "\\")
if not os.path.isabs(path):
path = normpath(os.path.join(sys.prefix, path))
return path
release_ro = re.compile(r"\(released (.+)\)")
def get_release_date ():
"""Parse and return relase date as string from doc/changelog.txt."""
fname = os.path.join("doc", "changelog.txt")
release_date = "unknown"
with open(fname) as fd:
# the release date is on the first line
line = fd.readline()
mo = release_ro.search(line)
if mo:
release_date = mo.group(1)
return release_date
class MyInstallLib (install_lib, object):
"""Custom library installation."""
def install (self):
"""Install the generated config file."""
outs = super(MyInstallLib, self).install()
infile = self.create_conf_file()
outfile = os.path.join(self.install_dir, os.path.basename(infile))
self.copy_file(infile, outfile)
outs.append(outfile)
return outs
def create_conf_file (self):
"""Create configuration file."""
cmd_obj = self.distribution.get_command_obj("install")
cmd_obj.ensure_finalized()
# we have to write a configuration file because we need the
# <install_data> directory (and other stuff like author, url, ...)
# all paths are made absolute by cnormpath()
data = []
for d in ['purelib', 'platlib', 'lib', 'headers', 'scripts', 'data']:
attr = 'install_%s' % d
if cmd_obj.root:
# cut off root path prefix
cutoff = len(cmd_obj.root)
# don't strip the path separator
if cmd_obj.root.endswith(os.sep):
cutoff -= 1
val = getattr(cmd_obj, attr)[cutoff:]
else:
val = getattr(cmd_obj, attr)
if attr == 'install_data':
cdir = os.path.join(val, "share", "dosage")
data.append('config_dir = %r' % cnormpath(cdir))
elif attr == 'install_lib':
if cmd_obj.root:
_drive, tail = os.path.splitdrive(val)
if tail.startswith(os.sep):
tail = tail[1:]
self.install_lib = os.path.join(cmd_obj.root, tail)
else:
self.install_lib = val
data.append("%s = %r" % (attr, cnormpath(val)))
self.distribution.create_conf_file(data, directory=self.install_lib)
return self.get_conf_output()
def get_conf_output (self):
return self.distribution.get_conf_filename(self.install_lib)
def get_outputs (self):
"""Add the generated config file to the list of outputs."""
outs = super(MyInstallLib, self).get_outputs()
outs.append(self.get_conf_output())
return outs
class MyDistribution (Distribution, object):
"""Custom distribution class generating config file."""
def __init__ (self, attrs):
"""Set console and windows scripts."""
super(MyDistribution, self).__init__(attrs)
self.console = ['dosage']
def run_commands (self):
"""Generate config file and run commands."""
cwd = os.getcwd()
data = []
data.append('config_dir = %r' % os.path.join(cwd, "config"))
data.append("install_data = %r" % cwd)
data.append("install_scripts = %r" % cwd)
self.create_conf_file(data)
super(MyDistribution, self).run_commands()
def get_conf_filename (self, directory):
"""Get name for config file."""
return os.path.join(directory, "_%s_configdata.py" % self.get_name())
def create_conf_file (self, data, directory=None):
"""Create local config file from given data (list of lines) in
the directory (or current directory if not given)."""
data.insert(0, "# this file is automatically created by setup.py")
data.insert(0, "# -*- coding: iso-8859-1 -*-")
if directory is None:
directory = os.getcwd()
filename = self.get_conf_filename(directory)
# add metadata
metanames = ("name", "version", "author", "author_email",
"maintainer", "maintainer_email", "url",
"license", "description", "long_description",
"keywords", "platforms", "fullname", "contact",
"contact_email")
for name in metanames:
method = "get_" + name
val = getattr(self.metadata, method)()
if isinstance(val, str):
val = unicode(val)
cmd = "%s = %r" % (name, val)
data.append(cmd)
data.append('release_date = "%s"' % get_release_date())
# write the config file
util.execute(write_file, (filename, data),
"creating %s" % filename, self.verbose >= 1, self.dry_run)
args = dict(
name = AppName,
version = AppVersion,
description = 'a powerful webcomic downloader and archiver',
author = 'Tristan Seligmann, Jonathan Jacobs, Bastian Kleineidam',
author_email = 'calvin@users.sourceforge.net',
maintainer = 'Bastian Kleineidam',
maintainer_email = 'calvin@users.sourceforge.net',
license = 'MIT',
url = 'https://github.com/wummel/dosage',
packages = (
'dosagelib',
'dosagelib.plugins',
),
scripts = (
'dosage',
),
distclass = MyDistribution,
cmdclass = {
'install_lib': MyInstallLib,
},
)
if __name__ == '__main__':
setup(**args)

0
tests/__init__.py Normal file
View file

48
tests/test_comics.py Normal file
View file

@@ -0,0 +1,48 @@
import tempfile
import shutil
from itertools import izip
from unittest import TestCase
from dosagelib import scraper
class _ComicTester(TestCase):
"""Basic comic test class."""
scraperclass=None
def test_comic(self):
# Test a scraper. It must be able to traverse backward for
# at least 5 pages from the start, and find strip images
# on at least 4 pages.
module = self.scraperclass()
num = empty = 0
for n, comics in izip(xrange(5), module):
if len(comics) == 0:
empty += 1
for comic in comics:
self.save(comic)
num += 1
self.assertTrue(num >= 4, 'Traversal failed after %d strips.' % num)
self.assertTrue(empty <= 1, 'Failed to find images on %d pages.' % empty)
def save(self, comic):
# create a temporary directory
tmpdir = tempfile.mkdtemp()
try:
filename, saved = comic.save(tmpdir)
self.assertTrue(saved, 'Could not save comic %s to %s' % (comic, tmpdir))
finally:
shutil.rmtree(tmpdir)
def generate_comic_testers():
"""For each comic scraper, create a test class.
This currently generates over 4000 test classes (one for each comic),
so this takes a while."""
for s in scraper.items():
name = 'Test'+s.__name__
globals()[name] = type(name,
(_ComicTester,),
dict(scraperclass=s)
)
generate_comic_testers()

82
tests/test_util.py Normal file
View file

@@ -0,0 +1,82 @@
import re
from unittest import TestCase
from dosagelib.util import saneDataSize, normaliseURL, _unescape, tagre
class SizeFormattingTest(TestCase):
"""
Unit tests for L{saneDataSize}.
"""
def check(self, size, expectedOutput):
# Check that a particular size is formatted as expected; in particular, a
# negative size should be formatted the same as a positive size, except
# with a minus sign in front.
self.assertEqual(saneDataSize(size), expectedOutput)
self.assertEqual(saneDataSize(-size), '-' + expectedOutput)
def test_verySmallSize(self):
# Sizes smaller than a single byte should be formatted as bytes; this
# case is fairly pathological, so the output is somewhat nonsensical.
self.check(0.1, '0.100 B')
def test_normalSizes(self):
# Sizes should be formatted in the largest unit for which the size will
# not be less than a single unit.
self.check(1, '1.000 B')
self.check(2.075 * 2 ** 10, '2.075 kB')
self.check(5.88 * 2 ** 20, '5.880 MB')
self.check(13.34 * 2 ** 30, '13.340 GB')
self.check(445.348 * 2 ** 40, '445.348 TB')
self.check(34.25 * 2 ** 50, '34.250 PB')
self.check(3.14 * 2 ** 60, '3.140 EB')
self.check(57.892 * 2 ** 70, '57.892 ZB')
self.check(999.99 * 2 ** 80, '999.990 YB')
def test_veryLargeSize(self):
# Sizes larger than 1024 yottabytes should be formatted as yottabytes.
self.check(5567254 * 2 ** 80, '5567254.000 YB')
class URLTest(TestCase):
"""
Tests for URL utility functions.
"""
def test_unescape(self):
# Test HTML replacement.
self.assertEqual(_unescape('foo&amp;bar'), 'foo&bar')
self.assertEqual(_unescape('foo&#160;bar'), 'foo%C2%A0bar')
self.assertEqual(_unescape('&quot;foo&quot;'), '%22foo%22')
def test_normalisation(self):
# Test URL normalisation.
self.assertEqual(normaliseURL('http://example.com//bar/baz&amp;baz'),
'http://example.com/bar/baz&baz')
class RegexTest(TestCase):
ValuePrefix = '/bla/'
TagTests = (
('<img src="%s">', ValuePrefix+'foo', True),
('< img src = "%s" >', ValuePrefix, True),
('<img class="prev" src="%s">', ValuePrefix+'...', True),
('<img origsrc="%s">', ValuePrefix, False),
('<Img src="%s">', ValuePrefix, True),
('<img SrC="%s">', ValuePrefix, True),
('<img src="%s">', ValuePrefix[:-1], False),
)
def test_regex(self):
matcher = re.compile(tagre("img", "src", self.ValuePrefix+".*"))
for tag, value, domatch in self.TagTests:
self.match_tag(matcher, tag, value, domatch)
def match_tag(self, matcher, tag, value, domatch=True):
match = matcher.match(tag % value)
if domatch:
self.assertTrue(match)
self.assertEqual(match.group(1), value)
else:
self.assertFalse(match)