commit f91fb80a39edb4d484e85106da2f6f607dd64e84 Author: Bastian Kleineidam Date: Wed Jun 20 21:58:13 2012 +0200 Initial commit to Github. diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..bbd213dd8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +*.orig +*.pyc +*.pyo +/Comics +/build +/dist +/.achievements +/MANIFEST +/todo +/Changelog.patool* +/_Dosage_configdata.py +/comics.test diff --git a/COPYING b/COPYING new file mode 100644 index 000000000..1bdab18b2 --- /dev/null +++ b/COPYING @@ -0,0 +1,20 @@ +Copyright © 2004-2008 Jonathan Jacobs and Tristan Seligmann + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..7c3b6831c --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +include MANIFEST.in +include COPYING doc/*.txt +include Makefile +recursive-include tests *.py diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..1f1ad357b --- /dev/null +++ b/Makefile @@ -0,0 +1,88 @@ +# This Makefile is only used by developers. +PYVER:=2.7 +PYTHON:=python$(PYVER) +VERSION:=$(shell $(PYTHON) setup.py --version) +ARCHIVE:=dosage-$(VERSION).tar.gz +PY_FILES_DIRS := dosage dosagelib tests *.py +PY2APPOPTS ?= +NOSETESTS:=$(shell which nosetests) +NUMPROCESSORS:=$(shell grep -c processor /proc/cpuinfo) +CHMODMINUSMINUS:=-- +# which test modules to run +TESTS ?= tests/ +# set test options, eg. to "--nologcapture" +TESTOPTS= + +all: + + +.PHONY: chmod +chmod: + -chmod -R a+rX,u+w,go-w $(CHMODMINUSMINUS) * + find . -type d -exec chmod 755 {} \; + +.PHONY: dist +dist: + git archive --format=tar --prefix=dosage-$(VERSION)/ HEAD | gzip -9 > ../$(ARCHIVE) + [ -f ../$(ARCHIVE).sha1 ] || sha1sum ../$(ARCHIVE) > ../$(ARCHIVE).sha1 + [ -f ../$(ARCHIVE).asc ] || gpg --detach-sign --armor ../$(ARCHIVE) + +doc/dosage.1.html: doc/dosage.1 + man2html -r $< | tail -n +2 | sed 's/Time:.*//g' | sed 's@/:@/@g' > $@ + +.PHONY: release +release: distclean releasecheck dist + git tag v$(VERSION) +# @echo "Register at Python Package Index..." +# $(PYTHON) setup.py register +# freecode-submit < dosage.freecode + + +.PHONY: releasecheck +releasecheck: check test + @if egrep -i "xx\.|xxxx|\.xx" doc/changelog.txt > /dev/null; then \ + echo "Could not release: edit doc/changelog.txt release date"; false; \ + fi +# @if ! grep "Version: $(VERSION)" dosage.freecode > /dev/null; then \ +# echo "Could not release: edit dosage.freecode version"; false; \ +# fi + +# The check programs used here are mostly local scripts on my private system. +# So for other developers there is no need to execute this target. 
+.PHONY: check +check: + [ ! -d .svn ] || check-nosvneolstyle -v + check-copyright + check-pofiles -v + py-tabdaddy + py-unittest2-compat tests/ + +.PHONY: pyflakes +pyflakes: + pyflakes $(PY_FILES_DIRS) + +.PHONY: count +count: + @sloccount dosage dosagelib | grep "Total Physical Source Lines of Code" + +.PHONY: clean +clean: + find . -name \*.pyc -delete + find . -name \*.pyo -delete + rm -rf build dist + +.PHONY: distclean +distclean: clean + rm -rf build dist Dosage.egg-info + rm -f _Dosage_configdata.py MANIFEST + +.PHONY: test +test: + $(PYTHON) $(NOSETESTS) -v --processes=$(NUMPROCESSORS) -m "^test_.*" $(TESTOPTS) $(TESTS) + +.PHONY: deb +deb: + git-buildpackage --git-export-dir=../build-area/ --git-upstream-branch=master --git-debian-branch=debian --git-ignore-new + +comics: + ./dosage -v @@ > comics.log 2>&1 diff --git a/README.md b/README.md new file mode 120000 index 000000000..253de0ce1 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +doc/README.txt \ No newline at end of file diff --git a/doc/README.txt b/doc/README.txt new file mode 100644 index 000000000..a2888c9de --- /dev/null +++ b/doc/README.txt @@ -0,0 +1,93 @@ +Dosage +======= + +Dosage is a powerful webcomic downloader and archiver. + +Introduction +------------- +Dosage is designed to keep a local copy of specific webcomics +and other picture-based content such as Picture of the Day sites. +With the dosage commandline script you can get the latest strip of +a webcomic, or catch-up to the last strip downloaded, or download a +strip for a particular date/index (except if the webcomic's site layout +makes this impossible). + +Notice +------- +This software is in no way intended to publicly "broadcast" comic strips, +it is purely for personal use. Please be aware that by making these strips +publicly available (without the explicit permission of the author) you +may be infringing upon various copyrights. 
+ +Usage +------ +List available comics (over 4000 at the moment): +`$ dosage -l` + +Get the latest comic of, for example, CalvinAndHobbes and save it in the "Comics" +directory: +`$ dosage CalvinAndHobbes` + +If you already have downloaded several comics and want to get the latest +strip of all of them: +`$ dosage @` + +For advanced options and features execute dosage -h or look at the dosage +manual page. + +Offensive comics +----------------- +There are some comics supported by Dosage that may be offensive to readers or +to others that have access to the downloaded images. +SexyLosers is one module that has been discussed. Dosage offers a mechanism +to disable such modules. Modules listed in "/etc/dosage/disabled" and +"~/.dosage/disabled" will be disabled. These files should contain only one +module name per line. Note: Under Windows "~" will also expand to the user's +home directory, usually "C:\Documents and Settings\UserName". + +Dependencies +------------- +Dosage requires Python version 2.5 or higher, which can be downloaded +from http://www.python.org. +No external Python modules are required - only the Python Standard Library +that gets installed with Python. + +Installation +------------- +You can invoke Dosage directly from the source code as "./dosage". Alternatively, +you can install Dosage using python distutils by invoking setup.py in +the root of the distribution. For example: + +`python setup.py install` + +or if you do not have root permissions: + +`python setup.py install --home=$HOME` + +Technical Description +---------------------- +Dosage is written entirely in Python and relies on regular expressions to +do most of the grunt work. + +For each webcomic Dosage has a plugin module, found in the "plugins" +subdirectory of the dosagelib directory. Each module is a subclass of +the _BasicComic class and specifies where to download its comic images. +Some comic syndicates (ucomics for example) have a standard layout for all +comics. 
For such cases there are general base classes derived from _BasicComic +which help define the plugins for all comics of this syndicate. + +Extending Dosage +----------------- +In order to add a new webcomic, a new module class has to be created in one of the +*.py files in the dosagelib/plugins subdirectory. Look at the existing +module classes for examples. + +Reporting Bugs +--------------- +You can report bugs, patches or requests at the Github issue tracker at +https://github.com/wummel/dosage/issues + +Dosage currently supports a large number of comics and that number grows on +a regular basis. If you feel that there are comics that Dosage does not +currently support but should support, please feel free to request them. + diff --git a/doc/changelog.txt b/doc/changelog.txt new file mode 100644 index 000000000..63b1f1b91 --- /dev/null +++ b/doc/changelog.txt @@ -0,0 +1,647 @@ +Dosage 1.7 (released xx.xx.2012) + +Features: +- cmdline: Added proper return codes for error conditions. +- comics: Added more robust regular expressions for HTML tags. + They match case insensitive and ignore whitespaces now. + +Changes: +- installation: Added support for dynamic configuration values. +- comics: Removed the twisted and zope dependencies by adding + an internal plugin search mechanism. +- testing: Refactored the test comic routine in proper unit tests. + +Fixes: +- comics: Adjusted Xkcd href values. +- comics: Don't add empty URLs to the list of found URLs. + + +Dosage v.1.6.0: + * The "Not Dead Yet" release. + * Added / Fixed / etc. comics: + - Too many to list, really. + * New dependencies: + - Twisted + - zope.interface (not zope) + * Revamped plugin system, the first step on the road to Twisted. 
+ +Dosage v.1.5.8: + * Added comics: + - BonoboConspiracy + - ChasingTheSunset + - Comedity + - GoneWithTheBlastwave + - KeenSpot/* -- a *LOT* of KeenSpot submodules + - NichtLustig + - OtenbaFiles + - Wulffmorgenthaler + - Y + * Fixed comics: + - AbstractGender + - AlienLovesPredator + - AppleGeeks + - EarthsongSaga + - NewWorld + - WhiteNinja + * Moved comics: + - KeenSpot/CatLegend (previously CatLegend) + - All KeenSpot/* comic subnames no longer have "The" prefixes. + - UClick (replaces UComics and UComicsEspanol) + * Removed comics: + - KeenSpot/TheDevilsPanties (duplicate of KeenSpot/DevilsPanties) + +Dosage v.1.5.7: + * Important SmackJeeves module fix. Catchup used to loop around from the + first strip to the last one, thus potentially hammering the SmackJeeves + servers with floods of requests from neverending catchups. + * Added comics: + - AbleAndBaker + - AcademyVale + - Aikida + - Angels2200 + - BetterDays + - BlankLabel (virtual module) + - BoredAndEvil + - Catharsis + - ChuckAndElmo + - CloneManga/PennyTribute + - CourtingDisaster + - DeathToTheExtremist + - DogComplex + - DownToEarth + - Dracula + - DragonTails + - DrFun + - DungeonCrawlInc + - ExtraLife + - FalconTwin + - FightCastOrEvade + - Flipside + - Housd + - JerkCity + - JoeAndMonkey + - KeenSpot/SuicideForHire + - LasLindas + - Nekobox + - Nervillsaga + - NewAdventures + - NewAdventuresOfBobbin + - Nihilism + - Nukees + - OkayPants + - PartiallyClips + - PensAndTales + - RWWR + - WebcomicsNation (virtual module) + - Yirmumah + * Fixed comics: + - Asif + - CatLegend + - CloneManga/NanasEverydayLife + - CloneManga/PaperEleven + - DrunkDuck (various comics no longer present) + - EarthsongSaga + - ErrantStory + - InkTank + - KeenSpot/ (ComicGenesis migration) + - KiagiSwordscat + - Qwantz + - SGVY + - SmackJeeves + - Smamusement + - SnafuComics + - UComicsEspanol + * Moved comics: + - Stubble (previously KeenSpot/Stubble) + +Dosage v.1.5.6: + * Added comics: + - CandyCartoon + - 
CloneManga/Kanami + - Drowtales + - KeenSpot/FoxTails + - Krakow + - SmackJeeves (virtual module) + * Fixed comics: + - CrapIDrewOnMyLunchBreak + - CtrlAltDel + - DMFA + - EarthsongSaga + - EverybodyLovesEricRaymond + - GirlsWithSlingshots + - KeenSpot + - KeenSpot/WapsiSquare + - NewWorld + - PennyArcade + - PiledHigherAndDeeper + - QuestionableContent + - SluggyFreelance + - SnafuComics + - Sokora + - UComicsEspanol (updated submodules) + - UComics (updated submodules) + * Moved comics: + - CatLegend (previously KeenSpot/CatLegend) + - DominicDeegan (previously KeenSpot/DominicDeegan) + - KeenSpot/TriquetraCats (previously DrunkDuck/TriquetraCats) + - NekoTheKitty (previously KeenSpot/NekoTheKitty) + - TheNoob (previously KeenSpot/TheNoob) + +Dosage v.1.5.5: + * Added comics: + - AbstractGender + - AnimeArcadia + - CaptainSNES + - DrunkDuck/Holy_Zen + - EarthsongSaga + - NinthElsewhere (9th Elsewhere) + - PebbleVersion + - SGVY (Sparkling Generation Valkyrie Yuuki) + - SuccubusJustice + - ErrantStory (previously KeenSpot/ErrantStory) + * Fixed comics: + - DrunkDuck + - PvPonline + - SluggyFreelance + +Dosage v.1.5.4: + * Added comics: + - Andiwear + - DrunkDuck (virtual) + - EverybodyLovesEricRaymond + - FantasyRealms + - KeenSpot/2WayMirror + - KeenSpot/ANT + - KeenSpot/AngelTheDemoness + - KeenSpot/Apotheosis + - KeenSpot/Aquatica + - KeenSpot/BadlyDrawnKitties + - KeenSpot/BobAndFred + - KeenSpot/BrunoTheBandit + - KeenSpot/CatLegend + - KeenSpot/EdibleDirt + - KeenSpot/FelicityFlint + - KeenSpot/Flem + - KeenSpot/GreenAvenger + - KeenSpot/LangLang + - KeenSpot/Picatrix + - KeenSpot/ScandalSheet + - KeenSpot/Shifters + - KeenSpot/SoapOnARope + - KeenSpot/SuburbanJungle + - KeenSpot/TheClassMenagerie + - KeenSpot/TheDevilsPanties + - KeenSpot/ToddAndPenguin + - KeenSpot/TwoLumps + - KeenSpot/Wereworld + - KeenSpot/YouDamnKid + - SokoraRefugees + * Fixed comics: + - AbsurdNotions + - CloneManga + - PastelDefender + - PennyArcade + - SluggyFreelance + +Dosage 
v.1.5.3: + * Fixed a bug that caused RSS output to crash if the file already existed, + but had no items. + * Added comics: + - CatAndGirl + - CloneManga + - Commissioned + - JoyOfTech + - KeenSpot/AlphaLuna + - KeenSpot/Lowroad75 + - KeenSpot/Werechild + - TheWotch + - TonjaSteele + * Fixed comics: + - DieselSweeties + - LittleGamers + - PennyArcade + - StarCrossdDestiny + - VGCats + +Dosage v.1.5.2: + * Removed some debugging cruft that slipped through in the last release. + * Added comics: + - KeenSpot/TheNoob + - PiledHigherAndDeeper + * Fixed comics: + - ALessonIsLearned + - Misfile + - RealLife + - UComics + - UComicsEspanol + +Dosage v.1.5.1: + * Output event modules now generate proper URLs. You can now pass a base URL + with --base-url, which should correspond to --base-path. If not passed, + Dosage will try to generate a working file:/// URL, but this may not work in + some circumstances. + * RSS output tweaked. + * --list now outputs in columns; pass --single-list to get the old + behaviour (thanks TobiX). 
+ * Added comics: + - AbsurdNotions (contributed by TobiX) + - Altermeta (contributed by TobiX) + - AModestDestiny (contributed by TobiX) + - BadBlood + - BetterYouThanMe + - Bhag (contributed by Shrimp) + - ChroniclesOfGaras (contributed by Shrimp) + - CrapIDrewOnMyLunchBreak (contributed by Shrimp) + - EternalVenture (contributed by Shrimp) + - Evercrest (contributed by TobiX) + - Frump (contributed by Shrimp) + - GUComics (contributed by TobiX) + - KeenSpot/BoomerExpress (contributed by TobiX) + - KevinAndKell (contributed by TobiX) + - LethalDosesClassic (contributed by TobiX) + - LethalDoses (contributed by TobiX) + - ListeningTo11975MHz (contributed by TobiX) + - Marilith + - MinesBigger (contributed by Shrimp) + - MyPrivateLittleHell (contributed by TobiX) + - MyWarWithCulture + - NeoGreenwood (contributed by Shrimp) + - NuklearPower (contributed by Shrimp) + - PerkiGoth (contributed by TobiX) + - PreludesEnd (contributed by Shrimp) + - ShadowInTheMirror (contributed by Shrimp) + - UComicsEspanol + - WhyTheLongFace (contributed by TobiX) + - Winter (contributed by TobiX) + * Fixed comics: + - Creators + - PennyArcade + - UComics (removed comics no longer supported and moved Spanish comics + to UComicsEspanol) + - UnicornJelly + +Dosage v.1.5.0: + * Added an RSS output event. (contributed by Colin Alston) + * Dosage now sends a more descriptive User-Agent HTTP header. + * Dosage will now continue downloading strips until no new strips are + downloaded, this fixed problems with comics that had multiple strips per + page or comics that employed "precache" methods. + * Specific modules can now be disabled by specifying them in + /etc/dosage/disabled (global) and ~/.dosage/disabled (local). + * Fixed problem with division by zero error often occuring under Windows. 
+ * Added comics: + - AlienLovesPredator (contributed by Shrimp) + - AllGrownUp (contributed by Shrimp) + - AsylumOn5thStreet (contributed by Shrimp) + - BizarreUprising (contributed by Shrimp) + - Creators/Archie + - Creators/AskShagg + - Creators/ForHeavensSake + - Creators/Rugrats + - Creators/StateOfTheUnion + - Creators/TheDinetteSet + - Creators/TheMeaningOfLila + - Creators/WeePals + - Creators/ZackHill + - DMFA (contributed by TobiX) + - DoctorRoboto (contributed by Shrimp) + - DoemainOfOurOwn + - EntertainDome (contributed by Shrimp) + - FauxPas (contributed by TobiX) + - IrregularWebcomic (contributed by TobiX) + - JamesFrancis/gonzo + - JamesFrancis/psycindom0 + - JamesFrancis/psycindom1 + - JamesFrancis/psycindom2 + - KeenSpot/AlienDice + - KeenSpot/Avalon + - KeenSpot/CountYourSheep + - KeenSpot/DexLives (contributed by TobiX) + - KeenSpot/DominicDeegan + - KeenSpot/ElGoonishShive + - KeenSpot/ElfLife + - KeenSpot/ErrantStory + - KeenSpot/EverythingJake + - KeenSpot/FriendlyHostility + - KeenSpot/FunnyFarm + - KeenSpot/GamingGuardians + - KeenSpot/GeneCatlow + - KeenSpot/GoblinHollow (contributed by TobiX) + - KeenSpot/GreystoneInn + - KeenSpot/InAPerfectWorld (contributed by TobiX) + - KeenSpot/JoeAverage (contributed by TobiX) + - KeenSpot/MariposaRevelation (contributed by TobiX) + - KeenSpot/NaughtFramed + - KeenSpot/NekoTheKitty (contributed by TobiX) + - KeenSpot/NipAndTuck (contributed by TobiX) + - KeenSpot/OneOverZero (contributed by TobiX) + - KeenSpot/PastelDefender + - KeenSpot/RoadWaffles + - KeenSpot/Scatterplot + - KeenSpot/SchlockMercenary + - KeenSpot/TalesOfTheQuestor (contributed by TobiX) + - KeenSpot/UberSoft + - KeenSpot/UnicornJelly + - KeenSpot/WorldOfFenninRo (contributed by TobiX) + - KeenSpot/ZebraGirl + - LessThanKate (contributed by Shrimp) + - OurHomePlanet (contributed by Shrimp) + - Spamusement + - Sternstaub (contributed by Shrimp) + - TheLounge (contributed by Shrimp) + - TheOrderOfTheStick + - UComics/animatedoliphant 
+ - UComics/anntelnaes + - UComics/askcaptainribman + - UComics/baldoespanol + - UComics/barbarabrandon + - UComics/bensargent + - UComics/billdeore + - UComics/brewsterrockit + - UComics/brucehammond + - UComics/calvinandhobbesespanol + - UComics/cathyespanol + - UComics/chanlowe + - UComics/condorito + - UComics/danasummers + - UComics/danwasserman + - UComics/davidhorsey + - UComics/dicklocher + - UComics/dickwright + - UComics/donwright + - UComics/dougmarlette + - UComics/drewsheneman + - UComics/facesinthenews + - UComics/foxtrotespanol + - UComics/fredbassetespanol + - UComics/garfieldespanol + - UComics/garyvarvel + - UComics/gaturro + - UComics/glennmccoy + - UComics/hubertandabby + - UComics/jackhiggins + - UComics/jackohman + - UComics/jeffdanziger + - UComics/laloalcaraz + - UComics/mattdavies + - UComics/modestyblaise + - UComics/muttandjeffespanol + - UComics/neurotica + - UComics/overboardespanol + - UComics/patoliphant + - UComics/paulconrad + - UComics/pepe + - UComics/poochcafeespanol + - UComics/pricklycity + - UComics/sigmund + - UComics/smallworld + - UComics/stevesack + - UComics/stuartcarlson + - UComics/tedrall + - UComics/thebigpicture + - UComics/theelderberries + - UComics/thefifthwave + - UComics/thefuscobrothers + - UComics/themiddletons + - UComics/thequigmans + - UComics/tomtoles + - UComics/tonyauth + - UComics/tutelandia + - UComics/walthandelsman + - UComics/waynestayskal + - UComics/ziggyespanol + - WiguTV + * Fixed comics: + - Dominion + - KeenSpot/GeneralProtectionFault (contributed by TobiX) + - SluggyFreelance + - UserFriendly + - VGCats (contributed by TobiX) + - Wigu + +Dosage v.1.4.0: + * A manual page for 'mainline' is now inculded. + * Events output; currently the only useful handler is 'html', which + outputs an HTML page with all of the downloaded comics. These + files are named by date, and have links to the previous and next + days (similar to dailystrips). 
+ * Added comics: + - MadamAndEve (contributed by Anthony Caetano) + - SnafuComics/Grim + - SnafuComics/KOF + - SnafuComics/PowerPuffGirls + - SnafuComics/Snafu + - SnafuComics/Tin + - TheParkingLotIsFull + - Zapiro (contributed by Anthony Caetano) + * Fixed comics: + - UserFriendly (naming fix) + +Dosage v.1.3.0: + * Progress bar has been improved; specifically for gauging downloads of + unknown size + * All relevant images are now downloaded where necessary; thanks bruce :) + * Incomplete downloads are discarded + * Removed junview + * Main script is now 'mainline' (used to be 'dosage') + * Added comics: + - AstronomyPOTD + - CounterCulture + - Dominion + - Fallen + - Freefall + - GenrezvousPoint + - KeenSpot/Blindworks + - KeenSpot/BoyMeetsBoy + - KeenSpot/Scrued + - KeenSpot/Stubble + - KeenSpot/TAVision + - KeenSpot/TangsWeeklyComic + - KingFeatures + - OhMyGods + - RedMeat + - WotNow + * Fixed comics: + - MegaTokyo + - SomethingPositive (naming fix) + - TheFray (now a virtual module) + +Dosage v.1.2.0: + * Progress bar is now disabled if the window size cannot be determined + * Source was restructured; the dosage script is now located in the bin/ + directory. 
+ * Added comics: + - BiggerThanCheeses + - BrickShitHouse + - ChugworthAcademy + - DandyAndCompany + - Girly + - HighPingBastard + - Jack + - KeenSpot/ChoppingBlock + - KeenSpot/SaturdayMorningBreakfastCereal + - KeenSpot/StrangeCandy + - KeenSpot/WapsiSquare + - KiagiSwordscat + - MakeWithTheFunny + - Pixel + - PockyBot + - SamAndFuzzy + - Spoonies + +Dosage v.1.1.0: + * A download progress bar is now available on Linux (and probably other + UNIX-like systems) + * Timestamps are now updated even if the strip is not redownloaded + * Added comics: + - ALessonIsLearned + - ASofterWorld + - BoyOnAStickAndSlither + - Chisuji + - ExploitationNow + - KeenSpot/Ghastly + - KeenSpot/Saturnalia + - Loserz + - Qwantz + - StarCrossdDestiny + * Fixed comics: + - LittleGamers + +Dosage v.1.0.1: + * Fix embarassing typo in 1.0.0 which rendered it completely unusable + (albeit a trivial fix). + +Dosage v.1.0.0: + * 1.0 release, yay! + * Set modified time on downloaded images based on Last-Modified header: + Patch provided by gopalv82@yahoo.com, thanks :) + * Fixed --basepath on Windows: + Passing a path that included a drive letter didn't work. + * Added comics: + - TwoTwoOneFour + * Fixed comics: + - SluggyFreelance + +Dosage v.0.3.2: + * Added comics: + - FreakCentral + - KeenSpot/AntiHeroForHire + - KeenSpot/ElfOnlyInn + - KeenSpot/GeneralProtectionFault + - KeenSpot/LimitedSpace + - KeenSpot/LostAndFound + - KeenSpot/Zortic + - RabidMonkeys + - SluggyFreelance + - SpellsAndWhistles + - SuburbanTribe + - TheFray + +Dosage v.0.3.1: + * Removed external helper scripts + * Filesize displayed for downloaded files + * Various documentation changes + * Added --timestamps: + Displays timestamps before every message. 
+ * Added comics: + - SomethingPositive + - UnderPower + - UserFriendly + - KeenSpot/QueenOfWands + - CombustibleOrange + - InkTank/* + - QuestionableContent + * Fixed comics: + - ComicsDotCom/flightdeck + - ComicsDotCom/peanuts + - ButternutSquash + - LifeOfConvenience + +Dosage v.0.3.0: + * Removed filename override: + Since the comic modules now generally have sane names, this is no + longer of much use. + * Better feedback: + The various info levels (up to 3 now) provide much more informative + output. + * Comic wildcards: + @ expands to every comic already present in the basepath, and @@ + expands to every single comic supported by Dosage. + * Added Comics: + - AppleGeeks + - ButternutSquash + - Comet7 + - ComicsDotCom + Lots of submodules, most of them are untested. + - CtrlAltDel + - EightBitTheater + - FragileGravity + - KeenSpot/24fps + - KeenSpot/Alice + - KeenSpot/DeltaVenture + - KeenSpot/ItsWalky + - KeenSpot/PurplePussy + - KeenSpot/TheShadows + - LaurasComics + - MacHall + - Supafine + - VGCats + - WhiteNinja + * Fixed comics: + - KeenSpot/CollegeRoomiesFromHell + - KeenSpot/Wigu (renamed to Wigu) + - UComics/{mullets, nonsequitur, tomthedancingbug} + - PennyArcade + Switch back to the "low" resolution comics; some of the "high" + resolution comics are broken, and the "low" ones seem to be + identical anyway. + * Junview: + Lots of fixes / enhancements, still fairly alpha. + +Dosage v.0.2.0: + * Virtual comic modules + * URL retrying: + Also, if you specify multiple comics, and one of them errors out + for some reason, Dosage will continue with the others. + * Indexed catchup: + You can now start a catchup from a specific index. + * Added comics: + - FilibusterCartoons + - GlueMeat + - RPGWorld + - RealLife + - UComics (see --list, there are around 70 submodules) + * Fixed comics: + - BasilFlint + - DiselSweeties + - SexyLosers + Generate nice filenames now. 
+ * Comic help: + You can now pass --module-help to see module-specific help for + comic modules. + * Junview: + Image viewer written in wxPython, pretty alpha at this stage, + but feel free to play around with it if you're brave. + +Dosage v.0.1.0: + * Various documentation updates + * Added comics: + - LittleGamers + - ClanOfTheCats + - DieselSweeties + - PvPonline + - RadioactivePanda + - ScaryGoRound + * Fixed comics: + - PennyArcade + The comic "bounces" when you get to the first strip, the + "previous" link points to the second comic. Work around this by + checking for the first comic. + - SexyLosers + SexyLosers seems to have implemented referrer checking recently, + this is handled by the new referrer passing support. + * Fix indexed mode up a bit: + The documentation has better examples now. + +Dosage v.0.0.1: + * Initial public release diff --git a/doc/dosage.1 b/doc/dosage.1 new file mode 100644 index 000000000..6bc80e919 --- /dev/null +++ b/doc/dosage.1 @@ -0,0 +1,185 @@ +.TH MAINLINE 1 +.SH NAME +mainline \- command line interface to Dosage +.SH SYNOPSIS +.B mainline +.RI [ options ] +.I module +.RI [ module .\|.\|.] +.SH DESCRIPTION +.B mainline +is a command line interface to Dosage. Dosage is an application designed +to keep a local \(oqmirror\(cq of specific web comics and other picture\-based +content, such as \(oqPicture Of The Day\(cq sites, with a variety of options +for updating and maintaining collections. +.SH OPTIONS +.TP +.BI \-b " PATH" "\fR,\fP \-\^\-base\-path=" PATH +Specifies a base path to put comic subdirectories. The default is \(oqComics\(cq. +.TP +.BI \-\^\-base\-url= PATH +Specifies the base URL for output events. The default is a local file URI. +.TP +.BR \-c ", " \-\^\-catch\-up +Traverses all available strips until an (identical) existing one is found. +This can be useful if your collection was previously up to date, +but you've missed a few days worth of strips. 
Alternatively you can specify +.B \-c +twice for a \(oqfull catchup\(cq, which will not stop until all comics +have been traversed. Catchups can be \(oqresumed\(cq by using the index syntax, see +the +.B INDEX SYNTAX +and +.B SPECIAL SYNTAX
sections for more information. +.TP +.BR \-h ", " \-\^\-help +Output brief help information. +.TP +.BR \-l ", " \-\^\-list +List available comic modules in multi\-column fashion. +.TP +.BR \-\^\-single\-list +List available comic modules in single-column fashion. +.TP +.BI \-m " MODULE" "\fR,\fP \-\^\-module\-help=" MODULE +Output module-specific help for +.IR MODULE . +.TP +.BI \-o " OUTPUT" "\fR,\fP \-\^\-output=" OUTPUT +.I OUTPUT +may be any one of the following: +.PP +.RS +.BR "text " \- +Provides no additional output and is the default value. +.RE +.PP +.RS +.BR "html " \- +Writes out an HTML file linking to the strips actually downloaded in the +current run, named by date (ala dailystrips). The files can be found in the +\'html' directory of your Comics directory. +.RE +.PP +.RS +.BR "rss " \- +Writes out an RSS feed detailing what strips were downloaded in the last 24 +hours. The feed can be found in Comics/dailydose.xml. +.RE +.PP +.RS +.BR "rss " \- +Writes an RSS feed with all of the strips downloaded during the run, for use +with your favourite RSS aggregator. +.RE +.TP +.BR \-p ", " \-\^\-progress +Display a progress bar while downloading comics. +.TP +.BR \-t ", " \-\^\-timestamps +Print timestamps for all output at any level. +.TP +.BR \-v ", " \-\^\-verbose +Increase the output level by one with each occurrence. +.TP +.BR \-V ", " \-\^\-version +Display the version number. +.I module +At least one valid +.I module +must be specified. A list of valid modules can be found by passing the +.B \-l +option. Multiple +.I module +arguments can be specified on the command line. +.SH INDEX SYNTAX +One can indicate the start of a list of +.B comma separated +indices using a +.RB \(oq : "\(cq." 
+.PP +If +.I \-c +is specified with index syntax then \(oqresume\(cq mode is activated, +where a \(oqcatchup\(cq will start at the given index. +.PP +Refer to +.B EXAMPLES +for samples. +.SH OFFENSIVE COMICS +Some users may find certain comics offensive and wish to disable them. +Modules listed in +.B /etc/dosage/disabled +and +.B ~/.dosage/disabled +will be disabled. These files should contain only one module name per line. +.SH SPECIAL SYNTAX +.TP +.B @ +This expands to mean all the comics currently in your \(oqComics\(cq +directory. +.TP +.B @@ +This expands to mean all the comics available to Dosage. +.PP +.B INDEX SYNTAX +can be used with +.B SPECIAL SYNTAX +but this is unlikely to be useful. +.SH EXAMPLES +Retrieve the latest Mega Tokyo comic: +.RS +.B mainline MegaTokyo +.RE +.PP +Retrieve every strip from every comic that there is a module for: +.RS +.B mainline \-c @@ +.RE +.PP +Retrieve all Penny Arcade strips from (and including) a given index to +the beginning regardless of whether they already exist or not: +.RS +.B mainline \-c PennyArcade:2004\-07\-22 +.RE +.SH ENVIRONMENT +.IP HTTP_PROXY +.B mainline +will use the specified HTTP proxy whenever possible. +.SH NOTES +Should retrieval fail on any given strip +.B mainline +will attempt to retry. However the retry information is only output +in the +.B second +and successive output levels. +.PP +At the time of writing, a +.B complete +Dosage collection weighs in at around 3.0GB. +.SH RETURN VALUE +The return value is 2 when +.IP \(bu +a program error occurred. +.PP +The return value is 1 when +.IP \(bu +comics could not be found or downloaded +.IP \(bu +the program run was aborted with Ctrl-C +.PP +Else the return value is zero. +.SH BUGS +See +.I https://github.com/wummel/dosage/issues +for a list of current development tasks and suggestions. +.SH FILES +.IP "\fB/etc/dosage/disabled\fR" +Disables comic modules on a global scale. +.IP "\fB~/.dosage/disabled\fR" +Disables comic modules on a local scale. 
+.SH AUTHORS +.BR mainline " and " Dosage +were written by Jonathan Jacobs and Tristan Seligmann +. This manual page was written by Jonathan Jacobs. diff --git a/doc/dosage.1.html b/doc/dosage.1.html new file mode 100644 index 000000000..df61e952a --- /dev/null +++ b/doc/dosage.1.html @@ -0,0 +1,329 @@ + + +Man page of MAINLINE + +

MAINLINE

+Section: User Commands (1)
Index +Return to Main Contents
+ +  +

NAME

+ +mainline - command line interface to Dosage +  +

SYNOPSIS

+ +mainline + +[options] + +module + +[module...] + +  +

DESCRIPTION

+ +mainline + +is a command line interface to Dosage. Dosage is a an application designed +to keep a local 'mirror' of specific web comics and other picture-based +content, such as 'Picture Of The Day' sites, with a variety of options +for updating and maintaining collections. +  +

OPTIONS

+ +
+
-b PATH, --base--path=PATH + +
+Specifies a base path to put comic subdirectories. The default is 'Comics'. +
--base-url=PATH + +
+Specifies the base URL for output events. The default is a local file URI. +
-c, --catch-up + +
+Traverses all available strips until an (identical) existing one is found. +This can be useful if your collection was previously up to date, +but you've missed a few days worth of strips. Alternatively you can specify +-c + +twice for a 'full catchup', which will not stop until all comics +have been traversed. Catchups can 'resumed' by using the index syntax, see +the +INDEX SYNTAX + +and +SPECIAL SYNTAX + +sections for more information. +
-h, --help + +
+Output brief help information. +
-l, --list + +
+List available comic modules in multi-column fashion. +
--single-list + +
+List available comic modules in single-column fashion. +
-m MODULE, --module-help=MODULE + +
+Output module-specific help for +MODULE. + +
-o OUTPUT, --output=OUTPUT + +
+OUTPUT + +may be any one of the following: +
+

+ +

+text - + +Provides no additional output and is the default value. +
+ +

+ +

+html - + +Writes out an HTML file linking to the strips actually downloaded in the +current run, named by date (ala dailystrips). The files can be found in the +'html' directory of your Comics directory. +
+ +

+ +

+rss - + +Writes out an RSS feed detailing what strips were downloaded in the last 24 +hours. The feed can be found in Comics/dailydose.xml. +
+ +

+ +

+rss - + +Writes an RSS feed with all of the strips downloaded during the run, for use +with your favourite RSS aggregator. +
+ +
+
-p, --progress + +
+Display a progress bar while downloading comics. +
-t, --timestamps + +
+Print timestamps for all output at any level. +
-v, --verbose + +
+Increase the output level by one with each occurrence. +
-V, --version + +
+Display the version number. +module + +At least one valid +module + +must be specified. A list of valid modules can be found by passing the +-l + +option. Multiple +module + +arguments can be specified on the command line. +
+  +

INDEX SYNTAX

+ +One can indicate the start of a list of +comma separated + indices using a +':'. + +

+ +If +-c + +is specified with index syntax then 'resume' mode is activated, +where a 'catchup' will start at the given index. +

+ +Refer to +EXAMPLES + +for samples. +  +

OFFENSIVE COMICS

+ +Some users may find certain comics offensive and wish to disable them. +Modules listed in +/etc/dosage/disabled + +and +~/.dosage/disabled + +will be disabled. These files should contain only one module name per line. +  +

SPECIAL SYNTAX

+ +
+
@ + +
+This expands to mean all the comics currently in your 'Comics' +directory. +
@@ + +
+This expands to mean all the comics available to Dosage. +
+

+ +INDEX SYNTAX + +can be used with +SPECIAL SYNTAX + +but this is unlikely to be useful. +  +

EXAMPLES

+ +Retrieve the latest Mega Tokyo comic: +
+mainline MegaTokyo + +
+ +

+ +Retrieve every strip from every comic that there is a module for: +

+mainline -c @@ + +
+ +

+ +Retrieve all Penny Arcade strips from (and including) a given index to +the beginning regardless of whether they already exist or not: +

+mainline -c PennyArcade:2004-07-22 + +
+ +  +

ENVIRONMENT

+ +
+
HTTP_PROXY
+mainline + +will use the specified HTTP proxy whenever possible. +
+  +

NOTES

+ +Should retrieval fail on any given strip +mainline + +will attempt to retry. However, the retry information is only output +in the +second + +and successive output levels. +

+ +At the time of writing, a +complete + +Dosage collection weighs in at around 3.0GB. +  +

RETURN VALUE

+ +The return value is 2 when +
+
+a program error occurred. +
+

+ +The return value is 1 when +

+
+comics could not be found or downloaded +
+the program run was aborted with Ctrl-C +
+

+ +Else the return value is zero. +  +

BUGS

+ +See +http://trac.slipgate.za.net/dosage + +for a list of current development tasks and suggestions. +  +

FILES

+ +
+
/etc/dosage/disabled
+Disables comic modules on a global scale. +
~/.dosage/disabled
+Disables comic modules on a local scale. +
+  +

AUTHORS

+ +mainline and Dosage + +were written by Jonathan Jacobs <korpse@slipgate.za.net> and Tristan Seligmann +<mithrandi@slipgate.za.net>. This manual page was written by Jonathan Jacobs. +

+ +


+ 

Index

+
+
NAME
+
SYNOPSIS
+
DESCRIPTION
+
OPTIONS
+
INDEX SYNTAX
+
OFFENSIVE COMICS
+
SPECIAL SYNTAX
+
EXAMPLES
+
ENVIRONMENT
+
NOTES
+
RETURN VALUE
+
BUGS
+
FILES
+
AUTHORS
+
+
+This document was created by +man2html, +using the manual pages.
+ + + diff --git a/dosage b/dosage new file mode 100755 index 000000000..afd304a32 --- /dev/null +++ b/dosage @@ -0,0 +1,240 @@ +#!/usr/bin/env python + +# Dosage, the webcomic downloader +# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +import sys +import os +import optparse +import traceback + +from dosagelib import events, scraper +from dosagelib.output import out +from dosagelib.util import getWindowSize, internal_error +from dosagelib.configuration import App, Freeware, Copyright + +def setupOptions(): + usage = 'usage: %prog [options] comicModule [comicModule ...]' + parser = optparse.OptionParser(usage=usage) + parser.add_option('-v', '--verbose', action='count', dest='verbose', default=0, help='provides verbose output, use multiple times for more verbosity') + parser.add_option('-c', '--catch-up', action='count', dest='catchup', default=None, help='traverse and retrieve all available comics up until the strip that already exists locally, use twice to retrieve until all strips exist locally') + parser.add_option('-b', '--base-path', action='store', dest='basepath', default='Comics', help='set the path to create invidivual comic directories in, default is Comics', metavar='PATH') + parser.add_option('--base-url', action='store', dest='baseurl', default=None, help='the base URL of your comics directory 
(for RSS, HTML, etc.); this should correspond to --base-path', metavar='PATH') + parser.add_option('-l', '--list', action='store_const', const=1, dest='list', help='list available comic modules') + parser.add_option('--single-list', action='store_const', const=2, dest='list', help='list available comic modules in a single list') + parser.add_option('-V', '--version', action='store_true', dest='version', help='display the version number') + parser.add_option('-m', '--module-help', action='store_true', dest='modhelp', help='display help for comic modules') + parser.add_option('-t', '--timestamps', action='store_true', dest='timestamps', default=False, help='print timestamps for all output at any info level') + parser.add_option('-o', '--output', action='store', dest='output', choices=events.getHandlers(), help='output formatting for downloaded comics') + try: + getWindowSize() + except NotImplementedError: + progress = False + else: + progress = True + + if progress: + parser.add_option('-p', '--progress', action='store_true', dest='progress', default=False, help='display progress bar while downloading comics') + return parser + +class Dosage(object): + + def __init__(self): + self.errors = 0 + + def setOutputInfo(self): + out.level = 0 + out.level += self.settings['verbose'] + out.timestamps = self.settings['timestamps'] + + def saveComic(self, comic): + basepath = self.settings['basepath'] + progress = self.settings.get('progress', False) + fn, saved = comic.save(basepath, progress) + return saved + + def saveComics(self, comics): + saved = False + for comic in comics: + saved = self.saveComic(comic) or saved + return saved + + def safeOp(self, fp, *args, **kwargs): + try: + fp(*args, **kwargs) + except Exception: + self.errors += 1 + type, value, tb = sys.exc_info() + out.write('Traceback (most recent call last):', 1) + out.writelines(traceback.format_stack(), 1) + out.writelines(traceback.format_tb(tb)[1:], 1) + 
out.writelines(traceback.format_exception_only(type, value)) + + def getCurrent(self): + out.write('Retrieving the current strip...') + self.saveComics(self.module.getCurrentComics()) + + def getIndex(self, index): + out.write('Retrieving index "%s"....' % (index,)) + try: + self.module.setStrip(index) + self.saveComics(self.module.getNextComics()) + except NotImplementedError: + out.write('No indexed retrieval support.') + + def catchup(self): + out.write('Catching up...') + for comics in self.module: + if not self.saveComics(comics) and self.settings['catchup'] < 2: + break + + def catchupIndex(self, index): + out.write('Catching up from index "%s"...' % (index,)) + self.module.setStrip(index) + for comics in self.module: + if not self.saveComics(comics) and self.settings['catchup'] < 2: + break + + def getScrapers(self): + return scraper.items() + + def getExistingComics(self): + for scraper in self.getScrapers(): + dirname = scraper.get_name().replace('/', os.sep) + if os.path.isdir(os.path.join(self.settings['basepath'], dirname)): + yield scraper + + def doList(self, columnList): + out.write('Available comic scrapers:') + scrapers = self.getScrapers() + if columnList: + self.doColumnList(scrapers) + else: + self.doSingleList(scrapers) + out.write('%d supported comics.' 
% len(scrapers)) + + def doSingleList(self, scrapers): + print '\n'.join(scraper.get_name() for scraper in scrapers) + + def doColumnList(self, scrapers): + try: + screenWidth = getWindowSize() + except NotImplementedError: + screenWidth = 80 + + if len(scrapers) == 0: + return + + names = [scraper.get_name() for scraper in scrapers] + maxlen = max([len(name) for name in names]) + namesPerLine = int(screenWidth / (maxlen + 1)) + + while names: + print ''.join([name.ljust(maxlen) for name in names[:namesPerLine]]) + del names[:namesPerLine] + + def doCatchup(self): + for comic in self.useComics(): + if self.indices: + self.safeOp(self.catchupIndex, self.indices[0]) + else: + self.safeOp(self.catchup) + + def doCurrent(self): + for comic in self.useComics(): + if self.indices: + for index in self.indices: + self.safeOp(self.getIndex, index) + else: + self.safeOp(self.getCurrent) + + def doHelp(self): + for scraper in self.useComics(): + for line in scraper.getHelp().splitlines(): + out.write("Help: "+line) + + def setupComic(self, scraper): + self.module = scraper() + out.context = scraper.get_name() + return self.module + + def useComics(self): + for comic in self.comics: + c = comic.split(':', 2) + if len(c) > 1: + self.indices = c[1].split(',') + else: + self.indices = None + + moduleName = c[0] + if moduleName == '@': + for s in self.getExistingComics(): + yield self.setupComic(s) + elif moduleName == '@@': + for s in self.getScrapers(): + yield self.setupComic(s) + else: + yield self.setupComic(scraper.get(moduleName)) + + def displayVersion(self): + print App + print Copyright + print Freeware + + def run(self, settings, comics): + self.settings = settings + self.setOutputInfo() + self.comics = comics + + om = self.settings['output'] + events.installHandler(om, self.settings['basepath'], self.settings['baseurl']) + events.handler.start() + + if self.settings['version']: + self.displayVersion() + elif self.settings['list']: + self.doList(self.settings['list'] == 
1) + elif len(comics) <= 0: + out.write('Warning: No comics specified, bailing out!') + elif self.settings['modhelp']: + self.doHelp() + elif self.settings['catchup']: + self.doCatchup() + else: + self.doCurrent() + + events.handler.end() + +def main(): + try: + parser = setupOptions() + options, args = parser.parse_args() + d = Dosage() + d.run(options.__dict__, args) + if d.errors: + res = 1 + else: + res = 0 + except KeyboardInterrupt: + print "Aborted." + res = 1 + except Exception: + internal_error() + res = 2 + return res + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/dosagelib/__init__.py b/dosagelib/__init__.py new file mode 100644 index 000000000..98da5620c --- /dev/null +++ b/dosagelib/__init__.py @@ -0,0 +1,39 @@ +""" +Automated webcomic downloader. Dosage traverses webcomic websites in +order to download each strip of the comic. The intended use is for +mirroring the strips locally for ease of viewing; redistribution of the +downloaded strips may violate copyright, and is not advisable unless you +have communicated with all of the relevant copyright holders, described +your intentions, and received permission to distribute. + +The primary dosage interface is currently the 'mainline' script, which +is just a thin wrapper that invokes L{dosage.mainline}. Comic modules +for each webcomic are located in L{dosage.modules}; most of these make +use of the helper base classes and mixins in L{dosage.modules.helpers}, +thus making their individual implementations trivial. 
+ +@group Core modules: comic, events, output, progress, rss, util, + version +@group Interface modules: mainline +@group Comic modules: modules + +@sort: modules.helpers + +@author: U{Dosage development team } +@requires: Python 2.3+ +@see: U{The dosage webpage } +@see: U{The dosage Trac site } + +@newfield contributor: Contributor, Contributors (Alphabetical Order) +@contributor: U{Jonathan Jacobs } +@contributor: U{Tristan Seligmann } + +@var __license__: The license governing the use and distribution of + dosage. +""" +__docformat__ = 'epytext en' +import sys +if not (hasattr(sys, 'version_info') or + sys.version_info < (2, 5, 0, 'final', 0)): + raise SystemExit("This program requires Python 2.5 or later.") + diff --git a/dosagelib/comic.py b/dosagelib/comic.py new file mode 100644 index 000000000..ef19b863c --- /dev/null +++ b/dosagelib/comic.py @@ -0,0 +1,101 @@ +import urllib2 +import os +import locale +import rfc822 +import time +import shutil +locale.setlocale(locale.LC_ALL, '') + +from .output import out +from .util import urlopen, saneDataSize, normaliseURL +from .progress import progressBar, OperationComplete +from .events import handler + +class FetchComicError(IOError): pass + +class Comic(object): + def __init__(self, moduleName, url, referrer=None, filename=None): + self.moduleName = moduleName + url = normaliseURL(url) + out.write('Getting headers for %s...' 
% (url,), 2) + try: + self.urlobj = urlopen(url, referrer=referrer) + except urllib2.HTTPError, he: + raise FetchComicError, ('Unable to retrieve URL.', url, he.code) + + if self.urlobj.info().getmaintype() != 'image' and \ + self.urlobj.info().gettype() not in ('application/octet-stream', 'application/x-shockwave-flash'): + raise FetchComicError, ('No suitable image found to retrieve.', url) + + self.filename, self.ext = os.path.splitext(url.split('/')[-1]) + self.filename = filename or self.filename + self.filename = self.filename.replace(os.sep, '_') + # Always use mime type for file extension if it is sane. + if self.urlobj.info().getmaintype() == 'image': + self.ext = '.' + self.urlobj.info().getsubtype() + self.contentLength = int(self.urlobj.info().get('content-length', 0)) + self.lastModified = self.urlobj.info().get('last-modified') + out.write('... filename = "%s", ext = "%s", contentLength = %d' % (self.filename, self.ext, self.contentLength), 2) + + def touch(self, filename): + if self.lastModified: + tt = rfc822.parsedate(self.lastModified) + if tt: + mtime = time.mktime(tt) + os.utime(filename, (mtime, mtime)) + + def save(self, basepath, showProgress=False): + comicName, comicExt = self.filename, self.ext + comicSize = self.contentLength + comicDir = os.path.join(basepath, self.moduleName.replace('/', os.sep)) + if not os.path.isdir(comicDir): + os.makedirs(comicDir) + + fn = os.path.join(comicDir, '%s%s' % (self.filename, self.ext)) + if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize: + self.urlobj.close() + self.touch(fn) + out.write('Skipping existing file "%s".' % (fn,), 1) + return fn, False + + try: + tmpFn = os.path.join(comicDir, '__%s%s' % (self.filename, self.ext)) + out.write('Writing comic to temporary file %s...' 
% (tmpFn,), 3) + comicOut = file(tmpFn, 'wb') + try: + startTime = time.time() + if showProgress: + def pollData(): + data = self.urlobj.read(8192) + if not data: + raise OperationComplete + comicOut.write(data) + return len(data), self.contentLength + progressBar(pollData) + else: + comicOut.write(self.urlobj.read()) + endTime = time.time() + finally: + comicOut.close() + out.write('Copying temporary file (%s) to %s...' % (tmpFn, fn), 3) + shutil.copy2(tmpFn, fn) + self.touch(fn) + + size = os.path.getsize(fn) + bytes = locale.format('%d', size, True) + if endTime != startTime: + speed = saneDataSize(size / (endTime - startTime)) + else: + speed = '???' + attrs = dict(fn=fn, bytes=bytes, speed=speed) + out.write('Saved "%(fn)s" (%(bytes)s bytes, %(speed)s/sec).' % attrs, 1) + handler.comicDownloaded(self.moduleName, fn) + self.urlobj.close() + finally: + try: + out.write('Removing temporary file %s...' % (tmpFn,), 3) + os.remove(tmpFn) + except: + pass + + return fn, True diff --git a/dosagelib/configuration.py b/dosagelib/configuration.py new file mode 100644 index 000000000..505f5fad6 --- /dev/null +++ b/dosagelib/configuration.py @@ -0,0 +1,19 @@ +import _Dosage_configdata as configdata + +Version = configdata.version +ReleaseDate = configdata.release_date +AppName = configdata.name +App = AppName+u" "+Version +Author = configdata.author +HtmlAuthor = Author.replace(u' ', u' ') +Copyright = u"Copyright (C) 2004-2008 "+Author +HtmlCopyright = u"Copyright © 2004-2008 "+HtmlAuthor +Url = configdata.url +SupportUrl = Url + u"/issues" +Email = configdata.author_email +UserAgent = u"%s/%s (+%s)" % (AppName, Version, Url) +Freeware = AppName+u""" comes with ABSOLUTELY NO WARRANTY! +This is free software, and you are welcome to redistribute it +under certain conditions. 
Look at the file `LICENSE' within this +distribution.""" + diff --git a/dosagelib/events.py b/dosagelib/events.py new file mode 100644 index 000000000..9a5677be5 --- /dev/null +++ b/dosagelib/events.py @@ -0,0 +1,159 @@ +import os.path +import time +import rss +import urllib +import util + +class EventHandler(object): + def __init__(self, basepath, baseurl): + self.basepath = basepath + self.baseurl = baseurl or self.getBaseUrl() + + def getBaseUrl(self): + '''Return a file: URL that probably points to the basedir. + + This is used as a halfway sane default when the base URL is not + provided; not perfect, but should work in most cases.''' + components = util.splitpath(os.path.abspath(self.basepath)) + url = '/'.join([urllib.quote(component, '') for component in components]) + return 'file:///' + url + '/' + + def getUrlFromFilename(self, filename): + components = util.splitpath(util.getRelativePath(self.basepath, filename)) + url = '/'.join([urllib.quote(component, '') for component in components]) + return self.baseurl + url + + def start(self): + pass + + def comicDownloaded(self, comic, filename): + pass + + def end(self): + pass + +class TextEventHandler(EventHandler): + pass + +class RSSEventHandler(EventHandler): + def RFC822Date(self, indate): + return time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(indate)) + + def getFilename(self): + return os.path.abspath(os.path.join(self.basepath, 'dailydose.rss')) + + def start(self): + today = time.time() + yesterday = today - 86400 + today = time.localtime(today) + yesterday = time.localtime(yesterday) + + link = 'https://github.com/wummel/dosage' + + self.rssfn = self.getFilename() + + if os.path.exists(self.rssfn): + self.newfile = False + self.rss = rss.parseFeed(self.rssfn, yesterday) + else: + self.newfile = True + self.rss = rss.Feed('Daily Dosage', link, 'Comics for %s' % time.strftime('%Y/%m/%d', today)) + + def comicDownloaded(self, comic, filename): + url = self.getUrlFromFilename(filename) + args 
= ( + '%s - %s' % (comic, os.path.basename(filename)), + url, + 'View Comic' % (url,), + self.RFC822Date(time.time()) + ) + + if self.newfile: + self.newfile = False + self.rss.addItem(*args) + else: + self.rss.insertHead(*args) + + def end(self): + self.rss.write(self.rssfn) + +class HtmlEventHandler(EventHandler): + def fnFromDate(self, date): + fn = time.strftime('comics-%Y%m%d.html', date) + fn = os.path.join(self.basepath, 'html', fn) + fn = os.path.abspath(fn) + return fn + + def start(self): + today = time.time() + yesterday = today - 86400 + tomorrow = today + 86400 + today = time.localtime(today) + yesterday = time.localtime(yesterday) + tomorrow = time.localtime(tomorrow) + + fn = self.fnFromDate(today) + assert not os.path.exists(fn), 'Comic page for today already exists!' + + d = os.path.dirname(fn) + if not os.path.isdir(d): + os.makedirs(d) + + yesterdayUrl = self.getUrlFromFilename(self.fnFromDate(yesterday)) + tomorrowUrl = self.getUrlFromFilename(self.fnFromDate(tomorrow)) + + self.html = file(fn, 'w') + self.html.write(''' + +Comics for %s + + +Previous Day | Next Day +
    +''' % (time.strftime('%Y/%m/%d', today), yesterdayUrl, tomorrowUrl)) + + self.lastComic = None + + def comicDownloaded(self, comic, filename): + if self.lastComic != comic: + self.newComic(comic) + url = self.getUrlFromFilename(filename) + self.html.write('
  • %s
  • \n' % (url, os.path.basename(filename))) + + def newComic(self, comic): + if self.lastComic is not None: + self.html.write('
\n') + self.lastComic = comic + self.html.write('''
  • %s
  • +
      +''' % (comic,)) + + def end(self): + if self.lastComic is not None: + self.html.write('
    \n') + self.html.write(''' + +''') + self.html.close() + + +handlers = { + 'text': TextEventHandler, + 'html': HtmlEventHandler, + 'rss': RSSEventHandler, +} + +def getHandlers(): + l = handlers.keys() + l.sort() + return l + +def installHandler(name=None, basepath=None, baseurl=None): + global handler + if name is None: + name = 'text' + if basepath is None: + basepath = '.' + handler = handlers[name](basepath, baseurl) + +installHandler() diff --git a/dosagelib/helpers.py b/dosagelib/helpers.py new file mode 100644 index 000000000..d3b676b78 --- /dev/null +++ b/dosagelib/helpers.py @@ -0,0 +1,181 @@ +import re + +from .util import fetchUrl, fetchManyUrls, getQueryParams +from .comic import Comic + +class _BasicScraper(object): + '''Base class with scrape functions for comics. + + @type latestUrl: C{string} + @cvar latestUrl: The URL for the latest comic strip. + @type imageUrl: C{string} + @cvar imageUrl: A string that is interpolated with the strip index + to yield the URL for a particular strip. + @type imageSearch: C{regex} + @cvar imageSearch: A compiled regex that will locate the strip image URL + when applied to the strip page. + @type prevSearch: C{regex} + @cvar prevSearch: A compiled regex that will locate the URL for the + previous strip when applied to a strip page. + ''' + referrer = None + help = 'Sorry, no help for this comic yet.' 
+ + def __init__(self): + self.currentUrl = None + self.urls = set() + + def getReferrer(self, imageUrl, pageUrl): + return self.referrer or pageUrl or self.getLatestUrl() + + def getComic(self, url, pageUrl): + if not url: + return None + return Comic(self.get_name(), url, filename=self.getFilename(url, pageUrl), referrer=self.getReferrer(url, pageUrl)) + + def getCurrentComics(self): + self.currentUrl = self.getLatestUrl() + comics = self.getNextComics() + if not comics: + raise ValueError("Could not find current comic.") + return comics + + def getNextComics(self): + comics = [] + while not comics and self.currentUrl and self.currentUrl not in self.urls: + comicUrlGroups, prevUrl = fetchManyUrls(self.currentUrl, [self.imageSearch, self.prevSearch]) + + if prevUrl: + prevUrl = prevUrl[0] + else: + prevUrl = None + + for comicUrl in comicUrlGroups: + comics.append(self.getComic(comicUrl, self.currentUrl)) + + self.urls.update([self.currentUrl]) + self.currentUrl = (prevUrl, None)[prevUrl in self.urls] + return comics + + def setStrip(self, index): + self.currentUrl = self.imageUrl % index + + def getHelp(self): + return self.help + + def __iter__(self): + """Iterate through the strips, starting from the current one and going backward.""" + if not self.currentUrl: + self.currentUrl = self.getLatestUrl() + + comics = True + while comics: + comics = self.getNextComics() + if comics: + yield comics + + @classmethod + def get_name(cls): + if hasattr(cls, 'name'): + return cls.name + return cls.__name__ + + @classmethod + def starter(cls): + return cls.latestUrl + + @classmethod + def namer(cls, imageUrl, pageUrl): + return None + + def getFilename(self, imageUrl, pageUrl): + return self.namer(imageUrl, pageUrl) + + def getLatestUrl(self): + return self.starter() + + +def queryNamer(paramName, usePageUrl=False): + @staticmethod + def _namer(imageUrl, pageUrl): + url = (imageUrl, pageUrl)[usePageUrl] + return getQueryParams(url)[paramName][0] + return _namer + + +def 
regexNamer(regex): + @staticmethod + def _namer(imageUrl, pageUrl): + return regex.search(imageUrl).group(1) + return _namer + + +def constStarter(latestUrl): + @staticmethod + def _starter(): + return latestUrl + return _starter + + +def bounceStarter(latestUrl, nextSearch): + @classmethod + def _starter(cls): + url = fetchUrl(latestUrl, cls.prevSearch) + if url: + url = fetchUrl(url, nextSearch) + return url + return _starter + + +def indirectStarter(baseUrl, latestSearch): + @staticmethod + def _starter(): + return fetchUrl(baseUrl, latestSearch) + return _starter + + +class IndirectLatestMixin(object): + ''' + Mixin for comics that link to the latest comic from a base page of + some kind. This also supports comics which don't link to the last comic + from the base page, but the beginning of the latest chapter or similiar + schemes. It simulates going forward until it can't find a 'next' link as + specified by the 'nextSearch' regex. + + @type baseUrl: C{string} + @cvar baseUrl: the URL where the link to the latest comic is found. + @type latestSearch C{regex} + @cvar latestSearch: a compiled regex for finding the 'latest' URL. + @type nextSearch C{regex} + @cvar nextSearch: a compiled regex for finding the 'next' URL. + ''' + + __latestUrl = None + + def getLatestUrl(self): + if not self.__latestUrl: + self.__latestUrl = fetchUrl(self.baseUrl, self.latestSearch) + if hasattr(self, "nextSearch"): + nextUrl = fetchUrl(self.__latestUrl, self.nextSearch) + while nextUrl: + self.__latestUrl = nextUrl + nextUrl = fetchUrl(self.__latestUrl, self.nextSearch) + return self.__latestUrl + + latestUrl = property(getLatestUrl) + + +class _PHPScraper(_BasicScraper): + """ + I implement IScraper for comics using phpComic/CUSP. + + This provides an easy way to define scrapers for webcomics using phpComic. 
+ """ + imageUrl = property(lambda self: self.basePath + 'daily.php?date=%s') + imageSearch = property(lambda self: re.compile(r'[^]+ src="(%scomics/\d{6}\..+?)">' % (self.basePath,))) + + help = 'Index format: yymmdd' + + @classmethod + def starter(cls): + return cls.basePath + cls.latestUrl diff --git a/dosagelib/output.py b/dosagelib/output.py new file mode 100644 index 000000000..5ebe4fa9d --- /dev/null +++ b/dosagelib/output.py @@ -0,0 +1,23 @@ +import time + +class Output(object): + def __init__(self): + self.context = '' + self.level = 0 + self.timestamps = False + + def write(self, s, level=0): + if level > self.level: + return + if self.level > 1 or self.timestamps: + timestamp = time.strftime('%H:%M:%S ') + else: + timestamp = '' + print '%s%s> %s' % (timestamp, self.context, s) + + def writelines(self, lines, level=0): + for line in lines: + for line in line.rstrip('\n').split('\n'): + self.write(line.rstrip('\n'), level=level) + +out = Output() diff --git a/dosagelib/plugins/__init__.py b/dosagelib/plugins/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/dosagelib/plugins/__init__.py @@ -0,0 +1 @@ + diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py new file mode 100644 index 000000000..097afe130 --- /dev/null +++ b/dosagelib/plugins/a.py @@ -0,0 +1,338 @@ +from re import compile, MULTILINE + +from ..helpers import _BasicScraper, regexNamer, bounceStarter, indirectStarter + + +class ALessonIsLearned(_BasicScraper): + latestUrl = 'http://www.alessonislearned.com/' + imageUrl = 'http://www.alessonislearned.com/lesson%s.html' + imageSearch = compile(r'back') + help = 'Index format: n (unpadded)' + + +class AbleAndBaker(_BasicScraper): + latestUrl = 'http://www.jimburgessdesign.com/comics/index.php' + imageUrl = 'http://www.jimburgessdesign.com/comics/index.php?comic=%s' + imageSearch = compile(r']+src="(comics/\d+\.\w+)"') + prevSearch = compile(r']+id="comic_menu_prev"') + help = 'Index format: n (unpadded)' + + 
+class AbsurdNotions(_BasicScraper): + latestUrl = 'http://www.absurdnotions.org/page129.html' + imageUrl = 'http://www.absurdnotions.org/page%s.html' + imageSearch = compile(r'Next »')) + imageUrl = 'http://abstrusegoose.com/c%s.html' + imageSearch = compile(r'"]+)"') + prevSearch = compile(r'« Previous') + help = 'Index format: n (unpadded)' + + @classmethod + def namer(cls, imageUrl, pageUrl): + index = int(pageUrl.rstrip('/').split('/')[-1]) + name = imageUrl.split('/')[-1].split('.')[0] + return 'c%03d-%s' % (index, name) + + + +class AcademyVale(_BasicScraper): + latestUrl = 'http://imagerie.com/vale/' + imageUrl = 'http://imagerie.com/vale/avarch.cgi?%s' + imageSearch = compile(r']+>(
    \n|\n|
    \n)
     
    ', MULTILINE) + prevSearch = compile(r'PREVIOUS PAGE') + help = 'Index format: n (unpadded)' + starter = indirectStarter( + 'http://www.anarchycomic.com/page1.php', + compile(r'LATEST')) + + + +class Altermeta(_BasicScraper): + latestUrl = 'http://altermeta.net/' + imageUrl = 'http://altermeta.net/archive.php?comic=%s&view=showfiller' + imageSearch = compile(r'') + prevSearch = compile(r'Back') + + + +class Angels2200(_BasicScraper): + latestUrl = 'http://www.janahoffmann.com/angels/' + imageSearch = compile(r"") + prevSearch = compile(r'« Previous') + + + +class AppleGeeks(_BasicScraper): + latestUrl = 'http://www.applegeeks.com/' + imageUrl = 'http://www.applegeeks.com/comics/viewcomic.php?issue=%s' + imageSearch = compile(r'Previous Comic\s*

    ', MULTILINE) + help = 'Index format: n (unpadded)' + + +class AppleGeeksLite(_BasicScraper): + latestUrl = 'http://www.applegeeks.com/lite/' + imageUrl = 'http://applegeeks.com/lite/index.php?aglitecomic=%s' + imageSearch = compile(r'>')) + imageUrl = 'http://antwrp.gsfc.nasa.gov/apod/ap%s.html' + imageSearch = compile(r'') + prevSearch = compile(r'<') + help = 'Index format: yymmdd' + + @classmethod + def namer(cls, imageUrl, pageUrl): + return '%s-%s' % (pageUrl.split('/')[-1].split('.')[0][2:], + imageUrl.split('/')[-1].split('.')[0]) + + + +class AfterStrife(_BasicScraper): + latestUrl = 'http://afterstrife.com/?p=262' + imageUrl = 'http://afterstrife.com/?p=%s' + imageSearch = compile(r'(◄ Previous|)') + help = 'Index format: yyyy/mm/strip-name' + + + +class ASkeweredParadise(_BasicScraper): + latestUrl = 'http://aspcomics.net/' + imageUrl = 'http://aspcomics.net/archindex.php?strip_id=%s' + imageSearch = compile(r'Next\]')) + imageUrl = 'http://www.agirlandherfed.com/comic/?%s' + imageSearch = compile(r'Previous\]') + help = 'Index format: nnn' + + @classmethod + def namer(cls, imageUrl, pageUrl): + return pageUrl.split('?')[-1] + + + +class AetheriaEpics(_BasicScraper): + latestUrl = 'http://aetheria-epics.schala.net/' + imageUrl = 'http://aetheria-epics.schala.net/%s.html' + imageSearch = compile(r'') + help = 'Index format: nnn' + + + +class Adrift(_BasicScraper): + latestUrl = 'http://www.adriftcomic.com/' + imageUrl = 'http://www.adriftcomic.com/page%s.html' + imageSearch = compile(r'') + prevSearch = compile(r'« Previous') + help = 'Index format: nnn' + + + +class AlienShores(_BasicScraper): + latestUrl = 'http://alienshores.com/alienshores_band/' + imageUrl = 'http://alienshores.com/alienshores_band/?p=%s' + imageSearch = compile(r'>') + help = 'Index format: nnn' + + + +class AllKindsOfBees(_BasicScraper): + latestUrl = 'http://www.allkindsofbees.com/' + imageUrl = 'http://www.allkindsofbees.com/?p=%s' + imageSearch = compile(r'') + help = 'Index 
format: nnn' + + + +class AllTheGrowingThings(_BasicScraper): + latestUrl = 'http://typodmary.com/growingthings/' + imageUrl = 'http://typodmary.com/growingthings/%s/' + imageSearch = compile(r'Previous') + help = 'Index format: yyyy-mm-dd' + + + +class ArcticBlast(_BasicScraper): + latestUrl = 'http://www.arcticblastcomic.com/' + imageUrl = 'http://www.arcticblastcomic.com/?p=%s' + imageSearch = compile(r'') + prevSearch = compile(r'« Previous') + help = 'Index format: yyyy/mm/dd/strip-name' + + + +class AlsoBagels(_BasicScraper): + latestUrl = 'http://www.alsobagels.com/' + imageUrl = 'http://alsobagels.com/index.php/comic/%s/' + imageSearch = compile(r'') + help = 'Index format: strip-name' + + + +class Annyseed(_BasicScraper): + latestUrl = 'http://www.colourofivy.com/annyseed_webcomic_latest.htm' + imageUrl = 'http://www.colourofivy.com/annyseed_webcomic%s.htm' + imageSearch = compile(r'') + prevSearch = compile(r'"(/new/.+?)".+?previous.gif') + help = 'Index format: n (unpadded)' + + +class Bardsworth(_BasicScraper): + latestUrl = 'http://www.bardsworth.com/' + imageUrl = 'http://www.bardsworth.com/archive.php?p=s%' + imageSearch = compile(r'(strips/.+?)"') + prevSearch = compile(r'"(http.+?)".+?/prev') + help = 'Index format: nnn' + + +class BetterDays(_BasicScraper): + latestUrl = 'http://www.jaynaylor.com/betterdays/' + imageUrl = 'http://www.jaynaylor.com/betterdays/archives/%s' + imageSearch = compile(r'') + prevSearch = compile(r'« Previous') + help = 'Index format: yyyy/mm/.html' + + +class BetterYouThanMe(_BasicScraper): + latestUrl = 'http://betteryouthanme.net/' + imageUrl = 'http://betteryouthanme.net/archive.php?date=%s.gif' + imageSearch = compile(r'"(comics/.+?)"') + prevSearch = compile(r'"(archive.php\?date=.+?)">.+?previous') + help = 'Index format: yyyymmdd' + + +class BiggerThanCheeses(_BasicScraper): + latestUrl = 'http://www.biggercheese.com' + imageUrl = 'http://www.biggercheese.com/index.php?comic=%s' + imageSearch = 
compile(r'src="(comics/.+?)" alt') + prevSearch = compile(r'"(index.php\?comic=.+?)".+?_back') + help = 'Index format: n (unpadded)' + + + +class BizarreUprising(_BasicScraper): + latestUrl = 'http://www.bizarreuprising.com/' + imageUrl = 'http://www.bizarreuprising.com/view/%s' + imageSearch = compile(r'(◄|⇐ Previous)') + help = 'Index format: yyyy/mm/dd/strip-name-author-name' + + + +class Bhag(_BasicScraper): + latestUrl = 'http://bhag.sackofjustice.com/' + imageUrl = 'http://bhag.sackofjustice.com/daily.php?date=' + imageSearch = compile(r'/(comics/.+?)">') + prevSearch = compile(r'first.+?/(daily.php\?date=.+?)".+?previous') + help = 'Index format: yymmdd' + + + +def blankLabel(name, baseUrl): + return type('BlankLabel_%s' % name, + (_BasicScraper,), + dict( + name='BlankLabel/' + name, + latestUrl=baseUrl, + imageUrl='d/%s.html', + imageSearch=compile(r'"(/comic[s|/].+?)"'), + prevSearch=compile(r'(?:"([^"]*(?:/d/[^"\r\n]*)|(?:/strip/.+?))")(?:(?:.{43}starshift_back.gif)|(?:.+?cxn_previous)|(?:.{43}previous)|(?:[^<>]*>[^<>]*<[^<>]*previous)|(?:.*?back_button)|(?:.*?comicnav-previous))'), + help='Index format: yyyymmdd') + ) + + +checkerboardNightmare = blankLabel('CheckerboardNightmare', 'http://www.checkerboardnightmare.com/') +courtingDisaster = blankLabel('CourtingDisaster', 'http://www.courting-disaster.com/') +evilInc = blankLabel('EvilInc', 'http://www.evil-comic.com/') +greystoneInn = blankLabel('GreystoneInn', 'http://www.greystoneinn.net/') +itsWalky = blankLabel('ItsWalky', 'http://www.itswalky.com/') +# one strip name starts with %20 +#krazyLarry = blankLabel('KrazyLarry', 'http://www.krazylarry.com/') +melonpool = blankLabel('Melonpool', 'http://www.melonpool.com/') +# strip names = index.php +#realLife = blankLabel('RealLife', 'http://www.reallifecomics.com/') +schlockMercenary = blankLabel('SchlockMercenary', 'http://www.schlockmercenary.com/') +# hosted on ComicsDotCom +#sheldon = blankLabel('Sheldon', 'http://www.sheldoncomics.com/') 
+shortpacked = blankLabel('Shortpacked', 'http://www.shortpacked.com/') +starslipCrisis = blankLabel('StarslipCrisis', 'http://www.starslipcrisis.com/') +uglyHill = blankLabel('UglyHill', 'http://www.uglyhill.com/') + + + +class BeePower(_BasicScraper): + latestUrl = 'http://comicswithoutviolence.com/d/20080713.html' + imageUrl = 'http://comicswithoutviolence.com/d/%s.html' + imageSearch = compile(r'src="(/comics/.+?)"') + prevSearch = compile(r'(\d+\.html)">]+?src="/images/previous_day.png"') + help = 'Index format: yyyy/mm/dd' + + + +class Bellen(_BasicScraper): + latestUrl = 'http://boxbrown.com/' + imageUrl = 'http://boxbrown.com/?p=%s' + imageSearch = compile(r'') + help = 'Index format: nnn' + + + +class BlankIt(_BasicScraper): + latestUrl = 'http://blankitcomics.com/' + imageUrl = 'http://blankitcomics.com/%s' + imageSearch = compile(r'') + help = 'Index format: yyyy/mm/dd/name' + + + +class BobWhite(_BasicScraper): + latestUrl = 'http://www.bobwhitecomics.com/' + imageUrl = 'http://www.bobwhitecomics.com/%s.shtml' + imageSearch = compile(r'src="(/comics/.+?)"') + prevSearch = compile(r'">]+?>]+?src="/images/prev.jpg">') + help = 'Index format: yyyymmdd' + + + +class BigFatWhale(_BasicScraper): + latestUrl = 'http://www.bigfatwhale.com/' + imageUrl = 'http://www.bigfatwhale.com/archives/bfw_%s.htm' + imageSearch = compile(r']+?>Previous') + help = 'Index format: (sometimes chapternumber/)-yyyy-mm-dd/stripname' + + + +class BrightlyWound(_BasicScraper): + latestUrl = 'http://www.brightlywound.com/' + imageUrl = 'http://www.brightlywound.com/?comic=%s' + imageSearch = compile(r'') + help = 'Index format: yyyy-mm-dd' + + + +class BloodBound(_BasicScraper): + latestUrl = 'http://www.bloodboundcomic.com/' + imageUrl = 'http://www.bloodboundcomic.com/d/%s.html' + imageSearch = compile(r' src="(/comics/.+?)"') + prevSearch = compile(r' ]+?src="/images/previous_day.jpg"') + help = 'Index format: yyyymmdd' + + + +class BookOfBiff(_BasicScraper): + latestUrl = 
'http://www.thebookofbiff.com/' + imageUrl = 'http://www.thebookofbiff.com/%s' + imageSearch = compile(r'◄ Previous') + help = 'Index format: yyyy/mm/dd/stripnum-strip-name' + + + +class BillyTheDunce(_BasicScraper): + latestUrl = 'http://www.duncepress.com/' + imageUrl = 'http://www.duncepress.com/%s/' + imageSearch = compile(r'') + help = 'Index format: yyyy/mm/dd/strip-name' + + + +class BetweenFailures(_BasicScraper): + latestUrl = 'http://betweenfailures.com/' + imageUrl = 'http://betweenfailures.com/%s' + imageSearch = compile(r'') + prevSearch = compile(r'« Previous') + help = 'Index format: yyyy/mm/dd/stripnum-strip-name' + + + +class BillyTheBeaker(_BasicScraper): + latestUrl = 'http://billy.defectivejunk.com/' + imageUrl = 'http://billy.defectivejunk.com/index.php?strip=%s' + imageSearch = compile(r'') + help = 'Index format: nnn' diff --git a/dosagelib/plugins/c.py b/dosagelib/plugins/c.py new file mode 100644 index 000000000..705cfb9a8 --- /dev/null +++ b/dosagelib/plugins/c.py @@ -0,0 +1,495 @@ +from re import compile + +from ..helpers import ( + _BasicScraper, constStarter, bounceStarter, indirectStarter) +from ..util import getQueryParams + + +class CalvinAndHobbes(_BasicScraper): + latestUrl = 'http://www.gocomics.com/calvinandhobbes/' + imageUrl = 'http://www.gocomics.com/calvinandhobbes/%s' + imageSearch = compile(r'src="(http://picayune\.uclick\.com/comics/ch/[^"]+\.gif)"') + prevSearch = compile(r'href="(.*?)"\s+onclick="[^"]*">Previous day') + help = 'Index format: yyyy/mm/dd' + + + +class CandyCartoon(_BasicScraper): + latestUrl = 'http://www.candycartoon.com/' + imageUrl = 'http://www.candycartoon.com/archives/%s.html' + imageSearch = compile(r'[^prev') + help = 'Index format: nnnnnn' + + + +class CaptainSNES(_BasicScraper): + latestUrl = 'http://captainsnes.com/' + imageUrl = 'http://captainsnes.com/?date=%s' + imageSearch = compile(r'') + help = 'Index format: yyyymmdd' + + + +class CaribbeanBlue(_BasicScraper): + latestUrl = 
'http://cblue.katbox.net/' + imageUrl = 'http://cblue.katbox.net/index.php?strip_id=%s' + imageSearch = compile(r'="(.+?strips/.+?)"') + prevSearch = compile(r'.+?"(.+?)".+?Previous') + help = 'Index format: n (unpadded)' + + +class Catharsis(_BasicScraper): + latestUrl = 'http://catharsiscomic.com/' + imageUrl = 'http://catharsiscomic.com/archive.php?strip=%s' + imageSearch = compile(r'') + help = 'Index format: yyyy/mm/dd/strip-name' + + + +class ChugworthAcademy(_BasicScraper): + latestUrl = 'http://chugworth.com/' + imageUrl = 'http://chugworth.com/?p=%s' + imageSearch = compile(r'Comic')
+    prevSearch = compile(r'<a href=]+?title="Previous">') + help = 'Index format: n (unpadded)' + + + +class ChugworthAcademyArchive(_BasicScraper): + latestUrl = 'http://chugworth.com/archive/?strip_id=422' + imageUrl = 'http://chugworth.com/archive/?strip_id=%s' + imageSearch = compile(r'&laq') + help = 'Index format: non' + + + +class CombustibleOrange(_BasicScraper): + latestUrl = 'http://www.combustibleorange.com/' + imageUrl = 'http://www.combustibleorange.com/index.php?current=%s' + imageSearch = compile(r'') + help = 'Index format: n (unpadded)' + + + +class Comedity(_BasicScraper): + latestUrl = 'http://www.comedity.com/' + imageUrl = 'http://www.comedity.com/index.php?strip_id=%s' + imageSearch = compile(r' *\"Prior') + help = 'Index format: n' + + + +class CoolCatStudio(_BasicScraper): + latestUrl = 'http://www.coolcatstudio.com/' + imageUrl = 'http://www.coolcatstudio.com/index.php?p=%s' + imageSearch = compile(r'(/comics/.+?)"') + prevSearch = compile(r"href='(.+?)'>PREV") + help = 'Index format: n' + + + +class CourtingDisaster(_BasicScraper): + latestUrl = 'http://www.courting-disaster.com/' + imageUrl = 'http://www.courting-disaster.com/archive/%s.html' + imageSearch = compile(r'(/comics/.+?)"') + prevSearch = compile(r']+?>') + help = 'Index format: yyyymmdd' + + + +class CrapIDrewOnMyLunchBreak(_BasicScraper): + latestUrl = 'http://crap.jinwicked.com/' + imageUrl = 'http://crap.jinwicked.com/%s' + imageSearch = compile(r'<< Previous page') + help = 'Index format: yyyymmdd' + + +def cloneManga(name, shortName, lastStrip=None): + baseUrl = 'http://manga.clone-army.org/%s.php' % (shortName,) + imageUrl = baseUrl + '?page=%s' + if lastStrip is None: + starter = bounceStarter(baseUrl, compile(r'◄ Previous') + help = 'Index format: n (unpadded)' + + +def comicsDotCom(name, section): + baseUrl = 'http://www.comics.com/%s/%s/archive/' % (section, name) + + @classmethod + def namer(cls, imageUrl, pageUrl): + htmlname = pageUrl.split('/')[-1] + filename = 
htmlname.split('.')[0] + return filename + + return type('ComicsDotCom_%s' % name, + (_BasicScraper,), + dict( + name='ComicsDotCom/' + name, + starter=indirectStarter(baseUrl, compile(r'(?:(?:'), + prevSearch=compile(r'< Previous') + help = 'Index format: n (unpadded)' + + + +class CrimsonDark(_BasicScraper): + latestUrl = 'http://www.davidcsimon.com/crimsondark/' + imageUrl = 'http://www.davidcsimon.com/crimsondark/index.php?view=comic&strip_id=%s' + imageSearch = compile(r'src="(.+?strips/.+?)"') + prevSearch = compile(r'') + help = 'Index format: nnn' + + + +class CowboyJedi(_BasicScraper): + latestUrl = 'http://www.cowboyjedi.com/' + imageUrl = 'http://www.cowboyjedi.com/%s' + imageSearch = compile(r'') + help = 'Index format: nnn' + + + +class Collar6(_BasicScraper): + latestUrl = 'http://collar6.com/' + imageUrl = 'http://collar6.com/%s' + imageSearch = compile(r'src="(http://collar6.com/comics/.+?)"') + prevSearch = compile(r' href="(http://collar6.com/\d+/\S+)">◄ Previous') + help = 'Index format: yyyy/namednumber' + + + +class Chester5000XYV(_BasicScraper): + latestUrl = 'http://jessfink.com/Chester5000XYV/' + imageUrl = 'http://jessfink.com/Chester5000XYV/?p=%s' + imageSearch = compile(r'') + help = 'Index format: nnn' + + + +class CalamitiesOfNature(_BasicScraper): + latestUrl = 'http://www.calamitiesofnature.com/' + imageUrl = 'http://www.calamitiesofnature.com/archive/?c=%s' + imageSearch = compile(r'') + help = 'Index format: nnn' + + + +class Champ2010(_BasicScraper): + latestUrl = 'http://www.jedcollins.com/champ2010/' + imageUrl = 'http://jedcollins.com/champ2010/?p=%s' + imageSearch = compile(r'Next>')) + imageUrl = 'http://corydoncafe.com/comic-%s.html' + imageSearch = compile(r'<Previous') + help = 'Index format: nnn' + + @classmethod + def namer(cls, imageUrl, pageUrl): + return pageUrl.split('/')[-1].split('.')[0] + + + +class CraftedFables(_BasicScraper): + latestUrl = 'http://www.craftedfables.com/' + imageUrl = 
'http://www.caf-fiends.net/craftedfables/?p=%s' + imageSearch = compile(r'') + help = 'Index format: nnn' + + + +class Currhue(_BasicScraper): + latestUrl = 'http://www.currhue.com/' + imageUrl = 'http://www.currhue.com/?p=%s' + imageSearch = compile(r'', IGNORECASE) + prevSearch = compile(r' <.+?/aprev.gif"') + help = 'Index format: nnn' + + +class DeepFried(_BasicScraper): + latestUrl = 'http://www.whatisdeepfried.com/' + imageUrl = 'http://www.whatisdeepfried.com/%s' + imageSearch = compile(r'(http://www.whatisdeepfried.com/comics/.+?)"') + prevSearch = compile(r'"(http://www.whatisdeepfried.com/.+?)">') + help = 'Index format: non' + + + +class DoemainOfOurOwn(_BasicScraper): + latestUrl = 'http://www.doemain.com/' + imageUrl = 'http://www.doemain.com/index.cgi/%s' + imageSearch = compile(r"Previous Strip') + prevSearch = compile(r'Previous Week,') + help = 'Index format: nnnnn' + + + +class Dracula(_BasicScraper): + latestUrl = 'http://draculacomic.net/' + imageUrl = 'http://draculacomic.net/comic.php?comicID=%s' + imageSearch = compile(r'« Prev') + help = 'Index format: nnn' + + + +class DragonTails(_BasicScraper): + latestUrl = 'http://www.dragon-tails.com/' + imageUrl = 'http://www.dragon-tails.com/archive.php?date=%s' + imageSearch = compile(r'"(newcomic/.+?)"') + prevSearch = compile(r'"(archive.+?)">.+n_2') + help = 'Index format: yyyy-mm-dd' + + +class DreamKeepersPrelude(_BasicScraper): + latestUrl = 'http://www.dreamkeeperscomic.com/Prelude.php' + imageUrl = 'http://www.dreamkeeperscomic.com/Prelude.php?pg=%s' + imageSearch = compile(r'(images/PreludeNew/.+?)"') + prevSearch = compile(r'(Prelude.php\?pg=.+?)"') + help = 'Index format: n' + + +class Drowtales(_BasicScraper): + latestUrl = 'http://www.drowtales.com/mainarchive.php' + imageUrl = 'http://www.drowtales.com/mainarchive.php?location=%s' + imageSearch = compile(r'src=".(/tmpmanga/.+?)"') + prevSearch = compile(r'.+?back') + help = 'Index format: nnn.html' + + + +class 
DieselSweeties(_BasicScraper): + latestUrl = 'http://www.dieselsweeties.com/' + imageUrl = 'http://www.dieselsweeties.com/archive/%s' + imageSearch = compile(r'src="(/hstrips/.+?)"') + prevSearch = compile(r'href="(/archive/.+?)">(') + starter = indirectStarter('http://dresdencodak.com/', compile(r'

    ')) + + + +class DonkBirds(_BasicScraper): + latestUrl = 'http://www.donkbirds.com/' + imageUrl = 'http://www.donkbirds.com/index.php?date=%s' + imageSearch = compile(r'Previous') + help = 'Index format: yyyy-mm-dd' + + + +class DrawnByDrunks(_BasicScraper): + starter = bounceStarter('http://www.drawnbydrunks.co.uk/', compile(r'