Fix more comics.

This commit is contained in:
Bastian Kleineidam 2012-12-08 00:45:18 +01:00
parent 1b74e304c0
commit faba7b0bca
43 changed files with 1254 additions and 171 deletions

View file

@ -11,7 +11,7 @@ NUMPROCESSORS:=$(shell grep -c processor /proc/cpuinfo)
# - write test results in file
# - run all tests found in the "tests" subdirectory
TESTOUTPUT?=testresults.txt
PYTESTOPTS:=-n $(NUMPROCESSORS) --resultlog=$(TESTOUTPUT) --tb=short
PYTESTOPTS?=-n $(NUMPROCESSORS) --resultlog=$(TESTOUTPUT) --tb=short
CHMODMINUSMINUS:=--
# directory or file with tests to run
TESTS ?= tests

View file

@ -1,9 +1,14 @@
# Build the generated documentation: the HTML man page and the test result page.
MANFILES:=dosage.1.html

# Both recipes below write their output through a "> $@" redirection, which
# creates the target before the command has succeeded. Delete the target when
# its recipe fails so a truncated file is never mistaken for an up-to-date one.
# NOTE: a failure of man2html inside the pipeline is still masked, because the
# pipeline's exit status is that of the final sed.
.DELETE_ON_ERROR:

all: man testresults.html

man: $(MANFILES)

# Convert the roff man page to HTML: drop the first output line, strip
# everything from "Time:" to the end of its line, and rewrite "/:" to "/".
dosage.1.html: dosage.1
	man2html -r $< | tail -n +2 | sed 's/Time:.*//g' | sed 's@/:@/@g' > $@
# patch --no-backup-if-mismatch --quiet $@ dosage.1.html.diff

# Render ../testresults.txt as an HTML page.
testresults.html: ../testresults.txt
	../scripts/mktestpage.py $< > $@

.PHONY: all man

View file

@ -21,7 +21,7 @@ you may be infringing upon various copyrights.
Usage
------
List available comics (over 3500 at the moment):
List available comics (over 3000 at the moment):
`$ dosage -l`

18
doc/css/dosage.css Normal file
View file

@ -0,0 +1,18 @@
/* Page-wide typeface. */
body {
    font-family: open-sans, sans-serif;
}

/* One result entry: a fixed-width, left-floated box with a margin on all sides. */
.item {
    float: left;
    margin: 10px;
    width: 100px;
}

/* Result link colours: green for passing entries, red for failing ones. */
.ok {
    color: #119911;
}

.failed {
    color: #992200;
}

298
doc/css/main.css Normal file
View file

@ -0,0 +1,298 @@
/*
* HTML5 Boilerplate
*
* What follows is the result of much research on cross-browser styling.
* Credit left inline and big thanks to Nicolas Gallagher, Jonathan Neal,
* Kroc Camen, and the H5BP dev community and team.
*/
/* ==========================================================================
Base styles: opinionated defaults
========================================================================== */
html,
button,
input,
select,
textarea {
color: #222;
}
body {
font-size: 1em;
line-height: 1.4;
}
/*
* Remove text-shadow in selection highlight: h5bp.com/i
* These selection declarations have to be separate.
* Customize the background color to match your design.
*/
::-moz-selection {
background: #b3d4fc;
text-shadow: none;
}
::selection {
background: #b3d4fc;
text-shadow: none;
}
/*
* A better looking default horizontal rule
*/
hr {
display: block;
height: 1px;
border: 0;
border-top: 1px solid #ccc;
margin: 1em 0;
padding: 0;
}
/*
* Remove the gap between images and the bottom of their containers: h5bp.com/i/440
*/
img {
vertical-align: middle;
}
/*
* Remove default fieldset styles.
*/
fieldset {
border: 0;
margin: 0;
padding: 0;
}
/*
* Allow only vertical resizing of textareas.
*/
textarea {
resize: vertical;
}
/* ==========================================================================
Chrome Frame prompt
========================================================================== */
.chromeframe {
margin: 0.2em 0;
background: #ccc;
color: #000;
padding: 0.2em 0;
}
/* ==========================================================================
Author's custom styles
========================================================================== */
/* ==========================================================================
Helper classes
========================================================================== */
/*
* Image replacement
*/
.ir {
background-color: transparent;
border: 0;
overflow: hidden;
/* IE 6/7 fallback */
*text-indent: -9999px;
}
.ir:before {
content: "";
display: block;
width: 0;
height: 100%;
}
/*
* Hide from both screenreaders and browsers: h5bp.com/u
*/
.hidden {
display: none !important;
visibility: hidden;
}
/*
* Hide only visually, but have it available for screenreaders: h5bp.com/v
*/
.visuallyhidden {
border: 0;
clip: rect(0 0 0 0);
height: 1px;
margin: -1px;
overflow: hidden;
padding: 0;
position: absolute;
width: 1px;
}
/*
* Extends the .visuallyhidden class to allow the element to be focusable
* when navigated to via the keyboard: h5bp.com/p
*/
.visuallyhidden.focusable:active,
.visuallyhidden.focusable:focus {
clip: auto;
height: auto;
margin: 0;
overflow: visible;
position: static;
width: auto;
}
/*
* Hide visually and from screenreaders, but maintain layout
*/
.invisible {
visibility: hidden;
}
/*
* Clearfix: contain floats
*
* For modern browsers
* 1. The space content is one way to avoid an Opera bug when the
* `contenteditable` attribute is included anywhere else in the document.
* Otherwise it causes space to appear at the top and bottom of elements
* that receive the `clearfix` class.
* 2. The use of `table` rather than `block` is only necessary if using
* `:before` to contain the top-margins of child elements.
*/
.clearfix:before,
.clearfix:after {
content: " "; /* 1 */
display: table; /* 2 */
}
.clearfix:after {
clear: both;
}
/*
* For IE 6/7 only
* Include this rule to trigger hasLayout and contain floats.
*/
.clearfix {
*zoom: 1;
}
/* ==========================================================================
EXAMPLE Media Queries for Responsive Design.
These examples override the primary ('mobile first') styles.
Modify as content requires.
========================================================================== */
@media only screen and (min-width: 35em) {
/* Style adjustments for viewports that meet the condition */
}
@media only screen and (-webkit-min-device-pixel-ratio: 1.5),
only screen and (min-resolution: 144dpi) {
/* Style adjustments for high resolution devices */
}
/* ==========================================================================
Print styles.
Inlined to avoid required HTTP connection: h5bp.com/r
========================================================================== */
@media print {
* {
background: transparent !important;
color: #000 !important; /* Black prints faster: h5bp.com/s */
box-shadow: none !important;
text-shadow: none !important;
}
a,
a:visited {
text-decoration: underline;
}
a[href]:after {
content: " (" attr(href) ")";
}
abbr[title]:after {
content: " (" attr(title) ")";
}
/*
* Don't show links for images, or javascript/internal links
*/
.ir a:after,
a[href^="javascript:"]:after,
a[href^="#"]:after {
content: "";
}
pre,
blockquote {
border: 1px solid #999;
page-break-inside: avoid;
}
thead {
display: table-header-group; /* h5bp.com/t */
}
tr,
img {
page-break-inside: avoid;
}
img {
max-width: 100% !important;
}
@page {
margin: 0.5cm;
}
p,
h2,
h3 {
orphans: 3;
widows: 3;
}
h2,
h3 {
page-break-after: avoid;
}
}

504
doc/css/normalize.css vendored Normal file
View file

@ -0,0 +1,504 @@
/*! normalize.css v1.0.1 | MIT License | git.io/normalize */
/* ==========================================================================
HTML5 display definitions
========================================================================== */
/*
* Corrects `block` display not defined in IE 6/7/8/9 and Firefox 3.
*/
article,
aside,
details,
figcaption,
figure,
footer,
header,
hgroup,
nav,
section,
summary {
display: block;
}
/*
* Corrects `inline-block` display not defined in IE 6/7/8/9 and Firefox 3.
*/
audio,
canvas,
video {
display: inline-block;
*display: inline;
*zoom: 1;
}
/*
* Prevents modern browsers from displaying `audio` without controls.
* Remove excess height in iOS 5 devices.
*/
audio:not([controls]) {
display: none;
height: 0;
}
/*
* Addresses styling for `hidden` attribute not present in IE 7/8/9, Firefox 3,
* and Safari 4.
* Known issue: no IE 6 support.
*/
[hidden] {
display: none;
}
/* ==========================================================================
Base
========================================================================== */
/*
* 1. Corrects text resizing oddly in IE 6/7 when body `font-size` is set using
* `em` units.
* 2. Prevents iOS text size adjust after orientation change, without disabling
* user zoom.
*/
html {
font-size: 100%; /* 1 */
-webkit-text-size-adjust: 100%; /* 2 */
-ms-text-size-adjust: 100%; /* 2 */
}
/*
* Addresses `font-family` inconsistency between `textarea` and other form
* elements.
*/
html,
button,
input,
select,
textarea {
font-family: sans-serif;
}
/*
* Addresses margins handled incorrectly in IE 6/7.
*/
body {
margin: 0;
}
/* ==========================================================================
Links
========================================================================== */
/*
* Addresses `outline` inconsistency between Chrome and other browsers.
*/
a:focus {
outline: thin dotted;
}
/*
* Improves readability when focused and also mouse hovered in all browsers.
*/
a:active,
a:hover {
outline: 0;
}
/* ==========================================================================
Typography
========================================================================== */
/*
* Addresses font sizes and margins set differently in IE 6/7.
* Addresses font sizes within `section` and `article` in Firefox 4+, Safari 5,
* and Chrome.
*/
h1 {
font-size: 2em;
margin: 0.67em 0;
}
h2 {
font-size: 1.5em;
margin: 0.83em 0;
}
h3 {
font-size: 1.17em;
margin: 1em 0;
}
h4 {
font-size: 1em;
margin: 1.33em 0;
}
h5 {
font-size: 0.83em;
margin: 1.67em 0;
}
h6 {
font-size: 0.75em;
margin: 2.33em 0;
}
/*
* Addresses styling not present in IE 7/8/9, Safari 5, and Chrome.
*/
abbr[title] {
border-bottom: 1px dotted;
}
/*
* Addresses style set to `bolder` in Firefox 3+, Safari 4/5, and Chrome.
*/
b,
strong {
font-weight: bold;
}
blockquote {
margin: 1em 40px;
}
/*
* Addresses styling not present in Safari 5 and Chrome.
*/
dfn {
font-style: italic;
}
/*
* Addresses styling not present in IE 6/7/8/9.
*/
mark {
background: #ff0;
color: #000;
}
/*
* Addresses margins set differently in IE 6/7.
*/
p,
pre {
margin: 1em 0;
}
/*
* Corrects font family set oddly in IE 6, Safari 4/5, and Chrome.
*/
code,
kbd,
pre,
samp {
font-family: monospace, serif;
_font-family: 'courier new', monospace;
font-size: 1em;
}
/*
* Improves readability of pre-formatted text in all browsers.
*/
pre {
white-space: pre;
white-space: pre-wrap;
word-wrap: break-word;
}
/*
* Addresses CSS quotes not supported in IE 6/7.
*/
q {
quotes: none;
}
/*
* Addresses `quotes` property not supported in Safari 4.
*/
q:before,
q:after {
content: '';
content: none;
}
/*
* Addresses inconsistent and variable font size in all browsers.
*/
small {
font-size: 80%;
}
/*
* Prevents `sub` and `sup` affecting `line-height` in all browsers.
*/
sub,
sup {
font-size: 75%;
line-height: 0;
position: relative;
vertical-align: baseline;
}
sup {
top: -0.5em;
}
sub {
bottom: -0.25em;
}
/* ==========================================================================
Lists
========================================================================== */
/*
* Addresses margins set differently in IE 6/7.
*/
dl,
menu,
ol,
ul {
margin: 1em 0;
}
dd {
margin: 0 0 0 40px;
}
/*
* Addresses paddings set differently in IE 6/7.
*/
menu,
ol,
ul {
padding: 0 0 0 40px;
}
/*
* Corrects list images handled incorrectly in IE 7.
*/
nav ul,
nav ol {
list-style: none;
list-style-image: none;
}
/* ==========================================================================
Embedded content
========================================================================== */
/*
* 1. Removes border when inside `a` element in IE 6/7/8/9 and Firefox 3.
* 2. Improves image quality when scaled in IE 7.
*/
img {
border: 0; /* 1 */
-ms-interpolation-mode: bicubic; /* 2 */
}
/*
* Corrects overflow displayed oddly in IE 9.
*/
svg:not(:root) {
overflow: hidden;
}
/* ==========================================================================
Figures
========================================================================== */
/*
* Addresses margin not present in IE 6/7/8/9, Safari 5, and Opera 11.
*/
figure {
margin: 0;
}
/* ==========================================================================
Forms
========================================================================== */
/*
* Corrects margin displayed oddly in IE 6/7.
*/
form {
margin: 0;
}
/*
* Define consistent border, margin, and padding.
*/
fieldset {
border: 1px solid #c0c0c0;
margin: 0 2px;
padding: 0.35em 0.625em 0.75em;
}
/*
* 1. Corrects color not being inherited in IE 6/7/8/9.
* 2. Corrects text not wrapping in Firefox 3.
* 3. Corrects alignment displayed oddly in IE 6/7.
*/
legend {
border: 0; /* 1 */
padding: 0;
white-space: normal; /* 2 */
*margin-left: -7px; /* 3 */
}
/*
* 1. Corrects font size not being inherited in all browsers.
* 2. Addresses margins set differently in IE 6/7, Firefox 3+, Safari 5,
* and Chrome.
* 3. Improves appearance and consistency in all browsers.
*/
button,
input,
select,
textarea {
font-size: 100%; /* 1 */
margin: 0; /* 2 */
vertical-align: baseline; /* 3 */
*vertical-align: middle; /* 3 */
}
/*
* Addresses Firefox 3+ setting `line-height` on `input` using `!important` in
* the UA stylesheet.
*/
button,
input {
line-height: normal;
}
/*
* 1. Avoid the WebKit bug in Android 4.0.* where (2) destroys native `audio`
* and `video` controls.
* 2. Corrects inability to style clickable `input` types in iOS.
* 3. Improves usability and consistency of cursor style between image-type
* `input` and others.
* 4. Removes inner spacing in IE 7 without affecting normal text inputs.
* Known issue: inner spacing remains in IE 6.
*/
button,
html input[type="button"], /* 1 */
input[type="reset"],
input[type="submit"] {
-webkit-appearance: button; /* 2 */
cursor: pointer; /* 3 */
*overflow: visible; /* 4 */
}
/*
* Re-set default cursor for disabled elements.
*/
button[disabled],
input[disabled] {
cursor: default;
}
/*
* 1. Addresses box sizing set to content-box in IE 8/9.
* 2. Removes excess padding in IE 8/9.
* 3. Removes excess padding in IE 7.
* Known issue: excess padding remains in IE 6.
*/
input[type="checkbox"],
input[type="radio"] {
box-sizing: border-box; /* 1 */
padding: 0; /* 2 */
*height: 13px; /* 3 */
*width: 13px; /* 3 */
}
/*
* 1. Addresses `appearance` set to `searchfield` in Safari 5 and Chrome.
* 2. Addresses `box-sizing` set to `border-box` in Safari 5 and Chrome
* (include `-moz` to future-proof).
*/
input[type="search"] {
-webkit-appearance: textfield; /* 1 */
-moz-box-sizing: content-box;
-webkit-box-sizing: content-box; /* 2 */
box-sizing: content-box;
}
/*
* Removes inner padding and search cancel button in Safari 5 and Chrome
* on OS X.
*/
input[type="search"]::-webkit-search-cancel-button,
input[type="search"]::-webkit-search-decoration {
-webkit-appearance: none;
}
/*
* Removes inner padding and border in Firefox 3+.
*/
button::-moz-focus-inner,
input::-moz-focus-inner {
border: 0;
padding: 0;
}
/*
* 1. Removes default vertical scrollbar in IE 6/7/8/9.
* 2. Improves readability and alignment in all browsers.
*/
textarea {
overflow: auto; /* 1 */
vertical-align: top; /* 2 */
}
/* ==========================================================================
Tables
========================================================================== */
/*
* Remove most spacing between table cells.
*/
table {
border-collapse: collapse;
border-spacing: 0;
}

View file

@ -9,7 +9,7 @@ Section: User Commands (1)<BR><A HREF="#index">Index</A>
<A NAME="lbAB">&nbsp;</A>
<H2>NAME</H2>
dosage - comic strip downloader
dosage - a commandline webcomic downloader and archiver
<A NAME="lbAC">&nbsp;</A>
<H2>SYNOPSIS</H2>

9
doc/js/masonry.min.js vendored Normal file

File diff suppressed because one or more lines are too long

156
doc/testresults.html Normal file
View file

@ -0,0 +1,156 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<!-- Generated by scripts/mktestpage.py from testresults.txt; change the
     generator rather than editing this page by hand. -->
<title>Dosage test results</title>
<meta name="description" content="">
<meta name="viewport" content="width=device-width">
<!-- Order matters: the reset comes first, then the boilerplate defaults,
     then the page-specific rules that override them. -->
<link rel="stylesheet" href="css/normalize.css">
<link rel="stylesheet" href="css/main.css">
<link rel="stylesheet" href="css/dosage.css">
<!-- Provides the Masonry constructor used by the inline script at the end of the body. -->
<script src="js/masonry.min.js"></script>
</head>
<body>
<p>Dosage test results from 07.12.2012</p>
<div id="container">
<div class="item"><a href="http://alienshores.com/alienshores_band/" class="ok">AlienShores OK</a></div>
<div class="item"><a href="http://www.biggercheese.com/" class="ok">BiggerThanCheeses OK</a></div>
<div class="item"><a href="http://www.captainsnes.com/" class="ok">CaptainSNES OK</a></div>
<div class="item"><a href="http://jessfink.com/Chester5000XYV/" class="ok">Chester5000XYV OK</a></div>
<div class="item"><a href="http://www.creators.com/comics/diamond-lil.html" class="ok">Creators/DiamondLil OK</a></div>
<div class="item"><a href="http://www.creators.com/comics/hope-and-death.html" class="ok">Creators/HopeAndDeath OK</a></div>
<div class="item"><a href="http://www.creators.com/comics/on-a-claire-day.html" class="ok">Creators/OnaClaireDay OK</a></div>
<div class="item"><a href="http://dilbert.com/" class="ok">Dilbert OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/2s_a_company/5250099/" class="ok">DrunkDuck/2s_a_company OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Acrobat/5380691/" class="ok">DrunkDuck/Acrobat OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Allan/5326380/" class="ok">DrunkDuck/Allan OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Amya/5355643/" class="ok">DrunkDuck/Amya OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Angry_D_Monkey/5300564/" class="ok">DrunkDuck/ Angry_D_Monkey OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Awesomataz/5388634/" class="ok">DrunkDuck/Awesomataz OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Battle_of_the_Robofemoids/5414294/" class="ok">DrunkDuck/ Battle_of_the_Robofemoids OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Been_Better/5416677/" class="ok">DrunkDuck/Been_Better OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/BffSatan/5237809/" class="ok">DrunkDuck/BffSatan OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Chester_and_Ferdie/5406525/" class="ok">DrunkDuck/ Chester_and_Ferdie OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Children_of_the_Tiger/5220744/" class="ok">DrunkDuck/ Children_of_the_Tiger OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Circle_Arcadia/4925373/" class="ok">DrunkDuck/ Circle_Arcadia OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Cloud_Eagle/5329187/" class="ok">DrunkDuck/Cloud_Eagle OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Dasien/5343187/" class="ok">DrunkDuck/Dasien OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Desperate_Angels/5405057/" class="ok">DrunkDuck/ Desperate_Angels OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Edge_of_December/5352865/" class="ok">DrunkDuck/ Edge_of_December OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Engine/5403371/" class="ok">DrunkDuck/Engine OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Explorers_Of_the_Unknown/5395556/" class="ok">DrunkDuck/ Explorers_Of_the_Unknown OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Faults/5226225/" class="ok">DrunkDuck/Faults OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Good_Guy/5413334/" class="ok">DrunkDuck/Good_Guy OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Headless_Cross/5390146/" class="ok">DrunkDuck/ Headless_Cross OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/I_got_it_in_my_mouth/5368954/" class="ok">DrunkDuck/ I_got_it_in_my_mouth OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Karen_the_Marilith/5227021/" class="ok">DrunkDuck/ Karen_the_Marilith OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Kat_and_Dogg/5074974/" class="ok">DrunkDuck/Kat_and_Dogg OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Kitty_Litter/5245306/" class="ok">DrunkDuck/Kitty_Litter OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Knights_Requiem/5135780/" class="ok">DrunkDuck/ Knights_Requiem OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Last_Place_Comics/5416438/" class="ok">DrunkDuck/ Last_Place_Comics OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Last_War/4954728/" class="ok">DrunkDuck/Last_War OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Life_Blowz/5305212/" class="ok">DrunkDuck/Life_Blowz OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Linnyanie/5205907/" class="ok">DrunkDuck/Linnyanie OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Maggot_Boy/5407805/" class="ok">DrunkDuck/Maggot_Boy OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Metal_Breakdown/5386007/" class="ok">DrunkDuck/ Metal_Breakdown OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Morphic/5253227/" class="ok">DrunkDuck/Morphic OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Musical_Farm/5357846/" class="ok">DrunkDuck/Musical_Farm OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/My_Pet_Demon/5415753/" class="ok">DrunkDuck/My_Pet_Demon OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/My_Thingie/5354620/" class="ok">DrunkDuck/My_Thingie OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Mystery_World/5071936/" class="ok">DrunkDuck/ Mystery_World OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/No_Talent/5264318/" class="ok">DrunkDuck/No_Talent OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/One_last_breath/5178709/" class="ok">DrunkDuck/ One_last_breath OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Pixel_Plumbers/5344885/" class="ok">DrunkDuck/ Pixel_Plumbers OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Planet_Chaser/5416679/" class="ok">DrunkDuck/ Planet_Chaser OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Pokemon_Light_and_Dark/5341704/" class="ok">DrunkDuck/ Pokemon_Light_and_Dark OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Politics_The_Tankers_Way/4982065/" class="ok">DrunkDuck/ Politics_The_Tankers_Way OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Prelude/4895211/" class="ok">DrunkDuck/Prelude OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Project_217/5414112/" class="ok">DrunkDuck/Project_217 OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Solar_Salvage/5394935/" class="ok">DrunkDuck/ Solar_Salvage OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/The_Adventures_of_Chad_Cleanly/5137427/" class="ok">DrunkDuck/ The_Adventures_of_Chad_Cleanly OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/The_Begining_of_an_End/5113421/" class="ok">DrunkDuck/ The_Begining_of_an_End OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/The_Chronicles_of_Gaddick/5372621/" class="ok">DrunkDuck/ The_Chronicles_of_Gaddick OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/The_Emerald_City/5188061/" class="ok">DrunkDuck/ The_Emerald_City OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/The_Essyane_Warriors/5416744/" class="ok">DrunkDuck/ The_Essyane_Warriors OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/The_Uncanny_Uper_Dave/5273996/" class="ok">DrunkDuck/ The_Uncanny_Uper_Dave OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/This_Ego_of_Mine/5222563/" class="ok">DrunkDuck/ This_Ego_of_Mine OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Thog_Infinitron/5207209/" class="ok">DrunkDuck/ Thog_Infinitron OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Twisted_Mind_of_Stranger/5416587/" class="ok">DrunkDuck/ Twisted_Mind_of_Stranger OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Underscore/5333512/" class="ok">DrunkDuck/Underscore OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Woah_Roscoe/5234849/" class="ok">DrunkDuck/Woah_Roscoe OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/XAZ_A_Megaman_X_Fancomic/5195871/" class="ok">DrunkDuck/ XAZ_A_Megaman_X_Fancomic OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Yeah_wait_what/5292872/" class="ok">DrunkDuck/ Yeah_wait_what OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/the_Many_Deaths_of_Mario/5376436/" class="ok">DrunkDuck/ the_Many_Deaths_of_Mario OK</a></div>
<div class="item"><a href="http://www.gocomics.com/acadasia-down/2012/11/20" class="ok">GoComics/AcadasiaDown OK</a></div>
<div class="item"><a href="http://www.gocomics.com/anecdote/2012/08/15" class="ok">GoComics/Anecdote OK</a></div>
<div class="item"><a href="http://www.gocomics.com/bloomcounty/2012/12/07" class="ok">GoComics/BloomCounty OK</a></div>
<div class="item"><a href="http://www.gocomics.com/boundandgagged/2012/12/07" class="ok">GoComics/ BoundandGagged OK</a></div>
<div class="item"><a href="http://www.gocomics.com/candyblondell/2012/09/28" class="ok">GoComics/CANDYBLONDELL OK</a></div>
<div class="item"><a href="http://www.gocomics.com/casey-and-kyle/2012/11/17" class="ok">GoComics/CaseyandKyle OK</a></div>
<div class="item"><a href="http://www.gocomics.com/chuckasay/2012/12/05" class="ok">GoComics/ChuckAsay OK</a></div>
<div class="item"><a href="http://www.gocomics.com/compu-toon/2012/12/07" class="ok">GoComics/Computoon OK</a></div>
<div class="item"><a href="http://www.gocomics.com/ditzabled-princess/2012/12/05" class="ok">GoComics/ DitzAbledPrincess OK</a></div>
<div class="item"><a href="http://www.gocomics.com/forbetterorforworse/2012/12/07" class="ok">GoComics/ ForBetterorForWorse OK</a></div>
<div class="item"><a href="http://www.gocomics.com/historybluffs/2012/09/13" class="ok">GoComics/HistoryBluffs OK</a></div>
<div class="item"><a href="http://www.gocomics.com/incompatibles/2012/09/29" class="ok">GoComics/INCOMPATIBLES OK</a></div>
<div class="item"><a href="http://www.gocomics.com/inkpen/2012/12/07" class="ok">GoComics/InkPen OK</a></div>
<div class="item"><a href="http://www.gocomics.com/magic-coffee-hair/2012/12/06" class="ok">GoComics/ MagicCoffeeHair OK</a></div>
<div class="item"><a href="http://www.gocomics.com/the-lil-miesters/2012/12/07" class="ok">GoComics/ TheLilMiesters OK</a></div>
<div class="item"><a href="http://www.gocomics.com/uncleartsfunland/2012/12/02" class="ok">GoComics/ UncleArtsFunland OK</a></div>
<div class="item"><a href="http://ballofyarn.comicgenesis.com/d/20020624.html" class="ok">KeenSpot/BallofYarn OK</a></div>
<div class="item"><a href="http://beforedawn.comicgenesis.com/" class="ok">KeenSpot/BeforeDawn OK</a></div>
<div class="item"><a href="http://bsbs.comicgenesis.com/" class="ok">KeenSpot/BitterSweetBS OK</a></div>
<div class="item"><a href="http://llv.comicgenesis.com/" class="ok">KeenSpot/CTRO OK</a></div>
<div class="item"><a href="http://chrisread.comicgenesis.com/" class="ok">KeenSpot/ Chris27sCollectionofCrappyComix OK</a></div>
<div class="item"><a href="http://chronic.comicgenesis.com/" class="ok">KeenSpot/ ChronicMisadventures OK</a></div>
<div class="item"><a href="http://citrushouse.comicgenesis.com/" class="ok">KeenSpot/CitrusHouse OK</a></div>
<div class="item"><a href="http://creepyhead.comicgenesis.com/" class="ok">KeenSpot/CreepyHead OK</a></div>
<div class="item"><a href="http://fearful.comicgenesis.com/" class="ok">KeenSpot/ FearfulAsymptote OK</a></div>
<div class="item"><a href="http://gmhockey.comicgenesis.com/" class="ok">KeenSpot/ GameMisconduct OK</a></div>
<div class="item"><a href="http://gamingguardians.comicgenesis.com/" class="ok">KeenSpot/ GamingGuardians OK</a></div>
<div class="item"><a href="http://goldenage.comicgenesis.com/" class="ok">KeenSpot/GoldenAge OK</a></div>
<div class="item"><a href="http://jtv.comicgenesis.com/" class="ok">KeenSpot/ JimTheVikingTheUnanimatedSeries OK</a></div>
<div class="item"><a href="http://liliane.comicgenesis.com/" class="ok">KeenSpot/LilianeBiDyke OK</a></div>
<div class="item"><a href="http://lovebites.comicgenesis.com/" class="ok">KeenSpot/LoveBites OK</a></div>
<div class="item"><a href="http://mtranc3.comicgenesis.com/" class="ok">KeenSpot/MTranc3 OK</a></div>
<div class="item"><a href="http://majestic7.comicgenesis.com/" class="ok">KeenSpot/Majestic7 OK</a></div>
<div class="item"><a href="http://newtraditionalists.comicgenesis.com/" class="ok">KeenSpot/ NewTraditionalists OK</a></div>
<div class="item"><a href="http://orangepenguins.comicgenesis.com/" class="ok">KeenSpot/ OrangePenguins OK</a></div>
<div class="item"><a href="http://ordinarypeople.comicgenesis.com/" class="ok">KeenSpot/ OrdinaryPeople OK</a></div>
<div class="item"><a href="http://ozoneocean.comicgenesis.com/" class="ok">KeenSpot/PinkyTA OK</a></div>
<div class="item"><a href="http://pr0ncrest.comicgenesis.com/" class="ok">KeenSpot/Pr0nCrest OK</a></div>
<div class="item"><a href="http://shinegotower.comicgenesis.com/" class="ok">KeenSpot/ ShineGetDumplingsGoGoTower OK</a></div>
<div class="item"><a href="http://sippan.comicgenesis.com/" class="ok">KeenSpot/SippansSerie OK</a></div>
<div class="item"><a href="http://splendiforous.comicgenesis.com/" class="ok">KeenSpot/ SplendiforousEscapades OK</a></div>
<div class="item"><a href="http://straightahead.comicgenesis.com/" class="ok">KeenSpot/StraightAhead OK</a></div>
<div class="item"><a href="http://tang.comicgenesis.com/" class="ok">KeenSpot/TangsComics OK</a></div>
<div class="item"><a href="http://theanarchist.comicgenesis.com/" class="ok">KeenSpot/TheAnarchist OK</a></div>
<div class="item"><a href="http://okk.comicgenesis.com/" class="ok">KeenSpot/ TheMisadventuresofOkk OK</a></div>
<div class="item"><a href="http://xuanwu.comicgenesis.com/" class="ok">KeenSpot/TheProfessor OK</a></div>
<div class="item"><a href="http://gager.comicgenesis.com/" class="ok">KeenSpot/ TheSagaofGagerff2 OK</a></div>
<div class="item"><a href="http://noskillz.comicgenesis.com/" class="ok">KeenSpot/Trevino OK</a></div>
<div class="item"><a href="http://weekendwarriors.comicgenesis.com/" class="ok">KeenSpot/ WeekendWarriors OK</a></div>
<div class="item"><a href="http://fallinglessons.comicgenesis.com/" class="ok">KeenSpot/ fallinglessons OK</a></div>
<div class="item"><a href="http://leapingwizards.comicgenesis.com/" class="ok">KeenSpot/ leapingwizards OK</a></div>
<div class="item"><a href="http://www.meekcomic.com/" class="ok">Meek OK</a></div>
<div class="item"><a href="http://noneedforbushido.com/latest/" class="ok">NoNeedForBushido OK</a></div>
<div class="item"><a href="http://nobodyscores.loosenutstudio.com/" class="ok">NobodyScores OK</a></div>
<div class="item"><a href="http://www.odd-fish.net/" class="ok">OddFish OK</a></div>
<div class="item"><a href="http://oglaf.com/" class="ok">Oglaf OK</a></div>
<div class="item"><a href="http://www.redmeat.com/redmeat/2012-12-04/index.html" class="ok">RedMeat OK</a></div>
<div class="item"><a href="http://www.shortpacked.com/" class="ok">Shortpacked OK</a></div>
<div class="item"><a href="http://mpmcomic.smackjeeves.com/comics/205716/how-does-it-work-oh-thats-how/" class="ok">SmackJeeves/mpmcomic OK</a></div>
<div class="item"><a href="http://sf.snafu-comics.com/" class="ok">SnafuComics/SF OK</a></div>
<div class="item"><a href="http://www.thefallenangel.co.uk/hmhigh/" class="ok">TheFallenAngel/HMHigh OK</a></div>
<div class="item"><a href="http://www.hs.fi/viivijawagner/" class="ok">ViiviJaWagner OK</a></div>
</div>
<script>
// Once the page has finished loading, hand the #container element to
// Masonry so it can arrange the result entries in columns.
window.onload = function () {
  var container = document.getElementById('container');
  var options = { columnWidth: 240 };
  var layout = new Masonry(container, options);
};
</script>
</body>
</html>

51
dosage
View file

@ -24,7 +24,7 @@ import optparse
from dosagelib import events, scraper
from dosagelib.output import out
from dosagelib.util import get_columns, internal_error
from dosagelib.util import get_columns, internal_error, getDirname
from dosagelib.configuration import App, Freeware, Copyright, SupportUrl
def setupOptions():
@ -35,7 +35,8 @@ def setupOptions():
usage = 'usage: %prog [options] comicModule [comicModule ...]'
parser = optparse.OptionParser(usage=usage)
parser.add_option('-v', '--verbose', action='count', dest='verbose', default=0, help='provides verbose output, use multiple times for more verbosity')
parser.add_option('-a', '--all', action='count', dest='all', default=None, help='traverse and retrieve all available comics')
parser.add_option('-n', '--numstrips', action='store', dest='numstrips', type='int', default=0, help='traverse and retrieve the given number of comic strips; use --all to retrieve all comic strips')
parser.add_option('-a', '--all', action='store_true', dest='all', default=None, help='traverse and retrieve all comic strips')
parser.add_option('-b', '--basepath', action='store', dest='basepath', default='Comics', help='set the path to create invidivual comic directories in, default is Comics', metavar='PATH')
parser.add_option('--baseurl', action='store', dest='baseurl', default=None, help='the base URL of your comics directory (for RSS, HTML, etc.); this should correspond to --base-path', metavar='PATH')
parser.add_option('-l', '--list', action='store_const', const=1, dest='list', help='list available comic modules')
@ -73,20 +74,26 @@ def saveComicStrip(strip, basepath):
if saved:
allskipped = False
except IOError as msg:
out.write('Error saving %s: %s' % (image.filename, msg))
out.error('Could not save %s: %s' % (image.filename, msg))
errors += 1
return errors, allskipped
def displayHelp(comics, basepath):
    """Print the help text of every selected comic scraper.

    Each line of a scraper's help text is written with a "Help: " prefix.
    Returns 0 on success, or 1 after reporting a ValueError raised while
    resolving or iterating the scrapers.
    """
    try:
        for scraperobj in getScrapers(comics, basepath):
            helptext = scraperobj.getHelp()
            for helpline in helptext.splitlines():
                out.info("Help: "+helpline)
    except ValueError as err:
        out.error(err)
        return 1
    return 0
def getComics(options, comics):
"""Retrieve given comics."""
# XXX refactor
errors = 0
if options.output:
events.installHandler(options.output, options.basepath, options.baseurl)
@ -95,6 +102,8 @@ def getComics(options, comics):
out.context = scraperobj.get_name()
if options.all:
strips = scraperobj.getAllStrips()
elif options.numstrips:
strips = scraperobj.getAllStrips(options.numstrips)
else:
strips = scraperobj.getCurrentStrips()
first = True
@ -105,12 +114,13 @@ def getComics(options, comics):
if not first and scraperobj.indexes:
# stop when indexed retrieval skipped all images for one
# comic strip (except the first one)
out.write("Stop retrieval because image file already exists")
out.info("Stop retrieval because image file already exists")
break
first = False
except IOError as msg:
out.write('Error getting strip: %s' % msg)
except (ValueError, IOError) as msg:
out.error(msg)
errors += 1
continue
events.getHandler().end()
return errors
@ -123,26 +133,22 @@ def run(options, comics):
if options.list:
return doList(options.list == 1)
if len(comics) <= 0:
out.write('Warning: No comics specified, bailing out!')
out.warn('No comics specified, bailing out!')
return 1
try:
if options.modhelp:
return displayHelp(comics, options.basepath)
return getComics(options, comics)
except ValueError as msg:
out.write("Error: %s" % msg)
return 1
def doList(columnList):
    """List available comics.

    A true columnList selects the column layout, otherwise one scraper is
    listed per line. Always returns 0.
    """
    out.info('Available comic scrapers:')
    scrapers = getScrapers(['@@'])
    # pick the listing routine first, then run it on the scrapers
    lister = doColumnList if columnList else doSingleList
    num = lister(scrapers)
    out.info('%d supported comics.' % num)
    return 0
@ -171,9 +177,9 @@ def getScrapers(comics, basepath=None):
if '@' in comics:
# only scrapers whose directory already exists
if len(comics) > 1:
out.write("WARN: using '@' as comic name ignores all other specified comics.\n")
out.warn("using '@' as comic name ignores all other specified comics.")
for scraperclass in scraper.get_scrapers():
dirname = scraperclass.get_name().replace('/', os.sep)
dirname = getDirname(scraperclass.get_name())
if os.path.isdir(os.path.join(basepath, dirname)):
yield scraperclass()
elif '@@' in comics:
@ -181,7 +187,9 @@ def getScrapers(comics, basepath=None):
for scraperclass in scraper.get_scrapers():
yield scraperclass()
else:
# only selected
# get only selected comic scrapers
# store them in a list to catch naming errors early
scrapers = []
for comic in comics:
if ':' in comic:
name, index = comic.split(':', 1)
@ -189,14 +197,19 @@ def getScrapers(comics, basepath=None):
else:
name = comic
indexes = None
yield scraper.get_scraper(name)(indexes=indexes)
scrapers.append(scraper.get_scraper(name)(indexes=indexes))
for s in scrapers:
yield s
def main():
"""Parse options and execute commands."""
try:
parser = setupOptions()
options, args = parser.parse_args()
res = run(options, args)
# eliminate duplicate comic names
comics = set(args)
res = run(options, comics)
except KeyboardInterrupt:
print("Aborted.")
res = 1

View file

@ -7,7 +7,7 @@ import rfc822
import time
from .output import out
from .util import getImageObject, normaliseURL, unquote, strsize
from .util import getImageObject, normaliseURL, unquote, strsize, getDirname, getFilename
from .events import getHandler
class FetchComicError(IOError):
@ -34,20 +34,21 @@ class ComicStrip(object):
filename = self.namer(url, self.stripUrl)
if filename is None:
filename = url.rsplit('/', 1)[1]
return ComicImage(self.name, url, self.stripUrl, filename)
dirname = getDirname(self.name)
return ComicImage(self.name, url, self.stripUrl, dirname, filename)
class ComicImage(object):
"""A comic image downloader."""
def __init__(self, name, url, referrer, filename):
def __init__(self, name, url, referrer, dirname, filename):
"""Set URL and filename."""
self.name = name
self.referrer = referrer
self.url = url
self.dirname = dirname
filename = getFilename(filename)
self.filename, self.ext = os.path.splitext(filename)
self.filename = self.filename.replace(os.sep, '_')
self.ext = self.ext.replace(os.sep, '_')
def connect(self):
"""Connect to host and get meta information."""
@ -71,7 +72,7 @@ class ComicImage(object):
self.ext = '.' + subtype.replace('jpeg', 'jpg')
self.contentLength = int(self.urlobj.headers.get('content-length', 0))
self.lastModified = self.urlobj.headers.get('last-modified')
out.write('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength), 2)
out.debug('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))
def touch(self, filename):
"""Set last modified date on filename."""
@ -86,18 +87,18 @@ class ComicImage(object):
self.connect()
filename = "%s%s" % (self.filename, self.ext)
comicSize = self.contentLength
comicDir = os.path.join(basepath, self.name.replace('/', os.sep))
comicDir = os.path.join(basepath, self.dirname)
if not os.path.isdir(comicDir):
os.makedirs(comicDir)
fn = os.path.join(comicDir, filename)
if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize:
self.touch(fn)
out.write('Skipping existing file "%s".' % fn, 1)
out.info('Skipping existing file "%s".' % fn, 1)
return fn, False
try:
out.write('Writing comic to file %s...' % fn, 3)
out.debug('Writing comic to file %s...' % fn)
with open(fn, 'wb') as comicOut:
comicOut.write(self.urlobj.content)
self.touch(fn)
@ -107,7 +108,7 @@ class ComicImage(object):
raise
else:
size = strsize(os.path.getsize(fn))
out.write("Saved %s (%s)." % (fn, size), 1)
out.info("Saved %s (%s)." % (fn, size), 1)
getHandler().comicDownloaded(self.name, fn)
return fn, True

View file

@ -3,6 +3,7 @@
# Copyright (C) 2012 Bastian Kleineidam
from __future__ import print_function
import time
import sys
class Output(object):
"""Print output with context, indentation and optional timestamps."""
@ -13,7 +14,19 @@ class Output(object):
self.level = 0
self.timestamps = False
def write(self, s, level=0):
def info(self, s, level=0):
    """Write an informational message at the given verbosity level."""
    # delegate unchanged; write() decides whether the level is shown
    self.write(s, level=level)
def debug(self, s):
    """Write a debug message; it is always emitted at level 2."""
    debug_level = 2
    self.write(s, level=debug_level)
def warn(self, s):
    """Write a warning, prefixed with "WARN: ", to standard error."""
    message = "WARN: %s" % s
    self.write(message, file=sys.stderr)
def error(self, s):
    """Write an error, prefixed with "ERROR: ", to standard error."""
    message = "ERROR: %s" % s
    self.write(message, file=sys.stderr)
def write(self, s, level=0, file=sys.stdout):
"""Write message with indentation, context and optional timestamp."""
if level > self.level:
return
@ -21,7 +34,8 @@ class Output(object):
timestamp = time.strftime('%H:%M:%S ')
else:
timestamp = ''
print('%s%s> %s' % (timestamp, self.context, s))
print('%s%s> %s' % (timestamp, self.context, s), file=file)
file.flush()
def writelines(self, lines, level=0):
"""Write multiple messages."""

View file

@ -135,6 +135,7 @@ class AstronomyPOTD(_BasicScraper):
compile(r'<a href="(ap\d{6}\.html)">&gt;</a>'))
stripUrl = 'http://antwrp.gsfc.nasa.gov/apod/ap%s.html'
imageSearch = compile(r'<a href="(image/\d{4}/[^"]+)"')
multipleImagesPerStrip = True
prevSearch = compile(r'<a href="(ap\d{6}\.html)">&lt;</a>')
help = 'Index format: yymmdd'
@ -176,10 +177,6 @@ class AGirlAndHerFed(_BasicScraper):
prevSearch = compile(r'<a href="([^"]+)">[^>]+Back')
help = 'Index format: nnn'
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.split('?')[-1]
class AetheriaEpics(_BasicScraper):
latestUrl = 'http://aetheria-epics.schala.net/'

View file

@ -101,6 +101,10 @@ class BoyOnAStickAndSlither(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'(/page/\d+)') + "<span>Next page")
help = 'Index format: n (unpadded)'
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.rsplit('/')[-1]
class ButternutSquash(_BasicScraper):
latestUrl = 'http://www.butternutsquash.net/'
@ -206,12 +210,3 @@ class BetweenFailures(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'(http://betweenfailures\.com/archives/archive/[^"]+)', after="previous"))
help = 'Index format: stripnum-strip-name'
class BillyTheBeaker(_BasicScraper):
latestUrl = 'http://billy.defectivejunk.com/'
stripUrl = latestUrl + 'index.php?strip=%s'
multipleImagesPerStrip = True
imageSearch = compile(tagre("img", "src", r'(bub\d+_\d+[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(index\.php\?strip\=[^"]+)', after="Previous strip"))
help = 'Index format: nnn'

View file

@ -26,6 +26,15 @@ class CaribbeanBlue(_BasicScraper):
help = 'Index format: nnn-stripname'
class Catalyst(_BasicScraper):
baseUrl = "http://catalyst.spiderforest.com/"
latestUrl = baseUrl + "comic.php?comic_id=415"
stripUrl = baseUrl + "comic.php?comic_id=%s"
imageSearch = compile(tagre("img", "src", r'(http://catalyst\.spiderforest\.com/comics/[^"]+)'))
prevSearch = compile("<center>" + tagre("a", "href", r'(http://catalyst\.spiderforest\.com/comic\.php\?comic_id=\d+)'))
help = 'Index format: number'
class Catena(_BasicScraper):
latestUrl = 'http://catenamanor.com/'
stripUrl = latestUrl + '%s'
@ -98,6 +107,14 @@ class Commissioned(_BasicScraper):
help = 'Index format: n'
class Concession(_BasicScraper):
latestUrl = 'http://concessioncomic.com/'
stripUrl = latestUrl + 'index.php?pid=%s'
imageSearch = compile(tagre("img", "src", r'(http://concessioncomic\.com/comics/[^"]+)', after="Comic"))
prevSearch = compile(tagre("a", "href", r'(http://concessioncomic\.com/index\.php\?pid=\d+)', after="nav-prev"))
help = 'Index format: number'
class CoolCatStudio(_BasicScraper):
latestUrl = 'http://www.coolcatstudio.com/'
stripUrl = latestUrl + 'strips-cat/ccs%s'

View file

@ -6,7 +6,7 @@ from re import compile
from ..scraper import make_scraper
from ..util import tagre
_imageSearch = compile(tagre("img", "src", r'(/comics/\d+/[^"]+)'))
_imageSearch = compile(tagre("a", "href", r'(/comics/\d+/[^"]+)'))
def add(name, path):
baseurl = 'http://www.creators.com'

View file

@ -9,14 +9,14 @@ from ..helpers import indirectStarter
from ..util import tagre
class DMFA(_BasicScraper):
latestUrl = 'http://www.missmab.com/'
stripUrl = latestUrl + 'Comics/Vol_%s.php'
imageSearch = compile(tagre("img", "src", r'((?:Comics/|Vol)[^"]+)'))
multipleImagesPerStrip = True
prevSearch = compile(tagre("a", "href", r'((?:Comics/)?Vol[^"]+)')+
tagre("img", "src", r'(?:../)?Images/comicprev\.gif'))
help = 'Index format: nnn (normally, some specials)'
class DailyDose(_BasicScraper):
baseUrl = 'http://dailydoseofcomics.com/'
starter = indirectStarter(baseUrl,
compile(tagre("a", "href", r'(http://dailydoseofcomics\.com/[^"]+)', after="preview")))
stripUrl = baseUrl + '%s/'
imageSearch = compile(tagre("img", "src", r'([^"]+)', before="align(?:none|center)"))
prevSearch = compile(tagre("a", "href", r'(http://dailydoseofcomics\.com/[^"]+)', after="prev"))
help = 'Index format: stripname'
class DandyAndCompany(_BasicScraper):
@ -52,6 +52,16 @@ class DeepFried(_BasicScraper):
help = 'Index format: non'
class DMFA(_BasicScraper):
latestUrl = 'http://www.missmab.com/'
stripUrl = latestUrl + 'Comics/Vol_%s.php'
imageSearch = compile(tagre("img", "src", r'((?:Comics/|Vol)[^"]+)'))
multipleImagesPerStrip = True
prevSearch = compile(tagre("a", "href", r'((?:Comics/)?Vol[^"]+)')+
tagre("img", "src", r'(?:../)?Images/comicprev\.gif'))
help = 'Index format: nnn (normally, some specials)'
class DoemainOfOurOwn(_BasicScraper):
latestUrl = 'http://www.doemain.com/'
stripUrl = latestUrl + 'index.cgi/%s'

View file

@ -423,7 +423,6 @@ add('Flying_Under_the_Influence')
add('For_Your_Eyes_Only')
add('Forsaken_Valor')
add('Fortress_Avalon')
add('Found_Art')
add('Four_Bats')
add('Frame_by_Frame')
add('Frank_and_Steinway')
@ -670,7 +669,6 @@ add('ManBoys')
add('Mario_and_Luigi_Misadventures')
add('Marios_Day_Job')
add('Marital_Bliss')
add('Mary_Sue_Academy')
add('Mask_of_the_Aryans')
add('Master_the_Tiger')
add('Mastorism')

View file

@ -20,7 +20,7 @@ class EerieCuties(_BasicScraper):
class Eriadan(_BasicScraper):
latestUrl = 'http://www.shockdom.com/webcomics/eriadan/'
stripUrl = latestUrl + '%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.shockdom\.com/webcomics/eriadan/files/[^"]+)', after='alt=""'))
imageSearch = compile(tagre("img", "src", r'(http://www\.shockdom\.com/webcomics/eriadan/files/[^"]+)', after='width="800"'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/nnn (unpadded)'

View file

@ -1,28 +0,0 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import make_scraper
from ..util import asciify
_imageSearch = compile(r'SRC="(http://www\.thefallenangel\.co\.uk/\w+comics/.+?)"')
_prevSearch = compile(r' <a href="(http://www\.thefallenangel\.co\.uk/.+?)"><img[^>]+?src="http://www\.thefallenangel\.co\.uk/images/previousday\.jpg"')
def add(name, shortname):
latestUrl = 'http://www.thefallenangel.co.uk/cgi-bin/%sautokeen/autokeenlite.cgi' % shortname
classname = "FallenAngel_" + asciify(name)
globals()[classname] = make_scraper(classname,
latestUrl = latestUrl,
stripUrl = latestUrl + '?date=%s',
name='FallenAngel/' + name,
imageSearch = _imageSearch,
prevSearch = _prevSearch,
help = 'Index format: yyyymmdd',
)
add('HighMaintenance', 'hm')
add('FAWK', 'fawk')
add('MalloryChan', 'mallorychan')

View file

@ -4,7 +4,7 @@
from re import compile
from ..scraper import make_scraper
from ..util import tagre
from ..util import tagre, quote
from ..helpers import bounceStarter
_imageSearch = compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)'))
@ -23,7 +23,7 @@ def add(name, shortname):
globals()[classname] = make_scraper(classname,
starter = bounceStarter(baseUrl + shortname, _nextSearch),
name='GoComics/' + name,
stripUrl=baseUrl + shortname + '/%s',
stripUrl=baseUrl + quote(shortname) + '/%s',
imageSearch = _imageSearch,
prevSearch = _prevSearch,
help='Index format: yyyy/mm/dd',
@ -433,7 +433,6 @@ add('Rechid', '/rechid')
add('RedMeat', '/redmeat')
add('RedandRover', '/redandrover')
add('ReplyAll', '/replyall')
add('RichardsPoorAlmanac', '/richards-poor-almanac')
add('RipHaywire', '/riphaywire')
add('RipleysBelieveItorNot', '/ripleysbelieveitornot')
add('Risible', '/risible')

View file

@ -16,6 +16,10 @@ _prevSearch = compile(tagre("a", "href", r'([^"]*/d/\d{8}\.html)') +
def add(name, url):
classname = 'KeenSpot_%s' % name
if '/d/' in url:
stripUrl = url.split('/d/')[0] + '/d/%s.html'
else:
stripUrl = url + 'd/%s.html'
@classmethod
def _prevUrlModifier(cls, prevUrl):
@ -28,7 +32,7 @@ def add(name, url):
globals()[classname] = make_scraper(classname,
name='KeenSpot/' + name,
latestUrl=url,
stripUrl=url + 'd/%s.html',
stripUrl=stripUrl,
imageSearch = _imageSearch,
prevSearch = _prevSearch,
prevUrlModifier = _prevUrlModifier,
@ -153,7 +157,7 @@ add('CameoComic', 'http://cameocomic.comicgenesis.com/')
add('CampAlaska', 'http://campalaska.comicgenesis.com/')
add('CampusLife', 'http://campuslife.comicgenesis.com/')
add('CanYouKeepaSecret', 'http://cykas.comicgenesis.com/d/20041035.html')
add('Candi', 'http://www.candicomics.com/')
add('Candi', 'http://candicomics.com/')
add('CanisLupus', 'http://cheetahfox.comicgenesis.com/')
add('CaptainGreyhound', 'http://captaingreyhound.comicgenesis.com/')
add('CaptainMike', 'http://captainmike.comicgenesis.com/')
@ -195,7 +199,6 @@ add('CornerAlley13', 'http://corneralley.comicgenesis.com/d/20101010.html')
add('CorporateLife', 'http://amacher.comicgenesis.com/')
add('CosmicAwareness', 'http://cosmicawareness.comicgenesis.com/')
add('CosmicDrift', 'http://cosmicdrift.comicgenesis.com/')
add('Countyoursheep', 'http://countyoursheep.keenspot.com/')
add('CracklingSilence', 'http://crackling.comicgenesis.com/')
add('Crackwalker', 'http://crackwalker.comicgenesis.com/')
add('CreepyHead', 'http://creepyhead.comicgenesis.com/')
@ -241,7 +244,6 @@ add('DoomedUntoEternalVigilanceForever', 'http://duevf.comicgenesis.com/')
add('DormSweetDorm', 'http://dormsweetdorm.comicgenesis.com/')
add('DoubleyouTeeEff', 'http://doubleyouteeeff.comicgenesis.com/')
add('Downscale', 'http://downscale.comicgenesis.com/')
add('DownwardBound', 'http://downwardbound.comicgenesis.com/')
add('Dragon27sBane', 'http://jasonwhitewaterz.comicgenesis.com/')
add('DragonBallTM', 'http://dragonballtm.comicgenesis.com/')
add('DragonBoy', 'http://dragonboy.comicgenesis.com/')
@ -282,7 +284,6 @@ add('EvilWenchesIncorporated', 'http://evilwenchesinc.comicgenesis.com/')
add('EyeoftheMonkey', 'http://eyeofthemonkey.comicgenesis.com/')
add('Ezailia', 'http://ezailia.comicgenesis.com/')
add('Faces', 'http://faces.comicgenesis.com/')
add('FairestandFallen', 'http://fairestandfallen.comicgenesis.com/')
add('FakingSanity', 'http://fakingsanity.comicgenesis.com/')
add('FalseGods', 'http://falsegod.comicgenesis.com/')
add('FancyThat', 'http://fancythat.comicgenesis.com/')
@ -303,7 +304,6 @@ add('FireflyCross', 'http://fireflycross.comicgenesis.com/')
add('FiveMinuteComic', 'http://fiveminute.comicgenesis.com/')
add('Fizzle', 'http://fizzle.comicgenesis.com/')
add('FlinchandFriends', 'http://flinch.comicgenesis.com/')
add('FlipandSplog', 'http://fas.comicgenesis.com/')
add('Flounderville', 'http://flounderville.comicgenesis.com/')
add('FloydCartoons', 'http://floydcartoons.comicgenesis.com/')
add('Flunkies', 'http://flunkies.comicgenesis.com/')
@ -320,11 +320,10 @@ add('FourDays', 'http://fourdays.comicgenesis.com/')
add('Fourboys', 'http://fourboys.comicgenesis.com/')
add('Fox27sFreakyAdventures', 'http://basa.comicgenesis.com/')
add('FoxTails', 'http://foxtails.comicgenesis.com/')
add('Framed', 'http://framed.comicgenesis.com/')
add('FreakU', 'http://freaku.comicgenesis.com//d/20080827.html')
add('FreakU', 'http://freaku.comicgenesis.com/d/20080827.html')
add('FreaksandG33k', 'http://freaksandgeeks.comicgenesis.com/')
add('FredtheDot', 'http://fredthedot.comicgenesis.com/')
add('FreeParking', 'http://freeparking.comicgenesis.com//d/20051029.html')
add('FreeParking', 'http://freeparking.comicgenesis.com/d/20051029.html')
add('FromTheAntiCulture', 'http://anticulture.comicgenesis.com/')
add('FromTheMargin', 'http://fromthemargin.comicgenesis.com/')
add('FruitFlies', 'http://fruitflies.comicgenesis.com/')
@ -337,7 +336,6 @@ add('GambitasBishounen', 'http://snigepippi.comicgenesis.com/')
add('GameJumpers', 'http://gamejumpers.comicgenesis.com/')
add('GameMisconduct', 'http://gmhockey.comicgenesis.com/')
add('Gameboy', 'http://gameboy.comicgenesis.com/')
add('GamerPsychotica', 'http://gp.comicgenesis.com/d/20060113.html')
add('GamersParadox', 'http://gamersparadox.comicgenesis.com/')
add('GamingGuardians', 'http://gamingguardians.comicgenesis.com/')
add('GamingReality', 'http://gamingreality.comicgenesis.com/')
@ -361,13 +359,11 @@ add('GorgeousPrincessCreamyBeamy', 'http://creamybeamy.comicgenesis.com/')
add('GothyMcGee', 'http://gothymcgee.comicgenesis.com/')
add('GratuitousMangaStyle', 'http://mangastyle.comicgenesis.com/')
add('GraveyardShift', 'http://graveyardshift.comicgenesis.com/')
add('Gravity', 'http://gravity.comicgenesis.com/')
add('GreenLightGo', 'http://glg.comicgenesis.com/')
add('GroundFloor', 'http://groundfloor.comicgenesis.com/')
add('Grumpythefathamster', 'http://burp.comicgenesis.com/')
add('GuiShinTaeChiAKAGhostHunter', 'http://ghosthunter.comicgenesis.com/')
add('HERZBLUT', 'http://herzblut.comicgenesis.com/')
add('HalflightBreaking', 'http://halflight.comicgenesis.com/d/20021031.html')
add('HangingAround', 'http://hangingaround.comicgenesis.com/')
add('Hans', 'http://hans.comicgenesis.com/')
add('HaypennyRag', 'http://haypenny.comicgenesis.com/')
@ -412,7 +408,6 @@ add('InappropriateIrving', 'http://irving.comicgenesis.com/')
add('InfiniteSouls', 'http://keyoko.comicgenesis.com/')
add('InkyorShaggy', 'http://inkyorshaggy.comicgenesis.com/')
add('IntergalacticSpaceSheriffs', 'http://spacesheriffs.comicgenesis.com/')
add('Inverloch', 'http://inverloch.comicgenesis.com/')
add('IpsoFacto', 'http://ipsofactocomic.comicgenesis.com/')
add('ItHurtsToBeThatStupid', 'http://ihtbts.comicgenesis.com/')
add('ItsGravy', 'http://itsgravy.comicgenesis.com/')
@ -470,7 +465,6 @@ add('LifeGoesOn', 'http://lgo.comicgenesis.com/')
add('LifeinBellCounty', 'http://bellcountylife.comicgenesis.com/')
add('LifeisUnfair', 'http://lifeisunfair.comicgenesis.com/')
add('LifeofBuddha', 'http://lifeofbuddha.comicgenesis.com/')
add('LifeonForbez', 'http://cdc.comicgenesis.com/')
add('Lightbringer', 'http://lightbringer.comicgenesis.com/')
add('LikeItIs', 'http://likeitis.comicgenesis.com/')
add('LilDude', 'http://lildudecomics.comicgenesis.com/')
@ -492,7 +486,6 @@ add('LustForFreelance', 'http://imabubble.comicgenesis.com/')
add('MEHComics', 'http://mehcomics.comicgenesis.com/')
add('MORONS', 'http://morons.comicgenesis.com/')
add('MTranc3', 'http://mtranc3.comicgenesis.com/')
add('MacHall', 'http://machall.comicgenesis.com/d/20020125.html')
add('Maddland', 'http://maddland.comicgenesis.com/')
add('MadeInHeaven', 'http://mih.comicgenesis.com/')
add('MagiIndustries', 'http://magiindustries.comicgenesis.com/')
@ -512,7 +505,6 @@ add('MelEverymanAndHisSarcasticTalkingHousepetAmbrose', 'http://everyman.comicge
add('MenschunsererZeitGerman', 'http://muz.comicgenesis.com/')
add('Midcentral', 'http://midcentral.comicgenesis.com/')
add('MiketheMulletThing', 'http://mikethemulletthing.comicgenesis.com/')
add('Mindmistress', 'http://mindmistress.comicgenesis.com/')
add('Mindtap', 'http://mindtap.comicgenesis.com/')
add('MinimalismSucks', 'http://minisuck.comicgenesis.com/')
add('MinimumSecurityUniversity', 'http://mshs.comicgenesis.com/')
@ -526,7 +518,6 @@ add('MorysEducation', 'http://mory.comicgenesis.com/')
add('MrBoffleandFriends', 'http://mrboffle.comicgenesis.com/')
add('MrBubbles', 'http://profound.comicgenesis.com/')
add('MrFooAdventures', 'http://foo.comicgenesis.com/')
add('MrPinkBlob', 'http://mrpinkblob.comicgenesis.com/d/100.html')
add('MrScience', 'http://mrscience.comicgenesis.com/')
add('Muertitos', 'http://muertitos.comicgenesis.com/')
add('Muffythelitlerabbit', 'http://muffyrabbit.comicgenesis.com/')
@ -545,7 +536,6 @@ add('NastyChocolates', 'http://nastychocolates.comicgenesis.com/')
add('NeTrek', 'http://netrek.comicgenesis.com/')
add('NeedleandThread', 'http://needleandthread.comicgenesis.com/')
add('NekkoandJoruba', 'http://nekkoandjoruba.comicgenesis.com/d/20050816.html')
add('NekoTheKitty', 'http://nekothekitty.comicgenesis.com/')
add('Nekotime', 'http://nekotime.comicgenesis.com/')
add('Netjeru', 'http://netjeru.comicgenesis.com/')
add('NeverYouMind', 'http://neveryoumind.comicgenesis.com/')
@ -896,7 +886,6 @@ add('Unconventional', 'http://unconventional.comicgenesis.com/')
add('UnfamiliarReflection', 'http://emri.comicgenesis.com/')
add('UnlifeOnline', 'http://unlifeonline.comicgenesis.com/')
add('UnseenFate', 'http://unseenfate.comicgenesis.com/')
add('Untitled', 'http://untitled.comicgenesis.com/')
add('UntitledAgain', 'http://untitledagain.comicgenesis.com/')
add('UrbanFable', 'http://urbanfable.comicgenesis.com/')
add('VRPG', 'http://vrpg.comicgenesis.com/')
@ -993,7 +982,6 @@ add('silvette', 'http://silvette.comicgenesis.com/')
add('skimlinescomAcollectionofthings', 'http://www.skimlines.com/')
add('smut', 'http://smut.comicgenesis.com/')
add('socializedmedicine', 'http://socializedmedicine.comicgenesis.com/')
add('spacejams', 'http://spacejams.comicgenesis.com/d/20020820.html')
add('spiderfrogballoon', 'http://spiderfrogballoon.comicgenesis.com/')
add('theadventuresofmegamanandlink', 'http://takeru.comicgenesis.com/')
add('theendofthings', 'http://endofthings.comicgenesis.com/')

View file

@ -27,6 +27,7 @@ class NeoEarth(_BasicScraper):
class NewAdventuresOfBobbin(_BasicScraper):
latestUrl = 'http://www.bobbin-comic.com/bobbin_strips/'
imageSearch = compile(tagre("a", "href", r'(\d+\.gif)'))
multipleImagesPerStrip = True
prevSearch = None
help = 'Index format: none'

View file

@ -35,7 +35,7 @@ class OnTheEdge(_BasicScraper):
class OneQuestion(_BasicScraper):
latestUrl = 'http://onequestioncomic.com/'
latestUrl = 'http://www.onequestioncomic.com/'
stripUrl = latestUrl + 'comic.php?strip_id=%s'
imageSearch = compile(tagre("img", "src", r'(istrip_files/strips/\d+\.jpg)'))
prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') + tagre("img", "src", r'img/arrow_prev\.jpg'))

View file

@ -162,7 +162,3 @@ class PlanescapeSurvival(_BasicScraper):
imageSearch = compile(r'src="(comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><img alt="Previous" ')
help = 'Index format: nnn'
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.split('/')[-1].split('.')[0]

View file

@ -10,8 +10,8 @@ from ..util import tagre
class QuestionableContent(_BasicScraper):
latestUrl = 'http://www.questionablecontent.net/'
stripUrl = latestUrl + 'view.php?comic=%s'
imageSearch = compile(r'/(comics/\d+\.png)"')
prevSearch = compile(r'<a href="(view.php\?comic=\d+)">Previous')
imageSearch = compile(tagre("img", "src", r'([^"]+/comics/[^"]+)', before="strip"))
prevSearch = compile(tagre("a", "href", r'(view\.php\?comic=\d+)') + 'Previous')
help = 'Index format: n (unpadded)'

View file

@ -149,10 +149,6 @@ class SomethingPositive(_BasicScraper):
"(?:" + tagre("img", "src", r'images/previous\.gif') + "|Previous)")
help = 'Index format: mmddyyyy'
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.split('/')[-1].split('.')[0]
class SexyLosers(_BasicScraper):
stripUrl = 'http://www.sexylosers.com/%s.html'

View file

@ -16,15 +16,17 @@ class TheNoob(_BasicScraper):
help = 'Index format: nnnn'
class TheOrderOfTheStick(_BasicScraper):
latestUrl = 'http://www.giantitp.com/comics/oots0863.html'
stripUrl = 'http://www.giantitp.com/comics/oots%s.html'
imageSearch = compile(r'<IMG src="(/comics/images/.+?)">')
imageSearch = compile(r'<IMG src="(/comics/images/[^"]+)">')
prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
help = 'Index format: n (unpadded)'
starter = indirectStarter('http://www.giantitp.com/', compile(r'<A href="(/comics/oots\d{4}\.html)"'))
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.rsplit('/', 1)[-1][:-5]
class TheParkingLotIsFull(_BasicScraper):
@ -36,7 +38,6 @@ class TheParkingLotIsFull(_BasicScraper):
help = 'Index format: nnn'
class TheWotch(_BasicScraper):
latestUrl = 'http://www.thewotch.com/'
stripUrl = latestUrl + '?date=%s'

View file

@ -29,7 +29,7 @@ class UnicornJelly(_BasicScraper):
class UserFriendly(_BasicScraper):
starter = bounceStarter('http://ars.userfriendly.org/cartoons/?mode=classic', compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="">'))
stripUrl = 'http://ars.userfriendly.org/cartoons/?id=%s&mode=classic'
imageSearch = compile(r'<img border="0" src="(http://www.userfriendly.org/cartoons/archives/\d{2}\w{3}/.+?\.gif)"')
imageSearch = compile(r'<img border="0" src="\s*(http://www.userfriendly.org/cartoons/archives/\d{2}\w{3}/.+?\.gif)"')
prevSearch = compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="Previous Cartoon">')
help = 'Index format: yyyymmdd'

View file

@ -35,3 +35,7 @@ class ViiviJaWagner(_BasicScraper):
imageSearch = compile(tagre("link", "href", r'(http://hs\d+\.snstatic\.fi/webkuva/oletus/[^"]+)', before="image_src"))
prevSearch = compile(tagre("a", "href", r'(/viivijawagner/[^"]+)', before="prev-cm"))
help = 'Index format: none'
@classmethod
def namer(cls, imageUrl, pageUrl):
return imageUrl.split('=')[1]

View file

@ -0,0 +1,26 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import make_scraper
from ..util import tagre
_prevSearch = compile(tagre("a", "href", r'(\?id=\d+)') + tagre("img", "src", r'images/navi-zurueck\.gif'))
_imageSearch = compile(tagre("img", "src", r'([^"]+/img/comic/[^"]+)', after="comicimg"))
def add(name, shortname):
    """Register a scraper class for one comic hosted on webcomic.eu.

    name is the comic name used in the scraper name and class name;
    shortname is the subdomain of webcomic.eu hosting the comic.
    The generated class is stored in this module's globals.
    """
    latestUrl = 'http://%s.webcomic.eu/' % shortname
    classname = 'WebcomicEu_%s' % name
    # collect the scraper attributes first, then build the class in one call
    attrs = dict(
        name='WebcomicEu/' + name,
        latestUrl=latestUrl,
        stripUrl=latestUrl + '?id=%s',
        imageSearch=_imageSearch,
        prevSearch=_prevSearch,
        help='Index format: number',
    )
    globals()[classname] = make_scraper(classname, **attrs)
add('TheBessEffect', 'thebesseffect')
add('TheBessEffectEnglish', 'tbe-english')
add('Talandor', 'talandor')

View file

@ -28,5 +28,4 @@ def add(name, subpath):
add('AgnesQuill', 'daveroman/agnes/')
add('MyMuse', 'gc/muse/')
add('NekkoAndJoruba', 'nekkoandjoruba/nekkoandjoruba/')
add('JaxEpoch', 'johngreen/quicken/')
add('ClownSamurai', 'qsamurai/clownsamurai/')

View file

@ -8,7 +8,7 @@ from ..scraper import make_scraper
from ..helpers import bounceStarter
_imageSearch = compile(tagre("img", "src", r'(http://www\.wlpcomics\.com/(?:adult|general)/[^"]+)'))
_imageSearch = compile(tagre("img", "src", r'(http://www\.wlpcomics\.com/(?:adult|general)/[^"]+/comics/[^"]+)'))
_prevSearch = compile(tagre("a", "href", r'(\w+.html)') + 'Previous')
_nextSearch = compile(tagre("a", "href", r'(\w+.html)') + 'Next')

View file

@ -8,6 +8,14 @@ from ..util import tagre
from ..helpers import bounceStarter
class ZapComic(_BasicScraper):
latestUrl = 'http://www.zapcomic.com/'
stripUrl = latestUrl + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.zapcomic\.com\?comic_object=\d+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.zapcomic\.com/[^"]+)', after="previous-comic-link"))
help = 'Index format: yyyy/mm/nnn-stripname'
class Zapiro(_BasicScraper):
baseUrl = 'http://www.mg.co.za/zapiro/'
starter = bounceStarter(baseUrl,
@ -23,6 +31,14 @@ class Zapiro(_BasicScraper):
return name
class ZebraGirl(_BasicScraper):
latestUrl = 'http://www.zebragirl.net/'
stripUrl = latestUrl + '?date=%s'
imageSearch = compile(tagre("img", "src", r"(comics/[^']+)", quote="'"))
prevSearch = compile(tagre("link", "href", r"(/\?date=[^']+)", quote="'", before='Previous'))
help = 'Index format: yyyy-mm-dd'
class ZombieHunters(_BasicScraper):
latestUrl = 'http://www.thezombiehunters.com/'
stripUrl = latestUrl + '?strip_id=%s'

View file

@ -43,7 +43,7 @@ class _BasicScraper(object):
msg = 'Retrieving the current strip'
if self.indexes:
msg += " for indexes %s" % self.indexes
out.write(msg+"...")
out.info(msg+"...")
if self.indexes:
for index in self.indexes:
url = self.stripUrl % index
@ -55,40 +55,48 @@ class _BasicScraper(object):
"""Get comic strip for given URL."""
imageUrls = fetchUrls(url, self.imageSearch)[0]
if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
raise ValueError("found %d images with %s" % (len(imageUrls), self.imageSearch.pattern))
out.warn("found %d images instead of 1 with %s" % (len(imageUrls), self.imageSearch.pattern))
return self.getComicStrip(url, imageUrls)
def getComicStrip(self, url, imageUrls):
"""Get comic strip downloader for given URL and images."""
return ComicStrip(self.get_name(), url, imageUrls, self.namer)
def getAllStrips(self, maxstrips=None):
    """Get comic strips as a generator.

    If self.indexes is set, one strip is retrieved per index via
    self.stripUrl. Otherwise retrieval starts at the latest URL and is
    delegated to getStripsFor(), which is given maxstrips as its limit.

    @param maxstrips: maximum number of strips to retrieve when no
        indexes are set; None (the default) means no limit.
    """
    if maxstrips:
        msg = 'Retrieving %d strips' % maxstrips
    elif self.indexes:
        # Plain assignment: msg is not bound yet on this branch, so an
        # augmented assignment here would raise UnboundLocalError.
        msg = 'Retrieving %d strips' % len(self.indexes)
    else:
        msg = 'Retrieving all strips'
    if self.indexes:
        # The indexes are listed once here for every branch above.
        msg += " for indexes %s" % self.indexes
    out.info(msg+"...")
    if self.indexes:
        for index in self.indexes:
            url = self.stripUrl % index
            # Exactly one strip per requested index.
            for strip in self.getStripsFor(url, 1):
                yield strip
    else:
        url = self.getLatestUrl()
        for strip in self.getStripsFor(url, maxstrips):
            yield strip
def getAllStripsFor(self, url):
"""Get all comic strips for an URL."""
def getStripsFor(self, url, maxstrips):
"""Get comic strips for an URL. If maxstrips is a positive number, stop after
retrieving the given number of strips."""
seen_urls = set()
while url:
imageUrls, prevUrl = fetchUrls(url, self.imageSearch, self.prevSearch)
prevUrl = self.prevUrlModifier(prevUrl)
out.write("Matched previous URL %s" % prevUrl, 2)
out.debug("Matched previous URL %s" % prevUrl)
seen_urls.add(url)
yield self.getComicStrip(url, imageUrls)
# avoid recursive URL loops
url = prevUrl if prevUrl not in seen_urls else None
if maxstrips is not None:
maxstrips -= 1
if maxstrips <= 0:
break
def setStrip(self, index):
"""Set current comic strip URL."""
@ -161,13 +169,13 @@ def get_scrapers():
"""
global _scrapers
if _scrapers is None:
out.write("Loading comic modules...", 2)
out.debug("Loading comic modules...")
modules = loader.get_modules()
plugins = loader.get_plugins(modules, _BasicScraper)
_scrapers = list(plugins)
_scrapers.sort(key=lambda s: s.get_name())
check_scrapers()
out.write("... %d modules loaded." % len(_scrapers), 2)
out.debug("... %d modules loaded." % len(_scrapers))
return _scrapers

View file

@ -27,6 +27,8 @@ MaxContentBytes = 1024 * 1024 * 2 # 2 MB
# Maximum content size for images
MaxImageBytes = 1024 * 1024 * 20 # 20 MB
# Default connection timeout
ConnectionTimeoutSecs = 60
def tagre(tag, attribute, value, quote='"', before="", after=""):
"""Return a regular expression matching the given HTML tag, attribute
@ -102,7 +104,7 @@ def fetchUrl(url, urlSearch):
searchUrl = match.group(1)
if not searchUrl:
raise ValueError("Match empty URL at %s with pattern %s" % (url, urlSearch.pattern))
out.write('matched URL %r' % searchUrl, 2)
out.debug('matched URL %r' % searchUrl)
return normaliseURL(urlparse.urljoin(baseUrl, searchUrl))
return None
@ -115,10 +117,10 @@ def fetchUrls(url, imageSearch, prevSearch=None):
imageUrl = match.group(1)
if not imageUrl:
raise ValueError("Match empty image URL at %s with pattern %s" % (url, imageSearch.pattern))
out.write('matched image URL %r with pattern %s' % (imageUrl, imageSearch.pattern), 2)
out.debug('matched image URL %r with pattern %s' % (imageUrl, imageSearch.pattern))
imageUrls.add(normaliseURL(urlparse.urljoin(baseUrl, imageUrl)))
if not imageUrls:
out.write("warning: no images found at %s with pattern %s" % (url, imageSearch.pattern))
out.warn("no images found at %s with pattern %s" % (url, imageSearch.pattern))
if prevSearch is not None:
# match previous URL
match = prevSearch.search(data)
@ -128,7 +130,7 @@ def fetchUrls(url, imageSearch, prevSearch=None):
raise ValueError("Match empty previous URL at %s with pattern %s" % (url, prevSearch.pattern))
prevUrl = normaliseURL(urlparse.urljoin(baseUrl, prevUrl))
else:
out.write('no previous URL %s at %s' % (prevSearch.pattern, url), 2)
out.debug('no previous URL %s at %s' % (prevSearch.pattern, url))
prevUrl = None
return imageUrls, prevUrl
return imageUrls, None
@ -183,8 +185,9 @@ def normaliseURL(url):
return urlparse.urlunparse(pu)
def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5, max_content_bytes=None):
out.write('Open URL %s' % url, 2)
def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5, max_content_bytes=None,
timeout=ConnectionTimeoutSecs):
out.debug('Open URL %s' % url)
assert retries >= 0, 'invalid retry value %r' % retries
assert retry_wait_seconds > 0, 'invalid retry seconds value %r' % retry_wait_seconds
headers = {'User-Agent': UserAgent}
@ -192,13 +195,12 @@ def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5, max_content_byt
if referrer:
headers['Referer'] = referrer
try:
req = requests.get(url, headers=headers, config=config, prefetch=False)
req = requests.get(url, headers=headers, config=config, prefetch=False, timeout=timeout)
check_content_size(url, req.headers, max_content_bytes)
req.raise_for_status()
return req
except requests.exceptions.RequestException as err:
msg = 'URL retrieval of %s failed: %s' % (url, err)
out.write(msg)
raise IOError(msg)
def check_content_size(url, headers, max_content_bytes):
@ -251,7 +253,7 @@ def getRelativePath(basepath, path):
def getQueryParams(url):
query = urlparse.urlsplit(url)[3]
out.write('Extracting query parameters from %r (%r)...' % (url, query), 3)
out.debug('Extracting query parameters from %r (%r)...' % (url, query))
return cgi.parse_qs(query)
@ -334,10 +336,16 @@ def asciify(name):
def unquote(text):
while '%' in text:
text = urllib.unquote(text)
newtext = urllib.unquote(text)
if newtext == text:
break
text = newtext
return text
# Return the result of urllib.quote() for the given text.
def quote(text):
return urllib.quote(text)
def strsize (b):
"""Return human representation of bytes b. A negative number of bytes
raises a value error."""
@ -357,3 +365,20 @@ def strsize (b):
return "%.2fGB" % (float(b) / (1024*1024*1024))
return "%.1fGB" % (float(b) / (1024*1024*1024))
def getDirname(name):
    """Return name with every forward slash replaced by os.sep, the
    path separator of the running platform."""
    components = name.split('/')
    return os.sep.join(components)
def getFilename(name):
    """Return a filename derived from name that contains only ASCII
    letters, digits, underscores, minus signs and dots.

    Every other character is replaced with an underscore, runs of dots
    or underscores are collapsed to a single one, and all leading dots
    and minus signs are removed so the result is neither a hidden file
    nor mistaken for a command line option.
    """
    # first replace all illegal chars
    name = re.sub(r"[^0-9a-zA-Z_\-\.]", "_", name)
    # then remove double dots and underscores
    while ".." in name:
        name = name.replace('..', '.')
    while "__" in name:
        name = name.replace('__', '_')
    # remove all leading dots and minus signs; stripping only the first
    # character would leave e.g. "-.name" as the hidden file ".name"
    return name.lstrip(".-")

View file

@ -47,6 +47,7 @@ exclude_comics = [
"Emerald_Winter", # broken images
"Enter_the_Duck_2", # broken images
"ffff", # broken images
"Found_Art", # broken images
"Function_Over_Fashion", # broken images
"Funday_Morning", # broken images
"greys_journey", # broken images
@ -69,6 +70,7 @@ exclude_comics = [
"Louder_Than_Bombs", # broken images
"Lucky_Dawg", # broken images
"Mario_in_Johto", # broken images
"Mary_Sue_Academy", # broken images
"Master", # start page requires login
"Mastermind_BTRN", # broken images
"MAYA_____The_legend_of_Wolf", # broken images

View file

@ -35,6 +35,7 @@ exclude_comics = [
"OysterWar", # too few comics
"PIGTIMES", # comic unavailable
"PS", # comic unavailable
"RichardsPoorAlmanac", # missing images
"SherpaAid", # comic unavailable
"SparComics", # comic unavailable
]

File diff suppressed because one or more lines are too long

View file

@ -72,6 +72,7 @@ exclude_comics = [
"ComicMischief", # page moved
"ComputerGameAddicts", # page moved
"Concession", # page moved
"Countyoursheep", # broken links
"CorridorZ", # page does not follow standard layout
"CrashBoomMagic", # page moved
"CrazySlowlyGoing", # page has 403 forbidden
@ -85,6 +86,7 @@ exclude_comics = [
"DimBulbComics", # page is gone
"DIVE", # page is gone
"DominicDeegan", # page moved
"DownwardBound", # page does not follow standard layout
"DungeonDamage", # page does not follow standard layout
"Dylan", # page has 403 forbidden
"EarthRiser", # redirects to a new page
@ -99,6 +101,7 @@ exclude_comics = [
"Evilish", # page moved
"EvolBara", # page is gone
"FaerieTales", # page does not follow standard layout
"FairestandFallen", # page does not follow standard layout
"FairyTaleNewVillage", # missing images
"Fate27sTear", # page moved
"FaultyLogic", # page does not follow standard layout
@ -107,9 +110,12 @@ exclude_comics = [
"Flatwood", # page moved
"FLEMComics", # page moved
"FletchersCave", # page is broken
"FlipandSplog", # page does not follow standard layout
"ForcesofGoodandEvil", # page does not follow standard layout
"Framed", # page does not follow standard layout
"FurryBlackDevil", # page moved
"Galacticus", # page has 403 forbidden
"GamerPsychotica", # page does not follow standard layout
"GeebasonParade", # page does not follow standard layout
"geeks", # page moved
"GeminiBright", # page does not follow standard layout
@ -119,9 +125,11 @@ exclude_comics = [
"GODLIKE", # page has 403 forbidden
"GoForIt", # page is gone
"GothBoy", # page moved
"Gravity", # page does not follow standard layout
"Grimage", # page moved
"GrossePointeDogs", # page is broken
"GUComics", # page moved
"HalflightBreaking", # page does not follow standard layout
"HardUnderbelly", # page does not follow standard layout
"HazardousScience", # page is gone
"HereThereBeDragons", # page moved
@ -138,6 +146,7 @@ exclude_comics = [
"InsideJoke", # page is gone
"InsidetheBox", # page has 403 forbidden
"InternationalHopeFoundation", # page does not follow standard layout
"Inverloch", # page does not follow standard layout
"JamieandNick", # page moved
"JasonLovesHisGrandpa", # page is gone
"JavanteasFate", # page is gone
@ -165,8 +174,10 @@ exclude_comics = [
"LinktotheBoards", # page does not follow standard layout
"LinT", # page moved
"LiterallySpeaking", # page does not follow standard layout
"LifeonForbez", # missing images
"LoxieAndZoot", # page does not follow standard layout
"Lunchtable", # missing images
"MacHall", # page does not follow standard layout
"MadWorld", # page has 403 forbidden
"Magellan", # page does not follow standard layout
"Marachan", # missing images
@ -175,13 +186,16 @@ exclude_comics = [
"Meiosis", # page moved
"Michikomonogatari", # page does not follow standard layout
"MidnorthFlourCo", # page has 403 forbidden
"Mindmistress", # page does not follow standard layout
"MintCondition", # page moved
"MisadventuresinPhysics", # page has 403 forbidden
"MobileMadness", # page does not follow standard layout
"MrPinkBlob", # page does not follow standard layout
"MyAngelYouAreAngel", # page is gone
"MyBrainHurts", # page does not follow standard layout
"NAFTANorthAmericanFreeToonAgreementalsoYankuckcanee", # page does not follow standard layout
"NeglectedMarioCharacterComix", # page does not follow standard layout
"NekoTheKitty", # page does not follow standard layout
"Nemutionjewel", # page does not follow standard layout
"Nerdgasm", # missing images
"Nerdz", # page is gone
@ -249,6 +263,7 @@ exclude_comics = [
"SoManyLevels", # page moved
"SomethingSoft", # page is gone
"Sorcery101", # page moved
"spacejams", # page does not follow standard layout
"SpellBinder", # page is gone
"SPQRBlues", # page moved
"StationV3", # page moved
@ -294,6 +309,7 @@ exclude_comics = [
"TwoEvilScientists", # page moved
"TwoLumps", # page moved
"TwoSidesWide", # page moved
"Untitled", # page does not follow standard layout
"Vendetta", # page moved
"VictimsoftheSystem", # page moved
"Victor", # page moved
@ -318,23 +334,19 @@ url_overrides = {
"AmazonSpaceRangers": "http://amazons.comicgenesis.com/d/20051015.html",
"ArroganceinSimplicity": "http://arrogance.comicgenesis.com/d/20030217.html",
"ATasteofEvil": "http://atasteofevil.comicgenesis.com/d/20050314.html",
'Candi': 'http://candicomics.com/',
"CanYouKeepaSecret": "http://cykas.comicgenesis.com/d/20041035.html",
"CapturetheMoment": "http://capturethemoment.comicgenesis.com/d/20100927.html",
"CornerAlley13": "http://corneralley.comicgenesis.com/d/20101010.html",
"Countyoursheep": "http://countyoursheep.keenspot.com/",
"FreakU": "http://freaku.comicgenesis.com//d/20080827.html",
"FreeParking": "http://freeparking.comicgenesis.com//d/20051029.html",
"GamerPsychotica": "http://gp.comicgenesis.com/d/20060113.html",
"FreakU": "http://freaku.comicgenesis.com/d/20080827.html",
"FreeParking": "http://freeparking.comicgenesis.com/d/20051029.html",
"GoneAstray": "http://goneastray.comicgenesis.com/d/20100305.html",
"GoodnEvil": "http://gne.comicgenesis.com/d/20040814.html",
"HalflightBreaking": "http://halflight.comicgenesis.com/d/20021031.html",
"HealerOnFeatheredWings": "http://selsachronicles.comicgenesis.com/",
"HowNottoRunAComic": "http://hownottorunacomic.comicgenesis.com/d/19950719.html",
"HurricaneParty": "http://hurricaneparty.comicgenesis.com/d/20040123.html",
"MacHall": "http://machall.comicgenesis.com/d/20020125.html",
"MaryQuiteContrary": "http://marycontrary.comicgenesis.com/d/20070824.html",
"MoonCrest24": "http://mooncrest.comicgenesis.com/d/20121117.html",
"MrPinkBlob": "http://mrpinkblob.comicgenesis.com/d/100.html",
"NekkoandJoruba": "http://nekkoandjoruba.comicgenesis.com/d/20050816.html",
"No4thWalltoBreak": "http://no4thwalltobreak.comicgenesis.com/d/20041025.html",
"OtakuKyokai": "http://otakukyokai.comicgenesis.com/d/20060818.html",
@ -345,7 +357,6 @@ url_overrides = {
"PlanetsCollide": "http://ruthcomix.comicgenesis.com/d/20010706.html",
"RuneMaster": "http://runemaster.comicgenesis.com/d/20050607.html",
"ShinobiHigh": "http://shinobihigh.comicgenesis.com/d/20020118.html",
"spacejams": "http://spacejams.comicgenesis.com/d/20020820.html",
"TheAdventuresofVindibuddSuperheroInTraining": "http://vindibudd.comicgenesis.com/d/20070720.html",
"TriumphantLosers": "http://triumphantlosers.comicgenesis.com/d/20081006.html",
"Zortic": "http://zortic.comicgenesis.com/d/20030922.html",

View file

@ -19,6 +19,7 @@ htmltemplate = """
<link rel="stylesheet" href="css/main.css">
<link rel="stylesheet" href="css/dosage.css">
<script src="js/masonry.min.js"></script>
<script src="http://use.edgefonts.net/open-sans.js"></script>
</head>
<body>
<p>Dosage test results from %(date)s</p>
@ -85,6 +86,8 @@ def get_content(filename):
res = []
for name, url in tests:
css = name.split()[-1].lower()
if len(name) > 25 and '/' in name:
name = name.replace('/', '/ ')
if url:
inner = '<a href="%s" class="%s">%s</a>' % (url, css, name)
else:
@ -94,7 +97,7 @@ def get_content(filename):
def main(args):
filename = "testresults.txt"
filename = args[0]
modified = get_mtime(filename)
content = get_content(filename)
attrs = {"date": strdate(modified), "content": content}

View file

@ -7,5 +7,5 @@ script=test.sh
rm -f "$script"
echo "#!/bin/sh -e" > "$script"
egrep -v "^\. " testresults.txt | egrep "^F " | cut -b "3-" | sort | awk '{ print "make test TESTOUTPUT=/dev/null TESTS=" $0; }' >> "$script"
egrep -v "^\. " testresults.txt | egrep "^F " | cut -b "3-" | sort | awk '{ print "make test PYTESTOPTS=--tb=short TESTS=" $0; }' >> "$script"
chmod 755 "$script"

View file

@ -36,13 +36,13 @@ class _ComicTester(TestCase):
num = 0
max_strips = 5
for strip in islice(scraperobj.getAllStrips(), 0, max_strips):
images = 0
images = []
for image in strip.getImages():
images += 1
images.append(image.url)
self.save(image)
self.check(images > 0, 'failed to find images at %s' % strip.stripUrl)
self.check(images, 'failed to find images at %s' % strip.stripUrl)
if not self.scraperclass.multipleImagesPerStrip:
self.check(images == 1, 'found %d instead of 1 image at %s' % (images, strip.stripUrl))
self.check(len(images) == 1, 'found more than 1 image at %s: %s' % (strip.stripUrl, images))
if num > 0 and self.scraperclass.prevUrlMatchesStripUrl:
self.check_stripurl(strip)
num += 1