Fix more comics.

Bastian Kleineidam 2012-12-08 00:45:18 +01:00
parent 1b74e304c0
commit faba7b0bca
43 changed files with 1254 additions and 171 deletions


@@ -11,7 +11,7 @@ NUMPROCESSORS:=$(shell grep -c processor /proc/cpuinfo)
 # - write test results in file
 # - run all tests found in the "tests" subdirectory
 TESTOUTPUT?=testresults.txt
-PYTESTOPTS:=-n $(NUMPROCESSORS) --resultlog=$(TESTOUTPUT) --tb=short
+PYTESTOPTS?=-n $(NUMPROCESSORS) --resultlog=$(TESTOUTPUT) --tb=short
 CHMODMINUSMINUS:=--
 # directory or file with tests to run
 TESTS ?= tests
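The change from := to ?= above means PYTESTOPTS is only assigned when it is not already defined, so the pytest options can now be overridden from the environment. A minimal sketch, assuming the test target in this Makefile is named test:

`$ PYTESTOPTS="-n 2 --tb=long" make test`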


@@ -1,9 +1,14 @@
 MANFILES:=dosage.1.html
+all: man testresults.html
 man: $(MANFILES)
 dosage.1.html: dosage.1
 man2html -r $< | tail -n +2 | sed 's/Time:.*//g' | sed 's@/:@/@g' > $@
 # patch --no-backup-if-mismatch --quiet $@ dosage.1.html.diff
+testresults.html: ../testresults.txt
+../scripts/mktestpage.py $< > $@
 .PHONY: all man
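With the new target above, the test-result page can be rebuilt whenever ../testresults.txt exists from a previous test run; a sketch of the intended workflow (run from the doc directory, which is an assumption not spelled out in the Makefile itself):

`$ make testresults.html`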


@@ -21,7 +21,7 @@ you may be infringing upon various copyrights.
 Usage
 ------
-List available comics (over 3500 at the moment):
+List available comics (over 3000 at the moment):
 `$ dosage -l`

doc/css/dosage.css (new file, 18 lines)

@@ -0,0 +1,18 @@
body {
font-family: open-sans, sans-serif;
}
.item {
width: 100px;
margin: 10px;
float: left;
}
.ok
{
color: #119911;
}
.failed
{
color: #992200;
}

doc/css/main.css (new file, 298 lines)

@@ -0,0 +1,298 @@
/*
* HTML5 Boilerplate
*
* What follows is the result of much research on cross-browser styling.
* Credit left inline and big thanks to Nicolas Gallagher, Jonathan Neal,
* Kroc Camen, and the H5BP dev community and team.
*/
/* ==========================================================================
Base styles: opinionated defaults
========================================================================== */
html,
button,
input,
select,
textarea {
color: #222;
}
body {
font-size: 1em;
line-height: 1.4;
}
/*
* Remove text-shadow in selection highlight: h5bp.com/i
* These selection declarations have to be separate.
* Customize the background color to match your design.
*/
::-moz-selection {
background: #b3d4fc;
text-shadow: none;
}
::selection {
background: #b3d4fc;
text-shadow: none;
}
/*
* A better looking default horizontal rule
*/
hr {
display: block;
height: 1px;
border: 0;
border-top: 1px solid #ccc;
margin: 1em 0;
padding: 0;
}
/*
* Remove the gap between images and the bottom of their containers: h5bp.com/i/440
*/
img {
vertical-align: middle;
}
/*
* Remove default fieldset styles.
*/
fieldset {
border: 0;
margin: 0;
padding: 0;
}
/*
* Allow only vertical resizing of textareas.
*/
textarea {
resize: vertical;
}
/* ==========================================================================
Chrome Frame prompt
========================================================================== */
.chromeframe {
margin: 0.2em 0;
background: #ccc;
color: #000;
padding: 0.2em 0;
}
/* ==========================================================================
Author's custom styles
========================================================================== */
/* ==========================================================================
Helper classes
========================================================================== */
/*
* Image replacement
*/
.ir {
background-color: transparent;
border: 0;
overflow: hidden;
/* IE 6/7 fallback */
*text-indent: -9999px;
}
.ir:before {
content: "";
display: block;
width: 0;
height: 100%;
}
/*
* Hide from both screenreaders and browsers: h5bp.com/u
*/
.hidden {
display: none !important;
visibility: hidden;
}
/*
* Hide only visually, but have it available for screenreaders: h5bp.com/v
*/
.visuallyhidden {
border: 0;
clip: rect(0 0 0 0);
height: 1px;
margin: -1px;
overflow: hidden;
padding: 0;
position: absolute;
width: 1px;
}
/*
* Extends the .visuallyhidden class to allow the element to be focusable
* when navigated to via the keyboard: h5bp.com/p
*/
.visuallyhidden.focusable:active,
.visuallyhidden.focusable:focus {
clip: auto;
height: auto;
margin: 0;
overflow: visible;
position: static;
width: auto;
}
/*
* Hide visually and from screenreaders, but maintain layout
*/
.invisible {
visibility: hidden;
}
/*
* Clearfix: contain floats
*
* For modern browsers
* 1. The space content is one way to avoid an Opera bug when the
* `contenteditable` attribute is included anywhere else in the document.
* Otherwise it causes space to appear at the top and bottom of elements
* that receive the `clearfix` class.
* 2. The use of `table` rather than `block` is only necessary if using
* `:before` to contain the top-margins of child elements.
*/
.clearfix:before,
.clearfix:after {
content: " "; /* 1 */
display: table; /* 2 */
}
.clearfix:after {
clear: both;
}
/*
* For IE 6/7 only
* Include this rule to trigger hasLayout and contain floats.
*/
.clearfix {
*zoom: 1;
}
/* ==========================================================================
EXAMPLE Media Queries for Responsive Design.
   These examples override the primary ('mobile first') styles.
Modify as content requires.
========================================================================== */
@media only screen and (min-width: 35em) {
/* Style adjustments for viewports that meet the condition */
}
@media only screen and (-webkit-min-device-pixel-ratio: 1.5),
only screen and (min-resolution: 144dpi) {
/* Style adjustments for high resolution devices */
}
/* ==========================================================================
Print styles.
Inlined to avoid required HTTP connection: h5bp.com/r
========================================================================== */
@media print {
* {
background: transparent !important;
color: #000 !important; /* Black prints faster: h5bp.com/s */
box-shadow: none !important;
text-shadow: none !important;
}
a,
a:visited {
text-decoration: underline;
}
a[href]:after {
content: " (" attr(href) ")";
}
abbr[title]:after {
content: " (" attr(title) ")";
}
/*
* Don't show links for images, or javascript/internal links
*/
.ir a:after,
a[href^="javascript:"]:after,
a[href^="#"]:after {
content: "";
}
pre,
blockquote {
border: 1px solid #999;
page-break-inside: avoid;
}
thead {
display: table-header-group; /* h5bp.com/t */
}
tr,
img {
page-break-inside: avoid;
}
img {
max-width: 100% !important;
}
@page {
margin: 0.5cm;
}
p,
h2,
h3 {
orphans: 3;
widows: 3;
}
h2,
h3 {
page-break-after: avoid;
}
}

doc/css/normalize.css (vendored, new file, 504 lines)

@@ -0,0 +1,504 @@
/*! normalize.css v1.0.1 | MIT License | git.io/normalize */
/* ==========================================================================
HTML5 display definitions
========================================================================== */
/*
* Corrects `block` display not defined in IE 6/7/8/9 and Firefox 3.
*/
article,
aside,
details,
figcaption,
figure,
footer,
header,
hgroup,
nav,
section,
summary {
display: block;
}
/*
* Corrects `inline-block` display not defined in IE 6/7/8/9 and Firefox 3.
*/
audio,
canvas,
video {
display: inline-block;
*display: inline;
*zoom: 1;
}
/*
* Prevents modern browsers from displaying `audio` without controls.
* Remove excess height in iOS 5 devices.
*/
audio:not([controls]) {
display: none;
height: 0;
}
/*
* Addresses styling for `hidden` attribute not present in IE 7/8/9, Firefox 3,
* and Safari 4.
* Known issue: no IE 6 support.
*/
[hidden] {
display: none;
}
/* ==========================================================================
Base
========================================================================== */
/*
* 1. Corrects text resizing oddly in IE 6/7 when body `font-size` is set using
* `em` units.
* 2. Prevents iOS text size adjust after orientation change, without disabling
* user zoom.
*/
html {
font-size: 100%; /* 1 */
-webkit-text-size-adjust: 100%; /* 2 */
-ms-text-size-adjust: 100%; /* 2 */
}
/*
* Addresses `font-family` inconsistency between `textarea` and other form
* elements.
*/
html,
button,
input,
select,
textarea {
font-family: sans-serif;
}
/*
* Addresses margins handled incorrectly in IE 6/7.
*/
body {
margin: 0;
}
/* ==========================================================================
Links
========================================================================== */
/*
* Addresses `outline` inconsistency between Chrome and other browsers.
*/
a:focus {
outline: thin dotted;
}
/*
* Improves readability when focused and also mouse hovered in all browsers.
*/
a:active,
a:hover {
outline: 0;
}
/* ==========================================================================
Typography
========================================================================== */
/*
* Addresses font sizes and margins set differently in IE 6/7.
* Addresses font sizes within `section` and `article` in Firefox 4+, Safari 5,
* and Chrome.
*/
h1 {
font-size: 2em;
margin: 0.67em 0;
}
h2 {
font-size: 1.5em;
margin: 0.83em 0;
}
h3 {
font-size: 1.17em;
margin: 1em 0;
}
h4 {
font-size: 1em;
margin: 1.33em 0;
}
h5 {
font-size: 0.83em;
margin: 1.67em 0;
}
h6 {
font-size: 0.75em;
margin: 2.33em 0;
}
/*
* Addresses styling not present in IE 7/8/9, Safari 5, and Chrome.
*/
abbr[title] {
border-bottom: 1px dotted;
}
/*
* Addresses style set to `bolder` in Firefox 3+, Safari 4/5, and Chrome.
*/
b,
strong {
font-weight: bold;
}
blockquote {
margin: 1em 40px;
}
/*
* Addresses styling not present in Safari 5 and Chrome.
*/
dfn {
font-style: italic;
}
/*
* Addresses styling not present in IE 6/7/8/9.
*/
mark {
background: #ff0;
color: #000;
}
/*
* Addresses margins set differently in IE 6/7.
*/
p,
pre {
margin: 1em 0;
}
/*
* Corrects font family set oddly in IE 6, Safari 4/5, and Chrome.
*/
code,
kbd,
pre,
samp {
font-family: monospace, serif;
_font-family: 'courier new', monospace;
font-size: 1em;
}
/*
* Improves readability of pre-formatted text in all browsers.
*/
pre {
white-space: pre;
white-space: pre-wrap;
word-wrap: break-word;
}
/*
* Addresses CSS quotes not supported in IE 6/7.
*/
q {
quotes: none;
}
/*
* Addresses `quotes` property not supported in Safari 4.
*/
q:before,
q:after {
content: '';
content: none;
}
/*
* Addresses inconsistent and variable font size in all browsers.
*/
small {
font-size: 80%;
}
/*
* Prevents `sub` and `sup` affecting `line-height` in all browsers.
*/
sub,
sup {
font-size: 75%;
line-height: 0;
position: relative;
vertical-align: baseline;
}
sup {
top: -0.5em;
}
sub {
bottom: -0.25em;
}
/* ==========================================================================
Lists
========================================================================== */
/*
* Addresses margins set differently in IE 6/7.
*/
dl,
menu,
ol,
ul {
margin: 1em 0;
}
dd {
margin: 0 0 0 40px;
}
/*
* Addresses paddings set differently in IE 6/7.
*/
menu,
ol,
ul {
padding: 0 0 0 40px;
}
/*
* Corrects list images handled incorrectly in IE 7.
*/
nav ul,
nav ol {
list-style: none;
list-style-image: none;
}
/* ==========================================================================
Embedded content
========================================================================== */
/*
* 1. Removes border when inside `a` element in IE 6/7/8/9 and Firefox 3.
* 2. Improves image quality when scaled in IE 7.
*/
img {
border: 0; /* 1 */
-ms-interpolation-mode: bicubic; /* 2 */
}
/*
* Corrects overflow displayed oddly in IE 9.
*/
svg:not(:root) {
overflow: hidden;
}
/* ==========================================================================
Figures
========================================================================== */
/*
* Addresses margin not present in IE 6/7/8/9, Safari 5, and Opera 11.
*/
figure {
margin: 0;
}
/* ==========================================================================
Forms
========================================================================== */
/*
* Corrects margin displayed oddly in IE 6/7.
*/
form {
margin: 0;
}
/*
* Define consistent border, margin, and padding.
*/
fieldset {
border: 1px solid #c0c0c0;
margin: 0 2px;
padding: 0.35em 0.625em 0.75em;
}
/*
* 1. Corrects color not being inherited in IE 6/7/8/9.
* 2. Corrects text not wrapping in Firefox 3.
* 3. Corrects alignment displayed oddly in IE 6/7.
*/
legend {
border: 0; /* 1 */
padding: 0;
white-space: normal; /* 2 */
*margin-left: -7px; /* 3 */
}
/*
* 1. Corrects font size not being inherited in all browsers.
* 2. Addresses margins set differently in IE 6/7, Firefox 3+, Safari 5,
* and Chrome.
* 3. Improves appearance and consistency in all browsers.
*/
button,
input,
select,
textarea {
font-size: 100%; /* 1 */
margin: 0; /* 2 */
vertical-align: baseline; /* 3 */
*vertical-align: middle; /* 3 */
}
/*
* Addresses Firefox 3+ setting `line-height` on `input` using `!important` in
* the UA stylesheet.
*/
button,
input {
line-height: normal;
}
/*
* 1. Avoid the WebKit bug in Android 4.0.* where (2) destroys native `audio`
* and `video` controls.
* 2. Corrects inability to style clickable `input` types in iOS.
* 3. Improves usability and consistency of cursor style between image-type
* `input` and others.
* 4. Removes inner spacing in IE 7 without affecting normal text inputs.
* Known issue: inner spacing remains in IE 6.
*/
button,
html input[type="button"], /* 1 */
input[type="reset"],
input[type="submit"] {
-webkit-appearance: button; /* 2 */
cursor: pointer; /* 3 */
*overflow: visible; /* 4 */
}
/*
* Re-set default cursor for disabled elements.
*/
button[disabled],
input[disabled] {
cursor: default;
}
/*
* 1. Addresses box sizing set to content-box in IE 8/9.
* 2. Removes excess padding in IE 8/9.
* 3. Removes excess padding in IE 7.
* Known issue: excess padding remains in IE 6.
*/
input[type="checkbox"],
input[type="radio"] {
box-sizing: border-box; /* 1 */
padding: 0; /* 2 */
*height: 13px; /* 3 */
*width: 13px; /* 3 */
}
/*
* 1. Addresses `appearance` set to `searchfield` in Safari 5 and Chrome.
* 2. Addresses `box-sizing` set to `border-box` in Safari 5 and Chrome
* (include `-moz` to future-proof).
*/
input[type="search"] {
-webkit-appearance: textfield; /* 1 */
-moz-box-sizing: content-box;
-webkit-box-sizing: content-box; /* 2 */
box-sizing: content-box;
}
/*
* Removes inner padding and search cancel button in Safari 5 and Chrome
* on OS X.
*/
input[type="search"]::-webkit-search-cancel-button,
input[type="search"]::-webkit-search-decoration {
-webkit-appearance: none;
}
/*
* Removes inner padding and border in Firefox 3+.
*/
button::-moz-focus-inner,
input::-moz-focus-inner {
border: 0;
padding: 0;
}
/*
* 1. Removes default vertical scrollbar in IE 6/7/8/9.
* 2. Improves readability and alignment in all browsers.
*/
textarea {
overflow: auto; /* 1 */
vertical-align: top; /* 2 */
}
/* ==========================================================================
Tables
========================================================================== */
/*
* Remove most spacing between table cells.
*/
table {
border-collapse: collapse;
border-spacing: 0;
}


@@ -9,7 +9,7 @@ Section: User Commands (1)<BR><A HREF="#index">Index</A>
 <A NAME="lbAB">&nbsp;</A>
 <H2>NAME</H2>
-dosage - comic strip downloader
+dosage - a commandline webcomic downloader and archiver
 <A NAME="lbAC">&nbsp;</A>
 <H2>SYNOPSIS</H2>

doc/js/masonry.min.js (vendored, new file, 9 lines)

File diff suppressed because one or more lines are too long

doc/testresults.html (new file, 156 lines)

@@ -0,0 +1,156 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
<title>Dosage test results</title>
<meta name="description" content="">
<meta name="viewport" content="width=device-width">
<link rel="stylesheet" href="css/normalize.css">
<link rel="stylesheet" href="css/main.css">
<link rel="stylesheet" href="css/dosage.css">
<script src="js/masonry.min.js"></script>
</head>
<body>
<p>Dosage test results from 07.12.2012</p>
<div id="container">
<div class="item"><a href="http://alienshores.com/alienshores_band/" class="ok">AlienShores OK</a></div>
<div class="item"><a href="http://www.biggercheese.com/" class="ok">BiggerThanCheeses OK</a></div>
<div class="item"><a href="http://www.captainsnes.com/" class="ok">CaptainSNES OK</a></div>
<div class="item"><a href="http://jessfink.com/Chester5000XYV/" class="ok">Chester5000XYV OK</a></div>
<div class="item"><a href="http://www.creators.com/comics/diamond-lil.html" class="ok">Creators/DiamondLil OK</a></div>
<div class="item"><a href="http://www.creators.com/comics/hope-and-death.html" class="ok">Creators/HopeAndDeath OK</a></div>
<div class="item"><a href="http://www.creators.com/comics/on-a-claire-day.html" class="ok">Creators/OnaClaireDay OK</a></div>
<div class="item"><a href="http://dilbert.com/" class="ok">Dilbert OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/2s_a_company/5250099/" class="ok">DrunkDuck/2s_a_company OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Acrobat/5380691/" class="ok">DrunkDuck/Acrobat OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Allan/5326380/" class="ok">DrunkDuck/Allan OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Amya/5355643/" class="ok">DrunkDuck/Amya OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Angry_D_Monkey/5300564/" class="ok">DrunkDuck/ Angry_D_Monkey OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Awesomataz/5388634/" class="ok">DrunkDuck/Awesomataz OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Battle_of_the_Robofemoids/5414294/" class="ok">DrunkDuck/ Battle_of_the_Robofemoids OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Been_Better/5416677/" class="ok">DrunkDuck/Been_Better OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/BffSatan/5237809/" class="ok">DrunkDuck/BffSatan OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Chester_and_Ferdie/5406525/" class="ok">DrunkDuck/ Chester_and_Ferdie OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Children_of_the_Tiger/5220744/" class="ok">DrunkDuck/ Children_of_the_Tiger OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Circle_Arcadia/4925373/" class="ok">DrunkDuck/ Circle_Arcadia OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Cloud_Eagle/5329187/" class="ok">DrunkDuck/Cloud_Eagle OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Dasien/5343187/" class="ok">DrunkDuck/Dasien OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Desperate_Angels/5405057/" class="ok">DrunkDuck/ Desperate_Angels OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Edge_of_December/5352865/" class="ok">DrunkDuck/ Edge_of_December OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Engine/5403371/" class="ok">DrunkDuck/Engine OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Explorers_Of_the_Unknown/5395556/" class="ok">DrunkDuck/ Explorers_Of_the_Unknown OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Faults/5226225/" class="ok">DrunkDuck/Faults OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Good_Guy/5413334/" class="ok">DrunkDuck/Good_Guy OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Headless_Cross/5390146/" class="ok">DrunkDuck/ Headless_Cross OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/I_got_it_in_my_mouth/5368954/" class="ok">DrunkDuck/ I_got_it_in_my_mouth OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Karen_the_Marilith/5227021/" class="ok">DrunkDuck/ Karen_the_Marilith OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Kat_and_Dogg/5074974/" class="ok">DrunkDuck/Kat_and_Dogg OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Kitty_Litter/5245306/" class="ok">DrunkDuck/Kitty_Litter OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Knights_Requiem/5135780/" class="ok">DrunkDuck/ Knights_Requiem OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Last_Place_Comics/5416438/" class="ok">DrunkDuck/ Last_Place_Comics OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Last_War/4954728/" class="ok">DrunkDuck/Last_War OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Life_Blowz/5305212/" class="ok">DrunkDuck/Life_Blowz OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Linnyanie/5205907/" class="ok">DrunkDuck/Linnyanie OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Maggot_Boy/5407805/" class="ok">DrunkDuck/Maggot_Boy OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Metal_Breakdown/5386007/" class="ok">DrunkDuck/ Metal_Breakdown OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Morphic/5253227/" class="ok">DrunkDuck/Morphic OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Musical_Farm/5357846/" class="ok">DrunkDuck/Musical_Farm OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/My_Pet_Demon/5415753/" class="ok">DrunkDuck/My_Pet_Demon OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/My_Thingie/5354620/" class="ok">DrunkDuck/My_Thingie OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Mystery_World/5071936/" class="ok">DrunkDuck/ Mystery_World OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/No_Talent/5264318/" class="ok">DrunkDuck/No_Talent OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/One_last_breath/5178709/" class="ok">DrunkDuck/ One_last_breath OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Pixel_Plumbers/5344885/" class="ok">DrunkDuck/ Pixel_Plumbers OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Planet_Chaser/5416679/" class="ok">DrunkDuck/ Planet_Chaser OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Pokemon_Light_and_Dark/5341704/" class="ok">DrunkDuck/ Pokemon_Light_and_Dark OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Politics_The_Tankers_Way/4982065/" class="ok">DrunkDuck/ Politics_The_Tankers_Way OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Prelude/4895211/" class="ok">DrunkDuck/Prelude OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Project_217/5414112/" class="ok">DrunkDuck/Project_217 OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Solar_Salvage/5394935/" class="ok">DrunkDuck/ Solar_Salvage OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/The_Adventures_of_Chad_Cleanly/5137427/" class="ok">DrunkDuck/ The_Adventures_of_Chad_Cleanly OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/The_Begining_of_an_End/5113421/" class="ok">DrunkDuck/ The_Begining_of_an_End OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/The_Chronicles_of_Gaddick/5372621/" class="ok">DrunkDuck/ The_Chronicles_of_Gaddick OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/The_Emerald_City/5188061/" class="ok">DrunkDuck/ The_Emerald_City OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/The_Essyane_Warriors/5416744/" class="ok">DrunkDuck/ The_Essyane_Warriors OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/The_Uncanny_Uper_Dave/5273996/" class="ok">DrunkDuck/ The_Uncanny_Uper_Dave OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/This_Ego_of_Mine/5222563/" class="ok">DrunkDuck/ This_Ego_of_Mine OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Thog_Infinitron/5207209/" class="ok">DrunkDuck/ Thog_Infinitron OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Twisted_Mind_of_Stranger/5416587/" class="ok">DrunkDuck/ Twisted_Mind_of_Stranger OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Underscore/5333512/" class="ok">DrunkDuck/Underscore OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Woah_Roscoe/5234849/" class="ok">DrunkDuck/Woah_Roscoe OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/XAZ_A_Megaman_X_Fancomic/5195871/" class="ok">DrunkDuck/ XAZ_A_Megaman_X_Fancomic OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/Yeah_wait_what/5292872/" class="ok">DrunkDuck/ Yeah_wait_what OK</a></div>
<div class="item"><a href="http://www.drunkduck.com/the_Many_Deaths_of_Mario/5376436/" class="ok">DrunkDuck/ the_Many_Deaths_of_Mario OK</a></div>
<div class="item"><a href="http://www.gocomics.com/acadasia-down/2012/11/20" class="ok">GoComics/AcadasiaDown OK</a></div>
<div class="item"><a href="http://www.gocomics.com/anecdote/2012/08/15" class="ok">GoComics/Anecdote OK</a></div>
<div class="item"><a href="http://www.gocomics.com/bloomcounty/2012/12/07" class="ok">GoComics/BloomCounty OK</a></div>
<div class="item"><a href="http://www.gocomics.com/boundandgagged/2012/12/07" class="ok">GoComics/ BoundandGagged OK</a></div>
<div class="item"><a href="http://www.gocomics.com/candyblondell/2012/09/28" class="ok">GoComics/CANDYBLONDELL OK</a></div>
<div class="item"><a href="http://www.gocomics.com/casey-and-kyle/2012/11/17" class="ok">GoComics/CaseyandKyle OK</a></div>
<div class="item"><a href="http://www.gocomics.com/chuckasay/2012/12/05" class="ok">GoComics/ChuckAsay OK</a></div>
<div class="item"><a href="http://www.gocomics.com/compu-toon/2012/12/07" class="ok">GoComics/Computoon OK</a></div>
<div class="item"><a href="http://www.gocomics.com/ditzabled-princess/2012/12/05" class="ok">GoComics/ DitzAbledPrincess OK</a></div>
<div class="item"><a href="http://www.gocomics.com/forbetterorforworse/2012/12/07" class="ok">GoComics/ ForBetterorForWorse OK</a></div>
<div class="item"><a href="http://www.gocomics.com/historybluffs/2012/09/13" class="ok">GoComics/HistoryBluffs OK</a></div>
<div class="item"><a href="http://www.gocomics.com/incompatibles/2012/09/29" class="ok">GoComics/INCOMPATIBLES OK</a></div>
<div class="item"><a href="http://www.gocomics.com/inkpen/2012/12/07" class="ok">GoComics/InkPen OK</a></div>
<div class="item"><a href="http://www.gocomics.com/magic-coffee-hair/2012/12/06" class="ok">GoComics/ MagicCoffeeHair OK</a></div>
<div class="item"><a href="http://www.gocomics.com/the-lil-miesters/2012/12/07" class="ok">GoComics/ TheLilMiesters OK</a></div>
<div class="item"><a href="http://www.gocomics.com/uncleartsfunland/2012/12/02" class="ok">GoComics/ UncleArtsFunland OK</a></div>
<div class="item"><a href="http://ballofyarn.comicgenesis.com/d/20020624.html" class="ok">KeenSpot/BallofYarn OK</a></div>
<div class="item"><a href="http://beforedawn.comicgenesis.com/" class="ok">KeenSpot/BeforeDawn OK</a></div>
<div class="item"><a href="http://bsbs.comicgenesis.com/" class="ok">KeenSpot/BitterSweetBS OK</a></div>
<div class="item"><a href="http://llv.comicgenesis.com/" class="ok">KeenSpot/CTRO OK</a></div>
<div class="item"><a href="http://chrisread.comicgenesis.com/" class="ok">KeenSpot/ Chris27sCollectionofCrappyComix OK</a></div>
<div class="item"><a href="http://chronic.comicgenesis.com/" class="ok">KeenSpot/ ChronicMisadventures OK</a></div>
<div class="item"><a href="http://citrushouse.comicgenesis.com/" class="ok">KeenSpot/CitrusHouse OK</a></div>
<div class="item"><a href="http://creepyhead.comicgenesis.com/" class="ok">KeenSpot/CreepyHead OK</a></div>
<div class="item"><a href="http://fearful.comicgenesis.com/" class="ok">KeenSpot/ FearfulAsymptote OK</a></div>
<div class="item"><a href="http://gmhockey.comicgenesis.com/" class="ok">KeenSpot/ GameMisconduct OK</a></div>
<div class="item"><a href="http://gamingguardians.comicgenesis.com/" class="ok">KeenSpot/ GamingGuardians OK</a></div>
<div class="item"><a href="http://goldenage.comicgenesis.com/" class="ok">KeenSpot/GoldenAge OK</a></div>
<div class="item"><a href="http://jtv.comicgenesis.com/" class="ok">KeenSpot/ JimTheVikingTheUnanimatedSeries OK</a></div>
<div class="item"><a href="http://liliane.comicgenesis.com/" class="ok">KeenSpot/LilianeBiDyke OK</a></div>
<div class="item"><a href="http://lovebites.comicgenesis.com/" class="ok">KeenSpot/LoveBites OK</a></div>
<div class="item"><a href="http://mtranc3.comicgenesis.com/" class="ok">KeenSpot/MTranc3 OK</a></div>
<div class="item"><a href="http://majestic7.comicgenesis.com/" class="ok">KeenSpot/Majestic7 OK</a></div>
<div class="item"><a href="http://newtraditionalists.comicgenesis.com/" class="ok">KeenSpot/ NewTraditionalists OK</a></div>
<div class="item"><a href="http://orangepenguins.comicgenesis.com/" class="ok">KeenSpot/ OrangePenguins OK</a></div>
<div class="item"><a href="http://ordinarypeople.comicgenesis.com/" class="ok">KeenSpot/ OrdinaryPeople OK</a></div>
<div class="item"><a href="http://ozoneocean.comicgenesis.com/" class="ok">KeenSpot/PinkyTA OK</a></div>
<div class="item"><a href="http://pr0ncrest.comicgenesis.com/" class="ok">KeenSpot/Pr0nCrest OK</a></div>
<div class="item"><a href="http://shinegotower.comicgenesis.com/" class="ok">KeenSpot/ ShineGetDumplingsGoGoTower OK</a></div>
<div class="item"><a href="http://sippan.comicgenesis.com/" class="ok">KeenSpot/SippansSerie OK</a></div>
<div class="item"><a href="http://splendiforous.comicgenesis.com/" class="ok">KeenSpot/ SplendiforousEscapades OK</a></div>
<div class="item"><a href="http://straightahead.comicgenesis.com/" class="ok">KeenSpot/StraightAhead OK</a></div>
<div class="item"><a href="http://tang.comicgenesis.com/" class="ok">KeenSpot/TangsComics OK</a></div>
<div class="item"><a href="http://theanarchist.comicgenesis.com/" class="ok">KeenSpot/TheAnarchist OK</a></div>
<div class="item"><a href="http://okk.comicgenesis.com/" class="ok">KeenSpot/ TheMisadventuresofOkk OK</a></div>
<div class="item"><a href="http://xuanwu.comicgenesis.com/" class="ok">KeenSpot/TheProfessor OK</a></div>
<div class="item"><a href="http://gager.comicgenesis.com/" class="ok">KeenSpot/ TheSagaofGagerff2 OK</a></div>
<div class="item"><a href="http://noskillz.comicgenesis.com/" class="ok">KeenSpot/Trevino OK</a></div>
<div class="item"><a href="http://weekendwarriors.comicgenesis.com/" class="ok">KeenSpot/ WeekendWarriors OK</a></div>
<div class="item"><a href="http://fallinglessons.comicgenesis.com/" class="ok">KeenSpot/ fallinglessons OK</a></div>
<div class="item"><a href="http://leapingwizards.comicgenesis.com/" class="ok">KeenSpot/ leapingwizards OK</a></div>
<div class="item"><a href="http://www.meekcomic.com/" class="ok">Meek OK</a></div>
<div class="item"><a href="http://noneedforbushido.com/latest/" class="ok">NoNeedForBushido OK</a></div>
<div class="item"><a href="http://nobodyscores.loosenutstudio.com/" class="ok">NobodyScores OK</a></div>
<div class="item"><a href="http://www.odd-fish.net/" class="ok">OddFish OK</a></div>
<div class="item"><a href="http://oglaf.com/" class="ok">Oglaf OK</a></div>
<div class="item"><a href="http://www.redmeat.com/redmeat/2012-12-04/index.html" class="ok">RedMeat OK</a></div>
<div class="item"><a href="http://www.shortpacked.com/" class="ok">Shortpacked OK</a></div>
<div class="item"><a href="http://mpmcomic.smackjeeves.com/comics/205716/how-does-it-work-oh-thats-how/" class="ok">SmackJeeves/mpmcomic OK</a></div>
<div class="item"><a href="http://sf.snafu-comics.com/" class="ok">SnafuComics/SF OK</a></div>
<div class="item"><a href="http://www.thefallenangel.co.uk/hmhigh/" class="ok">TheFallenAngel/HMHigh OK</a></div>
<div class="item"><a href="http://www.hs.fi/viivijawagner/" class="ok">ViiviJaWagner OK</a></div>
</div>
<script>
window.onload = function() {
var wall = new Masonry( document.getElementById('container'), {
columnWidth: 240
});
};
</script>
</body>
</html>

dosage (61 changed lines)

@@ -24,7 +24,7 @@ import optparse
 from dosagelib import events, scraper
 from dosagelib.output import out
-from dosagelib.util import get_columns, internal_error
+from dosagelib.util import get_columns, internal_error, getDirname
 from dosagelib.configuration import App, Freeware, Copyright, SupportUrl
 def setupOptions():
@@ -35,7 +35,8 @@ def setupOptions():
 usage = 'usage: %prog [options] comicModule [comicModule ...]'
 parser = optparse.OptionParser(usage=usage)
 parser.add_option('-v', '--verbose', action='count', dest='verbose', default=0, help='provides verbose output, use multiple times for more verbosity')
-parser.add_option('-a', '--all', action='count', dest='all', default=None, help='traverse and retrieve all available comics')
+parser.add_option('-n', '--numstrips', action='store', dest='numstrips', type='int', default=0, help='traverse and retrieve the given number of comic strips; use --all to retrieve all comic strips')
+parser.add_option('-a', '--all', action='store_true', dest='all', default=None, help='traverse and retrieve all comic strips')
 parser.add_option('-b', '--basepath', action='store', dest='basepath', default='Comics', help='set the path to create invidivual comic directories in, default is Comics', metavar='PATH')
 parser.add_option('--baseurl', action='store', dest='baseurl', default=None, help='the base URL of your comics directory (for RSS, HTML, etc.); this should correspond to --base-path', metavar='PATH')
 parser.add_option('-l', '--list', action='store_const', const=1, dest='list', help='list available comic modules')
@@ -73,20 +74,26 @@ def saveComicStrip(strip, basepath):
 if saved:
 allskipped = False
 except IOError as msg:
-out.write('Error saving %s: %s' % (image.filename, msg))
+out.error('Could not save %s: %s' % (image.filename, msg))
 errors += 1
 return errors, allskipped
 def displayHelp(comics, basepath):
 """Print help for comic strips."""
-for scraperobj in getScrapers(comics, basepath):
-for line in scraperobj.getHelp().splitlines():
-out.write("Help: "+line)
+try:
+for scraperobj in getScrapers(comics, basepath):
+for line in scraperobj.getHelp().splitlines():
+out.info("Help: "+line)
+except ValueError as msg:
+out.error(msg)
+return 1
 return 0
 def getComics(options, comics):
 """Retrieve given comics."""
+# XXX refactor
 errors = 0
 if options.output:
 events.installHandler(options.output, options.basepath, options.baseurl)
@@ -95,6 +102,8 @@ def getComics(options, comics):
 out.context = scraperobj.get_name()
 if options.all:
 strips = scraperobj.getAllStrips()
+elif options.numstrips:
+strips = scraperobj.getAllStrips(options.numstrips)
 else:
 strips = scraperobj.getCurrentStrips()
 first = True
@@ -105,12 +114,13 @@ def getComics(options, comics):
 if not first and scraperobj.indexes:
 # stop when indexed retrieval skipped all images for one
 # comie strip (except the first one)
-out.write("Stop retrieval because image file already exists")
+out.info("Stop retrieval because image file already exists")
 break
 first = False
-except IOError as msg:
-out.write('Error getting strip: %s' % msg)
+except (ValueError, IOError) as msg:
+out.error(msg)
 errors += 1
+continue
 events.getHandler().end()
 return errors
@@ -123,26 +133,22 @@ def run(options, comics):
 if options.list:
 return doList(options.list == 1)
 if len(comics) <= 0:
-out.write('Warning: No comics specified, bailing out!')
+out.warn('No comics specified, bailing out!')
 return 1
-try:
-if options.modhelp:
-return displayHelp(comics, options.basepath)
-return getComics(options, comics)
-except ValueError as msg:
-out.write("Error: %s" % msg)
-return 1
+if options.modhelp:
+return displayHelp(comics, options.basepath)
+return getComics(options, comics)
 def doList(columnList):
 """List available comics."""
-out.write('Available comic scrapers:')
+out.info('Available comic scrapers:')
 scrapers = getScrapers(['@@'])
 if columnList:
 num = doColumnList(scrapers)
 else:
 num = doSingleList(scrapers)
-out.write('%d supported comics.' % num)
+out.info('%d supported comics.' % num)
 return 0
@@ -171,9 +177,9 @@ def getScrapers(comics, basepath=None):
 if '@' in comics:
 # only scrapers whose directory already exists
 if len(comics) > 1:
-out.write("WARN: using '@' as comic name ignores all other specified comics.\n")
+out.warn("using '@' as comic name ignores all other specified comics.")
 for scraperclass in scraper.get_scrapers():
-dirname = scraperclass.get_name().replace('/', os.sep)
+dirname = getDirname(scraperclass.get_name())
 if os.path.isdir(os.path.join(basepath, dirname)):
 yield scraperclass()
 elif '@@' in comics:
@@ -181,7 +187,9 @@ def getScrapers(comics, basepath=None):
 for scraperclass in scraper.get_scrapers():
 yield scraperclass()
 else:
-# only selected
+# get only selected comic scrapers
+# store them in a list to catch naming errors early
+scrapers = []
 for comic in comics:
 if ':' in comic:
 name, index = comic.split(':', 1)
@@ -189,14 +197,19 @@ def getScrapers(comics, basepath=None):
 else:
 name = comic
 indexes = None
-yield scraper.get_scraper(name)(indexes=indexes)
+scrapers.append(scraper.get_scraper(name)(indexes=indexes))
+for s in scrapers:
+yield s
 def main():
 """Parse options and execute commands."""
 try:
 parser = setupOptions()
 options, args = parser.parse_args()
-res = run(options, args)
+# eliminate duplicate comic names
+comics = set(args)
+res = run(options, comics)
 except KeyboardInterrupt:
 print("Aborted.")
 res = 1
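To illustrate the new options above: -n/--numstrips limits retrieval to a fixed number of strips per comic module, while -a/--all (now a plain flag instead of a counter) fetches every available strip. Hypothetical invocations, using the Oglaf module that appears in the test results above:

`$ dosage -n 10 Oglaf`
`$ dosage --all Oglaf`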


@@ -7,7 +7,7 @@ import rfc822
 import time
 from .output import out
-from .util import getImageObject, normaliseURL, unquote, strsize
+from .util import getImageObject, normaliseURL, unquote, strsize, getDirname, getFilename
 from .events import getHandler
 class FetchComicError(IOError):
@@ -34,20 +34,21 @@ class ComicStrip(object):
 filename = self.namer(url, self.stripUrl)
 if filename is None:
 filename = url.rsplit('/', 1)[1]
-return ComicImage(self.name, url, self.stripUrl, filename)
+dirname = getDirname(self.name)
+return ComicImage(self.name, url, self.stripUrl, dirname, filename)
 class ComicImage(object):
 """A comic image downloader."""
-def __init__(self, name, url, referrer, filename):
+def __init__(self, name, url, referrer, dirname, filename):
 """Set URL and filename."""
 self.name = name
 self.referrer = referrer
 self.url = url
+self.dirname = dirname
+filename = getFilename(filename)
 self.filename, self.ext = os.path.splitext(filename)
-self.filename = self.filename.replace(os.sep, '_')
-self.ext = self.ext.replace(os.sep, '_')
 def connect(self):
 """Connect to host and get meta information."""
@@ -71,7 +72,7 @@ class ComicImage(object):
 self.ext = '.' + subtype.replace('jpeg', 'jpg')
 self.contentLength = int(self.urlobj.headers.get('content-length', 0))
 self.lastModified = self.urlobj.headers.get('last-modified')
-out.write('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength), 2)
+out.debug('... filename = %r, ext = %r, contentLength = %d' % (self.filename, self.ext, self.contentLength))
 def touch(self, filename):
 """Set last modified date on filename."""
@@ -86,18 +87,18 @@ class ComicImage(object):
 self.connect()
 filename = "%s%s" % (self.filename, self.ext)
 comicSize = self.contentLength
-comicDir = os.path.join(basepath, self.name.replace('/', os.sep))
+comicDir = os.path.join(basepath, self.dirname)
 if not os.path.isdir(comicDir):
 os.makedirs(comicDir)
 fn = os.path.join(comicDir, filename)
 if os.path.isfile(fn) and os.path.getsize(fn) >= comicSize:
 self.touch(fn)
-out.write('Skipping existing file "%s".' % fn, 1)
+out.info('Skipping existing file "%s".' % fn, 1)
 return fn, False
 try:
-out.write('Writing comic to file %s...' % fn, 3)
+out.debug('Writing comic to file %s...' % fn)
 with open(fn, 'wb') as comicOut:
 comicOut.write(self.urlobj.content)
 self.touch(fn)
@@ -107,7 +108,7 @@ class ComicImage(object):
 raise
 else:
 size = strsize(os.path.getsize(fn))
-out.write("Saved %s (%s)." % (fn, size), 1)
+out.info("Saved %s (%s)." % (fn, size), 1)
 getHandler().comicDownloaded(self.name, fn)
 return fn, True


@@ -3,6 +3,7 @@
 # Copyright (C) 2012 Bastian Kleineidam
 from __future__ import print_function
 import time
+import sys
 class Output(object):
 """Print output with context, indentation and optional timestamps."""
@@ -13,7 +14,19 @@ class Output(object):
 self.level = 0
 self.timestamps = False
-def write(self, s, level=0):
+def info(self, s, level=0):
+self.write(s, level=level)
+def debug(self, s):
+self.write(s, level=2)
+def warn(self, s):
+self.write("WARN: %s" % s, file=sys.stderr)
+def error(self, s):
+self.write("ERROR: %s" % s, file=sys.stderr)
+def write(self, s, level=0, file=sys.stdout):
 """Write message with indentation, context and optional timestamp."""
 if level > self.level:
 return
@@ -21,7 +34,8 @@ class Output(object):
 timestamp = time.strftime('%H:%M:%S ')
 else:
 timestamp = ''
-print('%s%s> %s' % (timestamp, self.context, s))
+print('%s%s> %s' % (timestamp, self.context, s), file=file)
+file.flush()
 def writelines(self, lines, level=0):
 """Write multiple messages."""


@@ -135,6 +135,7 @@ class AstronomyPOTD(_BasicScraper):
 compile(r'<a href="(ap\d{6}\.html)">&gt;</a>'))
 stripUrl = 'http://antwrp.gsfc.nasa.gov/apod/ap%s.html'
 imageSearch = compile(r'<a href="(image/\d{4}/[^"]+)"')
+multipleImagesPerStrip = True
 prevSearch = compile(r'<a href="(ap\d{6}\.html)">&lt;</a>')
 help = 'Index format: yymmdd'
@@ -176,10 +177,6 @@ class AGirlAndHerFed(_BasicScraper):
 prevSearch = compile(r'<a href="([^"]+)">[^>]+Back')
 help = 'Index format: nnn'
-@classmethod
-def namer(cls, imageUrl, pageUrl):
-return pageUrl.split('?')[-1]
 class AetheriaEpics(_BasicScraper):
 latestUrl = 'http://aetheria-epics.schala.net/'


@@ -101,6 +101,10 @@ class BoyOnAStickAndSlither(_BasicScraper):
 prevSearch = compile(tagre("a", "href", r'(/page/\d+)') + "<span>Next page")
 help = 'Index format: n (unpadded)'
+@classmethod
+def namer(cls, imageUrl, pageUrl):
+return pageUrl.rsplit('/')[-1]
 class ButternutSquash(_BasicScraper):
 latestUrl = 'http://www.butternutsquash.net/'
@@ -206,12 +210,3 @@ class BetweenFailures(_BasicScraper):
 prevSearch = compile(tagre("a", "href", r'(http://betweenfailures\.com/archives/archive/[^"]+)', after="previous"))
 help = 'Index format: stripnum-strip-name'
-class BillyTheBeaker(_BasicScraper):
-latestUrl = 'http://billy.defectivejunk.com/'
-stripUrl = latestUrl + 'index.php?strip=%s'
-multipleImagesPerStrip = True
-imageSearch = compile(tagre("img", "src", r'(bub\d+_\d+[^"]+)'))
-prevSearch = compile(tagre("a", "href", r'(index\.php\?strip\=[^"]+)', after="Previous strip"))
-help = 'Index format: nnn'


@@ -26,6 +26,15 @@ class CaribbeanBlue(_BasicScraper):
 help = 'Index format: nnn-stripname'
+class Catalyst(_BasicScraper):
+baseUrl = "http://catalyst.spiderforest.com/"
+latestUrl = baseUrl + "comic.php?comic_id=415"
+stripUrl = baseUrl + "comic.php?comic_id=%s"
+imageSearch = compile(tagre("img", "src", r'(http://catalyst\.spiderforest\.com/comics/[^"]+)'))
+prevSearch = compile("<center>" + tagre("a", "href", r'(http://catalyst\.spiderforest\.com/comic\.php\?comic_id=\d+)'))
+help = 'Index format: number'
 class Catena(_BasicScraper):
 latestUrl = 'http://catenamanor.com/'
 stripUrl = latestUrl + '%s'
@@ -98,6 +107,14 @@ class Commissioned(_BasicScraper):
 help = 'Index format: n'
+class Concession(_BasicScraper):
+latestUrl = 'http://concessioncomic.com/'
+stripUrl = latestUrl + 'index.php?pid=%s'
+imageSearch = compile(tagre("img", "src", r'(http://concessioncomic\.com/comics/[^"]+)', after="Comic"))
+prevSearch = compile(tagre("a", "href", r'(http://concessioncomic\.com/index\.php\?pid=\d+)', after="nav-prev"))
+help = 'Index format: number'
 class CoolCatStudio(_BasicScraper):
 latestUrl = 'http://www.coolcatstudio.com/'
 stripUrl = latestUrl + 'strips-cat/ccs%s'


@@ -6,7 +6,7 @@ from re import compile
 from ..scraper import make_scraper
 from ..util import tagre
-_imageSearch = compile(tagre("img", "src", r'(/comics/\d+/[^"]+)'))
+_imageSearch = compile(tagre("a", "href", r'(/comics/\d+/[^"]+)'))
 def add(name, path):
 baseurl = 'http://www.creators.com'


@@ -9,14 +9,14 @@ from ..helpers import indirectStarter
 from ..util import tagre
-class DMFA(_BasicScraper):
-latestUrl = 'http://www.missmab.com/'
-stripUrl = latestUrl + 'Comics/Vol_%s.php'
-imageSearch = compile(tagre("img", "src", r'((?:Comics/|Vol)[^"]+)'))
-multipleImagesPerStrip = True
-prevSearch = compile(tagre("a", "href", r'((?:Comics/)?Vol[^"]+)')+
-tagre("img", "src", r'(?:../)?Images/comicprev\.gif'))
-help = 'Index format: nnn (normally, some specials)'
+class DailyDose(_BasicScraper):
+baseUrl = 'http://dailydoseofcomics.com/'
+starter = indirectStarter(baseUrl,
+compile(tagre("a", "href", r'(http://dailydoseofcomics\.com/[^"]+)', after="preview")))
+stripUrl = baseUrl + '%s/'
+imageSearch = compile(tagre("img", "src", r'([^"]+)', before="align(?:none|center)"))
+prevSearch = compile(tagre("a", "href", r'(http://dailydoseofcomics\.com/[^"]+)', after="prev"))
+help = 'Index format: stripname'
 class DandyAndCompany(_BasicScraper):
@@ -52,6 +52,16 @@ class DeepFried(_BasicScraper):
 help = 'Index format: non'
+class DMFA(_BasicScraper):
+latestUrl = 'http://www.missmab.com/'
+stripUrl = latestUrl + 'Comics/Vol_%s.php'
+imageSearch = compile(tagre("img", "src", r'((?:Comics/|Vol)[^"]+)'))
+multipleImagesPerStrip = True
+prevSearch = compile(tagre("a", "href", r'((?:Comics/)?Vol[^"]+)')+
+tagre("img", "src", r'(?:../)?Images/comicprev\.gif'))
+help = 'Index format: nnn (normally, some specials)'
 class DoemainOfOurOwn(_BasicScraper):
 latestUrl = 'http://www.doemain.com/'
 stripUrl = latestUrl + 'index.cgi/%s'


@@ -423,7 +423,6 @@ add('Flying_Under_the_Influence')
 add('For_Your_Eyes_Only')
 add('Forsaken_Valor')
 add('Fortress_Avalon')
-add('Found_Art')
 add('Four_Bats')
 add('Frame_by_Frame')
 add('Frank_and_Steinway')
@@ -670,7 +669,6 @@ add('ManBoys')
 add('Mario_and_Luigi_Misadventures')
 add('Marios_Day_Job')
 add('Marital_Bliss')
-add('Mary_Sue_Academy')
 add('Mask_of_the_Aryans')
 add('Master_the_Tiger')
 add('Mastorism')


@@ -20,7 +20,7 @@ class EerieCuties(_BasicScraper):
 class Eriadan(_BasicScraper):
 latestUrl = 'http://www.shockdom.com/webcomics/eriadan/'
 stripUrl = latestUrl + '%s'
-imageSearch = compile(tagre("img", "src", r'(http://www\.shockdom\.com/webcomics/eriadan/files/[^"]+)', after='alt=""'))
+imageSearch = compile(tagre("img", "src", r'(http://www\.shockdom\.com/webcomics/eriadan/files/[^"]+)', after='width="800"'))
 prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
 help = 'Index format: yyyy/mm/dd/nnn (unpadded)'


@@ -1,28 +0,0 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import make_scraper
from ..util import asciify
_imageSearch = compile(r'SRC="(http://www\.thefallenangel\.co\.uk/\w+comics/.+?)"')
_prevSearch = compile(r' <a href="(http://www\.thefallenangel\.co\.uk/.+?)"><img[^>]+?src="http://www\.thefallenangel\.co\.uk/images/previousday\.jpg"')
def add(name, shortname):
latestUrl = 'http://www.thefallenangel.co.uk/cgi-bin/%sautokeen/autokeenlite.cgi' % shortname
classname = "FallenAngel_" + asciify(name)
globals()[classname] = make_scraper(classname,
latestUrl = latestUrl,
stripUrl = latestUrl + '?date=%s',
name='FallenAngel/' + name,
imageSearch = _imageSearch,
prevSearch = _prevSearch,
help = 'Index format: yyyymmdd',
)
add('HighMaintenance', 'hm')
add('FAWK', 'fawk')
add('MalloryChan', 'mallorychan')


@@ -4,7 +4,7 @@
 from re import compile
 from ..scraper import make_scraper
-from ..util import tagre
+from ..util import tagre, quote
 from ..helpers import bounceStarter
 _imageSearch = compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)'))
@@ -23,7 +23,7 @@ def add(name, shortname):
 globals()[classname] = make_scraper(classname,
 starter = bounceStarter(baseUrl + shortname, _nextSearch),
 name='GoComics/' + name,
-stripUrl=baseUrl + shortname + '/%s',
+stripUrl=baseUrl + quote(shortname) + '/%s',
 imageSearch = _imageSearch,
 prevSearch = _prevSearch,
 help='Index format: yyyy/mm/dd',
@@ -433,7 +433,6 @@ add('Rechid', '/rechid')
 add('RedMeat', '/redmeat')
 add('RedandRover', '/redandrover')
 add('ReplyAll', '/replyall')
-add('RichardsPoorAlmanac', '/richards-poor-almanac')
 add('RipHaywire', '/riphaywire')
 add('RipleysBelieveItorNot', '/ripleysbelieveitornot')
 add('Risible', '/risible')


@@ -16,6 +16,10 @@ _prevSearch = compile(tagre("a", "href", r'([^"]*/d/\d{8}\.html)') +
 def add(name, url):
 classname = 'KeenSpot_%s' % name
+if '/d/' in url:
+stripUrl = url.split('/d/')[0] + '/d/%s.html'
+else:
+stripUrl = url + 'd/%s.html'
 @classmethod
 def _prevUrlModifier(cls, prevUrl):
@@ -28,7 +32,7 @@ def add(name, url):
 globals()[classname] = make_scraper(classname,
 name='KeenSpot/' + name,
 latestUrl=url,
-stripUrl=url + 'd/%s.html',
+stripUrl=stripUrl,
 imageSearch = _imageSearch,
 prevSearch = _prevSearch,
 prevUrlModifier = _prevUrlModifier,
@@ -153,7 +157,7 @@ add('CameoComic', 'http://cameocomic.comicgenesis.com/')
 add('CampAlaska', 'http://campalaska.comicgenesis.com/')
 add('CampusLife', 'http://campuslife.comicgenesis.com/')
 add('CanYouKeepaSecret', 'http://cykas.comicgenesis.com/d/20041035.html')
-add('Candi', 'http://www.candicomics.com/')
+add('Candi', 'http://candicomics.com/')
 add('CanisLupus', 'http://cheetahfox.comicgenesis.com/')
 add('CaptainGreyhound', 'http://captaingreyhound.comicgenesis.com/')
 add('CaptainMike', 'http://captainmike.comicgenesis.com/')
@@ -195,7 +199,6 @@ add('CornerAlley13', 'http://corneralley.comicgenesis.com/d/20101010.html')
 add('CorporateLife', 'http://amacher.comicgenesis.com/')
 add('CosmicAwareness', 'http://cosmicawareness.comicgenesis.com/')
 add('CosmicDrift', 'http://cosmicdrift.comicgenesis.com/')
-add('Countyoursheep', 'http://countyoursheep.keenspot.com/')
 add('CracklingSilence', 'http://crackling.comicgenesis.com/')
 add('Crackwalker', 'http://crackwalker.comicgenesis.com/')
 add('CreepyHead', 'http://creepyhead.comicgenesis.com/')
@ -241,7 +244,6 @@ add('DoomedUntoEternalVigilanceForever', 'http://duevf.comicgenesis.com/')
add('DormSweetDorm', 'http://dormsweetdorm.comicgenesis.com/') add('DormSweetDorm', 'http://dormsweetdorm.comicgenesis.com/')
add('DoubleyouTeeEff', 'http://doubleyouteeeff.comicgenesis.com/') add('DoubleyouTeeEff', 'http://doubleyouteeeff.comicgenesis.com/')
add('Downscale', 'http://downscale.comicgenesis.com/') add('Downscale', 'http://downscale.comicgenesis.com/')
add('DownwardBound', 'http://downwardbound.comicgenesis.com/')
add('Dragon27sBane', 'http://jasonwhitewaterz.comicgenesis.com/') add('Dragon27sBane', 'http://jasonwhitewaterz.comicgenesis.com/')
add('DragonBallTM', 'http://dragonballtm.comicgenesis.com/') add('DragonBallTM', 'http://dragonballtm.comicgenesis.com/')
add('DragonBoy', 'http://dragonboy.comicgenesis.com/') add('DragonBoy', 'http://dragonboy.comicgenesis.com/')
@ -282,7 +284,6 @@ add('EvilWenchesIncorporated', 'http://evilwenchesinc.comicgenesis.com/')
add('EyeoftheMonkey', 'http://eyeofthemonkey.comicgenesis.com/') add('EyeoftheMonkey', 'http://eyeofthemonkey.comicgenesis.com/')
add('Ezailia', 'http://ezailia.comicgenesis.com/') add('Ezailia', 'http://ezailia.comicgenesis.com/')
add('Faces', 'http://faces.comicgenesis.com/') add('Faces', 'http://faces.comicgenesis.com/')
add('FairestandFallen', 'http://fairestandfallen.comicgenesis.com/')
add('FakingSanity', 'http://fakingsanity.comicgenesis.com/') add('FakingSanity', 'http://fakingsanity.comicgenesis.com/')
add('FalseGods', 'http://falsegod.comicgenesis.com/') add('FalseGods', 'http://falsegod.comicgenesis.com/')
add('FancyThat', 'http://fancythat.comicgenesis.com/') add('FancyThat', 'http://fancythat.comicgenesis.com/')
@ -303,7 +304,6 @@ add('FireflyCross', 'http://fireflycross.comicgenesis.com/')
add('FiveMinuteComic', 'http://fiveminute.comicgenesis.com/') add('FiveMinuteComic', 'http://fiveminute.comicgenesis.com/')
add('Fizzle', 'http://fizzle.comicgenesis.com/') add('Fizzle', 'http://fizzle.comicgenesis.com/')
add('FlinchandFriends', 'http://flinch.comicgenesis.com/') add('FlinchandFriends', 'http://flinch.comicgenesis.com/')
add('FlipandSplog', 'http://fas.comicgenesis.com/')
add('Flounderville', 'http://flounderville.comicgenesis.com/') add('Flounderville', 'http://flounderville.comicgenesis.com/')
add('FloydCartoons', 'http://floydcartoons.comicgenesis.com/') add('FloydCartoons', 'http://floydcartoons.comicgenesis.com/')
add('Flunkies', 'http://flunkies.comicgenesis.com/') add('Flunkies', 'http://flunkies.comicgenesis.com/')
@ -320,11 +320,10 @@ add('FourDays', 'http://fourdays.comicgenesis.com/')
add('Fourboys', 'http://fourboys.comicgenesis.com/') add('Fourboys', 'http://fourboys.comicgenesis.com/')
add('Fox27sFreakyAdventures', 'http://basa.comicgenesis.com/') add('Fox27sFreakyAdventures', 'http://basa.comicgenesis.com/')
add('FoxTails', 'http://foxtails.comicgenesis.com/') add('FoxTails', 'http://foxtails.comicgenesis.com/')
add('Framed', 'http://framed.comicgenesis.com/') add('FreakU', 'http://freaku.comicgenesis.com/d/20080827.html')
add('FreakU', 'http://freaku.comicgenesis.com//d/20080827.html')
add('FreaksandG33k', 'http://freaksandgeeks.comicgenesis.com/') add('FreaksandG33k', 'http://freaksandgeeks.comicgenesis.com/')
add('FredtheDot', 'http://fredthedot.comicgenesis.com/') add('FredtheDot', 'http://fredthedot.comicgenesis.com/')
add('FreeParking', 'http://freeparking.comicgenesis.com//d/20051029.html') add('FreeParking', 'http://freeparking.comicgenesis.com/d/20051029.html')
add('FromTheAntiCulture', 'http://anticulture.comicgenesis.com/') add('FromTheAntiCulture', 'http://anticulture.comicgenesis.com/')
add('FromTheMargin', 'http://fromthemargin.comicgenesis.com/') add('FromTheMargin', 'http://fromthemargin.comicgenesis.com/')
add('FruitFlies', 'http://fruitflies.comicgenesis.com/') add('FruitFlies', 'http://fruitflies.comicgenesis.com/')
@ -337,7 +336,6 @@ add('GambitasBishounen', 'http://snigepippi.comicgenesis.com/')
add('GameJumpers', 'http://gamejumpers.comicgenesis.com/') add('GameJumpers', 'http://gamejumpers.comicgenesis.com/')
add('GameMisconduct', 'http://gmhockey.comicgenesis.com/') add('GameMisconduct', 'http://gmhockey.comicgenesis.com/')
add('Gameboy', 'http://gameboy.comicgenesis.com/') add('Gameboy', 'http://gameboy.comicgenesis.com/')
add('GamerPsychotica', 'http://gp.comicgenesis.com/d/20060113.html')
add('GamersParadox', 'http://gamersparadox.comicgenesis.com/') add('GamersParadox', 'http://gamersparadox.comicgenesis.com/')
add('GamingGuardians', 'http://gamingguardians.comicgenesis.com/') add('GamingGuardians', 'http://gamingguardians.comicgenesis.com/')
add('GamingReality', 'http://gamingreality.comicgenesis.com/') add('GamingReality', 'http://gamingreality.comicgenesis.com/')
@ -361,13 +359,11 @@ add('GorgeousPrincessCreamyBeamy', 'http://creamybeamy.comicgenesis.com/')
add('GothyMcGee', 'http://gothymcgee.comicgenesis.com/') add('GothyMcGee', 'http://gothymcgee.comicgenesis.com/')
add('GratuitousMangaStyle', 'http://mangastyle.comicgenesis.com/') add('GratuitousMangaStyle', 'http://mangastyle.comicgenesis.com/')
add('GraveyardShift', 'http://graveyardshift.comicgenesis.com/') add('GraveyardShift', 'http://graveyardshift.comicgenesis.com/')
add('Gravity', 'http://gravity.comicgenesis.com/')
add('GreenLightGo', 'http://glg.comicgenesis.com/') add('GreenLightGo', 'http://glg.comicgenesis.com/')
add('GroundFloor', 'http://groundfloor.comicgenesis.com/') add('GroundFloor', 'http://groundfloor.comicgenesis.com/')
add('Grumpythefathamster', 'http://burp.comicgenesis.com/') add('Grumpythefathamster', 'http://burp.comicgenesis.com/')
add('GuiShinTaeChiAKAGhostHunter', 'http://ghosthunter.comicgenesis.com/') add('GuiShinTaeChiAKAGhostHunter', 'http://ghosthunter.comicgenesis.com/')
add('HERZBLUT', 'http://herzblut.comicgenesis.com/') add('HERZBLUT', 'http://herzblut.comicgenesis.com/')
add('HalflightBreaking', 'http://halflight.comicgenesis.com/d/20021031.html')
add('HangingAround', 'http://hangingaround.comicgenesis.com/') add('HangingAround', 'http://hangingaround.comicgenesis.com/')
add('Hans', 'http://hans.comicgenesis.com/') add('Hans', 'http://hans.comicgenesis.com/')
add('HaypennyRag', 'http://haypenny.comicgenesis.com/') add('HaypennyRag', 'http://haypenny.comicgenesis.com/')
@ -412,7 +408,6 @@ add('InappropriateIrving', 'http://irving.comicgenesis.com/')
add('InfiniteSouls', 'http://keyoko.comicgenesis.com/') add('InfiniteSouls', 'http://keyoko.comicgenesis.com/')
add('InkyorShaggy', 'http://inkyorshaggy.comicgenesis.com/') add('InkyorShaggy', 'http://inkyorshaggy.comicgenesis.com/')
add('IntergalacticSpaceSheriffs', 'http://spacesheriffs.comicgenesis.com/') add('IntergalacticSpaceSheriffs', 'http://spacesheriffs.comicgenesis.com/')
add('Inverloch', 'http://inverloch.comicgenesis.com/')
add('IpsoFacto', 'http://ipsofactocomic.comicgenesis.com/') add('IpsoFacto', 'http://ipsofactocomic.comicgenesis.com/')
add('ItHurtsToBeThatStupid', 'http://ihtbts.comicgenesis.com/') add('ItHurtsToBeThatStupid', 'http://ihtbts.comicgenesis.com/')
add('ItsGravy', 'http://itsgravy.comicgenesis.com/') add('ItsGravy', 'http://itsgravy.comicgenesis.com/')
@ -470,7 +465,6 @@ add('LifeGoesOn', 'http://lgo.comicgenesis.com/')
add('LifeinBellCounty', 'http://bellcountylife.comicgenesis.com/') add('LifeinBellCounty', 'http://bellcountylife.comicgenesis.com/')
add('LifeisUnfair', 'http://lifeisunfair.comicgenesis.com/') add('LifeisUnfair', 'http://lifeisunfair.comicgenesis.com/')
add('LifeofBuddha', 'http://lifeofbuddha.comicgenesis.com/') add('LifeofBuddha', 'http://lifeofbuddha.comicgenesis.com/')
add('LifeonForbez', 'http://cdc.comicgenesis.com/')
add('Lightbringer', 'http://lightbringer.comicgenesis.com/') add('Lightbringer', 'http://lightbringer.comicgenesis.com/')
add('LikeItIs', 'http://likeitis.comicgenesis.com/') add('LikeItIs', 'http://likeitis.comicgenesis.com/')
add('LilDude', 'http://lildudecomics.comicgenesis.com/') add('LilDude', 'http://lildudecomics.comicgenesis.com/')
@ -492,7 +486,6 @@ add('LustForFreelance', 'http://imabubble.comicgenesis.com/')
add('MEHComics', 'http://mehcomics.comicgenesis.com/') add('MEHComics', 'http://mehcomics.comicgenesis.com/')
add('MORONS', 'http://morons.comicgenesis.com/') add('MORONS', 'http://morons.comicgenesis.com/')
add('MTranc3', 'http://mtranc3.comicgenesis.com/') add('MTranc3', 'http://mtranc3.comicgenesis.com/')
add('MacHall', 'http://machall.comicgenesis.com/d/20020125.html')
add('Maddland', 'http://maddland.comicgenesis.com/') add('Maddland', 'http://maddland.comicgenesis.com/')
add('MadeInHeaven', 'http://mih.comicgenesis.com/') add('MadeInHeaven', 'http://mih.comicgenesis.com/')
add('MagiIndustries', 'http://magiindustries.comicgenesis.com/') add('MagiIndustries', 'http://magiindustries.comicgenesis.com/')
@ -512,7 +505,6 @@ add('MelEverymanAndHisSarcasticTalkingHousepetAmbrose', 'http://everyman.comicge
add('MenschunsererZeitGerman', 'http://muz.comicgenesis.com/') add('MenschunsererZeitGerman', 'http://muz.comicgenesis.com/')
add('Midcentral', 'http://midcentral.comicgenesis.com/') add('Midcentral', 'http://midcentral.comicgenesis.com/')
add('MiketheMulletThing', 'http://mikethemulletthing.comicgenesis.com/') add('MiketheMulletThing', 'http://mikethemulletthing.comicgenesis.com/')
add('Mindmistress', 'http://mindmistress.comicgenesis.com/')
add('Mindtap', 'http://mindtap.comicgenesis.com/') add('Mindtap', 'http://mindtap.comicgenesis.com/')
add('MinimalismSucks', 'http://minisuck.comicgenesis.com/') add('MinimalismSucks', 'http://minisuck.comicgenesis.com/')
add('MinimumSecurityUniversity', 'http://mshs.comicgenesis.com/') add('MinimumSecurityUniversity', 'http://mshs.comicgenesis.com/')
@ -526,7 +518,6 @@ add('MorysEducation', 'http://mory.comicgenesis.com/')
add('MrBoffleandFriends', 'http://mrboffle.comicgenesis.com/') add('MrBoffleandFriends', 'http://mrboffle.comicgenesis.com/')
add('MrBubbles', 'http://profound.comicgenesis.com/') add('MrBubbles', 'http://profound.comicgenesis.com/')
add('MrFooAdventures', 'http://foo.comicgenesis.com/') add('MrFooAdventures', 'http://foo.comicgenesis.com/')
add('MrPinkBlob', 'http://mrpinkblob.comicgenesis.com/d/100.html')
add('MrScience', 'http://mrscience.comicgenesis.com/') add('MrScience', 'http://mrscience.comicgenesis.com/')
add('Muertitos', 'http://muertitos.comicgenesis.com/') add('Muertitos', 'http://muertitos.comicgenesis.com/')
add('Muffythelitlerabbit', 'http://muffyrabbit.comicgenesis.com/') add('Muffythelitlerabbit', 'http://muffyrabbit.comicgenesis.com/')
@ -545,7 +536,6 @@ add('NastyChocolates', 'http://nastychocolates.comicgenesis.com/')
add('NeTrek', 'http://netrek.comicgenesis.com/') add('NeTrek', 'http://netrek.comicgenesis.com/')
add('NeedleandThread', 'http://needleandthread.comicgenesis.com/') add('NeedleandThread', 'http://needleandthread.comicgenesis.com/')
add('NekkoandJoruba', 'http://nekkoandjoruba.comicgenesis.com/d/20050816.html') add('NekkoandJoruba', 'http://nekkoandjoruba.comicgenesis.com/d/20050816.html')
add('NekoTheKitty', 'http://nekothekitty.comicgenesis.com/')
add('Nekotime', 'http://nekotime.comicgenesis.com/') add('Nekotime', 'http://nekotime.comicgenesis.com/')
add('Netjeru', 'http://netjeru.comicgenesis.com/') add('Netjeru', 'http://netjeru.comicgenesis.com/')
add('NeverYouMind', 'http://neveryoumind.comicgenesis.com/') add('NeverYouMind', 'http://neveryoumind.comicgenesis.com/')
@ -896,7 +886,6 @@ add('Unconventional', 'http://unconventional.comicgenesis.com/')
add('UnfamiliarReflection', 'http://emri.comicgenesis.com/') add('UnfamiliarReflection', 'http://emri.comicgenesis.com/')
add('UnlifeOnline', 'http://unlifeonline.comicgenesis.com/') add('UnlifeOnline', 'http://unlifeonline.comicgenesis.com/')
add('UnseenFate', 'http://unseenfate.comicgenesis.com/') add('UnseenFate', 'http://unseenfate.comicgenesis.com/')
add('Untitled', 'http://untitled.comicgenesis.com/')
add('UntitledAgain', 'http://untitledagain.comicgenesis.com/') add('UntitledAgain', 'http://untitledagain.comicgenesis.com/')
add('UrbanFable', 'http://urbanfable.comicgenesis.com/') add('UrbanFable', 'http://urbanfable.comicgenesis.com/')
add('VRPG', 'http://vrpg.comicgenesis.com/') add('VRPG', 'http://vrpg.comicgenesis.com/')
@ -993,7 +982,6 @@ add('silvette', 'http://silvette.comicgenesis.com/')
add('skimlinescomAcollectionofthings', 'http://www.skimlines.com/') add('skimlinescomAcollectionofthings', 'http://www.skimlines.com/')
add('smut', 'http://smut.comicgenesis.com/') add('smut', 'http://smut.comicgenesis.com/')
add('socializedmedicine', 'http://socializedmedicine.comicgenesis.com/') add('socializedmedicine', 'http://socializedmedicine.comicgenesis.com/')
add('spacejams', 'http://spacejams.comicgenesis.com/d/20020820.html')
add('spiderfrogballoon', 'http://spiderfrogballoon.comicgenesis.com/') add('spiderfrogballoon', 'http://spiderfrogballoon.comicgenesis.com/')
add('theadventuresofmegamanandlink', 'http://takeru.comicgenesis.com/') add('theadventuresofmegamanandlink', 'http://takeru.comicgenesis.com/')
add('theendofthings', 'http://endofthings.comicgenesis.com/') add('theendofthings', 'http://endofthings.comicgenesis.com/')

View file

@@ -27,6 +27,7 @@ class NeoEarth(_BasicScraper):
class NewAdventuresOfBobbin(_BasicScraper):
    latestUrl = 'http://www.bobbin-comic.com/bobbin_strips/'
    imageSearch = compile(tagre("a", "href", r'(\d+\.gif)'))
+    multipleImagesPerStrip = True
    prevSearch = None
    help = 'Index format: none'

View file

@@ -35,7 +35,7 @@ class OnTheEdge(_BasicScraper):
class OneQuestion(_BasicScraper):
-    latestUrl = 'http://onequestioncomic.com/'
+    latestUrl = 'http://www.onequestioncomic.com/'
    stripUrl = latestUrl + 'comic.php?strip_id=%s'
    imageSearch = compile(tagre("img", "src", r'(istrip_files/strips/\d+\.jpg)'))
    prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') + tagre("img", "src", r'img/arrow_prev\.jpg'))

View file

@@ -162,7 +162,3 @@ class PlanescapeSurvival(_BasicScraper):
    imageSearch = compile(r'src="(comics/.+?)"')
    prevSearch = compile(r'<a href="(.+?)"><img alt="Previous" ')
    help = 'Index format: nnn'
-
-    @classmethod
-    def namer(cls, imageUrl, pageUrl):
-        return pageUrl.split('/')[-1].split('.')[0]

View file

@@ -10,8 +10,8 @@ from ..util import tagre
class QuestionableContent(_BasicScraper):
    latestUrl = 'http://www.questionablecontent.net/'
    stripUrl = latestUrl + 'view.php?comic=%s'
-    imageSearch = compile(r'/(comics/\d+\.png)"')
-    prevSearch = compile(r'<a href="(view.php\?comic=\d+)">Previous')
+    imageSearch = compile(tagre("img", "src", r'([^"]+/comics/[^"]+)', before="strip"))
+    prevSearch = compile(tagre("a", "href", r'(view\.php\?comic=\d+)') + 'Previous')
    help = 'Index format: n (unpadded)'

View file

@@ -149,10 +149,6 @@ class SomethingPositive(_BasicScraper):
        "(?:" + tagre("img", "src", r'images/previous\.gif') + "|Previous)")
    help = 'Index format: mmddyyyy'
-
-    @classmethod
-    def namer(cls, imageUrl, pageUrl):
-        return pageUrl.split('/')[-1].split('.')[0]

class SexyLosers(_BasicScraper):
    stripUrl = 'http://www.sexylosers.com/%s.html'
View file

@@ -16,15 +16,17 @@ class TheNoob(_BasicScraper):
    help = 'Index format: nnnn'

class TheOrderOfTheStick(_BasicScraper):
    latestUrl = 'http://www.giantitp.com/comics/oots0863.html'
    stripUrl = 'http://www.giantitp.com/comics/oots%s.html'
-    imageSearch = compile(r'<IMG src="(/comics/images/.+?)">')
+    imageSearch = compile(r'<IMG src="(/comics/images/[^"]+)">')
    prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
    help = 'Index format: n (unpadded)'
    starter = indirectStarter('http://www.giantitp.com/', compile(r'<A href="(/comics/oots\d{4}\.html)"'))

+    @classmethod
+    def namer(cls, imageUrl, pageUrl):
+        return pageUrl.rsplit('/', 1)[-1][:-5]
+
class TheParkingLotIsFull(_BasicScraper):
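
The new namer above keeps only the page's own file name, so downloaded strips are named after the page rather than the image. A quick check of the string handling (illustrative only):

    # Quick check of the namer's string handling, not part of the commit.
    pageUrl = 'http://www.giantitp.com/comics/oots0863.html'
    name = pageUrl.rsplit('/', 1)[-1][:-5]   # drop the path, then the '.html' suffix
    print(name)  # -> oots0863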
@@ -36,7 +38,6 @@ class TheParkingLotIsFull(_BasicScraper):
    help = 'Index format: nnn'
-
class TheWotch(_BasicScraper):
    latestUrl = 'http://www.thewotch.com/'
    stripUrl = latestUrl + '?date=%s'

View file

@@ -29,7 +29,7 @@ class UnicornJelly(_BasicScraper):
class UserFriendly(_BasicScraper):
    starter = bounceStarter('http://ars.userfriendly.org/cartoons/?mode=classic', compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="">'))
    stripUrl = 'http://ars.userfriendly.org/cartoons/?id=%s&mode=classic'
-    imageSearch = compile(r'<img border="0" src="(http://www.userfriendly.org/cartoons/archives/\d{2}\w{3}/.+?\.gif)"')
+    imageSearch = compile(r'<img border="0" src="\s*(http://www.userfriendly.org/cartoons/archives/\d{2}\w{3}/.+?\.gif)"')
    prevSearch = compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="Previous Cartoon">')
    help = 'Index format: yyyymmdd'

View file

@@ -35,3 +35,7 @@ class ViiviJaWagner(_BasicScraper):
    imageSearch = compile(tagre("link", "href", r'(http://hs\d+\.snstatic\.fi/webkuva/oletus/[^"]+)', before="image_src"))
    prevSearch = compile(tagre("a", "href", r'(/viivijawagner/[^"]+)', before="prev-cm"))
    help = 'Index format: none'
+
+    @classmethod
+    def namer(cls, imageUrl, pageUrl):
+        return imageUrl.split('=')[1]
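
The namer added above uses everything after the first '=' in the image URL as the file name. Illustrative only, with a hypothetical URL shape (the real snstatic.fi URL layout is not shown in this diff):

    imageUrl = 'http://hs12.snstatic.fi/webkuva/oletus/kuva?id=1305624653931'  # hypothetical
    print(imageUrl.split('=')[1])  # -> 1305624653931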

View file

@@ -0,0 +1,26 @@
+# -*- coding: iso-8859-1 -*-
+# Copyright (C) 2012 Bastian Kleineidam
+from re import compile
+
+from ..scraper import make_scraper
+from ..util import tagre
+
+_prevSearch = compile(tagre("a", "href", r'(\?id=\d+)') + tagre("img", "src", r'images/navi-zurueck\.gif'))
+_imageSearch = compile(tagre("img", "src", r'([^"]+/img/comic/[^"]+)', after="comicimg"))
+
+def add(name, shortname):
+    latestUrl = 'http://%s.webcomic.eu/' % shortname
+    classname = 'WebcomicEu_%s' % name
+    globals()[classname] = make_scraper(classname,
+        name = 'WebcomicEu/' + name,
+        latestUrl = latestUrl,
+        stripUrl = latestUrl + '?id=%s',
+        imageSearch = _imageSearch,
+        prevSearch = _prevSearch,
+        help = 'Index format: number',
+    )
+
+add('TheBessEffect', 'thebesseffect')
+add('TheBessEffectEnglish', 'tbe-english')
+add('Talandor', 'talandor')
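
add() above registers one generated scraper class per comic in the module globals. A rough sketch of what the first entry amounts to, assuming make_scraper simply attaches the keyword arguments to a _BasicScraper subclass (class and attribute names below are inferred from the code above, not quoted from it):

    # Rough sketch, not from the commit: the class that
    # add('TheBessEffect', 'thebesseffect') registers as
    # globals()['WebcomicEu_TheBessEffect'].
    class WebcomicEu_TheBessEffect(object):  # really a _BasicScraper subclass
        name = 'WebcomicEu/TheBessEffect'
        latestUrl = 'http://thebesseffect.webcomic.eu/'
        stripUrl = 'http://thebesseffect.webcomic.eu/?id=%s'  # strip 42 -> ...?id=42
        help = 'Index format: number'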

View file

@@ -28,5 +28,4 @@ def add(name, subpath):
add('AgnesQuill', 'daveroman/agnes/')
add('MyMuse', 'gc/muse/')
add('NekkoAndJoruba', 'nekkoandjoruba/nekkoandjoruba/')
-add('JaxEpoch', 'johngreen/quicken/')
add('ClownSamurai', 'qsamurai/clownsamurai/')

View file

@@ -8,7 +8,7 @@ from ..scraper import make_scraper
from ..helpers import bounceStarter

-_imageSearch = compile(tagre("img", "src", r'(http://www\.wlpcomics\.com/(?:adult|general)/[^"]+)'))
+_imageSearch = compile(tagre("img", "src", r'(http://www\.wlpcomics\.com/(?:adult|general)/[^"]+/comics/[^"]+)'))
_prevSearch = compile(tagre("a", "href", r'(\w+.html)') + 'Previous')
_nextSearch = compile(tagre("a", "href", r'(\w+.html)') + 'Next')

View file

@@ -8,6 +8,14 @@ from ..util import tagre
from ..helpers import bounceStarter

+class ZapComic(_BasicScraper):
+    latestUrl = 'http://www.zapcomic.com/'
+    stripUrl = latestUrl + '%s/'
+    imageSearch = compile(tagre("img", "src", r'(http://www\.zapcomic\.com\?comic_object=\d+)'))
+    prevSearch = compile(tagre("a", "href", r'(http://www\.zapcomic\.com/[^"]+)', after="previous-comic-link"))
+    help = 'Index format: yyyy/mm/nnn-stripname'
+
+
class Zapiro(_BasicScraper):
    baseUrl = 'http://www.mg.co.za/zapiro/'
    starter = bounceStarter(baseUrl,
@@ -23,6 +31,14 @@ class Zapiro(_BasicScraper):
        return name

+
+class ZebraGirl(_BasicScraper):
+    latestUrl = 'http://www.zebragirl.net/'
+    stripUrl = latestUrl + '?date=%s'
+    imageSearch = compile(tagre("img", "src", r"(comics/[^']+)", quote="'"))
+    prevSearch = compile(tagre("link", "href", r"(/\?date=[^']+)", quote="'", before='Previous'))
+    help = 'Index format: yyyy-mm-dd'
+
class ZombieHunters(_BasicScraper):
    latestUrl = 'http://www.thezombiehunters.com/'
    stripUrl = latestUrl + '?strip_id=%s'

View file

@@ -43,7 +43,7 @@ class _BasicScraper(object):
        msg = 'Retrieving the current strip'
        if self.indexes:
            msg += " for indexes %s" % self.indexes
-        out.write(msg+"...")
+        out.info(msg+"...")
        if self.indexes:
            for index in self.indexes:
                url = self.stripUrl % index
@@ -55,40 +55,48 @@ class _BasicScraper(object):
        """Get comic strip for given URL."""
        imageUrls = fetchUrls(url, self.imageSearch)[0]
        if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
-            raise ValueError("found %d images with %s" % (len(imageUrls), self.imageSearch.pattern))
+            out.warn("found %d images instead of 1 with %s" % (len(imageUrls), self.imageSearch.pattern))
        return self.getComicStrip(url, imageUrls)

    def getComicStrip(self, url, imageUrls):
        """Get comic strip downloader for given URL and images."""
        return ComicStrip(self.get_name(), url, imageUrls, self.namer)

-    def getAllStrips(self):
+    def getAllStrips(self, maxstrips=None):
        """Get all comic strips."""
-        msg = 'Retrieving all strips'
-        if self.indexes:
-            msg += " for indexes %s" % self.indexes
-        out.write(msg+"...")
+        if maxstrips:
+            msg = 'Retrieving %d strips' % maxstrips
+        elif self.indexes:
+            msg = "Retrieving %d strips for indexes %s" % (len(self.indexes), self.indexes)
+        else:
+            msg = 'Retrieving all strips'
+        out.info(msg+"...")
        if self.indexes:
            for index in self.indexes:
                url = self.stripUrl % index
-                for strip in self.getAllStripsFor(url):
+                for strip in self.getStripsFor(url, 1):
                    yield strip
        else:
            url = self.getLatestUrl()
-            for strip in self.getAllStripsFor(url):
+            for strip in self.getStripsFor(url, maxstrips):
                yield strip

-    def getAllStripsFor(self, url):
-        """Get all comic strips for an URL."""
+    def getStripsFor(self, url, maxstrips):
+        """Get comic strips for an URL. If maxstrips is a positive number, stop after
+        retrieving the given number of strips."""
        seen_urls = set()
        while url:
            imageUrls, prevUrl = fetchUrls(url, self.imageSearch, self.prevSearch)
            prevUrl = self.prevUrlModifier(prevUrl)
-            out.write("Matched previous URL %s" % prevUrl, 2)
+            out.debug("Matched previous URL %s" % prevUrl)
            seen_urls.add(url)
            yield self.getComicStrip(url, imageUrls)
            # avoid recursive URL loops
            url = prevUrl if prevUrl not in seen_urls else None
+            if maxstrips is not None:
+                maxstrips -= 1
+                if maxstrips <= 0:
+                    break

    def setStrip(self, index):
        """Set current comic strip URL."""
@@ -161,13 +169,13 @@ def get_scrapers():
    """
    global _scrapers
    if _scrapers is None:
-        out.write("Loading comic modules...", 2)
+        out.debug("Loading comic modules...")
        modules = loader.get_modules()
        plugins = loader.get_plugins(modules, _BasicScraper)
        _scrapers = list(plugins)
        _scrapers.sort(key=lambda s: s.get_name())
        check_scrapers()
-        out.write("... %d modules loaded." % len(_scrapers), 2)
+        out.debug("... %d modules loaded." % len(_scrapers))
    return _scrapers

View file

@@ -27,6 +27,8 @@ MaxContentBytes = 1024 * 1024 * 2 # 2 MB
# Maximum content size for images
MaxImageBytes = 1024 * 1024 * 20 # 20 MB
+# Default connection timeout
+ConnectionTimeoutSecs = 60

def tagre(tag, attribute, value, quote='"', before="", after=""):
    """Return a regular expression matching the given HTML tag, attribute
@@ -102,7 +104,7 @@ def fetchUrl(url, urlSearch):
        searchUrl = match.group(1)
        if not searchUrl:
            raise ValueError("Match empty URL at %s with pattern %s" % (url, urlSearch.pattern))
-        out.write('matched URL %r' % searchUrl, 2)
+        out.debug('matched URL %r' % searchUrl)
        return normaliseURL(urlparse.urljoin(baseUrl, searchUrl))
    return None
@@ -115,10 +117,10 @@ def fetchUrls(url, imageSearch, prevSearch=None):
        imageUrl = match.group(1)
        if not imageUrl:
            raise ValueError("Match empty image URL at %s with pattern %s" % (url, imageSearch.pattern))
-        out.write('matched image URL %r with pattern %s' % (imageUrl, imageSearch.pattern), 2)
+        out.debug('matched image URL %r with pattern %s' % (imageUrl, imageSearch.pattern))
        imageUrls.add(normaliseURL(urlparse.urljoin(baseUrl, imageUrl)))
    if not imageUrls:
-        out.write("warning: no images found at %s with pattern %s" % (url, imageSearch.pattern))
+        out.warn("no images found at %s with pattern %s" % (url, imageSearch.pattern))
    if prevSearch is not None:
        # match previous URL
        match = prevSearch.search(data)
@@ -128,7 +130,7 @@ def fetchUrls(url, imageSearch, prevSearch=None):
                raise ValueError("Match empty previous URL at %s with pattern %s" % (url, prevSearch.pattern))
            prevUrl = normaliseURL(urlparse.urljoin(baseUrl, prevUrl))
        else:
-            out.write('no previous URL %s at %s' % (prevSearch.pattern, url), 2)
+            out.debug('no previous URL %s at %s' % (prevSearch.pattern, url))
            prevUrl = None
        return imageUrls, prevUrl
    return imageUrls, None
@@ -183,8 +185,9 @@ def normaliseURL(url):
    return urlparse.urlunparse(pu)

-def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5, max_content_bytes=None):
-    out.write('Open URL %s' % url, 2)
+def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5, max_content_bytes=None,
+            timeout=ConnectionTimeoutSecs):
+    out.debug('Open URL %s' % url)
    assert retries >= 0, 'invalid retry value %r' % retries
    assert retry_wait_seconds > 0, 'invalid retry seconds value %r' % retry_wait_seconds
    headers = {'User-Agent': UserAgent}
@@ -192,13 +195,12 @@ def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5, max_content_byt
    if referrer:
        headers['Referer'] = referrer
    try:
-        req = requests.get(url, headers=headers, config=config, prefetch=False)
+        req = requests.get(url, headers=headers, config=config, prefetch=False, timeout=timeout)
        check_content_size(url, req.headers, max_content_bytes)
        req.raise_for_status()
        return req
    except requests.exceptions.RequestException as err:
        msg = 'URL retrieval of %s failed: %s' % (url, err)
-        out.write(msg)
        raise IOError(msg)

def check_content_size(url, headers, max_content_bytes):
@@ -251,7 +253,7 @@ def getRelativePath(basepath, path):
def getQueryParams(url):
    query = urlparse.urlsplit(url)[3]
-    out.write('Extracting query parameters from %r (%r)...' % (url, query), 3)
+    out.debug('Extracting query parameters from %r (%r)...' % (url, query))
    return cgi.parse_qs(query)
@@ -334,10 +336,16 @@ def asciify(name):
def unquote(text):
    while '%' in text:
-        text = urllib.unquote(text)
+        newtext = urllib.unquote(text)
+        if newtext == text:
+            break
+        text = newtext
    return text

+
+def quote(text):
+    return urllib.quote(text)
+
def strsize (b):
    """Return human representation of bytes b. A negative number of bytes
    raises a value error."""
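
The unquote() change above matters because the old loop ran as long as any '%' remained, so a URL containing a literal percent sign never returned; the new version stops once urllib.unquote makes no further progress. A small check (illustrative; Python 2 urllib, as in the codebase):

    # Illustrative check of the fixed-point loop, not part of the commit.
    import urllib

    def unquote(text):
        while '%' in text:
            newtext = urllib.unquote(text)
            if newtext == text:   # nothing left to decode -> stop instead of spinning
                break
            text = newtext
        return text

    print(unquote('foo%2520bar'))  # '%2520' -> '%20' -> ' ' : 'foo bar'
    print(unquote('100%_done'))    # literal '%' stays; the old loop would never return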
@@ -357,3 +365,20 @@ def strsize (b):
        return "%.2fGB" % (float(b) / (1024*1024*1024))
    return "%.1fGB" % (float(b) / (1024*1024*1024))
+
+
+def getDirname(name):
+    """Replace slashes with path separator of name."""
+    return name.replace('/', os.sep)
+
+
+def getFilename(name):
+    # first replace all illegal chars
+    name = re.sub(r"[^0-9a-zA-Z_\-\.]", "_", name)
+    # then remove double dots and underscores
+    while ".." in name:
+        name = name.replace('..', '.')
+    while "__" in name:
+        name = name.replace('__', '_')
+    # remove a leading dot or minus
+    if name.startswith((".", "-")):
+        name = name[1:]
+    return name
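
getFilename() above reduces an arbitrary comic name to something safe for the local file system, while getDirname() keeps the '/' hierarchy by mapping it to path separators. A couple of worked examples for getFilename (illustrative only, function repeated here so the snippet runs on its own):

    # Worked examples, not part of the commit.
    import re

    def getFilename(name):
        name = re.sub(r"[^0-9a-zA-Z_\-\.]", "_", name)   # replace illegal chars
        while ".." in name:
            name = name.replace('..', '.')               # collapse double dots
        while "__" in name:
            name = name.replace('__', '_')               # collapse double underscores
        if name.startswith((".", "-")):
            name = name[1:]                              # no leading dot or minus
        return name

    print(getFilename('Calvin and Hobbes'))   # -> Calvin_and_Hobbes
    print(getFilename('../etc/passwd'))       # -> _etc_passwd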

View file

@@ -47,6 +47,7 @@ exclude_comics = [
    "Emerald_Winter", # broken images
    "Enter_the_Duck_2", # broken images
    "ffff", # broken images
+    "Found_Art", # broken images
    "Function_Over_Fashion", # broken images
    "Funday_Morning", # broken images
    "greys_journey", # broken images
@@ -69,6 +70,7 @@ exclude_comics = [
    "Louder_Than_Bombs", # broken images
    "Lucky_Dawg", # broken images
    "Mario_in_Johto", # broken images
+    "Mary_Sue_Academy", # broken images
    "Master", # start page requires login
    "Mastermind_BTRN", # broken images
    "MAYA_____The_legend_of_Wolf", # broken images

View file

@@ -35,6 +35,7 @@ exclude_comics = [
    "OysterWar", # too few comics
    "PIGTIMES", # comic unavailable
    "PS", # comic unavailable
+    "RichardsPoorAlmanac", # missing images
    "SherpaAid", # comic unavailable
    "SparComics", # comic unavailable
]

File diff suppressed because one or more lines are too long

View file

@@ -72,6 +72,7 @@ exclude_comics = [
    "ComicMischief", # page moved
    "ComputerGameAddicts", # page moved
    "Concession", # page moved
+    "Countyoursheep", # broken links
    "CorridorZ", # page does not follow standard layout
    "CrashBoomMagic", # page moved
    "CrazySlowlyGoing", # page has 403 forbidden
@@ -85,6 +86,7 @@ exclude_comics = [
    "DimBulbComics", # page is gone
    "DIVE", # page is gone
    "DominicDeegan", # page moved
+    "DownwardBound", # page does not follow standard layout
    "DungeonDamage", # page does not follow standard layout
    "Dylan", # page has 403 forbidden
    "EarthRiser", # redirects to a new page
@@ -99,6 +101,7 @@ exclude_comics = [
    "Evilish", # page moved
    "EvolBara", # page is gone
    "FaerieTales", # page does not follow standard layout
+    "FairestandFallen", # page does not follow standard layout
    "FairyTaleNewVillage", # missing images
    "Fate27sTear", # page moved
    "FaultyLogic", # page does not follow standard layout
@@ -107,9 +110,12 @@ exclude_comics = [
    "Flatwood", # page moved
    "FLEMComics", # page moved
    "FletchersCave", # page is broken
+    "FlipandSplog", # page does not follow standard layout
    "ForcesofGoodandEvil", # page does not follow standard layout
+    "Framed", # page does not follow standard layout
    "FurryBlackDevil", # page moved
    "Galacticus", # page has 403 forbidden
+    "GamerPsychotica", # page does not follow standard layout
    "GeebasonParade", # page does not follow standard layout
    "geeks", # page moved
    "GeminiBright", # page does not follow standard layout
@@ -119,9 +125,11 @@ exclude_comics = [
    "GODLIKE", # page has 403 forbidden
    "GoForIt", # page is gone
    "GothBoy", # page moved
+    "Gravity", # page does not follow standard layout
    "Grimage", # page moved
    "GrossePointeDogs", # page is broken
    "GUComics", # page moved
+    "HalflightBreaking", # page does not follow standard layout
    "HardUnderbelly", # page does not follow standard layout
    "HazardousScience", # page is gone
    "HereThereBeDragons", # page moved
@@ -138,6 +146,7 @@ exclude_comics = [
    "InsideJoke", # page is gone
    "InsidetheBox", # page has 403 forbidden
    "InternationalHopeFoundation", # page does not follow standard layout
+    "Inverloch", # page does not follow standard layout
    "JamieandNick", # page moved
    "JasonLovesHisGrandpa", # page is gone
    "JavanteasFate", # page is gone
@@ -165,8 +174,10 @@ exclude_comics = [
    "LinktotheBoards", # page does not follow standard layout
    "LinT", # page moved
    "LiterallySpeaking", # page does not follow standard layout
+    "LifeonForbez", # missing images
    "LoxieAndZoot", # page does not follow standard layout
    "Lunchtable", # missing images
+    "MacHall", # page does not follow standard layout
    "MadWorld", # page has 403 forbidden
    "Magellan", # page does not follow standard layout
    "Marachan", # missing images
@@ -175,13 +186,16 @@ exclude_comics = [
    "Meiosis", # page moved
    "Michikomonogatari", # page does not follow standard layout
    "MidnorthFlourCo", # page has 403 forbidden
+    "Mindmistress", # page does not follow standard layout
    "MintCondition", # page moved
    "MisadventuresinPhysics", # page has 403 forbidden
    "MobileMadness", # page does not follow standard layout
+    "MrPinkBlob", # page does not follow standard layout
    "MyAngelYouAreAngel", # page is gone
    "MyBrainHurts", # page does not follow standard layout
    "NAFTANorthAmericanFreeToonAgreementalsoYankuckcanee", # page does not follow standard layout
    "NeglectedMarioCharacterComix", # page does not follow standard layout
+    "NekoTheKitty", # page does not follow standard layout
    "Nemutionjewel", # page does not follow standard layout
    "Nerdgasm", # missing images
    "Nerdz", # page is gone
@@ -249,6 +263,7 @@ exclude_comics = [
    "SoManyLevels", # page moved
    "SomethingSoft", # page is gone
    "Sorcery101", # page moved
+    "spacejams", # page does not follow standard layout
    "SpellBinder", # page is gone
    "SPQRBlues", # page moved
    "StationV3", # page moved
@@ -294,6 +309,7 @@ exclude_comics = [
    "TwoEvilScientists", # page moved
    "TwoLumps", # page moved
    "TwoSidesWide", # page moved
+    "Untitled", # page does not follow standard layout
    "Vendetta", # page moved
    "VictimsoftheSystem", # page moved
    "Victor", # page moved
@@ -318,23 +334,19 @@ url_overrides = {
    "AmazonSpaceRangers": "http://amazons.comicgenesis.com/d/20051015.html",
    "ArroganceinSimplicity": "http://arrogance.comicgenesis.com/d/20030217.html",
    "ATasteofEvil": "http://atasteofevil.comicgenesis.com/d/20050314.html",
+    'Candi': 'http://candicomics.com/',
    "CanYouKeepaSecret": "http://cykas.comicgenesis.com/d/20041035.html",
    "CapturetheMoment": "http://capturethemoment.comicgenesis.com/d/20100927.html",
    "CornerAlley13": "http://corneralley.comicgenesis.com/d/20101010.html",
-    "Countyoursheep": "http://countyoursheep.keenspot.com/",
-    "FreakU": "http://freaku.comicgenesis.com//d/20080827.html",
-    "FreeParking": "http://freeparking.comicgenesis.com//d/20051029.html",
-    "GamerPsychotica": "http://gp.comicgenesis.com/d/20060113.html",
+    "FreakU": "http://freaku.comicgenesis.com/d/20080827.html",
+    "FreeParking": "http://freeparking.comicgenesis.com/d/20051029.html",
    "GoneAstray": "http://goneastray.comicgenesis.com/d/20100305.html",
    "GoodnEvil": "http://gne.comicgenesis.com/d/20040814.html",
-    "HalflightBreaking": "http://halflight.comicgenesis.com/d/20021031.html",
    "HealerOnFeatheredWings": "http://selsachronicles.comicgenesis.com/",
    "HowNottoRunAComic": "http://hownottorunacomic.comicgenesis.com/d/19950719.html",
    "HurricaneParty": "http://hurricaneparty.comicgenesis.com/d/20040123.html",
-    "MacHall": "http://machall.comicgenesis.com/d/20020125.html",
    "MaryQuiteContrary": "http://marycontrary.comicgenesis.com/d/20070824.html",
    "MoonCrest24": "http://mooncrest.comicgenesis.com/d/20121117.html",
-    "MrPinkBlob": "http://mrpinkblob.comicgenesis.com/d/100.html",
    "NekkoandJoruba": "http://nekkoandjoruba.comicgenesis.com/d/20050816.html",
    "No4thWalltoBreak": "http://no4thwalltobreak.comicgenesis.com/d/20041025.html",
    "OtakuKyokai": "http://otakukyokai.comicgenesis.com/d/20060818.html",
@@ -345,7 +357,6 @@ url_overrides = {
    "PlanetsCollide": "http://ruthcomix.comicgenesis.com/d/20010706.html",
    "RuneMaster": "http://runemaster.comicgenesis.com/d/20050607.html",
    "ShinobiHigh": "http://shinobihigh.comicgenesis.com/d/20020118.html",
-    "spacejams": "http://spacejams.comicgenesis.com/d/20020820.html",
    "TheAdventuresofVindibuddSuperheroInTraining": "http://vindibudd.comicgenesis.com/d/20070720.html",
    "TriumphantLosers": "http://triumphantlosers.comicgenesis.com/d/20081006.html",
    "Zortic": "http://zortic.comicgenesis.com/d/20030922.html",

View file

@@ -19,6 +19,7 @@ htmltemplate = """
<link rel="stylesheet" href="css/main.css">
<link rel="stylesheet" href="css/dosage.css">
<script src="js/masonry.min.js"></script>
+<script src="http://use.edgefonts.net/open-sans.js"></script>
</head>
<body>
<p>Dosage test results from %(date)s</p>
@@ -85,6 +86,8 @@ def get_content(filename):
    res = []
    for name, url in tests:
        css = name.split()[-1].lower()
+        if len(name) > 25 and '/' in name:
+            name = name.replace('/', '/ ')
        if url:
            inner = '<a href="%s" class="%s">%s</a>' % (url, css, name)
        else:
@@ -94,7 +97,7 @@ def get_content(filename):

def main(args):
-    filename = "testresults.txt"
+    filename = args[0]
    modified = get_mtime(filename)
    content = get_content(filename)
    attrs = {"date": strdate(modified), "content": content}

View file

@@ -7,5 +7,5 @@ script=test.sh
rm -f "$script"
echo "#!/bin/sh -e" > "$script"
-egrep -v "^\. " testresults.txt | egrep "^F " | cut -b "3-" | sort | awk '{ print "make test TESTOUTPUT=/dev/null TESTS=" $0; }' >> "$script"
+egrep -v "^\. " testresults.txt | egrep "^F " | cut -b "3-" | sort | awk '{ print "make test PYTESTOPTS=--tb=short TESTS=" $0; }' >> "$script"
chmod 755 "$script"

View file

@@ -36,13 +36,13 @@ class _ComicTester(TestCase):
        num = 0
        max_strips = 5
        for strip in islice(scraperobj.getAllStrips(), 0, max_strips):
-            images = 0
+            images = []
            for image in strip.getImages():
-                images += 1
+                images.append(image.url)
                self.save(image)
-            self.check(images > 0, 'failed to find images at %s' % strip.stripUrl)
+            self.check(images, 'failed to find images at %s' % strip.stripUrl)
            if not self.scraperclass.multipleImagesPerStrip:
-                self.check(images == 1, 'found %d instead of 1 image at %s' % (images, strip.stripUrl))
+                self.check(len(images) == 1, 'found more than 1 image at %s: %s' % (strip.stripUrl, images))
            if num > 0 and self.scraperclass.prevUrlMatchesStripUrl:
                self.check_stripurl(strip)
            num += 1