Fix more comics.

This commit is contained in:
Bastian Kleineidam 2012-12-05 21:52:52 +01:00
parent 387dff79a9
commit e5d9002f09
16 changed files with 366 additions and 44 deletions

View file

@@ -68,7 +68,7 @@ pyflakes:
pyflakes $(PY_FILES_DIRS)
count:
@sloccount dosage dosagelib | grep "Total Physical Source Lines of Code"
@sloccount $(PY_FILES_DIRS) | grep "Total Physical Source Lines of Code"
clean:
find . -name \*.pyc -delete

View file

@@ -7,7 +7,7 @@ import rfc822
import time
from .output import out
from .util import urlopen, normaliseURL, unquote, strsize
from .util import getImageObject, normaliseURL, unquote, strsize
from .events import getHandler
class FetchComicError(IOError):
@@ -52,7 +52,7 @@ class ComicImage(object):
def connect(self):
"""Connect to host and get meta information."""
try:
self.urlobj = urlopen(self.url, referrer=self.referrer)
self.urlobj = getImageObject(self.url, self.referrer)
except IOError as msg:
raise FetchComicError('Unable to retrieve URL.', self.url, msg)

View file

@@ -9,7 +9,10 @@ from ..util import tagre
_imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
_prevSearch = compile(tagre("a", "href", r'([^"]*/d/\d{8}\.html)') +
'(?:Previous comic|'+tagre("img", "alt", "Previous comic")+')')
'(?:Previous comic' + '|' +
tagre("img", "alt", "Previous comic") + '|' +
tagre("img", "src", "images/back\.gif") +
')')
def add(name, url):
classname = 'KeenSpot_%s' % name
@@ -17,7 +20,9 @@ def add(name, url):
@classmethod
def _prevUrlModifier(cls, prevUrl):
if prevUrl:
return prevUrl.replace("keenspace", "comicgenesis"
return prevUrl.replace("keenspace.com", "comicgenesis.com"
).replace("keenspot.com", "comicgenesis.com"
).replace("toonspace.com", "comicgenesis.com"
).replace("comicgen.com", "comicgenesis.com")
globals()[classname] = make_scraper(classname,

View file

@@ -6,16 +6,17 @@ from re import compile
from ..scraper import make_scraper
from ..util import tagre
_imageSearch = compile(tagre("img", "src", r'(http://www\.nuklearpower\.com/comics/[^"]+)'))
_imageSearch = compile(tagre("img", "src", r'(http://v\.cdn\.nuklearpower\.com/comics/[^"]+)'))
_prevSearch = compile(tagre("a", "href", r'([^"]+)') + "Previous")
def add(name, shortname):
baseUrl = 'http://www.nuklearpower.com/' + shortname + '/'
baseUrl = 'http://www.nuklearpower.com/'
latestUrl = baseUrl + shortname + '/'
classname = 'NuklearPower_%s' % name
globals()[classname] = make_scraper(classname,
name='NuklearPower/' + name,
latestUrl = baseUrl,
latestUrl = latestUrl,
stripUrl = baseUrl + '%s',
imageSearch = _imageSearch,
prevSearch = _prevSearch,

View file

@@ -8,8 +8,8 @@ from ..util import tagre
_imageSearch = compile(tagre("img", "src", r'(http://(?:www|img2)\.smackjeeves\.com/images/uploaded/comics/[^"]+)'))
_linkSearch = tagre("a", "href", r'([^"]*/comics/\d+/[^"]*)')
_prevSearch = compile(_linkSearch + '(?:<img[^>]*alt="< Previous"|&lt; Back)')
_nextSearch = compile(_linkSearch + '(?:<img[^>]*alt="Next >"|Next &gt;)')
_prevSearch = compile(_linkSearch + '(?:<img[^>]*alt="< Previous"|&lt; Back|. previous)')
_nextSearch = compile(_linkSearch + '(?:<img[^>]*alt="Next >"|Next &gt;|next )')
def add(name):
classname = 'SmackJeeves/' + name
@@ -39,6 +39,3 @@ add('durian')
add('heard')
add('mpmcomic')
add('nlmo-project')
add('paranoidloyd')
add('thatdreamagain')
add('wowcomics')

View file

@@ -23,10 +23,8 @@ def add(name, host):
)
add('Grim', 'grim')
add('KOF', 'kof')
add('PowerPuffGirls', 'ppg')
add('Snafu', 'www')
add('Tin', 'tin')
add('TW', 'tw')
add('Sugar', 'sugar')

View file

@@ -18,7 +18,7 @@ _imageSearch = compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[^
def add(name, shortname):
latestUrl = 'http://www.universaluclick.com%s' % shortname
classname = 'UClick_%s' % name
classname = 'Universal_%s' % name
@classmethod
def namer(cls, imageUrl, pageUrl):
@@ -34,7 +34,7 @@ def add(name, shortname):
return parse_strdate(strdate).strftime("%Y%m%d")
globals()[classname] = make_scraper(classname,
name='UClick/' + name,
name='Universal/' + name,
latestUrl = latestUrl,
stripUrl = latestUrl + '%s/',
imageSearch = _imageSearch,

View file

@@ -17,18 +17,16 @@ def add(name, subpath):
latestUrl = baseUrl + subpath,
stripUrl = baseUrl + '?view=archive&amp;chapter=%s',
imageSearch = _imageSearch,
multipleImagesPerStrip = True,
prevSearch = _prevSearch,
# the prevSearch is a redirect
prevUrlMatchesStripUrl = False,
help = 'Index format: nnnn (non-contiguous)',
)
add('AgnesQuill', 'daveroman/agnes/')
add('Elvenbaath', 'tdotodot2k/elvenbaath/')
add('IrrationalFears', 'uvernon/irrationalfears/')
add('KismetHuntersMoon', 'laylalawlor/huntersmoon/')
add('SaikoAndLavender', 'gc/saiko/')
add('MyMuse', 'gc/muse/')
add('NekkoAndJoruba', 'nekkoandjoruba/nekkoandjoruba/')
add('JaxEpoch', 'johngreen/quicken/')
add('QuantumRockOfAges', 'DreamchildNYC/quantum/')
add('ClownSamurai', 'qsamurai/clownsamurai/')

View file

@@ -22,11 +22,17 @@ class _BasicScraper(object):
@cvar prevSearch: A compiled regex that will locate the URL for the
previous strip when applied to a strip page.
'''
# if more than one image per URL is expected
multipleImagesPerStrip = False
# set to False if previous URLs do not match the strip URL (ie. because of redirects)
prevUrlMatchesStripUrl = True
# usually the index format help
help = 'Sorry, no help for this comic yet.'
def __init__(self, indexes=None):
"""Initialize internal variables."""
self.urls = set()

View file

@@ -21,7 +21,12 @@ if os.name == 'nt':
has_curses = has_module("curses")
MAX_FILESIZE = 1024*1024*1 # 1MB
# Maximum content size for HTML pages
MaxContentBytes = 1024 * 1024 * 2 # 2 MB
# Maximum content size for images
MaxImageBytes = 1024 * 1024 * 20 # 20 MB
def tagre(tag, attribute, value, quote='"', before="", after=""):
"""Return a regular expression matching the given HTML tag, attribute
@@ -71,9 +76,9 @@ def case_insensitive_re(name):
baseSearch = re.compile(tagre("base", "href", '([^"]*)'))
def getPageContent(url):
def getPageContent(url, max_content_bytes=MaxContentBytes):
# read page data
page = urlopen(url)
page = urlopen(url, max_content_bytes=max_content_bytes)
data = page.text
# determine base URL
baseUrl = None
@@ -85,6 +90,11 @@ def getPageContent(url):
return data, baseUrl
def getImageObject(url, referrer, max_content_bytes=MaxImageBytes):
"""Get response object for given image URL."""
return urlopen(url, referrer=referrer, max_content_bytes=max_content_bytes)
def fetchUrl(url, urlSearch):
data, baseUrl = getPageContent(url)
match = urlSearch.search(data)
@@ -116,7 +126,6 @@ def fetchUrls(url, imageSearch, prevSearch=None):
prevUrl = match.group(1)
if not prevUrl:
raise ValueError("Match empty previous URL at %s with pattern %s" % (url, prevSearch.pattern))
out.write('matched previous URL %r' % prevUrl, 2)
prevUrl = normaliseURL(urlparse.urljoin(baseUrl, prevUrl))
else:
out.write('no previous URL %s at %s' % (prevSearch.pattern, url), 2)
@@ -174,7 +183,7 @@ def normaliseURL(url):
return urlparse.urlunparse(pu)
def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5):
def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5, max_content_bytes=None):
out.write('Open URL %s' % url, 2)
assert retries >= 0, 'invalid retry value %r' % retries
assert retry_wait_seconds > 0, 'invalid retry seconds value %r' % retry_wait_seconds
@@ -183,7 +192,8 @@ def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5):
if referrer:
headers['Referer'] = referrer
try:
req = requests.get(url, headers=headers, config=config)
req = requests.get(url, headers=headers, config=config, prefetch=False)
check_content_size(url, req.headers, max_content_bytes)
req.raise_for_status()
return req
except requests.exceptions.RequestException as err:
@@ -191,6 +201,15 @@ def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5, max_content_bytes=None):
out.write(msg)
raise IOError(msg)
def check_content_size(url, headers, max_content_bytes):
if not max_content_bytes:
return
if 'content-length' in headers:
size = int(headers['content-length'])
if size > max_content_bytes:
msg = 'URL content of %s with %d Bytes exceeds %d Bytes.' % (url, size, max_content_bytes)
raise IOError(msg)
def get_columns (fp):
"""Return number of columns for given file."""

View file

@@ -170,7 +170,7 @@ def handle_url(url, url_matcher, num_matcher, res):
end = match.end(1)
mo = num_matcher.search(data[end:])
if not mo:
print("ERROR:", repr(data[end:end+300], file=sys.stderr))
print("ERROR:", repr(data[end:end+300]), file=sys.stderr)
continue
num = int(mo.group(1))
res[name] = num

File diff suppressed because one or more lines are too long

View file

@@ -47,23 +47,269 @@ exclude_comics = [
"beerkada", # no images
"BelovedLeader", # broken images
"BigMouthComics", # page does not follow standard layout
"", # page is gone
"", # page is gone
"", # page is gone
"BilltheMagician", # page does not follow standard layout
"BlackBlue", # page moved
"BlackMagic", # page does not follow standard layout
"BloodBound", # page moved
"bloodofthedragon", # page does not follow standard layout
"BloodWing", # broken images
"BlueZombie", # broken page
"BoomerExpress", # redirection to another page
"BobOnline", # missing images
"BottomFlavor", # page does not follow standard layout
"BradTheVampire", # page does not follow standard layout
"BreakpointCity", # page moved
"Brinkerhoff", # page redirected
"CampusSafari", # page moved
"CapturetheMoment", # page moved
"CaseyandAndy", # page moved
"Catalyst", # page moved
"Cats", # broken images
"Chair", # page moved
"ChildrenAtPlay", # page does not follow standard layout
"chu", # broken images
"CoACityofAscii", # only ascii images
"ComicMischief", # page moved
"ComputerGameAddicts", # page moved
"Concession", # page moved
"CorridorZ", # page does not follow standard layout
"CrashBoomMagic", # page moved
"CrazySlowlyGoing", # page has 403 forbidden
"CrimsonWings", # page moved
"DakotasRidge", # page moved
"DATAROM", # broken images
"DazeinaHaze", # page moved
"DIABOLICA", # broken images
"DIfIK", # page does not follow standard layout
"DigitalWar", # page is gone
"DimBulbComics", # page is gone
"DIVE", # page is gone
"DominicDeegan", # page moved
"DungeonDamage", # page does not follow standard layout
"Dylan", # page has 403 forbidden
"EarthRiser", # redirects to a new page
"EdgetheDevilhunter", # page is gone
"EdibleDirt", # page moved
"Einstien27sDesk", # page is gone
"ElfOnlyInn", # page moved
"Ensuing", # broken links
"etch", # broken images
"EternalCaffeineJunkie", # page does not follow standard layout
"EternityComplex", # page does not follow standard layout
"Evilish", # page moved
"EvolBara", # page is gone
"FaerieTales", # page does not follow standard layout
"FairyTaleNewVillage", # missing images
"Fate27sTear", # page moved
"FaultyLogic", # page does not follow standard layout
"FireontheMountain", # page does not follow standard layout
"FiveBucksanHour", # page is gone
"Flatwood", # page moved
"FLEMComics", # page moved
"FletchersCave", # page is broken
"ForcesofGoodandEvil", # page does not follow standard layout
"FurryBlackDevil", # page moved
"Galacticus", # page has 403 forbidden
"GeebasonParade", # page does not follow standard layout
"geeks", # page moved
"GeminiBright", # page does not follow standard layout
"GemutationsPlague", # page does not follow standard layout
"GeorgetheSecond", # page does not follow standard layout
"Ghostz", # page does not follow standard layout
"GODLIKE", # page has 403 forbidden
"GoForIt", # page is gone
"JuvenileDiversion", # page moved
"GothBoy", # page moved
"Grimage", # page moved
"GrossePointeDogs", # page is broken
"GUComics", # page moved
"HardUnderbelly", # page does not follow standard layout
"HazardousScience", # page is gone
"HereThereBeDragons", # page moved
"HighMaintenance", # missing images
"HighSchoolRPG", # page does not follow standard layout
"Horndog", # page moved
"HorseshoesandHandgrenades", # missing images
"HotelGrim", # missing images
"IAlwaysWakeUpLazy", # page moved
"ihatesteve", # page is gone
"IllicitMiracles", # page does not follow standard layout
"IndefensiblePositions", # page does not follow standard layout
"InsanityFair", # page does not follow standard layout
"InsideJoke", # page is gone
"InsidetheBox", # page has 403 forbidden
"InternationalHopeFoundation", # page does not follow standard layout
"JamieandNick", # page moved
"JasonLovesHisGrandpa", # page is gone
"JavanteasFate", # page is gone
"JBBcomics", # page is gone
"JedandDark", # page does not follow standard layout
"JoBeth", # page moved
"Joyride", # page moved
"JustAnotherEscape", # page moved
"JustWeird", # page has 403 forbidden
"Michikomonogatari", # page does not follow standard layout
"MobileMadness", # page does not follow standard layout
"JuvenileDiversion", # page moved
"JWalkinAndapos", # missing images
"KarmaSlave", # page moved
"KeenLace", # page is gone
"khaoskomic", # page moved
"KillingTime", # page is gone
"KnightsOfTheNexus", # page does not follow standard layout
"KoFightClub", # page moved
"LabGoatsInc", # page moved
"LandofGreed", # page is gone
"LeanOnMe", # page has 403 forbidden
"LegendsofRovana", # page has 403 forbidden
"LifeatBayside", # page does not follow standard layout
"LifeinaNutshell", # page does not follow standard layout
"Lifesuchasitis", # page has 403 forbidden
"LinktotheBoards", # page does not follow standard layout
"LinT", # page moved
"LiterallySpeaking", # page does not follow standard layout
"LoxieAndZoot", # page does not follow standard layout
"Lunchtable", # missing images
"MadWorld", # page has 403 forbidden
"Magellan", # page does not follow standard layout
"Marachan", # missing images
"MassProduction", # page does not follow standard layout
"MayIHelpYou", # page has 403 forbidden
"Meiosis", # page moved
"Michikomonogatari", # page does not follow standard layout
"MidnorthFlourCo", # page has 403 forbidden
"MintCondition", # page moved
"MisadventuresinPhysics", # page has 403 forbidden
"MobileMadness", # page does not follow standard layout
"MyAngelYouAreAngel", # page is gone
"MyBrainHurts", # page does not follow standard layout
"NAFTANorthAmericanFreeToonAgreementalsoYankuckcanee", # page does not follow standard layout
"NeglectedMarioCharacterComix", # page does not follow standard layout
"Nemutionjewel", # page does not follow standard layout
"Nerdgasm", # missing images
"Nerdz", # page is gone
"Nervillsaga", # page does not follow standard layout
"NetherOakasuburbanadventure", # page does not follow standard layout
"NoNeedForBushido", # page moved
"nothingcomesnaturally", # page does not follow standard layout
"NymphsoftheWest", # too few images
"OffTheWall", # page does not follow standard layout
"OneHourAxis", # page is gone
"OnlyOne", # page is gone
"OopsNevermind", # page is gone
"PacoStand", # page has 403 forbidden
"Pander", # page is gone
"PANDORA", # page is missing pages
"PhilosophyBites", # missing images
"PhilosophyMonkey", # page is gone
"PicpakDog", # page moved
"PictureDiary", # page is gone
"PillarsofFaith", # page does not follow standard layout
"Pimpette", # page moved
"PokC3A9Chow", # page has 403 forbidden
"PolleninArabia", # page does not follow standard layout
"PranMan", # page moved
"QueensOfRandomness", # broken images
"QuestionableTales", # page does not follow standard layout
"RadioactiveFanboys", # page does not follow standard layout
"RandomAssembly", # page is gone
"RandomInk", # page is gone
"ReceptorFatigue", # page does not follow standard layout
"Remsi", # page does not follow standard layout
"Reset", # page does not follow standard layout
"ResistanceLine", # page does not follow standard layout
"ReturntoDonnelly", # page is gone
"Riboflavin", # page does not follow standard layout
"RitualsandOfferings", # page is gone
"RiverCityHigh", # page is gone
"RM27sothercomics", # page does not follow standard layout
"RogerAndDominic", # page does not follow standard layout
"RoleoftheDie", # page is gone
"RonnieRaccoon", # page moved
"RosalarianAndapossRandomCreepyTales", # page is gone
"RulesofMakeBelieve", # page is gone
"Rveillerie", # page has 403 forbidden
"SaintPeter27sCross", # page does not follow standard layout
"Saturnalia", # page moved
"SavageIslands", # page has 403 forbidden
"SaveMeGebus", # page does not follow standard layout
"Sawdust", # page has 403 forbidden
"Scooterboy1234", # page has 403 forbidden
"SecondNight", # page moved
"Sempiternal", # page moved
"Senioritis", # page has 403 forbidden
"ShivaeStudios", # page moved
"ShonenAiKudasai", # page is gone
"ShootMeNow", # page does not follow standard layout
"SidandLasker", # page moved
"SillyConeV", # page is gone
"Skunk", # page moved
"SLAGIT", # missing images
"SmithStone", # page has 403 forbidden
"SnowflakeStudios", # page is gone
"Sock27d", # page is gone
"Soks", # page is gone
"SoManyLevels", # page moved
"SomethingSoft", # page is gone
"Sorcery101", # page moved
"SpellBinder", # page is gone
"SPQRBlues", # page moved
"StationV3", # page moved
"SticksandStuff", # page does not follow standard layout
"StickyFingers", # page does not follow standard layout
"Stubble", # page moved
"SurrealKins", # page is gone
"SwirlyMarkYume", # page does not follow standard layout
"SynapticMisfiring", # page is gone
"TalesoftheQuestor", # page moved
"TAVISION", # page moved
"ThatWasMcPherson", # page moved
"The6GUYSInMyHead", # page has 403 forbidden
"TheAdventuresofCaptainMooki", # page moved
"TheAdventuresofLi27lDenverPastrami", # page is gone
"TheAdventuresofPeppyThePipingPirate", # page is gone
"TheAmoeba", # page is gone
"TheAvatar", # page does not follow standard layout
"TheBessEffectGerman", # page moved
"TheBestandtheBrightest", # page moved
"TheDevilsPanties", # page moved
"TheDoctorPepperShow", # page has 403 forbidden
"TheGods27Pack", # page has 403 forbidden
"TheMadBrothers", # page does not follow standard layout
"TheMediocres", # missing images
"TheNamelessStory", # page has 403 forbidden
"Thenoob", # page moved
"TheOrangeArrow", # page is gone
"TheSailorNeopetsRPG", # page does not follow standard layout
"TheWayoftheWorld", # page moved
"TheWorldofUh", # broken images
"TheWotch", # page does not follow standard layout
"ThunderandLightning", # page moved
"TinysWorld", # page does not follow standard layout
"ToonPimp27sPalace", # page moved
"Tossers", # page moved
"Towner", # page does not follow standard layout
"Townies", # page is gone
"TracyandTristan", # page moved
"TrialsintheLight", # page does not follow standard layout
"ttskr", # page does not follow standard layout
"twelvedragons", # page does not follow standard layout
"TwoEvilScientists", # page moved
"TwoLumps", # page moved
"TwoSidesWide", # page moved
"Vendetta", # page moved
"VictimsoftheSystem", # page moved
"Victor", # page moved
"WARPZONEthinkwithinthecube", # page does not follow standard layout
"WayoftheDodo", # page does not follow standard layout
"Wedontgetiteither", # page moved
"WeishauptScholars", # page does not follow standard layout
"Werechild", # page has 403 forbidden
"WhiskeyAndMelancholy", # missing pages
"YellowMoon", # page has 403 forbidden
"YouScrewedUp", # missing images
"YUMEdream", # page moved
"Zap", # page moved
"ZebraGirl", # page moved
"Zeek", # page moved
"Zootz", # page is gone
]
# links to last valid strips
@@ -72,8 +318,37 @@ url_overrides = {
"AmazonSpaceRangers": "http://amazons.comicgenesis.com/d/20051015.html",
"ArroganceinSimplicity": "http://arrogance.comicgenesis.com/d/20030217.html",
"ATasteofEvil": "http://atasteofevil.comicgenesis.com/d/20050314.html",
"": "",
"": "",
"CanYouKeepaSecret": "http://cykas.comicgenesis.com/d/20041035.html",
"CapturetheMoment": "http://capturethemoment.comicgenesis.com/d/20100927.html",
"CornerAlley13": "http://corneralley.comicgenesis.com/d/20101010.html",
"Countyoursheep": "http://countyoursheep.keenspot.com/",
"FreakU": "http://freaku.comicgenesis.com//d/20080827.html",
"FreeParking": "http://freeparking.comicgenesis.com//d/20051029.html",
"GamerPsychotica": "http://gp.comicgenesis.com/d/20060113.html",
"GoneAstray": "http://goneastray.comicgenesis.com/d/20100305.html",
"GoodnEvil": "http://gne.comicgenesis.com/d/20040814.html",
"HalflightBreaking": "http://halflight.comicgenesis.com/d/20021031.html",
"HealerOnFeatheredWings": "http://selsachronicles.comicgenesis.com/",
"HowNottoRunAComic": "http://hownottorunacomic.comicgenesis.com/d/19950719.html",
"HurricaneParty": "http://hurricaneparty.comicgenesis.com/d/20040123.html",
"MacHall": "http://machall.comicgenesis.com/d/20020125.html",
"MaryQuiteContrary": "http://marycontrary.comicgenesis.com/d/20070824.html",
"MoonCrest24": "http://mooncrest.comicgenesis.com/d/20121117.html",
"MrPinkBlob": "http://mrpinkblob.comicgenesis.com/d/100.html",
"NekkoandJoruba": "http://nekkoandjoruba.comicgenesis.com/d/20050816.html",
"No4thWalltoBreak": "http://no4thwalltobreak.comicgenesis.com/d/20041025.html",
"OtakuKyokai": "http://otakukyokai.comicgenesis.com/d/20060818.html",
"PandP": "http://pandpcomic.comicgenesis.com/d/20021002.html",
"Paradigm": "http://paradigm.comicgenesis.com/d/20020716.html",
"ParallelDementia": "http://paralleldementia.comicgenesis.com/d/20071221.html",
"PET": "http://petcomic.comicgenesis.com/d/20070413.html",
"PlanetsCollide": "http://ruthcomix.comicgenesis.com/d/20010706.html",
"RuneMaster": "http://runemaster.comicgenesis.com/d/20050607.html",
"ShinobiHigh": "http://shinobihigh.comicgenesis.com/d/20020118.html",
"spacejams": "http://spacejams.comicgenesis.com/d/20020820.html",
"TheAdventuresofVindibuddSuperheroInTraining": "http://vindibudd.comicgenesis.com/d/20070720.html",
"TriumphantLosers": "http://triumphantlosers.comicgenesis.com/d/20081006.html",
"Zortic": "http://zortic.comicgenesis.com/d/20030922.html",
}
def handle_url(url, res):

View file

@@ -17,12 +17,21 @@ htmltemplate = """
<meta name="viewport" content="width=device-width">
<link rel="stylesheet" href="css/normalize.css">
<link rel="stylesheet" href="css/main.css">
<link rel="stylesheet" href="css/dosage.css">
<script src="js/masonry.min.js"></script>
</head>
<body>
<p>Dosage test results from %(date)s</p>
<ul>
%(content)s
</ul>
<p>Dosage test results from %(date)s</p>
<div id="container">
%(content)s
</div>
<script>
window.onload = function() {
var wall = new Masonry( document.getElementById('container'), {
columnWidth: 240
});
};
</script>
</body>
</html>
"""
@@ -80,7 +89,7 @@ def get_content(filename):
inner = '<a href="%s" class="%s">%s</a>' % (url, css, name)
else:
inner = '<span class="%s">%s</span>' % (css, name)
res.append(' <li>%s</li>' % inner)
res.append(' <div class="item">%s</div>' % inner)
return os.linesep.join(res)

View file

@@ -20,6 +20,20 @@ url_matcher = re.compile(r'<li><a href="(/comics/[^"]+)">([^<]+)</a>')
# names of comics to exclude
exclude_comics = [
"BusinessAndFinance", # not a comic
"ComicPanel", # not a comic
"ComicsAZ", # not a comic
"ComicStrip", # not a comic
"Espaol", # not a comic
"Family", # not a comic
"ForKids", # not a comic
"JamesBond", # not a comic
"Men", # not a comic
"NEA", # not a comic
"Pets", # not a comic
"SundayOnly", # not a comic
"WebExclusive", # not a comic
"Women", # not a comic
]

View file

@@ -43,7 +43,7 @@ class _ComicTester(TestCase):
self.check(images > 0, 'failed to find images at %s' % strip.stripUrl)
if not self.scraperclass.multipleImagesPerStrip:
self.check(images == 1, 'found %d instead of 1 image at %s' % (images, strip.stripUrl))
if num > 0:
if num > 0 and self.scraperclass.prevUrlMatchesStripUrl:
self.check_stripurl(strip)
num += 1
if self.scraperclass.prevSearch: