Various comics are fixed.
This commit is contained in:
parent
de1b80fa4d
commit
5f9e5ae3ca
18 changed files with 2857 additions and 170 deletions
2944
doc/testresults.html
2944
doc/testresults.html
File diff suppressed because it is too large
Load diff
|
@ -226,14 +226,6 @@ class Angband(_BasicScraper):
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
class ActionAthena(_BasicScraper):
|
|
||||||
latestUrl = 'http://actionathena.com/'
|
|
||||||
stripUrl = latestUrl + '2%s'
|
|
||||||
imageSearch = compile(r'<img src=\'(http://actionathena.com/comics/.+?)\'>')
|
|
||||||
prevSearch = compile(r'<a href="(http://actionathena.com/.+?)">« Previous</a>')
|
|
||||||
help = 'Index format: yyyy/mm/dd/strip-name'
|
|
||||||
|
|
||||||
|
|
||||||
class AlsoBagels(_BasicScraper):
|
class AlsoBagels(_BasicScraper):
|
||||||
latestUrl = 'http://alsobagels.com/'
|
latestUrl = 'http://alsobagels.com/'
|
||||||
stripUrl = latestUrl + 'index.php/comic/%s/'
|
stripUrl = latestUrl + 'index.php/comic/%s/'
|
||||||
|
|
|
@ -181,7 +181,7 @@ class CatAndGirl(_BasicScraper):
|
||||||
class CyanideAndHappiness(_BasicScraper):
|
class CyanideAndHappiness(_BasicScraper):
|
||||||
latestUrl = 'http://www.explosm.net/comics/'
|
latestUrl = 'http://www.explosm.net/comics/'
|
||||||
stripUrl = latestUrl + '%s/'
|
stripUrl = latestUrl + '%s/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http:\/\/www\.explosm\.net/db/files/Comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://(?:www\.)?explosm\.net/db/files/Comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/comics/\d+/)', before="prev"))
|
prevSearch = compile(tagre("a", "href", r'(/comics/\d+/)', before="prev"))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
@ -234,14 +234,6 @@ class Chester5000XYV(_BasicScraper):
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
class CalamitiesOfNature(_BasicScraper):
|
|
||||||
latestUrl = 'http://www.calamitiesofnature.com/'
|
|
||||||
stripUrl = latestUrl + 'archive/?c=%s'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(archive/\d+[^"]+|http://www\.calamitiesofnature\.com/archive/\d+[^"]+)'))
|
|
||||||
prevSearch = compile(r'<a id="previous" href="(http://www.calamitiesofnature.com/archive/\?c\=\d+)">')
|
|
||||||
help = 'Index format: nnn'
|
|
||||||
|
|
||||||
|
|
||||||
class Champ2010(_BasicScraper):
|
class Champ2010(_BasicScraper):
|
||||||
# the latest URL is hard coded since the comic is discontinued
|
# the latest URL is hard coded since the comic is discontinued
|
||||||
latestUrl = 'http://jedcollins.com/champ2010/champ-12-30-10.html'
|
latestUrl = 'http://jedcollins.com/champ2010/champ-12-30-10.html'
|
||||||
|
|
|
@ -28,7 +28,8 @@ class Damonk(_BasicScraper):
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
class DandyAndCompany(_BasicScraper):
|
# XXX disallowed /search by robots.txt
|
||||||
|
class _DandyAndCompany(_BasicScraper):
|
||||||
latestUrl = 'http://www.dandyandcompany.com/'
|
latestUrl = 'http://www.dandyandcompany.com/'
|
||||||
stripUrl = None
|
stripUrl = None
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
|
|
|
@ -68,7 +68,8 @@ class EmergencyExit(_BasicScraper):
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
class ErrantStory(_BasicScraper):
|
# XXX disallowed by robots.txt
|
||||||
|
class _ErrantStory(_BasicScraper):
|
||||||
latestUrl = 'http://www.errantstory.com/'
|
latestUrl = 'http://www.errantstory.com/'
|
||||||
stripUrl = latestUrl + '%s'
|
stripUrl = latestUrl + '%s'
|
||||||
imageSearch = compile(r'<img[^>]+?src="([^"]*?comics/.+?)"')
|
imageSearch = compile(r'<img[^>]+?src="([^"]*?comics/.+?)"')
|
||||||
|
|
|
@ -58,10 +58,19 @@ class KillerKomics(_BasicScraper):
|
||||||
help = 'Index format: strip-name'
|
help = 'Index format: strip-name'
|
||||||
|
|
||||||
|
|
||||||
class Kofightclub(_BasicScraper):
|
# XXX disallowed by robots.txt
|
||||||
|
class _Kofightclub(_BasicScraper):
|
||||||
latestUrl = 'http://www.kofightclub.com/'
|
latestUrl = 'http://www.kofightclub.com/'
|
||||||
stripUrl = latestUrl + 'd/%s.html'
|
stripUrl = latestUrl + 'd/%s.html'
|
||||||
imageSearch = compile(tagre("img", "src", r'(\.\./images/\d+[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(\.\./images/\d+[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'((?:http://www\.kofightclub\.com)?/d/\d+\.html)')
|
prevSearch = compile(tagre("a", "href", r'((?:http://www\.kofightclub\.com)?/d/\d+\.html)')
|
||||||
+ tagre("img", "alt", "Previous comic"))
|
+ tagre("img", "alt", "Previous comic"))
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
class KuroShouri(_BasicScraper):
|
||||||
|
latestUrl = 'http://kuroshouri.com/'
|
||||||
|
stripUrl = latestUrl + '?webcomic_post=%s'
|
||||||
|
imageSearch = compile(tagre("img", "src", r"(http://kuroshouri\.com/wp-content/webcomic/kuroshouri/[^'\"]+)", quote="['\"]"))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(http://kuroshouri\.com/\?webcomic_post=[^"]+)', after="previous"))
|
||||||
|
help = 'Index format: chapter-n-page-m'
|
||||||
|
|
|
@ -72,14 +72,6 @@ class Melonpool(_BasicScraper):
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
class MintCondition(_BasicScraper):
|
|
||||||
latestUrl = 'http://www.mintconditioncomic.com/'
|
|
||||||
stripUrl = latestUrl + '%s/'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.mintconditioncomic\.com/comics/[^"]+)'))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://www\.mintconditioncomic\.com/[^"]+)', after="prev"))
|
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
|
||||||
|
|
||||||
|
|
||||||
class Misfile(_BasicScraper):
|
class Misfile(_BasicScraper):
|
||||||
latestUrl = 'http://www.misfile.com/'
|
latestUrl = 'http://www.misfile.com/'
|
||||||
stripUrl = latestUrl + '?date=%s'
|
stripUrl = latestUrl + '?date=%s'
|
||||||
|
|
|
@ -35,7 +35,7 @@ class OnTheEdge(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
class OneQuestion(_BasicScraper):
|
class OneQuestion(_BasicScraper):
|
||||||
latestUrl = 'http://www.onequestioncomic.com/'
|
latestUrl = 'http://onequestioncomic.com/'
|
||||||
stripUrl = latestUrl + 'comic.php?strip_id=%s'
|
stripUrl = latestUrl + 'comic.php?strip_id=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(istrip_files/strips/\d+\.jpg)'))
|
imageSearch = compile(tagre("img", "src", r'(istrip_files/strips/\d+\.jpg)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') + tagre("img", "src", r'img/arrow_prev\.jpg'))
|
prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') + tagre("img", "src", r'img/arrow_prev\.jpg'))
|
||||||
|
|
|
@ -142,7 +142,8 @@ class SPQRBlues(_BasicScraper):
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
class StationV3(_BasicScraper):
|
# XXX disallowed by robots.txt
|
||||||
|
class _StationV3(_BasicScraper):
|
||||||
latestUrl = 'http://www.stationv3.com/'
|
latestUrl = 'http://www.stationv3.com/'
|
||||||
stripUrl = latestUrl + 'd/%s.html'
|
stripUrl = latestUrl + 'd/%s.html'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.stationv3\.com/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.stationv3\.com/comics/[^"]+)'))
|
||||||
|
@ -228,7 +229,8 @@ class Spamusement(_BasicScraper):
|
||||||
starter = indirectStarter('http://spamusement.com/', prevSearch)
|
starter = indirectStarter('http://spamusement.com/', prevSearch)
|
||||||
|
|
||||||
|
|
||||||
class StrangeCandy(_BasicScraper):
|
# XXX disallowed by robots.txt
|
||||||
|
class _StrangeCandy(_BasicScraper):
|
||||||
latestUrl = 'http://www.strangecandy.net/'
|
latestUrl = 'http://www.strangecandy.net/'
|
||||||
stripUrl = latestUrl + 'd/%s.html'
|
stripUrl = latestUrl + 'd/%s.html'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comics/\d+\.jpg)'))
|
imageSearch = compile(tagre("img", "src", r'(/comics/\d+\.jpg)'))
|
||||||
|
|
|
@ -13,12 +13,12 @@ _attrs = dict(
|
||||||
next = case_insensitive_re("next"),
|
next = case_insensitive_re("next"),
|
||||||
)
|
)
|
||||||
_prevSearch = compile(_linkSearch +
|
_prevSearch = compile(_linkSearch +
|
||||||
r'(?:<img[^>]+alt="[^"]*%(prev)s|<img[^>]+(?:button_previous|nav_prev4)\.|[^<]*%(back)s|\s*<<? (?:%(back)s|%(prev)s)|[^<]*%(prev)s)' % _attrs)
|
r'(?:<img[^>]+alt="[^"]*%(prev)s|<img[^>]+(?:button_previous|naviButtons_Previous|nav_prev4|prev|previous|webbuttonback|PrevArrow)\.|[^<]*%(back)s|\s*<<? (?:%(back)s|%(prev)s)|[^<]*%(prev)s)' % _attrs)
|
||||||
_nextSearch = compile(_linkSearch +
|
_nextSearch = compile(_linkSearch +
|
||||||
r'(?:<img[^>]+alt="%(next)s|<img[^>]+(?:button_next|nav_next4)\.|\s*<?[^<]*%(next)s)' % _attrs)
|
r'(?:<img[^>]+alt="%(next)s|<img[^>]+(?:button_next|naviButtons_Next|nav_next4|next|webbuttonnext-1|NextArrow)\.|\s*<?[^<]*%(next)s)' % _attrs)
|
||||||
|
|
||||||
def add(name, url, description, adult, bounce):
|
def add(name, url, description, adult, bounce):
|
||||||
classname = 'SmackJeeves/' + name
|
classname = 'SmackJeeves_' + name
|
||||||
|
|
||||||
def modifier(pageUrl):
|
def modifier(pageUrl):
|
||||||
if adult:
|
if adult:
|
||||||
|
@ -48,6 +48,7 @@ def add(name, url, description, adult, bounce):
|
||||||
return "%s_%s" % (name, num)
|
return "%s_%s" % (name, num)
|
||||||
|
|
||||||
globals()[classname] = make_scraper(classname,
|
globals()[classname] = make_scraper(classname,
|
||||||
|
name = 'SmackJeeves/' + name,
|
||||||
adult = adult,
|
adult = adult,
|
||||||
starter = _starter,
|
starter = _starter,
|
||||||
prevUrlModifier = lambda cls, url: modifier(url),
|
prevUrlModifier = lambda cls, url: modifier(url),
|
||||||
|
|
|
@ -71,7 +71,8 @@ class TinyKittenTeeth(_BasicScraper):
|
||||||
help = 'Index format: yyyy/mm/dd/stripname (unpadded)'
|
help = 'Index format: yyyy/mm/dd/stripname (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
class TwoLumps(_BasicScraper):
|
# XXX disallowed by robots.txt
|
||||||
|
class _TwoLumps(_BasicScraper):
|
||||||
latestUrl = 'http://www.twolumps.net/'
|
latestUrl = 'http://www.twolumps.net/'
|
||||||
stripUrl = latestUrl + 'd/%s.html'
|
stripUrl = latestUrl + 'd/%s.html'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
||||||
|
|
|
@ -26,7 +26,8 @@ class UnicornJelly(_BasicScraper):
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
class UserFriendly(_BasicScraper):
|
# XXX disallowed by robots.txt
|
||||||
|
class _UserFriendly(_BasicScraper):
|
||||||
starter = bounceStarter('http://ars.userfriendly.org/cartoons/?mode=classic', compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="">'))
|
starter = bounceStarter('http://ars.userfriendly.org/cartoons/?mode=classic', compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="">'))
|
||||||
stripUrl = 'http://ars.userfriendly.org/cartoons/?id=%s&mode=classic'
|
stripUrl = 'http://ars.userfriendly.org/cartoons/?id=%s&mode=classic'
|
||||||
imageSearch = compile(r'<img border="0" src="\s*(http://www.userfriendly.org/cartoons/archives/\d{2}\w{3}/.+?\.gif)"')
|
imageSearch = compile(r'<img border="0" src="\s*(http://www.userfriendly.org/cartoons/archives/\d{2}\w{3}/.+?\.gif)"')
|
||||||
|
|
|
@ -57,7 +57,8 @@ class WotNow(_BasicScraper):
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
class WorldOfWarcraftEh(_BasicScraper):
|
# XXX disallowed by robots.txt
|
||||||
|
class _WorldOfWarcraftEh(_BasicScraper):
|
||||||
latestUrl = 'http://woweh.com/'
|
latestUrl = 'http://woweh.com/'
|
||||||
stripUrl = None
|
stripUrl = None
|
||||||
imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
|
imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
|
||||||
|
|
|
@ -30,6 +30,7 @@ exclude_comics = [
|
||||||
"Apartment_408_Full_Size", # broken images
|
"Apartment_408_Full_Size", # broken images
|
||||||
"Apple_Valley", # broken images
|
"Apple_Valley", # broken images
|
||||||
"Apt_408_Minis", # broken images
|
"Apt_408_Minis", # broken images
|
||||||
|
"Art_dump", # broken images
|
||||||
"Atxs", # broken images
|
"Atxs", # broken images
|
||||||
"A_Word_Of_Wisdom", # broken images
|
"A_Word_Of_Wisdom", # broken images
|
||||||
"Brathalla", # broken images
|
"Brathalla", # broken images
|
||||||
|
@ -64,6 +65,7 @@ exclude_comics = [
|
||||||
"Inside_OuT", # broken images
|
"Inside_OuT", # broken images
|
||||||
"Journey_to_Raifina", # broken images
|
"Journey_to_Raifina", # broken images
|
||||||
"KALA_dan", # broken images
|
"KALA_dan", # broken images
|
||||||
|
"Kuro_Shouri", # page moved
|
||||||
"Live_to_tell", # start page requires login
|
"Live_to_tell", # start page requires login
|
||||||
"Locoma", # broken images
|
"Locoma", # broken images
|
||||||
"London_Underworld", # broken images
|
"London_Underworld", # broken images
|
||||||
|
@ -141,7 +143,7 @@ exclude_comics = [
|
||||||
"Weave", # broken images
|
"Weave", # broken images
|
||||||
"Weirdlings", # template error
|
"Weirdlings", # template error
|
||||||
"Welcome_To_Border_City", # broken images
|
"Welcome_To_Border_City", # broken images
|
||||||
"what_comes_first", # start page requires login
|
"What_comes_first", # start page requires login
|
||||||
"Within_Shadows", # broken images
|
"Within_Shadows", # broken images
|
||||||
"Xolta", # start page requires login
|
"Xolta", # start page requires login
|
||||||
"XTIN__The_Dragons_Dream_World", # start page requires login
|
"XTIN__The_Dragons_Dream_World", # start page requires login
|
||||||
|
|
|
@ -38,6 +38,7 @@ exclude_comics = [
|
||||||
"RichardsPoorAlmanac", # missing images
|
"RichardsPoorAlmanac", # missing images
|
||||||
"SherpaAid", # comic unavailable
|
"SherpaAid", # comic unavailable
|
||||||
"SparComics", # comic unavailable
|
"SparComics", # comic unavailable
|
||||||
|
"SurvivingSingle", # comic unavailable
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,9 @@ htmltemplate = """
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<p>Dosage test results from %(date)s</p>
|
<p>Dosage test results from %(date)s</p>
|
||||||
|
<p>Note that it is almost impossible to get a 100% OK test run
|
||||||
|
due to temporary network failures or sites that are just updating
|
||||||
|
the comic page.</p>
|
||||||
<div id="container">
|
<div id="container">
|
||||||
%(content)s
|
%(content)s
|
||||||
</div>
|
</div>
|
||||||
|
@ -70,14 +73,16 @@ def get_content(filename):
|
||||||
with open(filename, "r") as f:
|
with open(filename, "r") as f:
|
||||||
print("Tests parsed: 0", end=" ", file=sys.stderr)
|
print("Tests parsed: 0", end=" ", file=sys.stderr)
|
||||||
num_tests = 0
|
num_tests = 0
|
||||||
|
add_reason = False
|
||||||
for line in f:
|
for line in f:
|
||||||
if line.startswith((". ", "F ")) and "test_comics" in line:
|
if line.startswith((". ", "F ")) and "test_comics" in line:
|
||||||
|
add_reason = line.startswith("F ")
|
||||||
num_tests += 1
|
num_tests += 1
|
||||||
try:
|
try:
|
||||||
tests.append(get_test(line))
|
tests.append(get_test(line))
|
||||||
add_reason = line.startswith("F ")
|
|
||||||
except Exception as msg:
|
except Exception as msg:
|
||||||
print("WARNING:", msg, file=sys.stderr)
|
print("WARNING:", msg, file=sys.stderr)
|
||||||
|
continue
|
||||||
elif add_reason and line.startswith(" E "):
|
elif add_reason and line.startswith(" E "):
|
||||||
reason = line[3:].strip()
|
reason = line[3:].strip()
|
||||||
tests[-1][-1] = reason
|
tests[-1][-1] = reason
|
||||||
|
|
|
@ -40,6 +40,8 @@ exclude_comics = [
|
||||||
"MylifewithFel", # does not follow standard layout
|
"MylifewithFel", # does not follow standard layout
|
||||||
"NegativeZen", # does not follow standard layout
|
"NegativeZen", # does not follow standard layout
|
||||||
"NightShot", # does not follow standard layout
|
"NightShot", # does not follow standard layout
|
||||||
|
"NormalIsBoring", # does not follow standard layout
|
||||||
|
"Okamirai", # images are 403 forbidden
|
||||||
"OmnisSpriteShowcase", # missing images
|
"OmnisSpriteShowcase", # missing images
|
||||||
"OpticalDisarray", # does not follow standard layout
|
"OpticalDisarray", # does not follow standard layout
|
||||||
"PicturesofYou", # does not follow standard layout
|
"PicturesofYou", # does not follow standard layout
|
||||||
|
@ -49,6 +51,7 @@ exclude_comics = [
|
||||||
"Ribon", # does not follow standard layout
|
"Ribon", # does not follow standard layout
|
||||||
"SecretSanta2011", # missing images
|
"SecretSanta2011", # missing images
|
||||||
"ShinkaTheLastEevee", # does not follow standard layout
|
"ShinkaTheLastEevee", # does not follow standard layout
|
||||||
|
"SimplePixel", # does not follow standard layout
|
||||||
"SJArtCollab", # missing images
|
"SJArtCollab", # missing images
|
||||||
"SlightlyDifferent", # missing images
|
"SlightlyDifferent", # missing images
|
||||||
"TheAfterSubtract", # does not follow standard layout
|
"TheAfterSubtract", # does not follow standard layout
|
||||||
|
|
|
@ -30,6 +30,7 @@ exclude_comics = [
|
||||||
"JamesBond", # not a comic
|
"JamesBond", # not a comic
|
||||||
"Men", # not a comic
|
"Men", # not a comic
|
||||||
"NEA", # not a comic
|
"NEA", # not a comic
|
||||||
|
"PeanutsPortuguese", # not found
|
||||||
"Pets", # not a comic
|
"Pets", # not a comic
|
||||||
"SundayOnly", # not a comic
|
"SundayOnly", # not a comic
|
||||||
"WebExclusive", # not a comic
|
"WebExclusive", # not a comic
|
||||||
|
|
Loading…
Reference in a new issue