Various comics are fixed.
This commit is contained in:
parent
de1b80fa4d
commit
5f9e5ae3ca
18 changed files with 2857 additions and 170 deletions
2944
doc/testresults.html
2944
doc/testresults.html
File diff suppressed because it is too large
Load diff
|
@ -226,14 +226,6 @@ class Angband(_BasicScraper):
|
|||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
class ActionAthena(_BasicScraper):
|
||||
latestUrl = 'http://actionathena.com/'
|
||||
stripUrl = latestUrl + '2%s'
|
||||
imageSearch = compile(r'<img src=\'(http://actionathena.com/comics/.+?)\'>')
|
||||
prevSearch = compile(r'<a href="(http://actionathena.com/.+?)">« Previous</a>')
|
||||
help = 'Index format: yyyy/mm/dd/strip-name'
|
||||
|
||||
|
||||
class AlsoBagels(_BasicScraper):
|
||||
latestUrl = 'http://alsobagels.com/'
|
||||
stripUrl = latestUrl + 'index.php/comic/%s/'
|
||||
|
|
|
@ -181,7 +181,7 @@ class CatAndGirl(_BasicScraper):
|
|||
class CyanideAndHappiness(_BasicScraper):
|
||||
latestUrl = 'http://www.explosm.net/comics/'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http:\/\/www\.explosm\.net/db/files/Comics/[^"]+)'))
|
||||
imageSearch = compile(tagre("img", "src", r'(http://(?:www\.)?explosm\.net/db/files/Comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/comics/\d+/)', before="prev"))
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
@ -234,14 +234,6 @@ class Chester5000XYV(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class CalamitiesOfNature(_BasicScraper):
|
||||
latestUrl = 'http://www.calamitiesofnature.com/'
|
||||
stripUrl = latestUrl + 'archive/?c=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(archive/\d+[^"]+|http://www\.calamitiesofnature\.com/archive/\d+[^"]+)'))
|
||||
prevSearch = compile(r'<a id="previous" href="(http://www.calamitiesofnature.com/archive/\?c\=\d+)">')
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class Champ2010(_BasicScraper):
|
||||
# the latest URL is hard coded since the comic is discontinued
|
||||
latestUrl = 'http://jedcollins.com/champ2010/champ-12-30-10.html'
|
||||
|
|
|
@ -28,7 +28,8 @@ class Damonk(_BasicScraper):
|
|||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class DandyAndCompany(_BasicScraper):
|
||||
# XXX disallowed /search by robots.txt
|
||||
class _DandyAndCompany(_BasicScraper):
|
||||
latestUrl = 'http://www.dandyandcompany.com/'
|
||||
stripUrl = None
|
||||
multipleImagesPerStrip = True
|
||||
|
|
|
@ -68,7 +68,8 @@ class EmergencyExit(_BasicScraper):
|
|||
help = 'Index format: n'
|
||||
|
||||
|
||||
class ErrantStory(_BasicScraper):
|
||||
# XXX disallowed by robots.txt
|
||||
class _ErrantStory(_BasicScraper):
|
||||
latestUrl = 'http://www.errantstory.com/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(r'<img[^>]+?src="([^"]*?comics/.+?)"')
|
||||
|
|
|
@ -58,10 +58,19 @@ class KillerKomics(_BasicScraper):
|
|||
help = 'Index format: strip-name'
|
||||
|
||||
|
||||
class Kofightclub(_BasicScraper):
|
||||
# XXX disallowed by robots.txt
|
||||
class _Kofightclub(_BasicScraper):
|
||||
latestUrl = 'http://www.kofightclub.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(\.\./images/\d+[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'((?:http://www\.kofightclub\.com)?/d/\d+\.html)')
|
||||
+ tagre("img", "alt", "Previous comic"))
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class KuroShouri(_BasicScraper):
|
||||
latestUrl = 'http://kuroshouri.com/'
|
||||
stripUrl = latestUrl + '?webcomic_post=%s'
|
||||
imageSearch = compile(tagre("img", "src", r"(http://kuroshouri\.com/wp-content/webcomic/kuroshouri/[^'\"]+)", quote="['\"]"))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://kuroshouri\.com/\?webcomic_post=[^"]+)', after="previous"))
|
||||
help = 'Index format: chapter-n-page-m'
|
||||
|
|
|
@ -72,14 +72,6 @@ class Melonpool(_BasicScraper):
|
|||
help = 'Index format: n'
|
||||
|
||||
|
||||
class MintCondition(_BasicScraper):
|
||||
latestUrl = 'http://www.mintconditioncomic.com/'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.mintconditioncomic\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.mintconditioncomic\.com/[^"]+)', after="prev"))
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
|
||||
|
||||
class Misfile(_BasicScraper):
|
||||
latestUrl = 'http://www.misfile.com/'
|
||||
stripUrl = latestUrl + '?date=%s'
|
||||
|
|
|
@ -35,7 +35,7 @@ class OnTheEdge(_BasicScraper):
|
|||
|
||||
|
||||
class OneQuestion(_BasicScraper):
|
||||
latestUrl = 'http://www.onequestioncomic.com/'
|
||||
latestUrl = 'http://onequestioncomic.com/'
|
||||
stripUrl = latestUrl + 'comic.php?strip_id=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(istrip_files/strips/\d+\.jpg)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') + tagre("img", "src", r'img/arrow_prev\.jpg'))
|
||||
|
|
|
@ -142,7 +142,8 @@ class SPQRBlues(_BasicScraper):
|
|||
help = 'Index format: number'
|
||||
|
||||
|
||||
class StationV3(_BasicScraper):
|
||||
# XXX disallowed by robots.txt
|
||||
class _StationV3(_BasicScraper):
|
||||
latestUrl = 'http://www.stationv3.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.stationv3\.com/comics/[^"]+)'))
|
||||
|
@ -228,7 +229,8 @@ class Spamusement(_BasicScraper):
|
|||
starter = indirectStarter('http://spamusement.com/', prevSearch)
|
||||
|
||||
|
||||
class StrangeCandy(_BasicScraper):
|
||||
# XXX disallowed by robots.txt
|
||||
class _StrangeCandy(_BasicScraper):
|
||||
latestUrl = 'http://www.strangecandy.net/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comics/\d+\.jpg)'))
|
||||
|
|
|
@ -13,12 +13,12 @@ _attrs = dict(
|
|||
next = case_insensitive_re("next"),
|
||||
)
|
||||
_prevSearch = compile(_linkSearch +
|
||||
r'(?:<img[^>]+alt="[^"]*%(prev)s|<img[^>]+(?:button_previous|nav_prev4)\.|[^<]*%(back)s|\s*<<? (?:%(back)s|%(prev)s)|[^<]*%(prev)s)' % _attrs)
|
||||
r'(?:<img[^>]+alt="[^"]*%(prev)s|<img[^>]+(?:button_previous|naviButtons_Previous|nav_prev4|prev|previous|webbuttonback|PrevArrow)\.|[^<]*%(back)s|\s*<<? (?:%(back)s|%(prev)s)|[^<]*%(prev)s)' % _attrs)
|
||||
_nextSearch = compile(_linkSearch +
|
||||
r'(?:<img[^>]+alt="%(next)s|<img[^>]+(?:button_next|nav_next4)\.|\s*<?[^<]*%(next)s)' % _attrs)
|
||||
r'(?:<img[^>]+alt="%(next)s|<img[^>]+(?:button_next|naviButtons_Next|nav_next4|next|webbuttonnext-1|NextArrow)\.|\s*<?[^<]*%(next)s)' % _attrs)
|
||||
|
||||
def add(name, url, description, adult, bounce):
|
||||
classname = 'SmackJeeves/' + name
|
||||
classname = 'SmackJeeves_' + name
|
||||
|
||||
def modifier(pageUrl):
|
||||
if adult:
|
||||
|
@ -48,6 +48,7 @@ def add(name, url, description, adult, bounce):
|
|||
return "%s_%s" % (name, num)
|
||||
|
||||
globals()[classname] = make_scraper(classname,
|
||||
name = 'SmackJeeves/' + name,
|
||||
adult = adult,
|
||||
starter = _starter,
|
||||
prevUrlModifier = lambda cls, url: modifier(url),
|
||||
|
|
|
@ -71,7 +71,8 @@ class TinyKittenTeeth(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/stripname (unpadded)'
|
||||
|
||||
|
||||
class TwoLumps(_BasicScraper):
|
||||
# XXX disallowed by robots.txt
|
||||
class _TwoLumps(_BasicScraper):
|
||||
latestUrl = 'http://www.twolumps.net/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
||||
|
|
|
@ -26,7 +26,8 @@ class UnicornJelly(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class UserFriendly(_BasicScraper):
|
||||
# XXX disallowed by robots.txt
|
||||
class _UserFriendly(_BasicScraper):
|
||||
starter = bounceStarter('http://ars.userfriendly.org/cartoons/?mode=classic', compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="">'))
|
||||
stripUrl = 'http://ars.userfriendly.org/cartoons/?id=%s&mode=classic'
|
||||
imageSearch = compile(r'<img border="0" src="\s*(http://www.userfriendly.org/cartoons/archives/\d{2}\w{3}/.+?\.gif)"')
|
||||
|
|
|
@ -57,7 +57,8 @@ class WotNow(_BasicScraper):
|
|||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
class WorldOfWarcraftEh(_BasicScraper):
|
||||
# XXX disallowed by robots.txt
|
||||
class _WorldOfWarcraftEh(_BasicScraper):
|
||||
latestUrl = 'http://woweh.com/'
|
||||
stripUrl = None
|
||||
imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
|
||||
|
|
|
@ -30,6 +30,7 @@ exclude_comics = [
|
|||
"Apartment_408_Full_Size", # broken images
|
||||
"Apple_Valley", # broken images
|
||||
"Apt_408_Minis", # broken images
|
||||
"Art_dump", # broken images
|
||||
"Atxs", # broken images
|
||||
"A_Word_Of_Wisdom", # broken images
|
||||
"Brathalla", # broken images
|
||||
|
@ -64,6 +65,7 @@ exclude_comics = [
|
|||
"Inside_OuT", # broken images
|
||||
"Journey_to_Raifina", # broken images
|
||||
"KALA_dan", # broken images
|
||||
"Kuro_Shouri", # page moved
|
||||
"Live_to_tell", # start page requires login
|
||||
"Locoma", # broken images
|
||||
"London_Underworld", # broken images
|
||||
|
@ -141,7 +143,7 @@ exclude_comics = [
|
|||
"Weave", # broken images
|
||||
"Weirdlings", # template error
|
||||
"Welcome_To_Border_City", # broken images
|
||||
"what_comes_first", # start page requires login
|
||||
"What_comes_first", # start page requires login
|
||||
"Within_Shadows", # broken images
|
||||
"Xolta", # start page requires login
|
||||
"XTIN__The_Dragons_Dream_World", # start page requires login
|
||||
|
|
|
@ -38,6 +38,7 @@ exclude_comics = [
|
|||
"RichardsPoorAlmanac", # missing images
|
||||
"SherpaAid", # comic unavailable
|
||||
"SparComics", # comic unavailable
|
||||
"SurvivingSingle", # comic unavailable
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -23,6 +23,9 @@ htmltemplate = """
|
|||
</head>
|
||||
<body>
|
||||
<p>Dosage test results from %(date)s</p>
|
||||
<p>Note that it is almost impossible to get a 100%% OK test run
|
||||
due to temporary network failures or sites that are just updating
|
||||
the comic page.</p>
|
||||
<div id="container">
|
||||
%(content)s
|
||||
</div>
|
||||
|
@ -70,14 +73,16 @@ def get_content(filename):
|
|||
with open(filename, "r") as f:
|
||||
print("Tests parsed: 0", end=" ", file=sys.stderr)
|
||||
num_tests = 0
|
||||
add_reason = False
|
||||
for line in f:
|
||||
if line.startswith((". ", "F ")) and "test_comics" in line:
|
||||
add_reason = line.startswith("F ")
|
||||
num_tests += 1
|
||||
try:
|
||||
tests.append(get_test(line))
|
||||
add_reason = line.startswith("F ")
|
||||
except Exception as msg:
|
||||
print("WARNING:", msg, file=sys.stderr)
|
||||
continue
|
||||
elif add_reason and line.startswith(" E "):
|
||||
reason = line[3:].strip()
|
||||
tests[-1][-1] = reason
|
||||
|
|
|
@ -40,6 +40,8 @@ exclude_comics = [
|
|||
"MylifewithFel", # does not follow standard layout
|
||||
"NegativeZen", # does not follow standard layout
|
||||
"NightShot", # does not follow standard layout
|
||||
"NormalIsBoring", # does not follow standard layout
|
||||
"Okamirai", # images are 403 forbidden
|
||||
"OmnisSpriteShowcase", # missing images
|
||||
"OpticalDisarray", # does not follow standard layout
|
||||
"PicturesofYou", # does not follow standard layout
|
||||
|
@ -49,6 +51,7 @@ exclude_comics = [
|
|||
"Ribon", # does not follow standard layout
|
||||
"SecretSanta2011", # missing images
|
||||
"ShinkaTheLastEevee", # does not follow standard layout
|
||||
"SimplePixel", # does not follow standard layout
|
||||
"SJArtCollab", # missing images
|
||||
"SlightlyDifferent", # missing images
|
||||
"TheAfterSubtract", # does not follow standard layout
|
||||
|
|
|
@ -30,6 +30,7 @@ exclude_comics = [
|
|||
"JamesBond", # not a comic
|
||||
"Men", # not a comic
|
||||
"NEA", # not a comic
|
||||
"PeanutsPortuguese", # not found
|
||||
"Pets", # not a comic
|
||||
"SundayOnly", # not a comic
|
||||
"WebExclusive", # not a comic
|
||||
|
|
Loading…
Reference in a new issue