Updated exclusions.
This commit is contained in:
parent
fcdc67ef92
commit
b25e9e68ec
5 changed files with 47 additions and 42 deletions
|
@ -19,6 +19,7 @@ url_matcher = re.compile(r'<li><b><a href="(/thefunnies/[^"]+)">([^<]+)</a>')
|
||||||
|
|
||||||
# names of comics to exclude
|
# names of comics to exclude
|
||||||
exclude_comics = [
|
exclude_comics = [
|
||||||
|
"HagartheHorrible", # better source available
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -23,64 +23,29 @@ activity_matcher = re.compile(r'<b>Activity status:</b> <span class="comicinfo">
|
||||||
|
|
||||||
# names of comics to exclude
|
# names of comics to exclude
|
||||||
exclude_comics = [
|
exclude_comics = [
|
||||||
"6tsc", # unsuitable navigation
|
|
||||||
"Archininja", # unsuitable navigation
|
|
||||||
"BoozerandStoner", # unsuitable navigation
|
|
||||||
"Fuzzylittleninjas", # unsuitable navigation
|
|
||||||
"Kaze", # unsuitable navigation
|
|
||||||
"Sweetcheeriosandorangejuice", # unsuitable navigation
|
|
||||||
"Coolstorybro", # unsuitable navigation
|
|
||||||
"BUXY", # unsuitable navigation
|
|
||||||
"Icannotdraw", # unsuitable navigation
|
|
||||||
"ProjectX", # unsuitable navigation
|
|
||||||
"Insectia", # unsuitable navigation
|
|
||||||
"Oeight", # unsuitable navigation
|
|
||||||
"ReadershipofOne", # unsuitable navigation
|
|
||||||
"Haywire", # unsuitable navigation
|
|
||||||
"Immortalfool", # unsuitable navigation
|
|
||||||
"BlockTales", # unsuitable navigation
|
|
||||||
"Goldrush", # unsuitable navigation
|
|
||||||
"Theredeemers", # unsuitable navigation
|
|
||||||
"Lovekillsslowly", # unsuitable navigation
|
|
||||||
"Dotcomic", # unsuitable navigation
|
|
||||||
"Democomix", # unsuitable navigation
|
|
||||||
"Crepusculars", # unsuitable navigation
|
|
||||||
"Xenozone", # unsuitable navigation
|
|
||||||
"Rocr", # unsuitable navigation
|
|
||||||
"Mytvisevil", # unsuitable navigation
|
|
||||||
"Ofpf", # unsuitable navigation
|
|
||||||
"GRIND", # unsuitable navigation
|
|
||||||
"Tezzleandzeek", # unsuitable navigation
|
|
||||||
"Kmlssticks", # unsuitable navigation
|
|
||||||
"Bidoof", # unsuitable navigation
|
|
||||||
"Nemution", # unsuitable navigation
|
|
||||||
"Colorforce", # unsuitable navigation
|
|
||||||
"CtrlZ", # unsuitable navigation
|
|
||||||
"Monobow", # unsuitable navigation
|
|
||||||
"Mars", # unsuitable navigation
|
|
||||||
"ThornsInOurSide", # unsuitable navigation
|
|
||||||
"Longandexcitingjourney", # unsuitable navigation
|
|
||||||
"Unichat", # unsuitable navigation
|
|
||||||
"Lately", # unsuitable navigation
|
|
||||||
"Thestickmen", # unsuitable navigation
|
|
||||||
"Horizongakuen", # unsuitable navigation
|
|
||||||
"12yearsofmissj", # unsuitable navigation
|
"12yearsofmissj", # unsuitable navigation
|
||||||
"3DGlasses", # unsuitable navigation
|
"3DGlasses", # unsuitable navigation
|
||||||
|
"6tsc", # unsuitable navigation
|
||||||
"Abyss", # unsuitable navigation
|
"Abyss", # unsuitable navigation
|
||||||
"Actdr", # unsuitable navigation
|
"Actdr", # unsuitable navigation
|
||||||
"Aerosol", # unsuitable navigation
|
"Aerosol", # unsuitable navigation
|
||||||
"Alienirony", # unsuitable navigation
|
"Alienirony", # unsuitable navigation
|
||||||
"AngelguardianEspanol", # unsuitable navigation
|
"AngelguardianEspanol", # unsuitable navigation
|
||||||
"Angryalien", # unsuitable navigation
|
"Angryalien", # unsuitable navigation
|
||||||
|
"Archininja", # unsuitable navigation
|
||||||
"Arveytoonz", # unsuitable navigation
|
"Arveytoonz", # unsuitable navigation
|
||||||
"AttackoftheRobofemoids", # unsuitable navigation
|
"AttackoftheRobofemoids", # unsuitable navigation
|
||||||
"Bedlam", # unsuitable navigation
|
"Bedlam", # unsuitable navigation
|
||||||
|
"Bidoof", # unsuitable navigation
|
||||||
|
"BlockTales", # unsuitable navigation
|
||||||
"Bobcomix", # unsuitable navigation
|
"Bobcomix", # unsuitable navigation
|
||||||
"Bonejangles", # unsuitable navigation
|
"Bonejangles", # unsuitable navigation
|
||||||
|
"BoozerandStoner", # unsuitable navigation
|
||||||
"Boyaurus", # unsuitable navigation
|
"Boyaurus", # unsuitable navigation
|
||||||
"Brainfood", # unsuitable navigation
|
"Brainfood", # unsuitable navigation
|
||||||
"Bromosworld", # unsuitable navigation
|
"Bromosworld", # unsuitable navigation
|
||||||
"BulletMythology", # unsuitable navigation
|
"BulletMythology", # unsuitable navigation
|
||||||
|
"BUXY", # unsuitable navigation
|
||||||
"CafeGruesome", # unsuitable navigation
|
"CafeGruesome", # unsuitable navigation
|
||||||
"Chanpuru", # unsuitable navigation
|
"Chanpuru", # unsuitable navigation
|
||||||
"Christmaswithmaddog", # unsuitable navigation
|
"Christmaswithmaddog", # unsuitable navigation
|
||||||
|
@ -90,9 +55,16 @@ exclude_comics = [
|
||||||
"ConsequencesOfChoice", # unsuitable navigation
|
"ConsequencesOfChoice", # unsuitable navigation
|
||||||
"CoolYuleComics", # unsuitable navigation
|
"CoolYuleComics", # unsuitable navigation
|
||||||
"Crossworldsnexus", # unsuitable navigation
|
"Crossworldsnexus", # unsuitable navigation
|
||||||
|
"Colorforce", # unsuitable navigation
|
||||||
|
"Coolstorybro", # unsuitable navigation
|
||||||
|
"Crepusculars", # unsuitable navigation
|
||||||
|
"CtrlZ", # unsuitable navigation
|
||||||
"DeadNight", # unsuitable navigation
|
"DeadNight", # unsuitable navigation
|
||||||
|
"Democomix", # unsuitable navigation
|
||||||
"Dinosaurkingdom", # unsuitable navigation
|
"Dinosaurkingdom", # unsuitable navigation
|
||||||
|
"Dotcomic", # unsuitable navigation
|
||||||
"Droned", # unsuitable navigation
|
"Droned", # unsuitable navigation
|
||||||
|
"Effingukookoo", # unsuitable navigation
|
||||||
"ErraticBeat", # unsuitable navigation
|
"ErraticBeat", # unsuitable navigation
|
||||||
"Evilbear", # unsuitable navigation
|
"Evilbear", # unsuitable navigation
|
||||||
"Ewmic", # unsuitable navigation
|
"Ewmic", # unsuitable navigation
|
||||||
|
@ -104,6 +76,7 @@ exclude_comics = [
|
||||||
"Frigginrandom", # unsuitable navigation
|
"Frigginrandom", # unsuitable navigation
|
||||||
"Frostfire", # unsuitable navigation
|
"Frostfire", # unsuitable navigation
|
||||||
"Furnerdy", # unsuitable navigation
|
"Furnerdy", # unsuitable navigation
|
||||||
|
"Fuzzylittleninjas", # unsuitable navigation
|
||||||
"Garfieldminusjon", # unsuitable navigation
|
"Garfieldminusjon", # unsuitable navigation
|
||||||
"Gatito", # unsuitable navigation
|
"Gatito", # unsuitable navigation
|
||||||
"Gbksayonara", # unsuitable navigation
|
"Gbksayonara", # unsuitable navigation
|
||||||
|
@ -111,6 +84,9 @@ exclude_comics = [
|
||||||
"Gratz", # unsuitable navigation
|
"Gratz", # unsuitable navigation
|
||||||
"Greygaroutopheavyartwork", # unsuitable navigation
|
"Greygaroutopheavyartwork", # unsuitable navigation
|
||||||
"GrimReaperSchool", # unsuitable navigation
|
"GrimReaperSchool", # unsuitable navigation
|
||||||
|
"Goldrush", # unsuitable navigation
|
||||||
|
"GRIND", # unsuitable navigation
|
||||||
|
"Haywire", # unsuitable navigation
|
||||||
"Hallodri", # unsuitable navigation
|
"Hallodri", # unsuitable navigation
|
||||||
"Harrysorehead", # unsuitable navigation
|
"Harrysorehead", # unsuitable navigation
|
||||||
"HazSci", # unsuitable navigation
|
"HazSci", # unsuitable navigation
|
||||||
|
@ -118,9 +94,13 @@ exclude_comics = [
|
||||||
"Herecomesskeeter", # unsuitable navigation
|
"Herecomesskeeter", # unsuitable navigation
|
||||||
"Holycowcomics", # unsuitable navigation
|
"Holycowcomics", # unsuitable navigation
|
||||||
"Houseescapeold", # unsuitable navigation
|
"Houseescapeold", # unsuitable navigation
|
||||||
|
"Horizongakuen", # unsuitable navigation
|
||||||
|
"Icannotdraw", # unsuitable navigation
|
||||||
"Ign", # unsuitable navigation
|
"Ign", # unsuitable navigation
|
||||||
"Illusionoftime", # unsuitable navigation
|
"Illusionoftime", # unsuitable navigation
|
||||||
"InsideOuT", # unsuitable navigation
|
"InsideOuT", # unsuitable navigation
|
||||||
|
"Immortalfool", # unsuitable navigation
|
||||||
|
"Insectia", # unsuitable navigation
|
||||||
"Jackitandfriends", # unsuitable navigation
|
"Jackitandfriends", # unsuitable navigation
|
||||||
"Jenffersshow5", # unsuitable navigation
|
"Jenffersshow5", # unsuitable navigation
|
||||||
"Johnsonsuperior", # unsuitable navigation
|
"Johnsonsuperior", # unsuitable navigation
|
||||||
|
@ -128,14 +108,20 @@ exclude_comics = [
|
||||||
"Journ", # unsuitable navigation
|
"Journ", # unsuitable navigation
|
||||||
"JourneyToRaifina", # unsuitable navigation
|
"JourneyToRaifina", # unsuitable navigation
|
||||||
"Junk", # unsuitable navigation
|
"Junk", # unsuitable navigation
|
||||||
|
"Kaze", # unsuitable navigation
|
||||||
|
"Kmlssticks", # unsuitable navigation
|
||||||
"KiLAiLO", # unsuitable navigation
|
"KiLAiLO", # unsuitable navigation
|
||||||
"Kingdomprettycure", # unsuitable navigation
|
"Kingdomprettycure", # unsuitable navigation
|
||||||
"Kmfe", # unsuitable navigation
|
"Kmfe", # unsuitable navigation
|
||||||
|
"Lately", # unsuitable navigation
|
||||||
"Legendoftheredphantom", # unsuitable navigation
|
"Legendoftheredphantom", # unsuitable navigation
|
||||||
"Littlephoenix", # unsuitable navigation
|
"Littlephoenix", # unsuitable navigation
|
||||||
"Llwhoelterran", # unsuitable navigation
|
"Llwhoelterran", # unsuitable navigation
|
||||||
"Lomeathandhuilii", # unsuitable navigation
|
"Lomeathandhuilii", # unsuitable navigation
|
||||||
|
"Longandexcitingjourney", # unsuitable navigation
|
||||||
|
"Lovekillsslowly", # unsuitable navigation
|
||||||
"Mannack", # unsuitable navigation
|
"Mannack", # unsuitable navigation
|
||||||
|
"Mars", # unsuitable navigation
|
||||||
"MaskoftheAryans", # unsuitable navigation
|
"MaskoftheAryans", # unsuitable navigation
|
||||||
"Megamaiden", # unsuitable navigation
|
"Megamaiden", # unsuitable navigation
|
||||||
"Minecraft2b2t", # unsuitable navigation
|
"Minecraft2b2t", # unsuitable navigation
|
||||||
|
@ -145,11 +131,16 @@ exclude_comics = [
|
||||||
"MoonlightValley", # unsuitable navigation
|
"MoonlightValley", # unsuitable navigation
|
||||||
"MurghComics", # unsuitable navigation
|
"MurghComics", # unsuitable navigation
|
||||||
"MVPL", # unsuitable navigation
|
"MVPL", # unsuitable navigation
|
||||||
|
"Monobow", # unsuitable navigation
|
||||||
|
"Mytvisevil", # unsuitable navigation
|
||||||
"Natao", # unsuitable navigation
|
"Natao", # unsuitable navigation
|
||||||
|
"Nemution", # unsuitable navigation
|
||||||
"NMG", # unsuitable navigation
|
"NMG", # unsuitable navigation
|
||||||
"Noche", # unsuitable navigation
|
"Noche", # unsuitable navigation
|
||||||
"Noprrkele", # unsuitable navigation
|
"Noprrkele", # unsuitable navigation
|
||||||
"Nothingfitsartblog", # unsuitable navigation
|
"Nothingfitsartblog", # unsuitable navigation
|
||||||
|
"Oeight", # unsuitable navigation
|
||||||
|
"Ofpf", # unsuitable navigation
|
||||||
"Old2g", # unsuitable navigation
|
"Old2g", # unsuitable navigation
|
||||||
"Outtolunch", # unsuitable navigation
|
"Outtolunch", # unsuitable navigation
|
||||||
"Parisel313", # unsuitable navigation
|
"Parisel313", # unsuitable navigation
|
||||||
|
@ -158,6 +149,8 @@ exclude_comics = [
|
||||||
"Pi5a", # unsuitable navigation
|
"Pi5a", # unsuitable navigation
|
||||||
"Pokemonwarpers", # unsuitable navigation
|
"Pokemonwarpers", # unsuitable navigation
|
||||||
"Princess", # unsuitable navigation
|
"Princess", # unsuitable navigation
|
||||||
|
"ProjectX", # unsuitable navigation
|
||||||
|
"ReadershipofOne", # unsuitable navigation
|
||||||
"Queenie", # unsuitable navigation
|
"Queenie", # unsuitable navigation
|
||||||
"Rain", # unsuitable navigation
|
"Rain", # unsuitable navigation
|
||||||
"Ratantia", # unsuitable navigation
|
"Ratantia", # unsuitable navigation
|
||||||
|
@ -167,6 +160,7 @@ exclude_comics = [
|
||||||
"Requiem", # unsuitable navigation
|
"Requiem", # unsuitable navigation
|
||||||
"Retrofiyora", # unsuitable navigation
|
"Retrofiyora", # unsuitable navigation
|
||||||
"Rexfordavenue", # unsuitable navigation
|
"Rexfordavenue", # unsuitable navigation
|
||||||
|
"Rocr", # unsuitable navigation
|
||||||
"S", # unsuitable navigation
|
"S", # unsuitable navigation
|
||||||
"Sandgate", # unsuitable navigation
|
"Sandgate", # unsuitable navigation
|
||||||
"Shadowstories", # unsuitable navigation
|
"Shadowstories", # unsuitable navigation
|
||||||
|
@ -180,6 +174,7 @@ exclude_comics = [
|
||||||
"StrangerThanFiction", # unsuitable navigation
|
"StrangerThanFiction", # unsuitable navigation
|
||||||
"SundaySmash", # unsuitable navigation
|
"SundaySmash", # unsuitable navigation
|
||||||
"Superproultimatewrestling", # unsuitable navigation
|
"Superproultimatewrestling", # unsuitable navigation
|
||||||
|
"Sweetcheeriosandorangejuice", # unsuitable navigation
|
||||||
"Synapticisms", # unsuitable navigation
|
"Synapticisms", # unsuitable navigation
|
||||||
"Talesofspoons", # unsuitable navigation
|
"Talesofspoons", # unsuitable navigation
|
||||||
"Terwilligers", # unsuitable navigation
|
"Terwilligers", # unsuitable navigation
|
||||||
|
@ -192,6 +187,12 @@ exclude_comics = [
|
||||||
"TOGM", # unsuitable navigation
|
"TOGM", # unsuitable navigation
|
||||||
"Townburgcity", # unsuitable navigation
|
"Townburgcity", # unsuitable navigation
|
||||||
"Tuhinaloota", # unsuitable navigation
|
"Tuhinaloota", # unsuitable navigation
|
||||||
|
"Tezzleandzeek", # unsuitable navigation
|
||||||
|
"Theredeemers", # unsuitable navigation
|
||||||
|
"Thestickmen", # unsuitable navigation
|
||||||
|
"Thingsthatannoyme", # unsuitable navigation
|
||||||
|
"ThornsInOurSide", # unsuitable navigation
|
||||||
|
"Unichat", # unsuitable navigation
|
||||||
"UFPA", # unsuitable navigation
|
"UFPA", # unsuitable navigation
|
||||||
"V4", # unsuitable navigation
|
"V4", # unsuitable navigation
|
||||||
"Verboten", # unsuitable navigation
|
"Verboten", # unsuitable navigation
|
||||||
|
@ -203,6 +204,7 @@ exclude_comics = [
|
||||||
"WindRiders", # unsuitable navigation
|
"WindRiders", # unsuitable navigation
|
||||||
"WitchesTeaParty", # unsuitable navigation
|
"WitchesTeaParty", # unsuitable navigation
|
||||||
"Woohooligan", # unsuitable navigation
|
"Woohooligan", # unsuitable navigation
|
||||||
|
"Xenozone", # unsuitable navigation
|
||||||
"XWingAlliance", # unsuitable navigation
|
"XWingAlliance", # unsuitable navigation
|
||||||
"Yppcomic", # unsuitable navigation
|
"Yppcomic", # unsuitable navigation
|
||||||
"Zeroeffort", # unsuitable navigation
|
"Zeroeffort", # unsuitable navigation
|
||||||
|
|
|
@ -39,6 +39,7 @@ exclude_comics = [
|
||||||
"OysterWar", # too few comics
|
"OysterWar", # too few comics
|
||||||
"PIGTIMES", # comic unavailable
|
"PIGTIMES", # comic unavailable
|
||||||
"PS", # comic unavailable
|
"PS", # comic unavailable
|
||||||
|
"RatchetAndSpin", # too few comics
|
||||||
"RichardsPoorAlmanac", # missing images
|
"RichardsPoorAlmanac", # missing images
|
||||||
"SherpaAid", # comic unavailable
|
"SherpaAid", # comic unavailable
|
||||||
"Slowpoke", # comic moved
|
"Slowpoke", # comic moved
|
||||||
|
|
|
@ -84,7 +84,7 @@ def handle_url(url, session, res):
|
||||||
|
|
||||||
|
|
||||||
def get_description(url, session):
|
def get_description(url, session):
|
||||||
"""Get comic stirp description."""
|
"""Get comic strip description."""
|
||||||
try:
|
try:
|
||||||
data, baseUrl = getPageContent(url, session)
|
data, baseUrl = getPageContent(url, session)
|
||||||
except IOError as msg:
|
except IOError as msg:
|
||||||
|
|
|
@ -34,6 +34,7 @@ exclude_comics = [
|
||||||
"GART", # does not follow standard layout
|
"GART", # does not follow standard layout
|
||||||
"GBAsCrib", # timeout
|
"GBAsCrib", # timeout
|
||||||
"HEARD", # missing images
|
"HEARD", # missing images
|
||||||
|
"Indigo", # broken domain name
|
||||||
"IwillbenapoSpamDump", # missing images
|
"IwillbenapoSpamDump", # missing images
|
||||||
"JennyHaniver", # does not follow standard layout
|
"JennyHaniver", # does not follow standard layout
|
||||||
"KiLAiLO", # does not follow standard layout
|
"KiLAiLO", # does not follow standard layout
|
||||||
|
|
Loading…
Reference in a new issue