Code cleanup.

Moves the per-script save_result() helper into scriptutil.py, adds a matching
load_result() helper, and downgrades the "skipping possible duplicate" message
from WARN to INFO.

parent 389ae838b8
commit afa93b498b

7 changed files with 39 additions and 79 deletions
@@ -7,10 +7,9 @@ from __future__ import print_function
 import re
 import sys
 import os
-import json
 sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
 from dosagelib.util import getPageContent, asciify, unescape, tagre
-from scriptutil import contains_case_insensitive, capfirst
+from scriptutil import contains_case_insensitive, capfirst, save_result, load_result
 
 json_file = __file__.replace(".py", ".json")
 
@@ -37,30 +36,22 @@ def handle_url(url, res):
             continue
         if contains_case_insensitive(res, name):
             # we cannot handle two comics that only differ in case
-            print("WARN: skipping possible duplicate", name, file=sys.stderr)
+            print("INFO: skipping possible duplicate", name, file=sys.stderr)
             continue
         res[name] = url
 
 
-def save_result(res):
-    """Save result to file."""
-    with open(json_file, 'wb') as f:
-        json.dump(res, f, sort_keys=True)
-
-
 def get_results():
     """Parse all search result pages."""
     # store info in a dictionary {name -> shortname}
     res = {}
     handle_url('http://www.creators.com/comics/cat-seeall.html', res)
-    save_result(res)
+    save_result(res, json_file)
 
 
 def print_results(args):
     """Print comics."""
-    with open(json_file, "rb") as f:
-        comics = json.load(f)
-    for name, url in sorted(comics.items()):
+    for name, url in sorted(load_result(json_file).items()):
         if name in exclude_comics:
             continue
         print("add(%r, %r)" % (str(name), str(url)))
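The same three-step cleanup repeats in every script below: the local import json and save_result() copy are deleted, the shared helpers are imported from scriptutil, and the inline json.load boilerplate collapses into one load_result() call. A condensed sketch of the resulting pattern (file name, URLs, and data are illustrative only, not from the commit; Python 2, as in the scripts):

    from __future__ import print_function
    from scriptutil import save_result, load_result  # shared helpers after this cleanup

    json_file = "example.json"         # the real scripts derive this from __file__
    exclude_comics = ["SkippedComic"]  # illustrative exclusion list

    # Illustrative {name -> url} mapping, as handle_url() builds it.
    res = {"SomeComic": "http://example.com/somecomic",
           "SkippedComic": "http://example.com/skipped"}
    save_result(res, json_file)        # was: a per-script save_result(res)
    for name, url in sorted(load_result(json_file).items()):
        if name in exclude_comics:
            continue
        print("add(%r, %r)" % (str(name), str(url)))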
@@ -7,10 +7,9 @@ from __future__ import print_function
 import re
 import sys
 import os
-import json
 sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
 from dosagelib.util import tagre, getPageContent, unquote, unescape, asciify
-from scriptutil import contains_case_insensitive, capfirst
+from scriptutil import contains_case_insensitive, capfirst, save_result, load_result
 
 json_file = __file__.replace(".py", ".json")
 
@@ -167,7 +166,7 @@ def handle_url(url, url_matcher, num_matcher, res):
         name = capfirst(asciify(path))
         if contains_case_insensitive(res, name):
             # we cannot handle two comics that only differ in case
-            print("WARN: skipping possible duplicate", name, file=sys.stderr)
+            print("INFO: skipping possible duplicate", name, file=sys.stderr)
             continue
         if name in exclude_comics:
             continue
@@ -181,12 +180,6 @@ def handle_url(url, url_matcher, num_matcher, res):
         res[name] = (path, num)
 
 
-def save_result(res):
-    """Save result to file."""
-    with open(json_file, 'wb') as f:
-        json.dump(res, f, sort_keys=True)
-
-
 def get_results():
     """Parse all search result pages."""
     base = "http://www.drunkduck.com/search/?page=%d&search=&type=0&type=1&last_update="
@@ -200,14 +193,12 @@ def get_results():
     for i in range(1, result_pages + 1):
         print(i, file=sys.stderr, end=" ")
         handle_url(base % i, href, num, res)
-    save_result(res)
+    save_result(res, json_file)
 
 
 def print_results(min_strips):
     """Print all comics that have at least the given number of minimum comic strips."""
-    with open(json_file, "rb") as f:
-        comics = json.load(f)
-    for name, entry in sorted(comics.items()):
+    for name, entry in sorted(load_result(json_file).items()):
         if name in exclude_comics:
             continue
         path, num = entry
@@ -7,11 +7,10 @@ from __future__ import print_function
 import re
 import sys
 import os
-import json
 sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
 from dosagelib.util import tagre, getPageContent, asciify, unescape
 from dosagelib.scraper import get_scrapers
-from scriptutil import contains_case_insensitive, capfirst
+from scriptutil import contains_case_insensitive, capfirst, save_result, load_result
 
 json_file = __file__.replace(".py", ".json")
 
@@ -59,17 +58,11 @@ def handle_url(url, res):
             continue
         if contains_case_insensitive(res, name):
             # we cannot handle two comics that only differ in case
-            print("WARN: skipping possible duplicate", name, file=sys.stderr)
+            print("INFO: skipping possible duplicate", name, file=sys.stderr)
             continue
         res[name] = shortname
 
 
-def save_result(res):
-    """Save result to file."""
-    with open(json_file, 'wb') as f:
-        json.dump(res, f, sort_keys=True)
-
-
 def get_results():
     """Parse all search result pages."""
     # store info in a dictionary {name -> shortname}
@@ -77,7 +70,7 @@ def get_results():
     handle_url('http://www.gocomics.com/features', res)
     handle_url('http://www.gocomics.com/explore/editorial_list', res)
     handle_url('http://www.gocomics.com/explore/sherpa_list', res)
-    save_result(res)
+    save_result(res, json_file)
 
 
 def has_creators_comic(name):
@@ -91,9 +84,7 @@ def has_creators_comic(name):
 
 def print_results(args):
     """Print all comics that have at least the given number of minimum comic strips."""
-    with open(json_file, "rb") as f:
-        comics = json.load(f)
-    for name, shortname in sorted(comics.items()):
+    for name, shortname in sorted(load_result(json_file).items()):
         if name in exclude_comics:
             continue
         if has_creators_comic(name):
@@ -7,11 +7,10 @@ from __future__ import print_function
 import re
 import sys
 import os
-import json
 sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
 from dosagelib.util import getPageContent, asciify, unescape, tagre
 from dosagelib.scraper import get_scrapers
-from scriptutil import contains_case_insensitive, capfirst
+from scriptutil import contains_case_insensitive, capfirst, save_result, load_result
 
 json_file = __file__.replace(".py", ".json")
 
@@ -379,7 +378,7 @@ def handle_url(url, res):
             continue
         if contains_case_insensitive(res, name):
             # we cannot handle two comics that only differ in case
-            print("WARN: skipping possible duplicate", name, file=sys.stderr)
+            print("INFO: skipping possible duplicate", name, file=sys.stderr)
             continue
         # find out how many images this comic has
         end = match.end()
@@ -391,12 +390,6 @@ def handle_url(url, res):
         res[name] = (url_overrides.get(name, url), num)
 
 
-def save_result(res):
-    """Save result to file."""
-    with open(json_file, 'wb') as f:
-        json.dump(res, f, sort_keys=True)
-
-
 def get_results():
     """Parse all search result pages."""
     # store info in a dictionary {name -> shortname}
@@ -404,7 +397,7 @@ def get_results():
     base = 'http://guide.comicgenesis.com/Keenspace_%s.html'
     for c in '0ABCDEFGHIJKLMNOPQRSTUVWXYZ':
         handle_url(base % c, res)
-    save_result(res)
+    save_result(res, json_file)
 
 
 def has_comic(name):
@@ -420,9 +413,7 @@ def has_comic(name):
 def print_results(args):
     """Print all comics that have at least the given number of minimum comic strips."""
     min_comics = int(args[0])
-    with open(json_file, "rb") as f:
-        comics = json.load(f)
-    for name, entry in sorted(comics.items()):
+    for name, entry in sorted(load_result(json_file).items()):
         if name in exclude_comics:
             continue
         url, num = entry
@@ -1,8 +1,9 @@
 # Copyright (C) 2012 Bastian Kleineidam
 import re
+import json
 
 def contains_case_insensitive(adict, akey):
     """Check if key is in adict. The search is case insensitive."""
     for key in adict:
         if key.lower() == akey.lower():
             return True
@@ -11,6 +12,7 @@ def contains_case_insensitive(adict, akey):
 
 _tagre = re.compile(r"<.+?>")
 def remove_html_tags(text):
+    """Remove all HTML tags from text."""
     return _tagre.sub("", text)
 
 
@@ -23,6 +25,18 @@ def capfirst(text):
 
 _ws = re.compile(r"\s+")
 def compact_whitespace(text):
+    """Compact all subsequent whitespace to a single space."""
     if not text:
         return text
     return _ws.sub(" ", text)
+
+
+def save_result(res, json_file):
+    """Save result to file."""
+    with open(json_file, 'wb') as f:
+        json.dump(res, f, sort_keys=True)
+
+
+def load_result(json_file):
+    with open(json_file, "rb") as f:
+        return json.load(f)
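The two helpers consolidated above replace six per-script copies. A minimal round-trip sketch (file name and sample data are illustrative only; the binary 'wb'/'rb' modes follow the commit's Python 2 target, where json.dump and json.load accept binary files):

    from scriptutil import save_result, load_result

    # Illustrative {name -> shortname} mapping, as the scraper scripts build it.
    res = {"SomeComic": "somecomic"}
    save_result(res, "example.json")           # hypothetical file name
    assert load_result("example.json") == res  # sort_keys=True keeps the file deterministic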
@@ -8,11 +8,10 @@ import re
 import sys
 import os
 import urlparse
-import json
 sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
 from dosagelib.util import getPageContent, asciify, unescape, tagre, unquote
 from dosagelib.scraper import get_scrapers
-from scriptutil import contains_case_insensitive, remove_html_tags, capfirst, compact_whitespace
+from scriptutil import contains_case_insensitive, remove_html_tags, capfirst, compact_whitespace, save_result, load_result
 
 json_file = __file__.replace(".py", ".json")
 
@@ -231,7 +230,7 @@ def handle_url(url, res):
             continue
         if contains_case_insensitive(res, name):
             # we cannot handle two comics that only differ in case
-            print("WARN: skipping possible duplicate", name, file=sys.stderr)
+            print("INFO: skipping possible duplicate", name, file=sys.stderr)
             continue
         # find out how many images this comic has
         end = match.end()
@@ -270,12 +269,6 @@ def handle_url(url, res):
 ]
 
 
-def save_result(res):
-    """Save result to file."""
-    with open(json_file, 'wb') as f:
-        json.dump(res, f, sort_keys=True)
-
-
 def get_results():
     """Parse all search result pages."""
     base = "http://www.smackjeeves.com/search.php?submit=Search+for+Webcomics&search_mode=webcomics&comic_title=&special=all&last_update=3&style_all=on&genre_all=on&format_all=on&sort_by=2&start=%d"
@@ -287,7 +280,7 @@ def get_results():
     for i in range(0, result_pages):
         print(i+1, file=sys.stderr, end=" ")
         handle_url(base % (i*12), res)
-    save_result(res)
+    save_result(res, json_file)
 
 
 def has_comic(name):
@@ -302,9 +295,7 @@ def has_comic(name):
 def print_results(args):
     """Print all comics that have at least the given number of minimum comic strips."""
     min_comics = int(args[0])
-    with open(json_file, "rb") as f:
-        comics = json.load(f)
-    for name, entry in sorted(comics.items()):
+    for name, entry in sorted(load_result(json_file).items()):
         if name in exclude_comics:
             continue
         url, num, desc, adult, bounce = entry
@@ -7,11 +7,10 @@ from __future__ import print_function
 import re
 import sys
 import os
-import json
 sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
 from dosagelib.util import getPageContent, asciify, unescape
 from dosagelib.scraper import get_scrapers
-from scriptutil import contains_case_insensitive, capfirst
+from scriptutil import contains_case_insensitive, capfirst, save_result, load_result
 
 json_file = __file__.replace(".py", ".json")
 
@@ -55,23 +54,17 @@ def handle_url(url, res):
             continue
         if contains_case_insensitive(res, name):
             # we cannot handle two comics that only differ in case
-            print("WARN: skipping possible duplicate", name, file=sys.stderr)
+            print("INFO: skipping possible duplicate", name, file=sys.stderr)
             continue
         res[name] = shortname
 
 
-def save_result(res):
-    """Save result to file."""
-    with open(json_file, 'wb') as f:
-        json.dump(res, f, sort_keys=True)
-
-
 def get_results():
     """Parse all search result pages."""
     # store info in a dictionary {name -> shortname}
     res = {}
     handle_url('http://www.universaluclick.com/comics/list', res)
-    save_result(res)
+    save_result(res, json_file)
 
 
 def has_comic(name):
@@ -86,9 +79,7 @@ def has_comic(name):
 
 def print_results(args):
     """Print all comics that have at least the given number of minimum comic strips."""
-    with open(json_file, "rb") as f:
-        comics = json.load(f)
-    for name, shortname in sorted(comics.items()):
+    for name, shortname in sorted(load_result(json_file).items()):
         if name in exclude_comics:
             continue
         if has_comic(name):