diff --git a/scripts/keenspot.py b/scripts/keenspot.py
index 14ed84a56..c8376b9fc 100755
--- a/scripts/keenspot.py
+++ b/scripts/keenspot.py
@@ -22,7 +22,7 @@ url_matcher = re.compile(
r"(?:)?([^<]+)(?:)?"
)
descurl_matcher = re.compile(r"(desc/[^']+\.html)")
-desc_matcher = re.compile(r'
(.+)(?:)?', re.DOTALL)
+desc_matcher = re.compile(ur'
(.+)(?:)?', re.DOTALL)
# names of comics to exclude
exclude_comics = [
@@ -89,10 +89,10 @@ def get_description(url, session):
data, baseUrl = getPageContent(url, session)
except IOError as msg:
print("ERROR:", msg, file=sys.stderr)
- return ""
+ return u""
mo = desc_matcher.search(data)
if not mo:
- print(data)
+ print("ERROR:", repr(data))
return format_description(mo.group(1))
diff --git a/scripts/scriptutil.py b/scripts/scriptutil.py
index 9dcfde477..00b92732d 100644
--- a/scripts/scriptutil.py
+++ b/scripts/scriptutil.py
@@ -14,7 +14,7 @@ def contains_case_insensitive(adict, akey):
_tagre = re.compile(r"<.+?>")
def remove_html_tags(text):
"""Remove all HTML tags from text."""
- return _tagre.sub("", text)
+ return _tagre.sub(u"", text)
def capfirst(text):
@@ -24,12 +24,12 @@ def capfirst(text):
return text[0].upper() + text[1:]
-_ws = re.compile(r"\s+")
+_ws = re.compile(ur"\s+")
def compact_whitespace(text):
"""Compact all subsequent whitespace to a single space."""
if not text:
return text
- return _ws.sub(" ", text)
+ return _ws.sub(u" ", text)
def save_result(res, json_file):
@@ -52,7 +52,7 @@ def truncate_name(text):
def format_name(text):
"""Format a comic name."""
name = unescape(text)
- name = asciify(name.replace('&', 'And').replace('@', 'At'))
+ name = asciify(name.replace(u'&', u'And').replace(u'@', u'At'))
name = capfirst(name)
return name