Skip to content

Commit adb71bd

Browse files
committed
Fix unicode handling in text parsing, which threw fatal exceptions
1 parent bae9d2d commit adb71bd

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

slack_parse.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
import json
2020
import re
2121
import sys
22+
import unicodedata
2223

2324
# Import the user id <-> nick dict
2425
users = {}
@@ -82,9 +83,11 @@ def parse_line(line):
8283

8384
def replace_with_special(text):
8485
"Substitutes user ids with nicks, channel ids with channels, escaped chars"
86+
unicode_pass = \
87+
unicodedata.normalize("NFKD", text).encode("ascii", "ignore")
8588
users_pass = re.sub(r"<@(U[0-9A-Z]{8})(\|[^>]*)?>",
8689
lambda x: users[x.group(1)],
87-
text)
90+
unicode_pass)
8891
channels_pass = re.sub(r"<#(C[0-9A-Z]{8})>",
8992
lambda x: channels[x.group(1)],
9093
users_pass)

0 commit comments

Comments (0)