From e4794ed9296229cdcae279facf54381c745446d0 Mon Sep 17 00:00:00 2001 From: Grine Alima <33562691+GIMALima@users.noreply.github.com> Date: Sun, 14 Apr 2019 17:24:10 +0200 Subject: [PATCH] Update tokenizer.py --- tokenizer/tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tokenizer/tokenizer.py b/tokenizer/tokenizer.py index 63fc61f..44b060b 100644 --- a/tokenizer/tokenizer.py +++ b/tokenizer/tokenizer.py @@ -57,7 +57,7 @@ #my emoticons, borrowed & expanded from https://github.com/g-c-k/idiml/blob/master/predict/src/main/resources/data/emoticons.txt EMOTICONS = [] -with open(EMOTICONS_FILE, 'r') as f: +with open(EMOTICONS_FILE, 'r', encoding="utf-8") as f: for line in f: item = line.rstrip('\n') item = re.escape(item)