dryan · amrael · Apr 3, 2017
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,4 @@
 argparse==1.2.1
 PyYAML==3.10
 beautifulsoup4==4.2.0
+six==1.10.0
diff --git a/tests.py b/tests.py
@@ -1,11 +1,12 @@
 # encoding=utf-8
 
+import six
 import twitter_text, sys, os, json, argparse, re
 from twitter_text.unicode import force_unicode
 
 narrow_build = True
 try:
-    unichr(0x20000)
+    six.unichr(0x20000)
     narrow_build = False
 except:
     pass
@@ -177,4 +178,4 @@ def assert_equal(result, test):
 
 sys.stdout.write(u'\033[0m-------\n\033[92m%d tests passed.\033[0m\n' % attempted)
 sys.stdout.flush()
-sys.exit(os.EX_OK)
+sys.exit(os.EX_OK)
diff --git a/twitter_text/autolink.py b/twitter_text/autolink.py
@@ -1,5 +1,5 @@
 # encoding=utf-8
-
+from __future__ import unicode_literals
 import re, cgi
 
 from twitter_text.regex import REGEXEN
@@ -113,7 +113,7 @@ def auto_link_entities(self, entities = [], options = {}):
             return self.text
 
         # NOTE deprecate these attributes not options keys in options hash, then use html_attrs
-        options = dict(DEFAULT_OPTIONS.items() + options.items())
+        options = dict(list(DEFAULT_OPTIONS.items()) + list(options.items()))
         options['html_attrs'] = self._extract_html_attrs_from_options(options)
         if not options.get('suppress_no_follow', False):
             options['html_attrs']['rel'] = "nofollow"
@@ -302,16 +302,16 @@ def _link_url_with_entity(self, entity, options = {}):
         For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts.
         For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine.
         """
-        display_url = entity.get('display_url').decode('utf-8')
+        display_url = entity.get('display_url')
         expanded_url = entity.get('expanded_url')
         invisible_tag_attrs = options.get('invisible_tag_attrs', DEFAULT_INVISIBLE_TAG_ATTRS)
 
-        display_url_sans_ellipses = re.sub(ur'…', u'', display_url)
+        display_url_sans_ellipses = re.sub(r'…', '', display_url)
 
         if expanded_url.find(display_url_sans_ellipses) > -1:
-            before_display_url, after_display_url = expanded_url.split(display_url_sans_ellipses, 2)
+            before_display_url, after_display_url = expanded_url.split(display_url_sans_ellipses, 1)  # maxsplit=1 -> always exactly two parts
-            preceding_ellipsis = re.search(ur'\A…', display_url)
-            following_ellipsis = re.search(ur'…\z', display_url)
+            preceding_ellipsis = re.search(r'\A…', display_url)
+            following_ellipsis = re.search(r'…\Z', display_url)
             if preceding_ellipsis is not None:
                 preceding_ellipsis = preceding_ellipsis.group()
             else:
@@ -344,7 +344,7 @@ def _link_url_with_entity(self, entity, options = {}):
             #   …
             # </span>
 
-            return u"<span class='tco-ellipsis'>%s<span %s>&nbsp;</span></span><span %s>%s</span><span class='js-display-url'>%s</span><span %s>%s</span><span class='tco-ellipsis'><span %s>&nbsp;</span>%s</span>" % (preceding_ellipsis, invisible_tag_attrs, invisible_tag_attrs, self._html_escape(before_display_url), self._html_escape(display_url_sans_ellipses), invisible_tag_attrs, self._html_escape(after_display_url), invisible_tag_attrs, following_ellipsis)
+            return "<span class='tco-ellipsis'>%s<span %s>&nbsp;</span></span><span %s>%s</span><span class='js-display-url'>%s</span><span %s>%s</span><span class='tco-ellipsis'><span %s>&nbsp;</span>%s</span>" % (preceding_ellipsis, invisible_tag_attrs, invisible_tag_attrs, self._html_escape(before_display_url), self._html_escape(display_url_sans_ellipses), invisible_tag_attrs, self._html_escape(after_display_url), invisible_tag_attrs, following_ellipsis)
         else:
             return self._html_escape(display_url)
 
@@ -356,13 +356,13 @@ def _link_to_hashtag(self, entity, chars, options = {}):
         if REGEXEN['rtl_chars'].search(hashtag):
             hashtag_class += ' rtl'
 
-        href = options.get('hashtag_url_transform', lambda ht: u'%s%s' % (options.get('hashtag_url_base'), ht))(hashtag)
+        href = options.get('hashtag_url_transform', lambda ht: '%s%s' % (options.get('hashtag_url_base'), ht))(hashtag)
 
         html_attrs = {}
         html_attrs.update(options.get('html_attrs', {}))
         html_attrs = {
             'class':    hashtag_class,
-            'title':    u'#%s' % hashtag,
+            'title':    '#%s' % hashtag,
         }
 
         link = self._link_to_text_with_symbol(entity, hashchar, hashtag, href, html_attrs, options)
@@ -372,19 +372,19 @@ def _link_to_cashtag(self, entity, chars, options = {}):
         dollar = chars[entity['indices'][0]]
         cashtag = entity['cashtag']
 
-        href = options.get('cashtag_url_transform', lambda ct: u'%s%s' % (options.get('cashtag_url_base'), ct))(cashtag)
+        href = options.get('cashtag_url_transform', lambda ct: '%s%s' % (options.get('cashtag_url_base'), ct))(cashtag)
 
         html_attrs = {
             'class': options.get('cashtag_class'),
-            'title': u'$%s' % cashtag
+            'title': '$%s' % cashtag
         }
         html_attrs.update(options.get('html_attrs', {}))
 
         link = self._link_to_text_with_symbol(entity, dollar, cashtag, href, html_attrs, options)
         return chars[:entity['indices'][0]] + link + chars[entity['indices'][1]:]
 
     def _link_to_screen_name(self, entity, chars, options = {}):
-        name = u'%s%s' % (entity['screen_name'], entity.get('list_slug') or '')
+        name = '%s%s' % (entity['screen_name'], entity.get('list_slug') or '')
         chunk = options.get('link_text_transform', default_transform)(entity, name)
         name = name.lower()
 
@@ -395,30 +395,30 @@ def _link_to_screen_name(self, entity, chars, options = {}):
             del(html_attrs['title'])
 
         if entity.get('list_slug') and not options.get('supress_lists'):
-            href = options.get('list_url_transform', lambda sn: u'%s%s' % (options.get('list_url_base'), sn))(name)
+            href = options.get('list_url_transform', lambda sn: '%s%s' % (options.get('list_url_base'), sn))(name)
             html_attrs['class'] = options.get('list_class')
         else:
-            href = options.get('username_url_transform', lambda sn: u'%s%s' % (options.get('username_url_base'), sn))(name)
+            href = options.get('username_url_transform', lambda sn: '%s%s' % (options.get('username_url_base'), sn))(name)
             html_attrs['class'] = options.get('username_class')
 
         link = self._link_to_text_with_symbol(entity, at, chunk, href, html_attrs, options)
         return chars[:entity['indices'][0]] + link + chars[entity['indices'][1]:]
 
     def _link_to_text_with_symbol(self, entity, symbol, text, href, attributes = {}, options = {}):
-        tagged_symbol = u'<%s>%s</%s>' % (options.get('symbol_tag'), symbol, options.get('symbol_tag')) if options.get('symbol_tag') else symbol
+        tagged_symbol = '<%s>%s</%s>' % (options.get('symbol_tag'), symbol, options.get('symbol_tag')) if options.get('symbol_tag') else symbol
         text = self._html_escape(text)
-        tagged_text = u'<%s>%s</%s>' % (options.get('text_with_symbol_tag'), text, options.get('text_with_symbol_tag')) if options.get('text_with_symbol_tag') else text
+        tagged_text = '<%s>%s</%s>' % (options.get('text_with_symbol_tag'), text, options.get('text_with_symbol_tag')) if options.get('text_with_symbol_tag') else text
         if options.get('username_include_symbol') or not REGEXEN['at_signs'].match(symbol):
-            return u'%s' % self._link_to_text(entity, tagged_symbol + tagged_text, href, attributes, options)
+            return '%s' % self._link_to_text(entity, tagged_symbol + tagged_text, href, attributes, options)
         else:
-            return u'%s%s' % (tagged_symbol, self._link_to_text(entity, tagged_text, href, attributes, options))
+            return '%s%s' % (tagged_symbol, self._link_to_text(entity, tagged_text, href, attributes, options))
 
     def _link_to_text(self, entity, text, href, attributes = {}, options = {}):
         attributes['href'] = href
-        if options.get('link_attributes_transform'):
-            attributes = options.get('link_attributes_transform')(entity, attributes)
+        if options.get('link_attribute_transform'):
+            attributes = options.get('link_attribute_transform')(entity, attributes)
         text = options.get('link_text_transform', default_transform)(entity, text)
-        return u'<a %s>%s</a>' % (self._tag_attrs(attributes), text)
+        return '<a %s>%s</a>' % (self._tag_attrs(attributes), text)
 
     def _tag_attrs(self, attributes = {}):
         attrs = []
@@ -428,7 +428,7 @@ def _tag_attrs(self, attributes = {}):
                 attrs.append(key)
                 continue
             if type(value) == list:
-                value = u' '.join(value)
-            attrs.append(u'%s="%s"' % (self._html_escape(key), self._html_escape(value)))
+                value = ' '.join(value)
+            attrs.append('%s="%s"' % (self._html_escape(key), self._html_escape(value)))
 
-        return u' '.join(attrs)
+        return ' '.join(attrs)
diff --git a/twitter_text/highlighter.py b/twitter_text/highlighter.py
@@ -1,15 +1,15 @@
 # encoding=utf-8
-
+from __future__ import unicode_literals
 import re
-from HTMLParser import HTMLParser
+from six.moves import html_parser
 
 from twitter_text.regex import UNICODE_SPACES
 from twitter_text.unicode import force_unicode
 
 DEFAULT_HIGHLIGHT_TAG = 'em'
 
 # from http://stackoverflow.com/questions/753052/strip-html-from-strings-in-python
-class MLStripper(HTMLParser):
+class MLStripper(html_parser.HTMLParser):
     def __init__(self):
-        self.reset()
+        html_parser.HTMLParser.__init__(self)  # calls reset() and, on Python 3, sets convert_charrefs used by feed()
         self.fed = []
@@ -34,14 +34,14 @@ def hit_highlight(self, hits = [], **kwargs):
 
         if not hits and kwargs.get('query'):
             stripped_text   =   strip_tags(self.text)
-            for match in re.finditer(ur'%s' % kwargs.get('query'), stripped_text):
+            for match in re.finditer(r'%s' % kwargs.get('query'), stripped_text):
                 hits.append(match.span())
 
         if hits and not type(hits) == list:
             raise Exception('The syntax for the hit_highlight method has changed. You must pass in a list of lists containing the indices of the strings you want to match.')
 
         tag_name = kwargs.get('tag', DEFAULT_HIGHLIGHT_TAG)
-        tags = [u'<%s>' % tag_name, u'</%s>' % tag_name]
+        tags = ['<%s>' % tag_name, '</%s>' % tag_name]
 
         text = self.text
         chunks = re.split(r'[<>]', text)
@@ -58,7 +58,7 @@ def hit_highlight(self, hits = [], **kwargs):
                 if index % 2:
                     # we're inside a <tag>
                     continue
-                chunk_start = len(u''.join(text_chunks[0:index / 2]))
+                chunk_start = len(''.join(text_chunks[0:index // 2]))  # floor division: slice bounds must be ints on Python 3
                 chunk_end = chunk_start + len(chunk)
                 if hit_start >= chunk_start and hit_start < chunk_end:
                     chunk = chunk[:hit_start - chunk_start] + tags[0] + chunk[hit_start - chunk_start:]
@@ -76,8 +76,8 @@ def hit_highlight(self, hits = [], **kwargs):
         for index, chunk in enumerate(chunks):
             if index % 2:
                 # we're inside a <tag>
-                result.append(u'<%s>' % chunk)
+                result.append('<%s>' % chunk)
             else:
                 result.append(chunk)
-        self.text = u''.join(result)
-        return self.text
+        self.text = ''.join(result)
+        return self.text