This repository has been archived by the owner on Apr 4, 2018. It is now read-only.

python 3 support #41

Open
wants to merge 20 commits into master
1 change: 1 addition & 0 deletions LICENSE
@@ -1,4 +1,5 @@
Copyright (c) 2010, Daniel Ryan
Copyright (c) 2017, Glyph
All rights reserved.

Redistribution and use in source and binary forms, with or without
Empty file removed __init__.py
Empty file.
2 changes: 2 additions & 0 deletions setup.cfg
@@ -0,0 +1,2 @@
[wheel]
universal = 1
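Note: the new setup.cfg marks the build as a universal wheel. With universal = 1, bdist_wheel tags the built artifact py2.py3-none-any, so a single pure-Python wheel installs on both Python 2 and Python 3. [wheel] is the legacy section name; newer releases of the wheel project read the same flag from a [bdist_wheel] section.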
17 changes: 10 additions & 7 deletions setup.py
@@ -1,20 +1,23 @@
from setuptools import setup, find_packages

setup(
name='twitter-text-py',
version='2.0.2',
description='A library for auto-converting URLs, mentions, hashtags, lists, etc. in Twitter text. Also does tweet validation and search term highlighting.',
author='Daniel Ryan',
author_email='[email protected]',
url='http://github.com/dryan/twitter-text-py',
name='twitter-text',
version='3.0',
description='A library for auto-converting URLs, mentions, hashtags, lists, etc. in Twitter text. Also does tweet validation and search term highlighting. Fork of twitter-text-py that supports Python 3. Originally by Daniel Ryan, Py3 port by Glyph.',
author='Glyph',
author_email='[email protected]',
url='http://github.com/glyph/twitter-text-py',
packages=find_packages(),
classifiers=[
'Development Status :: 5 - Production/Stable',
'Environment :: Web Environment',
'Environment :: Console',
'Intended Audience :: Developers',
'License :: OSI Approved :: BSD License',
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 3',
'Framework :: Django',
],
include_package_data=True,
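The setup.py changes republish the fork under its own distribution name (twitter-text rather than twitter-text-py), bump the version to 3.0, point the author and URL at the fork, and add trove classifiers advertising support for both Python 2 and Python 3, matching the universal wheel above.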
142 changes: 89 additions & 53 deletions tests.py
@@ -2,6 +2,10 @@

import twitter_text, sys, os, json, argparse, re
from twitter_text.unicode import force_unicode
try:
unichr
except NameError:
unichr = chr

narrow_build = True
try:
@@ -34,25 +38,35 @@ def construct_yaml_str(self, node):
raise Exception('You need to install BeautifulSoup to run the tests')

def success(text):
return (u'\033[92m%s\033[0m\n' % text).encode('utf-8')
return (u'\033[92m%s\033[0m\n' % text)

def error(text):
return (u'\033[91m%s\033[0m\n' % text).encode('utf-8')
return (u'\033[91m%s\033[0m\n' % text)

attempted = 0
passed = 0
failed = 0

def assert_equal_without_attribute_order(result, test, failure_message = None):
global attempted
attempted += 1
# Beautiful Soup sorts the attributes for us so we can skip all the hoops the ruby version jumps through
assert BeautifulSoup(result) == BeautifulSoup(test.get('expected')), error(u'Test %d Failed: %s' % (attempted, test.get('description')))
actual = BeautifulSoup(result)
expected = BeautifulSoup(test.get('expected'))
assert actual == expected, error(u'Test %d Failed: %s (%s != %s)' % (attempted, test.get('description'),
actual, expected))
sys.stdout.write(success(u'Test %d Passed: %s' % (attempted, test.get('description'))))
sys.stdout.flush()

def assert_equal(result, test):
global attempted
attempted += 1
assert result == test.get('expected'), error(u'\nTest %d Failed: %s%s' % (attempted, test.get('description'), u'\n%s' % test.get('hits') if test.get('hits') else ''))
expected = test.get('expected')
assert result == expected, error(u'\nTest %d Failed: %s%s (%s != %s)' % (
attempted, test.get('description'),
u'\n%s' % test.get('hits') if test.get('hits') else '',
result, expected
))
sys.stdout.write(success(u'Test %d Passed: %s' % (attempted, test.get('description'))))
sys.stdout.flush()

@@ -72,27 +86,33 @@ def assert_equal(result, test):
sys.stdout.write('Skipping: %s\n' % test.get('description'))
sys.stdout.flush()
continue
extractor = twitter_text.extractor.Extractor(test.get('text'))
if section == 'mentions':
assert_equal(extractor.extract_mentioned_screen_names(), test)
elif section == 'mentions_with_indices':
assert_equal(extractor.extract_mentioned_screen_names_with_indices(), test)
elif section == 'mentions_or_lists_with_indices':
assert_equal(extractor.extract_mentions_or_lists_with_indices(), test)
elif section == 'replies':
assert_equal(extractor.extract_reply_screen_name(), test)
elif section == 'urls':
assert_equal(extractor.extract_urls(), test)
elif section == 'urls_with_indices':
assert_equal(extractor.extract_urls_with_indices(), test)
elif section == 'hashtags':
assert_equal(extractor.extract_hashtags(), test)
elif section == 'cashtags':
assert_equal(extractor.extract_cashtags(), test)
elif section == 'hashtags_with_indices':
assert_equal(extractor.extract_hashtags_with_indices(), test)
elif section == 'cashtags_with_indices':
assert_equal(extractor.extract_cashtags_with_indices(), test)
try:
extractor = twitter_text.extractor.Extractor(test.get('text'))
if section == 'mentions':
assert_equal(extractor.extract_mentioned_screen_names(), test)
elif section == 'mentions_with_indices':
assert_equal(extractor.extract_mentioned_screen_names_with_indices(), test)
elif section == 'mentions_or_lists_with_indices':
assert_equal(extractor.extract_mentions_or_lists_with_indices(), test)
elif section == 'replies':
assert_equal(extractor.extract_reply_screen_name(), test)
elif section == 'urls':
assert_equal(extractor.extract_urls(), test)
elif section == 'urls_with_indices':
assert_equal(extractor.extract_urls_with_indices(), test)
elif section == 'hashtags':
assert_equal(extractor.extract_hashtags(), test)
elif section == 'cashtags':
assert_equal(extractor.extract_cashtags(), test)
elif section == 'hashtags_with_indices':
assert_equal(extractor.extract_hashtags_with_indices(), test)
elif section == 'cashtags_with_indices':
assert_equal(extractor.extract_cashtags_with_indices(), test)
except AssertionError as ae:
print(ae.args[0])
failed += 1
else:
passed += 1

# autolink section
autolink_file = open(os.path.join('twitter-text-conformance', 'autolink.yml'), 'r')
@@ -112,20 +132,26 @@ def assert_equal(result, test):
sys.stdout.flush()
continue
autolink = twitter_text.autolink.Autolink(test.get('text'))
if section == 'usernames':
assert_equal_without_attribute_order(autolink.auto_link_usernames_or_lists(autolink_options), test)
elif section == 'cashtags':
assert_equal_without_attribute_order(autolink.auto_link_cashtags(autolink_options), test)
elif section == 'urls':
assert_equal_without_attribute_order(autolink.auto_link_urls(autolink_options), test)
elif section == 'hashtags':
assert_equal_without_attribute_order(autolink.auto_link_hashtags(autolink_options), test)
elif section == 'all':
assert_equal_without_attribute_order(autolink.auto_link(autolink_options), test)
elif section == 'lists':
assert_equal_without_attribute_order(autolink.auto_link_usernames_or_lists(autolink_options), test)
elif section == 'json':
assert_equal_without_attribute_order(autolink.auto_link_with_json(json.loads(test.get('json')), autolink_options), test)
try:
if section == 'usernames':
assert_equal_without_attribute_order(autolink.auto_link_usernames_or_lists(autolink_options), test)
elif section == 'cashtags':
assert_equal_without_attribute_order(autolink.auto_link_cashtags(autolink_options), test)
elif section == 'urls':
assert_equal_without_attribute_order(autolink.auto_link_urls(autolink_options), test)
elif section == 'hashtags':
assert_equal_without_attribute_order(autolink.auto_link_hashtags(autolink_options), test)
elif section == 'all':
assert_equal_without_attribute_order(autolink.auto_link(autolink_options), test)
elif section == 'lists':
assert_equal_without_attribute_order(autolink.auto_link_usernames_or_lists(autolink_options), test)
elif section == 'json':
assert_equal_without_attribute_order(autolink.auto_link_with_json(json.loads(test.get('json')), autolink_options), test)
except AssertionError as ae:
print(ae.args[0])
failed += 1
else:
passed += 1

# hit_highlighting section
hit_highlighting_file = open(os.path.join('twitter-text-conformance', 'hit_highlighting.yml'), 'r')
@@ -150,7 +176,10 @@ def assert_equal(result, test):
try:
validate_file = open(os.path.join('twitter-text-conformance', 'validate.yml'), 'r')
validate_file_contents = validate_file.read()
validate_tests = yaml.load(re.sub(ur'\\n', '\n', validate_file_contents.encode('unicode-escape')))
validate_tests = yaml.load(
re.sub(u'\\\\n', '\n',
validate_file_contents.encode('unicode-escape').decode("ascii"))
)
validate_file.close()
except ValueError:
sys.stdout.write('\nValidation tests were skipped because of wide character issues\n')
@@ -164,17 +193,24 @@ def assert_equal(result, test):
sys.stdout.write('\nTesting Validation: %s\n' % section)
for test in validate_tests.get('tests').get(section):
validator = twitter_text.validation.Validation(test.get('text'))
if section == 'tweets':
assert_equal(not validator.tweet_invalid(), test)
elif section == 'usernames':
assert_equal(validator.valid_username(), test)
elif section == 'lists':
assert_equal(validator.valid_list(), test)
elif section == 'hashtags':
assert_equal(validator.valid_hashtag(), test)
elif section == 'urls':
assert_equal(validator.valid_url(), test)

sys.stdout.write(u'\033[0m-------\n\033[92m%d tests passed.\033[0m\n' % attempted)
try:
if section == 'tweets':
assert_equal(not validator.tweet_invalid(), test)
elif section == 'usernames':
assert_equal(validator.valid_username(), test)
elif section == 'lists':
assert_equal(validator.valid_list(), test)
elif section == 'hashtags':
assert_equal(validator.valid_hashtag(), test)
elif section == 'urls':
assert_equal(validator.valid_url(), test)
except AssertionError as ae:
print(ae.args[0])
failed += 1
else:
passed += 1

sys.stdout.write(u'\033[0m-------\n\033[92m%d tests passed%s.\033[0m\n' %
(passed, (error(", %d failed" % failed) if failed else "")))
sys.stdout.flush()
sys.exit(os.EX_OK)
sys.exit(os.EX_OK if not failed else os.EX_SOFTWARE)
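Taken together, the tests.py changes follow three porting idioms: shim builtins that were renamed in Python 3, decode the bytes that encode() now returns before handing them to re, and wrap each conformance case in try/except AssertionError so a single failure is counted rather than aborting the run. A minimal standalone sketch of the three patterns, with illustrative case data (not taken from the conformance suite):

    import re

    try:                       # Python 2: unichr is a builtin
        unichr
    except NameError:          # Python 3: chr covers all code points
        unichr = chr

    # str.encode() returns bytes on Python 3, so unicode-escaped text must
    # be decoded back to str before re.sub will accept it.
    raw = u'line1\nline2'.encode('unicode-escape').decode('ascii')
    assert re.sub(u'\\\\n', '\n', raw) == u'line1\nline2'

    passed = failed = 0
    cases = [(unichr(0x2026), u'\u2026'), (2 + 2, 4)]   # illustrative only
    for result, expected in cases:
        try:
            assert result == expected, '%r != %r' % (result, expected)
        except AssertionError as ae:
            print(ae.args[0])
            failed += 1
        else:
            passed += 1
    print('%d passed, %d failed' % (passed, failed))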
2 changes: 1 addition & 1 deletion twitter-text-conformance
14 changes: 7 additions & 7 deletions twitter_text/autolink.py
@@ -1,6 +1,6 @@
# encoding=utf-8

import re, cgi
import re

from twitter_text.regex import REGEXEN
from twitter_text.unicode import force_unicode
@@ -113,7 +113,7 @@ def auto_link_entities(self, entities = [], options = {}):
return self.text

# NOTE deprecate these attributes not options keys in options hash, then use html_attrs
options = dict(DEFAULT_OPTIONS.items() + options.items())
options = dict(list(DEFAULT_OPTIONS.items()) + list(options.items()))
options['html_attrs'] = self._extract_html_attrs_from_options(options)
if not options.get('suppress_no_follow', False):
options['html_attrs']['rel'] = "nofollow"
@@ -302,16 +302,16 @@ def _link_url_with_entity(self, entity, options = {}):
For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts.
For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine.
"""
display_url = entity.get('display_url').decode('utf-8')
display_url = entity.get('display_url')
expanded_url = entity.get('expanded_url')
invisible_tag_attrs = options.get('invisible_tag_attrs', DEFAULT_INVISIBLE_TAG_ATTRS)

display_url_sans_ellipses = re.sub(ur'…', u'', display_url)
display_url_sans_ellipses = re.sub(u'…', u'', display_url)

if expanded_url.find(display_url_sans_ellipses) > -1:
before_display_url, after_display_url = expanded_url.split(display_url_sans_ellipses, 2)
preceding_ellipsis = re.search(ur'\A…', display_url)
following_ellipsis = re.search(ur'…\z', display_url)
preceding_ellipsis = re.search(u'\\A…', display_url)
following_ellipsis = re.search(u'…\\Z', display_url)
if preceding_ellipsis is not None:
preceding_ellipsis = preceding_ellipsis.group()
else:
@@ -431,4 +431,4 @@ def _tag_attrs(self, attributes = {}):
value = u' '.join(value)
attrs.append(u'%s="%s"' % (self._html_escape(key), self._html_escape(value)))

return u' '.join(attrs)
return u' '.join(attrs)
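Two Python 3 incompatibilities are fixed in autolink.py. First, dict.items() returns a view rather than a list on Python 3, so merging the defaults with caller options needs explicit list() calls before concatenation. Second, the ur'' literal prefix is a SyntaxError on Python 3, so the raw-unicode regexes become plain u'' literals with doubled backslashes; the Ruby-style \z anchor, which Python's re does not support, becomes Python's end-of-string anchor \Z at the same time. A short sketch of both idioms, with illustrative values:

    import re

    DEFAULT_OPTIONS = {'suppress_no_follow': False}
    options = {'suppress_no_follow': True}

    # dict.items() is a list on Python 2 but a view on Python 3; wrapping
    # both in list() keeps the concatenation working on either version.
    # (On Python 3.5+ alone, {**DEFAULT_OPTIONS, **options} also works.)
    merged = dict(list(DEFAULT_OPTIONS.items()) + list(options.items()))
    assert merged['suppress_no_follow'] is True

    # A plain u'' literal with doubled backslashes replaces ur'', and \Z
    # (not Ruby's \z) anchors the end of the string.
    display_url = u'example.com/page\u2026'   # ends with an ellipsis
    assert re.search(u'…\\Z', display_url) is not None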
12 changes: 7 additions & 5 deletions twitter_text/highlighter.py
@@ -1,9 +1,11 @@
# encoding=utf-8

import re
from HTMLParser import HTMLParser
try:
from html.parser import HTMLParser
except ImportError:
from HTMLParser import HTMLParser

from twitter_text.regex import UNICODE_SPACES
from twitter_text.unicode import force_unicode

DEFAULT_HIGHLIGHT_TAG = 'em'
@@ -34,7 +36,7 @@ def hit_highlight(self, hits = [], **kwargs):

if not hits and kwargs.get('query'):
stripped_text = strip_tags(self.text)
for match in re.finditer(ur'%s' % kwargs.get('query'), stripped_text):
for match in re.finditer(u'%s' % kwargs.get('query'), stripped_text):
hits.append(match.span())

if hits and not type(hits) == list:
@@ -58,7 +60,7 @@ def hit_highlight(self, hits = [], **kwargs):
if index % 2:
# we're inside a <tag>
continue
chunk_start = len(u''.join(text_chunks[0:index / 2]))
chunk_start = len(u''.join(text_chunks[0:index // 2]))
chunk_end = chunk_start + len(chunk)
if hit_start >= chunk_start and hit_start < chunk_end:
chunk = chunk[:hit_start - chunk_start] + tags[0] + chunk[hit_start - chunk_start:]
@@ -80,4 +82,4 @@ def hit_highlight(self, hits = [], **kwargs):
else:
result.append(chunk)
self.text = u''.join(result)
return self.text
return self.text
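The highlighter.py changes apply the same recipe: the HTMLParser module moved to html.parser in Python 3, so the import is attempted at its new location first, and / became true division, so the chunk-offset arithmetic switches to // to keep yielding an int slice index. A small standalone sketch:

    try:
        from html.parser import HTMLParser   # Python 3 location
    except ImportError:
        from HTMLParser import HTMLParser    # Python 2 fallback

    # On Python 3, 4 / 2 == 2.0, and a float slice index raises TypeError;
    # floor division preserves the Python 2 integer behavior.
    text_chunks = [u'Hello ', u'world', u'!']
    index = 4                                # even index: outside any <tag>
    chunk_start = len(u''.join(text_chunks[0:index // 2]))
    assert chunk_start == len(u'Hello world')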