This repository has been archived by the owner on Apr 4, 2018. It is now read-only.

python 3 support #41

Open
wants to merge 20 commits into master
1 change: 1 addition & 0 deletions LICENSE
@@ -1,4 +1,5 @@
Copyright (c) 2010, Daniel Ryan
Copyright (c) 2017, Glyph
All rights reserved.

Redistribution and use in source and binary forms, with or without
Empty file removed __init__.py
Empty file.
2 changes: 2 additions & 0 deletions setup.cfg
@@ -0,0 +1,2 @@
[wheel]
universal = 1
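Note: the new setup.cfg marks the build as a universal wheel. With universal = 1, bdist_wheel tags the built artifact py2.py3-none-any, so a single pure-Python wheel installs on both Python 2 and Python 3. [wheel] is the legacy section name; newer releases of the wheel project read the same flag from a [bdist_wheel] section.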
17 changes: 10 additions & 7 deletions setup.py
@@ -1,20 +1,23 @@
from setuptools import setup, find_packages

setup(
name='twitter-text-py',
version='2.0.2',
description='A library for auto-converting URLs, mentions, hashtags, lists, etc. in Twitter text. Also does tweet validation and search term highlighting.',
author='Daniel Ryan',
author_email='[email protected]',
url='http://github.com/dryan/twitter-text-py',
name='twitter-text',
version='3.0',
description='A library for auto-converting URLs, mentions, hashtags, lists, etc. in Twitter text. Also does tweet validation and search term highlighting. Fork of twitter-text-py that supports Python 3. Originally by Daniel Ryan, Py3 port by Glyph.',
author='Glyph',
author_email='[email protected]',
url='http://github.com/glyph/twitter-text-py',
packages=find_packages(),
classifiers=[
'Development Status :: 5 - Production/Stable',
'Environment :: Web Environment',
'Environment :: Console',
'Intended Audience :: Developers',
'License :: OSI Approved :: BSD License',
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 3',
'Framework :: Django',
],
include_package_data=True,
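The setup.py changes republish the fork under its own distribution name (twitter-text rather than twitter-text-py), bump the version to 3.0, point the author and URL at the fork, and add trove classifiers advertising support for both Python 2 and Python 3, matching the universal wheel above.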
142 changes: 89 additions & 53 deletions tests.py
@@ -2,6 +2,10 @@

import twitter_text, sys, os, json, argparse, re
from twitter_text.unicode import force_unicode
try:
unichr
except NameError:
unichr = chr

narrow_build = True
try:
@@ -34,25 +38,35 @@ def construct_yaml_str(self, node):
raise Exception('You need to install BeautifulSoup to run the tests')

def success(text):
return (u'\033[92m%s\033[0m\n' % text).encode('utf-8')
return (u'\033[92m%s\033[0m\n' % text)

def error(text):
return (u'\033[91m%s\033[0m\n' % text).encode('utf-8')
return (u'\033[91m%s\033[0m\n' % text)

attempted = 0
passed = 0
failed = 0

def assert_equal_without_attribute_order(result, test, failure_message = None):
global attempted
attempted += 1
# Beautiful Soup sorts the attributes for us so we can skip all the hoops the ruby version jumps through
assert BeautifulSoup(result) == BeautifulSoup(test.get('expected')), error(u'Test %d Failed: %s' % (attempted, test.get('description')))
actual = BeautifulSoup(result)
expected = BeautifulSoup(test.get('expected'))
assert actual == expected, error(u'Test %d Failed: %s (%s != %s)' % (attempted, test.get('description'),
actual, expected))
sys.stdout.write(success(u'Test %d Passed: %s' % (attempted, test.get('description'))))
sys.stdout.flush()

def assert_equal(result, test):
global attempted
attempted += 1
assert result == test.get('expected'), error(u'\nTest %d Failed: %s%s' % (attempted, test.get('description'), u'\n%s' % test.get('hits') if test.get('hits') else ''))
expected = test.get('expected')
assert result == expected, error(u'\nTest %d Failed: %s%s (%s != %s)' % (
attempted, test.get('description'),
u'\n%s' % test.get('hits') if test.get('hits') else '',
result, expected
))
sys.stdout.write(success(u'Test %d Passed: %s' % (attempted, test.get('description'))))
sys.stdout.flush()

@@ -72,27 +86,33 @@ def assert_equal(result, test):
sys.stdout.write('Skipping: %s\n' % test.get('description'))
sys.stdout.flush()
continue
extractor = twitter_text.extractor.Extractor(test.get('text'))
if section == 'mentions':
assert_equal(extractor.extract_mentioned_screen_names(), test)
elif section == 'mentions_with_indices':
assert_equal(extractor.extract_mentioned_screen_names_with_indices(), test)
elif section == 'mentions_or_lists_with_indices':
assert_equal(extractor.extract_mentions_or_lists_with_indices(), test)
elif section == 'replies':
assert_equal(extractor.extract_reply_screen_name(), test)
elif section == 'urls':
assert_equal(extractor.extract_urls(), test)
elif section == 'urls_with_indices':
assert_equal(extractor.extract_urls_with_indices(), test)
elif section == 'hashtags':
assert_equal(extractor.extract_hashtags(), test)
elif section == 'cashtags':
assert_equal(extractor.extract_cashtags(), test)
elif section == 'hashtags_with_indices':
assert_equal(extractor.extract_hashtags_with_indices(), test)
elif section == 'cashtags_with_indices':
assert_equal(extractor.extract_cashtags_with_indices(), test)
try:
extractor = twitter_text.extractor.Extractor(test.get('text'))
if section == 'mentions':
assert_equal(extractor.extract_mentioned_screen_names(), test)
elif section == 'mentions_with_indices':
assert_equal(extractor.extract_mentioned_screen_names_with_indices(), test)
elif section == 'mentions_or_lists_with_indices':
assert_equal(extractor.extract_mentions_or_lists_with_indices(), test)
elif section == 'replies':
assert_equal(extractor.extract_reply_screen_name(), test)
elif section == 'urls':
assert_equal(extractor.extract_urls(), test)
elif section == 'urls_with_indices':
assert_equal(extractor.extract_urls_with_indices(), test)
elif section == 'hashtags':
assert_equal(extractor.extract_hashtags(), test)
elif section == 'cashtags':
assert_equal(extractor.extract_cashtags(), test)
elif section == 'hashtags_with_indices':
assert_equal(extractor.extract_hashtags_with_indices(), test)
elif section == 'cashtags_with_indices':
assert_equal(extractor.extract_cashtags_with_indices(), test)
except AssertionError as ae:
print(ae.args[0])
failed += 1
else:
passed += 1

# autolink section
autolink_file = open(os.path.join('twitter-text-conformance', 'autolink.yml'), 'r')
@@ -112,20 +132,26 @@ def assert_equal(result, test):
sys.stdout.flush()
continue
autolink = twitter_text.autolink.Autolink(test.get('text'))
if section == 'usernames':
assert_equal_without_attribute_order(autolink.auto_link_usernames_or_lists(autolink_options), test)
elif section == 'cashtags':
assert_equal_without_attribute_order(autolink.auto_link_cashtags(autolink_options), test)
elif section == 'urls':
assert_equal_without_attribute_order(autolink.auto_link_urls(autolink_options), test)
elif section == 'hashtags':
assert_equal_without_attribute_order(autolink.auto_link_hashtags(autolink_options), test)
elif section == 'all':
assert_equal_without_attribute_order(autolink.auto_link(autolink_options), test)
elif section == 'lists':
assert_equal_without_attribute_order(autolink.auto_link_usernames_or_lists(autolink_options), test)
elif section == 'json':
assert_equal_without_attribute_order(autolink.auto_link_with_json(json.loads(test.get('json')), autolink_options), test)
try:
if section == 'usernames':
assert_equal_without_attribute_order(autolink.auto_link_usernames_or_lists(autolink_options), test)
elif section == 'cashtags':
assert_equal_without_attribute_order(autolink.auto_link_cashtags(autolink_options), test)
elif section == 'urls':
assert_equal_without_attribute_order(autolink.auto_link_urls(autolink_options), test)
elif section == 'hashtags':
assert_equal_without_attribute_order(autolink.auto_link_hashtags(autolink_options), test)
elif section == 'all':
assert_equal_without_attribute_order(autolink.auto_link(autolink_options), test)
elif section == 'lists':
assert_equal_without_attribute_order(autolink.auto_link_usernames_or_lists(autolink_options), test)
elif section == 'json':
assert_equal_without_attribute_order(autolink.auto_link_with_json(json.loads(test.get('json')), autolink_options), test)
except AssertionError as ae:
print(ae.args[0])
failed += 1
else:
passed += 1

# hit_highlighting section
hit_highlighting_file = open(os.path.join('twitter-text-conformance', 'hit_highlighting.yml'), 'r')
@@ -150,7 +176,10 @@ def assert_equal(result, test):
try:
validate_file = open(os.path.join('twitter-text-conformance', 'validate.yml'), 'r')
validate_file_contents = validate_file.read()
validate_tests = yaml.load(re.sub(ur'\\n', '\n', validate_file_contents.encode('unicode-escape')))
validate_tests = yaml.load(
re.sub(u'\\\\n', '\n',
validate_file_contents.encode('unicode-escape').decode("ascii"))
)
validate_file.close()
except ValueError:
sys.stdout.write('\nValidation tests were skipped because of wide character issues\n')
@@ -164,17 +193,24 @@ def assert_equal(result, test):
sys.stdout.write('\nTesting Validation: %s\n' % section)
for test in validate_tests.get('tests').get(section):
validator = twitter_text.validation.Validation(test.get('text'))
if section == 'tweets':
assert_equal(not validator.tweet_invalid(), test)
elif section == 'usernames':
assert_equal(validator.valid_username(), test)
elif section == 'lists':
assert_equal(validator.valid_list(), test)
elif section == 'hashtags':
assert_equal(validator.valid_hashtag(), test)
elif section == 'urls':
assert_equal(validator.valid_url(), test)

sys.stdout.write(u'\033[0m-------\n\033[92m%d tests passed.\033[0m\n' % attempted)
try:
if section == 'tweets':
assert_equal(not validator.tweet_invalid(), test)
elif section == 'usernames':
assert_equal(validator.valid_username(), test)
elif section == 'lists':
assert_equal(validator.valid_list(), test)
elif section == 'hashtags':
assert_equal(validator.valid_hashtag(), test)
elif section == 'urls':
assert_equal(validator.valid_url(), test)
except AssertionError as ae:
print(ae.args[0])
failed += 1
else:
passed += 1

sys.stdout.write(u'\033[0m-------\n\033[92m%d tests passed%s.\033[0m\n' %
(passed, (error(", %d failed" % failed) if failed else "")))
sys.stdout.flush()
sys.exit(os.EX_OK)
sys.exit(os.EX_OK if not failed else os.EX_SOFTWARE)
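Taken together, the tests.py changes follow three porting idioms: shim builtins that were renamed in Python 3, decode the bytes that encode() now returns before handing them to re, and wrap each conformance case in try/except AssertionError so a single failure is counted rather than aborting the run. A minimal standalone sketch of the three patterns, with illustrative case data (not taken from the conformance suite):

    import re

    try:                       # Python 2: unichr is a builtin
        unichr
    except NameError:          # Python 3: chr covers all code points
        unichr = chr

    # str.encode() returns bytes on Python 3, so unicode-escaped text must
    # be decoded back to str before re.sub will accept it.
    raw = u'line1\nline2'.encode('unicode-escape').decode('ascii')
    assert re.sub(u'\\\\n', '\n', raw) == u'line1\nline2'

    passed = failed = 0
    cases = [(unichr(0x2026), u'\u2026'), (2 + 2, 4)]   # illustrative only
    for result, expected in cases:
        try:
            assert result == expected, '%r != %r' % (result, expected)
        except AssertionError as ae:
            print(ae.args[0])
            failed += 1
        else:
            passed += 1
    print('%d passed, %d failed' % (passed, failed))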
2 changes: 1 addition & 1 deletion twitter-text-conformance
14 changes: 7 additions & 7 deletions twitter_text/autolink.py
@@ -1,6 +1,6 @@
# encoding=utf-8

import re, cgi
import re

from twitter_text.regex import REGEXEN
from twitter_text.unicode import force_unicode
@@ -113,7 +113,7 @@ def auto_link_entities(self, entities = [], options = {}):
return self.text

# NOTE deprecate these attributes not options keys in options hash, then use html_attrs
options = dict(DEFAULT_OPTIONS.items() + options.items())
options = dict(list(DEFAULT_OPTIONS.items()) + list(options.items()))
options['html_attrs'] = self._extract_html_attrs_from_options(options)
if not options.get('suppress_no_follow', False):
options['html_attrs']['rel'] = "nofollow"
@@ -302,16 +302,16 @@ def _link_url_with_entity(self, entity, options = {}):
For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts.
For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine.
"""
display_url = entity.get('display_url').decode('utf-8')
display_url = entity.get('display_url')
expanded_url = entity.get('expanded_url')
invisible_tag_attrs = options.get('invisible_tag_attrs', DEFAULT_INVISIBLE_TAG_ATTRS)

display_url_sans_ellipses = re.sub(ur'…', u'', display_url)
display_url_sans_ellipses = re.sub(u'…', u'', display_url)

if expanded_url.find(display_url_sans_ellipses) > -1:
before_display_url, after_display_url = expanded_url.split(display_url_sans_ellipses, 2)
preceding_ellipsis = re.search(ur'\A…', display_url)
following_ellipsis = re.search(ur'…\z', display_url)
preceding_ellipsis = re.search(u'\\A…', display_url)
following_ellipsis = re.search(u'…\\Z', display_url)
if preceding_ellipsis is not None:
preceding_ellipsis = preceding_ellipsis.group()
else:
@@ -431,4 +431,4 @@ def _tag_attrs(self, attributes = {}):
value = u' '.join(value)
attrs.append(u'%s="%s"' % (self._html_escape(key), self._html_escape(value)))

return u' '.join(attrs)
return u' '.join(attrs)
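Two Python 3 incompatibilities are fixed in autolink.py. First, dict.items() returns a view rather than a list on Python 3, so merging the defaults with caller options needs explicit list() calls before concatenation. Second, the ur'' literal prefix is a SyntaxError on Python 3, so the raw-unicode regexes become plain u'' literals with doubled backslashes; the Ruby-style \z anchor, which Python's re does not support, becomes Python's end-of-string anchor \Z at the same time. A short sketch of both idioms, with illustrative values:

    import re

    DEFAULT_OPTIONS = {'suppress_no_follow': False}
    options = {'suppress_no_follow': True}

    # dict.items() is a list on Python 2 but a view on Python 3; wrapping
    # both in list() keeps the concatenation working on either version.
    # (On Python 3.5+ alone, {**DEFAULT_OPTIONS, **options} also works.)
    merged = dict(list(DEFAULT_OPTIONS.items()) + list(options.items()))
    assert merged['suppress_no_follow'] is True

    # A plain u'' literal with doubled backslashes replaces ur'', and \Z
    # (not Ruby's \z) anchors the end of the string.
    display_url = u'example.com/page\u2026'   # ends with an ellipsis
    assert re.search(u'…\\Z', display_url) is not None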
12 changes: 7 additions & 5 deletions twitter_text/highlighter.py
@@ -1,9 +1,11 @@
# encoding=utf-8

import re
from HTMLParser import HTMLParser
try:
from html.parser import HTMLParser
except ImportError:
from HTMLParser import HTMLParser

from twitter_text.regex import UNICODE_SPACES
from twitter_text.unicode import force_unicode

DEFAULT_HIGHLIGHT_TAG = 'em'
@@ -34,7 +36,7 @@ def hit_highlight(self, hits = [], **kwargs):

if not hits and kwargs.get('query'):
stripped_text = strip_tags(self.text)
for match in re.finditer(ur'%s' % kwargs.get('query'), stripped_text):
for match in re.finditer(u'%s' % kwargs.get('query'), stripped_text):
hits.append(match.span())

if hits and not type(hits) == list:
@@ -58,7 +60,7 @@ def hit_highlight(self, hits = [], **kwargs):
if index % 2:
# we're inside a <tag>
continue
chunk_start = len(u''.join(text_chunks[0:index / 2]))
chunk_start = len(u''.join(text_chunks[0:index // 2]))
chunk_end = chunk_start + len(chunk)
if hit_start >= chunk_start and hit_start < chunk_end:
chunk = chunk[:hit_start - chunk_start] + tags[0] + chunk[hit_start - chunk_start:]
@@ -80,4 +82,4 @@ def hit_highlight(self, hits = [], **kwargs):
else:
result.append(chunk)
self.text = u''.join(result)
return self.text
return self.text
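The highlighter.py changes apply the same recipe: the HTMLParser module moved to html.parser in Python 3, so the import is attempted at its new location first, and / became true division, so the chunk-offset arithmetic switches to // to keep yielding an int slice index. A small standalone sketch:

    try:
        from html.parser import HTMLParser   # Python 3 location
    except ImportError:
        from HTMLParser import HTMLParser    # Python 2 fallback

    # On Python 3, 4 / 2 == 2.0, and a float slice index raises TypeError;
    # floor division preserves the Python 2 integer behavior.
    text_chunks = [u'Hello ', u'world', u'!']
    index = 4                                # even index: outside any <tag>
    chunk_start = len(u''.join(text_chunks[0:index // 2]))
    assert chunk_start == len(u'Hello world')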