Skip to content
This repository has been archived by the owner on Apr 4, 2018. It is now read-only.

Python 3.x Support #39

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
argparse==1.2.1
PyYAML==3.10
beautifulsoup4==4.2.0
six==1.10.0
5 changes: 3 additions & 2 deletions tests.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# encoding=utf-8

import six
import twitter_text, sys, os, json, argparse, re
from twitter_text.unicode import force_unicode

# Probe whether this interpreter can represent a code point above U+FFFF.
# The probe goes through six.unichr only: a bare `unichr` call does not exist
# on Python 3 and would raise NameError, which a catch-all handler would
# silently misreport as a narrow build.
narrow_build = True
try:
    six.unichr(0x20000)
except ValueError:
    # Code point out of range for this build: keep narrow_build = True.
    pass
else:
    narrow_build = False
Expand Down Expand Up @@ -177,4 +178,4 @@ def assert_equal(result, test):

sys.stdout.write(u'\033[0m-------\n\033[92m%d tests passed.\033[0m\n' % attempted)
sys.stdout.flush()
sys.exit(os.EX_OK)
sys.exit(os.EX_OK)
48 changes: 24 additions & 24 deletions twitter_text/autolink.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# encoding=utf-8

from __future__ import unicode_literals
import re, cgi

from twitter_text.regex import REGEXEN
Expand Down Expand Up @@ -113,7 +113,7 @@ def auto_link_entities(self, entities = [], options = {}):
return self.text

# NOTE deprecate these attributes not options keys in options hash, then use html_attrs
options = dict(DEFAULT_OPTIONS.items() + options.items())
options = dict(list(DEFAULT_OPTIONS.items()) + list(options.items()))
options['html_attrs'] = self._extract_html_attrs_from_options(options)
if not options.get('suppress_no_follow', False):
options['html_attrs']['rel'] = "nofollow"
Expand Down Expand Up @@ -302,16 +302,16 @@ def _link_url_with_entity(self, entity, options = {}):
For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts.
For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine.
"""
display_url = entity.get('display_url').decode('utf-8')
display_url = entity.get('display_url')
expanded_url = entity.get('expanded_url')
invisible_tag_attrs = options.get('invisible_tag_attrs', DEFAULT_INVISIBLE_TAG_ATTRS)

display_url_sans_ellipses = re.sub(ur'…', u'', display_url)
display_url_sans_ellipses = re.sub(r'…', '', display_url)

if expanded_url.find(display_url_sans_ellipses) > -1:
# maxsplit=1 always yields exactly two pieces (the text before and after the
# first occurrence). A larger maxsplit can return three pieces when the
# display URL occurs more than once, which breaks the two-name unpacking.
before_display_url, after_display_url = expanded_url.split(display_url_sans_ellipses, 1)
preceding_ellipsis = re.search(ur'\A…', display_url)
following_ellipsis = re.search(ur'…\z', display_url)
preceding_ellipsis = re.search(r'\A…', display_url)
following_ellipsis = re.search(r'…\Z', display_url)
if preceding_ellipsis is not None:
preceding_ellipsis = preceding_ellipsis.group()
else:
Expand Down Expand Up @@ -344,7 +344,7 @@ def _link_url_with_entity(self, entity, options = {}):
# …
# </span>

return u"<span class='tco-ellipsis'>%s<span %s>&nbsp;</span></span><span %s>%s</span><span class='js-display-url'>%s</span><span %s>%s</span><span class='tco-ellipsis'><span %s>&nbsp;</span>%s</span>" % (preceding_ellipsis, invisible_tag_attrs, invisible_tag_attrs, self._html_escape(before_display_url), self._html_escape(display_url_sans_ellipses), invisible_tag_attrs, self._html_escape(after_display_url), invisible_tag_attrs, following_ellipsis)
return "<span class='tco-ellipsis'>%s<span %s>&nbsp;</span></span><span %s>%s</span><span class='js-display-url'>%s</span><span %s>%s</span><span class='tco-ellipsis'><span %s>&nbsp;</span>%s</span>" % (preceding_ellipsis, invisible_tag_attrs, invisible_tag_attrs, self._html_escape(before_display_url), self._html_escape(display_url_sans_ellipses), invisible_tag_attrs, self._html_escape(after_display_url), invisible_tag_attrs, following_ellipsis)
else:
return self._html_escape(display_url)

Expand All @@ -356,13 +356,13 @@ def _link_to_hashtag(self, entity, chars, options = {}):
if REGEXEN['rtl_chars'].search(hashtag):
hashtag_class += ' rtl'

href = options.get('hashtag_url_transform', lambda ht: u'%s%s' % (options.get('hashtag_url_base'), ht))(hashtag)
href = options.get('hashtag_url_transform', lambda ht: '%s%s' % (options.get('hashtag_url_base'), ht))(hashtag)

# Start from the hashtag defaults, then layer the caller-supplied html_attrs
# on top (same order as _link_to_cashtag). Building the options dict first and
# then rebinding html_attrs to a fresh literal threw the caller's attributes
# away.
html_attrs = {
    'class': hashtag_class,
    'title': '#%s' % hashtag,
}
html_attrs.update(options.get('html_attrs', {}))

link = self._link_to_text_with_symbol(entity, hashchar, hashtag, href, html_attrs, options)
Expand All @@ -372,19 +372,19 @@ def _link_to_cashtag(self, entity, chars, options = {}):
dollar = chars[entity['indices'][0]]
cashtag = entity['cashtag']

href = options.get('cashtag_url_transform', lambda ct: u'%s%s' % (options.get('cashtag_url_base'), ct))(cashtag)
href = options.get('cashtag_url_transform', lambda ct: '%s%s' % (options.get('cashtag_url_base'), ct))(cashtag)

html_attrs = {
'class': options.get('cashtag_class'),
'title': u'$%s' % cashtag
'title': '$%s' % cashtag
}
html_attrs.update(options.get('html_attrs', {}))

link = self._link_to_text_with_symbol(entity, dollar, cashtag, href, html_attrs, options)
return chars[:entity['indices'][0]] + link + chars[entity['indices'][1]:]

def _link_to_screen_name(self, entity, chars, options = {}):
name = u'%s%s' % (entity['screen_name'], entity.get('list_slug') or '')
name = '%s%s' % (entity['screen_name'], entity.get('list_slug') or '')
chunk = options.get('link_text_transform', default_transform)(entity, name)
name = name.lower()

Expand All @@ -395,30 +395,30 @@ def _link_to_screen_name(self, entity, chars, options = {}):
del(html_attrs['title'])

if entity.get('list_slug') and not options.get('supress_lists'):
href = options.get('list_url_transform', lambda sn: u'%s%s' % (options.get('list_url_base'), sn))(name)
href = options.get('list_url_transform', lambda sn: '%s%s' % (options.get('list_url_base'), sn))(name)
html_attrs['class'] = options.get('list_class')
else:
href = options.get('username_url_transform', lambda sn: u'%s%s' % (options.get('username_url_base'), sn))(name)
href = options.get('username_url_transform', lambda sn: '%s%s' % (options.get('username_url_base'), sn))(name)
html_attrs['class'] = options.get('username_class')

link = self._link_to_text_with_symbol(entity, at, chunk, href, html_attrs, options)
return chars[:entity['indices'][0]] + link + chars[entity['indices'][1]:]

def _link_to_text_with_symbol(self, entity, symbol, text, href, attributes = {}, options = {}):
    """Return the HTML for an entity written as a symbol followed by text.

    The text is HTML-escaped. The symbol and the text are each wrapped in
    the tag named by options['symbol_tag'] / options['text_with_symbol_tag']
    when that option is set. If options['username_include_symbol'] is falsy
    and the symbol matches REGEXEN['at_signs'], the symbol is emitted in
    front of the link; otherwise symbol and text are both placed inside it.
    """
    def wrap(option_key, content):
        # Wrap content in <tag>...</tag> only when the option names a tag.
        tag = options.get(option_key)
        if not tag:
            return content
        return '<%s>%s</%s>' % (tag, content, tag)

    symbol_markup = wrap('symbol_tag', symbol)
    text_markup = wrap('text_with_symbol_tag', self._html_escape(text))

    symbol_outside_link = (
        not options.get('username_include_symbol')
        and REGEXEN['at_signs'].match(symbol)
    )
    if symbol_outside_link:
        return '%s%s' % (symbol_markup, self._link_to_text(entity, text_markup, href, attributes, options))
    return '%s' % self._link_to_text(entity, symbol_markup + text_markup, href, attributes, options)

def _link_to_text(self, entity, text, href, attributes = {}, options = {}):
    """Return an ``<a ...>text</a>`` string for the given entity.

    ``href`` is added to a copy of ``attributes``. If ``options`` carries a
    'link_attributes_transform' and/or 'link_attribute_transform' callable,
    each is called as ``hook(entity, attributes)`` and its return value
    replaces the attributes. The link text is passed through
    options['link_text_transform'] (``default_transform`` when absent), and
    the attributes are serialised with ``self._tag_attrs``.
    """
    # Work on a copy: writing 'href' into the argument would mutate the
    # caller's dict and, when the default is used, the single shared {}
    # that persists across calls.
    attributes = dict(attributes)
    attributes['href'] = href

    # Both spellings of the hook key are honoured, in this order.
    for hook_key in ('link_attributes_transform', 'link_attribute_transform'):
        hook = options.get(hook_key)
        if hook:
            attributes = hook(entity, attributes)

    text = options.get('link_text_transform', default_transform)(entity, text)
    return '<a %s>%s</a>' % (self._tag_attrs(attributes), text)

def _tag_attrs(self, attributes = {}):
attrs = []
Expand All @@ -428,7 +428,7 @@ def _tag_attrs(self, attributes = {}):
attrs.append(key)
continue
if type(value) == list:
value = u' '.join(value)
attrs.append(u'%s="%s"' % (self._html_escape(key), self._html_escape(value)))
value = ' '.join(value)
attrs.append('%s="%s"' % (self._html_escape(key), self._html_escape(value)))

return u' '.join(attrs)
return ' '.join(attrs)
18 changes: 9 additions & 9 deletions twitter_text/highlighter.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# encoding=utf-8

from __future__ import unicode_literals
import re
from HTMLParser import HTMLParser
from six.moves import html_parser

from twitter_text.regex import UNICODE_SPACES
from twitter_text.unicode import force_unicode

DEFAULT_HIGHLIGHT_TAG = 'em'

# from http://stackoverflow.com/questions/753052/strip-html-from-strings-in-python
class MLStripper(HTMLParser):
class MLStripper(html_parser.HTMLParser):
def __init__(self):
self.reset()
self.fed = []
Expand All @@ -34,14 +34,14 @@ def hit_highlight(self, hits = [], **kwargs):

if not hits and kwargs.get('query'):
stripped_text = strip_tags(self.text)
for match in re.finditer(ur'%s' % kwargs.get('query'), stripped_text):
for match in re.finditer(r'%s' % kwargs.get('query'), stripped_text):
hits.append(match.span())

if hits and not type(hits) == list:
raise Exception('The syntax for the hit_highlight method has changed. You must pass in a list of lists containing the indices of the strings you want to match.')

tag_name = kwargs.get('tag', DEFAULT_HIGHLIGHT_TAG)
tags = [u'<%s>' % tag_name, u'</%s>' % tag_name]
tags = ['<%s>' % tag_name, '</%s>' % tag_name]

text = self.text
chunks = re.split(r'[<>]', text)
Expand All @@ -58,7 +58,7 @@ def hit_highlight(self, hits = [], **kwargs):
if index % 2:
# we're inside a <tag>
continue
# Floor division keeps the slice bound an int. On Python 3, "/" between ints
# produces a float and slicing with it raises TypeError.
chunk_start = len(''.join(text_chunks[0:index // 2]))
chunk_end = chunk_start + len(chunk)
if hit_start >= chunk_start and hit_start < chunk_end:
chunk = chunk[:hit_start - chunk_start] + tags[0] + chunk[hit_start - chunk_start:]
Expand All @@ -76,8 +76,8 @@ def hit_highlight(self, hits = [], **kwargs):
for index, chunk in enumerate(chunks):
if index % 2:
# we're inside a <tag>
result.append(u'<%s>' % chunk)
result.append('<%s>' % chunk)
else:
result.append(chunk)
self.text = u''.join(result)
return self.text
self.text = ''.join(result)
return self.text
Loading