Skip to content

Commit 3fffe29

Browse files
authored
Bug 1321290 - Migrate HTML entities to Unicode characters (#28)
1 parent 27640ce commit 3fffe29

File tree

3 files changed

+97
-43
lines changed

3 files changed

+97
-43
lines changed

fluent/migrate/transforms.py

Lines changed: 7 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -93,26 +93,19 @@ def __call__(self, ctx):
9393
class Source(Transform):
9494
"""Declare the source translation to be migrated with other transforms.
9595
96-
When evaluated `Source` returns a simple string value. All \\uXXXX from
97-
the original translations are converted beforehand to the literal
98-
characters they encode.
96+
When evaluated, `Source` returns a simple string value. Escaped characters
97+
are unescaped by the compare-locales parser according to the file format:
9998
100-
HTML entities are left unchanged for now because we can't know if they
101-
should be converted to the characters they represent or not. Consider the
102-
following example in which `&amp;` could be replaced with the literal `&`:
99+
- in properties files: \\uXXXX,
100+
- in DTD files: known named, decimal, and hexadecimal HTML entities.
103101
104-
Privacy &amp; History
102+
Consult the following files for the list of known named HTML entities:
105103
106-
vs. these two examples where the HTML encoding should be preserved:
107-
108-
Erreur&nbsp;!
109-
Use /help &lt;command&gt; for more information.
104+
https://github.com/python/cpython/blob/2.7/Lib/htmlentitydefs.py
105+
https://github.com/python/cpython/blob/3.6/Lib/html/entities.py
110106
111107
"""
112108

113-
# XXX Perhaps there's a strict subset of HTML entities which must or must
114-
# not be replaced?
115-
116109
def __init__(self, path, key):
117110
if path.endswith('.ftl'):
118111
raise NotSupportedError(

tests/migrate/test_copy.py

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,8 @@ class TestCopy(MockContext):
2525
def setUp(self):
2626
self.strings = parse(PropertiesParser, '''
2727
foo = Foo
28-
foo.unicode.middle = Foo\\u0020Bar
2928
foo.unicode.begin = \\u0020Foo
3029
foo.unicode.end = Foo\\u0020
31-
32-
foo.html.entity = &lt;&#x21E7;&#x2318;K&gt;
3330
''')
3431

3532
def test_copy(self):
@@ -45,19 +42,6 @@ def test_copy(self):
4542
''')
4643
)
4744

48-
def test_copy_escape_unicode_middle(self):
49-
msg = FTL.Message(
50-
FTL.Identifier('foo-unicode-middle'),
51-
value=COPY('test.properties', 'foo.unicode.middle')
52-
)
53-
54-
self.assertEqual(
55-
evaluate(self, msg).to_json(),
56-
ftl_message_to_json('''
57-
foo-unicode-middle = Foo Bar
58-
''')
59-
)
60-
6145
@unittest.skip('Parser/Serializer trim whitespace')
6246
def test_copy_escape_unicode_begin(self):
6347
msg = FTL.Message(
@@ -86,19 +70,6 @@ def test_copy_escape_unicode_end(self):
8670
''')
8771
)
8872

89-
def test_copy_html_entity(self):
90-
msg = FTL.Message(
91-
FTL.Identifier('foo-html-entity'),
92-
value=COPY('test.properties', 'foo.html.entity')
93-
)
94-
95-
self.assertEqual(
96-
evaluate(self, msg).to_json(),
97-
ftl_message_to_json('''
98-
foo-html-entity = &lt;&#x21E7;&#x2318;K&gt;
99-
''')
100-
)
101-
10273

10374
@unittest.skipUnless(DTDParser, 'compare-locales required')
10475
class TestCopyAttributes(MockContext):

tests/migrate/test_source.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,16 @@
33

44
import unittest
55

6+
try:
7+
from compare_locales.parser import PropertiesParser, DTDParser
8+
except ImportError:
9+
PropertiesParser = DTDParser = None
10+
611
import fluent.syntax.ast as FTL
712

813
from fluent.migrate.errors import NotSupportedError
914
from fluent.migrate.transforms import Source, COPY, PLURALS, REPLACE
15+
from fluent.migrate.util import parse
1016
from fluent.migrate.helpers import EXTERNAL_ARGUMENT
1117

1218

@@ -49,3 +55,87 @@ def test_replace(self):
4955
}
5056
)
5157
)
58+
59+
60+
class MockContext(unittest.TestCase):
61+
def get_source(self, _path, key):
62+
# Ignore _path (test.properties) and get translations from self.strings.
63+
return self.strings[key].val
64+
65+
66+
@unittest.skipUnless(PropertiesParser, 'compare-locales required')
67+
class TestProperties(MockContext):
68+
def setUp(self):
69+
self.strings = parse(PropertiesParser, '''
70+
foo = Foo
71+
72+
unicode-start = \\u0020Foo
73+
unicode-middle = Foo\\u0020Bar
74+
unicode-end = Foo\\u0020
75+
76+
html-entity = &lt;&#x21E7;&#x2318;K&gt;
77+
''')
78+
79+
def test_simple_text(self):
80+
source = Source('test.properties', 'foo')
81+
self.assertEqual(source(self), 'Foo')
82+
83+
def test_escape_unicode_start(self):
84+
source = Source('test.properties', 'unicode-start')
85+
self.assertEqual(source(self), ' Foo')
86+
87+
def test_escape_unicode_middle(self):
88+
source = Source('test.properties', 'unicode-middle')
89+
self.assertEqual(source(self), 'Foo Bar')
90+
91+
def test_escape_unicode_end(self):
92+
source = Source('test.properties', 'unicode-end')
93+
self.assertEqual(source(self), 'Foo ')
94+
95+
def test_html_entity(self):
96+
source = Source('test.properties', 'html-entity')
97+
self.assertEqual(source(self), '&lt;&#x21E7;&#x2318;K&gt;')
98+
99+
100+
@unittest.skipUnless(DTDParser, 'compare-locales required')
101+
class TestDTD(MockContext):
102+
def setUp(self):
103+
self.strings = parse(DTDParser, '''
104+
<!ENTITY foo "Foo">
105+
106+
<!ENTITY unicodeEscape "Foo\\u0020Bar">
107+
108+
<!ENTITY named "&amp;">
109+
<!ENTITY decimal "&#38;">
110+
<!ENTITY shorthexcode "&#x26;">
111+
<!ENTITY longhexcode "&#x0026;">
112+
<!ENTITY unknown "&unknownEntity;">
113+
''')
114+
115+
def test_simple_text(self):
116+
source = Source('test.dtd', 'foo')
117+
self.assertEqual(source(self), 'Foo')
118+
119+
def test_backslash_unicode_escape(self):
120+
source = Source('test.dtd', 'unicodeEscape')
121+
self.assertEqual(source(self), 'Foo\\u0020Bar')
122+
123+
def test_named_entity(self):
124+
source = Source('test.dtd', 'named')
125+
self.assertEqual(source(self), '&')
126+
127+
def test_decimal_entity(self):
128+
source = Source('test.dtd', 'decimal')
129+
self.assertEqual(source(self), '&')
130+
131+
def test_shorthex_entity(self):
132+
source = Source('test.dtd', 'shorthexcode')
133+
self.assertEqual(source(self), '&')
134+
135+
def test_longhex_entity(self):
136+
source = Source('test.dtd', 'longhexcode')
137+
self.assertEqual(source(self), '&')
138+
139+
def test_unknown_entity(self):
140+
source = Source('test.dtd', 'unknown')
141+
self.assertEqual(source(self), '&unknownEntity;')

0 commit comments

Comments
 (0)