Skip to content

Commit 29ce4cc

Browse files
authored
Merge pull request #379 from transifex/TX-16613_2
TX-16723: Update template on parser - STRUCTURED JSON
2 parents fa52924 + 60ceb33 commit 29ce4cc

File tree

2 files changed

+545
-11
lines changed

2 files changed

+545
-11
lines changed

openformats/formats/json.py

Lines changed: 207 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,21 +1125,202 @@ def _copy_until_and_remove_section(self, pos):
11251125
# Unlike the JSON format, do not remove the remaining section of the
11261126
# template
11271127

1128-
def remove_strings_from_template(
    self, template: str, stringset: list[OpenString]
) -> str:
    """
    Remove structured-json entries that do not match the ordered stringset.

    Entries are matched by the hash stored in their "string" field
    (``OpenString.template_replacement``), not by key. The stringset is
    consumed strictly in order: a leaf is kept only when it matches the
    *next* unconsumed string, and only then does the cursor advance.

    - Dict roots: nested dicts are walked recursively; leaf objects (those
      with a direct "string" field) that do not match are removed, and so
      are nested dicts that end up with no kept leaf.
    - List roots: every dict item is handled like a dict root, and items
      that end up with no kept leaf are removed.

    Side effect: ``self.stringset`` and ``self.stringset_index`` are reset
    and then updated while the template is walked.

    :param template: the structured-json template text.
    :param stringset: the strings that must remain, in template order.
    :return: the transcribed template after ``self._clean_empties``.
    """
    self.stringset = list(stringset)
    self.stringset_index = 0

    transcriber = Transcriber(template)
    source = transcriber.source
    parsed = DumbJson(source)

    def next_string():
        """Return the next unconsumed string, or None when exhausted."""
        if self.stringset_index < len(self.stringset):
            return self.stringset[self.stringset_index]
        return None

    def matches_next_string(string_value):
        """Tell whether a leaf's "string" value holds the next hash."""
        current = next_string()
        # A non-text "string" value can never carry a hash; checking the
        # type first also keeps the substring test below from raising
        # TypeError on numbers, nulls or nested structures.
        if current is None or not isinstance(string_value, str):
            return False

        placeholder = current.template_replacement
        if current.pluralized:
            # The hash is embedded inside an ICU plural string.
            return placeholder in string_value
        return string_value == placeholder

    def walk_dict(node):
        """
        Recursively traverse a DumbJson dict node.

        Returns True if this node (or any of its descendants) contains at
        least one *kept* leaf. Otherwise returns False so the caller can
        prune the whole section.
        """
        if node.type != dict:
            return False

        has_kept_leaf = False

        for _, key_pos, value, _ in node:
            # Only object values can be leaves or containers of leaves.
            if not (isinstance(value, DumbJson) and value.type == dict):
                continue

            # A *leaf* is an object with a direct "string" field.
            is_leaf = any(
                child_key == self.STRING_KEY
                for child_key, _, _, _ in value
            )

            # Open a section just before the key (presumably at its
            # opening quote -- TODO confirm) so the whole entry can be
            # dropped in one go.
            transcriber.copy_until(key_pos - 1)
            transcriber.mark_section_start()

            if is_leaf:
                ((string_value, _),) = value.find_children(self.STRING_KEY)
                keep = matches_next_string(string_value)
                if keep:
                    self.stringset_index += 1
            else:
                # Nested dict: keep it only if something inside was kept.
                # NOTE(review): the recursive call opens its own sections
                # inside the one opened above; confirm that
                # Transcriber.remove_section() targets the intended
                # (outer) section when sections nest.
                keep = walk_dict(value)

            transcriber.copy_until(value.end + 1)
            transcriber.mark_section_end()

            if keep:
                has_kept_leaf = True
            else:
                transcriber.remove_section()

        return has_kept_leaf

    if parsed.type == dict:
        walk_dict(parsed)
    elif parsed.type == list:
        # List root: each dict item is treated as a dict root.
        for value, _ in parsed:
            if not isinstance(value, DumbJson) or value.type != dict:
                continue

            transcriber.copy_until(value.start)
            transcriber.mark_section_start()

            has_kept = walk_dict(value)

            transcriber.copy_until(value.end + 1)
            transcriber.mark_section_end()

            if not has_kept:
                transcriber.remove_section()

    transcriber.copy_until(len(source))
    compiled = transcriber.get_destination()
    return self._clean_empties(compiled)
1240+
1241+
1242+
def _build_structured_payload(self, os) -> dict:
    """
    Build the inner payload dict for a structured-json entry:

        {
          "string": "<hash>",
          "context": "...",
          "developer_comment": "...",
          "character_limit": 100
        }

    "string" is always present and holds ``os.template_replacement``.
    "context" and "developer_comment" are added only when the attribute
    exists and is truthy; "character_limit" is added whenever the attribute
    exists and is not None (so a limit of 0 is kept).

    :param os: the string object to describe. Note that the name shadows
        the stdlib ``os`` module inside this method.
    :return: a new dict keyed by the handler's ``*_KEY`` constants.
    """
    payload = {
        self.STRING_KEY: os.template_replacement,
    }

    # Optional metadata: read each attribute once, add only if present.
    # NOTE(review): these values go through self.escape() here and the
    # payload is later serialised with json.dumps() by the callers in this
    # file; confirm that quotes/backslashes are not escaped twice.
    context = getattr(os, "context", None)
    if context:
        payload[self.CONTEXT_KEY] = self.escape(context)

    developer_comment = getattr(os, "developer_comment", None)
    if developer_comment:
        payload[self.DEVELOPER_COMMENT_KEY] = self.escape(developer_comment)

    character_limit = getattr(os, "character_limit", None)
    if character_limit is not None:
        payload[self.CHARACTER_LIMIT_KEY] = character_limit

    return payload
1268+
1269+
def _build_structured_json_entry(self, os) -> tuple[str, str]:
    """
    Build the JSON snippet for a structured-json entry for dict roots:

        "key": {
          "string": "<hash>",
          "context": "...",
          "developer_comment": "...",
          "character_limit": 100
        }

    :param os: the string object; ``os.key`` becomes the JSON key and the
        payload comes from ``self._build_structured_payload(os)``.
    :return: ``(key_literal, value_literal)`` -- the JSON-encoded key and
        the pretty-printed payload, each as text.
    """
    key_literal = json.dumps(os.key, ensure_ascii=False)
    payload = self._build_structured_payload(os)

    value_literal = json.dumps(payload, ensure_ascii=False, indent=2)

    # Cosmetic tab indent on every line after the first.
    # Only "\n" is treated as a line break: json.dumps escapes "\n" inside
    # string values, so every raw "\n" is structural. str.splitlines() must
    # not be used here because it also breaks on U+2028/U+2029/U+0085,
    # which ensure_ascii=False leaves unescaped inside values; re-joining
    # on those would inject a raw newline into a JSON string.
    value_literal = value_literal.replace("\n", "\n\t")

    return key_literal, value_literal
1292+
1293+
def _make_added_entry_for_dict(self, os) -> str:
    """
    Return the ``"key": {...}`` member text for a dict-root template.

    The key and value literals come from
    ``self._build_structured_json_entry(os)`` and are joined with ``": "``.
    No leading or trailing comma is added.
    """
    key_literal, value_literal = self._build_structured_json_entry(os)
    return f"{key_literal}: {value_literal}"
1296+
1297+
def _make_added_entry_for_list(self, os) -> str:
    """
    Return the JSON text of one list item for a list-root template.

    For list-root STRUCTURED_JSON, each added item is a *separate object*
    in the root list, with a single top-level key.

    The key is taken *as-is* from os.key (no '..0..' stripping, no
    special dot handling):

        os.key = "batmobil"

    Resulting list item:

        {
          "batmobil": {
            "string": "<hash>",
            "context": "...",
            "developer_comment": "...",
            "character_limit": 100
          }
        }

    :param os: the string object; the inner dict comes from
        ``self._build_structured_payload(os)``.
    :return: the item serialised with a two-space indent and non-ASCII
        characters left unescaped.
    """
    container = {os.key: self._build_structured_payload(os)}
    return json.dumps(container, ensure_ascii=False, indent=2)
1323+
11431324

11441325
class ChromeI18nHandler(JsonHandler):
11451326
"""Responsible for CHROME files, based on the JsonHandler."""
@@ -1457,6 +1638,22 @@ def compile(self, template, stringset, **kwargs):
14571638

14581639
return compiled
14591640

1641+
def remove_strings_from_template(
    self, template: str, stringset: list[OpenString]
) -> str:
    """
    Removes strings from the template that are not in the stringset.

    This implementation is currently a no-op: ``stringset`` is ignored and
    the template is handed back exactly as received.
    """
    return template
1648+
1649+
def add_strings_to_template(
    self, template: str, stringset: list[OpenString]
) -> str:
    """
    Adds strings to the template that are not in the template currently.

    This implementation is currently a no-op: ``stringset`` is ignored and
    the template is handed back exactly as received.
    """
    return template
1656+
14601657
@staticmethod
def escape(string):
    """Return ``string`` passed through the ``escape`` function in scope."""
    return escape(string)

0 commit comments

Comments
 (0)