@@ -291,11 +291,84 @@ def resolve_issue_line(rule: RuleInfo, kind: str, token: Optional[str] = None) -
291291 return lines [0 ] if lines else rule .line_number
292292
293293
294+ def structure_token_occurrence_index (tokens : List [str ], position : int ) -> Optional [int ]:
295+ """
296+ Return which occurrence of a token appears at a given absolute token position.
297+
298+ Example: for ["test:", "if:", "test:"], position 2 returns 1.
299+ """
300+ if position < 0 or position >= len (tokens ):
301+ return None
302+ token = tokens [position ]
303+ return sum (1 for current in tokens [:position ] if current == token )
304+
305+
306+ def resolve_structure_issue_lines (diff : RuleDifference ) -> Optional [Tuple [int , int ]]:
307+ """
308+ Resolve stable line anchors for a structural rule difference.
309+
310+ Strategy:
311+ - Use position-aware token occurrence matching when possible.
312+ - For insert/delete cases (one side missing token), anchor to the previous
313+ shared structural token; if unavailable, anchor to `replace:`.
314+ """
315+ en_tokens = extract_structure_elements (diff .english_rule .data )
316+ tr_tokens = extract_structure_elements (diff .translated_rule .data )
317+ en_token , tr_token , mismatch_pos = first_structure_mismatch (en_tokens , tr_tokens )
318+
319+ if mismatch_pos < 0 :
320+ return None
321+
322+ # Insertion/deletion: anchor to the previous shared token if possible.
323+ if en_token is None or tr_token is None :
324+ anchor_pos = mismatch_pos - 1
325+ if (
326+ anchor_pos >= 0
327+ and anchor_pos < len (en_tokens )
328+ and anchor_pos < len (tr_tokens )
329+ and en_tokens [anchor_pos ] == tr_tokens [anchor_pos ]
330+ ):
331+ anchor_token = en_tokens [anchor_pos ]
332+ en_occ = structure_token_occurrence_index (en_tokens , anchor_pos )
333+ tr_occ = structure_token_occurrence_index (tr_tokens , anchor_pos )
334+ if en_occ is not None and tr_occ is not None :
335+ line_en = resolve_issue_line_at_position (diff .english_rule , "structure" , anchor_token , en_occ )
336+ line_tr = resolve_issue_line_at_position (diff .translated_rule , "structure" , anchor_token , tr_occ )
337+ if line_en is not None and line_tr is not None :
338+ return line_en , line_tr
339+
340+ # Fallback: anchor both sides to replace, which is the rule body entrypoint.
341+ line_en = resolve_issue_line (diff .english_rule , "structure" , "replace:" ) or diff .english_rule .line_number
342+ line_tr = resolve_issue_line (diff .translated_rule , "structure" , "replace:" ) or diff .translated_rule .line_number
343+ return line_en , line_tr
344+
345+ # Exact token available on both sides: resolve by occurrence index at mismatch.
346+ en_occ = structure_token_occurrence_index (en_tokens , mismatch_pos )
347+ tr_occ = structure_token_occurrence_index (tr_tokens , mismatch_pos )
348+ if en_occ is not None and tr_occ is not None :
349+ line_en = resolve_issue_line_at_position (diff .english_rule , "structure" , en_token , en_occ )
350+ line_tr = resolve_issue_line_at_position (diff .translated_rule , "structure" , tr_token , tr_occ )
351+ if line_en is not None and line_tr is not None :
352+ return line_en , line_tr
353+
354+ line_en = resolve_issue_line (diff .english_rule , "structure" , en_token )
355+ line_tr = resolve_issue_line (diff .translated_rule , "structure" , tr_token )
356+ if line_en is None or line_tr is None :
357+ return None
358+ return line_en , line_tr
359+
360+
294361def collect_issues (
295362 result : ComparisonResult ,
296363 file_name : str ,
297364 language : str ,
298365) -> List [dict ]:
366+ """
367+ Flatten a ComparisonResult into one normalized dictionary per issue.
368+
369+ This is the canonical bridge from parser/diff objects to serializable
370+ records consumed by JSONL output, snapshot tests, and line-level assertions.
371+ """
299372 issues = []
300373
301374 for rule in result .missing_rules :
@@ -345,23 +418,10 @@ def collect_issues(
345418 rule = diff .english_rule
346419 issue = issue_base (rule , file_name , language )
347420 if diff .diff_type == "structure" :
348- en_tokens = extract_structure_elements (diff .english_rule .data )
349- tr_tokens = extract_structure_elements (diff .translated_rule .data )
350- en_token , tr_token , mismatch_pos = first_structure_mismatch (en_tokens , tr_tokens )
351-
352- # Skip reporting when tokens are misaligned (both exist but differ)
353- # This avoids misleading line numbers when entire blocks are missing/added
354- # We only report when one is None (clear case of missing element)
355- if en_token is not None and tr_token is not None and en_token != tr_token :
356- continue
357-
358- issue_line_en = resolve_issue_line (diff .english_rule , "structure" , en_token )
359- issue_line_tr = resolve_issue_line (diff .translated_rule , "structure" , tr_token )
360-
361- # Skip reporting structure differences where we can't find both tokens
362- # This avoids misleading line numbers when blocks are missing
363- if issue_line_en is None or issue_line_tr is None :
421+ structure_lines = resolve_structure_issue_lines (diff )
422+ if structure_lines is None :
364423 continue
424+ issue_line_en , issue_line_tr = structure_lines
365425 else :
366426 issue_line_en = resolve_issue_line (diff .english_rule , diff .diff_type )
367427 issue_line_tr = resolve_issue_line (diff .translated_rule , diff .diff_type )
@@ -444,20 +504,10 @@ def add_issue(rule: RuleInfo, issue_type: str, payload: Dict[str, Any]) -> None:
444504
445505 for diff in result .rule_differences :
446506 if diff .diff_type == "structure" :
447- en_tokens = extract_structure_elements (diff .english_rule .data )
448- tr_tokens = extract_structure_elements (diff .translated_rule .data )
449- en_token , tr_token , mismatch_pos = first_structure_mismatch (en_tokens , tr_tokens )
450-
451- # Skip reporting when tokens are misaligned (both exist but differ)
452- # This avoids misleading line numbers when entire blocks are missing/added
453- if en_token is not None and tr_token is not None and en_token != tr_token :
454- continue
455-
456- line_en = resolve_issue_line (diff .english_rule , "structure" , en_token )
457- line_tr = resolve_issue_line (diff .translated_rule , "structure" , tr_token )
458- # Skip structure diffs where we can't find both tokens
459- if line_en is None or line_tr is None :
507+ structure_lines = resolve_structure_issue_lines (diff )
508+ if structure_lines is None :
460509 continue
510+ line_en , line_tr = structure_lines
461511 else :
462512 line_en = resolve_issue_line (diff .english_rule , diff .diff_type )
463513 line_tr = resolve_issue_line (diff .translated_rule , diff .diff_type )
0 commit comments