Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions semantic_match_registry/src/smr/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,12 @@ def find_semantic_matches(

# Traverse to the neighboring and therefore connected `semantic_id`s
for neighbor, edge_data in graph[node].items():
if path and neighbor == path[-1]:
continue # avoid immediate backtrack A->B->A ping-pong
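# Avoid any cycle: never revisit a node that is already on this path.
# Note: `path` holds all previous nodes, but the current `node` is *not* yet in `path`,
# so a self-loop (neighbor == node) has to be excluded by the explicit comparison below.
# The membership check also prevents the immediate backtrack A->B->A ping-pong for free.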
if neighbor in path or neighbor == node:
continue

edge_weight = float(edge_data.get("weight", 0.0))
new_score: float = score * edge_weight # Multiplicative propagation

Expand Down
27 changes: 27 additions & 0 deletions semantic_match_registry/tests/test_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,33 @@ def test_loop_prevention(self):
self.assertIn("D", matched_semantic_ids)
self.assertNotIn("A", matched_semantic_ids) # "A" should not be revisited

def test_avoid_longer_cycles(self):
    """Check that a cycle away from the start node (e.g. B -> C -> D -> B) cannot produce endless paths."""
    # Base graph as described by the original test (fixture setup is not shown here):
    #   A -> B, B -> C, C -> D, B -> D, D -> E
    # Adding D -> B closes the cycle B -> C -> D -> B.
    self.graph.add_edge("D", "B", weight=0.4)

    # A `min_score` of 0.0 is meant to let every structurally valid path through.
    matches: List[algorithm.SemanticMatch] = algorithm.find_semantic_matches(
        self.graph, semantic_id="A", min_score=0.0
    )

    # 1) Only the finite set of simple paths starting at "A" is expected;
    #    the new D -> B edge must not add paths such as A->B->C->D->B->...
    self.assertEqual(6, len(matches))

    # 2) No returned path may visit a node twice.
    for m in matches:
        # Full node sequence: every node of the path followed by the matched id.
        full_path = m.path + [m.match_semantic_id]
        distinct_nodes = set(full_path)
        self.assertEqual(
            len(full_path),
            len(distinct_nodes),
            msg=f"Path contains a cycle: {full_path}",
        )

def test_minimum_threshold(self):
"""Ensure that results below the minimum score are excluded."""
matches = algorithm.find_semantic_matches(self.graph, "A", min_score=0.6)
Expand Down