Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 29 additions & 12 deletions src/logseq_matryca_parser/synapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
_PATH_JOIN = " > "
_REFS_JOIN = ", "

_MISSING_AI_EXPORT_DEPS_MSG = (
"Missing AI export dependencies. Please install them using: uv sync --extra ai"
)


class SynapseMetadata(TypedDict, total=False):
"""Vector-store-safe metadata schema for LangChain / LlamaIndex exports."""
Expand All @@ -44,6 +48,7 @@ class SynapseMetadata(TypedDict, total=False):
line_start: NotRequired[int | None]
effective_properties: NotRequired[dict[str, Any]]


Document: type[Any] | None
NodeRelationship: Any
RelatedNodeInfo: type[Any] | None
Expand Down Expand Up @@ -237,7 +242,8 @@ def visit_node(self, node: LogseqNode) -> None:
text_node = self._text_node_cls(
id_=node.uuid,
text=node.clean_text,
metadata=build_synapse_metadata(node, source=node.source_path or ""),
metadata=build_synapse_metadata(
node, source=node.source_path or ""),
)
if not hasattr(text_node, "relationships") or text_node.relationships is None:
text_node.relationships = {}
Expand All @@ -259,7 +265,8 @@ def visit_node(self, node: LogseqNode) -> None:
child_relationships = parent_node.relationships.get(
self._node_relationship.CHILD, []
)
child_relationships.append(self._related_node_info_cls(node_id=node.uuid))
child_relationships.append(
self._related_node_info_cls(node_id=node.uuid))
parent_node.relationships[self._node_relationship.CHILD] = child_relationships

if node.left_id:
Expand Down Expand Up @@ -289,8 +296,11 @@ class SynapseAdapter:
def to_langchain_documents(nodes: list[LogseqNode], source_name: str) -> list[Any]:
"""Convert AST nodes to LangChain documents using `LangChainVisitor`."""
if Document is None:
raise ImportError("LangChain non rilevato. Installa 'langchain-core' per usare Synapse.")
visitor = LangChainVisitor(source_name=source_name, document_cls=Document)

raise ImportError(_MISSING_AI_EXPORT_DEPS_MSG)

visitor = LangChainVisitor(
source_name=source_name, document_cls=Document)
for node in nodes:
node.accept(visitor)
return visitor.get_documents()
Expand All @@ -304,7 +314,9 @@ def to_llamaindex_nodes(
) -> list[Any]:
"""Convert AST nodes to LlamaIndex nodes preserving topology links."""
if TextNode is None or NodeRelationship is None or RelatedNodeInfo is None:
raise ImportError("LlamaIndex non rilevato. Installa 'llama-index' per usare Synapse.")

raise ImportError(_MISSING_AI_EXPORT_DEPS_MSG)

flat = _flatten_nodes_for_export(nodes)
unique_paths = {node.source_path for node in flat if node.source_path}
use_per_node_source = len(unique_paths) > 1
Expand All @@ -313,8 +325,10 @@ def to_llamaindex_nodes(
def _source_id_for_node(node: LogseqNode) -> str:
path_key = node.source_path or ""
if path_key not in source_ids_by_path:
title_seed = page_title or (Path(path_key).stem if path_key else "untitled")
source_ids_by_path[path_key] = page_source_node_id(title_seed, path_key or None)
title_seed = page_title or (
Path(path_key).stem if path_key else "untitled")
source_ids_by_path[path_key] = page_source_node_id(
title_seed, path_key or None)
return source_ids_by_path[path_key]

resolved_source_id = page_source_id
Expand All @@ -341,18 +355,20 @@ def to_context_enriched_chunks(
) -> list[Any]:
"""Flatten ``nodes`` and emit LangChain ``Document``s with breadcrumb-enriched ``page_content``."""
if Document is None:
raise ImportError("LangChain non rilevato. Installa 'langchain-core' per usare Synapse.")
raise ImportError(_MISSING_AI_EXPORT_DEPS_MSG)
documents: list[Any] = []
flat = _flatten_nodes_for_export(nodes)
for node in flat:
if graph.page_for_node(node) is None:
logger.debug("context chunk skip orphan uuid=%s", node.uuid)
continue
breadcrumbs, page = _build_breadcrumbs(graph, node)
source_name = Path(node.source_path).name if node.source_path else str(graph.graph_path.name)
source_name = Path(node.source_path).name if node.source_path else str(
graph.graph_path.name)
host_page = graph.page_for_node(node)
embed_chain = (
frozenset({host_page.title}) if host_page is not None else frozenset()
frozenset({host_page.title}
) if host_page is not None else frozenset()
)
expanded_content = _expand_macros_and_embeds(
node.content, graph, set(), embed_page_chain=embed_chain
Expand All @@ -376,7 +392,8 @@ def to_context_enriched_chunks(
"effective_properties": effective_properties,
},
)
documents.append(Document(page_content=page_content, metadata=metadata))
documents.append(
Document(page_content=page_content, metadata=metadata))
logger.debug(
"context chunk uuid=%s breadcrumbs_len=%s effective_keys=%s",
node.uuid,
Expand All @@ -390,4 +407,4 @@ def load_and_convert(cls, file_path: Path) -> list[Any]:
"""Parse a file and convert it to LangChain documents."""
parser = LogosParser()
nodes = parser.parse_file(file_path)
return cls.to_langchain_documents(nodes, source_name=file_path.name)
return cls.to_langchain_documents(nodes, source_name=file_path.name)
41 changes: 26 additions & 15 deletions tests/test_synapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,15 @@ def build_ast() -> list[LogseqNode]:

def test_to_langchain_documents_raises_when_dependency_missing() -> None:
with patch("logseq_matryca_parser.synapse.Document", None):
with pytest.raises(ImportError, match="LangChain"):
SynapseAdapter.to_langchain_documents(build_ast(), source_name="test.md")
with pytest.raises(ImportError, match="Missing AI export dependencies"):
SynapseAdapter.to_langchain_documents(
build_ast(), source_name="test.md")


def test_to_langchain_documents_uses_visitor_and_graph_metadata() -> None:
with patch("logseq_matryca_parser.synapse.Document", FakeDocument):
docs = SynapseAdapter.to_langchain_documents(build_ast(), source_name="graph.md")
docs = SynapseAdapter.to_langchain_documents(
build_ast(), source_name="graph.md")

assert len(docs) == 2
root_doc = docs[0]
Expand Down Expand Up @@ -106,7 +108,7 @@ def test_to_llamaindex_nodes_raises_when_dependency_missing() -> None:
patch("logseq_matryca_parser.synapse.NodeRelationship", None),
patch("logseq_matryca_parser.synapse.RelatedNodeInfo", None),
):
with pytest.raises(ImportError, match="LlamaIndex"):
with pytest.raises(ImportError, match="Missing AI export dependencies"):
SynapseAdapter.to_llamaindex_nodes(build_ast())


Expand Down Expand Up @@ -173,7 +175,8 @@ def test_to_llamaindex_nodes_assigns_distinct_source_per_page() -> None:
):
nodes = SynapseAdapter.to_llamaindex_nodes([root_a, root_b])

source_ids = {nodes[0].relationships["SOURCE"].node_id, nodes[1].relationships["SOURCE"].node_id}
source_ids = {nodes[0].relationships["SOURCE"].node_id,
nodes[1].relationships["SOURCE"].node_id}
assert len(source_ids) == 2


Expand Down Expand Up @@ -212,7 +215,8 @@ def test_to_llamaindex_nodes_wires_sibling_next_and_previous() -> None:
patch("logseq_matryca_parser.synapse.NodeRelationship", fake_relationship),
patch("logseq_matryca_parser.synapse.RelatedNodeInfo", FakeRelatedNodeInfo),
):
nodes = SynapseAdapter.to_llamaindex_nodes([root], page_source_id="page-doc")
nodes = SynapseAdapter.to_llamaindex_nodes(
[root], page_source_id="page-doc")

by_id = {node.id_: node for node in nodes}
assert by_id["sibling-b"].relationships["PREVIOUS"].node_id == "sibling-a"
Expand All @@ -221,7 +225,7 @@ def test_to_llamaindex_nodes_wires_sibling_next_and_previous() -> None:

def test_to_context_enriched_chunks_raises_when_dependency_missing(tmp_path: Path) -> None:
with patch("logseq_matryca_parser.synapse.Document", None):
with pytest.raises(ImportError, match="LangChain"):
with pytest.raises(ImportError, match="Missing AI export dependencies"):
graph = LogseqGraph(graph_path=tmp_path, pages={})
SynapseAdapter.to_context_enriched_chunks([], graph)

Expand All @@ -243,7 +247,8 @@ def test_synapse_context_enriched_chunking(tmp_path: Path) -> None:
demo = graph.pages["Demo"]

with patch("logseq_matryca_parser.synapse.Document", FakeDocument):
chunks = SynapseAdapter.to_context_enriched_chunks(demo.root_nodes, graph)
chunks = SynapseAdapter.to_context_enriched_chunks(
demo.root_nodes, graph)

assert len(chunks) == 2
child_chunk = chunks[1]
Expand Down Expand Up @@ -277,8 +282,10 @@ def test_synapse_recursive_embed_expansion(tmp_path: Path) -> None:
"- Before {{embed ((" + block_id + "))}} after\n",
encoding="utf-8",
)
(pages / "SnippetPage.md").write_text("- Line one from snippet\n- Line two from snippet\n", encoding="utf-8")
(pages / "PageEmbedHost.md").write_text("- Start {{embed [[SnippetPage]]}} end\n", encoding="utf-8")
(pages / "SnippetPage.md").write_text(
"- Line one from snippet\n- Line two from snippet\n", encoding="utf-8")
(pages / "PageEmbedHost.md").write_text(
"- Start {{embed [[SnippetPage]]}} end\n", encoding="utf-8")

graph = LogseqGraph.load_directory(graph_root)

Expand Down Expand Up @@ -309,7 +316,8 @@ def test_expand_embed_missing_page_completes_without_hang(tmp_path: Path) -> Non
graph_root = tmp_path / "vault"
pages = graph_root / "pages"
pages.mkdir(parents=True)
(pages / "P.md").write_text("- x {{embed [[NoSuchPage]]}}\n", encoding="utf-8")
(pages /
"P.md").write_text("- x {{embed [[NoSuchPage]]}}\n", encoding="utf-8")
graph = LogseqGraph.load_directory(graph_root)
text = graph.pages["P"].root_nodes[0].content

Expand Down Expand Up @@ -342,14 +350,16 @@ def test_expand_cyclic_page_embed_does_not_duplicate_parent_text(tmp_path: Path)
graph_root = tmp_path / "vault"
pages = graph_root / "pages"
pages.mkdir(parents=True)
(pages / "A.md").write_text("- before {{embed [[B]]}} after\n", encoding="utf-8")
(pages /
"A.md").write_text("- before {{embed [[B]]}} after\n", encoding="utf-8")
(pages / "B.md").write_text("- inner {{embed [[A]]}}\n", encoding="utf-8")
graph = LogseqGraph.load_directory(graph_root)
host_page = graph.pages["A"]
text = host_page.root_nodes[0].content
chain = frozenset({host_page.title})

expanded = _expand_macros_and_embeds(text, graph, set(), embed_page_chain=chain)
expanded = _expand_macros_and_embeds(
text, graph, set(), embed_page_chain=chain)

assert expanded.strip() == "before inner after"

Expand All @@ -361,7 +371,8 @@ def test_expand_missing_block_embed_completes_without_hang(tmp_path: Path) -> No
pages = graph_root / "pages"
pages.mkdir(parents=True)
missing = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
(pages / "P.md").write_text(f"- {{{{embed (({missing}))}}}}\n", encoding="utf-8")
(pages /
"P.md").write_text(f"- {{{{embed (({missing}))}}}}\n", encoding="utf-8")
graph = LogseqGraph.load_directory(graph_root)
text = graph.pages["P"].root_nodes[0].content

Expand Down Expand Up @@ -411,7 +422,7 @@ def test_includes_core_keys(self):
node = LogseqNode(uuid="abc", content="Test", indent_level=0)
meta = build_synapse_metadata(node, source="test")
for key in ("uuid", "indent_level", "source", "path", "refs",
"task_status", "task_priority"):
"task_status", "task_priority"):
assert key in meta

def test_property_serialization(self):
Expand Down