diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bfb548..35f1138 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Contributor issues (wave 3)** — Six new issues from local code study ([#59](https://github.com/MarcoPorcellato/logseq-matryca-parser/issues/59)–[#64](https://github.com/MarcoPorcellato/logseq-matryca-parser/issues/64)): LENS ghost wikilink nodes, corrupt X-Ray state handling, `agent_write` assert guard, and paired good-first tests. - **Contributor issues (wave 4)** — Seven issues from Clean Architecture code study ([#65](https://github.com/MarcoPorcellato/logseq-matryca-parser/issues/65)–[#71](https://github.com/MarcoPorcellato/logseq-matryca-parser/issues/71)): SYNAPSE cyclic embed duplication, unresolved embed semantics, kinetic dead code, graph watcher DIP, English DX messages, OCP embed refactor, and parametrized SYNAPSE tests. Agent rule: [`.cursor/rules/07-clean-architecture-audit.mdc`](.cursor/rules/07-clean-architecture-audit.mdc). +- **CLI hygiene** — `scan --broken-refs` now reports unresolved `((uuid))` block references and exits with status 1 when any are found ([#29](https://github.com/MarcoPorcellato/logseq-matryca-parser/issues/29)). - **Contributor issues (wave 5)** — [#72](https://github.com/MarcoPorcellato/logseq-matryca-parser/issues/72) / [#73](https://github.com/MarcoPorcellato/logseq-matryca-parser/issues/73): `append_child_to_node` corrupts Markdown when the source file lacks a trailing newline (agent-write data loss). - **Test coverage (wave 2)** — Community contribution ([#58](https://github.com/MarcoPorcellato/logseq-matryca-parser/pull/58), maintainer #43): **65** new pytest cases (**443** total) for `detect_tab_size_from_markdown`, graph link/backlink helpers, SYNAPSE embedding strip + metadata schema, FORGE Markdown/JSON visitors, LENS node classification, `extract_changelog` CLI, `LogseqConfigReader` timestamps, and KINETIC `agent-write` validation errors. Closes [#20](https://github.com/MarcoPorcellato/logseq-matryca-parser/issues/20), [#43](https://github.com/MarcoPorcellato/logseq-matryca-parser/issues/43)–[#52](https://github.com/MarcoPorcellato/logseq-matryca-parser/issues/52). diff --git a/src/logseq_matryca_parser/kinetic.py b/src/logseq_matryca_parser/kinetic.py index dec46ba..cb2798b 100644 --- a/src/logseq_matryca_parser/kinetic.py +++ b/src/logseq_matryca_parser/kinetic.py @@ -149,6 +149,28 @@ def _build_stats_table(pages: list[LogseqPage]) -> Table: return table +def _build_broken_references_table( + graph: LogseqGraph, broken_nodes: list[LogseqNode] +) -> Table: + table = Table(title="Broken Block References") + table.add_column("Page", style="cyan") + table.add_column("Block UUID", style="magenta") + table.add_column("Missing Block Ref", style="bold red") + + for node in broken_nodes: + page = graph.page_for_node(node) + page_title = page.title if page is not None else "" + missing_refs = [ + ref for ref in node.block_refs if graph.get_node_by_embed_ref(ref) is None + ] + table.add_row( + page_title, + node.uuid, + ", ".join(f"(({ref}))" for ref in missing_refs), + ) + return table + + def _build_deep_stats_tables(stats: dict[str, Any]) -> tuple[Table, Table, Table]: overview_table = Table(title="LENS Deep Statistics") overview_table.add_column("Metric", style="cyan") @@ -222,17 +244,35 @@ def scan( None, help="Path to the Logseq graph root.", ), + broken_refs: bool = typer.Option( + False, + "--broken-refs", + help="Print unresolved block references and exit 1 when any are found.", + ), ) -> None: """Scan a graph and print aggregate parsing statistics.""" resolved = _resolve_graph_path(ctx, graph_path) - pages = _canonical_pages_from_graph(resolved) + from logseq_matryca_parser.graph import LogseqGraph + + graph = LogseqGraph.load_directory(resolved) + pages = list(graph.iter_canonical_pages()) if not pages: console.print("[yellow]No Markdown files found under pages/ or journals/.[/]") raise typer.Exit(code=0) console.print(_build_stats_table(pages)) + if broken_refs: + broken = graph.get_broken_references() + if not broken: + console.print("[green]No unresolved block references found.[/]") + raise typer.Exit(code=0) + + console.print("") + console.print(_build_broken_references_table(graph, broken)) + raise typer.Exit(code=1) + @app.command() def visualize( diff --git a/tests/test_kinetic.py b/tests/test_kinetic.py index 2dd8a0e..f7d0775 100644 --- a/tests/test_kinetic.py +++ b/tests/test_kinetic.py @@ -86,6 +86,23 @@ def test_verbose_flag_enables_debug_logging(tmp_path: Path, caplog: pytest.LogCa assert any(record.levelno == logging.DEBUG for record in caplog.records) +def test_scan_command_reports_broken_refs(tmp_path: Path) -> None: + graph_root = tmp_path / "vault" + pages_dir = graph_root / "pages" + pages_dir.mkdir(parents=True, exist_ok=True) + fake_uuid = "00000000-0000-0000-0000-000000000099" + (pages_dir / "Broken.md").write_text( + f"- Linker references (({fake_uuid}))\n", + encoding="utf-8", + ) + + result = runner.invoke(app, ["scan", str(graph_root), "--broken-refs"]) + + assert result.exit_code == 1 + assert "Broken Block References" in result.output + assert "((00000000-0000-0000-0000-00000" in result.output + + def test_export_command_json_writes_output_file(tmp_path: Path) -> None: graph_root = _create_graph(tmp_path) output_dir = tmp_path / "out-json"