From fffb07af45435de5d86abcd2a153b5b83c4f0b57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98yvind=20Harboe?= Date: Mon, 18 May 2026 06:57:09 +0200 Subject: [PATCH 1/2] synth: hash post-`read_design_sources` state to isolate slang non-determinism MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit yosys-slang was recently found to be non-idempotent in a few cases: re-running synth on the same RTL produces different post-frontend RTLIL. Master already has two file-based hashes that bracket the whole synthesis pipeline: synth__canonical_netlist__hash sha1 of `1_1_yosys_canonicalize.rtlil` (captured AFTER `read_design_sources` -> `hierarchy -check` -> `opt_clean -purge`). synth__netlist__hash sha1 of `1_2_yosys.v` (captured AFTER ABC). A slang-induced diff therefore collapses into `canonical_netlist__hash` alongside any hierarchy / opt_clean drift, with no way to tell them apart. Add one more hash, `synth__post_read_sources__hash`, captured immediately after `read_design_sources` returns -- i.e. the state straight out of the HDL frontend, before any other pass runs. An unstable hash here means the frontend itself is non-idempotent, distinct from drift introduced later. Mechanics: * `flow/scripts/synth_preamble.tcl` gains a `write_state_hash` proc that strips `src` attributes (file:line metadata, so the hash is path-independent across the bazel sandbox and the classic-make build), dumps the current RTLIL to a temp file under `$OBJECTS_DIR`, sha1's it, deletes the temp, and emits a `<metric>: <sha1>` line to the yosys log. * `flow/scripts/synth_canonicalize.tcl` calls `write_state_hash synth__post_read_sources__hash` immediately after `read_design_sources`. * `flow/util/genMetrics.py` extracts the new hash from `1_1_yosys_canonicalize.log` into `metadata.json`. 
* `flow/util/genRuleFile.py` declares a `level=warning`, `compare="=="` literal rule for the new metric so a downstream `rules-base.json` can pin it without failing the build on a mismatch (matching how `canonical_netlist__hash` and `netlist__hash` are declared). No design's `rules-base.json` gets a baseline value pinned in this PR; declaring the rule type just makes the metric available to any future per-design pin. Pre-ABC / post-hierarchy / post-synth-main hashes are intentionally left out -- they belong in follow-up PRs if the slang-only signal isn't enough. Signed-off-by: Øyvind Harboe Signed-off-by: Øyvind Harboe --- flow/scripts/synth_canonicalize.tcl | 6 ++++++ flow/scripts/synth_preamble.tcl | 26 ++++++++++++++++++++++++++ flow/util/genMetrics.py | 27 +++++++++++++++++++++++---- flow/util/genRuleFile.py | 5 +++++ 4 files changed, 60 insertions(+), 4 deletions(-) diff --git a/flow/scripts/synth_canonicalize.tcl b/flow/scripts/synth_canonicalize.tcl index 4ba62a4523..58a2044e6a 100644 --- a/flow/scripts/synth_canonicalize.tcl +++ b/flow/scripts/synth_canonicalize.tcl @@ -1,5 +1,11 @@ source $::env(SCRIPTS_DIR)/synth_preamble.tcl read_design_sources +# Fingerprint the design state right after the HDL frontend +# (slang / builtin Verilog) returns, before any other pass runs. +# An unstable hash here means the frontend itself is non-idempotent +# -- distinct from drift introduced later by hierarchy / opt_clean +# / etc. Surfaced as a literal warning-level rule via genRuleFile.py. 
+write_state_hash synth__post_read_sources__hash hierarchy -check -top $::env(DESIGN_NAME) diff --git a/flow/scripts/synth_preamble.tcl b/flow/scripts/synth_preamble.tcl index af43512c0f..537f46881e 100644 --- a/flow/scripts/synth_preamble.tcl +++ b/flow/scripts/synth_preamble.tcl @@ -3,6 +3,32 @@ yosys -import source $::env(SCRIPTS_DIR)/util.tcl erase_non_stage_variables synth +# Fingerprint the current yosys design state to a `<metric>: <sha1>` +# line in the surrounding yosys log so `genMetrics.py` can pick it up. +# +# `setattr -unset src` strips file:line attribute lines from the +# RTLIL before hashing so the hash is path-independent (bazel sandbox +# paths differ from the classic-make build's relative paths; without +# stripping, hashes always differ trivially). +# +# The strip is wrapped in `design -push` / `design -pop` so it does +# not leak into the rest of the synth run -- src attributes are +# preserved for back-annotation / debugging downstream. This matters +# specifically for builds without `SYNTH_REPEATABLE_BUILD=1`, where +# synth_canonicalize.tcl deliberately leaves src attrs alone after +# the canonical-RTLIL write. +proc write_state_hash { metric } { + design -push + setattr -unset src * + setattr -mod -unset src * + set tmp $::env(OBJECTS_DIR)/.${metric}.tmp.rtlil + write_rtlil $tmp + design -pop + set sha [lindex [split [exec sha1sum $tmp]] 0] + file delete $tmp + puts "${metric}: $sha" +} + # If using a cached, gate level netlist, then copy over to the results dir with # preserve timestamps flag set. If you don't, subsequent runs will cause the # floorplan step to be re-executed. 
diff --git a/flow/util/genMetrics.py b/flow/util/genMetrics.py index 6424712e27..e37cd798bc 100755 --- a/flow/util/genMetrics.py +++ b/flow/util/genMetrics.py @@ -262,10 +262,29 @@ def extract_metrics( rptPath + "/synth_stat.txt", ) - # Netlist hashes: fingerprints of the canonical RTLIL (pre-ABC) and - # the final post-synthesis Verilog so the rules-base.json check - # (level=warning) flags when bazel-built vs make-built yosys - # disagree for the same RTL. + # Netlist hashes: fingerprints at three points in the yosys + # pipeline so the rules-base.json check (level=warning) can + # isolate frontend drift (yosys-slang isn't idempotent) from + # mid-synth drift and from ABC drift. + # + # post_read_sources = state right after `read_design_sources` + # (HDL frontend output only) + # canonical_netlist = state after `opt_clean -purge` + # (= `1_1_yosys_canonicalize.rtlil`) + # netlist = `1_2_yosys.v`, post-ABC + # + # `post_read_sources` is emitted by synth_canonicalize.tcl via + # `write_state_hash` (synth_preamble.tcl) as a `<metric>: <sha1>` + # line in 1_1_yosys_canonicalize.log; the other two come straight + # from `file_sha1` of the already-emitted RTLIL / Verilog. + extractTagFromFile( + "synth__post_read_sources__hash", + metrics_dict, + r"^synth__post_read_sources__hash:\s+([0-9a-f]{40})\s*$", + logPath + "/1_1_yosys_canonicalize.log", + t=str, + required=False, + ) metrics_dict["synth__canonical_netlist__hash"] = file_sha1( resultPath + "/1_1_yosys_canonicalize.rtlil" ) diff --git a/flow/util/genRuleFile.py b/flow/util/genRuleFile.py index 358d59231e..58d240e187 100755 --- a/flow/util/genRuleFile.py +++ b/flow/util/genRuleFile.py @@ -79,6 +79,11 @@ def gen_rule_file( # surfaces as a [WARN] diagnostic in checkMetadata.py without # failing the build, matching how rules-base.json already # treats warning counts. 
+ "synth__post_read_sources__hash": { + "mode": "literal", + "compare": "==", + "level": "warning", + }, "synth__canonical_netlist__hash": { "mode": "literal", "compare": "==", From 93f78df9125f2f4fdf0df6369dbfb9256b874ea8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98yvind=20Harboe?= Date: Mon, 18 May 2026 07:32:14 +0200 Subject: [PATCH 2/2] fix: address gemini-code-assist nits on #4236 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-ups from gemini's second review pass on #4236: * `setattr -unset src *` in `write_state_hash` only strips src attrs from objects in the *current* module. `write_state_hash` runs before `hierarchy -check -top` in synth_canonicalize.tcl, so the post-frontend design has many separate modules whose src attrs (= file:line metadata) survive the strip and end up in the hashed RTLIL, breaking path-independence between the bazel sandbox and the classic-make build. Use `*/*` which selects across all modules. * The genMetrics.py regex for the new hash was anchored to a bare `^`, while other extractors in the same file allow optional leading whitespace. Add `\s*` after `^` so the extraction is robust to any log wrapper that inserts indentation. Signed-off-by: Øyvind Harboe Signed-off-by: Øyvind Harboe --- flow/scripts/synth_preamble.tcl | 9 ++++++++- flow/util/genMetrics.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/flow/scripts/synth_preamble.tcl b/flow/scripts/synth_preamble.tcl index 537f46881e..5e9a052f1a 100644 --- a/flow/scripts/synth_preamble.tcl +++ b/flow/scripts/synth_preamble.tcl @@ -19,7 +19,14 @@ erase_non_stage_variables synth # the canonical-RTLIL write. proc write_state_hash { metric } { design -push - setattr -unset src * + # `*/*` strips src attrs across objects in *all* modules. 
Bare `*` + # only targets the current module's objects, which is fine after + # `hierarchy -check -top` collapses things to one design, but + # `write_state_hash` is called before `hierarchy` here, so the + # post-frontend state may have many separate modules whose src + # attrs would otherwise survive into the hashed RTLIL and break + # path-independence. + setattr -unset src */* setattr -mod -unset src * set tmp $::env(OBJECTS_DIR)/.${metric}.tmp.rtlil write_rtlil $tmp diff --git a/flow/util/genMetrics.py b/flow/util/genMetrics.py index e37cd798bc..7ef1f899aa 100755 --- a/flow/util/genMetrics.py +++ b/flow/util/genMetrics.py @@ -280,7 +280,7 @@ def extract_metrics( extractTagFromFile( "synth__post_read_sources__hash", metrics_dict, - r"^synth__post_read_sources__hash:\s+([0-9a-f]{40})\s*$", + r"^\s*synth__post_read_sources__hash:\s+([0-9a-f]{40})\s*$", logPath + "/1_1_yosys_canonicalize.log", t=str, required=False,