From 99eb467e6c28cbf04a88084676b2fe7ad1afc6d7 Mon Sep 17 00:00:00 2001 From: T Date: Sat, 27 Jun 2026 17:37:30 +0100 Subject: [PATCH 1/5] feat(807): Tier-1 modeling for float_clamp / int_to_float / float_to_int MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Models the three modelable Float64 builtins deferred from #797's FloatingPoint-sort fix, completing #807. - float_clamp(v, lo, hi): modeled unconditionally as the faithful WASM f64.min(f64.max(v, lo), hi) via explicit _wasm_fp_min / _wasm_fp_max helpers. Z3's own fp.min / fp.max DIVERGE from WASM on NaN (SMT-LIB returns the non-NaN operand; WASM propagates) and on signed zero, so a naive model would unsoundly prove !float_is_nan(float_clamp(NaN, ...)) — the helpers build the WASM semantics explicitly. Total op, no obligation. - int_to_float(n) / float_to_int(x): cross the Int-Float boundary where Z3's SYMBOLIC Int-Real-FP reasoning is unreliable (it returns spurious counterexamples that don't satisfy their own constraints, non- deterministically across timeouts). So they are modeled at Tier 1 ONLY for a concrete (constant-foldable) argument; a symbolic argument defers to a sound Tier 3 (matching the #392 audit principle of deferring what Z3 cannot soundly model). float_to_int is partial (i64.trunc_f64_s traps on NaN / Inf / out-of-i64-range), so a concrete out-of-domain argument is a loud E529 and a symbolic one a runtime-guarded Tier-3 trap. Verify-vs-run differentials confirm each model agrees with wasmtime bit-for-bit (+/-0, +/-inf, NaN, ties, lo>hi, the 2^53 rounding boundary, i64 max, and the trap cases). The four format/parse Float64 builtins (float_to_string, parse_float64, decimal_from_float, decimal_to_float) remain Tier-3 by necessity. New: error code E529 (float_to_int domain) and obligation kind float_to_int_domain. Release prep for v0.0.183. Closes #807. Co-Authored-By: Claude --- CHANGELOG.md | 9 +- HISTORY.md | 3 +- README.md | 2 +- ROADMAP.md | 3 +- TESTING.md | 5 +- docs/index.html | 2 +- docs/index.md | 2 +- docs/llms-full.txt | 3 +- docs/llms.txt | 2 +- pyproject.toml | 2 +- scripts/check_spec_examples.py | 2 +- spec/06-contracts.md | 3 + tests/test_float64_builtins_807.py | 441 +++++++++++++++++++++++++++++ tests/test_verifier.py | 12 +- uv.lock | 2 +- vera/__init__.py | 2 +- vera/errors.py | 1 + vera/obligations/core.py | 7 + vera/smt.py | 134 +++++++++ vera/verifier.py | 111 ++++++++ 20 files changed, 731 insertions(+), 17 deletions(-) create mode 100644 tests/test_float64_builtins_807.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ac4bc4d4..95bab3be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +## [0.0.183] - 2026-06-27 + +### Added + +- **Tier-1 verification modeling for the modelable `@Float64` builtins `float_clamp`, `int_to_float`, and `float_to_int`** (follow-up to [#797](https://github.com/aallan/vera/issues/797), the [#392](https://github.com/aallan/vera/issues/392) `smt.py` soundness audit's `@Float64` → FloatingPoint-sort fix; the three were left as Tier-3 deferrals there). `float_clamp(v, lo, hi)` is modeled unconditionally as the faithful WASM `f64.min(f64.max(v, lo), hi)`: Z3's own `fp.min` / `fp.max` *diverge* from WASM on `NaN` (SMT-LIB returns the non-`NaN` operand; WASM propagates `NaN`) and on signed zero, so the model builds those semantics explicitly — a naive `fpMin` / `fpMax` would unsoundly prove `!float_is_nan(float_clamp(NaN, …))`. `int_to_float` and `float_to_int` cross the Int↔Float boundary, where Z3's *symbolic* Int↔Real↔FP reasoning is unreliable (it returns spurious counterexamples that do not satisfy their own constraints), so they are modeled at Tier 1 **only for a concrete (constant-foldable) argument** and a symbolic one defers to a sound **Tier 3** — matching the audit principle of deferring what Z3 cannot soundly model. `float_to_int` compiles to `i64.trunc_f64_s`, which traps on `NaN` / `±Inf` / out-of-`i64`-range, so a concrete out-of-domain argument is now a loud compile error (**E529**) and a symbolic one a runtime-guarded Tier-3 trap. Each model is confirmed by a verify-vs-run differential. The four format/parse `@Float64` builtins (`float_to_string`, `parse_float64`, `decimal_from_float`, `decimal_to_float`) remain Tier-3 by necessity. ([#807](https://github.com/aallan/vera/issues/807)) + ## [0.0.182] - 2026-06-27 ### Fixed @@ -2610,7 +2616,8 @@ Small docs sweep — closes six aging documentation issues in one PR. No code c - Grammar: handler body simplified to avoid LALR reduce/reduce conflict - `pyproject.toml`: corrected build backend, package discovery, PEP 639 compliance -[Unreleased]: https://github.com/aallan/vera/compare/v0.0.182...HEAD +[Unreleased]: https://github.com/aallan/vera/compare/v0.0.183...HEAD +[0.0.183]: https://github.com/aallan/vera/compare/v0.0.182...v0.0.183 [0.0.182]: https://github.com/aallan/vera/compare/v0.0.181...v0.0.182 [0.0.181]: https://github.com/aallan/vera/compare/v0.0.180...v0.0.181 [0.0.180]: https://github.com/aallan/vera/compare/v0.0.179...v0.0.180 diff --git a/HISTORY.md b/HISTORY.md index e04bb8ec..3a8eff2f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -393,6 +393,7 @@ Proof obligations became first-class records with a warm Z3 session, and the LSP | v0.0.180 | 26 Jun | **`@Float64` contracts are verified against Z3's IEEE-754 FloatingPoint sort instead of mathematical reals, so Tier-1 proofs respect `NaN`/`Inf`/signed-zero/rounding and `%` (C `fmod`) and match the runtime, so unsound relational / reflexive Float64 contracts are now correctly rejected** ([#797](https://github.com/aallan/vera/issues/797)). | | v0.0.181 | 27 Jun | **`@Int` / `@Nat` arithmetic overflow now traps: `+` / `-` / `*` are partial operations carrying an auto-synthesised `int_overflow` obligation, so a result that provably leaves i64 / u64 range is a compile error (E528) and a dynamic one is a runtime-guarded trap, instead of the verifier proving postconditions the two's-complement runtime then violated** ([#798](https://github.com/aallan/vera/issues/798)). | | v0.0.182 | 27 Jun | **`string_length` UTF-8 byte soundness defers non-literal `string_length` to Tier 3 and models string literals at their exact byte length, completing the `smt.py` soundness audit** ([#802](https://github.com/aallan/vera/issues/802)). | +| v0.0.183 | 27 Jun | **Tier-1 verification modeling for the modelable `@Float64` builtins: `float_clamp` (faithful WASM `f64.min`/`f64.max`, NaN/±0-correct), and concrete-gated `int_to_float` / `float_to_int` (symbolic args defer to Tier 3 since Z3's FP↔Real reasoning is unreliable), with a `float_to_int` domain obligation (E529) and verify-vs-run differentials** ([#807](https://github.com/aallan/vera/issues/807)). | --- @@ -408,4 +409,4 @@ Proof obligations became first-class records with a warm Z3 session, and the LSP | Spec chapters | 7 | 10 | 11 | 12 | 13 | 13 | 13 | 13 | | Code coverage | — | — | — | 90% | 91% | 96% | 96% | 95% | -Total: **1,400+ commits, 182 tagged releases, 77 active development days.** +Total: **1,400+ commits, 183 tagged releases, 77 active development days.** diff --git a/README.md b/README.md index 62a2517a..89bc2e65 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,7 @@ cp /path/to/vera/SKILL.md ~/.claude/skills/vera-language/SKILL.md ## Project status -Vera is in **active development** at v0.0.182 — 1,400+ commits, 182 releases, 5,173 tests, 91% code coverage, 93 conformance programs, 35 examples, and a 13-chapter specification. See **[HISTORY.md](HISTORY.md)** for how the compiler was built. +Vera is in **active development** at v0.0.183 — 1,400+ commits, 183 releases, 5,250 tests, 91% code coverage, 93 conformance programs, 35 examples, and a 13-chapter specification. See **[HISTORY.md](HISTORY.md)** for how the compiler was built. The reference compiler — parser, AST, type checker, contract verifier (Z3), WASM code generator, module system, browser runtime, and runtime contract insertion — is working. The language specification is in draft across [13 chapters](spec/). diff --git a/ROADMAP.md b/ROADMAP.md index 4e01b129..444cb91d 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -8,7 +8,7 @@ Priority lives in this file and nowhere else — issues carry kind and area labe ## Where we are -5,173 tests, 93 conformance programs, 35 examples, 13 spec chapters. +5,250 tests, 93 conformance programs, 35 examples, 13 spec chapters. ## The roadmap @@ -20,7 +20,6 @@ The infrastructure that catches the next regression before a user does, plus the | Issue | What | |---|---| -| [#807](https://github.com/aallan/vera/issues/807) | Tier-1 model the deferred-but-modelable `@Float64` builtins from #797. **Blocked by #798**: `int_to_float`/`float_to_int` cross the Int↔Float boundary that the unbounded-`Int` idealisation (#798) makes unsound, so they wait on that fix; `float_clamp` is unblocked but needs a verify-vs-run codegen differential first. The four format/parse builtins (`float_to_string`, `parse_float64`, `decimal_from_float`/`_to_float`) are Tier-3 by necessity, not deferred. | | [#808](https://github.com/aallan/vera/issues/808) | Give the `@Int` / `@Nat` arithmetic-overflow runtime trap (#798) a precise `overflow` trap kind, so a dynamic overflow surfaces as a dedicated trap message rather than the generic `unreachable` kind it currently shares. | | [#592](https://github.com/aallan/vera/issues/592) | End-to-end behavioural tests for the five UTF-8 decode sites currently pinned only by structural greps. | | [#645](https://github.com/aallan/vera/issues/645) | Explicit `encoding='utf-8'` at every text-mode file call, with a pre-commit check to hold the line. | diff --git a/TESTING.md b/TESTING.md index 8a24c0dd..31405f54 100644 --- a/TESTING.md +++ b/TESTING.md @@ -6,7 +6,7 @@ This is the single source of truth for Vera's testing infrastructure, coverage d | Metric | Value | |--------|-------| -| **Tests** | 5,173 across 45 files (~63,000 lines of test code; 5,141 passed + 16 stress, 16 skipped) | +| **Tests** | 5,250 across 46 files (~63,000 lines of test code; 5,218 passed + 16 stress, 16 skipped) | | **Compiler code coverage** | 95% Python, 61% JavaScript — 91% combined (CI minimum: 80%) | | **Conformance programs** | 93 programs across 9 spec chapters, validating every language feature | | **Example programs** | 35, all validated through `vera check` + `vera verify` | @@ -59,7 +59,7 @@ python scripts/fix_allowlists.py --fix # auto-fix stale allowlists | `test_ast.py` | 128 | 1,122 | AST transformation, node structure, serialisation, string escape sequences, ability declarations | | `test_checker.py` | 548 | 6,347 | Type synthesis, slot resolution, effects, effect subtyping, contracts, exhaustiveness, cross-module typing, visibility, error codes, string built-ins, generic rejection, IO operation types, Markdown types, Regex types, abilities, Map collection, Set collection, Decimal type, Json type, Html type, Http effect, Inference effect, removed legacy name regression | | `test_obligations.py` | 284 | 1087 | Reified proof obligations + warm `VerificationSession` (#222 Phase A): full-corpus differential oracle (warm session == cold `verify()` on diagnostics, summary, and obligation stream, plus warm-twice determinism, across all 35 examples and every verify/run-level conformance program), summary↔obligation tier-bookkeeping consistency, per-kind unit tests (requires / ensures / decreases / nat_sub / call_pre statuses, counterexamples, error codes), content-key stability + same-text-two-sites span disambiguation, session solver reuse, type-error short-circuit, ADT-registry resync between programs; plus the Phase B incremental suite — identical-source full replay, callee-body-edit replays callers while callee-contract-edit invalidates them, span-shift and ADT-edit conservative invalidation, cross-program isolation, timeout-status never cached (monkeypatched solver), FIFO eviction bound; plus the #727 dedup pin — a violating call in a let RHS records exactly one E501 diagnostic and one call_pre obligation | -| `test_verifier.py` | 473 | 9,288 | Z3 verification, counterexamples, tier classification, call-site preconditions, branch-aware preconditions, pipe operator, cross-module contracts, match/ADT verification, decreases verification, mutual recursion, refined Bool/String/Float64 param sorts, **@Nat subtraction underflow obligation** (#520 — Path-A obligation discharge via requires/path-conditions/path-aware Z3 refutation, pure-literal exclusion, Int-Int and Nat-Int → Int exemptions), **@Nat binding-site narrowing obligation** (#552 — Tier-1 `value >= 0` at let/call-arg/effect-op-arg/ctor-field/match-bind/literal-destructure narrowing, #520 double-emit disjointness, E503 counterexample-witness pin; #747 — generic-instantiation, ADT sub-pattern, non-literal tuple-destructure, and cross-module imported-ctor narrowing, with concrete-site Tier-3 narrowings (incl. the if-expr-source SMT gap, recorded per-component) classified as codegen-guarded `tier3_runtime`, generic function-formal call-args guarded on the monomorphised callee (keyed on `call_target`, not the erased generic decl), and the two genuinely-unguarded residuals — effect-op-arg narrowings (no codegen guard yet, #754) and generic-instantiated constructor-field narrowings (constructors carry no per-field `@Nat` mono metadata) — surfacing the `E504` unguarded warning; #749 — IndexExpr/InterpolatedString walker-recursion pins, `_fresh_slot_var` nat-alias unit test, `_narrows_into_nat` verifier/codegen soundness parity), **refinement-predicate verification** (#746 — Tier-1 discharge at let/call-arg/ctor-field/effect-op-arg/match-bind/tuple-destructure narrowings and refined return positions, E505 violation with counterexample, E506 Tier-3 for untranslatable/non-primitive-base predicates, the R3 already-refined-source exemption via predicate-AST equality, the R5 violating-return pin, the refinement-over-`@Nat` `>= 0 && P` conjoin, and the **refined-ADT-sub-pattern arm-fact carry** — a `Some(@PosInt)` bind on `Option` carries the field's source-type predicate into the arm body so a downstream `@Nat` narrowing discharges instead of a false E503, and reaches call **preconditions** in the arm body via the SMT match-translation fact hook so `Some(@PosInt) -> needs_positive(@PosInt.0)` discharges instead of a false E501, while a genuine `Option` narrowing stays obligated (E505/E501), never silently assumed; an **alias-base refined return** (`{ @Age \| @Age.0 >= 18 }`) is assumed by the caller via the predicate binder name; and a **refined return from a match arm** (`Some(@PosInt) -> @PosInt.0`) discharges via a global `arm-matched => source-fact` SMT implication on both the normal and generic-fast paths, a violating `Option` payload still E505), **per-monomorphization generic verification** (#732 — an instantiated `forall` body is verified per concrete instantiation: a generic-body `@Nat` underflow is caught (naming the instantiation, with a counterexample) or discharged when guarded, the never-instantiated residual stays Tier-3 `E520`, collapsed-type-var (`A=B=Int`) De Bruijn reindex soundness, one-diagnostic dedup across multiple instantiations, and discovery of a generic reached only through a `decreases(...)` measure, plus a mixed-`tier3`/`timeout` aggregate-label completeness pin and a recursive-generic-clone source-name pin so `decreases` resolves on the monomorphised instance), **primitive-operation safety obligations** (#680 — division/modulo by-zero `E526` and array-index-bounds `E527`, the in-bounds/out-of-bounds two-check with float-exemption, honest Tier-3 for opaque lengths, the off-by-one and `i >= 0` lower-bound pins, walker recursion through array-literal / assert / interpolated-string / let-destructure positions, the opaque-shadow Tier-3 routing that stops an untranslatable `let`/destructure from silently discharging a stale same-type-outer divisor, De Bruijn-correct `E526`/`E527` fix hints rendered from the actual operands, Tier-1 projection of literal-constructor destructure components (`let Tuple<@Int, @Int> = Tuple(10, 6); _ / @Int.1` discharges `10 != 0` rather than shadowing to Tier-3, while a non-literal source stays a tracked Tier-3 shadow), a tracked placeholder pushed for *every* destructured component so same-type De Bruijn positions don't collapse (`Tuple(10, )` keeps `@Int.0` on the opaque second component, never silently discharging against the literal `10`), `_contains_opaque_shadow` recursion so a *compound* divisor / subtraction operand embedding a shadow (`shadow + 1`) falls to Tier-3 instead of a spurious `E526`/`E502` counterexample, and tracked shadowing of match-arm pattern slots bound over an untranslatable (effect-op) scrutinee so a primitive op in the arm falls to Tier-3 rather than silently discharging against a stale same-name outer (`match Source.next(()) { Some(@Int) -> 1 / @Int.0 }`); plus a **multi-agent shadow/projection audit battery** (`TestShadowAuditDivision680` / `TestShadowAuditSubtraction680` / `TestShadowAuditIndex680` / `TestDestructureDeBruijnAlignment680` / `TestShadowAuditInteractions680`) — 57 differential tests pinning the invariant trichotomy (safe → verified, opaque/shadow → Tier-3, provably-unsafe → loud) across compound shadows, modulo parity, multi-component De Bruijn alignment with value-distinct siblings, intervening-type namespaces, stacked / nested destructures, opaque match scrutinees, nested blocks, and intra-block scoping; **mutation-validated** — every shadow/projection test was confirmed to flip RED when its target machinery (`_is_opaque_shadow` / `_contains_opaque_shadow` / the De Bruijn placeholder push / the `let`-shadow / the match-arm `_fresh_pattern_env` / literal projection) is deliberately broken, so none is green-for-the-wrong-reason) | +| `test_verifier.py` | 473 | 9,296 | Z3 verification, counterexamples, tier classification, call-site preconditions, branch-aware preconditions, pipe operator, cross-module contracts, match/ADT verification, decreases verification, mutual recursion, refined Bool/String/Float64 param sorts, **@Nat subtraction underflow obligation** (#520 — Path-A obligation discharge via requires/path-conditions/path-aware Z3 refutation, pure-literal exclusion, Int-Int and Nat-Int → Int exemptions), **@Nat binding-site narrowing obligation** (#552 — Tier-1 `value >= 0` at let/call-arg/effect-op-arg/ctor-field/match-bind/literal-destructure narrowing, #520 double-emit disjointness, E503 counterexample-witness pin; #747 — generic-instantiation, ADT sub-pattern, non-literal tuple-destructure, and cross-module imported-ctor narrowing, with concrete-site Tier-3 narrowings (incl. the if-expr-source SMT gap, recorded per-component) classified as codegen-guarded `tier3_runtime`, generic function-formal call-args guarded on the monomorphised callee (keyed on `call_target`, not the erased generic decl), and the two genuinely-unguarded residuals — effect-op-arg narrowings (no codegen guard yet, #754) and generic-instantiated constructor-field narrowings (constructors carry no per-field `@Nat` mono metadata) — surfacing the `E504` unguarded warning; #749 — IndexExpr/InterpolatedString walker-recursion pins, `_fresh_slot_var` nat-alias unit test, `_narrows_into_nat` verifier/codegen soundness parity), **refinement-predicate verification** (#746 — Tier-1 discharge at let/call-arg/ctor-field/effect-op-arg/match-bind/tuple-destructure narrowings and refined return positions, E505 violation with counterexample, E506 Tier-3 for untranslatable/non-primitive-base predicates, the R3 already-refined-source exemption via predicate-AST equality, the R5 violating-return pin, the refinement-over-`@Nat` `>= 0 && P` conjoin, and the **refined-ADT-sub-pattern arm-fact carry** — a `Some(@PosInt)` bind on `Option` carries the field's source-type predicate into the arm body so a downstream `@Nat` narrowing discharges instead of a false E503, and reaches call **preconditions** in the arm body via the SMT match-translation fact hook so `Some(@PosInt) -> needs_positive(@PosInt.0)` discharges instead of a false E501, while a genuine `Option` narrowing stays obligated (E505/E501), never silently assumed; an **alias-base refined return** (`{ @Age \| @Age.0 >= 18 }`) is assumed by the caller via the predicate binder name; and a **refined return from a match arm** (`Some(@PosInt) -> @PosInt.0`) discharges via a global `arm-matched => source-fact` SMT implication on both the normal and generic-fast paths, a violating `Option` payload still E505), **per-monomorphization generic verification** (#732 — an instantiated `forall` body is verified per concrete instantiation: a generic-body `@Nat` underflow is caught (naming the instantiation, with a counterexample) or discharged when guarded, the never-instantiated residual stays Tier-3 `E520`, collapsed-type-var (`A=B=Int`) De Bruijn reindex soundness, one-diagnostic dedup across multiple instantiations, and discovery of a generic reached only through a `decreases(...)` measure, plus a mixed-`tier3`/`timeout` aggregate-label completeness pin and a recursive-generic-clone source-name pin so `decreases` resolves on the monomorphised instance), **primitive-operation safety obligations** (#680 — division/modulo by-zero `E526` and array-index-bounds `E527`, the in-bounds/out-of-bounds two-check with float-exemption, honest Tier-3 for opaque lengths, the off-by-one and `i >= 0` lower-bound pins, walker recursion through array-literal / assert / interpolated-string / let-destructure positions, the opaque-shadow Tier-3 routing that stops an untranslatable `let`/destructure from silently discharging a stale same-type-outer divisor, De Bruijn-correct `E526`/`E527` fix hints rendered from the actual operands, Tier-1 projection of literal-constructor destructure components (`let Tuple<@Int, @Int> = Tuple(10, 6); _ / @Int.1` discharges `10 != 0` rather than shadowing to Tier-3, while a non-literal source stays a tracked Tier-3 shadow), a tracked placeholder pushed for *every* destructured component so same-type De Bruijn positions don't collapse (`Tuple(10, )` keeps `@Int.0` on the opaque second component, never silently discharging against the literal `10`), `_contains_opaque_shadow` recursion so a *compound* divisor / subtraction operand embedding a shadow (`shadow + 1`) falls to Tier-3 instead of a spurious `E526`/`E502` counterexample, and tracked shadowing of match-arm pattern slots bound over an untranslatable (effect-op) scrutinee so a primitive op in the arm falls to Tier-3 rather than silently discharging against a stale same-name outer (`match Source.next(()) { Some(@Int) -> 1 / @Int.0 }`); plus a **multi-agent shadow/projection audit battery** (`TestShadowAuditDivision680` / `TestShadowAuditSubtraction680` / `TestShadowAuditIndex680` / `TestDestructureDeBruijnAlignment680` / `TestShadowAuditInteractions680`) — 57 differential tests pinning the invariant trichotomy (safe → verified, opaque/shadow → Tier-3, provably-unsafe → loud) across compound shadows, modulo parity, multi-component De Bruijn alignment with value-distinct siblings, intervening-type namespaces, stacked / nested destructures, opaque match scrutinees, nested blocks, and intra-block scoping; **mutation-validated** — every shadow/projection test was confirmed to flip RED when its target machinery (`_is_opaque_shadow` / `_contains_opaque_shadow` / the De Bruijn placeholder push / the `let`-shadow / the match-arm `_fresh_pattern_env` / literal projection) is deliberately broken, so none is green-for-the-wrong-reason) | | `test_soundness_392.py` | 36 | 584 | #392 audit batches 1–2 — verifier soundness/completeness fixes: signed div/mod truncate toward zero (#799), body `assert(P)` carries a Tier-1 obligation (#800), divisions in contract predicates carry a `div_zero` obligation (#801), and the #804 assume-half of #800's `assert` rule — a prior `assert`/`assume` discharges later obligations (including a later call's precondition) + the postcondition at Tier 1, removing false E501/E503/E500/E505. Test-first: each fails on the pre-fix verifier | | `test_int_overflow.py` | 6 | 143 | #798 — `@Int`/`@Nat` arithmetic-overflow obligations (part of the #392 `smt.py` soundness audit): `+`/`-`/`*` on `@Int`/`@Nat` now emit an `int_overflow` obligation (the analog of `nat_sub`/`div_zero`) rather than modelling the operands as Z3's unbounded integers, so a `ensures(@Int.result > @Int.0)` over `@Int.0 + 1` no longer proves a contract the i64/u64 runtime violates under two's-complement wraparound. Unbounded operands leave the obligation undischarged (Tier-3, runtime-guarded); operand bounds that prove the result stays in range discharge it at Tier 1. Test-first: each fails on the pre-fix verifier (no `int_overflow` obligation emitted) | | `test_int_overflow_codegen.py` | 50 | 567 | #798 Stage 3 — runtime overflow-trap codegen: the codegen emits a guard at *exactly* the `@Int`/`@Nat` `+`/`-`/`*` sites the verifier obligates, so `vera run`/`vera compile` programs trap on overflow instead of silently wrapping at the i64/u64 boundary. The trap is a bare `unreachable` (Option A — matches the #520 `nat_sub` and #552 nat-bind precedent), classified `kind="unreachable"` today (a precise `"overflow"` trap kind via a host import is a follow-up). Test-first: every `*_traps` test fails on the pre-Stage-3 codegen (the op wraps silently → no trap → `execute` returns a value); every `*_no_trap` test passes before and after (safe arithmetic unchanged) | @@ -95,6 +95,7 @@ python scripts/fix_allowlists.py --fix # auto-fix stale allowlists | `test_readme.py` | 2 | 79 | README code sample parsing | | `test_html.py` | 4 | 187 | HTML landing page code samples: parse, check, verify | | `test_float64_fp.py` | 9 | 204 | #797 — `@Float64` contracts via Z3's IEEE-754 FloatingPoint sort: unsound relational / reflexive contracts (rounding at 2^53, `NaN`, `Inf`) flip from proved to violated/Tier-3, NaN-guarded contracts still verify at Tier 1, `==`/`!=` use IEEE `fpEQ`/`fpNEQ` (incl. `+0.0 == -0.0`), `%` matches codegen truncated remainder (not `fp.rem`; NaN-by-zero + large-magnitude edges), and `float_is_nan` / `float_is_infinite` / `nan()` / `infinity()` translate to FP predicates / constants. Also guards mixed `@Float64`/`@Int` ordering as a clean E142 (not a Z3 crash). Test-first: each fails on the pre-fix Real-sort verifier | +| `test_float64_builtins_807.py` | 77 | 441 | #807 — Tier-1 modeling of the modelable `@Float64` builtins. `float_clamp` modeled unconditionally as faithful WASM `f64.min(f64.max(v,lo),hi)` (the NaN-propagation soundness guard distinguishes it from a naive `z3.fpMin`/`fpMax`); `int_to_float` / `float_to_int` concrete-gated (symbolic args defer to Tier 3 — Z3's symbolic FP↔Real reasoning returns spurious counterexamples); `float_to_int` domain obligation (E529) for concrete NaN/Inf/out-of-range args. Verify-vs-run differentials confirm each model agrees with wasmtime bit-for-bit (±0, ±inf, NaN, ties, lo>hi, the 2^53 rounding boundary, i64 max, and the trap cases) | | `test_build_site.py` | 23 | 316 | Site-asset tooling — `_abs_links` rewriting (relative links, fenced-block immunity incl. inline backticks and tilde fences, http/https/fragment pass-through, Vera effect syntax not mis-parsed), `build_site` `` stability (preserve/refresh keyed on URL-structure change), and `check_site_assets` sitemap staleness (missing / date-only-clean / structural-stale) | | `test_check_changelog_updated.py` | 68 | 708 | `check_changelog_updated.py` unit + end-to-end tests: file classification (incl. file-style exact-match vs directory-style prefix-match), CHANGELOG diff parsing with `[Unreleased]` section tracking, bare-heading rejection, and full-file context (regression test for bullets far below the heading), `Skip-changelog:` trailer detection, temp-repo integration covering substantive/exempt/label/trailer paths, and `GIT_*`-env hermeticity of the temp-repo fixtures (regression for the pre-commit-hook env leak) | | `test_check_doc_counts.py` | 15 | 150 | `check_doc_counts.py` planning-document checks: KNOWN_ISSUES refactoring line counts (±10% tolerance band incl. the exact-boundary case, drift detection, empty-file citation, hyphenated paths, missing file/section/rows) and HISTORY version-row format (issue-link limit, ` — ` separator rejection, dateless-row and prose exemption, line-number reporting) | diff --git a/docs/index.html b/docs/index.html index 1f3f1aeb..9e522d9f 100644 --- a/docs/index.html +++ b/docs/index.html @@ -268,7 +268,7 @@

A programming language designed for LLMs to write, not For Agents → SKILL.md

- v0.0.182 + v0.0.183 CI

diff --git a/docs/index.md b/docs/index.md index a48c7bda..6242893d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,7 +4,7 @@ From the Latin *veritas* — truth. In Vera, verification is a first-class citizen. -**Current version:** [0.0.182](https://github.com/aallan/vera/releases/tag/v0.0.182) · [GitHub](https://github.com/aallan/vera) · [SKILL.md](https://veralang.dev/SKILL.md) (agent language reference) +**Current version:** [0.0.183](https://github.com/aallan/vera/releases/tag/v0.0.183) · [GitHub](https://github.com/aallan/vera) · [SKILL.md](https://veralang.dev/SKILL.md) (agent language reference) ## Why? diff --git a/docs/llms-full.txt b/docs/llms-full.txt index 4c37f17d..6580947b 100644 --- a/docs/llms-full.txt +++ b/docs/llms-full.txt @@ -2,7 +2,7 @@ > Vera is a statically typed, purely functional programming language designed for large language models to write. It uses typed slot references (@T.n) instead of variable names, requires contracts on every function, and compiles to WebAssembly. -This file contains the core Vera language documentation — language reference, agent instructions, FAQ, error codes, and formal grammar — compiled into a single document. Version 0.0.182. For the full documentation index including the 13-chapter specification and supplementary docs, see llms.txt. +This file contains the core Vera language documentation — language reference, agent instructions, FAQ, error codes, and formal grammar — compiled into a single document. Version 0.0.183. For the full documentation index including the 13-chapter specification and supplementary docs, see llms.txt. ======================================================================== @@ -3157,6 +3157,7 @@ Every diagnostic has a stable error code. Codes are grouped by compiler phase: - **E526**: Division or modulo by zero - **E527**: Array index out of bounds - **E528**: Arithmetic overflow +- **E529**: float_to_int domain (NaN, infinity, or out of i64 range) - **E600**: Unsupported parameter type - **E601**: Unsupported return type - **E602**: Unsupported body expression type diff --git a/docs/llms.txt b/docs/llms.txt index 6df4f16d..bc9da6cd 100644 --- a/docs/llms.txt +++ b/docs/llms.txt @@ -4,7 +4,7 @@ Vera uses De Bruijn indexing for bindings: `@Int.0` is the most recent `Int` binding, `@Int.1` the one before. There are no variable names. Contracts are mandatory — every function must declare `requires(...)`, `ensures(...)`, and `effects(...)`. The Z3 SMT solver verifies contracts statically where possible; remaining contracts become runtime assertions. All side effects (IO, Http, State, Exceptions, Async, Inference, Random) are tracked in the type system via algebraic effects. -Current version: 0.0.182. The reference compiler is written in Python. Install with `pip install -e .` from the repository. +Current version: 0.0.183. The reference compiler is written in Python. Install with `pip install -e .` from the repository. ## Homepage diff --git a/pyproject.toml b/pyproject.toml index 1c2abfb1..ab7c8cb8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "vera" -version = "0.0.182" +version = "0.0.183" description = "Vera: a programming language designed for LLMs, with full contracts, algebraic effects, and typed slot references" readme = "README.md" license = "MIT" diff --git a/scripts/check_spec_examples.py b/scripts/check_spec_examples.py index 2001fd12..4f517807 100644 --- a/scripts/check_spec_examples.py +++ b/scripts/check_spec_examples.py @@ -142,7 +142,7 @@ ("05-functions.md", 324): "FRAGMENT", # fn(A -> B) in param position # Chapter 6 — inline function type in type alias - ("06-contracts.md", 349): "FRAGMENT", # type SafeDiv = fn(...) + fn apply_div + ("06-contracts.md", 352): "FRAGMENT", # type SafeDiv = fn(...) + fn apply_div # Chapter 7 — anonymous function at top level ("07-effects.md", 116): "FRAGMENT", # effect Logger + anonymous fn body diff --git a/spec/06-contracts.md b/spec/06-contracts.md index a438c0b7..7d85581e 100644 --- a/spec/06-contracts.md +++ b/spec/06-contracts.md @@ -245,6 +245,7 @@ The verifier checks the contracts the programmer wrote, and **auto-synthesises** | `a / b`, `a % b` (Int / Nat) | `b != 0` | E526 | | `arr[i]` (`Array`) | `0 <= i < array_length(arr)` | E527 | | `a + b`, `a * b` (Int / Nat); `a - b` (Int) | result within i64 / u64 range — no overflow | E528 | +| `float_to_int(x)` | `x` finite (not NaN / Inf) and `trunc(x)` within i64 range | E529 | To discharge an operation obligation, the programmer encodes the constraint in a precondition (`requires(@Int.0 != 0)`), a guarding `if` (whose path condition holds in the relevant branch), or a refinement type (`{ @Int | @Int.0 != 0 }`). A function that performs `@Int.1 / @Int.0` with `requires(true)` therefore no longer verifies cleanly: the unguarded divisor is a compile error (E526). @@ -256,6 +257,8 @@ The `@Nat` obligations (E502 / E503) carry the most nuance, spanning many bindin **String length** ([#802](https://github.com/aallan/vera/issues/802)). Vera strings are UTF-8 byte sequences and `string_length` returns the **byte** count, but Z3's string theory (SMT-LIB 2.6) models strings as sequences of Unicode **code points** — its `Length` counts code points, which disagrees with the runtime on every multibyte character (`string_length("é")` is `2`, not `1`). `string_length` is therefore modeled at **Tier 1** only for a string **literal**, whose exact byte length is known; on any non-literal argument it defers to a runtime-guarded **Tier 3** obligation (Z3's string theory has no byte-length operator). The boolean predicates `string_contains` / `string_starts_with` / `string_ends_with` stay **Tier 1**: UTF-8 is self-synchronizing, so a valid substring / prefix / suffix matches at the byte level exactly when it matches at the code-point level. The other byte/offset-sensitive string builtins (`string_slice`, `string_index_of`, `string_char_code`, `string_chars`) are not translated to Z3 and already fall to **Tier 3**. Two further deferrals keep the predicates honest. Z3's string-sort alphabet only reaches U+2FFFF, and its Python binding silently stores any higher code point as the literal's *escape text* rather than the character — so a literal containing a code point **above U+2FFFF** is unusable in the `z3.StringVal`-based predicate translation (`string_contains` / `string_starts_with` / `string_ends_with`) and defers to **Tier 3** there, instead of letting a predicate match phantom escape bytes the runtime never sees. `string_length` is unaffected by this one — it byte-counts the *decoded* literal, so an astral literal's length stays **Tier 1**. A **lone surrogate** (U+D800–U+DFFF) defers on **both** paths: `z3.StringVal` stores it as phantom escape text just like the astral case, and — since it has no UTF-8 encoding at all — `string_length`'s byte count cannot be taken either. +**Numeric type conversions** ([#807](https://github.com/aallan/vera/issues/807)). Three Float64 builtins are modeled at **Tier 1**. `float_clamp(v, lo, hi)` is pure Float64 and modeled **unconditionally** as the faithful WASM `f64.min(f64.max(v, lo), hi)` — NaN-propagating and ±0-correct. Z3's own `fp.min` / `fp.max` *diverge* from WASM here (SMT-LIB returns the non-NaN operand and leaves ±0 implementation-defined, whereas WASM propagates NaN and pins the ±0 sign), so a naive `fpMin` / `fpMax` model would be **unsound** — it would prove `!float_is_nan(float_clamp(NaN, …))`, which the runtime refutes. `float_clamp` is total, so it carries no obligation. `int_to_float(n)` and `float_to_int(x)` cross the Int↔Float boundary, and Z3's *symbolic* Int↔Real↔FP reasoning is **unreliable** — it returns spurious counterexamples that do not satisfy their own constraints, non-deterministically across timeouts. These are therefore modeled at Tier 1 **only for a concrete (constant-foldable) argument**, where Z3 is merely constant-folding; a symbolic argument defers to a sound **Tier 3** (matching the audit principle: defer to Tier 3 what Z3 cannot soundly model). `int_to_float` is total (`f64.convert_i64_s` never traps). `float_to_int` is **partial** — `i64.trunc_f64_s` traps on NaN / ±Inf / out-of-i64-range — so a concrete argument additionally carries the domain obligation above (a provable violation is a loud **E529**), and a symbolic argument's Tier-3 obligation is guarded by the codegen trunc trap. (The four format/parse Float64 builtins — `float_to_string`, `parse_float64`, `decimal_from_float`, `decimal_to_float` — remain **Tier 3** by necessity: Z3's string theory cannot format or parse a float, and `Decimal` is an opaque host handle.) + Runtime traps for unguarded primitives are Vera-native: each trap carries a kind label (`divide_by_zero`, `out_of_bounds`, etc.), a per-kind Fix paragraph naming the precondition that would have prevented it, and a source backtrace — so a missing static guarantee is still a recoverable signal. ### 6.4.4 SMT Solver Integration diff --git a/tests/test_float64_builtins_807.py b/tests/test_float64_builtins_807.py new file mode 100644 index 00000000..e15d4c9f --- /dev/null +++ b/tests/test_float64_builtins_807.py @@ -0,0 +1,441 @@ +"""#807 — Tier-1 Z3 modeling for the modelable Float64 builtins deferred from +#797: `float_clamp`, `int_to_float`, `float_to_int`. + +Follow-up to the #392 `smt.py` soundness audit (#797 mapped `@Float64` to +`z3.FPSort(11, 53)`). Three builtins stayed Tier-3 and are modeled here: + + - `float_clamp(v, lo, hi)` — pure Float64; modeled unconditionally with a + *faithful* WASM `f64.min(f64.max(v, lo), hi)` semantics (NaN-propagating, + ±0-correct). Z3's own `fp.min`/`fp.max` DIVERGE from WASM on NaN (they + return the non-NaN operand), so a naive `fpMin`/`fpMax` model would be + UNSOUND — test_clamp_nan_propagation_not_dropped guards exactly that. + + - `int_to_float(n)` / `float_to_int(x)` — cross the Int↔Float boundary. Z3's + symbolic Int↔Real↔FP reasoning is unreliable (it returns spurious `sat` + counterexamples that don't satisfy their own constraints, non- + deterministically across timeouts). So these are modeled at Tier 1 ONLY + for a *concrete* (constant-foldable) argument, where Z3 is just constant- + folding; a symbolic argument defers to Tier 3 (sound — `float_to_int`'s + `i64.trunc_f64_s` traps natively on NaN/Inf/out-of-range). This matches the + audit principle: defer to Tier 3 what Z3 cannot SOUNDLY model. + + - `float_to_int` is partial (traps), so a concrete arg additionally gets a + domain obligation (E529): NaN / Inf / out-of-i64-range is a compile error. + +Written test-first: each FAILS on the pre-#807 verifier (where all three defer +to Tier 3, so the modeled obligations land as `tier3`, not `verified`/ +`violated`). +""" + +from __future__ import annotations + +import math +import tempfile + +import pytest +import z3 + +from vera.parser import parse_to_ast, parse_file +from vera.transform import transform +from vera.checker import typecheck_with_artifacts +from vera.codegen import compile as compile_vera, execute +from vera.codegen.api import WasmTrapError +from vera.smt import _FLOAT64_SORT, _wasm_fp_max, _wasm_fp_min +from vera.verifier import VerifyResult, verify + + +def _verify(source: str) -> VerifyResult: + ast = parse_to_ast(source) + _diags, arts = typecheck_with_artifacts(ast, source) + return verify( + ast, source, + expr_types=arts.expr_semantic_types, + expr_target_types=arts.expr_target_types, + ) + + +def _ok(result: VerifyResult) -> bool: + return not any(d.severity == "error" for d in result.diagnostics) + + +def _ens(result: VerifyResult) -> list[object]: + return [o for o in result.obligations if o.kind == "ensures"] + + +# -------------------------------------------------------------------------- +# Verify-vs-run differential helpers (#807). The codegen for these builtins is +# the authoritative runtime semantics; the differential confirms the Z3 model +# AGREES with wasmtime bit-for-bit (NaN, ±0, ±inf, ties). +# -------------------------------------------------------------------------- + +# (Vera source expression, Python float) for each interesting Float64 value. +_FLOAT_VALUES: dict[str, tuple[str, float]] = { + "nan": ("nan()", math.nan), + "+inf": ("infinity()", math.inf), + "-inf": ("0.0 - infinity()", -math.inf), + "+0": ("0.0", 0.0), + "-0": ("(0.0 - 1.0) * 0.0", -0.0), # IEEE: +0 * -1 = -0 + "2.5": ("2.5", 2.5), + "-2.5": ("0.0 - 2.5", -2.5), + "0.5": ("0.5", 0.5), + "1.0": ("1.0", 1.0), + "-1.0": ("0.0 - 1.0", -1.0), +} + + +def _run_float_expr(body: str) -> float: + """Compile and run `{ }` returning the raw f64 (NaN/sign preserved).""" + src = ( + "public fn f(@Unit -> @Float64)\n" + " requires(true) ensures(true) effects(pure)\n" + f"{{ {body} }}\n" + ) + with tempfile.NamedTemporaryFile( + mode="w", suffix=".vera", delete=False, encoding="utf-8" + ) as fh: + fh.write(src) + fh.flush() + path = fh.name + result = compile_vera(transform(parse_file(path)), source=src, file=path) + errors = [d for d in result.diagnostics if d.severity == "error"] + assert not errors, f"compile errors: {[d.description for d in errors]}" + value = execute(result, fn_name="f").value + assert isinstance(value, float), f"expected float, got {value!r}" + return value + + +def _py_to_z3fp(x: float) -> z3.FPRef: + if math.isnan(x): + return z3.fpNaN(_FLOAT64_SORT) + if math.isinf(x): + return (z3.fpPlusInfinity(_FLOAT64_SORT) if x > 0 + else z3.fpMinusInfinity(_FLOAT64_SORT)) + if x == 0.0: + return (z3.fpMinusZero(_FLOAT64_SORT) if math.copysign(1.0, x) < 0 + else z3.fpPlusZero(_FLOAT64_SORT)) + return z3.FPVal(x, _FLOAT64_SORT) + + +def _model_matches_runtime(model: z3.FPRef, runtime: float) -> bool: + """True iff the simplified Z3 model term is BIT-IDENTICAL to the runtime + float — NaN↔NaN, and otherwise equal value AND equal zero-sign (so +0 and + -0 are distinguished, unlike bare fpEQ).""" + m = z3.simplify(model) + if math.isnan(runtime): + return z3.is_true(z3.simplify(z3.fpIsNaN(m))) + rt = _py_to_z3fp(runtime) + same = z3.And( + z3.fpEQ(m, rt), + z3.fpIsNegative(m) == z3.fpIsNegative(rt), + ) + return z3.is_true(z3.simplify(same)) + + +class TestFloatClampTier1_807: + def test_clamp_in_range_verifies(self) -> None: + # Sound contract: for non-NaN x, float_clamp(x, 0.0, 1.0) ∈ [0,1] so + # result >= 0.0. Pre-#807 float_clamp deferred to Tier 3 → the ensures + # could not be modeled (tier3). With the faithful model it discharges. + result = _verify(""" +public fn cl(@Float64 -> @Float64) + requires(!float_is_nan(@Float64.0)) + ensures(@Float64.result >= 0.0) + effects(pure) +{ float_clamp(@Float64.0, 0.0, 1.0) } +""") + ens = _ens(result) + assert ens and all(o.status == "verified" for o in ens), [ + (o.kind, o.status) for o in result.obligations + ] + assert _ok(result), [d.description for d in result.diagnostics] + + def test_clamp_nan_propagation_not_dropped(self) -> None: + # SOUNDNESS GUARD distinguishing the faithful model from a naive + # z3.fpMin/fpMax one. Runtime f64.min/f64.max PROPAGATE NaN, so + # float_clamp(NaN, 0, 1) = NaN and `!float_is_nan(result)` is FALSE at + # runtime. A naive SMT fp.min/fp.max model drops the NaN (returns the + # other operand) and would UNSOUNDLY prove `!float_is_nan(result)`. The + # faithful model must NOT prove it — Z3 finds the NaN counterexample + # (violated). (Pre-#807: tier3, also not "verified" — so this still + # guards against a future naive model.) + result = _verify(""" +public fn cl(@Float64 -> @Float64) + requires(true) + ensures(!float_is_nan(@Float64.result)) + effects(pure) +{ float_clamp(@Float64.0, 0.0, 1.0) } +""") + ens = _ens(result) + assert ens and all(o.status == "violated" for o in ens), [ + (o.kind, o.status) for o in result.obligations + ] + + +def _clamp_triples() -> list[tuple[str, str, str]]: + keys = list(_FLOAT_VALUES) + triples: list[tuple[str, str, str]] = [] + # every value clamped to the normal [0, 1] range + for v in keys: + triples.append((v, "+0", "1.0")) + # tricky bound pairs, each with a representative set of values + bound_pairs = [ + ("1.0", "+0"), # lo > hi → must clamp to hi (codegen order) + ("-1.0", "1.0"), # spans zero + ("+0", "-0"), # ±0 bounds + ("-0", "+0"), + ("nan", "1.0"), # NaN low bound → NaN propagates + ("+0", "nan"), # NaN high bound + ("-inf", "+inf"), # infinite bounds + ] + for lo, hi in bound_pairs: + for v in ("0.5", "-0", "nan", "+inf", "-2.5"): + triples.append((v, lo, hi)) + return triples + + +class TestFloatClampDifferential807: + """Verify-vs-run: the Z3 model of float_clamp must agree with wasmtime + bit-for-bit on NaN / ±0 / ±inf / ties / lo>hi — the confirmation the issue + requires before landing the model (the fp.rem-vs-fmod trap #797 caught).""" + + @pytest.mark.parametrize("v,lo,hi", _clamp_triples()) + def test_clamp_model_agrees_with_runtime( + self, v: str, lo: str, hi: str + ) -> None: + v_src, v_f = _FLOAT_VALUES[v] + lo_src, lo_f = _FLOAT_VALUES[lo] + hi_src, hi_f = _FLOAT_VALUES[hi] + runtime = _run_float_expr( + f"float_clamp({v_src}, {lo_src}, {hi_src})" + ) + model = _wasm_fp_min( + _wasm_fp_max(_py_to_z3fp(v_f), _py_to_z3fp(lo_f)), + _py_to_z3fp(hi_f), + ) + assert _model_matches_runtime(model, runtime), ( + f"float_clamp({v}, {lo}, {hi}): runtime={runtime!r} " + f"model={z3.simplify(model)}" + ) + + +class TestIntToFloatTier1_807: + def test_concrete_positive_verifies(self) -> None: + # int_to_float(42) is a concrete arg → modeled as the folded FP 42.0. + # Pre-#807 it deferred to Tier 3 (ensures tier3). + result = _verify(""" +public fn f(@Unit -> @Float64) + requires(true) ensures(@Float64.result == 42.0) effects(pure) +{ int_to_float(42) } +""") + ens = _ens(result) + assert ens and all(o.status == "verified" for o in ens), [ + (o.kind, o.status) for o in result.obligations + ] + assert _ok(result), [d.description for d in result.diagnostics] + + def test_concrete_negative_verifies(self) -> None: + result = _verify(""" +public fn f(@Unit -> @Float64) + requires(true) ensures(@Float64.result == 0.0 - 7.0) effects(pure) +{ int_to_float(0 - 7) } +""") + ens = _ens(result) + assert ens and all(o.status == "verified" for o in ens), [ + (o.kind, o.status) for o in result.obligations + ] + + def test_symbolic_defers_to_tier3(self) -> None: + # SOUNDNESS GUARD for the concrete-gating decision: a symbolic + # int_to_float argument must stay Tier 3 — Z3's symbolic Int↔Real↔FP + # reasoning returns spurious counterexamples, so a contract over + # int_to_float(@Int.0) must NOT be discharged (verified) or refuted + # (violated) at Tier 1. It defers (tier3). A regression to + # unconditional modeling would flip this to verified/violated/timeout. + result = _verify(""" +public fn f(@Int -> @Float64) + requires(true) ensures(@Float64.result >= 0.0) effects(pure) +{ int_to_float(@Int.0) } +""") + ens = _ens(result) + assert ens and all(o.status == "tier3" for o in ens), [ + (o.kind, o.status) for o in result.obligations + ] + + +def _i2f_triples() -> list[int]: + # i64 boundary + the 2^53 rounding boundary (where i64→f64 loses precision + # and convert_i64_s rounds nearest-ties-to-even) + small values. + return [ + 0, 1, -1, 42, -7, 1000000, + 9007199254740992, # 2^53 (last exactly representable) + 9007199254740993, # 2^53 + 1 (rounds to even → 2^53) + 9007199254740995, # 2^53 + 3 (rounds to even → 2^53 + 4) + 9223372036854775807, # i64.MAX + ] + + +class TestIntToFloatDifferential807: + """Verify-vs-run: int_to_float's Z3 model fpToFP(RNE, ToReal(n)) must agree + with wasmtime f64.convert_i64_s(n) bit-for-bit, including the 2^53 rounding + boundary and the i64 max.""" + + @pytest.mark.parametrize("n", _i2f_triples()) + def test_model_agrees_with_runtime(self, n: int) -> None: + body = f"int_to_float({n})" if n >= 0 else f"int_to_float(0 - {-n})" + runtime = _run_float_expr(body) + model = z3.simplify( + z3.fpToFP(z3.RNE(), z3.ToReal(z3.IntVal(n)), _FLOAT64_SORT) + ) + assert _model_matches_runtime(model, runtime), ( + f"int_to_float({n}): runtime={runtime!r} model={model}" + ) + + +def _run_int_expr(body: str, sig: str = "@Unit -> @Int") -> int: + """Compile and run `{ }` returning the i64 result.""" + src = ( + f"public fn f({sig})\n" + " requires(true) ensures(true) effects(pure)\n" + f"{{ {body} }}\n" + ) + with tempfile.NamedTemporaryFile( + mode="w", suffix=".vera", delete=False, encoding="utf-8" + ) as fh: + fh.write(src) + fh.flush() + path = fh.name + result = compile_vera(transform(parse_file(path)), source=src, file=path) + errors = [d for d in result.diagnostics if d.severity == "error"] + assert not errors, f"compile errors: {[d.description for d in errors]}" + value = execute(result, fn_name="f").value + assert isinstance(value, int), f"expected int, got {value!r}" + return value + + +def _errs(result: VerifyResult, code: str) -> list[object]: + return [d for d in result.diagnostics + if d.severity == "error" and d.error_code == code] + + +class TestFloatToIntTier1_807: + def test_concrete_safe_verifies(self) -> None: + # float_to_int(3.9) is a concrete, finite, in-range arg → value modeled + # as the truncated int 3 and the domain obligation discharges. + result = _verify(""" +public fn f(@Unit -> @Int) + requires(true) ensures(@Int.result == 3) effects(pure) +{ float_to_int(3.9) } +""") + ens = _ens(result) + assert ens and all(o.status == "verified" for o in ens), [ + (o.kind, o.status) for o in result.obligations + ] + assert _ok(result), [d.description for d in result.diagnostics] + + def test_concrete_negative_truncates_toward_zero(self) -> None: + # trunc(-3.9) = -3 (toward zero, not floor -4). + result = _verify(""" +public fn f(@Unit -> @Int) + requires(true) ensures(@Int.result == 0 - 3) effects(pure) +{ float_to_int(0.0 - 3.9) } +""") + ens = _ens(result) + assert ens and all(o.status == "verified" for o in ens), [ + (o.kind, o.status) for o in result.obligations + ] + + def test_nan_arg_is_E529(self) -> None: + # float_to_int(nan()) traps at runtime (i64.trunc_f64_s) → a concrete + # NaN arg is a provable domain violation → loud E529 compile error. + result = _verify(""" +public fn f(@Unit -> @Int) + requires(true) ensures(true) effects(pure) +{ float_to_int(nan()) } +""") + assert _errs(result, "E529"), [ + (d.error_code, d.severity) for d in result.diagnostics + ] + + def test_infinity_arg_is_E529(self) -> None: + result = _verify(""" +public fn f(@Unit -> @Int) + requires(true) ensures(true) effects(pure) +{ float_to_int(infinity()) } +""") + assert _errs(result, "E529"), [ + (d.error_code, d.severity) for d in result.diagnostics + ] + + def test_out_of_range_is_E529(self) -> None: + # ~1.8e19 > i64.MAX (9.22e18): finite but trunc out of i64 range → traps + # at runtime → E529 (Vera has no e-notation, so build it by *). + result = _verify(""" +public fn f(@Unit -> @Int) + requires(true) ensures(true) effects(pure) +{ float_to_int(9000000000000000000.0 * 2.0) } +""") + assert _errs(result, "E529"), [ + (d.error_code, d.severity) for d in result.diagnostics + ] + + def test_symbolic_defers_to_tier3(self) -> None: + # SOUNDNESS GUARD for concrete-gating: a symbolic float_to_int argument + # must defer to Tier 3 (codegen i64.trunc_f64_s traps), NOT be modeled — + # Z3's symbolic FP↔Real reasoning is unreliable. The domain obligation + # lands tier3 and no spurious E529 is raised. + result = _verify(""" +public fn f(@Float64 -> @Int) + requires(true) ensures(@Int.result >= 0) effects(pure) +{ float_to_int(@Float64.0) } +""") + dom = [o for o in result.obligations + if o.kind == "float_to_int_domain"] + assert dom and all(o.status == "tier3" for o in dom), [ + (o.kind, o.status) for o in result.obligations + ] + assert not _errs(result, "E529"), "no spurious E529 on symbolic arg" + + +class TestFloatToIntDifferential807: + """Verify-vs-run: float_to_int's modeled value must equal the runtime + i64.trunc_f64_s for in-range concrete args, and the runtime must TRAP + exactly where the verifier raises E529 (NaN / Inf / out-of-range).""" + + @pytest.mark.parametrize("body,expected", [ + ("float_to_int(3.9)", 3), + ("float_to_int(0.0 - 3.9)", -3), + ("float_to_int(0.0)", 0), + ("float_to_int(2.5)", 2), + ("float_to_int(0.0 - 0.5)", 0), + ("float_to_int(9007199254740992.0)", 9007199254740992), # 2^53 + ("float_to_int(0.0 - 9007199254740992.0)", -9007199254740992), + ]) + def test_value_model_agrees_with_runtime( + self, body: str, expected: int + ) -> None: + runtime = _run_int_expr(body) + assert runtime == expected, f"{body}: runtime={runtime}" + # The verifier must model the SAME value (ensures discharges at Tier 1). + lit = str(expected) if expected >= 0 else f"0 - {-expected}" + result = _verify(f""" +public fn f(@Unit -> @Int) + requires(true) ensures(@Int.result == {lit}) effects(pure) +{{ {body} }} +""") + ens = _ens(result) + assert ens and all(o.status == "verified" for o in ens), [ + (o.kind, o.status) for o in result.obligations + ] + assert _ok(result), [d.description for d in result.diagnostics] + + @pytest.mark.parametrize("body", [ + "float_to_int(nan())", + "float_to_int(infinity())", + "float_to_int(0.0 - infinity())", + "float_to_int(9000000000000000000.0 * 2.0)", + ]) + def test_domain_violations_trap_at_runtime(self, body: str) -> None: + # The runtime MUST trap exactly where the verifier raises E529 — this is + # the agreement the issue requires for the trap cases. + with pytest.raises(WasmTrapError): + _run_int_expr(body) diff --git a/tests/test_verifier.py b/tests/test_verifier.py index 7afef80b..5f42398e 100644 --- a/tests/test_verifier.py +++ b/tests/test_verifier.py @@ -4039,6 +4039,14 @@ def test_overall_tier_counts(self) -> None: example contracts over a slot-arg string_length move T1 -> T3. Net: -2 T1, +2 T3, +0 total (the obligations persist, only their tier changes): 273/78/351 -> 271/80/351. + + #807: float_to_int(x) now carries a domain obligation (NaN / Inf / + out-of-i64-range, E529) at every site. `json.vera` has one SYMBOLIC + site — `float_to_int(@Float64.0 * 10.0)` — which defers to Tier 3 (Z3's + FP<->Real reasoning is unreliable, so symbolic float_to_int is concrete- + gated to Tier 3, guarded by the codegen trunc trap). No example has a + concrete float_to_int site, so no T1 is added. Net: +1 T3, +1 total: + 271/80/351 -> 271/81/352. """ t1 = t3 = total = t3u = 0 for f in sorted(EXAMPLES_DIR.glob("*.vera")): @@ -4052,8 +4060,8 @@ def test_overall_tier_counts(self) -> None: t3u += sum(1 for o in result.obligations if o.status == "tier3_unguarded") assert t1 == 271, f"Expected 271 T1, got {t1}" - assert t3 == 80, f"Expected 80 T3, got {t3}" - assert total == 351, f"Expected 351 total, got {total}" + assert t3 == 81, f"Expected 81 T3, got {t3}" + assert total == 352, f"Expected 352 total, got {total}" assert t3u == 0, f"Expected 0 tier3_unguarded, got {t3u}" diff --git a/uv.lock b/uv.lock index a857b940..59da73d5 100644 --- a/uv.lock +++ b/uv.lock @@ -1540,7 +1540,7 @@ wheels = [ [[package]] name = "vera" -version = "0.0.182" +version = "0.0.183" source = { editable = "." } dependencies = [ { name = "lark" }, diff --git a/vera/__init__.py b/vera/__init__.py index b5c6eb3a..954bcfad 100644 --- a/vera/__init__.py +++ b/vera/__init__.py @@ -1,4 +1,4 @@ """Vera: a programming language designed for LLMs.""" -__version__ = "0.0.182" +__version__ = "0.0.183" version = __version__ diff --git a/vera/errors.py b/vera/errors.py index 52129a45..b2c903d0 100644 --- a/vera/errors.py +++ b/vera/errors.py @@ -512,6 +512,7 @@ def diagnose_lark_error( "E526": "Division or modulo by zero", "E527": "Array index out of bounds", "E528": "Arithmetic overflow", + "E529": "float_to_int domain (NaN, infinity, or out of i64 range)", # E6xx — Codegen "E600": "Unsupported parameter type", "E601": "Unsupported return type", diff --git a/vera/obligations/core.py b/vera/obligations/core.py index 59667f09..060eaa9a 100644 --- a/vera/obligations/core.py +++ b/vera/obligations/core.py @@ -80,6 +80,13 @@ # check like index_bounds: prove P -> tier-1, prove ¬P -> # loud E507 (always traps at runtime), else tier3 (the # §11.14.1 `unreachable` trap is the guard). + "float_to_int_domain", # float_to_int(x) domain obligation at one site + # (#807). `i64.trunc_f64_s` traps on NaN / +/-Inf / + # out-of-i64-range. Concrete-gated: a concrete finite + # in-range arg -> tier-1; a concrete NaN/Inf/out-of-range arg + # -> loud E529; a symbolic arg -> honest tier3 (Z3's FP<->Real + # reasoning is unreliable; the codegen trunc trap is the + # guard). ] ObligationStatus = Literal[ diff --git a/vera/smt.py b/vera/smt.py index 108497f9..1ba5d88a 100644 --- a/vera/smt.py +++ b/vera/smt.py @@ -40,6 +40,76 @@ # default to RNE, so `_translate_binary` needs no rounding-mode change. _FLOAT64_SORT = z3.FPSort(11, 53) +# i64 range — the codegen target width for @Int and the trap boundary for +# float_to_int's `i64.trunc_f64_s` (#807). +_I64_MIN = -(2**63) +_I64_MAX = 2**63 - 1 + + +def _wasm_fp_max(a: z3.FPRef, b: z3.FPRef) -> z3.FPRef: + """Z3 model of WASM ``f64.max`` (#807). + + Z3's own ``z3.fpMax`` does NOT match WASM: SMT-LIB ``fp.max`` returns the + *other* operand when one is NaN and leaves the ±0 case implementation- + defined, whereas WASM ``f64.max`` PROPAGATES NaN and returns ``+0`` for any + ``max(±0, ∓0)``. Modeling ``float_clamp`` with ``z3.fpMax`` would therefore + be unsound (it would prove ``!float_is_nan(clamp(NaN, …))``). This builds + the faithful WASM semantics explicitly: + + - either operand NaN → NaN; + - strictly greater operand wins; + - on a tie (includes ±0 and equal values): both ±0 → ``+0`` unless BOTH + are ``-0`` (in which case ``-0``); otherwise the (equal) operand. + """ + return z3.If( + z3.Or(z3.fpIsNaN(a), z3.fpIsNaN(b)), + z3.fpNaN(_FLOAT64_SORT), + z3.If( + z3.fpGT(a, b), + a, + z3.If( + z3.fpGT(b, a), + b, + z3.If( + z3.And(z3.fpIsZero(a), z3.fpIsZero(b)), + z3.If( + z3.And(z3.fpIsNegative(a), z3.fpIsNegative(b)), + z3.fpMinusZero(_FLOAT64_SORT), + z3.fpPlusZero(_FLOAT64_SORT), + ), + a, + ), + ), + ), + ) + + +def _wasm_fp_min(a: z3.FPRef, b: z3.FPRef) -> z3.FPRef: + """Z3 model of WASM ``f64.min`` (#807) — the ``min`` dual of + :func:`_wasm_fp_max`. NaN propagates; on a ±0 tie WASM returns ``-0`` if + EITHER operand is ``-0`` (otherwise ``+0``).""" + return z3.If( + z3.Or(z3.fpIsNaN(a), z3.fpIsNaN(b)), + z3.fpNaN(_FLOAT64_SORT), + z3.If( + z3.fpLT(a, b), + a, + z3.If( + z3.fpLT(b, a), + b, + z3.If( + z3.And(z3.fpIsZero(a), z3.fpIsZero(b)), + z3.If( + z3.Or(z3.fpIsNegative(a), z3.fpIsNegative(b)), + z3.fpMinusZero(_FLOAT64_SORT), + z3.fpPlusZero(_FLOAT64_SORT), + ), + a, + ), + ), + ), + ) + # ===================================================================== # Slot environment — De Bruijn → Z3 variable mapping @@ -1142,6 +1212,70 @@ def _translate_call( if call.name == "infinity" and len(call.args) == 0: return z3.fpPlusInfinity(_FLOAT64_SORT) + # Built-in: float_clamp(v, lo, hi) → f64.min(f64.max(v, lo), hi) (#807). + # Pure Float64, so modeled unconditionally — Z3 reasons reliably over the + # FP sort here (unlike the Int↔Float conversions below). Must mirror the + # codegen ORDER exactly: max-then-min, so that lo > hi clamps to hi (the + # reverse, min-then-max, would clamp to lo). Uses the faithful WASM + # min/max helpers, NOT z3.fpMin/fpMax which diverge on NaN/±0. + if call.name == "float_clamp" and len(call.args) == 3: + v = self.translate_expr(call.args[0], env) + lo = self.translate_expr(call.args[1], env) + hi = self.translate_expr(call.args[2], env) + if (isinstance(v, z3.FPRef) + and isinstance(lo, z3.FPRef) + and isinstance(hi, z3.FPRef)): + return _wasm_fp_min(_wasm_fp_max(v, lo), hi) + return None + + # Built-in: int_to_float(n) → f64.convert_i64_s (#807). Modeled as + # fpToFP(RNE, ToReal(n)) — round-nearest-ties-to-even, matching the + # runtime conversion — but ONLY for a CONCRETE (constant-foldable) + # argument. Z3's symbolic Int↔Real↔FP reasoning is unreliable: it + # returns spurious `sat` counterexamples that don't satisfy their own + # constraints (non-deterministically across timeouts), so a symbolic + # argument defers to a sound Tier 3 rather than risk a false + # discharge/refutation. (int_to_float is total — no trap — so no + # obligation is needed; out-of-i64 n cannot occur at runtime, and the + # over-approximation is sound for the concrete case we model.) + if call.name == "int_to_float" and len(call.args) == 1: + n = self.translate_expr(call.args[0], env) + if not isinstance(n, z3.ArithRef) or n.sort() != z3.IntSort(): + return None + ns = z3.simplify(n) + if not z3.is_int_value(ns): + return None # symbolic → Tier 3 + return z3.simplify( + z3.fpToFP(z3.RNE(), z3.ToReal(ns), _FLOAT64_SORT) + ) + + # Built-in: float_to_int(x) → i64.trunc_f64_s (#807). Partial: traps on + # NaN / ±Inf / out-of-i64-range, so the verifier ALSO emits a domain + # obligation (E529) at each site. Here we model only the VALUE, and only + # for a CONCRETE, finite, in-range argument — computed exactly as the + # truncated-toward-zero integer (Python int() on the exact rational of + # the FP literal). A symbolic argument (Z3's FP↔Real reasoning is + # unreliable — see int_to_float above) or a NaN/Inf/out-of-range literal + # (no integer value; the obligation flags it, the runtime traps) returns + # None → Tier 3. + if call.name == "float_to_int" and len(call.args) == 1: + x = self.translate_expr(call.args[0], env) + if not isinstance(x, z3.FPRef): + return None + xs = z3.simplify(x) + if not z3.is_fp_value(xs): + return None # symbolic → Tier 3 + if (z3.is_true(z3.simplify(z3.fpIsNaN(xs))) + or z3.is_true(z3.simplify(z3.fpIsInf(xs)))): + return None # no integer value; obligation flags, runtime traps + real = z3.simplify(z3.fpToReal(xs)) + if not z3.is_rational_value(real): + return None # pragma: no cover — finite FP → rational + truncated = int(real.as_fraction()) # toward zero + if not (_I64_MIN <= truncated <= _I64_MAX): + return None # out of range; obligation flags, runtime traps + return z3.IntVal(truncated) + # Built-in: byte_to_int() — identity (both IntSort in Z3) if call.name == "byte_to_int" and len(call.args) == 1: return self.translate_expr(call.args[0], env) diff --git a/vera/verifier.py b/vera/verifier.py index 4ce3cebc..bd362311 100644 --- a/vera/verifier.py +++ b/vera/verifier.py @@ -2063,6 +2063,13 @@ def _walk_for_primitive_op_obligations( self._walk_for_primitive_op_obligations( decl, arg, smt, slot_env, assumptions, ) + # #807: float_to_int(x) compiles to `i64.trunc_f64_s`, which traps on + # NaN / ±Inf / out-of-i64-range — a partial op, so it carries a + # domain obligation just like div-by-zero (#801) and overflow (#798). + if expr.name == "float_to_int" and len(expr.args) == 1: + self._check_float_to_int_domain_obligation( + decl, expr, smt, slot_env, assumptions, + ) return if isinstance(expr, ast.ModuleCall): @@ -3475,6 +3482,76 @@ def _check_overflow_obligation( self.summary.tier3_runtime += 1 self._record_obligation(decl.name, "int_overflow", expr, "tier3") + def _check_float_to_int_domain_obligation( + self, + decl: ast.FnDecl, + call: ast.FnCall, + smt: SmtContext, + slot_env: SlotEnv, + assumptions: list[object], + ) -> None: + """Discharge the ``float_to_int`` domain obligation at one site (#807). + + ``float_to_int(x)`` compiles to ``i64.trunc_f64_s``, which TRAPS when + ``x`` is ``NaN``, ``±Inf``, or when ``trunc(x)`` falls outside + ``[i64.MIN, i64.MAX]``. So each site carries a "``x`` is finite and in + range" obligation, mirroring div-by-zero (#801) and overflow (#798): + + - a CONCRETE finite in-range argument → **Tier 1** (verified); + - a concrete ``NaN`` / ``Inf`` / out-of-range argument → provable trap → + **loud E529**; + - a symbolic argument → **honest Tier 3**, guarded by the codegen trunc + trap. + + Concrete-gated: Z3's symbolic ``FP``↔``Real`` reasoning is unreliable + (it returns spurious counterexamples — see :mod:`vera.smt`'s + ``int_to_float`` note), so a symbolic argument defers straight to Tier 3 + rather than risk a spurious discharge or a false E529. A concrete FP + literal is classified directly (exact, no solver call), so no + ``assumptions`` are consulted — nothing a precondition could assert can + change the value of a literal. + """ + x = smt.translate_expr(call.args[0], slot_env) + self.summary.total += 1 + if not isinstance(x, z3.FPRef): + # Untranslatable argument → Tier 3. + self.summary.tier3_runtime += 1 + self._record_obligation( + decl.name, "float_to_int_domain", call, "tier3", + ) + return + xs = z3.simplify(x) + if not z3.is_fp_value(xs): + # Symbolic argument → Tier 3 (codegen trunc trap guards). + self.summary.tier3_runtime += 1 + self._record_obligation( + decl.name, "float_to_int_domain", call, "tier3", + ) + return + + # Concrete FP literal — classify the domain directly. + is_nan = z3.is_true(z3.simplify(z3.fpIsNaN(xs))) + is_inf = z3.is_true(z3.simplify(z3.fpIsInf(xs))) + in_range = False + if not is_nan and not is_inf: + real = z3.simplify(z3.fpToReal(xs)) + if z3.is_rational_value(real): + truncated = int(real.as_fraction()) # toward zero + in_range = _I64_MIN <= truncated <= _I64_MAX + + if not is_nan and not is_inf and in_range: + self.summary.tier1_verified += 1 + self._record_obligation( + decl.name, "float_to_int_domain", call, "verified", + ) + else: + self.summary.total -= 1 # don't count — it's an error + self._record_obligation( + decl.name, "float_to_int_domain", call, "violated", + error_code="E529", + ) + self._report_float_to_int_domain(decl, call, is_nan, is_inf) + def _fresh_slot_var( self, smt: SmtContext, te: ast.TypeExpr, ) -> object | None: @@ -4574,6 +4651,40 @@ def _report_overflow( error_code="E528", ) + def _report_float_to_int_domain( + self, + decl: ast.FnDecl, + call: ast.FnCall, + is_nan: bool, + is_inf: bool, + ) -> None: + """Emit an E529 diagnostic for a provably out-of-domain float_to_int.""" + reason = ( + "NaN" if is_nan else "infinite" if is_inf else "out of i64 range" + ) + operand = ast.format_expr(call) + self._error( + call, + f"`{operand}` in '{decl.name}' provably traps: the argument is " + f"{reason}.", + rationale=( + "float_to_int compiles to `i64.trunc_f64_s`, which traps at " + "runtime on NaN, +/-infinity, or a value whose truncation falls " + "outside the i64 range. The SMT solver proved the argument is " + "always one of these (#807)." + ), + fix=( + "Guard the conversion so the argument is finite and in range — " + "check `!float_is_nan(x)` and `!float_is_infinite(x)` and bound " + "its magnitude before calling float_to_int, or handle the " + "out-of-domain case explicitly." + ), + spec_ref=( + 'Chapter 6, Section 6.4.3 "Primitive Operation Safety"' + ), + error_code="E529", + ) + def _report_assert_violation( self, decl: ast.FnDecl, From 117328b28cf565f72d85aea5a4ca5f3a348c38e6 Mon Sep 17 00:00:00 2001 From: T Date: Sat, 27 Jun 2026 17:58:11 +0100 Subject: [PATCH 2/5] test(807): strengthen E529 coverage + reword E529 rationale (review-toolkit) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pr-review-toolkit pass on #811 (code-reviewer + silent-failure-hunter found the fix sound — no critical/important issues; the deferral discipline was cleared end-to-end). Addressed the actionable findings: - verifier.py: the E529 rationale said "The SMT solver proved ..." but the concrete-gated float_to_int domain check classifies the FP literal directly in Python with no solver call (the handler's own docstring says exactly that). Reworded to mechanism-neutral ("a constant the verifier determined ..."). - tests: pin the three DISTINCT E529 reason strings (NaN / infinite / out of i64 range) — previously only error_code == "E529" was asserted, so a swapped or collapsed label would pass silently. Added negative-infinity and negative-out-of-range E529 cases, plus the i64.MIN edge to the int_to_float verify-vs-run differential (the asymmetric two's-complement boundary). Also filed #812 (pre-existing, found by silent-failure-hunter: an out-of-range integer literal is accepted by the checker, then fails at codegen with an opaque `i64.const ... out of range` error — codegen-guarded, so not a soundness hole) and recorded it in ROADMAP Tier 1. No behaviour change (diagnostic-text + tests + roadmap). Refs #807. Co-Authored-By: Claude --- ROADMAP.md | 3 +- TESTING.md | 4 +- tests/test_float64_builtins_807.py | 66 +++++++++++++++++++++++------- vera/verifier.py | 4 +- 4 files changed, 58 insertions(+), 19 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 444cb91d..eadb36cd 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -8,7 +8,7 @@ Priority lives in this file and nowhere else — issues carry kind and area labe ## Where we are -5,250 tests, 93 conformance programs, 35 examples, 13 spec chapters. +5,253 tests, 93 conformance programs, 35 examples, 13 spec chapters. ## The roadmap @@ -21,6 +21,7 @@ The infrastructure that catches the next regression before a user does, plus the | Issue | What | |---|---| | [#808](https://github.com/aallan/vera/issues/808) | Give the `@Int` / `@Nat` arithmetic-overflow runtime trap (#798) a precise `overflow` trap kind, so a dynamic overflow surfaces as a dedicated trap message rather than the generic `unreachable` kind it currently shares. | +| [#812](https://github.com/aallan/vera/issues/812) | Range-check integer literals in the checker (`@Int` within i64, `@Nat` within u64) with a clean E1xx diagnostic, instead of an out-of-range literal flowing through to an opaque `i64.const … out of range` codegen failure. Found during the #807 review. | | [#592](https://github.com/aallan/vera/issues/592) | End-to-end behavioural tests for the five UTF-8 decode sites currently pinned only by structural greps. | | [#645](https://github.com/aallan/vera/issues/645) | Explicit `encoding='utf-8'` at every text-mode file call, with a pre-commit check to hold the line. | | [#657](https://github.com/aallan/vera/issues/657) | Convert `INVARIANT_DEFENSIVE` sites and audit `PROPAGATE` cleanup (follow-up to the #626 error-handling taxonomy). | diff --git a/TESTING.md b/TESTING.md index 31405f54..d39efc3a 100644 --- a/TESTING.md +++ b/TESTING.md @@ -6,7 +6,7 @@ This is the single source of truth for Vera's testing infrastructure, coverage d | Metric | Value | |--------|-------| -| **Tests** | 5,250 across 46 files (~63,000 lines of test code; 5,218 passed + 16 stress, 16 skipped) | +| **Tests** | 5,253 across 46 files (~63,000 lines of test code; 5,221 passed + 16 stress, 16 skipped) | | **Compiler code coverage** | 95% Python, 61% JavaScript — 91% combined (CI minimum: 80%) | | **Conformance programs** | 93 programs across 9 spec chapters, validating every language feature | | **Example programs** | 35, all validated through `vera check` + `vera verify` | @@ -95,7 +95,7 @@ python scripts/fix_allowlists.py --fix # auto-fix stale allowlists | `test_readme.py` | 2 | 79 | README code sample parsing | | `test_html.py` | 4 | 187 | HTML landing page code samples: parse, check, verify | | `test_float64_fp.py` | 9 | 204 | #797 — `@Float64` contracts via Z3's IEEE-754 FloatingPoint sort: unsound relational / reflexive contracts (rounding at 2^53, `NaN`, `Inf`) flip from proved to violated/Tier-3, NaN-guarded contracts still verify at Tier 1, `==`/`!=` use IEEE `fpEQ`/`fpNEQ` (incl. `+0.0 == -0.0`), `%` matches codegen truncated remainder (not `fp.rem`; NaN-by-zero + large-magnitude edges), and `float_is_nan` / `float_is_infinite` / `nan()` / `infinity()` translate to FP predicates / constants. Also guards mixed `@Float64`/`@Int` ordering as a clean E142 (not a Z3 crash). Test-first: each fails on the pre-fix Real-sort verifier | -| `test_float64_builtins_807.py` | 77 | 441 | #807 — Tier-1 modeling of the modelable `@Float64` builtins. `float_clamp` modeled unconditionally as faithful WASM `f64.min(f64.max(v,lo),hi)` (the NaN-propagation soundness guard distinguishes it from a naive `z3.fpMin`/`fpMax`); `int_to_float` / `float_to_int` concrete-gated (symbolic args defer to Tier 3 — Z3's symbolic FP↔Real reasoning returns spurious counterexamples); `float_to_int` domain obligation (E529) for concrete NaN/Inf/out-of-range args. Verify-vs-run differentials confirm each model agrees with wasmtime bit-for-bit (±0, ±inf, NaN, ties, lo>hi, the 2^53 rounding boundary, i64 max, and the trap cases) | +| `test_float64_builtins_807.py` | 80 | 479 | #807 — Tier-1 modeling of the modelable `@Float64` builtins. `float_clamp` modeled unconditionally as faithful WASM `f64.min(f64.max(v,lo),hi)` (the NaN-propagation soundness guard distinguishes it from a naive `z3.fpMin`/`fpMax`); `int_to_float` / `float_to_int` concrete-gated (symbolic args defer to Tier 3 — Z3's symbolic FP↔Real reasoning returns spurious counterexamples); `float_to_int` domain obligation (E529) for concrete NaN/Inf/out-of-range args. Verify-vs-run differentials confirm each model agrees with wasmtime bit-for-bit (±0, ±inf, NaN, ties, lo>hi, the 2^53 rounding boundary, i64 max, and the trap cases) | | `test_build_site.py` | 23 | 316 | Site-asset tooling — `_abs_links` rewriting (relative links, fenced-block immunity incl. inline backticks and tilde fences, http/https/fragment pass-through, Vera effect syntax not mis-parsed), `build_site` `` stability (preserve/refresh keyed on URL-structure change), and `check_site_assets` sitemap staleness (missing / date-only-clean / structural-stale) | | `test_check_changelog_updated.py` | 68 | 708 | `check_changelog_updated.py` unit + end-to-end tests: file classification (incl. file-style exact-match vs directory-style prefix-match), CHANGELOG diff parsing with `[Unreleased]` section tracking, bare-heading rejection, and full-file context (regression test for bullets far below the heading), `Skip-changelog:` trailer detection, temp-repo integration covering substantive/exempt/label/trailer paths, and `GIT_*`-env hermeticity of the temp-repo fixtures (regression for the pre-commit-hook env leak) | | `test_check_doc_counts.py` | 15 | 150 | `check_doc_counts.py` planning-document checks: KNOWN_ISSUES refactoring line counts (±10% tolerance band incl. the exact-boundary case, drift detection, empty-file citation, hyphenated paths, missing file/section/rows) and HISTORY version-row format (issue-link limit, ` — ` separator rejection, dateless-row and prose exemption, line-number reporting) | diff --git a/tests/test_float64_builtins_807.py b/tests/test_float64_builtins_807.py index e15d4c9f..90e4e6bd 100644 --- a/tests/test_float64_builtins_807.py +++ b/tests/test_float64_builtins_807.py @@ -40,6 +40,7 @@ from vera.checker import typecheck_with_artifacts from vera.codegen import compile as compile_vera, execute from vera.codegen.api import WasmTrapError +from vera.errors import Diagnostic from vera.smt import _FLOAT64_SORT, _wasm_fp_max, _wasm_fp_min from vera.verifier import VerifyResult, verify @@ -262,8 +263,11 @@ def test_symbolic_defers_to_tier3(self) -> None: ] +_I64_MIN = -(2**63) + + def _i2f_triples() -> list[int]: - # i64 boundary + the 2^53 rounding boundary (where i64→f64 loses precision + # i64 boundaries + the 2^53 rounding boundary (where i64→f64 loses precision # and convert_i64_s rounds nearest-ties-to-even) + small values. return [ 0, 1, -1, 42, -7, 1000000, @@ -271,17 +275,27 @@ def _i2f_triples() -> list[int]: 9007199254740993, # 2^53 + 1 (rounds to even → 2^53) 9007199254740995, # 2^53 + 3 (rounds to even → 2^53 + 4) 9223372036854775807, # i64.MAX + _I64_MIN, # i64.MIN — the asymmetric two's-complement edge ] +def _int_to_float_body(n: int) -> str: + # The literal -2^63 (i64.MIN) cannot be written directly: |i64.MIN| = 2^63 + # is out of the positive i64 literal range, so build it as + # `0 - i64.MAX - 1` (each step stays in range). + if n == _I64_MIN: + return "int_to_float(0 - 9223372036854775807 - 1)" + return f"int_to_float({n})" if n >= 0 else f"int_to_float(0 - {-n})" + + class TestIntToFloatDifferential807: """Verify-vs-run: int_to_float's Z3 model fpToFP(RNE, ToReal(n)) must agree with wasmtime f64.convert_i64_s(n) bit-for-bit, including the 2^53 rounding - boundary and the i64 max.""" + boundary and the i64 min/max.""" @pytest.mark.parametrize("n", _i2f_triples()) def test_model_agrees_with_runtime(self, n: int) -> None: - body = f"int_to_float({n})" if n >= 0 else f"int_to_float(0 - {-n})" + body = _int_to_float_body(n) runtime = _run_float_expr(body) model = z3.simplify( z3.fpToFP(z3.RNE(), z3.ToReal(z3.IntVal(n)), _FLOAT64_SORT) @@ -312,7 +326,7 @@ def _run_int_expr(body: str, sig: str = "@Unit -> @Int") -> int: return value -def _errs(result: VerifyResult, code: str) -> list[object]: +def _errs(result: VerifyResult, code: str) -> list[Diagnostic]: return [d for d in result.diagnostics if d.severity == "error" and d.error_code == code] @@ -346,15 +360,18 @@ def test_concrete_negative_truncates_toward_zero(self) -> None: def test_nan_arg_is_E529(self) -> None: # float_to_int(nan()) traps at runtime (i64.trunc_f64_s) → a concrete - # NaN arg is a provable domain violation → loud E529 compile error. + # NaN arg is a provable domain violation → loud E529 compile error. The + # reason string must name NaN specifically (the report distinguishes + # NaN / infinite / out-of-range — pin it so a swapped/collapsed label + # can't pass silently). result = _verify(""" public fn f(@Unit -> @Int) requires(true) ensures(true) effects(pure) { float_to_int(nan()) } """) - assert _errs(result, "E529"), [ - (d.error_code, d.severity) for d in result.diagnostics - ] + errs = _errs(result, "E529") + assert errs, [(d.error_code, d.severity) for d in result.diagnostics] + assert "argument is NaN" in errs[0].description, errs[0].description def test_infinity_arg_is_E529(self) -> None: result = _verify(""" @@ -362,9 +379,19 @@ def test_infinity_arg_is_E529(self) -> None: requires(true) ensures(true) effects(pure) { float_to_int(infinity()) } """) - assert _errs(result, "E529"), [ - (d.error_code, d.severity) for d in result.diagnostics - ] + errs = _errs(result, "E529") + assert errs, [(d.error_code, d.severity) for d in result.diagnostics] + assert "argument is infinite" in errs[0].description, errs[0].description + + def test_negative_infinity_arg_is_E529(self) -> None: + result = _verify(""" +public fn f(@Unit -> @Int) + requires(true) ensures(true) effects(pure) +{ float_to_int(0.0 - infinity()) } +""") + errs = _errs(result, "E529") + assert errs, [(d.error_code, d.severity) for d in result.diagnostics] + assert "argument is infinite" in errs[0].description, errs[0].description def test_out_of_range_is_E529(self) -> None: # ~1.8e19 > i64.MAX (9.22e18): finite but trunc out of i64 range → traps @@ -374,9 +401,20 @@ def test_out_of_range_is_E529(self) -> None: requires(true) ensures(true) effects(pure) { float_to_int(9000000000000000000.0 * 2.0) } """) - assert _errs(result, "E529"), [ - (d.error_code, d.severity) for d in result.diagnostics - ] + errs = _errs(result, "E529") + assert errs, [(d.error_code, d.severity) for d in result.diagnostics] + assert "out of i64 range" in errs[0].description, errs[0].description + + def test_negative_out_of_range_is_E529(self) -> None: + # ~-1.8e19 < i64.MIN: the low-side (two's-complement) out-of-range edge. + result = _verify(""" +public fn f(@Unit -> @Int) + requires(true) ensures(true) effects(pure) +{ float_to_int(0.0 - 9000000000000000000.0 * 2.0) } +""") + errs = _errs(result, "E529") + assert errs, [(d.error_code, d.severity) for d in result.diagnostics] + assert "out of i64 range" in errs[0].description, errs[0].description def test_symbolic_defers_to_tier3(self) -> None: # SOUNDNESS GUARD for concrete-gating: a symbolic float_to_int argument diff --git a/vera/verifier.py b/vera/verifier.py index bd362311..1b865669 100644 --- a/vera/verifier.py +++ b/vera/verifier.py @@ -4670,8 +4670,8 @@ def _report_float_to_int_domain( rationale=( "float_to_int compiles to `i64.trunc_f64_s`, which traps at " "runtime on NaN, +/-infinity, or a value whose truncation falls " - "outside the i64 range. The SMT solver proved the argument is " - "always one of these (#807)." + "outside the i64 range. The argument is a constant the verifier " + "determined is always one of these (#807)." ), fix=( "Guard the conversion so the argument is finite and in range — " From 8367a4f2b75a51fe635b1e2c3d525d10a7c86aad Mon Sep 17 00:00:00 2001 From: T Date: Sat, 27 Jun 2026 18:22:02 +0100 Subject: [PATCH 3/5] fix(812): range-check integer literals against their target machine type (E149) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found during the #807 review (silent-failure-hunter pass). Closes a silent Tier-1 soundness hole and a loud codegen wart with one check. An integer literal must fit its target type — Int is i64, Nat is u64 — now checked at type-check time via the bidirectional `expected` type (refinements stripped to the base). A literal past its bound is a clean E149: - SILENT + unsound (the important one): a literal in (i64.MAX, u64.MAX] used in an Int context (e.g. 18446744073709551615) made `vera verify` prove the postcondition `result == 18446744073709551615` while the runtime returned -1 (the i64 reinterpretation of the all-ones bit pattern) — a Tier-1 proof the runtime violates. - LOUD (#812 as filed): a literal >= 2^64 was accepted by `vera check`, then failed at codegen with a raw `i64.const ... out of range` WAT error. The asymmetric edge -9223372036854775808 (i64.MIN) stays valid — its magnitude 2^63 is checked under the u64 bound via unary minus — while -2^64 is caught at the inner magnitude literal. Non-breaking: conformance 93, examples 35, the full checker + overflow + codegen suites green; no corpus program used an out-of-range literal. Filed #813 for the broader, distinct soundness bug this surfaced: a NON-literal Nat value > i64.MAX widened to Int reinterprets the same way (the widen(u64.MAX) repro proves a false postcondition) — the Nat-subtype-of-Int relation needs a coercion obligation, a focused #392-style follow-up. Closes #812. Refs #807. Co-Authored-By: Claude --- CHANGELOG.md | 4 ++ HISTORY.md | 2 +- ROADMAP.md | 4 +- TESTING.md | 4 +- docs/llms-full.txt | 1 + scripts/check_spec_examples.py | 2 +- spec/04-expressions.md | 2 + tests/test_checker.py | 95 ++++++++++++++++++++++++++++++++++ vera/checker/expressions.py | 42 +++++++++++++++ vera/errors.py | 1 + 10 files changed, 151 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95bab3be..ff3dc9a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - **Tier-1 verification modeling for the modelable `@Float64` builtins `float_clamp`, `int_to_float`, and `float_to_int`** (follow-up to [#797](https://github.com/aallan/vera/issues/797), the [#392](https://github.com/aallan/vera/issues/392) `smt.py` soundness audit's `@Float64` → FloatingPoint-sort fix; the three were left as Tier-3 deferrals there). `float_clamp(v, lo, hi)` is modeled unconditionally as the faithful WASM `f64.min(f64.max(v, lo), hi)`: Z3's own `fp.min` / `fp.max` *diverge* from WASM on `NaN` (SMT-LIB returns the non-`NaN` operand; WASM propagates `NaN`) and on signed zero, so the model builds those semantics explicitly — a naive `fpMin` / `fpMax` would unsoundly prove `!float_is_nan(float_clamp(NaN, …))`. `int_to_float` and `float_to_int` cross the Int↔Float boundary, where Z3's *symbolic* Int↔Real↔FP reasoning is unreliable (it returns spurious counterexamples that do not satisfy their own constraints), so they are modeled at Tier 1 **only for a concrete (constant-foldable) argument** and a symbolic one defers to a sound **Tier 3** — matching the audit principle of deferring what Z3 cannot soundly model. `float_to_int` compiles to `i64.trunc_f64_s`, which traps on `NaN` / `±Inf` / out-of-`i64`-range, so a concrete out-of-domain argument is now a loud compile error (**E529**) and a symbolic one a runtime-guarded Tier-3 trap. Each model is confirmed by a verify-vs-run differential. The four format/parse `@Float64` builtins (`float_to_string`, `parse_float64`, `decimal_from_float`, `decimal_to_float`) remain Tier-3 by necessity. ([#807](https://github.com/aallan/vera/issues/807)) +### Fixed + +- **Out-of-range integer literals are now a clean compile error (E149) instead of an opaque codegen failure or a silent unsoundness** (found during the #807 review). An integer literal must fit its target machine type — `@Int` is i64, `@Nat` is u64. Previously a literal `>= 2^64` was accepted by `vera check` then failed at codegen with a raw `i64.const ... out of range` error; worse, a literal in (i64.MAX, u64.MAX] used as `@Int` (e.g. `18446744073709551615`) made `vera verify` **prove** `ensures(@Int.result == 18446744073709551615)` while the runtime returned `-1` (the i64 reinterpretation of the all-ones bit pattern) — a Tier-1 proof the runtime violates. The checker now range-checks every integer literal against its target type's bound and emits **E149** with a clear message; the asymmetric edge `-9223372036854775808` (i64.MIN) stays valid. ([#812](https://github.com/aallan/vera/issues/812)) + ## [0.0.182] - 2026-06-27 ### Fixed diff --git a/HISTORY.md b/HISTORY.md index 3a8eff2f..d675555c 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -393,7 +393,7 @@ Proof obligations became first-class records with a warm Z3 session, and the LSP | v0.0.180 | 26 Jun | **`@Float64` contracts are verified against Z3's IEEE-754 FloatingPoint sort instead of mathematical reals, so Tier-1 proofs respect `NaN`/`Inf`/signed-zero/rounding and `%` (C `fmod`) and match the runtime, so unsound relational / reflexive Float64 contracts are now correctly rejected** ([#797](https://github.com/aallan/vera/issues/797)). | | v0.0.181 | 27 Jun | **`@Int` / `@Nat` arithmetic overflow now traps: `+` / `-` / `*` are partial operations carrying an auto-synthesised `int_overflow` obligation, so a result that provably leaves i64 / u64 range is a compile error (E528) and a dynamic one is a runtime-guarded trap, instead of the verifier proving postconditions the two's-complement runtime then violated** ([#798](https://github.com/aallan/vera/issues/798)). | | v0.0.182 | 27 Jun | **`string_length` UTF-8 byte soundness defers non-literal `string_length` to Tier 3 and models string literals at their exact byte length, completing the `smt.py` soundness audit** ([#802](https://github.com/aallan/vera/issues/802)). | -| v0.0.183 | 27 Jun | **Tier-1 verification modeling for the modelable `@Float64` builtins: `float_clamp` (faithful WASM `f64.min`/`f64.max`, NaN/±0-correct), and concrete-gated `int_to_float` / `float_to_int` (symbolic args defer to Tier 3 since Z3's FP↔Real reasoning is unreliable), with a `float_to_int` domain obligation (E529) and verify-vs-run differentials** ([#807](https://github.com/aallan/vera/issues/807)). | +| v0.0.183 | 27 Jun | **Tier-1 verification modeling for the modelable `@Float64` builtins: `float_clamp` (faithful WASM `f64.min`/`f64.max`, NaN/±0-correct), and concrete-gated `int_to_float` / `float_to_int` (symbolic args defer to Tier 3 since Z3's FP↔Real reasoning is unreliable), with a `float_to_int` domain obligation (E529) and verify-vs-run differentials** ([#807](https://github.com/aallan/vera/issues/807)). Also range-checks integer literals against their target machine type (E149, #812), closing a silent unsoundness where a literal in (i64.MAX, u64.MAX] used as `@Int` verified but ran to `-1`. | --- diff --git a/ROADMAP.md b/ROADMAP.md index eadb36cd..b9e14de5 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -8,7 +8,7 @@ Priority lives in this file and nowhere else — issues carry kind and area labe ## Where we are -5,253 tests, 93 conformance programs, 35 examples, 13 spec chapters. +5,261 tests, 93 conformance programs, 35 examples, 13 spec chapters. ## The roadmap @@ -21,7 +21,7 @@ The infrastructure that catches the next regression before a user does, plus the | Issue | What | |---|---| | [#808](https://github.com/aallan/vera/issues/808) | Give the `@Int` / `@Nat` arithmetic-overflow runtime trap (#798) a precise `overflow` trap kind, so a dynamic overflow surfaces as a dedicated trap message rather than the generic `unreachable` kind it currently shares. | -| [#812](https://github.com/aallan/vera/issues/812) | Range-check integer literals in the checker (`@Int` within i64, `@Nat` within u64) with a clean E1xx diagnostic, instead of an out-of-range literal flowing through to an opaque `i64.const … out of range` codegen failure. Found during the #807 review. | +| [#813](https://github.com/aallan/vera/issues/813) | **Soundness:** emit a verifier obligation at every `@Nat → @Int` coercion that the value is `<= i64.MAX`. A `@Nat` value in (i64.MAX, u64.MAX] currently reinterprets to a negative `@Int` at runtime while Tier 1 proves over the unbounded non-negative value — a false postcondition (the `widen(u64.MAX)` repro). Mirror with a codegen guard + verify-vs-run differential, like #798/#799. Found during the #807 review. | | [#592](https://github.com/aallan/vera/issues/592) | End-to-end behavioural tests for the five UTF-8 decode sites currently pinned only by structural greps. | | [#645](https://github.com/aallan/vera/issues/645) | Explicit `encoding='utf-8'` at every text-mode file call, with a pre-commit check to hold the line. | | [#657](https://github.com/aallan/vera/issues/657) | Convert `INVARIANT_DEFENSIVE` sites and audit `PROPAGATE` cleanup (follow-up to the #626 error-handling taxonomy). | diff --git a/TESTING.md b/TESTING.md index d39efc3a..441f6ee5 100644 --- a/TESTING.md +++ b/TESTING.md @@ -6,7 +6,7 @@ This is the single source of truth for Vera's testing infrastructure, coverage d | Metric | Value | |--------|-------| -| **Tests** | 5,253 across 46 files (~63,000 lines of test code; 5,221 passed + 16 stress, 16 skipped) | +| **Tests** | 5,261 across 46 files (~63,000 lines of test code; 5,229 passed + 16 stress, 16 skipped) | | **Compiler code coverage** | 95% Python, 61% JavaScript — 91% combined (CI minimum: 80%) | | **Conformance programs** | 93 programs across 9 spec chapters, validating every language feature | | **Example programs** | 35, all validated through `vera check` + `vera verify` | @@ -57,7 +57,7 @@ python scripts/fix_allowlists.py --fix # auto-fix stale allowlists |------|------:|------:|----------------| | `test_parser.py` | 129 | 968 | Grammar rules, operator precedence, parse errors | | `test_ast.py` | 128 | 1,122 | AST transformation, node structure, serialisation, string escape sequences, ability declarations | -| `test_checker.py` | 548 | 6,347 | Type synthesis, slot resolution, effects, effect subtyping, contracts, exhaustiveness, cross-module typing, visibility, error codes, string built-ins, generic rejection, IO operation types, Markdown types, Regex types, abilities, Map collection, Set collection, Decimal type, Json type, Html type, Http effect, Inference effect, removed legacy name regression | +| `test_checker.py` | 556 | 6,442 | Type synthesis, slot resolution, effects, effect subtyping, contracts, exhaustiveness, cross-module typing, visibility, error codes, string built-ins, generic rejection, IO operation types, Markdown types, Regex types, abilities, Map collection, Set collection, Decimal type, Json type, Html type, Http effect, Inference effect, removed legacy name regression | | `test_obligations.py` | 284 | 1087 | Reified proof obligations + warm `VerificationSession` (#222 Phase A): full-corpus differential oracle (warm session == cold `verify()` on diagnostics, summary, and obligation stream, plus warm-twice determinism, across all 35 examples and every verify/run-level conformance program), summary↔obligation tier-bookkeeping consistency, per-kind unit tests (requires / ensures / decreases / nat_sub / call_pre statuses, counterexamples, error codes), content-key stability + same-text-two-sites span disambiguation, session solver reuse, type-error short-circuit, ADT-registry resync between programs; plus the Phase B incremental suite — identical-source full replay, callee-body-edit replays callers while callee-contract-edit invalidates them, span-shift and ADT-edit conservative invalidation, cross-program isolation, timeout-status never cached (monkeypatched solver), FIFO eviction bound; plus the #727 dedup pin — a violating call in a let RHS records exactly one E501 diagnostic and one call_pre obligation | | `test_verifier.py` | 473 | 9,296 | Z3 verification, counterexamples, tier classification, call-site preconditions, branch-aware preconditions, pipe operator, cross-module contracts, match/ADT verification, decreases verification, mutual recursion, refined Bool/String/Float64 param sorts, **@Nat subtraction underflow obligation** (#520 — Path-A obligation discharge via requires/path-conditions/path-aware Z3 refutation, pure-literal exclusion, Int-Int and Nat-Int → Int exemptions), **@Nat binding-site narrowing obligation** (#552 — Tier-1 `value >= 0` at let/call-arg/effect-op-arg/ctor-field/match-bind/literal-destructure narrowing, #520 double-emit disjointness, E503 counterexample-witness pin; #747 — generic-instantiation, ADT sub-pattern, non-literal tuple-destructure, and cross-module imported-ctor narrowing, with concrete-site Tier-3 narrowings (incl. the if-expr-source SMT gap, recorded per-component) classified as codegen-guarded `tier3_runtime`, generic function-formal call-args guarded on the monomorphised callee (keyed on `call_target`, not the erased generic decl), and the two genuinely-unguarded residuals — effect-op-arg narrowings (no codegen guard yet, #754) and generic-instantiated constructor-field narrowings (constructors carry no per-field `@Nat` mono metadata) — surfacing the `E504` unguarded warning; #749 — IndexExpr/InterpolatedString walker-recursion pins, `_fresh_slot_var` nat-alias unit test, `_narrows_into_nat` verifier/codegen soundness parity), **refinement-predicate verification** (#746 — Tier-1 discharge at let/call-arg/ctor-field/effect-op-arg/match-bind/tuple-destructure narrowings and refined return positions, E505 violation with counterexample, E506 Tier-3 for untranslatable/non-primitive-base predicates, the R3 already-refined-source exemption via predicate-AST equality, the R5 violating-return pin, the refinement-over-`@Nat` `>= 0 && P` conjoin, and the **refined-ADT-sub-pattern arm-fact carry** — a `Some(@PosInt)` bind on `Option` carries the field's source-type predicate into the arm body so a downstream `@Nat` narrowing discharges instead of a false E503, and reaches call **preconditions** in the arm body via the SMT match-translation fact hook so `Some(@PosInt) -> needs_positive(@PosInt.0)` discharges instead of a false E501, while a genuine `Option` narrowing stays obligated (E505/E501), never silently assumed; an **alias-base refined return** (`{ @Age \| @Age.0 >= 18 }`) is assumed by the caller via the predicate binder name; and a **refined return from a match arm** (`Some(@PosInt) -> @PosInt.0`) discharges via a global `arm-matched => source-fact` SMT implication on both the normal and generic-fast paths, a violating `Option` payload still E505), **per-monomorphization generic verification** (#732 — an instantiated `forall` body is verified per concrete instantiation: a generic-body `@Nat` underflow is caught (naming the instantiation, with a counterexample) or discharged when guarded, the never-instantiated residual stays Tier-3 `E520`, collapsed-type-var (`A=B=Int`) De Bruijn reindex soundness, one-diagnostic dedup across multiple instantiations, and discovery of a generic reached only through a `decreases(...)` measure, plus a mixed-`tier3`/`timeout` aggregate-label completeness pin and a recursive-generic-clone source-name pin so `decreases` resolves on the monomorphised instance), **primitive-operation safety obligations** (#680 — division/modulo by-zero `E526` and array-index-bounds `E527`, the in-bounds/out-of-bounds two-check with float-exemption, honest Tier-3 for opaque lengths, the off-by-one and `i >= 0` lower-bound pins, walker recursion through array-literal / assert / interpolated-string / let-destructure positions, the opaque-shadow Tier-3 routing that stops an untranslatable `let`/destructure from silently discharging a stale same-type-outer divisor, De Bruijn-correct `E526`/`E527` fix hints rendered from the actual operands, Tier-1 projection of literal-constructor destructure components (`let Tuple<@Int, @Int> = Tuple(10, 6); _ / @Int.1` discharges `10 != 0` rather than shadowing to Tier-3, while a non-literal source stays a tracked Tier-3 shadow), a tracked placeholder pushed for *every* destructured component so same-type De Bruijn positions don't collapse (`Tuple(10, )` keeps `@Int.0` on the opaque second component, never silently discharging against the literal `10`), `_contains_opaque_shadow` recursion so a *compound* divisor / subtraction operand embedding a shadow (`shadow + 1`) falls to Tier-3 instead of a spurious `E526`/`E502` counterexample, and tracked shadowing of match-arm pattern slots bound over an untranslatable (effect-op) scrutinee so a primitive op in the arm falls to Tier-3 rather than silently discharging against a stale same-name outer (`match Source.next(()) { Some(@Int) -> 1 / @Int.0 }`); plus a **multi-agent shadow/projection audit battery** (`TestShadowAuditDivision680` / `TestShadowAuditSubtraction680` / `TestShadowAuditIndex680` / `TestDestructureDeBruijnAlignment680` / `TestShadowAuditInteractions680`) — 57 differential tests pinning the invariant trichotomy (safe → verified, opaque/shadow → Tier-3, provably-unsafe → loud) across compound shadows, modulo parity, multi-component De Bruijn alignment with value-distinct siblings, intervening-type namespaces, stacked / nested destructures, opaque match scrutinees, nested blocks, and intra-block scoping; **mutation-validated** — every shadow/projection test was confirmed to flip RED when its target machinery (`_is_opaque_shadow` / `_contains_opaque_shadow` / the De Bruijn placeholder push / the `let`-shadow / the match-arm `_fresh_pattern_env` / literal projection) is deliberately broken, so none is green-for-the-wrong-reason) | | `test_soundness_392.py` | 36 | 584 | #392 audit batches 1–2 — verifier soundness/completeness fixes: signed div/mod truncate toward zero (#799), body `assert(P)` carries a Tier-1 obligation (#800), divisions in contract predicates carry a `div_zero` obligation (#801), and the #804 assume-half of #800's `assert` rule — a prior `assert`/`assume` discharges later obligations (including a later call's precondition) + the postcondition at Tier 1, removing false E501/E503/E500/E505. Test-first: each fails on the pre-fix verifier | diff --git a/docs/llms-full.txt b/docs/llms-full.txt index 6580947b..464cb219 100644 --- a/docs/llms-full.txt +++ b/docs/llms-full.txt @@ -3095,6 +3095,7 @@ Every diagnostic has a stable error code. Codes are grouped by compiler phase: - **E146**: Unary not requires Bool - **E147**: Unary negate requires numeric - **E148**: Non-convertible type in string interpolation +- **E149**: Integer literal out of range for its target type - **E160**: Array index must be Int or Nat - **E161**: Cannot index non-array type - **E170**: Let binding type mismatch diff --git a/scripts/check_spec_examples.py b/scripts/check_spec_examples.py index 4f517807..4dcdac32 100644 --- a/scripts/check_spec_examples.py +++ b/scripts/check_spec_examples.py @@ -119,7 +119,7 @@ ("01-lexical-structure.md", 45): "FRAGMENT", # "fn let if then ..." # Chapter 4 — type alias + bare let statement (not top-level) - ("04-expressions.md", 132): "FRAGMENT", # type alias + let @PosInt = 42 + ("04-expressions.md", 134): "FRAGMENT", # type alias + let @PosInt = 42 # Chapter 5 — template with metavariable placeholders ("05-functions.md", 19): "FRAGMENT", # fn function_name(@ParamType1 ...) diff --git a/spec/04-expressions.md b/spec/04-expressions.md index c99bfeff..e7bd80e4 100644 --- a/spec/04-expressions.md +++ b/spec/04-expressions.md @@ -20,6 +20,8 @@ Literal expressions produce values of the corresponding primitive type: Integer literals in a context expecting `Nat` are checked for non-negativity at compile time. +Integer literals are also range-checked against their target machine type at compile time ([#812](https://github.com/aallan/vera/issues/812)): `Int` is a signed 64-bit integer (range `-2^63 .. 2^63 - 1`) and `Nat` an unsigned one (`0 .. 2^64 - 1`). A literal outside its target's range is a compile error (**E149**) rather than an opaque codegen failure or a silent reinterpretation of its bit pattern — for example `18446744073709551615` is valid as `Nat` but not as `Int`. (The magnitude `2^63` is valid only negated, as `Int`'s minimum `-9223372036854775808`.) + ## 4.3 Slot References Slot references (Chapter 3) are expressions: diff --git a/tests/test_checker.py b/tests/test_checker.py index b8d08570..789dcfad 100644 --- a/tests/test_checker.py +++ b/tests/test_checker.py @@ -6345,3 +6345,98 @@ def test_byte_to_int_then_arithmetic_works(self): { byte_to_int(@Byte.1) - byte_to_int(@Byte.0) } """ _check_ok(src) + + +class TestIntegerLiteralRange812: + """#812 — integer literals must fit their target machine type (`@Int` = i64, + `@Nat` = u64), checked at type-check time. + + Before this check the gap had two faces, both rooted in the verifier + modeling a literal at its unbounded mathematical value while codegen emits a + fixed-width `i64.const`: + + - LOUD: a literal >= 2^64 was accepted by `vera check`, then failed at + codegen with an opaque `i64.const ... out of range` WAT error. + - SILENT + UNSOUND: a literal in (i64.MAX, u64.MAX] used as `@Int` made + `vera verify` prove `ensures(@Int.result == 18446744073709551615)` while + the runtime returned `-1` (the i64 reinterpretation of the all-ones bit + pattern) — the verifier proving a postcondition the runtime violates. + + Both are now a clean compile-time E149. + """ + + def _e149(self, source: str) -> None: + errs = _errors(source) + assert any(e.error_code == "E149" for e in errs), \ + f"expected E149, got {[(e.error_code, e.description) for e in errs]}" + + def test_literal_in_int_context_exceeding_i64_is_error(self) -> None: + # The SILENT soundness bug: u64.MAX as @Int verified `== u64.MAX` but ran + # to -1. Now rejected at check time before it can reach that false proof. + self._e149(""" +public fn f(@Unit -> @Int) + requires(true) ensures(true) effects(pure) +{ 18446744073709551615 } +""") + + def test_int_context_i64_max_plus_one_is_error(self) -> None: + self._e149(""" +public fn f(@Unit -> @Int) + requires(true) ensures(true) effects(pure) +{ 9223372036854775808 } +""") + + def test_literal_exceeding_u64_is_error(self) -> None: + # The LOUD case (#812 as filed): >= 2^64 previously reached codegen. + self._e149(""" +public fn f(@Unit -> @Nat) + requires(true) ensures(true) effects(pure) +{ 18446744073709551616 } +""") + + def test_int_literal_at_i64_max_ok(self) -> None: + _check_ok(""" +public fn f(@Unit -> @Int) + requires(true) ensures(true) effects(pure) +{ 9223372036854775807 } +""") + + def test_nat_literal_at_u64_max_ok(self) -> None: + # u64.MAX is valid as @Nat — only the @Int context (and > u64.MAX) errors. + _check_ok(""" +public fn f(@Unit -> @Nat) + requires(true) ensures(true) effects(pure) +{ 18446744073709551615 } +""") + + def test_call_arg_int_context_exceeding_i64_is_error(self) -> None: + # The target type flows through a call argument too (bidirectional). + self._e149(""" +public fn g(@Int -> @Int) + requires(true) ensures(true) effects(pure) +{ @Int.0 } +public fn f(@Unit -> @Int) + requires(true) ensures(true) effects(pure) +{ g(18446744073709551615) } +""") + + def test_negated_i64_min_literal_ok(self) -> None: + # i64.MIN = -(2^63): the magnitude 2^63 exceeds i64.MAX but is valid as + # the operand of negation — the asymmetric i64 range [-2^63, 2^63-1]. + # `-N` parses as unary-minus over the magnitude literal, which is checked + # against the u64 bound (2^63 <= u64.MAX), so i64.MIN is NOT falsely + # rejected. (Guards the asymmetric boundary against a future tightening.) + _check_ok(""" +public fn f(@Unit -> @Int) + requires(true) ensures(true) effects(pure) +{ -9223372036854775808 } +""") + + def test_negated_literal_exceeding_u64_magnitude_is_error(self) -> None: + # -(2^64): the magnitude itself exceeds u64.MAX, caught at the inner + # literal regardless of the negation. + self._e149(""" +public fn f(@Unit -> @Int) + requires(true) ensures(true) effects(pure) +{ -18446744073709551616 } +""") diff --git a/vera/checker/expressions.py b/vera/checker/expressions.py index ec95c3c1..ea6cb1ca 100644 --- a/vera/checker/expressions.py +++ b/vera/checker/expressions.py @@ -32,6 +32,14 @@ types_equal, ) +# Machine bounds for the integer types (#812): `@Int` is i64, `@Nat` is u64. An +# integer literal must fit its target type's range — otherwise codegen's +# fixed-width `i64.const` either hard-fails (> u64.MAX) or silently reinterprets +# the bit pattern (an `@Int` literal in (i64.MAX, u64.MAX] runs as a negative) +# while the verifier reasons over the unbounded mathematical value. +_I64_MAX = 2**63 - 1 +_U64_MAX = 2**64 - 1 + class ExpressionsMixin: """Mixin providing expression type synthesis and related methods.""" @@ -124,6 +132,40 @@ def _synth_expr_impl(self, expr: ast.Expr, *, and expected.name == "Byte" and 0 <= expr.value <= 255): return BYTE + # #812: range-check the literal against its target machine type + # before typing it. The target's base type (refinements stripped) + # decides the bound: an `@Int` context is i64 (max 2^63-1), anything + # else — `@Nat`, or no expected type, where a non-negative literal + # defaults to `@Nat` below — is u64 (max 2^64-1). A literal past its + # bound is a clean error here instead of (loud) an opaque + # `i64.const … out of range` codegen failure or (silent, unsound) a + # Tier-1 proof over a value the i64 runtime reinterprets. + base = base_type(expected) if expected is not None else None + targets_int = (isinstance(base, PrimitiveType) + and base.name == "Int") + bound = _I64_MAX if targets_int else _U64_MAX + if expr.value > bound: + type_name = "@Int (i64)" if targets_int else "@Nat (u64)" + self._error( + expr, + f"Integer literal {expr.value} is out of range for " + f"{type_name}; the maximum is {bound}.", + rationale=( + "Integer literals are fixed-width at runtime — `@Int` " + "is a signed 64-bit integer, `@Nat` an unsigned one. A " + "literal beyond the type's range cannot be represented: " + "codegen would either reject it or silently reinterpret " + "its bit pattern, diverging from what the verifier " + "proved (#812)." + ), + fix=( + "Use a literal within the type's range, or choose a " + "wider target type (`@Nat` reaches " + f"{_U64_MAX} where `@Int` stops at {_I64_MAX})." + ), + spec_ref='Chapter 4, Section 4.2 "Literals"', + error_code="E149", + ) # Non-negative integer literals are Nat (which is a subtype of # Int). This lets literals like 0, 1, 42 satisfy Nat parameters # without refinement verification. diff --git a/vera/errors.py b/vera/errors.py index b2c903d0..035bf2ac 100644 --- a/vera/errors.py +++ b/vera/errors.py @@ -447,6 +447,7 @@ def diagnose_lark_error( "E146": "Unary not requires Bool", "E147": "Unary negate requires numeric", "E148": "Non-convertible type in string interpolation", + "E149": "Integer literal out of range for its target type", "E160": "Array index must be Int or Nat", "E161": "Cannot index non-array type", "E170": "Let binding type mismatch", From a7414fecbca3c2246272ded1d8a0dc4f707cf819 Mon Sep 17 00:00:00 2001 From: T Date: Sat, 27 Jun 2026 19:02:56 +0100 Subject: [PATCH 4/5] fix(812): reject out-of-range negated integer literals (CodeRabbit Critical) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit caught a Critical hole in the #812 literal range check on the negative side. -9223372036854775809 (= -(2^63 + 1), one below i64.MIN) parses as unary minus over the magnitude literal 9223372036854775809; that operand is checked against the u64 bound (it has no Int context under the negation), so the band (2^63, u64.MAX] slipped past and ran to a wrong POSITIVE value (9223372036854775807) — the same silent reinterpretation the positive check closes. _check_unary now range-checks a negated integer literal: the magnitude may reach 2^63 (forming i64.MIN) but no further -> E149. A magnitude > u64.MAX is already caught at the operand literal, so the band check caps at u64.MAX to avoid a double diagnostic. i64.MIN (-9223372036854775808) stays valid. Mutation-validated (neutralizing the band check flips the new test RED); conformance 93, examples 35, checker + codegen suites green. Refs #807, #812. Co-Authored-By: Claude --- ROADMAP.md | 2 +- TESTING.md | 4 ++-- tests/test_checker.py | 13 +++++++++++++ vera/checker/expressions.py | 29 +++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index b9e14de5..67ac0083 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -8,7 +8,7 @@ Priority lives in this file and nowhere else — issues carry kind and area labe ## Where we are -5,261 tests, 93 conformance programs, 35 examples, 13 spec chapters. +5,262 tests, 93 conformance programs, 35 examples, 13 spec chapters. ## The roadmap diff --git a/TESTING.md b/TESTING.md index 441f6ee5..11902598 100644 --- a/TESTING.md +++ b/TESTING.md @@ -6,7 +6,7 @@ This is the single source of truth for Vera's testing infrastructure, coverage d | Metric | Value | |--------|-------| -| **Tests** | 5,261 across 46 files (~63,000 lines of test code; 5,229 passed + 16 stress, 16 skipped) | +| **Tests** | 5,262 across 46 files (~63,000 lines of test code; 5,230 passed + 16 stress, 16 skipped) | | **Compiler code coverage** | 95% Python, 61% JavaScript — 91% combined (CI minimum: 80%) | | **Conformance programs** | 93 programs across 9 spec chapters, validating every language feature | | **Example programs** | 35, all validated through `vera check` + `vera verify` | @@ -57,7 +57,7 @@ python scripts/fix_allowlists.py --fix # auto-fix stale allowlists |------|------:|------:|----------------| | `test_parser.py` | 129 | 968 | Grammar rules, operator precedence, parse errors | | `test_ast.py` | 128 | 1,122 | AST transformation, node structure, serialisation, string escape sequences, ability declarations | -| `test_checker.py` | 556 | 6,442 | Type synthesis, slot resolution, effects, effect subtyping, contracts, exhaustiveness, cross-module typing, visibility, error codes, string built-ins, generic rejection, IO operation types, Markdown types, Regex types, abilities, Map collection, Set collection, Decimal type, Json type, Html type, Http effect, Inference effect, removed legacy name regression | +| `test_checker.py` | 557 | 6,455 | Type synthesis, slot resolution, effects, effect subtyping, contracts, exhaustiveness, cross-module typing, visibility, error codes, string built-ins, generic rejection, IO operation types, Markdown types, Regex types, abilities, Map collection, Set collection, Decimal type, Json type, Html type, Http effect, Inference effect, removed legacy name regression | | `test_obligations.py` | 284 | 1087 | Reified proof obligations + warm `VerificationSession` (#222 Phase A): full-corpus differential oracle (warm session == cold `verify()` on diagnostics, summary, and obligation stream, plus warm-twice determinism, across all 35 examples and every verify/run-level conformance program), summary↔obligation tier-bookkeeping consistency, per-kind unit tests (requires / ensures / decreases / nat_sub / call_pre statuses, counterexamples, error codes), content-key stability + same-text-two-sites span disambiguation, session solver reuse, type-error short-circuit, ADT-registry resync between programs; plus the Phase B incremental suite — identical-source full replay, callee-body-edit replays callers while callee-contract-edit invalidates them, span-shift and ADT-edit conservative invalidation, cross-program isolation, timeout-status never cached (monkeypatched solver), FIFO eviction bound; plus the #727 dedup pin — a violating call in a let RHS records exactly one E501 diagnostic and one call_pre obligation | | `test_verifier.py` | 473 | 9,296 | Z3 verification, counterexamples, tier classification, call-site preconditions, branch-aware preconditions, pipe operator, cross-module contracts, match/ADT verification, decreases verification, mutual recursion, refined Bool/String/Float64 param sorts, **@Nat subtraction underflow obligation** (#520 — Path-A obligation discharge via requires/path-conditions/path-aware Z3 refutation, pure-literal exclusion, Int-Int and Nat-Int → Int exemptions), **@Nat binding-site narrowing obligation** (#552 — Tier-1 `value >= 0` at let/call-arg/effect-op-arg/ctor-field/match-bind/literal-destructure narrowing, #520 double-emit disjointness, E503 counterexample-witness pin; #747 — generic-instantiation, ADT sub-pattern, non-literal tuple-destructure, and cross-module imported-ctor narrowing, with concrete-site Tier-3 narrowings (incl. the if-expr-source SMT gap, recorded per-component) classified as codegen-guarded `tier3_runtime`, generic function-formal call-args guarded on the monomorphised callee (keyed on `call_target`, not the erased generic decl), and the two genuinely-unguarded residuals — effect-op-arg narrowings (no codegen guard yet, #754) and generic-instantiated constructor-field narrowings (constructors carry no per-field `@Nat` mono metadata) — surfacing the `E504` unguarded warning; #749 — IndexExpr/InterpolatedString walker-recursion pins, `_fresh_slot_var` nat-alias unit test, `_narrows_into_nat` verifier/codegen soundness parity), **refinement-predicate verification** (#746 — Tier-1 discharge at let/call-arg/ctor-field/effect-op-arg/match-bind/tuple-destructure narrowings and refined return positions, E505 violation with counterexample, E506 Tier-3 for untranslatable/non-primitive-base predicates, the R3 already-refined-source exemption via predicate-AST equality, the R5 violating-return pin, the refinement-over-`@Nat` `>= 0 && P` conjoin, and the **refined-ADT-sub-pattern arm-fact carry** — a `Some(@PosInt)` bind on `Option` carries the field's source-type predicate into the arm body so a downstream `@Nat` narrowing discharges instead of a false E503, and reaches call **preconditions** in the arm body via the SMT match-translation fact hook so `Some(@PosInt) -> needs_positive(@PosInt.0)` discharges instead of a false E501, while a genuine `Option` narrowing stays obligated (E505/E501), never silently assumed; an **alias-base refined return** (`{ @Age \| @Age.0 >= 18 }`) is assumed by the caller via the predicate binder name; and a **refined return from a match arm** (`Some(@PosInt) -> @PosInt.0`) discharges via a global `arm-matched => source-fact` SMT implication on both the normal and generic-fast paths, a violating `Option` payload still E505), **per-monomorphization generic verification** (#732 — an instantiated `forall` body is verified per concrete instantiation: a generic-body `@Nat` underflow is caught (naming the instantiation, with a counterexample) or discharged when guarded, the never-instantiated residual stays Tier-3 `E520`, collapsed-type-var (`A=B=Int`) De Bruijn reindex soundness, one-diagnostic dedup across multiple instantiations, and discovery of a generic reached only through a `decreases(...)` measure, plus a mixed-`tier3`/`timeout` aggregate-label completeness pin and a recursive-generic-clone source-name pin so `decreases` resolves on the monomorphised instance), **primitive-operation safety obligations** (#680 — division/modulo by-zero `E526` and array-index-bounds `E527`, the in-bounds/out-of-bounds two-check with float-exemption, honest Tier-3 for opaque lengths, the off-by-one and `i >= 0` lower-bound pins, walker recursion through array-literal / assert / interpolated-string / let-destructure positions, the opaque-shadow Tier-3 routing that stops an untranslatable `let`/destructure from silently discharging a stale same-type-outer divisor, De Bruijn-correct `E526`/`E527` fix hints rendered from the actual operands, Tier-1 projection of literal-constructor destructure components (`let Tuple<@Int, @Int> = Tuple(10, 6); _ / @Int.1` discharges `10 != 0` rather than shadowing to Tier-3, while a non-literal source stays a tracked Tier-3 shadow), a tracked placeholder pushed for *every* destructured component so same-type De Bruijn positions don't collapse (`Tuple(10, )` keeps `@Int.0` on the opaque second component, never silently discharging against the literal `10`), `_contains_opaque_shadow` recursion so a *compound* divisor / subtraction operand embedding a shadow (`shadow + 1`) falls to Tier-3 instead of a spurious `E526`/`E502` counterexample, and tracked shadowing of match-arm pattern slots bound over an untranslatable (effect-op) scrutinee so a primitive op in the arm falls to Tier-3 rather than silently discharging against a stale same-name outer (`match Source.next(()) { Some(@Int) -> 1 / @Int.0 }`); plus a **multi-agent shadow/projection audit battery** (`TestShadowAuditDivision680` / `TestShadowAuditSubtraction680` / `TestShadowAuditIndex680` / `TestDestructureDeBruijnAlignment680` / `TestShadowAuditInteractions680`) — 57 differential tests pinning the invariant trichotomy (safe → verified, opaque/shadow → Tier-3, provably-unsafe → loud) across compound shadows, modulo parity, multi-component De Bruijn alignment with value-distinct siblings, intervening-type namespaces, stacked / nested destructures, opaque match scrutinees, nested blocks, and intra-block scoping; **mutation-validated** — every shadow/projection test was confirmed to flip RED when its target machinery (`_is_opaque_shadow` / `_contains_opaque_shadow` / the De Bruijn placeholder push / the `let`-shadow / the match-arm `_fresh_pattern_env` / literal projection) is deliberately broken, so none is green-for-the-wrong-reason) | | `test_soundness_392.py` | 36 | 584 | #392 audit batches 1–2 — verifier soundness/completeness fixes: signed div/mod truncate toward zero (#799), body `assert(P)` carries a Tier-1 obligation (#800), divisions in contract predicates carry a `div_zero` obligation (#801), and the #804 assume-half of #800's `assert` rule — a prior `assert`/`assume` discharges later obligations (including a later call's precondition) + the postcondition at Tier 1, removing false E501/E503/E500/E505. Test-first: each fails on the pre-fix verifier | diff --git a/tests/test_checker.py b/tests/test_checker.py index 789dcfad..70d27fe4 100644 --- a/tests/test_checker.py +++ b/tests/test_checker.py @@ -6430,6 +6430,19 @@ def test_negated_i64_min_literal_ok(self) -> None: public fn f(@Unit -> @Int) requires(true) ensures(true) effects(pure) { -9223372036854775808 } +""") + + def test_negated_i64_min_minus_one_is_error(self) -> None: + # SOUNDNESS: -(2^63 + 1) = -9223372036854775809 is one below i64.MIN, so + # it is out of @Int range. It parses as unary-minus over the magnitude + # literal 9223372036854775809, which is <= u64.MAX — so without an + # explicit unary-neg bound it slipped through and ran to a wrong POSITIVE + # value (9223372036854775807), the same silent reinterpretation the + # positive check closes. Must be E149. + self._e149(""" +public fn f(@Unit -> @Int) + requires(true) ensures(true) effects(pure) +{ -9223372036854775809 } """) def test_negated_literal_exceeding_u64_magnitude_is_error(self) -> None: diff --git a/vera/checker/expressions.py b/vera/checker/expressions.py index ea6cb1ca..12dec864 100644 --- a/vera/checker/expressions.py +++ b/vera/checker/expressions.py @@ -38,6 +38,7 @@ # the bit pattern (an `@Int` literal in (i64.MAX, u64.MAX] runs as a negative) # while the verifier reasons over the unbounded mathematical value. _I64_MAX = 2**63 - 1 +_I64_MIN = -(2**63) _U64_MAX = 2**64 - 1 @@ -516,6 +517,34 @@ def _check_unary(self, expr: ast.UnaryExpr) -> Type | None: error_code="E147", ) return UnknownType() + # #812: a negated integer literal `-m` is an `@Int` (i64), whose + # magnitude may reach 2^63 (forming i64.MIN = -2^63) but no further. + # The operand literal `m` is checked above against the u64 bound (it + # has no `@Int` context under the negation), so the band + # (2^63, u64.MAX] slips past — and would reinterpret to a wrong value + # at runtime (e.g. `-9223372036854775809` runs to 9223372036854775807). + # Reject it here. (`m > u64.MAX` is already E149 at the operand, so + # cap at u64.MAX to avoid a double diagnostic.) + if (isinstance(expr.operand, ast.IntLit) + and 2**63 < expr.operand.value <= _U64_MAX): + self._error( + expr, + f"Integer literal -{expr.operand.value} is out of range " + f"for @Int (i64); the minimum is {_I64_MIN}.", + rationale=( + "A negated integer literal is a signed 64-bit `@Int`; " + "its magnitude must fit the i64 range, which is " + "asymmetric (the minimum -2^63 has magnitude 2^63, but " + "no negative value goes below it). A larger magnitude " + "would reinterpret its bit pattern at runtime (#812)." + ), + fix=( + "Use a value within the signed 64-bit range " + f"({_I64_MIN} .. {_I64_MAX})." + ), + spec_ref='Chapter 4, Section 4.2 "Literals"', + error_code="E149", + ) # Negating Nat produces Int (may go negative) if types_equal(operand_base, NAT): return INT From 3c17e49914e29b0284421219225678378d6b4753 Mon Sep 17 00:00:00 2001 From: T Date: Sat, 27 Jun 2026 22:07:59 +0100 Subject: [PATCH 5/5] test(807): unlink temp files + low-side trap case; sync README count (CodeRabbit) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three CodeRabbit findings on a7414fe, all verified valid: - tests/test_float64_builtins_807.py: the _run_float_expr / _run_int_expr helpers used NamedTemporaryFile(delete=False) with no cleanup, leaking a .vera file per parametrized run. Wrapped both in try/finally with Path(path).unlink(missing_ok=True) (delete=False stays — Windows can't reopen a held temp file via parse_file). - tests/test_float64_builtins_807.py: added the low-side float_to_int trap case (float_to_int(0.0 - 9000000000000000000.0 * 2.0), < i64.MIN) to the runtime differential, mirroring the existing high-side / NaN / Inf cases so both bounds are exercised. - README.md: the active-development status line said 5,250 tests (stale — check_doc_counts does not gate README); synced to the live 5,263 total. Test-only + README. No behaviour change. Refs #807. Co-Authored-By: Claude --- README.md | 2 +- ROADMAP.md | 2 +- TESTING.md | 4 ++-- tests/test_float64_builtins_807.py | 38 ++++++++++++++++++++---------- 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 89bc2e65..30ab7ace 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,7 @@ cp /path/to/vera/SKILL.md ~/.claude/skills/vera-language/SKILL.md ## Project status -Vera is in **active development** at v0.0.183 — 1,400+ commits, 183 releases, 5,250 tests, 91% code coverage, 93 conformance programs, 35 examples, and a 13-chapter specification. See **[HISTORY.md](HISTORY.md)** for how the compiler was built. +Vera is in **active development** at v0.0.183 — 1,400+ commits, 183 releases, 5,263 tests, 91% code coverage, 93 conformance programs, 35 examples, and a 13-chapter specification. See **[HISTORY.md](HISTORY.md)** for how the compiler was built. The reference compiler — parser, AST, type checker, contract verifier (Z3), WASM code generator, module system, browser runtime, and runtime contract insertion — is working. The language specification is in draft across [13 chapters](spec/). diff --git a/ROADMAP.md b/ROADMAP.md index 67ac0083..7a03ead2 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -8,7 +8,7 @@ Priority lives in this file and nowhere else — issues carry kind and area labe ## Where we are -5,262 tests, 93 conformance programs, 35 examples, 13 spec chapters. +5,263 tests, 93 conformance programs, 35 examples, 13 spec chapters. ## The roadmap diff --git a/TESTING.md b/TESTING.md index 11902598..b64db33e 100644 --- a/TESTING.md +++ b/TESTING.md @@ -6,7 +6,7 @@ This is the single source of truth for Vera's testing infrastructure, coverage d | Metric | Value | |--------|-------| -| **Tests** | 5,262 across 46 files (~63,000 lines of test code; 5,230 passed + 16 stress, 16 skipped) | +| **Tests** | 5,263 across 46 files (~63,000 lines of test code; 5,231 passed + 16 stress, 16 skipped) | | **Compiler code coverage** | 95% Python, 61% JavaScript — 91% combined (CI minimum: 80%) | | **Conformance programs** | 93 programs across 9 spec chapters, validating every language feature | | **Example programs** | 35, all validated through `vera check` + `vera verify` | @@ -95,7 +95,7 @@ python scripts/fix_allowlists.py --fix # auto-fix stale allowlists | `test_readme.py` | 2 | 79 | README code sample parsing | | `test_html.py` | 4 | 187 | HTML landing page code samples: parse, check, verify | | `test_float64_fp.py` | 9 | 204 | #797 — `@Float64` contracts via Z3's IEEE-754 FloatingPoint sort: unsound relational / reflexive contracts (rounding at 2^53, `NaN`, `Inf`) flip from proved to violated/Tier-3, NaN-guarded contracts still verify at Tier 1, `==`/`!=` use IEEE `fpEQ`/`fpNEQ` (incl. `+0.0 == -0.0`), `%` matches codegen truncated remainder (not `fp.rem`; NaN-by-zero + large-magnitude edges), and `float_is_nan` / `float_is_infinite` / `nan()` / `infinity()` translate to FP predicates / constants. Also guards mixed `@Float64`/`@Int` ordering as a clean E142 (not a Z3 crash). Test-first: each fails on the pre-fix Real-sort verifier | -| `test_float64_builtins_807.py` | 80 | 479 | #807 — Tier-1 modeling of the modelable `@Float64` builtins. `float_clamp` modeled unconditionally as faithful WASM `f64.min(f64.max(v,lo),hi)` (the NaN-propagation soundness guard distinguishes it from a naive `z3.fpMin`/`fpMax`); `int_to_float` / `float_to_int` concrete-gated (symbolic args defer to Tier 3 — Z3's symbolic FP↔Real reasoning returns spurious counterexamples); `float_to_int` domain obligation (E529) for concrete NaN/Inf/out-of-range args. Verify-vs-run differentials confirm each model agrees with wasmtime bit-for-bit (±0, ±inf, NaN, ties, lo>hi, the 2^53 rounding boundary, i64 max, and the trap cases) | +| `test_float64_builtins_807.py` | 81 | 491 | #807 — Tier-1 modeling of the modelable `@Float64` builtins. `float_clamp` modeled unconditionally as faithful WASM `f64.min(f64.max(v,lo),hi)` (the NaN-propagation soundness guard distinguishes it from a naive `z3.fpMin`/`fpMax`); `int_to_float` / `float_to_int` concrete-gated (symbolic args defer to Tier 3 — Z3's symbolic FP↔Real reasoning returns spurious counterexamples); `float_to_int` domain obligation (E529) for concrete NaN/Inf/out-of-range args. Verify-vs-run differentials confirm each model agrees with wasmtime bit-for-bit (±0, ±inf, NaN, ties, lo>hi, the 2^53 rounding boundary, i64 max, and the trap cases) | | `test_build_site.py` | 23 | 316 | Site-asset tooling — `_abs_links` rewriting (relative links, fenced-block immunity incl. inline backticks and tilde fences, http/https/fragment pass-through, Vera effect syntax not mis-parsed), `build_site` `` stability (preserve/refresh keyed on URL-structure change), and `check_site_assets` sitemap staleness (missing / date-only-clean / structural-stale) | | `test_check_changelog_updated.py` | 68 | 708 | `check_changelog_updated.py` unit + end-to-end tests: file classification (incl. file-style exact-match vs directory-style prefix-match), CHANGELOG diff parsing with `[Unreleased]` section tracking, bare-heading rejection, and full-file context (regression test for bullets far below the heading), `Skip-changelog:` trailer detection, temp-repo integration covering substantive/exempt/label/trailer paths, and `GIT_*`-env hermeticity of the temp-repo fixtures (regression for the pre-commit-hook env leak) | | `test_check_doc_counts.py` | 15 | 150 | `check_doc_counts.py` planning-document checks: KNOWN_ISSUES refactoring line counts (±10% tolerance band incl. the exact-boundary case, drift detection, empty-file citation, hyphenated paths, missing file/section/rows) and HISTORY version-row format (issue-link limit, ` — ` separator rejection, dateless-row and prose exemption, line-number reporting) | diff --git a/tests/test_float64_builtins_807.py b/tests/test_float64_builtins_807.py index 90e4e6bd..062a774f 100644 --- a/tests/test_float64_builtins_807.py +++ b/tests/test_float64_builtins_807.py @@ -31,6 +31,7 @@ import math import tempfile +from pathlib import Path import pytest import z3 @@ -97,12 +98,17 @@ def _run_float_expr(body: str) -> float: fh.write(src) fh.flush() path = fh.name - result = compile_vera(transform(parse_file(path)), source=src, file=path) - errors = [d for d in result.diagnostics if d.severity == "error"] - assert not errors, f"compile errors: {[d.description for d in errors]}" - value = execute(result, fn_name="f").value - assert isinstance(value, float), f"expected float, got {value!r}" - return value + try: + result = compile_vera(transform(parse_file(path)), source=src, file=path) + errors = [d for d in result.diagnostics if d.severity == "error"] + assert not errors, f"compile errors: {[d.description for d in errors]}" + value = execute(result, fn_name="f").value + assert isinstance(value, float), f"expected float, got {value!r}" + return value + finally: + # delete=False (Windows can't reopen a held temp file via parse_file); + # unlink manually so parametrized runs don't leak .vera files. + Path(path).unlink(missing_ok=True) def _py_to_z3fp(x: float) -> z3.FPRef: @@ -318,12 +324,17 @@ def _run_int_expr(body: str, sig: str = "@Unit -> @Int") -> int: fh.write(src) fh.flush() path = fh.name - result = compile_vera(transform(parse_file(path)), source=src, file=path) - errors = [d for d in result.diagnostics if d.severity == "error"] - assert not errors, f"compile errors: {[d.description for d in errors]}" - value = execute(result, fn_name="f").value - assert isinstance(value, int), f"expected int, got {value!r}" - return value + try: + result = compile_vera(transform(parse_file(path)), source=src, file=path) + errors = [d for d in result.diagnostics if d.severity == "error"] + assert not errors, f"compile errors: {[d.description for d in errors]}" + value = execute(result, fn_name="f").value + assert isinstance(value, int), f"expected int, got {value!r}" + return value + finally: + # delete=False (Windows can't reopen a held temp file via parse_file); + # unlink manually so parametrized runs don't leak .vera files. + Path(path).unlink(missing_ok=True) def _errs(result: VerifyResult, code: str) -> list[Diagnostic]: @@ -470,7 +481,8 @@ def test_value_model_agrees_with_runtime( "float_to_int(nan())", "float_to_int(infinity())", "float_to_int(0.0 - infinity())", - "float_to_int(9000000000000000000.0 * 2.0)", + "float_to_int(9000000000000000000.0 * 2.0)", # > i64.MAX + "float_to_int(0.0 - 9000000000000000000.0 * 2.0)", # < i64.MIN ]) def test_domain_violations_trap_at_runtime(self, body: str) -> None: # The runtime MUST trap exactly where the verifier raises E529 — this is