Engram/tests/test_memory_proofs.py at main · rohitdev14/Engram · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
"""
Memory layer — Storage proof tests.

Tests the challenge/response protocol that proves a miner
actually holds a vector, not just its CID.

Covers:
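  - CID integrity: determinism, format, verify/parse round-trip
  - Single-CID: valid proof, wrong embedding, wrong CID, field shape, unique nonces
  - Batch: all-pass, partial fail, all fail, reversed embeddings, single entry,
    large batch, field shape
  - Replay protection (nonce reuse rejected)
  - ChallengeDispatcher integration (record stats, slashable threshold)
  - Not yet covered here: tampered proofs, challenge expiry, batch nonce mismatch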
"""

from __future__ import annotations

import time
import pytest
import numpy as np

# Optional import: the compiled extension may not be installed in every
# environment, so record whether it is available instead of failing outright.
try:
    import engram_core
    _RUST = True
except ImportError:
    _RUST = False

# Feature flags for APIs probed with hasattr. The `_RUST and ...` short-circuit
# keeps the unbound `engram_core` name from being touched when the import failed.
_BATCH = _RUST and hasattr(engram_core, "generate_batch_challenge")
_PARTS = _RUST and hasattr(engram_core, "generate_response_from_parts")

# Module-wide marker: skip every test in this file when the import above failed.
pytestmark = pytest.mark.skipif(not _RUST, reason="engram_core Rust module not built")


# ── Helpers ───────────────────────────────────────────────────────────────────

def _emb(values: list[float]) -> list[float]:
    return [float(v) for v in values]

# Shared 5-element embeddings used throughout this module. EMB_A and EMB_B are
# the vectors CIDs and responses are built from; EMB_WRONG is only ever passed
# as a deliberately mismatching embedding at verification time.
EMB_A = _emb([0.1, 0.2, 0.3, 0.4, 0.5])
EMB_B = _emb([0.9, 0.8, 0.7, 0.6, 0.5])
EMB_WRONG = _emb([9.9, 9.9, 9.9, 9.9, 9.9])


def make_cid(emb: list[float]) -> str:
    """Generate a CID for *emb* using an empty dict and the ``"v1"`` version tag."""
    extra: dict = {}
    version = "v1"
    return engram_core.generate_cid(emb, extra, version)


# ── CID integrity ─────────────────────────────────────────────────────────────

def test_cid_deterministic() -> None:
    """Generating a CID twice from the same embedding yields equal values."""
    first = make_cid(EMB_A)
    second = make_cid(EMB_A)
    assert first == second


def test_different_embeddings_different_cids() -> None:
    """Distinct embeddings must not map to the same CID."""
    cid_a, cid_b = make_cid(EMB_A), make_cid(EMB_B)
    assert cid_a != cid_b


def test_cid_format() -> None:
    """A CID is ``v1::`` followed by a 64-character digest."""
    cid = make_cid(EMB_A)
    assert cid.startswith("v1::")
    # Everything after the first "::" separator is the digest part.
    _, digest = cid.split("::", 1)
    assert len(digest) == 64


def test_verify_cid_passes() -> None:
    """``verify_cid`` accepts the embedding the CID was generated from."""
    cid = make_cid(EMB_A)
    verified = engram_core.verify_cid(cid, EMB_A, {}, "v1")
    assert verified


def test_verify_cid_wrong_embedding_fails() -> None:
    """``verify_cid`` rejects an embedding other than the one behind the CID."""
    cid = make_cid(EMB_A)
    verified = engram_core.verify_cid(cid, EMB_B, {}, "v1")
    assert not verified


def test_parse_cid() -> None:
    """``parse_cid`` splits a CID into its version tag and a 64-char digest."""
    parsed = engram_core.parse_cid(make_cid(EMB_A))
    version, digest = parsed
    assert version == "v1"
    assert len(digest) == 64


# ── Single-CID challenge/response ────────────────────────────────────────────

def test_valid_proof() -> None:
    """A response built from the right embedding verifies against its challenge."""
    challenge = engram_core.generate_challenge(make_cid(EMB_A), 60)
    response = engram_core.generate_response(challenge, EMB_A)
    accepted = engram_core.verify_response(challenge, response, EMB_A)
    assert accepted


def test_wrong_embedding_fails() -> None:
    """Verification fails when checked against a different embedding."""
    challenge = engram_core.generate_challenge(make_cid(EMB_A), 60)
    response = engram_core.generate_response(challenge, EMB_A)
    # The response is genuine, but the verifier compares against EMB_WRONG.
    accepted = engram_core.verify_response(challenge, response, EMB_WRONG)
    assert not accepted


def test_wrong_cid_in_response_fails() -> None:
    """A response for one challenge is rejected by a challenge for another CID."""
    original = engram_core.generate_challenge(make_cid(EMB_A), 60)
    response = engram_core.generate_response(original, EMB_A)

    # Verify against a separately generated challenge for EMB_B's CID.
    # NOTE(review): this second challenge also has its own nonce, so the
    # rejection does not isolate CID binding from nonce binding.
    other = engram_core.generate_challenge(make_cid(EMB_B), 60)
    accepted = engram_core.verify_response(other, response, EMB_A)
    assert not accepted


def test_challenge_fields() -> None:
    """A challenge exposes its CID, a 64-hex nonce and a TTL-derived expiry."""
    ttl = 30
    cid = make_cid(EMB_A)
    challenge = engram_core.generate_challenge(cid, ttl)
    assert challenge.cid == cid
    # 32 bytes → 64 hex chars
    assert len(challenge.nonce_hex) == 64
    assert challenge.issued_at > 0
    assert challenge.expires_at == challenge.issued_at + ttl


def test_response_fields() -> None:
    """A response echoes the challenge CID/nonce and carries 64-char hashes."""
    cid = make_cid(EMB_A)
    challenge = engram_core.generate_challenge(cid, 60)
    response = engram_core.generate_response(challenge, EMB_A)

    # Fields copied from the challenge.
    assert response.cid == cid
    assert response.nonce_hex == challenge.nonce_hex
    # Fields computed by the responder.
    assert len(response.embedding_hash) == 64
    assert len(response.proof) == 64


@pytest.mark.skipif(not _PARTS, reason="raw proof response API not in installed wheel (rebuild needed)")
def test_response_from_parts_matches_response() -> None:
    """``generate_response_from_parts`` matches ``generate_response`` field by field."""
    challenge = engram_core.generate_challenge(make_cid(EMB_A), 60)
    expected = engram_core.generate_response(challenge, EMB_A)

    # Same inputs, passed as loose fields instead of a challenge object.
    rebuilt = engram_core.generate_response_from_parts(
        challenge.cid, challenge.nonce_hex, challenge.expires_at, EMB_A
    )

    assert rebuilt.cid == expected.cid
    assert rebuilt.nonce_hex == expected.nonce_hex
    assert rebuilt.embedding_hash == expected.embedding_hash
    assert rebuilt.proof == expected.proof
    assert engram_core.verify_response(challenge, rebuilt, EMB_A)


def test_each_challenge_has_unique_nonce() -> None:
    """Two challenges issued for the same CID carry different nonces."""
    cid = make_cid(EMB_A)
    first = engram_core.generate_challenge(cid, 60)
    second = engram_core.generate_challenge(cid, 60)
    assert first.nonce_hex != second.nonce_hex


# ── Batch challenge/response ──────────────────────────────────────────────────
# These tests require the wheel to be rebuilt after the batch API was added.
# In CI the wheel is always rebuilt; locally skip if the API isn't present yet.

@pytest.mark.skipif(not _BATCH, reason="batch proof API not in installed wheel (rebuild needed)")
def test_batch_all_valid() -> None:
    """Every slot verifies when the batch is answered with the right embeddings."""
    embeddings = [EMB_A, EMB_B]
    cid_list = [make_cid(EMB_A), make_cid(EMB_B)]
    challenge = engram_core.generate_batch_challenge(cid_list, 60)
    response = engram_core.generate_batch_response(challenge, embeddings)
    verdicts = engram_core.verify_batch_response(challenge, response, embeddings)
    assert verdicts == [True, True]


@pytest.mark.skipif(not _BATCH, reason="batch proof API not in installed wheel (rebuild needed)")
def test_batch_one_wrong_embedding() -> None:
    """Only the slot verified against a mismatching embedding fails."""
    cid_list = [make_cid(EMB_A), make_cid(EMB_B)]
    challenge = engram_core.generate_batch_challenge(cid_list, 60)
    response = engram_core.generate_batch_response(challenge, [EMB_A, EMB_B])

    # Slot 0 is checked against the real embedding, slot 1 against EMB_WRONG.
    checked_against = [EMB_A, EMB_WRONG]
    verdicts = engram_core.verify_batch_response(challenge, response, checked_against)
    assert verdicts == [True, False]


@pytest.mark.skipif(not _BATCH, reason="batch proof API not in installed wheel (rebuild needed)")
def test_batch_all_wrong_embeddings() -> None:
    """Every slot fails when all are verified against mismatching embeddings."""
    cid_list = [make_cid(EMB_A), make_cid(EMB_B)]
    challenge = engram_core.generate_batch_challenge(cid_list, 60)
    response = engram_core.generate_batch_response(challenge, [EMB_A, EMB_B])
    checked_against = [EMB_WRONG, EMB_WRONG]
    verdicts = engram_core.verify_batch_response(challenge, response, checked_against)
    assert verdicts == [False, False]


@pytest.mark.skipif(not _BATCH, reason="batch proof API not in installed wheel (rebuild needed)")
def test_batch_proof_not_shuffleable() -> None:
    """A miner cannot swap valid proofs between CID slots."""
    cid_list = [make_cid(EMB_A), make_cid(EMB_B)]
    challenge = engram_core.generate_batch_challenge(cid_list, 60)
    response = engram_core.generate_batch_response(challenge, [EMB_A, EMB_B])

    # Verifying with reversed embeddings exposes the index-binding invariant.
    # NOTE(review): the response entries are left in their original order;
    # only the verifier's embeddings are reversed. Confirm whether a test
    # that swaps the entries themselves is also needed.
    reversed_embeddings = [EMB_B, EMB_A]
    verdicts = engram_core.verify_batch_response(challenge, response, reversed_embeddings)
    assert verdicts == [False, False]


@pytest.mark.skipif(not _BATCH, reason="batch proof API not in installed wheel (rebuild needed)")
def test_batch_single_entry() -> None:
    """A batch containing exactly one CID verifies."""
    embeddings = [EMB_A]
    challenge = engram_core.generate_batch_challenge([make_cid(EMB_A)], 60)
    response = engram_core.generate_batch_response(challenge, embeddings)
    verdicts = engram_core.verify_batch_response(challenge, response, embeddings)
    assert verdicts == [True]


@pytest.mark.skipif(not _BATCH, reason="batch proof API not in installed wheel (rebuild needed)")
def test_batch_large() -> None:
    """Batch with many CIDs — all should verify correctly."""
    n = 50

    # Build n distinct embeddings of the form [i/n, 1 - i/n, 0, 0, 0].
    embs = []
    for i in range(n):
        frac = float(i) / n
        embs.append(_emb([frac, 1.0 - frac, 0.0, 0.0, 0.0]))

    cids = list(map(make_cid, embs))
    challenge = engram_core.generate_batch_challenge(cids, 60)
    response = engram_core.generate_batch_response(challenge, embs)
    verdicts = engram_core.verify_batch_response(challenge, response, embs)
    assert all(verdicts)
    assert len(verdicts) == n


@pytest.mark.skipif(not _BATCH, reason="batch proof API not in installed wheel (rebuild needed)")
def test_batch_fields() -> None:
    """A batch challenge exposes its CIDs, a 64-hex nonce and a TTL-derived expiry."""
    ttl = 30
    cid_list = [make_cid(EMB_A), make_cid(EMB_B)]
    challenge = engram_core.generate_batch_challenge(cid_list, ttl)
    assert challenge.cids == cid_list
    assert len(challenge.nonce_hex) == 64
    assert challenge.expires_at == challenge.issued_at + ttl


@pytest.mark.skipif(not _BATCH, reason="batch proof API not in installed wheel (rebuild needed)")
def test_batch_entry_fields() -> None:
    """Each batch response entry carries its CID plus 64-char hash and proof."""
    only_cid = make_cid(EMB_A)
    challenge = engram_core.generate_batch_challenge([only_cid], 60)
    response = engram_core.generate_batch_response(challenge, [EMB_A])

    first = response.entries[0]
    assert first.cid == only_cid
    assert len(first.embedding_hash) == 64
    assert len(first.proof) == 64


# ── ChallengeDispatcher (Python layer) ───────────────────────────────────────

from engram.validator.challenge import ChallengeDispatcher


@pytest.fixture
def dispatcher() -> ChallengeDispatcher:
    """Provide a newly constructed ``ChallengeDispatcher`` to each test."""
    fresh = ChallengeDispatcher()
    return fresh


def test_dispatcher_register_cid(dispatcher: ChallengeDispatcher) -> None:
    """Registering a CID adds it to the dispatcher's known-CID set."""
    registered = make_cid(EMB_A)
    dispatcher.register_cid(registered)
    known = dispatcher._known_cids_set
    assert registered in known


def test_dispatcher_pick_random_cid(dispatcher: ChallengeDispatcher) -> None:
    """``pick_random_cid`` gives None when empty, else a registered CID."""
    # Nothing registered yet.
    assert dispatcher.pick_random_cid() is None

    # With exactly one CID registered, that CID is the only possible pick.
    only_cid = make_cid(EMB_A)
    dispatcher.register_cid(only_cid)
    picked = dispatcher.pick_random_cid()
    assert picked == only_cid


def test_dispatcher_build_challenge(dispatcher: ChallengeDispatcher) -> None:
    """``build_challenge`` returns a challenge bound to the requested CID."""
    target = make_cid(EMB_A)
    challenge = dispatcher.build_challenge(target)
    assert challenge is not None
    assert challenge.cid == target


def test_dispatcher_verify_valid_response(dispatcher: ChallengeDispatcher) -> None:
    """The dispatcher accepts a genuine response to a challenge it built."""
    challenge = dispatcher.build_challenge(make_cid(EMB_A))
    response = engram_core.generate_response(challenge, EMB_A)
    accepted = dispatcher.verify_response(
        challenge, response.embedding_hash, response.proof, EMB_A
    )
    assert accepted is True


def test_dispatcher_rejects_replay(dispatcher: ChallengeDispatcher) -> None:
    """The same nonce must be rejected a second time."""
    challenge = dispatcher.build_challenge(make_cid(EMB_A))
    response = engram_core.generate_response(challenge, EMB_A)
    submission = (challenge, response.embedding_hash, response.proof, EMB_A)

    # First submission is accepted.
    first = dispatcher.verify_response(*submission)
    assert first is True

    # Submitting the identical challenge/response again must be refused.
    second = dispatcher.verify_response(*submission)
    assert second is False


def test_dispatcher_record_and_slash(dispatcher: ChallengeDispatcher) -> None:
    """A miner failing MIN_CHALLENGES_BEFORE_SLASH challenges in a row is slashable."""
    from engram.config import MIN_CHALLENGES_BEFORE_SLASH

    miner_uid = "miner42"
    # Record nothing but failures for this miner.
    attempts = 0
    while attempts < MIN_CHALLENGES_BEFORE_SLASH:
        dispatcher.record_result(miner_uid, passed=False)
        attempts += 1

    slashable = dispatcher.slashable_miners()
    assert miner_uid in slashable


def test_dispatcher_passing_miner_not_slashed(dispatcher: ChallengeDispatcher) -> None:
    """A miner passing MIN_CHALLENGES_BEFORE_SLASH challenges is not slashable."""
    from engram.config import MIN_CHALLENGES_BEFORE_SLASH

    miner_uid = "honest_miner"
    # Record nothing but passes for this miner.
    attempts = 0
    while attempts < MIN_CHALLENGES_BEFORE_SLASH:
        dispatcher.record_result(miner_uid, passed=True)
        attempts += 1

    slashable = dispatcher.slashable_miners()
    assert miner_uid not in slashable


def test_dispatcher_partial_failure_below_threshold(dispatcher: ChallengeDispatcher) -> None:
    """A miner whose success rate meets SLASH_THRESHOLD is not slashable.

    The miner fails every third challenge (i = 0, 3, 6, ...) and passes the
    rest. Whether that rate reaches SLASH_THRESHOLD depends on the configured
    constants, so the test skips, with the measured values in the reason,
    when the premise is not met. Previously it passed in that case without
    running any assertion.
    """
    from engram.config import MIN_CHALLENGES_BEFORE_SLASH, SLASH_THRESHOLD

    uid = "ok_miner"
    for i in range(MIN_CHALLENGES_BEFORE_SLASH):
        # Fail when i is a multiple of 3, pass otherwise.
        dispatcher.record_result(uid, passed=(i % 3 != 0))

    record = dispatcher.get_record(uid)
    # Written as the negation of the original `>=` guard so the set of cases
    # that reach the assertion is exactly the same as before.
    if not (record.success_rate >= SLASH_THRESHOLD):
        pytest.skip(
            f"success_rate {record.success_rate!r} is below SLASH_THRESHOLD "
            f"{SLASH_THRESHOLD!r}; scenario does not apply with current config"
        )

    assert uid not in dispatcher.slashable_miners()