Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions specs/065-evaluation-foundation/datasets/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Spec 065 — Evaluation datasets

## `security_corpus_v1.json` (D2)

Labeled security regression corpus the D2 detection scorer measures against
(precision / recall / F1 / FPR per detector). Conforms to
[`../contracts/security-corpus.schema.json`](../contracts/security-corpus.schema.json)
and the cross-entity invariants in [`../data-model.md`](../data-model.md)
(INV-3, INV-4). Validated by `corpus_test.go` in this directory.

**Composition (43 entries):**

| Label | Category | Count |
|-------|----------|-------|
| malicious | `tool_poisoning` | 6 |
| malicious | `prompt_injection` | 6 |
| malicious | `shadowing` | 4 |
| malicious | `rug_pull` | 4 |
| benign | `benign` (clean base rate) | 15 |
| benign | `hard_negative` (attack-resembling) | 8 |

Hard negatives are benign descriptions that *superficially resemble* an attack
(e.g. a secrets-scanner that lists `~/.ssh/id_rsa` as an example, a tool that
legitimately says "ignore case"). They exist to expose noisy detectors
(SC-004 / INV-3). Each hard-negative `id` is `hn_<attack_category>_<n>`, encoding
the attack it mimics so false positives map back to a category.

## Provenance & licensing (FR-007 / CN-005 / R-07 / R-A)

Every entry carries `provenance.{source,license}`, and the test fails the build
if any license is outside the redistributable allowlist (CN-005 / INV-4).

- **`self-authored` / `self-authored`** (source / license) — the dominant
source; short tool-description strings written from scratch, modeled on public
attack taxonomies. Redistributable by construction.
- **`DVMCP` / `MIT`** — a subset adapted from the MIT-licensed
[Damn Vulnerable MCP](https://github.com/harishsg993010/damn-vulnerable-MCP-server)
project.

### External benchmarks (referenced, NOT vendored)

Per CN-005 and risk R-A, the following are **referenced externally only** and no
text from them is vendored into this repo:

- **MCPTox** and **MCP-AttackBench** — restrictive / research-only licenses.
- **`mcp-injection-experiments`** — LICENSE unconfirmed (research.md R-A); where it
inspired a pattern, the corresponding entry was rewritten from scratch and
labeled `self-authored`. The corpus test rejects any entry whose
`provenance.source` is `MCPTox`, `MCP-AttackBench`, or `mcp-injection-experiments`.
208 changes: 208 additions & 0 deletions specs/065-evaluation-foundation/datasets/corpus_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
// Package datasets holds the validator for the Spec 065 evaluation datasets.
//
// This test enforces the D2 security-corpus contract
// (specs/065-evaluation-foundation/contracts/security-corpus.schema.json) plus
// the cross-entity invariants from data-model.md that plain JSON Schema cannot
// express (INV-3, INV-4, SC-004): every entry carries a redistributable
// provenance, label/category are coherent, and every attack category is
// covered by at least one malicious sample and at least one attack-resembling
// benign hard negative.
package datasets

import (
"encoding/json"
"os"
"strings"
"testing"

"github.com/santhosh-tekuri/jsonschema/v6"
)

// Input files for the validator. Both are relative paths passed to os.ReadFile,
// so they resolve against the working directory of the test process.
const (
	// schemaFile is the JSON Schema contract the corpus is validated against.
	schemaFile = "../contracts/security-corpus.schema.json"
	// corpusFile is the labeled security corpus under test.
	corpusFile = "security_corpus_v1.json"
)

// attackCategories lists the four malicious categories the corpus must cover.
// Entries labeled "malicious" must use one of these as their category, and the
// coverage test requires every one of them to be represented.
var attackCategories = []string{
	"tool_poisoning",
	"prompt_injection",
	"shadowing",
	"rug_pull",
}

// redistributableLicenses is the allowlist of provenance.license values an
// entry may carry (CN-005 / FR-007 / INV-4). Any license absent from this set
// looks up as false and is reported as an error by the structure test.
var redistributableLicenses = map[string]bool{
	"Apache-2.0":    true,
	"BSD-3-Clause":  true,
	"CC0-1.0":       true,
	"MIT":           true,
	"self-authored": true,
}

// restrictedSources names the external benchmarks whose text must never be
// vendored into the corpus (CN-005 + R-A). MCPTox and MCP-AttackBench carry
// restrictive licenses; mcp-injection-experiments has an unconfirmed LICENSE
// (R-A), so it is referenced externally only.
var restrictedSources = map[string]bool{
	"MCP-AttackBench":           true,
	"MCPTox":                    true,
	"mcp-injection-experiments": true,
}

// corpus is the top-level shape of the corpus file: an object holding an
// "entries" array.
type corpus struct {
	Entries []entry `json:"entries"`
}

// entry is one labeled sample in the corpus. The tests below check that ID is
// non-empty and unique, Description is non-blank, and Label/Category form a
// coherent pair.
type entry struct {
	ID          string     `json:"id"`
	Description string     `json:"description"`
	Label       string     `json:"label"`
	Category    string     `json:"category"`
	Provenance  provenance `json:"provenance"`
}

// provenance records where an entry came from and the license it is
// redistributed under; both are checked against the allow/deny lists.
type provenance struct {
	Source  string `json:"source"`
	License string `json:"license"`
}

// loadCorpus reads the corpus file, validates it against the committed JSON
// Schema contract, and returns it decoded into typed structs. Every failure
// (read, parse, schema compile, validation, decode) aborts the calling test
// through t.Fatalf.
func loadCorpus(t *testing.T) corpus {
	t.Helper()

	// Name under which the schema document is registered with the compiler.
	const schemaResource = "security-corpus.schema.json"

	corpusBytes, err := os.ReadFile(corpusFile)
	if err != nil {
		t.Fatalf("read %s: %v", corpusFile, err)
	}
	instance, err := jsonschema.UnmarshalJSON(strings.NewReader(string(corpusBytes)))
	if err != nil {
		t.Fatalf("parse %s: %v", corpusFile, err)
	}

	// Load and compile the schema contract, then validate the raw instance.
	schemaBytes, err := os.ReadFile(schemaFile)
	if err != nil {
		t.Fatalf("read %s: %v", schemaFile, err)
	}
	schemaDoc, err := jsonschema.UnmarshalJSON(strings.NewReader(string(schemaBytes)))
	if err != nil {
		t.Fatalf("parse schema: %v", err)
	}
	compiler := jsonschema.NewCompiler()
	if err := compiler.AddResource(schemaResource, schemaDoc); err != nil {
		t.Fatalf("add schema resource: %v", err)
	}
	compiled, err := compiler.Compile(schemaResource)
	if err != nil {
		t.Fatalf("compile schema: %v", err)
	}
	if err := compiled.Validate(instance); err != nil {
		t.Fatalf("corpus fails schema contract: %v", err)
	}

	// The invariant checks work on typed structs, so decode the same bytes a
	// second time with encoding/json.
	var decoded corpus
	if err := json.Unmarshal(corpusBytes, &decoded); err != nil {
		t.Fatalf("decode corpus into structs: %v", err)
	}
	return decoded
}

// TestCorpus_SchemaAndStructure checks per-entry structure after schema
// validation: ids are present and unique, descriptions are non-blank,
// label/category pairs are coherent, and provenance is redistributable.
func TestCorpus_SchemaAndStructure(t *testing.T) {
	entries := loadCorpus(t).Entries
	if len(entries) == 0 {
		t.Fatal("corpus has no entries (schema minItems=1)")
	}

	idSeen := map[string]bool{}
	for idx, ent := range entries {
		// Identity: every id must be set and must not repeat.
		if ent.ID == "" {
			t.Errorf("entry %d: empty id", idx)
		}
		if idSeen[ent.ID] {
			t.Errorf("entry %d: duplicate id %q", idx, ent.ID)
		}
		idSeen[ent.ID] = true

		if strings.TrimSpace(ent.Description) == "" {
			t.Errorf("entry %q: empty description", ent.ID)
		}

		// INV-4 / FR-007: the label decides which categories are legal. At most
		// one of these cases can fire for a given entry.
		switch label, cat := ent.Label, ent.Category; {
		case label == "malicious" && !contains(attackCategories, cat):
			t.Errorf("entry %q: malicious label requires an attack category, got %q", ent.ID, cat)
		case label == "benign" && cat != "benign" && cat != "hard_negative":
			t.Errorf("entry %q: benign label requires category benign|hard_negative, got %q", ent.ID, cat)
		case label != "malicious" && label != "benign":
			t.Errorf("entry %q: invalid label %q", ent.ID, label)
		}

		// CN-005 / INV-4: the source must be named and not restricted, and the
		// license must be on the redistributable allowlist.
		src, lic := ent.Provenance.Source, ent.Provenance.License
		if src == "" {
			t.Errorf("entry %q: empty provenance.source", ent.ID)
		}
		if restrictedSources[src] {
			t.Errorf("entry %q: source %q is restricted/unconfirmed and must not be vendored (CN-005/R-A)", ent.ID, src)
		}
		if !redistributableLicenses[lic] {
			t.Errorf("entry %q: license %q is not in the redistributable allowlist (CN-005/FR-007)", ent.ID, lic)
		}
	}
}

// TestCorpus_AttackCoverageAndHardNegatives enforces INV-3 / SC-004: each
// attack category needs at least one malicious sample and at least one benign
// hard negative whose id declares that it mimics that category.
func TestCorpus_AttackCoverageAndHardNegatives(t *testing.T) {
	corpusData := loadCorpus(t)

	maliciousCount := map[string]int{}
	hardNegativeCount := map[string]int{}

	for _, ent := range corpusData.Entries {
		switch {
		case ent.Label == "malicious":
			maliciousCount[ent.Category]++
		case ent.Label == "benign" && ent.Category == "hard_negative":
			// Hard-negative ids follow hn_<attack_category>_<n>, which makes the
			// mimicked attack (INV-3) machine-readable.
			if target := mimickedAttack(ent.ID); target != "" {
				hardNegativeCount[target]++
			} else {
				t.Errorf("hard_negative %q: id must be hn_<attack_category>_<n> to declare the mimicked attack", ent.ID)
			}
		}
	}

	// Report every uncovered category rather than stopping at the first.
	for _, category := range attackCategories {
		if maliciousCount[category] == 0 {
			t.Errorf("attack category %q has no malicious sample", category)
		}
		if hardNegativeCount[category] == 0 {
			t.Errorf("attack category %q has no hard-negative benign (SC-004/INV-3)", category)
		}
	}
}

// mimickedAttack returns the attack category encoded in a hard-negative id of
// the form hn_<attack_category>_<n>, or "" when the id does not follow that
// convention.
//
// The <n> part must be non-empty: an id such as "hn_rug_pull_" is rejected
// because it names a category but no sample. The content of <n> is not
// otherwise constrained (it is not required to be numeric).
func mimickedAttack(id string) string {
	rest, ok := strings.CutPrefix(id, "hn_")
	if !ok {
		return ""
	}
	for _, cat := range attackCategories {
		// The category must be followed by "_" and at least one more character.
		if suffix, found := strings.CutPrefix(rest, cat+"_"); found && suffix != "" {
			return cat
		}
	}
	return ""
}

// contains reports whether x is an element of xs. A nil or empty xs yields
// false.
func contains(xs []string, x string) bool {
	for i := 0; i < len(xs); i++ {
		if xs[i] == x {
			return true
		}
	}
	return false
}
Loading
Loading