Skip to content

Commit 171b6fe

Browse files
authored
Update validator.py
1 parent abf6280 commit 171b6fe

1 file changed

Lines changed: 217 additions & 12 deletions

File tree

src/stamp/engine/validator.py

Lines changed: 217 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,224 @@
11
# File: src/stamp/engine/validator.py
22

3+
from __future__ import annotations
4+
5+
import json
6+
from typing import Dict, List, Tuple, Any
7+
8+
from jsonschema import Draft202012Validator, ValidationError
9+
10+
11+
class ValidationResult:
    """
    Outcome container produced by Validator.validate().

    Attributes:
        fatal_errors: messages for problems classified as fatal
        repairable_errors: messages for problems classified as repairable
        warnings: messages for non-fatal anomalies
        metadata: parsed metadata object (may be {} if invalid)
        body: markdown body text
        had_metadata: bool — whether a metadata fence was detected
    """

    def __init__(
        self,
        fatal_errors: List[str],
        repairable_errors: List[str],
        warnings: List[str],
        metadata: Dict[str, Any],
        body: str,
        had_metadata: bool,
    ):
        # Findings, grouped by severity. The lists are stored as given (no copy).
        self.fatal_errors, self.repairable_errors, self.warnings = (
            fatal_errors,
            repairable_errors,
            warnings,
        )
        # The document parts the findings refer to.
        self.metadata, self.body, self.had_metadata = metadata, body, had_metadata

    @property
    def status(self) -> str:
        """
        Overall classification: "fail" if any fatal error exists, otherwise
        "repairable" if any repairable error exists, otherwise "pass".
        Warnings never influence the status.
        """
        if not self.fatal_errors:
            return "repairable" if self.repairable_errors else "pass"
        return "fail"
50+
51+
352
class Validator:
    """
    Validator
    ---------
    Implements ARI-compliant error classification as defined in Stamp-Spec.md §4.

    Responsibilities:
        - Accept metadata + body from Parser and errors from Loader.
        - Classify loader failures as fatal errors.
        - Validate metadata against ARI Metadata Schema (jsonschema).
        - Identify repairable vs fatal errors per policy.
        - Surface warnings (non-fatal anomalies).
    """

    # An 'updated' date later than this ISO date only triggers a warning.
    FUTURE_DATE_THRESHOLD = "2030-01-01"

    def __init__(self, schema: Dict[str, Any]):
        """
        schema: JSON schema dict loaded by engine initializer.
        """
        self.schema = schema
        self.json_validator = Draft202012Validator(schema)

    # -------------------------------------------------------------------------
    # PUBLIC VALIDATION INTERFACE
    # -------------------------------------------------------------------------

    def validate(
        self,
        metadata: Dict,
        body: str,
        had_metadata: bool,
        loader_errors: List[str],
    ) -> "ValidationResult":
        """
        Main entrypoint for validation.

        Inputs:
            metadata: dict from Parser (None is treated like an empty dict)
            body: markdown body
            had_metadata: bool (Parser detected fence)
            loader_errors: errors returned by Loader

        Outputs:
            ValidationResult
        """
        fatal: List[str] = []
        repairable: List[str] = []
        warnings: List[str] = []

        # A missing metadata object is handled like an empty one so the
        # result always carries something dict-like.
        if metadata is None:
            metadata = {}

        # 1. Loader errors → always fatal
        if loader_errors:
            fatal.extend(loader_errors)

        if not had_metadata:
            # 2. No metadata block present → repairable (Stamp must inject)
            repairable.append("Missing metadata block (no top-of-file YAML found).")
        elif metadata == {}:
            # 3. Empty metadata dict: could be empty-but-valid YAML or
            #    malformed YAML; either way there is nothing to validate.
            repairable.append("Empty or unparseable metadata block detected.")
        else:
            # 4. JSON Schema validation
            for err in self.json_validator.iter_errors(metadata):
                self._classify_schema_error(err, fatal, repairable)

            # 5. Policy-level additional checks. They read keys via .get(),
            #    so they only make sense for a mapping; a non-mapping value
            #    is left to the schema check above.
            if isinstance(metadata, dict):
                self._apply_policy_rules(metadata, fatal, repairable, warnings)

        # Keyword names must match ValidationResult.__init__
        # (fatal_errors / repairable_errors); the short names raise TypeError.
        return ValidationResult(
            fatal_errors=fatal,
            repairable_errors=repairable,
            warnings=warnings,
            metadata=metadata,
            body=body,
            had_metadata=had_metadata,
        )

    # -------------------------------------------------------------------------
    # INTERNAL HELPERS
    # -------------------------------------------------------------------------

    def _classify_schema_error(
        self,
        err: "ValidationError",
        fatal: List[str],
        repairable: List[str],
    ) -> None:
        """
        Classify schema-level validation problems into fatal or repairable.

        Rules based on Stamp-Spec.md §4:
            - Missing required fields → fatal
            - Invalid enumerations → fatal
            - Type mismatches that can be deterministically corrected → repairable
            - Anything else → fatal

        The message is appended to the matching list in place.
        """
        path = ".".join(str(part) for part in err.path) or "<root>"

        if err.validator == "required":
            fatal.append(f"Missing required field: {err.message}")
        elif err.validator == "enum":
            fatal.append(f"Invalid value for '{path}': {err.message}")
        elif err.validator == "type":
            repairable.append(f"Incorrect type for '{path}': {err.message}")
        else:
            # conservative: treat unknown schema errors as fatal
            fatal.append(f"Schema validation error at {path}: {err.message}")

    # -------------------------------------------------------------------------

    @staticmethod
    def _coerce_date(value: Any):
        """
        Normalize a metadata date value to a datetime.date.

        Accepts datetime.datetime, datetime.date, or a string whose first ten
        characters are an ISO date (YYYY-MM-DD). Returns None for anything
        else, so callers can skip comparisons instead of raising.
        """
        # Local import: the module's import block does not provide datetime.
        from datetime import date, datetime

        # datetime is a subclass of date, so it has to be tested first.
        if isinstance(value, datetime):
            return value.date()
        if isinstance(value, date):
            return value
        if isinstance(value, str):
            try:
                return date.fromisoformat(value.strip()[:10])
            except ValueError:
                return None
        return None

    def _apply_policy_rules(
        self,
        metadata: Dict[str, Any],
        fatal: List[str],
        repairable: List[str],
        warnings: List[str],
    ) -> None:
        """
        Apply ARI Metadata Policy v3.0.1 beyond JSON Schema:

            - Domain & Type validity are enforced by schema (fatal).
            - ai_assisted logic must match ai_assistance_details requirement.
            - 'updated' must not precede 'created'.
            - anchors must be a non-empty list.
            - dependencies must be a list when present.
            - 'updated' later than FUTURE_DATE_THRESHOLD → warning.

        Findings are appended to `repairable` / `warnings` in place; this
        method currently adds nothing to `fatal`.
        """
        # ai_assisted consistency
        ai_assisted = metadata.get("ai_assisted")
        details = metadata.get("ai_assistance_details")

        if ai_assisted in ("partial", "extensive") and not details:
            repairable.append("ai_assistance_details missing for AI-assisted document.")

        if ai_assisted == "none" and details:
            repairable.append("ai_assistance_details should not be present when ai_assisted='none'.")

        # date ordering — compare normalized dates, because raw values may be
        # a mix of strings and date/datetime objects, which cannot be ordered
        # against each other. Comparison is at day granularity.
        created = self._coerce_date(metadata.get("created"))
        updated = self._coerce_date(metadata.get("updated"))

        if created and updated and updated < created:
            repairable.append("updated date precedes created date (repairable normalization).")

        # anchors validity
        anchors = metadata.get("anchors", [])
        if not isinstance(anchors, list):
            repairable.append("anchors field must be an array.")
        elif not anchors:
            repairable.append("Missing anchors list (required by ARI Policy §8).")

        # dependencies shape
        dependencies = metadata.get("dependencies", [])
        if not isinstance(dependencies, list):
            repairable.append("dependencies field must be an array.")

        # future-dated updated timestamp → warning
        cutoff = self._coerce_date(self.FUTURE_DATE_THRESHOLD)
        if updated and cutoff and updated > cutoff:
            warnings.append("Updated timestamp is suspiciously far in the future.")

0 commit comments

Comments
 (0)