|
1 | 1 | # File: src/stamp/engine/validator.py |
2 | 2 |
|
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +import json |
| 6 | +from typing import Dict, List, Tuple, Any |
| 7 | + |
| 8 | +from jsonschema import Draft202012Validator, ValidationError |
| 9 | + |
| 10 | + |
class ValidationResult:
    """
    Structured result object returned by Validator.validate().

    Attributes:
        fatal_errors: list of fatal error strings
        repairable_errors: list of repairable error strings
        warnings: list of warning strings
        metadata: parsed metadata object (may be {} if invalid)
        body: markdown body text
        had_metadata: bool — whether a metadata fence was detected

    The lists are stored by reference (not copied), so later mutation by the
    caller is visible through this object.
    """

    def __init__(
        self,
        fatal_errors: List[str],
        repairable_errors: List[str],
        warnings: List[str],
        metadata: Dict[str, Any],
        body: str,
        had_metadata: bool,
    ):
        self.fatal_errors = fatal_errors
        self.repairable_errors = repairable_errors
        self.warnings = warnings
        self.metadata = metadata
        self.body = body
        self.had_metadata = had_metadata

    @property
    def status(self) -> str:
        """
        Returns pass / repairable / fail based on spec-required classification.

        Fatal errors take precedence over repairable ones; warnings never
        affect the status.
        """
        if self.fatal_errors:
            return "fail"
        if self.repairable_errors:
            return "repairable"
        return "pass"

    def __repr__(self) -> str:
        # The body is deliberately left out: it can be an entire document.
        return (
            f"{type(self).__name__}("
            f"status={self.status!r}, "
            f"fatal_errors={self.fatal_errors!r}, "
            f"repairable_errors={self.repairable_errors!r}, "
            f"warnings={self.warnings!r}, "
            f"had_metadata={self.had_metadata!r})"
        )
| 50 | + |
| 51 | + |
class Validator:
    """
    Validator
    ---------
    Implements ARI-compliant error classification as defined in Stamp-Spec.md §4.

    Responsibilities:
    - Accept metadata + body from Parser and errors from Loader.
    - Classify loader failures as fatal errors.
    - Validate metadata against ARI Metadata Schema (jsonschema).
    - Identify repairable vs fatal errors per policy.
    - Surface warnings (non-fatal anomalies).
    """

    # 'updated' values that sort after this ISO date only produce a warning.
    FUTURE_DATE_THRESHOLD = "2030-01-01"

    def __init__(self, schema: Dict[str, Any]):
        """
        schema: JSON schema dict loaded by engine initializer.
        """
        self.schema = schema
        self.json_validator = Draft202012Validator(schema)

    # -------------------------------------------------------------------------
    # PUBLIC VALIDATION INTERFACE
    # -------------------------------------------------------------------------

    def validate(
        self,
        metadata: Dict,
        body: str,
        had_metadata: bool,
        loader_errors: List[str],
    ) -> "ValidationResult":
        """
        Main entrypoint for validation.

        Inputs:
            metadata: dict from Parser
            body: markdown body
            had_metadata: bool (Parser detected fence)
            loader_errors: errors returned by Loader

        Outputs:
            ValidationResult

        Schema and policy checks are skipped (early return) when there is no
        metadata block or the metadata is empty; loader errors are still
        reported as fatal in those cases.
        """
        fatal: List[str] = []
        repairable: List[str] = []
        warnings: List[str] = []

        # 1. Loader errors → always fatal
        if loader_errors:
            fatal.extend(loader_errors)

        # 2. No metadata block present → repairable (Stamp must inject)
        if not had_metadata:
            repairable.append("Missing metadata block (no top-of-file YAML found).")
            return ValidationResult(fatal, repairable, warnings, metadata, body, had_metadata)

        # 3. Empty metadata. Truthiness (rather than `== {}`) also catches
        #    None, which would otherwise crash the policy checks below.
        if not metadata:
            repairable.append("Empty or unparseable metadata block detected.")
            return ValidationResult(fatal, repairable, warnings, metadata, body, had_metadata)

        # 4. JSON Schema validation
        for err in self.json_validator.iter_errors(metadata):
            self._classify_schema_error(err, fatal, repairable)

        # 5. Policy-level additional checks
        self._apply_policy_rules(metadata, fatal, repairable, warnings)

        # Keyword names must match ValidationResult.__init__ exactly.
        return ValidationResult(
            fatal_errors=fatal,
            repairable_errors=repairable,
            warnings=warnings,
            metadata=metadata,
            body=body,
            had_metadata=had_metadata,
        )

    # -------------------------------------------------------------------------
    # INTERNAL HELPERS
    # -------------------------------------------------------------------------

    def _classify_schema_error(
        self,
        err: "ValidationError",
        fatal: List[str],
        repairable: List[str],
    ) -> None:
        """
        Classify schema-level validation problems into fatal or repairable.

        Rules based on Stamp-Spec.md §4:
        - Missing required fields → fatal
        - Invalid enumerations → fatal
        - Type mismatches that can be deterministically corrected → repairable

        Appends exactly one message to `fatal` or `repairable`; returns nothing.
        """
        path = ".".join(str(part) for part in err.path) or "<root>"

        if err.validator == "required":
            fatal.append(f"Missing required field: {err.message}")
        elif err.validator == "enum":
            fatal.append(f"Invalid value for '{path}': {err.message}")
        elif err.validator == "type":
            repairable.append(f"Incorrect type for '{path}': {err.message}")
        else:
            # conservative: treat unknown schema errors as fatal
            fatal.append(f"Schema validation error at {path}: {err.message}")

    # -------------------------------------------------------------------------

    @staticmethod
    def _day_text(value: Any) -> str:
        """Return the first 10 chars of value's ISO text, or "" if not date-like."""
        if isinstance(value, str):
            return value[:10]
        isoformat = getattr(value, "isoformat", None)
        return isoformat()[:10] if callable(isoformat) else ""

    @classmethod
    def _precedes(cls, first: Any, second: Any) -> bool:
        """
        Return True when `first` sorts strictly before `second`.

        Native comparison is used whenever the two values support it. Values
        that cannot be compared directly (e.g. a date object against a string)
        fall back to comparing the leading 10 characters of their ISO text;
        values with no ISO text never precede anything.
        """
        try:
            return bool(first < second)
        except TypeError:
            left, right = cls._day_text(first), cls._day_text(second)
            return bool(left and right) and left < right

    def _apply_policy_rules(
        self,
        metadata: Dict[str, Any],
        fatal: List[str],
        repairable: List[str],
        warnings: List[str],
    ) -> None:
        """
        Apply ARI Metadata Policy v3.0.1 beyond JSON Schema:

        - Domain & Type validity are enforced by schema (fatal).
        - ai_assisted logic must match ai_assistance_details requirement.
        - 'updated' must not precede 'created'.
        - anchors list must not be empty for governed artifacts.
        - dependency list issues classified as repairable.

        Findings are appended to `repairable` and `warnings`; nothing here is
        currently classified as fatal. Non-dict metadata is ignored, since the
        key lookups below need a mapping.
        """
        if not isinstance(metadata, dict):
            return

        # ai_assisted consistency
        ai_assisted = metadata.get("ai_assisted")
        details = metadata.get("ai_assistance_details")

        if ai_assisted in ("partial", "extensive") and not details:
            repairable.append("ai_assistance_details missing for AI-assisted document.")

        if ai_assisted == "none" and details:
            repairable.append("ai_assistance_details should not be present when ai_assisted='none'.")

        # date ordering
        created = metadata.get("created")
        updated = metadata.get("updated")

        if created and updated and self._precedes(updated, created):
            repairable.append("updated date precedes created date (repairable normalization).")

        # anchors validity
        anchors = metadata.get("anchors", [])
        if not isinstance(anchors, list):
            repairable.append("anchors field must be an array.")
        elif not anchors:
            repairable.append("Missing anchors list (required by ARI Policy §8).")

        # dependencies shape
        dependencies = metadata.get("dependencies", [])
        if not isinstance(dependencies, list):
            repairable.append("dependencies field must be an array.")

        # future-dated updated timestamp → warning
        if updated and self._precedes(self.FUTURE_DATE_THRESHOLD, updated):
            warnings.append("Updated timestamp is suspiciously far in the future.")
|
0 commit comments