Project-Navi · Navi Bot (project-navi-bot) · Apr 4, 2026 · Apr 4, 2026 · Apr 4, 2026 · Apr 4, 2026
@@ -254,21 +254,21 @@
         "filename": "tests/test_grippy_review.py",
         "hashed_secret": "3e536cc49ad17f2f50dc6f0aab08495bddbb2833",
         "is_verified": false,
-        "line_number": 907
+        "line_number": 908
       },
       {
         "type": "Secret Keyword",
         "filename": "tests/test_grippy_review.py",
         "hashed_secret": "80c3eb3a746f82974a9696275d8b52a37fba449b",
         "is_verified": false,
-        "line_number": 939
+        "line_number": 940
       },
       {
         "type": "AWS Access Key",
         "filename": "tests/test_grippy_review.py",
         "hashed_secret": "1d5bc0e7232d07b336f4d35db4c0200142962f4a",
         "is_verified": false,
-        "line_number": 1464
+        "line_number": 1465
       }
     ],
     "tests/test_grippy_rule_secrets.py": [
@@ -295,5 +295,5 @@
       }
     ]
   },
-  "generated_at": "2026-04-03T17:34:56Z"
+  "generated_at": "2026-04-04T18:46:04Z"
 }
@@ -17,3 +17,43 @@ GitHub's REST API returns `406 Not Acceptable` when a PR diff exceeds their size
 **Fix:** Handle 406 in `fetch_pr_diff()` by falling back to per-file diffs via `GET /repos/{owner}/{repo}/pulls/{pr}/files` and fetching individual file patches, or truncating gracefully with a warning.
 
 **Workaround:** Split large fixture additions into separate PRs.
+
+### DEBT-002: No migration path for serialized Finding objects missing `finding_type`
+
+**Filed:** 2026-04-04
+**Module:** `schema.py` — `Finding`, `FindingType`
+**Severity:** Low
+
+The `finding_type` field was added to `Finding` as required (no default) to avoid OpenAI structured output `$ref` + `default` keyword conflicts. Historical serialized `Finding` records (if any exist in databases or artifacts) will fail Pydantic validation on deserialization because they lack this field.
+
+**Impact:** Low — no known production consumers persist `Finding` objects long-term. CI reviews are ephemeral.
+
+**Fix:** If persistent Finding storage is added, include a migration to inject `finding_type: "issue"` into legacy records.
+
+### DEBT-003: Cached LLM agents may not emit `finding_type` field
+
+**Filed:** 2026-04-04
+**Module:** `prompts_data/output-schema.md` — `finding_type` field
+**Severity:** Low
+
+Older orchestrators or LLM agents running on cached prompts that predate the `finding_type` field will emit findings without it. Because the field is required on `Finding`, that output fails Pydantic validation and the review cannot be parsed.
+
+**Impact:** Low — grippy CI always runs from the current commit's prompts. Only affects external consumers using stale schema docs.
+
+**Fix:** No action needed unless grippy is deployed as a service with cached prompt versions.
+
+### DEBT-004: Formalize appreciative inquiry in review output
+
+**Filed:** 2026-04-04
+**Module:** `prompts_data/system-core.md`, `prompts_data/scoring-rubric.md`, `schema.py`
+**Severity:** Enhancement
+
+Grippy naturally produces positive observations about good code alongside problems — the `finding_type: "note"` field now supports this structurally. Formalizing this as appreciative inquiry (deliberately surfacing what's working well, not just what's broken) would improve the review experience for humans. Research shows balanced feedback increases engagement with review findings and reduces defensive responses to criticism.
+
+**Shape:**
+- Prompt guidance: instruct grippy to include 1-2 notes per review highlighting strong patterns, good test coverage, or security improvements — not just problems
+- Scoring rubric: notes don't deduct (already implemented) but could contribute to a "strengths" section in the summary
+- Summary format: pair findings with acknowledged strengths ("3 issues found, 2 strong patterns noted")
+- Personality: grippy's grudging-respect tone is a natural fit for appreciative inquiry — "I hate to admit it, but this auth flow is actually solid"
+
+**Why:** Code review tools that only report problems train humans to dread their output. Balanced feedback produces better outcomes than pure criticism.
@@ -22,6 +22,7 @@
 from grippy.schema import (
     Finding,
     FindingCategory,
+    FindingType,
     GrippyReview,
     Score,
     ScoreBreakdown,
@@ -207,11 +208,18 @@ def _is_nogrip_suppressed(finding: Finding, nogrip_index: _NoGripIndex) -> bool:
 
 
 def _recompute_score(findings: list[Finding]) -> Score:
-    """Recompute score from the scoring set using the deduction rubric."""
+    """Recompute score from the scoring set using the deduction rubric.
+
+    Only ``issue`` findings deduct points. ``note`` findings (positive
+    observations) stay in the scoring set for display but do not affect the
+    score or severity counts, unless they carry a ``rule_id`` (scored as issues).
+    """
     category_deductions: dict[FindingCategory, int] = {}
     severity_counts = dict.fromkeys(Severity, 0)
 
     for f in findings:
+        if f.finding_type == FindingType.NOTE and not f.rule_id:
+            continue
         category_deductions[f.category] = category_deductions.get(f.category, 0) + _DEDUCTION.get(
             f.severity, 0
         )
@@ -257,11 +265,18 @@ def _recompute_score(findings: list[Finding]) -> Score:
 
 
 def _derive_verdict(score: int, findings: list[Finding], mode: str) -> Verdict:
-    """Derive verdict from recomputed score and mode (call-site authority)."""
+    """Derive verdict from recomputed score and mode (call-site authority).
+
+    Only ``issue`` findings contribute to severity gates. ``note`` findings
+    (positive observations) do not trigger FAIL regardless of their severity label.
+    Rule-backed findings (``rule_id`` set) are always treated as issues even if
+    the LLM labels them as notes — deterministic rules cannot be downgraded.
+    """
     threshold = _THRESHOLD.get(mode, 70)
 
-    has_critical = any(f.severity == Severity.CRITICAL for f in findings)
-    high_count = sum(1 for f in findings if f.severity == Severity.HIGH)
+    issues = [f for f in findings if f.finding_type != FindingType.NOTE or f.rule_id]
+    has_critical = any(f.severity == Severity.CRITICAL for f in issues)
+    high_count = sum(1 for f in issues if f.severity == Severity.HIGH)
 
     if has_critical:
         status, blocking = VerdictStatus.FAIL, True

@@ -36,6 +36,7 @@ You MUST produce a single JSON object conforming to this schema. No markdown wra
   "findings": [
     {
       "id": "F-001",
+      "finding_type": "issue | note",
       "severity": "CRITICAL | HIGH | MEDIUM | LOW",
       "confidence": 0,
       "category": "security | logic | governance | reliability | observability",
@@ -109,8 +110,12 @@ You MUST produce a single JSON object conforming to this schema. No markdown wra
 ## Schema Rules
 
 1. **findings** array may be empty (all-clear state)
-2. **escalations** array may be empty
-3. **confidence** is per-finding, 0-100. The filter pipeline uses this.
-4. **grippy_note** is the personality-injected comment. Keep it under 280 characters.
-5. **evidence** should quote or reference specific code — never fabricate evidence.
-6. **line_start** and **line_end** must reference lines in the DIFF, not the full file, for inline comment placement.
+2. **finding_type** distinguishes actionable problems from positive observations:
+   - `"issue"` — a problem that needs fixing. Deducts from score. Use this for bugs, vulnerabilities, missing error handling, etc.
+   - `"note"` — a positive observation or acknowledgment of good practice. Does NOT deduct from score. Use this when you want to highlight something the author did well, or acknowledge a security improvement without penalizing the PR.
+   - When in doubt, use `"issue"`. Do NOT use `"note"` to soften a real problem.
+3. **escalations** array may be empty
+4. **confidence** is per-finding, 0-100. The filter pipeline uses this.
+5. **grippy_note** is the personality-injected comment. Keep it under 280 characters.
+6. **evidence** should quote or reference specific code — never fabricate evidence.
+7. **line_start** and **line_end** must reference lines in the DIFF, not the full file, for inline comment placement.

@@ -25,6 +25,13 @@ class ComplexityTier(StrEnum):
     CRITICAL = "CRITICAL"
 
 
+class FindingType(StrEnum):
+    """Whether a finding reports a problem or a positive observation."""
+
+    ISSUE = "issue"  # actionable problem — deducts from score
+    NOTE = "note"  # positive observation / praise — does NOT deduct
+
+
 class FindingCategory(StrEnum):
     SECURITY = "security"
     LOGIC = "logic"
@@ -95,6 +102,7 @@ class Finding(BaseModel):
     model_config = {"frozen": True}
 
     id: str = Field(description="F-001 through F-999")
+    finding_type: FindingType
     severity: Severity
     confidence: int = Field(ge=0, le=100)
     category: FindingCategory

@@ -23,6 +23,7 @@ def _make_finding(
 ) -> Finding:
     return Finding(
         id="F-001",
+        finding_type="issue",
         severity=severity,
         confidence=90,
         category=category,
@@ -1562,6 +1563,7 @@ def test_script_tag_stripped_from_inline_comment(self) -> None:
         # Rebuild with malicious description — Finding is frozen, so create fresh
         finding = Finding(
             id="F-001",
+            finding_type="issue",
             severity="HIGH",
             confidence=90,
             category="security",
@@ -1587,6 +1589,7 @@ def test_iframe_tag_stripped(self) -> None:
 
         finding = Finding(
             id="F-001",
+            finding_type="issue",
             severity="HIGH",
             confidence=90,
             category="security",
@@ -1607,6 +1610,7 @@ def test_event_handler_stripped(self) -> None:
 
         finding = Finding(
             id="F-001",
+            finding_type="issue",
             severity="HIGH",
             confidence=90,
             category="security",
@@ -1627,6 +1631,7 @@ def test_javascript_scheme_stripped(self) -> None:
 
         finding = Finding(
             id="F-001",
+            finding_type="issue",
             severity="HIGH",
             confidence=90,
             category="security",
@@ -1648,6 +1653,7 @@ def test_sanitization_applied_in_summary_off_diff(self) -> None:
 
         finding = Finding(
             id="F-001",
+            finding_type="issue",
             severity="HIGH",
             confidence=90,
             category="security",

@@ -50,6 +50,7 @@ def _make_review(**overrides: Any) -> GrippyReview:
         "findings": [
             Finding(
                 id="F-001",
+                finding_type="issue",
                 severity=Severity.HIGH,
                 confidence=85,
                 category=FindingCategory.SECURITY,

@@ -70,6 +70,7 @@ def _make_review(**overrides: Any) -> GrippyReview:
         "findings": [
             Finding(
                 id="F-001",
+                finding_type="issue",
                 severity=Severity.LOW,
                 confidence=80,
                 category=FindingCategory.LOGIC,

@@ -36,6 +36,7 @@ def _f(**kw: object) -> Finding:
     """Build a Finding with sensible defaults, overridable by keyword."""
     defaults: dict = {
         "id": "F-001",
+        "finding_type": "issue",
         "severity": "HIGH",
         "confidence": 85,
         "category": "security",
@@ -678,3 +679,87 @@ def test_nogrip_prevents_fixture_false_positive_from_blocking(self) -> None:
         assert result.verdict.status == VerdictStatus.PASS
         assert result.verdict.merge_blocking is False
         assert result.meta.nogrip_suppressed_count == 2
+
+
+# ---------------------------------------------------------------------------
+# finding_type: note vs issue
+# ---------------------------------------------------------------------------
+
+
+class TestFindingTypeNote:
+    """Notes (positive observations) must not deduct from score or trigger FAIL."""
+
+    def test_note_findings_do_not_deduct_from_score(self) -> None:
+        """4 HIGH notes → score stays 100, not 100 - 4*15 = 40."""
+        findings = [_f(id=f"F-{i:03d}", finding_type="note") for i in range(1, 5)]
+        score = _recompute_score(findings)
+        assert score.overall == 100
+        assert score.deductions.high_count == 0
+        assert score.deductions.total_deduction == 0
+
+    def test_note_findings_do_not_trigger_high_count_fail(self) -> None:
+        """2+ HIGH notes should not auto-FAIL the verdict."""
+        findings = [
+            _f(id="F-001", finding_type="note"),
+            _f(id="F-002", finding_type="note"),
+            _f(id="F-003", finding_type="note"),
+        ]
+        verdict = _derive_verdict(100, findings, "pr_review")
+        assert verdict.status == VerdictStatus.PASS
+        assert verdict.merge_blocking is False
+
+    def test_mixed_issues_and_notes_only_issues_deduct(self) -> None:
+        """1 HIGH issue + 3 HIGH notes → score = 85 (only 1 deduction)."""
+        findings = [
+            _f(id="F-001", finding_type="issue"),
+            _f(id="F-002", finding_type="note"),
+            _f(id="F-003", finding_type="note"),
+            _f(id="F-004", finding_type="note"),
+        ]
+        score = _recompute_score(findings)
+        assert score.overall == 85  # 100 - 15 (one HIGH issue)
+        assert score.deductions.high_count == 1
+
+    def test_mixed_verdict_only_counts_issues(self) -> None:
+        """1 HIGH issue + 3 HIGH notes → PROVISIONAL (1 HIGH), not FAIL (4 HIGH)."""
+        findings = [
+            _f(id="F-001", finding_type="issue"),
+            _f(id="F-002", finding_type="note"),
+            _f(id="F-003", finding_type="note"),
+            _f(id="F-004", finding_type="note"),
+        ]
+        verdict = _derive_verdict(85, findings, "pr_review")
+        assert verdict.status == VerdictStatus.PROVISIONAL
+
+    def test_finding_type_defaults_to_issue(self) -> None:
+        """The ``_f`` helper supplies finding_type="issue"; the model field is required."""
+        finding = _f()
+        assert finding.finding_type == "issue"
+
+    def test_note_in_filter_review_does_not_block(self) -> None:
+        """Full pipeline: 4 HIGH notes → PASS, not FAIL."""
+        findings = [_f(id=f"F-{i:03d}", finding_type="note") for i in range(1, 5)]
+        review = _review(findings)
+        result = filter_review(review, mode="pr_review")
+        assert result.score.overall == 100
+        assert result.verdict.status == VerdictStatus.PASS
+        assert result.verdict.merge_blocking is False
+
+    def test_rule_backed_note_still_deducts(self) -> None:
+        """A note with rule_id is still scored as an issue — rules can't be downgraded."""
+        findings = [
+            _f(id="F-001", finding_type="note", rule_id="secrets-in-diff"),
+            _f(id="F-002", finding_type="note", rule_id="eval-exec"),
+        ]
+        score = _recompute_score(findings)
+        assert score.overall == 70  # 100 - 2*15 (two HIGH issues)
+        assert score.deductions.high_count == 2
+
+    def test_rule_backed_note_triggers_verdict_gate(self) -> None:
+        """2+ HIGH rule-backed notes still trigger auto-FAIL."""
+        findings = [
+            _f(id="F-001", finding_type="note", rule_id="secrets-in-diff"),
+            _f(id="F-002", finding_type="note", rule_id="eval-exec"),
+        ]
+        verdict = _derive_verdict(70, findings, "pr_review")
+        assert verdict.status == VerdictStatus.FAIL
@@ -381,6 +381,7 @@ def _review_with_findings(self, rule_ids: list[str | None]) -> GrippyReview:
         data["findings"] = [
             {
                 "id": f"F-{i:03d}",
+                "finding_type": "issue",
                 "severity": "HIGH",
                 "confidence": 90,
                 "category": "security",
@@ -437,6 +438,7 @@ def _review_with_file_findings(self, rule_files: list[tuple[str, str]]) -> Gripp
         data["findings"] = [
             {
                 "id": f"F-{i:03d}",
+                "finding_type": "issue",
                 "severity": "HIGH",
                 "confidence": 90,
                 "category": "security",
@@ -515,6 +517,7 @@ def _review_dict_with_rule_ids(self, rule_ids: list[str | None]) -> dict:
         data["findings"] = [
             {
                 "id": f"F-{i:03d}",
+                "finding_type": "issue",
                 "severity": "HIGH",
                 "confidence": 90,
                 "category": "security",
@@ -630,6 +633,7 @@ def _review_dict_with_file_findings(
         data["findings"] = [
             {
                 "id": f"F-{i:03d}",
+                "finding_type": "issue",
                 "severity": "HIGH",
                 "confidence": 90,
                 "category": "security",

@@ -49,6 +49,7 @@
 def _make_finding(**overrides: Any) -> Finding:
     defaults: dict[str, Any] = {
         "id": "F-001",
+        "finding_type": "issue",
         "severity": Severity.HIGH,
         "confidence": 85,
         "category": FindingCategory.SECURITY,

@@ -27,6 +27,7 @@ def _minimal_finding(**overrides: object) -> dict:
     """Return a minimal valid Finding dict, with overrides applied."""
     base: dict = {
         "id": "F-001",
+        "finding_type": "issue",
         "severity": "CRITICAL",
         "confidence": 85,
         "category": "security",

@@ -67,6 +67,7 @@ def _make_finding(**overrides: Any) -> Finding:
     """Build a Finding with sane defaults, overridable per-field."""
     defaults: dict[str, Any] = {
         "id": "F-099",
+        "finding_type": "issue",
         "severity": Severity.MEDIUM,
         "confidence": 75,
         "category": FindingCategory.SECURITY,