Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions internal/analyze/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,24 @@ func Run(ctx context.Context, cfg *config.Config, dir string, opts Options) erro
}

// GetGraph returns the display graph for dir, running analysis if the cache
// is cold or force is true. It returns the graph and the zip hash used as the
// cache key (useful for downstream commands).
// is cold or force is true. It returns the graph and the cache key.
//
// Uses git-based fingerprinting (~1ms for clean repos) to check the cache
// before creating a zip. Only creates and uploads the zip on cache miss.
func GetGraph(ctx context.Context, cfg *config.Config, dir string, force bool) (*api.Graph, string, error) {
// Fast path: check cache using git fingerprint before creating zip.
if !force {
fingerprint, err := cache.RepoFingerprint(dir)
if err == nil {
key := cache.AnalysisKey(fingerprint, "graph")
if g, _ := cache.Get(key); g != nil {
ui.Success("Using cached analysis (repoId: %s)", g.RepoID())
return g, key, nil
}
}
}
Comment on lines +37 to +47
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Return one canonical key for the same repo state.

Simple repro: on a cold cache, the first call returns hash at Line 92; the second call on the unchanged repo returns fpKey from Lines 41-44. That breaks the current GetGraph contract in internal/analyze/integration_test.go:64-65, which expects both calls to yield the same key. It also means a legacy zip-hash hit at Lines 65-67 never gets upgraded to the fast path because the fingerprint entry is never backfilled.

💡 Suggested shape
 func GetGraph(ctx context.Context, cfg *config.Config, dir string, force bool) (*api.Graph, string, error) {
-	// Fast path: check cache using git fingerprint before creating zip.
-	if !force {
-		fingerprint, err := cache.RepoFingerprint(dir)
-		if err == nil {
-			key := cache.AnalysisKey(fingerprint, "graph")
-			if g, _ := cache.Get(key); g != nil {
-				ui.Success("Using cached analysis (repoId: %s)", g.RepoID())
-				return g, key, nil
-			}
-		}
-	}
+	var fpKey string
+	if fingerprint, err := cache.RepoFingerprint(dir); err == nil {
+		fpKey = cache.AnalysisKey(fingerprint, "graph")
+		if !force {
+			if g, _ := cache.Get(fpKey); g != nil {
+				ui.Success("Using cached analysis (repoId: %s)", g.RepoID())
+				return g, fpKey, nil
+			}
+		}
+	}
@@
 	if !force {
 		if g, _ := cache.Get(hash); g != nil {
+			if fpKey != "" {
+				if err := cache.Put(fpKey, g); err != nil {
+					ui.Warn("could not write cache: %v", err)
+				}
+				ui.Success("Using cached analysis (repoId: %s)", g.RepoID())
+				return g, fpKey, nil
+			}
 			ui.Success("Using cached analysis (repoId: %s)", g.RepoID())
 			return g, hash, nil
 		}
 	}
@@
-	return g, hash, nil
+	if fpKey != "" {
+		return g, fpKey, nil
+	}
+	return g, hash, nil
 }

Also applies to: 63-68, 91-92

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/analyze/handler.go` around lines 37 - 47, The code returns different
cache keys for the same repo state because the legacy zip-hash path and the
fingerprint fast-path produce different keys; update GetGraph so it always
returns one canonical key based on RepoFingerprint when available: when
RepoFingerprint(dir) succeeds compute fpKey := cache.AnalysisKey(fingerprint,
"graph") and use/return that key; if you hit a graph under the legacy zip/hash
key, backfill the fingerprint entry by storing the same graph under fpKey (e.g.
cache.Set(fpKey, graph) or the equivalent cache API) before returning so
subsequent calls return the canonical fpKey. Ensure references: RepoFingerprint,
AnalysisKey, GetGraph, and the cache get/set calls are updated accordingly.


// Cache miss: create zip and upload.
spin := ui.Start("Creating repository archive…")
zipPath, err := createZip(dir)
spin.Stop()
Expand All @@ -45,6 +60,7 @@ func GetGraph(ctx context.Context, cfg *config.Config, dir string, force bool) (
return nil, "", err
}

// Also check by zip hash (covers non-git repos and fingerprint edge cases).
if !force {
if g, _ := cache.Get(hash); g != nil {
ui.Success("Using cached analysis (repoId: %s)", g.RepoID())
Expand All @@ -60,6 +76,14 @@ func GetGraph(ctx context.Context, cfg *config.Config, dir string, force bool) (
return nil, hash, err
}

// Cache under both keys: fingerprint (fast lookup) and zip hash (fallback).
fingerprint, fpErr := cache.RepoFingerprint(dir)
if fpErr == nil {
fpKey := cache.AnalysisKey(fingerprint, "graph")
if err := cache.Put(fpKey, g); err != nil {
ui.Warn("could not write cache: %v", err)
}
}
if err := cache.Put(hash, g); err != nil {
ui.Warn("could not write cache: %v", err)
}
Expand Down
55 changes: 55 additions & 0 deletions internal/cache/fingerprint.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package cache

import (
"crypto/sha256"
"encoding/hex"
"fmt"
"os/exec"
"strings"
)

// RepoFingerprint returns a fast, content-based cache key for the repo at dir.
//
// For clean git repos (~1ms): returns the commit SHA.
// For dirty git repos (~100ms): returns commitSHA:dirtyHash.
// For non-git dirs: returns empty string and an error.
func RepoFingerprint(dir string) (string, error) {
commitSHA, err := gitOutput(dir, "rev-parse", "HEAD")
if err != nil {
return "", fmt.Errorf("not a git repo: %w", err)
}

dirty, err := gitOutput(dir, "status", "--porcelain", "--untracked-files=no")
if err != nil {
return commitSHA, nil
}
Comment on lines +23 to +25
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Don’t treat git status failure as a clean fingerprint.

At Line 24, returning commitSHA on status failure can create false cache hits for unknown repo state. Prefer returning an error (or at least a dirty sentinel) so caller falls back to zip-hash safely.

✅ Minimal correctness fix
 	dirty, err := gitOutput(dir, "status", "--porcelain", "--untracked-files=all")
 	if err != nil {
-		return commitSHA, nil
+		return "", fmt.Errorf("git status failed: %w", err)
 	}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if err != nil {
return commitSHA, nil
}
if err != nil {
return "", fmt.Errorf("git status failed: %w", err)
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/cache/fingerprint.go` around lines 23 - 25, The current code returns
commitSHA when running "git status" fails, causing false clean fingerprints;
instead propagate the error so callers can fallback to zip-hash. In the function
in internal/cache/fingerprint.go where you check err after running git status
(the branch that currently does "if err != nil { return commitSHA, nil }"),
replace that return with returning an empty/zero fingerprint and the wrapped
error (e.g., return "", fmt.Errorf("git status failed: %w", err)) so callers
receive the failure; update any callers of this function to handle the returned
error path accordingly.


if dirty == "" {
return commitSHA, nil
}

// Dirty: hash the diff to capture uncommitted changes.
diff, err := gitOutput(dir, "diff", "HEAD")
if err != nil {
return commitSHA + ":dirty", nil
}
h := sha256.Sum256([]byte(diff))
return commitSHA + ":" + hex.EncodeToString(h[:8]), nil
Comment on lines +22 to +37
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

tmp="$(mktemp -d)"
cd "$tmp"

git init -q
git config user.email "test@test.com"
git config user.name "test"

echo 'package main' > main.go
git add main.go
git commit -q -m "init"

echo '--- baseline ---'
git status --porcelain --untracked-files=no | sed -n '1,5p'
git diff HEAD | wc -c

echo 'package main // new file' > new.go

echo '--- after adding untracked file ---'
echo '[status with --untracked-files=no]'
git status --porcelain --untracked-files=no | sed -n '1,5p'
echo '[status with --untracked-files=all]'
git status --porcelain --untracked-files=all | sed -n '1,5p'
echo '[git diff HEAD bytes]'
git diff HEAD | wc -c

Repository: supermodeltools/cli

Length of output: 301


Untracked files bypass cache invalidation and cause stale results to be returned.

At lines 22 and 31–37, the code uses --untracked-files=no with git status and git diff HEAD to detect dirty repos. The test confirms this misses untracked files entirely: when you add a new untracked file like new.go, git status --porcelain --untracked-files=no returns empty—the file is invisible. Since git diff HEAD also ignores untracked files, the fingerprint stays as the plain commit SHA instead of switching to the :dirty variant. This means the cache will keep serving old analysis results even though the codebase has actually changed.

Concrete scenario: you commit code, run analysis (cached as commit SHA), then add a new untracked file, run analysis again—and get the stale cached result instead of re-analyzing.

The fix is straightforward: switch to --untracked-files=all and check for the ?? prefix (which marks untracked files), then return an error to skip caching.

Suggested fix
-	dirty, err := gitOutput(dir, "status", "--porcelain", "--untracked-files=no")
+	dirty, err := gitOutput(dir, "status", "--porcelain", "--untracked-files=all")
 	if err != nil {
 		return commitSHA, nil
 	}

 	if dirty == "" {
 		return commitSHA, nil
 	}
+
+	// Conservative: if untracked files exist, skip fingerprint caching to avoid stale hits.
+	if strings.Contains(dirty, "?? ") {
+		return "", fmt.Errorf("untracked files present")
+	}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
dirty, err := gitOutput(dir, "status", "--porcelain", "--untracked-files=no")
if err != nil {
return commitSHA, nil
}
if dirty == "" {
return commitSHA, nil
}
// Dirty: hash the diff to capture uncommitted changes.
diff, err := gitOutput(dir, "diff", "HEAD")
if err != nil {
return commitSHA + ":dirty", nil
}
h := sha256.Sum256([]byte(diff))
return commitSHA + ":" + hex.EncodeToString(h[:8]), nil
dirty, err := gitOutput(dir, "status", "--porcelain", "--untracked-files=all")
if err != nil {
return commitSHA, nil
}
if dirty == "" {
return commitSHA, nil
}
// Conservative: if untracked files exist, skip fingerprint caching to avoid stale hits.
if strings.Contains(dirty, "?? ") {
return "", fmt.Errorf("untracked files present")
}
// Dirty: hash the diff to capture uncommitted changes.
diff, err := gitOutput(dir, "diff", "HEAD")
if err != nil {
return commitSHA + ":dirty", nil
}
h := sha256.Sum256([]byte(diff))
return commitSHA + ":" + hex.EncodeToString(h[:8]), nil
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/cache/fingerprint.go` around lines 22 - 37, The current fingerprint
logic calls gitOutput(dir, "status", "--porcelain", "--untracked-files=no") and
therefore ignores untracked files; change the git status call to use
"--untracked-files=all" (i.e. gitOutput(dir, "status", "--porcelain",
"--untracked-files=all")), then inspect the status output for lines beginning
with "??" (untracked files) and, if any are present, return a non-cacheable
result (e.g. return an error to skip caching or return commitSHA + ":dirty" per
project convention) instead of treating the repo as clean; keep the existing
diff hashing path (gitOutput(dir, "diff", "HEAD") and sha256 logic) for truly
modified files.

}

// gitOutput runs a git command in dir and returns its trimmed stdout.
func gitOutput(dir string, args ...string) (string, error) {
cmd := exec.Command("git", append([]string{"-C", dir}, args...)...)
out, err := cmd.Output()
if err != nil {
return "", err
}
return strings.TrimSpace(string(out)), nil
}

// AnalysisKey builds a cache key for a specific analysis type on a repo state.
func AnalysisKey(fingerprint, analysisType string) string {
h := sha256.New()
fmt.Fprintf(h, "%s\x00%s", fingerprint, analysisType)
return hex.EncodeToString(h.Sum(nil))
}
114 changes: 114 additions & 0 deletions internal/cache/fingerprint_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package cache

import (
"os"
"os/exec"
"path/filepath"
"testing"
)

func initGitRepo(t *testing.T) string {
t.Helper()
dir := t.TempDir()
run(t, dir, "git", "init")
run(t, dir, "git", "config", "user.email", "test@test.com")
run(t, dir, "git", "config", "user.name", "test")
if err := os.WriteFile(filepath.Join(dir, "main.go"), []byte("package main\n"), 0o600); err != nil {
t.Fatal(err)
}
run(t, dir, "git", "add", ".")
run(t, dir, "git", "commit", "-m", "init")
return dir
}

func run(t *testing.T, dir, name string, args ...string) {
t.Helper()
cmd := exec.Command(name, args...)
cmd.Dir = dir
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
t.Fatalf("%s %v: %v", name, args, err)
}
}

func TestRepoFingerprint_CleanRepo(t *testing.T) {
dir := initGitRepo(t)
fp, err := RepoFingerprint(dir)
if err != nil {
t.Fatal(err)
}
if fp == "" {
t.Fatal("expected non-empty fingerprint")
}
// Should be a plain commit SHA (40 hex chars).
if len(fp) != 40 {
t.Errorf("expected 40-char commit SHA, got %q (%d chars)", fp, len(fp))
}
}

func TestRepoFingerprint_DirtyRepo(t *testing.T) {
dir := initGitRepo(t)
// Modify a tracked file.
if err := os.WriteFile(filepath.Join(dir, "main.go"), []byte("package main\n// dirty\n"), 0o600); err != nil {
t.Fatal(err)
}
fp, err := RepoFingerprint(dir)
if err != nil {
t.Fatal(err)
}
// Dirty fingerprint should contain a colon separator.
if len(fp) <= 40 {
t.Errorf("expected dirty fingerprint (>40 chars), got %q", fp)
}
}

func TestRepoFingerprint_StableForClean(t *testing.T) {
dir := initGitRepo(t)
fp1, _ := RepoFingerprint(dir)
fp2, _ := RepoFingerprint(dir)
Comment on lines +68 to +69
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

These tests should fail fast on fingerprint errors.

At Line 68, Line 69, Line 77, and Line 85, ignored errors can mask regressions (e.g., empty fingerprints compared as equal). Please assert err == nil in each call.

🔧 Suggested test hardening
-	fp1, _ := RepoFingerprint(dir)
-	fp2, _ := RepoFingerprint(dir)
+	fp1, err := RepoFingerprint(dir)
+	if err != nil {
+		t.Fatal(err)
+	}
+	fp2, err := RepoFingerprint(dir)
+	if err != nil {
+		t.Fatal(err)
+	}
 	if fp1 != fp2 {
 		t.Errorf("fingerprint should be stable: %q != %q", fp1, fp2)
 	}

Also applies to: 77-77, 85-85

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/cache/fingerprint_test.go` around lines 68 - 69, The tests call
RepoFingerprint twice (and at other spots) while discarding the returned error,
which can hide failures; update each call to RepoFingerprint in
fingerprint_test.go (including the occurrences currently at the locations
referenced) to assert that err == nil immediately after the call (e.g., using
the test framework's require/Assert/if err != nil t.Fatalf), so failures fail
fast and do not allow empty/invalid fingerprints to be compared as equal.

if fp1 != fp2 {
t.Errorf("fingerprint should be stable: %q != %q", fp1, fp2)
}
}

func TestRepoFingerprint_ChangesAfterCommit(t *testing.T) {
dir := initGitRepo(t)
fp1, _ := RepoFingerprint(dir)

if err := os.WriteFile(filepath.Join(dir, "new.go"), []byte("package main\n"), 0o600); err != nil {
t.Fatal(err)
}
run(t, dir, "git", "add", ".")
run(t, dir, "git", "commit", "-m", "second")

fp2, _ := RepoFingerprint(dir)
if fp1 == fp2 {
t.Error("fingerprint should change after commit")
}
}

func TestRepoFingerprint_NotGitRepo(t *testing.T) {
dir := t.TempDir()
_, err := RepoFingerprint(dir)
if err == nil {
t.Error("expected error for non-git dir")
}
}

func TestAnalysisKey_DifferentTypes(t *testing.T) {
fp := "abc123"
k1 := AnalysisKey(fp, "graph")
k2 := AnalysisKey(fp, "dead-code")
if k1 == k2 {
t.Error("different analysis types should produce different keys")
}
}

func TestAnalysisKey_Stable(t *testing.T) {
k1 := AnalysisKey("abc", "graph")
k2 := AnalysisKey("abc", "graph")
if k1 != k2 {
t.Error("same inputs should produce same key")
}
}
Loading