diff --git a/CLAUDE.md b/CLAUDE.md index 0a8b4fb..c3f8a3f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -142,6 +142,28 @@ system handles MCP registration, hooks, and skills declaratively via: └── testdata/ # Fixtures for E2E tests ``` +## Output & Logging + +Lumen has two execution contexts with distinct output strategies: + +**Interactive** (`lumen index`, `lumen purge`, `lumen search`): +- Progress and status → `tui.Progress` (pterm) on **stderr** +- Completion summaries → `fmt.Printf` on **stdout** +- Errors → `fmt.Fprintf(os.Stderr, ...)` + +**Background / MCP** (`lumen stdio` MCP server, background indexer spawned by +SessionStart hook): +- All output → `slog` (JSON) → `~/.local/share/lumen/debug.log` +- Use `newDebugLogger()` from `cmd/log.go` — opens the log file; falls back to + stderr only if the file cannot be created +- stderr of the background indexer process is set to `nil` (discarded) so that + pterm output never pollutes the log file + +**Rule**: never mix these. Interactive commands use tui/fmt; background/MCP code +uses slog. If a command can run in both modes (e.g. `lumen index`), add slog for +the background path and keep tui/fmt for the interactive path — they coexist +because slog writes to the log file while tui writes to the process stderr. + ## Key Design Decisions - **Merkle tree for diffs**: Avoid re-indexing unchanged code diff --git a/cmd/hook.go b/cmd/hook.go index 827a8bc..959d301 100644 --- a/cmd/hook.go +++ b/cmd/hook.go @@ -20,6 +20,7 @@ import ( "os" "path/filepath" "strings" + "time" "github.com/spf13/cobra" @@ -27,6 +28,11 @@ import ( "github.com/ory/lumen/internal/store" ) +// backgroundIndexStaleness is how old last_indexed_at must be before +// SessionStart spawns a background indexer. This prevents every new terminal +// from triggering a full merkle walk when the index was just updated. +const backgroundIndexStaleness = 5 * time.Minute + // NOTE: Hooks are now declared in hooks/hooks.json (plugin system). 
// The hook subcommands remain as the execution targets for those declarations. @@ -79,10 +85,6 @@ func runHookSessionStart(_ *cobra.Command, args []string) error { cwd, _ = os.Getwd() } - // Kick off a background incremental re-index so the index is fresh - // by the time the first semantic_search arrives. - spawnBackgroundIndexer(cwd) - content := generateSessionContext(mcpName, cwd) out := hookOutput{ @@ -135,6 +137,15 @@ func generateSessionContextInternal(mcpName, cwd string, findDonor func(string, } defer func() { _ = s.Close() }() + // Spawn background indexer if the index is stale or has never been + // successfully completed. This avoids spawning on every session start + // when the index was recently updated. + if val, metaErr := s.GetMeta("last_indexed_at"); metaErr != nil || val == "" { + bgIndexer(cwd) + } else if t, parseErr := time.Parse(time.RFC3339, val); parseErr != nil || time.Since(t) > backgroundIndexStaleness { + bgIndexer(cwd) + } + stats, err := s.Stats() if err != nil { return directive diff --git a/cmd/hook_spawn_unix.go b/cmd/hook_spawn_unix.go index c8e20c9..c7cf4b6 100644 --- a/cmd/hook_spawn_unix.go +++ b/cmd/hook_spawn_unix.go @@ -19,10 +19,7 @@ package cmd import ( "os" "os/exec" - "path/filepath" "syscall" - - "github.com/ory/lumen/internal/config" ) // spawnBackgroundIndexer launches "lumen index " as a fully @@ -39,13 +36,11 @@ func spawnBackgroundIndexer(projectPath string) { } cmd := exec.Command(exe, "index", projectPath) cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true} + // Discard stdout and stderr — the background indexer uses slog which + // writes structured JSON directly to debug.log. Piping stderr to the log + // file would mix in pterm progress output with the structured log lines. 
cmd.Stdout = nil - - logPath := filepath.Join(config.XDGDataDir(), "lumen", "debug.log") - if f, err := os.OpenFile(logPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644); err == nil { - cmd.Stderr = f - defer func() { _ = f.Close() }() - } + cmd.Stderr = nil if err := cmd.Start(); err != nil { return diff --git a/cmd/hook_test.go b/cmd/hook_test.go index 5eb92b2..a75d18f 100644 --- a/cmd/hook_test.go +++ b/cmd/hook_test.go @@ -20,10 +20,31 @@ import ( "path/filepath" "strings" "testing" + "time" "github.com/ory/lumen/internal/config" + "github.com/ory/lumen/internal/store" ) +// writeHookTestDB creates a minimal SQLite DB stamped with last_indexed_at, +// using the configured embedding model's dimensions so store.New doesn't reset +// the schema when generateSessionContextInternal opens it. +func writeHookTestDB(t *testing.T, dbPath string, lastIndexedAt time.Time) { + t.Helper() + cfg, err := config.Load() + if err != nil { + t.Fatalf("config.Load: %v", err) + } + s, err := store.New(dbPath, cfg.Dims) + if err != nil { + t.Fatalf("store.New: %v", err) + } + defer func() { _ = s.Close() }() + if err := s.SetMeta("last_indexed_at", lastIndexedAt.UTC().Format(time.RFC3339)); err != nil { + t.Fatalf("SetMeta: %v", err) + } +} + // TestMain detects when the cmd test binary is invoked as a background // indexer subprocess (via spawnBackgroundIndexer → os.Executable()) and exits // immediately instead of running the full test suite. Without this guard, @@ -208,34 +229,55 @@ func TestGenerateSessionContextInternal_SpawnsWhenNoDB(t *testing.T) { }) } -func TestGenerateSessionContextInternal_NoSpawnWhenDBExists(t *testing.T) { +func TestGenerateSessionContextInternal_NoSpawnWhenFresh(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_DATA_HOME", tmpDir) - // Use the same model the function will load so the DB path matches. 
cfg, err := config.Load() if err != nil { t.Fatalf("config.Load: %v", err) } - dbPath := config.DBPathForProject("/myproject", cfg.Model) if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil { t.Fatal(err) } - if err := os.WriteFile(dbPath, []byte{}, 0o644); err != nil { + writeHookTestDB(t, dbPath, time.Now().Add(-30*time.Second)) + + called := false + generateSessionContextInternal("lumen", "/myproject", + func(_, _ string) string { return "" }, + func(_ string) { called = true }, + ) + if called { + t.Fatal("bgIndexer must not be called when index was recently updated") + } +} + +func TestGenerateSessionContextInternal_SpawnsWhenStale(t *testing.T) { + tmpDir := t.TempDir() + t.Setenv("XDG_DATA_HOME", tmpDir) + + cfg, err := config.Load() + if err != nil { + t.Fatalf("config.Load: %v", err) + } + dbPath := config.DBPathForProject("/myproject", cfg.Model) + if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil { t.Fatal(err) } + writeHookTestDB(t, dbPath, time.Now().Add(-10*time.Minute)) called := false generateSessionContextInternal("lumen", "/myproject", - func(_, _ string) string { return "/some/donor.db" }, + func(_, _ string) string { return "" }, func(_ string) { called = true }, ) - if called { - t.Fatal("bgIndexer must not be called when an index already exists") + if !called { + t.Fatal("bgIndexer must be called when index is stale") } } + func TestGenerateSessionContextInternal_MessageWithDonor(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_DATA_HOME", tmpDir) diff --git a/cmd/index.go b/cmd/index.go index 59669f5..9208a2f 100644 --- a/cmd/index.go +++ b/cmd/index.go @@ -17,6 +17,7 @@ package cmd import ( "context" "fmt" + "log/slog" "os" "os/signal" "path/filepath" @@ -45,6 +46,11 @@ var indexCmd = &cobra.Command{ } func runIndex(cmd *cobra.Command, args []string) error { + logger, logFile := newDebugLogger() + if logFile != nil { + defer func() { _ = logFile.Close() }() + } + cfg, err := config.Load() if err != nil { return err 
@@ -72,6 +78,7 @@ func runIndex(cmd *cobra.Command, args []string) error { if lock == nil { // Another indexer is already running for this project — skip silently. // This is the normal case when multiple Claude terminals are open. + logger.Info("index skipped: another indexer is already running", "project", projectPath) fmt.Fprintln(os.Stderr, "Another indexer is already running for this project. Skipping.") return nil } @@ -82,12 +89,13 @@ func runIndex(cmd *cobra.Command, args []string) error { ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) defer stop() - idx, err := setupIndexer(&cfg, dbPath) + idx, err := setupIndexer(&cfg, dbPath, logger) if err != nil { return err } defer func() { _ = idx.Close() }() + logger.Info("indexing started", "project", projectPath, "model", cfg.Model, "dims", cfg.Dims) p := tui.NewProgress(os.Stderr) p.Info(fmt.Sprintf("Indexing %s (model: %s, dims: %d)", projectPath, cfg.Model, cfg.Dims)) @@ -98,13 +106,47 @@ func runIndex(cmd *cobra.Command, args []string) error { // A signal arrived; treat as clean exit. If an unrelated error // also occurred in the same instant, it is intentionally dropped — // the cancellation is the primary cause and the lock will be released. 
+ logger.Info("indexing cancelled by signal", "project", projectPath) return nil } + logger.Error("indexing failed", "project", projectPath, "err", err) return err } + elapsed := time.Since(start).Round(time.Millisecond) + if stats.Reason == "already fresh" { + logger.Info("index already fresh", + "project", projectPath, + "elapsed", elapsed.String(), + ) + } else { + logger.Info("indexing complete", + "project", projectPath, + "reason", stats.Reason, + "total_files", stats.TotalFiles, + "files_unchanged", stats.TotalFiles-stats.FilesChanged, + "files_added", stats.FilesAdded, + "files_modified", stats.FilesModified, + "files_removed", stats.FilesRemoved, + "indexed_files", stats.IndexedFiles, + "chunks_created", stats.ChunksCreated, + "old_root_hash", stats.OldRootHash, + "new_root_hash", stats.NewRootHash, + "elapsed", elapsed.String(), + ) + } + if stats.Reason != "" { + fmt.Printf("Reason: %s\n", stats.Reason) + } + if stats.OldRootHash != "" { + fmt.Printf("Root hash: %s -> %s\n", stats.OldRootHash[:16], stats.NewRootHash[:16]) + } else if stats.NewRootHash != "" { + fmt.Printf("Root hash: (none) -> %s\n", stats.NewRootHash[:16]) + } + fmt.Printf("Files: %d added, %d modified, %d removed (%d total in project)\n", + stats.FilesAdded, stats.FilesModified, stats.FilesRemoved, stats.TotalFiles) fmt.Printf("Done. Indexed %d files, %d chunks in %s.\n", - stats.IndexedFiles, stats.ChunksCreated, time.Since(start).Round(time.Millisecond)) + stats.IndexedFiles, stats.ChunksCreated, elapsed) return nil } @@ -124,7 +166,7 @@ func applyModelFlag(cmd *cobra.Command, cfg *config.Config) error { } // setupIndexer receives dbPath so it is computed exactly once in runIndex. 
-func setupIndexer(cfg *config.Config, dbPath string) (*index.Indexer, error) { +func setupIndexer(cfg *config.Config, dbPath string, logger *slog.Logger) (*index.Indexer, error) { emb, err := newEmbedder(*cfg) if err != nil { return nil, fmt.Errorf("create embedder: %w", err) @@ -134,6 +176,7 @@ func setupIndexer(cfg *config.Config, dbPath string) (*index.Indexer, error) { if err != nil { return nil, fmt.Errorf("create indexer: %w", err) } + idx.SetLogger(logger) return idx, nil } @@ -152,6 +195,7 @@ func performIndexing(ctx context.Context, cmd *cobra.Command, idx *index.Indexer } if !reindexed { + stats.Reason = "already fresh" fmt.Println("Index is already up to date.") } diff --git a/cmd/search.go b/cmd/search.go index 8f9dc2e..1a64f45 100644 --- a/cmd/search.go +++ b/cmd/search.go @@ -151,7 +151,7 @@ func runSearch(cmd *cobra.Command, args []string) error { tr.record("path resolution", indexRoot) // Span 2: indexer setup - idx, err := setupIndexer(&cfg, indexRoot) + idx, err := setupIndexer(&cfg, indexRoot, nil) if err != nil { return fmt.Errorf("setup indexer: %w", err) } diff --git a/cmd/stdio.go b/cmd/stdio.go index dc239d2..75a0c89 100644 --- a/cmd/stdio.go +++ b/cmd/stdio.go @@ -58,7 +58,6 @@ type SemanticSearchInput struct { Cwd string `json:"cwd,omitempty" jsonschema:"The current working directory / project root. Used as index root when provided."` NResults int `json:"n_results,omitempty" jsonschema:"Max results to return, default 8"` MinScore *float64 `json:"min_score,omitempty" jsonschema:"Minimum score threshold (-1 to 1). Results below this score are excluded. Default depends on embedding model. Use -1 to return all results."` - ForceReindex bool `json:"force_reindex,omitempty" jsonschema:"Force full re-index before searching"` Summary bool `json:"summary,omitempty" jsonschema:"When true, return only file path, symbol, kind, line range, and score — no code content. 
Useful for location-only queries."` MaxLines int `json:"max_lines,omitempty" jsonschema:"Truncate each code snippet to this many lines. Default: unlimited."` } @@ -81,6 +80,7 @@ type SemanticSearchOutput struct { IndexedFiles int `json:"indexed_files,omitempty"` FilteredHint string `json:"filtered_hint,omitempty"` SeedWarning string `json:"seed_warning,omitempty"` + StaleWarning string `json:"stale_warning,omitempty"` } // IndexStatusInput defines the parameters for the index_status tool. @@ -120,6 +120,8 @@ type HealthCheckOutput struct { // adds 1-3s of pure filesystem I/O even when nothing has changed. // Override with LUMEN_FRESHNESS_TTL (e.g. "1s", "30s") for testing. const defaultFreshnessTTL = 30 * time.Second +const reindexTimeout = 15 * time.Second +const backgroundReindexMaxDuration = 10 * time.Minute type cacheEntry struct { idx *index.Indexer @@ -136,9 +138,11 @@ type indexerCache struct { model string cfg config.Config freshnessTTL time.Duration // 0 means use defaultFreshnessTTL - findDonorFunc func(string, string) string // nil uses config.FindDonorIndex - seedFunc func(string, string) (bool, error) // nil uses index.SeedFromDonor - log *slog.Logger + findDonorFunc func(string, string) string // nil uses config.FindDonorIndex + seedFunc func(string, string) (bool, error) // nil uses index.SeedFromDonor + ensureFreshFunc func(ctx context.Context, idx *index.Indexer, projectDir string, progress index.ProgressFunc) (bool, index.Stats, error) // nil uses idx.EnsureFresh + log *slog.Logger + wg sync.WaitGroup // tracks background reindex goroutines } // logger returns ic.log, falling back to a discarding logger when the field @@ -150,8 +154,12 @@ func (ic *indexerCache) logger() *slog.Logger { return ic.log } -// Close closes all cached indexers. Call on MCP server shutdown. +// Close waits for any background reindex goroutines to finish, then +// closes all cached indexers. Call on MCP server shutdown. 
+// Worst-case wait is backgroundReindexMaxDuration (10 min) if a +// background reindex is in progress. func (ic *indexerCache) Close() { + ic.wg.Wait() ic.mu.Lock() defer ic.mu.Unlock() seen := make(map[*index.Indexer]bool) @@ -316,8 +324,14 @@ func (ic *indexerCache) getOrCreate(projectPath string, preferredRoot string) (* } // If a parent index is already cached, alias and return. + // Guard: only reuse the cached entry if it is the actual owner of effectiveRoot + // (entry.effectiveRoot == effectiveRoot). If the cache holds a guest-alias entry + // for effectiveRoot (e.g. cache["src"] = {ccIdx, effectiveRoot:"cc"} written when + // "src" was a projectPath routed to cc), reusing it here would pair the wrong + // indexer with the wrong directory scope, causing EnsureFresh to scan one directory + // and write results into a different DB. if effectiveRoot != projectPath { - if entry, ok := ic.cache[effectiveRoot]; ok { + if entry, ok := ic.cache[effectiveRoot]; ok && entry.effectiveRoot == effectiveRoot { ic.cache[projectPath] = cacheEntry{idx: entry.idx, effectiveRoot: effectiveRoot} return entry.idx, effectiveRoot, "", nil } @@ -330,7 +344,15 @@ func (ic *indexerCache) getOrCreate(projectPath string, preferredRoot string) (* // Seed from sibling worktree if this is a new index. 
var seedWarning string + isNewDB := false if _, statErr := os.Stat(dbPath); os.IsNotExist(statErr) { + isNewDB = true + ic.logger().Info("creating new index database", + "effective_root", effectiveRoot, + "db_path", dbPath, + "model", ic.model, + "index_version", config.IndexVersion, + ) findDonor := ic.findDonorFunc if findDonor == nil { findDonor = config.FindDonorIndex @@ -359,6 +381,7 @@ func (ic *indexerCache) getOrCreate(projectPath string, preferredRoot string) (* if err != nil { return nil, "", "", fmt.Errorf("create indexer: %w", err) } + idx.SetLogger(ic.logger()) // Pre-populate the freshness TTL if the index was recently stamped by // background pre-warming (SessionStart hook). This avoids a redundant @@ -378,6 +401,9 @@ func (ic *indexerCache) getOrCreate(projectPath string, preferredRoot string) (* "project_path", projectPath, "effective_root", effectiveRoot, "db_path", dbPath, + "new_index", isNewDB, + "model", ic.model, + "index_version", config.IndexVersion, ) ic.cache[effectiveRoot] = entry @@ -398,7 +424,6 @@ func (ic *indexerCache) handleSemanticSearch(ctx context.Context, req *mcp.CallT ic.logger().Debug("semantic search request", "cwd", input.Cwd, "search_path", input.Path, - "force_reindex", input.ForceReindex, "n_results", input.NResults, ) @@ -410,7 +435,7 @@ func (ic *indexerCache) handleSemanticSearch(ctx context.Context, req *mcp.CallT progress := buildProgressFunc(ctx, req) dbPath := config.DBPathForProject(effectiveRoot, ic.model) - out, err := ic.ensureIndexed(ctx, idx, input, effectiveRoot, dbPath, progress) + out, err := ic.ensureIndexed(idx, input, effectiveRoot, dbPath, progress) if err != nil { return nil, nil, err } @@ -539,6 +564,7 @@ func validateSearchInput(input *SemanticSearchInput) error { return nil } + func buildProgressFunc(ctx context.Context, req *mcp.CallToolRequest) index.ProgressFunc { token := req.Params.GetProgressToken() if token == nil { @@ -546,8 +572,7 @@ func buildProgressFunc(ctx context.Context, req 
*mcp.CallToolRequest) index.Prog } return func(current, total int, message string) { if total == 0 { - // Skip indeterminate notifications (e.g. "Scanning files...") — - // MCP progress requires Total > 0 for meaningful progress tracking. + // Skip indeterminate notifications — MCP progress requires Total > 0. return } _ = req.Session.NotifyProgress(ctx, &mcp.ProgressNotificationParams{ @@ -559,35 +584,10 @@ func buildProgressFunc(ctx context.Context, req *mcp.CallToolRequest) index.Prog } } -func (ic *indexerCache) ensureIndexed(ctx context.Context, idx *index.Indexer, input SemanticSearchInput, projectDir string, dbPath string, progress index.ProgressFunc) (SemanticSearchOutput, error) { +func (ic *indexerCache) ensureIndexed(idx *index.Indexer, input SemanticSearchInput, projectDir string, dbPath string, progress index.ProgressFunc) (SemanticSearchOutput, error) { start := time.Now() out := SemanticSearchOutput{} - if input.ForceReindex { - // Skip force reindex if background indexer is running to avoid - // concurrent SQLite writes that could exceed busy_timeout. - if indexlock.IsHeld(indexlock.LockPathForDB(dbPath)) { - ic.logger().Info("force reindex skipped: background indexer is running", "project", projectDir) - return out, nil - } - ic.logger().Info("force reindex requested", "cwd", input.Cwd, "search_path", input.Path, "effective_root", projectDir) - stats, err := idx.Index(ctx, projectDir, true, progress) - if err != nil { - return out, fmt.Errorf("force reindex: %w", err) - } - ic.touchChecked(projectDir) - out.Reindexed = true - out.IndexedFiles = stats.IndexedFiles - ic.logger().Info("force reindex complete", - "cwd", input.Cwd, - "search_path", input.Path, - "effective_root", projectDir, - "indexed_files", stats.IndexedFiles, - "elapsed_ms", time.Since(start).Milliseconds(), - ) - return out, nil - } - // Skip the merkle tree walk if we confirmed freshness recently. The walk // costs 1-3s on large projects even when nothing changed. 
if ic.recentlyChecked(projectDir) { @@ -603,6 +603,8 @@ func (ic *indexerCache) ensureIndexed(ctx context.Context, idx *index.Indexer, i // avoid duplicating the in-progress Merkle walk. The TOCTOU race is benign: // worst case is redundant work, not corruption (SQLite WAL mode). if indexlock.IsHeld(indexlock.LockPathForDB(dbPath)) { + ic.logger().Info("skipping reindex: background indexer is running", "project", projectDir) + out.StaleWarning = "Index is being updated in the background. Results may be incomplete or outdated. Use standard tools for the next 10 tool calls before trying semantic_search again." return out, nil } @@ -611,37 +613,123 @@ func (ic *indexerCache) ensureIndexed(ctx context.Context, idx *index.Indexer, i "effective_root", projectDir, ) - reindexed, stats, err := idx.EnsureFresh(ctx, projectDir, progress) - elapsed := time.Since(start) - if err != nil { - return out, fmt.Errorf("ensure fresh: %w", err) + // Run EnsureFresh in a goroutine with a 15s timeout. If reindexing + // takes longer, return stale results with a warning while the + // goroutine continues in the background. 
+ type freshResult struct { + reindexed bool + stats index.Stats + err error + skipped bool // true when flock was held by another process } - ic.touchChecked(projectDir) + done := make(chan freshResult, 1) // buffered: goroutine must never block on send - if !reindexed { - ic.logger().Debug("index fresh, caching result", - "cwd", input.Cwd, - "effective_root", projectDir, - "elapsed_ms", elapsed.Milliseconds(), - ) - } else { - ic.logger().Info("reindex triggered", - "cwd", input.Cwd, - "search_path", input.Path, - "effective_root", projectDir, - "total_project_files", stats.TotalFiles, - "files_indexed", stats.IndexedFiles, - "chunks_created", stats.ChunksCreated, - "files_changed", stats.FilesChanged, - "elapsed_ms", elapsed.Milliseconds(), - ) - } + bgCtx, bgCancel := context.WithTimeout(context.Background(), backgroundReindexMaxDuration) + + lockPath := indexlock.LockPathForDB(dbPath) + ic.wg.Go(func() { + defer bgCancel() - out.Reindexed = reindexed - if reindexed { - out.IndexedFiles = stats.IndexedFiles + lk, lockErr := indexlock.TryAcquire(lockPath) + if lockErr != nil { + ic.logger().Warn("background reindex: failed to acquire lock", "project", projectDir, "err", lockErr) + done <- freshResult{skipped: true} + return + } + if lk == nil { + // Another process grabbed the lock between our IsHeld check and now. + ic.logger().Debug("background reindex: lock held by another process, skipping", "project", projectDir) + done <- freshResult{skipped: true} + return + } + defer lk.Release() + + // If a recent external process (e.g. lumen index from SessionStart) + // already updated the index within freshnessTTL, trust the DB timestamp + // and skip the expensive merkle tree walk. 
+ if lastAt, ok := idx.LastIndexedAt(); ok { + ttl := ic.freshnessTTL + if ttl == 0 { + ttl = defaultFreshnessTTL + } + if time.Since(lastAt) < ttl { + ic.logger().Debug("skipping merkle walk: index recently updated by external process", + "project", projectDir, + "last_indexed_at", lastAt, + ) + ic.touchChecked(projectDir) + done <- freshResult{} + return + } + } + + ensureFresh := ic.ensureFreshFunc + if ensureFresh == nil { + ensureFresh = func(ctx context.Context, idx *index.Indexer, dir string, p index.ProgressFunc) (bool, index.Stats, error) { + return idx.EnsureFresh(ctx, dir, p) + } + } + reindexed, stats, err := ensureFresh(bgCtx, idx, projectDir, progress) + if err != nil { + ic.logger().Warn("background reindex failed", "project", projectDir, "err", err) + } else { + ic.touchChecked(projectDir) + } + done <- freshResult{reindexed: reindexed, stats: stats, err: err} + }) + + timer := time.NewTimer(reindexTimeout) + defer timer.Stop() + + select { + case result := <-done: + bgCancel() // release context resources early + if result.skipped { + ic.logger().Info("reindex skipped: lock held by another process", "project", projectDir) + out.StaleWarning = "Index is being updated in the background. Results may be incomplete or outdated. Use standard tools for the next 10 tool calls before trying semantic_search again." 
+ return out, nil + } + if result.err != nil { + return out, fmt.Errorf("ensure fresh: %w", result.err) + } + elapsed := time.Since(start) + if !result.reindexed { + ic.logger().Debug("index fresh, caching result", + "cwd", input.Cwd, + "effective_root", projectDir, + "elapsed_ms", elapsed.Milliseconds(), + ) + } else { + ic.logger().Info("reindex triggered", + "cwd", input.Cwd, + "search_path", input.Path, + "effective_root", projectDir, + "reason", result.stats.Reason, + "total_project_files", result.stats.TotalFiles, + "files_indexed", result.stats.IndexedFiles, + "chunks_created", result.stats.ChunksCreated, + "files_added", result.stats.FilesAdded, + "files_modified", result.stats.FilesModified, + "files_removed", result.stats.FilesRemoved, + "old_root_hash", result.stats.OldRootHash, + "new_root_hash", result.stats.NewRootHash, + "elapsed_ms", elapsed.Milliseconds(), + ) + } + out.Reindexed = result.reindexed + if result.reindexed { + out.IndexedFiles = result.stats.IndexedFiles + } + return out, nil + + case <-timer.C: + ic.logger().Info("reindex timeout, returning stale results", + "project", projectDir, + "timeout", reindexTimeout, + ) + out.StaleWarning = "Index is being updated in the background. Results may be incomplete or outdated. Use standard tools for the next 10 tool calls before trying semantic_search again." 
+ return out, nil } - return out, nil } // recentlyChecked reports whether the index for projectDir was confirmed fresh @@ -1003,6 +1091,10 @@ func formatSearchResults(projectPath string, out SemanticSearchOutput) string { b.WriteString("\nWarning: ") b.WriteString(out.SeedWarning) } + if out.StaleWarning != "" { + b.WriteString("\nWarning: ") + b.WriteString(out.StaleWarning) + } if out.FilteredHint != "" { b.WriteString("\n") b.WriteString(out.FilteredHint) @@ -1018,6 +1110,9 @@ func formatSearchResults(projectPath string, out SemanticSearchOutput) string { if out.SeedWarning != "" { fmt.Fprintf(&b, "\nWarning: %s", out.SeedWarning) } + if out.StaleWarning != "" { + fmt.Fprintf(&b, "\nWarning: %s", out.StaleWarning) + } b.WriteString(":\n") // Group results by relative file path. diff --git a/cmd/stdio_test.go b/cmd/stdio_test.go index ff3cbcf..0b64d20 100644 --- a/cmd/stdio_test.go +++ b/cmd/stdio_test.go @@ -30,6 +30,7 @@ import ( "flag" "github.com/ory/lumen/internal/config" + "github.com/ory/lumen/internal/index" "github.com/ory/lumen/internal/indexlock" "github.com/ory/lumen/internal/store" ) @@ -1020,7 +1021,7 @@ func TestEnsureIndexed_SkipsWhenLockHeld(t *testing.T) { // With the lock held by the subprocess, ensureIndexed must skip EnsureFresh. 
input := SemanticSearchInput{Cwd: projectPath, Path: projectPath, Query: "test", NResults: 8} - out, err := ic.ensureIndexed(context.Background(), idx, input, effectiveRoot, dbPath, nil) + out, err := ic.ensureIndexed(idx, input, effectiveRoot, dbPath, nil) if err != nil { t.Fatalf("ensureIndexed returned unexpected error: %v", err) } @@ -1195,6 +1196,31 @@ func TestFormatSearchResults_IncludesSeedWarning(t *testing.T) { } } +func TestFormatSearchResults_StaleWarning(t *testing.T) { + out := SemanticSearchOutput{ + Results: []SearchResultItem{ + {FilePath: "/proj/main.go", Symbol: "main", Kind: "function", StartLine: 1, EndLine: 5, Score: 0.9}, + }, + StaleWarning: "Index is being updated in the background.", + } + text := formatSearchResults("/proj", out) + if !strings.Contains(text, "Warning: Index is being updated") { + t.Fatalf("expected stale warning in output, got:\n%s", text) + } +} + +func TestFormatSearchResults_NoStaleWarning(t *testing.T) { + out := SemanticSearchOutput{ + Results: []SearchResultItem{ + {FilePath: "/proj/main.go", Symbol: "main", Kind: "function", StartLine: 1, EndLine: 5, Score: 0.9}, + }, + } + text := formatSearchResults("/proj", out) + if strings.Contains(text, "Warning:") { + t.Fatalf("unexpected warning in output, got:\n%s", text) + } +} + func TestEnsureIndexed_FreshnessTTL(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_DATA_HOME", tmpDir) @@ -1226,7 +1252,7 @@ func TestEnsureIndexed_FreshnessTTL(t *testing.T) { dbPath := config.DBPathForProject(effectiveRoot, ic.model) // First call: no TTL entry yet — runs EnsureFresh and records lastCheckedAt. 
- _, err = ic.ensureIndexed(context.Background(), idx, input, effectiveRoot, dbPath, nil) + _, err = ic.ensureIndexed(idx, input, effectiveRoot, dbPath, nil) if err != nil { t.Fatalf("first ensureIndexed: %v", err) } @@ -1243,7 +1269,7 @@ func TestEnsureIndexed_FreshnessTTL(t *testing.T) { t.Fatal("expected recentlyChecked=true immediately after ensureIndexed") } - out, err := ic.ensureIndexed(context.Background(), idx, input, effectiveRoot, dbPath, nil) + out, err := ic.ensureIndexed(idx, input, effectiveRoot, dbPath, nil) if err != nil { t.Fatalf("second ensureIndexed: %v", err) } @@ -1252,3 +1278,199 @@ func TestEnsureIndexed_FreshnessTTL(t *testing.T) { t.Fatal("second call should not reindex within TTL") } } + +func TestIndexerCache_CloseWaitsForBackground(t *testing.T) { + ic := &indexerCache{ + cache: make(map[string]cacheEntry), + } + + done := make(chan struct{}) + ic.wg.Go(func() { + time.Sleep(100 * time.Millisecond) + close(done) + }) + + ic.Close() + + select { + case <-done: + // goroutine finished before Close returned — correct + default: + t.Fatal("Close() returned before background goroutine finished") + } +} + +func TestEnsureIndexed_FlockHeldSkipsReindex(t *testing.T) { + tmpDir := t.TempDir() + dbPath := filepath.Join(tmpDir, "test.db") + lockPath := indexlock.LockPathForDB(dbPath) + + // Pre-acquire the lock to simulate a running indexer. 
+ lk, err := indexlock.TryAcquire(lockPath) + if err != nil { + t.Fatal(err) + } + if lk == nil { + t.Fatal("expected to acquire lock") + } + defer lk.Release() + + ic := &indexerCache{ + cache: make(map[string]cacheEntry), + } + + idx, idxErr := index.NewIndexer(dbPath, &stubEmbedder{}, 512) + if idxErr != nil { + t.Fatal(idxErr) + } + defer func() { _ = idx.Close() }() + + out, err := ic.ensureIndexed( + idx, + SemanticSearchInput{Cwd: tmpDir, Path: tmpDir, Query: "test"}, + tmpDir, dbPath, nil, + ) + if err != nil { + t.Fatal(err) + } + if out.StaleWarning == "" { + t.Fatal("expected StaleWarning when flock held") + } +} + +func TestEnsureIndexed_TimeoutReturnsStaleWarning(t *testing.T) { + tmpDir := t.TempDir() + dbPath := filepath.Join(tmpDir, "test.db") + + idx, err := index.NewIndexer(dbPath, &stubEmbedder{}, 512) + if err != nil { + t.Fatal(err) + } + defer func() { _ = idx.Close() }() + + ic := &indexerCache{ + cache: map[string]cacheEntry{ + tmpDir: {idx: idx, effectiveRoot: tmpDir}, + }, + ensureFreshFunc: func(ctx context.Context, _ *index.Indexer, _ string, _ index.ProgressFunc) (bool, index.Stats, error) { + select { + case <-time.After(30 * time.Second): + return true, index.Stats{IndexedFiles: 100}, nil + case <-ctx.Done(): + return false, index.Stats{}, ctx.Err() + } + }, + } + + start := time.Now() + out, err := ic.ensureIndexed( + idx, + SemanticSearchInput{Cwd: tmpDir, Path: tmpDir, Query: "test"}, + tmpDir, dbPath, nil, + ) + elapsed := time.Since(start) + + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + if out.StaleWarning == "" { + t.Fatal("expected StaleWarning to be set after timeout") + } + if elapsed > 20*time.Second { + t.Fatalf("ensureIndexed took %v, expected ~15s timeout", elapsed) + } + if out.Reindexed { + t.Fatal("expected Reindexed=false after timeout") + } + + // Wait for background goroutine to finish (WaitGroup). 
+ ic.Close() +} + +func TestEnsureIndexed_FastEnsureFreshNoWarning(t *testing.T) { + tmpDir := t.TempDir() + dbPath := filepath.Join(tmpDir, "test.db") + + idx, err := index.NewIndexer(dbPath, &stubEmbedder{}, 512) + if err != nil { + t.Fatal(err) + } + defer func() { _ = idx.Close() }() + + ic := &indexerCache{ + cache: map[string]cacheEntry{ + tmpDir: {idx: idx, effectiveRoot: tmpDir}, + }, + ensureFreshFunc: func(_ context.Context, _ *index.Indexer, _ string, _ index.ProgressFunc) (bool, index.Stats, error) { + return true, index.Stats{IndexedFiles: 42}, nil + }, + } + + out, err := ic.ensureIndexed( + idx, + SemanticSearchInput{Cwd: tmpDir, Path: tmpDir, Query: "test"}, + tmpDir, dbPath, nil, + ) + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + if out.StaleWarning != "" { + t.Fatalf("unexpected StaleWarning: %s", out.StaleWarning) + } + if !out.Reindexed { + t.Fatal("expected Reindexed=true") + } + if out.IndexedFiles != 42 { + t.Fatalf("expected IndexedFiles=42, got %d", out.IndexedFiles) + } + + ic.Close() +} + +func TestEnsureIndexed_SkipsMerkleWalkWhenRecentlyIndexedExternally(t *testing.T) { + tmpDir := t.TempDir() + dbPath := filepath.Join(tmpDir, "test.db") + + idx, err := index.NewIndexer(dbPath, &stubEmbedder{}, 512) + if err != nil { + t.Fatal(err) + } + defer func() { _ = idx.Close() }() + + // Simulate an external process (e.g. lumen index from SessionStart) having + // recently written last_indexed_at to the DB. 
+ if err := writeDBWithLastIndexedAt(t, dbPath, time.Now().Add(-5*time.Second)); err != nil { + t.Fatal(err) + } + + ensureFreshCalled := false + ic := &indexerCache{ + cache: map[string]cacheEntry{ + tmpDir: {idx: idx, effectiveRoot: tmpDir}, + }, + ensureFreshFunc: func(_ context.Context, _ *index.Indexer, _ string, _ index.ProgressFunc) (bool, index.Stats, error) { + ensureFreshCalled = true + return true, index.Stats{IndexedFiles: 42}, nil + }, + } + + out, err := ic.ensureIndexed( + idx, + SemanticSearchInput{Cwd: tmpDir, Path: tmpDir, Query: "test"}, + tmpDir, dbPath, nil, + ) + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + if out.StaleWarning != "" { + t.Fatalf("unexpected StaleWarning: %s", out.StaleWarning) + } + if ensureFreshCalled { + t.Fatal("expected EnsureFresh to be skipped when index was recently updated externally") + } + if !ic.recentlyChecked(tmpDir) { + t.Fatal("expected recentlyChecked=true after skipping merkle walk") + } + + ic.Close() +} diff --git a/docs/superpowers/plans/2026-03-23-nonblocking-semantic-search.md b/docs/superpowers/plans/2026-03-23-nonblocking-semantic-search.md new file mode 100644 index 0000000..d2d32d3 --- /dev/null +++ b/docs/superpowers/plans/2026-03-23-nonblocking-semantic-search.md @@ -0,0 +1,596 @@ +# Non-blocking semantic_search Implementation Plan + +> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make `semantic_search` return stale results with a warning after 15s instead of blocking indefinitely on reindexing. + +**Architecture:** Replace the synchronous `EnsureFresh()` call in `ensureIndexed()` with a goroutine guarded by a 15s timeout. The goroutine acquires the flock, reindexes, and calls `touchChecked()` on success. A `sync.WaitGroup` on `indexerCache` ensures graceful shutdown. 
+ +**Tech Stack:** Go, `sync.WaitGroup`, `indexlock` (flock), `context.Background()` + +**Spec:** `docs/superpowers/specs/2026-03-23-nonblocking-semantic-search-design.md` + +--- + +## Chunk 1: Core implementation + +### Task 1: Add `sync.WaitGroup` and `StaleWarning` field + +**Files:** +- Modify: `cmd/stdio.go:78-84` (SemanticSearchOutput struct) +- Modify: `cmd/stdio.go:132-142` (indexerCache struct) + +- [ ] **Step 1: Add `StaleWarning` to `SemanticSearchOutput`** + +In `cmd/stdio.go`, add the field after `SeedWarning`: + +```go +type SemanticSearchOutput struct { + Results []SearchResultItem `json:"results"` + Reindexed bool `json:"reindexed"` + IndexedFiles int `json:"indexed_files,omitempty"` + FilteredHint string `json:"filtered_hint,omitempty"` + SeedWarning string `json:"seed_warning,omitempty"` + StaleWarning string `json:"stale_warning,omitempty"` +} +``` + +- [ ] **Step 2: Add `wg` field to `indexerCache`** + +In `cmd/stdio.go`, add a `sync.WaitGroup` to `indexerCache`: + +```go +type indexerCache struct { + mu sync.RWMutex + cache map[string]cacheEntry + embedder embedder.Embedder + model string + cfg config.Config + freshnessTTL time.Duration + findDonorFunc func(string, string) string + seedFunc func(string, string) (bool, error) + log *slog.Logger + wg sync.WaitGroup // tracks background reindex goroutines +} +``` + +- [ ] **Step 3: Add constant for reindex timeout** + +Add near `defaultFreshnessTTL` (line 122): + +```go +const reindexTimeout = 15 * time.Second +const backgroundReindexMaxDuration = 10 * time.Minute +``` + +- [ ] **Step 4: Compile check** + +Run: `go build ./...` +Expected: PASS (no behavior change yet) + +- [ ] **Step 5: Commit** + +```bash +git add cmd/stdio.go +git commit -m "refactor(cmd): add StaleWarning field and WaitGroup to indexerCache" +``` + +--- + +### Task 2: Update `Close()` to wait for background goroutines + +**Files:** +- Modify: `cmd/stdio.go:153-165` (Close method) + +- [ ] **Step 1: Write the failing test** + 
+In `cmd/stdio_test.go`, add a test that verifies `Close()` waits for a background goroutine tracked by `wg`: + +```go +func TestIndexerCache_CloseWaitsForBackground(t *testing.T) { + ic := &indexerCache{ + cache: make(map[string]cacheEntry), + } + + done := make(chan struct{}) + ic.wg.Add(1) + go func() { + defer ic.wg.Done() + time.Sleep(100 * time.Millisecond) + close(done) + }() + + ic.Close() + + select { + case <-done: + // goroutine finished before Close returned — correct + default: + t.Fatal("Close() returned before background goroutine finished") + } +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `go test ./cmd -run TestIndexerCache_CloseWaitsForBackground -count=1` +Expected: FAIL — `Close()` does not call `wg.Wait()` yet + +- [ ] **Step 3: Update `Close()` to wait** + +Replace the `Close` method (lines 153-165): + +```go +// Close waits for any background reindex goroutines to finish, then +// closes all cached indexers. Call on MCP server shutdown. +func (ic *indexerCache) Close() { + ic.wg.Wait() + ic.mu.Lock() + defer ic.mu.Unlock() + seen := make(map[*index.Indexer]bool) + for _, entry := range ic.cache { + if !seen[entry.idx] { + seen[entry.idx] = true + _ = entry.idx.Close() + } + } + ic.cache = nil +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `go test ./cmd -run TestIndexerCache_CloseWaitsForBackground -count=1` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add cmd/stdio.go cmd/stdio_test.go +git commit -m "feat(cmd): Close() waits for background reindex goroutines" +``` + +--- + +### Task 3: Implement non-blocking `ensureIndexed()` + +**Files:** +- Modify: `cmd/stdio.go:568-651` (ensureIndexed method) + +- [ ] **Step 1: Replace synchronous `EnsureFresh` with timeout-guarded goroutine** + +Replace lines 615-650 (from the `logger.Debug("freshness TTL expired...")` through the end of `ensureIndexed`) with: + +```go + ic.logger().Debug("freshness TTL expired or first check, building merkle tree", 
+ "cwd", input.Cwd, + "effective_root", projectDir, + ) + + // Run EnsureFresh in a goroutine with a 15s timeout. If reindexing + // takes longer, return stale results with a warning while the + // goroutine continues in the background. + type freshResult struct { + reindexed bool + stats index.Stats + err error + } + done := make(chan freshResult, 1) // buffered: goroutine must never block on send + + bgCtx, bgCancel := context.WithTimeout(context.Background(), backgroundReindexMaxDuration) + + lockPath := indexlock.LockPathForDB(dbPath) + ic.wg.Add(1) + go func() { + defer ic.wg.Done() + defer bgCancel() + + lk, lockErr := indexlock.TryAcquire(lockPath) + if lockErr != nil { + ic.logger().Warn("background reindex: failed to acquire lock", "project", projectDir, "err", lockErr) + done <- freshResult{} + return + } + if lk == nil { + // Another process grabbed the lock between our IsHeld check and now. + ic.logger().Debug("background reindex: lock held by another process, skipping", "project", projectDir) + done <- freshResult{} + return + } + defer lk.Release() + + reindexed, stats, err := idx.EnsureFresh(bgCtx, projectDir, nil) // nil progress: request ctx may be gone + if err != nil { + ic.logger().Warn("background reindex failed", "project", projectDir, "err", err) + } else { + ic.touchChecked(projectDir) + } + done <- freshResult{reindexed: reindexed, stats: stats, err: err} + }() + + timer := time.NewTimer(reindexTimeout) + defer timer.Stop() + + select { + case result := <-done: + bgCancel() // release context resources early + if result.err != nil { + return out, fmt.Errorf("ensure fresh: %w", result.err) + } + elapsed := time.Since(start) + if !result.reindexed { + ic.logger().Debug("index fresh, caching result", + "cwd", input.Cwd, + "effective_root", projectDir, + "elapsed_ms", elapsed.Milliseconds(), + ) + } else { + ic.logger().Info("reindex triggered", + "cwd", input.Cwd, + "search_path", input.Path, + "effective_root", projectDir, + 
"total_project_files", result.stats.TotalFiles, + "files_indexed", result.stats.IndexedFiles, + "chunks_created", result.stats.ChunksCreated, + "files_changed", result.stats.FilesChanged, + "elapsed_ms", elapsed.Milliseconds(), + ) + } + out.Reindexed = result.reindexed + if result.reindexed { + out.IndexedFiles = result.stats.IndexedFiles + } + return out, nil + + case <-timer.C: + ic.logger().Info("reindex timeout, returning stale results", + "project", projectDir, + "timeout", reindexTimeout, + ) + out.StaleWarning = "Index is being updated in the background. Results may be incomplete or outdated. A follow-up search in ~30s will return fresh results." + return out, nil + } +``` + +- [ ] **Step 2: Compile check** + +Run: `go build ./...` +Expected: PASS + +- [ ] **Step 3: Run existing tests** + +Run: `go test ./cmd -count=1` +Expected: PASS (existing behavior preserved for fast paths) + +- [ ] **Step 4: Commit** + +```bash +git add cmd/stdio.go +git commit -m "feat(cmd): non-blocking ensureIndexed with 15s timeout and background reindex" +``` + +--- + +### Task 4: Update `formatSearchResults` to render `StaleWarning` + +**Files:** +- Modify: `cmd/stdio.go:1001+` (formatSearchResults function) + +- [ ] **Step 1: Write the failing test** + +Add to `cmd/stdio_test.go`: + +```go +func TestFormatSearchResults_StaleWarning(t *testing.T) { + out := SemanticSearchOutput{ + Results: []SearchResultItem{ + {FilePath: "/proj/main.go", Symbol: "main", Kind: "function", StartLine: 1, EndLine: 5, Score: 0.9}, + }, + StaleWarning: "Index is being updated in the background.", + } + text := formatSearchResults("/proj", out) + if !strings.Contains(text, "Warning: Index is being updated") { + t.Fatalf("expected stale warning in output, got:\n%s", text) + } +} + +func TestFormatSearchResults_NoStaleWarning(t *testing.T) { + out := SemanticSearchOutput{ + Results: []SearchResultItem{ + {FilePath: "/proj/main.go", Symbol: "main", Kind: "function", StartLine: 1, EndLine: 5, Score: 0.9}, 
+		},
+	}
+	text := formatSearchResults("/proj", out)
+	if strings.Contains(text, "Warning:") {
+		t.Fatalf("unexpected warning in output, got:\n%s", text)
+	}
+}
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+Run: `go test ./cmd -run TestFormatSearchResults_StaleWarning -count=1`
+Expected: FAIL — StaleWarning not rendered yet
+
+- [ ] **Step 3: Add StaleWarning rendering**
+
+In `formatSearchResults`, after the `SeedWarning` block (around line 1024-1026), add:
+
+```go
+	if out.StaleWarning != "" {
+		fmt.Fprintf(&b, "\nWarning: %s", out.StaleWarning)
+	}
+```
+
+Also add it in the empty-results branch (after line 1010-1011):
+
+```go
+	if out.StaleWarning != "" {
+		b.WriteString("\nWarning: ")
+		b.WriteString(out.StaleWarning)
+	}
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `go test ./cmd -run TestFormatSearchResults -count=1`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add cmd/stdio.go cmd/stdio_test.go
+git commit -m "feat(cmd): render StaleWarning in semantic_search output"
+```
+
+---
+
+## Chunk 2: Testing and verification
+
+### Task 5: Test flock-skip fast path
+
+**Files:**
+- Modify: `cmd/stdio_test.go`
+
+- [ ] **Step 1: Write the test**
+
+Verify that when the flock is already held (by session-start or another process),
+`ensureIndexed` skips reindexing but sets `StaleWarning` (the index is being updated externally):
+
+```go
+func TestEnsureIndexed_FlockHeldSkipsReindex(t *testing.T) {
+	tmpDir := t.TempDir()
+	dbPath := filepath.Join(tmpDir, "test.db")
+	lockPath := indexlock.LockPathForDB(dbPath)
+
+	// Pre-acquire the lock to simulate a running indexer.
+	lk, err := indexlock.TryAcquire(lockPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if lk == nil {
+		t.Fatal("expected to acquire lock")
+	}
+	defer lk.Release()
+
+	ic := &indexerCache{
+		cache: make(map[string]cacheEntry),
+	}
+
+	idx, idxErr := index.NewIndexer(dbPath, &stubEmbedder{}, 512)
+	if idxErr != nil {
+		t.Fatal(idxErr)
+	}
+	defer func() { _ = idx.Close() }()
+
+	out, err := ic.ensureIndexed(
+		idx,
+		SemanticSearchInput{Cwd: tmpDir, Path: tmpDir, Query: "test"},
+		tmpDir,
+		dbPath, nil,
+	)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if out.StaleWarning == "" {
+		t.Fatal("expected StaleWarning when flock held")
+	}
+}
+```
+
+- [ ] **Step 2: Run test**
+
+Run: `go test ./cmd -run TestEnsureIndexed_FlockHeldSkipsReindex -count=1`
+Expected: PASS
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add cmd/stdio_test.go
+git commit -m "test(cmd): verify ensureIndexed skips reindex when flock is held"
+```
+
+---
+
+### Task 6: Test timeout path — inject slow EnsureFresh via hook
+
+**Files:**
+- Modify: `cmd/stdio.go:132-142` (add test hook to indexerCache)
+- Modify: `cmd/stdio_test.go`
+
+To test the timeout path without a real slow embedder, add an optional test hook
+to `indexerCache` that wraps `EnsureFresh`. This is the same pattern used for
+`findDonorFunc` and `seedFunc`.
+
+- [ ] **Step 1: Add `ensureFreshFunc` hook to `indexerCache`**
+
+```go
+type indexerCache struct {
+	// ... existing fields ...
+	ensureFreshFunc func(ctx context.Context, idx *index.Indexer, projectDir string, progress index.ProgressFunc) (bool, index.Stats, error) // nil uses idx.EnsureFresh
+}
+```
+
+- [ ] **Step 2: Use the hook in `ensureIndexed`'s goroutine**
+
+In the goroutine body (Task 3), replace:
+```go
+reindexed, stats, err := idx.EnsureFresh(bgCtx, projectDir, nil)
+```
+with:
+```go
+	ensureFresh := ic.ensureFreshFunc
+	if ensureFresh == nil {
+		ensureFresh = func(ctx context.Context, idx *index.Indexer, dir string, p index.ProgressFunc) (bool, index.Stats, error) {
+			return idx.EnsureFresh(ctx, dir, p)
+		}
+	}
+	reindexed, stats, err := ensureFresh(bgCtx, idx, projectDir, nil)
+```
+
+- [ ] **Step 3: Write the timeout test**
+
+```go
+func TestEnsureIndexed_TimeoutReturnsStaleWarning(t *testing.T) {
+	tmpDir := t.TempDir()
+	dbPath := filepath.Join(tmpDir, "test.db")
+
+	// Create a real indexer so ensureIndexed has something to work with.
+	idx, err := index.NewIndexer(dbPath, &stubEmbedder{}, 512)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer func() { _ = idx.Close() }()
+
+	ic := &indexerCache{
+		cache: map[string]cacheEntry{
+			tmpDir: {idx: idx, effectiveRoot: tmpDir},
+		},
+		// Simulate a slow EnsureFresh that takes longer than reindexTimeout.
+		ensureFreshFunc: func(ctx context.Context, _ *index.Indexer, _ string, _ index.ProgressFunc) (bool, index.Stats, error) {
+			select {
+			case <-time.After(30 * time.Second):
+				return true, index.Stats{IndexedFiles: 100}, nil
+			case <-ctx.Done():
+				return false, index.Stats{}, ctx.Err()
+			}
+		},
+	}
+
+	start := time.Now()
+	out, err := ic.ensureIndexed(
+		idx,
+		SemanticSearchInput{Cwd: tmpDir, Path: tmpDir, Query: "test"},
+		tmpDir,
+		dbPath, nil,
+	)
+	elapsed := time.Since(start)
+
+	if err != nil {
+		t.Fatalf("expected no error, got: %v", err)
+	}
+	if out.StaleWarning == "" {
+		t.Fatal("expected StaleWarning to be set after timeout")
+	}
+	if elapsed > 20*time.Second {
+		t.Fatalf("ensureIndexed took %v, expected ~15s timeout", elapsed)
+	}
+	if out.Reindexed {
+		t.Fatal("expected Reindexed=false after timeout")
+	}
+
+	// Wait for background goroutine to finish (WaitGroup).
+	ic.Close()
+}
+```
+
+- [ ] **Step 4: Run tests**
+
+Run: `go test ./cmd -run TestEnsureIndexed_TimeoutReturnsStaleWarning -count=1 -timeout=60s`
+Expected: PASS — returns in ~15s with StaleWarning set
+
+- [ ] **Step 5: Write the fast-path test (completes before timeout)**
+
+```go
+func TestEnsureIndexed_FastEnsureFreshNoWarning(t *testing.T) {
+	tmpDir := t.TempDir()
+	dbPath := filepath.Join(tmpDir, "test.db")
+
+	idx, err := index.NewIndexer(dbPath, &stubEmbedder{}, 512)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer func() { _ = idx.Close() }()
+
+	ic := &indexerCache{
+		cache: map[string]cacheEntry{
+			tmpDir: {idx: idx, effectiveRoot: tmpDir},
+		},
+		// Simulate a fast EnsureFresh.
+		ensureFreshFunc: func(_ context.Context, _ *index.Indexer, _ string, _ index.ProgressFunc) (bool, index.Stats, error) {
+			return true, index.Stats{IndexedFiles: 42}, nil
+		},
+	}
+
+	out, err := ic.ensureIndexed(
+		idx,
+		SemanticSearchInput{Cwd: tmpDir, Path: tmpDir, Query: "test"},
+		tmpDir,
+		dbPath, nil,
+	)
+	if err != nil {
+		t.Fatalf("expected no error, got: %v", err)
+	}
+	if out.StaleWarning != "" {
+		t.Fatalf("unexpected StaleWarning: %s", out.StaleWarning)
+	}
+	if !out.Reindexed {
+		t.Fatal("expected Reindexed=true")
+	}
+	if out.IndexedFiles != 42 {
+		t.Fatalf("expected IndexedFiles=42, got %d", out.IndexedFiles)
+	}
+
+	ic.Close()
+}
+```
+
+- [ ] **Step 6: Run all tests**
+
+Run: `go test ./cmd -run TestEnsureIndexed -count=1 -timeout=60s`
+Expected: PASS
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add cmd/stdio.go cmd/stdio_test.go
+git commit -m "test(cmd): add timeout and fast-path tests for non-blocking ensureIndexed"
+```
+
+---
+
+### Task 7: Full test suite and lint
+
+**Files:** None (verification only)
+
+- [ ] **Step 1: Run full test suite**
+
+Run: `go test ./... 
-count=1` +Expected: PASS + +- [ ] **Step 2: Run linter** + +Run: `golangci-lint run` +Expected: PASS with zero issues + +- [ ] **Step 3: Run vet** + +Run: `go vet ./...` +Expected: PASS (external dependency warnings OK) + +- [ ] **Step 4: Final commit if any lint fixes needed** + +```bash +git add -A +git commit -m "style: fix lint issues from non-blocking search implementation" +``` diff --git a/docs/superpowers/specs/2026-03-23-nonblocking-semantic-search-design.md b/docs/superpowers/specs/2026-03-23-nonblocking-semantic-search-design.md new file mode 100644 index 0000000..d7a0d41 --- /dev/null +++ b/docs/superpowers/specs/2026-03-23-nonblocking-semantic-search-design.md @@ -0,0 +1,148 @@ +# Non-blocking semantic_search with partial results + +**Date:** 2026-03-23 +**Status:** Draft +**Branch:** `reindex-fixes` + +## Problem + +When the freshness TTL expires and no background indexer holds the flock, +`ensureIndexed()` calls `EnsureFresh()` synchronously. On large codebases with +many changed files this blocks the agent for minutes — it cannot respond, search, +or do anything else until reindexing completes. + +The session-start hook already spawns a background indexer, but if that indexer +finishes before the first search and files change afterward, the next +`semantic_search` call pays the full synchronous reindex cost. + +## Goal + +`semantic_search` must never block the agent for more than 15 seconds waiting on +reindexing. If reindexing takes longer, return results from the stale index with +a warning that results may be incomplete, while reindexing continues in the +background. 
+ +## Design + +### New output field + +Add `StaleWarning` to `SemanticSearchOutput`: + +```go +type SemanticSearchOutput struct { + Results []SearchResultItem `json:"results"` + Reindexed bool `json:"reindexed"` + IndexedFiles int `json:"indexed_files,omitempty"` + FilteredHint string `json:"filtered_hint,omitempty"` + SeedWarning string `json:"seed_warning,omitempty"` + StaleWarning string `json:"stale_warning,omitempty"` // NEW +} +``` + +When the 15s timeout fires, `StaleWarning` carries: + +> "Index is being updated in the background. Results may be incomplete or +> outdated. A follow-up search in ~30s will return fresh results." + +### Modified `ensureIndexed()` flow + +Replace the synchronous `EnsureFresh()` call (lines 615-650) with a +timeout-guarded goroutine: + +``` +freshnessTTL miss AND flock NOT held: + + 1. Create a buffered done channel (cap 1) and a result struct. + 2. Spawn goroutine: + a. Try to acquire flock via TryAcquire(). + b. If flock acquired: + - Run idx.EnsureFresh(bgCtx, projectDir, nil) + (pass nil progress — the MCP request context may be gone + by the time the goroutine runs, so progress notifications + would fail) + - On success: call ic.touchChecked(projectDir) so subsequent + searches benefit from the freshness TTL cache. + - On error: log the error at Warn level. Do NOT call + touchChecked (next search retries). + - Release flock (defer). + - Send result (reindexed, stats, err) on done channel. + c. If flock NOT acquired (race — another process grabbed it): + - Send zero result on done channel (skip). + 3. Select on done channel with 15s timeout: + a. Done received in time → process as today (touchChecked, set + Reindexed/IndexedFiles, return). + b. Timeout fires: + - Log at Info level: "reindex timeout, returning stale results". + - Set out.StaleWarning with the warning message. + - Do NOT call touchChecked() — next search retries freshness. + - Return immediately — search proceeds against stale index. 
+ - The goroutine's result is never read; the buffered channel + ensures it does not block. +``` + +**Why buffered channel (cap 1):** If the timeout fires first, the caller never +reads from the done channel. An unbuffered channel would cause the goroutine to +block on send forever, leaking it. A buffered channel lets the goroutine send +and exit cleanly. + +### Goroutine context and lifecycle + +- The goroutine uses `context.Background()` with a 10-minute timeout as a safety + net — NOT the request context, which would be cancelled when the response is + sent. +- The flock prevents concurrent reindexing: subsequent `semantic_search` calls see + `IsHeld() == true` and skip (existing fast-path at line 611). +- When the goroutine finishes, it releases the flock. The next search with an + expired freshness TTL sees a fresh index. +- If the MCP server process exits, the OS releases the flock — no leaked locks. +- **Graceful shutdown**: `indexerCache` should track background goroutines via a + `sync.WaitGroup`. `Close()` calls `wg.Wait()` before closing indexers, so a + background `EnsureFresh` is not interrupted mid-write. The 10-minute context + timeout is the upper bound — in practice reindexing finishes much sooner. + +### `formatSearchResults` update + +Render `StaleWarning` in the text output, following the existing pattern for +`SeedWarning` and `FilteredHint`: + +```go +if out.StaleWarning != "" { + fmt.Fprintf(&b, "\nWarning: %s", out.StaleWarning) +} +``` + +### What does NOT change + +- **ForceReindex path** — stays synchronous. It is explicitly requested by the + user via `/lumen:reindex`, so blocking is expected. +- **Session-start background indexer** — works as before, acquires flock. +- **Flock check fast-path** (line 611) — still skips when lock is held. +- **Freshness TTL** — still skips merkle walks within TTL window. 
+ +## Files touched + +| File | Change | +|------|--------| +| `cmd/stdio.go` | `SemanticSearchOutput` struct: add `StaleWarning` field | +| `cmd/stdio.go` | `ensureIndexed()`: replace synchronous `EnsureFresh` with timeout-guarded goroutine + flock | +| `cmd/stdio.go` | `indexerCache` struct: add `sync.WaitGroup` for background goroutine tracking | +| `cmd/stdio.go` | `Close()`: wait for background goroutines before closing indexers | +| `cmd/stdio.go` | `formatSearchResults()`: render `StaleWarning` in output text | + +No new files. No new packages. + +## Testing + +- **Unit test**: Mock `EnsureFresh` to sleep > 15s, verify `StaleWarning` is set + and results are returned from stale index. +- **Unit test**: Mock `EnsureFresh` to complete in < 15s, verify no + `StaleWarning` and `Reindexed` is true. +- **Unit test**: Verify flock is acquired by the goroutine (subsequent calls see + `IsHeld() == true`). +- **E2E test** (if feasible): Trigger reindex on a large fixture, verify search + returns within ~15s with warning. + +## Timeout value + +Hardcoded at 15 seconds. No env var for now — YAGNI. Can be made configurable +via `LUMEN_SEARCH_TIMEOUT` later if needed. diff --git a/e2e_test.go b/e2e_test.go index dc9d3f0..ab125c5 100644 --- a/e2e_test.go +++ b/e2e_test.go @@ -894,42 +894,6 @@ func TestE2E_IndexStatus(t *testing.T) { } } -func TestE2E_ForceReindex(t *testing.T) { - t.Parallel() - session := startServer(t) - projectPath := sampleProjectPath(t) - - // Normal search triggers indexing. - out1 := callSearch(t, session, map[string]any{ - "query": "config", - "path": projectPath, - }) - if !out1.Reindexed { - t.Error("first search: expected Reindexed=true") - } - - // Second search (no changes) should skip. - out2 := callSearch(t, session, map[string]any{ - "query": "config", - "path": projectPath, - }) - if out2.Reindexed { - t.Error("second search (no changes): expected Reindexed=false") - } - - // Force reindex should re-index even with no changes. 
- out3 := callSearch(t, session, map[string]any{ - "query": "config", - "path": projectPath, - "force_reindex": true, - }) - if !out3.Reindexed { - t.Error("force_reindex: expected Reindexed=true") - } - if out3.IndexedFiles != 5 { - t.Errorf("force_reindex: expected IndexedFiles=5, got %d", out3.IndexedFiles) - } -} func TestE2E_ProgressNotifications(t *testing.T) { t.Parallel() diff --git a/internal/index/index.go b/internal/index/index.go index 8f2cd99..5a7bad0 100644 --- a/internal/index/index.go +++ b/internal/index/index.go @@ -19,6 +19,7 @@ import ( "context" "database/sql" "fmt" + "log/slog" "os" "path/filepath" "slices" @@ -54,6 +55,18 @@ type Stats struct { IndexedFiles int ChunksCreated int FilesChanged int + + // Breakdown of changed files by category. + FilesAdded int + FilesModified int + FilesRemoved int + + // Reason explains why reindexing was triggered. + Reason string + + // OldRootHash and NewRootHash are the merkle root hashes before and after. + OldRootHash string + NewRootHash string } // StatusInfo holds information about the current index state for a project. @@ -73,6 +86,13 @@ type Indexer struct { emb embedder.Embedder chunker chunker.Chunker maxChunkTokens int + logger *slog.Logger + dsn string // path to the SQLite database file; used for corruption recovery +} + +// SetLogger attaches a logger to the indexer for structured diagnostic output. +func (idx *Indexer) SetLogger(l *slog.Logger) { + idx.logger = l } // NewIndexer creates a new Indexer backed by a SQLite store at dsn, @@ -88,9 +108,28 @@ func NewIndexer(dsn string, emb embedder.Embedder, maxChunkTokens int) (*Indexer emb: emb, chunker: chunker.NewMultiChunker(chunker.DefaultLanguages(maxChunkTokens)), maxChunkTokens: maxChunkTokens, + dsn: dsn, }, nil } +// rebuildStore closes the current store, deletes the database files, and +// opens a fresh store. Must be called while holding idx.mu.Lock() or before +// the Indexer is shared with other goroutines. 
+func (idx *Indexer) rebuildStore() error { + _ = idx.store.Close() + if idx.dsn != "" && idx.dsn != ":memory:" { + for _, suffix := range []string{"", "-wal", "-shm"} { + _ = os.Remove(idx.dsn + suffix) + } + } + s, err := store.New(idx.dsn, idx.emb.Dimensions()) + if err != nil { + return fmt.Errorf("open fresh store: %w", err) + } + idx.store = s + return nil +} + // Close closes the underlying store. func (idx *Indexer) Close() error { return idx.store.Close() @@ -113,17 +152,51 @@ func (idx *Indexer) Index(ctx context.Context, projectDir string, force bool, pr idx.mu.Lock() defer idx.mu.Unlock() + storedHash, err := idx.store.GetMeta("root_hash") + if err != nil && err != sql.ErrNoRows { + return Stats{}, fmt.Errorf("get root_hash: %w", err) + } + // If not forcing, check root hash before doing any work. if !force { - storedHash, err := idx.store.GetMeta("root_hash") - if err != nil && err != sql.ErrNoRows { - return Stats{}, fmt.Errorf("get root_hash: %w", err) - } if storedHash == curTree.RootHash { return Stats{}, nil } } - return idx.indexWithTree(ctx, projectDir, force, curTree, progress) + + stats, indexErr := idx.indexWithTree(ctx, projectDir, storedHash, force, curTree, progress) + if indexErr != nil { + if !store.IsCorruptionErr(indexErr) { + return stats, indexErr + } + if idx.logger != nil { + idx.logger.Error("corrupted database detected during index, rebuilding", + "project", projectDir, "err", indexErr) + } + if rebuildErr := idx.rebuildStore(); rebuildErr != nil { + return Stats{}, fmt.Errorf("rebuild corrupted db: %w", rebuildErr) + } + // Retry with force=true so the fresh DB gets a full index pass. 
+ stats, indexErr = idx.indexWithTree(ctx, projectDir, "", true, curTree, progress) + if indexErr != nil { + return stats, fmt.Errorf("reindex after rebuild: %w", indexErr) + } + stats.OldRootHash = storedHash + stats.NewRootHash = curTree.RootHash + stats.Reason = "rebuilt after corruption" + return stats, nil + } + + if force { + stats.Reason = "force reindex requested" + } else if storedHash == "" || err == sql.ErrNoRows { + stats.Reason = "fresh index (no previous root hash)" + } else { + stats.Reason = "root hash changed" + } + stats.OldRootHash = storedHash + stats.NewRootHash = curTree.RootHash + return stats, nil } // EnsureFresh checks if the index is stale and re-indexes if needed. @@ -146,17 +219,44 @@ func (idx *Indexer) EnsureFresh(ctx context.Context, projectDir string, progress return false, Stats{}, nil } - stats, err := idx.indexWithTree(ctx, projectDir, false, curTree, progress) + var reason string + switch { + case storedHash == "" || err == sql.ErrNoRows: + reason = "fresh index (no previous root hash)" + default: + reason = "root hash changed" + } + + stats, err := idx.indexWithTree(ctx, projectDir, storedHash, false, curTree, progress) if err != nil { - return false, stats, err + if !store.IsCorruptionErr(err) { + return false, stats, err + } + if idx.logger != nil { + idx.logger.Error("corrupted database detected during reindex, rebuilding", + "project", projectDir, "err", err) + } + if rebuildErr := idx.rebuildStore(); rebuildErr != nil { + return false, Stats{}, fmt.Errorf("rebuild corrupted db: %w", rebuildErr) + } + // Retry with empty storedHash so the fresh DB gets a full index pass. 
+ stats, err = idx.indexWithTree(ctx, projectDir, "", false, curTree, progress) + if err != nil { + return false, stats, fmt.Errorf("reindex after rebuild: %w", err) + } + reason = "rebuilt after corruption" + storedHash = "" } + stats.Reason = reason + stats.OldRootHash = storedHash + stats.NewRootHash = curTree.RootHash return true, stats, nil } // indexWithTree is the internal implementation of Index that accepts a pre-built // merkle tree, so callers that already have one (e.g. EnsureFresh) do not need // to build it again. -func (idx *Indexer) indexWithTree(ctx context.Context, projectDir string, force bool, curTree *merkle.Tree, progress ProgressFunc) (Stats, error) { +func (idx *Indexer) indexWithTree(ctx context.Context, projectDir, oldRootHash string, force bool, curTree *merkle.Tree, progress ProgressFunc) (Stats, error) { var stats Stats stats.TotalFiles = len(curTree.Files) @@ -192,16 +292,40 @@ func (idx *Indexer) indexWithTree(ctx context.Context, projectDir string, force filesToRemove = append(filesToRemove, path) } } + stats.FilesAdded = len(filesToIndex) + stats.FilesRemoved = len(filesToRemove) } else { oldTree := &merkle.Tree{Files: oldHashes} added, removed, modified := merkle.Diff(oldTree, curTree) filesToIndex = append(filesToIndex, added...) filesToIndex = append(filesToIndex, modified...) 
filesToRemove = removed + stats.FilesAdded = len(added) + stats.FilesModified = len(modified) + stats.FilesRemoved = len(removed) } stats.FilesChanged = len(filesToIndex) + len(filesToRemove) + if idx.logger != nil { + logArgs := []any{ + "project", projectDir, + "total_files", stats.TotalFiles, + "files_unchanged", stats.TotalFiles - stats.FilesChanged, + "files_to_add", stats.FilesAdded, + "files_to_modify", stats.FilesModified, + "files_to_remove", stats.FilesRemoved, + "old_root_hash", oldRootHash, + "new_root_hash", curTree.RootHash, + } + if git.IsWorktree(projectDir) { + if worktrees, err := git.ListWorktrees(projectDir); err == nil && len(worktrees) > 0 { + logArgs = append(logArgs, "main_worktree", worktrees[0]) + } + } + idx.logger.Info("indexing plan", logArgs...) + } + if progress != nil { progress(0, len(filesToIndex), fmt.Sprintf("Found %d files to index", len(filesToIndex))) } diff --git a/internal/store/store.go b/internal/store/store.go index 2893d48..35009b1 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -18,6 +18,7 @@ package store import ( "database/sql" "fmt" + "os" "strings" sqlite_vec "github.com/asg017/sqlite-vec-go-bindings/cgo" @@ -30,6 +31,26 @@ func init() { sqlite_vec.Auto() } +// IsCorruptionErr reports whether err indicates SQLite database corruption. +// These are the canonical SQLite error messages for an unrecoverable on-disk +// data problem; the only safe recovery is to delete the database and rebuild. +func IsCorruptionErr(err error) bool { + if err == nil { + return false + } + msg := err.Error() + return strings.Contains(msg, "database disk image is malformed") || + strings.Contains(msg, "disk I/O error") +} + +// deleteDBFiles removes the SQLite database file and its WAL/SHM sidecars. +// Errors are silently ignored — the file may already be gone or unwritable. 
+func deleteDBFiles(path string) { + for _, suffix := range []string{"", "-wal", "-shm"} { + _ = os.Remove(path + suffix) + } +} + // SearchResult represents a single result from a vector search. type SearchResult struct { FilePath string @@ -56,7 +77,21 @@ type Store struct { // New opens (or creates) a SQLite database at dsn, enables WAL mode and // foreign keys, and creates the schema tables if they do not exist. // dimensions specifies the size of the embedding vectors. +// +// If the database file is corrupted (SQLite returns a corruption error during +// open or schema setup), New deletes the file and its WAL/SHM sidecars and +// retries once from a clean state. In-memory databases (dsn == ":memory:") +// are never deleted. func New(dsn string, dimensions int) (*Store, error) { + s, err := openStore(dsn, dimensions) + if err != nil && IsCorruptionErr(err) && dsn != ":memory:" { + deleteDBFiles(dsn) + s, err = openStore(dsn, dimensions) + } + return s, err +} + +func openStore(dsn string, dimensions int) (*Store, error) { db, err := sql.Open("sqlite3", dsn) if err != nil { return nil, fmt.Errorf("open db: %w", err) @@ -70,7 +105,7 @@ func New(dsn string, dimensions int) (*Store, error) { "PRAGMA synchronous=NORMAL", "PRAGMA cache_size=-64000", "PRAGMA temp_store=MEMORY", - "PRAGMA busy_timeout=30000", + "PRAGMA busy_timeout=120000", } for _, p := range pragmas { if _, err := db.Exec(p); err != nil { @@ -319,7 +354,7 @@ func (s *Store) insertChunksInTransaction(chunks []chunker.Chunk, vectors [][]fl defer func() { _ = chunkStmt.Close() }() vecStmt, err := tx.Prepare( - `INSERT INTO vec_chunks (id, embedding) VALUES (?, ?)`, + `INSERT OR REPLACE INTO vec_chunks (id, embedding) VALUES (?, ?)`, ) if err != nil { return fmt.Errorf("prepare vec insert: %w", err) diff --git a/internal/tui/progress.go b/internal/tui/progress.go index 22e2f93..d9020b0 100644 --- a/internal/tui/progress.go +++ b/internal/tui/progress.go @@ -16,8 +16,10 @@ package tui import ( + "fmt" 
"io" "os" + "time" "github.com/pterm/pterm" "golang.org/x/term" @@ -28,11 +30,12 @@ import ( // configured writer (typically os.Stderr to avoid interfering with // MCP stdio on stdout). type Progress struct { - writer io.Writer - bar *pterm.ProgressbarPrinter - info pterm.PrefixPrinter - success pterm.PrefixPrinter - errpr pterm.PrefixPrinter + writer io.Writer + bar *pterm.ProgressbarPrinter + info pterm.PrefixPrinter + success pterm.PrefixPrinter + errpr pterm.PrefixPrinter + isTerminal bool } // NewProgress creates a new Progress that writes to w. @@ -41,17 +44,19 @@ type Progress struct { // via PTerm's global output writer. func NewProgress(w io.Writer) *Progress { f, isFile := w.(*os.File) - if !isFile || !term.IsTerminal(int(f.Fd())) { + isTerm := isFile && term.IsTerminal(int(f.Fd())) + if !isTerm { pterm.DisableStyling() } // Redirect PTerm's global output (used for cursor control etc.) to w // so nothing escapes to the default os.Stdout. pterm.SetDefaultOutput(w) return &Progress{ - writer: w, - info: *pterm.Info.WithWriter(w), - success: *pterm.Success.WithWriter(w), - errpr: *pterm.Error.WithWriter(w), + writer: w, + info: *pterm.Info.WithWriter(w), + success: *pterm.Success.WithWriter(w), + errpr: *pterm.Error.WithWriter(w), + isTerminal: isTerm, } } @@ -93,13 +98,28 @@ func (p *Progress) Stop() { // AsProgressFunc returns a callback compatible with index.ProgressFunc. // Calls with total=0 print an info line; the progress bar is started on // the first call with total>0 and stopped when current reaches total. +// When writing to a non-terminal (e.g. a log file), progress bar is skipped +// entirely and a plain-text status line is emitted at most every 5 seconds. 
func (p *Progress) AsProgressFunc() func(current, total int, message string) { + const logInterval = 5 * time.Second started := false + var lastLog time.Time return func(current, total int, message string) { if total == 0 { p.Info(message) return } + if !p.isTerminal { + // Non-terminal: emit a plain status line every logInterval. + now := time.Now() + if current < total && now.Sub(lastLog) < logInterval { + return + } + lastLog = now + pct := current * 100 / total + _, _ = fmt.Fprintf(p.writer, "Indexing: %d/%d (%d%%)\n", current, total, pct) + return + } if !started { p.Start("Indexing", total) started = true diff --git a/skills/doctor/SKILL.md b/skills/doctor/SKILL.md index a452d32..59fb9a4 100644 --- a/skills/doctor/SKILL.md +++ b/skills/doctor/SKILL.md @@ -13,6 +13,6 @@ Run a health check on the Lumen semantic search setup for the current project. time - If MCP or plugin issues found (not index issues), suggest remediation (e.g. "reinstall the lumen plugin") - - If the index is stale or does not exist, use the `semantic_search` MCP tool - call with `force_reindex: true` to trigger a re-index and report the new - status after completion. + - If the index is stale or does not exist, inform the user that reindexing + is triggered automatically by the SessionStart hook — ask them to open a + new terminal session in the project directory to kick it off. diff --git a/skills/reindex/SKILL.md b/skills/reindex/SKILL.md index c627da3..74bf8dd 100644 --- a/skills/reindex/SKILL.md +++ b/skills/reindex/SKILL.md @@ -1,12 +1,5 @@ # Lumen Reindex -Force a full re-index of the current project's codebase. - -## Steps - -1. Call mcp**lumen**semantic_search with: - - path: the current working directory - - query: "index status" (a simple query to trigger the search) - - force_reindex: true - - summary: true -2. Report how many files were indexed +Reindexing is handled automatically by the SessionStart hook. 
To trigger a +fresh index, ask the user to open a new terminal session in the project +directory. The hook will detect stale state and reindex in the background.