Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
508a4e8
docs: add spec for non-blocking semantic_search with partial results
aeneasr Mar 23, 2026
927115f
docs: address spec review feedback for non-blocking search
aeneasr Mar 23, 2026
01a8f63
docs: add implementation plan for non-blocking semantic_search
aeneasr Mar 23, 2026
d6b5483
refactor(cmd): add StaleWarning field and WaitGroup to indexerCache
aeneasr Mar 23, 2026
56c3280
feat(cmd): Close() waits for background reindex goroutines
aeneasr Mar 23, 2026
ceeee30
feat(cmd): non-blocking ensureIndexed with 15s timeout and background…
aeneasr Mar 23, 2026
a2e6bd2
style(cmd): use WaitGroup.Go instead of manual Add/Done pattern
aeneasr Mar 23, 2026
7904ce6
docs(cmd): note worst-case wait time on Close()
aeneasr Mar 23, 2026
7822ecc
feat(cmd): render StaleWarning in semantic_search output
aeneasr Mar 23, 2026
09a6cdb
test(cmd): verify ensureIndexed skips reindex when flock is held
aeneasr Mar 23, 2026
97e02f6
test(cmd): add timeout and fast-path tests for non-blocking ensureInd…
aeneasr Mar 23, 2026
d03cb18
style(cmd): fix errcheck lint in new test functions
aeneasr Mar 23, 2026
51b843d
fix(store): increase busy_timeout to 120s and use INSERT OR REPLACE f…
aeneasr Mar 23, 2026
c958bcc
fix(cmd): eliminate reindex fragmentation causing constant cpu usage
aeneasr Mar 23, 2026
a0fce08
feat(index): add slog to background indexer and enrich Stats with cha…
aeneasr Mar 23, 2026
eab93ad
refactor(cmd): remove force_reindex — reindexing now session-start only
aeneasr Mar 24, 2026
7888d46
feat(index): add root hashes and worktree info to indexing plan log
aeneasr Mar 24, 2026
e4b6e6a
fix(store,index): auto-recover from SQLite database corruption
aeneasr Mar 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,28 @@ system handles MCP registration, hooks, and skills declaratively via:
└── testdata/ # Fixtures for E2E tests
```

## Output & Logging

Lumen has two execution contexts with distinct output strategies:

**Interactive** (`lumen index`, `lumen purge`, `lumen search`):
- Progress and status → `tui.Progress` (pterm) on **stderr**
- Completion summaries → `fmt.Printf` on **stdout**
- Errors → `fmt.Fprintf(os.Stderr, ...)`

**Background / MCP** (`lumen stdio` MCP server, background indexer spawned by
SessionStart hook):
- All output → `slog` (JSON) → `~/.local/share/lumen/debug.log`
- Use `newDebugLogger()` from `cmd/log.go` — opens the log file; falls back to
stderr only if the file cannot be created
- stderr of the background indexer process is set to `nil` (discarded) so that
pterm output never pollutes the log file

**Rule**: never mix these. Interactive commands use tui/fmt; background/MCP code
uses slog. If a command can run in both modes (e.g. `lumen index`), add slog for
the background path and keep tui/fmt for the interactive path — they coexist
because slog writes to the log file while tui writes to the process stderr.

## Key Design Decisions

- **Merkle tree for diffs**: Avoid re-indexing unchanged code
Expand Down
19 changes: 15 additions & 4 deletions cmd/hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,19 @@ import (
"os"
"path/filepath"
"strings"
"time"

"github.com/spf13/cobra"

"github.com/ory/lumen/internal/config"
"github.com/ory/lumen/internal/store"
)

// backgroundIndexStaleness is how old last_indexed_at must be before
// SessionStart spawns a background indexer. This prevents every new terminal
// from triggering a full merkle walk when the index was just updated.
const backgroundIndexStaleness = 5 * time.Minute

// NOTE: Hooks are now declared in hooks/hooks.json (plugin system).
// The hook subcommands remain as the execution targets for those declarations.

Expand Down Expand Up @@ -79,10 +85,6 @@ func runHookSessionStart(_ *cobra.Command, args []string) error {
cwd, _ = os.Getwd()
}

// Kick off a background incremental re-index so the index is fresh
// by the time the first semantic_search arrives.
spawnBackgroundIndexer(cwd)

content := generateSessionContext(mcpName, cwd)

out := hookOutput{
Expand Down Expand Up @@ -135,6 +137,15 @@ func generateSessionContextInternal(mcpName, cwd string, findDonor func(string,
}
defer func() { _ = s.Close() }()

// Spawn background indexer if the index is stale or has never been
// successfully completed. This avoids spawning on every session start
// when the index was recently updated.
if val, metaErr := s.GetMeta("last_indexed_at"); metaErr != nil || val == "" {
bgIndexer(cwd)
} else if t, parseErr := time.Parse(time.RFC3339, val); parseErr != nil || time.Since(t) > backgroundIndexStaleness {
bgIndexer(cwd)
}

stats, err := s.Stats()
if err != nil {
return directive
Expand Down
13 changes: 4 additions & 9 deletions cmd/hook_spawn_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@ package cmd
import (
"os"
"os/exec"
"path/filepath"
"syscall"

"github.com/ory/lumen/internal/config"
)

// spawnBackgroundIndexer launches "lumen index <projectPath>" as a fully
Expand All @@ -39,13 +36,11 @@ func spawnBackgroundIndexer(projectPath string) {
}
cmd := exec.Command(exe, "index", projectPath)
cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
// Discard stdout and stderr — the background indexer uses slog which
// writes structured JSON directly to debug.log. Piping stderr to the log
// file would mix in pterm progress output with the structured log lines.
cmd.Stdout = nil

logPath := filepath.Join(config.XDGDataDir(), "lumen", "debug.log")
if f, err := os.OpenFile(logPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644); err == nil {
cmd.Stderr = f
defer func() { _ = f.Close() }()
}
cmd.Stderr = nil

if err := cmd.Start(); err != nil {
return
Expand Down
56 changes: 49 additions & 7 deletions cmd/hook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,31 @@ import (
"path/filepath"
"strings"
"testing"
"time"

"github.com/ory/lumen/internal/config"
"github.com/ory/lumen/internal/store"
)

// writeHookTestDB creates a minimal SQLite DB stamped with last_indexed_at,
// using the configured embedding model's dimensions so store.New doesn't reset
// the schema when generateSessionContextInternal opens it.
//
// The timestamp is written in RFC3339 (UTC), which is the format the
// staleness check in generateSessionContextInternal parses back out; tests
// pass a time in the past (or future-relative offset) to steer whether the
// background indexer is spawned.
func writeHookTestDB(t *testing.T, dbPath string, lastIndexedAt time.Time) {
// Mark as a helper so failures are attributed to the calling test line.
t.Helper()
// Load the real config to obtain cfg.Dims — the DB schema is tied to the
// embedding dimensions, and they must match what the code under test will
// later open the DB with (presumably store.New rebuilds the schema on a
// dims mismatch — see the doc comment above; TODO confirm in store).
cfg, err := config.Load()
if err != nil {
t.Fatalf("config.Load: %v", err)
}
s, err := store.New(dbPath, cfg.Dims)
if err != nil {
t.Fatalf("store.New: %v", err)
}
// Best-effort close; the store is only needed long enough to stamp meta.
defer func() { _ = s.Close() }()
// Stamp the index-freshness marker the SessionStart staleness check reads.
if err := s.SetMeta("last_indexed_at", lastIndexedAt.UTC().Format(time.RFC3339)); err != nil {
t.Fatalf("SetMeta: %v", err)
}
}

// TestMain detects when the cmd test binary is invoked as a background
// indexer subprocess (via spawnBackgroundIndexer → os.Executable()) and exits
// immediately instead of running the full test suite. Without this guard,
Expand Down Expand Up @@ -208,34 +229,55 @@ func TestGenerateSessionContextInternal_SpawnsWhenNoDB(t *testing.T) {
})
}

func TestGenerateSessionContextInternal_NoSpawnWhenDBExists(t *testing.T) {
func TestGenerateSessionContextInternal_NoSpawnWhenFresh(t *testing.T) {
tmpDir := t.TempDir()
t.Setenv("XDG_DATA_HOME", tmpDir)

// Use the same model the function will load so the DB path matches.
cfg, err := config.Load()
if err != nil {
t.Fatalf("config.Load: %v", err)
}

dbPath := config.DBPathForProject("/myproject", cfg.Model)
if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(dbPath, []byte{}, 0o644); err != nil {
writeHookTestDB(t, dbPath, time.Now().Add(-30*time.Second))

called := false
generateSessionContextInternal("lumen", "/myproject",
func(_, _ string) string { return "" },
func(_ string) { called = true },
)
if called {
t.Fatal("bgIndexer must not be called when index was recently updated")
}
}

func TestGenerateSessionContextInternal_SpawnsWhenStale(t *testing.T) {
tmpDir := t.TempDir()
t.Setenv("XDG_DATA_HOME", tmpDir)

cfg, err := config.Load()
if err != nil {
t.Fatalf("config.Load: %v", err)
}
dbPath := config.DBPathForProject("/myproject", cfg.Model)
if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil {
t.Fatal(err)
}
writeHookTestDB(t, dbPath, time.Now().Add(-10*time.Minute))

called := false
generateSessionContextInternal("lumen", "/myproject",
func(_, _ string) string { return "/some/donor.db" },
func(_, _ string) string { return "" },
func(_ string) { called = true },
)
if called {
t.Fatal("bgIndexer must not be called when an index already exists")
if !called {
t.Fatal("bgIndexer must be called when index is stale")
}
}


func TestGenerateSessionContextInternal_MessageWithDonor(t *testing.T) {
tmpDir := t.TempDir()
t.Setenv("XDG_DATA_HOME", tmpDir)
Expand Down
50 changes: 47 additions & 3 deletions cmd/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package cmd
import (
"context"
"fmt"
"log/slog"
"os"
"os/signal"
"path/filepath"
Expand Down Expand Up @@ -45,6 +46,11 @@ var indexCmd = &cobra.Command{
}

func runIndex(cmd *cobra.Command, args []string) error {
logger, logFile := newDebugLogger()
if logFile != nil {
defer func() { _ = logFile.Close() }()
}

cfg, err := config.Load()
if err != nil {
return err
Expand Down Expand Up @@ -72,6 +78,7 @@ func runIndex(cmd *cobra.Command, args []string) error {
if lock == nil {
// Another indexer is already running for this project — skip silently.
// This is the normal case when multiple Claude terminals are open.
logger.Info("index skipped: another indexer is already running", "project", projectPath)
fmt.Fprintln(os.Stderr, "Another indexer is already running for this project. Skipping.")
return nil
}
Expand All @@ -82,12 +89,13 @@ func runIndex(cmd *cobra.Command, args []string) error {
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
defer stop()

idx, err := setupIndexer(&cfg, dbPath)
idx, err := setupIndexer(&cfg, dbPath, logger)
if err != nil {
return err
}
defer func() { _ = idx.Close() }()

logger.Info("indexing started", "project", projectPath, "model", cfg.Model, "dims", cfg.Dims)
p := tui.NewProgress(os.Stderr)
p.Info(fmt.Sprintf("Indexing %s (model: %s, dims: %d)", projectPath, cfg.Model, cfg.Dims))

Expand All @@ -98,13 +106,47 @@ func runIndex(cmd *cobra.Command, args []string) error {
// A signal arrived; treat as clean exit. If an unrelated error
// also occurred in the same instant, it is intentionally dropped —
// the cancellation is the primary cause and the lock will be released.
logger.Info("indexing cancelled by signal", "project", projectPath)
return nil
}
logger.Error("indexing failed", "project", projectPath, "err", err)
return err
}

elapsed := time.Since(start).Round(time.Millisecond)
if stats.Reason == "already fresh" {
logger.Info("index already fresh",
"project", projectPath,
"elapsed", elapsed.String(),
)
} else {
logger.Info("indexing complete",
"project", projectPath,
"reason", stats.Reason,
"total_files", stats.TotalFiles,
"files_unchanged", stats.TotalFiles-stats.FilesChanged,
"files_added", stats.FilesAdded,
"files_modified", stats.FilesModified,
"files_removed", stats.FilesRemoved,
"indexed_files", stats.IndexedFiles,
"chunks_created", stats.ChunksCreated,
"old_root_hash", stats.OldRootHash,
"new_root_hash", stats.NewRootHash,
"elapsed", elapsed.String(),
)
}
if stats.Reason != "" {
fmt.Printf("Reason: %s\n", stats.Reason)
}
if stats.OldRootHash != "" {
fmt.Printf("Root hash: %s -> %s\n", stats.OldRootHash[:16], stats.NewRootHash[:16])
} else if stats.NewRootHash != "" {
fmt.Printf("Root hash: (none) -> %s\n", stats.NewRootHash[:16])
}
fmt.Printf("Files: %d added, %d modified, %d removed (%d total in project)\n",
stats.FilesAdded, stats.FilesModified, stats.FilesRemoved, stats.TotalFiles)
fmt.Printf("Done. Indexed %d files, %d chunks in %s.\n",
stats.IndexedFiles, stats.ChunksCreated, time.Since(start).Round(time.Millisecond))
stats.IndexedFiles, stats.ChunksCreated, elapsed)
return nil
}

Expand All @@ -124,7 +166,7 @@ func applyModelFlag(cmd *cobra.Command, cfg *config.Config) error {
}

// setupIndexer receives dbPath so it is computed exactly once in runIndex.
func setupIndexer(cfg *config.Config, dbPath string) (*index.Indexer, error) {
func setupIndexer(cfg *config.Config, dbPath string, logger *slog.Logger) (*index.Indexer, error) {
emb, err := newEmbedder(*cfg)
if err != nil {
return nil, fmt.Errorf("create embedder: %w", err)
Expand All @@ -134,6 +176,7 @@ func setupIndexer(cfg *config.Config, dbPath string) (*index.Indexer, error) {
if err != nil {
return nil, fmt.Errorf("create indexer: %w", err)
}
idx.SetLogger(logger)
return idx, nil
}

Expand All @@ -152,6 +195,7 @@ func performIndexing(ctx context.Context, cmd *cobra.Command, idx *index.Indexer
}

if !reindexed {
stats.Reason = "already fresh"
fmt.Println("Index is already up to date.")
}

Expand Down
2 changes: 1 addition & 1 deletion cmd/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ func runSearch(cmd *cobra.Command, args []string) error {
tr.record("path resolution", indexRoot)

// Span 2: indexer setup
idx, err := setupIndexer(&cfg, indexRoot)
idx, err := setupIndexer(&cfg, indexRoot, nil)
if err != nil {
return fmt.Errorf("setup indexer: %w", err)
}
Expand Down
Loading
Loading