Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ marketplace.
| ---------------------------- | ----------------------------------- | ------------------------------- |
| SQLite | Vector storage + schema persistence | Uses sqlite-vec for KNN search |
| MCP (Model Context Protocol) | Agent integration | stdio transport |
| Ollama/LM Studio | Embeddings generation | Local models, configurable |
| Ollama/LM Studio/OpenAI API | Embeddings generation | Local + cloud, configurable |
| Go AST | Code parsing into semantic chunks | Functions, types, methods, etc. |
| Cobra | CLI framework | Subcommands: index, stdio |

Expand Down Expand Up @@ -106,16 +106,19 @@ system handles MCP registration, hooks, and skills declaratively via:

## Environment Variables

| Variable | Default | Description |
| ------------------------ | ------------------------ | ------------------------------------------ |
| `LUMEN_BACKEND` | `ollama` | Embedding backend (`ollama` or `lmstudio`) |
| `LUMEN_EMBED_MODEL` | see note ¹ | Embedding model (must be in registry) |
| `OLLAMA_HOST` | `http://localhost:11434` | Ollama server URL |
| `LM_STUDIO_HOST` | `http://localhost:1234` | LM Studio server URL |
| `LUMEN_MAX_CHUNK_TOKENS` | `512` | Max tokens per chunk before splitting |
| Variable | Default | Description |
| ------------------------ | -------------------------- | ------------------------------------------------------- |
| `LUMEN_BACKEND` | `ollama` | Embedding backend (`ollama`, `lmstudio`, or `openai`) |
| `LUMEN_EMBED_MODEL` | see note ¹ | Embedding model (must be in registry) |
| `OLLAMA_HOST` | `http://localhost:11434` | Ollama server URL |
| `LM_STUDIO_HOST` | `http://localhost:1234` | LM Studio server URL |
| `OPENAI_API_KEY` | (none) | API key for `openai` backend (required) |
| `OPENAI_BASE_URL` | `https://api.openai.com` | Base URL for `openai` backend (Voyage AI, Azure, etc.) |
| `LUMEN_MAX_CHUNK_TOKENS` | `512` | Max tokens per chunk before splitting |

¹ `ordis/jina-embeddings-v2-base-code` (Ollama),
`nomic-ai/nomic-embed-code-GGUF` (LM Studio)
`nomic-ai/nomic-embed-code-GGUF` (LM Studio),
`text-embedding-3-small` (OpenAI)

## Project Structure

Expand Down
2 changes: 2 additions & 0 deletions cmd/embedder.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ func newEmbedder(cfg config.Config) (embedder.Embedder, error) {
return embedder.NewOllama(cfg.Model, cfg.Dims, cfg.CtxLength, cfg.OllamaHost)
case config.BackendLMStudio:
return embedder.NewLMStudio(cfg.Model, cfg.Dims, cfg.LMStudioHost)
case config.BackendOpenAI:
return embedder.NewOpenAI(cfg.Model, cfg.Dims, cfg.OpenAIBaseURL, cfg.OpenAIAPIKey)
default:
return nil, fmt.Errorf("unknown backend %q", cfg.Backend)
}
Expand Down
23 changes: 20 additions & 3 deletions cmd/stdio.go
Original file line number Diff line number Diff line change
Expand Up @@ -481,11 +481,20 @@ func (ic *indexerCache) handleIndexStatus(_ context.Context, _ *mcp.CallToolRequ

// handleHealthCheck pings the configured embedding service and reports status.
func (ic *indexerCache) handleHealthCheck(ctx context.Context, _ *mcp.CallToolRequest, _ HealthCheckInput) (*mcp.CallToolResult, any, error) {
host := ic.cfg.OllamaHost
probeURL := host + "/api/tags"
if ic.cfg.Backend == config.BackendLMStudio {
var host, probeURL string
switch ic.cfg.Backend {
case config.BackendOllama:
host = ic.cfg.OllamaHost
probeURL = host + "/api/tags"
case config.BackendLMStudio:
host = ic.cfg.LMStudioHost
probeURL = host + "/v1/models"
case config.BackendOpenAI:
host = ic.cfg.OpenAIBaseURL
probeURL = host + "/v1/models"
default:
return healthResult(ic.cfg.Backend, host, ic.cfg.Model, false,
fmt.Sprintf("unknown backend %q", ic.cfg.Backend)), nil, nil
}

probeCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
Expand All @@ -497,13 +506,21 @@ func (ic *indexerCache) handleHealthCheck(ctx context.Context, _ *mcp.CallToolRe
fmt.Sprintf("failed to create request: %v", err)), nil, nil
}

if ic.cfg.Backend == config.BackendOpenAI && ic.cfg.OpenAIAPIKey != "" {
req.Header.Set("Authorization", "Bearer "+ic.cfg.OpenAIAPIKey)
}

resp, err := http.DefaultClient.Do(req)
if err != nil {
return healthResult(ic.cfg.Backend, host, ic.cfg.Model, false,
fmt.Sprintf("service unreachable: %v", err)), nil, nil
}
_ = resp.Body.Close()

if resp.StatusCode == http.StatusUnauthorized {
return healthResult(ic.cfg.Backend, host, ic.cfg.Model, false,
"service reachable but API key is invalid"), nil, nil
}
if resp.StatusCode >= 500 {
return healthResult(ic.cfg.Backend, host, ic.cfg.Model, false,
fmt.Sprintf("service returned HTTP %d", resp.StatusCode)), nil, nil
Expand Down
20 changes: 17 additions & 3 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ const (
BackendOllama = "ollama"
// BackendLMStudio is the backend identifier for LM Studio.
BackendLMStudio = "lmstudio"
// BackendOpenAI is the backend identifier for OpenAI-compatible APIs.
BackendOpenAI = "openai"
)

// Config holds the resolved configuration for the lumen process.
Expand All @@ -41,25 +43,35 @@ type Config struct {
OllamaHost string
Backend string
LMStudioHost string
OpenAIBaseURL string
OpenAIAPIKey string
}

// Load reads configuration from environment variables and the model registry.
func Load() (Config, error) {
backend := EnvOrDefault("LUMEN_BACKEND", BackendOllama)
if backend != BackendOllama && backend != BackendLMStudio {
return Config{}, fmt.Errorf("unknown backend %q: must be %q or %q", backend, BackendOllama, BackendLMStudio)
if backend != BackendOllama && backend != BackendLMStudio && backend != BackendOpenAI {
return Config{}, fmt.Errorf("unknown backend %q: must be %q, %q, or %q", backend, BackendOllama, BackendLMStudio, BackendOpenAI)
}

defaultModel := embedder.DefaultOllamaModel
if backend == BackendLMStudio {
switch backend {
case BackendLMStudio:
defaultModel = embedder.DefaultLMStudioModel
case BackendOpenAI:
defaultModel = embedder.DefaultOpenAIModel
}

model := EnvOrDefault("LUMEN_EMBED_MODEL", defaultModel)
spec, ok := embedder.KnownModels[model]
if !ok {
return Config{}, fmt.Errorf("unknown embedding model %q", model)
}
openAIAPIKey := os.Getenv("OPENAI_API_KEY")
if backend == BackendOpenAI && openAIAPIKey == "" {
return Config{}, fmt.Errorf("OPENAI_API_KEY environment variable is required for the %q backend", BackendOpenAI)
}

return Config{
Model: model,
Dims: spec.Dims,
Expand All @@ -68,6 +80,8 @@ func Load() (Config, error) {
OllamaHost: EnvOrDefault("OLLAMA_HOST", "http://localhost:11434"),
Backend: backend,
LMStudioHost: EnvOrDefault("LM_STUDIO_HOST", "http://localhost:1234"),
OpenAIBaseURL: EnvOrDefault("OPENAI_BASE_URL", "https://api.openai.com"),
OpenAIAPIKey: openAIAPIKey,
}, nil
}

Expand Down
8 changes: 8 additions & 0 deletions internal/embedder/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ const DefaultOllamaModel = "ordis/jina-embeddings-v2-base-code"
// DefaultLMStudioModel is the default model when using the LM Studio backend.
const DefaultLMStudioModel = "nomic-ai/nomic-embed-code-GGUF"

// DefaultOpenAIModel is the default model when using the OpenAI-compatible backend.
const DefaultOpenAIModel = "text-embedding-3-small"

// DefaultModel is an alias for DefaultOllamaModel for backward compatibility.
const DefaultModel = DefaultOllamaModel

Expand Down Expand Up @@ -66,4 +69,9 @@ var KnownModels = map[string]ModelSpec{
"qwen3-embedding:4b": {Dims: 2560, CtxLength: 40960, Backend: "ollama", MinScore: 0.30},
"qwen3-embedding:0.6b": {Dims: 1024, CtxLength: 32768, Backend: "ollama", MinScore: 0.30},
"all-minilm": {Dims: 384, CtxLength: 512, Backend: "ollama", MinScore: 0.20},
"text-embedding-3-small": {Dims: 1536, CtxLength: 8191, Backend: "openai", MinScore: 0.20},
"text-embedding-3-large": {Dims: 3072, CtxLength: 8191, Backend: "openai", MinScore: 0.15},
"text-embedding-ada-002": {Dims: 1536, CtxLength: 8191, Backend: "openai", MinScore: 0.20},
"voyage-code-3": {Dims: 1024, CtxLength: 32000, Backend: "openai", MinScore: 0.25},
"voyage-3-large": {Dims: 1024, CtxLength: 32000, Backend: "openai", MinScore: 0.25},
}
11 changes: 11 additions & 0 deletions internal/embedder/models_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ func TestKnownModels(t *testing.T) {
"qwen3-embedding:4b": {Dims: 2560, CtxLength: 40960, Backend: "ollama", MinScore: 0.30},
"qwen3-embedding:0.6b": {Dims: 1024, CtxLength: 32768, Backend: "ollama", MinScore: 0.30},
"all-minilm": {Dims: 384, CtxLength: 512, Backend: "ollama", MinScore: 0.20},
"text-embedding-3-small": {Dims: 1536, CtxLength: 8191, Backend: "openai", MinScore: 0.20},
"text-embedding-3-large": {Dims: 3072, CtxLength: 8191, Backend: "openai", MinScore: 0.15},
"text-embedding-ada-002": {Dims: 1536, CtxLength: 8191, Backend: "openai", MinScore: 0.20},
"voyage-code-3": {Dims: 1024, CtxLength: 32000, Backend: "openai", MinScore: 0.25},
"voyage-3-large": {Dims: 1024, CtxLength: 32000, Backend: "openai", MinScore: 0.25},
}

for name, want := range expected {
Expand Down Expand Up @@ -61,6 +66,12 @@ func TestDefaultLMStudioModelInRegistry(t *testing.T) {
}
}

// TestDefaultOpenAIModelInRegistry verifies that the default OpenAI model
// name resolves to an entry in the KnownModels registry, so a bare
// `LUMEN_BACKEND=openai` configuration cannot fail model lookup.
func TestDefaultOpenAIModelInRegistry(t *testing.T) {
	_, ok := KnownModels[DefaultOpenAIModel]
	if !ok {
		t.Errorf("DefaultOpenAIModel %q is not in KnownModels", DefaultOpenAIModel)
	}
}

func TestDimensionAwareMinScore(t *testing.T) {
tests := []struct {
dims int
Expand Down
166 changes: 166 additions & 0 deletions internal/embedder/openai.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// Copyright 2026 Aeneas Rekkas
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package embedder

import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"slices"
"time"

"github.com/sethvargo/go-retry"
)

// OpenAI implements the Embedder interface using an OpenAI-compatible
// /v1/embeddings endpoint. It works with OpenAI, Voyage AI, Azure OpenAI,
// Together AI, and any other service exposing the same wire format.
type OpenAI struct {
	model      string       // embedding model name sent in each request body
	dimensions int          // expected embedding vector length reported by Dimensions
	baseURL    string       // API base URL, e.g. "https://api.openai.com"; paths like /v1/embeddings are appended
	apiKey     string       // Bearer token sent in the Authorization header
	client     *http.Client // shared HTTP client with an overall per-request timeout
}

// NewOpenAI creates a new OpenAI-compatible embedder.
// baseURL is the API base URL (e.g. "https://api.openai.com"); trailing
// slashes are stripped so endpoint paths such as "/v1/embeddings" join
// without producing a double slash. apiKey is the Bearer token for
// authentication and must be non-empty.
func NewOpenAI(model string, dimensions int, baseURL string, apiKey string) (*OpenAI, error) {
	if apiKey == "" {
		return nil, fmt.Errorf("API key is required for OpenAI-compatible backend")
	}
	// Normalize so "https://host/" and "https://host" behave identically:
	// request URLs are built as baseURL + "/v1/...".
	for len(baseURL) > 0 && baseURL[len(baseURL)-1] == '/' {
		baseURL = baseURL[:len(baseURL)-1]
	}
	return &OpenAI{
		model:      model,
		dimensions: dimensions,
		baseURL:    baseURL,
		apiKey:     apiKey,
		client: &http.Client{
			// Overall timeout covering the full request/response cycle.
			Timeout: 10 * time.Minute,
		},
	}, nil
}

// Dimensions returns the embedding vector dimensionality.
// This is the value supplied at construction, used by callers to size
// vector storage; it is not verified against the service's responses here.
func (o *OpenAI) Dimensions() int {
	return o.dimensions
}

// ModelName returns the model name used for embeddings, exactly as it is
// sent in the "model" field of each /v1/embeddings request.
func (o *OpenAI) ModelName() string {
	return o.model
}

// openaiEmbedRequest is the JSON body sent to /v1/embeddings.
type openaiEmbedRequest struct {
	Model string   `json:"model"` // embedding model identifier
	Input []string `json:"input"` // batch of texts to embed
}

// openaiEmbedItem is a single embedding item in the response.
type openaiEmbedItem struct {
	Embedding []float32 `json:"embedding"`
	Index     int       `json:"index"` // position of the corresponding input text
}

// openaiEmbedResponse is the JSON body returned from /v1/embeddings.
// Only the "data" array is decoded; usage and model metadata are ignored.
type openaiEmbedResponse struct {
	Data []openaiEmbedItem `json:"data"`
}

// Embed converts texts into embedding vectors, splitting the input into
// batches of embedBatchSize requests. Vectors are returned in the same
// order as the input texts; a nil or empty input yields (nil, nil).
func (o *OpenAI) Embed(ctx context.Context, texts []string) ([][]float32, error) {
	if len(texts) == 0 {
		return nil, nil
	}

	// Pre-size: exactly one vector is expected per input text.
	allVecs := make([][]float32, 0, len(texts))
	for i := 0; i < len(texts); i += embedBatchSize {
		batch := texts[i:min(i+embedBatchSize, len(texts))]

		vecs, err := o.embedBatch(ctx, batch)
		if err != nil {
			return nil, fmt.Errorf("embedding batch starting at %d: %w", i, err)
		}
		allVecs = append(allVecs, vecs...)
	}

	return allVecs, nil
}

// embedBatch sends a single batch of texts to the /v1/embeddings endpoint.
// Retries up to embedMaxRetries times on transient errors (5xx, 429 rate limits,
// network failures), respecting context cancellation between attempts.
// The returned vectors are ordered to match texts, and an error is returned
// if the service yields a different number of embeddings than inputs.
func (o *OpenAI) embedBatch(ctx context.Context, texts []string) ([][]float32, error) {
	bodyBytes, err := json.Marshal(openaiEmbedRequest{
		Model: o.model,
		Input: texts,
	})
	if err != nil {
		return nil, fmt.Errorf("marshalling request: %w", err)
	}

	b := retry.NewExponential(100 * time.Millisecond)

	var embedResp openaiEmbedResponse
	err = retry.Do(ctx, retry.WithMaxRetries(embedMaxRetries-1, b), func(ctx context.Context) error {
		req, err := http.NewRequestWithContext(ctx, http.MethodPost, o.baseURL+"/v1/embeddings", bytes.NewReader(bodyBytes))
		if err != nil {
			return fmt.Errorf("creating request: %w", err)
		}
		req.Header.Set("Content-Type", "application/json")
		req.Header.Set("Authorization", "Bearer "+o.apiKey)

		resp, err := o.client.Do(req)
		if err != nil {
			// Network-level failures are treated as transient.
			return retry.RetryableError(fmt.Errorf("request failed: %w", err))
		}

		// Read the body before the status checks so error responses can be
		// quoted in the message and the connection can be reused.
		body, readErr := io.ReadAll(resp.Body)
		_ = resp.Body.Close()

		if resp.StatusCode == http.StatusTooManyRequests {
			return retry.RetryableError(fmt.Errorf("rate limited: status %d", resp.StatusCode))
		}
		if resp.StatusCode >= 500 {
			return retry.RetryableError(fmt.Errorf("server error: status %d", resp.StatusCode))
		}
		if resp.StatusCode != http.StatusOK {
			return fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body))
		}
		if readErr != nil {
			return fmt.Errorf("reading response body: %w", readErr)
		}

		// Defensive reset so data from an earlier attempt can never leak
		// into the result of a later one.
		embedResp = openaiEmbedResponse{}
		return json.Unmarshal(body, &embedResp)
	})
	if err != nil {
		return nil, fmt.Errorf("openai embed: %w", err)
	}

	// Guard against silent truncation: the caller expects exactly one
	// vector per input text.
	if len(embedResp.Data) != len(texts) {
		return nil, fmt.Errorf("openai embed: got %d embeddings for %d inputs", len(embedResp.Data), len(texts))
	}

	// Sort by index — the OpenAI spec allows out-of-order responses.
	slices.SortFunc(embedResp.Data, func(a, b openaiEmbedItem) int {
		return a.Index - b.Index
	})

	vecs := make([][]float32, len(embedResp.Data))
	for i, item := range embedResp.Data {
		vecs[i] = item.Embedding
	}
	return vecs, nil
}
Loading
Loading