Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ marketplace.
| ---------------------------- | ----------------------------------- | ------------------------------- |
| SQLite | Vector storage + schema persistence | Uses sqlite-vec for KNN search |
| MCP (Model Context Protocol) | Agent integration | stdio transport |
| Ollama/LM Studio | Embeddings generation | Local models, configurable |
| Ollama/LM Studio/OpenAI API | Embeddings generation | Local + cloud, configurable |
| Go AST | Code parsing into semantic chunks | Functions, types, methods, etc. |
| Cobra | CLI framework | Subcommands: index, stdio |

Expand Down Expand Up @@ -106,16 +106,19 @@ system handles MCP registration, hooks, and skills declaratively via:

## Environment Variables

| Variable | Default | Description |
| ------------------------ | ------------------------ | ------------------------------------------ |
| `LUMEN_BACKEND` | `ollama` | Embedding backend (`ollama` or `lmstudio`) |
| `LUMEN_EMBED_MODEL` | see note ¹ | Embedding model (must be in registry) |
| `OLLAMA_HOST` | `http://localhost:11434` | Ollama server URL |
| `LM_STUDIO_HOST` | `http://localhost:1234` | LM Studio server URL |
| `LUMEN_MAX_CHUNK_TOKENS` | `512` | Max tokens per chunk before splitting |
| Variable | Default | Description |
| ------------------------ | -------------------------- | ------------------------------------------------------- |
| `LUMEN_BACKEND` | `ollama` | Embedding backend (`ollama`, `lmstudio`, or `openai`) |
| `LUMEN_EMBED_MODEL` | see note ¹ | Embedding model (must be in registry) |
| `OLLAMA_HOST` | `http://localhost:11434` | Ollama server URL |
| `LM_STUDIO_HOST` | `http://localhost:1234` | LM Studio server URL |
| `OPENAI_API_KEY` | (none) | API key for `openai` backend (required) |
| `OPENAI_BASE_URL` | `https://api.openai.com` | Base URL for `openai` backend (Voyage AI, Azure, etc.) |
| `LUMEN_MAX_CHUNK_TOKENS` | `512` | Max tokens per chunk before splitting |

¹ `ordis/jina-embeddings-v2-base-code` (Ollama),
`nomic-ai/nomic-embed-code-GGUF` (LM Studio)
`nomic-ai/nomic-embed-code-GGUF` (LM Studio),
`text-embedding-3-small` (OpenAI)

## Project Structure

Expand Down
2 changes: 2 additions & 0 deletions cmd/embedder.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ func newEmbedder(cfg config.Config) (embedder.Embedder, error) {
return embedder.NewOllama(cfg.Model, cfg.Dims, cfg.CtxLength, cfg.OllamaHost)
case config.BackendLMStudio:
return embedder.NewLMStudio(cfg.Model, cfg.Dims, cfg.LMStudioHost)
case config.BackendOpenAI:
return embedder.NewOpenAI(cfg.Model, cfg.Dims, cfg.OpenAIBaseURL, cfg.OpenAIAPIKey)
default:
return nil, fmt.Errorf("unknown backend %q", cfg.Backend)
}
Expand Down
23 changes: 20 additions & 3 deletions cmd/stdio.go
Original file line number Diff line number Diff line change
Expand Up @@ -481,11 +481,20 @@ func (ic *indexerCache) handleIndexStatus(_ context.Context, _ *mcp.CallToolRequ

// handleHealthCheck pings the configured embedding service and reports status.
func (ic *indexerCache) handleHealthCheck(ctx context.Context, _ *mcp.CallToolRequest, _ HealthCheckInput) (*mcp.CallToolResult, any, error) {
host := ic.cfg.OllamaHost
probeURL := host + "/api/tags"
if ic.cfg.Backend == config.BackendLMStudio {
var host, probeURL string
switch ic.cfg.Backend {
case config.BackendOllama:
host = ic.cfg.OllamaHost
probeURL = host + "/api/tags"
case config.BackendLMStudio:
host = ic.cfg.LMStudioHost
probeURL = host + "/v1/models"
case config.BackendOpenAI:
host = ic.cfg.OpenAIBaseURL
probeURL = host + "/v1/models"
default:
return healthResult(ic.cfg.Backend, host, ic.cfg.Model, false,
fmt.Sprintf("unknown backend %q", ic.cfg.Backend)), nil, nil
}

probeCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
Expand All @@ -497,13 +506,21 @@ func (ic *indexerCache) handleHealthCheck(ctx context.Context, _ *mcp.CallToolRe
fmt.Sprintf("failed to create request: %v", err)), nil, nil
}

if ic.cfg.Backend == config.BackendOpenAI && ic.cfg.OpenAIAPIKey != "" {
req.Header.Set("Authorization", "Bearer "+ic.cfg.OpenAIAPIKey)
}

resp, err := http.DefaultClient.Do(req)
if err != nil {
return healthResult(ic.cfg.Backend, host, ic.cfg.Model, false,
fmt.Sprintf("service unreachable: %v", err)), nil, nil
}
_ = resp.Body.Close()

if resp.StatusCode == http.StatusUnauthorized {
return healthResult(ic.cfg.Backend, host, ic.cfg.Model, false,
"service reachable but API key is invalid"), nil, nil
}
if resp.StatusCode >= 500 {
return healthResult(ic.cfg.Backend, host, ic.cfg.Model, false,
fmt.Sprintf("service returned HTTP %d", resp.StatusCode)), nil, nil
Expand Down
20 changes: 17 additions & 3 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ const (
BackendOllama = "ollama"
// BackendLMStudio is the backend identifier for LM Studio.
BackendLMStudio = "lmstudio"
// BackendOpenAI is the backend identifier for OpenAI-compatible APIs.
BackendOpenAI = "openai"
)

// Config holds the resolved configuration for the lumen process.
Expand All @@ -41,25 +43,35 @@ type Config struct {
OllamaHost string
Backend string
LMStudioHost string
OpenAIBaseURL string
OpenAIAPIKey string
}

// Load reads configuration from environment variables and the model registry.
func Load() (Config, error) {
backend := EnvOrDefault("LUMEN_BACKEND", BackendOllama)
if backend != BackendOllama && backend != BackendLMStudio {
return Config{}, fmt.Errorf("unknown backend %q: must be %q or %q", backend, BackendOllama, BackendLMStudio)
if backend != BackendOllama && backend != BackendLMStudio && backend != BackendOpenAI {
return Config{}, fmt.Errorf("unknown backend %q: must be %q, %q, or %q", backend, BackendOllama, BackendLMStudio, BackendOpenAI)
}

defaultModel := embedder.DefaultOllamaModel
if backend == BackendLMStudio {
switch backend {
case BackendLMStudio:
defaultModel = embedder.DefaultLMStudioModel
case BackendOpenAI:
defaultModel = embedder.DefaultOpenAIModel
}

model := EnvOrDefault("LUMEN_EMBED_MODEL", defaultModel)
spec, ok := embedder.KnownModels[model]
if !ok {
return Config{}, fmt.Errorf("unknown embedding model %q", model)
}
openAIAPIKey := os.Getenv("OPENAI_API_KEY")
if backend == BackendOpenAI && openAIAPIKey == "" {
return Config{}, fmt.Errorf("OPENAI_API_KEY environment variable is required for the %q backend", BackendOpenAI)
}

return Config{
Model: model,
Dims: spec.Dims,
Expand All @@ -68,6 +80,8 @@ func Load() (Config, error) {
OllamaHost: EnvOrDefault("OLLAMA_HOST", "http://localhost:11434"),
Backend: backend,
LMStudioHost: EnvOrDefault("LM_STUDIO_HOST", "http://localhost:1234"),
OpenAIBaseURL: EnvOrDefault("OPENAI_BASE_URL", "https://api.openai.com"),
OpenAIAPIKey: openAIAPIKey,
}, nil
}

Expand Down
8 changes: 8 additions & 0 deletions internal/embedder/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ const DefaultOllamaModel = "ordis/jina-embeddings-v2-base-code"
// DefaultLMStudioModel is the default model when using the LM Studio backend.
const DefaultLMStudioModel = "nomic-ai/nomic-embed-code-GGUF"

// DefaultOpenAIModel is the default model when using the OpenAI-compatible backend.
const DefaultOpenAIModel = "text-embedding-3-small"

// DefaultModel is an alias for DefaultOllamaModel for backward compatibility.
const DefaultModel = DefaultOllamaModel

Expand Down Expand Up @@ -66,4 +69,9 @@ var KnownModels = map[string]ModelSpec{
"qwen3-embedding:4b": {Dims: 2560, CtxLength: 40960, Backend: "ollama", MinScore: 0.30},
"qwen3-embedding:0.6b": {Dims: 1024, CtxLength: 32768, Backend: "ollama", MinScore: 0.30},
"all-minilm": {Dims: 384, CtxLength: 512, Backend: "ollama", MinScore: 0.20},
"text-embedding-3-small": {Dims: 1536, CtxLength: 8191, Backend: "openai", MinScore: 0.20},
"text-embedding-3-large": {Dims: 3072, CtxLength: 8191, Backend: "openai", MinScore: 0.15},
"text-embedding-ada-002": {Dims: 1536, CtxLength: 8191, Backend: "openai", MinScore: 0.20},
"voyage-code-3": {Dims: 1024, CtxLength: 32000, Backend: "openai", MinScore: 0.25},
"voyage-3-large": {Dims: 1024, CtxLength: 32000, Backend: "openai", MinScore: 0.25},
}
11 changes: 11 additions & 0 deletions internal/embedder/models_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ func TestKnownModels(t *testing.T) {
"qwen3-embedding:4b": {Dims: 2560, CtxLength: 40960, Backend: "ollama", MinScore: 0.30},
"qwen3-embedding:0.6b": {Dims: 1024, CtxLength: 32768, Backend: "ollama", MinScore: 0.30},
"all-minilm": {Dims: 384, CtxLength: 512, Backend: "ollama", MinScore: 0.20},
"text-embedding-3-small": {Dims: 1536, CtxLength: 8191, Backend: "openai", MinScore: 0.20},
"text-embedding-3-large": {Dims: 3072, CtxLength: 8191, Backend: "openai", MinScore: 0.15},
"text-embedding-ada-002": {Dims: 1536, CtxLength: 8191, Backend: "openai", MinScore: 0.20},
"voyage-code-3": {Dims: 1024, CtxLength: 32000, Backend: "openai", MinScore: 0.25},
"voyage-3-large": {Dims: 1024, CtxLength: 32000, Backend: "openai", MinScore: 0.25},
}

for name, want := range expected {
Expand Down Expand Up @@ -61,6 +66,12 @@ func TestDefaultLMStudioModelInRegistry(t *testing.T) {
}
}

// TestDefaultOpenAIModelInRegistry verifies that the default OpenAI model
// name resolves to an entry in the KnownModels registry, so a bare
// `LUMEN_BACKEND=openai` configuration cannot fail model lookup.
func TestDefaultOpenAIModelInRegistry(t *testing.T) {
	_, ok := KnownModels[DefaultOpenAIModel]
	if !ok {
		t.Errorf("DefaultOpenAIModel %q is not in KnownModels", DefaultOpenAIModel)
	}
}

func TestDimensionAwareMinScore(t *testing.T) {
tests := []struct {
dims int
Expand Down
166 changes: 166 additions & 0 deletions internal/embedder/openai.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// Copyright 2026 Aeneas Rekkas
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package embedder

import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"slices"
"time"

"github.com/sethvargo/go-retry"
)

// OpenAI implements the Embedder interface using an OpenAI-compatible
// /v1/embeddings endpoint. It works with OpenAI, Voyage AI, Azure OpenAI,
// Together AI, and any other service exposing the same wire format.
type OpenAI struct {
	model      string       // embedding model name sent in each request body
	dimensions int          // expected embedding vector length reported by Dimensions
	baseURL    string       // API base URL, e.g. "https://api.openai.com"; paths like /v1/embeddings are appended
	apiKey     string       // Bearer token sent in the Authorization header
	client     *http.Client // shared HTTP client with an overall per-request timeout
}

// NewOpenAI creates a new OpenAI-compatible embedder.
// baseURL is the API base URL (e.g. "https://api.openai.com"); trailing
// slashes are stripped so endpoint paths such as "/v1/embeddings" join
// without producing a double slash. apiKey is the Bearer token for
// authentication and must be non-empty.
func NewOpenAI(model string, dimensions int, baseURL string, apiKey string) (*OpenAI, error) {
	if apiKey == "" {
		return nil, fmt.Errorf("API key is required for OpenAI-compatible backend")
	}
	// Normalize so "https://host/" and "https://host" behave identically:
	// request URLs are built as baseURL + "/v1/...".
	for len(baseURL) > 0 && baseURL[len(baseURL)-1] == '/' {
		baseURL = baseURL[:len(baseURL)-1]
	}
	return &OpenAI{
		model:      model,
		dimensions: dimensions,
		baseURL:    baseURL,
		apiKey:     apiKey,
		client: &http.Client{
			// Overall timeout covering the full request/response cycle.
			Timeout: 10 * time.Minute,
		},
	}, nil
}

// Dimensions returns the embedding vector dimensionality.
// This is the value supplied at construction, used by callers to size
// vector storage; it is not verified against the service's responses here.
func (o *OpenAI) Dimensions() int {
	return o.dimensions
}

// ModelName returns the model name used for embeddings, exactly as it is
// sent in the "model" field of each /v1/embeddings request.
func (o *OpenAI) ModelName() string {
	return o.model
}

// openaiEmbedRequest is the JSON body sent to /v1/embeddings.
type openaiEmbedRequest struct {
	Model string   `json:"model"` // embedding model identifier
	Input []string `json:"input"` // batch of texts to embed
}

// openaiEmbedItem is a single embedding item in the response.
type openaiEmbedItem struct {
	Embedding []float32 `json:"embedding"`
	Index     int       `json:"index"` // position of the corresponding input text
}

// openaiEmbedResponse is the JSON body returned from /v1/embeddings.
// Only the "data" array is decoded; usage and model metadata are ignored.
type openaiEmbedResponse struct {
	Data []openaiEmbedItem `json:"data"`
}

// Embed converts texts into embedding vectors, splitting the input into
// batches of embedBatchSize requests. Vectors are returned in the same
// order as the input texts; a nil or empty input yields (nil, nil).
func (o *OpenAI) Embed(ctx context.Context, texts []string) ([][]float32, error) {
	if len(texts) == 0 {
		return nil, nil
	}

	// Pre-size: exactly one vector is expected per input text.
	allVecs := make([][]float32, 0, len(texts))
	for i := 0; i < len(texts); i += embedBatchSize {
		batch := texts[i:min(i+embedBatchSize, len(texts))]

		vecs, err := o.embedBatch(ctx, batch)
		if err != nil {
			return nil, fmt.Errorf("embedding batch starting at %d: %w", i, err)
		}
		allVecs = append(allVecs, vecs...)
	}

	return allVecs, nil
}

// embedBatch sends a single batch of texts to the /v1/embeddings endpoint.
// Retries up to embedMaxRetries times on transient errors (5xx, 429 rate limits,
// network failures), respecting context cancellation between attempts.
// The returned vectors are ordered to match texts, and an error is returned
// if the service yields a different number of embeddings than inputs.
func (o *OpenAI) embedBatch(ctx context.Context, texts []string) ([][]float32, error) {
	bodyBytes, err := json.Marshal(openaiEmbedRequest{
		Model: o.model,
		Input: texts,
	})
	if err != nil {
		return nil, fmt.Errorf("marshalling request: %w", err)
	}

	b := retry.NewExponential(100 * time.Millisecond)

	var embedResp openaiEmbedResponse
	err = retry.Do(ctx, retry.WithMaxRetries(embedMaxRetries-1, b), func(ctx context.Context) error {
		req, err := http.NewRequestWithContext(ctx, http.MethodPost, o.baseURL+"/v1/embeddings", bytes.NewReader(bodyBytes))
		if err != nil {
			return fmt.Errorf("creating request: %w", err)
		}
		req.Header.Set("Content-Type", "application/json")
		req.Header.Set("Authorization", "Bearer "+o.apiKey)

		resp, err := o.client.Do(req)
		if err != nil {
			// Network-level failures are treated as transient.
			return retry.RetryableError(fmt.Errorf("request failed: %w", err))
		}

		// Read the body before the status checks so error responses can be
		// quoted in the message and the connection can be reused.
		body, readErr := io.ReadAll(resp.Body)
		_ = resp.Body.Close()

		if resp.StatusCode == http.StatusTooManyRequests {
			return retry.RetryableError(fmt.Errorf("rate limited: status %d", resp.StatusCode))
		}
		if resp.StatusCode >= 500 {
			return retry.RetryableError(fmt.Errorf("server error: status %d", resp.StatusCode))
		}
		if resp.StatusCode != http.StatusOK {
			return fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body))
		}
		if readErr != nil {
			return fmt.Errorf("reading response body: %w", readErr)
		}

		// Defensive reset so data from an earlier attempt can never leak
		// into the result of a later one.
		embedResp = openaiEmbedResponse{}
		return json.Unmarshal(body, &embedResp)
	})
	if err != nil {
		return nil, fmt.Errorf("openai embed: %w", err)
	}

	// Guard against silent truncation: the caller expects exactly one
	// vector per input text.
	if len(embedResp.Data) != len(texts) {
		return nil, fmt.Errorf("openai embed: got %d embeddings for %d inputs", len(embedResp.Data), len(texts))
	}

	// Sort by index — the OpenAI spec allows out-of-order responses.
	slices.SortFunc(embedResp.Data, func(a, b openaiEmbedItem) int {
		return a.Index - b.Index
	})

	vecs := make([][]float32, len(embedResp.Data))
	for i, item := range embedResp.Data {
		vecs[i] = item.Embedding
	}
	return vecs, nil
}
Loading
Loading