diff --git a/CLAUDE.md b/CLAUDE.md index 0a8b4fb..8149550 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -69,7 +69,7 @@ marketplace. | ---------------------------- | ----------------------------------- | ------------------------------- | | SQLite | Vector storage + schema persistence | Uses sqlite-vec for KNN search | | MCP (Model Context Protocol) | Agent integration | stdio transport | -| Ollama/LM Studio | Embeddings generation | Local models, configurable | +| Ollama/LM Studio/OpenAI API | Embeddings generation | Local + cloud, configurable | | Go AST | Code parsing into semantic chunks | Functions, types, methods, etc. | | Cobra | CLI framework | Subcommands: index, stdio | @@ -106,16 +106,19 @@ system handles MCP registration, hooks, and skills declaratively via: ## Environment Variables -| Variable | Default | Description | -| ------------------------ | ------------------------ | ------------------------------------------ | -| `LUMEN_BACKEND` | `ollama` | Embedding backend (`ollama` or `lmstudio`) | -| `LUMEN_EMBED_MODEL` | see note ¹ | Embedding model (must be in registry) | -| `OLLAMA_HOST` | `http://localhost:11434` | Ollama server URL | -| `LM_STUDIO_HOST` | `http://localhost:1234` | LM Studio server URL | -| `LUMEN_MAX_CHUNK_TOKENS` | `512` | Max tokens per chunk before splitting | +| Variable | Default | Description | +| ------------------------ | -------------------------- | ------------------------------------------------------- | +| `LUMEN_BACKEND` | `ollama` | Embedding backend (`ollama`, `lmstudio`, or `openai`) | +| `LUMEN_EMBED_MODEL` | see note ¹ | Embedding model (must be in registry) | +| `OLLAMA_HOST` | `http://localhost:11434` | Ollama server URL | +| `LM_STUDIO_HOST` | `http://localhost:1234` | LM Studio server URL | +| `OPENAI_API_KEY` | (none) | API key for `openai` backend (required) | +| `OPENAI_BASE_URL` | `https://api.openai.com` | Base URL for `openai` backend (Voyage AI, Azure, etc.) 
| +| `LUMEN_MAX_CHUNK_TOKENS` | `512` | Max tokens per chunk before splitting | ¹ `ordis/jina-embeddings-v2-base-code` (Ollama), -`nomic-ai/nomic-embed-code-GGUF` (LM Studio) +`nomic-ai/nomic-embed-code-GGUF` (LM Studio), +`text-embedding-3-small` (OpenAI) ## Project Structure diff --git a/cmd/embedder.go b/cmd/embedder.go index e9aeb48..61570c1 100644 --- a/cmd/embedder.go +++ b/cmd/embedder.go @@ -28,6 +28,8 @@ func newEmbedder(cfg config.Config) (embedder.Embedder, error) { return embedder.NewOllama(cfg.Model, cfg.Dims, cfg.CtxLength, cfg.OllamaHost) case config.BackendLMStudio: return embedder.NewLMStudio(cfg.Model, cfg.Dims, cfg.LMStudioHost) + case config.BackendOpenAI: + return embedder.NewOpenAI(cfg.Model, cfg.Dims, cfg.OpenAIBaseURL, cfg.OpenAIAPIKey) default: return nil, fmt.Errorf("unknown backend %q", cfg.Backend) } diff --git a/cmd/stdio.go b/cmd/stdio.go index 249059a..91b35ad 100644 --- a/cmd/stdio.go +++ b/cmd/stdio.go @@ -481,11 +481,20 @@ func (ic *indexerCache) handleIndexStatus(_ context.Context, _ *mcp.CallToolRequ // handleHealthCheck pings the configured embedding service and reports status. 
func (ic *indexerCache) handleHealthCheck(ctx context.Context, _ *mcp.CallToolRequest, _ HealthCheckInput) (*mcp.CallToolResult, any, error) { - host := ic.cfg.OllamaHost - probeURL := host + "/api/tags" - if ic.cfg.Backend == config.BackendLMStudio { + var host, probeURL string + switch ic.cfg.Backend { + case config.BackendOllama: + host = ic.cfg.OllamaHost + probeURL = host + "/api/tags" + case config.BackendLMStudio: host = ic.cfg.LMStudioHost probeURL = host + "/v1/models" + case config.BackendOpenAI: + host = ic.cfg.OpenAIBaseURL + probeURL = host + "/v1/models" + default: + return healthResult(ic.cfg.Backend, host, ic.cfg.Model, false, + fmt.Sprintf("unknown backend %q", ic.cfg.Backend)), nil, nil } probeCtx, cancel := context.WithTimeout(ctx, 5*time.Second) @@ -497,6 +506,10 @@ func (ic *indexerCache) handleHealthCheck(ctx context.Context, _ *mcp.CallToolRe fmt.Sprintf("failed to create request: %v", err)), nil, nil } + if ic.cfg.Backend == config.BackendOpenAI && ic.cfg.OpenAIAPIKey != "" { + req.Header.Set("Authorization", "Bearer "+ic.cfg.OpenAIAPIKey) + } + resp, err := http.DefaultClient.Do(req) if err != nil { return healthResult(ic.cfg.Backend, host, ic.cfg.Model, false, @@ -504,6 +517,10 @@ func (ic *indexerCache) handleHealthCheck(ctx context.Context, _ *mcp.CallToolRe } _ = resp.Body.Close() + if resp.StatusCode == http.StatusUnauthorized { + return healthResult(ic.cfg.Backend, host, ic.cfg.Model, false, + "service reachable but API key is invalid"), nil, nil + } if resp.StatusCode >= 500 { return healthResult(ic.cfg.Backend, host, ic.cfg.Model, false, fmt.Sprintf("service returned HTTP %d", resp.StatusCode)), nil, nil diff --git a/internal/config/config.go b/internal/config/config.go index bb06b22..e0d8a95 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -30,6 +30,8 @@ const ( BackendOllama = "ollama" // BackendLMStudio is the backend identifier for LM Studio. 
BackendLMStudio = "lmstudio" + // BackendOpenAI is the backend identifier for OpenAI-compatible APIs. + BackendOpenAI = "openai" ) // Config holds the resolved configuration for the lumen process. @@ -41,18 +43,23 @@ type Config struct { OllamaHost string Backend string LMStudioHost string + OpenAIBaseURL string + OpenAIAPIKey string } // Load reads configuration from environment variables and the model registry. func Load() (Config, error) { backend := EnvOrDefault("LUMEN_BACKEND", BackendOllama) - if backend != BackendOllama && backend != BackendLMStudio { - return Config{}, fmt.Errorf("unknown backend %q: must be %q or %q", backend, BackendOllama, BackendLMStudio) + if backend != BackendOllama && backend != BackendLMStudio && backend != BackendOpenAI { + return Config{}, fmt.Errorf("unknown backend %q: must be %q, %q, or %q", backend, BackendOllama, BackendLMStudio, BackendOpenAI) } defaultModel := embedder.DefaultOllamaModel - if backend == BackendLMStudio { + switch backend { + case BackendLMStudio: defaultModel = embedder.DefaultLMStudioModel + case BackendOpenAI: + defaultModel = embedder.DefaultOpenAIModel } model := EnvOrDefault("LUMEN_EMBED_MODEL", defaultModel) @@ -60,6 +67,11 @@ func Load() (Config, error) { if !ok { return Config{}, fmt.Errorf("unknown embedding model %q", model) } + openAIAPIKey := os.Getenv("OPENAI_API_KEY") + if backend == BackendOpenAI && openAIAPIKey == "" { + return Config{}, fmt.Errorf("OPENAI_API_KEY environment variable is required for the %q backend", BackendOpenAI) + } + return Config{ Model: model, Dims: spec.Dims, @@ -68,6 +80,8 @@ func Load() (Config, error) { OllamaHost: EnvOrDefault("OLLAMA_HOST", "http://localhost:11434"), Backend: backend, LMStudioHost: EnvOrDefault("LM_STUDIO_HOST", "http://localhost:1234"), + OpenAIBaseURL: EnvOrDefault("OPENAI_BASE_URL", "https://api.openai.com"), + OpenAIAPIKey: openAIAPIKey, }, nil } diff --git a/internal/embedder/models.go b/internal/embedder/models.go index df5c4ae..28fbc80 
100644 --- a/internal/embedder/models.go +++ b/internal/embedder/models.go @@ -28,6 +28,9 @@ const DefaultOllamaModel = "ordis/jina-embeddings-v2-base-code" // DefaultLMStudioModel is the default model when using the LM Studio backend. const DefaultLMStudioModel = "nomic-ai/nomic-embed-code-GGUF" +// DefaultOpenAIModel is the default model when using the OpenAI-compatible backend. +const DefaultOpenAIModel = "text-embedding-3-small" + // DefaultModel is an alias for DefaultOllamaModel for backward compatibility. const DefaultModel = DefaultOllamaModel @@ -66,4 +69,9 @@ var KnownModels = map[string]ModelSpec{ "qwen3-embedding:4b": {Dims: 2560, CtxLength: 40960, Backend: "ollama", MinScore: 0.30}, "qwen3-embedding:0.6b": {Dims: 1024, CtxLength: 32768, Backend: "ollama", MinScore: 0.30}, "all-minilm": {Dims: 384, CtxLength: 512, Backend: "ollama", MinScore: 0.20}, + "text-embedding-3-small": {Dims: 1536, CtxLength: 8191, Backend: "openai", MinScore: 0.20}, + "text-embedding-3-large": {Dims: 3072, CtxLength: 8191, Backend: "openai", MinScore: 0.15}, + "text-embedding-ada-002": {Dims: 1536, CtxLength: 8191, Backend: "openai", MinScore: 0.20}, + "voyage-code-3": {Dims: 1024, CtxLength: 32000, Backend: "openai", MinScore: 0.25}, + "voyage-3-large": {Dims: 1024, CtxLength: 32000, Backend: "openai", MinScore: 0.25}, } diff --git a/internal/embedder/models_test.go b/internal/embedder/models_test.go index 41bd1ba..7f8de09 100644 --- a/internal/embedder/models_test.go +++ b/internal/embedder/models_test.go @@ -25,6 +25,11 @@ func TestKnownModels(t *testing.T) { "qwen3-embedding:4b": {Dims: 2560, CtxLength: 40960, Backend: "ollama", MinScore: 0.30}, "qwen3-embedding:0.6b": {Dims: 1024, CtxLength: 32768, Backend: "ollama", MinScore: 0.30}, "all-minilm": {Dims: 384, CtxLength: 512, Backend: "ollama", MinScore: 0.20}, + "text-embedding-3-small": {Dims: 1536, CtxLength: 8191, Backend: "openai", MinScore: 0.20}, + "text-embedding-3-large": {Dims: 3072, CtxLength: 8191, Backend: 
"openai", MinScore: 0.15}, + "text-embedding-ada-002": {Dims: 1536, CtxLength: 8191, Backend: "openai", MinScore: 0.20}, + "voyage-code-3": {Dims: 1024, CtxLength: 32000, Backend: "openai", MinScore: 0.25}, + "voyage-3-large": {Dims: 1024, CtxLength: 32000, Backend: "openai", MinScore: 0.25}, } for name, want := range expected { @@ -61,6 +66,12 @@ func TestDefaultLMStudioModelInRegistry(t *testing.T) { } } +func TestDefaultOpenAIModelInRegistry(t *testing.T) { + if _, ok := KnownModels[DefaultOpenAIModel]; !ok { + t.Errorf("DefaultOpenAIModel %q is not in KnownModels", DefaultOpenAIModel) + } +} + func TestDimensionAwareMinScore(t *testing.T) { tests := []struct { dims int diff --git a/internal/embedder/openai.go b/internal/embedder/openai.go new file mode 100644 index 0000000..eb5cebf --- /dev/null +++ b/internal/embedder/openai.go @@ -0,0 +1,166 @@ +// Copyright 2026 Aeneas Rekkas +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package embedder + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "slices" + "time" + + "github.com/sethvargo/go-retry" +) + +// OpenAI implements the Embedder interface using an OpenAI-compatible +// /v1/embeddings endpoint. It works with OpenAI, Voyage AI, Azure OpenAI, +// Together AI, and any other service exposing the same wire format. 
+type OpenAI struct { + model string + dimensions int + baseURL string + apiKey string + client *http.Client +} + +// NewOpenAI creates a new OpenAI-compatible embedder. +// baseURL is the API base URL (e.g. "https://api.openai.com"). +// apiKey is the Bearer token for authentication. +func NewOpenAI(model string, dimensions int, baseURL string, apiKey string) (*OpenAI, error) { + if apiKey == "" { + return nil, fmt.Errorf("API key is required for OpenAI-compatible backend") + } + return &OpenAI{ + model: model, + dimensions: dimensions, + baseURL: baseURL, + apiKey: apiKey, + client: &http.Client{ + Timeout: 10 * time.Minute, + }, + }, nil +} + +// Dimensions returns the embedding vector dimensionality. +func (o *OpenAI) Dimensions() int { + return o.dimensions +} + +// ModelName returns the model name used for embeddings. +func (o *OpenAI) ModelName() string { + return o.model +} + +// openaiEmbedRequest is the JSON body sent to /v1/embeddings. +type openaiEmbedRequest struct { + Model string `json:"model"` + Input []string `json:"input"` +} + +// openaiEmbedItem is a single embedding item in the response. +type openaiEmbedItem struct { + Embedding []float32 `json:"embedding"` + Index int `json:"index"` +} + +// openaiEmbedResponse is the JSON body returned from /v1/embeddings. +type openaiEmbedResponse struct { + Data []openaiEmbedItem `json:"data"` +} + +// Embed converts texts into embedding vectors, splitting into batches of 32. +func (o *OpenAI) Embed(ctx context.Context, texts []string) ([][]float32, error) { + if len(texts) == 0 { + return nil, nil + } + + var allVecs [][]float32 + for i := 0; i < len(texts); i += embedBatchSize { + batch := texts[i:min(i+embedBatchSize, len(texts))] + + vecs, err := o.embedBatch(ctx, batch) + if err != nil { + return nil, fmt.Errorf("embedding batch starting at %d: %w", i, err) + } + allVecs = append(allVecs, vecs...) 
+ } + + return allVecs, nil +} + +// embedBatch sends a single batch of texts to the /v1/embeddings endpoint. +// Makes up to embedMaxRetries total attempts on transient errors (5xx, 429 rate +// limits, network failures), respecting context cancellation between attempts. +func (o *OpenAI) embedBatch(ctx context.Context, texts []string) ([][]float32, error) { + bodyBytes, err := json.Marshal(openaiEmbedRequest{ + Model: o.model, + Input: texts, + }) + if err != nil { + return nil, fmt.Errorf("marshalling request: %w", err) + } + + b := retry.NewExponential(100 * time.Millisecond) + + var embedResp openaiEmbedResponse + err = retry.Do(ctx, retry.WithMaxRetries(embedMaxRetries-1, b), func(ctx context.Context) error { + req, err := http.NewRequestWithContext(ctx, http.MethodPost, o.baseURL+"/v1/embeddings", bytes.NewReader(bodyBytes)) + if err != nil { + return fmt.Errorf("creating request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", "Bearer "+o.apiKey) + + resp, err := o.client.Do(req) + if err != nil { + return retry.RetryableError(fmt.Errorf("request failed: %w", err)) + } + + body, readErr := io.ReadAll(resp.Body) + _ = resp.Body.Close() + + if resp.StatusCode == http.StatusTooManyRequests { + return retry.RetryableError(fmt.Errorf("rate limited: status %d", resp.StatusCode)) + } + if resp.StatusCode >= 500 { + return retry.RetryableError(fmt.Errorf("server error: status %d", resp.StatusCode)) + } + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body)) + } + if readErr != nil { + return fmt.Errorf("reading response body: %w", readErr) + } + + return json.Unmarshal(body, &embedResp) + }) + if err != nil { + return nil, fmt.Errorf("openai embed: %w", err) + } + + // Sort by index — OpenAI spec allows out-of-order responses. 
+ slices.SortFunc(embedResp.Data, func(a, b openaiEmbedItem) int { + return a.Index - b.Index + }) + + vecs := make([][]float32, len(embedResp.Data)) + for i, item := range embedResp.Data { + vecs[i] = item.Embedding + } + return vecs, nil +} diff --git a/internal/embedder/openai_test.go b/internal/embedder/openai_test.go new file mode 100644 index 0000000..0b34e7a --- /dev/null +++ b/internal/embedder/openai_test.go @@ -0,0 +1,231 @@ +// Copyright 2026 Aeneas Rekkas +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package embedder + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + "time" +) + +// makeOpenAIResponse builds a realistic OpenAI /v1/embeddings response. 
+func makeOpenAIResponse(model string, embeddings [][]float32) map[string]any { + data := make([]map[string]any, len(embeddings)) + for i, e := range embeddings { + data[i] = map[string]any{ + "object": "embedding", + "embedding": e, + "index": i, + } + } + return map[string]any{ + "object": "list", + "data": data, + "model": model, + "usage": map[string]any{ + "prompt_tokens": 8, + "total_tokens": 8, + }, + } +} + +func TestOpenAIEmbedder_Embed(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/v1/embeddings" { + t.Fatalf("unexpected path: %s", r.URL.Path) + } + if auth := r.Header.Get("Authorization"); auth != "Bearer test-key" { + t.Fatalf("unexpected auth header: %s", auth) + } + resp := makeOpenAIResponse("text-embedding-3-small", [][]float32{ + {0.0023064255, -0.009327292, 0.015834473, 0.0069007568}, + {-0.0069352793, 0.020878976, 0.008590698, -0.012878418}, + }) + _ = json.NewEncoder(w).Encode(resp) + })) + defer server.Close() + + e, err := NewOpenAI("text-embedding-3-small", 4, server.URL, "test-key") + if err != nil { + t.Fatal(err) + } + + vecs, err := e.Embed(context.Background(), []string{"hello", "world"}) + if err != nil { + t.Fatal(err) + } + if len(vecs) != 2 { + t.Fatalf("expected 2 vectors, got %d", len(vecs)) + } + if len(vecs[0]) != 4 { + t.Fatalf("expected 4 dimensions, got %d", len(vecs[0])) + } +} + +func TestOpenAIEmbedder_Dimensions(t *testing.T) { + e, _ := NewOpenAI("text-embedding-3-small", 1536, "https://api.openai.com", "test-key") + if e.Dimensions() != 1536 { + t.Fatalf("expected 1536, got %d", e.Dimensions()) + } +} + +func TestOpenAIEmbedder_ModelName(t *testing.T) { + e, _ := NewOpenAI("text-embedding-3-small", 1536, "https://api.openai.com", "test-key") + if e.ModelName() != "text-embedding-3-small" { + t.Fatalf("expected text-embedding-3-small, got %s", e.ModelName()) + } +} + +func TestOpenAIEmbedder_Batching(t *testing.T) { + callCount := 0 + 
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + callCount++ + var req map[string]any + _ = json.NewDecoder(r.Body).Decode(&req) + input := req["input"].([]any) + + embeddings := make([][]float32, len(input)) + for i := range input { + embeddings[i] = []float32{0.1, 0.2, 0.3, 0.4} + } + _ = json.NewEncoder(w).Encode(makeOpenAIResponse("text-embedding-3-small", embeddings)) + })) + defer server.Close() + + e, _ := NewOpenAI("text-embedding-3-small", 4, server.URL, "test-key") + texts := make([]string, 50) + for i := range texts { + texts[i] = "text" + } + + vecs, err := e.Embed(context.Background(), texts) + if err != nil { + t.Fatal(err) + } + if len(vecs) != 50 { + t.Fatalf("expected 50 vectors, got %d", len(vecs)) + } + if callCount != 2 { + t.Fatalf("expected 2 batch calls (32+18), got %d", callCount) + } +} + +func TestOpenAIEmbedder_OrderingByIndex(t *testing.T) { + // Mock returns items in reversed index order to verify sorting. + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + resp := map[string]any{ + "object": "list", + "data": []map[string]any{ + {"object": "embedding", "embedding": []float32{0.9, 0.9, 0.9, 0.9}, "index": 1}, + {"object": "embedding", "embedding": []float32{0.1, 0.2, 0.3, 0.4}, "index": 0}, + }, + "model": "text-embedding-3-small", + "usage": map[string]any{"prompt_tokens": 4, "total_tokens": 4}, + } + _ = json.NewEncoder(w).Encode(resp) + })) + defer server.Close() + + e, _ := NewOpenAI("text-embedding-3-small", 4, server.URL, "test-key") + vecs, err := e.Embed(context.Background(), []string{"first", "second"}) + if err != nil { + t.Fatal(err) + } + if len(vecs) != 2 { + t.Fatalf("expected 2 vectors, got %d", len(vecs)) + } + if vecs[0][0] != 0.1 { + t.Fatalf("expected vecs[0][0]=0.1 (index:0 item), got %v", vecs[0][0]) + } + if vecs[1][0] != 0.9 { + t.Fatalf("expected vecs[1][0]=0.9 (index:1 item), got %v", vecs[1][0]) + } +} + +func 
TestOpenAIEmbedder_ErrorHandling(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + })) + defer server.Close() + + e, _ := NewOpenAI("text-embedding-3-small", 4, server.URL, "test-key") + _, err := e.Embed(context.Background(), []string{"hello"}) + if err == nil { + t.Fatal("expected error for 500 response") + } +} + +func TestOpenAIEmbedder_RateLimitRetry(t *testing.T) { + var calls atomic.Int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + n := calls.Add(1) + if n == 1 { + w.Header().Set("Retry-After", "0") + w.WriteHeader(http.StatusTooManyRequests) + return + } + resp := makeOpenAIResponse("text-embedding-3-small", [][]float32{ + {0.1, 0.2, 0.3, 0.4}, + }) + _ = json.NewEncoder(w).Encode(resp) + })) + defer server.Close() + + e, _ := NewOpenAI("text-embedding-3-small", 4, server.URL, "test-key") + vecs, err := e.Embed(context.Background(), []string{"hello"}) + if err != nil { + t.Fatalf("expected success after retry, got: %v", err) + } + if len(vecs) != 1 { + t.Fatalf("expected 1 vector, got %d", len(vecs)) + } + if got := calls.Load(); got != 2 { + t.Fatalf("expected 2 calls (1 rate-limited + 1 success), got %d", got) + } +} + +func TestOpenAI_Embed_ContextCancelledStopsRetry(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + })) + defer srv.Close() + + emb, _ := NewOpenAI("text-embedding-3-small", 4, srv.URL, "test-key") + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + start := time.Now() + _, err := emb.Embed(ctx, []string{"hello"}) + elapsed := time.Since(start) + + if err == nil { + t.Fatal("expected error from cancelled context") + } + if elapsed > 500*time.Millisecond { + t.Fatalf("expected fast failure on pre-cancelled context, took %v", elapsed) + } +} + +func 
TestOpenAIEmbedder_EmptyAPIKey(t *testing.T) { + _, err := NewOpenAI("text-embedding-3-small", 1536, "https://api.openai.com", "") + if err == nil { + t.Fatal("expected error for empty API key") + } +}