-
Notifications
You must be signed in to change notification settings - Fork 117
site: v0.4 release notes #1456
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
site: v0.4 release notes #1456
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,301 @@ | ||||||||||||||||||
| { | ||||||||||||||||||
| "series": { | ||||||||||||||||||
| "version": "v0.4", | ||||||||||||||||||
| "title": "Envoy AI Gateway v0.4.x", | ||||||||||||||||||
| "subtitle": "Release introducing Model Context Protocol (MCP) Gateway, OpenAI Image Generation, first-party Anthropic support, guided output decoding for GCP Vertex AI/Gemini, cross-namespace references, enhanced authentication, and comprehensive observability improvements.", | ||||||||||||||||||
| "badge": "Latest", | ||||||||||||||||||
| "badgeType": "milestone" | ||||||||||||||||||
| }, | ||||||||||||||||||
| "releases": [ | ||||||||||||||||||
| { | ||||||||||||||||||
| "version": "v0.4.0", | ||||||||||||||||||
| "date": "October 30, 2025", | ||||||||||||||||||
| "type": "minor", | ||||||||||||||||||
| "tags": [ | ||||||||||||||||||
| { "text": "MCP Gateway", "type": "feature" }, | ||||||||||||||||||
| { "text": "Image Generation", "type": "feature" }, | ||||||||||||||||||
| { "text": "First-Party Anthropic Support", "type": "feature" }, | ||||||||||||||||||
| { "text": "Guided Output Decoding", "type": "feature" }, | ||||||||||||||||||
| { "text": "Cross-Namespace Support", "type": "feature" }, | ||||||||||||||||||
| { "text": "Enhanced Auth", "type": "feature" }, | ||||||||||||||||||
| { "text": "Client SDK", "type": "feature" }, | ||||||||||||||||||
| { "text": "InferencePool v1", "type": "feature" } | ||||||||||||||||||
| ], | ||||||||||||||||||
| "overview": "Envoy AI Gateway v0.4.0 is a major release introducing Model Context Protocol (MCP) Gateway support with OAuth and API key authentication, OpenAI Image Generation API support, first-party Anthropic provider integration, guided output decoding for GCP Vertex AI/Gemini models, cross-namespace resource references with ReferenceGrant, enhanced AWS and Azure authentication, type-safe Kubernetes client SDK, InferencePool v1 support, and comprehensive observability improvements. This release also includes route-level header mutations, cached token tracking, and standalone CLI auto-configuration for multiple providers.", | ||||||||||||||||||
| "features": [ | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Model Context Protocol (MCP) Gateway", | ||||||||||||||||||
| "items": [ | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "New <code>MCPRoute</code> CRD", | ||||||||||||||||||
| "description": "Introduces <code>MCPRoute</code> custom resource for routing MCP requests to backend MCP servers, enabling unified AI API for multiple MCP backends." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Complete MCP spec implementation", | ||||||||||||||||||
| "description": "Includes streamable HTTP transport, JSON-RPC 2.0 support, and MCP spec-compliant OAuth 2.0 authorization with JWKS validation and Protected Resource Metadata." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Server multiplexing and tool routing", | ||||||||||||||||||
| "description": "Aggregates multiple MCP servers behind a single endpoint with intelligent tool routing, tool filtering (exact match and regex patterns), and collision detection." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Upstream authentication", | ||||||||||||||||||
| "description": "Supports both OAuth-based authentication and API key authentication for secure backend MCP server communication with configurable headers." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Session management", | ||||||||||||||||||
| "description": "Implements MCP session handling with encryption, rotatable seeds, and graceful session lifecycle management." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Full observability integration", | ||||||||||||||||||
| "description": "Provides comprehensive monitoring, logging, and tracing for MCP operations with configurable access logs and metrics enrichment." | ||||||||||||||||||
| } | ||||||||||||||||||
| ] | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "OpenAI Image Generation Support", | ||||||||||||||||||
| "items": [ | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Complete <code>/v1/images/generations</code> endpoint", | ||||||||||||||||||
| "description": "End-to-end support for OpenAI's image generation API including request/response translation, Brotli encoding/decoding, and full protocol compatibility." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Image generation tracing", | ||||||||||||||||||
| "description": "OpenInference-compliant distributed tracing for image generation requests with detailed request parameters and timing information." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Image generation metrics", | ||||||||||||||||||
| "description": "Comprehensive metrics instrumentation following OpenTelemetry Gen AI semantic conventions for image generation operations." | ||||||||||||||||||
| } | ||||||||||||||||||
| ] | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "First-Party Anthropic Provider", | ||||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not only first-party Anthropic but also Anthropic on AWS Bedrock made it into this release (#1418). |
||||||||||||||||||
| "items": [ | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Direct api.anthropic.com support", | ||||||||||||||||||
| "description": "Native integration with Anthropic's API at <code>api.anthropic.com</code>, complementing existing GCP Vertex AI Anthropic support." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Anthropic API key authentication", | ||||||||||||||||||
| "description": "Native <code>x-api-key</code> header-based authentication matching Anthropic's API conventions and SDK patterns." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Passthrough translator with token usage tracking", | ||||||||||||||||||
| "description": "Efficient passthrough translation layer that captures token usage and maintains API compatibility while minimizing overhead." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Standalone CLI auto-configuration", | ||||||||||||||||||
| "description": "Auto-configuration from the <code>ANTHROPIC_API_KEY</code> environment variable in standalone mode, enabling zero-config deployments." | ||||||||||||||||||
| } | ||||||||||||||||||
| ] | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Guided Output Support for GCP Vertex AI/Gemini", | ||||||||||||||||||
| "items": [ | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Guided regex support", | ||||||||||||||||||
| "description": "Constrains model outputs to match specific regular expressions for GCP Vertex AI/Gemini models, enabling structured text generation." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Guided choice support", | ||||||||||||||||||
| "description": "Restricts model outputs to predefined choices for GCP Vertex AI/Gemini models, ensuring responses conform to expected values." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Guided JSON support", | ||||||||||||||||||
| "description": "Ensures model outputs are valid JSON conforming to specified schemas for GCP Vertex AI/Gemini models, with OpenAI-compatible API translation." | ||||||||||||||||||
| } | ||||||||||||||||||
| ] | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Cross-Namespace Resource References", | ||||||||||||||||||
| "items": [ | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Cross-namespace <code>AIServiceBackend</code> references", | ||||||||||||||||||
| "description": "<code>AIGatewayRoute</code> can now reference <code>AIServiceBackend</code> resources in different namespaces, enabling multi-tenant and organizational separation patterns." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "ReferenceGrant validation", | ||||||||||||||||||
| "description": "Comprehensive ReferenceGrant integration following Gateway API patterns, with automatic validation and clear error messages when grants are missing." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Event-driven reconciliation", | ||||||||||||||||||
| "description": "ReferenceGrant changes automatically trigger reconciliation of affected routes, ensuring security policies are always up-to-date." | ||||||||||||||||||
| } | ||||||||||||||||||
|
Comment on lines
+121
to
+124
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is redundant and does not belong in the release notes. |
||||||||||||||||||
| ] | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Enhanced Upstream Authentication", | ||||||||||||||||||
| "items": [ | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "AWS SDK default credential chain", | ||||||||||||||||||
| "description": "Support for AWS SDK's default credential chain including IRSA (IAM Roles for Service Accounts), EKS Pod Identity, EC2 Instance Profiles, and environment variables, eliminating the need for static credentials." | ||||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Azure API key authentication", | ||||||||||||||||||
| "description": "Native Azure OpenAI API key authentication using the <code>api-key</code> header, matching Azure SDK conventions and console practices." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Azure CLI auto-configuration", | ||||||||||||||||||
| "description": "Standalone CLI auto-configuration from <code>AZURE_OPENAI_API_KEY</code> and <code>AZURE_OPENAI_ENDPOINT</code> environment variables, compatible with Elastic and other Azure-based deployments." | ||||||||||||||||||
| } | ||||||||||||||||||
|
Comment on lines
+138
to
+141
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would like to clarify what "CLI" refers to here.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Well, I think we can delete this entry, as it overlaps with the entry "Multi-provider auto-configuration" in the "Standalone Mode and CLI" section below. Autoconfig didn't exist in v0.3 in the first place, so yeah. |
||||||||||||||||||
| ] | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Traffic Management and Configuration", | ||||||||||||||||||
| "items": [ | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Route-level header mutations", | ||||||||||||||||||
| "description": "New <code>headerMutation</code> field in <code>AIGatewayRouteRuleBackendRef</code> enables per-route header manipulation with smart merge logic when combined with backend-level mutations. Essential for GCP provisioned throughput and other advanced routing scenarios." | ||||||||||||||||||
| }, | ||||||||||||||||||
|
Comment on lines
+148
to
+150
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not only the route-level one: we also have an AIServiceBackend-level one (4bfaf91), and both are new in v0.4. |
||||||||||||||||||
| { | ||||||||||||||||||
| "title": "<code>InferencePool</code> v1 support", | ||||||||||||||||||
| "description": "Updated to Gateway API Inference Extension v1.0, providing stable intelligent endpoint selection with enhanced performance and reliability." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Cached token usage tracking for actual token usage reporting", | ||||||||||||||||||
| "description": "Captures and reports cached token statistics from cloud providers (Anthropic, Bedrock, etc.), providing accurate cost attribution for prompt caching features." | ||||||||||||||||||
| } | ||||||||||||||||||
| ] | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Standalone Mode and CLI", | ||||||||||||||||||
| "items": [ | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Docker image support", | ||||||||||||||||||
| "description": "Official Docker images for the <code>aigw</code> CLI published to GitHub Container Registry, enabling containerized standalone deployments with proper health checks and lifecycle management." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Multi-provider auto-configuration", | ||||||||||||||||||
| "description": "Zero-config standalone mode supporting automatic configuration from <code>OPENAI_API_KEY</code>, <code>AZURE_OPENAI_API_KEY</code>, or <code>ANTHROPIC_API_KEY</code> environment variables with intelligent dialect detection and OpenAI SDK compatibility. Generates complete Envoy configuration automatically." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "MCP server configuration", | ||||||||||||||||||
| "description": "Native MCP support in standalone mode via <code>--mcp-config</code> and <code>--mcp-json</code> flags, enabling unified LLM and MCP server configuration in a single <code>aigw run</code> invocation without Kubernetes." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "XDG Base Directory standards", | ||||||||||||||||||
| "description": "Proper separation of configuration, data, state, and runtime files following XDG Base Directory specification, improving organization and enabling better cleanup and management of aigw state." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Environment variable substitution", | ||||||||||||||||||
| "description": "Support for environment variable substitution in configuration files, enabling dynamic configuration and secrets management in standalone deployments." | ||||||||||||||||||
| }, | ||||||||||||||||||
|
Comment on lines
+180
to
+183
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is not new, so let's delete it.
Suggested change
|
||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Enhanced readiness monitoring", | ||||||||||||||||||
| "description": "Improved Envoy readiness detection and status reporting in standalone mode, providing clear insights into when the gateway is ready to accept traffic with better error messages." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Consolidated admin server", | ||||||||||||||||||
| "description": "Unified admin server on a single port serving both <code>/metrics</code> and <code>/health</code> endpoints, simplifying monitoring and health check configuration." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Improved error handling", | ||||||||||||||||||
| "description": "<code>aigw</code> CLI now fails fast and exits cleanly if the external processor fails to start, preventing silent failures and improving debugging experience." | ||||||||||||||||||
| } | ||||||||||||||||||
| ] | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Developer Experience", | ||||||||||||||||||
| "items": [ | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Type-safe Kubernetes client SDK", | ||||||||||||||||||
| "description": "Generated client libraries for all AI Gateway CRDs following standard Kubernetes client-go patterns, enabling developers to build controllers, operators, CLI tools, and custom integrations with type safety." | ||||||||||||||||||
| } | ||||||||||||||||||
| ] | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Observability Enhancements", | ||||||||||||||||||
| "items": [ | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "OpenTelemetry native metrics export", | ||||||||||||||||||
| "description": "Support for OTEL-native metrics export (in addition to Prometheus), enabling integration with Elastic Stack, OTEL-TUI, and other OTEL-native observability systems. Includes console exporter for ad-hoc debugging without external dependencies." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Embeddings tracing implementation", | ||||||||||||||||||
| "description": "Complete OpenInference-compliant tracing for embeddings operations, complementing existing chat completion tracing." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Azure OpenAI embeddings translator", | ||||||||||||||||||
| "description": "Native support for Azure OpenAI embeddings API with proper protocol translation and token usage tracking." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Enhanced <code>/messages</code> endpoint metrics", | ||||||||||||||||||
| "description": "Distinct metrics for Anthropic's <code>/messages</code> endpoint, providing accurate attribution separate from <code>/chat/completions</code> endpoints." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Legacy <code>/v1/completions</code> endpoint", | ||||||||||||||||||
| "description": "Full pass-through support for OpenAI's legacy <code>/v1/completions</code> endpoint with complete tracing and metrics implementation, ensuring backward compatibility and comprehensive observability." | ||||||||||||||||||
| }, | ||||||||||||||||||
|
Comment on lines
+227
to
+229
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sure this belongs in the Observability category; it is at the same layer as /v1/images/generations above. |
||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Original model tracking", | ||||||||||||||||||
| "description": "Metrics now track both the original requested model and any overridden model names, providing accurate attribution in multi-provider and model virtualization scenarios." | ||||||||||||||||||
| } | ||||||||||||||||||
| ] | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Provider-Specific Enhancements", | ||||||||||||||||||
| "items": [ | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "AWS Bedrock reasoning support", | ||||||||||||||||||
| "description": "Full support for reasoning/thinking tokens in AWS Bedrock responses for both streaming and non-streaming modes, properly exposing extended thinking processes in Claude and other reasoning-capable models." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "GCP Vertex AI safety settings", | ||||||||||||||||||
| "description": "Support for GCP-specific safety settings configuration, allowing fine-grained control over content filtering and safety thresholds for Gemini models." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Gemini usage chunk improvements", | ||||||||||||||||||
| "description": "Accurate <code>completion_tokens</code> reporting in streaming usage chunks for Gemini models, ensuring proper token accounting during streaming responses." | ||||||||||||||||||
| } | ||||||||||||||||||
| ] | ||||||||||||||||||
| } | ||||||||||||||||||
| ], | ||||||||||||||||||
| "apiChanges": [ | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "New <code>MCPRoute</code> CRD", | ||||||||||||||||||
| "description": "Introduces <code>MCPRoute</code> custom resource with comprehensive fields for MCP server configuration, tool filtering, authentication policies (OAuth and API key), and Protected Resource Metadata." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Cross-namespace references in <code>AIGatewayRoute</code>", | ||||||||||||||||||
| "description": "Added <code>namespace</code> field to <code>AIGatewayRouteRuleBackendRef</code>, enabling cross-namespace backend references with ReferenceGrant validation." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Route-level header mutations", | ||||||||||||||||||
| "description": "Added <code>headerMutation</code> field to <code>AIGatewayRouteRuleBackendRef</code> for per-route header manipulation with smart merge logic." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Anthropic API key authentication", | ||||||||||||||||||
| "description": "Added <code>AnthropicAPIKey</code> to <code>BackendSecurityPolicy</code> for <code>x-api-key</code> header authentication." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Azure API key authentication", | ||||||||||||||||||
| "description": "Added <code>AzureAPIKey</code> to <code>BackendSecurityPolicy</code> for <code>api-key</code> header authentication." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "AWS credential chain support", | ||||||||||||||||||
| "description": "<code>BackendSecurityPolicy</code> AWS auth now supports SDK default credential chain when credentials are not explicitly provided." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "<code>InferencePool</code> v1", | ||||||||||||||||||
| "description": "Updated to support Gateway API Inference Extension v1.0 (inference.networking.k8s.io/v1) instead of v1alpha1." | ||||||||||||||||||
| }, | ||||||||||||||||||
| { | ||||||||||||||||||
| "title": "Enforced <code>Backend</code> resource requirement", | ||||||||||||||||||
| "description": "Added CRD validation to <code>AIServiceBackend</code> explicitly requiring Envoy Gateway <code>Backend</code> resources (Kubernetes Service is not supported)." | ||||||||||||||||||
| } | ||||||||||||||||||
| ], | ||||||||||||||||||
| "dependencies": [ | ||||||||||||||||||
| { "title": "Go 1.25.3", "description": "Updated to Go 1.25.3 for improved performance and security." }, | ||||||||||||||||||
| { "title": "Envoy Gateway v1.6", "description": "Built on Envoy Gateway v1.6 for proven data plane capabilities and enhanced features." }, | ||||||||||||||||||
| { "title": "Envoy v1.36", "description": "Leveraging Envoy Proxy v1.36's battle-tested networking capabilities." }, | ||||||||||||||||||
| { "title": "Gateway API v1.4.0", "description": "Support for Gateway API v1.4.0 specifications." }, | ||||||||||||||||||
| { "title": "Gateway API Inference Extension v1.0.2", "description": "Integration with Gateway API Inference Extension v1.0.2 for stable intelligent endpoint selection." } | ||||||||||||||||||
| ] | ||||||||||||||||||
| } | ||||||||||||||||||
| ], | ||||||||||||||||||
| "navigation": { | ||||||||||||||||||
| "previous": { "version": "v0.3.x Series", "path": "/release-notes/v0.3" }, | ||||||||||||||||||
| "next": null | ||||||||||||||||||
| } | ||||||||||||||||||
| } | ||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add to navigation