Skip to content

Commit

Permalink
Rename pkg/ext-proc to pkg/epp (#372)
Browse files Browse the repository at this point in the history
  • Loading branch information
tchap authored Feb 19, 2025
1 parent 6130ee0 commit 2577f63
Show file tree
Hide file tree
Showing 51 changed files with 112 additions and 110 deletions.
10 changes: 5 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ARG BUILDER_IMAGE=golang:1.23-alpine
ARG BASE_IMAGE=gcr.io/distroless/base-debian10

## Multistage build
FROM ${BUILDER_IMAGE} as builder
FROM ${BUILDER_IMAGE} AS builder
ENV CGO_ENABLED=0
ENV GOOS=linux
ENV GOARCH=amd64
Expand All @@ -19,13 +19,13 @@ COPY cmd ./cmd
COPY pkg ./pkg
COPY internal ./internal
COPY api ./api
WORKDIR /src/cmd/ext-proc
RUN go build -o /ext-proc
WORKDIR /src/cmd/epp
RUN go build -o /epp

## Multistage deploy
FROM ${BASE_IMAGE}

WORKDIR /
COPY --from=builder /ext-proc /ext-proc
COPY --from=builder /epp /epp

ENTRYPOINT ["/ext-proc"]
ENTRYPOINT ["/epp"]
4 changes: 2 additions & 2 deletions cmd/ext-proc/health.go → cmd/epp/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ import (
"google.golang.org/grpc/codes"
healthPb "google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/grpc/status"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

type healthServer struct {
Expand Down
12 changes: 6 additions & 6 deletions cmd/ext-proc/main.go → cmd/epp/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ import (
"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
"sigs.k8s.io/gateway-api-inference-extension/internal/runnable"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend/vllm"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics"
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/server"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/vllm"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

const (
Expand Down
2 changes: 1 addition & 1 deletion docs/dev.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ const(
)
```

The guidelines are written in the context of a k8s controller. Our [ext-proc](../pkg/ext-proc/) does more things such as handling requests and scraping metrics, therefore we adapt the guidelines as follows:
The guidelines are written in the context of a k8s controller. Our [epp](../pkg/epp/) does more things, such as handling requests and scraping metrics; therefore, we adapt the guidelines as follows:

1. The server startup process and configuration.

Expand Down
2 changes: 1 addition & 1 deletion docs/proposals/003-endpoint-picker-protocol/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

The Endpoint Picker, or EPP, is a core component of the inference extension. Ultimately it's
responsible for picking an endpoint from the `InferencePool`. A reference implementation can be
found [here](../../../pkg/ext-proc/).
found [here](../../../pkg/epp/).

## Proxy Protocol

Expand Down
4 changes: 2 additions & 2 deletions pkg/ext-proc/backend/fake.go → pkg/epp/backend/fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ import (
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

type FakePodMetricsClient struct {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ import (
"github.com/go-logr/logr"
"go.uber.org/multierr"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

const (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/assert"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
)

var (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ import (
"github.com/prometheus/common/expfmt"
"go.uber.org/multierr"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

const (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ import (
dto "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/assert"
"google.golang.org/protobuf/proto"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

func TestPromToPodMetrics(t *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

type InferenceModelReconciler struct {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

var (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

// InferencePoolReconciler utilizes the controller runtime to reconcile Instance Gateway resources
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/testing"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing"
)

var (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

type PodReconciler struct {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
)

var (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

// The datastore is a local cache of relevant data for the given InferencePool (currently all pulled from k8s-api)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (

v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

func TestHasSynced(t *testing.T) {
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ import (
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
"google.golang.org/protobuf/types/known/structpb"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling"
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling"
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

// HandleRequestBody handles body of the request to the backend server, such as parsing the "model"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ import (
configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
"sigs.k8s.io/controller-runtime/pkg/log"
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

// HandleResponseHeaders processes response headers from the backend model server.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (

extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
"github.com/google/go-cmp/cmp"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

const (
Expand Down
11 changes: 5 additions & 6 deletions pkg/ext-proc/handlers/server.go → pkg/epp/handlers/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ import (
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling"
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling"
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

func NewServer(scheduler Scheduler, targetEndpointKey string, datastore datastore.Datastore) *Server {
Expand Down Expand Up @@ -185,7 +185,6 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
return status.Errorf(codes.Unknown, "failed to send response back to Envoy: %v", err)
}
}

}

// RequestContext stores context information during the lifetime of an HTTP request.
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
compbasemetrics "k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
"sigs.k8s.io/controller-runtime/pkg/log"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

const (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ import (

"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/component-base/metrics/testutil"
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

const (
Expand Down Expand Up @@ -119,31 +119,32 @@ func TestRecordRequestErrorCounter(t *testing.T) {
name string
reqs []requests
invalid bool
}{{
name: "multiple requests",
reqs: []requests{
{
modelName: "m10",
targetModelName: "t10",
error: errutil.Internal,
},
{
modelName: "m10",
targetModelName: "t10",
error: errutil.Internal,
},
{
modelName: "m10",
targetModelName: "t11",
error: errutil.ModelServerError,
},
{
modelName: "m20",
targetModelName: "t20",
error: errutil.InferencePoolResourceExhausted,
}{
{
name: "multiple requests",
reqs: []requests{
{
modelName: "m10",
targetModelName: "t10",
error: errutil.Internal,
},
{
modelName: "m10",
targetModelName: "t10",
error: errutil.Internal,
},
{
modelName: "m10",
targetModelName: "t11",
error: errutil.ModelServerError,
},
{
modelName: "m20",
targetModelName: "t20",
error: errutil.InferencePoolResourceExhausted,
},
},
},
},
}
Register()
for _, scenario := range scenarios {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ import (
"math"

"github.com/go-logr/logr"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

type Filter interface {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ import (
"github.com/go-logr/logr"
"github.com/google/go-cmp/cmp"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

func TestFilter(t *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ import (

"github.com/go-logr/logr"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

const (
Expand Down Expand Up @@ -102,7 +102,8 @@ var (
filter: func(logger logr.Logger, req *LLMRequest, pods []*datastore.PodMetrics) ([]*datastore.PodMetrics, error) {
logger.V(logutil.DEFAULT).Info("Request dropped", "request", req)
return []*datastore.PodMetrics{}, errutil.Error{
Code: errutil.InferencePoolResourceExhausted, Msg: "dropping request due to limited backend resources"}
Code: errutil.InferencePoolResourceExhausted, Msg: "dropping request due to limited backend resources",
}
},
},
}
Expand Down
File renamed without changes.
Loading

0 comments on commit 2577f63

Please sign in to comment.