diff --git a/config/jobs/kubernetes/sig-testing/compatibility-versions-e2e.yaml b/config/jobs/kubernetes/sig-testing/compatibility-versions-e2e.yaml
index 0a338f78a1dd..4a412d963ed7 100644
--- a/config/jobs/kubernetes/sig-testing/compatibility-versions-e2e.yaml
+++ b/config/jobs/kubernetes/sig-testing/compatibility-versions-e2e.yaml
@@ -6,7 +6,7 @@ periodics:
     testgrid-dashboards: sig-testing-kind
     testgrid-tab-name: compatibility-version-test-n-minus-1
     description: Uses kind to run e2e tests from the n-1 kubernetes release against a latest kubernetes master components w/ --emulated-version=n-1 set.
-    # TODO(aaron-prindle) route the alert email to a rotation vs individual email
+    # TODO(#34269) route the alert email to a rotation vs individual email and update owners in experiment/compatibility-versions
     testgrid-alert-email: aprindle@google.com
     testgrid-num-columns-recent: '6'
   labels:
@@ -64,7 +64,7 @@ periodics:
     testgrid-dashboards: sig-testing-kind
     testgrid-tab-name: compatibility-version-test-n-minus-2
     description: Uses kind to run e2e tests from the n-2 kubernetes release against a latest kubernetes master components w/ --emulated-version=n-2 set.
-    # TODO(aaron-prindle) route the alert email to a rotation vs individual email
+    # TODO(#34269) route the alert email to a rotation vs individual email and update owners in experiment/compatibility-versions
     testgrid-alert-email: aprindle@google.com
     testgrid-num-columns-recent: '6'
   labels:
@@ -115,3 +115,54 @@ periodics:
           # this is mostly for building kubernetes
           memory: 9Gi
           cpu: 7
+- interval: 6h
+  cluster: k8s-infra-prow-build
+  name: ci-kubernetes-e2e-kind-compatibility-versions-feature-gate-test
+  annotations:
+    testgrid-dashboards: sig-testing-kind
+    testgrid-tab-name: compatibility-versions-feature-gate-test
+    description: Uses kind to run bespoke feature gate tests from the n-1 kubernetes release yaml files against a latest kubernetes master components w/ --emulated-version=n-1 set.
+    # TODO(#34269) route the alert email to a rotation vs individual email and update owners in experiment/compatibility-versions
+    testgrid-alert-email: aprindle@google.com
+    testgrid-num-failures-to-alert: "2"
+    testgrid-num-columns-recent: '6'
+  labels:
+    preset-dind-enabled: "true"
+    preset-kind-volume-mounts: "true"
+  decorate: true
+  decoration_config:
+    timeout: 60m
+  extra_refs:
+  - org: kubernetes
+    repo: kubernetes
+    base_ref: master
+    path_alias: k8s.io/kubernetes
+    workdir: true
+  - org: kubernetes
+    repo: test-infra
+    base_ref: master
+    path_alias: k8s.io/test-infra
+  spec:
+    containers:
+    - image: gcr.io/k8s-staging-test-infra/krte:v20241230-3006692a6f-master
+      imagePullPolicy: Always # pull latest image for canary testing
+      command:
+      - wrapper.sh
+      - bash
+      - -c
+      - curl -sSL https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" && ./../test-infra/experiment/compatibility-versions/compatibility-versions-feature-gate-test.sh
+      env:
+      - name: RUNTIME_CONFIG  # read by compatibility-versions-feature-gate-test.sh and written into the kind config as runtimeConfig
+        value: '{"api/beta":"true", "api/ga":"true"}'
+      # we need privileged mode in order to do docker in docker
+      securityContext:
+        privileged: true
+      resources:
+        limits:
+          memory: 14Gi
+          cpu: 7
+        requests:
+          # these are both a bit below peak usage during build
+          # this is mostly for building kubernetes
+          memory: 14Gi
+          cpu: 7
diff --git a/experiment/compatibility-versions/OWNERS b/experiment/compatibility-versions/OWNERS
index 7d8e3b5ab5dd..79dc1649bb0b 100644
--- a/experiment/compatibility-versions/OWNERS
+++ b/experiment/compatibility-versions/OWNERS
@@ -1,5 +1,6 @@
 # See the OWNERS docs at https://go.k8s.io/owners
 
+# TODO(#34269) update owners in experiment/compatibility-versions to a group/rotation and route the alert email to a rotation vs individual email
 reviewers:
   - aaron-prindle
 approvers:
diff --git a/experiment/compatibility-versions/compatibility-versions-feature-gate-test.sh b/experiment/compatibility-versions/compatibility-versions-feature-gate-test.sh
new
file mode 100755 index 000000000000..6e2a5550fedd --- /dev/null +++ b/experiment/compatibility-versions/compatibility-versions-feature-gate-test.sh @@ -0,0 +1,259 @@ +#!/usr/bin/env bash +# Copyright 2025 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# hack script for running kind clusters, fetching kube-apiserver metrics, and validating feature gates +# must be run with a kubernetes checkout in $PWD (IE from the checkout) +# Usage: compatibility-versions-feature-gate-test.sh + +set -o errexit -o nounset -o pipefail +set -o xtrace + +# Settings: +# GA_ONLY: true - limit to GA APIs/features as much as possible +# false - (default) APIs and features left at defaults + +# FEATURE_GATES: +# JSON or YAML encoding of a string/bool map: {"FeatureGateA": true, "FeatureGateB": false} +# Enables or disables feature gates in the entire cluster. +# Cannot be used when GA_ONLY=true. + +# RUNTIME_CONFIG: +# JSON or YAML encoding of a string/string (!) map: {"apia.example.com/v1alpha1": "true", "apib.example.com/v1beta1": "false"} +# Enables API groups in the apiserver via --runtime-config. +# Cannot be used when GA_ONLY=true. 
+
+# cleanup: export kind logs, delete the cluster, remove the built kubectl and TMP_DIR; idempotent via CLEANED_UP
+CLEANED_UP=false
+cleanup() {
+  if [ "$CLEANED_UP" = "true" ]; then
+    return
+  fi
+  # KIND_CREATE_ATTEMPTED is true once we: kind create
+  if [ "${KIND_CREATE_ATTEMPTED:-}" = true ]; then
+    kind "export" logs "${ARTIFACTS}" || true
+    kind delete cluster || true
+  fi
+  rm -f _output/bin/kubectl || true
+  # remove our tempdir, this needs to be last, or it will prevent kind delete
+  if [ -n "${TMP_DIR:-}" ]; then
+    rm -rf "${TMP_DIR:?}"
+  fi
+  CLEANED_UP=true
+}
+
+# run cleanup on INT/TERM and also on EXIT, so errexit failures and explicit `exit 1` still export logs and delete the cluster
+# shellcheck disable=SC2317 # this is not unreachable code
+signal_handler() {
+  cleanup
+}
+trap signal_handler EXIT INT TERM
+
+# build kubernetes / node image, kubectl binary
+build() {
+  # build the node image w/ kubernetes
+  kind build node-image -v 1
+  # make sure we have kubectl
+  make all WHAT="cmd/kubectl"
+  # Ensure the built kubectl is used instead of system
+  export PATH="${PWD}/_output/bin:$PATH"
+}
+
+# exit 1 if KUBE_VERSION (set in create_cluster) matches v1.10 - v1.18; $1 is the option name used in the error message
+check_structured_log_support() {
+  case "${KUBE_VERSION}" in
+    v1.1[0-8].*)
+      echo "$1 is only supported on versions >= v1.19, got ${KUBE_VERSION}"
+      exit 1
+      ;;
+  esac
+}
+
+# create_cluster: write ${ARTIFACTS}/kind-config.yaml and start a kind cluster from the kindest/node:latest image
+create_cluster() {
+  # Grab the version of the cluster we're about to start
+  KUBE_VERSION="$(docker run --rm --entrypoint=cat "kindest/node:latest" /kind/version)"
+
+  # Default Log level for all components in test clusters
+  KIND_CLUSTER_LOG_LEVEL=${KIND_CLUSTER_LOG_LEVEL:-4}
+
+  EMULATED_VERSION=${EMULATED_VERSION:-}
+
+  # potentially enable --logging-format
+  CLUSTER_LOG_FORMAT=${CLUSTER_LOG_FORMAT:-}
+  scheduler_extra_args="      \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\""
+  controllerManager_extra_args="      \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\""
+  apiServer_extra_args="      \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\""
+  kubelet_extra_args="      \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\""
+
+  if [ -n "$CLUSTER_LOG_FORMAT" ]; then
+    check_structured_log_support "CLUSTER_LOG_FORMAT"
+    scheduler_extra_args="${scheduler_extra_args}
+      \"logging-format\": \"${CLUSTER_LOG_FORMAT}\""
+    controllerManager_extra_args="${controllerManager_extra_args}
+      \"logging-format\": \"${CLUSTER_LOG_FORMAT}\""
+    apiServer_extra_args="${apiServer_extra_args}
+      \"logging-format\": \"${CLUSTER_LOG_FORMAT}\""
+  fi
+
+  KUBELET_LOG_FORMAT=${KUBELET_LOG_FORMAT:-$CLUSTER_LOG_FORMAT}
+  if [ -n "$KUBELET_LOG_FORMAT" ]; then
+    check_structured_log_support "KUBELET_LOG_FORMAT"
+    kubelet_extra_args="${kubelet_extra_args}
+      \"logging-format\": \"${KUBELET_LOG_FORMAT}\""
+  fi
+
+  # JSON or YAML map injected into featureGates config
+  feature_gates="${FEATURE_GATES:-{\}}"
+  # --runtime-config argument value passed to the API server, again as a map
+  runtime_config="${RUNTIME_CONFIG:-{\}}"
+
+  case "${GA_ONLY:-false}" in
+    false)
+      :
+      ;;
+    true)
+      if [ "${feature_gates}" != "{}" ]; then
+        echo "GA_ONLY=true and FEATURE_GATES=${feature_gates} are mutually exclusive."
+        exit 1
+      fi
+      if [ "${runtime_config}" != "{}" ]; then
+        echo "GA_ONLY=true and RUNTIME_CONFIG=${runtime_config} are mutually exclusive."
+        exit 1
+      fi
+
+      echo "Limiting to GA APIs and features for ${KUBE_VERSION}"
+      feature_gates='{"AllAlpha":false,"AllBeta":false}'
+      runtime_config='{"api/alpha":"false", "api/beta":"false"}'
+      ;;
+    *)
+      echo "\$GA_ONLY set to '${GA_ONLY}'; supported values are true and false (default)"
+      exit 1
+      ;;
+  esac
+
+  # create the config file (heredoc is unquoted on purpose: the \${...} values above are expanded into it)
+  cat <<EOF > "${ARTIFACTS}/kind-config.yaml"
+# config for 1 control plane node and 2 workers (necessary for conformance)
+kind: Cluster
+apiVersion: kind.x-k8s.io/v1alpha4
+networking:
+  ipFamily: ${IP_FAMILY:-ipv4}
+  kubeProxyMode: ${KUBE_PROXY_MODE:-iptables}
+  # don't pass through host search paths
+  # TODO: possibly a reasonable default in the future for kind ...
+  dnsSearch: []
+nodes:
+- role: control-plane
+featureGates: ${feature_gates}
+runtimeConfig: ${runtime_config}
+kubeadmConfigPatches:
+- |
+  kind: ClusterConfiguration
+  metadata:
+    name: config
+  apiServer:
+    extraArgs:
+${apiServer_extra_args}
+      "emulated-version": "${EMULATED_VERSION}"
+  controllerManager:
+    extraArgs:
+${controllerManager_extra_args}
+      "emulated-version": "${EMULATED_VERSION}"
+  scheduler:
+    extraArgs:
+${scheduler_extra_args}
+      "emulated-version": "${EMULATED_VERSION}"
+  ---
+  kind: InitConfiguration
+  nodeRegistration:
+    kubeletExtraArgs:
+${kubelet_extra_args}
+  ---
+  kind: JoinConfiguration
+  nodeRegistration:
+    kubeletExtraArgs:
+${kubelet_extra_args}
+EOF
+
+  KIND_CREATE_ATTEMPTED=true
+  kind create cluster \
+    --image=kindest/node:latest \
+    --retain \
+    --wait=1m \
+    -v=3 \
+    "--config=${ARTIFACTS}/kind-config.yaml"
+
+  # debug cluster version
+  kubectl version
+  # Patch kube-proxy to set the verbosity level
+  kubectl patch -n kube-system daemonset/kube-proxy \
+    --type='json' -p='[{"op": "add", "path": "/spec/template/spec/containers/0/command/-", "value": "--v='"${KIND_CLUSTER_LOG_LEVEL}"'" }]'
+}
+
+# fetch_metrics: write the apiserver /metrics endpoint (via kubectl get --raw) to the file given as $1
+fetch_metrics() {
+  local output_file="$1"
+  echo "Fetching metrics to ${output_file}..."
+  kubectl get --raw /metrics > "${output_file}"
+}
+
+# main: clone the latest release branch for its feature lists, build + start a cluster, dump /metrics, validate the gates
+main() {
+  TMP_DIR=$(mktemp -d)
+  export ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}"
+  mkdir -p "${ARTIFACTS}"
+
+  EMULATED_VERSION=$(get_latest_release_version); export EMULATED_VERSION # assign before export so a failed lookup trips errexit
+  export PREV_VERSIONED_FEATURE_LIST=${PREV_VERSIONED_FEATURE_LIST:-"release-${EMULATED_VERSION}/test/featuregates_linter/test_data/versioned_feature_list.yaml"}
+  export UNVERSIONED_FEATURE_LIST=${UNVERSIONED_FEATURE_LIST:-"release-${EMULATED_VERSION}/test/featuregates_linter/test_data/unversioned_feature_list.yaml"}
+
+  # Blobless clone of the release branch; it supplies the default feature list files referenced above
+  git clone --filter=blob:none --single-branch --branch "release-${EMULATED_VERSION}" https://github.com/kubernetes/kubernetes.git "release-${EMULATED_VERSION}"
+
+  # Build current version
+  build
+
+  # Create and validate latest cluster
+  KUBECONFIG="${HOME}/.kube/kind-test-config-latest"
+  export KUBECONFIG
+  create_cluster
+  LATEST_METRICS="${ARTIFACTS}/latest_metrics.txt"
+  fetch_metrics "${LATEST_METRICS}"
+  LATEST_RESULTS="${ARTIFACTS}/latest_results.txt"
+  validate_res=0 # validator exit status; captured below so errexit does not abort before the results are printed
+  VALIDATE_SCRIPT="${VALIDATE_SCRIPT:-${PWD}/../test-infra/experiment/compatibility-versions/validate-compatibility-versions-feature-gates.sh}"
+  "${VALIDATE_SCRIPT}" "${EMULATED_VERSION}" "${LATEST_METRICS}" "${PREV_VERSIONED_FEATURE_LIST}" "${UNVERSIONED_FEATURE_LIST}" "${LATEST_RESULTS}" || validate_res=$?
+
+  # Report results (also on failure, so the job log shows which gates mismatched)
+  echo "=== Latest Cluster (${EMULATED_VERSION}) Validation ==="
+  cat "${LATEST_RESULTS}" || true
+
+  if [ "${validate_res}" -ne 0 ] || grep -q "FAIL" "${LATEST_RESULTS}"; then
+    echo "Validation failures detected"
+    exit 1
+  fi
+  cleanup
+}
+
+# get_latest_release_version: print the highest MAJOR.MINOR among upstream release-MAJOR.MINOR branch heads
+get_latest_release_version() {
+  git ls-remote --heads https://github.com/kubernetes/kubernetes.git | \
+    grep -o 'release-[0-9]\+\.[0-9]\+' | \
+    cut -d- -f2 | \
+    sort -t. -k1,1n -k2,2n | \
+    tail -n1
+}
+
+main
diff --git a/experiment/compatibility-versions/e2e-k8s-compatibility-versions.sh b/experiment/compatibility-versions/e2e-k8s-compatibility-versions.sh
index e51c88760bbe..d5c28343a3b8 100755
--- a/experiment/compatibility-versions/e2e-k8s-compatibility-versions.sh
+++ b/experiment/compatibility-versions/e2e-k8s-compatibility-versions.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 # Copyright 2024 The Kubernetes Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/experiment/compatibility-versions/validate-compatibility-versions-feature-gates.sh b/experiment/compatibility-versions/validate-compatibility-versions-feature-gates.sh
new file mode 100755
index 000000000000..2c4c90f71663
--- /dev/null
+++ b/experiment/compatibility-versions/validate-compatibility-versions-feature-gates.sh
@@ -0,0 +1,240 @@
+#!/usr/bin/env bash
+# Copyright 2025 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script validates feature gates from a Kubernetes cluster's /metrics endpoint
+# against expected values defined in versioned and unversioned feature lists.
+#
+# Usage: validate-compatibility-versions-feature-gates.sh <emulated_version> <metrics_file> <versioned_feature_list> <unversioned_feature_list> <results_file>
+
+set -o errexit -o nounset -o pipefail
+
+# Check arg count
+if [[ $# -ne 5 ]]; then
+  echo "Usage: ${0} <emulated_version> <metrics_file> <versioned_feature_list> <unversioned_feature_list> <results_file>"
+  exit 1
+fi
+
+emulated_version="$1"          # e.g. "1.32"
+metrics_file="$2"              # path to /metrics
+feature_list="$3"              # versioned_feature_list.yaml
+unversioned_feature_list="$4"  # unversioned_feature_list.yaml
+results_file="$5"              # output: one "FAIL: ..." line per mismatch, empty when everything matches
+
+echo "Validating features for ${emulated_version}..."
+rm -f "${results_file}"
+touch "${results_file}"
+
+# Parse /metrics -> actual_features[featureName] = 0 or 1
+declare -A actual_features
+declare -A actual_stages
+
+while IFS= read -r line; do
+  # Example line:
+  #   kubernetes_feature_enabled{name="DisableKubeletCloudCredentialProviders",stage="Alpha"} 1
+
+  # Capture name in group [1], stage in [2], and numeric value (0 or 1) in [3].
+  # NOTE: The capture group for stage="([^"]*)" matches any stage text (including empty).
+  if [[ "$line" =~ ^kubernetes_feature_enabled\{name=\"([^\"]+)\",stage=\"([^\"]*)\"}.*\ ([0-9]+)$ ]]; then
+    feature_name="${BASH_REMATCH[1]}"
+    feature_stage="${BASH_REMATCH[2]}"
+    feature_value="${BASH_REMATCH[3]}"
+
+    # Store these in two separate maps (actual_stages is recorded but not consulted by the checks below)
+    actual_features["$feature_name"]="$feature_value"
+    actual_stages["$feature_name"]="$feature_stage"
+  fi
+done < <(grep '^kubernetes_feature_enabled' "${metrics_file}")
+
+# Build the "expected" sets from versioned_feature_list.yaml
+# => expected_stage[featureName], expected_lock[featureName], expected_value[featureName]
+declare -A expected_stage
+declare -A expected_lock
+declare -A expected_value
+
+feature_stream="$(
+  yq e -o=json '.' "${feature_list}" \
+    | jq -c '.[]'
+)"
+
+while IFS= read -r feature_entry; do
+  feature_name=$(echo "${feature_entry}" | jq -r '.name')
+  specs_json=$(echo "${feature_entry}" | jq -c '.versionedSpecs')
+
+  # Pick the last spec (in file order) whose .version is <= emulated_version
+  target_spec="$(
+    echo "${specs_json}" \
+      | jq -r --arg ver "${emulated_version}" '
+        # Compare versions as [major, minor] integer arrays, not as floats:
+        # as a float 1.4 sorts after 1.32 and 1.30 collapses to 1.3, so old
+        # or round-numbered specs would be selected or dropped incorrectly.
+        def parts: sub("^v"; "") | split(".") | map(tonumber);
+        [ .[]
+          | select((.version | parts) <= ($ver | parts))
+        ]
+        | last
+      '
+  )"
+
+  # If no matching spec, skip
+  if [[ -z "$target_spec" || "$target_spec" == "null" ]]; then
+    continue
+  fi
+
+  # Read fields
+  raw_stage=$(echo "$target_spec" | jq -r '.preRelease')
+  lockToDefault=$(echo "$target_spec" | jq -r '.lockToDefault')
+  defaultVal=$(echo "$target_spec" | jq -r '.default')
+
+  # Convert defaultVal (true/false) -> 1/0
+  want="0"
+  if [[ "$defaultVal" == "true" ]]; then
+    want="1"
+  fi
+
+  expected_stage["$feature_name"]="${raw_stage^^}"
+  expected_lock["$feature_name"]="$lockToDefault"
+  expected_value["$feature_name"]="$want"
+done < <(echo "$feature_stream")
+
+# Build the "expected_unversioned" sets from unversioned_feature_list.yaml
+# => expected_unversioned_stage[featureName], expected_unversioned_lock[featureName], expected_unversioned_value[featureName]
+declare -A expected_unversioned_stage
+declare -A expected_unversioned_lock
+declare -A expected_unversioned_value
+
+unversioned_feature_stream="$(
+  yq e -o=json '.' "${unversioned_feature_list}" \
+    | jq -c '.[]'
+)"
+
+while IFS= read -r unversioned_feature_entry; do
+  unversioned_feature_name=$(echo "${unversioned_feature_entry}" | jq -r '.name')
+  unversioned_specs_json=$(echo "${unversioned_feature_entry}" | jq -c '.versionedSpecs') # Although named versionedSpecs in YAML, it's unversioned list.
+
+  # Unversioned should always use the first spec (assuming only one exists or first one is the default)
+  target_unversioned_spec="$(
+    echo "${unversioned_specs_json}" \
+      | jq -r '.[0]' # Get the first spec
+  )"
+
+  # If no spec, skip (should not happen in valid file)
+  if [[ -z "$target_unversioned_spec" || "$target_unversioned_spec" == "null" ]]; then
+    continue
+  fi
+
+  # Read fields - these are default values for unversioned features
+  raw_unversioned_stage=$(echo "$target_unversioned_spec" | jq -r '.preRelease') # Can use this or default to GA
+  unversioned_lockToDefault=$(echo "$target_unversioned_spec" | jq -r '.lockToDefault')
+  unversioned_defaultVal=$(echo "$target_unversioned_spec" | jq -r '.default')
+
+  # Convert defaultVal (true/false) -> 1/0
+  unversioned_want="0"
+  if [[ "$unversioned_defaultVal" == "true" ]]; then
+    unversioned_want="1"
+  fi
+
+  # Upper-case the stage like the versioned path does, so the == "ALPHA" skip below can match
+  expected_unversioned_stage["$unversioned_feature_name"]="${raw_unversioned_stage^^}"
+  expected_unversioned_lock["$unversioned_feature_name"]="$unversioned_lockToDefault"
+  expected_unversioned_value["$unversioned_feature_name"]="$unversioned_want"
+done < <(echo "$unversioned_feature_stream")
+
+
+# For each "expected" feature (versioned):
+#   - If missing from /metrics => fail unless stage==ALPHA or lock==true
+#   - If present & stage!=ALPHA => compare numeric value
+for feature_name in "${!expected_stage[@]}"; do
+  stage="${expected_stage[$feature_name]}"
+  locked="${expected_lock[$feature_name]}"
+  want="${expected_value[$feature_name]}"
+
+  got="${actual_features[$feature_name]:-}"  # empty if missing
+
+  # If present, but stage==ALPHA => no checks are done
+  if [[ "$stage" == "ALPHA" ]]; then
+    continue
+  fi
+
+  if [[ -z "$got" ]]; then
+    # Missing from metrics
+    if [[ "$locked" == "true" ]]; then
+      continue
+    fi
+    echo "FAIL: expected feature gate '$feature_name' not found in metrics (stage=${stage}, lockToDefault=${locked})" \
+      >> "${results_file}"
+    continue
+  fi
+
+  # If present, stage!=ALPHA => compare true/false enabled value
+  if [[ "$got" != "$want" ]]; then
+    echo "FAIL: feature '$feature_name' expected value $want, got $got" \
+      >> "${results_file}"
+  fi
+done
+
+# For each "expected_unversioned" feature:
+#   - If missing from /metrics => fail unless stage==ALPHA or lock==true
+#   - If present => compare numeric value
+for unversioned_feature_name in "${!expected_unversioned_stage[@]}"; do
+  unversioned_stage="${expected_unversioned_stage[$unversioned_feature_name]}"
+  unversioned_locked="${expected_unversioned_lock[$unversioned_feature_name]}"
+  unversioned_want="${expected_unversioned_value[$unversioned_feature_name]}"
+
+  got="${actual_features[$unversioned_feature_name]:-}"  # empty if missing
+
+  # If present, but stage==ALPHA => no checks are done
+  if [[ "$unversioned_stage" == "ALPHA" ]]; then
+    continue
+  fi
+
+  if [[ -z "$got" ]]; then
+    # Missing from metrics
+    if [[ "$unversioned_locked" == "true" ]]; then
+      continue
+    fi
+    echo "FAIL: expected unversioned feature gate '$unversioned_feature_name' not found in metrics (lockToDefault=${unversioned_locked})" \
+      >> "${results_file}"
+    continue
+  fi
+
+  # If present, compare true/false enabled value
+  if [[ "$got" != "$unversioned_want" ]]; then
+    echo "FAIL: unversioned feature '$unversioned_feature_name' expected value $unversioned_want, got $got" \
+      >> "${results_file}"
+  fi
+done
+
+# For each actual feature in /metrics not in the "expected" maps (versioned OR unversioned),
+#   - if it's "1", we fail as "unexpected feature". because new gates not found in previous
+#     expected gates can only be introduced if they are off by default (0) but not on by default (1)
+# NOTE: if the new feature is a client-go feature then we do not fail but continue
+for feature_name in "${!actual_features[@]}"; do
+  if [[ -z "${expected_stage[$feature_name]:-}" ]] && [[ -z "${expected_unversioned_stage[$feature_name]:-}" ]]; then
+    got="${actual_features[$feature_name]}"
+    if [[ "$got" == "1" ]]; then
+      # Whole-word, fixed-string lookup in client-go (path is relative to \$PWD); a substring hit on a longer gate name must not count
+      if grep -qwF -- "$feature_name" staging/src/k8s.io/client-go/features/known_features.go; then
+        continue
+      fi
+      echo "FAIL: unexpected feature '$feature_name' found in /metrics, got=1" \
+        >> "${results_file}"
+    fi
+  fi
+done
+
+
+if grep -q "FAIL" "$results_file"; then
+  echo "Validation failures detected"
+  exit 1
+fi