From 1ddc998774af2a728eafb57071a6c5c7181a1ca4 Mon Sep 17 00:00:00 2001
From: Jon Huhn
Date: Tue, 14 Jan 2025 12:42:50 -0600
Subject: [PATCH 1/2] Verify environment variables in e2e test

---
 test/e2e/e2e.sh | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/test/e2e/e2e.sh b/test/e2e/e2e.sh
index cf7a9a7..2cf9803 100755
--- a/test/e2e/e2e.sh
+++ b/test/e2e/e2e.sh
@@ -27,6 +27,17 @@ kubectl create -f demo/gpu-test3.yaml
 kubectl create -f demo/gpu-test4.yaml
 kubectl create -f demo/gpu-test5.yaml
 
+function verify-env {
+  local namespace="$1"
+  local pod="$2"
+  for ctr in $(kubectl get pod -n "$namespace" "$pod" -o jsonpath='{.spec.containers[*].name}'); do
+    if ! kubectl logs -n "$namespace" "$pod" -c "$ctr" | grep -q "GPU_DEVICE_"; then
+      echo "Pod $namespace/$pod, container $ctr missing GPU_DEVICE_ environment variables"
+      exit 1
+    fi
+  done
+}
+
 kubectl wait --for=condition=Ready -n gpu-test1 pod/pod0 --timeout=120s
 kubectl wait --for=condition=Ready -n gpu-test1 pod/pod1 --timeout=120s
 gpu_test_1=$(kubectl get pods -n gpu-test1 | grep -c 'Running')
@@ -34,6 +45,8 @@ if [ $gpu_test_1 != 2 ]; then
     echo "gpu_test_1 $gpu_test_1 failed to match against 2 expected pods"
     exit 1
 fi
+verify-env gpu-test1 pod0
+verify-env gpu-test1 pod1
 
 kubectl wait --for=condition=Ready -n gpu-test2 pod/pod0 --timeout=120s
@@ -42,6 +55,7 @@ if [ $gpu_test_2 != 1 ]; then
     echo "gpu_test_2 $gpu_test_2 failed to match against 1 expected pod"
     exit 1
 fi
+verify-env gpu-test2 pod0
 
 kubectl wait --for=condition=Ready -n gpu-test3 pod/pod0 --timeout=120s
 gpu_test_3=$(kubectl get pods -n gpu-test3 | grep -c 'Running')
@@ -49,14 +63,17 @@ if [ $gpu_test_3 != 1 ]; then
     echo "gpu_test_3 $gpu_test_3 failed to match against 1 expected pod"
     exit 1
 fi
+verify-env gpu-test3 pod0
 
 kubectl wait --for=condition=Ready -n gpu-test4 pod/pod0 --timeout=120s
 kubectl wait --for=condition=Ready -n gpu-test4 pod/pod1 --timeout=120s
 gpu_test_4=$(kubectl get pods -n gpu-test4 | grep -c 'Running')
 if [ $gpu_test_4 != 2 ]; then
-    echo "gpu_test_4 $gpu_test_4 failed to match against 1 expected pods"
+    echo "gpu_test_4 $gpu_test_4 failed to match against 2 expected pods"
     exit 1
 fi
+verify-env gpu-test4 pod0
+verify-env gpu-test4 pod1
 
 kubectl wait --for=condition=Ready -n gpu-test5 pod/pod0 --timeout=120s
 gpu_test_5=$(kubectl get pods -n gpu-test5 | grep -c 'Running')
@@ -64,6 +81,7 @@ if [ $gpu_test_5 != 1 ]; then
    echo "gpu_test_5 $gpu_test_5 failed to match against 1 expected pod"
    exit 1
 fi
+verify-env gpu-test5 pod0
 
 # test that deletion is fast (less than the default grace period of 30s)
 # see https://github.com/kubernetes/kubernetes/issues/127188 for details

From a4795f1e51aca3fef219c54494529c08573833da Mon Sep 17 00:00:00 2001
From: Jon Huhn
Date: Wed, 15 Jan 2025 16:04:25 -0600
Subject: [PATCH 2/2] fixup! Verify environment variables in e2e test

---
 test/e2e/e2e.sh | 294 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 279 insertions(+), 15 deletions(-)

diff --git a/test/e2e/e2e.sh b/test/e2e/e2e.sh
index 2cf9803..95d3102 100755
--- a/test/e2e/e2e.sh
+++ b/test/e2e/e2e.sh
@@ -27,15 +27,41 @@ kubectl create -f demo/gpu-test3.yaml
 kubectl create -f demo/gpu-test4.yaml
 kubectl create -f demo/gpu-test5.yaml
 
-function verify-env {
-  local namespace="$1"
-  local pod="$2"
-  for ctr in $(kubectl get pod -n "$namespace" "$pod" -o jsonpath='{.spec.containers[*].name}'); do
-    if ! kubectl logs -n "$namespace" "$pod" -c "$ctr" | grep -q "GPU_DEVICE_"; then
-      echo "Pod $namespace/$pod, container $ctr missing GPU_DEVICE_ environment variables"
-      exit 1
-    fi
+function gpus-from-logs {
+  local logs="$1"
+  echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_[[:digit:]]+=\"(.+)\"$/\1/p"
+}
+
+function gpu-id {
+  local gpu="$1"
+  echo "$gpu" | sed -nE "s/^gpu-([[:digit:]]+)$/\1/p"
+}
+
+function gpu-sharing-strategy-from-logs {
+  local logs="$1"
+  local id="$2"
+  echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_${id}_SHARING_STRATEGY=\"(.+)\"$/\1/p"
+}
+
+function gpu-timeslice-interval-from-logs {
+  local logs="$1"
+  local id="$2"
+  echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_${id}_TIMESLICE_INTERVAL=\"(.+)\"$/\1/p"
+}
+
+function gpu-partition-count-from-logs {
+  local logs="$1"
+  local id="$2"
+  echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_${id}_PARTITION_COUNT=\"(.+)\"$/\1/p"
+}
+
+declare -a observed_gpus
+function gpu-already-seen {
+  local gpu="$1"
+  for seen in "${observed_gpus[@]}"; do
+    if [[ "$gpu" == "$seen" ]]; then return 0; fi;
   done
+  return 1
 }
 
 kubectl wait --for=condition=Ready -n gpu-test1 pod/pod0 --timeout=120s
 kubectl wait --for=condition=Ready -n gpu-test1 pod/pod1 --timeout=120s
 gpu_test_1=$(kubectl get pods -n gpu-test1 | grep -c 'Running')
@@ -45,8 +71,36 @@ if [ $gpu_test_1 != 2 ]; then
     echo "gpu_test_1 $gpu_test_1 failed to match against 2 expected pods"
     exit 1
 fi
-verify-env gpu-test1 pod0
-verify-env gpu-test1 pod1
+
+gpu_test1_pod0_ctr0_logs=$(kubectl logs -n gpu-test1 pod0 -c ctr0)
+gpu_test1_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test1_pod0_ctr0_logs")
+gpu_test1_pod0_ctr0_gpus_count=$(echo "$gpu_test1_pod0_ctr0_gpus" | wc -w)
+if [[ $gpu_test1_pod0_ctr0_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test1/pod0, container ctr0 to have 1 GPU, but got $gpu_test1_pod0_ctr0_gpus_count: $gpu_test1_pod0_ctr0_gpus"
+  exit 1
+fi
+gpu_test1_pod0_ctr0_gpu="$gpu_test1_pod0_ctr0_gpus"
+if gpu-already-seen "$gpu_test1_pod0_ctr0_gpu"; then
+  echo "Pod gpu-test1/pod0, container ctr0 should have a new GPU but claimed $gpu_test1_pod0_ctr0_gpu which is already claimed"
+  exit 1
+fi
+echo "Pod gpu-test1/pod0, container ctr0 claimed $gpu_test1_pod0_ctr0_gpu"
+observed_gpus+=("$gpu_test1_pod0_ctr0_gpu")
+
+gpu_test1_pod1_ctr0_logs=$(kubectl logs -n gpu-test1 pod1 -c ctr0)
+gpu_test1_pod1_ctr0_gpus=$(gpus-from-logs "$gpu_test1_pod1_ctr0_logs")
+gpu_test1_pod1_ctr0_gpus_count=$(echo "$gpu_test1_pod1_ctr0_gpus" | wc -w)
+if [[ $gpu_test1_pod1_ctr0_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test1/pod1, container ctr0 to have 1 GPU, but got $gpu_test1_pod1_ctr0_gpus_count: $gpu_test1_pod1_ctr0_gpus"
+  exit 1
+fi
+gpu_test1_pod1_ctr0_gpu="$gpu_test1_pod1_ctr0_gpus"
+if gpu-already-seen "$gpu_test1_pod1_ctr0_gpu"; then
+  echo "Pod gpu-test1/pod1, container ctr0 should have a new GPU but claimed $gpu_test1_pod1_ctr0_gpu which is already claimed"
+  exit 1
+fi
+echo "Pod gpu-test1/pod1, container ctr0 claimed $gpu_test1_pod1_ctr0_gpu"
+observed_gpus+=("$gpu_test1_pod1_ctr0_gpu")
 
 kubectl wait --for=condition=Ready -n gpu-test2 pod/pod0 --timeout=120s
 gpu_test_2=$(kubectl get pods -n gpu-test2 | grep -c 'Running')
@@ -55,7 +109,23 @@ if [ $gpu_test_2 != 1 ]; then
     echo "gpu_test_2 $gpu_test_2 failed to match against 1 expected pod"
     exit 1
 fi
-verify-env gpu-test2 pod0
+
+gpu_test2_pod0_ctr0_logs=$(kubectl logs -n gpu-test2 pod0 -c ctr0)
+gpu_test2_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test2_pod0_ctr0_logs")
+gpu_test2_pod0_ctr0_gpus_count=$(echo "$gpu_test2_pod0_ctr0_gpus" | wc -w)
+if [[ $gpu_test2_pod0_ctr0_gpus_count != 2 ]]; then
+  echo "Expected Pod gpu-test2/pod0, container ctr0 to have 2 GPUs, but got $gpu_test2_pod0_ctr0_gpus_count: $gpu_test2_pod0_ctr0_gpus"
+  exit 1
+fi
+echo "$gpu_test2_pod0_ctr0_gpus" | while read gpu_test2_pod0_ctr0_gpu; do
+  if gpu-already-seen "$gpu_test2_pod0_ctr0_gpu"; then
+    echo "Pod gpu-test2/pod0, container ctr0 should have a new GPU but claimed $gpu_test2_pod0_ctr0_gpu which is already claimed"
+    exit 1
+  fi
+  echo "Pod gpu-test2/pod0, container ctr0 claimed $gpu_test2_pod0_ctr0_gpu"
+  observed_gpus+=("$gpu_test2_pod0_ctr0_gpu")
+done
+
 kubectl wait --for=condition=Ready -n gpu-test3 pod/pod0 --timeout=120s
 gpu_test_3=$(kubectl get pods -n gpu-test3 | grep -c 'Running')
@@ -63,7 +133,56 @@ if [ $gpu_test_3 != 1 ]; then
     echo "gpu_test_3 $gpu_test_3 failed to match against 1 expected pod"
     exit 1
 fi
-verify-env gpu-test3 pod0
+
+gpu_test3_pod0_ctr0_logs=$(kubectl logs -n gpu-test3 pod0 -c ctr0)
+gpu_test3_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test3_pod0_ctr0_logs")
+gpu_test3_pod0_ctr0_gpus_count=$(echo "$gpu_test3_pod0_ctr0_gpus" | wc -w)
+if [[ $gpu_test3_pod0_ctr0_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test3/pod0, container ctr0 to have 1 GPU, but got $gpu_test3_pod0_ctr0_gpus_count: $gpu_test3_pod0_ctr0_gpus"
+  exit 1
+fi
+gpu_test3_pod0_ctr0_gpu="$gpu_test3_pod0_ctr0_gpus"
+if gpu-already-seen "$gpu_test3_pod0_ctr0_gpu"; then
+  echo "Pod gpu-test3/pod0, container ctr0 should have a new GPU but claimed $gpu_test3_pod0_ctr0_gpu which is already claimed"
+  exit 1
+fi
+echo "Pod gpu-test3/pod0, container ctr0 claimed $gpu_test3_pod0_ctr0_gpu"
+observed_gpus+=("$gpu_test3_pod0_ctr0_gpu")
+gpu_test3_pod0_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test3_pod0_ctr0_logs" $(gpu-id "$gpu_test3_pod0_ctr0_gpu"))
+if [[ "$gpu_test3_pod0_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
+  echo "Expected Pod gpu-test3/pod0, container ctr0 to have sharing strategy TimeSlicing, got $gpu_test3_pod0_ctr0_sharing_strategy"
+  exit 1
+fi
+gpu_test3_pod0_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test3_pod0_ctr0_logs" $(gpu-id "$gpu_test3_pod0_ctr0_gpu"))
+if [[ "$gpu_test3_pod0_ctr0_timeslice_interval" != "Default" ]]; then
+  echo "Expected Pod gpu-test3/pod0, container ctr0 to have timeslice interval Default, got $gpu_test3_pod0_ctr0_timeslice_interval"
+  exit 1
+fi
+
+gpu_test3_pod0_ctr1_logs=$(kubectl logs -n gpu-test3 pod0 -c ctr1)
+gpu_test3_pod0_ctr1_gpus=$(gpus-from-logs "$gpu_test3_pod0_ctr1_logs")
+gpu_test3_pod0_ctr1_gpus_count=$(echo "$gpu_test3_pod0_ctr1_gpus" | wc -w)
+if [[ $gpu_test3_pod0_ctr1_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test3/pod0, container ctr1 to have 1 GPU, but got $gpu_test3_pod0_ctr1_gpus_count: $gpu_test3_pod0_ctr1_gpus"
+  exit 1
+fi
+gpu_test3_pod0_ctr1_gpu="$gpu_test3_pod0_ctr1_gpus"
+echo "Pod gpu-test3/pod0, container ctr1 claimed $gpu_test3_pod0_ctr1_gpu"
+if [[ "$gpu_test3_pod0_ctr1_gpu" != "$gpu_test3_pod0_ctr0_gpu" ]]; then
+  echo "Pod gpu-test3/pod0, container ctr1 should claim the same GPU as Pod gpu-test3/pod0, container ctr0, but did not"
+  exit 1
+fi
+gpu_test3_pod0_ctr1_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test3_pod0_ctr1_logs" $(gpu-id "$gpu_test3_pod0_ctr1_gpu"))
+if [[ "$gpu_test3_pod0_ctr1_sharing_strategy" != "TimeSlicing" ]]; then
+  echo "Expected Pod gpu-test3/pod0, container ctr1 to have sharing strategy TimeSlicing, got $gpu_test3_pod0_ctr1_sharing_strategy"
+  exit 1
+fi
+gpu_test3_pod0_ctr1_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test3_pod0_ctr1_logs" $(gpu-id "$gpu_test3_pod0_ctr1_gpu"))
+if [[ "$gpu_test3_pod0_ctr1_timeslice_interval" != "Default" ]]; then
+  echo "Expected Pod gpu-test3/pod0, container ctr1 to have timeslice interval Default, got $gpu_test3_pod0_ctr1_timeslice_interval"
+  exit 1
+fi
+
 kubectl wait --for=condition=Ready -n gpu-test4 pod/pod0 --timeout=120s
 kubectl wait --for=condition=Ready -n gpu-test4 pod/pod1 --timeout=120s
 gpu_test_4=$(kubectl get pods -n gpu-test4 | grep -c 'Running')
@@ -72,8 +191,56 @@ if [ $gpu_test_4 != 2 ]; then
     echo "gpu_test_4 $gpu_test_4 failed to match against 2 expected pods"
     exit 1
 fi
-verify-env gpu-test4 pod0
-verify-env gpu-test4 pod1
+
+gpu_test4_pod0_ctr0_logs=$(kubectl logs -n gpu-test4 pod0 -c ctr0)
+gpu_test4_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test4_pod0_ctr0_logs")
+gpu_test4_pod0_ctr0_gpus_count=$(echo "$gpu_test4_pod0_ctr0_gpus" | wc -w)
+if [[ $gpu_test4_pod0_ctr0_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test4/pod0, container ctr0 to have 1 GPU, but got $gpu_test4_pod0_ctr0_gpus_count: $gpu_test4_pod0_ctr0_gpus"
+  exit 1
+fi
+gpu_test4_pod0_ctr0_gpu="$gpu_test4_pod0_ctr0_gpus"
+if gpu-already-seen "$gpu_test4_pod0_ctr0_gpu"; then
+  echo "Pod gpu-test4/pod0, container ctr0 should have a new GPU but claimed $gpu_test4_pod0_ctr0_gpu which is already claimed"
+  exit 1
+fi
+echo "Pod gpu-test4/pod0, container ctr0 claimed $gpu_test4_pod0_ctr0_gpu"
+observed_gpus+=("$gpu_test4_pod0_ctr0_gpu")
+gpu_test4_pod0_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test4_pod0_ctr0_logs" $(gpu-id "$gpu_test4_pod0_ctr0_gpu"))
+if [[ "$gpu_test4_pod0_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
+  echo "Expected Pod gpu-test4/pod0, container ctr0 to have sharing strategy TimeSlicing, got $gpu_test4_pod0_ctr0_sharing_strategy"
+  exit 1
+fi
+gpu_test4_pod0_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test4_pod0_ctr0_logs" $(gpu-id "$gpu_test4_pod0_ctr0_gpu"))
+if [[ "$gpu_test4_pod0_ctr0_timeslice_interval" != "Default" ]]; then
+  echo "Expected Pod gpu-test4/pod0, container ctr0 to have timeslice interval Default, got $gpu_test4_pod0_ctr0_timeslice_interval"
+  exit 1
+fi
+
+gpu_test4_pod1_ctr0_logs=$(kubectl logs -n gpu-test4 pod1 -c ctr0)
+gpu_test4_pod1_ctr0_gpus=$(gpus-from-logs "$gpu_test4_pod1_ctr0_logs")
+gpu_test4_pod1_ctr0_gpus_count=$(echo "$gpu_test4_pod1_ctr0_gpus" | wc -w)
+if [[ $gpu_test4_pod1_ctr0_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test4/pod1, container ctr0 to have 1 GPU, but got $gpu_test4_pod1_ctr0_gpus_count: $gpu_test4_pod1_ctr0_gpus"
+  exit 1
+fi
+gpu_test4_pod1_ctr0_gpu="$gpu_test4_pod1_ctr0_gpus"
+echo "Pod gpu-test4/pod1, container ctr0 claimed $gpu_test4_pod1_ctr0_gpu"
+if [[ "$gpu_test4_pod1_ctr0_gpu" != "$gpu_test4_pod0_ctr0_gpu" ]]; then
+  echo "Pod gpu-test4/pod1, container ctr0 should claim the same GPU as Pod gpu-test4/pod0, container ctr0, but did not"
+  exit 1
+fi
+gpu_test4_pod1_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test4_pod1_ctr0_logs" $(gpu-id "$gpu_test4_pod1_ctr0_gpu"))
+if [[ "$gpu_test4_pod1_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
+  echo "Expected Pod gpu-test4/pod1, container ctr0 to have sharing strategy TimeSlicing, got $gpu_test4_pod1_ctr0_sharing_strategy"
+  exit 1
+fi
+gpu_test4_pod1_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test4_pod1_ctr0_logs" $(gpu-id "$gpu_test4_pod1_ctr0_gpu"))
+if [[ "$gpu_test4_pod1_ctr0_timeslice_interval" != "Default" ]]; then
+  echo "Expected Pod gpu-test4/pod1, container ctr0 to have timeslice interval Default, got $gpu_test4_pod1_ctr0_timeslice_interval"
+  exit 1
+fi
+
 kubectl wait --for=condition=Ready -n gpu-test5 pod/pod0 --timeout=120s
 gpu_test_5=$(kubectl get pods -n gpu-test5 | grep -c 'Running')
@@ -81,7 +248,104 @@ if [ $gpu_test_5 != 1 ]; then
     echo "gpu_test_5 $gpu_test_5 failed to match against 1 expected pod"
     exit 1
 fi
-verify-env gpu-test5 pod0
+
+gpu_test5_pod0_ts_ctr0_logs=$(kubectl logs -n gpu-test5 pod0 -c ts-ctr0)
+gpu_test5_pod0_ts_ctr0_gpus=$(gpus-from-logs "$gpu_test5_pod0_ts_ctr0_logs")
+gpu_test5_pod0_ts_ctr0_gpus_count=$(echo "$gpu_test5_pod0_ts_ctr0_gpus" | wc -w)
+if [[ $gpu_test5_pod0_ts_ctr0_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test5/pod0, container ts-ctr0 to have 1 GPU, but got $gpu_test5_pod0_ts_ctr0_gpus_count: $gpu_test5_pod0_ts_ctr0_gpus"
+  exit 1
+fi
+gpu_test5_pod0_ts_ctr0_gpu="$gpu_test5_pod0_ts_ctr0_gpus"
+if gpu-already-seen "$gpu_test5_pod0_ts_ctr0_gpu"; then
+  echo "Pod gpu-test5/pod0, container ts-ctr0 should have a new GPU but claimed $gpu_test5_pod0_ts_ctr0_gpu which is already claimed"
+  exit 1
+fi
+echo "Pod gpu-test5/pod0, container ts-ctr0 claimed $gpu_test5_pod0_ts_ctr0_gpu"
+observed_gpus+=("$gpu_test5_pod0_ts_ctr0_gpu")
+gpu_test5_pod0_ts_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_ts_ctr0_logs" $(gpu-id "$gpu_test5_pod0_ts_ctr0_gpu"))
+if [[ "$gpu_test5_pod0_ts_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container ts-ctr0 to have sharing strategy TimeSlicing, got $gpu_test5_pod0_ts_ctr0_sharing_strategy"
+  exit 1
+fi
+gpu_test5_pod0_ts_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test5_pod0_ts_ctr0_logs" $(gpu-id "$gpu_test5_pod0_ts_ctr0_gpu"))
+if [[ "$gpu_test5_pod0_ts_ctr0_timeslice_interval" != "Long" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container ts-ctr0 to have timeslice interval Long, got $gpu_test5_pod0_ts_ctr0_timeslice_interval"
+  exit 1
+fi
+
+gpu_test5_pod0_ts_ctr1_logs=$(kubectl logs -n gpu-test5 pod0 -c ts-ctr1)
+gpu_test5_pod0_ts_ctr1_gpus=$(gpus-from-logs "$gpu_test5_pod0_ts_ctr1_logs")
+gpu_test5_pod0_ts_ctr1_gpus_count=$(echo "$gpu_test5_pod0_ts_ctr1_gpus" | wc -w)
+if [[ $gpu_test5_pod0_ts_ctr1_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test5/pod0, container ts-ctr1 to have 1 GPU, but got $gpu_test5_pod0_ts_ctr1_gpus_count: $gpu_test5_pod0_ts_ctr1_gpus"
+  exit 1
+fi
+gpu_test5_pod0_ts_ctr1_gpu="$gpu_test5_pod0_ts_ctr1_gpus"
+echo "Pod gpu-test5/pod0, container ts-ctr1 claimed $gpu_test5_pod0_ts_ctr1_gpu"
+if [[ "$gpu_test5_pod0_ts_ctr1_gpu" != "$gpu_test5_pod0_ts_ctr0_gpu" ]]; then
+  echo "Pod gpu-test5/pod0, container ts-ctr1 should claim the same GPU as Pod gpu-test5/pod0, container ts-ctr0, but did not"
+  exit 1
+fi
+gpu_test5_pod0_ts_ctr1_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_ts_ctr1_logs" $(gpu-id "$gpu_test5_pod0_ts_ctr1_gpu"))
+if [[ "$gpu_test5_pod0_ts_ctr1_sharing_strategy" != "TimeSlicing" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container ts-ctr1 to have sharing strategy TimeSlicing, got $gpu_test5_pod0_ts_ctr1_sharing_strategy"
+  exit 1
+fi
+gpu_test5_pod0_ts_ctr1_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test5_pod0_ts_ctr1_logs" $(gpu-id "$gpu_test5_pod0_ts_ctr1_gpu"))
+if [[ "$gpu_test5_pod0_ts_ctr1_timeslice_interval" != "Long" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container ts-ctr1 to have timeslice interval Long, got $gpu_test5_pod0_ts_ctr1_timeslice_interval"
+  exit 1
+fi
+
+gpu_test5_pod0_sp_ctr0_logs=$(kubectl logs -n gpu-test5 pod0 -c sp-ctr0)
+gpu_test5_pod0_sp_ctr0_gpus=$(gpus-from-logs "$gpu_test5_pod0_sp_ctr0_logs")
+gpu_test5_pod0_sp_ctr0_gpus_count=$(echo "$gpu_test5_pod0_sp_ctr0_gpus" | wc -w)
+if [[ $gpu_test5_pod0_sp_ctr0_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test5/pod0, container sp-ctr0 to have 1 GPU, but got $gpu_test5_pod0_sp_ctr0_gpus_count: $gpu_test5_pod0_sp_ctr0_gpus"
+  exit 1
+fi
+gpu_test5_pod0_sp_ctr0_gpu="$gpu_test5_pod0_sp_ctr0_gpus"
+if gpu-already-seen "$gpu_test5_pod0_sp_ctr0_gpu"; then
+  echo "Pod gpu-test5/pod0, container sp-ctr0 should have a new GPU but claimed $gpu_test5_pod0_sp_ctr0_gpu which is already claimed"
+  exit 1
+fi
+echo "Pod gpu-test5/pod0, container sp-ctr0 claimed $gpu_test5_pod0_sp_ctr0_gpu"
+observed_gpus+=("$gpu_test5_pod0_sp_ctr0_gpu")
+gpu_test5_pod0_sp_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_sp_ctr0_logs" $(gpu-id "$gpu_test5_pod0_sp_ctr0_gpu"))
+if [[ "$gpu_test5_pod0_sp_ctr0_sharing_strategy" != "SpacePartitioning" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container sp-ctr0 to have sharing strategy SpacePartitioning, got $gpu_test5_pod0_sp_ctr0_sharing_strategy"
+  exit 1
+fi
+gpu_test5_pod0_sp_ctr0_partition_count=$(gpu-partition-count-from-logs "$gpu_test5_pod0_sp_ctr0_logs" $(gpu-id "$gpu_test5_pod0_sp_ctr0_gpu"))
+if [[ "$gpu_test5_pod0_sp_ctr0_partition_count" != "10" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container sp-ctr0 to have partition count 10, got $gpu_test5_pod0_sp_ctr0_partition_count"
+  exit 1
+fi
+
+gpu_test5_pod0_sp_ctr1_logs=$(kubectl logs -n gpu-test5 pod0 -c sp-ctr1)
+gpu_test5_pod0_sp_ctr1_gpus=$(gpus-from-logs "$gpu_test5_pod0_sp_ctr1_logs")
+gpu_test5_pod0_sp_ctr1_gpus_count=$(echo "$gpu_test5_pod0_sp_ctr1_gpus" | wc -w)
+if [[ $gpu_test5_pod0_sp_ctr1_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test5/pod0, container sp-ctr1 to have 1 GPU, but got $gpu_test5_pod0_sp_ctr1_gpus_count: $gpu_test5_pod0_sp_ctr1_gpus"
+  exit 1
+fi
+gpu_test5_pod0_sp_ctr1_gpu="$gpu_test5_pod0_sp_ctr1_gpus"
+echo "Pod gpu-test5/pod0, container sp-ctr1 claimed $gpu_test5_pod0_sp_ctr1_gpu"
+if [[ "$gpu_test5_pod0_sp_ctr1_gpu" != "$gpu_test5_pod0_sp_ctr0_gpu" ]]; then
+  echo "Pod gpu-test5/pod0, container sp-ctr1 should claim the same GPU as Pod gpu-test5/pod0, container sp-ctr0, but did not"
+  exit 1
+fi
+gpu_test5_pod0_sp_ctr1_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_sp_ctr1_logs" $(gpu-id "$gpu_test5_pod0_sp_ctr1_gpu"))
+if [[ "$gpu_test5_pod0_sp_ctr1_sharing_strategy" != "SpacePartitioning" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container sp-ctr1 to have sharing strategy SpacePartitioning, got $gpu_test5_pod0_sp_ctr1_sharing_strategy"
+  exit 1
+fi
+gpu_test5_pod0_sp_ctr1_partition_count=$(gpu-partition-count-from-logs "$gpu_test5_pod0_sp_ctr1_logs" $(gpu-id "$gpu_test5_pod0_sp_ctr1_gpu"))
+if [[ "$gpu_test5_pod0_sp_ctr1_partition_count" != "10" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container sp-ctr1 to have partition count 10, got $gpu_test5_pod0_sp_ctr1_partition_count"
+  exit 1
+fi
 
 # test that deletion is fast (less than the default grace period of 30s)
 # see https://github.com/kubernetes/kubernetes/issues/127188 for details