Merge pull request kubernetes-sigs#1995 from aojea/add-github-actions-docker

Fix entrypoint cgroups mounts
k8s-ci-robot authored Jan 20, 2021
2 parents c8955ab + 28e8771 commit ee16568
Showing 3 changed files with 146 additions and 45 deletions.
94 changes: 94 additions & 0 deletions .github/workflows/docker.yaml
@@ -0,0 +1,94 @@
name: Docker

on:
workflow_dispatch:
pull_request:
branches:
- master

jobs:
docker:
name: Docker
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
ipFamily: [ipv4, ipv6]
deployment: [singleNode, multiNode]
env:
JOB_NAME: "docker-${{ matrix.deployment }}-${{ matrix.ipFamily }}"
IP_FAMILY: ${{ matrix.ipFamily }}
steps:
- name: Check out code into the Go module directory
uses: actions/checkout@v2

- name: Verify
run: make verify

- name: Install kind
run: sudo make install INSTALL_DIR=/usr/local/bin

- name: Install kubectl
run: |
curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl
chmod +x ./kubectl
sudo mv ./kubectl /usr/local/bin/kubectl
- name: Enable ipv4 and ipv6 forwarding
run: |
sudo sysctl -w net.ipv6.conf.all.forwarding=1
sudo sysctl -w net.ipv4.ip_forward=1
- name: Create single node cluster
if: ${{ matrix.deployment == 'singleNode' }}
run: |
cat <<EOF | /usr/local/bin/kind create cluster -v7 --wait 1m --retain --config=-
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
networking:
ipFamily: ${IP_FAMILY}
EOF
- name: Create multi node cluster
if: ${{ matrix.deployment == 'multiNode' }}
run: |
cat <<EOF | /usr/local/bin/kind create cluster -v7 --wait 1m --retain --config=-
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
networking:
ipFamily: ${IP_FAMILY}
nodes:
- role: control-plane
- role: worker
- role: worker
EOF
- name: Get Cluster status
run: |
# wait until the network is ready
kubectl wait --for=condition=ready pods --namespace=kube-system -l k8s-app=kube-dns
kubectl get nodes -o wide
kubectl get pods -A
- name: Load docker image
run: |
docker pull busybox
/usr/local/bin/kind load docker-image busybox
- name: Export logs
if: always()
run: |
mkdir -p /tmp/kind/logs
/usr/local/bin/kind export logs /tmp/kind/logs
sudo chown -R $USER:$USER /tmp/kind/logs
- name: Upload logs
if: always()
uses: actions/upload-artifact@v2
with:
name: kind-logs-${{ env.JOB_NAME }}-${{ github.run_id }}
path: /tmp/kind/logs

- name: Delete cluster
run: /usr/local/bin/kind delete cluster
95 changes: 51 additions & 44 deletions images/base/files/usr/local/bin/entrypoint
@@ -62,32 +62,34 @@ fix_mount() {
mount --make-rshared /
}

# helper used by fix_cgroup
mount_kubelet_cgroup_root() {
cgroup_root=$1
subsystem=$2

if [ -n "${cgroup_root}" ]; then
# This is because we set Kubelet's cgroup-root to `/kubelet` by
# default. We have to do that because otherwise, it'll collide
# with the cgroups used by the Kubelet running on the host if we
# run a kind cluster within a Kubernetes pod, resulting in random
# processes being killed.
mkdir -p "${subsystem}/${cgroup_root}"
if [ "${subsystem}" == "/sys/fs/cgroup/cpuset" ]; then
# This is needed. Otherwise, assigning a process to the cgroup
# (or any nested cgroup) would result in ENOSPC.
cat "${subsystem}/cpuset.cpus" > "${subsystem}/${cgroup_root}/cpuset.cpus"
cat "${subsystem}/cpuset.mems" > "${subsystem}/${cgroup_root}/cpuset.mems"
fi
# We need to perform a self bind mount here because otherwise,
# systemd might delete the cgroup unintentionally before the
# kubelet starts.
mount --bind "${subsystem}/${cgroup_root}" "${subsystem}/${cgroup_root}"
local cgroup_root=$1
local subsystem=$2
if [ -z "${cgroup_root}" ]; then
return 0
fi
mkdir -p "${subsystem}/${cgroup_root}"
if [ "${subsystem}" == "/sys/fs/cgroup/cpuset" ]; then
# This is needed. Otherwise, assigning a process to the cgroup
# (or any nested cgroup) would result in ENOSPC.
cat "${subsystem}/cpuset.cpus" > "${subsystem}/${cgroup_root}/cpuset.cpus"
cat "${subsystem}/cpuset.mems" > "${subsystem}/${cgroup_root}/cpuset.mems"
fi
# We need to perform a self bind mount here because otherwise,
# systemd might delete the cgroup unintentionally before the
# kubelet starts.
mount --bind "${subsystem}/${cgroup_root}" "${subsystem}/${cgroup_root}"
}
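
# A minimal usage sketch of the helper above, as fix_cgroup invokes it for
# one cgroup v1 subsystem (the paths here are illustrative):
#   mount_kubelet_cgroup_root "/kubelet" "/sys/fs/cgroup/cpu"
# An empty cgroup_root makes the call a no-op:
#   mount_kubelet_cgroup_root "" "/sys/fs/cgroup/cpu"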

fix_cgroup() {
echo 'INFO: fix cgroup mounts for all subsystems'
# see: https://d2iq.com/blog/running-kind-inside-a-kubernetes-cluster-for-continuous-integration
# capture initial state before modifying
local current_cgroup
current_cgroup=$(grep systemd /proc/self/cgroup | cut -d: -f3)
local cgroup_subsystems
cgroup_subsystems=$(findmnt -lun -o source,target -t cgroup | grep "${current_cgroup}" | awk '{print $2}')
# For each cgroup subsystem, Docker does a bind mount from the current
# cgroup to the root of the cgroup subsystem. For instance:
# /sys/fs/cgroup/memory/docker/<cid> -> /sys/fs/cgroup/memory
@@ -99,32 +101,37 @@ fix_cgroup() {
# This is because `/proc/<pid>/cgroup` is not affected by the bind mount.
# The following is a workaround to recreate the original cgroup
# environment by doing another bind mount for each subsystem.
local docker_cgroup_mounts
docker_cgroup_mounts=$(grep /sys/fs/cgroup /proc/self/mountinfo | grep docker || true)
if [[ -n "${docker_cgroup_mounts}" ]]; then
local docker_cgroup cgroup_subsystems subsystem
docker_cgroup=$(echo "${docker_cgroup_mounts}" | head -n 1 | cut -d' ' -f 4)
cgroup_subsystems=$(echo "${docker_cgroup_mounts}" | cut -d' ' -f 5)
echo "${cgroup_subsystems}" |
while IFS= read -r subsystem; do
mkdir -p "${subsystem}${docker_cgroup}"
mount --bind "${subsystem}" "${subsystem}${docker_cgroup}"
mount_kubelet_cgroup_root "/kubelet" "${subsystem}"
done
fi
local podman_cgroup_mounts
podman_cgroup_mounts=$(grep /sys/fs/cgroup /proc/self/mountinfo | grep libpod || true)
if [[ -n "${podman_cgroup_mounts}" ]]; then
local podman_cgroup cgroup_subsystems subsystem
podman_cgroup=$(echo "${podman_cgroup_mounts}" | head -n 1 | cut -d' ' -f 4)
cgroup_subsystems=$(echo "${podman_cgroup_mounts}" | cut -d' ' -f 5)
echo "${cgroup_subsystems}" |
while IFS= read -r subsystem; do
mkdir -p "${subsystem}${podman_cgroup}"
mount --bind "${subsystem}" "${subsystem}${podman_cgroup}"
mount_kubelet_cgroup_root "/kubelet" "${subsystem}"
local cgroup_mounts
# xref: https://github.com/kubernetes/minikube/pull/9508
# Example inputs:
#
# Docker: /docker/562a56986a84b3cd38d6a32ac43fdfcc8ad4d2473acf2839cbf549273f35c206 /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:143 master:23 - cgroup devices rw,devices
# podman: /libpod_parent/libpod-73a4fb9769188ae5dc51cb7e24b9f2752a4af7b802a8949f06a7b2f2363ab0e9 ...
# Cloud Shell: /kubepods/besteffort/pod3d6beaa3004913efb68ce073d73494b0/accdf94879f0a494f317e9a0517f23cdd18b35ff9439efd0175f17bbc56877c4 /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime master:19 - cgroup cgroup rw,memory
# GitHub actions #9304: /actions_job/0924fbbcf7b18d2a00c171482b4600747afc367a9dfbeac9d6b14b35cda80399 /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:263 master:24 - cgroup cgroup rw,memory
cgroup_mounts=$(grep -E -o '/[[:alnum:]].* /sys/fs/cgroup.*.*cgroup' /proc/self/mountinfo || true)
if [[ -n "${cgroup_mounts}" ]]; then
local mount_root
mount_root=$(echo "${cgroup_mounts}" | head -n 1 | cut -d' ' -f1)
for mount_point in $(echo "${cgroup_mounts}" | cut -d' ' -f 2); do
# bind mount each mount_point to mount_point + mount_root
# mount --bind /sys/fs/cgroup/cpu /sys/fs/cgroup/cpu/docker/fb07bb6daf7730a3cb14fc7ff3e345d1e47423756ce54409e66e01911bab2160
local target="${mount_point}${mount_root}"
if ! findmnt "${target}"; then
mkdir -p "${target}"
mount --bind "${mount_point}" "${target}"
fi
done
fi
# kubelet will try to manage cgroups / pods that are not owned by it when
# "nesting" clusters, unless we instruct it to use a different cgroup root.
# We do this, and when doing so we must fix up this alternative root;
# currently it is hardcoded to /kubelet.
mount --make-rprivate /sys/fs/cgroup
echo "${cgroup_subsystems}" |
while IFS= read -r subsystem; do
mount_kubelet_cgroup_root "/kubelet" "${subsystem}"
done
}
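
# A sketch of what the generalized extraction above yields, replaying the
# pipeline against the GitHub Actions sample line from the comment block:
#   $ line='/actions_job/0924fbbcf7b18d2a00c171482b4600747afc367a9dfbeac9d6b14b35cda80399 /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:263 master:24 - cgroup cgroup rw,memory'
#   $ echo "${line}" | grep -E -o '/[[:alnum:]].* /sys/fs/cgroup.*.*cgroup' | cut -d' ' -f1
#   /actions_job/0924fbbcf7b18d2a00c171482b4600747afc367a9dfbeac9d6b14b35cda80399   <- mount_root
#   $ echo "${line}" | grep -E -o '/[[:alnum:]].* /sys/fs/cgroup.*.*cgroup' | cut -d' ' -f2
#   /sys/fs/cgroup/memory                                                           <- mount_point
# fix_cgroup then bind mounts /sys/fs/cgroup/memory onto
# /sys/fs/cgroup/memory/actions_job/0924fbbcf7b18d2a00c171482b4600747afc367a9dfbeac9d6b14b35cda80399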

fix_machine_id() {
2 changes: 1 addition & 1 deletion pkg/build/nodeimage/defaults.go
@@ -20,7 +20,7 @@ package nodeimage
const DefaultImage = "kindest/node:latest"

// DefaultBaseImage is the default base image used
const DefaultBaseImage = "kindest/base:v20201130-23777eca"
const DefaultBaseImage = "kindest/base:v20210119-77baa200"

// DefaultMode is the default kubernetes build mode for the built image
// see pkg/build/kube.Bits
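The bumped base image is only a default; it can be overridden when building a node image. A sketch, assuming the kind CLI's --base-image flag:

kind build node-image --base-image kindest/base:v20210119-77baa200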
