From 8e56aa104fd38536b46012486ea2991aea8123c3 Mon Sep 17 00:00:00 2001 From: mdzurick Date: Mon, 22 Jun 2026 23:12:32 +0000 Subject: [PATCH] Retry prerequisite package installs on transient repo failures The CUDA yum repo CDN intermittently serves a stale repomd.xml that references rotated repodata files, producing 404s during dnf metadata refresh and failing the prerequisites build. Wrap the package-manager calls in temp_install_if_command_unknown with a retry helper that clears metadata between attempts so a fresh repomd.xml is fetched, rather than reusing the stale cached one. Signed-off-by: mdzurick --- scripts/bootstrap_prerequisites.sh | 24 +++++++++++++++++++++--- scripts/install_prerequisites.sh | 24 +++++++++++++++++++++--- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/scripts/bootstrap_prerequisites.sh b/scripts/bootstrap_prerequisites.sh index 5c580907c11..90cabaa5c49 100644 --- a/scripts/bootstrap_prerequisites.sh +++ b/scripts/bootstrap_prerequisites.sh @@ -205,13 +205,31 @@ echo "Building prerequisites in $PREREQS_BUILD_DIR" # Remove below if you wish to debug pre-req build failures trap "rm -rf $PREREQS_BUILD_DIR" EXIT +# Retry a command, clearing package-manager metadata between attempts. The CUDA +# yum repo CDN intermittently serves a stale repomd.xml that points at rotated +# repodata files, producing 404s; clearing metadata forces a fresh fetch. +function retry { + local n=0 max=5 delay=15 + until "$@"; do + n=$((n+1)) + if [ "$n" -ge "$max" ]; then + echo "Command failed after $max attempts: $*" >&2 + return 1 + fi + echo "Attempt $n/$max failed; clearing repo metadata and retrying in ${delay}s..." >&2 + if [ -x "$(command -v dnf)" ]; then dnf clean all || true + elif [ -x "$(command -v apt-get)" ]; then apt-get clean || true; fi + sleep "$delay" + done +} + function temp_install_if_command_unknown { if [ ! -x "$(command -v $1)" ]; then if [ -x "$(command -v apt-get)" ]; then - if [ -z "$PKG_UNINSTALL" ]; then apt-get update; fi - apt-get install -y --no-install-recommends $2 + if [ -z "$PKG_UNINSTALL" ]; then retry apt-get update; fi + retry apt-get install -y --no-install-recommends $2 elif [ -x "$(command -v dnf)" ]; then - dnf install -y --nobest --setopt=install_weak_deps=False $2 + retry dnf install -y --nobest --setopt=install_weak_deps=False $2 elif [ -x "$(command -v brew)" ]; then HOMEBREW_NO_AUTO_UPDATE=1 brew install $2 else diff --git a/scripts/install_prerequisites.sh b/scripts/install_prerequisites.sh index f19162d5004..8989cd6f084 100755 --- a/scripts/install_prerequisites.sh +++ b/scripts/install_prerequisites.sh @@ -205,13 +205,31 @@ echo "Building prerequisites in $PREREQS_BUILD_DIR" # Remove below if you wish to debug pre-req build failures trap "rm -rf $PREREQS_BUILD_DIR" EXIT +# Retry a command, clearing package-manager metadata between attempts. The CUDA +# yum repo CDN intermittently serves a stale repomd.xml that points at rotated +# repodata files, producing 404s; clearing metadata forces a fresh fetch. +function retry { + local n=0 max=5 delay=15 + until "$@"; do + n=$((n+1)) + if [ "$n" -ge "$max" ]; then + echo "Command failed after $max attempts: $*" >&2 + return 1 + fi + echo "Attempt $n/$max failed; clearing repo metadata and retrying in ${delay}s..." >&2 + if [ -x "$(command -v dnf)" ]; then dnf clean all || true + elif [ -x "$(command -v apt-get)" ]; then apt-get clean || true; fi + sleep "$delay" + done +} + function temp_install_if_command_unknown { if [ ! -x "$(command -v $1)" ]; then if [ -x "$(command -v apt-get)" ]; then - if [ -z "$PKG_UNINSTALL" ]; then apt-get update; fi - apt-get install -y --no-install-recommends $2 + if [ -z "$PKG_UNINSTALL" ]; then retry apt-get update; fi + retry apt-get install -y --no-install-recommends $2 elif [ -x "$(command -v dnf)" ]; then - dnf install -y --nobest --setopt=install_weak_deps=False $2 + retry dnf install -y --nobest --setopt=install_weak_deps=False $2 elif [ -x "$(command -v brew)" ]; then HOMEBREW_NO_AUTO_UPDATE=1 brew install $2 else