#!/usr/bin/env bash
# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================

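# Builds the XLA ROCm (AMD GPU) backend and runs the GPU test suite with Bazel.
# An optional first positional argument overrides the ROCm install directory
# (otherwise ROCM_PATH, or /opt/rocm-6.1.0, is used).

# Fail fast on the first error and echo every command as it runs.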
set -e
set -x

N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
# If rocm-smi exists locally (it should), use it to find
# out how many GPUs we have to test with. With `set -e` in effect, a failing
# rocm-smi has to be handled in the `if` condition itself, otherwise the
# script would abort before the single-GPU fallback could run.
if rocm-smi -i; then
  TF_GPU_COUNT=$(rocm-smi -i | grep 'Device ID' | grep -c 'GPU')
else
  TF_GPU_COUNT=1
fi
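# Run one test at a time per GPU; the number of concurrent test jobs handed to
# Bazel is GPUs x tests-per-GPU.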
TF_TESTS_PER_GPU=1
N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})

echo ""
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
echo ""

# First positional argument (if any) specifies the ROCM_INSTALL_DIR
if [[ -n $1 ]]; then
  ROCM_INSTALL_DIR=$1
else
  if [[ -z "${ROCM_PATH}" ]]; then
    ROCM_INSTALL_DIR=/opt/rocm-6.1.0
  else
    ROCM_INSTALL_DIR=$ROCM_PATH
  fi
fi

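# Point the build at python3 and the chosen ROCm install.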
export PYTHON_BIN_PATH=$(which python3)
PYTHON_VERSION=$(python3 -c "import sys;print(f'{sys.version_info.major}.{sys.version_info.minor}')")
export TF_PYTHON_VERSION=$PYTHON_VERSION
export TF_NEED_ROCM=1
export ROCM_PATH=$ROCM_INSTALL_DIR
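# Only build and run targets tagged for AMD GPUs; exclude NVIDIA-only, no_rocm,
# and OSS-excluded tests.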
TAGS_FILTER="gpu,requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-no_rocm"
UNSUPPORTED_GPU_TAGS="$(echo -requires-gpu-sm{60,70,80,86,89,90}{,-only})"
TAGS_FILTER="${TAGS_FILTER},${UNSUPPORTED_GPU_TAGS// /,}"
if [ -f /usertools/rocm.bazelrc ]; then
  # Use the bazelrc files in /usertools if available
  if [ ! -d /tf ]; then
    # The bazelrc files in /usertools expect /tf to exist
    mkdir /tf
  fi

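  # Containerized CI path: /usertools/rocm.bazelrc supplies the cache and ROCm
  # configs used below. The parallel_gpu_execute wrapper reads TF_GPU_COUNT and
  # TF_TESTS_PER_GPU to spread the concurrent test jobs across the GPUs.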
  bazel \
    --bazelrc=/usertools/rocm.bazelrc \
    test \
    --config=sigbuild_local_cache \
    --config=rocm \
    --config=xla_cpp \
    --build_tag_filters=${TAGS_FILTER} \
    --test_tag_filters=${TAGS_FILTER} \
    --keep_going \
    --test_output=errors \
    --local_test_jobs=${N_TEST_JOBS} \
    --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
    --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
    --repo_env=HERMETIC_PYTHON_VERSION=3.11 \
    --action_env=XLA_FLAGS="--xla_gpu_force_compilation_parallelism=16 --xla_gpu_enable_llvm_module_compilation_parallelism=true" \
    --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute \
    -- //xla/...
else

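  # No /usertools/rocm.bazelrc available: answer configure.py's prompts with
  # defaults and rely on the in-tree bazelrc for the ROCm configuration.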
  yes "" | $PYTHON_BIN_PATH configure.py
  bazel \
    test \
    --keep_going \
    --build_tag_filters=-no_oss,-oss_excluded,-oss_serial,gpu,requires-gpu,-no_gpu,-no_rocm \
    --test_tag_filters=-no_oss,-oss_excluded,-oss_serial,gpu,requires-gpu,-no_gpu,-no_rocm \
    --config=rocm \
    --test_output=errors \
    --local_test_jobs=${N_TEST_JOBS} \
    --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
    --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
    --repo_env=HERMETIC_PYTHON_VERSION=3.11 \
    --action_env=XLA_FLAGS="--xla_gpu_force_compilation_parallelism=16 --xla_gpu_enable_llvm_module_compilation_parallelism=true" \
    --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute \
    -- //xla/...
fi