From c8d97a5bf1389492aa7976261ca4bbd58c13fa83 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Feb 2026 10:46:01 -0600 Subject: [PATCH 01/32] Add RAPIDS-based devcontainer for velox/cuDF development Adds devcontainer configurations for CUDA 12.9 and 13.1 based on RAPIDS devcontainers infrastructure. Includes helper scripts for building velox, presto, and RAPIDS libraries (rmm, cudf, kvikio). Key features: - Based on rapidsai/devcontainers with UCX and OpenMPI - Integrates rapids-build-utils for build-rmm, build-cudf, etc. - Auto-detects RAPIDS library paths for velox cmake configuration - sccache with S3 remote caching and sccache-dist support - Persisted venvs, pip cache, and bash history across rebuilds - Codespaces support --- .devcontainer/Dockerfile | 94 +++++++++++++++++++ .devcontainer/cuda12.9/devcontainer.json | 65 +++++++++++++ .devcontainer/cuda13.1/devcontainer.json | 65 +++++++++++++ scripts/devcontainer/build-presto | 112 ++++++++++++++++++++++ scripts/devcontainer/build-velox | 72 +++++++++++++++ scripts/devcontainer/clean-presto | 33 +++++++ scripts/devcontainer/clean-velox | 54 +++++++++++ scripts/devcontainer/configure-velox | 113 +++++++++++++++++++++++ scripts/devcontainer/post-create | 90 ++++++++++++++++++ scripts/devcontainer/test-presto | 75 +++++++++++++++ scripts/devcontainer/test-velox | 88 ++++++++++++++++++ 11 files changed, 861 insertions(+) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/cuda12.9/devcontainer.json create mode 100644 .devcontainer/cuda13.1/devcontainer.json create mode 100755 scripts/devcontainer/build-presto create mode 100755 scripts/devcontainer/build-velox create mode 100755 scripts/devcontainer/clean-presto create mode 100755 scripts/devcontainer/clean-velox create mode 100755 scripts/devcontainer/configure-velox create mode 100755 scripts/devcontainer/post-create create mode 100755 scripts/devcontainer/test-presto create mode 100755 scripts/devcontainer/test-velox diff --git 
a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 00000000..3de9b6c4 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,94 @@ +# Velox development container based on RAPIDS devcontainers +# +# Uses RAPIDS devcontainer base image (Ubuntu) for compatibility with +# RAPIDS build utilities and cuDF development. + +ARG CUDA_VERSION=12.9 +ARG BASE=rapidsai/devcontainers:latest-cpp-mambaforge + +FROM ${BASE} + +ARG TARGETARCH +ARG CUDA_VERSION + +# Install velox build dependencies +RUN apt-get update -y && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + # Velox dependencies not in RAPIDS base + libdouble-conversion-dev \ + libevent-dev \ + libgflags-dev \ + libgoogle-glog-dev \ + liblz4-dev \ + liblzo2-dev \ + libre2-dev \ + libsnappy-dev \ + libsodium-dev \ + libthrift-dev \ + libzstd-dev \ + # Additional tools + ninja-build \ + ccache \ + && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/* + +# Create build directories +RUN mkdir -p /opt/velox-build /opt/presto-build && \ + chown -R coder:coder /opt/velox-build /opt/presto-build + +# Install velox helper scripts +COPY velox-testing/scripts/devcontainer/* /usr/local/bin/ +RUN chmod 755 /usr/local/bin/build-* /usr/local/bin/configure-* \ + /usr/local/bin/test-* /usr/local/bin/clean-* /usr/local/bin/post-create + +# Environment for velox builds +ENV VELOX_DEPENDENCY_SOURCE=SYSTEM \ + GTest_SOURCE=BUNDLED \ + cudf_SOURCE=BUNDLED \ + faiss_SOURCE=BUNDLED \ + CUDA_VERSION=${CUDA_VERSION} \ + CUDAARCHS="RAPIDS" \ + DEFAULT_VIRTUAL_ENV=rapids \ + PYTHON_PACKAGE_MANAGER=pip + +# Python environment +ENV PYTHONSAFEPATH="1" \ + PYTHONUNBUFFERED="1" \ + PYTHONDONTWRITEBYTECODE="1" + +# Persist bash history to cache +ENV HISTFILE="/home/coder/.cache/._bash_history" + +### +# sccache configuration +### +ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs" \ + SCCACHE_REGION="us-east-2" \ + SCCACHE_BUCKET="rapids-sccache-devs" \ + 
SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=true \ + SCCACHE_IDLE_TIMEOUT=0 + +### +# sccache-dist configuration +### +# Enable sccache-dist by default +ENV DEVCONTAINER_UTILS_ENABLE_SCCACHE_DIST=1 +# Compile locally if max retries exceeded +ENV SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=true +# Retry transient errors 4 times (for a total of 5 attempts) +ENV SCCACHE_DIST_MAX_RETRIES=4 +# 1hr 59min (to accommodate debug builds) +ENV SCCACHE_DIST_REQUEST_TIMEOUT=7140 +ENV SCCACHE_DIST_URL="https://${TARGETARCH}.linux.sccache.rapids.nvidia.com" + +# Build as much in parallel as possible +ENV INFER_NUM_DEVICE_ARCHITECTURES=1 \ + MAX_DEVICE_OBJ_TO_COMPILE_IN_PARALLEL=20 + +# Add velox environment to bashrc +RUN echo "export CUDA_VERSION=${CUDA_VERSION}" >> /home/coder/.bashrc && \ + echo 'export CMAKE_C_COMPILER_LAUNCHER=sccache' >> /home/coder/.bashrc && \ + echo 'export CMAKE_CXX_COMPILER_LAUNCHER=sccache' >> /home/coder/.bashrc && \ + echo 'export CMAKE_CUDA_COMPILER_LAUNCHER=sccache' >> /home/coder/.bashrc + +USER coder +WORKDIR /home/coder diff --git a/.devcontainer/cuda12.9/devcontainer.json b/.devcontainer/cuda12.9/devcontainer.json new file mode 100644 index 00000000..e481b82a --- /dev/null +++ b/.devcontainer/cuda12.9/devcontainer.json @@ -0,0 +1,65 @@ +{ + "name": "velox-dev-cuda12.9", + "build": { + "context": "${localWorkspaceFolder}/..", + "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", + "args": { + "CUDA_VERSION": "12.9", + "BASE": "rapidsai/devcontainers:latest-cpp-cuda12.9-ucx1.19.0-openmpi5.0.7" + } + }, + "features": { + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:latest": {} + }, + "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" + ], + "initializeCommand": ["/bin/bash", "-c", "mkdir -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/velox-testing-cuda12.9-venvs}"], + "workspaceFolder": "/home/coder", + "workspaceMount": 
"source=${localWorkspaceFolder},target=/home/coder/velox-testing,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/../velox,target=/home/coder/velox,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../presto,target=/home/coder/presto,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../rmm,target=/home/coder/rmm,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../cudf,target=/home/coder/cudf,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../kvikio,target=/home/coder/kvikio,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../devcontainers,target=/home/coder/devcontainers,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.local/share/velox-testing-cuda12.9-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + ], + "hostRequirements": { + "gpu": "optional" + }, + "runArgs": [ + "--rm", + "--name", "${localEnv:USER:anon}-velox-dev-cuda12.9", + "--ulimit", "nofile=500000" + ], + "postCreateCommand": "/home/coder/velox-testing/scripts/devcontainer/post-create", + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-vscode.cpptools", + "ms-vscode.cmake-tools", + "llvm-vs-code-extensions.vscode-clangd" + ], + "settings": { + "cmake.buildDirectory": "/opt/velox-build/${buildType}", + "cmake.configureOnOpen": false + } + } + }, + "containerEnv": { + "VELOX_DEPENDENCY_SOURCE": "SYSTEM", + "GTest_SOURCE": "BUNDLED", + "cudf_SOURCE": "BUNDLED", + "faiss_SOURCE": "BUNDLED" + } +} diff --git a/.devcontainer/cuda13.1/devcontainer.json b/.devcontainer/cuda13.1/devcontainer.json new file mode 100644 index 00000000..2eee9087 --- /dev/null +++ b/.devcontainer/cuda13.1/devcontainer.json @@ -0,0 +1,65 @@ +{ + "name": "velox-dev-cuda13.1", + "build": { + "context": "${localWorkspaceFolder}/..", + "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", + "args": { + "CUDA_VERSION": "13.1", + "BASE": "rapidsai/devcontainers:latest-cpp-cuda13.1-ucx1.19.0-openmpi5.0.7" + } + }, + "features": { + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:latest": {} + }, + "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" + ], + "initializeCommand": ["/bin/bash", "-c", "mkdir -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/velox-testing-cuda13.1-venvs}"], + "workspaceFolder": "/home/coder", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/velox-testing,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/../velox,target=/home/coder/velox,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../presto,target=/home/coder/presto,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../rmm,target=/home/coder/rmm,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../cudf,target=/home/coder/cudf,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../kvikio,target=/home/coder/kvikio,type=bind,consistency=consistent", + 
"source=${localWorkspaceFolder}/../devcontainers,target=/home/coder/devcontainers,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.local/share/velox-testing-cuda13.1-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + ], + "hostRequirements": { + "gpu": "optional" + }, + "runArgs": [ + "--rm", + "--name", "${localEnv:USER:anon}-velox-dev-cuda13.1", + "--ulimit", "nofile=500000" + ], + "postCreateCommand": "/home/coder/velox-testing/scripts/devcontainer/post-create", + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-vscode.cpptools", + "ms-vscode.cmake-tools", + "llvm-vs-code-extensions.vscode-clangd" + ], + "settings": { + "cmake.buildDirectory": "/opt/velox-build/${buildType}", + "cmake.configureOnOpen": false + } + } + }, + "containerEnv": { + "VELOX_DEPENDENCY_SOURCE": "SYSTEM", + "GTest_SOURCE": "BUNDLED", + "cudf_SOURCE": "BUNDLED", + "faiss_SOURCE": "BUNDLED" + } +} diff --git a/scripts/devcontainer/build-presto b/scripts/devcontainer/build-presto new file mode 100755 index 00000000..229a7732 --- /dev/null +++ b/scripts/devcontainer/build-presto @@ -0,0 +1,112 @@ +#!/bin/bash +# Build presto-native-execution with velox and cuDF support +# Usage: build-presto [--release|--debug] [-j N] + +set -euo pipefail + +# Defaults +BUILD_TYPE="${BUILD_TYPE:-release}" +NUM_THREADS="${NUM_THREADS:-$(nproc --all --ignore=1)}" +BUILD_BASE_DIR="${BUILD_BASE_DIR:-/opt/presto-build}" + +usage() { + cat </dev/null && [[ -d "${repo_path}" 
]]; then + local build_dir + build_dir=$(rapids-get-cmake-build-dir "${repo_path}/cpp" 2>/dev/null || true) + if [[ -n "${build_dir}" && -d "${build_dir}" ]]; then + echo "${build_dir}" + fi + fi +} + +# Add RAPIDS dependency paths +# Check for cudf +CUDF_BUILD_DIR=$(get_rapids_cmake_dir "cudf") +if [[ -n "${CUDF_BUILD_DIR}" ]]; then + echo "Found cudf build at: ${CUDF_BUILD_DIR}" + EXTRA_CMAKE_FLAGS+=" -Dcudf_ROOT=${CUDF_BUILD_DIR}" +fi + +# Check for rmm +RMM_BUILD_DIR=$(get_rapids_cmake_dir "rmm") +if [[ -n "${RMM_BUILD_DIR}" ]]; then + echo "Found rmm build at: ${RMM_BUILD_DIR}" + EXTRA_CMAKE_FLAGS+=" -Drmm_ROOT=${RMM_BUILD_DIR}" +fi + +# Check for kvikio +KVIKIO_BUILD_DIR=$(get_rapids_cmake_dir "kvikio") +if [[ -n "${KVIKIO_BUILD_DIR}" ]]; then + echo "Found kvikio build at: ${KVIKIO_BUILD_DIR}" + EXTRA_CMAKE_FLAGS+=" -Dkvikio_ROOT=${KVIKIO_BUILD_DIR}" +fi + +echo "" + +make cmake-and-build BUILD_TYPE="${BUILD_TYPE}" BUILD_DIR="" \ + BUILD_BASE_DIR="${BUILD_BASE_DIR}" EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS}" \ + NUM_THREADS="${NUM_THREADS}" + +echo "" +echo "=== Build complete ===" +echo "Binaries in: ${BUILD_BASE_DIR}" diff --git a/scripts/devcontainer/build-velox b/scripts/devcontainer/build-velox new file mode 100755 index 00000000..ee10a9ce --- /dev/null +++ b/scripts/devcontainer/build-velox @@ -0,0 +1,72 @@ +#!/bin/bash +# Build velox with cuDF support +# Usage: build-velox [--release|--debug] [-j N] [--cuda-arch ARCH] + +set -euo pipefail + +# Defaults +BUILD_TYPE="${BUILD_TYPE:-release}" +NUM_THREADS="${NUM_THREADS:-$(nproc --all --ignore=1)}" +BUILD_BASE_DIR="${BUILD_BASE_DIR:-/opt/velox-build}" +CUDA_ARCH="${CUDA_ARCH:-}" + +usage() { + cat </dev/null && [[ -d "${repo_path}" ]]; then + local build_dir + build_dir=$(rapids-get-cmake-build-dir "${repo_path}/cpp" 2>/dev/null || true) + if [[ -n "${build_dir}" && -d "${build_dir}" ]]; then + echo "${build_dir}" + fi + fi +} + +# Add RAPIDS dependency paths +# Check for cudf 
+CUDF_BUILD_DIR=$(get_rapids_cmake_dir "cudf") +if [[ -n "${CUDF_BUILD_DIR}" ]]; then + echo "Found cudf build at: ${CUDF_BUILD_DIR}" + EXTRA_CMAKE_FLAGS+=" -Dcudf_ROOT=${CUDF_BUILD_DIR}" +fi + +# Check for rmm +RMM_BUILD_DIR=$(get_rapids_cmake_dir "rmm") +if [[ -n "${RMM_BUILD_DIR}" ]]; then + echo "Found rmm build at: ${RMM_BUILD_DIR}" + EXTRA_CMAKE_FLAGS+=" -Drmm_ROOT=${RMM_BUILD_DIR}" +fi + +# Check for kvikio +KVIKIO_BUILD_DIR=$(get_rapids_cmake_dir "kvikio") +if [[ -n "${KVIKIO_BUILD_DIR}" ]]; then + echo "Found kvikio build at: ${KVIKIO_BUILD_DIR}" + EXTRA_CMAKE_FLAGS+=" -Dkvikio_ROOT=${KVIKIO_BUILD_DIR}" +fi + +echo "" + +make cmake BUILD_DIR="${BUILD_TYPE}" BUILD_TYPE="${BUILD_TYPE}" \ + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS}" BUILD_BASE_DIR="${BUILD_BASE_DIR}" + +echo "" +echo "=== Configuration complete ===" +echo "Build directory: ${BUILD_BASE_DIR}/${BUILD_TYPE}" +echo "" +echo "To build, run: build-velox --${BUILD_TYPE}" diff --git a/scripts/devcontainer/post-create b/scripts/devcontainer/post-create new file mode 100755 index 00000000..66d3f68d --- /dev/null +++ b/scripts/devcontainer/post-create @@ -0,0 +1,90 @@ +#!/bin/bash +# Post-create setup script for devcontainer +# Runs once after container creation to initialize environment + +set -euo pipefail + +echo "=== Velox Development Container Setup ===" +echo "" + +# Check for required repositories +REQUIRED_REPOS=("velox" "presto" "rmm" "cudf" "kvikio") +MISSING_REPOS=() + +for repo in "${REQUIRED_REPOS[@]}"; do + if [[ -d "${HOME}/${repo}" ]]; then + echo "Found ${repo} at ~/${repo}" + else + MISSING_REPOS+=("${repo}") + fi +done +echo "" + +if [[ ${#MISSING_REPOS[@]} -gt 0 ]]; then + echo "ERROR: Missing required repositories:" + for repo in "${MISSING_REPOS[@]}"; do + echo " - ${repo}" + done + echo "" + echo "Expected directory layout:" + echo " ~/code/" + echo " ├── velox-testing/ (this repo)" + echo " ├── velox/ (facebookincubator/velox)" + echo " ├── presto/ (prestodb/presto)" + echo " ├── 
rmm/ (rapidsai/rmm)" + echo " ├── cudf/ (rapidsai/cudf)" + echo " └── kvikio/ (rapidsai/kvikio)" + echo "" + exit 1 +fi + +# Create cache directories if they don't exist +mkdir -p ~/.cache + +# Ensure build directories exist and are writable +for dir in /opt/velox-build /opt/presto-build; do + if [[ ! -w "${dir}" ]]; then + sudo mkdir -p "${dir}" + sudo chown -R "$(id -u):$(id -g)" "${dir}" + fi +done + +# Set up Python virtual environment for RAPIDS builds +if command -v rapids-make-pip-env &>/dev/null; then + echo "Setting up Python virtual environment..." + rapids-make-pip-env rapids + echo "" +fi + +# Initialize sccache stats +if command -v sccache &>/dev/null; then + echo "sccache is available:" + sccache --show-stats 2>/dev/null || echo " (no stats yet)" + echo "" +fi + +# Display available commands +echo "Available commands:" +echo " build-velox - Build velox with cuDF support" +echo " build-presto - Build presto-native-execution" +echo " test-velox - Run velox tests" +echo " test-presto - Run presto tests" +echo " configure-velox - Run CMake configuration only" +echo " clean-velox - Clean velox build directory" +echo " clean-presto - Clean presto build directory" +echo "" +echo "RAPIDS build commands (build-rmm, build-cudf, etc.) are also available." +echo "Run any command with --help for usage information." +echo "" + +# Check for GPU +if command -v nvidia-smi &>/dev/null; then + echo "GPU detected:" + nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2>/dev/null || echo " (unable to query GPU)" + echo "" +else + echo "No GPU detected. CPU-only builds will be used." 
+ echo "" +fi + +echo "=== Setup complete ===" diff --git a/scripts/devcontainer/test-presto b/scripts/devcontainer/test-presto new file mode 100755 index 00000000..9d766a33 --- /dev/null +++ b/scripts/devcontainer/test-presto @@ -0,0 +1,75 @@ +#!/bin/bash +# Run presto-native-execution tests +# Usage: test-presto [-j N] [--filter PATTERN] + +set -euo pipefail + +# Defaults +BUILD_TYPE="${BUILD_TYPE:-release}" +BUILD_BASE_DIR="${BUILD_BASE_DIR:-/opt/presto-build}" +NUM_THREADS="${NUM_THREADS:-2}" + +usage() { + cat <}" +echo "Exclude: ${EXCLUDE:-}" +echo "" + +ctest -j "${NUM_THREADS}" --output-on-failure --no-tests=error \ + ${FILTER:+-R "${FILTER}"} \ + ${EXCLUDE:+-E "${EXCLUDE}"} + +echo "" +echo "=== Tests complete ===" diff --git a/scripts/devcontainer/test-velox b/scripts/devcontainer/test-velox new file mode 100755 index 00000000..52f99a13 --- /dev/null +++ b/scripts/devcontainer/test-velox @@ -0,0 +1,88 @@ +#!/bin/bash +# Run velox tests +# Usage: test-velox [-j N] [--filter PATTERN] [--cuda] + +set -euo pipefail + +# Defaults +BUILD_TYPE="${BUILD_TYPE:-release}" +BUILD_BASE_DIR="${BUILD_BASE_DIR:-/opt/velox-build}" +NUM_THREADS="${NUM_THREADS:-2}" # Limited to avoid OOM with GPU tests +CUDA_ONLY=false + +usage() { + cat <}" +echo "Exclude: ${EXCLUDE}" +echo "" + +ctest -j "${NUM_THREADS}" ${LABEL_FILTER} --output-on-failure --no-tests=error \ + ${FILTER:+-R "${FILTER}"} \ + ${EXCLUDE:+-E "${EXCLUDE}"} + +echo "" +echo "=== Tests complete ===" From 090585d9488835147df86d580f857e6664958d66 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Feb 2026 11:06:01 -0600 Subject: [PATCH 02/32] Add librdkafka-dev dependency --- .devcontainer/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 3de9b6c4..c99813a8 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -21,6 +21,7 @@ RUN apt-get update -y && \ libgoogle-glog-dev \ liblz4-dev \ liblzo2-dev \ + librdkafka-dev \ 
libre2-dev \ libsnappy-dev \ libsodium-dev \ From 7e1a8016850dc5f05781c000384a14a6b9dff0b8 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Feb 2026 11:18:20 -0600 Subject: [PATCH 03/32] Add duckdb_SOURCE=BUNDLED --- .devcontainer/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index c99813a8..2c6e7506 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -45,6 +45,7 @@ RUN chmod 755 /usr/local/bin/build-* /usr/local/bin/configure-* \ ENV VELOX_DEPENDENCY_SOURCE=SYSTEM \ GTest_SOURCE=BUNDLED \ cudf_SOURCE=BUNDLED \ + duckdb_SOURCE=BUNDLED \ faiss_SOURCE=BUNDLED \ CUDA_VERSION=${CUDA_VERSION} \ CUDAARCHS="RAPIDS" \ From dd903d207a6536fba02e962a687ad6a9df7ae2f3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Feb 2026 11:20:49 -0600 Subject: [PATCH 04/32] Fix _SOURCE vars: pass as CMake -D flags, not env vars VELOX_DEPENDENCY_SOURCE is read from env by CMake, but individual overrides like GTest_SOURCE, cudf_SOURCE, duckdb_SOURCE, faiss_SOURCE must be passed as -D flags to CMake. 
--- .devcontainer/Dockerfile | 6 ++---- .devcontainer/cuda12.9/devcontainer.json | 5 +---- .devcontainer/cuda13.1/devcontainer.json | 5 +---- scripts/devcontainer/configure-velox | 6 ++++++ 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 2c6e7506..c2d7704e 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -42,11 +42,9 @@ RUN chmod 755 /usr/local/bin/build-* /usr/local/bin/configure-* \ /usr/local/bin/test-* /usr/local/bin/clean-* /usr/local/bin/post-create # Environment for velox builds +# VELOX_DEPENDENCY_SOURCE is read from env by CMake +# Individual _SOURCE overrides must be passed as -D flags in configure-velox ENV VELOX_DEPENDENCY_SOURCE=SYSTEM \ - GTest_SOURCE=BUNDLED \ - cudf_SOURCE=BUNDLED \ - duckdb_SOURCE=BUNDLED \ - faiss_SOURCE=BUNDLED \ CUDA_VERSION=${CUDA_VERSION} \ CUDAARCHS="RAPIDS" \ DEFAULT_VIRTUAL_ENV=rapids \ diff --git a/.devcontainer/cuda12.9/devcontainer.json b/.devcontainer/cuda12.9/devcontainer.json index e481b82a..9d6c3dfc 100644 --- a/.devcontainer/cuda12.9/devcontainer.json +++ b/.devcontainer/cuda12.9/devcontainer.json @@ -57,9 +57,6 @@ } }, "containerEnv": { - "VELOX_DEPENDENCY_SOURCE": "SYSTEM", - "GTest_SOURCE": "BUNDLED", - "cudf_SOURCE": "BUNDLED", - "faiss_SOURCE": "BUNDLED" + "VELOX_DEPENDENCY_SOURCE": "SYSTEM" } } diff --git a/.devcontainer/cuda13.1/devcontainer.json b/.devcontainer/cuda13.1/devcontainer.json index 2eee9087..bbbdd4d7 100644 --- a/.devcontainer/cuda13.1/devcontainer.json +++ b/.devcontainer/cuda13.1/devcontainer.json @@ -57,9 +57,6 @@ } }, "containerEnv": { - "VELOX_DEPENDENCY_SOURCE": "SYSTEM", - "GTest_SOURCE": "BUNDLED", - "cudf_SOURCE": "BUNDLED", - "faiss_SOURCE": "BUNDLED" + "VELOX_DEPENDENCY_SOURCE": "SYSTEM" } } diff --git a/scripts/devcontainer/configure-velox b/scripts/devcontainer/configure-velox index 056f3ca0..86106365 100755 --- a/scripts/devcontainer/configure-velox +++ 
b/scripts/devcontainer/configure-velox @@ -64,6 +64,12 @@ EXTRA_CMAKE_FLAGS+=" -DVELOX_ENABLE_PARQUET=ON" EXTRA_CMAKE_FLAGS+=" -DVELOX_MONO_LIBRARY=ON" EXTRA_CMAKE_FLAGS+=" -DVELOX_BUILD_SHARED=ON" +# Dependencies that must be built from source (not available as system packages) +EXTRA_CMAKE_FLAGS+=" -DGTest_SOURCE=BUNDLED" +EXTRA_CMAKE_FLAGS+=" -Dcudf_SOURCE=BUNDLED" +EXTRA_CMAKE_FLAGS+=" -Dduckdb_SOURCE=BUNDLED" +EXTRA_CMAKE_FLAGS+=" -Dfaiss_SOURCE=BUNDLED" + # Auto-detect RAPIDS library paths from rapids-build-utils if available # This allows velox to find cudf, rmm, kvikio built by the RAPIDS build system get_rapids_cmake_dir() { From 3893ad943fdcbe9fd4b11902e5f6fe671de23c1a Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Feb 2026 12:14:33 -0600 Subject: [PATCH 05/32] Fix Velox CMake configuration for all dependencies - Fix DuckDB_SOURCE case sensitivity (DuckDB vs duckdb) - Resolve CUDAARCHS from RAPIDS to actual architecture list for CUDA 13.x - Use prebuilt cudf (SYSTEM) when available, fall back to BUNDLED - Add BUNDLED sources: simdjson, FastFloat, folly, absl, gRPC, xsimd, Arrow, geos - Add system packages: libfmt-dev, libprotobuf-dev, libprotoc-dev, libc-ares-dev, libstemmer-dev, libboost-all-dev, bison, flex, protobuf-compiler --- .devcontainer/Dockerfile | 11 ++++++++++- scripts/devcontainer/configure-velox | 26 ++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index c2d7704e..97e3df35 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -15,19 +15,28 @@ ARG CUDA_VERSION RUN apt-get update -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ # Velox dependencies not in RAPIDS base + libc-ares-dev \ + libboost-all-dev \ libdouble-conversion-dev \ libevent-dev \ + libfmt-dev \ libgflags-dev \ libgoogle-glog-dev \ liblz4-dev \ liblzo2-dev \ + libprotobuf-dev \ + libprotoc-dev \ + protobuf-compiler \ 
librdkafka-dev \ libre2-dev \ libsnappy-dev \ libsodium-dev \ + libstemmer-dev \ libthrift-dev \ libzstd-dev \ - # Additional tools + # Build tools + bison \ + flex \ ninja-build \ ccache \ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/* diff --git a/scripts/devcontainer/configure-velox b/scripts/devcontainer/configure-velox index 86106365..01b380fc 100755 --- a/scripts/devcontainer/configure-velox +++ b/scripts/devcontainer/configure-velox @@ -48,7 +48,7 @@ cd "${VELOX_DIR}" echo "=== Configuring Velox ===" echo "Build type: ${BUILD_TYPE}" -echo "CUDA archs: ${CUDAARCHS:-RAPIDS}" +echo "CUDA archs: ${CUDAARCHS}" echo "Build dir: ${BUILD_BASE_DIR}/${BUILD_TYPE}" echo "" @@ -57,6 +57,12 @@ export CMAKE_C_COMPILER_LAUNCHER="${CMAKE_C_COMPILER_LAUNCHER:-sccache}" export CMAKE_CXX_COMPILER_LAUNCHER="${CMAKE_CXX_COMPILER_LAUNCHER:-sccache}" export CMAKE_CUDA_COMPILER_LAUNCHER="${CMAKE_CUDA_COMPILER_LAUNCHER:-sccache}" +# Resolve CUDAARCHS - Velox doesn't understand "RAPIDS" so we expand it +# These are the RAPIDS-supported architectures for CUDA 13.x (from rapids-cmake) +if [[ "${CUDAARCHS:-}" == "RAPIDS" || -z "${CUDAARCHS:-}" ]]; then + export CUDAARCHS="75-real;80-real;86-real;90a-real;100f-real;120a-real;120" +fi + # Build CMake flags EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_CUDF=ON" EXTRA_CMAKE_FLAGS+=" -DVELOX_ENABLE_ARROW=ON" @@ -65,10 +71,18 @@ EXTRA_CMAKE_FLAGS+=" -DVELOX_MONO_LIBRARY=ON" EXTRA_CMAKE_FLAGS+=" -DVELOX_BUILD_SHARED=ON" # Dependencies that must be built from source (not available as system packages) +# Note: DuckDB uses mixed-case variable name (DuckDB_SOURCE not duckdb_SOURCE) EXTRA_CMAKE_FLAGS+=" -DGTest_SOURCE=BUNDLED" -EXTRA_CMAKE_FLAGS+=" -Dcudf_SOURCE=BUNDLED" -EXTRA_CMAKE_FLAGS+=" -Dduckdb_SOURCE=BUNDLED" +EXTRA_CMAKE_FLAGS+=" -DDuckDB_SOURCE=BUNDLED" EXTRA_CMAKE_FLAGS+=" -Dfaiss_SOURCE=BUNDLED" +EXTRA_CMAKE_FLAGS+=" -Dsimdjson_SOURCE=BUNDLED" +EXTRA_CMAKE_FLAGS+=" -DFastFloat_SOURCE=BUNDLED" +EXTRA_CMAKE_FLAGS+=" 
-Dfolly_SOURCE=BUNDLED" +EXTRA_CMAKE_FLAGS+=" -Dabsl_SOURCE=BUNDLED" +EXTRA_CMAKE_FLAGS+=" -DgRPC_SOURCE=BUNDLED" +EXTRA_CMAKE_FLAGS+=" -Dxsimd_SOURCE=BUNDLED" +EXTRA_CMAKE_FLAGS+=" -DArrow_SOURCE=BUNDLED" +EXTRA_CMAKE_FLAGS+=" -Dgeos_SOURCE=BUNDLED" # Auto-detect RAPIDS library paths from rapids-build-utils if available # This allows velox to find cudf, rmm, kvikio built by the RAPIDS build system @@ -86,11 +100,15 @@ get_rapids_cmake_dir() { } # Add RAPIDS dependency paths -# Check for cudf +# Check for cudf - use prebuilt if available, otherwise build from source CUDF_BUILD_DIR=$(get_rapids_cmake_dir "cudf") if [[ -n "${CUDF_BUILD_DIR}" ]]; then echo "Found cudf build at: ${CUDF_BUILD_DIR}" + EXTRA_CMAKE_FLAGS+=" -Dcudf_SOURCE=SYSTEM" EXTRA_CMAKE_FLAGS+=" -Dcudf_ROOT=${CUDF_BUILD_DIR}" +else + echo "No prebuilt cudf found, will build from source" + EXTRA_CMAKE_FLAGS+=" -Dcudf_SOURCE=BUNDLED" fi # Check for rmm From 13d63d949247bb1154a9d78635ed9e7cca55a7f3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Feb 2026 13:41:28 -0600 Subject: [PATCH 06/32] Update default BASE image to cuda12.9 devcontainer --- .devcontainer/Dockerfile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 97e3df35..6846fba2 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -4,7 +4,7 @@ # RAPIDS build utilities and cuDF development. 
ARG CUDA_VERSION=12.9 -ARG BASE=rapidsai/devcontainers:latest-cpp-mambaforge +ARG BASE=rapidsai/devcontainers:latest-cpp-cuda12.9-ucx1.19.0-openmpi5.0.7 FROM ${BASE} @@ -14,6 +14,9 @@ ARG CUDA_VERSION # Install velox build dependencies RUN apt-get update -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + # GCC 14 (avoids GCC 13 false-positive -Wstringop-overflow in fmt) + gcc-14 \ + g++-14 \ # Velox dependencies not in RAPIDS base libc-ares-dev \ libboost-all-dev \ @@ -41,6 +44,12 @@ RUN apt-get update -y && \ ccache \ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/* +# Set GCC 14 as default (GCC 13 has false-positive -Wstringop-overflow in fmt) +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 140 \ + --slave /usr/bin/g++ g++ /usr/bin/g++-14 \ + --slave /usr/bin/gcov gcov /usr/bin/gcov-14 && \ + update-alternatives --set gcc /usr/bin/gcc-14 + # Create build directories RUN mkdir -p /opt/velox-build /opt/presto-build && \ chown -R coder:coder /opt/velox-build /opt/presto-build From 4561c8b155782e34b494c41340a5b55c90b8faa5 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Feb 2026 13:42:57 -0600 Subject: [PATCH 07/32] Remove redundant comment about DuckDB variable naming --- scripts/devcontainer/configure-velox | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/devcontainer/configure-velox b/scripts/devcontainer/configure-velox index 01b380fc..c3fdc2bd 100755 --- a/scripts/devcontainer/configure-velox +++ b/scripts/devcontainer/configure-velox @@ -71,7 +71,6 @@ EXTRA_CMAKE_FLAGS+=" -DVELOX_MONO_LIBRARY=ON" EXTRA_CMAKE_FLAGS+=" -DVELOX_BUILD_SHARED=ON" # Dependencies that must be built from source (not available as system packages) -# Note: DuckDB uses mixed-case variable name (DuckDB_SOURCE not duckdb_SOURCE) EXTRA_CMAKE_FLAGS+=" -DGTest_SOURCE=BUNDLED" EXTRA_CMAKE_FLAGS+=" -DDuckDB_SOURCE=BUNDLED" EXTRA_CMAKE_FLAGS+=" -Dfaiss_SOURCE=BUNDLED" From 
612029da66bb527560e6a0c969efc079586aae7b Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 10 Feb 2026 02:46:48 -0600 Subject: [PATCH 08/32] Add additional CMake flags --- scripts/devcontainer/configure-velox | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/devcontainer/configure-velox b/scripts/devcontainer/configure-velox index c3fdc2bd..a621ae94 100755 --- a/scripts/devcontainer/configure-velox +++ b/scripts/devcontainer/configure-velox @@ -64,7 +64,11 @@ if [[ "${CUDAARCHS:-}" == "RAPIDS" || -z "${CUDAARCHS:-}" ]]; then fi # Build CMake flags -EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_CUDF=ON" +# Disable C++20 module scanning — GCC 14 ICEs with -fmodules-ts on CMake 4.x +EXTRA_CMAKE_FLAGS="-DCMAKE_CXX_SCAN_FOR_MODULES=OFF" +# Suppress false-positive stringop-overflow from system fmt v9 (format_dragon/bigint) +EXTRA_CMAKE_FLAGS+=" -DCMAKE_CXX_FLAGS=-Wno-error=stringop-overflow" +EXTRA_CMAKE_FLAGS+=" -DVELOX_ENABLE_CUDF=ON" EXTRA_CMAKE_FLAGS+=" -DVELOX_ENABLE_ARROW=ON" EXTRA_CMAKE_FLAGS+=" -DVELOX_ENABLE_PARQUET=ON" EXTRA_CMAKE_FLAGS+=" -DVELOX_MONO_LIBRARY=ON" From b117cec3b759671a5f7883a435c66c5436b30083 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 17 Feb 2026 16:42:26 +0000 Subject: [PATCH 09/32] Simplify devcontainer build scripts and fix presto test support Standalone velox now bundles all dependencies (folly, xsimd, etc.) with no prerequisites. FB OSS deps (folly, fbthrift, proxygen) are built automatically inside build-presto on first run and cached at /opt/fb-deps. 
- Extract shared constants and RAPIDS detection to _common.sh - Fix xsimd_SOURCE override bug in configure-velox (BUNDLED always won) - Fix CUDAARCHS not propagating to presto build (was setting wrong var) - Fix test-presto pointing at nonexistent release/ subdirectory - Add ldconfig for /opt/fb-deps/lib so presto tests find shared libs - Add libxxhash-dev and gperf to Dockerfile (fbthrift build deps) - Add -no-pie linker flag and -Wno-error=nonnull for presto link - Add devcontainer README --- .devcontainer/Dockerfile | 6 +- .devcontainer/README.md | 161 ++++++++++++++++++ scripts/devcontainer/_common.sh | 52 ++++++ scripts/devcontainer/build-presto | 237 ++++++++++++++++++++++----- scripts/devcontainer/configure-velox | 61 ++----- scripts/devcontainer/post-create | 2 +- scripts/devcontainer/test-presto | 7 +- 7 files changed, 430 insertions(+), 96 deletions(-) create mode 100644 .devcontainer/README.md create mode 100755 scripts/devcontainer/_common.sh diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 6846fba2..8d1e9f2f 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -36,10 +36,12 @@ RUN apt-get update -y && \ libsodium-dev \ libstemmer-dev \ libthrift-dev \ + libxxhash-dev \ libzstd-dev \ # Build tools bison \ flex \ + gperf \ ninja-build \ ccache \ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/* @@ -51,8 +53,8 @@ RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 140 \ update-alternatives --set gcc /usr/bin/gcc-14 # Create build directories -RUN mkdir -p /opt/velox-build /opt/presto-build && \ - chown -R coder:coder /opt/velox-build /opt/presto-build +RUN mkdir -p /opt/velox-build /opt/presto-build /opt/fb-deps && \ + chown -R coder:coder /opt/velox-build /opt/presto-build /opt/fb-deps # Install velox helper scripts COPY velox-testing/scripts/devcontainer/* /usr/local/bin/ diff --git a/.devcontainer/README.md b/.devcontainer/README.md new file mode 100644 index 00000000..7f54369d --- 
/dev/null +++ b/.devcontainer/README.md @@ -0,0 +1,161 @@ +# Velox + Presto Devcontainer + +A GPU-ready development environment for building [Velox](https://github.com/facebookincubator/velox) with cuDF acceleration and [Presto Native Execution](https://github.com/prestodb/presto/tree/master/presto-native-execution). Based on the RAPIDS devcontainer image with pre-built cuDF, RMM, and KvikIO. + +## Quick Start + +```bash +# 1. Clone sibling repos under the same parent directory +mkdir ~/code && cd ~/code +git clone https://github.com/YOUR_ORG/velox-testing.git +git clone https://github.com/facebookincubator/velox.git +git clone https://github.com/prestodb/presto.git +git clone https://github.com/rapidsai/rmm.git +git clone https://github.com/rapidsai/cudf.git +git clone https://github.com/rapidsai/kvikio.git + +# 2. Open in VS Code (or any devcontainer-compatible editor) +code velox-testing + +# 3. Reopen in container (pick CUDA 13.1 or 12.9 variant) + +# 4. Build +build-velox # ~25 min — standalone velox with cuDF +build-presto # ~40 min first run (builds FB deps), ~25 min after + +# 5. Test +test-velox # run velox test suite +test-presto # run presto test suite +``` + +## Directory Layout + +The devcontainer expects this layout on the host: + +``` +~/code/ +├── velox-testing/ # this repo (devcontainer workspace root) +├── velox/ # facebookincubator/velox +├── presto/ # prestodb/presto +├── rmm/ # rapidsai/rmm +├── cudf/ # rapidsai/cudf +└── kvikio/ # rapidsai/kvikio +``` + +All repos are bind-mounted into the container under `/home/coder/`. + +## Build Architecture + +``` + ┌───────────────┐ ┌───────────────────────────────┐ + │ build-velox │ │ build-presto │ + │ │ │ │ + │ ~/velox src │ │ 1. FB deps (auto, once) │ + │ all BUNDLED │ │ folly, fbthrift, proxygen │ + │ MONO_LIB=ON │ │ → /opt/fb-deps │ + │ SHARED=ON │ │ │ + │ │ │ 2.
rsync ~/velox → submodule │ + │ │ │ add_subdirectory(velox) │ + │ │ │ MONO_LIB=OFF │ + └───────┬───────┘ └──────────────┬────────────────┘ + │ │ + ▼ ▼ + /opt/velox-build/ /opt/presto-build/ + release/ presto_server + 3879 targets 1978 targets +``` + +Both builds consume pre-built **cudf**, **rmm**, and **kvikio** from the RAPIDS devcontainer. + +**Standalone velox** bundles all its dependencies (folly, xsimd, Arrow, etc.) — no prerequisite steps. + +**Presto** requires Facebook's OSS stack (folly, fbthrift, proxygen, etc.) for its thrift RPC layer. `build-presto` builds these automatically on first run and caches them at `/opt/fb-deps`. Subsequent runs skip this step unless `--rebuild-deps` is passed. + +### Why velox builds twice + +Presto integrates velox via `add_subdirectory()` with different options (`VELOX_MONO_LIBRARY=OFF`, no testing, etc.). The rsync in `build-presto` copies `~/velox` into presto's git submodule directory so both repos share the same source while keeping presto's git state clean. + +## Commands + +| Command | Description | +|---------|-------------| +| `build-velox` | Build standalone velox with cuDF (fully self-contained) | +| `build-presto` | Build presto-native-execution (auto-builds FB deps on first run) | +| `configure-velox` | CMake configure only (for IDE integration) | +| `test-velox` | Run velox tests via ctest | +| `test-presto` | Run presto tests via ctest | +| `clean-velox` | Delete velox build artifacts | +| `clean-presto` | Delete presto build artifacts | + +All commands accept `--help`. 
Common options: + +```bash +build-velox --debug # debug build +build-velox -j 16 # limit parallelism +build-presto --release # release build (default) +build-presto --rebuild-deps # force rebuild of FB deps +``` + +## CUDA Variants + +Two devcontainer configurations are provided: + +| Path | CUDA | Base Image | +|------|------|------------| +| `.devcontainer/cuda13.1/` | 13.1 | `rapidsai/devcontainers:latest-cpp-cuda13.1-*` | +| `.devcontainer/cuda12.9/` | 12.9 | `rapidsai/devcontainers:latest-cpp-cuda12.9-*` | + +VS Code will prompt you to choose when opening the workspace. The `CUDAARCHS` environment variable defaults to `RAPIDS`, which expands to all RAPIDS-supported architectures. + +## Build Outputs + +| Build | Location | Contents | +|-------|----------|----------| +| FB deps | `/opt/fb-deps/` | folly, fbthrift, proxygen (auto-built by `build-presto`) | +| Velox | `/opt/velox-build/release/` | Mono shared library, tests | +| Presto | `/opt/presto-build/` | `presto_server` binary, tests | + +Build directories are under `/opt/` so they don't pollute mounted source trees and persist across container sessions (unless the container is recreated). + +## Dependency Resolution + +Velox has many dependencies. Each can be `SYSTEM` (pre-installed) or `BUNDLED` (built from source via FetchContent). 
The build scripts set these automatically: + +| Dependency | Velox Build | Presto Build | Source | +|------------|------------|--------------|--------| +| folly | BUNDLED | SYSTEM | Built from source / `/opt/fb-deps` | +| xsimd | BUNDLED | SYSTEM | Built from source / `/opt/fb-deps` | +| cudf | SYSTEM | SYSTEM | RAPIDS pre-built | +| rmm | SYSTEM | SYSTEM | RAPIDS pre-built | +| kvikio | SYSTEM | SYSTEM | RAPIDS pre-built | +| Arrow | BUNDLED | BUNDLED | Built from source | +| DuckDB | BUNDLED | BUNDLED | Built from source | +| GTest | BUNDLED | BUNDLED | Built from source | +| simdjson | BUNDLED | BUNDLED | Built from source | +| geos | BUNDLED | BUNDLED | Built from source | + +## Workarounds + +The build scripts include several workarounds for toolchain issues: + +- **GCC 14** instead of GCC 13: avoids false-positive `-Wstringop-overflow` in system `fmt` v9. +- **`-DCMAKE_CXX_SCAN_FOR_MODULES=OFF`**: CMake 4.x + GCC 14 + Ninja triggers `-fmodules-ts` which causes GCC 14 ICE (segfault). +- **`-no-pie` linker flag** (presto only): fbthrift static archives have construction vtables with hidden visibility from virtual inheritance in `apache::thrift` exception classes. The linker cannot resolve `R_X86_64_PC32` relocations against hidden symbols in PIE executables. +- **`-Wno-error=nonnull`** (presto only): presto's `SystemConnector.cpp` triggers a false-positive `this` null check warning. + +## Scripts + +All scripts live in `scripts/devcontainer/` and are installed to `/usr/local/bin/` in the container image. They share common functions via `_common.sh`. 
+ +``` +scripts/devcontainer/ +├── _common.sh # Shared constants (CUDA archs, RAPIDS detection) +├── build-velox # Standalone velox build (all deps bundled) +├── build-presto # Presto + velox build (includes FB deps) +├── configure-velox # CMake configure only +├── test-velox # Run velox tests +├── test-presto # Run presto tests +├── clean-velox # Clean velox build dir +├── clean-presto # Clean presto build dir +└── post-create # Devcontainer post-create hook +``` diff --git a/scripts/devcontainer/_common.sh b/scripts/devcontainer/_common.sh new file mode 100755 index 00000000..9439f439 --- /dev/null +++ b/scripts/devcontainer/_common.sh @@ -0,0 +1,52 @@ +#!/bin/bash +# Shared constants and functions for velox/presto build scripts. +# Source this file; do not execute directly. + +# RAPIDS-supported CUDA architectures for CUDA 13.x (from rapids-cmake), semicolon-separated. NOTE(review): resolve_cudaarchs applies this list without checking the CUDA version - confirm it also suits the CUDA 12.9 container. +RAPIDS_CUDA_ARCHITECTURES="75-real;80-real;86-real;90a-real;100f-real;120a-real;120" + +# Resolve CUDAARCHS: if it is unset, empty, or the literal "RAPIDS", export it as RAPIDS_CUDA_ARCHITECTURES; any other value is left untouched. +resolve_cudaarchs() { + if [[ "${CUDAARCHS:-}" == "RAPIDS" || -z "${CUDAARCHS:-}" ]]; then + export CUDAARCHS="${RAPIDS_CUDA_ARCHITECTURES}" + fi +} + +# Look up the cmake build directory for a RAPIDS library ($1: cudf, rmm, kvikio) by running rapids-get-cmake-build-dir on ${HOME}/$1/cpp. +# Prints the path if it is an existing directory; prints nothing when the tool or ${HOME}/$1 is missing or the lookup fails (its stderr and exit status are ignored). +get_rapids_cmake_dir() { + local lib_name="$1" + local repo_path="${HOME}/${lib_name}" + + if command -v rapids-get-cmake-build-dir &>/dev/null && [[ -d "${repo_path}" ]]; then + local build_dir + build_dir=$(rapids-get-cmake-build-dir "${repo_path}/cpp" 2>/dev/null || true) + if [[ -n "${build_dir}" && -d "${build_dir}" ]]; then + echo "${build_dir}" + fi + fi +} + +# Append a -Dcudf_ROOT / -Drmm_ROOT / -Dkvikio_ROOT flag, for each library whose build dir is found, to the variable whose +# name is passed as $1 (must already exist in the caller's scope, and must not be named __var, cudf_dir, rmm_dir or kvikio_dir, which are locals here). Each flag is appended after a single space and a "Found ... build at" line is echoed.
+append_rapids_cmake_flags() { + local __var="$1" + + local cudf_dir rmm_dir kvikio_dir + cudf_dir=$(get_rapids_cmake_dir "cudf") + rmm_dir=$(get_rapids_cmake_dir "rmm") + kvikio_dir=$(get_rapids_cmake_dir "kvikio") + + if [[ -n "${cudf_dir}" ]]; then + echo "Found cudf build at: ${cudf_dir}" + printf -v "$__var" '%s %s' "${!__var}" "-Dcudf_ROOT=${cudf_dir}" + fi + if [[ -n "${rmm_dir}" ]]; then + echo "Found rmm build at: ${rmm_dir}" + printf -v "$__var" '%s %s' "${!__var}" "-Drmm_ROOT=${rmm_dir}" + fi + if [[ -n "${kvikio_dir}" ]]; then + echo "Found kvikio build at: ${kvikio_dir}" + printf -v "$__var" '%s %s' "${!__var}" "-Dkvikio_ROOT=${kvikio_dir}" + fi +} diff --git a/scripts/devcontainer/build-presto b/scripts/devcontainer/build-presto index 229a7732..2bdf86fb 100755 --- a/scripts/devcontainer/build-presto +++ b/scripts/devcontainer/build-presto @@ -3,37 +3,47 @@ # Usage: build-presto [--release|--debug] [-j N] set -euo pipefail +# shellcheck source=_common.sh +source "$(dirname "$(readlink -f "$0")")/_common.sh" # Defaults BUILD_TYPE="${BUILD_TYPE:-release}" NUM_THREADS="${NUM_THREADS:-$(nproc --all --ignore=1)}" BUILD_BASE_DIR="${BUILD_BASE_DIR:-/opt/presto-build}" +FB_DEPS_PREFIX="${FB_DEPS_PREFIX:-/opt/fb-deps}" usage() { cat <