Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
16b3b23
Slurm scripts
misiugodfrey Jan 28, 2026
ba18211
untested refactor
misiugodfrey Jan 30, 2026
5883a96
Refactor
Jan 30, 2026
0e61ed7
fix config bug
Jan 30, 2026
a8c995b
more generate fixes
Jan 30, 2026
30efdd6
Appended to launch
Jan 30, 2026
5a3e4b4
reverted script changes and copy metadata
Feb 3, 2026
cf4db8f
Merge branch 'main' into misiug/slurmscripts
misiugodfrey Feb 4, 2026
cb86355
remove dead code
Feb 4, 2026
82da5a2
remove absolute paths
Feb 4, 2026
e2d76ce
Merge branch 'main' of https://github.com/rapidsai/velox-testing into…
misiugodfrey Feb 7, 2026
ac96833
Add simplified Slurm scripts
pentschev Feb 11, 2026
56d8703
Reduce timeout
pentschev Feb 11, 2026
d56f868
Increase coordinator/workers timeout
pentschev Feb 11, 2026
a7316f7
Fix worker configuration
pentschev Feb 11, 2026
780cbf4
Fix memory calculation
pentschev Feb 11, 2026
da7e297
Fix missing cluster tag and GPU optimizations
pentschev Feb 11, 2026
1f52268
Remove cluster-tag (Java-only coordinator config)
pentschev Feb 11, 2026
2e7e05d
Reintroduce config comments
pentschev Feb 11, 2026
9159fe5
Remove tpcds properties, match LD_LIBRARY_PATH
pentschev Feb 11, 2026
ac47c24
Copy pre-analyzed hive metastore data
pentschev Feb 16, 2026
9754227
Reduce startup latency by starting all containers simultaneously
pentschev Feb 16, 2026
d0c63e8
Disable cuDF JIT expression
pentschev Feb 16, 2026
4b1c21e
Combine setup and run benchmarks in single step for faster startup
pentschev Feb 16, 2026
e85006e
Remove senseless defaults
pentschev Feb 16, 2026
c2b06a7
Add missing files to directory structure
pentschev Feb 16, 2026
653a469
Remove unnecessary properties
pentschev Feb 16, 2026
70f2106
Remove tpch.properties
pentschev Feb 16, 2026
89be5ea
Rename to unified
pentschev Feb 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,10 @@ presto/docker/config/generated*/
# Generated Presto Docker Compose files
presto/docker/docker-compose/generated*/

presto/slurm/presto-nvl72/logs/
presto/slurm/presto-nvl72/*.err
presto/slurm/presto-nvl72/*.out
presto/slurm/presto-nvl72/result_dir/

devstate*

21 changes: 12 additions & 9 deletions benchmark_data_tools/duckdb_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
import duckdb
import re

def quote_ident(name: str) -> str:
    """Return *name* wrapped in double quotes as a SQL identifier.

    Any embedded double quote is doubled (SQL-standard escaping), so the
    result is safe to splice into DuckDB DDL statements.
    """
    escaped = name.replace('"', '""')
    return f'"{escaped}"'

def init_benchmark_tables(benchmark_type, scale_factor):
tables = duckdb.sql("SHOW TABLES").fetchall()
assert len(tables) == 0
Expand All @@ -30,25 +33,25 @@ def init_benchmark_tables(benchmark_type, scale_factor):
def drop_benchmark_tables():
tables = duckdb.sql("SHOW TABLES").fetchall()
for table, in tables:
duckdb.sql(f"DROP TABLE {table}")
duckdb.sql(f"DROP TABLE {quote_ident(table)}")

def create_table(table_name, data_path):
duckdb.sql(f"DROP TABLE IF EXISTS {table_name}")
duckdb.sql(f"CREATE TABLE {table_name} AS SELECT * FROM '{data_path}/*.parquet';")
duckdb.sql(f"DROP TABLE IF EXISTS {quote_ident(table_name)}")
duckdb.sql(f"CREATE TABLE {quote_ident(table_name)} AS SELECT * FROM '{data_path}/*.parquet';")

# Generates a sample table with a small limit.
# This is mainly used to extract the schema from the parquet files.
def create_not_null_table_from_sample(table_name, data_path):
duckdb.sql(f"DROP TABLE IF EXISTS {table_name}")
duckdb.sql(f"CREATE TABLE {table_name} AS SELECT * FROM '{data_path}/*.parquet' LIMIT 10;")
ret = duckdb.sql(f"DESCRIBE TABLE {table_name}").fetchall()
duckdb.sql(f"DROP TABLE IF EXISTS {quote_ident(table_name)}")
duckdb.sql(f"CREATE TABLE {quote_ident(table_name)} AS SELECT * FROM '{data_path}/*.parquet' LIMIT 10;")
ret = duckdb.sql(f"DESCRIBE TABLE {quote_ident(table_name)}").fetchall()
for row in ret:
duckdb.sql(f"ALTER TABLE {table_name} ALTER COLUMN {row[0]} SET NOT NULL;")
duckdb.sql(f"ALTER TABLE {quote_ident(table_name)} ALTER COLUMN {row[0]} SET NOT NULL;")


def create_table_from_sample(table_name, data_path):
duckdb.sql(f"DROP TABLE IF EXISTS {table_name}")
duckdb.sql(f"CREATE TABLE {table_name} AS SELECT * FROM '{data_path}/*.parquet' LIMIT 10;")
duckdb.sql(f"DROP TABLE IF EXISTS {quote_ident(table_name)}")
duckdb.sql(f"CREATE TABLE {quote_ident(table_name)} AS SELECT * FROM '{data_path}/*.parquet' LIMIT 10;")


def is_decimal_column(column_type):
Expand Down
2 changes: 0 additions & 2 deletions presto/scripts/common_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ function wait_for_worker_node_registration() {
trap "rm -rf node_response.json" RETURN

echo "Waiting for a worker node to be registered..."
HOSTNAME=${1:-localhost}
PORT=${2:-8080}
COORDINATOR_URL=http://${HOSTNAME}:${PORT}
echo "Coordinator URL: $COORDINATOR_URL"
local -r MAX_RETRIES=12
Expand Down
11 changes: 8 additions & 3 deletions presto/scripts/generate_presto_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ function duplicate_worker_configs() {
${coord_config}/config_native.properties
sed -i "s+single-node-execution-enabled.*+single-node-execution-enabled=false+g" \
${worker_config}/config_native.properties
# make cudf.exchange=true if we are running multiple workers
# make cudf.exchange=true if we are running multiple workers
sed -i "s+cudf.exchange=false+cudf.exchange=true+g" ${worker_config}/config_native.properties
fi
echo "join-distribution-type=PARTITIONED" >> ${coord_config}/config_native.properties
Expand Down Expand Up @@ -84,7 +84,7 @@ RAM_GB=$(lsmem -b | grep "Total online memory" | awk '{print int($4 / (1024*1024
if [[ -z ${VARIANT_TYPE} || ! ${VARIANT_TYPE} =~ ^(cpu|gpu|java)$ ]]; then
echo_error "ERROR: VARIANT_TYPE must be set to a valid variant type (cpu, gpu, java)."
fi
if [[ -z ${VCPU_PER_WORKER} ]]; then
if [[ -z ${VCPU_PER_WORKER:-} ]]; then
if [[ "${VARIANT_TYPE}" == "gpu" ]]; then
VCPU_PER_WORKER=2
else
Expand Down Expand Up @@ -134,16 +134,21 @@ EOF
fi

COORD_CONFIG="${CONFIG_DIR}/etc_coordinator/config_native.properties"
WORKER_CONFIG="${CONFIG_DIR}/etc_worker/config_native.properties"
# now perform other variant-specific modifications to the generated configs
if [[ "${VARIANT_TYPE}" == "gpu" ]]; then
# for GPU variant, uncomment these optimizer settings
# optimizer.joins-not-null-inference-strategy=USE_FUNCTION_METADATA
# optimizer.default-filter-factor-enabled=true
sed -i 's/\#optimizer/optimizer/g' ${COORD_CONFIG}
sed -i 's/query.max-execution-time=.*/query.max-execution-time=10m/g' ${COORD_CONFIG}
echo "cudf.exchange.server.port=0000" >> ${WORKER_CONFIG}

if [[ ${NUM_WORKERS} -eq 1 ]]; then
# Adds a cluster tag for gpu variant
echo "cluster-tag=native-gpu" >> ${COORD_CONFIG}
else
sed -i "s+cudf.exchange=false+cudf.exchange=true+g" ${WORKER_CONFIG}
fi
fi

Expand All @@ -170,7 +175,7 @@ fi

# We want to propagate any changes from the original worker config to the new worker configs even if
# we did not re-generate the configs.
if [[ -n "$NUM_WORKERS" && -n "$GPU_IDS" && "$VARIANT_TYPE" == "gpu" ]]; then
if [[ -n "$NUM_WORKERS" && -n "${GPU_IDS:-}" && "$VARIANT_TYPE" == "gpu" ]]; then
# Count the number of GPU IDs provided
IFS=',' read -ra GPU_ID_ARRAY <<< "$GPU_IDS"
for i in "${GPU_ID_ARRAY[@]}"; do
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"ownerName" : "test_user",
"ownerType" : "USER",
"parameters" : { }
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
[ {
"@type" : "notnullconstraint",
"name" : "5951ddfe-541b-48dc-b3ce-0f53eaa83587",
"columns" : [ "c_comment" ],
"enabled" : true,
"rely" : true,
"enforced" : true
}, {
"@type" : "notnullconstraint",
"name" : "737546f8-881e-4ecb-811f-a282df20bfad",
"columns" : [ "c_custkey" ],
"enabled" : true,
"rely" : true,
"enforced" : true
}, {
"@type" : "notnullconstraint",
"name" : "203a77a2-fe6c-42b9-a0e2-710c41e4380c",
"columns" : [ "c_mktsegment" ],
"enabled" : true,
"rely" : true,
"enforced" : true
}, {
"@type" : "notnullconstraint",
"name" : "a71ae4a8-2518-40af-a529-254aa804770d",
"columns" : [ "c_nationkey" ],
"enabled" : true,
"rely" : true,
"enforced" : true
}, {
"@type" : "notnullconstraint",
"name" : "95df4231-53c6-4399-8fc6-ac7dbc951404",
"columns" : [ "c_name" ],
"enabled" : true,
"rely" : true,
"enforced" : true
}, {
"@type" : "notnullconstraint",
"name" : "5a8d3a1b-ab02-4014-8fbb-a2ef122d5373",
"columns" : [ "c_phone" ],
"enabled" : true,
"rely" : true,
"enforced" : true
}, {
"@type" : "notnullconstraint",
"name" : "ed43e5fd-159f-4f66-b89a-08b341f2881c",
"columns" : [ "c_acctbal" ],
"enabled" : true,
"rely" : true,
"enforced" : true
}, {
"@type" : "notnullconstraint",
"name" : "06a73af7-166f-464e-ba62-5eefb1930681",
"columns" : [ "c_address" ],
"enabled" : true,
"rely" : true,
"enforced" : true
} ]
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[ {
"permission" : "SELECT",
"grantOption" : true
}, {
"permission" : "INSERT",
"grantOption" : true
}, {
"permission" : "UPDATE",
"grantOption" : true
}, {
"permission" : "DELETE",
"grantOption" : true
} ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
{
"owner" : "test_user",
"tableType" : "EXTERNAL_TABLE",
"dataColumns" : [ {
"name" : "c_custkey",
"type" : "bigint"
}, {
"name" : "c_name",
"type" : "string"
}, {
"name" : "c_address",
"type" : "string"
}, {
"name" : "c_nationkey",
"type" : "int"
}, {
"name" : "c_phone",
"type" : "string"
}, {
"name" : "c_acctbal",
"type" : "double"
}, {
"name" : "c_mktsegment",
"type" : "string"
}, {
"name" : "c_comment",
"type" : "string"
} ],
"partitionColumns" : [ ],
"parameters" : {
"presto_version" : "testversion",
"presto_query_id" : "20260127_201905_00012_9v9nd",
"EXTERNAL" : "TRUE",
"numFiles" : "0",
"numRows" : "15000000",
"rawDataSize" : "0",
"totalSize" : "0"
},
"storageFormat" : {
"serDe" : "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
"inputFormat" : "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
"outputFormat" : "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"
},
"storageParameters" : {
"preferred_ordering_columns" : ""
},
"serdeParameters" : { },
"externalLocation" : "file:/var/lib/presto/data/hive/data/user_data/date-scale-100/customer",
"columnStatistics" : {
"c_custkey" : {
"integerStatistics" : {
"min" : 1,
"max" : 15000000
},
"nullsCount" : 0,
"distinctValuesCount" : 15000000
},
"c_acctbal" : {
"doubleStatistics" : {
"min" : -999.99,
"max" : 9999.99
},
"nullsCount" : 0,
"distinctValuesCount" : 1106028
},
"c_phone" : {
"maxValueSizeInBytes" : 19,
"totalSizeInBytes" : 285000000,
"nullsCount" : 0,
"distinctValuesCount" : 15000000
},
"c_mktsegment" : {
"maxValueSizeInBytes" : 14,
"totalSizeInBytes" : 195002670,
"nullsCount" : 0,
"distinctValuesCount" : 5
},
"c_address" : {
"maxValueSizeInBytes" : 44,
"totalSizeInBytes" : 434970931,
"nullsCount" : 0,
"distinctValuesCount" : 15000000
},
"c_nationkey" : {
"integerStatistics" : {
"min" : 0,
"max" : 24
},
"nullsCount" : 0,
"distinctValuesCount" : 25
},
"c_name" : {
"maxValueSizeInBytes" : 22,
"totalSizeInBytes" : 330000000,
"nullsCount" : 0,
"distinctValuesCount" : 14846457
},
"c_comment" : {
"maxValueSizeInBytes" : 120,
"totalSizeInBytes" : 1147296976,
"nullsCount" : 0,
"distinctValuesCount" : 15000000
}
}
}
Binary file not shown.
Loading