Skip to content

Commit f8d9835

Browse files
authored
Enhance Presto scripts (#242)
Co-authored-by: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
1 parent d681d92 commit f8d9835

File tree

5 files changed

+72
-6
lines changed

5 files changed

+72
-6
lines changed

common/testing/integration_tests/test_utils.py

Lines changed: 18 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -205,10 +205,23 @@ def assert_rows_equal(rows_1, rows_2, types):
205205

206206
def initialize_output_dir(config, query_engine):
207207
output_dir = Path(config.getoption("--output-dir"))
208-
if output_dir.exists():
209-
shutil.rmtree(output_dir)
210-
output_dir.mkdir(parents=True, exist_ok=False)
208+
user_reference_results_dir = config.getoption("--reference-results-dir")
209+
output_dir.mkdir(parents=True, exist_ok=True)
210+
211211
if config.getoption(f"--store-{query_engine}-results"):
212-
Path(f"{output_dir}/{query_engine}_results").mkdir(exist_ok=False)
212+
query_engine_results_dir = Path(f"{output_dir}/{query_engine}_results")
213+
if query_engine_results_dir.exists():
214+
shutil.rmtree(query_engine_results_dir)
215+
query_engine_results_dir.mkdir(exist_ok=False)
216+
213217
if config.getoption("--store-reference-results"):
214-
Path(f"{output_dir}/reference_results").mkdir(exist_ok=False)
218+
# Only manage the reference results directory if it's not being overridden by the user
219+
reference_results_dir = Path(f"{output_dir}/reference_results")
220+
if reference_results_dir.exists():
221+
if not user_reference_results_dir or Path(user_reference_results_dir) != reference_results_dir:
222+
shutil.rmtree(reference_results_dir)
223+
else:
224+
raise Exception(
225+
"Reference results directory and store-reference-results should not be set at the same time"
226+
)
227+
reference_results_dir.mkdir(exist_ok=False)

presto/scripts/run_integ_test.sh

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,8 @@ OPTIONS:
4444
--preview-rows-count Number of rows to include in the preview i.e. when --show-presto-result-preview or
4545
--show-reference-result-preview is specified.
4646
--skip-reference-comparison Skip Presto rows comparison against a reference set of rows.
47+
-e, --explain Run queries with EXPLAIN prefix. Requires --skip-reference-comparison.
48+
--explain-analyze Run queries with EXPLAIN ANALYZE prefix. Requires --skip-reference-comparison.
4749
--reuse-venv If this argument is specified, reuse the existing Python virtual environment if one exists
4850
and skip dependency installation.
4951
@@ -59,6 +61,8 @@ EXAMPLES:
5961
$0 -b tpch -q "1,2" -s my_sf1_schema --store-reference-results
6062
$0 -b tpch -q "1,2" -s my_sf1_schema --show-presto-result-preview --show-reference-result-preview --preview-rows-count 5
6163
$0 -b tpch -q "1,2" -s my_sf1_schema --store-presto-results --skip-reference-comparison
64+
$0 -b tpch -q "1,2" -s my_sf1_schema --skip-reference-comparison --explain
65+
$0 -b tpch -q "1,2" -s my_sf1_schema --skip-reference-comparison --explain-analyze
6266
$0 -h
6367
6468
EOF
@@ -178,6 +182,14 @@ parse_args() {
178182
SKIP_REFERENCE_COMPARISON=true
179183
shift
180184
;;
185+
-e|--explain)
186+
EXPLAIN=true
187+
shift
188+
;;
189+
--explain-analyze)
190+
EXPLAIN_ANALYZE=true
191+
shift
192+
;;
181193
--reuse-venv)
182194
REUSE_VENV=true
183195
shift
@@ -199,6 +211,12 @@ if [[ -z ${BENCHMARK_TYPE} || ! ${BENCHMARK_TYPE} =~ ^tpc(h|ds)$ ]]; then
199211
exit 1
200212
fi
201213

214+
if [[ "${EXPLAIN}" == "true" || "${EXPLAIN_ANALYZE}" == "true" ]] && [[ "${SKIP_REFERENCE_COMPARISON}" != "true" ]]; then
215+
echo "Error: --explain and --explain-analyze require --skip-reference-comparison to also be specified."
216+
print_help
217+
exit 1
218+
fi
219+
202220
set_presto_coordinator_defaults
203221

204222
PYTEST_ARGS=()
@@ -259,6 +277,16 @@ if [[ -n ${SKIP_REFERENCE_COMPARISON} ]]; then
259277
PYTEST_ARGS+=("--skip-reference-comparison")
260278
fi
261279

280+
if [[ "${EXPLAIN}" == "true" ]]; then
281+
PYTEST_ARGS+=("--explain")
282+
PYTEST_ARGS+=("--store-presto-results")
283+
fi
284+
285+
if [[ "${EXPLAIN_ANALYZE}" == "true" ]]; then
286+
PYTEST_ARGS+=("--explain-analyze")
287+
PYTEST_ARGS+=("--store-presto-results")
288+
fi
289+
262290
source "${SCRIPT_DIR}/../../scripts/py_env_functions.sh"
263291

264292
VENV_DIR=".integ_test_venv"

presto/scripts/start_presto_helper.sh

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,13 @@ elif [[ "$VARIANT_TYPE" == "cpu" ]]; then
128128
conditionally_add_build_target $CPU_WORKER_IMAGE $CPU_WORKER_SERVICE "worker|w"
129129
elif [[ "$VARIANT_TYPE" == "gpu" ]]; then
130130
DOCKER_COMPOSE_FILE="native-gpu"
131+
FIRST_GPU_ID=0
132+
if [[ -n $GPU_IDS ]]; then
133+
FIRST_GPU_ID=$(echo $GPU_IDS | cut -d',' -f1)
134+
fi
135+
if [[ -n "$NUM_WORKERS" && "$NUM_WORKERS" -gt 1 ]]; then
136+
GPU_WORKER_SERVICE="presto-native-worker-gpu-${FIRST_GPU_ID}"
137+
fi
131138
conditionally_add_build_target $GPU_WORKER_IMAGE $GPU_WORKER_SERVICE "worker|w"
132139
else
133140
echo "Internal error: unexpected VARIANT_TYPE value: $VARIANT_TYPE"

presto/testing/integration_tests/conftest.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,3 +22,5 @@ def pytest_addoption(parser):
2222
parser.addoption("--show-reference-result-preview", action="store_true", default=False)
2323
parser.addoption("--preview-rows-count", default=3, type=int)
2424
parser.addoption("--skip-reference-comparison", action="store_true", default=False)
25+
parser.addoption("--explain", action="store_true", default=False)
26+
parser.addoption("--explain-analyze", action="store_true", default=False)

presto/testing/integration_tests/test_utils.py

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,10 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
22
# SPDX-License-Identifier: Apache-2.0
33

4+
from pathlib import Path
5+
6+
import pandas as pd
7+
48
from common.testing.integration_tests.test_utils import (
59
create_duckdb_table, # noqa: F401
610
initialize_output_dir, # noqa: F401
@@ -13,8 +17,20 @@
1317
def execute_query_and_compare_results(request_config, presto_cursor, queries, query_id):
1418
query = queries[query_id]
1519

16-
presto_cursor.execute(query)
20+
explain = request_config.getoption("--explain")
21+
explain_analyze = request_config.getoption("--explain-analyze")
22+
explain_statement = "EXPLAIN " if explain else "EXPLAIN ANALYZE " if explain_analyze else ""
23+
24+
presto_cursor.execute(explain_statement + query)
1725
presto_rows = presto_cursor.fetchall()
1826
presto_columns = [desc[0] for desc in presto_cursor.description]
1927

28+
if explain or explain_analyze:
29+
if request_config.getoption("--store-presto-results"):
30+
output_dir = request_config.getoption("--output-dir")
31+
plan_path = Path(output_dir) / "presto_results" / f"{query_id.lower()}.plan"
32+
df = pd.DataFrame(presto_rows, columns=presto_columns)
33+
df.to_csv(plan_path, index=False)
34+
return
35+
2036
base_execute_query_and_compare_results(request_config, queries, query_id, "presto", presto_rows, presto_columns)

0 commit comments

Comments
 (0)