Skip to content

Commit c8d26ba

Browse files
authored
feat: Support Show runtime settings (#18564)
## Which issue does this PR close?

- Closes #18452

## Rationale for this change

Previously, the `SHOW` command could not display runtime configuration settings such as `datafusion.runtime.memory_limit`, `datafusion.runtime.max_temp_directory_size`, etc. When users attempted to view these settings, they would receive an error:

    Error during planning: 'datafusion.runtime.memory_limit' is not a variable which can be viewed with 'SHOW'

This PR enables users to query runtime settings using the `SHOW` command, making them accessible in the same way as other DataFusion configuration variables.

## What changes are included in this PR?

1. **Added `config_entries()` method to `RuntimeEnv`** - Returns runtime configuration as `ConfigEntry` objects with human-readable values (e.g., "100M", "unlimited")
2. **Exposed getter methods in `DiskManager`** - Added `max_temp_directory_size()` and `temp_dir_paths()` to access disk manager configuration
3. **Updated `InformationSchema`** - Modified `make_df_settings()` to include runtime configuration entries from `RuntimeEnv`
4. **Modified SQL statement validation** - Added check for `datafusion.runtime.*` variables to allow SHOW command

## Are these changes tested?

Yes, comprehensive tests have been added:

- **set_variable.slt** - Tests SHOW and SET for all 4 runtime variables (memory_limit, max_temp_directory_size, metadata_cache_limit, temp_directory)
- **information_schema.slt** - Verifies runtime variables appear in `information_schema.df_settings`
- Tests cover default values, setting custom values, and querying via information_schema

## Are there any user-facing changes?

Yes, users can now:

**View runtime settings with SHOW**:

```sql
SHOW datafusion.runtime.memory_limit;
-- Returns: datafusion.runtime.memory_limit | unlimited
```
1 parent 8b3725a commit c8d26ba

File tree

6 files changed

+190
-24
lines changed

6 files changed

+190
-24
lines changed

datafusion/catalog/src/information_schema.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ use datafusion_common::config::{ConfigEntry, ConfigOptions};
3232
use datafusion_common::error::Result;
3333
use datafusion_common::types::NativeType;
3434
use datafusion_common::DataFusionError;
35+
use datafusion_execution::runtime_env::RuntimeEnv;
3536
use datafusion_execution::TaskContext;
3637
use datafusion_expr::{AggregateUDF, ScalarUDF, Signature, TypeSignature, WindowUDF};
3738
use datafusion_expr::{TableType, Volatility};
@@ -215,11 +216,16 @@ impl InformationSchemaConfig {
215216
fn make_df_settings(
216217
&self,
217218
config_options: &ConfigOptions,
219+
runtime_env: &Arc<RuntimeEnv>,
218220
builder: &mut InformationSchemaDfSettingsBuilder,
219221
) {
220222
for entry in config_options.entries() {
221223
builder.add_setting(entry);
222224
}
225+
// Add runtime configuration entries
226+
for entry in runtime_env.config_entries() {
227+
builder.add_setting(entry);
228+
}
223229
}
224230

225231
fn make_routines(
@@ -1060,7 +1066,12 @@ impl PartitionStream for InformationSchemaDfSettings {
10601066
// TODO: Stream this
10611067
futures::stream::once(async move {
10621068
// create a mem table with the names of tables
1063-
config.make_df_settings(ctx.session_config().options(), &mut builder);
1069+
let runtime_env = ctx.runtime_env();
1070+
config.make_df_settings(
1071+
ctx.session_config().options(),
1072+
&runtime_env,
1073+
&mut builder,
1074+
);
10641075
Ok(builder.finish())
10651076
}),
10661077
))

datafusion/execution/src/disk_manager.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,24 @@ impl DiskManager {
246246
self.used_disk_space.load(Ordering::Relaxed)
247247
}
248248

249+
/// Returns the maximum temporary directory size in bytes
250+
pub fn max_temp_directory_size(&self) -> u64 {
251+
self.max_temp_directory_size
252+
}
253+
254+
/// Returns the temporary directory paths
255+
pub fn temp_dir_paths(&self) -> Vec<PathBuf> {
256+
self.local_dirs
257+
.lock()
258+
.as_ref()
259+
.map(|dirs| {
260+
dirs.iter()
261+
.map(|temp_dir| temp_dir.path().to_path_buf())
262+
.collect()
263+
})
264+
.unwrap_or_default()
265+
}
266+
249267
/// Return true if this disk manager supports creating temporary
250268
/// files. If this returns false, any call to `create_tmp_file`
251269
/// will error.

datafusion/execution/src/runtime_env.rs

Lines changed: 99 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,41 @@ impl Debug for RuntimeEnv {
9191
}
9292
}
9393

94+
/// Creates runtime configuration entries with the provided values
95+
///
96+
/// This helper function defines the structure and metadata for all runtime configuration
97+
/// entries to avoid duplication between `RuntimeEnv::config_entries()` and
98+
/// `RuntimeEnvBuilder::entries()`.
99+
fn create_runtime_config_entries(
100+
memory_limit: Option<String>,
101+
max_temp_directory_size: Option<String>,
102+
temp_directory: Option<String>,
103+
metadata_cache_limit: Option<String>,
104+
) -> Vec<ConfigEntry> {
105+
vec![
106+
ConfigEntry {
107+
key: "datafusion.runtime.memory_limit".to_string(),
108+
value: memory_limit,
109+
description: "Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.",
110+
},
111+
ConfigEntry {
112+
key: "datafusion.runtime.max_temp_directory_size".to_string(),
113+
value: max_temp_directory_size,
114+
description: "Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.",
115+
},
116+
ConfigEntry {
117+
key: "datafusion.runtime.temp_directory".to_string(),
118+
value: temp_directory,
119+
description: "The path to the temporary file directory.",
120+
},
121+
ConfigEntry {
122+
key: "datafusion.runtime.metadata_cache_limit".to_string(),
123+
value: metadata_cache_limit,
124+
description: "Maximum memory to use for file metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.",
125+
}
126+
]
127+
}
128+
94129
impl RuntimeEnv {
95130
/// Registers a custom `ObjectStore` to be used with a specific url.
96131
/// This allows DataFusion to create external tables from urls that do not have
@@ -173,6 +208,64 @@ impl RuntimeEnv {
173208
) -> Result<Arc<dyn EncryptionFactory>> {
174209
self.parquet_encryption_factory_registry.get_factory(id)
175210
}
211+
212+
/// Returns the current runtime configuration entries
213+
pub fn config_entries(&self) -> Vec<ConfigEntry> {
214+
use crate::memory_pool::MemoryLimit;
215+
216+
/// Convert bytes to a human-readable format
217+
fn format_byte_size(size: u64) -> String {
218+
const GB: u64 = 1024 * 1024 * 1024;
219+
const MB: u64 = 1024 * 1024;
220+
const KB: u64 = 1024;
221+
222+
match size {
223+
s if s >= GB => format!("{}G", s / GB),
224+
s if s >= MB => format!("{}M", s / MB),
225+
s if s >= KB => format!("{}K", s / KB),
226+
s => format!("{s}"),
227+
}
228+
}
229+
230+
let memory_limit_value = match self.memory_pool.memory_limit() {
231+
MemoryLimit::Finite(size) => Some(format_byte_size(
232+
size.try_into()
233+
.expect("Memory limit size conversion failed"),
234+
)),
235+
MemoryLimit::Infinite => Some("unlimited".to_string()),
236+
MemoryLimit::Unknown => None,
237+
};
238+
239+
let max_temp_dir_size = self.disk_manager.max_temp_directory_size();
240+
let max_temp_dir_value = format_byte_size(max_temp_dir_size);
241+
242+
let temp_paths = self.disk_manager.temp_dir_paths();
243+
let temp_dir_value = if temp_paths.is_empty() {
244+
None
245+
} else {
246+
Some(
247+
temp_paths
248+
.iter()
249+
.map(|p| p.display().to_string())
250+
.collect::<Vec<_>>()
251+
.join(","),
252+
)
253+
};
254+
255+
let metadata_cache_limit = self.cache_manager.get_metadata_cache_limit();
256+
let metadata_cache_value = format_byte_size(
257+
metadata_cache_limit
258+
.try_into()
259+
.expect("Metadata cache size conversion failed"),
260+
);
261+
262+
create_runtime_config_entries(
263+
memory_limit_value,
264+
Some(max_temp_dir_value),
265+
temp_dir_value,
266+
Some(metadata_cache_value),
267+
)
268+
}
176269
}
177270

178271
impl Default for RuntimeEnv {
@@ -359,28 +452,12 @@ impl RuntimeEnvBuilder {
359452

360453
/// Returns the list of all available runtime configurations with their values and descriptions.
///
/// The values are fixed literals and do not depend on the state of `self`:
/// `None` for `memory_limit` and `temp_directory`, `"100G"` for
/// `max_temp_directory_size` and `"50M"` for `metadata_cache_limit`.
/// NOTE(review): these look like the default values rather than the values
/// currently set on this builder - confirm before relying on them.
361454
pub fn entries(&self) -> Vec<ConfigEntry> {
362-
vec![
363-
ConfigEntry {
364-
key: "datafusion.runtime.memory_limit".to_string(),
365-
value: None, // Default is system-dependent
366-
description: "Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.",
367-
},
368-
ConfigEntry {
369-
key: "datafusion.runtime.max_temp_directory_size".to_string(),
370-
value: Some("100G".to_string()),
371-
description: "Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.",
372-
},
373-
ConfigEntry {
374-
key: "datafusion.runtime.temp_directory".to_string(),
375-
value: None, // Default is system-dependent
376-
description: "The path to the temporary file directory.",
377-
},
378-
ConfigEntry {
379-
key: "datafusion.runtime.metadata_cache_limit".to_string(),
380-
value: Some("50M".to_owned()),
381-
description: "Maximum memory to use for file metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.",
382-
}
383-
]
455+
create_runtime_config_entries(
456+
None,
457+
Some("100G".to_string()),
458+
None,
459+
Some("50M".to_owned()),
460+
)
384461
}
385462

386463
/// Generate documentation that can be included in the user guide

datafusion/sql/src/statement.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1869,7 +1869,10 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
18691869
.iter()
18701870
.any(|opt| opt.key == variable);
18711871

1872-
if !is_valid_variable {
1872+
// Check if it's a runtime variable
1873+
let is_runtime_variable = variable.starts_with("datafusion.runtime.");
1874+
1875+
if !is_valid_variable && !is_runtime_variable {
18731876
return plan_err!(
18741877
"'{variable}' is not a variable which can be viewed with 'SHOW'"
18751878
);

datafusion/sqllogictest/test_files/information_schema.slt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,10 @@ datafusion.optimizer.repartition_sorts true
314314
datafusion.optimizer.repartition_windows true
315315
datafusion.optimizer.skip_failed_rules false
316316
datafusion.optimizer.top_down_join_key_reordering true
317+
datafusion.runtime.max_temp_directory_size 100G
318+
datafusion.runtime.memory_limit unlimited
319+
datafusion.runtime.metadata_cache_limit 50M
320+
datafusion.runtime.temp_directory NULL
317321
datafusion.sql_parser.collect_spans false
318322
datafusion.sql_parser.default_null_ordering nulls_max
319323
datafusion.sql_parser.dialect generic
@@ -436,6 +440,10 @@ datafusion.optimizer.repartition_sorts true Should DataFusion execute sorts in a
436440
datafusion.optimizer.repartition_windows true Should DataFusion repartition data using the partitions keys to execute window functions in parallel using the provided `target_partitions` level
437441
datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail
438442
datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys
443+
datafusion.runtime.max_temp_directory_size 100G Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.
444+
datafusion.runtime.memory_limit unlimited Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.
445+
datafusion.runtime.metadata_cache_limit 50M Maximum memory to use for file metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.
446+
datafusion.runtime.temp_directory NULL The path to the temporary file directory.
439447
datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes.
440448
datafusion.sql_parser.default_null_ordering nulls_max Specifies the default null ordering for query results. There are 4 options: - `nulls_max`: Nulls appear last in ascending order. - `nulls_min`: Nulls appear first in ascending order. - `nulls_first`: Nulls always be first in any order. - `nulls_last`: Nulls always be last in any order. By default, `nulls_max` is used to follow Postgres's behavior. postgres rule: <https://www.postgresql.org/docs/current/queries-order.html>
441449
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.

datafusion/sqllogictest/test_files/set_variable.slt

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,3 +366,52 @@ RESET datafusion.execution.batches_size
366366
# reset invalid variable - extra suffix on valid field
367367
statement error DataFusion error: Invalid or Unsupported Configuration: Config field is a scalar usize and does not have nested field "bar"
368368
RESET datafusion.execution.batch_size.bar
369+
370+
############
371+
## Test runtime configuration variables
372+
############
373+
374+
# Test SHOW runtime.memory_limit (default value)
375+
query TT
376+
SHOW datafusion.runtime.memory_limit
377+
----
378+
datafusion.runtime.memory_limit unlimited
379+
380+
# Test SET and SHOW runtime.memory_limit
381+
statement ok
382+
SET datafusion.runtime.memory_limit = '100M'
383+
384+
query TT
385+
SHOW datafusion.runtime.memory_limit
386+
----
387+
datafusion.runtime.memory_limit 100M
388+
389+
# Test SET and SHOW runtime.max_temp_directory_size
390+
statement ok
391+
SET datafusion.runtime.max_temp_directory_size = '10G'
392+
393+
query TT
394+
SHOW datafusion.runtime.max_temp_directory_size
395+
----
396+
datafusion.runtime.max_temp_directory_size 10G
397+
398+
# Test SET and SHOW runtime.metadata_cache_limit
399+
statement ok
400+
SET datafusion.runtime.metadata_cache_limit = '200M'
401+
402+
query TT
403+
SHOW datafusion.runtime.metadata_cache_limit
404+
----
405+
datafusion.runtime.metadata_cache_limit 200M
406+
407+
# Note: runtime.temp_directory shows the actual temp directory path with a unique suffix,
408+
# so we cannot test the exact value. We verify it exists in information_schema instead.
409+
410+
# Test that all runtime variables appear in information_schema.df_settings
411+
query T
412+
SELECT name FROM information_schema.df_settings WHERE name LIKE 'datafusion.runtime.%' ORDER BY name
413+
----
414+
datafusion.runtime.max_temp_directory_size
415+
datafusion.runtime.memory_limit
416+
datafusion.runtime.metadata_cache_limit
417+
datafusion.runtime.temp_directory

0 commit comments

Comments
 (0)