diff --git a/Cargo.lock b/Cargo.lock index 518e2ee919cef..f6777a6c235ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2310,6 +2310,7 @@ dependencies = [ "blake2", "blake3", "chrono", + "chrono-tz", "criterion", "ctor", "datafusion-common", diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index a4f9211fe017c..2bdc05abe3806 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -40,7 +40,7 @@ workspace = true [features] crypto_expressions = ["md-5", "sha2", "blake2", "blake3"] # enable datetime functions -datetime_expressions = [] +datetime_expressions = ["chrono-tz"] # Enable encoding by default so the doctests work. In general don't automatically enable all packages. default = [ "datetime_expressions", @@ -71,6 +71,7 @@ base64 = { version = "0.22", optional = true } blake2 = { version = "^0.10.2", optional = true } blake3 = { version = "1.8", optional = true } chrono = { workspace = true } +chrono-tz = { version = "0.10.4", optional = true } datafusion-common = { workspace = true } datafusion-doc = { workspace = true } datafusion-execution = { workspace = true } diff --git a/datafusion/functions/benches/to_timestamp.rs b/datafusion/functions/benches/to_timestamp.rs index d95a197872eb3..ed865fa6e8d50 100644 --- a/datafusion/functions/benches/to_timestamp.rs +++ b/datafusion/functions/benches/to_timestamp.rs @@ -114,16 +114,21 @@ fn criterion_benchmark(c: &mut Criterion) { Field::new("f", DataType::Timestamp(TimeUnit::Nanosecond, None), true).into(); let arg_field = Field::new("a", DataType::Utf8, false).into(); let arg_fields = vec![arg_field]; - let config_options = Arc::new(ConfigOptions::default()); + let mut options = ConfigOptions::default(); + options.execution.time_zone = Some("UTC".into()); + let config_options = Arc::new(options); + + let to_timestamp_udf = to_timestamp(config_options.as_ref()); c.bench_function("to_timestamp_no_formats_utf8", |b| { + let to_timestamp_udf = 
Arc::clone(&to_timestamp_udf); let arr_data = data(); let batch_len = arr_data.len(); let string_array = ColumnarValue::Array(Arc::new(arr_data) as ArrayRef); b.iter(|| { black_box( - to_timestamp() + to_timestamp_udf .invoke_with_args(ScalarFunctionArgs { args: vec![string_array.clone()], arg_fields: arg_fields.clone(), @@ -137,13 +142,14 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("to_timestamp_no_formats_largeutf8", |b| { + let to_timestamp_udf = Arc::clone(&to_timestamp_udf); let data = cast(&data(), &DataType::LargeUtf8).unwrap(); let batch_len = data.len(); let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { black_box( - to_timestamp() + to_timestamp_udf .invoke_with_args(ScalarFunctionArgs { args: vec![string_array.clone()], arg_fields: arg_fields.clone(), @@ -157,13 +163,14 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("to_timestamp_no_formats_utf8view", |b| { + let to_timestamp_udf = Arc::clone(&to_timestamp_udf); let data = cast(&data(), &DataType::Utf8View).unwrap(); let batch_len = data.len(); let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { black_box( - to_timestamp() + to_timestamp_udf .invoke_with_args(ScalarFunctionArgs { args: vec![string_array.clone()], arg_fields: arg_fields.clone(), @@ -177,6 +184,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("to_timestamp_with_formats_utf8", |b| { + let to_timestamp_udf = Arc::clone(&to_timestamp_udf); let (inputs, format1, format2, format3) = data_with_formats(); let batch_len = inputs.len(); @@ -196,7 +204,7 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - to_timestamp() + to_timestamp_udf .invoke_with_args(ScalarFunctionArgs { args: args.clone(), arg_fields: arg_fields.clone(), @@ -210,6 +218,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("to_timestamp_with_formats_largeutf8", |b| { + let to_timestamp_udf = 
Arc::clone(&to_timestamp_udf); let (inputs, format1, format2, format3) = data_with_formats(); let batch_len = inputs.len(); @@ -237,7 +246,7 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - to_timestamp() + to_timestamp_udf .invoke_with_args(ScalarFunctionArgs { args: args.clone(), arg_fields: arg_fields.clone(), @@ -251,6 +260,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("to_timestamp_with_formats_utf8view", |b| { + let to_timestamp_udf = Arc::clone(&to_timestamp_udf); let (inputs, format1, format2, format3) = data_with_formats(); let batch_len = inputs.len(); @@ -279,7 +289,7 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - to_timestamp() + to_timestamp_udf .invoke_with_args(ScalarFunctionArgs { args: args.clone(), arg_fields: arg_fields.clone(), diff --git a/datafusion/functions/src/datetime/common.rs b/datafusion/functions/src/datetime/common.rs index 5b152081b64ca..2db64beafa9b7 100644 --- a/datafusion/functions/src/datetime/common.rs +++ b/datafusion/functions/src/datetime/common.rs @@ -15,31 +15,57 @@ // specific language governing permissions and limitations // under the License. 
-use std::sync::Arc; +use std::sync::{Arc, LazyLock}; +use arrow::array::timezone::Tz; use arrow::array::{ Array, ArrowPrimitiveType, AsArray, GenericStringArray, PrimitiveArray, StringArrayType, StringViewArray, }; -use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; -use arrow::datatypes::DataType; +use arrow::compute::DecimalCast; +use arrow::compute::kernels::cast_utils::string_to_datetime; +use arrow::datatypes::{DataType, TimeUnit}; +use arrow_buffer::ArrowNativeType; use chrono::LocalResult::Single; use chrono::format::{Parsed, StrftimeItems, parse}; use chrono::{DateTime, TimeZone, Utc}; - use datafusion_common::cast::as_generic_string_array; use datafusion_common::{ - DataFusionError, Result, ScalarType, ScalarValue, exec_datafusion_err, exec_err, - unwrap_or_internal_err, + DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err, + internal_datafusion_err, unwrap_or_internal_err, }; use datafusion_expr::ColumnarValue; /// Error message if nanosecond conversion request beyond supported interval const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804"; -/// Calls string_to_timestamp_nanos and converts the error type -pub(crate) fn string_to_timestamp_nanos_shim(s: &str) -> Result { - string_to_timestamp_nanos(s).map_err(|e| e.into()) +static UTC: LazyLock = LazyLock::new(|| "UTC".parse().expect("UTC is always valid")); + +/// Converts a string representation of a date‑time into a timestamp expressed in +/// nanoseconds since the Unix epoch. +/// +/// This helper is a thin wrapper around the more general `string_to_datetime` +/// function. It accepts an optional `timezone` which, if `None`, defaults to +/// Coordinated Universal Time (UTC). The string `s` must contain a valid +/// date‑time format that can be parsed by the underlying chrono parser. 
+/// +/// # Return Value +/// +/// * `Ok(i64)` – The number of nanoseconds since `1970-01-01T00:00:00Z`. +/// * `Err(DataFusionError)` – If the string cannot be parsed, the parsed +/// value falls outside the representable range (1677-09-21T00:12:44.0 through 2262-04-11T23:47:16.854775804), +/// or the parsed value does not correspond to an unambiguous time. +pub(crate) fn string_to_timestamp_nanos_with_timezone( + timezone: &Option, + s: &str, +) -> Result { + let tz = timezone.as_ref().unwrap_or(&UTC); + let dt = string_to_datetime(tz, s)?; + let parsed = dt + .timestamp_nanos_opt() + .ok_or_else(|| exec_datafusion_err!("{ERR_NANOSECONDS_NOT_SUPPORTED}"))?; + + Ok(parsed) } /// Checks that all the arguments from the second are of type [Utf8], [LargeUtf8] or [Utf8View] @@ -69,13 +95,12 @@ pub(crate) fn validate_data_types(args: &[ColumnarValue], name: &str) -> Result< /// Accepts a string and parses it using the [`chrono::format::strftime`] specifiers /// relative to the provided `timezone` /// -/// [IANA timezones] are only supported if the `arrow-array/chrono-tz` feature is enabled -/// -/// * `2023-01-01 040506 America/Los_Angeles` -/// /// If a timestamp is ambiguous, for example as a result of daylight-savings time, an error /// will be returned /// +/// Note that parsing [IANA timezones] is not supported yet in chrono - +/// and this implementation only supports named timezones at the end of the string preceded by a space. +/// /// [`chrono::format::strftime`]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html /// [IANA timezones]: https://www.iana.org/time-zones pub(crate) fn string_to_datetime_formatted( @@ -89,11 +114,55 @@ pub(crate) fn string_to_datetime_formatted( ) }; + let mut datetime_str = s; + let mut format = format; + + // Manually handle the most common case of a named timezone at the end of the timestamp. + // Note that %+ handles 'Z' at the end of the string without a space. 
This code doesn't + // handle named timezones with no preceding space since that would require writing a + // custom parser (or switching to Jiff) + let tz: Option = if format.trim_end().ends_with(" %Z") { + // grab the string after the last space as the named timezone + if let Some((dt_str, timezone_name)) = datetime_str.trim_end().rsplit_once(' ') { + datetime_str = dt_str; + + // attempt to parse the timezone name + let result: Result = + timezone_name.parse(); + let Ok(tz) = result else { + return Err(err(&result.unwrap_err().to_string())); + }; + + // successfully parsed the timezone name, remove the ' %Z' from the format + format = &format[..format.len() - 3]; + + Some(tz) + } else { + None + } + } else if format.contains("%Z") { + return Err(err( + "'%Z' is only supported at the end of the format string preceded by a space", + )); + } else { + None + }; + let mut parsed = Parsed::new(); - parse(&mut parsed, s, StrftimeItems::new(format)).map_err(|e| err(&e.to_string()))?; + parse(&mut parsed, datetime_str, StrftimeItems::new(format)) + .map_err(|e| err(&e.to_string()))?; - // attempt to parse the string assuming it has a timezone - let dt = parsed.to_datetime(); + let dt = match tz { + Some(tz) => { + // A timezone was manually parsed out, convert it to a fixed offset + match parsed.to_datetime_with_timezone(&tz) { + Ok(dt) => Ok(dt.fixed_offset()), + Err(e) => Err(e), + } + } + // default to parse the string assuming it has a timezone + None => parsed.to_datetime(), + }; if let Err(e) = &dt { // no timezone or other failure, try without a timezone @@ -115,7 +184,7 @@ pub(crate) fn string_to_datetime_formatted( } /// Accepts a string with a `chrono` format and converts it to a -/// nanosecond precision timestamp. +/// nanosecond precision timestamp relative to the provided `timezone`. /// /// See [`chrono::format::strftime`] for the full set of supported formats. 
/// @@ -141,19 +210,21 @@ pub(crate) fn string_to_datetime_formatted( /// /// [`chrono::format::strftime`]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html #[inline] -pub(crate) fn string_to_timestamp_nanos_formatted( +pub(crate) fn string_to_timestamp_nanos_formatted_with_timezone( + timezone: &Option, s: &str, format: &str, ) -> Result { - string_to_datetime_formatted(&Utc, s, format)? - .naive_utc() - .and_utc() + let dt = string_to_datetime_formatted(timezone.as_ref().unwrap_or(&UTC), s, format)?; + let parsed = dt .timestamp_nanos_opt() - .ok_or_else(|| exec_datafusion_err!("{ERR_NANOSECONDS_NOT_SUPPORTED}")) + .ok_or_else(|| exec_datafusion_err!("{ERR_NANOSECONDS_NOT_SUPPORTED}"))?; + + Ok(parsed) } /// Accepts a string with a `chrono` format and converts it to a -/// millisecond precision timestamp. +/// millisecond precision timestamp relative to the provided `timezone`. /// /// See [`chrono::format::strftime`] for the full set of supported formats. /// @@ -176,14 +247,14 @@ pub(crate) fn string_to_timestamp_millis_formatted(s: &str, format: &str) -> Res .timestamp_millis()) } -pub(crate) fn handle( +pub(crate) fn handle( args: &[ColumnarValue], op: F, name: &str, + dt: &DataType, ) -> Result where O: ArrowPrimitiveType, - S: ScalarType, F: Fn(&str) -> Result, { match &args[0] { @@ -210,8 +281,13 @@ where }, ColumnarValue::Scalar(scalar) => match scalar.try_as_str() { Some(a) => { - let result = a.as_ref().map(|x| op(x)).transpose()?; - Ok(ColumnarValue::Scalar(S::scalar(result))) + let result = a + .as_ref() + .map(|x| op(x)) + .transpose()? 
+ .and_then(|v| v.to_i64()); + let s = scalar_value(dt, result)?; + Ok(ColumnarValue::Scalar(s)) } _ => exec_err!("Unsupported data type {scalar:?} for function {name}"), }, @@ -221,15 +297,15 @@ where // Given a function that maps a `&str`, `&str` to an arrow native type, // returns a `ColumnarValue` where the function is applied to either a `ArrayRef` or `ScalarValue` // depending on the `args`'s variant. -pub(crate) fn handle_multiple( +pub(crate) fn handle_multiple( args: &[ColumnarValue], op: F, op2: M, name: &str, + dt: &DataType, ) -> Result where O: ArrowPrimitiveType, - S: ScalarType, F: Fn(&str, &str) -> Result, M: Fn(O::Native) -> O::Native, { @@ -298,9 +374,9 @@ where if let Some(s) = x { match op(a, s.as_str()) { Ok(r) => { - ret = Some(Ok(ColumnarValue::Scalar(S::scalar(Some( - op2(r), - ))))); + let result = op2(r).to_i64(); + let s = scalar_value(dt, result)?; + ret = Some(Ok(ColumnarValue::Scalar(s))); break; } Err(e) => ret = Some(Err(e)), @@ -454,3 +530,16 @@ where // first map is the iterator, second is for the `Option<_>` array.iter().map(|x| x.map(&op).transpose()).collect() } + +fn scalar_value(dt: &DataType, r: Option) -> Result { + match dt { + DataType::Date32 => Ok(ScalarValue::Date32(r.and_then(|v| v.to_i32()))), + DataType::Timestamp(u, tz) => match u { + TimeUnit::Second => Ok(ScalarValue::TimestampSecond(r, tz.clone())), + TimeUnit::Millisecond => Ok(ScalarValue::TimestampMillisecond(r, tz.clone())), + TimeUnit::Microsecond => Ok(ScalarValue::TimestampMicrosecond(r, tz.clone())), + TimeUnit::Nanosecond => Ok(ScalarValue::TimestampNanosecond(r, tz.clone())), + }, + t => Err(internal_datafusion_err!("Unsupported data type: {t:?}")), + } +} diff --git a/datafusion/functions/src/datetime/mod.rs b/datafusion/functions/src/datetime/mod.rs index 9872db3faf556..39b9453295df6 100644 --- a/datafusion/functions/src/datetime/mod.rs +++ b/datafusion/functions/src/datetime/mod.rs @@ -53,11 +53,14 @@ make_udf_function!(to_date::ToDateFunc, to_date); 
make_udf_function!(to_local_time::ToLocalTimeFunc, to_local_time); make_udf_function!(to_time::ToTimeFunc, to_time); make_udf_function!(to_unixtime::ToUnixtimeFunc, to_unixtime); -make_udf_function!(to_timestamp::ToTimestampFunc, to_timestamp); -make_udf_function!(to_timestamp::ToTimestampSecondsFunc, to_timestamp_seconds); -make_udf_function!(to_timestamp::ToTimestampMillisFunc, to_timestamp_millis); -make_udf_function!(to_timestamp::ToTimestampMicrosFunc, to_timestamp_micros); -make_udf_function!(to_timestamp::ToTimestampNanosFunc, to_timestamp_nanos); +make_udf_function_with_config!(to_timestamp::ToTimestampFunc, to_timestamp); +make_udf_function_with_config!( + to_timestamp::ToTimestampSecondsFunc, + to_timestamp_seconds +); +make_udf_function_with_config!(to_timestamp::ToTimestampMillisFunc, to_timestamp_millis); +make_udf_function_with_config!(to_timestamp::ToTimestampMicrosFunc, to_timestamp_micros); +make_udf_function_with_config!(to_timestamp::ToTimestampNanosFunc, to_timestamp_nanos); // create UDF with config make_udf_function_with_config!(now::NowFunc, now); @@ -118,24 +121,24 @@ pub mod expr_fn { args, ),( to_timestamp, - "converts a string and optional formats to a `Timestamp(Nanoseconds, None)`", - args, + "converts a string and optional formats to a `Timestamp(Nanoseconds, TimeZone)`", + @config args, ),( to_timestamp_seconds, - "converts a string and optional formats to a `Timestamp(Seconds, None)`", - args, + "converts a string and optional formats to a `Timestamp(Seconds, TimeZone)`", + @config args, ),( to_timestamp_millis, - "converts a string and optional formats to a `Timestamp(Milliseconds, None)`", - args, + "converts a string and optional formats to a `Timestamp(Milliseconds, TimeZone)`", + @config args, ),( to_timestamp_micros, - "converts a string and optional formats to a `Timestamp(Microseconds, None)`", - args, + "converts a string and optional formats to a `Timestamp(Microseconds, TimeZone)`", + @config args, ),( to_timestamp_nanos, 
- "converts a string and optional formats to a `Timestamp(Nanoseconds, None)`", - args, + "converts a string and optional formats to a `Timestamp(Nanoseconds, TimeZone)`", + @config args, )); /// Returns a string representation of a date, time, timestamp or duration based @@ -271,6 +274,7 @@ pub mod expr_fn { /// Returns all DataFusion functions defined in this package pub fn functions() -> Vec> { use datafusion_common::config::ConfigOptions; + let config = ConfigOptions::default(); vec![ current_date(), current_time(), @@ -280,16 +284,16 @@ pub fn functions() -> Vec> { from_unixtime(), make_date(), make_time(), - now(&ConfigOptions::default()), + now(&config), to_char(), to_date(), to_local_time(), to_time(), to_unixtime(), - to_timestamp(), - to_timestamp_seconds(), - to_timestamp_millis(), - to_timestamp_micros(), - to_timestamp_nanos(), + to_timestamp(&config), + to_timestamp_seconds(&config), + to_timestamp_millis(&config), + to_timestamp_micros(&config), + to_timestamp_nanos(&config), ] } diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs index 471995089ba3b..60c6fdf2df975 100644 --- a/datafusion/functions/src/datetime/to_date.rs +++ b/datafusion/functions/src/datetime/to_date.rs @@ -84,7 +84,7 @@ impl ToDateFunc { fn to_date(&self, args: &[ColumnarValue]) -> Result { match args.len() { - 1 => handle::( + 1 => handle::( args, |s| match Date32Type::parse(s) { Some(v) => Ok(v), @@ -94,8 +94,9 @@ impl ToDateFunc { )), }, "to_date", + &Date32, ), - 2.. => handle_multiple::( + 2.. 
=> handle_multiple::( args, |s, format| { string_to_timestamp_millis_formatted(s, format) @@ -108,6 +109,7 @@ impl ToDateFunc { }, |n| n, "to_date", + &Date32, ), 0 => exec_err!("Unsupported 0 argument count for function to_date"), } diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index fdaf8a764ad8a..58077694b07a0 100644 --- a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -20,25 +20,37 @@ use std::sync::Arc; use crate::datetime::common::*; use arrow::array::Float64Array; +use arrow::array::timezone::Tz; use arrow::datatypes::DataType::*; use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; use arrow::datatypes::{ - ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType, - TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, + ArrowTimestampType, DataType, TimestampMicrosecondType, TimestampMillisecondType, + TimestampNanosecondType, TimestampSecondType, }; +use datafusion_common::config::ConfigOptions; use datafusion_common::format::DEFAULT_CAST_OPTIONS; use datafusion_common::{Result, ScalarType, ScalarValue, exec_err}; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, + ColumnarValue, Documentation, ScalarUDF, ScalarUDFImpl, Signature, Volatility, }; use datafusion_macros::user_doc; #[user_doc( doc_section(label = "Time and Date Functions"), description = r#" -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. - -Note: `to_timestamp` returns `Timestamp(ns)`. 
The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000`) in the session time zone. Supports strings, +integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') +if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the +session time zone, or UTC if no session time zone is set. +Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). + +Note: `to_timestamp` returns `Timestamp(ns, TimeZone)` where the time zone is the session time zone. The supported range +for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between +`1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` +for the input outside of supported bounds. + +The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. +The time zone can be a value like +00:00, 'Europe/London' etc. "#, syntax_example = "to_timestamp(expression[, ..., format_n])", sql_example = r#"```sql @@ -63,17 +75,32 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ), argument( name = "format_n", - description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned." + description = r#" +Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. 
+Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully +parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is +only supported at the end of the string preceded by a space. +"# ) )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct ToTimestampFunc { signature: Signature, + timezone: Option>, } #[user_doc( doc_section(label = "Time and Date Functions"), - description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.", + description = r#" +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00`) in the session time zone. Supports strings, +integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') +if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the +session time zone, or UTC if no session time zone is set. +Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). + +The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. +The time zone can be a value like +00:00, 'Europe/London' etc. 
+"#, syntax_example = "to_timestamp_seconds(expression[, ..., format_n])", sql_example = r#"```sql > select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00'); @@ -97,17 +124,32 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ), argument( name = "format_n", - description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned." + description = r#" +Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. +Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully +parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is +only supported at the end of the string preceded by a space. +"# ) )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct ToTimestampSecondsFunc { signature: Signature, + timezone: Option>, } #[user_doc( doc_section(label = "Time and Date Functions"), - description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.", + description = r#" +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000`) in the session time zone. Supports strings, +integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. 
'2023-07-20T05:44:00') +if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the +session time zone, or UTC if no session time zone is set. +Integers, unsigned integers, and doubles are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). + +The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. +The time zone can be a value like +00:00, 'Europe/London' etc. +"#, syntax_example = "to_timestamp_millis(expression[, ..., format_n])", sql_example = r#"```sql > select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00'); @@ -131,17 +173,32 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ), argument( name = "format_n", - description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned." + description = r#" +Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. +Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully +parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is +only supported at the end of the string preceded by a space. +"# ) )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct ToTimestampMillisFunc { signature: Signature, + timezone: Option>, } #[user_doc( doc_section(label = "Time and Date Functions"), - description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. 
'2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp.", + description = r#" +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000`) in the session time zone. Supports strings, +integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') +if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the +session time zone, or UTC if no session time zone is set. +Integers, unsigned integers, and doubles are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`). + +The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. +The time zone can be a value like +00:00, 'Europe/London' etc. +"#, syntax_example = "to_timestamp_micros(expression[, ..., format_n])", sql_example = r#"```sql > select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00'); @@ -165,17 +222,31 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ), argument( name = "format_n", - description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned." + description = r#" +Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. +Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully +parse the expression an error will be returned. Note: parsing of named timezones (e.g. 
'America/New_York') using %Z is +only supported at the end of the string preceded by a space. +"# ) )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct ToTimestampMicrosFunc { signature: Signature, + timezone: Option>, } #[user_doc( doc_section(label = "Time and Date Functions"), - description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.", + description = r#" +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000`) in the session time zone. Supports strings, +integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') +if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the +session time zone. Integers, unsigned integers, and doubles are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). + +The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. +The time zone can be a value like +00:00, 'Europe/London' etc. +"#, syntax_example = "to_timestamp_nanos(expression[, ..., format_n])", sql_example = r#"```sql > select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00'); @@ -199,83 +270,60 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ), argument( name = "format_n", - description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. 
If none of the formats successfully parse the expression an error will be returned." + description = r#" +Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. +Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully +parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is +only supported at the end of the string preceded by a space. +"# ) )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct ToTimestampNanosFunc { signature: Signature, + timezone: Option>, } -impl Default for ToTimestampFunc { - fn default() -> Self { - Self::new() - } -} - -impl ToTimestampFunc { - pub fn new() -> Self { - Self { - signature: Signature::variadic_any(Volatility::Immutable), - } - } -} - -impl Default for ToTimestampSecondsFunc { - fn default() -> Self { - Self::new() - } -} - -impl ToTimestampSecondsFunc { - pub fn new() -> Self { - Self { - signature: Signature::variadic_any(Volatility::Immutable), - } - } -} - -impl Default for ToTimestampMillisFunc { - fn default() -> Self { - Self::new() - } -} - -impl ToTimestampMillisFunc { - pub fn new() -> Self { - Self { - signature: Signature::variadic_any(Volatility::Immutable), +/// Macro to generate boilerplate constructors and config methods for ToTimestamp* functions. +/// Generates: Default impl, deprecated new(), new_with_config(), and extracts timezone from ConfigOptions. +macro_rules! impl_to_timestamp_constructors { + ($func:ty) => { + impl Default for $func { + fn default() -> Self { + Self::new_with_config(&ConfigOptions::default()) + } } - } -} -impl Default for ToTimestampMicrosFunc { - fn default() -> Self { - Self::new() - } -} + impl $func { + #[deprecated(since = "52.0.0", note = "use `new_with_config` instead")] + /// Deprecated constructor retained for backwards compatibility. 
+ /// + /// Prefer `new_with_config` which allows specifying the + /// timezone via [`ConfigOptions`]. This helper now mirrors the + /// canonical default offset (None) provided by `ConfigOptions::default()`. + pub fn new() -> Self { + Self::new_with_config(&ConfigOptions::default()) + } -impl ToTimestampMicrosFunc { - pub fn new() -> Self { - Self { - signature: Signature::variadic_any(Volatility::Immutable), + pub fn new_with_config(config: &ConfigOptions) -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + timezone: config + .execution + .time_zone + .as_ref() + .map(|tz| Arc::from(tz.as_str())), + } + } } - } -} - -impl Default for ToTimestampNanosFunc { - fn default() -> Self { - Self::new() - } + }; } -impl ToTimestampNanosFunc { - pub fn new() -> Self { - Self { - signature: Signature::variadic_any(Volatility::Immutable), - } - } -} +impl_to_timestamp_constructors!(ToTimestampFunc); +impl_to_timestamp_constructors!(ToTimestampSecondsFunc); +impl_to_timestamp_constructors!(ToTimestampMillisFunc); +impl_to_timestamp_constructors!(ToTimestampMicrosFunc); +impl_to_timestamp_constructors!(ToTimestampNanosFunc); /// to_timestamp SQL function /// @@ -283,6 +331,15 @@ impl ToTimestampNanosFunc { /// The supported range for integer input is between `-9223372037` and `9223372036`. /// Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. /// Please use `to_timestamp_seconds` for the input outside of supported bounds. +/// Macro to generate the with_updated_config method for ToTimestamp* functions. +macro_rules! 
impl_with_updated_config { + () => { + fn with_updated_config(&self, config: &ConfigOptions) -> Option { + Some(Self::new_with_config(config).into()) + } + }; +} + impl ScalarUDFImpl for ToTimestampFunc { fn as_any(&self) -> &dyn Any { self @@ -296,15 +353,18 @@ impl ScalarUDFImpl for ToTimestampFunc { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> Result { - Ok(return_type_for(&arg_types[0], Nanosecond)) + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(Timestamp(Nanosecond, self.timezone.clone())) } + impl_with_updated_config!(); + fn invoke_with_args( &self, args: datafusion_expr::ScalarFunctionArgs, ) -> Result { - let args = args.args; + let datafusion_expr::ScalarFunctionArgs { args, .. } = args; + if args.is_empty() { return exec_err!( "to_timestamp function requires 1 or more arguments, got {}", @@ -317,13 +377,13 @@ impl ScalarUDFImpl for ToTimestampFunc { validate_data_types(&args, "to_timestamp")?; } + let tz = self.timezone.clone(); + match args[0].data_type() { Int32 | Int64 => args[0] .cast_to(&Timestamp(Second, None), None)? 
- .cast_to(&Timestamp(Nanosecond, None), None), - Null | Timestamp(_, None) => { - args[0].cast_to(&Timestamp(Nanosecond, None), None) - } + .cast_to(&Timestamp(Nanosecond, tz), None), + Null | Timestamp(_, _) => args[0].cast_to(&Timestamp(Nanosecond, tz), None), Float64 => { let rescaled = arrow::compute::kernels::numeric::mul( &args[0].to_array(1)?, @@ -333,15 +393,12 @@ impl ScalarUDFImpl for ToTimestampFunc { )?; Ok(ColumnarValue::Array(arrow::compute::cast_with_options( &rescaled, - &Timestamp(Nanosecond, None), + &Timestamp(Nanosecond, tz), &DEFAULT_CAST_OPTIONS, )?)) } - Timestamp(_, Some(tz)) => { - args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None) - } Utf8View | LargeUtf8 | Utf8 => { - to_timestamp_impl::(&args, "to_timestamp") + to_timestamp_impl::(&args, "to_timestamp", &tz) } Decimal128(_, _) => { match &args[0] { @@ -354,14 +411,12 @@ impl ScalarUDFImpl for ToTimestampFunc { let scale_factor = 10_i128.pow(*scale as u32); let seconds = value / scale_factor; let fraction = value % scale_factor; - let nanos = (fraction * 1_000_000_000) / scale_factor; - let timestamp_nanos = seconds * 1_000_000_000 + nanos; Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond( Some(timestamp_nanos as i64), - None, + tz, ))) } _ => exec_err!("Invalid decimal value"), @@ -372,6 +427,7 @@ impl ScalarUDFImpl for ToTimestampFunc { } } } + fn documentation(&self) -> Option<&Documentation> { self.doc() } @@ -390,15 +446,18 @@ impl ScalarUDFImpl for ToTimestampSecondsFunc { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> Result { - Ok(return_type_for(&arg_types[0], Second)) + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(Timestamp(Second, self.timezone.clone())) } + impl_with_updated_config!(); + fn invoke_with_args( &self, args: datafusion_expr::ScalarFunctionArgs, ) -> Result { - let args = args.args; + let datafusion_expr::ScalarFunctionArgs { args, .. 
} = args; + if args.is_empty() { return exec_err!( "to_timestamp_seconds function requires 1 or more arguments, got {}", @@ -411,14 +470,17 @@ impl ScalarUDFImpl for ToTimestampSecondsFunc { validate_data_types(&args, "to_timestamp")?; } + let tz = self.timezone.clone(); + match args[0].data_type() { - Null | Int32 | Int64 | Timestamp(_, None) | Decimal128(_, _) => { - args[0].cast_to(&Timestamp(Second, None), None) - } - Timestamp(_, Some(tz)) => args[0].cast_to(&Timestamp(Second, Some(tz)), None), - Utf8View | LargeUtf8 | Utf8 => { - to_timestamp_impl::(&args, "to_timestamp_seconds") + Null | Int32 | Int64 | Timestamp(_, _) | Decimal128(_, _) => { + args[0].cast_to(&Timestamp(Second, tz), None) } + Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::( + &args, + "to_timestamp_seconds", + &self.timezone, + ), other => { exec_err!( "Unsupported data type {} for function to_timestamp_seconds", @@ -427,6 +489,7 @@ impl ScalarUDFImpl for ToTimestampSecondsFunc { } } } + fn documentation(&self) -> Option<&Documentation> { self.doc() } @@ -445,15 +508,18 @@ impl ScalarUDFImpl for ToTimestampMillisFunc { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> Result { - Ok(return_type_for(&arg_types[0], Millisecond)) + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(Timestamp(Millisecond, self.timezone.clone())) } + impl_with_updated_config!(); + fn invoke_with_args( &self, args: datafusion_expr::ScalarFunctionArgs, ) -> Result { - let args = args.args; + let datafusion_expr::ScalarFunctionArgs { args, .. 
} = args; + if args.is_empty() { return exec_err!( "to_timestamp_millis function requires 1 or more arguments, got {}", @@ -467,15 +533,13 @@ impl ScalarUDFImpl for ToTimestampMillisFunc { } match args[0].data_type() { - Null | Int32 | Int64 | Timestamp(_, None) => { - args[0].cast_to(&Timestamp(Millisecond, None), None) - } - Timestamp(_, Some(tz)) => { - args[0].cast_to(&Timestamp(Millisecond, Some(tz)), None) + Null | Int32 | Int64 | Timestamp(_, _) => { + args[0].cast_to(&Timestamp(Millisecond, self.timezone.clone()), None) } Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::( &args, "to_timestamp_millis", + &self.timezone, ), other => { exec_err!( @@ -485,6 +549,7 @@ impl ScalarUDFImpl for ToTimestampMillisFunc { } } } + fn documentation(&self) -> Option<&Documentation> { self.doc() } @@ -503,15 +568,18 @@ impl ScalarUDFImpl for ToTimestampMicrosFunc { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> Result { - Ok(return_type_for(&arg_types[0], Microsecond)) + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(Timestamp(Microsecond, self.timezone.clone())) } + impl_with_updated_config!(); + fn invoke_with_args( &self, args: datafusion_expr::ScalarFunctionArgs, ) -> Result { - let args = args.args; + let datafusion_expr::ScalarFunctionArgs { args, .. 
} = args; + if args.is_empty() { return exec_err!( "to_timestamp_micros function requires 1 or more arguments, got {}", @@ -525,15 +593,13 @@ impl ScalarUDFImpl for ToTimestampMicrosFunc { } match args[0].data_type() { - Null | Int32 | Int64 | Timestamp(_, None) => { - args[0].cast_to(&Timestamp(Microsecond, None), None) - } - Timestamp(_, Some(tz)) => { - args[0].cast_to(&Timestamp(Microsecond, Some(tz)), None) + Null | Int32 | Int64 | Timestamp(_, _) => { + args[0].cast_to(&Timestamp(Microsecond, self.timezone.clone()), None) } Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::( &args, "to_timestamp_micros", + &self.timezone, ), other => { exec_err!( @@ -543,6 +609,7 @@ impl ScalarUDFImpl for ToTimestampMicrosFunc { } } } + fn documentation(&self) -> Option<&Documentation> { self.doc() } @@ -561,15 +628,18 @@ impl ScalarUDFImpl for ToTimestampNanosFunc { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> Result { - Ok(return_type_for(&arg_types[0], Nanosecond)) + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(Timestamp(Nanosecond, self.timezone.clone())) } + impl_with_updated_config!(); + fn invoke_with_args( &self, args: datafusion_expr::ScalarFunctionArgs, ) -> Result { - let args = args.args; + let datafusion_expr::ScalarFunctionArgs { args, .. 
} = args; + if args.is_empty() { return exec_err!( "to_timestamp_nanos function requires 1 or more arguments, got {}", @@ -583,15 +653,14 @@ impl ScalarUDFImpl for ToTimestampNanosFunc { } match args[0].data_type() { - Null | Int32 | Int64 | Timestamp(_, None) => { - args[0].cast_to(&Timestamp(Nanosecond, None), None) - } - Timestamp(_, Some(tz)) => { - args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None) - } - Utf8View | LargeUtf8 | Utf8 => { - to_timestamp_impl::(&args, "to_timestamp_nanos") + Null | Int32 | Int64 | Timestamp(_, _) => { + args[0].cast_to(&Timestamp(Nanosecond, self.timezone.clone()), None) } + Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::( + &args, + "to_timestamp_nanos", + &self.timezone, + ), other => { exec_err!( "Unsupported data type {} for function to_timestamp_nanos", @@ -600,23 +669,16 @@ impl ScalarUDFImpl for ToTimestampNanosFunc { } } } + fn documentation(&self) -> Option<&Documentation> { self.doc() } } -/// Returns the return type for the to_timestamp_* function, preserving -/// the timezone if it exists. 
-fn return_type_for(arg: &DataType, unit: TimeUnit) -> DataType { - match arg { - Timestamp(_, Some(tz)) => Timestamp(unit, Some(Arc::clone(tz))), - _ => Timestamp(unit, None), - } -} - fn to_timestamp_impl>( args: &[ColumnarValue], name: &str, + timezone: &Option>, ) -> Result { let factor = match T::UNIT { Second => 1_000_000_000, @@ -625,17 +687,26 @@ fn to_timestamp_impl>( Nanosecond => 1, }; + let tz = match timezone.clone() { + Some(tz) => Some(tz.parse::()?), + None => None, + }; + match args.len() { - 1 => handle::( + 1 => handle::( args, - |s| string_to_timestamp_nanos_shim(s).map(|n| n / factor), + move |s| string_to_timestamp_nanos_with_timezone(&tz, s).map(|n| n / factor), name, + &Timestamp(T::UNIT, timezone.clone()), ), - n if n >= 2 => handle_multiple::( + n if n >= 2 => handle_multiple::( args, - string_to_timestamp_nanos_formatted, + move |s, format| { + string_to_timestamp_nanos_formatted_with_timezone(&tz, s, format) + }, |n| n / factor, name, + &Timestamp(T::UNIT, timezone.clone()), ), _ => exec_err!("Unsupported 0 argument count for function {name}"), } @@ -652,35 +723,110 @@ mod tests { }; use arrow::array::{ArrayRef, Int64Array, StringBuilder}; use arrow::datatypes::{Field, TimeUnit}; - use chrono::Utc; + use chrono::{DateTime, FixedOffset, Utc}; use datafusion_common::config::ConfigOptions; use datafusion_common::{DataFusionError, ScalarValue, assert_contains}; - use datafusion_expr::ScalarFunctionImplementation; + use datafusion_expr::{ScalarFunctionArgs, ScalarFunctionImplementation}; use super::*; fn to_timestamp(args: &[ColumnarValue]) -> Result { - to_timestamp_impl::(args, "to_timestamp") + let timezone: Option> = Some("UTC".into()); + to_timestamp_impl::(args, "to_timestamp", &timezone) } /// to_timestamp_millis SQL function fn to_timestamp_millis(args: &[ColumnarValue]) -> Result { - to_timestamp_impl::(args, "to_timestamp_millis") + let timezone: Option> = Some("UTC".into()); + to_timestamp_impl::( + args, + "to_timestamp_millis", + 
&timezone, + ) } /// to_timestamp_micros SQL function fn to_timestamp_micros(args: &[ColumnarValue]) -> Result { - to_timestamp_impl::(args, "to_timestamp_micros") + let timezone: Option> = Some("UTC".into()); + to_timestamp_impl::( + args, + "to_timestamp_micros", + &timezone, + ) } /// to_timestamp_nanos SQL function fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result { - to_timestamp_impl::(args, "to_timestamp_nanos") + let timezone: Option> = Some("UTC".into()); + to_timestamp_impl::( + args, + "to_timestamp_nanos", + &timezone, + ) } /// to_timestamp_seconds SQL function fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result { - to_timestamp_impl::(args, "to_timestamp_seconds") + let timezone: Option> = Some("UTC".into()); + to_timestamp_impl::(args, "to_timestamp_seconds", &timezone) + } + + fn udfs_and_timeunit() -> Vec<(Box, TimeUnit)> { + let udfs: Vec<(Box, TimeUnit)> = vec![ + ( + Box::new(ToTimestampFunc::new_with_config(&ConfigOptions::default())), + Nanosecond, + ), + ( + Box::new(ToTimestampSecondsFunc::new_with_config( + &ConfigOptions::default(), + )), + Second, + ), + ( + Box::new(ToTimestampMillisFunc::new_with_config( + &ConfigOptions::default(), + )), + Millisecond, + ), + ( + Box::new(ToTimestampMicrosFunc::new_with_config( + &ConfigOptions::default(), + )), + Microsecond, + ), + ( + Box::new(ToTimestampNanosFunc::new_with_config( + &ConfigOptions::default(), + )), + Nanosecond, + ), + ]; + udfs + } + + fn validate_expected_error( + options: &mut ConfigOptions, + args: ScalarFunctionArgs, + expected_err: &str, + ) { + let udfs = udfs_and_timeunit(); + + for (udf, _) in udfs { + match udf + .with_updated_config(options) + .unwrap() + .invoke_with_args(args.clone()) + { + Ok(_) => panic!("Expected error but got success"), + Err(e) => { + assert!( + e.to_string().contains(expected_err), + "Can not find expected error '{expected_err}'. 
Actual error '{e}'" + ); + } + } + } } #[test] @@ -751,6 +897,368 @@ mod tests { Ok(()) } + #[test] + fn to_timestamp_respects_execution_timezone() -> Result<()> { + let udfs = udfs_and_timeunit(); + + let mut options = ConfigOptions::default(); + options.execution.time_zone = Some("-05:00".to_string()); + + let time_zone: Option> = options + .execution + .time_zone + .as_ref() + .map(|tz| Arc::from(tz.as_str())); + + for (udf, time_unit) in udfs { + let field = Field::new("arg", Utf8, true).into(); + + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some( + "2020-09-08T13:42:29".to_string(), + )))], + arg_fields: vec![field], + number_rows: 1, + return_field: Field::new( + "f", + Timestamp(time_unit, Some("-05:00".into())), + true, + ) + .into(), + config_options: Arc::new(options.clone()), + }; + + let result = udf + .with_updated_config(&options.clone()) + .unwrap() + .invoke_with_args(args)?; + let result = match time_unit { + Second => { + let ColumnarValue::Scalar(ScalarValue::TimestampSecond( + Some(value), + tz, + )) = result + else { + panic!("expected scalar timestamp"); + }; + + assert_eq!(tz, time_zone); + + value + } + Millisecond => { + let ColumnarValue::Scalar(ScalarValue::TimestampMillisecond( + Some(value), + tz, + )) = result + else { + panic!("expected scalar timestamp"); + }; + + assert_eq!(tz, time_zone); + + value + } + Microsecond => { + let ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond( + Some(value), + tz, + )) = result + else { + panic!("expected scalar timestamp"); + }; + + assert_eq!(tz, time_zone); + + value + } + Nanosecond => { + let ColumnarValue::Scalar(ScalarValue::TimestampNanosecond( + Some(value), + tz, + )) = result + else { + panic!("expected scalar timestamp"); + }; + + assert_eq!(tz, time_zone); + + value + } + }; + + let scale = match time_unit { + Second => 1_000_000_000, + Millisecond => 1_000_000, + Microsecond => 1_000, + Nanosecond => 1, + }; + + let offset = 
FixedOffset::west_opt(5 * 3600).unwrap(); + let result = Some( + DateTime::::from_timestamp_nanos(result * scale) + .with_timezone(&offset) + .to_string(), + ); + + assert_eq!(result, Some("2020-09-08 13:42:29 -05:00".to_string())); + } + + Ok(()) + } + + #[test] + fn to_timestamp_formats_respects_execution_timezone() -> Result<()> { + let udfs = udfs_and_timeunit(); + + let mut options = ConfigOptions::default(); + options.execution.time_zone = Some("-05:00".to_string()); + + let time_zone: Option> = options + .execution + .time_zone + .as_ref() + .map(|tz| Arc::from(tz.as_str())); + + let expr_field = Field::new("arg", Utf8, true).into(); + let format_field: Arc = Field::new("fmt", Utf8, true).into(); + + for (udf, time_unit) in udfs { + for (value, format, expected_str) in [ + ( + "2020-09-08 09:42:29 -05:00", + "%Y-%m-%d %H:%M:%S %z", + Some("2020-09-08 09:42:29 -05:00"), + ), + ( + "2020-09-08T13:42:29Z", + "%+", + Some("2020-09-08 08:42:29 -05:00"), + ), + ( + "2020-09-08 13:42:29 UTC", + "%Y-%m-%d %H:%M:%S %Z", + Some("2020-09-08 08:42:29 -05:00"), + ), + ( + "+0000 2024-01-01 12:00:00", + "%z %Y-%m-%d %H:%M:%S", + Some("2024-01-01 07:00:00 -05:00"), + ), + ( + "20200908134229+0100", + "%Y%m%d%H%M%S%z", + Some("2020-09-08 07:42:29 -05:00"), + ), + ( + "2020-09-08+0230 13:42", + "%Y-%m-%d%z %H:%M", + Some("2020-09-08 06:12:00 -05:00"), + ), + ] { + let args = ScalarFunctionArgs { + args: vec![ + ColumnarValue::Scalar(ScalarValue::Utf8(Some(value.to_string()))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some( + format.to_string(), + ))), + ], + arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)], + number_rows: 1, + return_field: Field::new( + "f", + Timestamp(time_unit, Some("-05:00".into())), + true, + ) + .into(), + config_options: Arc::new(options.clone()), + }; + let result = udf + .with_updated_config(&options.clone()) + .unwrap() + .invoke_with_args(args)?; + let result = match time_unit { + Second => { + let 
ColumnarValue::Scalar(ScalarValue::TimestampSecond( + Some(value), + tz, + )) = result + else { + panic!("expected scalar timestamp"); + }; + + assert_eq!(tz, time_zone); + + value + } + Millisecond => { + let ColumnarValue::Scalar(ScalarValue::TimestampMillisecond( + Some(value), + tz, + )) = result + else { + panic!("expected scalar timestamp"); + }; + + assert_eq!(tz, time_zone); + + value + } + Microsecond => { + let ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond( + Some(value), + tz, + )) = result + else { + panic!("expected scalar timestamp"); + }; + + assert_eq!(tz, time_zone); + + value + } + Nanosecond => { + let ColumnarValue::Scalar(ScalarValue::TimestampNanosecond( + Some(value), + tz, + )) = result + else { + panic!("expected scalar timestamp"); + }; + + assert_eq!(tz, time_zone); + + value + } + }; + + let scale = match time_unit { + Second => 1_000_000_000, + Millisecond => 1_000_000, + Microsecond => 1_000, + Nanosecond => 1, + }; + let offset = FixedOffset::west_opt(5 * 3600).unwrap(); + let result = Some( + DateTime::::from_timestamp_nanos(result * scale) + .with_timezone(&offset) + .to_string(), + ); + + assert_eq!(result, expected_str.map(|s| s.to_string())); + } + } + + Ok(()) + } + + #[test] + fn to_timestamp_invalid_execution_timezone_behavior() -> Result<()> { + let field: Arc = Field::new("arg", Utf8, true).into(); + let return_field: Arc = + Field::new("f", Timestamp(Nanosecond, None), true).into(); + + let mut options = ConfigOptions::default(); + options.execution.time_zone = Some("Invalid/Timezone".to_string()); + + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some( + "2020-09-08T13:42:29Z".to_string(), + )))], + arg_fields: vec![Arc::clone(&field)], + number_rows: 1, + return_field: Arc::clone(&return_field), + config_options: Arc::new(options.clone()), + }; + + let expected_err = + "Invalid timezone \"Invalid/Timezone\": failed to parse timezone"; + + validate_expected_error(&mut 
options, args, expected_err); + + Ok(()) + } + + #[test] + fn to_timestamp_formats_invalid_execution_timezone_behavior() -> Result<()> { + let expr_field: Arc = Field::new("arg", Utf8, true).into(); + let format_field: Arc = Field::new("fmt", Utf8, true).into(); + let return_field: Arc = + Field::new("f", Timestamp(Nanosecond, None), true).into(); + + let mut options = ConfigOptions::default(); + options.execution.time_zone = Some("Invalid/Timezone".to_string()); + + let expected_err = + "Invalid timezone \"Invalid/Timezone\": failed to parse timezone"; + + let make_args = |value: &str, format: &str| ScalarFunctionArgs { + args: vec![ + ColumnarValue::Scalar(ScalarValue::Utf8(Some(value.to_string()))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some(format.to_string()))), + ], + arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)], + number_rows: 1, + return_field: Arc::clone(&return_field), + config_options: Arc::new(options.clone()), + }; + + for (value, format, _expected_str) in [ + ( + "2020-09-08 09:42:29 -05:00", + "%Y-%m-%d %H:%M:%S %z", + Some("2020-09-08 09:42:29 -05:00"), + ), + ( + "2020-09-08T13:42:29Z", + "%+", + Some("2020-09-08 08:42:29 -05:00"), + ), + ( + "2020-09-08 13:42:29 +0000", + "%Y-%m-%d %H:%M:%S %z", + Some("2020-09-08 08:42:29 -05:00"), + ), + ( + "+0000 2024-01-01 12:00:00", + "%z %Y-%m-%d %H:%M:%S", + Some("2024-01-01 07:00:00 -05:00"), + ), + ( + "20200908134229+0100", + "%Y%m%d%H%M%S%z", + Some("2020-09-08 07:42:29 -05:00"), + ), + ( + "2020-09-08+0230 13:42", + "%Y-%m-%d%z %H:%M", + Some("2020-09-08 06:12:00 -05:00"), + ), + ] { + let args = make_args(value, format); + validate_expected_error(&mut options.clone(), args, expected_err); + } + + let args = ScalarFunctionArgs { + args: vec![ + ColumnarValue::Scalar(ScalarValue::Utf8(Some( + "2020-09-08T13:42:29".to_string(), + ))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some( + "%Y-%m-%dT%H:%M:%S".to_string(), + ))), + ], + arg_fields: vec![Arc::clone(&expr_field), 
Arc::clone(&format_field)], + number_rows: 1, + return_field: Arc::clone(&return_field), + config_options: Arc::new(options.clone()), + }; + + validate_expected_error(&mut options.clone(), args, expected_err); + + Ok(()) + } + #[test] fn to_timestamp_invalid_input_type() -> Result<()> { // pass the wrong type of input array to to_timestamp and test @@ -920,7 +1428,11 @@ mod tests { } fn parse_timestamp_formatted(s: &str, format: &str) -> Result { - let result = string_to_timestamp_nanos_formatted(s, format); + let result = string_to_timestamp_nanos_formatted_with_timezone( + &Some("UTC".parse()?), + s, + format, + ); if let Err(e) = &result { eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}"); } @@ -988,13 +1500,21 @@ mod tests { } #[test] - fn test_tz() { + fn test_no_tz() { let udfs: Vec> = vec![ - Box::new(ToTimestampFunc::new()), - Box::new(ToTimestampSecondsFunc::new()), - Box::new(ToTimestampMillisFunc::new()), - Box::new(ToTimestampNanosFunc::new()), - Box::new(ToTimestampSecondsFunc::new()), + Box::new(ToTimestampFunc::new_with_config(&ConfigOptions::default())), + Box::new(ToTimestampSecondsFunc::new_with_config( + &ConfigOptions::default(), + )), + Box::new(ToTimestampMillisFunc::new_with_config( + &ConfigOptions::default(), + )), + Box::new(ToTimestampNanosFunc::new_with_config( + &ConfigOptions::default(), + )), + Box::new(ToTimestampSecondsFunc::new_with_config( + &ConfigOptions::default(), + )), ]; let mut nanos_builder = TimestampNanosecondArray::builder(2); @@ -1027,8 +1547,8 @@ mod tests { for array in arrays { let rt = udf.return_type(&[array.data_type()]).unwrap(); let arg_field = Field::new("arg", array.data_type().clone(), true).into(); - assert!(matches!(rt, Timestamp(_, Some(_)))); - let args = datafusion_expr::ScalarFunctionArgs { + assert!(matches!(rt, Timestamp(_, None))); + let args = ScalarFunctionArgs { args: vec![array.clone()], arg_fields: vec![arg_field], number_rows: 4, @@ -1043,7 +1563,7 @@ mod tests { _ => 
panic!("Expected a columnar array"), }; let ty = array.data_type(); - assert!(matches!(ty, Timestamp(_, Some(_)))); + assert!(matches!(ty, Timestamp(_, None))); } } @@ -1078,7 +1598,7 @@ mod tests { let rt = udf.return_type(&[array.data_type()]).unwrap(); assert!(matches!(rt, Timestamp(_, None))); let arg_field = Field::new("arg", array.data_type().clone(), true).into(); - let args = datafusion_expr::ScalarFunctionArgs { + let args = ScalarFunctionArgs { args: vec![array.clone()], arg_fields: vec![arg_field], number_rows: 5, diff --git a/datafusion/functions/src/datetime/to_unixtime.rs b/datafusion/functions/src/datetime/to_unixtime.rs index fb6315d8f0e06..5ebcce0a7cfc2 100644 --- a/datafusion/functions/src/datetime/to_unixtime.rs +++ b/datafusion/functions/src/datetime/to_unixtime.rs @@ -135,7 +135,7 @@ impl ScalarUDFImpl for ToUnixtimeFunc { .cast_to(&DataType::Timestamp(TimeUnit::Second, tz), None)? .cast_to(&DataType::Int64, None), DataType::Utf8View | DataType::LargeUtf8 | DataType::Utf8 => { - ToTimestampSecondsFunc::new() + ToTimestampSecondsFunc::new_with_config(args.config_options.as_ref()) .invoke_with_args(args)? .cast_to(&DataType::Int64, None) } diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs index 3d728a178497a..4adc331fef669 100644 --- a/datafusion/functions/src/macros.rs +++ b/datafusion/functions/src/macros.rs @@ -41,6 +41,17 @@ /// - `Vec` argument (single argument followed by a comma) /// - Variable number of `Expr` arguments (zero or more arguments, must be without commas) /// - Functions that require config (marked with `@config` prefix) +/// +/// Note on configuration construction paths: +/// - The convenience wrappers generated for `@config` functions call the inner +/// constructor with `ConfigOptions::default()`. These wrappers are intended +/// primarily for programmatic `Expr` construction and convenience usage. 
+/// - When functions are registered in a session, DataFusion will call +/// `with_updated_config()` to create a `ScalarUDF` instance using the session's +/// actual `ConfigOptions`. This also happens when configuration changes at runtime +/// (e.g., via `SET` statements). In short: the macro uses the default config for +/// convenience constructors; the session config is applied when functions are +/// registered or when configuration is updated. #[macro_export] macro_rules! export_functions { ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),*) => { @@ -59,6 +70,24 @@ macro_rules! export_functions { } }; + // function that requires config and takes a vector argument + (single $FUNC:ident, $DOC:expr, @config $arg:ident,) => { + #[doc = $DOC] + pub fn $FUNC($arg: Vec) -> datafusion_expr::Expr { + use datafusion_common::config::ConfigOptions; + super::$FUNC(&ConfigOptions::default()).call($arg) + } + }; + + // function that requires config and variadic arguments + (single $FUNC:ident, $DOC:expr, @config $($arg:ident)*) => { + #[doc = $DOC] + pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr { + use datafusion_common::config::ConfigOptions; + super::$FUNC(&ConfigOptions::default()).call(vec![$($arg),*]) + } + }; + // single vector argument (a single argument followed by a comma) (single $FUNC:ident, $DOC:expr, $arg:ident,) => { #[doc = $DOC] diff --git a/datafusion/sqllogictest/test_files/datetime/timestamps.slt b/datafusion/sqllogictest/test_files/datetime/timestamps.slt index 5749b1c53d852..dbb924ef7aa63 100644 --- a/datafusion/sqllogictest/test_files/datetime/timestamps.slt +++ b/datafusion/sqllogictest/test_files/datetime/timestamps.slt @@ -193,6 +193,8 @@ SELECT TIMESTAMPTZ '2000-01-01T01:01:01' ---- 2000-01-01T01:01:01Z +statement ok +RESET datafusion.execution.time_zone ########## ## cast tests @@ -2331,7 +2333,7 @@ CREATE TABLE foo (time TIMESTAMPTZ) AS VALUES ('2020-01-01T03:00:00+05:00') statement ok -SET TIME ZONE = '+00' +RESET 
datafusion.execution.time_zone # verify column type query T @@ -2445,7 +2447,7 @@ true true true true true true # known issues. currently overflows (expects default precision to be microsecond instead of nanoseconds. Work pending) #verify extreme values #query PPPPPPPP -#SELECT to_timestamp(-62125747200), to_timestamp(1926632005177), -62125747200::timestamp, 1926632005177::timestamp, cast(-62125747200 as timestamp), cast(1926632005177 as timestamp) +#SELECT to_timestamp(-62125747200), to_timestamp(1926632005177), -62125747200::timestamp as t1, 1926632005177::timestamp, cast(-62125747200 as timestamp), cast(1926632005177 as timestamp) as t2 #---- #0001-04-25T00:00:00 +63022-07-16T12:59:37 0001-04-25T00:00:00 +63022-07-16T12:59:37 0001-04-25T00:00:00 +63022-07-16T12:59:37 @@ -2771,8 +2773,8 @@ SELECT t1.ts, t1.ts + INTERVAL '1' SECOND FROM t1; query PT SELECT t1.ts::timestamptz, arrow_typeof(t1.ts::timestamptz) FROM t1; ---- -2018-07-01T06:00:00Z Timestamp(ns, "+00") -2018-07-01T07:00:00Z Timestamp(ns, "+00") +2018-07-01T06:00:00 Timestamp(ns) +2018-07-01T07:00:00 Timestamp(ns) query D SELECT 0::TIME @@ -4339,7 +4341,7 @@ SELECT arrow_cast(a, 'LargeUtf8') FROM (SELECT CAST('2005-09-10 13:31:00 +02:00' AS timestamp with time zone) AS a) ---- -Timestamp(ns, "+00") 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z +Timestamp(ns) 2005-09-10T11:31:00 2005-09-10T11:31:00 2005-09-10T11:31:00 2005-09-10T11:31:00 query P SELECT diff --git a/datafusion/sqllogictest/test_files/to_timestamp_timezone.slt b/datafusion/sqllogictest/test_files/to_timestamp_timezone.slt new file mode 100644 index 0000000000000..d48e41d1204de --- /dev/null +++ b/datafusion/sqllogictest/test_files/to_timestamp_timezone.slt @@ -0,0 +1,204 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +########## +## to_timestamp timezone tests +########## + +## Reset timezone for other tests +statement ok +RESET datafusion.execution.time_zone + +## Test 1: Default timezone (None) with naive timestamp +## Naive timestamps (without explicit timezone) should be interpreted as UTC by default +query P +SELECT to_timestamp('2020-09-08T13:42:29'); +---- +2020-09-08T13:42:29 + +## Test 2: Explicit UTC timezone ('Z' suffix) +## Explicit timezone should be respected regardless of session timezone +query P +SELECT to_timestamp('2020-09-08T13:42:29Z'); +---- +2020-09-08T13:42:29 + +## Test 3: Explicit timezone offset (+05:00) +## Explicit offset should be respected - this is 13:42:29+05:00 which is 08:42:29 UTC +query P +SELECT to_timestamp('2020-09-08T13:42:29+05:00'); +---- +2020-09-08T08:42:29 + +## Test 4: Explicit timezone offset without colon (+0500) +## Should handle offset formats without colons +query P +SELECT to_timestamp('2020-09-08T13:42:29+0500'); +---- +2020-09-08T08:42:29 + +## Test 5: Negative timezone offset +query P +SELECT to_timestamp('2020-09-08T13:42:29-03:30'); +---- +2020-09-08T17:12:29 + +## Test 6: Configure session timezone to America/New_York +statement ok +SET datafusion.execution.time_zone = 'America/New_York'; + +## Test 7: Naive timestamp with configured timezone +## '2020-09-08T13:42:29' in America/New_York is EDT (UTC-4) +## So this should become 
'2020-09-08T13:42:29-04:00' +query P +SELECT to_timestamp('2020-09-08T13:42:29'); +---- +2020-09-08T13:42:29-04:00 + +## Test 8: Explicit UTC should be transformed to configured timezone +query P +SELECT to_timestamp('2020-09-08T13:42:29Z'); +---- +2020-09-08T09:42:29-04:00 + +## Test 9: Explicit offset should be transformed to configured timezone +query P +SELECT to_timestamp('2020-09-08T13:42:29+05:00'); +---- +2020-09-08T04:42:29-04:00 + +## Test 10: Check arrow_typeof returns timestamp in configured timezone +## Result should be Timestamp(Nanosecond, "America/New_York") regardless of input timezone +query T +SELECT arrow_typeof(to_timestamp('2020-09-08T13:42:29Z')); +---- +Timestamp(ns, "America/New_York") + +## Test 11: Configure to offset-based timezone +statement ok +SET datafusion.execution.time_zone = '+05:30'; + +## Test 12: Naive timestamp with offset-based configured timezone +query P +SELECT to_timestamp('2020-09-08T13:42:29'); +---- +2020-09-08T13:42:29+05:30 + +## Test 13: Reset to None +statement ok +RESET datafusion.execution.time_zone + +## Test 14: Naive timestamp +query P +SELECT to_timestamp('2020-09-08T13:42:29'); +---- +2020-09-08T13:42:29 + +query P +SELECT to_timestamp('2020-09-08T13:42:29Z'); +---- +2020-09-08T13:42:29 + +query P +SELECT to_timestamp('2020-09-08T13:42:29+05:00'); +---- +2020-09-08T08:42:29 + +statement ok +SET datafusion.execution.time_zone = 'America/New_York'; + +## Test 15: to_timestamp with format string - naive timestamp with session timezone + +query P +SELECT to_timestamp('2020-09-08 13:42:29', '%Y-%m-%d %H:%M:%S'); +---- +2020-09-08T13:42:29-04:00 + +## Test 16: to_timestamp with format string - explicit timezone should be respected +statement ok +SET datafusion.execution.time_zone = 'UTC'; + +query P +SELECT to_timestamp('2020-09-08 13:42:29 +0000', '%Y-%m-%d %H:%M:%S %z'); +---- +2020-09-08T13:42:29Z + +query P +SELECT to_timestamp('2020-09-08 13:42:29 America/Toronto', '%Y-%m-%d %H:%M:%S %Z'); +----
+2020-09-08T17:42:29Z + +query error Error parsing timestamp from '2020-09-08 13:42:29America/Toronto' using format '%Y-%m-%d %H:%M:%S%Z': '%Z' is only supported at the end of the format string preceded by a space +SELECT to_timestamp('2020-09-08 13:42:29America/Toronto', '%Y-%m-%d %H:%M:%S%Z'); + +## Test 17: Test all precision variants respect timezone +statement ok +SET datafusion.execution.time_zone = 'America/New_York'; + +## to_timestamp_seconds +query P +SELECT to_timestamp_seconds('2020-09-08T13:42:29'); +---- +2020-09-08T13:42:29-04:00 + +## to_timestamp_millis +query P +SELECT to_timestamp_millis('2020-09-08T13:42:29.123'); +---- +2020-09-08T13:42:29.123-04:00 + +## to_timestamp_micros +query P +SELECT to_timestamp_micros('2020-09-08T13:42:29.123456'); +---- +2020-09-08T13:42:29.123456-04:00 + +## to_timestamp_nanos +query P +SELECT to_timestamp_nanos('2020-09-08T13:42:29.123456789'); +---- +2020-09-08T13:42:29.123456789-04:00 + +## test integers +query T +select arrow_typeof(to_timestamp_seconds(61)) +---- +Timestamp(s, "America/New_York") + +query T +select arrow_typeof(to_timestamp_millis(61)) +---- +Timestamp(ms, "America/New_York") + +query T +select arrow_typeof(to_timestamp_micros(61)) +---- +Timestamp(µs, "America/New_York") + +query T +select arrow_typeof(to_timestamp_nanos(61)) +---- +Timestamp(ns, "America/New_York") + +query T +select arrow_typeof(to_timestamp(61)) +---- +Timestamp(ns, "America/New_York") + +## Reset timezone for other tests +statement ok +RESET datafusion.execution.time_zone diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 360311552f025..fe1ed1cab6bd7 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2867,9 +2867,19 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ### `to_timestamp` -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). 
Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000`) in the session time zone. Supports strings, +integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') +if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the +session time zone, or UTC if no session time zone is set. +Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). -Note: `to_timestamp` returns `Timestamp(ns)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds. +Note: `to_timestamp` returns `Timestamp(ns, TimeZone)` where the time zone is the session time zone. The supported range +for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between +`1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` +for the input outside of supported bounds. + +The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. +The time zone can be a value like +00:00, 'Europe/London' etc. ```sql to_timestamp(expression[, ..., format_n]) @@ -2878,7 +2888,11 @@ to_timestamp(expression[, ..., format_n]) #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. 
-- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. +- **format_n**: + Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. + Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully + parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is + only supported at the end of the string preceded by a space. #### Example @@ -2901,7 +2915,14 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ### `to_timestamp_micros` -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000`) in the session time zone. Supports strings, +integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') +if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the +session time zone, or UTC if no session time zone is set. +Integers, unsigned integers, and doubles are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`). + +The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. 
+The time zone can be a value like +00:00, 'Europe/London' etc. ```sql to_timestamp_micros(expression[, ..., format_n]) @@ -2910,7 +2931,11 @@ to_timestamp_micros(expression[, ..., format_n]) #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. +- **format_n**: + Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. + Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully + parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is + only supported at the end of the string preceded by a space. #### Example @@ -2933,7 +2958,14 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ### `to_timestamp_millis` -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000`) in the session time zone. Supports strings, +integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') +if no [Chrono formats] are provided. 
Strings that parse without a time zone are treated as if they are in the +session time zone, or UTC if no session time zone is set. +Integers, unsigned integers, and doubles are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). + +The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. +The time zone can be a value like +00:00, 'Europe/London' etc. ```sql to_timestamp_millis(expression[, ..., format_n]) @@ -2942,7 +2974,11 @@ to_timestamp_millis(expression[, ..., format_n]) #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. +- **format_n**: + Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. + Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully + parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is + only supported at the end of the string preceded by a space. #### Example @@ -2965,7 +3001,13 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ### `to_timestamp_nanos` -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. 
Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000`) in the session time zone. Supports strings, +integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') +if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the +session time zone, or UTC if no session time zone is set. Integers, unsigned integers, and doubles are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). + +The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. +The time zone can be a value like +00:00, 'Europe/London' etc. ```sql to_timestamp_nanos(expression[, ..., format_n]) @@ -2974,7 +3016,11 @@ to_timestamp_nanos(expression[, ..., format_n]) #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. +- **format_n**: + Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. + Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully + parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is + only supported at the end of the string preceded by a space. 
#### Example @@ -2997,7 +3043,14 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ### `to_timestamp_seconds` -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00`) in the session time zone. Supports strings, +integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') +if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the +session time zone, or UTC if no session time zone is set. +Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). + +The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. +The time zone can be a value like +00:00, 'Europe/London' etc. ```sql to_timestamp_seconds(expression[, ..., format_n]) @@ -3006,7 +3059,11 @@ to_timestamp_seconds(expression[, ..., format_n]) #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. +- **format_n**: + Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. 
+ Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully + parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is + only supported at the end of the string preceded by a space. #### Example