Skip to content

Commit 2817654

Browse files
uros-db authored and HyukjinKwon committed
[SPARK-52892][SPARK-52893][SPARK-52894][PYTHON] Support TIME in the hour, minute, and second functions in PySpark
### What changes were proposed in this pull request? Implement the `hour`, `minute`, and `second` functions for TIME type in PySpark & PySpark Connect API. ### Why are the changes needed? Expand API support for the `hour`, `minute`, and `second` functions for TIME type. ### Does this PR introduce _any_ user-facing change? Yes, the new functions for TIME type are now available in Python API. ### How was this patch tested? Added appropriate Python function tests. - pyspark.sql.tests.test_functions - pyspark.sql.tests.connect.test_parity_functions ### Was this patch authored or co-authored using generative AI tooling? No. Closes #51586 from uros-db/python-hr_min_sec. Authored-by: Uros Bojanic <[email protected]> Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent cdb4f71 commit 2817654

File tree

2 files changed

+81
-3
lines changed

2 files changed

+81
-3
lines changed

python/pyspark/sql/functions/builtin.py

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10127,10 +10127,13 @@ def hour(col: "ColumnOrName") -> Column:
1012710127
.. versionchanged:: 3.4.0
1012810128
Supports Spark Connect.
1012910129

10130+
.. versionchanged:: 4.1.0
10131+
Added support for time type.
10132+
1013010133
Parameters
1013110134
----------
1013210135
col : :class:`~pyspark.sql.Column` or column name
10133-
target date/timestamp column to work on.
10136+
target date/time/timestamp column to work on.
1013410137

1013510138
Returns
1013610139
-------
@@ -10177,6 +10180,21 @@ def hour(col: "ColumnOrName") -> Column:
1017710180
|2015-04-08 13:08:15| timestamp| 13|
1017810181
|2024-10-31 10:09:16| timestamp| 10|
1017910182
+-------------------+----------+--------+
10183+
10184+
Example 3: Extract the hours from a time column
10185+
10186+
>>> import datetime
10187+
>>> from pyspark.sql import functions as sf
10188+
>>> df = spark.createDataFrame([
10189+
... ("13:08:15",),
10190+
... ("10:09:16",)], ['t']).withColumn("t", sf.col("t").cast("time"))
10191+
>>> df.select("*", sf.typeof('t'), sf.hour('t')).show()
10192+
+--------+---------+-------+
10193+
| t|typeof(t)|hour(t)|
10194+
+--------+---------+-------+
10195+
|13:08:15| time(6)| 13|
10196+
|10:09:16| time(6)| 10|
10197+
+--------+---------+-------+
1018010198
"""
1018110199
return _invoke_function_over_columns("hour", col)
1018210200

@@ -10191,10 +10209,13 @@ def minute(col: "ColumnOrName") -> Column:
1019110209
.. versionchanged:: 3.4.0
1019210210
Supports Spark Connect.
1019310211

10212+
.. versionchanged:: 4.1.0
10213+
Added support for time type.
10214+
1019410215
Parameters
1019510216
----------
1019610217
col : :class:`~pyspark.sql.Column` or column name
10197-
target date/timestamp column to work on.
10218+
target date/time/timestamp column to work on.
1019810219

1019910220
See Also
1020010221
--------
@@ -10241,6 +10262,21 @@ def minute(col: "ColumnOrName") -> Column:
1024110262
|2015-04-08 13:08:15| timestamp| 8|
1024210263
|2024-10-31 10:09:16| timestamp| 9|
1024310264
+-------------------+----------+----------+
10265+
10266+
Example 3: Extract the minutes from a time column
10267+
10268+
>>> import datetime
10269+
>>> from pyspark.sql import functions as sf
10270+
>>> df = spark.createDataFrame([
10271+
... ("13:08:15",),
10272+
... ("10:09:16",)], ['t']).withColumn("t", sf.col("t").cast("time"))
10273+
>>> df.select("*", sf.typeof('t'), sf.minute('t')).show()
10274+
+--------+---------+---------+
10275+
| t|typeof(t)|minute(t)|
10276+
+--------+---------+---------+
10277+
|13:08:15| time(6)| 8|
10278+
|10:09:16| time(6)| 9|
10279+
+--------+---------+---------+
1024410280
"""
1024510281
return _invoke_function_over_columns("minute", col)
1024610282

@@ -10255,10 +10291,13 @@ def second(col: "ColumnOrName") -> Column:
1025510291
.. versionchanged:: 3.4.0
1025610292
Supports Spark Connect.
1025710293

10294+
.. versionchanged:: 4.1.0
10295+
Added support for time type.
10296+
1025810297
Parameters
1025910298
----------
1026010299
col : :class:`~pyspark.sql.Column` or column name
10261-
target date/timestamp column to work on.
10300+
target date/time/timestamp column to work on.
1026210301

1026310302
Returns
1026410303
-------
@@ -10305,6 +10344,21 @@ def second(col: "ColumnOrName") -> Column:
1030510344
|2015-04-08 13:08:15| timestamp| 15|
1030610345
|2024-10-31 10:09:16| timestamp| 16|
1030710346
+-------------------+----------+----------+
10347+
10348+
Example 3: Extract the seconds from a time column
10349+
10350+
>>> import datetime
10351+
>>> from pyspark.sql import functions as sf
10352+
>>> df = spark.createDataFrame([
10353+
... ("13:08:15",),
10354+
... ("10:09:16",)], ['t']).withColumn("t", sf.col("t").cast("time"))
10355+
>>> df.select("*", sf.typeof('t'), sf.second('t')).show()
10356+
+--------+---------+---------+
10357+
| t|typeof(t)|second(t)|
10358+
+--------+---------+---------+
10359+
|13:08:15| time(6)| 15|
10360+
|10:09:16| time(6)| 16|
10361+
+--------+---------+---------+
1030810362
"""
1030910363
return _invoke_function_over_columns("second", col)
1031010364

python/pyspark/sql/tests/test_functions.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,30 @@ def test_dayname(self):
579579
row = df.select(F.dayname(df.date)).first()
580580
self.assertEqual(row[0], "Mon")
581581

582+
def test_hour(self):
583+
# SPARK-52892: test the hour function with time.
584+
df = self.spark.range(1).select(F.lit(datetime.time(12, 34, 56)).alias("time"))
585+
row_from_col = df.select(F.hour(df.time)).first()
586+
self.assertEqual(row_from_col[0], 12)
587+
row_from_name = df.select(F.hour("time")).first()
588+
self.assertEqual(row_from_name[0], 12)
589+
590+
def test_minute(self):
591+
# SPARK-52893: test the minute function with time.
592+
df = self.spark.range(1).select(F.lit(datetime.time(12, 34, 56)).alias("time"))
593+
row_from_col = df.select(F.minute(df.time)).first()
594+
self.assertEqual(row_from_col[0], 34)
595+
row_from_name = df.select(F.minute("time")).first()
596+
self.assertEqual(row_from_name[0], 34)
597+
598+
def test_second(self):
599+
# SPARK-52894: test the second function with time.
600+
df = self.spark.range(1).select(F.lit(datetime.time(12, 34, 56)).alias("time"))
601+
row_from_col = df.select(F.second(df.time)).first()
602+
self.assertEqual(row_from_col[0], 56)
603+
row_from_name = df.select(F.second("time")).first()
604+
self.assertEqual(row_from_name[0], 56)
605+
582606
# Test added for SPARK-37738; change Python API to accept both col & int as input
583607
def test_date_add_function(self):
584608
dt = datetime.date(2021, 12, 27)

0 commit comments

Comments (0)