From 91c9331057f1bc6a3b90d2800974ef2cf6c5a544 Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Fri, 23 May 2025 19:37:44 -0700 Subject: [PATCH] Use try_element_at for expand=True columns in pandas strings.py when compute.ansi_mode_support is on; add split(expand=True) test --- python/pyspark/pandas/strings.py | 8 +++++++- python/pyspark/pandas/tests/series/test_string_ops_adv.py | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/python/pyspark/pandas/strings.py b/python/pyspark/pandas/strings.py index 7e572bd1fae3e..6213750f9c9d5 100644 --- a/python/pyspark/pandas/strings.py +++ b/python/pyspark/pandas/strings.py @@ -2031,7 +2031,13 @@ def pudf(s: pd.Series) -> pd.Series: if expand: psdf = psser.to_frame() scol = psdf._internal.data_spark_columns[0] - spark_columns = [scol[i].alias(str(i)) for i in range(n + 1)] + + if ps.get_option("compute.ansi_mode_support"): + spark_columns = [ + F.try_element_at(scol, F.lit(i + 1)).alias(str(i)) for i in range(n + 1) + ] + else: + spark_columns = [scol[i].alias(str(i)) for i in range(n + 1)] column_labels = [(i,) for i in range(n + 1)] internal = psdf._internal.with_new_columns( spark_columns, diff --git a/python/pyspark/pandas/tests/series/test_string_ops_adv.py b/python/pyspark/pandas/tests/series/test_string_ops_adv.py index e00252110daec..d50b2c0621ec0 100644 --- a/python/pyspark/pandas/tests/series/test_string_ops_adv.py +++ b/python/pyspark/pandas/tests/series/test_string_ops_adv.py @@ -185,6 +185,8 @@ def test_string_split(self): with self.assertRaises(NotImplementedError): self.check_func(lambda x: x.str.split(expand=True)) + self.check_func_on_series(lambda x: repr(x.str.split("-", n=1, expand=True)), pser) + @unittest.skipIf(is_ansi_mode_test, ansi_mode_not_supported_message) def test_string_rsplit(self): self.check_func_on_series(lambda x: repr(x.str.rsplit()), self.pser[:-1])