Support collated string types as inputs to Python UDFs and UDTFs

ilicmarkodb · ilicmarkodb · commit 2f1bee58809c · 2025-07-28T16:34:55.000+02:00
diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py
@@ -1379,6 +1379,26 @@ def add1(x):
         result = empty_df.select(add1("id"))
         self.assertEqual(result.collect(), [])
 
+    def test_udf_with_collated_string_types(self):
+        # A Python UDF must receive a plain ``str`` whatever the input collation.
+        @udf("string")
+        def my_udf(input_val):
+            return "%s - %s" % (type(input_val), input_val)
+
+        string_types = [
+            StringType(),
+            StringType("UTF8_BINARY"),
+            StringType("UTF8_LCASE"),
+            StringType("UNICODE"),
+        ]
+        for string_type in string_types:
+            # subTest reports which string type failed and still runs the rest.
+            with self.subTest(string_type=string_type):
+                schema = StructType([StructField("input_col", string_type, True)])
+                df = self.spark.createDataFrame([("hello",)], schema=schema)
+                result = df.select(my_udf(df.input_col)).collect()[0][0]
+                self.assertEqual(result, "<class 'str'> - hello")
+
 
 class UDFTests(BaseUDFTestsMixin, ReusedSQLTestCase):
     @classmethod
diff --git a/python/pyspark/sql/tests/test_udtf.py b/python/pyspark/sql/tests/test_udtf.py
@@ -3437,6 +3437,41 @@ def eval(self):
                     udtf(TestUDTF, returnType=ret_type)().collect()
 
 
+    def test_udtf_with_collated_string_types(self):
+        # Collated string columns must reach ``eval`` as ordinary Python strings.
+        class MyUDTF:
+            def eval(self, val1, val2, val3, val4):
+                yield (val1 + "1", val2 + "2", val3 + "3", val4 + "4")
+
+        my_udtf = udtf(
+            MyUDTF,
+            returnType=StructType(
+                [
+                    StructField("out1", StringType()),
+                    StructField("out2", StringType()),
+                    StructField("out3", StringType()),
+                    StructField("out4", StringType()),
+                ]
+            ),
+        )
+
+        schema = StructType(
+            [
+                StructField("col1", StringType(), True),
+                StructField("col2", StringType("UTF8_BINARY"), True),
+                StructField("col3", StringType("UTF8_LCASE"), True),
+                StructField("col4", StringType("UNICODE"), True),
+            ]
+        )
+
+        data = [("hello", "hello", "hello", "hello")]
+        df = self.spark.createDataFrame(data, schema=schema)
+
+        # Calling a UDTF yields a DataFrame rather than a Column, so it cannot be
+        # passed to ``select``; feed it the table's columns through a LATERAL call.
+        self.spark.udtf.register("collated_strings_udtf", my_udtf)
+        df.createOrReplaceTempView("collated_strings_input")
+        try:
+            result_row = self.spark.sql(
+                "SELECT out1, out2, out3, out4 FROM collated_strings_input, "
+                "LATERAL collated_strings_udtf(col1, col2, col3, col4)"
+            ).collect()[0]
+        finally:
+            self.spark.catalog.dropTempView("collated_strings_input")
+
+        expected = ("hello1", "hello2", "hello3", "hello4")
+        self.assertEqual(result_row, expected)
+
+
 class UDTFArrowTests(UDTFArrowTestsMixin, ReusedSQLTestCase):
     @classmethod
     def setUpClass(cls):
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala
@@ -79,7 +79,7 @@ object EvaluatePython {
 
     case (d: Decimal, _) => d.toJavaBigDecimal
 
-    case (s: UTF8String, StringType) => s.toString
+    case (s: UTF8String, _: StringType) => s.toString
 
     case (other, _) => other
   }

Original file line number	Diff line number	Diff line change
`@@ -79,7 +79,7 @@ object EvaluatePython {`
`79`	`79`
`80`	`80`	`case (d: Decimal, _) => d.toJavaBigDecimal`
`81`	`81`
`82`		`- case (s: UTF8String, StringType) => s.toString`
	`82`	`+ case (s: UTF8String, _: StringType) => s.toString`
`83`	`83`
`84`	`84`	`case (other, _) => other`
`85`	`85`	`}`