-
Notifications
You must be signed in to change notification settings - Fork 315
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/decimal support #982
base: main
Are you sure you want to change the base?
Changes from all commits
e42a0c4
ca84f6a
5dc2143
aefc393
9d31c3a
c5b660d
ec9ca58
4f29c18
3c1b505
338210a
66e357b
ceab50f
7ff1c8e
546ba36
a72e152
4c88ece
592a57d
c33f697
f9a0aa0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using Microsoft.Spark.Sql; | ||
using Microsoft.Spark.Sql.Types; | ||
using Xunit; | ||
|
||
namespace Microsoft.Spark.E2ETest.IpcTests | ||
{ | ||
|
||
[Collection("Spark E2E Tests")]
public class DataTypesTests
{
    private readonly SparkSession _spark;

    public DataTypesTests(SparkFixture fixture) => _spark = fixture.Spark;

    /// <summary>
    /// Round-trips decimal values through Apache Spark: a single row holding
    /// decimal.MinValue, decimal.MaxValue, zero, minus one and an array of those
    /// same four values is turned into a DataFrame, collected back, and compared
    /// against the values that were sent.
    /// </summary>
    [Fact]
    public void TestDecimalType()
    {
        var schema = new StructType(
            new List<StructField>()
            {
                new StructField("min", new DecimalType(38, 0)),
                new StructField("max", new DecimalType(38, 0)),
                new StructField("zero", new DecimalType(38, 0)),
                new StructField("minusOne", new DecimalType(38, 0)),
                new StructField("array", new ArrayType(new DecimalType(38, 0)))
            });

        var rows = new List<GenericRow>
        {
            new GenericRow(
                new object[]
                {
                    decimal.MinValue,
                    decimal.MaxValue,
                    decimal.Zero,
                    decimal.MinusOne,
                    new object[]
                    {
                        decimal.MinValue, decimal.MaxValue, decimal.Zero, decimal.MinusOne
                    }
                }),
        };

        Row collected = _spark.CreateDataFrame(rows, schema).Collect().First();

        // The first four columns hold the scalar decimals, in this order.
        var expectedScalars = new object[]
        {
            decimal.MinValue, decimal.MaxValue, decimal.Zero, decimal.MinusOne
        };
        for (int column = 0; column < expectedScalars.Length; ++column)
        {
            Assert.Equal(expectedScalars[column], collected[column]);
        }

        // The fifth column is the array of the same four decimals.
        var expectedArray = new object[]
        {
            decimal.MinValue, decimal.MaxValue, decimal.Zero, decimal.MinusOne
        };
        Assert.Equal(expectedArray, collected[4]);
    }
}
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -269,6 +269,9 @@ private object CallJavaMethod( | |
case 'd': | ||
returnValue = SerDe.ReadDouble(inputStream); | ||
break; | ||
case 'm': | ||
returnValue = decimal.Parse(SerDe.ReadString(inputStream)); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use `decimal.Parse(SerDe.ReadString(inputStream), CultureInfo.InvariantCulture)` to ensure we are using the invariant culture on the wire. |
||
break; | ||
case 'b': | ||
returnValue = Convert.ToBoolean(inputStream.ReadByte()); | ||
break; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -322,6 +322,13 @@ public static void Write(Stream s, long value) | |
public static void Write(Stream s, double value) => | ||
Write(s, BitConverter.DoubleToInt64Bits(value)); | ||
|
||
/// <summary>
/// Writes a decimal to a stream as a string. The value is formatted with the
/// invariant culture so that the text on the wire does not depend on the
/// current thread's culture (for example, its decimal separator).
/// </summary>
/// <param name="s">The stream to write</param>
/// <param name="value">The decimal to write</param>
public static void Write(Stream s, decimal value) =>
    Write(s, value.ToString(System.Globalization.CultureInfo.InvariantCulture));
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should use `ToString(CultureInfo.InvariantCulture)` if we are using a string on the wire. |
||
|
||
/// <summary> | ||
/// Writes a string to a stream. | ||
/// </summary> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I haven't gotten to dive deep into whether this is an issue yet, but want to bring it to attention just in case:
There was a time when we were comparing SQL Server output to Spark SQL output while trying to migrate a pipeline to Synapse, and when attempting to diff two tables, we found an issue with a `double`.
SQL Server presumably uses the same conception of floats as C# (and JavaScript, which the Python Notebook table preview in Synapse uses): `-0.0 == 0.0`, but the JVM/Spark in some cases compares by bit and differentiates because of the sign bit: `-0.0 != 0.0`.
It's resolved in later versions of Spark's DataFrames, and may not apply in the case of `[decimal]String`, so it may not be problematic.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's okay!