Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 46 additions & 1 deletion python/sedonadb/tests/functions/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import math

import pyarrow
import pytest
import shapely
from sedonadb.testing import PostGIS, SedonaDB, geom_or_null, val_or_null
import math


@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
Expand Down Expand Up @@ -1668,6 +1670,49 @@ def test_st_geomfromwkb(eng, geom):
eng.assert_query_result(f"SELECT ST_GeomFromWKB({wkb})", expected)


# `ST_GeomFromWKBUnchecked` is not available in PostGIS
@pytest.mark.parametrize("eng", [SedonaDB])
@pytest.mark.parametrize(
("geom"),
[
"POINT (1 1)",
"POINT EMPTY",
"LINESTRING EMPTY",
"POLYGON EMPTY",
"GEOMETRYCOLLECTION EMPTY",
"POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",
"MULTILINESTRING ((0 0, 1 1), (1 1, 2 2))",
"GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1), POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)))",
],
)
def test_st_geomfromwkbunchecked(eng, geom):
eng = eng.create_or_skip()

expected = geom
if geom == "POINT EMPTY":
# arrow-c returns POINT (nan nan) instead of POINT EMPTY
expected = "POINT (nan nan)"

wkb = shapely.from_wkt(geom).wkb
wkb = "0x" + wkb.hex()

eng.assert_query_result(f"SELECT ST_GeomFromWKBUnchecked({wkb})", expected)


@pytest.mark.parametrize("eng", [SedonaDB])
def test_st_geomfromwkbunchecked_invalid_wkb(eng):
eng = eng.create_or_skip()

# Invalid WKB payload can still convert to geometry column
eng.assert_query_result(
"SELECT ST_AsBinary(ST_GeomFromWKBUnchecked(0x01))", b"\x01"
)

# Using invalid WKB elsewhere may result in undefined behavior.
with pytest.raises(pyarrow.lib.ArrowInvalid, match="failed to fill whole buffer"):
eng.execute_and_collect("SELECT ST_AsText(ST_GeomFromWKBUnchecked(0x01))")


@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom", "index", "expected"),
Expand Down
1 change: 1 addition & 0 deletions rust/sedona-functions/src/register.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ pub fn default_function_set() -> FunctionSet {
crate::st_geometrytype::st_geometry_type_udf,
crate::st_geomfromwkb::st_geogfromwkb_udf,
crate::st_geomfromwkb::st_geomfromwkb_udf,
crate::st_geomfromwkb::st_geomfromwkbunchecked_udf,
crate::st_geomfromwkt::st_geogfromwkt_udf,
crate::st_geomfromwkt::st_geomfromwkt_udf,
crate::st_geomfromwkt::st_geomfromewkt_udf,
Expand Down
128 changes: 123 additions & 5 deletions rust/sedona-functions/src/st_geomfromwkb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,21 @@ pub fn st_geomfromwkb_udf() -> SedonaScalarUDF {
)
}

/// ST_GeomFromWKBUnchecked() scalar UDF implementation
///
/// An implementation of WKB reading using GeoRust's wkb crate without validation.
pub fn st_geomfromwkbunchecked_udf() -> SedonaScalarUDF {
SedonaScalarUDF::new(
"st_geomfromwkbunchecked",
vec![Arc::new(STGeomFromWKB {
validate: false,
out_type: WKB_VIEW_GEOMETRY,
})],
Volatility::Immutable,
Some(doc_unchecked("ST_GeomFromWKBUnchecked", "Geometry")),
)
}

/// ST_GeogFromWKB() scalar UDF implementation
///
/// An implementation of WKB reading using GeoRust's wkb crate.
Expand Down Expand Up @@ -77,6 +92,29 @@ fn doc(name: &str, out_type_name: &str) -> Documentation {
.build()
}

/// Documentation for `ST_GeomFromWKBUnchecked()`.
///
/// Parameterized for reuse if `ST_GeogFromWKBUnchecked()` is implemented in the future.
fn doc_unchecked(name: &str, out_type_name: &str) -> Documentation {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: Is it the case that SQL reference (e.g. https://sedona.apache.org/sedonadb/latest/reference/sql/#st_geomfromwkb) can be auto-generated through these doc() traits, but we still have to update them manually?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's currently being updated manually. It is also in a state of flux right now where we're trying to separate the pages from each other in a way that each page can have a bit more detail, but still generate the main page. For now having the doc reference like you did here is perfect.

Documentation::builder(
DOC_SECTION_OTHER,
format!(
"Construct a {out_type_name} from WKB without validation. Invalid WKB input may result in undefined behavior."
),
format!("{name} (Wkb: Binary)"),
)
.with_argument(
"WKB",
format!(
"binary: Well-known binary representation of the {}",
out_type_name.to_lowercase()
),
)
.with_sql_example(format!("SELECT {name}([01 02 00 00 00 02 00 00 00 00 00 00 00 84 D6 00 C0 00 00 00 00 80 B5 D6 BF 00 00 00 60 E1 EF F7 BF 00 00 00 80 07 5D E5 BF])"))
.with_related_udf("ST_AsText")
.build()
}

#[derive(Debug)]
struct STGeomFromWKB {
validate: bool,
Expand Down Expand Up @@ -117,7 +155,7 @@ impl SedonaScalarKernel for STGeomFromWKB {

#[cfg(test)]
mod tests {
use arrow_array::BinaryArray;
use arrow_array::{ArrayRef, BinaryArray, BinaryViewArray};
use datafusion_common::scalar::ScalarValue;
use datafusion_expr::ScalarUDF;
use rstest::rstest;
Expand All @@ -134,6 +172,9 @@ mod tests {
0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x40,
];
const INVALID_WKB_LEN: [u8; 0] = [];
const INVALID_WKB_CONTENT: [u8; 5] = [0x01, 0x00, 0x00, 0x00, 0x00];
const INVALID_WKBS: [&[u8]; 2] = [&INVALID_WKB_LEN, &INVALID_WKB_CONTENT];

#[test]
fn udf_metadata() {
Expand All @@ -144,6 +185,10 @@ mod tests {
let geom_from_wkb: ScalarUDF = st_geomfromwkb_udf().into();
assert_eq!(geom_from_wkb.name(), "st_geomfromwkb");
assert!(geom_from_wkb.documentation().is_some());

let geom_from_wkb_unchecked: ScalarUDF = st_geomfromwkbunchecked_udf().into();
assert_eq!(geom_from_wkb_unchecked.name(), "st_geomfromwkbunchecked");
assert!(geom_from_wkb_unchecked.documentation().is_some());
}

#[rstest]
Expand Down Expand Up @@ -176,16 +221,89 @@ mod tests {
);
}

#[rstest]
fn udf_unchecked(#[values(DataType::Binary, DataType::BinaryView)] data_type: DataType) {
let udf = st_geomfromwkbunchecked_udf();
let tester = ScalarUdfTester::new(
udf.clone().into(),
vec![SedonaType::Arrow(data_type.clone())],
);

assert_eq!(tester.return_type().unwrap(), WKB_VIEW_GEOMETRY);

assert_scalar_equal(
&tester.invoke_scalar(POINT12.to_vec()).unwrap(),
&create_scalar(Some("POINT (1 2)"), &WKB_VIEW_GEOMETRY),
);

assert_scalar_equal(
&tester.invoke_scalar(ScalarValue::Null).unwrap(),
&create_scalar(None, &WKB_VIEW_GEOMETRY),
);

let binary_array: BinaryArray = [Some(POINT12), None, Some(POINT12)].iter().collect();
assert_array_equal(
&tester.invoke_array(Arc::new(binary_array)).unwrap(),
&create_array(
&[Some("POINT (1 2)"), None, Some("POINT (1 2)")],
&WKB_VIEW_GEOMETRY,
),
);
}

#[test]
fn invalid_wkb() {
let udf = st_geomfromwkb_udf();
let tester = ScalarUdfTester::new(udf.into(), vec![SedonaType::Arrow(DataType::Binary)]);

let err = tester
.invoke_scalar(ScalarValue::Binary(Some(vec![])))
.unwrap_err();
for invalid in INVALID_WKBS {
let _err = tester
.invoke_scalar(ScalarValue::Binary(Some(invalid.to_vec())))
.unwrap_err();
}
}

assert_eq!(err.message(), "failed to fill whole buffer");
#[rstest]
fn unchecked_invalid_wkb(
#[values(DataType::Binary, DataType::BinaryView)] data_type: DataType,
) {
let udf = st_geomfromwkbunchecked_udf();
let tester = ScalarUdfTester::new(udf.into(), vec![SedonaType::Arrow(data_type.clone())]);

for invalid in INVALID_WKBS {
let invalid_scalar = match data_type {
DataType::Binary => ScalarValue::Binary(Some(invalid.to_vec())),
DataType::BinaryView => ScalarValue::BinaryView(Some(invalid.to_vec())),
_ => unreachable!(),
};

assert_scalar_equal(
&tester.invoke_scalar(invalid_scalar).unwrap(),
&ScalarValue::BinaryView(Some(invalid.to_vec())),
);

let input_array: ArrayRef = match data_type {
DataType::Binary => Arc::new(
[Some(invalid), None, Some(invalid)]
.iter()
.collect::<BinaryArray>(),
),
DataType::BinaryView => Arc::new(
[Some(invalid), None, Some(invalid)]
.iter()
.collect::<BinaryViewArray>(),
),
_ => unreachable!(),
};

let expected_array: BinaryViewArray =
[Some(invalid), None, Some(invalid)].iter().collect();

assert_array_equal(
&tester.invoke_array(input_array).unwrap(),
&(Arc::new(expected_array) as ArrayRef),
);
}
}

#[test]
Expand Down
Loading