Skip to content

Commit c259361

Browse files
feat(rust/sedona-geos): Implement ST_LineMerge() (#503)
Co-authored-by: Peter Nguyen <petern0408@gmail.com>
1 parent edcb909 commit c259361

7 files changed

Lines changed: 310 additions & 0 deletions

File tree

c/sedona-geos/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ mod st_issimple;
3333
mod st_isvalid;
3434
mod st_isvalidreason;
3535
mod st_length;
36+
mod st_line_merge;
3637
mod st_makevalid;
3738
mod st_minimumclearance;
3839
mod st_minimumclearance_line;

c/sedona-geos/src/register.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ use crate::{
3131
st_isvalid::st_is_valid_impl,
3232
st_isvalidreason::st_is_valid_reason_impl,
3333
st_length::st_length_impl,
34+
st_line_merge::st_line_merge_impl,
3435
st_makevalid::st_make_valid_impl,
3536
st_minimumclearance::st_minimum_clearance_impl,
3637
st_minimumclearance_line::st_minimum_clearance_line_impl,
@@ -81,6 +82,7 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
8182
("st_isvalid", st_is_valid_impl()),
8283
("st_isvalidreason", st_is_valid_reason_impl()),
8384
("st_length", st_length_impl()),
85+
("st_linemerge", st_line_merge_impl()),
8486
("st_numinteriorrings", st_num_interior_rings_impl()),
8587
("st_numpoints", st_num_points_impl()),
8688
("st_nrings", st_nrings_impl()),

c/sedona-geos/src/st_line_merge.rs

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use std::sync::Arc;
19+
20+
use arrow_array::builder::BinaryBuilder;
21+
use datafusion_common::{error::Result, DataFusionError, ScalarValue};
22+
use datafusion_expr::ColumnarValue;
23+
use geos::Geom;
24+
use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
25+
use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
26+
use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher};
27+
28+
use crate::executor::GeosExecutor;
29+
use crate::geos_to_wkb::write_geos_geometry;
30+
31+
pub fn st_line_merge_impl() -> ScalarKernelRef {
32+
Arc::new(STLineMerge {})
33+
}
34+
35+
#[derive(Debug)]
36+
struct STLineMerge {}
37+
38+
impl SedonaScalarKernel for STLineMerge {
39+
fn return_type(
40+
&self,
41+
args: &[sedona_schema::datatypes::SedonaType],
42+
) -> datafusion_common::Result<Option<sedona_schema::datatypes::SedonaType>> {
43+
let matcher = ArgMatcher::new(
44+
vec![
45+
ArgMatcher::is_geometry(),
46+
ArgMatcher::optional(ArgMatcher::is_boolean()),
47+
],
48+
WKB_GEOMETRY,
49+
);
50+
matcher.match_args(args)
51+
}
52+
53+
fn invoke_batch(
54+
&self,
55+
arg_types: &[sedona_schema::datatypes::SedonaType],
56+
args: &[datafusion_expr::ColumnarValue],
57+
) -> datafusion_common::Result<datafusion_expr::ColumnarValue> {
58+
let executor = GeosExecutor::new(arg_types, args);
59+
let mut builder = BinaryBuilder::with_capacity(
60+
executor.num_iterations(),
61+
WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
62+
);
63+
64+
let directed = match args.get(1) {
65+
Some(ColumnarValue::Scalar(ScalarValue::Boolean(Some(opt_bool)))) => *opt_bool,
66+
_ => false,
67+
};
68+
69+
executor.execute_wkb_void(|maybe_wkb| {
70+
match maybe_wkb {
71+
Some(wkb) => {
72+
invoke_scalar(&wkb, &mut builder, directed)?;
73+
builder.append_value([]);
74+
}
75+
None => builder.append_null(),
76+
}
77+
78+
Ok(())
79+
})?;
80+
81+
executor.finish(Arc::new(builder.finish()))
82+
}
83+
}
84+
85+
/// Writes the line-merged form of `geos_geom` to `writer`.
///
/// Empty inputs are written back unchanged. Otherwise the geometry is merged with
/// `line_merge_directed()` when `directed` is true and with `line_merge()` when it is
/// false, and the merged geometry is written instead.
///
/// # Errors
///
/// Returns `DataFusionError::Execution` if the emptiness check or the merge fails, and
/// propagates any error raised by `write_geos_geometry`.
fn invoke_scalar(
    geos_geom: &geos::Geometry,
    writer: &mut impl std::io::Write,
    directed: bool,
) -> Result<()> {
    let input_is_empty = match geos_geom.is_empty() {
        Ok(flag) => flag,
        Err(e) => {
            return Err(DataFusionError::Execution(format!(
                "Failed to check if the geometry is empty: {e}"
            )));
        }
    };

    if input_is_empty {
        // PostGIS seems to return the original geometry if it is empty
        write_geos_geometry(geos_geom, writer)?;
    } else {
        let merge_outcome = match directed {
            true => geos_geom.line_merge_directed(),
            false => geos_geom.line_merge(),
        };
        let merged = merge_outcome
            .map_err(|e| DataFusionError::Execution(format!("Failed to merge lines: {e}")))?;

        write_geos_geometry(&merged, writer)?;
    }

    Ok(())
}
112+
113+
#[cfg(test)]
mod tests {
    use arrow_array::ArrayRef;
    use arrow_schema::DataType;
    use datafusion_common::ScalarValue;
    use rstest::rstest;
    use sedona_expr::scalar_udf::SedonaScalarUDF;
    use sedona_schema::datatypes::{SedonaType, WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
    use sedona_testing::create::create_array;
    use sedona_testing::testers::ScalarUdfTester;

    use super::*;

    /// Checks the return type, undirected and directed merging, and NULL input for
    /// both geometry storage types.
    #[rstest]
    fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
        let line_merge = SedonaScalarUDF::from_impl("st_linemerge", st_line_merge_impl());
        let tester = ScalarUdfTester::new(
            line_merge.into(),
            vec![sedona_type, SedonaType::Arrow(DataType::Boolean)],
        );
        tester.assert_return_type(WKB_GEOMETRY);

        let wkt_inputs = vec![
            Some("MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))"),
            Some("MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))"), // opposite direction
            Some("MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))"), // doesn't touch
        ];

        // Undirected: touching lines are merged regardless of their direction
        let undirected_expected: ArrayRef = create_array(
            &[
                Some("LINESTRING (0 0, 1 0, 1 1)"),
                Some("LINESTRING (0 0, 1 0, 1 1)"),
                Some("MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))"),
            ],
            &WKB_GEOMETRY,
        );
        let undirected_actual = tester
            .invoke_wkb_array_scalar(wkt_inputs.clone(), false)
            .unwrap();
        assert_eq!(&undirected_actual, &undirected_expected);

        // If directed is true, lines with opposite directions won't be merged
        let directed_expected: ArrayRef = create_array(
            &[
                Some("LINESTRING (0 0, 1 0, 1 1)"),
                Some("MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))"),
                Some("MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))"),
            ],
            &WKB_GEOMETRY,
        );
        let directed_actual = tester.invoke_wkb_array_scalar(wkt_inputs, true).unwrap();
        assert_eq!(&directed_actual, &directed_expected);

        // handle NULL
        let null_result = tester
            .invoke_scalar_scalar(ScalarValue::Null, false)
            .unwrap();
        assert!(null_result.is_null());
    }
}

python/sedonadb/tests/functions/test_functions.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,6 +1797,68 @@ def test_st_isring_non_linestring_error(eng, geom):
17971797
eng.assert_query_result(f"SELECT ST_IsRing(ST_GeomFromText('{geom}'))", None)
17981798

17991799

1800+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
    ("geom", "expected"),
    [
        (None, None),
        ("MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))", "LINESTRING (0 0, 1 0, 1 1)"),
        # opposite direction
        (
            "MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))",
            "LINESTRING (0 0, 1 0, 1 1)",
        ),
        # non-touching
        (
            "MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))",
            "MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))",
        ),
        # empty cases
        ("POINT EMPTY", "POINT (nan nan)"),
        ("LINESTRING EMPTY", "LINESTRING EMPTY"),
        ("POLYGON EMPTY", "POLYGON EMPTY"),
        ("MULTIPOINT EMPTY", "MULTIPOINT EMPTY"),
        ("MULTILINESTRING EMPTY", "MULTILINESTRING EMPTY"),
        ("MULTIPOLYGON EMPTY", "MULTIPOLYGON EMPTY"),
        ("GEOMETRYCOLLECTION EMPTY", "GEOMETRYCOLLECTION EMPTY"),
        # Note that the behaviour on non-multilinestring geometry is not documented.
        # But, we test such cases here as well to detect if there's any difference.
        ("POINT (0 0)", "GEOMETRYCOLLECTION EMPTY"),
        ("LINESTRING (0 0, 1 0)", "LINESTRING (0 0, 1 0)"),
        ("POLYGON ((0 0, 0 1, 1 0, 0 0))", "LINESTRING (0 0, 0 1, 1 0, 0 0)"),
    ],
)
def test_st_linemerge(eng, geom, expected):
    """Check single-argument ST_LineMerge() against the expected WKT on each engine."""
    engine = eng.create_or_skip()
    query = f"SELECT ST_LineMerge({geom_or_null(geom)})"
    engine.assert_query_result(query, expected)
1837+
1838+
1839+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
    ("geom", "expected"),
    [
        # same direction: merged
        ("MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))", "LINESTRING (0 0, 1 0, 1 1)"),
        # opposite direction: left unmerged when directed is true
        (
            "MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))",
            "MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))",
        ),
        # non-touching
        (
            "MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))",
            "MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))",
        ),
    ],
)
def test_st_linemerge_directed(eng, geom, expected):
    """Check ST_LineMerge(geom, true) against the expected WKT on each engine."""
    engine = eng.create_or_skip()
    query = f"SELECT ST_LineMerge({geom_or_null(geom)}, true)"
    engine.assert_query_result(query, expected)
1860+
1861+
18001862
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
18011863
@pytest.mark.parametrize(
18021864
("geom", "expected"),

rust/sedona-functions/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ pub mod st_isclosed;
5050
mod st_iscollection;
5151
pub mod st_isempty;
5252
mod st_length;
53+
mod st_line_merge;
5354
mod st_makeline;
5455
mod st_numgeometries;
5556
mod st_perimeter;

rust/sedona-functions/src/register.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ pub fn default_function_set() -> FunctionSet {
5858
crate::predicates::st_knn_udf,
5959
crate::predicates::st_touches_udf,
6060
crate::predicates::st_within_udf,
61+
crate::st_line_merge::st_line_merge_udf,
6162
crate::referencing::st_line_interpolate_point_udf,
6263
crate::referencing::st_line_locate_point_udf,
6364
crate::sd_format::sd_format_udf,
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation, Volatility};
18+
use sedona_expr::scalar_udf::SedonaScalarUDF;
19+
use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher};
20+
21+
/// ST_LineMerge() scalar UDF implementation
22+
///
23+
/// Stub function for line merging.
24+
pub fn st_line_merge_udf() -> SedonaScalarUDF {
25+
SedonaScalarUDF::new_stub(
26+
"st_linemerge",
27+
ArgMatcher::new(vec![ArgMatcher::is_geometry()], WKB_GEOMETRY),
28+
Volatility::Immutable,
29+
Some(st_line_merge_doc()),
30+
)
31+
}
32+
33+
fn st_line_merge_doc() -> Documentation {
34+
Documentation::builder(
35+
DOC_SECTION_OTHER,
36+
"Merge the line segments in a geometry",
37+
"ST_LineMerge (Geom: Geometry)",
38+
)
39+
.with_argument("geom", "geometry: Input geometry")
40+
.with_argument(
41+
"directed",
42+
"If true, lines with opposite directions will not be merged",
43+
)
44+
.with_sql_example(
45+
"SELECT ST_LineMerge(ST_GeomFromWKT('MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))'))",
46+
)
47+
.build()
48+
}
49+
50+
#[cfg(test)]
mod tests {
    use datafusion_expr::ScalarUDF;

    use super::*;

    /// The stub must expose the expected function name and carry documentation.
    #[test]
    fn udf_metadata() {
        let scalar_udf: ScalarUDF = st_line_merge_udf().into();

        let has_docs = scalar_udf.documentation().is_some();
        assert_eq!(scalar_udf.name(), "st_linemerge");
        assert!(has_docs);
    }
}

0 commit comments

Comments
 (0)