Skip to content

Commit 17555b4

Browse files
authored
feat: Allow passing GeoArrow arrays and chunked arrays directly to layer constructors (#828)
### Change list

- Implement `total_bounds` and `weighted_centroid` for `geoarrow.box` arrays.
- Always add a positional row index when the user passes in a raw array or chunked array.
- Convert a `geoarrow.box` column to a `geoarrow.polygon` column directly in the table trait validation.
- Add new tests for passing a `geoarrow.box` array and chunked array into the `PolygonLayer` constructor.

Closes #823
1 parent 70e0ef2 commit 17555b4

File tree

10 files changed

+160
-21
lines changed

10 files changed

+160
-21
lines changed

lonboard/_geoarrow/ops/bbox.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from dataclasses import dataclass
77

88
import numpy as np
9-
from arro3.core import Array, ChunkedArray, DataType, Field, list_flatten
9+
from arro3.core import Array, ChunkedArray, DataType, Field, list_flatten, struct_field
1010

1111
from lonboard._constants import EXTENSION_NAME
1212

@@ -44,6 +44,9 @@ def total_bounds(field: Field, column: ChunkedArray) -> Bbox:
4444
if extension_type_name == EXTENSION_NAME.MULTIPOLYGON:
4545
return _total_bounds_nest_3(column)
4646

47+
if extension_type_name == EXTENSION_NAME.BOX:
48+
return _total_bounds_box(column)
49+
4750
assert False
4851

4952

@@ -91,3 +94,30 @@ def _total_bounds_nest_3(column: ChunkedArray) -> Bbox:
9194
bbox.update(_coords_bbox(coords))
9295

9396
return bbox
97+
98+
99+
def _total_bounds_box(column: ChunkedArray) -> Bbox:
    """Compute the total bounds of a geoarrow.box column.

    Each chunk is a struct array whose child fields hold the per-row box
    extents. Structs with 4 fields are treated as 2D boxes and structs with 6
    fields as 3D boxes; only the x/y extents are folded into the result.

    Args:
        column: Chunked array of box structs.

    Returns:
        A ``Bbox`` updated with the x/y extremes of every non-empty chunk.
        Empty chunks contribute nothing.

    Raises:
        ValueError: If a chunk's struct type has neither 4 nor 6 fields.

    """
    bbox = Bbox()
    for chunk in column.chunks:
        num_fields = len(chunk.field.type.fields)
        if num_fields == 4:
            # 2D: the max extents directly follow the two min extents.
            maxx_idx, maxy_idx = 2, 3
        elif num_fields == 6:
            # 3D: indices 2 and 5 are skipped (presumably the z extents —
            # TODO confirm against the geoarrow.box field order).
            maxx_idx, maxy_idx = 3, 4
        else:
            raise ValueError(
                f"Unexpected box type with {num_fields} fields.\n"
                "Only 2D and 3D boxes are supported.",
            )

        if len(chunk) == 0:
            # np.min / np.max raise ValueError on zero-size input, so an empty
            # chunk must be skipped rather than reduced.
            continue

        bbox.update(
            Bbox(
                minx=np.min(struct_field(chunk, 0)),
                miny=np.min(struct_field(chunk, 1)),
                maxx=np.max(struct_field(chunk, maxx_idx)),
                maxy=np.max(struct_field(chunk, maxy_idx)),
            ),
        )

    return bbox

lonboard/_geoarrow/ops/centroid.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from dataclasses import dataclass
66

77
import numpy as np
8-
from arro3.core import Array, ChunkedArray, DataType, Field, list_flatten
8+
from arro3.core import Array, ChunkedArray, DataType, Field, list_flatten, struct_field
99

1010
from lonboard._constants import EXTENSION_NAME
1111

@@ -109,6 +109,9 @@ def weighted_centroid(field: Field, column: ChunkedArray) -> WeightedCentroid:
109109
if extension_type_name == EXTENSION_NAME.MULTIPOLYGON:
110110
return _weighted_centroid_nest_3(column)
111111

112+
if extension_type_name == EXTENSION_NAME.BOX:
113+
return _weighted_centroid_box(column)
114+
112115
assert False
113116

114117

@@ -146,3 +149,34 @@ def _weighted_centroid_nest_3(column: ChunkedArray) -> WeightedCentroid:
146149
centroid.update_coords(coords)
147150

148151
return centroid
152+
153+
154+
def _weighted_centroid_box(column: ChunkedArray) -> WeightedCentroid:
    """Compute the weighted centroid of a geoarrow.box column.

    For every non-empty chunk, the centroid is the midpoint between the mean
    of the minimum extents and the mean of the maximum extents on each axis.
    It is merged into the running centroid with the chunk's row count as its
    ``num_items``.

    Args:
        column: Chunked array of box structs with 4 (2D) or 6 (3D) fields.

    Returns:
        The ``WeightedCentroid`` accumulated over all non-empty chunks.

    Raises:
        ValueError: If a chunk's struct type has neither 4 nor 6 fields.

    """
    centroid = WeightedCentroid()
    for chunk in column.chunks:
        num_fields = len(chunk.field.type.fields)
        if num_fields == 4:
            # 2D: the max extents directly follow the two min extents.
            maxx_idx, maxy_idx = 2, 3
        elif num_fields == 6:
            # 3D: indices 2 and 5 are skipped (presumably the z extents —
            # TODO confirm against the geoarrow.box field order).
            maxx_idx, maxy_idx = 3, 4
        else:
            raise ValueError(
                f"Unexpected box type with {num_fields} fields.\n"
                "Only 2D and 3D boxes are supported.",
            )

        num_items = len(chunk)
        if num_items == 0:
            # np.mean of a zero-size array is NaN (with a RuntimeWarning);
            # skip the chunk so NaN is never passed to centroid.update.
            continue

        minx = struct_field(chunk, 0)
        miny = struct_field(chunk, 1)
        maxx = struct_field(chunk, maxx_idx)
        maxy = struct_field(chunk, maxy_idx)

        meanx = float((np.mean(minx) + np.mean(maxx)) / 2)
        meany = float((np.mean(miny) + np.mean(maxy)) / 2)

        centroid.update(WeightedCentroid(x=meanx, y=meany, num_items=num_items))

    return centroid

lonboard/_geoarrow/ops/coord_layout.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ def make_geometry_interleaved(
3333
geom_field = table.schema.field(geom_col_idx)
3434
geom_column = table.column(geom_col_idx)
3535

36+
# The GeoArrow box extension type is only struct, not interleaved. It will be
37+
# converted to an interleaved polygon separately, if needed.
38+
if geom_field.metadata.get(b"ARROW:extension:name") == EXTENSION_NAME.BOX:
39+
return table
40+
3641
new_field, new_column = transpose_column(field=geom_field, column=geom_column)
3742
return table.set_column(geom_col_idx, new_field, new_column)
3843

lonboard/_geoarrow/row_index.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import numpy as np
2+
from arro3.core import Array, ChunkedArray, Table
3+
4+
5+
def add_positional_row_index(
    table: Table,
) -> Table:
    """Append a ``row_index`` column containing ``0 .. num_rows - 1``.

    The column uses the first of uint8, uint16, uint32 whose maximum value is
    at least the table's row count, falling back to uint64.

    Args:
        table: Table to extend.

    Returns:
        The result of appending the single-chunk ``row_index`` column to
        ``table``.

    """
    row_count = table.num_rows

    # First candidate whose max covers the row count wins; uint64 otherwise.
    index_dtype = next(
        (
            candidate
            for candidate in (np.uint8, np.uint16, np.uint32)
            if row_count <= np.iinfo(candidate).max
        ),
        np.uint64,
    )

    positions = Array(np.arange(row_count, dtype=index_dtype))
    return table.append_column("row_index", ChunkedArray([positions]))

lonboard/_layer.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,19 @@
2020
import ipywidgets
2121
import traitlets
2222
import traitlets as t
23-
from arro3.core import Table
23+
from arro3.core import ChunkedArray, Schema, Table
2424

2525
from lonboard._base import BaseExtension, BaseWidget
2626
from lonboard._constants import EXTENSION_NAME, OGC_84
2727
from lonboard._geoarrow._duckdb import from_duckdb as _from_duckdb
28-
from lonboard._geoarrow.box_to_polygon import parse_box_encoded_table
28+
from lonboard._geoarrow.c_stream_import import import_arrow_c_stream
2929
from lonboard._geoarrow.geopandas_interop import geopandas_to_geoarrow
3030
from lonboard._geoarrow.ops import reproject_table
3131
from lonboard._geoarrow.ops.bbox import Bbox, total_bounds
3232
from lonboard._geoarrow.ops.centroid import WeightedCentroid, weighted_centroid
3333
from lonboard._geoarrow.ops.coord_layout import make_geometry_interleaved
3434
from lonboard._geoarrow.parse_wkb import parse_serialized_table
35+
from lonboard._geoarrow.row_index import add_positional_row_index
3536
from lonboard._serialization import infer_rows_per_chunk
3637
from lonboard._utils import auto_downcast as _auto_downcast
3738
from lonboard._utils import get_geometry_column_index, remove_extension_kwargs
@@ -361,6 +362,16 @@ def __init__(
361362
A Layer with the initialized data.
362363
363364
"""
365+
imported_stream = import_arrow_c_stream(table)
366+
if isinstance(imported_stream, Table):
367+
table_o3 = imported_stream
368+
else:
369+
assert isinstance(imported_stream, ChunkedArray)
370+
field = imported_stream.field.with_name("geometry")
371+
schema = Schema([field])
372+
table = Table.from_arrays([imported_stream], schema=schema)
373+
table = add_positional_row_index(table)
374+
364375
table_o3 = Table.from_arrow(table)
365376
parsed_tables = parse_serialized_table(table_o3)
366377
assert len(parsed_tables) == 1, (
@@ -1067,7 +1078,6 @@ def __init__(
10671078
_rows_per_chunk: int | None = None,
10681079
**kwargs: Unpack[PolygonLayerKwargs],
10691080
) -> None:
1070-
table = parse_box_encoded_table(Table.from_arrow(table))
10711081
super().__init__(table=table, _rows_per_chunk=_rows_per_chunk, **kwargs)
10721082

10731083
@classmethod
@@ -1857,7 +1867,6 @@ def __init__(
18571867
_rows_per_chunk: int | None = None,
18581868
**kwargs: Unpack[SolidPolygonLayerKwargs],
18591869
) -> None:
1860-
table = parse_box_encoded_table(Table.from_arrow(table))
18611870
super().__init__(table=table, _rows_per_chunk=_rows_per_chunk, **kwargs)
18621871

18631872
@classmethod

lonboard/_viz.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from lonboard._geoarrow.extension_types import construct_geometry_array
2525
from lonboard._geoarrow.geopandas_interop import geopandas_to_geoarrow
2626
from lonboard._geoarrow.parse_wkb import parse_serialized_table
27+
from lonboard._geoarrow.row_index import add_positional_row_index
2728
from lonboard._layer import PathLayer, PolygonLayer, ScatterplotLayer
2829
from lonboard._map import Map
2930
from lonboard._utils import (
@@ -447,18 +448,7 @@ def _viz_geoarrow_chunked_array(
447448
field = ca.field.with_name("geometry")
448449
schema = Schema([field])
449450
table = Table.from_arrays([ca], schema=schema)
450-
451-
num_rows = len(ca)
452-
if num_rows <= np.iinfo(np.uint8).max:
453-
arange_col = Array(np.arange(num_rows, dtype=np.uint8))
454-
elif num_rows <= np.iinfo(np.uint16).max:
455-
arange_col = Array(np.arange(num_rows, dtype=np.uint16))
456-
elif num_rows <= np.iinfo(np.uint32).max:
457-
arange_col = Array(np.arange(num_rows, dtype=np.uint32))
458-
else:
459-
arange_col = Array(np.arange(num_rows, dtype=np.uint64))
460-
461-
table = table.append_column("row_index", ChunkedArray([arange_col]))
451+
table = add_positional_row_index(table)
462452
return _viz_geoarrow_table(table, **kwargs)
463453

464454

lonboard/traits.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
from traitlets import TraitError, Undefined
2828
from traitlets.utils.descriptions import class_of, describe
2929

30+
from lonboard._constants import EXTENSION_NAME
31+
from lonboard._geoarrow.box_to_polygon import parse_box_encoded_table
3032
from lonboard._serialization import (
3133
ACCESSOR_SERIALIZATION,
3234
TABLE_SERIALIZATION,
@@ -42,7 +44,6 @@
4244
from traitlets.traitlets import TraitType
4345
from traitlets.utils.sentinel import Sentinel
4446

45-
from lonboard._constants import EXTENSION_NAME
4647
from lonboard._layer import BaseArrowLayer
4748

4849
DEFAULT_INITIAL_VIEW_STATE = {
@@ -198,6 +199,11 @@ def validate(self, obj: BaseArrowLayer, value: Any) -> Table:
198199

199200
# No restriction on the allowed geometry types in this table
200201
if allowed_geometry_types:
202+
# If we allow polygons as input, then we also allow geoarrow.box.
203+
# Convert boxes to Polygons
204+
if EXTENSION_NAME.POLYGON in allowed_geometry_types:
205+
value = parse_box_encoded_table(value)
206+
201207
geometry_extension_type = value.schema.field(geom_col_idx).metadata.get(
202208
b"ARROW:extension:name",
203209
)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ dev = [
8888
"pytest>=8.3.4",
8989
"ruff>=0.12.0",
9090
"sidecar>=0.7.0",
91+
"types-geopandas>=1.1.1.20250708",
9192
"types-shapely>=2.1.0.20250512",
9293
]
9394
# Note: this is defined as a separate group so that it can be not installed in

tests/test_box.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import geoarrow.pyarrow as ga
2-
from arro3.core import Table
2+
from arro3.core import ChunkedArray, Table
33

4-
from lonboard import PolygonLayer, SolidPolygonLayer, viz
4+
from lonboard import Map, PolygonLayer, SolidPolygonLayer, viz
55

66

77
def test_viz_box():
@@ -17,6 +17,31 @@ def test_viz_box():
1717
assert isinstance(m.layers[0], PolygonLayer)
1818

1919

20+
def test_box_polygon_layer():
    """A raw geoarrow.box array can be passed to PolygonLayer and shown on a Map."""
    linestrings = [
        "LINESTRING (0 10, 34 -1)",
        "LINESTRING (10 20, 44 -10)",
        "LINESTRING (20 40, 54 5)",
    ]
    boxes = ga.box(linestrings)
    # Constructing the Map completing without error is the assertion.
    _m = Map(PolygonLayer(boxes))
30+
31+
32+
def test_box_polygon_layer_chunked_array():
    """A geoarrow.box ChunkedArray can be passed to PolygonLayer and shown on a Map."""
    linestrings = [
        "LINESTRING (0 10, 34 -1)",
        "LINESTRING (10 20, 44 -10)",
        "LINESTRING (20 40, 54 5)",
    ]
    # Wrap the single box array as a one-chunk ChunkedArray.
    chunked_boxes = ChunkedArray([ga.box(linestrings)])
    # Constructing the Map completing without error is the assertion.
    _m = Map(PolygonLayer(chunked_boxes))
43+
44+
2045
def test_viz_box_polygon_layer():
2146
arr = ga.box(
2247
[

uv.lock

Lines changed: 21 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)