From 00c5307b7239e3c1299635a1ce9dff7f7591032a Mon Sep 17 00:00:00 2001 From: Spencer Nelson Date: Wed, 31 May 2023 21:31:52 -0700 Subject: [PATCH 1/5] Add documentation for list arrays' values property --- python/pyarrow/array.pxi | 102 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 49ae9ceb36a..53d15a4d800 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1914,7 +1914,7 @@ cdef class BaseListArray(Array): The returned Array is logically a concatenation of all the sub-lists in this Array. - Note that this method is different from ``self.values()`` in that + Note that this method is different from ``self.values`` in that it takes care of the slicing offset as well as null elements backed by non-empty sub-lists. @@ -2053,6 +2053,38 @@ cdef class ListArray(BaseListArray): @property def values(self): + """ + Return the underlying array of values which backs the ListArray. + + This is logically a concatenation of all the sub-lists in this array. + + Note even null values are included. If any of the list slots + are null, but are backed by a non-empty sub-list, those values + will be included in the output. + + Compare with ``flatten``, which returns only the non-null + values. + + Returns + ------- + values : Array + + Examples + -------- + >>> import pyarrow as pa + >>> array = pa.array([[1, 2], None, [3, 4, None, 6]]) + >>> array.values + + [ + 1, + 2, + 3, + 4, + null, + 6 + ] + + """ cdef CListArray* arr = self.ap return pyarrow_wrap_array(arr.values()) @@ -2140,6 +2172,40 @@ cdef class LargeListArray(BaseListArray): @property def values(self): + """ + Return the underlying array of values which backs the LargeListArray. + + This is logically a concatenation of all the sub-lists in this array. + + Note even null values are included. If any of the list slots + are null, but are backed by a non-empty sub-list, those values + will be included in the output. + + Compare with ``flatten``, which returns only the non-null + values. + + Returns + ------- + values : Array + + Examples + -------- + >>> import pyarrow as pa + >>> array = pa.array( + ... [[1, 2], None, [3, 4, None, 6]], + ... type=pa.large_list(pa.int32()), + ... ) + >>> array.values + + [ + 1, + 2, + 3, + 4, + null, + 6 + ] + """ cdef CLargeListArray* arr = self.ap return pyarrow_wrap_array(arr.values()) @@ -2296,6 +2362,40 @@ cdef class FixedSizeListArray(BaseListArray): @property def values(self): + """ + Return the underlying array of values which backs the + FixedSizeListArray. + + This is logically a concatenation of all the sub-lists in this array. + + Note even null values are included. + + Compare with ``flatten``, which returns only the non-null + sub-list values. + + Returns + ------- + values : Array + + Examples + -------- + >>> import pyarrow as pa + >>> array = pa.array( + ... [[1, 2], None, [3, None]], + ... type=pa.list_(pa.int32(), 2) + ... ) + >>> array.values + + [ + 1, + 2, + null, + null, + 3, + null + ] + + """ cdef CFixedSizeListArray* arr = self.ap return pyarrow_wrap_array(arr.values()) From da414da1774fa898bf166adb907fa1d1d2fece50 Mon Sep 17 00:00:00 2001 From: Spencer Nelson Date: Thu, 1 Jun 2023 06:12:32 -0700 Subject: [PATCH 2/5] Use ellipsis in docs instead of memory addresses --- python/pyarrow/array.pxi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 53d15a4d800..f0718f1eb2d 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2196,7 +2196,7 @@ cdef class LargeListArray(BaseListArray): ... type=pa.large_list(pa.int32()), ... ) >>> array.values - + [ 1, 2, @@ -2385,7 +2385,7 @@ cdef class FixedSizeListArray(BaseListArray): ... type=pa.list_(pa.int32(), 2) ... ) >>> array.values - + [ 1, 2, From c3786ec82703d6be5bfe8c3986c5bb5cc4772469 Mon Sep 17 00:00:00 2001 From: Spencer Nelson Date: Tue, 6 Jun 2023 10:13:39 -0700 Subject: [PATCH 3/5] Improve clarity of comments for ListArray.values and friends Co-authored-by: Alenka Frim Co-authored-by: Joris Van den Bossche --- python/pyarrow/array.pxi | 42 ++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index f0718f1eb2d..b4d2d771dbc 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2054,21 +2054,26 @@ cdef class ListArray(BaseListArray): @property def values(self): """ - Return the underlying array of values which backs the ListArray. + Return the underlying array of values which backs the ListArray + ignoring the array's offset. This is logically a concatenation of all the sub-lists in this array. - Note even null values are included. If any of the list slots - are null, but are backed by a non-empty sub-list, those values - will be included in the output. + If any of the list elements are null, but are backed by a + non-empty sub-list, those elements will be included in the + output. - Compare with ``flatten``, which returns only the non-null - values. + Compare with :meth:`flatten`, which returns only the non-null + values taking into consideration the array's offset. Returns ------- values : Array + See Also + -------- + ListArray.flatten : ... + Examples -------- >>> import pyarrow as pa @@ -2173,21 +2178,26 @@ cdef class LargeListArray(BaseListArray): @property def values(self): """ - Return the underlying array of values which backs the LargeListArray. + Return the underlying array of values which backs the LargeListArray + ignoring the array's offset. This is logically a concatenation of all the sub-lists in this array. - Note even null values are included. If any of the list slots - are null, but are backed by a non-empty sub-list, those values - will be included in the output. + If any of the list elements are null, but are backed by a + non-empty sub-list, those elements will be included in the + output. - Compare with ``flatten``, which returns only the non-null - values. + Compare with :meth:`flatten`, which returns only the non-null + values taking into consideration the array's offset. Returns ------- values : Array + See Also + -------- + LargeListArray.flatten : ... + Examples -------- >>> import pyarrow as pa @@ -2368,15 +2378,19 @@ cdef class FixedSizeListArray(BaseListArray): This is logically a concatenation of all the sub-lists in this array. - Note even null values are included. + Note even null elements are included. - Compare with ``flatten``, which returns only the non-null + Compare with :meth:`flatten`, which returns only the non-null sub-list values. Returns ------- values : Array + See Also + -------- + FixedSizeListArray.flatten : ... + Examples -------- >>> import pyarrow as pa From 847c3316b56549d52228fccc509171156b18e446 Mon Sep 17 00:00:00 2001 From: Spencer Nelson Date: Tue, 6 Jun 2023 10:28:12 -0700 Subject: [PATCH 4/5] Add examples of using .values on list slices --- python/pyarrow/array.pxi | 62 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index b4d2d771dbc..362dcb7b55f 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2076,6 +2076,9 @@ cdef class ListArray(BaseListArray): Examples -------- + + The values include null elements from sub-lists: + >>> import pyarrow as pa >>> array = pa.array([[1, 2], None, [3, 4, None, 6]]) >>> array.values @@ -2089,6 +2092,34 @@ cdef class ListArray(BaseListArray): 6 ] + If an array is sliced, the slice still uses the same + underlying data as the original array, just with an + offset. Since values ignores the offset, the values are the + same: + + >>> sliced = array.slice(1, 2) + >>> sliced + + [ + null, + [ + 3, + 4, + null, + 6 + ] + ] + >>> sliced.values + + [ + 1, + 2, + 3, + 4, + null, + 6 + ] + """ cdef CListArray* arr = self.ap return pyarrow_wrap_array(arr.values()) @@ -2200,6 +2231,9 @@ cdef class LargeListArray(BaseListArray): Examples -------- + + The values include null elements from the sub-lists: + >>> import pyarrow as pa >>> array = pa.array( ... [[1, 2], None, [3, 4, None, 6]], @@ -2215,6 +2249,34 @@ cdef class LargeListArray(BaseListArray): null, 6 ] + + If an array is sliced, the slice still uses the same + underlying data as the original array, just with an + offset. Since values ignores the offset, the values are the + same: + + >>> sliced = array.slice(1, 2) + >>> sliced + + [ + null, + [ + 3, + 4, + null, + 6 + ] + ] + >>> sliced.values + + [ + 1, + 2, + 3, + 4, + null, + 6 + ] """ cdef CLargeListArray* arr = self.ap return pyarrow_wrap_array(arr.values()) From b6c12f23c55b34bab84e0890ca7118975942e496 Mon Sep 17 00:00:00 2001 From: Spencer Nelson Date: Wed, 23 Aug 2023 09:18:09 -0700 Subject: [PATCH 5/5] Remove misleading line from ListArray.values docstring --- python/pyarrow/array.pxi | 6 ------ 1 file changed, 6 deletions(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 362dcb7b55f..9914ac37a50 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2057,8 +2057,6 @@ cdef class ListArray(BaseListArray): Return the underlying array of values which backs the ListArray ignoring the array's offset. - This is logically a concatenation of all the sub-lists in this array. - If any of the list elements are null, but are backed by a non-empty sub-list, those elements will be included in the output. @@ -2212,8 +2210,6 @@ cdef class LargeListArray(BaseListArray): Return the underlying array of values which backs the LargeListArray ignoring the array's offset. - This is logically a concatenation of all the sub-lists in this array. - If any of the list elements are null, but are backed by a non-empty sub-list, those elements will be included in the output. @@ -2438,8 +2434,6 @@ cdef class FixedSizeListArray(BaseListArray): Return the underlying array of values which backs the FixedSizeListArray. - This is logically a concatenation of all the sub-lists in this array. - Note even null elements are included. Compare with :meth:`flatten`, which returns only the non-null