Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions python/pyarrow/interchange/from_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from typing import (
Any,
Tuple,
)

from pyarrow.interchange.column import (
Expand Down Expand Up @@ -204,7 +205,9 @@ def column_to_array(
pa.Array
"""
buffers = col.get_buffers()
data = buffers_to_array(buffers, col.size(),
data_type = col.dtype
data = buffers_to_array(buffers, data_type,
col.size(),
col.describe_null,
col.offset,
allow_copy)
Expand Down Expand Up @@ -236,7 +239,9 @@ def bool_column_to_array(
)

buffers = col.get_buffers()
data = buffers_to_array(buffers, col.size(),
data_type = col.dtype
data = buffers_to_array(buffers, data_type,
col.size(),
col.describe_null,
col.offset)
data = pc.cast(data, pa.bool_())
Expand Down Expand Up @@ -274,11 +279,15 @@ def categorical_column_to_dictionary(
raise NotImplementedError(
"Non-dictionary categoricals not supported yet")

# We need to first convert the dictionary column
cat_column = categorical["categories"]
dictionary = column_to_array(cat_column)

# Then we need to convert the indices
# Here we need to use the buffer data type!
buffers = col.get_buffers()
indices = buffers_to_array(buffers, col.size(),
_, data_type = buffers["data"]
indices = buffers_to_array(buffers, data_type,
col.size(),
col.describe_null,
col.offset)

Expand Down Expand Up @@ -326,6 +335,7 @@ def map_date_type(data_type):

def buffers_to_array(
buffers: ColumnBuffers,
data_type: Tuple[DtypeKind, int, str, str],
length: int,
describe_null: ColumnNullType,
offset: int = 0,
Expand All @@ -339,6 +349,9 @@ def buffers_to_array(
buffers : ColumnBuffers
Dictionary containing tuples of underlying buffers and
their associated dtype.
data_type : Tuple[DtypeKind, int, str, str]
Dtype description of the column as a tuple ``(kind, bit-width, format string,
endianness)``.
length : int
The number of values in the array.
describe_null : ColumnNullType
Expand All @@ -360,7 +373,7 @@ def buffers_to_array(
is responsible for keeping the memory owner object alive as long as
the returned PyArrow array is being used.
"""
data_buff, data_type = buffers["data"]
data_buff, _ = buffers["data"]
try:
validity_buff, validity_dtype = buffers["validity"]
except TypeError:
Expand Down