From d86d427ecc332e205872ce15702f260d39c75fdb Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 9 Feb 2026 20:54:52 +0100 Subject: [PATCH] Add type stubs for core data structures Add type annotation stubs for array, table, tensor, builder, memory, device, config, and types modules. Includes type-ignore annotations in related tests. --- python/pyarrow-stubs/pyarrow/array.pyi | 894 +++++++++++++++++++ python/pyarrow-stubs/pyarrow/builder.pyi | 51 ++ python/pyarrow-stubs/pyarrow/config.pyi | 72 ++ python/pyarrow-stubs/pyarrow/device.pyi | 66 ++ python/pyarrow-stubs/pyarrow/memory.pyi | 94 ++ python/pyarrow-stubs/pyarrow/table.pyi | 686 ++++++++++++++ python/pyarrow-stubs/pyarrow/tensor.pyi | 268 ++++++ python/pyarrow-stubs/pyarrow/types.pyi | 227 +++++ python/pyarrow/array.pxi | 2 +- python/pyarrow/scalar.pxi | 2 +- python/pyarrow/tests/test_array.py | 71 +- python/pyarrow/tests/test_convert_builtin.py | 42 +- python/pyarrow/tests/test_device.py | 12 +- python/pyarrow/tests/test_schema.py | 11 +- python/pyarrow/tests/test_sparse_tensor.py | 20 +- python/pyarrow/tests/test_table.py | 120 ++- python/pyarrow/tests/test_tensor.py | 2 +- python/pyarrow/tests/test_types.py | 55 +- 18 files changed, 2561 insertions(+), 134 deletions(-) create mode 100644 python/pyarrow-stubs/pyarrow/array.pyi create mode 100644 python/pyarrow-stubs/pyarrow/builder.pyi create mode 100644 python/pyarrow-stubs/pyarrow/config.pyi create mode 100644 python/pyarrow-stubs/pyarrow/device.pyi create mode 100644 python/pyarrow-stubs/pyarrow/memory.pyi create mode 100644 python/pyarrow-stubs/pyarrow/table.pyi create mode 100644 python/pyarrow-stubs/pyarrow/tensor.pyi create mode 100644 python/pyarrow-stubs/pyarrow/types.pyi diff --git a/python/pyarrow-stubs/pyarrow/array.pyi b/python/pyarrow-stubs/pyarrow/array.pyi new file mode 100644 index 00000000000..547e9c949d5 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/array.pyi @@ -0,0 +1,894 @@ +# Licensed to the Apache Software Foundation (ASF) under 
one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys + +from collections.abc import Iterable, Iterator, Sequence + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +from typing import ( + Any, + Generic, + Literal, + TypeVar, +) + +import numpy as np +import pandas as pd + +from pyarrow._compute import CastOptions +from pyarrow._stubs_typing import ( + ArrayLike, + Indices, + Mask, + Order, + SupportArrowArray, + SupportArrowDeviceArray, + SupportPyArrowArray, +) +from pyarrow.lib import ( + Buffer, + Device, + MemoryManager, + MemoryPool, + Tensor, + _Weakrefable, +) +from typing_extensions import deprecated +import builtins + +from .scalar import ( # noqa: F401 + BinaryScalar, + BinaryViewScalar, + BooleanScalar, + Date32Scalar, + Date64Scalar, + DictionaryScalar, + DoubleScalar, + DurationScalar, + ExtensionScalar, + FixedSizeBinaryScalar, + FixedSizeListScalar, + FloatScalar, + HalfFloatScalar, + Int16Scalar, + Int32Scalar, + Int64Scalar, + Int8Scalar, + LargeBinaryScalar, + LargeListScalar, + LargeStringScalar, + ListScalar, + ListViewScalar, + MapScalar, + MonthDayNanoIntervalScalar, + NullScalar, + RunEndEncodedScalar, + Scalar, + StringScalar, + StringViewScalar, + StructScalar, + Time32Scalar, + Time64Scalar, + 
TimestampScalar, + UInt16Scalar, + UInt32Scalar, + UInt64Scalar, + UInt8Scalar, + UnionScalar, +) +from .device import DeviceAllocationType +from ._types import ( # noqa: F401 + BaseExtensionType, + BinaryType, + DataType, + Field, + Float64Type, + Int64Type, + MapType, + StringType, + StructType, + _AsPyType, + _BasicDataType, + _BasicValueT, + _DataTypeT, + _IndexT, + _RunEndType, + _Size, + _Time32Unit, + _Time64Unit, + _Tz, + _Unit, +) +from ._stubs_typing import NullableCollection + + +def array( + values: NullableCollection[Any] | Iterable[Any] | SupportArrowArray + | SupportArrowDeviceArray | SupportPyArrowArray, + type: Any | None = None, + mask: Mask | pd.Series[bool] | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> ArrayLike: ... + + +def asarray( + values: NullableCollection[Any] | Iterable[Any] | SupportArrowArray + | SupportArrowDeviceArray, + type: _DataTypeT | Any | None = None, +) -> Array[Scalar[_DataTypeT]] | ArrayLike: ... + + +def nulls( + size: int, + type: Any | None = None, + memory_pool: MemoryPool | None = None, +) -> ArrayLike: ... + + +def repeat( + value: Any, + size: int, + memory_pool: MemoryPool | None = None, +) -> ArrayLike: ... + + +def infer_type(values: Iterable[Any], mask: Mask | None = None, + from_pandas: bool = False) -> DataType: ... + + +class ArrayStatistics(_Weakrefable): + @property + def null_count(self) -> int | None: ... + + @property + def distinct_count(self) -> int | None: ... + + @property + def is_null_count_exact(self) -> bool | None: ... + + @property + def is_distinct_count_exact(self) -> bool | None: ... + + @property + def min(self) -> Any | None: ... + + @property + def is_min_exact(self) -> bool | None: ... + + @property + def max(self) -> Any | None: ... + + @property + def is_max_exact(self) -> bool | None: ... 
+ + +_ConvertAs = TypeVar("_ConvertAs", pd.DataFrame, pd.Series) + + +class _PandasConvertible(_Weakrefable, Generic[_ConvertAs]): + def to_pandas( + self, + memory_pool: MemoryPool | None = None, + categories: list | tuple | None = None, + strings_to_categorical: bool = False, + zero_copy_only: bool = False, + integer_object_nulls: bool = False, + date_as_object: bool = True, + timestamp_as_object: bool = False, + use_threads: bool = True, + deduplicate_objects: bool = True, + ignore_metadata: bool = False, + safe: bool = True, + split_blocks: bool = False, + self_destruct: bool = False, + maps_as_pydicts: Literal["None", "lossy", "strict"] | None = None, + types_mapper: Any = None, # Callable[[DataType], ExtensionDtype | None] | None + coerce_temporal_nanoseconds: bool = False, + ) -> _ConvertAs: ... + + +_CastAs = TypeVar("_CastAs", bound=DataType) +_Scalar_co = TypeVar("_Scalar_co", bound=Scalar, covariant=True) +_ScalarT = TypeVar("_ScalarT", bound=Scalar) + + +class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): + def as_py(self) -> list[Any]: ... + + def diff(self, other: Self) -> str: ... + + # Private attribute used internally (e.g., for column names in batches) + _name: str | None + + def cast( + self, + target_type: _CastAs | str, + safe: bool = True, + options: CastOptions | None = None, + memory_pool: MemoryPool | None = None, + ) -> Array[Scalar[_CastAs]]: ... + + def view(self, target_type: _CastAs) -> Array[Scalar[_CastAs]]: ... + + def sum(self, **kwargs) -> _Scalar_co: ... + + @property + def type(self: Array[Scalar[_DataTypeT]]) -> _DataTypeT: ... + def unique(self) -> Self: ... + + def dictionary_encode(self, null_encoding: str = "mask") -> DictionaryArray: ... + + def value_counts(self) -> StructArray: ... 
+ + @staticmethod + def from_pandas( + obj: pd.Series | np.ndarray | ArrayLike, + *, + mask: Mask | None = None, + type: _DataTypeT | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, + ) -> Array[Scalar[_DataTypeT]] | Array[Scalar]: ... + + @staticmethod + def from_buffers( + type: _DataTypeT, + length: int, + buffers: Sequence[Buffer | None], + null_count: int = -1, + offset=0, + children: NullableCollection[Array[Scalar[_DataTypeT]]] | None = None, + ) -> Array[Scalar[_DataTypeT]]: ... + + @property + def null_count(self) -> int: ... + @property + def nbytes(self) -> int: ... + + def get_total_buffer_size(self) -> int: ... + + def __sizeof__(self) -> int: ... + def __iter__(self) -> Iterator[_Scalar_co]: ... + + def to_string( + self, + *, + indent: int = 2, + top_level_indent: int = 0, + window: int = 10, + container_window: int = 2, + skip_new_lines: bool = False, + ) -> str: ... + + format = to_string + def equals(self, other: Array | Any) -> bool: ... + + def __len__(self) -> int: ... + + def is_null(self, *, nan_is_null: bool = False) -> BooleanArray: ... + + def is_nan(self) -> BooleanArray: ... + + def is_valid(self) -> BooleanArray: ... + + def fill_null( + self: Array[Scalar[_BasicDataType[_AsPyType]]], fill_value: _AsPyType + ) -> Array[Scalar[_BasicDataType[_AsPyType]]]: ... + + def __getitem__(self, key: int | builtins.slice) -> _Scalar_co | Self: ... + + def slice(self, offset: int = 0, length: int | None = None) -> Self: ... + + def take(self, indices: Indices) -> Self: ... + + def drop_null(self) -> Self: ... + + def filter( + self, + mask: Mask, + *, + null_selection_behavior: Literal["drop", "emit_null"] = "drop", + ) -> Self: ... + + def index( + self: Array[_ScalarT] | Array[Scalar[_BasicDataType[_AsPyType]]], + value: _ScalarT | _AsPyType, + start: int | None = None, + end: int | None = None, + *, + memory_pool: MemoryPool | None = None, + ) -> Int64Scalar: ... 
+ + def sort(self, order: Order = "ascending", **kwargs) -> Self: ... + + def __array__(self, dtype: np.dtype | None = None, + copy: bool | None = None) -> np.ndarray: ... + + def to_numpy(self, zero_copy_only: bool = True, + writable: bool = False) -> np.ndarray: ... + + def to_pylist( + self, + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[Any]: ... + + tolist = to_pylist + def validate(self, *, full: bool = False) -> None: ... + + @property + def offset(self) -> int: ... + + def buffers(self) -> list[Buffer | None]: ... + + def copy_to(self, destination: MemoryManager | Device) -> Self: ... + + def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0) -> None: ... + + @classmethod + def _import_from_c(cls, in_ptr: int, type: int | DataType) -> Self: ... + + def __arrow_c_array__(self, requested_schema=None) -> Any: ... + + @classmethod + def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: ... + def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: ... + + @classmethod + def _import_from_c_device(cls, in_ptr: int, type: DataType | int) -> Self: ... + + def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ... + + @classmethod + def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: ... + def __dlpack__(self, stream: int | None = None) -> Any: ... + + def __dlpack_device__(self) -> tuple[int, int]: ... + + @property + def device_type(self) -> DeviceAllocationType: ... + + @property + def is_cpu(self) -> bool: ... + + @property + def statistics(self) -> ArrayStatistics | None: ... + + +class NullArray(Array[NullScalar]): + ... + + +class BooleanArray(Array[BooleanScalar]): + @property + def false_count(self) -> int: ... + @property + def true_count(self) -> int: ... + + +class NumericArray(Array[_ScalarT]): + ... + + +class IntegerArray(NumericArray[_ScalarT]): + ... + + +class FloatingPointArray(NumericArray[_ScalarT]): + ... 
+ + +class Int8Array(IntegerArray[Int8Scalar]): + ... + + +class UInt8Array(IntegerArray[UInt8Scalar]): + ... + + +class Int16Array(IntegerArray[Int16Scalar]): + ... + + +class UInt16Array(IntegerArray[UInt16Scalar]): + ... + + +class Int32Array(IntegerArray[Int32Scalar]): + ... + + +class UInt32Array(IntegerArray[UInt32Scalar]): + ... + + +class Int64Array(IntegerArray[Int64Scalar]): + ... + + +class UInt64Array(IntegerArray[UInt64Scalar]): + ... + + +class Date32Array(NumericArray[Date32Scalar]): + ... + + +class Date64Array(NumericArray[Date64Scalar]): + ... + + +class TimestampArray(NumericArray[TimestampScalar[_Unit, _Tz]]): + ... + + +class Time32Array(NumericArray[Time32Scalar[_Time32Unit]]): + ... + + +class Time64Array(NumericArray[Time64Scalar[_Time64Unit]]): + ... + + +class DurationArray(NumericArray[DurationScalar[_Unit]]): + ... + + +class MonthDayNanoIntervalArray(Array[MonthDayNanoIntervalScalar]): + ... + + +class HalfFloatArray(FloatingPointArray[HalfFloatScalar]): + ... + + +class FloatArray(FloatingPointArray[FloatScalar]): + ... + + +class DoubleArray(FloatingPointArray[DoubleScalar]): + ... + + +class FixedSizeBinaryArray(Array[FixedSizeBinaryScalar]): + ... + + +class Decimal32Array(FixedSizeBinaryArray): + ... + + +class Decimal64Array(FixedSizeBinaryArray): + ... + + +class Decimal128Array(FixedSizeBinaryArray): + ... + + +class Decimal256Array(FixedSizeBinaryArray): + ... + + +class BaseListArray(Array[_ScalarT]): + def flatten(self, recursive: bool = False) -> Array: ... + + def value_parent_indices(self) -> Int64Array: ... + + def value_lengths(self) -> Int32Array: ... 
+ + +class ListArray(BaseListArray[_ScalarT]): + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int] | list[int | None], + values: Array[Scalar[_DataTypeT]] | list[int] | list[float] | list[str] + | list[bytes] | list, + *, + type: _DataTypeT | None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> (ListArray[ListScalar[ + _DataTypeT | Int64Type | Float64Type | StringType | BinaryType + ]] | ListArray): ... + + @property + def values(self) -> Array: ... + + @property + def offsets(self) -> Int32Array: ... + + +class LargeListArray(BaseListArray[LargeListScalar[_DataTypeT]]): + @classmethod + def from_arrays( + cls, + offsets: Int64Array | list[int] | list[int | None], + values: Array[Scalar[_DataTypeT]] | Array, + *, + type: _DataTypeT | None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> LargeListArray[_DataTypeT]: ... + + @property + def values(self) -> Array: ... + + @property + def offsets(self) -> Int64Array: ... + + +class ListViewArray(BaseListArray[ListViewScalar[_DataTypeT]]): + @classmethod + def from_arrays( + cls, + offsets: Int32Array, + values: Array[Scalar[_DataTypeT]] | Array, + *, + type: _DataTypeT | None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListViewArray[_DataTypeT]: ... + + @property + def values(self) -> Array: ... + + @property + def offsets(self) -> Int32Array: ... + + @property + def sizes(self) -> Int32Array: ... + + +class LargeListViewArray(BaseListArray[LargeListScalar[_DataTypeT]]): + @classmethod + def from_arrays( + cls, + offsets: Int64Array, + values: Array[Scalar[_DataTypeT]] | Array, + *, + type: _DataTypeT | None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> LargeListViewArray[_DataTypeT]: ... + + @property + def values(self) -> Array: ... + + @property + def offsets(self) -> Int64Array: ... + + @property + def sizes(self) -> Int64Array: ... 
+ + +class FixedSizeListArray(BaseListArray[FixedSizeListScalar[_DataTypeT, _Size]]): + @classmethod + def from_arrays( + cls, + values: Array[Scalar[_DataTypeT]], + list_size: _Size | None = None, + *, + type: DataType | None = None, + mask: Mask | None = None, + ) -> FixedSizeListArray[_DataTypeT, _Size | None]: ... + + @property + def values(self) -> BaseListArray[ListScalar[_DataTypeT]]: ... + + +_MapKeyT = TypeVar("_MapKeyT", bound=_BasicDataType) +_MapItemT = TypeVar("_MapItemT", bound=_BasicDataType) + + +class MapArray(BaseListArray[MapScalar[_MapKeyT, _MapItemT]]): + @classmethod + def from_arrays( + cls, + offsets: Int64Array | list[int] | None, + keys: Array[Scalar[_MapKeyT]] | np.ndarray | list | None = None, + items: Array[Scalar[_MapItemT]] | np.ndarray | list | None = None, + values: Array | DataType | None = None, + *, + type: DataType | None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> MapArray[_MapKeyT, _MapItemT]: ... + + @property + def keys(self) -> Array: ... + + @property + def items(self) -> Array: ... + + +class UnionArray(Array[UnionScalar]): + @deprecated("Use fields() instead") + def child(self, pos: int) -> Field: ... + + def field(self, pos: int) -> Array: ... + + @property + def type_codes(self) -> Int8Array: ... + + @property + def offsets(self) -> Int32Array: ... + + @staticmethod + def from_dense( + types: Int8Array, + value_offsets: Int32Array, + children: NullableCollection[Array], + field_names: list[str] | None = None, + type_codes: Int8Array | list[int] | None = None, + ) -> UnionArray: ... + + @staticmethod + def from_sparse( + types: Int8Array, + children: NullableCollection[Array], + field_names: list[str] | None = None, + type_codes: Int8Array | list[int] | None = None, + ) -> UnionArray: ... 
+ + +class StringArray(Array[StringScalar]): + @staticmethod + def from_buffers( # type: ignore[override] + length: int, + value_offsets: Buffer, + data: Buffer, + null_bitmap: Buffer | None = None, + null_count: int | None = -1, + offset: int | None = 0, + ) -> StringArray: ... + + +class LargeStringArray(Array[LargeStringScalar]): + @staticmethod + def from_buffers( # type: ignore[override] + length: int, + value_offsets: Buffer, + data: Buffer, + null_bitmap: Buffer | None = None, + null_count: int | None = -1, + offset: int | None = 0, + ) -> LargeStringArray: ... + + +class StringViewArray(Array[StringViewScalar]): + ... + + +class BinaryArray(Array[BinaryScalar]): + @property + def total_values_length(self) -> int: ... + + +class LargeBinaryArray(Array[LargeBinaryScalar]): + @property + def total_values_length(self) -> int: ... + + +class BinaryViewArray(Array[BinaryViewScalar]): + ... + + +class DictionaryArray(Array[DictionaryScalar[_IndexT, _BasicValueT]]): + def dictionary_encode(self) -> Self: ... # type: ignore[override] + def dictionary_decode(self) -> Array[Scalar[_BasicValueT]]: ... + + @property + def indices(self) -> Array[Scalar[_IndexT]]: ... + @property + def dictionary(self) -> Array[Scalar[_BasicValueT]]: ... + + @staticmethod + def from_buffers( # type: ignore[override] + type: _BasicValueT, + length: int, + buffers: list[Buffer], + dictionary: Array | np.ndarray | pd.Series, + null_count: int = -1, + offset: int = 0, + ) -> DictionaryArray[Any, _BasicValueT]: ... + + @staticmethod + def from_arrays( + indices: Indices | Sequence[int | None], + dictionary: Array | np.ndarray | pd.Series | list[Any], + mask: np.ndarray | pd.Series | BooleanArray | None = None, + ordered: bool = False, + from_pandas: bool = False, + safe: bool = True, + memory_pool: MemoryPool | None = None, + ) -> DictionaryArray: ... + + +class StructArray(Array[StructScalar]): + def field(self, index: int | str) -> Array: ... 
+ + def flatten(self, memory_pool: MemoryPool | None = None) -> list[Array]: ... + + @staticmethod + def from_arrays( + arrays: Iterable[Array | np.ndarray | list], + names: Sequence[str] | list[Field] | None = None, + fields: list[Field] | None = None, + mask=None, + memory_pool: MemoryPool | None = None, + type: StructType | None = None, + ) -> StructArray: ... + + def sort(self, order: Order = "ascending", by: str | + None = None, **kwargs) -> StructArray: ... + + +class RunEndEncodedArray(Array[RunEndEncodedScalar[_RunEndType, _BasicValueT]]): + @staticmethod + def from_arrays( + run_ends: Int16Array | Int32Array | Int64Array | list[int], + values: Array | list[Any], type: DataType | None = None, + ) -> RunEndEncodedArray[Any, _BasicValueT]: ... + + @staticmethod + def from_buffers( # type: ignore[override] + type: DataType, + length: int, + buffers: list[Buffer] | list[None], + null_count: int = -1, + offset=0, + children: tuple[Array, Array] | list[list[int]] | None = None, + ) -> RunEndEncodedArray[Any, _BasicValueT]: ... + + @property + def run_ends(self) -> Array[Scalar[_RunEndType]]: ... + + @property + def values(self) -> Array[Scalar[_BasicValueT]]: ... + + def find_physical_offset(self) -> int: ... + + def find_physical_length(self) -> int: ... + + +_ArrayT = TypeVar("_ArrayT", bound=Array) + + +class ExtensionArray(Array[ExtensionScalar], Generic[_ArrayT]): + @property + def storage(self) -> Any: ... + + @staticmethod + def from_storage(typ: BaseExtensionType, + storage: _ArrayT) -> ExtensionArray[_ArrayT]: ... + + +class JsonArray(ExtensionArray[_ArrayT]): + ... + + +class UuidArray(ExtensionArray[_ArrayT]): + ... + + +class FixedShapeTensorArray(ExtensionArray[_ArrayT]): + def to_numpy_ndarray(self) -> np.ndarray: ... + + def to_tensor(self) -> Tensor: ... + + @classmethod + def from_numpy_ndarray( + cls, obj: np.ndarray, + dim_names: list[str] | tuple[str, ...] | None = None + ) -> Self: ... + + +class OpaqueArray(ExtensionArray[_ArrayT]): + ... 
+ + +class Bool8Array(ExtensionArray): + def to_numpy(self, zero_copy_only: bool = ..., + writable: bool = ...) -> np.ndarray: ... + + @classmethod + def from_storage(cls, storage: Int8Array) -> Self: ... # type: ignore[override] + + @classmethod + def from_numpy(cls, obj: np.ndarray) -> Self: ... + + +def concat_arrays(arrays: Iterable[_ArrayT], + memory_pool: MemoryPool | None = None) -> _ArrayT: ... + + +def _empty_array(type: _DataTypeT) -> Array[Scalar[_DataTypeT]]: ... + + +__all__ = [ + "array", + "asarray", + "nulls", + "repeat", + "infer_type", + "_PandasConvertible", + "Array", + "NullArray", + "BooleanArray", + "NumericArray", + "IntegerArray", + "FloatingPointArray", + "Int8Array", + "UInt8Array", + "Int16Array", + "UInt16Array", + "Int32Array", + "UInt32Array", + "Int64Array", + "UInt64Array", + "Date32Array", + "Date64Array", + "TimestampArray", + "Time32Array", + "Time64Array", + "DurationArray", + "MonthDayNanoIntervalArray", + "HalfFloatArray", + "FloatArray", + "DoubleArray", + "FixedSizeBinaryArray", + "Decimal32Array", + "Decimal64Array", + "Decimal128Array", + "Decimal256Array", + "BaseListArray", + "ListArray", + "LargeListArray", + "ListViewArray", + "LargeListViewArray", + "FixedSizeListArray", + "MapArray", + "UnionArray", + "StringArray", + "LargeStringArray", + "StringViewArray", + "BinaryArray", + "LargeBinaryArray", + "BinaryViewArray", + "DictionaryArray", + "StructArray", + "RunEndEncodedArray", + "ExtensionArray", + "Bool8Array", + "UuidArray", + "JsonArray", + "OpaqueArray", + "FixedShapeTensorArray", + "concat_arrays", + "_empty_array", + "_CastAs", +] diff --git a/python/pyarrow-stubs/pyarrow/builder.pyi b/python/pyarrow-stubs/pyarrow/builder.pyi new file mode 100644 index 00000000000..9001d9835b6 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/builder.pyi @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from collections.abc import Iterable + +from pyarrow.lib import MemoryPool, _Weakrefable + +from .array import StringArray, StringViewArray + + +class StringBuilder(_Weakrefable): + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def append(self, value: str | bytes | float | None): ... + + def append_values(self, values: Iterable[str | bytes | float | None]): ... + + def finish(self) -> StringArray: ... + + @property + def null_count(self) -> int: ... + def __len__(self) -> int: ... + + +class StringViewBuilder(_Weakrefable): + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def append(self, value: str | bytes | float | None): ... + + def append_values(self, values: Iterable[str | bytes | float | None]): ... + + def finish(self) -> StringViewArray: ... + + @property + def null_count(self) -> int: ... + def __len__(self) -> int: ... + + +__all__ = ["StringBuilder", "StringViewBuilder"] diff --git a/python/pyarrow-stubs/pyarrow/config.pyi b/python/pyarrow-stubs/pyarrow/config.pyi new file mode 100644 index 00000000000..069b70e553a --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/config.pyi @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import NamedTuple + + +class VersionInfo(NamedTuple): + major: int + minor: int + patch: int + + +class CppBuildInfo(NamedTuple): + version: str + version_info: VersionInfo + so_version: str + full_so_version: str + compiler_id: str + compiler_version: str + compiler_flags: str + git_id: str + git_description: str + package_kind: str + build_type: str + + +class BuildInfo(NamedTuple): + build_type: str + cpp_build_info: CppBuildInfo + + +class RuntimeInfo(NamedTuple): + simd_level: str + detected_simd_level: str + + +build_info: BuildInfo +cpp_build_info: CppBuildInfo +cpp_version: str +cpp_version_info: VersionInfo + + +def runtime_info() -> RuntimeInfo: ... +def set_timezone_db_path(path: str) -> None: ... + + +__all__ = [ + "VersionInfo", + "BuildInfo", + "CppBuildInfo", + "RuntimeInfo", + "build_info", + "cpp_build_info", + "cpp_version", + "cpp_version_info", + "runtime_info", + "set_timezone_db_path", +] diff --git a/python/pyarrow-stubs/pyarrow/device.pyi b/python/pyarrow-stubs/pyarrow/device.pyi new file mode 100644 index 00000000000..7787ac44deb --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/device.pyi @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import enum + +from pyarrow.lib import _Weakrefable + + +class DeviceAllocationType(enum.Enum): + CPU = enum.auto() + CUDA = enum.auto() + CUDA_HOST = enum.auto() + OPENCL = enum.auto() + VULKAN = enum.auto() + METAL = enum.auto() + VPI = enum.auto() + ROCM = enum.auto() + ROCM_HOST = enum.auto() + EXT_DEV = enum.auto() + CUDA_MANAGED = enum.auto() + ONEAPI = enum.auto() + WEBGPU = enum.auto() + HEXAGON = enum.auto() + + +class Device(_Weakrefable): + @property + def type_name(self) -> str: ... + + @property + def device_id(self) -> int: ... + + @property + def is_cpu(self) -> bool: ... + + @property + def device_type(self) -> DeviceAllocationType: ... + + +class MemoryManager(_Weakrefable): + @property + def device(self) -> Device: ... + + @property + def is_cpu(self) -> bool: ... + + +def default_cpu_memory_manager() -> MemoryManager: ... + + +__all__ = ["DeviceAllocationType", "Device", + "MemoryManager", "default_cpu_memory_manager"] diff --git a/python/pyarrow-stubs/pyarrow/memory.pyi b/python/pyarrow-stubs/pyarrow/memory.pyi new file mode 100644 index 00000000000..f80e01ab21c --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/memory.pyi @@ -0,0 +1,94 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyarrow.lib import _Weakrefable + + +class MemoryPool(_Weakrefable): + def release_unused(self) -> None: ... + + def bytes_allocated(self) -> int: ... + + def total_bytes_allocated(self) -> int: ... + + def max_memory(self) -> int | None: ... + + def num_allocations(self) -> int: ... + + def print_stats(self) -> None: ... + + @property + def backend_name(self) -> str: ... + + +class LoggingMemoryPool(MemoryPool): + ... + + +class ProxyMemoryPool(MemoryPool): + ... + + +def default_memory_pool() -> MemoryPool: ... + + +def proxy_memory_pool(parent: MemoryPool) -> ProxyMemoryPool: ... + + +def logging_memory_pool(parent: MemoryPool) -> LoggingMemoryPool: ... + + +def system_memory_pool() -> MemoryPool: ... + + +def jemalloc_memory_pool() -> MemoryPool: ... + + +def mimalloc_memory_pool() -> MemoryPool: ... + + +def set_memory_pool(pool: MemoryPool) -> None: ... + + +def log_memory_allocations(enable: bool = True) -> None: ... + + +def total_allocated_bytes() -> int: ... + + +def jemalloc_set_decay_ms(decay_ms: int) -> None: ... + + +def supported_memory_backends() -> list[str]: ... 
+ + +__all__ = [ + "MemoryPool", + "LoggingMemoryPool", + "ProxyMemoryPool", + "default_memory_pool", + "proxy_memory_pool", + "logging_memory_pool", + "system_memory_pool", + "jemalloc_memory_pool", + "mimalloc_memory_pool", + "set_memory_pool", + "log_memory_allocations", + "total_allocated_bytes", + "jemalloc_set_decay_ms", + "supported_memory_backends", +] diff --git a/python/pyarrow-stubs/pyarrow/table.pyi b/python/pyarrow-stubs/pyarrow/table.pyi new file mode 100644 index 00000000000..6dd61674d40 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/table.pyi @@ -0,0 +1,686 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias +from collections.abc import ( + Collection, Generator, Iterable, Iterator, Sequence, Mapping) +from typing import Any, Generic, Literal, TypeVar +import builtins + +import numpy as np +import pandas as pd + +from numpy.typing import NDArray +from pyarrow._compute import ( + CastOptions, + CountOptions, + FunctionOptions, + ScalarAggregateOptions, + TDigestOptions, + VarianceOptions, +) +from pyarrow._stubs_typing import ( + Indices, + Mask, + NullEncoding, + NullSelectionBehavior, + Order, + SupportArrowArray, + SupportArrowDeviceArray, + SupportArrowStream, +) +from pyarrow.compute import Expression +from pyarrow.interchange.dataframe import _PyArrowDataFrame +from pyarrow.lib import Device, MemoryManager, MemoryPool, Schema +from pyarrow.lib import Field as _Field + +from .array import Array, StructArray, _CastAs, _PandasConvertible +from .device import DeviceAllocationType +from .io import Buffer +from ._ipc import RecordBatchReader +from .scalar import BooleanScalar, Int64Scalar, Scalar, StructScalar +from .tensor import Tensor +from ._stubs_typing import NullableCollection +from ._types import DataType, _AsPyType, _BasicDataType, _DataTypeT + +Field: TypeAlias = _Field[DataType] +_ScalarT = TypeVar("_ScalarT", bound=Scalar) +_Scalar_co = TypeVar("_Scalar_co", bound=Scalar, covariant=True) +ArrayOrChunkedArray: TypeAlias = Array[_Scalar_co] | ChunkedArray[_Scalar_co] + +_Aggregation: TypeAlias = Literal[ + "all", + "any", + "approximate_median", + "count", + "count_all", + "count_distinct", + "distinct", + "first", + "first_last", + "last", + "list", + "max", + "mean", + "min", + "min_max", + "one", + "product", + "stddev", + "sum", + "tdigest", + "variance", +] +_AggregationPrefixed: TypeAlias = Literal[ + "hash_all", + "hash_any", 
+ "hash_approximate_median", + "hash_count", + "hash_count_all", + "hash_count_distinct", + "hash_distinct", + "hash_first", + "hash_first_last", + "hash_last", + "hash_list", + "hash_max", + "hash_mean", + "hash_min", + "hash_min_max", + "hash_one", + "hash_product", + "hash_stddev", + "hash_sum", + "hash_tdigest", + "hash_variance", +] +Aggregation: TypeAlias = _Aggregation | _AggregationPrefixed | str +AggregateOptions: TypeAlias = (ScalarAggregateOptions | CountOptions + | TDigestOptions | VarianceOptions | FunctionOptions) + +UnarySelector: TypeAlias = str +NullarySelector: TypeAlias = tuple[()] +NarySelector: TypeAlias = list[str] | tuple[str, ...] +ColumnSelector: TypeAlias = UnarySelector | NullarySelector | NarySelector + + +class ChunkedArray(_PandasConvertible[pd.Series], Generic[_Scalar_co]): + + def as_py(self) -> list[Any]: ... + + @property + def data(self) -> Self: ... + @property + def type(self: ChunkedArray[Scalar[_DataTypeT]]) -> _DataTypeT: ... + + # Private attribute used internally for column names + _name: str | None + + def length(self) -> int: ... + + __len__ = length + + def to_string( + self, + *, + indent: int = 0, + window: int = 5, + container_window: int = 2, + skip_new_lines: bool = False, + ) -> str: ... + + format = to_string + def validate(self, *, full: bool = False) -> None: ... + + @property + def null_count(self) -> int: ... + + @property + def nbytes(self) -> int: ... + + def get_total_buffer_size(self) -> int: ... + + def __sizeof__(self) -> int: ... + + def __getitem__( + self, key: int | np.integer | builtins.slice) -> _Scalar_co | Self: ... + + def getitem(self, i: int) -> Scalar: ... + def is_null(self, *, nan_is_null: bool = False) -> ChunkedArray[BooleanScalar]: ... + + def is_nan(self) -> ChunkedArray[BooleanScalar]: ... + + def is_valid(self) -> ChunkedArray[BooleanScalar]: ... 
+ + def cast( + self, target_type: _CastAs | str | None, safe: bool = True, + options: CastOptions | None = None, + memory_pool: MemoryPool | None = None + ) -> Self | ChunkedArray[Scalar[_CastAs]]: ... + + def fill_null(self, fill_value: Scalar[_DataTypeT] | Any) -> Self: ... + + def equals(self, other: Self | Any) -> bool: ... + + def to_numpy(self, zero_copy_only: bool = False) -> np.ndarray: ... + + def __array__(self, dtype: np.dtype | None = None, + copy: bool | None = None) -> np.ndarray: ... + + def dictionary_encode(self, null_encoding: NullEncoding = "mask") -> Self: ... + + def flatten(self, memory_pool: MemoryPool | + None = None) -> list[ChunkedArray[Any]]: ... + + def combine_chunks(self, memory_pool: MemoryPool | + None = None) -> Array[_Scalar_co]: ... + + def unique(self) -> ChunkedArray[_Scalar_co]: ... + + def value_counts(self) -> StructArray: ... + + def slice(self, offset: int = 0, length: int | None = None) -> Self: ... + + def filter(self, mask: Mask, + null_selection_behavior: NullSelectionBehavior = "drop") -> Self: ... + + def index( + self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]], + value: Scalar[_DataTypeT] | _AsPyType, + start: int | None = None, + end: int | None = None, + *, + memory_pool: MemoryPool | None = None, + ) -> Int64Scalar: ... + + def take(self, indices: Indices) -> Self: ... + + def drop_null(self) -> Self: ... + + def sort(self, order: Order = "ascending", **kwargs) -> Self: ... + + def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: ... + + @property + def num_chunks(self) -> int: ... + + def chunk(self, i: int) -> Array[_Scalar_co]: ... + + @property + def chunks(self) -> list[Array[_Scalar_co]]: ... + + def iterchunks( + self: ArrayOrChunkedArray[_ScalarT], + ) -> Generator[Array, None, None]: ... + + def __iter__(self) -> Iterator[_Scalar_co]: ... 
+ + def to_pylist( + self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[_AsPyType | None]: ... + + def __arrow_c_stream__(self, requested_schema=None) -> Any: ... + + @classmethod + def _import_from_c_capsule(cls, stream) -> Self: ... + + @property + def is_cpu(self) -> bool: ... + + +def chunked_array( + arrays: Iterable[NullableCollection[Any]] + | Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray] + | Iterable[Array[_ScalarT]] | Array[_ScalarT] + | SupportArrowArray | SupportArrowStream, + type: DataType | str | None = None, +) -> ChunkedArray[Scalar[Any]] | ChunkedArray[_ScalarT]: ... + + +_ColumnT = TypeVar("_ColumnT", bound=ArrayOrChunkedArray[Any]) + + +class _Tabular(_PandasConvertible[pd.DataFrame], Generic[_ColumnT]): + def __array__(self, dtype: np.dtype | None = None, + copy: bool | None = None) -> np.ndarray: ... + + def __dataframe__( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> _PyArrowDataFrame: ... + + def __getitem__(self, key: int | str | slice) -> _ColumnT | Self: ... + + def __len__(self) -> int: ... + def column(self, i: int | str) -> _ColumnT: ... + + @property + def column_names(self) -> list[str]: ... + + @property + def columns(self) -> list[_ColumnT]: ... + + def drop_null(self) -> Self: ... + + def field(self, i: int | str) -> Field: ... + + @classmethod + def from_pydict( + cls, + mapping: + Mapping[Any, ArrayOrChunkedArray[Any] | list[Any] | np.ndarray | range], + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + ) -> Self: ... + + @classmethod + def from_pylist( + cls, + mapping: Sequence[Mapping[str, Any]], + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + ) -> Self: ... + + def itercolumns(self) -> Generator[_ColumnT, None, None]: ... + + @property + def num_columns(self) -> int: ... + @property + def num_rows(self) -> int: ... 
+ @property + def shape(self) -> tuple[int, int]: ... + + @property + def schema(self) -> Schema: ... + @property + def nbytes(self) -> int: ... + def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> Self: ... + + def take(self, indices: Indices) -> Self: ... + + def filter( + self, + mask: Mask | Expression, + null_selection_behavior: NullSelectionBehavior = "drop") -> Self: ... + + def to_pydict( + self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None + ) -> dict[str, list[Any]]: ... + + def to_pylist( + self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None + ) -> list[dict[str, Any]]: ... + + def to_string(self, *, show_metadata: bool = False, + preview_cols: int = 0) -> str: ... + + def remove_column(self, i: int) -> Self: ... + def drop_columns(self, columns: str | list[str]) -> Self: ... + + def add_column(self, i: int, field_: str | Field, + column: ArrayOrChunkedArray[Any] | list[list[Any]]) -> Self: ... + + def append_column( + self, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] + ) -> Self: ... + + +class RecordBatch(_Tabular[Array]): + def validate(self, *, full: bool = False) -> None: ... + + def replace_schema_metadata( + self, + metadata: dict[str, str] + | dict[bytes, bytes] + | dict[bytes, str] + | dict[str, bytes] + | None = None + ) -> Self: ... + + @property + def num_columns(self) -> int: ... + + @property + def num_rows(self) -> int: ... + + @property + def schema(self) -> Schema: ... + + @property + def nbytes(self) -> int: ... + + def get_total_buffer_size(self) -> int: ... + + def __sizeof__(self) -> int: ... + + def add_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list + ) -> Self: ... + + def remove_column(self, i: int) -> Self: ... + + def set_column(self, i: int, field_: str | Field, column: Array | list) -> Self: ... + + def rename_columns(self, names: list[str] | dict[str, str]) -> Self: ... 
+ + def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ... + + def slice(self, offset: int = 0, length: int | None = None) -> Self: ... + + def equals(self, other: Self | Any, check_metadata: bool = False) -> bool: ... + + def select(self, columns: Iterable[str] | + Iterable[int] | NDArray[np.str_]) -> Self: ... + + def cast(self, target_schema: Schema, safe: bool | None = None, + options: CastOptions | None = None) -> Self: ... + + @classmethod + def from_arrays( + cls, + arrays: Iterable[Any], + names: list[str] | tuple[str, ...] | None = None, + schema: Schema | None = None, + metadata: Mapping[bytes, bytes] + | Mapping[str, str] + | Mapping[bytes, str] + | Mapping[str, bytes] + | None = None, + ) -> Self: ... + + @classmethod + def from_pandas( + cls, + df: pd.DataFrame, + schema: Schema | None = None, + preserve_index: bool | None = None, + nthreads: int | None = None, + columns: Sequence[str | int] | None = None, + ) -> Self: ... + + @classmethod + def from_struct_array( + cls, struct_array: StructArray | ChunkedArray[StructScalar] + ) -> Self: ... + + def to_struct_array(self) -> StructArray: ... + + def to_tensor( + self, + null_to_nan: bool = False, + row_major: bool = True, + memory_pool: MemoryPool | None = None, + ) -> Tensor: ... + + def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0): ... + + @classmethod + def _import_from_c(cls, in_ptr: int, schema: Schema) -> Self: ... + + def __arrow_c_array__(self, requested_schema=None): ... + + def __arrow_c_stream__(self, requested_schema=None): ... + + @classmethod + def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: ... + + def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: ... + + @classmethod + def _import_from_c_device(cls, in_ptr: int, schema: Schema) -> Self: ... + + def __arrow_c_device_array__(self, requested_schema=None, **kwargs): ... 
+ + @classmethod + def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: ... + + @property + def device_type(self) -> DeviceAllocationType: ... + + @property + def is_cpu(self) -> bool: ... + + def copy_to(self, destination: MemoryManager | Device) -> Self: ... + + +def table_to_blocks(options, table: Table, categories, extension_columns): ... + + +JoinType: TypeAlias = Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", +] + + +class Table(_Tabular[ChunkedArray[Any]]): + def validate(self, *, full: bool = False) -> None: ... + + def slice(self, offset: int = 0, length: int | None = None) -> Self: ... + + def select(self, columns: Iterable[str] | + Iterable[int] | NDArray[np.str_]) -> Self: ... + + def replace_schema_metadata( + self, metadata: dict[str, str] + | dict[bytes, bytes] + | dict[bytes, str] + | dict[str, bytes] + | None = None + ) -> Self: ... + + def flatten(self, memory_pool: MemoryPool | None = None) -> Self: ... + + def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Self: ... + + def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: ... + + def equals(self, other: Self | Any, check_metadata: bool = False) -> bool: ... + + def cast(self, target_schema: Schema, safe: bool | None = None, + options: CastOptions | None = None) -> Self: ... + + @classmethod + def from_pandas( + cls, + df: pd.DataFrame, + schema: Schema | None = None, + preserve_index: bool | None = None, + nthreads: int | None = None, + columns: Sequence[str | int] | None = None, + safe: bool = True, + ) -> Self: ... + + @classmethod + def from_arrays( + cls, + arrays: + Collection[ArrayOrChunkedArray[Any] | Collection[NDArray[Any]] | list[Any]], + names: list[str] | tuple[str, ...] 
| None = None, + schema: Schema | None = None, + metadata: Mapping[bytes, bytes] + | Mapping[str, str] + | Mapping[bytes, str] + | Mapping[str, bytes] | None = None, + ) -> Self: ... + + @classmethod + def from_struct_array( + cls, struct_array: StructArray | ChunkedArray[StructScalar] + ) -> Self: ... + + def to_struct_array( + self, max_chunksize: int | None = None + ) -> ChunkedArray[StructScalar]: ... + + @classmethod + def from_batches(cls, batches: Iterable[RecordBatch], + schema: Schema | None = None) -> Self: ... + + def to_batches(self, max_chunksize: int | None = None) -> list[RecordBatch]: ... + + def to_reader(self, max_chunksize: int | None = None) -> RecordBatchReader: ... + + @property + def schema(self) -> Schema: ... + + @property + def num_columns(self) -> int: ... + + @property + def num_rows(self) -> int: ... + + @property + def nbytes(self) -> int: ... + + def get_total_buffer_size(self) -> int: ... + + def __sizeof__(self) -> int: ... + + def add_column(self, i: int, field_: str | Field, + column: ArrayOrChunkedArray[Any] | list[list[Any]]) -> Self: ... + + def remove_column(self, i: int) -> Self: ... + + def set_column(self, i: int, field_: str | Field, + column: ArrayOrChunkedArray[Any] | list[list[Any]]) -> Self: ... + + def rename_columns(self, names: list[str] | dict[str, str]) -> Self: ... + + def drop(self, columns: str | list[str]) -> Self: ... + + def group_by(self, keys: str | list[str], + use_threads: bool = True) -> TableGroupBy: ... + + def join( + self, + right_table: Self, + keys: str | list[str], + right_keys: str | list[str] | None = None, + join_type: JoinType = "left outer", + left_suffix: str | None = None, + right_suffix: str | None = None, + coalesce_keys: bool = True, + use_threads: bool = True, + ) -> Self: ... + + def join_asof( + self, + right_table: Self, + on: str, + by: str | list[str], + tolerance: int, + right_on: str | list[str] | None = None, + right_by: str | list[str] | None = None, + ) -> Self: ... 
+ + def __arrow_c_stream__(self, requested_schema=None): ... + + @property + def is_cpu(self) -> bool: ... + + +def record_batch( + data: Mapping[str, list[Any] | Array[Any]] + | Collection[Array[Any] | ChunkedArray[Any] | list[Any]] + | pd.DataFrame + | SupportArrowArray + | SupportArrowDeviceArray, + names: list[str] | Schema | None = None, + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, +) -> RecordBatch: ... + + +def table( + data: Collection[ArrayOrChunkedArray[Any] | list[Any] | range | str] + | pd.DataFrame + | SupportArrowArray + | SupportArrowStream + | SupportArrowDeviceArray + | Mapping[str, list[Any] | Array[Any] | ChunkedArray[Any] | range] + | Mapping[str, Any], + names: list[str] | Schema | None = None, + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + nthreads: int | None = None, +) -> Table: ... + + +def concat_tables( + tables: Iterable[Table], + memory_pool: MemoryPool | None = None, + promote_options: Literal["none", "default", "permissive"] = "none", + **kwargs: Any, +) -> Table: ... + + +class TableGroupBy: + + keys: str | list[str] + + def __init__(self, table: Table, keys: str | + list[str], use_threads: bool = True): ... + + def aggregate( + self, + aggregations: Iterable[ + tuple[ColumnSelector, Aggregation] + | tuple[ColumnSelector, Aggregation, AggregateOptions | None] + ], + ) -> Table: ... + + def _table(self) -> Table: ... + @property + def _use_threads(self) -> bool: ... + + +def concat_batches( + recordbatches: Iterable[RecordBatch], memory_pool: MemoryPool | None = None +) -> RecordBatch: ... 
+ + +__all__ = [ + "ChunkedArray", + "chunked_array", + "_Tabular", + "RecordBatch", + "table_to_blocks", + "Table", + "record_batch", + "table", + "concat_tables", + "TableGroupBy", + "concat_batches", + "Aggregation", + "AggregateOptions", +] diff --git a/python/pyarrow-stubs/pyarrow/tensor.pyi b/python/pyarrow-stubs/pyarrow/tensor.pyi new file mode 100644 index 00000000000..ba40c7b299d --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/tensor.pyi @@ -0,0 +1,268 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +from collections.abc import Sequence +import numpy as np + +from pyarrow.lib import _Weakrefable +from pyarrow._types import DataType +from scipy.sparse import coo_matrix, csr_matrix +from sparse import COO # type: ignore[import-untyped, import-not-found] + + +class Tensor(_Weakrefable): + @classmethod + def from_numpy(cls, obj: np.ndarray, + dim_names: Sequence[str] | None = None) -> Self: ... + + def to_numpy(self) -> np.ndarray: ... + + def equals(self, other: Tensor) -> bool: ... + + def dim_name(self, i: int) -> str: ... + + @property + def dim_names(self) -> list[str]: ... 
+
+    @property
+    def is_mutable(self) -> bool: ...
+
+    @property
+    def is_contiguous(self) -> bool: ...
+
+    @property
+    def ndim(self) -> int: ...
+
+    @property
+    def size(self) -> int: ...
+
+    @property
+    def shape(self) -> tuple[int, ...]: ...
+
+    @property
+    def strides(self) -> tuple[int, ...]: ...
+
+    @property
+    def type(self) -> DataType: ...
+
+
+class SparseCOOTensor(_Weakrefable):
+    @classmethod
+    def from_dense_numpy(cls, obj: np.ndarray,
+                         dim_names: list[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_numpy(
+        cls,
+        data: np.ndarray,
+        coords: np.ndarray,
+        shape: Sequence[int],
+        dim_names: Sequence[str] | None = None,
+    ) -> Self: ...
+
+    @classmethod
+    def from_scipy(cls, obj: coo_matrix,
+                   dim_names: Sequence[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_pydata_sparse(
+        cls, obj: COO, dim_names: Sequence[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_tensor(cls, obj: Tensor) -> Self: ...
+
+    def to_numpy(self) -> tuple[np.ndarray, np.ndarray]: ...
+
+    def to_scipy(self) -> coo_matrix: ...
+
+    def to_pydata_sparse(self) -> COO: ...
+
+    def to_tensor(self) -> Tensor: ...
+
+    def equals(self, other: Self) -> bool: ...
+
+    @property
+    def is_mutable(self) -> bool: ...
+    @property
+    def ndim(self) -> int: ...
+    @property
+    def size(self) -> int: ...
+    @property
+    def shape(self) -> tuple[int, ...]: ...
+    def dim_name(self, i: int) -> str: ...
+
+    @property
+    def dim_names(self) -> list[str]: ...
+    @property
+    def non_zero_length(self) -> int: ...
+    @property
+    def has_canonical_format(self) -> bool: ...
+    @property
+    def type(self) -> DataType: ...
+
+
+class SparseCSRMatrix(_Weakrefable):
+    @classmethod
+    def from_dense_numpy(cls, obj: np.ndarray,
+                         dim_names: list[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_numpy(
+        cls,
+        data: np.ndarray,
+        indptr: np.ndarray,
+        indices: np.ndarray,
+        shape: Sequence[int],
+        dim_names: Sequence[str] | None = None,
+    ) -> Self: ...
+
+    @classmethod
+    def from_scipy(cls, obj: csr_matrix,
+                   dim_names: Sequence[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_tensor(cls, obj: Tensor) -> Self: ...
+
+    def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: ...
+
+    def to_scipy(self) -> csr_matrix: ...
+
+    def to_tensor(self) -> Tensor: ...
+
+    def equals(self, other: Self) -> bool: ...
+
+    @property
+    def is_mutable(self) -> bool: ...
+    @property
+    def ndim(self) -> int: ...
+    @property
+    def size(self) -> int: ...
+    @property
+    def shape(self) -> tuple[int, ...]: ...
+    def dim_name(self, i: int) -> str: ...
+
+    @property
+    def dim_names(self) -> list[str]: ...
+    @property
+    def non_zero_length(self) -> int: ...
+    @property
+    def type(self) -> DataType: ...
+
+
+class SparseCSCMatrix(_Weakrefable):
+    @classmethod
+    def from_dense_numpy(cls, obj: np.ndarray,
+                         dim_names: list[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_numpy(
+        cls,
+        data: np.ndarray,
+        indptr: np.ndarray,
+        indices: np.ndarray,
+        shape: tuple[int, ...],
+        dim_names: list[str] | None = None,
+    ) -> Self: ...
+
+    @classmethod
+    def from_scipy(cls, obj: csr_matrix,
+                   dim_names: list[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_tensor(cls, obj: Tensor) -> Self: ...
+
+    def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: ...
+
+    def to_scipy(self) -> csr_matrix: ...
+
+    def to_tensor(self) -> Tensor: ...
+
+    def equals(self, other: Self) -> bool: ...
+
+    @property
+    def is_mutable(self) -> bool: ...
+    @property
+    def ndim(self) -> int: ...
+    @property
+    def size(self) -> int: ...
+    @property
+    def shape(self) -> tuple[int, ...]: ...
+    def dim_name(self, i: int) -> str: ...
+
+    @property
+    def dim_names(self) -> list[str]: ...
+    @property
+    def non_zero_length(self) -> int: ...
+
+
+class SparseCSFTensor(_Weakrefable):
+    @classmethod
+    def from_dense_numpy(cls, obj: np.ndarray,
+                         dim_names: Sequence[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_numpy(
+        cls,
+        data: np.ndarray,
+        indptr: Sequence[np.ndarray],
+        indices: Sequence[np.ndarray],
+        shape: tuple[int, ...],
+        axis_order: Sequence[int] | None = None,
+        dim_names: Sequence[str] | None = None,
+    ) -> Self: ...
+
+    @classmethod
+    def from_tensor(cls, obj: Tensor) -> Self: ...
+
+    def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: ...
+
+    def to_tensor(self) -> Tensor: ...
+
+    def equals(self, other: Self) -> bool: ...
+
+    @property
+    def is_mutable(self) -> bool: ...
+    @property
+    def ndim(self) -> int: ...
+    @property
+    def size(self) -> int: ...
+    @property
+    def shape(self) -> tuple[int, ...]: ...
+    def dim_name(self, i: int) -> str: ...
+
+    @property
+    def dim_names(self) -> list[str]: ...
+    @property
+    def non_zero_length(self) -> int: ...
+    @property
+    def type(self) -> DataType: ...
+
+
+__all__ = [
+    "Tensor",
+    "SparseCOOTensor",
+    "SparseCSRMatrix",
+    "SparseCSCMatrix",
+    "SparseCSFTensor",
+]
diff --git a/python/pyarrow-stubs/pyarrow/types.pyi b/python/pyarrow-stubs/pyarrow/types.pyi
new file mode 100644
index 00000000000..9e5a0568db0
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/types.pyi
@@ -0,0 +1,227 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +import sys +from enum import IntEnum + +from typing import Any + +if sys.version_info >= (3, 13): + from typing import TypeIs +else: + from typing_extensions import TypeIs +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + +import pyarrow.lib as lib + +from pyarrow.lib import ( + BinaryType, + BinaryViewType, + BoolType, + DataType, + Date32Type, + Date64Type, + Decimal32Type, + Decimal64Type, + Decimal128Type, + Decimal256Type, + DenseUnionType, + DictionaryType, + DurationType, + FixedSizeBinaryType, + FixedSizeListType, + Float16Type, + Float32Type, + Float64Type, + Int8Type, + Int16Type, + Int32Type, + Int64Type, + LargeBinaryType, + LargeListType, + LargeListViewType, + LargeStringType, + ListType, + ListViewType, + MapType, + MonthDayNanoIntervalType, + NullType, + RunEndEncodedType, + SparseUnionType, + StringType, + StringViewType, + StructType, + Time32Type, + Time64Type, + TimestampType, + UInt8Type, + UInt16Type, + UInt32Type, + UInt64Type, +) + +_SignedInteger: TypeAlias = Int8Type | Int16Type | Int32Type | Int64Type +_UnsignedInteger: TypeAlias = UInt8Type | UInt16Type | UInt32Type | UInt64Type +_Integer: TypeAlias = _SignedInteger | _UnsignedInteger +_Floating: TypeAlias = Float16Type | Float32Type | Float64Type +_Decimal: TypeAlias = ( + Decimal32Type[Any, Any] + | Decimal64Type[Any, Any] + | Decimal128Type[Any, Any] + | Decimal256Type[Any, Any] +) +_Date: TypeAlias = Date32Type | Date64Type +_Time: TypeAlias = Time32Type[Any] | Time64Type[Any] +_Interval: TypeAlias = MonthDayNanoIntervalType +_Temporal: TypeAlias = (TimestampType[Any, Any] + | DurationType[Any] | _Time | _Date | _Interval) +_Union: TypeAlias = SparseUnionType | DenseUnionType +_Nested: TypeAlias = ( + ListType[Any] + | FixedSizeListType[Any, Any] + | LargeListType[Any] + | ListViewType[Any] + | LargeListViewType[Any] + | StructType + | MapType[Any, Any, Any] + | _Union +) + + +def is_null(t: DataType) -> 
TypeIs[NullType]: ... +def is_boolean(t: DataType) -> TypeIs[BoolType]: ... +def is_integer(t: DataType) -> TypeIs[_Integer]: ... +def is_signed_integer(t: DataType) -> TypeIs[_SignedInteger]: ... +def is_unsigned_integer(t: DataType) -> TypeIs[_UnsignedInteger]: ... +def is_int8(t: DataType) -> TypeIs[Int8Type]: ... +def is_int16(t: DataType) -> TypeIs[Int16Type]: ... +def is_int32(t: DataType) -> TypeIs[Int32Type]: ... +def is_int64(t: DataType) -> TypeIs[Int64Type]: ... +def is_uint8(t: DataType) -> TypeIs[UInt8Type]: ... +def is_uint16(t: DataType) -> TypeIs[UInt16Type]: ... +def is_uint32(t: DataType) -> TypeIs[UInt32Type]: ... +def is_uint64(t: DataType) -> TypeIs[UInt64Type]: ... +def is_floating(t: DataType) -> TypeIs[_Floating]: ... +def is_float16(t: DataType) -> TypeIs[Float16Type]: ... +def is_float32(t: DataType) -> TypeIs[Float32Type]: ... +def is_float64(t: DataType) -> TypeIs[Float64Type]: ... +def is_list(t: DataType) -> TypeIs[ListType[Any]]: ... +def is_large_list(t: DataType) -> TypeIs[LargeListType[Any]]: ... +def is_fixed_size_list(t: DataType) -> TypeIs[FixedSizeListType[Any, Any]]: ... +def is_list_view(t: DataType) -> TypeIs[ListViewType[Any]]: ... +def is_large_list_view(t: DataType) -> TypeIs[LargeListViewType[Any]]: ... +def is_struct(t: DataType) -> TypeIs[StructType]: ... +def is_union(t: DataType) -> TypeIs[_Union]: ... +def is_nested(t: DataType) -> TypeIs[_Nested]: ... +def is_run_end_encoded(t: DataType) -> TypeIs[RunEndEncodedType[Any, Any]]: ... +def is_temporal(t: DataType) -> TypeIs[_Temporal]: ... +def is_timestamp(t: DataType) -> TypeIs[TimestampType[Any, Any]]: ... +def is_duration(t: DataType) -> TypeIs[DurationType[Any]]: ... +def is_time(t: DataType) -> TypeIs[_Time]: ... +def is_time32(t: DataType) -> TypeIs[Time32Type[Any]]: ... +def is_time64(t: DataType) -> TypeIs[Time64Type[Any]]: ... +def is_binary(t: DataType) -> TypeIs[BinaryType]: ... +def is_large_binary(t: DataType) -> TypeIs[LargeBinaryType]: ... 
+def is_unicode(t: DataType) -> TypeIs[StringType]: ... +def is_string(t: DataType) -> TypeIs[StringType]: ... +def is_large_unicode(t: DataType) -> TypeIs[LargeStringType]: ... +def is_large_string(t: DataType) -> TypeIs[LargeStringType]: ... +def is_fixed_size_binary(t: DataType) -> TypeIs[FixedSizeBinaryType]: ... +def is_binary_view(t: DataType) -> TypeIs[BinaryViewType]: ... +def is_string_view(t: DataType) -> TypeIs[StringViewType]: ... +def is_date(t: DataType) -> TypeIs[_Date]: ... +def is_date32(t: DataType) -> TypeIs[Date32Type]: ... +def is_date64(t: DataType) -> TypeIs[Date64Type]: ... +def is_map(t: DataType) -> TypeIs[MapType[Any, Any, Any]]: ... +def is_decimal(t: DataType) -> TypeIs[_Decimal]: ... +def is_decimal32(t: DataType) -> TypeIs[Decimal32Type[Any, Any]]: ... +def is_decimal64(t: DataType) -> TypeIs[Decimal64Type[Any, Any]]: ... +def is_decimal128(t: DataType) -> TypeIs[Decimal128Type[Any, Any]]: ... +def is_decimal256(t: DataType) -> TypeIs[Decimal256Type[Any, Any]]: ... +def is_dictionary(t: DataType) -> TypeIs[DictionaryType[Any, Any, Any]]: ... +def is_interval(t: DataType) -> TypeIs[_Interval]: ... +def is_primitive(t: DataType) -> bool: ... +def is_boolean_value(obj: Any) -> bool: ... +def is_integer_value(obj: Any) -> bool: ... +def is_float_value(obj: Any) -> bool: ... 
+ + +__all__ = [ + "lib", + "is_binary", + "is_binary_view", + "is_boolean", + "is_date", + "is_date32", + "is_date64", + "is_decimal", + "is_decimal128", + "is_decimal256", + "is_decimal32", + "is_decimal64", + "is_dictionary", + "is_duration", + "is_fixed_size_binary", + "is_fixed_size_list", + "is_float16", + "is_float32", + "is_float64", + "is_floating", + "is_int16", + "is_int32", + "is_int64", + "is_int8", + "is_integer", + "is_interval", + "is_large_binary", + "is_large_list", + "is_large_list_view", + "is_large_string", + "is_large_unicode", + "is_list", + "is_list_view", + "is_map", + "is_nested", + "is_null", + "is_primitive", + "is_run_end_encoded", + "is_signed_integer", + "is_string", + "is_string_view", + "is_struct", + "is_temporal", + "is_time", + "is_time32", + "is_time64", + "is_timestamp", + "is_uint16", + "is_uint32", + "is_uint64", + "is_uint8", + "is_unicode", + "is_union", + "is_unsigned_integer", +] + + +class TypesEnum(IntEnum): + INTERVAL_MONTHS = 0 + INTERVAL_DAY_TIME = 1 + INTERVAL_MONTH_DAY_NANO = 2 diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index ec58ac727e5..47ce1ab9b81 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -3632,7 +3632,7 @@ cdef class FixedSizeListArray(BaseListArray): Or create from a values array, list size and matching type: >>> typ = pa.list_(pa.field("values", pa.int64()), 2) - >>> arr = pa.FixedSizeListArray.from_arrays(values,type=typ) + >>> arr = pa.FixedSizeListArray.from_arrays(values, type=typ) >>> arr [ diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index 83cabcf447d..16fed344e4d 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -1036,7 +1036,7 @@ cdef class StructScalar(Scalar, Mapping): Parameters ---------- - index : Union[int, str] + key : Union[int, str] Index / position or name of the field. 
Returns diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index d09d9f45c7d..8a257ca48d6 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -18,19 +18,23 @@ from collections.abc import Iterable import datetime import decimal -import hypothesis as h -import hypothesis.strategies as st +import hypothesis as h # type: ignore[import-not-found] +import hypothesis.strategies as st # type: ignore[import-not-found] import itertools -import pytest +import pytest # type: ignore[import-not-found] import struct import subprocess import sys import weakref +from typing import TYPE_CHECKING -try: +if TYPE_CHECKING: import numpy as np -except ImportError: - np = None +else: + try: + import numpy as np + except ImportError: + np = None import pyarrow as pa import pyarrow.tests.strategies as past @@ -71,7 +75,7 @@ def test_constructor_raises(): # This could happen by wrong capitalization. # ARROW-2638: prevent calling extension class constructors directly with pytest.raises(TypeError): - pa.Array([1, 2]) + pa.Array([1, 2]) # type: ignore[reportCallIssue] def test_list_format(): @@ -321,11 +325,11 @@ def test_asarray(): arr = pa.array(range(4)) - # The iterator interface gives back an array of Int64Value's + # The iterator interface gives back an array of Int64Type's np_arr = np.asarray([_ for _ in arr]) assert np_arr.tolist() == [0, 1, 2, 3] assert np_arr.dtype == np.dtype('O') - assert isinstance(np_arr[0], pa.lib.Int64Value) + assert isinstance(np_arr[0], pa.lib.Int64Type) # Calling with the arrow array gives back an array with 'int64' dtype np_arr = np.asarray(arr) @@ -649,8 +653,8 @@ def test_array_eq(): @pytest.mark.numpy def test_array_from_buffers(): - values_buf = pa.py_buffer(np.int16([4, 5, 6, 7])) - nulls_buf = pa.py_buffer(np.uint8([0b00001101])) + values_buf = pa.py_buffer(np.array([4, 5, 6, 7], dtype=np.int16())) + nulls_buf = pa.py_buffer(np.array([0b00001101], dtype=np.uint8())) arr = 
pa.Array.from_buffers(pa.int16(), 4, [nulls_buf, values_buf]) assert arr.type == pa.int16() assert arr.to_pylist() == [4, None, 6, 7] @@ -665,7 +669,9 @@ def test_array_from_buffers(): assert arr.to_pylist() == [None, 6, 7] with pytest.raises(TypeError): - pa.Array.from_buffers(pa.int16(), 3, ['', ''], offset=1) + pa.Array.from_buffers( + pa.int16(), 3, ['', ''], offset=1 # type: ignore[reportArgumentType] + ) def test_string_binary_from_buffers(): @@ -859,7 +865,8 @@ def test_struct_array_from_chunked(): chunked_arr = pa.chunked_array([[1, 2, 3], [4, 5, 6]]) with pytest.raises(TypeError, match="Expected Array"): - pa.StructArray.from_arrays([chunked_arr], ["foo"]) + pa.StructArray.from_arrays( + [chunked_arr], ["foo"]) # type: ignore[reportArgumentType] @pytest.mark.parametrize("offset", (0, 1)) @@ -1179,24 +1186,24 @@ def test_map_from_arrays(): keys = pa.array(pykeys, type='binary') items = pa.array(pyitems, type='i4') - result = pa.MapArray.from_arrays(offsets, keys, items) + result = pa.MapArray.from_arrays(offsets, keys, items) # type: ignore[arg-type] expected = pa.array(pyentries, type=pa.map_(pa.binary(), pa.int32())) assert result.equals(expected) # pass in the type explicitly - result = pa.MapArray.from_arrays(offsets, keys, items, pa.map_( - keys.type, - items.type - )) + result = pa.MapArray.from_arrays(offsets, keys, items, # type: ignore[arg-type] + pa.map_(keys.type, items.type)) assert result.equals(expected) # pass in invalid types with pytest.raises(pa.ArrowTypeError, match='Expected map type, got string'): - pa.MapArray.from_arrays(offsets, keys, items, pa.string()) + pa.MapArray.from_arrays( + offsets, keys, items, pa.string() # type: ignore[arg-type] + ) with pytest.raises(pa.ArrowTypeError, match='Mismatching map items type'): - pa.MapArray.from_arrays(offsets, keys, items, pa.map_( + pa.MapArray.from_arrays(offsets, keys, items, pa.map_( # type: ignore[arg-type] keys.type, # Larger than the original i4 pa.int64() @@ -1234,7 +1241,7 @@ def 
test_map_from_arrays(): # error if null bitmap and offsets with nulls passed msg1 = 'Ambiguous to specify both validity map and offsets with nulls' with pytest.raises(pa.ArrowInvalid, match=msg1): - pa.MapArray.from_arrays(offsets, keys, items, pa.map_( + pa.MapArray.from_arrays(offsets, keys, items, pa.map_( # type: ignore[arg-type] keys.type, items.type), mask=pa.array([False, True, False], type=pa.bool_()) @@ -2718,7 +2725,7 @@ def test_interval_array_from_relativedelta(): assert arr.type == pa.month_day_nano_interval() expected_list = [ None, - pa.MonthDayNano([13, 8, + pa.MonthDayNano([13, 8, # type: ignore[arg-type] (datetime.timedelta(seconds=1, microseconds=1, minutes=1, hours=1) // datetime.timedelta(microseconds=1)) * 1000])] @@ -2751,7 +2758,7 @@ def test_interval_array_from_tuple(): assert arr.type == pa.month_day_nano_interval() expected_list = [ None, - pa.MonthDayNano([1, 2, -3])] + pa.MonthDayNano([1, 2, -3])] # type: ignore[arg-type] expected = pa.array(expected_list) assert arr.equals(expected) assert arr.to_pylist() == expected_list @@ -2772,8 +2779,8 @@ def test_interval_array_from_dateoffset(): assert arr.type == pa.month_day_nano_interval() expected_list = [ None, - pa.MonthDayNano([13, 8, 3661000001001]), - pa.MonthDayNano([0, 0, 0])] + pa.MonthDayNano([13, 8, 3661000001001]), # type: ignore[arg-type] + pa.MonthDayNano([0, 0, 0])] # type: ignore[arg-type] expected = pa.array(expected_list) assert arr.equals(expected) expected_from_pandas = [ @@ -2937,7 +2944,7 @@ def test_buffers_primitive(): # Slicing does not affect the buffers but the offset a_sliced = a[1:] buffers = a_sliced.buffers() - a_sliced.offset == 1 + assert a_sliced.offset == 1 assert len(buffers) == 2 null_bitmap = buffers[0].to_pybytes() assert 1 <= len(null_bitmap) <= 64 # XXX this is varying @@ -2945,7 +2952,7 @@ def test_buffers_primitive(): assert struct.unpack('hhxxh', buffers[1].to_pybytes()) == (1, 2, 4) - a = pa.array(np.int8([4, 5, 6])) + a = pa.array(np.array([4, 5, 
6], dtype=np.int8)) buffers = a.buffers() assert len(buffers) == 2 # No null bitmap from Numpy int array @@ -3031,7 +3038,7 @@ def test_nbytes_size(): def test_invalid_tensor_constructor_repr(): # ARROW-2638: prevent calling extension class constructors directly with pytest.raises(TypeError): - repr(pa.Tensor([1])) + repr(pa.Tensor([1])) # type: ignore[reportCallIssue] def test_invalid_tensor_construction(): @@ -3549,7 +3556,7 @@ def test_array_supported_masks(): with pytest.raises(pa.ArrowTypeError): arr = pa.array([4, None, 4, 3], - mask=[1.0, 2.0, 3.0, 4.0]) + mask=[1.0, 2.0, 3.0, 4.0]) # type: ignore[reportArgumentType] with pytest.raises(pa.ArrowTypeError): arr = pa.array([4, None, 4, 3], @@ -3836,11 +3843,11 @@ def test_concat_array_invalid_type(): # ARROW-9920 - do not segfault on non-array input with pytest.raises(TypeError, match="should contain Array objects"): - pa.concat_arrays([None]) + pa.concat_arrays([None]) # type: ignore[reportArgumentType] arr = pa.chunked_array([[0, 1], [3, 4]]) with pytest.raises(TypeError, match="should contain Array objects"): - pa.concat_arrays(arr) + pa.concat_arrays(arr) # type: ignore[reportArgumentType] @pytest.mark.pandas @@ -4369,7 +4376,7 @@ def test_non_cpu_array(): with pytest.raises(NotImplementedError): [i for i in iter(arr)] with pytest.raises(NotImplementedError): - arr == arr2 + _ = arr == arr2 with pytest.raises(NotImplementedError): arr.is_null() with pytest.raises(NotImplementedError): diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index c10ae0f62b4..6e48a4ff076 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -21,13 +21,18 @@ import itertools import math import re +from typing import TYPE_CHECKING, cast import hypothesis as h import pytest -try: + +if TYPE_CHECKING: import numpy as np -except ImportError: - np = None +else: + try: + import numpy as np + except ImportError: + np = None 
from pyarrow.pandas_compat import _pandas_api # noqa import pyarrow as pa @@ -66,7 +71,7 @@ def __int__(self): class MyBrokenInt: def __int__(self): - 1/0 # MARKER + _ = 1/0 # MARKER def test_iterable_types(): @@ -137,7 +142,7 @@ def test_object_with_getitem(): # https://github.com/apache/arrow/issues/34944 # considered as sequence because of __getitem__, but has no length with pytest.raises(TypeError, match="has no len()"): - pa.array(ObjectWithOnlyGetitem()) + pa.array(ObjectWithOnlyGetitem()) # type: ignore[arg-type] def _as_list(xs): @@ -845,7 +850,7 @@ def test_large_binary_value(ty): assert isinstance(arr, pa.Array) assert arr.type == ty assert len(arr) == 4 - buf = arr[1].as_buffer() + buf = cast(pa.FixedSizeBinaryScalar, arr[1]).as_buffer() assert len(buf) == len(s) * nrepeats @@ -1091,11 +1096,11 @@ def expected_datetime_value(dt): ), ] utcdata = [ - pytz.utc.localize(data[0]), + pytz.utc.localize(cast(datetime.datetime, data[0])), data[1], None, - data[3].astimezone(pytz.utc), - data[4].astimezone(pytz.utc), + cast(datetime.datetime, data[3]).astimezone(pytz.utc), + cast(datetime.datetime, data[4]).astimezone(pytz.utc), ] ty = pa.timestamp(unit, tz=timezone) @@ -1223,9 +1228,9 @@ def test_sequence_timestamp_from_mixed_builtin_and_pandas_datetimes(): None, ] utcdata = [ - data[0].astimezone(pytz.utc), - pytz.utc.localize(data[1]), - data[2].astimezone(pytz.utc), + cast(datetime.datetime, data[0]).astimezone(pytz.utc), + pytz.utc.localize(cast(datetime.datetime, data[1])), + cast(datetime.datetime, data[2]).astimezone(pytz.utc), None, ] @@ -2103,8 +2108,8 @@ def test_map_from_dicts(): assert arr.to_pylist() == expected # With omitted values - data[1] = None - expected[1] = None + data[1] = None # type: ignore[call-overload] + expected[1] = None # type: ignore[call-overload] arr = pa.array(expected, type=pa.map_(pa.binary(), pa.int32())) @@ -2429,6 +2434,7 @@ def test_nested_auto_chunking(ty, char): } +@pytest.mark.numpy @pytest.mark.large_memory def 
test_array_from_pylist_data_overflow(): # Regression test for ARROW-12983 @@ -2451,6 +2457,7 @@ def test_array_from_pylist_data_overflow(): assert len(arr.chunks) > 1 +@pytest.mark.numpy @pytest.mark.slow @pytest.mark.large_memory def test_array_from_pylist_offset_overflow(): @@ -2475,6 +2482,7 @@ def test_array_from_pylist_offset_overflow(): assert len(arr.chunks) > 1 +@pytest.mark.numpy @parametrize_with_collections_types @pytest.mark.parametrize(('data', 'scalar_data', 'value_type'), [ ([True, False, None], [pa.scalar(True), pa.scalar(False), None], pa.bool_()), @@ -2512,8 +2520,10 @@ def test_array_from_pylist_offset_overflow(): pa.timestamp('us') ), ( - [pa.MonthDayNano([1, -1, -10100])], - [pa.scalar(pa.MonthDayNano([1, -1, -10100]))], + [pa.MonthDayNano([1, -1, -10100])], # type: ignore[call-arg, arg-type] + [pa.scalar( + pa.MonthDayNano([1, -1, -10100]) # type: ignore[call-arg, arg-type] + )], pa.month_day_nano_interval() ), (["a", "b"], [pa.scalar("a"), pa.scalar("b")], pa.string()), diff --git a/python/pyarrow/tests/test_device.py b/python/pyarrow/tests/test_device.py index dc1a51e6d00..00f8bbf720d 100644 --- a/python/pyarrow/tests/test_device.py +++ b/python/pyarrow/tests/test_device.py @@ -59,11 +59,15 @@ def test_copy_to(): batch_copied = batch.copy_to(dest) assert batch_copied.equals(batch) - assert batch_copied["col"].buffers()[1].device == mm.device - assert batch_copied["col"].buffers()[1].address != arr.buffers()[1].address + buffer = batch_copied.column("col").buffers()[1] + assert buffer is not None + assert buffer.device == mm.device + buffer_orig = arr.buffers()[1] + assert buffer_orig is not None + assert buffer.address != buffer_orig.address with pytest.raises(TypeError, match="Argument 'destination' has incorrect type"): - arr.copy_to(mm.device.device_type) + arr.copy_to(mm.device.device_type) # type: ignore[arg-type] with pytest.raises(TypeError, match="Argument 'destination' has incorrect type"): - batch.copy_to(mm.device.device_type) + 
batch.copy_to(mm.device.device_type) # type: ignore[arg-type] diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py index 029e14ca162..5a7b9989358 100644 --- a/python/pyarrow/tests/test_schema.py +++ b/python/pyarrow/tests/test_schema.py @@ -23,7 +23,7 @@ try: import numpy as np except ImportError: - np = None + pass import pyarrow as pa import pyarrow.tests.util as test_util @@ -259,7 +259,7 @@ def test_schema(): child 0, item: int8""" with pytest.raises(TypeError): - pa.schema([None]) + pa.schema([None]) # type: ignore[list-item] def test_schema_weakref(): @@ -548,7 +548,7 @@ def test_schema_equals_invalid_type(): for val in [None, 'string', pa.array([1, 2])]: with pytest.raises(TypeError): - schema.equals(val) + schema.equals(val) # type: ignore[invalid-argument-type] def test_schema_equality_operators(): @@ -594,7 +594,7 @@ def test_schema_get_fields(): with pytest.raises(KeyError): schema.field('other') with pytest.raises(TypeError): - schema.field(0.0) + schema.field(0.0) # type: ignore[arg-type] with pytest.raises(IndexError): schema.field(4) @@ -706,6 +706,7 @@ def test_empty_table(): assert table.schema == schema +@pytest.mark.numpy @pytest.mark.pandas def test_schema_from_pandas(): import pandas as pd @@ -782,7 +783,7 @@ def test_schema_merge(): # raise proper error when passing a non-Schema value with pytest.raises(TypeError): - pa.unify_schemas([a, 1]) + pa.unify_schemas([a, 1]) # type: ignore[list-item] def test_undecodable_metadata(): diff --git a/python/pyarrow/tests/test_sparse_tensor.py b/python/pyarrow/tests/test_sparse_tensor.py index eca8090d77a..2ce48b651b1 100644 --- a/python/pyarrow/tests/test_sparse_tensor.py +++ b/python/pyarrow/tests/test_sparse_tensor.py @@ -26,15 +26,16 @@ import pyarrow as pa try: - from scipy.sparse import csr_array, coo_array, csr_matrix, coo_matrix + from scipy.sparse import ( # type: ignore[reportMissingModuleSource] + csr_array, coo_array, csr_matrix, coo_matrix) except ImportError: 
- coo_matrix = None - csr_matrix = None - csr_array = None - coo_array = None + coo_matrix = None # type: ignore[assignment, misc] + csr_matrix = None # type: ignore[assignment, misc] + csr_array = None # type: ignore[assignment, misc] + coo_array = None # type: ignore[assignment, misc] try: - import sparse + import sparse # type: ignore[import-untyped, import-not-found] except ImportError: sparse = None @@ -401,7 +402,7 @@ def test_dense_to_sparse_tensor(dtype_str, arrow_type, sparse_tensor_type): assert np.array_equal(array, result_array) -@pytest.mark.skipif(not coo_matrix, reason="requires scipy") +@pytest.mark.skipif(coo_matrix is None, reason="requires scipy") @pytest.mark.parametrize('sparse_object', (coo_array, coo_matrix)) @pytest.mark.parametrize('dtype_str,arrow_type', scipy_type_pairs) def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type, @@ -443,7 +444,7 @@ def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type, assert out_scipy_matrix.has_canonical_format -@pytest.mark.skipif(not csr_matrix, reason="requires scipy") +@pytest.mark.skipif(csr_matrix is None, reason="requires scipy") @pytest.mark.parametrize('sparse_object', (csr_array, csr_matrix)) @pytest.mark.parametrize('dtype_str,arrow_type', scipy_type_pairs) def test_sparse_csr_matrix_scipy_roundtrip(dtype_str, arrow_type, @@ -483,7 +484,8 @@ def test_pydata_sparse_sparse_coo_tensor_roundtrip(dtype_str, arrow_type): shape = (4, 6) dim_names = ("x", "y") - sparse_array = sparse.COO(data=data, coords=coords, shape=shape) + sparse_array = sparse.COO( # type: ignore[reportOptionalMemberAccess] + data=data, coords=coords, shape=shape) sparse_tensor = pa.SparseCOOTensor.from_pydata_sparse(sparse_array, dim_names=dim_names) out_sparse_array = sparse_tensor.to_pydata_sparse() diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index b65fb7d952c..6263afd03a5 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ 
-18,12 +18,13 @@ from collections import OrderedDict from collections.abc import Iterable import sys +from typing import cast import weakref try: import numpy as np except ImportError: - np = None + pass import pytest import pyarrow as pa import pyarrow.compute as pc @@ -418,7 +419,8 @@ def test_to_pandas_empty_table(): table = pa.table(df) result = table.schema.empty_table().to_pandas() assert result.shape == (0, 2) - tm.assert_frame_equal(result, df.iloc[:0]) + expected = df.iloc[:0] + tm.assert_frame_equal(result, expected) @pytest.mark.pandas @@ -486,12 +488,25 @@ def test_chunked_array_unify_dictionaries(): pa.array(["foo", "bar", None, "foo"]).dictionary_encode(), pa.array(["quux", None, "foo"]).dictionary_encode(), ]) - assert arr.chunk(0).dictionary.equals(pa.array(["foo", "bar"])) - assert arr.chunk(1).dictionary.equals(pa.array(["quux", "foo"])) + chunk_0 = arr.chunk(0) + assert isinstance(chunk_0, pa.DictionaryArray) + assert chunk_0.dictionary.equals(pa.array(["foo", "bar"])) + + chunk_1 = arr.chunk(1) + assert isinstance(chunk_1, pa.DictionaryArray) + assert chunk_1.dictionary.equals(pa.array(["quux", "foo"])) + arr = arr.unify_dictionaries() expected_dict = pa.array(["foo", "bar", "quux"]) - assert arr.chunk(0).dictionary.equals(expected_dict) - assert arr.chunk(1).dictionary.equals(expected_dict) + + chunk_0 = arr.chunk(0) + assert isinstance(chunk_0, pa.DictionaryArray) + assert chunk_0.dictionary.equals(expected_dict) + + chunk_1 = arr.chunk(1) + assert isinstance(chunk_1, pa.DictionaryArray) + assert chunk_1.dictionary.equals(expected_dict) + assert arr.to_pylist() == ["foo", "bar", None, "foo", "quux", None, "foo"] @@ -716,7 +731,7 @@ def test_recordbatch_take(): def test_recordbatch_column_sets_private_name(): # ARROW-6429 rb = pa.record_batch([pa.array([1, 2, 3, 4])], names=['a0']) - assert rb[0]._name == 'a0' + assert rb.column(0)._name == 'a0' def test_recordbatch_from_arrays_validate_schema(): @@ -798,7 +813,7 @@ def 
test_recordbatch_get_field(): batch.field('d') with pytest.raises(TypeError): - batch.field(None) + batch.field(None) # type: ignore[arg-type] with pytest.raises(IndexError): batch.field(4) @@ -819,7 +834,7 @@ def test_recordbatch_select_column(): batch.column('d') with pytest.raises(TypeError): - batch.column(None) + batch.column(None) # type: ignore[arg-type] with pytest.raises(IndexError): batch.column(4) @@ -933,7 +948,10 @@ def test_table_from_struct_array_chunked_array(): [[{"ints": 1}, {"floats": 1.0}]], type=pa.struct([("ints", pa.int32()), ("floats", pa.float32())]), ) - result = pa.Table.from_struct_array(chunked_struct_array) + assert isinstance(chunked_struct_array.type, pa.StructType) + # Cast to the proper type for type checker + struct_chunked_array = cast(pa.ChunkedArray, chunked_struct_array) + result = pa.Table.from_struct_array(struct_chunked_array) assert result.equals(pa.Table.from_arrays( [ pa.array([1, None], type=pa.int32()), @@ -1189,7 +1207,7 @@ def test_recordbatch_to_tensor_null(): batch.to_tensor() result = batch.to_tensor(null_to_nan=True, row_major=False) - x = np.column_stack([arr1, arr2]).astype(np.float64, order="F") + x = np.column_stack([arr1, arr2]).astype(np.float64, order="F") # type: ignore[no-matching-overload] expected = pa.Tensor.from_numpy(x) np.testing.assert_equal(result.to_numpy(), x) @@ -1223,7 +1241,7 @@ def test_recordbatch_to_tensor_null(): ) result = batch.to_tensor(null_to_nan=True, row_major=False) - x = np.column_stack([arr1, arr2]).astype(np.float32, order="F") + x = np.column_stack([arr1, arr2]).astype(np.float32, order="F") # type: ignore[no-matching-overload] expected = pa.Tensor.from_numpy(x) np.testing.assert_equal(result.to_numpy(), x) @@ -1339,7 +1357,7 @@ def test_recordbatchlist_schema_equals(): def test_table_column_sets_private_name(): # ARROW-6429 t = pa.table([pa.array([1, 2, 3, 4])], names=['a0']) - assert t[0]._name == 'a0' + assert t.column(0)._name == 'a0' def test_table_equals(): @@ -1500,7 
+1518,8 @@ def test_table_from_arrays_preserves_column_metadata(): field1 = pa.field('field2', pa.int64(), nullable=False) table = pa.Table.from_arrays([arr0, arr1], schema=pa.schema([field0, field1])) - assert b"a" in table.field(0).metadata + field0_metadata = table.field(0).metadata + assert field0_metadata is not None and b"a" in field0_metadata assert table.field(1).nullable is False @@ -1565,7 +1584,7 @@ def test_table_get_field(): table.field('d') with pytest.raises(TypeError): - table.field(None) + table.field(None) # type: ignore[arg-type] with pytest.raises(IndexError): table.field(4) @@ -1586,7 +1605,7 @@ def test_table_select_column(): table.column('d') with pytest.raises(TypeError): - table.column(None) + table.column(None) # type: ignore[arg-type] with pytest.raises(IndexError): table.column(4) @@ -1879,22 +1898,41 @@ def test_table_unify_dictionaries(): table = pa.Table.from_batches([batch1, batch2]) table = table.replace_schema_metadata({b"key1": b"value1"}) - assert table.column(0).chunk(0).dictionary.equals( - pa.array(["foo", "bar"])) - assert table.column(0).chunk(1).dictionary.equals( - pa.array(["quux", "foo"])) - assert table.column(1).chunk(0).dictionary.equals( - pa.array([123, 456, 789])) - assert table.column(1).chunk(1).dictionary.equals( - pa.array([456, 789])) + chunk_0_0 = table.column(0).chunk(0) + assert isinstance(chunk_0_0, pa.DictionaryArray) + assert chunk_0_0.dictionary.equals(pa.array(["foo", "bar"])) + + chunk_0_1 = table.column(0).chunk(1) + assert isinstance(chunk_0_1, pa.DictionaryArray) + assert chunk_0_1.dictionary.equals(pa.array(["quux", "foo"])) + + chunk_1_0 = table.column(1).chunk(0) + assert isinstance(chunk_1_0, pa.DictionaryArray) + assert chunk_1_0.dictionary.equals(pa.array([123, 456, 789])) + + chunk_1_1 = table.column(1).chunk(1) + assert isinstance(chunk_1_1, pa.DictionaryArray) + assert chunk_1_1.dictionary.equals(pa.array([456, 789])) table = table.unify_dictionaries(pa.default_memory_pool()) 
expected_dict_0 = pa.array(["foo", "bar", "quux"]) expected_dict_1 = pa.array([123, 456, 789]) - assert table.column(0).chunk(0).dictionary.equals(expected_dict_0) - assert table.column(0).chunk(1).dictionary.equals(expected_dict_0) - assert table.column(1).chunk(0).dictionary.equals(expected_dict_1) - assert table.column(1).chunk(1).dictionary.equals(expected_dict_1) + + chunk_0_0 = table.column(0).chunk(0) + assert isinstance(chunk_0_0, pa.DictionaryArray) + assert chunk_0_0.dictionary.equals(expected_dict_0) + + chunk_0_1 = table.column(0).chunk(1) + assert isinstance(chunk_0_1, pa.DictionaryArray) + assert chunk_0_1.dictionary.equals(expected_dict_0) + + chunk_1_0 = table.column(1).chunk(0) + assert isinstance(chunk_1_0, pa.DictionaryArray) + assert chunk_1_0.dictionary.equals(expected_dict_1) + + chunk_1_1 = table.column(1).chunk(1) + assert isinstance(chunk_1_1, pa.DictionaryArray) + assert chunk_1_1.dictionary.equals(expected_dict_1) assert table.to_pydict() == { 'a': ["foo", "bar", None, "foo", "quux", "foo", None, "quux"], @@ -1964,13 +2002,13 @@ def test_concat_tables_invalid_option(): t = pa.Table.from_arrays([list(range(10))], names=('a',)) with pytest.raises(ValueError, match="Invalid promote_options: invalid"): - pa.concat_tables([t, t], promote_options="invalid") + pa.concat_tables([t, t], promote_options="invalid") # type: ignore[arg-type] def test_concat_tables_none_table(): # ARROW-11997 with pytest.raises(AttributeError): - pa.concat_tables([None]) + pa.concat_tables([None]) # type: ignore[arg-type] @pytest.mark.pandas @@ -2113,7 +2151,7 @@ def test_concat_batches_different_schema(): def test_concat_batches_none_batches(): # ARROW-11997 with pytest.raises(AttributeError): - pa.concat_batches([None]) + pa.concat_batches([None]) # type: ignore[arg-type] @pytest.mark.parametrize( @@ -2264,7 +2302,7 @@ def test_from_arrays_schema(data, klass): # with different and incompatible schema schema = pa.schema([('strs', pa.utf8()), ('floats', 
pa.timestamp('s'))]) with pytest.raises((NotImplementedError, TypeError)): - pa.Table.from_pydict(data, schema=schema) + pa.Table.from_pydict(data, schema=schema) # type: ignore[arg-type] # Cannot pass both schema and metadata / names with pytest.raises(ValueError): @@ -2369,7 +2407,7 @@ def test_table_from_pydict_arrow_arrays(data, klass): # with different and incompatible schema schema = pa.schema([('strs', pa.utf8()), ('floats', pa.timestamp('s'))]) with pytest.raises((NotImplementedError, TypeError)): - pa.Table.from_pydict(data, schema=schema) + pa.Table.from_pydict(data, schema=schema) # type: ignore[arg-type] @pytest.mark.parametrize('data, klass', [ @@ -2386,7 +2424,7 @@ def test_table_from_pydict_schema(data, klass): schema = pa.schema([('strs', pa.utf8()), ('floats', pa.float64()), ('ints', pa.int64())]) with pytest.raises(KeyError, match='ints'): - pa.Table.from_pydict(data, schema=schema) + pa.Table.from_pydict(data, schema=schema) # type: ignore[arg-type] # data has columns not present in schema -> ignored schema = pa.schema([('strs', pa.utf8())]) @@ -2590,10 +2628,10 @@ def test_table_factory_function_args_pandas(): def test_factory_functions_invalid_input(): with pytest.raises(TypeError, match="Expected pandas DataFrame, python"): - pa.table("invalid input") + pa.table("invalid input") # type: ignore[arg-type] with pytest.raises(TypeError, match="Expected pandas DataFrame"): - pa.record_batch("invalid input") + pa.record_batch("invalid input") # type: ignore[arg-type] def test_table_repr_to_string(): @@ -2727,8 +2765,8 @@ def test_table_function_unicode_schema(): schema = pa.schema([(col_a, pa.int32()), (col_b, pa.string())]) result = pa.table(d, schema=schema) - assert result[0].chunk(0).equals(pa.array([1, 2, 3], type='int32')) - assert result[1].chunk(0).equals(pa.array(['a', 'b', 'c'], type='string')) + assert result.column(0).chunk(0).equals(pa.array([1, 2, 3], type='int32')) + assert result.column(1).chunk(0).equals(pa.array(['a', 'b', 'c'], 
type='string')) def test_table_take_vanilla_functionality(): @@ -3603,7 +3641,7 @@ def test_chunked_array_non_cpu(cuda_context, cpu_chunked_array, cuda_chunked_arr # equals() test with pytest.raises(NotImplementedError): - cuda_chunked_array == cuda_chunked_array + cuda_chunked_array == cuda_chunked_array # type: ignore[reportUnusedExpression] # to_pandas() test with pytest.raises(NotImplementedError): @@ -3860,7 +3898,7 @@ def test_recordbatch_non_cpu(cuda_context, cpu_recordbatch, cuda_recordbatch, # __dataframe__() test with pytest.raises(NotImplementedError): - from_dataframe(cuda_recordbatch.__dataframe__()) + from_dataframe(cuda_recordbatch.__dataframe__()) # type: ignore[misc] def verify_cuda_table(table, expected_schema): @@ -4059,7 +4097,7 @@ def test_table_non_cpu(cuda_context, cpu_table, cuda_table, # __dataframe__() test with pytest.raises(NotImplementedError): - from_dataframe(cuda_table.__dataframe__()) + from_dataframe(cuda_table.__dataframe__()) # type: ignore[misc] # __reduce__() test with pytest.raises(NotImplementedError): diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py index debb1066280..c3726fdbbf4 100644 --- a/python/pyarrow/tests/test_tensor.py +++ b/python/pyarrow/tests/test_tensor.py @@ -213,7 +213,7 @@ def test_tensor_memoryview(): dtype = data.dtype lst = data.tolist() tensor = pa.Tensor.from_numpy(data) - m = memoryview(tensor) + m = memoryview(tensor) # type: ignore[reportArgumentType] assert m.format == expected_format assert m.shape == data.shape assert m.strides == data.strides diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 539f0172454..c224392510d 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -24,16 +24,22 @@ import pytest import hypothesis as h import hypothesis.strategies as st -try: - import hypothesis.extra.pytz as tzst -except ImportError: - tzst = None +from typing import Any, TYPE_CHECKING import 
weakref -try: +if TYPE_CHECKING: import numpy as np -except ImportError: - np = None + import hypothesis.extra.pytz as tzst +else: + try: + import numpy as np + except ImportError: + np = None + try: + import hypothesis.extra.pytz as tzst + except ImportError: + tzst = None + import pyarrow as pa import pyarrow.types as types import pyarrow.tests.strategies as past @@ -411,7 +417,7 @@ def test_tzinfo_to_string_errors(): if tzst: timezones = tzst.timezones() else: - timezones = st.none() + timezones = st.none() # type: ignore[assignment] @h.given(timezones) @@ -465,7 +471,7 @@ class BuggyTimezone2(datetime.tzinfo): def tzname(self, dt): return None - def utcoffset(self, dt): + def utcoffset(self, dt): # type: ignore[override] return "one hour" class BuggyTimezone3(datetime.tzinfo): @@ -473,7 +479,7 @@ class BuggyTimezone3(datetime.tzinfo): Wrong timezone name type """ - def tzname(self, dt): + def tzname(self, dt): # type: ignore[override] return 240 def utcoffset(self, dt): @@ -732,13 +738,13 @@ def test_struct_type(): # Neither integer nor string with pytest.raises(TypeError): - ty[None] + ty[None] # type: ignore[reportArgumentType] with pytest.raises(TypeError): - ty.field(None) + ty.field(None) # type: ignore[reportArgumentType] for a, b in zip(ty, fields): - a == b + assert a == b # Construct from list of tuples ty = pa.struct([('a', pa.int64()), @@ -746,7 +752,7 @@ def test_struct_type(): ('b', pa.int32())]) assert list(ty) == fields for a, b in zip(ty, fields): - a == b + assert a == b # Construct from mapping fields = [pa.field('a', pa.int64()), @@ -755,7 +761,7 @@ def test_struct_type(): ('b', pa.int32())])) assert list(ty) == fields for a, b in zip(ty, fields): - a == b + assert a == b # Invalid args with pytest.raises(TypeError): @@ -862,7 +868,7 @@ def test_dictionary_type(): # invalid index type raises with pytest.raises(TypeError): - pa.dictionary(pa.string(), pa.int64()) + pa.dictionary(pa.string(), pa.int64()) # type: ignore[reportArgumentType] def 
test_dictionary_ordered_equals(): @@ -951,7 +957,7 @@ def test_run_end_encoded_type(): pa.run_end_encoded(None, pa.utf8()) with pytest.raises(ValueError): - pa.run_end_encoded(pa.int8(), pa.utf8()) + pa.run_end_encoded(pa.int8(), pa.utf8()) # type: ignore[reportArgumentType] @pytest.mark.parametrize('t,check_func', [ @@ -1084,12 +1090,12 @@ def test_timedelta_overflow(): pa.scalar(d, type=pa.duration('ns')) # microsecond resolution, not overflow - pa.scalar(d, type=pa.duration('us')).as_py() == d + assert pa.scalar(d, type=pa.duration('us')).as_py() == d # second/millisecond resolution, not overflow for d in [datetime.timedelta.min, datetime.timedelta.max]: - pa.scalar(d, type=pa.duration('ms')).as_py() == d - pa.scalar(d, type=pa.duration('s')).as_py() == d + _ = pa.scalar(d, type=pa.duration('ms')).as_py() == d + _ = pa.scalar(d, type=pa.duration('s')).as_py() == d def test_type_equality_operators(): @@ -1127,11 +1133,11 @@ def test_key_value_metadata(): assert m1 != {'a': 'A', 'b': 'C'} with pytest.raises(TypeError): - pa.KeyValueMetadata({'a': 1}) + pa.KeyValueMetadata({'a': 1}) # type: ignore[reportArgumentType] with pytest.raises(TypeError): - pa.KeyValueMetadata({1: 'a'}) + pa.KeyValueMetadata({1: 'a'}) # type: ignore[reportArgumentType] with pytest.raises(TypeError): - pa.KeyValueMetadata(a=1) + pa.KeyValueMetadata(a=1) # type: ignore[reportArgumentType] expected = [(b'a', b'A'), (b'b', b'B')] result = [(k, v) for k, v in m3.items()] @@ -1258,6 +1264,7 @@ def test_field_metadata(): assert f1.metadata is None assert f2.metadata == {} + assert f3.metadata is not None assert f3.metadata[b'bizz'] == b'bazz' @@ -1394,7 +1401,7 @@ def __arrow_c_schema__(self): return self.schema.__arrow_c_schema__() -class SchemaMapping(Mapping): +class SchemaMapping(Mapping[Any, Any]): def __init__(self, schema): self.schema = schema