Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ci/scripts/python_test_type_annotations.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ pip install mypy pyright ty
# Run type checkers
cd "${pyarrow_dir}"
mypy
pyright
ty check
pyright --stats
ty check --verbose --output-format concise
51 changes: 50 additions & 1 deletion ci/scripts/python_wheel_validate_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,34 @@
# under the License.

import argparse
import ast
from pathlib import Path
import re
import zipfile


def _count_docstrings(source):
"""Count docstrings in module, function, and class bodies."""
tree = ast.parse(source)
count = 0
for node in ast.walk(tree):
if isinstance(node, (ast.Module, ast.FunctionDef,
ast.AsyncFunctionDef, ast.ClassDef)):
if (node.body
and isinstance(node.body[0], ast.Expr)
and isinstance(node.body[0].value, ast.Constant)
and isinstance(node.body[0].value.value, str)):
count += 1
return count


def validate_wheel(path):
p = Path(path)
wheels = list(p.glob('*.whl'))
error_msg = f"{len(wheels)} wheels found but only 1 expected ({wheels})"
assert len(wheels) == 1, error_msg
f = zipfile.ZipFile(wheels[0])

outliers = [
info.filename for info in f.filelist if not re.match(
r'(pyarrow/|pyarrow-[-.\w\d]+\.dist-info/|pyarrow\.libs/)', info.filename
Expand All @@ -37,8 +54,40 @@ def validate_wheel(path):
assert any(info.filename.split("/")[-1] == filename
for info in f.filelist), \
f"{filename} is missing from the wheel."

assert any(info.filename == "pyarrow/py.typed" for info in f.filelist), \
"pyarrow/py.typed is missing from the wheel."

source_root = Path(__file__).resolve().parents[2]
stubs_dir = source_root / "python" / "pyarrow-stubs" / "pyarrow"
assert stubs_dir.exists(), f"Stub source directory not found: {stubs_dir}"

expected_stub_files = {
f"pyarrow/{stub_file.relative_to(stubs_dir).as_posix()}"
for stub_file in stubs_dir.rglob("*.pyi")
}

wheel_stub_files = {
info.filename
for info in f.filelist
if info.filename.startswith("pyarrow/") and info.filename.endswith(".pyi")
}

assert wheel_stub_files == expected_stub_files, (
"Wheel .pyi files differ from python/pyarrow-stubs/pyarrow.\n"
f"Missing in wheel: {sorted(expected_stub_files - wheel_stub_files)}\n"
f"Unexpected in wheel: {sorted(wheel_stub_files - expected_stub_files)}"
)

wheel_docstring_count = sum(
_count_docstrings(f.read(wsf).decode("utf-8"))
for wsf in wheel_stub_files
)

print(f"Found {wheel_docstring_count} docstring(s) in wheel stubs.")
assert wheel_docstring_count, "No docstrings found in wheel stub files."

print(f"The wheel: {wheels[0]} seems valid.")
# TODO(GH-32609): Validate some docstrings were generated and added.

def main():
parser = argparse.ArgumentParser()
Expand Down
51 changes: 51 additions & 0 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1025,3 +1025,54 @@ if(PYARROW_BUILD_PARQUET)
target_link_libraries(_parquet_encryption PRIVATE arrow_python_parquet_encryption)
endif()
endif()

#
# Type stubs with docstring injection
#
# Stubs live in pyarrow-stubs/pyarrow/ during development but are installed
# alongside the package so type checkers can find them (PEP 561).
#
# A docstring injection failure (or a missing stub directory) is fatal only
# when building a wheel in CI: SKBUILD_STATE is "wheel", the target is not
# Emscripten and the CI environment variable is set to a non-empty value.
# In every other configuration it is reported as a warning.
set(PYARROW_REQUIRE_STUB_DOCSTRINGS OFF)
if(DEFINED SKBUILD_STATE
   AND SKBUILD_STATE STREQUAL "wheel"
   AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten"
   AND DEFINED ENV{CI}
   AND NOT "$ENV{CI}" STREQUAL "")
  set(PYARROW_REQUIRE_STUB_DOCSTRINGS ON)
endif()

set(PYARROW_STUBS_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pyarrow-stubs/pyarrow")
if(EXISTS "${PYARROW_STUBS_SOURCE_DIR}")
  # The trailing slash installs the directory's contents (not the directory
  # itself) into the install destination; only *.pyi files are copied.
  install(DIRECTORY "${PYARROW_STUBS_SOURCE_DIR}/"
          DESTINATION "."
          FILES_MATCHING
          PATTERN "*.pyi")

  # Docstring injection runs at install time, after the stubs above have been
  # copied, and only for (non-Emscripten) wheel builds.
  #
  # Escaping rules inside install(CODE "..."):
  #   - unescaped ${...} is expanded now, at configure time
  #     (Python3_EXECUTABLE, CMAKE_CURRENT_SOURCE_DIR,
  #     PYARROW_REQUIRE_STUB_DOCSTRINGS);
  #   - escaped \${...} is expanded when the install script runs.
  # CMAKE_INSTALL_PREFIX must be escaped so that a prefix supplied at install
  # time (e.g. `cmake --install <dir> --prefix <prefix>`) is the one handed to
  # the script, rather than whatever value was cached at configure time.
  if(DEFINED SKBUILD_STATE
     AND SKBUILD_STATE STREQUAL "wheel"
     AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
    install(CODE "
      execute_process(
        COMMAND \"${Python3_EXECUTABLE}\"
                \"${CMAKE_CURRENT_SOURCE_DIR}/scripts/update_stub_docstrings.py\"
                \"\${CMAKE_INSTALL_PREFIX}\"
                \"${CMAKE_CURRENT_SOURCE_DIR}\"
        RESULT_VARIABLE _pyarrow_stub_docstrings_result
      )
      if(NOT _pyarrow_stub_docstrings_result EQUAL 0)
        if(${PYARROW_REQUIRE_STUB_DOCSTRINGS})
          message(FATAL_ERROR \"Stub docstring injection failed (exit code: \${_pyarrow_stub_docstrings_result})\")
        else()
          message(WARNING \"Stub docstring injection failed (exit code: \${_pyarrow_stub_docstrings_result})\")
        endif()
      endif()
    ")
  endif()
else()
  if(PYARROW_REQUIRE_STUB_DOCSTRINGS)
    message(FATAL_ERROR "PyArrow stub source directory not found at ${PYARROW_STUBS_SOURCE_DIR}; "
                        "cannot build CI wheel without .pyi files.")
  else()
    message(WARNING "PyArrow stub source directory not found at ${PYARROW_STUBS_SOURCE_DIR}; "
                    "wheel will be built without .pyi files.")
  endif()
endif()
11 changes: 7 additions & 4 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ exclude = [
[tool.scikit-build]
cmake.build-type = "Release"
metadata.version.provider = "scikit_build_core.metadata.setuptools_scm"
sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/"]
sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/", "pyarrow-stubs/"]
wheel.packages = ["pyarrow"]
wheel.install-dir = "pyarrow"

Expand All @@ -102,7 +102,7 @@ version_scheme = 'guess-next-dev'
git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"'
fallback_version = '24.0.0a0'

# TODO: Enable type checking once stubs are merged
# TODO: Enable more type checks as more stubs are merged
[tool.mypy]
files = ["pyarrow-stubs"]
mypy_path = "$MYPY_CONFIG_FILE_DIR/pyarrow-stubs"
Expand All @@ -113,7 +113,7 @@ exclude = [
"^scripts/",
]

# TODO: Enable type checking once stubs are merged
# TODO: Enable more type checks as more stubs are merged
[tool.pyright]
pythonPlatform = "All"
pythonVersion = "3.10"
Expand All @@ -128,7 +128,10 @@ exclude = [
stubPath = "pyarrow-stubs"
typeCheckingMode = "basic"

# TODO: Enable type checking once stubs are merged
# TODO: Enable more type checks as more stubs are merged
[tool.ty.environment]
extra-paths = ["pyarrow-stubs"]

[tool.ty.src]
include = ["pyarrow-stubs"]
exclude = [
Expand Down
95 changes: 73 additions & 22 deletions python/scripts/update_stub_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,18 @@
"""
Extract docstrings from pyarrow runtime and insert them into stub files.

Usage (from python/ directory with pyarrow built):
python scripts/update_stub_docstrings.py pyarrow-stubs
Usage:
python scripts/update_stub_docstrings.py <install_prefix> <source_dir>
"""

import argparse
import importlib
import inspect
import os
import shutil
import sys
import sysconfig
import tempfile
from pathlib import Path
from textwrap import indent

Expand Down Expand Up @@ -186,7 +190,8 @@ def add_docstrings_to_stubs(stubs_dir):
if module_name in LIB_MODULES:
namespace = "lib"
elif stub_file.parent.name in ("parquet", "interchange"):
namespace = f"{stub_file.parent.name}.{module_name}"
namespace = (stub_file.parent.name if module_name == "__init__"
else f"{stub_file.parent.name}.{module_name}")
elif module_name == "__init__":
namespace = ""
else:
Expand All @@ -198,31 +203,77 @@ def add_docstrings_to_stubs(stubs_dir):
stub_file.write_text(modified.code)


def add_docstrings_from_build(stubs_dir, build_lib):
"""
Entry point for setup.py: update docstrings using pyarrow from build directory.
def _link_or_copy(source, destination):
if sys.platform != "win32":
try:
os.symlink(source, destination)
return
except OSError:
pass

if source.is_dir():
shutil.copytree(source, destination, symlinks=(sys.platform != "win32"))
else:
shutil.copy2(source, destination)


During the build process, pyarrow is not installed in the system Python.
We need to temporarily add the build directory to sys.path so we can
import pyarrow and extract docstrings from it.
def _create_importable_pyarrow(pyarrow_pkg, source_dir, install_pyarrow_dir):
"""
stubs_dir, build_lib = Path(stubs_dir), Path(build_lib)
Populate pyarrow_pkg with source Python modules and installed binary artifacts
so that pyarrow can be imported from the parent directory of pyarrow_pkg.
"""
ext_suffix = sysconfig.get_config_var("EXT_SUFFIX") or ".so"
source_pyarrow = source_dir / "pyarrow"
if not source_pyarrow.exists():
raise FileNotFoundError(f"PyArrow source package not found: {source_pyarrow}")

for source_path in source_pyarrow.iterdir():
if source_path.suffix == ".py":
_link_or_copy(source_path, pyarrow_pkg / source_path.name)
elif source_path.is_dir() and not source_path.name.startswith((".", "__")):
_link_or_copy(source_path, pyarrow_pkg / source_path.name)

for artifact in install_pyarrow_dir.iterdir():
if not artifact.is_file():
continue

sys.path.insert(0, str(build_lib))
try:
add_docstrings_to_stubs(stubs_dir)
finally:
sys.path.pop(0)
destination = pyarrow_pkg / artifact.name
if destination.exists():
continue

is_extension = ext_suffix in artifact.name or artifact.suffix == ".pyd"
is_shared_library = (
".so" in artifact.name or artifact.suffix in (".dylib", ".dll")
)
if is_extension or is_shared_library:
_link_or_copy(artifact, destination)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("stubs_dir", type=Path, help="Path to pyarrow-stubs folder")
parser.add_argument("install_prefix", type=Path,
help="CMAKE_INSTALL_PREFIX used by wheel build")
parser.add_argument("source_dir", type=Path,
help="PyArrow source directory")
args = parser.parse_args()

# Add the directory containing this script's parent (python/) to sys.path
# so pyarrow can be imported when running from the python/ directory
script_dir = Path(__file__).resolve().parent
python_dir = script_dir.parent
sys.path.insert(0, str(python_dir))
add_docstrings_to_stubs(args.stubs_dir.resolve())
install_prefix = args.install_prefix.resolve()
source_dir = args.source_dir.resolve()
install_pyarrow_dir = install_prefix / "pyarrow"
if not install_pyarrow_dir.exists():
install_pyarrow_dir = install_prefix

if not any(install_pyarrow_dir.rglob("*.pyi")):
print("No .pyi files found in install tree, skipping docstring injection")
sys.exit(0)

with tempfile.TemporaryDirectory() as tmpdir:
pyarrow_pkg = Path(tmpdir) / "pyarrow"
pyarrow_pkg.mkdir()
_create_importable_pyarrow(pyarrow_pkg, source_dir, install_pyarrow_dir)

sys.path.insert(0, tmpdir)
try:
add_docstrings_to_stubs(install_pyarrow_dir)
finally:
sys.path.pop(0)