diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index e5d367958dd..e886e7527ec 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -239,6 +239,11 @@ jobs: - name: Test shell: bash run: ci/scripts/python_test.sh $(pwd) $(pwd)/build + - name: Test annotations + shell: bash + env: + PYARROW_TEST_ANNOTATIONS: "ON" + run: ci/scripts/python_test_type_annotations.sh $(pwd)/python windows: name: AMD64 Windows 2022 Python 3.13 @@ -296,3 +301,9 @@ jobs: shell: cmd run: | call "ci\scripts\python_test.bat" %cd% + - name: Test annotations + shell: cmd + env: + PYARROW_TEST_ANNOTATIONS: "ON" + run: | + call "ci\scripts\python_test_type_annotations.bat" %cd%\python diff --git a/ci/scripts/python_test_type_annotations.bat b/ci/scripts/python_test_type_annotations.bat new file mode 100644 index 00000000000..5a3d0952dc8 --- /dev/null +++ b/ci/scripts/python_test_type_annotations.bat @@ -0,0 +1,40 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, +@rem software distributed under the License is distributed on an +@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@rem KIND, either express or implied. See the License for the +@rem specific language governing permissions and limitations +@rem under the License. + +@echo on + +set PYARROW_DIR=%1 + +if "%PYARROW_TEST_ANNOTATIONS%"=="ON" ( + echo Annotation testing on Windows ... + + @REM Install library stubs. 
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Run the static type checkers (mypy, pyright, ty) against the stubs.
#
# Usage: python_test_type_annotations.sh <pyarrow source dir>
#
# The checks only run when PYARROW_TEST_ANNOTATIONS is "ON"; any other
# value (or an unset variable) skips them.

set -ex
pyarrow_dir=${1}

if [ "${PYARROW_TEST_ANNOTATIONS:-}" == "ON" ]; then
  if [ -n "${ARROW_PYTHON_VENV:-}" ]; then
    . "${ARROW_PYTHON_VENV}/bin/activate"
  fi

  # Install library stubs. Note some libraries contain their own type hints so they need to be installed.
  pip install fsspec pandas-stubs scipy-stubs types-cffi types-psutil types-requests types-python-dateutil

  # Install type checkers
  pip install mypy pyright ty

  # Run type checkers. The directory is quoted so that a checkout path
  # containing spaces or glob characters is passed to pushd as one word.
  pushd "${pyarrow_dir}"
  mypy
  pyright
  ty check
  popd
else
  echo "Skipping type annotation tests"
fi
def main(): parser = argparse.ArgumentParser() diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index b4b7fed99fd..3c5d41967de 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -132,6 +132,11 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist pushd C:\arrow\python +@REM We first populate stub docstrings and then build the wheel +%PYTHON_CMD% setup.py build_ext --inplace +%PYTHON_CMD% -m pip install libcst +%PYTHON_CMD% ..\dev\update_stub_docstrings.py pyarrow-stubs + @REM Build wheel %PYTHON_CMD% setup.py bdist_wheel || exit /B 1 diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index a3fbeb3c0b3..9ff871d4d18 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -167,6 +167,11 @@ export ARROW_HOME=/tmp/arrow-dist export CMAKE_PREFIX_PATH=/tmp/arrow-dist pushd /arrow/python +# We first populate stub docstrings and then build the wheel +python setup.py build_ext --inplace +python -m pip install libcst +python ../dev/update_stub_docstrings.py pyarrow-stubs + python setup.py bdist_wheel echo "=== Strip symbols from wheel ===" diff --git a/compose.yaml b/compose.yaml index 84481e1af76..1d368d4df08 100644 --- a/compose.yaml +++ b/compose.yaml @@ -919,12 +919,14 @@ services: environment: <<: [*common, *ccache, *sccache] PYTEST_ARGS: # inherit + PYARROW_TEST_ANNOTATIONS: "ON" volumes: *conda-volumes command: &python-conda-command [" /arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/python_test.sh /arrow"] + /arrow/ci/scripts/python_test.sh /arrow && + /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"] conda-python-emscripten: # Usage: @@ -1001,6 +1003,7 @@ services: ARROW_S3: "OFF" ARROW_SUBSTRAIT: "OFF" ARROW_WITH_OPENTELEMETRY: "OFF" + PYARROW_TEST_ANNOTATIONS: "ON" SETUPTOOLS_SCM_PRETEND_VERSION: 
volumes: *ubuntu-volumes deploy: *cuda-deploy @@ -1008,7 +1011,8 @@ services: /bin/bash -c " /arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/python_test.sh /arrow" + /arrow/ci/scripts/python_test.sh /arrow && + /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python" debian-python: # Usage: @@ -1500,6 +1504,7 @@ services: python: ${PYTHON} shm_size: *shm-size environment: + PYARROW_TEST_ANNOTATIONS: "ON" <<: [*common, *ccache, *sccache] PARQUET_REQUIRE_ENCRYPTION: # inherit HYPOTHESIS_PROFILE: # inherit @@ -1510,7 +1515,8 @@ services: /arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build.sh /arrow /build && mamba uninstall -y numpy && - /arrow/ci/scripts/python_test.sh /arrow"] + /arrow/ci/scripts/python_test.sh /arrow && + /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"] conda-python-docs: # Usage: @@ -1530,13 +1536,15 @@ services: BUILD_DOCS_CPP: "ON" BUILD_DOCS_PYTHON: "ON" PYTEST_ARGS: "--doctest-modules --doctest-cython" + PYARROW_TEST_ANNOTATIONS: "ON" volumes: *conda-volumes command: ["/arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build.sh /arrow /build && pip install -e /arrow/dev/archery[numpydoc] && archery numpydoc --allow-rule GL10,PR01,PR03,PR04,PR05,PR10,RT03,YD01 && - /arrow/ci/scripts/python_test.sh /arrow"] + /arrow/ci/scripts/python_test.sh /arrow && + /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"] conda-python-dask: # Possible $DASK parameters: diff --git a/dev/update_stub_docstrings.py b/dev/update_stub_docstrings.py new file mode 100644 index 00000000000..1ba69fa27f5 --- /dev/null +++ b/dev/update_stub_docstrings.py @@ -0,0 +1,292 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
def _resolve_object(module, path):
    """
    Resolve an object by dotted path from a base module.

    Each component is looked up with ``getattr`` first. When that fails,
    ``vars(parent)`` is consulted (for special methods such as ``__init__``
    that may not be reachable through ``getattr``). As a last resort, when
    the parent is a package (it has ``__path__``), the component is imported
    as a submodule, so subpackages that the base package has not imported
    yet can still be reached.

    Parameters
    ----------
    module : module
        The base module (e.g., pyarrow)
    path : str
        Dotted path like "lib.Array" or "lib.concat_arrays"

    Returns
    -------
    tuple
        (obj, parent, obj_name) or (None, None, None) if not found
    """
    if not path:
        return module, None, module.__name__

    parts = path.split(".")
    parent = None
    obj = module

    for part in parts:
        parent = obj
        try:
            obj = getattr(parent, part)
            continue
        except AttributeError:
            pass

        # Fallback 1: the instance/class namespace itself.
        try:
            found = vars(parent).get(part)
        except TypeError:
            # parent has no __dict__
            found = None

        # Fallback 2: a submodule of a package that is not imported yet.
        if found is None and hasattr(parent, "__path__"):
            try:
                found = importlib.import_module(f"{parent.__name__}.{part}")
            except ImportError:
                found = None

        if found is None:
            return None, None, None
        obj = found

    # Get the object's simple name
    obj_name = getattr(obj, "__name__", parts[-1])
    return obj, parent, obj_name


def _strip_embedded_signature(docstring, obj_name, parent_name):
    """
    Remove a leading signature line such as ``name(...)`` from a docstring.

    The first line counts as a signature only when the (optionally
    parent-qualified) object name is directly followed by ``(`` or ``:``.
    A docstring whose prose merely begins with the object name is returned
    unchanged. Blank lines that follow the signature are dropped as well.
    """
    lines = docstring.splitlines()
    if not lines:
        return docstring

    prefixes = [obj_name]
    if parent_name is not None:
        prefixes.insert(0, f"{parent_name}.{obj_name}")

    first_line = lines[0]
    is_signature = any(
        first_line.startswith(prefix)
        and first_line[len(prefix):].lstrip().startswith(("(", ":"))
        for prefix in prefixes
    )
    if not is_signature:
        return docstring

    start = 1
    while start < len(lines) and not lines[start].strip():
        start += 1
    return "\n".join(lines[start:])


def _get_docstring(name, module, indentation):
    """
    Extract and format docstring for a symbol.

    Parameters
    ----------
    name : str
        Dotted name like "lib.Array" or "lib.concat_arrays"
    module : module
        The pyarrow module
    indentation : int
        Number of indentation levels (4 spaces each)

    Returns
    -------
    str or None
        Source text of a triple-quoted string literal ready for insertion,
        or None if the symbol or its docstring is not found
    """
    obj, parent, obj_name = _resolve_object(module, name)

    if obj is None:
        print(f"{name} not found in {module.__name__}, it's probably ok.")
        return None

    # Get docstring using inspect.getdoc for cleaner formatting
    docstring = inspect.getdoc(obj)
    if not docstring:
        return None

    # Get parent name for signature detection
    parent_name = getattr(parent, "__name__", None) if parent else None

    # Remove signature if present in docstring
    # Cython/pybind11 often include signatures like "func_name(...)\n\n..."
    docstring = _strip_embedded_signature(docstring, obj_name, parent_name)

    # Skip empty docstrings
    if not docstring.strip():
        return None

    # The text ends up inside a non-raw triple-quoted literal, so backslashes
    # and embedded triple quotes must be escaped for the literal to evaluate
    # back to the original text.
    docstring = docstring.replace("\\", "\\\\").replace('"""', '\\"\\"\\"')

    # Format as docstring with proper indentation
    indentation_prefix = indentation * "    "
    docstring = indent(docstring + '\n"""', indentation_prefix)
    docstring = '"""\n' + docstring

    return docstring
class DocstringInserter(libcst.CSTTransformer):
    """
    libcst transformer that fills stub definitions with runtime docstrings.

    Parameters
    ----------
    module : module
        Imported runtime module the docstrings are read from.
    namespace : str
        Dotted prefix prepended to every stub symbol before it is looked
        up; an empty string means symbols are looked up directly on
        ``module``.
    """

    def __init__(self, module, namespace):
        self.module = module
        self.base_namespace = namespace
        # Names of the class/function definitions currently being visited,
        # outermost first.
        self.stack = []
        # Current nesting depth, passed to _get_docstring as indentation.
        self.indentation = 0

    def _qualify(self, name):
        """Prefix ``name`` with the base namespace when one is set."""
        if self.base_namespace:
            return f"{self.base_namespace}.{name}"
        return name

    @staticmethod
    def _docstring_line(docstring):
        """Wrap docstring source text in a standalone expression statement."""
        return libcst.SimpleStatementLine(
            body=[libcst.Expr(value=libcst.SimpleString(value=docstring))]
        )

    def leave_Module(self, original_node, updated_node):
        """
        Insert a docstring after each ``x = _clone_signature(...)`` statement.

        The docstring is looked up under the assignment target's name and
        emitted as a separate string statement right after the assignment.
        """
        cloned_signature = m.SimpleStatementLine(
            body=[
                m.Assign(value=m.Call(func=m.Name(value="_clone_signature"))),
                m.ZeroOrMore(),
            ]
        )

        statements = []
        for stmt in updated_node.body:
            statements.append(stmt)
            if not m.matches(stmt, cloned_signature):
                continue
            target = self._qualify(stmt.body[0].targets[0].target.value)
            docstring = _get_docstring(target, self.module, 0)
            if docstring is None:
                continue
            statements.append(self._docstring_line(docstring))

        return updated_node.with_changes(body=statements)

    def visit_ClassDef(self, node):
        """Record entry into a class body."""
        self.stack.append(node.name.value)
        self.indentation += 1

    def leave_ClassDef(self, original_node, updated_node):
        """
        Add the runtime docstring to a class stub.

        Two shapes are handled: a class body whose first statement starts
        with ``...`` (the ellipsis is replaced by the docstring) and a class
        body whose first element is a function definition (the docstring is
        inserted in front of it). Other class bodies are left untouched.
        """
        qualified = self._qualify(".".join(self.stack))

        starts_with_ellipsis = m.ClassDef(
            name=m.Name(),
            body=m.IndentedBlock(
                body=[
                    m.SimpleStatementLine(
                        body=[m.Expr(m.Ellipsis()), m.ZeroOrMore()]
                    ),
                    m.ZeroOrMore(),
                ]
            ),
        )
        starts_with_function = m.ClassDef(
            name=m.Name(),
            body=m.IndentedBlock(body=[m.FunctionDef(), m.ZeroOrMore()]),
        )

        if m.matches(updated_node, starts_with_ellipsis):
            docstring = _get_docstring(qualified, self.module, self.indentation)
            if docstring is not None:
                placeholder = updated_node.body.body[0].body[0].value
                updated_node = updated_node.deep_replace(
                    placeholder, libcst.SimpleString(value=docstring)
                )

        if m.matches(updated_node, starts_with_function):
            docstring = _get_docstring(qualified, self.module, self.indentation)
            if docstring is not None:
                members = [self._docstring_line(docstring)]
                members.extend(updated_node.body.body)
                updated_node = updated_node.with_changes(
                    body=updated_node.body.with_changes(body=members)
                )

        # Leaving the class: undo what visit_ClassDef recorded.
        self.stack.pop()
        self.indentation -= 1
        return updated_node

    def visit_FunctionDef(self, node):
        """Record entry into a function body."""
        self.stack.append(node.name.value)
        self.indentation += 1

    def leave_FunctionDef(self, original_node, updated_node):
        """
        Replace the body of a ``def f(...): ...`` stub with its docstring.

        Only functions whose whole body is a single ``...`` written on the
        same line as the header are rewritten; the new body is an indented
        block holding just the docstring.
        """
        qualified = self._qualify(".".join(self.stack))

        one_line_stub = m.FunctionDef(
            name=m.Name(),
            body=m.SimpleStatementSuite(body=[m.Expr(m.Ellipsis())]),
        )
        if m.matches(original_node, one_line_stub):
            docstring = _get_docstring(qualified, self.module, self.indentation)
            if docstring is not None:
                updated_node = updated_node.with_changes(
                    body=libcst.IndentedBlock(
                        body=[self._docstring_line(docstring)]
                    )
                )

        # Leaving the function: undo what visit_FunctionDef recorded.
        self.stack.pop()
        self.indentation -= 1
        return updated_node
def add_docs_to_stub_files(pyarrow_folder):
    """
    Update stub files with docstrings extracted from pyarrow runtime.

    Every ``*.pyi`` file below ``pyarrow_folder`` (except
    ``_stubs_typing.pyi``) is parsed, run through ``DocstringInserter`` and
    written back in place.

    Parameters
    ----------
    pyarrow_folder : Path
        Path to the pyarrow-stubs folder
    """
    print("Updating docstrings of stub files in:", pyarrow_folder)

    # Load pyarrow using importlib
    pyarrow_module = importlib.import_module("pyarrow")

    # Stub modules whose symbols are looked up under the "lib" namespace.
    lib_modules = {"array", "builder", "compat", "config", "device", "error",
                   "io", "_ipc", "memory", "pandas_shim", "scalar", "table",
                   "tensor", "_types"}
    # Subpackages whose stubs are looked up under "<package>.<module>".
    subpackages = ("parquet", "interchange")

    # Materialize the listing first: files are rewritten inside the loop, and
    # sorting keeps the log output in a stable order.
    for stub_file in sorted(pyarrow_folder.rglob('*.pyi')):
        if stub_file.name == "_stubs_typing.pyi":
            continue
        module = stub_file.with_suffix('').name
        print(f"[{stub_file} {module}]")

        # Read and write as UTF-8 explicitly so the result does not depend on
        # the platform's locale encoding.
        with open(stub_file, 'r', encoding="utf-8") as f:
            tree = libcst.parse_module(f.read())

        if module in lib_modules:
            module = "lib"
        elif stub_file.parent.name in subpackages:
            package = stub_file.parent.name
            # A subpackage's __init__.pyi describes the package itself;
            # "<package>.__init__" is not a path any symbol resolves under.
            if module == "__init__":
                module = package
            else:
                module = f"{package}.{module}"
        elif module == "__init__":
            module = ""

        modified_tree = tree.visit(DocstringInserter(pyarrow_module, module))
        with open(stub_file, "w", encoding="utf-8") as f:
            f.write(modified_tree.code)
        print("\n")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Extract docstrings from pyarrow and update stub files."
    )
    parser.add_argument(
        "pyarrow_folder",
        type=Path,
        help="Path to the pyarrow-stubs folder"
    )
    args = parser.parse_args()

    add_docs_to_stub_files(args.pyarrow_folder.resolve())
**Run type checkers** to ensure the stubs are correct and complete. + +The stub files are automatically copied into the built wheel during the build +process and will be included when users install PyArrow, enabling type checking +in downstream projects and for users' IDEs. + +Note: ``py.typed`` marker file in the ``pyarrow/`` directory indicates to type +checkers that PyArrow supports type checking according to :pep:`561`. + Doctest ======= diff --git a/python/MANIFEST.in b/python/MANIFEST.in index ed7012e4b70..2840ba74128 100644 --- a/python/MANIFEST.in +++ b/python/MANIFEST.in @@ -4,6 +4,7 @@ include ../NOTICE.txt global-include CMakeLists.txt graft pyarrow +graft pyarrow-stubs graft cmake_modules global-exclude *.so diff --git a/python/pyarrow-stubs/pyarrow/__init__.pyi b/python/pyarrow-stubs/pyarrow/__init__.pyi new file mode 100644 index 00000000000..2a68a513099 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/__init__.pyi @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Type stubs for PyArrow. + +This is a placeholder stub file. +Complete type annotations will be added in subsequent PRs. +""" + +from typing import Any + +def __getattr__(name: str) -> Any: ... 
diff --git a/python/pyarrow/py.typed b/python/pyarrow/py.typed new file mode 100644 index 00000000000..13a83393a91 --- /dev/null +++ b/python/pyarrow/py.typed @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/python/pyproject.toml b/python/pyproject.toml index 0a730fd4f78..c3ce61c6c31 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -84,11 +84,11 @@ zip-safe=false include-package-data=true [tool.setuptools.packages.find] -include = ["pyarrow"] +include = ["pyarrow", "pyarrow.*"] namespaces = false [tool.setuptools.package-data] -pyarrow = ["*.pxd", "*.pyx", "includes/*.pxd"] +pyarrow = ["*.pxd", "*.pyx", "includes/*.pxd", "py.typed"] [tool.setuptools_scm] root = '..' 
def _copy_stubs(self):
    """
    Copy .pyi stub files from pyarrow-stubs to the build directory.

    The directory tree below ``pyarrow-stubs/pyarrow`` is mirrored into
    ``<build_lib>/pyarrow``; only files ending in ``.pyi`` are copied.
    Nothing happens when the stubs directory does not exist.
    """
    build_cmd = self.get_finalized_command('build')
    build_lib = os.path.abspath(build_cmd.build_lib)

    stubs_src = pjoin(setup_dir, 'pyarrow-stubs', 'pyarrow')
    stubs_dest = pjoin(build_lib, 'pyarrow')

    if not os.path.exists(stubs_src):
        return

    print(f"-- Copying stub files from {stubs_src} to {stubs_dest}")
    for root, _dirs, files in os.walk(stubs_src):
        # Calculate relative path from stubs_src
        rel_dir = os.path.relpath(root, stubs_src)
        dest_dir = stubs_dest if rel_dir == '.' else pjoin(stubs_dest, rel_dir)

        # Create destination directory if needed; exist_ok avoids the
        # check-then-create race of a separate os.path.exists() test.
        os.makedirs(dest_dir, exist_ok=True)

        # Copy .pyi files
        for file in files:
            if file.endswith('.pyi'):
                shutil.copy2(pjoin(root, file), pjoin(dest_dir, file))