diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c4c4f04188d..9df3085175f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -360,8 +360,6 @@ repos: ?^dev/release/post-09-python\.sh$| ?^dev/release/setup-rhel-rebuilds\.sh$| ?^dev/release/utils-generate-checksum\.sh$| - ?^python/asv-install\.sh$| - ?^python/asv-uninstall\.sh$| ?^swift/gen-protobuffers\.sh$| ) - repo: https://github.com/scop/pre-commit-shfmt diff --git a/ci/scripts/python_benchmark.sh b/ci/scripts/python_benchmark.sh deleted file mode 100755 index f2f320370bc..00000000000 --- a/ci/scripts/python_benchmark.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Check the ASV benchmarking setup. -# Unfortunately this won't ensure that all benchmarks succeed -# (see https://github.com/airspeed-velocity/asv/issues/449) -source deactivate -conda create -y -q -n pyarrow_asv python=$PYTHON_VERSION -conda activate pyarrow_asv -pip install -q git+https://github.com/pitrou/asv.git@customize_commands - -export PYARROW_WITH_PARQUET=1 -export PYARROW_WITH_ORC=0 -export PYARROW_WITH_GANDIVA=0 - -pushd $ARROW_PYTHON_DIR -# Workaround for https://github.com/airspeed-velocity/asv/issues/631 -DEFAULT_BRANCH=$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@) -git fetch --depth=100 origin $DEFAULT_BRANCH:$DEFAULT_BRANCH -# Generate machine information (mandatory) -asv machine --yes -# Run benchmarks on the changeset being tested -asv run --no-pull --show-stderr --quick HEAD^! -popd # $ARROW_PYTHON_DIR diff --git a/docs/source/developers/python/development.rst b/docs/source/developers/python/development.rst index 50f5d56b8d3..c78e0ade265 100644 --- a/docs/source/developers/python/development.rst +++ b/docs/source/developers/python/development.rst @@ -186,4 +186,4 @@ Similarly, use lldb when debugging on macOS. Benchmarking ============ -For running the benchmarks, see :ref:`python-benchmarks`. +For running the benchmarks, see :ref:`benchmarks`. diff --git a/docs/source/python/benchmarks.rst b/docs/source/python/benchmarks.rst deleted file mode 100644 index 68fc03c7bcf..00000000000 --- a/docs/source/python/benchmarks.rst +++ /dev/null @@ -1,55 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, -.. software distributed under the License is distributed on an -.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -.. KIND, either express or implied. See the License for the -.. specific language governing permissions and limitations -.. under the License. - -.. _python-benchmarks: - -Benchmarks -========== - -The ``pyarrow`` package comes with a suite of benchmarks meant to -run with `ASV`_. You'll need to install the ``asv`` package first -(``pip install asv`` or ``conda install -c conda-forge asv``). - -Running the benchmarks ----------------------- - -To run the benchmarks for a locally-built Arrow, run ``asv run --python=same``. - -We use conda environments as part of running the benchmarks. To use the ``asv`` -setup, you must set the ``$CONDA_HOME`` environment variable to point to the -root of your conda installation. - -Running for arbitrary Git revisions ------------------------------------ - -ASV allows to store results and generate graphs of the benchmarks over -the project's evolution. You need to have the latest development version of ASV: - -.. code:: - - pip install git+https://github.com/airspeed-velocity/asv - -Now you should be ready to run ``asv run`` or whatever other command -suits your needs. Note that this can be quite long, as each Arrow needs -to be rebuilt for each Git revision you're running the benchmarks for. - -Compatibility -------------- - -We only expect the benchmarking setup to work on a Unix-like system with bash. - -.. _asv: https://asv.readthedocs.org/ diff --git a/python/.gitignore b/python/.gitignore index dec4ffc1c9b..ce97ba4af62 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -37,8 +37,6 @@ htmlcov # Cache .cache -# benchmark working dir -.asv pyarrow/_table_api.h # manylinux temporary files diff --git a/python/MANIFEST.in b/python/MANIFEST.in index ed7012e4b70..af5733276f1 100644 --- a/python/MANIFEST.in +++ b/python/MANIFEST.in @@ -12,4 +12,3 @@ global-exclude *~ global-exclude \#* global-exclude .git* global-exclude .DS_Store -prune .asv diff --git a/python/asv-build.sh b/python/asv-build.sh deleted file mode 100755 index 2de4a2453b6..00000000000 --- a/python/asv-build.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e - -# ASV doesn't activate its conda environment for us -if [ -z "$ASV_ENV_DIR" ]; then exit 1; fi - -if [ -z "$CONDA_HOME" ]; then - echo "Please set \$CONDA_HOME to point to your root conda installation" - exit 1; -fi - -eval "$($CONDA_HOME/bin/conda shell.bash hook)" - -conda activate $ASV_ENV_DIR -echo "== Conda Prefix for benchmarks: " $CONDA_PREFIX " ==" - -# Build Arrow C++ libraries -export ARROW_HOME=$CONDA_PREFIX -export PARQUET_HOME=$CONDA_PREFIX -export ORC_HOME=$CONDA_PREFIX -export PROTOBUF_HOME=$CONDA_PREFIX -export BOOST_ROOT=$CONDA_PREFIX - -pushd ../cpp -mkdir -p build -pushd build - -cmake -GNinja \ - -DCMAKE_BUILD_TYPE=release \ - -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ - -DARROW_CXXFLAGS=$CXXFLAGS \ - -DARROW_USE_GLOG=off \ - -DARROW_FLIGHT=on \ - -DARROW_GCS=on \ - -DARROW_ORC=on \ - -DARROW_PARQUET=on \ - -DARROW_PYTHON=on \ - -DARROW_S3=on \ - -DARROW_BUILD_TESTS=off \ - .. -cmake --build . --target install - -popd -popd - -# Build pyarrow wrappers -export SETUPTOOLS_SCM_PRETEND_VERSION=0.0.1 -export PYARROW_BUILD_TYPE=release -export PYARROW_PARALLEL=8 -export PYARROW_WITH_FLIGHT=1 -export PYARROW_WITH_GCS=1 -export PYARROW_WITH_ORC=1 -export PYARROW_WITH_PARQUET=1 - -python setup.py clean -find pyarrow -name "*.so" -delete -python setup.py develop diff --git a/python/asv-install.sh b/python/asv-install.sh deleted file mode 100755 index beef730b7b8..00000000000 --- a/python/asv-install.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Deliberately empty, but exists so that we don't have to change -# asv.conf.json if we need specific commands here. diff --git a/python/asv-uninstall.sh b/python/asv-uninstall.sh deleted file mode 100755 index beef730b7b8..00000000000 --- a/python/asv-uninstall.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Deliberately empty, but exists so that we don't have to change -# asv.conf.json if we need specific commands here. diff --git a/python/asv.conf.json b/python/asv.conf.json deleted file mode 100644 index b975936c99a..00000000000 --- a/python/asv.conf.json +++ /dev/null @@ -1,187 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -{ - // The version of the config file format. Do not change, unless - // you know what you are doing. - "version": 1, - - // The name of the project being benchmarked - "project": "pyarrow", - - // The project's homepage - "project_url": "https://arrow.apache.org/", - - // The URL or local path of the source code repository for the - // project being benchmarked - "repo": "..", - - // The Python project's subdirectory in your repo. If missing or - // the empty string, the project is assumed to be located at the root - // of the repository. - "repo_subdir": "python", - - // Custom build commands for Arrow. - "build_command": ["/bin/bash {build_dir}/asv-build.sh"], - "install_command": ["/bin/bash {build_dir}/asv-install.sh"], - "uninstall_command": ["/bin/bash {build_dir}/asv-uninstall.sh"], - - // List of branches to benchmark. If not provided, defaults to "master" - // (for git) or "default" (for mercurial). - // "branches": ["master"], // for git - // "branches": ["default"], // for mercurial - - // The DVCS being used. If not set, it will be automatically - // determined from "repo" by looking at the protocol in the URL - // (if remote), or by looking for special directories, such as - // ".git" (if local). - "dvcs": "git", - - // The tool to use to create environments. May be "conda", - // "virtualenv" or other value depending on the plugins in use. - // If missing or the empty string, the tool will be automatically - // determined by looking for tools on the PATH environment - // variable. - "environment_type": "conda", - // Avoid conda-forge to avoid C++ ABI issues - "conda_channels": ["defaults"], - - // the base URL to show a commit for the project. - "show_commit_url": "https://github.com/apache/arrow/commit/", - - // The Pythons you'd like to test against. If not provided, defaults - // to the current version of Python used to run `asv`. - "pythons": ["3.9"], - - // The matrix of dependencies to test. Each key is the name of a - // package (in PyPI) and the values are version numbers. An empty - // list or empty string indicates to just test against the default - // (latest) version. null indicates that the package is to not be - // installed. If the package to be tested is only available from - // PyPi, and the 'environment_type' is conda, then you can preface - // the package name by 'pip+', and the package will be installed via - // pip (with all the conda available packages installed first, - // followed by the pip installed packages). - // - // "matrix": { - // "numpy": ["1.6", "1.7"], - // "six": ["", null], // test with and without six installed - // "pip+emcee": [""], // emcee is only available for install with pip. - // }, - "matrix": { - // Use older boost since it works on more editions of the project - "aws-sdk-cpp": [], - "boost-cpp": ["1.68.0"], - "brotli": [], - "cmake": [], - "cython": [], - "flatbuffers": [], - "libgrpc": [], - "libprotobuf": [], - "lz4-c": [], - "ninja": [], - "numpy": [], - "pandas": ["0.25.1"], - "pip+setuptools_scm": [], - "rapidjson": [], - "re2": [], - "snappy": [], - "thrift-cpp": [], - "zstd": [], - }, - - // Combinations of libraries/python versions can be excluded/included - // from the set to test. Each entry is a dictionary containing additional - // key-value pairs to include/exclude. - // - // An exclude entry excludes entries where all values match. The - // values are regexps that should match the whole string. - // - // An include entry adds an environment. Only the packages listed - // are installed. The 'python' key is required. The exclude rules - // do not apply to includes. - // - // In addition to package names, the following keys are available: - // - // - python - // Python version, as in the *pythons* variable above. - // - environment_type - // Environment type, as above. - // - sys_platform - // Platform, as in sys.platform. Possible values for the common - // cases: 'linux2', 'win32', 'cygwin', 'darwin'. - // - // "exclude": [ - // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows - // {"environment_type": "conda", "six": null}, // don't run without six on conda - // ], - // - // "include": [ - // // additional env for python2.7 - // {"python": "2.7", "numpy": "1.8"}, - // // additional env if run on windows+conda - // {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""}, - // ], - - // The directory (relative to the current directory) that benchmarks are - // stored in. If not provided, defaults to "benchmarks" - "benchmark_dir": "benchmarks", - - // The directory (relative to the current directory) to cache the Python - // environments in. If not provided, defaults to "env" - "env_dir": ".asv/env", - - // The directory (relative to the current directory) that raw benchmark - // results are stored in. If not provided, defaults to "results". - "results_dir": ".asv/results", - - // The directory (relative to the current directory) that the html tree - // should be written to. If not provided, defaults to "html". - "html_dir": "build/benchmarks/html", - - // The number of characters to retain in the commit hashes. - // "hash_length": 8, - - // `asv` will cache wheels of the recent builds in each - // environment, making them faster to install next time. This is - // number of builds to keep, per environment. - // "wheel_cache_size": 0, - - // The commits after which the regression search in `asv publish` - // should start looking for regressions. Dictionary whose keys are - // regexps matching to benchmark names, and values corresponding to - // the commit (exclusive) after which to start looking for - // regressions. The default is to start from the first commit - // with results. If the commit is `null`, regression detection is - // skipped for the matching benchmark. - // - // "regressions_first_commits": { - // "some_benchmark": "352cdf", // Consider regressions only after this commit - // "another_benchmark": null, // Skip regression detection altogether - // } - - // The thresholds for relative change in results, after which `asv - // publish` starts reporting regressions. Dictionary of the same - // form as in ``regressions_first_commits``, with values - // indicating the thresholds. If multiple entries match, the - // maximum is taken. If no entry matches, the default is 5%. - // - // "regressions_thresholds": { - // "some_benchmark": 0.01, // Threshold of 1% - // "another_benchmark": 0.5, // Threshold of 50% - // } -}