diff --git a/.env b/.env index bdb74d89e1c6..957b76664bba 100644 --- a/.env +++ b/.env @@ -72,7 +72,7 @@ PANDAS=latest PYTHON=3.9 PYTHON_IMAGE_TAG=3.9 PYTHON_ABI_TAG=cp39 -R=4.4 +R=4.5 SPARK=master TURBODBC=latest diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 0ff55a381919..1da536ed2a5c 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -210,7 +210,6 @@ jobs: ARROW_WITH_SNAPPY: ON ARROW_WITH_ZLIB: ON ARROW_WITH_ZSTD: ON - GTest_SOURCE: BUNDLED steps: - name: CPU Info run: | @@ -256,6 +255,19 @@ jobs: restore-keys: cpp-ccache-macos-${{ matrix.macos-version }}- - name: Build run: | + if [ "${{ matrix.macos-version }}" = "13" ]; then + # This is a workaround. + # + # Homebrew uses /usr/local as prefix. So packages + # installed by Homebrew also use /usr/local/include. We + # want to include headers for packages installed by + # Homebrew as system headers to ignore warnings in them. + # But "-isystem /usr/local/include" isn't used by CMake + # because /usr/local/include is marked as the default + # include path. So we disable -Werror to avoid build error + # by warnings from packages installed by Homebrew. + export BUILD_WARNING_LEVEL=PRODUCTION + fi ci/scripts/cpp_build.sh $(pwd) $(pwd)/build - name: Test shell: bash @@ -406,7 +418,6 @@ jobs: # We can't use unity build because we don't have enough memory on # GitHub Actions. 
# CMAKE_UNITY_BUILD: ON - GTest_SOURCE: BUNDLED steps: - name: Disable Crash Dialogs run: | diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 239356d4d5a9..c550a7668b1b 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -195,6 +195,7 @@ jobs: brew uninstall pkg-config || : brew uninstall pkg-config@0.29.2 || : brew bundle --file=cpp/Brewfile + python -m pip install \ -r python/requirements-build.txt \ -r python/requirements-test.txt @@ -218,6 +219,20 @@ jobs: - name: Build shell: bash run: | + if [ "${{ matrix.macos-version }}" = "13" ]; then + # This is a workaround. + # + # Homebrew uses /usr/local as prefix. So packages + # installed by Homebrew also use /usr/local/include. We + # want to include headers for packages installed by + # Homebrew as system headers to ignore warnings in them. + # But "-isystem /usr/local/include" isn't used by CMake + # because /usr/local/include is marked as the default + # include path. So we disable -Werror to avoid build error + # by warnings from packages installed by Homebrew. 
+ export BUILD_WARNING_LEVEL=PRODUCTION + fi + python -m pip install wheel ci/scripts/cpp_build.sh $(pwd) $(pwd)/build ci/scripts/python_build.sh $(pwd) $(pwd)/build diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index ed26e72cf15a..be90eeee39bd 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -79,7 +79,7 @@ jobs: UBUNTU: ${{ matrix.ubuntu }} steps: - name: Checkout Arrow - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 submodules: recursive @@ -148,7 +148,7 @@ jobs: ARROW_WITH_ZSTD: ON steps: - name: Checkout Arrow - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 submodules: recursive @@ -248,7 +248,7 @@ jobs: /d 1 ` /f - name: Checkout Arrow - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 submodules: recursive @@ -366,7 +366,7 @@ jobs: /d 1 ` /f - name: Checkout Arrow - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 submodules: recursive @@ -395,6 +395,20 @@ jobs: env: # We can invalidate the current cache by updating this. 
CACHE_VERSION: "2024-05-09" + - name: Checkout vcpkg + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + path: vcpkg + repository: microsoft/vcpkg + - name: Bootstrap vcpkg + run: | + vcpkg\bootstrap-vcpkg.bat + $VCPKG_ROOT = $(Resolve-Path -LiteralPath "vcpkg").ToString() + Write-Output ${VCPKG_ROOT} | ` + Out-File -FilePath ${Env:GITHUB_PATH} -Encoding utf8 -Append + Write-Output "VCPKG_ROOT=${VCPKG_ROOT}" | ` + Out-File -FilePath ${Env:GITHUB_ENV} -Encoding utf8 -Append - name: Setup NuGet credentials for vcpkg caching shell: bash run: | @@ -411,10 +425,14 @@ jobs: - name: Build C++ shell: cmd run: | + set VCPKG_ROOT_KEEP=%VCPKG_ROOT% call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + set VCPKG_ROOT=%VCPKG_ROOT_KEEP% bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build" - name: Build GLib shell: cmd run: | + set VCPKG_ROOT_KEEP=%VCPKG_ROOT% call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + set VCPKG_ROOT=%VCPKG_ROOT_KEEP% bash -c "ci/scripts/c_glib_build.sh $(pwd) $(pwd)/build" diff --git a/.github/workflows/verify_rc.yml b/.github/workflows/verify_rc.yml index fe46ae6f230b..dceb04a49231 100644 --- a/.github/workflows/verify_rc.yml +++ b/.github/workflows/verify_rc.yml @@ -21,6 +21,16 @@ on: push: tags: - "*-rc*" + pull_request: + paths: + - ".github/workflows/verify_rc.yml" + workflow_dispatch: + inputs: + rc_tag: + description: "Tag of the rc to verify" + type: string + required: true + permissions: contents: read @@ -28,6 +38,7 @@ permissions: env: TEST_DEFAULT: "0" VERBOSE: "1" + RC_TAG: "${{ inputs.rc_tag || github.event_name == 'pull_request' && 'apache-arrow-20.0.0-rc0' || github.ref_name }}" jobs: apt: @@ -46,9 +57,9 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Run run: | - package_id=${GITHUB_REF_NAME%-rc*} + package_id=${RC_TAG%-rc*} 
version=${package_id#apache-arrow-} - rc=${GITHUB_REF_NAME#*-rc} + rc=${RC_TAG#*-rc} dev/release/verify-release-candidate.sh ${version} ${rc} binary: @@ -61,9 +72,9 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Run run: | - package_id=${GITHUB_REF_NAME%-rc*} + package_id=${RC_TAG%-rc*} version=${package_id#apache-arrow-} - rc=${GITHUB_REF_NAME#*-rc} + rc=${RC_TAG#*-rc} dev/release/verify-release-candidate.sh ${version} ${rc} wheels-linux: @@ -89,9 +100,9 @@ jobs: run: python -m pip install -e dev/archery[docker] - name: Prepare run: | - package_id=${GITHUB_REF_NAME%-rc*} + package_id=${RC_TAG%-rc*} echo "VERSION=${package_id#apache-arrow-}" >> ${GITHUB_ENV} - echo "RC=${GITHUB_REF_NAME#*-rc}" >> ${GITHUB_ENV} + echo "RC=${RC_TAG#*-rc}" >> ${GITHUB_ENV} distro=${{ matrix.distro }} if [ "${distro}" = "conda" ]; then echo "SERVICE=${distro}-verify-rc" >> ${GITHUB_ENV} @@ -102,6 +113,8 @@ jobs: echo "$(echo ${os} | tr a-z A-Z)=${version}" >> ${GITHUB_ENV} fi - name: Run + env: + GH_TOKEN: ${{ github.token }} run: | archery docker run \ -e TEST_DEFAULT="${TEST_DEFAULT}" \ @@ -109,6 +122,7 @@ jobs: -e VERBOSE="${VERBOSE}" \ -e VERIFY_RC="${RC}" \ -e VERIFY_VERSION="${VERSION}" \ + -e GH_TOKEN="$GH_TOKEN" \ ${SERVICE} wheels-macos: @@ -126,10 +140,12 @@ jobs: steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Run + env: + GH_TOKEN: ${{ github.token }} run: | - package_id=${GITHUB_REF_NAME%-rc*} + package_id=${RC_TAG%-rc*} version=${package_id#apache-arrow-} - rc=${GITHUB_REF_NAME#*-rc} + rc=${RC_TAG#*-rc} dev/release/verify-release-candidate.sh ${version} ${rc} wheels-windows: @@ -141,12 +157,14 @@ jobs: TEST_WHEELS: "1" steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + submodules: recursive - name: Prepare shell: bash run: | - package_id=${GITHUB_REF_NAME%-rc*} + package_id=${RC_TAG%-rc*} echo "VERSION=${package_id#apache-arrow-}" >> 
${GITHUB_ENV} - echo "RC=${GITHUB_REF_NAME#*-rc}" >> ${GITHUB_ENV} + echo "RC=${RC_TAG#*-rc}" >> ${GITHUB_ENV} - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 - name: Install System Dependencies run: | @@ -156,6 +174,8 @@ jobs: shell: bash run: ci/scripts/download_tz_database.sh - name: Run verification + env: + GH_TOKEN: ${{ github.token }} shell: cmd run: | dev/release/verify-release-candidate-wheels.bat %VERSION% %RC% @@ -176,7 +196,7 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Run run: | - package_id=${GITHUB_REF_NAME%-rc*} + package_id=${RC_TAG%-rc*} version=${package_id#apache-arrow-} - rc=${GITHUB_REF_NAME#*-rc} + rc=${RC_TAG#*-rc} dev/release/verify-release-candidate.sh ${version} ${rc} diff --git a/CHANGELOG.md b/CHANGELOG.md index 6101f5d3cac2..1c686c856851 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,269 @@ +# Apache Arrow 20.0.0 (2025-04-01 07:00:00+00:00) + +## Bug Fixes + +* [GH-30302](https://github.com/apache/arrow/issues/30302) - [C++][Parquet] Preserve the bitwidth of integer dictionary indices on round-trip to Parquet (#45685) +* [GH-31992](https://github.com/apache/arrow/issues/31992) - [C++][Parquet] Handling the special case when DataPageV2 values buffer is empty (#45252) +* [GH-36628](https://github.com/apache/arrow/issues/36628) - [Python][Parquet] Fail when instantiating internal Parquet metadata classes (#45549) +* [GH-37630](https://github.com/apache/arrow/issues/37630) - [C++][Python][Dataset] Allow disabling fragment metadata caching (#45330) +* [GH-39023](https://github.com/apache/arrow/issues/39023) - [C++][CMake] Add missing launcher path conversion for ExternalPackage (#45349) +* [GH-41166](https://github.com/apache/arrow/issues/41166) - [CI][Packaging] Remove unmaintained conda-recipes (#45944) +* [GH-43057](https://github.com/apache/arrow/issues/43057) - [C++] Thread-safe AesEncryptor / AesDecryptor (#44990) +* 
[GH-44188](https://github.com/apache/arrow/issues/44188) - [Python] Fix pandas roundtrip with bytes column names (#44171) +* [GH-44363](https://github.com/apache/arrow/issues/44363) - [C#] Handle Flight data with zero batches (#45315) +* [GH-45048](https://github.com/apache/arrow/issues/45048) - [C++][Parquet] Deprecate unused `chunk_size` parameter in `parquet::arrow::FileWriter::NewRowGroup()` (#45088) +* [GH-45129](https://github.com/apache/arrow/issues/45129) - [Python][C++] Fix usage of deprecated C++ functionality on pyarrow (#45189) +* [GH-45132](https://github.com/apache/arrow/issues/45132) - [C++][Gandiva] Update LLVM to 18.1 (#45114) +* [GH-45155](https://github.com/apache/arrow/issues/45155) - [Python][CI] Fix path for scientific nightly windows wheel upload (#45222) +* [GH-45159](https://github.com/apache/arrow/issues/45159) - [CI][Integration] Remove substrait consumer-testing integration job (#45463) +* [GH-45169](https://github.com/apache/arrow/issues/45169) - [Python] Adapt to modified pytest ignore collect hook api (#45170) +* [GH-45185](https://github.com/apache/arrow/issues/45185) - [C++][Parquet] Raise an error for invalid repetition levels when delimiting records (#45186) +* [GH-45254](https://github.com/apache/arrow/issues/45254) - [C++][Acero] Fix the row offset truncation in row table merge (#45255) +* [GH-45266](https://github.com/apache/arrow/issues/45266) - [C++][Acero] Fix the running tasks count of Scheduler when get error tasks in multi-threads (#45268) +* [GH-45270](https://github.com/apache/arrow/issues/45270) - [C++][CI] Disable mimalloc on Valgrind builds (#45271) +* [GH-45293](https://github.com/apache/arrow/issues/45293) - [CI] Install patch command to base conda.dockerfile required in case of bundled ORC (#45294) +* [GH-45301](https://github.com/apache/arrow/issues/45301) - [C++] Change PrimitiveArray ctor to protected (#45444) +* [GH-45334](https://github.com/apache/arrow/issues/45334) - [C++][Acero] Fix swiss join overflow 
issues in row offset calculation for fixed length and null masks (#45336) +* [GH-45347](https://github.com/apache/arrow/issues/45347) - [Packaging][Linux] Use cpp/CMakeLists.txt instead of java/pom.xml to detect version (#45348) +* [GH-45354](https://github.com/apache/arrow/issues/45354) - [GLib] Fix garrow_record_batch_validate() definied location (#45355) +* [GH-45362](https://github.com/apache/arrow/issues/45362) - [C++] Fix identity cast for time and list scalar (#45370) +* [GH-45371](https://github.com/apache/arrow/issues/45371) - [C++] Fix data race in `SimpleRecordBatch::columns` (#45372) +* [GH-45377](https://github.com/apache/arrow/issues/45377) - [CI][R] Ensure install R on ubuntu-24.04 runner for R nightly build jobs (#45464) +* [GH-45378](https://github.com/apache/arrow/issues/45378) - [CI][R] Increase timeout of test-ubuntu-r-sanitizer job (#45379) +* [GH-45380](https://github.com/apache/arrow/issues/45380) - [Python] Expose RankQuantileOptions to Python (#45392) +* [GH-45381](https://github.com/apache/arrow/issues/45381) - [CI][Packaging][Conan] Use the latest supported image (#45387) +* [GH-45390](https://github.com/apache/arrow/issues/45390) - [GLib] Use hyphen-separated words for error tag (#45391) +* [GH-45393](https://github.com/apache/arrow/issues/45393) - [C++][Compute] Fix wrong decoding for 32-bit column in row table (#45473) +* [GH-45396](https://github.com/apache/arrow/issues/45396) - [C++] Use Boost with ARROW_FUZZING (#45397) +* [GH-45423](https://github.com/apache/arrow/issues/45423) - [C++] Don't require Boost library with ARROW_TESTING=ON/ARROW_BUILD_SHARED=OFF (#45424) +* [GH-45436](https://github.com/apache/arrow/issues/45436) - [Docs][Packaging][Linux] Update how to build .deb/.rpm (#45481) +* [GH-45455](https://github.com/apache/arrow/issues/45455) - [GLib] Fix returns positive memory-pool utilization (#45456) +* [GH-45497](https://github.com/apache/arrow/issues/45497) - [C++][CSV] Avoid buffer overflow when a line has too many 
columns (#45498) +* [GH-45499](https://github.com/apache/arrow/issues/45499) - [CI] Bump actions/cache version on GHA (#45500) +* [GH-45510](https://github.com/apache/arrow/issues/45510) - [CI][C++] Fix LLVM APT repository preparation on Debian (#45511) +* [GH-45512](https://github.com/apache/arrow/issues/45512) - [C++] Clean up undefined symbols in libarrow without IPC (#45513) +* [GH-45514](https://github.com/apache/arrow/issues/45514) - [CI][C++][Docs] Set CUDAToolkit_ROOT explicitly in debian-docs (#45520) +* [GH-45521](https://github.com/apache/arrow/issues/45521) - [CI][Dev][R] Install required cyclocomp package to be used with R lintr (#45524) +* [GH-45530](https://github.com/apache/arrow/issues/45530) - [Python][Packaging] Add pyarrow.libs dir to get_library_dirs (#45766) +* [GH-45536](https://github.com/apache/arrow/issues/45536) - [Dev][R] Update code to match new linters on lintr=3.2.0 (#45556) +* [GH-45537](https://github.com/apache/arrow/issues/45537) - [CI][C++] Add missing includes (iwyu) to file_skyhook.cc (#45538) +* [GH-45541](https://github.com/apache/arrow/issues/45541) - [Doc][C++] Render ASCII art as-is (#45542) +* [GH-45543](https://github.com/apache/arrow/issues/45543) - [Release][C#] Remove NuGet references in script (#45544) +* [GH-45545](https://github.com/apache/arrow/issues/45545) - [C++][Parquet] Add missing includes (#45554) +* [GH-45560](https://github.com/apache/arrow/issues/45560) - [Docs] Fix Statistics schema's "column" examples (#45561) +* [GH-45564](https://github.com/apache/arrow/issues/45564) - [C++][Acero] Add size validation for names and expressions vectors in ProjectNode (#45565) +* [GH-45568](https://github.com/apache/arrow/issues/45568) - [C++][Parquet][CMake] Enable zlib automatically when Thrift is needed (#45569) +* [GH-45578](https://github.com/apache/arrow/issues/45578) - [C++] Use max not min in MakeStatisticsArrayMaxApproximate test (#45579) +* [GH-45582](https://github.com/apache/arrow/issues/45582) - [Python] 
Preserve decimal32/64/256 metadata in Schema.metadata (#45583) +* [GH-45587](https://github.com/apache/arrow/issues/45587) - [C++][Docs] Fix the statistics schema link in `arrow::RecordBatch::MakeStatisticsArray()`'s docstring (#45588) +* [GH-45614](https://github.com/apache/arrow/issues/45614) - [C++] Use Boost's CMake packages instead of FindBoost.cmake in CMake (#45623) +* [GH-45628](https://github.com/apache/arrow/issues/45628) - [C++] Ensure specifying Boost include directory for bundled Thrift (#45637) +* [GH-45656](https://github.com/apache/arrow/issues/45656) - [C#] Fix failing MacOS builds (#45734) +* [GH-45659](https://github.com/apache/arrow/issues/45659) - [GLib][Ruby] Fix Ruby lint violation(add space after comma) (#45660) +* [GH-45669](https://github.com/apache/arrow/issues/45669) - [C++][Parquet] Add missing `ParquetFileReader::GetReadRanges()` definition (#45684) +* [GH-45693](https://github.com/apache/arrow/issues/45693) - [C++][Gandiva] Fix aes_encrypt/decrypt algorithm selection (#45695) +* [GH-45700](https://github.com/apache/arrow/issues/45700) - [C++][Compute] Added nullptr check in Equals method to handle null impl_ pointers (#45701) +* [GH-45714](https://github.com/apache/arrow/issues/45714) - [CI][R] Don't run tests that use reticulate on CRAN (#46026) +* [GH-45718](https://github.com/apache/arrow/issues/45718) - [R][CI] Fix compilation error on opensuse155 (#45874) +* [GH-45724](https://github.com/apache/arrow/issues/45724) - [Docs] Fix docs image name from ubuntu-docs to debian-docs (#45726) +* [GH-45733](https://github.com/apache/arrow/issues/45733) - [C++][Python] Add biased/unbiased toggle to skew and kurtosis functions (#45762) +* [GH-45739](https://github.com/apache/arrow/issues/45739) - [C++][Python] Fix crash when calling hash_pivot_wider without options (#45740) +* [GH-45758](https://github.com/apache/arrow/issues/45758) - [Python] Add AzureFileSystem documentation (#45759) +* 
[GH-45782](https://github.com/apache/arrow/issues/45782) - [GLib] Check only the first line for validation error (#45783) +* [GH-45787](https://github.com/apache/arrow/issues/45787) - [Integration][CI] Remove pin for Rust 1.77 on conda integration tests (#45790) +* [GH-45788](https://github.com/apache/arrow/issues/45788) - [C++][Acero] Fix data race in aggregate node (#45789) +* [GH-45850](https://github.com/apache/arrow/issues/45850) - Fix r-devel note about symbols in .a libs (#45870) +* [GH-45862](https://github.com/apache/arrow/issues/45862) - [JS] Fix FixedSizeListBuilder behavior for null slots (#45889) +* [GH-45868](https://github.com/apache/arrow/issues/45868) - [C++][CI] Fix test for ambiguous initialization on C++ 20 (#45871) +* [GH-45879](https://github.com/apache/arrow/issues/45879) - [CI][Release][Ruby] Omit Flight related tests on x86_64 macOS (#45898) +* [GH-45905](https://github.com/apache/arrow/issues/45905) - [C++][Acero] Enlarge the timeout in ConcurrentQueue test to reduce sporadical failures (#45923) +* [GH-45915](https://github.com/apache/arrow/issues/45915) - [JS] Ensure UnionBuilder yields chunks with correct length (#45916) +* [GH-45924](https://github.com/apache/arrow/issues/45924) - [CI] Update chrome_version for emscripten job to latest stable (v134) (#45925) +* [GH-45926](https://github.com/apache/arrow/issues/45926) - [Python] Use pytest.approx for float values on unbiased skew and kurtosis tests (#45929) +* [GH-45930](https://github.com/apache/arrow/issues/45930) - [C++] Don't use ICU C++ API in Azure SDK C++ (#45952) +* [GH-45939](https://github.com/apache/arrow/issues/45939) - [C++][Benchmarking] Fix compilation failures (#45942) +* [GH-45959](https://github.com/apache/arrow/issues/45959) - [C++][CMake] Fix Protobuf dependency in Arrow::arrow_static (#45960) +* [GH-45967](https://github.com/apache/arrow/issues/45967) - [Benchmarking][CI] Benchmarking has stopped working due to failing to build +* 
[GH-45980](https://github.com/apache/arrow/issues/45980) - [C++] Bump Bundled Snappy version to 1.2.2 (#45981) +* [GH-45994](https://github.com/apache/arrow/issues/45994) - [CI][GLib] Fix vcpkg configuration for Windows job (#46006) +* [GH-45995](https://github.com/apache/arrow/issues/45995) - [Benchmarking][CI] Benchmarking buildkite runs fail to build PyArrow +* [GH-45999](https://github.com/apache/arrow/issues/45999) - [C++][Gandiva] Fix crashes on LLVM 20.1.1 (#46000) +* [GH-46022](https://github.com/apache/arrow/issues/46022) - [C++] Fix build error with g++ 7.5.0 (#46028) +* [GH-46023](https://github.com/apache/arrow/issues/46023) - [CI][MATLAB] libmexclass doesn't work with CMake 4.0.0 (#46033) +* [GH-46041](https://github.com/apache/arrow/issues/46041) - [Python][Packaging] Temporary remove pandas from being installed on free-threaded Windows wheel tests (#46042) +* [GH-46050](https://github.com/apache/arrow/issues/46050) - [R] Add windows to set of paths in Makevars.in (#46055) +* [GH-46067](https://github.com/apache/arrow/issues/46067) - [CI][C++] Remove system Flatbuffers from macOS (#46105) +* [GH-46072](https://github.com/apache/arrow/issues/46072) - [Release] Disable sync in 05-binary-upload.sh (#46074) +* [GH-46075](https://github.com/apache/arrow/issues/46075) - [Release][CI] Fix binary verification (#46076) +* [GH-46077](https://github.com/apache/arrow/issues/46077) - [CI][C++] Disable -Werror on macos-13 (#46106) +* [GH-46081](https://github.com/apache/arrow/issues/46081) - [Release] Don't generate needless `uploaded-files.txt` for Maven repository (#46082) +* [GH-46083](https://github.com/apache/arrow/issues/46083) - [Release][Packages] Use Artifactory for APT/Yum repositories again (#46108) +* [GH-46111](https://github.com/apache/arrow/issues/46111) - [C++][CI] Fix boost 1.88 on MinGW (#46113) +* [GH-46123](https://github.com/apache/arrow/issues/46123) - [C++] Undefined behavior in `compare_internal.cc` and `light_array_internal.cc` (#46124) +* 
[GH-46134](https://github.com/apache/arrow/issues/46134) - [CI][C++] Explicit conversion of possible `absl::string_view` on protobuf methods to `std::string` (#46136) +* [GH-46159](https://github.com/apache/arrow/issues/46159) - [CI][C++] Stop using possibly missing boost/process/v2.hpp on boost 1.88 and use individual includes (#46160) +* [GH-46167](https://github.com/apache/arrow/issues/46167) - [R][CI] Update Artifacts for R 4.5 in task.yml (#46168) +* [GH-46169](https://github.com/apache/arrow/issues/46169) - [CI][R] Update R version to 4.5 due to 4.4 not being on APT repositories anymore (#46171) +* [GH-46195](https://github.com/apache/arrow/issues/46195) - [Release][C++] verify-rc-source-cpp-macos-amd64 failed to build googlemock + + +## New Features and Improvements + +* [GH-14932](https://github.com/apache/arrow/issues/14932) - [Python] Add python bindings for JSON streaming reader (#45084) +* [GH-18036](https://github.com/apache/arrow/issues/18036) - [Packaging] Build Python wheel for musllinux (#45470) +* [GH-26648](https://github.com/apache/arrow/issues/26648) - [C++] Optimize union equality comparison (#45384) +* [GH-33592](https://github.com/apache/arrow/issues/33592) - [C++] support casting nullable fields to non-nullable if there are no null values (#43782) +* [GH-35289](https://github.com/apache/arrow/issues/35289) - [Python] Support large variable width types in numpy conversion (#36701) +* [GH-36412](https://github.com/apache/arrow/issues/36412) - [Python][CI] Fix deprecation warnings in the pandas nightly build +* [GH-37563](https://github.com/apache/arrow/issues/37563) - [Ruby] Unify tests about basic arrays for `raw_records` and `each_raw_record` (#45861) +* [GH-38694](https://github.com/apache/arrow/issues/38694) - [Release][C#] Release Apache.Arrow.Flight.Sql (#45309) +* [GH-39010](https://github.com/apache/arrow/issues/39010) - [Python] Introduce `maps_as_pydicts` parameter for `to_pylist`, `to_pydict`, `as_py` (#45471) +* 
[GH-40760](https://github.com/apache/arrow/issues/40760) - [Release] Use repository.apache.org (#45903) +* [GH-41002](https://github.com/apache/arrow/issues/41002) - [Python] Remove pins for pytest-cython and conda-docs pytest (#45240) +* [GH-41764](https://github.com/apache/arrow/issues/41764) - [Parquet][C++] Support future logical types in the Parquet reader (#41765) +* [GH-41816](https://github.com/apache/arrow/issues/41816) - [C++] Add Minimal Meson Build of libarrow (#45441) +* [GH-41985](https://github.com/apache/arrow/issues/41985) - [Python][Docs] Clarify docstring of pyarrow.compute.scalar() (#45668) +* [GH-43118](https://github.com/apache/arrow/issues/43118) - [JS] Add interval for unit MONTH_DAY_NANO (#43117) (#45712) +* [GH-43135](https://github.com/apache/arrow/issues/43135) - [R] Change the binary type mapping to `blob::blob` (#45595) +* [GH-43296](https://github.com/apache/arrow/issues/43296) - [C++][FlightRPC] Remove Flight UCX transport (#43297) +* [GH-43573](https://github.com/apache/arrow/issues/43573) - [C++] Copy bitmap when casting from string-view to offset string and binary types (#44822) +* [GH-43587](https://github.com/apache/arrow/issues/43587) - [Python] Remove no longer used serialize/deserialize PyArrow C++ code (#45743) +* [GH-43876](https://github.com/apache/arrow/issues/43876) - [Swift] Use apache/arrow-go (#45781) +* [GH-44042](https://github.com/apache/arrow/issues/44042) - [C++][Parquet] Limit num-of row-groups when building parquet for encrypted file (# 44043) +* [GH-44393](https://github.com/apache/arrow/issues/44393) - [C++][Compute] Vector selection functions `inverse_permutation` and `scatter` (#44394) +* [GH-44421](https://github.com/apache/arrow/issues/44421) - [Python] Add configuration for building & testing free-threaded wheels on Windows (#44804) +* [GH-44615](https://github.com/apache/arrow/issues/44615) - [C++][Compute] Add extract_regex_span function (#45577) +* 
[GH-44629](https://github.com/apache/arrow/issues/44629) - [C++][Acero] Use `implicit_ordering` for `asof_join` rather than `require_sequenced_output` (#44616) +* [GH-44757](https://github.com/apache/arrow/issues/44757) - [GLib] Add garrow_array_validate() (#45328) +* [GH-44758](https://github.com/apache/arrow/issues/44758) - [GLib] Add garrow_array_validate_full() (#45342) +* [GH-44759](https://github.com/apache/arrow/issues/44759) - [GLib] Add garrow_record_batch_validate() (#45353) +* [GH-44760](https://github.com/apache/arrow/issues/44760) - [GLib] Add garrow_record_batch_validate_full() (#45386) +* [GH-44761](https://github.com/apache/arrow/issues/44761) - [GLib] Add garrow_table_validate() (#45414) +* [GH-44762](https://github.com/apache/arrow/issues/44762) - [GLib] Add garrow_table_validate_full() (#45468) +* [GH-44790](https://github.com/apache/arrow/issues/44790) - [Python] Remove use_legacy_dataset from code base (#45742) +* [GH-44905](https://github.com/apache/arrow/issues/44905) - [Dev] Remove unused file with only header (#45526) +* [GH-44924](https://github.com/apache/arrow/issues/44924) - [R] Remove usage of cpp11's HAS_UNWIND_PROTECT (#45261) +* [GH-44950](https://github.com/apache/arrow/issues/44950) - [C++] Bump minimum CMake version to 3.25 (#44989) +* [GH-45045](https://github.com/apache/arrow/issues/45045) - [C++][Parquet] Add a benchmark for size_statistics_level (#45085) +* [GH-45156](https://github.com/apache/arrow/issues/45156) - [Python][Packaging] Refactor Python Windows wheel images to use newer base image (#45442) +* [GH-45190](https://github.com/apache/arrow/issues/45190) - [C++][Compute] Add rank_quantile function (#45259) +* [GH-45196](https://github.com/apache/arrow/issues/45196) - [C++][Acero] Small refinement to hash join (#45197) +* [GH-45204](https://github.com/apache/arrow/issues/45204) - [Integration][Archery] Remove skips for nanoarrow IPC compression ZSTD/uncompressible golden files (#45205) +* 
[GH-45206](https://github.com/apache/arrow/issues/45206) - [C++][CMake] Add sanitizer presets (#45207) +* [GH-45209](https://github.com/apache/arrow/issues/45209) - [C++][CMake] Fix the issue that allocator not disabled for sanitizer cmake presets (#45210) +* [GH-45215](https://github.com/apache/arrow/issues/45215) - [C++][Acero] Export SequencingQueue and SerialSequencingQueue (#45221) +* [GH-45216](https://github.com/apache/arrow/issues/45216) - [C++][Compute] Refactor Rank implementation (#45217) +* [GH-45219](https://github.com/apache/arrow/issues/45219) - [C++][Examples] Update examples to disable mimalloc (#45220) +* [GH-45225](https://github.com/apache/arrow/issues/45225) - [C++] Upgrade ORC to 2.1.0 (#45226) +* [GH-45227](https://github.com/apache/arrow/issues/45227) - [C++][Parquet] Enable Size Stats and Page Index by default (#45249) +* [GH-45237](https://github.com/apache/arrow/issues/45237) - [Python] Raise minimum supported cython to >=3 (#45238) +* [GH-45263](https://github.com/apache/arrow/issues/45263) - [MATLAB] Add ability to construct `RecordBatchStreamReader` from `uint8` array (#45274) +* [GH-45269](https://github.com/apache/arrow/issues/45269) - [C++][Compute] Add "pivot_wider" and "hash_pivot_wider" functions (#45562) +* [GH-45278](https://github.com/apache/arrow/issues/45278) - [Python][Packaging] Updated delvewheel install command and updated flags used with delvewheel repair (#45323) +* [GH-45279](https://github.com/apache/arrow/issues/45279) - [C++][Compute] Move all Grouper tests to grouper_test.cc (#45280) +* [GH-45282](https://github.com/apache/arrow/issues/45282) - [Python][Parquet] Remove unused readonly properties of ParquetWriter (#45281) +* [GH-45288](https://github.com/apache/arrow/issues/45288) - [Python][Packaging][Docs] Update documentation for PyArrow nightly wheels (#45289) +* [GH-45307](https://github.com/apache/arrow/issues/45307) - [CI] Use GitHub hosted arm runner (#45308) +* 
[GH-45344](https://github.com/apache/arrow/issues/45344) - [C++][Testing] Generic `StepGenerator` (#45345) +* [GH-45356](https://github.com/apache/arrow/issues/45356) - [CI][R] Update MACOSX_DEPLOYMENT_TARGET to 11.6 (#45363) +* [GH-45358](https://github.com/apache/arrow/issues/45358) - [C++][Python] Add MemoryPool method to print statistics (#45359) +* [GH-45361](https://github.com/apache/arrow/issues/45361) - [CI][C++] Curate `ci/vcpkg/vcpkg.json` (#45081) +* [GH-45366](https://github.com/apache/arrow/issues/45366) - [C++][Parquet] Set is_compressed to false when data page v2 is not compressed (#45367) +* [GH-45388](https://github.com/apache/arrow/issues/45388) - [CI][MATLAB] Can we use Ubuntu 22.04 or 24.04 for Ubuntu CI (#45395) +* [GH-45389](https://github.com/apache/arrow/issues/45389) - [CI][R] Use Ubuntu 22.04 for test-r-versions (#45475) +* [GH-45398](https://github.com/apache/arrow/issues/45398) - [CI][Dev][Ruby] Add Ruby lint (#45417) +* [GH-45402](https://github.com/apache/arrow/issues/45402) - [CI][Dev][Ruby] Reformat codes before apply lint (#45403) +* [GH-45416](https://github.com/apache/arrow/issues/45416) - [CI][C++][Homebrew] Backport the latest formula changes (#45460) +* [GH-45433](https://github.com/apache/arrow/issues/45433) - [Python] Remove Cython workarounds (#45437) +* [GH-45447](https://github.com/apache/arrow/issues/45447) - [CI][GLib] Use `meson format` for Meson configuration files (#45448) +* [GH-45451](https://github.com/apache/arrow/issues/45451) - [C#] Integration with Grpc.Net.ClientFactory (#45458) +* [GH-45457](https://github.com/apache/arrow/issues/45457) - [Python] Add `pyarrow.ArrayStatistics` (#45550) +* [GH-45476](https://github.com/apache/arrow/issues/45476) - [Packaging][Linux] Drop support for Ubuntu 20.04 (#45477) +* [GH-45478](https://github.com/apache/arrow/issues/45478) - [CI][C++] Drop support for Ubuntu 20.04 (#45519) +* [GH-45479](https://github.com/apache/arrow/issues/45479) - [CI][Release] Use Ubuntu 24.04 
instead of 20.04 (#45480) +* [GH-45482](https://github.com/apache/arrow/issues/45482) - [CI][Python] Don't use Ubuntu 20.04 for wheel test (#45483) +* [GH-45485](https://github.com/apache/arrow/issues/45485) - [Dev] Simplify pull request template (#45599) +* [GH-45486](https://github.com/apache/arrow/issues/45486) - [GLib] Add `GArrowArrayStatistics` (#45490) +* [GH-45491](https://github.com/apache/arrow/issues/45491) - [GLib] Require Meson 0.61.2 or later (#45492) +* [GH-45505](https://github.com/apache/arrow/issues/45505) - [CI][R] Use Ubuntu 22.04 instead of 20.04 as much as possible for nightly jobs (#45507) +* [GH-45506](https://github.com/apache/arrow/issues/45506) - [C++][Acero] More overflow-safe Swiss table (#45515) +* [GH-45508](https://github.com/apache/arrow/issues/45508) - [CI][R] Remove Ubuntu version from sanitizer jobs (#45509) +* [GH-45517](https://github.com/apache/arrow/issues/45517) - [GLib] garrow_data_type_new_raw() returns GARROW_TYPE_STRING_VIEW_DATA_TYPE (#45518) +* [GH-45528](https://github.com/apache/arrow/issues/45528) - [GLib] garrow_data_type_new_raw() returns GARROW_TYPE_BINARY_VIEW_DATA_TYPE (#45529) +* [GH-45548](https://github.com/apache/arrow/issues/45548) - [Release][Dev][Packaging] Omit APT/Yum repositories check on local in the RC verification script (#45738) +* [GH-45551](https://github.com/apache/arrow/issues/45551) - [C++][Acero] Release temp states of Swiss join building hash table to reduce memory consumption (#45552) +* [GH-45563](https://github.com/apache/arrow/issues/45563) - [C++][Compute] Split up hash_aggregate.cc (#45725) +* [GH-45566](https://github.com/apache/arrow/issues/45566) - [C++][Parquet][CMake] Remove a workaround for Windows in FindThriftAlt.cmake (#45567) +* [GH-45570](https://github.com/apache/arrow/issues/45570) - [Python] Allow Decimal32/64Array.to_pandas (#45571) +* [GH-45572](https://github.com/apache/arrow/issues/45572) - [C++][Compute] Add rank_normal function (#45573) +* 
[GH-45584](https://github.com/apache/arrow/issues/45584) - [C++][Thirdparty] Bump zstd to v1.5.7 (#45585) +* [GH-45589](https://github.com/apache/arrow/issues/45589) - [C++] Enable singular test in Meson configuration (#45596) +* [GH-45591](https://github.com/apache/arrow/issues/45591) - [C++][Acero] Refine hash join benchmark and remove openmp from the project (#45593) +* [GH-45605](https://github.com/apache/arrow/issues/45605) - [R][C++] Fix identifier ... preceded by whitespace warnings (#45606) +* [GH-45611](https://github.com/apache/arrow/issues/45611) - [C++][Acero] Improve Swiss join build performance by partitioning batches ahead to reduce contention (#45612) +* [GH-45620](https://github.com/apache/arrow/issues/45620) - [CI][C++] Use Visual Studio 2022 not 2019 (#45621) +* [GH-45626](https://github.com/apache/arrow/issues/45626) - [CI][Docs] Remove Java related configurations from `ci/docker/linux-apt-docs.dockerfile` (#45627) +* [GH-45631](https://github.com/apache/arrow/issues/45631) - [CI] Remove unused `java-jni-manylinux-201x.dockerfile` (#45632) +* [GH-45649](https://github.com/apache/arrow/issues/45649) - [GLib] Add GArrowBinaryViewArray (#45650) +* [GH-45652](https://github.com/apache/arrow/issues/45652) - [C++][Acero] Unify ConcurrentQueue and BackpressureConcurrentQueue API (#45421) +* [GH-45661](https://github.com/apache/arrow/issues/45661) - [GLib][Ruby][Dev] Add Ruby lint rule (add space after comma) (#45662) +* [GH-45665](https://github.com/apache/arrow/issues/45665) - [Docs] Add kapa AI bot to the docs (#45667) +* [GH-45670](https://github.com/apache/arrow/issues/45670) - [Release][Archery] Crossbow bot accepts `--prefix` (#45671) +* [GH-45675](https://github.com/apache/arrow/issues/45675) - [Release] Run binary RC verification jobs in apache/arrow (#45699) +* [GH-45676](https://github.com/apache/arrow/issues/45676) - [C++][Python][Compute] Add skew and kurtosis functions (#45677) +* [GH-45680](https://github.com/apache/arrow/issues/45680) - 
[C++][Python] Remove deprecated functions in 20.0 +* [GH-45689](https://github.com/apache/arrow/issues/45689) - [C++][Thirdparty] Bump Apache ORC to 2.1.1 (#45600) +* [GH-45691](https://github.com/apache/arrow/issues/45691) - [R][Packaging] Update R packaging checklist with latest process (#45692) +* [GH-45694](https://github.com/apache/arrow/issues/45694) - [C++] Bump vendored flatbuffers to 24.3.6 (#45687) +* [GH-45696](https://github.com/apache/arrow/issues/45696) - [C++][Gandiva] Accept LLVM 20.1 (#45697) +* [GH-45705](https://github.com/apache/arrow/issues/45705) - [Python] Add support for SAS token in AzureFileSystem (#45706) +* [GH-45708](https://github.com/apache/arrow/issues/45708) - [Release] Re-run binary verification jobs after we upload binaries (#45736) +* [GH-45710](https://github.com/apache/arrow/issues/45710) - [GLib] Add GArrowStringViewArray (#45711) +* [GH-45732](https://github.com/apache/arrow/issues/45732) - [C++][Compute] Accept more pivot key types (#45945) +* [GH-45744](https://github.com/apache/arrow/issues/45744) - [C++] Remove deprecated GetNextSegment (#45745) +* [GH-45746](https://github.com/apache/arrow/issues/45746) - [C++] Remove deprecated functions in 20.0 (C++ subset) (#45748) +* [GH-45752](https://github.com/apache/arrow/issues/45752) - [C#] Update FlightInfo.cs with missing fields (#45753) +* [GH-45755](https://github.com/apache/arrow/issues/45755) - [C++][Python][Compute] Add winsorize function (#45763) +* [GH-45769](https://github.com/apache/arrow/issues/45769) - [C#][flight] add FlightInfo ByteString serialization (#45770) +* [GH-45771](https://github.com/apache/arrow/issues/45771) - [C++] Add tests to top level Meson configuration (#45773) +* [GH-45772](https://github.com/apache/arrow/issues/45772) - [C++] Export Arrow as dependency from Meson configuration (#45774) +* [GH-45775](https://github.com/apache/arrow/issues/45775) - [C++] Use dict.get() in Meson configuration (#45776) +* 
[GH-45779](https://github.com/apache/arrow/issues/45779) - [C++] Add testing directory to Meson configuration (#45780) +* [GH-45784](https://github.com/apache/arrow/issues/45784) - [C++] Unpin LLVM and OpenSSL in Brewfile (#45785) +* [GH-45792](https://github.com/apache/arrow/issues/45792) - [C++] Add benchmarks to Meson configuration (#45793) +* [GH-45813](https://github.com/apache/arrow/issues/45813) - [Docs] Enable discussions (#45811) +* [GH-45816](https://github.com/apache/arrow/issues/45816) - [C++] Make `VisitType()` fallback branch unreachable (#45815) +* [GH-45820](https://github.com/apache/arrow/issues/45820) - [C++] Add optional out_offset for Buffer-returning CopyBitmap function (#45852) +* [GH-45821](https://github.com/apache/arrow/issues/45821) - [C++][Compute] Grouper improvements (#45822) +* [GH-45825](https://github.com/apache/arrow/issues/45825) - [C++] Add c directory to Meson configuration (#45826) +* [GH-45827](https://github.com/apache/arrow/issues/45827) - [C++] Add io directory to Meson configuration (#45828) +* [GH-45831](https://github.com/apache/arrow/issues/45831) - [C++] Add CSV directory to Meson configuration (#45832) +* [GH-45848](https://github.com/apache/arrow/issues/45848) - [C++][Python][R] Remove deprecated PARQUET_2_0 (#45849) +* [GH-45877](https://github.com/apache/arrow/issues/45877) - [C++][Acero] Cleanup 64-bit temp states of Swiss join by using 32-bit (#45878) +* [GH-45883](https://github.com/apache/arrow/issues/45883) - [Docs] Update GitHub Issue Template for GitHub Discussions (#45884) +* [GH-45890](https://github.com/apache/arrow/issues/45890) - [Ruby] Unify test for dense union array in raw_records and each_raw_record (#45904) +* [GH-45891](https://github.com/apache/arrow/issues/45891) - [Ruby] Unify test for dictionary array in raw_records and each_raw_record (#45927) +* [GH-45892](https://github.com/apache/arrow/issues/45892) - [Ruby] Unify test for list array in raw_records and each_raw_record (#45940) +* 
[GH-45893](https://github.com/apache/arrow/issues/45893) - [Ruby] Unify test for map array in raw_records and each_raw_record (#45955) +* [GH-45894](https://github.com/apache/arrow/issues/45894) - [Ruby] Unify test for multiple columns in raw_records and each_raw_record (#45965) +* [GH-45895](https://github.com/apache/arrow/issues/45895) - [Ruby] Unify test for sparse union array in raw_records and each_raw_record (#45970) +* [GH-45896](https://github.com/apache/arrow/issues/45896) - [Ruby] Unify test for struct array in raw_records and each_raw_record (#45974) +* [GH-45897](https://github.com/apache/arrow/issues/45897) - [Ruby] Unify test for table in raw_records and each_raw_record (#45977) +* [GH-45906](https://github.com/apache/arrow/issues/45906) - [Docs] Document GitHub Discussions in Developer's Guide (#45907) +* [GH-45917](https://github.com/apache/arrow/issues/45917) - [C++][Acero] Add flush taskgroup to enable parallelization (#45918) +* [GH-45920](https://github.com/apache/arrow/issues/45920) - [Release][Python] Upload sdist and wheels to GitHub Releases not apache.jfrog.io (#45962) +* [GH-45922](https://github.com/apache/arrow/issues/45922) - [C++][Flight] Remove deprecated Authenticate and StartCall (#45932) +* [GH-45949](https://github.com/apache/arrow/issues/45949) - [R] Fix CRAN warnings for 19.0.1 about compiled code (#45951) +* [GH-45953](https://github.com/apache/arrow/issues/45953) - [C++] Use lock to fix atomic bug in ReadaheadGenerator (#45954) +* [GH-45961](https://github.com/apache/arrow/issues/45961) - [Release][Docs] Upload generated docs to GitHub Releases not apache.jfrog.io (#45963) +* [GH-45975](https://github.com/apache/arrow/issues/45975) - [Ruby] Add support for rubygems-requirements-system (#45976) +* [GH-45986](https://github.com/apache/arrow/issues/45986) - [C++] Update bundled GoogleTest (#45996) +* [GH-45987](https://github.com/apache/arrow/issues/45987) - [C++] Set CMAKE_POLICY_VERSION_MINIMUM=3.5 for bundled dependencies 
(#45997) +* [GH-46051](https://github.com/apache/arrow/issues/46051) - [R] Backport NEWS.md changes from 19.0.1.1 (#46056) + + + # Apache Arrow 6.0.1 (2021-11-18) ## Bug Fixes diff --git a/c_glib/meson.build b/c_glib/meson.build index 11b6ba2f476f..90b11e4adcc9 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -34,7 +34,7 @@ project( # * 22.04: 0.61.2 # * 24.04: 1.3.2 meson_version: '>=0.61.2', - version: '20.0.0-SNAPSHOT', + version: '20.0.0', ) version = meson.project_version() @@ -48,6 +48,7 @@ endif version_major = version_numbers[0].to_int() version_minor = version_numbers[1].to_int() version_micro = version_numbers[2].to_int() +version_no_snapshot = '.'.join(version_numbers) api_version = '1.0' so_version = version_major * 100 + version_minor @@ -81,7 +82,7 @@ endif generate_vapi = have_gi and get_option('vapi') if generate_vapi pkgconfig_variables += ['vapidir=@0@'.format(vapi_dir)] - add_languages('vala') + add_languages('vala', native: false) endif arrow_cpp_build_dir = get_option('arrow_cpp_build_dir') @@ -97,47 +98,96 @@ else endif if arrow_cpp_build_lib_dir == '' - arrow = dependency('arrow', version: ['>=' + version]) + common_args = {'version': [f'>=@version_no_snapshot@']} + arrow = dependency( + 'arrow', + 'Arrow', + kwargs: common_args, + modules: ['Arrow::arrow_shared'], + ) # They are just for checking required modules are enabled. They are built into # libarrow.so. So we don't need additional build flags for them. 
- dependency('arrow-compute', version: ['>=' + version]) - dependency('arrow-csv', version: ['>=' + version]) - dependency('arrow-filesystem', version: ['>=' + version]) - dependency('arrow-json', version: ['>=' + version]) + if arrow.type_name() == 'cmake' + assert( + arrow.get_variable('ARROW_COMPUTE', default_value: 'OFF') == 'ON', + 'compute module must be enabled', + ) + assert( + arrow.get_variable('ARROW_CSV', default_value: 'OFF') == 'ON', + 'CSV module must be enabled', + ) + assert( + arrow.get_variable('ARROW_FILESYSTEM', default_value: 'OFF') == 'ON', + 'filesystem module must be enabled', + ) + assert( + arrow.get_variable('ARROW_JSON', default_value: 'OFF') == 'ON', + 'JSON module must be enabled', + ) + else + dependency('arrow-compute', kwargs: common_args) + dependency('arrow-csv', kwargs: common_args) + dependency('arrow-filesystem', kwargs: common_args) + dependency('arrow-json', kwargs: common_args) + endif - have_arrow_orc = dependency( - 'arrow-orc', - required: false, - version: ['>=' + version], - ).found() + if arrow.type_name() == 'cmake' + have_arrow_orc = (arrow.get_variable('ARROW_ORC', default_value: 'OFF') == 'ON') + else + have_arrow_orc = dependency( + 'arrow-orc', + kwargs: common_args, + required: false, + ).found() + endif arrow_cuda = dependency( 'arrow-cuda', + 'ArrowCUDA', + kwargs: common_args, + modules: ['ArrowCUDA::arrow_cuda_shared'], required: false, - version: ['>=' + version], ) - # we do not support compiling glib without acero engine + # we do not support compiling GLib without Acero engine arrow_acero = dependency( 'arrow-acero', - required: true, - version: ['>=' + version], + 'ArrowAcero', + kwargs: common_args, + modules: ['ArrowAcero::arrow_acero_shared'], ) arrow_dataset = dependency( 'arrow-dataset', + 'ArrowDataset', + kwargs: common_args, + modules: ['ArrowDataset::arrow_dataset_shared'], required: false, - version: ['>=' + version], ) arrow_flight = dependency( 'arrow-flight', + 'ArrowFlight', + kwargs: 
common_args, + modules: ['ArrowFlight::arrow_flight_shared'], required: false, - version: ['>=' + version], ) arrow_flight_sql = dependency( 'arrow-flight-sql', + 'ArrowFlightSql', + kwargs: common_args, + modules: ['ArrowFlightSql::arrow_flight_sql_shared'], + required: false, + ) + gandiva = dependency( + 'gandiva', + 'Gandiva', + kwargs: common_args, + modules: ['Gandiva::gandiva_shared'], required: false, - version: ['>=' + version], ) - gandiva = dependency('gandiva', required: false, version: ['>=' + version]) - parquet = dependency('parquet', required: false, version: ['>=' + version]) + parquet = dependency( + 'parquet', + 'Parquet', + kwargs: common_args, + modules: ['Parquet::parquet_shared'], + ) else base_include_directories += [ include_directories(join_paths(arrow_cpp_build_dir, 'src')), diff --git a/c_glib/vcpkg.json b/c_glib/vcpkg.json index df4508d29736..582dd8c5a22d 100644 --- a/c_glib/vcpkg.json +++ b/c_glib/vcpkg.json @@ -1,23 +1,11 @@ { "name": "arrow-glib", - "version-string": "20.0.0-SNAPSHOT", + "version-string": "20.0.0", + "$comment:dependencies": "We can enable gobject-introspection again once it's updated", "dependencies": [ "glib", - "gobject-introspection", "pkgconf" ], - "$comment": "2025.02.14", - "builtin-baseline": "d5ec528843d29e3a52d745a64b469f810b2cedbf", - "overrides": [ - { - "$comment": - "We need gobject-introspection 1.80.0 or later for GLib 2.80.0 but vcpkg doesn't provide it yet.", - "name": "glib", - "version": "2.78.4" - }, - { - "name": "vcpkg-tool-meson", - "version": "1.3.2" - } - ] + "$comment": "We can update builtin-baseline by 'vcpkg x-update-baseline'", + "builtin-baseline": "09f6a4ef2f08252f7f4d924fd9c2d42165fb21c9" } diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile index 31435d498912..b9f7c716e520 100644 --- a/ci/docker/linux-apt-docs.dockerfile +++ b/ci/docker/linux-apt-docs.dockerfile @@ -18,7 +18,7 @@ ARG base FROM ${base} -ARG r=4.4 +ARG r=4.5 ENV 
PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index 48c7154ef0eb..83a7b8b9baad 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -35,7 +35,7 @@ ENV LANG=C.UTF-8 # Build R # [1] https://www.digitalocean.com/community/tutorials/how-to-install-r-on-ubuntu-18-04 # [2] https://linuxize.com/post/how-to-install-r-on-ubuntu-18-04/#installing-r-packages-from-cran -ARG r=4.4 +ARG r=4.5 RUN apt-get update -y && \ apt-get install -y \ dirmngr \ diff --git a/ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile b/ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile index 4b972999b047..335dcd78f604 100644 --- a/ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile +++ b/ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile @@ -38,12 +38,15 @@ SHELL ["cmd", "/S", "/C"] RUN %PYTHON_CMD% -m pip install -U pip setuptools COPY python/requirements-wheel-test.txt C:/arrow/python/ +# Temporarily remove pandas from the requirements, see https://github.com/apache/arrow/issues/46041 +RUN findstr /V "pandas" C:\arrow\python\requirements-wheel-test.txt > C:\arrow\python\filtered-requirements-wheel-test.txt # Cython and Pandas wheels for 3.13 free-threaded are not released yet +# hadolint ignore=DL3059 RUN %PYTHON_CMD% -m pip install \ --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \ --pre \ --prefer-binary \ - -r C:/arrow/python/requirements-wheel-test.txt + -r C:/arrow/python/filtered-requirements-wheel-test.txt # cffi-based tests would crash when importing cffi. 
# hadolint ignore=DL3059 RUN %PYTHON_CMD% -m pip uninstall -y cffi diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index 9eac3ef5cb9f..1271a378e100 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,7 +18,7 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=19.0.1.9000 +pkgver=20.0.0 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") diff --git a/ci/scripts/c_glib_build.sh b/ci/scripts/c_glib_build.sh index 54bd7e5645a1..359d76c6d753 100755 --- a/ci/scripts/c_glib_build.sh +++ b/ci/scripts/c_glib_build.sh @@ -37,6 +37,7 @@ fi PATH="${ARROW_HOME}/bin:${PATH}" +meson_cmake_prefix_path="${ARROW_HOME}" meson_pkg_config_path="${ARROW_HOME}/lib/pkgconfig" mkdir -p "${build_dir}" @@ -46,6 +47,7 @@ if [ -n "${VCPKG_DEFAULT_TRIPLET:-}" ]; then vcpkg install \ --x-manifest-root="${source_dir}" \ --x-install-root="${vcpkg_install_root}" + meson_cmake_prefix_path="${vcpkg_install_root}/${VCPKG_DEFAULT_TRIPLET}:${meson_cmake_prefix_path}" PKG_CONFIG="${vcpkg_install_root}/${VCPKG_DEFAULT_TRIPLET}/tools/pkgconf/pkgconf.exe" export PKG_CONFIG meson_pkg_config_path="${vcpkg_install_root}/${VCPKG_DEFAULT_TRIPLET}/lib/pkgconfig:${meson_pkg_config_path}" @@ -65,9 +67,10 @@ fi # Build with Meson meson setup \ --backend=ninja \ - --prefix="${ARROW_HOME}" \ + --cmake-prefix-path="${meson_cmake_prefix_path}" \ --libdir=lib \ --pkg-config-path="${meson_pkg_config_path}" \ + --prefix="${ARROW_HOME}" \ -Ddoc="${with_doc}" \ -Dvapi="${ARROW_GLIB_VAPI}" \ -Dwerror="${ARROW_GLIB_WERROR}" \ diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index a686215b93da..e08b25e3d2d7 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -25,7 +25,13 @@ set PYARROW_TEST_GANDIVA=OFF set PYARROW_TEST_GCS=ON set PYARROW_TEST_HDFS=ON set PYARROW_TEST_ORC=ON -set 
PYARROW_TEST_PANDAS=ON +@REM Temporarily skip pandas for free-threaded tests. +@REM See https://github.com/apache/arrow/issues/46041 +if "%PYTHON_CMD%" neq "py -3.13t" ( + set PYARROW_TEST_PANDAS=ON +) else ( + echo "Skip PYARROW_TEST_PANDAS for free-threaded" +) set PYARROW_TEST_PARQUET=ON set PYARROW_TEST_PARQUET_ENCRYPTION=ON set PYARROW_TEST_SUBSTRAIT=ON diff --git a/ci/scripts/r_sanitize.sh b/ci/scripts/r_sanitize.sh index fb3e9a583638..b66724fdbdc9 100755 --- a/ci/scripts/r_sanitize.sh +++ b/ci/scripts/r_sanitize.sh @@ -36,8 +36,7 @@ ncores=$(${R_BIN} -s -e 'cat(parallel::detectCores())') echo "MAKEFLAGS=-j${ncores}" >> ${rhome}/etc/Renviron.site # build first so that any stray compiled files in r/src are ignored -${R_BIN} CMD build . -${R_BIN} CMD INSTALL ${INSTALL_ARGS} arrow*.tar.gz +${R_BIN} CMD build --no-build-vignettes --no-manual . # But unset the env var so that it doesn't cause us to run extra dev tests unset ARROW_R_DEV @@ -45,25 +44,30 @@ unset ARROW_R_DEV # Set the testthat output to be verbose for easier debugging export ARROW_R_VERBOSE_TEST=TRUE -export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp" +# We prune dependencies for these, so we need to disable forcing suggests +export _R_CHECK_FORCE_SUGGESTS_=FALSE + +export SUPPRESSION_FILE=$(readlink -f "tools/ubsan.supp") +export UBSAN_OPTIONS="print_stacktrace=1,suppressions=${SUPPRESSION_FILE}" # From the old rhub image https://github.com/r-hub/rhub-linux-builders/blob/master/fedora-clang-devel-san/Dockerfile export ASAN_OPTIONS="alloc_dealloc_mismatch=0:detect_leaks=0:detect_odr_violation=0" -# run tests -pushd tests -${R_BIN} --no-save < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; } +${R_BIN} CMD check --no-manual --no-vignettes --no-build-vignettes arrow*.tar.gz -cat testthat.out -if grep -q "runtime error" testthat.out; then +# Find sanitizer issues, print the file(s) they are part of, and fail the job +find . 
-type f -name "*Rout" -exec grep -l "runtime error\|SUMMARY: UndefinedBehaviorSanitizer" {} \; > sanitizer_errors.txt +if [ -s sanitizer_errors.txt ]; then + echo "Sanitizer errors found in the following files:" + cat sanitizer_errors.txt + + # Print the content of files with errors for debugging + while read -r file; do + echo "=============== $file ===============" + cat "$file" + echo "=========================================" + done < sanitizer_errors.txt + exit 1 fi -# run examples -popd -${R_BIN} --no-save -e 'library(arrow); testthat::test_examples(".")' >> examples.out 2>&1 || { cat examples.out; exit 1; } - -cat examples.out -if grep -q "runtime error" examples.out; then - exit 1 -fi popd diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh index 67142b66dd7f..34bef3cc157f 100755 --- a/ci/scripts/r_test.sh +++ b/ci/scripts/r_test.sh @@ -101,7 +101,7 @@ SCRIPT="as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true') build_args <- '--no-build-vignettes' } - if (requireNamespace('reticulate', quietly = TRUE) && reticulate::py_module_available('pyarrow')) { + if (!as_cran && requireNamespace('reticulate', quietly = TRUE) && reticulate::py_module_available('pyarrow')) { message('Running flight demo server for tests.') pid_flight <- sys::exec_background( 'python', @@ -130,7 +130,8 @@ echo "$SCRIPT" | ${R_BIN} --no-save AFTER=$(ls -alh ~/) if [ "$NOT_CRAN" != "true" ] && [ "$BEFORE" != "$AFTER" ]; then - ls -alh ~/.cmake/packages + # Ignore ~/.TinyTeX/ and ~/R/ because they have many files. 
+ find ~ -path ~/.TinyTeX -prune -or -path ~/R/ -prune -or -print exit 1 fi popd diff --git a/cpp/Brewfile b/cpp/Brewfile index 01d7b3624604..4c42607568c4 100644 --- a/cpp/Brewfile +++ b/cpp/Brewfile @@ -24,7 +24,6 @@ brew "c-ares" brew "curl" brew "ccache" brew "cmake" -brew "flatbuffers" brew "git" brew "glog" brew "googletest" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3c628a20c359..4bec228a575f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -86,7 +86,7 @@ if(POLICY CMP0170) cmake_policy(SET CMP0170 NEW) endif() -set(ARROW_VERSION "20.0.0-SNAPSHOT") +set(ARROW_VERSION "20.0.0") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") @@ -532,22 +532,79 @@ enable_testing() # For arrow.pc. Cflags.private, Libs.private and Requires.private are # used when "pkg-config --cflags --libs --static arrow" is used. set(ARROW_PC_CFLAGS "") -set(ARROW_PC_CFLAGS_PRIVATE " -DARROW_STATIC") +set(ARROW_PC_CFLAGS_PRIVATE "") +if(ARROW_BUILD_STATIC) + # We add -DARROW_STATIC only when static build is enabled because + # pkgconf 1.7.4 or later on Windows uses "--static" by default. If + # Cflags.private (-DARROW_STATIC) is used for shared linking, it + # will cause a link error. We recommend that users not use pkgconf for + # shared linking on Windows but we also provide a workaround here. + # If users don't enable ARROW_BUILD_STATIC, users can use pkgconf on + # Windows because Cflags.private is used but it is empty. + string(APPEND ARROW_PC_CFLAGS_PRIVATE " -DARROW_STATIC") +endif() set(ARROW_PC_LIBS_PRIVATE "") set(ARROW_PC_REQUIRES_PRIVATE "") +# For arrow-acero.pc. +set(ARROW_ACERO_PC_CFLAGS "") +set(ARROW_ACERO_PC_CFLAGS_PRIVATE "") +if(ARROW_BUILD_STATIC) + string(APPEND ARROW_ACERO_PC_CFLAGS_PRIVATE " -DARROW_ACERO_STATIC") +endif() + +# For arrow-dataset.pc. 
+set(ARROW_DATASET_PC_CFLAGS "") +set(ARROW_DATASET_PC_CFLAGS_PRIVATE "") +if(ARROW_BUILD_STATIC) + string(APPEND ARROW_DATASET_PC_CFLAGS_PRIVATE " -DARROW_DS_STATIC") +endif() + # For arrow-flight.pc. +set(ARROW_FLIGHT_PC_CFLAGS "") +set(ARROW_FLIGHT_PC_CFLAGS_PRIVATE "") +if(ARROW_BUILD_STATIC) + string(APPEND ARROW_FLIGHT_PC_CFLAGS_PRIVATE " -DARROW_FLIGHT_STATIC") +endif() set(ARROW_FLIGHT_PC_REQUIRES_PRIVATE "") +# For arrow-flight-sql.pc. +set(ARROW_FLIGHT_SQL_PC_CFLAGS "") +set(ARROW_FLIGHT_SQL_PC_CFLAGS_PRIVATE "") +if(ARROW_BUILD_STATIC) + string(APPEND ARROW_FLIGHT_SQL_PC_CFLAGS_PRIVATE " -DARROW_FLIGHT_SQL_STATIC") +endif() +set(ARROW_FLIGHT_PC_REQUIRES_PRIVATE "") + +# For arrow-substrait.pc. +set(ARROW_SUBSTRAIT_PC_CFLAGS "") +set(ARROW_SUBSTRAIT_PC_CFLAGS_PRIVATE "") +if(ARROW_BUILD_STATIC) + string(APPEND ARROW_SUBSTRAIT_PC_CFLAGS_PRIVATE " -DARROW_ENGINE_STATIC") +endif() + # For arrow-testing.pc. set(ARROW_TESTING_PC_CFLAGS "") -set(ARROW_TESTING_PC_CFLAGS_PRIVATE " -DARROW_TESTING_STATIC") +set(ARROW_TESTING_PC_CFLAGS_PRIVATE "") +if(ARROW_BUILD_STATIC) + string(APPEND ARROW_TESTING_PC_CFLAGS_PRIVATE " -DARROW_TESTING_STATIC") +endif() set(ARROW_TESTING_PC_LIBS "") set(ARROW_TESTING_PC_REQUIRES "") +# For gandiva.pc. +set(GANDIVA_PC_CFLAGS "") +set(GANDIVA_PC_CFLAGS_PRIVATE "") +if(ARROW_BUILD_STATIC) + string(APPEND GANDIVA_PC_CFLAGS_PRIVATE " -DGANDIVA_STATIC") +endif() + # For parquet.pc. 
set(PARQUET_PC_CFLAGS "") -set(PARQUET_PC_CFLAGS_PRIVATE " -DPARQUET_STATIC") +set(PARQUET_PC_CFLAGS_PRIVATE "") +if(ARROW_BUILD_STATIC) + string(APPEND PARQUET_PC_CFLAGS_PRIVATE " -DPARQUET_STATIC") +endif() set(PARQUET_PC_REQUIRES "") set(PARQUET_PC_REQUIRES_PRIVATE "") diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 5d57bf312807..63bdd4ab76ee 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -694,8 +694,7 @@ if(DEFINED ENV{ARROW_GTEST_URL}) set(GTEST_SOURCE_URL "$ENV{ARROW_GTEST_URL}") else() set_urls(GTEST_SOURCE_URL - "https://github.com/google/googletest/archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz" - "https://chromium.googlesource.com/external/github.com/google/googletest/+archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz" + "https://github.com/google/googletest/releases/download/v${ARROW_GTEST_BUILD_VERSION}/googletest-${ARROW_GTEST_BUILD_VERSION}.tar.gz" "${THIRDPARTY_MIRROR_URL}/gtest-${ARROW_GTEST_BUILD_VERSION}.tar.gz") endif() @@ -1667,11 +1666,10 @@ endif() if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS OR ARROW_BUILD_INTEGRATION - OR ARROW_USE_GLOG - OR ARROW_WITH_GRPC) - set(ARROW_NEED_GFLAGS 1) + OR ARROW_USE_GLOG) + set(ARROW_NEED_GFLAGS TRUE) else() - set(ARROW_NEED_GFLAGS 0) + set(ARROW_NEED_GFLAGS FALSE) endif() macro(build_gflags) @@ -2338,6 +2336,9 @@ function(build_gtest) URL ${GTEST_SOURCE_URL} URL_HASH "SHA256=${ARROW_GTEST_BUILD_SHA256_CHECKSUM}") prepare_fetchcontent() + # We can remove this once we remove set(CMAKE_POLICY_VERSION_MINIMUM + # 3.5) from prepare_fetchcontent(). 
+ unset(CMAKE_POLICY_VERSION_MINIMUM) if(APPLE) string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-value" " -Wno-ignored-attributes") endif() @@ -3962,9 +3963,6 @@ macro(build_grpc) IMPORTED_LOCATION) get_target_property(GRPC_CARES_INCLUDE_DIR c-ares::cares INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(GRPC_CARES_ROOT "${GRPC_CARES_INCLUDE_DIR}" DIRECTORY) - get_target_property(GRPC_GFLAGS_INCLUDE_DIR ${GFLAGS_LIBRARIES} - INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(GRPC_GFLAGS_ROOT "${GRPC_GFLAGS_INCLUDE_DIR}" DIRECTORY) get_target_property(GRPC_RE2_INCLUDE_DIR re2::re2 INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(GRPC_RE2_ROOT "${GRPC_RE2_INCLUDE_DIR}" DIRECTORY) @@ -3972,7 +3970,6 @@ macro(build_grpc) # before (what are likely) system directories set(GRPC_CMAKE_PREFIX "${GRPC_CMAKE_PREFIX};${ABSL_PREFIX}") set(GRPC_CMAKE_PREFIX "${GRPC_CMAKE_PREFIX};${GRPC_PB_ROOT}") - set(GRPC_CMAKE_PREFIX "${GRPC_CMAKE_PREFIX};${GRPC_GFLAGS_ROOT}") set(GRPC_CMAKE_PREFIX "${GRPC_CMAKE_PREFIX};${GRPC_CARES_ROOT}") set(GRPC_CMAKE_PREFIX "${GRPC_CMAKE_PREFIX};${GRPC_RE2_ROOT}") @@ -4022,7 +4019,6 @@ macro(build_grpc) -DgRPC_BUILD_GRPC_RUBY_PLUGIN=OFF -DgRPC_BUILD_TESTS=OFF -DgRPC_CARES_PROVIDER=package - -DgRPC_GFLAGS_PROVIDER=package -DgRPC_MSVC_STATIC_RUNTIME=${ARROW_USE_STATIC_CRT} -DgRPC_PROTOBUF_PROVIDER=package -DgRPC_RE2_PROVIDER=package diff --git a/cpp/meson.build b/cpp/meson.build index 688239fdd3e4..98f91f523ae2 100644 --- a/cpp/meson.build +++ b/cpp/meson.build @@ -19,7 +19,7 @@ project( 'arrow', 'cpp', 'c', - version: '20.0.0-SNAPSHOT', + version: '20.0.0', license: 'Apache-2.0', meson_version: '>=1.3.0', default_options: [ diff --git a/cpp/src/arrow/acero/CMakeLists.txt b/cpp/src/arrow/acero/CMakeLists.txt index e6aa0560dfa8..5708d71737c6 100644 --- a/cpp/src/arrow/acero/CMakeLists.txt +++ b/cpp/src/arrow/acero/CMakeLists.txt @@ -19,6 +19,15 @@ add_custom_target(arrow_acero) arrow_install_all_headers("arrow/acero") +# If libarrow_acero.a is only 
built, "pkg-config --cflags --libs +# arrow-acero" outputs build flags for static linking not shared +# linking. ARROW_ACERO_PC_* except ARROW_ACERO_PC_*_PRIVATE are for +# the static linking case. +if(NOT ARROW_BUILD_SHARED AND ARROW_BUILD_STATIC) + string(APPEND ARROW_ACERO_PC_CFLAGS "${ARROW_ACERO_PC_CFLAGS_PRIVATE}") + set(ARROW_ACERO_PC_CFLAGS_PRIVATE "") +endif() + set(ARROW_ACERO_SRCS accumulation_queue.cc scalar_aggregate_node.cc diff --git a/cpp/src/arrow/acero/arrow-acero.pc.in b/cpp/src/arrow/acero/arrow-acero.pc.in index fde3262b9a21..ddddd52c4dd5 100644 --- a/cpp/src/arrow/acero/arrow-acero.pc.in +++ b/cpp/src/arrow/acero/arrow-acero.pc.in @@ -24,4 +24,5 @@ Description: Apache Arrow's Acero Engine. Version: @ARROW_VERSION@ Requires: arrow Libs: -L${libdir} -larrow_acero -Cflags.private: -DARROW_ACERO_STATIC +Cflags:@ARROW_ACERO_PC_CFLAGS@ +Cflags.private:@ARROW_ACERO_PC_CFLAGS_PRIVATE@ diff --git a/cpp/src/arrow/compute/kernels/pivot_internal.cc b/cpp/src/arrow/compute/kernels/pivot_internal.cc index 72d96213c92e..ee104919d573 100644 --- a/cpp/src/arrow/compute/kernels/pivot_internal.cc +++ b/cpp/src/arrow/compute/kernels/pivot_internal.cc @@ -150,7 +150,9 @@ Result> PivotWiderKeyMapper::Make( const DataType& key_type, const PivotWiderOptions* options, ExecContext* ctx) { auto instance = std::make_unique(); RETURN_NOT_OK(instance->Init(key_type, options, ctx)); - return instance; + // We can remove this static_cast() once we drop support for g++ + // 7.5.0 (we require C++20). 
+ return static_cast>(std::move(instance)); } } // namespace arrow::compute::internal diff --git a/cpp/src/arrow/compute/light_array_internal.cc b/cpp/src/arrow/compute/light_array_internal.cc index e4b1f1b8cdd6..322f4bd7fdb1 100644 --- a/cpp/src/arrow/compute/light_array_internal.cc +++ b/cpp/src/arrow/compute/light_array_internal.cc @@ -611,11 +611,9 @@ Status ExecBatchBuilder::AppendSelected(const std::shared_ptr& source }); Visit(source, num_rows_to_append - num_rows_to_process, row_ids + num_rows_to_process, [&](int i, const uint8_t* ptr, int32_t num_bytes) { - uint64_t* dst = reinterpret_cast( - target->mutable_data(2) + - offsets[num_rows_before + num_rows_to_process + i]); - const uint64_t* src = reinterpret_cast(ptr); - memcpy(dst, src, num_bytes); + auto dst = target->mutable_data(2) + + offsets[num_rows_before + num_rows_to_process + i]; + memcpy(dst, ptr, num_bytes); }); } diff --git a/cpp/src/arrow/compute/row/compare_internal.cc b/cpp/src/arrow/compute/row/compare_internal.cc index b7a01ea75ad7..bfae3ed301cc 100644 --- a/cpp/src/arrow/compute/row/compare_internal.cc +++ b/cpp/src/arrow/compute/row/compare_internal.cc @@ -275,7 +275,11 @@ void KeyCompare::CompareVarBinaryColumnToRowHelper( int32_t tail_length = length - j * 8; uint64_t tail_mask = ~0ULL >> (64 - 8 * tail_length); uint64_t key_left = 0; - std::memcpy(&key_left, key_left_ptr + j, tail_length); + // NOTE: UBSAN may falsely report "misaligned load" in `std::memcpy` on some + // platforms when using 64-bit pointers. Cast to an 8-bit pointer to work around + // this. 
+ const uint8_t* src_bytes = reinterpret_cast(key_left_ptr + j); + std::memcpy(&key_left, src_bytes, tail_length); uint64_t key_right = key_right_ptr[j]; result_or |= tail_mask & (key_left ^ key_right); } diff --git a/cpp/src/arrow/dataset/CMakeLists.txt b/cpp/src/arrow/dataset/CMakeLists.txt index e99593f669b8..29d8c3cb59f7 100644 --- a/cpp/src/arrow/dataset/CMakeLists.txt +++ b/cpp/src/arrow/dataset/CMakeLists.txt @@ -19,6 +19,15 @@ add_custom_target(arrow_dataset) arrow_install_all_headers("arrow/dataset") +# If libarrow_dataset.a is only built, "pkg-config --cflags --libs +# arrow-dataset" outputs build flags for static linking not shared +# linking. ARROW_DATASET_PC_* except ARROW_DATASET_PC_*_PRIVATE are for +# the static linking case. +if(NOT ARROW_BUILD_SHARED AND ARROW_BUILD_STATIC) + string(APPEND ARROW_DATASET_PC_CFLAGS "${ARROW_DATASET_PC_CFLAGS_PRIVATE}") + set(ARROW_DATASET_PC_CFLAGS_PRIVATE "") +endif() + set(ARROW_DATASET_SRCS dataset.cc dataset_writer.cc diff --git a/cpp/src/arrow/dataset/arrow-dataset.pc.in b/cpp/src/arrow/dataset/arrow-dataset.pc.in index d587fd110c08..4e35c554d7f8 100644 --- a/cpp/src/arrow/dataset/arrow-dataset.pc.in +++ b/cpp/src/arrow/dataset/arrow-dataset.pc.in @@ -24,4 +24,5 @@ Description: Apache Arrow Dataset provides an API to read and write semantic dat Version: @ARROW_VERSION@ Requires: @ARROW_DATASET_PKG_CONFIG_REQUIRES@ Libs: -L${libdir} -larrow_dataset -Cflags.private: -DARROW_DS_STATIC +Cflags:@ARROW_DATASET_PC_CFLAGS@ +Cflags.private:@ARROW_DATASET_PC_CFLAGS_PRIVATE@ diff --git a/cpp/src/arrow/engine/CMakeLists.txt b/cpp/src/arrow/engine/CMakeLists.txt index fcaa242b1148..6978a8383f0e 100644 --- a/cpp/src/arrow/engine/CMakeLists.txt +++ b/cpp/src/arrow/engine/CMakeLists.txt @@ -19,6 +19,15 @@ add_custom_target(arrow_substrait) arrow_install_all_headers("arrow/engine") +# If libarrow_substrait.a is only built, "pkg-config --cflags --libs +# arrow-substrait" outputs build flags for static linking not shared +# 
linking. ARROW_SUBSTRAIT_PC_* except ARROW_SUBSTRAIT_PC_*_PRIVATE are for +# the static linking case. +if(NOT ARROW_BUILD_SHARED AND ARROW_BUILD_STATIC) + string(APPEND ARROW_SUBSTRAIT_PC_CFLAGS "${ARROW_SUBSTRAIT_PC_CFLAGS_PRIVATE}") + set(ARROW_SUBSTRAIT_PC_CFLAGS_PRIVATE "") +endif() + set(ARROW_SUBSTRAIT_SRCS substrait/expression_internal.cc substrait/extended_expression_internal.cc diff --git a/cpp/src/arrow/engine/arrow-substrait.pc.in b/cpp/src/arrow/engine/arrow-substrait.pc.in index 1e891995e2d2..ceb9d0fadaac 100644 --- a/cpp/src/arrow/engine/arrow-substrait.pc.in +++ b/cpp/src/arrow/engine/arrow-substrait.pc.in @@ -24,4 +24,5 @@ Description: Apache Arrow's Substrait Consumer. Version: @ARROW_VERSION@ Requires: arrow-dataset Libs: -L${libdir} -larrow_substrait -Cflags.private: -DARROW_ENGINE_STATIC +Cflags:@ARROW_SUBSTRAIT_PC_CFLAGS@ +Cflags.private:@ARROW_SUBSTRAIT_PC_CFLAGS_PRIVATE@ diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc index 16dcba894483..02e453fbff9f 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/expression_internal.cc @@ -142,7 +142,7 @@ std::string EnumToString(int value, const google::protobuf::EnumDescriptor* desc if (value_desc == nullptr) { return "unknown"; } - return value_desc->name(); + return std::string(value_desc->name()); } Result FromProto(const substrait::Expression::ReferenceSegment* ref, diff --git a/cpp/src/arrow/engine/substrait/serde.cc b/cpp/src/arrow/engine/substrait/serde.cc index 6b4c05a3b1dd..db2dcb592829 100644 --- a/cpp/src/arrow/engine/substrait/serde.cc +++ b/cpp/src/arrow/engine/substrait/serde.cc @@ -62,8 +62,8 @@ Status ParseFromBufferImpl(const Buffer& buf, const std::string& full_name, template Result ParseFromBuffer(const Buffer& buf) { Message message; - ARROW_RETURN_NOT_OK( - ParseFromBufferImpl(buf, Message::descriptor()->full_name(), &message)); + 
ARROW_RETURN_NOT_OK(ParseFromBufferImpl( + buf, std::string(Message::descriptor()->full_name()), &message)); return message; } diff --git a/cpp/src/arrow/engine/substrait/util_internal.cc b/cpp/src/arrow/engine/substrait/util_internal.cc index 89034784ab5b..35e3baf94a6c 100644 --- a/cpp/src/arrow/engine/substrait/util_internal.cc +++ b/cpp/src/arrow/engine/substrait/util_internal.cc @@ -30,7 +30,7 @@ std::string EnumToString(int value, const google::protobuf::EnumDescriptor& desc if (value_desc == nullptr) { return "unknown"; } - return value_desc->name(); + return std::string(value_desc->name()); } std::unique_ptr CreateVersion() { diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt index b92d900ffd91..564d300ffa6d 100644 --- a/cpp/src/arrow/flight/CMakeLists.txt +++ b/cpp/src/arrow/flight/CMakeLists.txt @@ -19,6 +19,15 @@ add_custom_target(arrow_flight) arrow_install_all_headers("arrow/flight") +# If libarrow_flight.a is only built, "pkg-config --cflags --libs +# arrow-flight" outputs build flags for static linking not shared +# linking. ARROW_FLIGHT_PC_* except ARROW_FLIGHT_PC_*_PRIVATE are for +# the static linking case. 
+if(NOT ARROW_BUILD_SHARED AND ARROW_BUILD_STATIC) + string(APPEND ARROW_FLIGHT_PC_CFLAGS "${ARROW_FLIGHT_PC_CFLAGS_PRIVATE}") + set(ARROW_FLIGHT_PC_CFLAGS_PRIVATE "") +endif() + set(ARROW_FLIGHT_LINK_LIBS gRPC::grpc++ ${ARROW_PROTOBUF_LIBPROTOBUF}) if(ARROW_WITH_OPENTELEMETRY) list(APPEND ARROW_FLIGHT_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS}) diff --git a/cpp/src/arrow/flight/arrow-flight.pc.in b/cpp/src/arrow/flight/arrow-flight.pc.in index a63d6cadaa80..f548c642a712 100644 --- a/cpp/src/arrow/flight/arrow-flight.pc.in +++ b/cpp/src/arrow/flight/arrow-flight.pc.in @@ -25,4 +25,5 @@ Version: @ARROW_VERSION@ Requires: arrow Requires.private:@ARROW_FLIGHT_PC_REQUIRES_PRIVATE@ Libs: -L${libdir} -larrow_flight -Cflags.private: -DARROW_FLIGHT_STATIC +Cflags:@ARROW_FLIGHT_PC_CFLAGS@ +Cflags.private:@ARROW_FLIGHT_PC_CFLAGS_PRIVATE@ diff --git a/cpp/src/arrow/flight/sql/CMakeLists.txt b/cpp/src/arrow/flight/sql/CMakeLists.txt index b32f73149674..796cb9da18e3 100644 --- a/cpp/src/arrow/flight/sql/CMakeLists.txt +++ b/cpp/src/arrow/flight/sql/CMakeLists.txt @@ -19,6 +19,15 @@ add_custom_target(arrow_flight_sql) arrow_install_all_headers("arrow/flight/sql") +# If libarrow_flight_sql.a is only built, "pkg-config --cflags --libs +# arrow-flight-sql" outputs build flags for static linking not shared +# linking. ARROW_FLIGHT_SQL_PC_* except ARROW_FLIGHT_SQL_PC_*_PRIVATE +# are for the static linking case. 
+if(NOT ARROW_BUILD_SHARED AND ARROW_BUILD_STATIC) + string(APPEND ARROW_FLIGHT_SQL_PC_CFLAGS "${ARROW_FLIGHT_SQL_PC_CFLAGS_PRIVATE}") + set(ARROW_FLIGHT_SQL_PC_CFLAGS_PRIVATE "") +endif() + set(FLIGHT_SQL_PROTO_PATH "${ARROW_SOURCE_DIR}/../format") set(FLIGHT_SQL_PROTO ${ARROW_SOURCE_DIR}/../format/FlightSql.proto) diff --git a/cpp/src/arrow/flight/sql/arrow-flight-sql.pc.in b/cpp/src/arrow/flight/sql/arrow-flight-sql.pc.in index b76964b97c89..b42f00703a33 100644 --- a/cpp/src/arrow/flight/sql/arrow-flight-sql.pc.in +++ b/cpp/src/arrow/flight/sql/arrow-flight-sql.pc.in @@ -24,4 +24,5 @@ Description: Apache Arrow Flight SQL extension Version: @ARROW_VERSION@ Requires: arrow-flight Libs: -L${libdir} -larrow_flight_sql -Cflags.private: -DARROW_FLIGHT_SQL_STATIC +Cflags:@ARROW_FLIGHT_SQL_PC_CFLAGS@ +Cflags.private:@ARROW_FLIGHT_SQL_PC_CFLAGS_PRIVATE@ diff --git a/cpp/src/arrow/testing/process.cc b/cpp/src/arrow/testing/process.cc index 5f04b31aa15e..45b92af0dded 100644 --- a/cpp/src/arrow/testing/process.cc +++ b/cpp/src/arrow/testing/process.cc @@ -39,11 +39,18 @@ # ifdef __APPLE__ # include # endif -# include # include -# else -# include # endif +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include # include # else // We need BOOST_USE_WINDOWS_H definition with MinGW when we use @@ -57,7 +64,24 @@ # define BOOST_USE_WINDOWS_H = 1 # endif # ifdef BOOST_PROCESS_HAVE_V1 -# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include # else # include # endif diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 3e63f88c3d27..a7f5f9dacfff 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -25,6 +25,15 @@ add_custom_target(gandiva-benchmarks) add_dependencies(gandiva-all gandiva gandiva-tests gandiva-benchmarks) 
+# If libgandiva.a is only built, "pkg-config --cflags --libs +# gandiva" outputs build flags for static linking not shared +# linking. GANDIVA_PC_* except GANDIVA_PC_*_PRIVATE are for the static +# linking case. +if(NOT ARROW_BUILD_SHARED AND ARROW_BUILD_STATIC) + string(APPEND GANDIVA_PC_CFLAGS "${GANDIVA_PC_CFLAGS_PRIVATE}") + set(GANDIVA_PC_CFLAGS_PRIVATE "") +endif() + include(GandivaAddBitcode) find_package(LLVMAlt REQUIRED) diff --git a/cpp/src/gandiva/gandiva.pc.in b/cpp/src/gandiva/gandiva.pc.in index 49260092d297..e999a785feaa 100644 --- a/cpp/src/gandiva/gandiva.pc.in +++ b/cpp/src/gandiva/gandiva.pc.in @@ -24,5 +24,5 @@ Description: Gandiva is a toolset for compiling and evaluating expressions on Ar Version: @GANDIVA_VERSION@ Requires: arrow Libs: -L${libdir} -lgandiva -Cflags: -I${includedir} -Cflags.private: -DGANDIVA_STATIC +Cflags: -I${includedir}@GANDIVA_PC_CFLAGS@ +Cflags.private:@GANDIVA_PC_CFLAGS_PRIVATE@ diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 2ed7dd637fff..9af3a05e60bb 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -76,8 +76,8 @@ ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION=v2.22.0 ARROW_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM=0c68782e57959c82e0c81def805c01460a042c1aae0c2feee905acaa2a2dc9bf ARROW_GRPC_BUILD_VERSION=v1.46.3 ARROW_GRPC_BUILD_SHA256_CHECKSUM=d6cbf22cb5007af71b61c6be316a79397469c58c82a942552a62e708bce60964 -ARROW_GTEST_BUILD_VERSION=1.11.0 -ARROW_GTEST_BUILD_SHA256_CHECKSUM=b4870bf121ff7795ba20d20bcdd8627b8e088f2d1dab299a031c1034eddc93d5 +ARROW_GTEST_BUILD_VERSION=1.16.0 +ARROW_GTEST_BUILD_SHA256_CHECKSUM=78c676fc63881529bf97bf9d45948d905a66833fbfa5318ea2cd7478cb98f399 ARROW_JEMALLOC_BUILD_VERSION=5.3.0 ARROW_JEMALLOC_BUILD_SHA256_CHECKSUM=2db82d1e7119df3e71b7640219b6dfe84789bc0537983c3b7ac4f7189aecfeaa ARROW_LZ4_BUILD_VERSION=v1.10.0 @@ -149,7 +149,7 @@ DEPENDENCIES=( "ARROW_GLOG_URL glog-${ARROW_GLOG_BUILD_VERSION}.tar.gz 
https://github.com/google/glog/archive/${ARROW_GLOG_BUILD_VERSION}.tar.gz" "ARROW_GOOGLE_CLOUD_CPP_URL google-cloud-cpp-${ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION}.tar.gz https://github.com/googleapis/google-cloud-cpp/archive/${ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION}.tar.gz" "ARROW_GRPC_URL grpc-${ARROW_GRPC_BUILD_VERSION}.tar.gz https://github.com/grpc/grpc/archive/${ARROW_GRPC_BUILD_VERSION}.tar.gz" - "ARROW_GTEST_URL gtest-${ARROW_GTEST_BUILD_VERSION}.tar.gz https://github.com/google/googletest/archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz" + "ARROW_GTEST_URL gtest-${ARROW_GTEST_BUILD_VERSION}.tar.gz https://github.com/google/googletest/releases/download/v${ARROW_GTEST_BUILD_VERSION}/googletest-${ARROW_GTEST_BUILD_VERSION}.tar.gz" "ARROW_JEMALLOC_URL jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2 https://github.com/jemalloc/jemalloc/releases/download/${ARROW_JEMALLOC_BUILD_VERSION}/jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2" "ARROW_LZ4_URL lz4-${ARROW_LZ4_BUILD_VERSION}.tar.gz https://github.com/lz4/lz4/archive/${ARROW_LZ4_BUILD_VERSION}.tar.gz" "ARROW_MIMALLOC_URL mimalloc-${ARROW_MIMALLOC_BUILD_VERSION}.tar.gz https://github.com/microsoft/mimalloc/archive/${ARROW_MIMALLOC_BUILD_VERSION}.tar.gz" diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json index 6eb24ad18038..0ea06fcd70e8 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -1,6 +1,6 @@ { "name": "arrow", - "version-string": "20.0.0-SNAPSHOT", + "version-string": "20.0.0", "dependencies": [ "abseil", { @@ -29,15 +29,19 @@ "gflags", "glog", { - "name":"google-cloud-cpp", - "version>=": "1.32.1", + "name": "google-cloud-cpp", "default-features": false, "features": [ "storage" ] }, "grpc", - "gtest", + { + "name": "gtest", + "features": [ + "cxx17" + ] + }, "lz4", "openssl", "orc", @@ -52,6 +56,6 @@ "zlib", "zstd" ], - "$comment": "2025.02.14", - "builtin-baseline": "d5ec528843d29e3a52d745a64b469f810b2cedbf" + "$comment": "We can update builtin-baseline by 'vcpkg x-update-baseline'", + "builtin-baseline": 
"09f6a4ef2f08252f7f4d924fd9c2d42165fb21c9" } diff --git a/csharp/Directory.Build.props b/csharp/Directory.Build.props index 474c57734607..afde593793c6 100644 --- a/csharp/Directory.Build.props +++ b/csharp/Directory.Build.props @@ -29,7 +29,7 @@ Apache Arrow library Copyright 2016-2024 The Apache Software Foundation The Apache Software Foundation - 20.0.0-SNAPSHOT + 20.0.0 diff --git a/dev/release/.env.example b/dev/release/.env.example index e751ce4dbc56..c1e012aacbd6 100644 --- a/dev/release/.env.example +++ b/dev/release/.env.example @@ -31,18 +31,6 @@ # You must set this. #ARTIFACTORY_API_KEY=secret -# The Apache Sofotware Foundation ID to upload artifacts to -# repository.apache.org. -# -# You must set this. -#ASF_USER=kou - -# The Apache Sofotware Foundation password to upload artifacts to -# repository.apache.org. -# -# You must set this. -#ASF_PASSWORD=secret - # The GitHub token to upload artifacts to GitHub Release. # # You must set this. diff --git a/dev/release/02-source-test.rb b/dev/release/02-source-test.rb index 85b58ab20660..3a9398531d49 100644 --- a/dev/release/02-source-test.rb +++ b/dev/release/02-source-test.rb @@ -160,12 +160,12 @@ def test_vote [1]: https://github.com/apache/arrow/issues?q=is%3Aissue+milestone%3A#{@release_version}+is%3Aclosed [2]: https://github.com/apache/arrow/tree/#{@current_commit} [3]: https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-#{@release_version}-rc0 -[4]: https://apache.jfrog.io/artifactory/arrow/almalinux-rc/ -[5]: https://apache.jfrog.io/artifactory/arrow/amazon-linux-rc/ -[6]: https://apache.jfrog.io/artifactory/arrow/centos-rc/ -[7]: https://apache.jfrog.io/artifactory/arrow/debian-rc/ -[8]: https://apache.jfrog.io/artifactory/arrow/python-rc/#{@release_version}-rc0 -[9]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/ +[4]: https://packages.apache.org/artifactory/arrow/almalinux-rc/ +[5]: https://packages.apache.org/artifactory/arrow/amazon-linux-rc/ +[6]: 
https://packages.apache.org/artifactory/arrow/centos-rc/ +[7]: https://packages.apache.org/artifactory/arrow/debian-rc/ +[8]: https://packages.apache.org/artifactory/arrow/ubuntu-rc/ +[9]: https://github.com/apache/arrow/releases/tag/apache-arrow-#{@release_version}-rc0 [10]: https://github.com/apache/arrow/blob/#{@current_commit}/CHANGELOG.md [11]: https://arrow.apache.org/docs/developers/release_verification.html [12]: #{verify_pr_url || "null"} diff --git a/dev/release/02-source.sh b/dev/release/02-source.sh index b50aa9493653..414a2462dd57 100755 --- a/dev/release/02-source.sh +++ b/dev/release/02-source.sh @@ -189,12 +189,12 @@ The vote will be open for at least 72 hours. [1]: https://github.com/apache/arrow/issues?q=is%3Aissue+milestone%3A${version}+is%3Aclosed [2]: https://github.com/apache/arrow/tree/${release_hash} [3]: ${rc_url} -[4]: https://apache.jfrog.io/artifactory/arrow/almalinux-rc/ -[5]: https://apache.jfrog.io/artifactory/arrow/amazon-linux-rc/ -[6]: https://apache.jfrog.io/artifactory/arrow/centos-rc/ -[7]: https://apache.jfrog.io/artifactory/arrow/debian-rc/ -[8]: https://apache.jfrog.io/artifactory/arrow/python-rc/${version}-rc${rc} -[9]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/ +[4]: https://packages.apache.org/artifactory/arrow/almalinux-rc/ +[5]: https://packages.apache.org/artifactory/arrow/amazon-linux-rc/ +[6]: https://packages.apache.org/artifactory/arrow/centos-rc/ +[7]: https://packages.apache.org/artifactory/arrow/debian-rc/ +[8]: https://packages.apache.org/artifactory/arrow/ubuntu-rc/ +[9]: https://github.com/apache/arrow/releases/tag/apache-arrow-${version}-rc${rc} [10]: https://github.com/apache/arrow/blob/${release_hash}/CHANGELOG.md [11]: https://arrow.apache.org/docs/developers/release_verification.html [12]: ${verify_pr_url} diff --git a/dev/release/05-binary-upload.sh b/dev/release/05-binary-upload.sh index ae3ff936a74b..327517e801d8 100755 --- a/dev/release/05-binary-upload.sh +++ 
b/dev/release/05-binary-upload.sh @@ -121,19 +121,19 @@ rake_tasks=() apt_targets=() yum_targets=() if [ "${UPLOAD_ALMALINUX}" -gt 0 ]; then - rake_tasks+=(yum:rc:artifactory yum:rc) + rake_tasks+=(yum:rc) yum_targets+=(almalinux) fi if [ "${UPLOAD_AMAZON_LINUX}" -gt 0 ]; then - rake_tasks+=(yum:rc:artifactory yum:rc) + rake_tasks+=(yum:rc) yum_targets+=(amazon-linux) fi if [ "${UPLOAD_CENTOS}" -gt 0 ]; then - rake_tasks+=(yum:rc:artifactory yum:rc) + rake_tasks+=(yum:rc) yum_targets+=(centos) fi if [ "${UPLOAD_DEBIAN}" -gt 0 ]; then - rake_tasks+=(apt:rc:artifactory apt:rc) + rake_tasks+=(apt:rc) apt_targets+=(debian) fi if [ "${UPLOAD_DOCS}" -gt 0 ]; then @@ -143,7 +143,7 @@ if [ "${UPLOAD_R}" -gt 0 ]; then rake_tasks+=(r:rc) fi if [ "${UPLOAD_UBUNTU}" -gt 0 ]; then - rake_tasks+=(apt:rc:artifactory apt:rc) + rake_tasks+=(apt:rc) apt_targets+=(ubuntu) fi rake_tasks+=(summary:rc) @@ -162,8 +162,6 @@ docker_run \ )" \ ARTIFACTORY_API_KEY="${ARTIFACTORY_API_KEY}" \ ARTIFACTS_DIR="${tmp_dir}/artifacts" \ - ASF_PASSWORD="${ASF_PASSWORD}" \ - ASF_USER="${ASF_USER}" \ DEB_PACKAGE_NAME="${DEB_PACKAGE_NAME:-}" \ DRY_RUN="${DRY_RUN:-no}" \ GPG_KEY_ID="${GPG_KEY_ID}" \ diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb index 77d0352b8df9..30805cd339c5 100644 --- a/dev/release/binary-task.rb +++ b/dev/release/binary-task.rb @@ -693,7 +693,7 @@ def build_write_url(path) end def build_api_url(path, parameters) - uri_string = "https://apache.jfrog.io/artifactory/api/#{path}" + uri_string = "https://packages.apache.org/artifactory/api/#{path}" unless parameters.empty? uri_string << "?" escaped_parameters = parameters.collect do |key, value| @@ -705,7 +705,7 @@ def build_api_url(path, parameters) end def build_deployed_url(path) - uri_string = "https://apache.jfrog.io/artifactory/arrow" + uri_string = "https://packages.apache.org/artifactory/arrow" uri_string << "/#{@prefix}" unless @prefix.nil? 
uri_string << "/#{path}" URI(uri_string) @@ -1623,139 +1623,6 @@ def define_apt_rc_tasks merged_dir = "#{apt_rc_repositories_dir}/merged" upload_dir = "#{apt_rc_repositories_dir}/upload" - namespace :artifactory do - desc "Copy .deb packages" - task :copy do - apt_targets.each do |distribution, code_name, component| - progress_label = "Copying: #{distribution} #{code_name}" - progress_reporter = ProgressReporter.new(progress_label) - - distribution_dir = "#{incoming_dir}/#{distribution}" - pool_dir = "#{distribution_dir}/pool/#{code_name}" - rm_rf(pool_dir, verbose: verbose?) - mkdir_p(pool_dir, verbose: verbose?) - source_dir_prefix = "#{artifacts_dir}/#{distribution}-#{code_name}" - Dir.glob("#{source_dir_prefix}-*/apache-arrow-apt-source*") do |path| - base_name = File.basename(path) - package_name = "apache-arrow-apt-source" - destination_path = [ - pool_dir, - component, - package_name[0], - package_name, - base_name, - ].join("/") - copy_artifact(path, - destination_path, - progress_reporter) - if base_name.end_with?(".deb") - latest_apt_source_package_path = [ - distribution_dir, - "#{package_name}-latest-#{code_name}.deb" - ].join("/") - copy_artifact(path, - latest_apt_source_package_path, - progress_reporter) - end - end - progress_reporter.finish - end - end - - desc "Download dists/ for RC APT repositories" - task :download do - apt_distributions.each do |distribution| - not_checksum_pattern = /.+(? "C"}, - "gpg", - "--verify", - path, - out: IO::NULL, - err: IO::NULL, - verbose: false) - rescue - sh("debsign", - "--no-re-sign", - "-k#{gpg_key_id}", - path, - out: default_output, - verbose: verbose?) 
- end - end - sign_dir(distribution, distribution_dir) - end - end - - desc "Update RC APT repositories" - task :update do - apt_update(base_dir, incoming_dir, merged_dir) - apt_targets.each do |distribution, code_name, component| - dists_dir = "#{merged_dir}/#{distribution}/dists/#{code_name}" - next unless File.exist?(dists_dir) - sign_dir("#{distribution} #{code_name}", - dists_dir) - end - end - - desc "Upload .deb packages and RC APT repositories" - task :upload do - apt_distributions.each do |distribution| - upload_distribution_dir = "#{upload_dir}/#{distribution}" - incoming_distribution_dir = "#{incoming_dir}/#{distribution}" - merged_dists_dir = "#{merged_dir}/#{distribution}/dists" - - rm_rf(upload_distribution_dir, verbose: verbose?) - mkdir_p(upload_distribution_dir, verbose: verbose?) - Dir.glob("#{incoming_distribution_dir}/*") do |path| - next if File.basename(path) == "dists" - cp_r(path, - upload_distribution_dir, - preserve: true, - verbose: verbose?) - end - cp_r(merged_dists_dir, - upload_distribution_dir, - preserve: true, - verbose: verbose?) - write_uploaded_files(upload_distribution_dir) - uploader = ArtifactoryUploader.new(api_key: artifactory_api_key, - distribution: distribution, - rc: rc, - source: upload_distribution_dir, - staging: staging?) - uploader.upload - end - end - end - - desc "Release RC APT repositories to Artifactory" - apt_rc_artifactory_tasks = [ - "apt:rc:artifactory:copy", - "apt:rc:artifactory:download", - "apt:rc:artifactory:sign", - "apt:rc:artifactory:update", - "apt:rc:artifactory:upload", - ] - apt_rc_artifactory_tasks.unshift("apt:staging:prepare") if staging? - task :artifactory => apt_rc_artifactory_tasks - desc "Copy .deb packages" task :copy do apt_targets.each do |distribution, code_name, component| @@ -1767,8 +1634,7 @@ def define_apt_rc_tasks rm_rf(pool_dir, verbose: verbose?) mkdir_p(pool_dir, verbose: verbose?) 
source_dir_prefix = "#{artifacts_dir}/#{distribution}-#{code_name}" - Dir.glob("#{source_dir_prefix}*/**/*") do |path| - next if File.directory?(path) + Dir.glob("#{source_dir_prefix}-*/*") do |path| base_name = File.basename(path) package_name = ENV["DEB_PACKAGE_NAME"] if package_name.nil? or package_name.empty? @@ -1809,7 +1675,7 @@ def define_apt_rc_tasks not_checksum_pattern = /.+(? apt_rc_tasks end end @@ -1904,13 +1769,11 @@ def define_apt_release_tasks directory apt_release_repositories_dir namespace :apt do - namespace :artifactory do - desc "Release APT repository on Artifactory" - task :release do - apt_distributions.each do |distribution| - release_distribution(distribution, - list: uploaded_files_name) - end + desc "Release APT repository" + task :release do + apt_distributions.each do |distribution| + release_distribution(distribution, + list: uploaded_files_name) end end end @@ -2072,20 +1935,18 @@ def yum_update(base_dir, incoming_dir) def define_yum_staging_tasks namespace :yum do - namespace :artifactory do - namespace :staging do - desc "Prepare staging environment for Yum repositories on Artifactory" - task :prepare do - yum_distributions.each do |distribution| - prepare_staging(distribution) - end + namespace :staging do + desc "Prepare staging environment for Yum repositories" + task :prepare do + yum_distributions.each do |distribution| + prepare_staging(distribution) end + end - desc "Delete staging environment for Yum repositories on Artifactory" - task :delete do - yum_distributions.each do |distribution| - delete_staging(distribution) - end + desc "Delete staging environment for Yum repositories" + task :delete do + yum_distributions.each do |distribution| + delete_staging(distribution) end end end @@ -2099,156 +1960,6 @@ def define_yum_rc_tasks incoming_dir = "#{yum_rc_repositories_dir}/incoming" upload_dir = "#{yum_rc_repositories_dir}/upload" - namespace :artifactory do - desc "Copy RPM packages" - task :copy do - yum_targets.each do 
|distribution, distribution_version| - progress_label = "Copying: #{distribution} #{distribution_version}" - progress_reporter = ProgressReporter.new(progress_label) - - destination_prefix = [ - incoming_dir, - distribution, - distribution_version, - ].join("/") - rm_rf(destination_prefix, verbose: verbose?) - source_dir_prefix = - "#{artifacts_dir}/#{distribution}-#{distribution_version}" - Dir.glob("#{source_dir_prefix}*/apache-arrow-release-*") do |path| - base_name = File.basename(path) - type = base_name.split(".")[-2] - destination_paths = [] - case type - when "src" - destination_paths << [ - destination_prefix, - "Source", - "SPackages", - base_name, - ].join("/") - when "noarch" - yum_architectures.each do |architecture| - destination_paths << [ - destination_prefix, - architecture, - "Packages", - base_name, - ].join("/") - end - else - destination_paths << [ - destination_prefix, - type, - "Packages", - base_name, - ].join("/") - end - destination_paths.each do |destination_path| - copy_artifact(path, - destination_path, - progress_reporter) - end - case base_name - when /\A(apache-arrow-release)-.*\.noarch\.rpm\z/ - package_name = $1 - latest_release_package_path = [ - destination_prefix, - "#{package_name}-latest.rpm" - ].join("/") - copy_artifact(path, - latest_release_package_path, - progress_reporter) - end - end - - progress_reporter.finish - end - end - - desc "Download repodata for RC Yum repositories" - task :download do - yum_distributions.each do |distribution| - distribution_dir = "#{base_dir}/#{distribution}" - download_distribution(:artifactory, - distribution, - distribution_dir, - :base, - pattern: /\/repodata\//) - end - end - - desc "Sign RPM packages" - task :sign do - rpm_sign(incoming_dir) - yum_targets.each do |distribution, distribution_version| - source_dir = [ - incoming_dir, - distribution, - distribution_version, - ].join("/") - sign_dir("#{distribution}-#{distribution_version}", - source_dir) - end - end - - desc "Update RC 
Yum repositories" - task :update do - yum_update(base_dir, incoming_dir) - yum_targets.each do |distribution, distribution_version| - target_dir = [ - incoming_dir, - distribution, - distribution_version, - ].join("/") - target_dir = Pathname(target_dir) - next unless target_dir.directory? - target_dir.glob("*") do |arch_dir| - next unless arch_dir.directory? - sign_label = - "#{distribution}-#{distribution_version} #{arch_dir.basename}" - sign_dir(sign_label, - arch_dir.to_s) - end - end - end - - desc "Upload RC Yum repositories on Artifactory" - task :upload => yum_rc_repositories_dir do - yum_distributions.each do |distribution| - incoming_target_dir = "#{incoming_dir}/#{distribution}" - upload_target_dir = "#{upload_dir}/#{distribution}" - - rm_rf(upload_target_dir, verbose: verbose?) - mkdir_p(upload_target_dir, verbose: verbose?) - cp_r(Dir.glob("#{incoming_target_dir}/*"), - upload_target_dir.to_s, - preserve: true, - verbose: verbose?) - write_uploaded_files(upload_target_dir) - - uploader = ArtifactoryUploader.new(api_key: artifactory_api_key, - distribution: distribution, - rc: rc, - source: upload_target_dir, - staging: staging?, - sync: true, - sync_pattern: /\/repodata\//) - uploader.upload - end - end - end - - desc "Release RC Yum packages on Artifactory" - yum_rc_artifactory_tasks = [ - "yum:rc:artifactory:copy", - "yum:rc:artifactory:download", - "yum:rc:artifactory:sign", - "yum:rc:artifactory:update", - "yum:rc:artifactory:upload", - ] - yum_rc_artifactory_tasks.unshift("yum:staging:prepare") if staging? - task :artifactory => yum_rc_artifactory_tasks - desc "Copy RPM packages" task :copy do yum_targets.each do |distribution, distribution_version| @@ -2263,8 +1974,7 @@ def define_yum_rc_tasks rm_rf(destination_prefix, verbose: verbose?) 
source_dir_prefix = "#{artifacts_dir}/#{distribution}-#{distribution_version}" - Dir.glob("#{source_dir_prefix}*/**/*") do |path| - next if File.directory?(path) + Dir.glob("#{source_dir_prefix}*/*.rpm") do |path| base_name = File.basename(path) type = base_name.split(".")[-2] destination_paths = [] @@ -2319,7 +2029,7 @@ def define_yum_rc_tasks task :download do yum_distributions.each do |distribution| distribution_dir = "#{base_dir}/#{distribution}" - download_distribution(:maven_repository, + download_distribution(:artifactory, distribution, distribution_dir, :base, @@ -2376,13 +2086,16 @@ def define_yum_rc_tasks verbose: verbose?) write_uploaded_files(upload_target_dir) - uploader = MavenRepositoryUploader.new(asf_user: asf_user, - asf_password: asf_password, - distribution: distribution, - rc: rc, - source: upload_target_dir, - sync: true, - sync_pattern: /\/repodata\//) + uploader = ArtifactoryUploader.new(api_key: artifactory_api_key, + distribution: distribution, + rc: rc, + source: upload_target_dir, + staging: staging?, + # Don't remove old repodata + # because our implementation + # doesn't support it. + sync: false, + sync_pattern: /\/repodata\//) uploader.upload end end @@ -2396,6 +2109,7 @@ def define_yum_rc_tasks "yum:rc:update", "yum:rc:upload", ] + yum_rc_tasks.unshift("yum:staging:prepare") if staging? 
task :rc => yum_rc_tasks end end @@ -2404,33 +2118,31 @@ def define_yum_release_tasks directory yum_release_repositories_dir namespace :yum do - namespace :artifactory do - desc "Release Yum packages on Artifactory" - task :release => yum_release_repositories_dir do - yum_distributions.each do |distribution| - release_distribution(distribution, - list: uploaded_files_name) - - distribution_dir = "#{yum_release_repositories_dir}/#{distribution}" - download_distribution(distribution, - distribution_dir, - :rc, - pattern: /\/repodata\//) - uploader = ArtifactoryUploader.new(api_key: artifactory_api_key, - distribution: distribution, - source: distribution_dir, - staging: staging?, - # Don't remove old repodata for - # unsupported distribution version - # such as Amazon Linux 2. - # This keeps garbage in repodata/ - # for currently available - # distribution versions but we - # accept it for easy to implement. - sync: false, - sync_pattern: /\/repodata\//) - uploader.upload - end + desc "Release Yum packages" + task :release => yum_release_repositories_dir do + yum_distributions.each do |distribution| + release_distribution(distribution, + list: uploaded_files_name) + + distribution_dir = "#{yum_release_repositories_dir}/#{distribution}" + download_distribution(distribution, + distribution_dir, + :rc, + pattern: /\/repodata\//) + uploader = ArtifactoryUploader.new(api_key: artifactory_api_key, + distribution: distribution, + source: distribution_dir, + staging: staging?, + # Don't remove old repodata for + # unsupported distribution version + # such as Amazon Linux 2. + # This keeps garbage in repodata/ + # for currently available + # distribution versions but we + # accept it for easy to implement. + sync: false, + sync_pattern: /\/repodata\//) + uploader.upload end end end @@ -2586,17 +2298,11 @@ def define_summary_tasks suffix << "-staging" if staging? puts(<<-SUMMARY) Success! 
The release candidate binaries are available here: - https://repository.apache.org/content/repositories/staging/org/apache/arrow/almalinux/ - https://repository.apache.org/content/repositories/staging/org/apache/arrow/amazon-linux/ - https://repository.apache.org/content/repositories/staging/org/apache/arrow/centos/ - https://repository.apache.org/content/repositories/staging/org/apache/arrow/debian/ - https://repository.apache.org/content/repositories/staging/org/apache/arrow/ubuntu/ - - https://apache.jfrog.io/artifactory/arrow/almalinux#{suffix}-rc/ - https://apache.jfrog.io/artifactory/arrow/amazon-linux#{suffix}-rc/ - https://apache.jfrog.io/artifactory/arrow/centos#{suffix}-rc/ - https://apache.jfrog.io/artifactory/arrow/debian#{suffix}-rc/ - https://apache.jfrog.io/artifactory/arrow/ubuntu#{suffix}-rc/ + https://packages.apache.org/artifactory/arrow/almalinux#{suffix}-rc/ + https://packages.apache.org/artifactory/arrow/amazon-linux#{suffix}-rc/ + https://packages.apache.org/artifactory/arrow/centos#{suffix}-rc/ + https://packages.apache.org/artifactory/arrow/debian#{suffix}-rc/ + https://packages.apache.org/artifactory/arrow/ubuntu#{suffix}-rc/ SUMMARY end @@ -2605,21 +2311,12 @@ def define_summary_tasks suffix = "" suffix << "-staging" if staging? puts(<<-SUMMARY) -Click the "release" button manually at -https://repository.apache.org/#stagingRepositories . - Success! 
The release binaries are available here: - https://repo1.maven.org/maven2/org/apache/arrow/almalinux/ - https://repo1.maven.org/maven2/org/apache/arrow/amazon-linux/ - https://repo1.maven.org/maven2/org/apache/arrow/centos/ - https://repo1.maven.org/maven2/org/apache/arrow/debian/ - https://repo1.maven.org/maven2/org/apache/arrow/ubuntu/ - - https://apache.jfrog.io/artifactory/arrow/almalinux#{suffix}/ - https://apache.jfrog.io/artifactory/arrow/amazon-linux#{suffix}/ - https://apache.jfrog.io/artifactory/arrow/centos#{suffix}/ - https://apache.jfrog.io/artifactory/arrow/debian#{suffix}/ - https://apache.jfrog.io/artifactory/arrow/ubuntu#{suffix}/ + https://packages.apache.org/artifactory/arrow/almalinux#{suffix}/ + https://packages.apache.org/artifactory/arrow/amazon-linux#{suffix}/ + https://packages.apache.org/artifactory/arrow/centos#{suffix}/ + https://packages.apache.org/artifactory/arrow/debian#{suffix}/ + https://packages.apache.org/artifactory/arrow/ubuntu#{suffix}/ SUMMARY end end diff --git a/dev/release/download_rc_binaries.py b/dev/release/download_rc_binaries.py index e9defccc6304..01f6588c6d6b 100755 --- a/dev/release/download_rc_binaries.py +++ b/dev/release/download_rc_binaries.py @@ -28,7 +28,6 @@ import time import urllib.request - DEFAULT_PARALLEL_DOWNLOADS = 8 @@ -36,33 +35,31 @@ class Downloader: def get_file_list(self, prefix, filter=None): def traverse(directory, files, directories): - url = f'{self.URL_ROOT}/{directory}' + url = f"{self.URL_ROOT}/{directory}" response = urllib.request.urlopen(url).read().decode() paths = re.findall(' 0: directory = directories.pop() traverse(directory, files, directories) return files - def download_files(self, files, dest=None, num_parallel=None, - re_match=None): + def download_files(self, files, dest=None, num_parallel=None, re_match=None): """ Download files from Bintray in parallel. 
If file already exists, will overwrite if the checksum does not match what Bintray says it should be @@ -83,19 +80,21 @@ def download_files(self, files, dest=None, num_parallel=None, num_parallel = DEFAULT_PARALLEL_DOWNLOADS if re_match is not None: - regex = re.compile(re_match) - files = [x for x in files if regex.match(x)] + files = self._filter_files(files, re_match) if num_parallel == 1: for path in files: self._download_file(dest, path) else: parallel_map_terminate_early( - functools.partial(self._download_file, dest), - files, - num_parallel + functools.partial(self._download_file, + dest), files, num_parallel ) + def _filter_files(self, files, re_match): + regex = re.compile(re_match) + return [x for x in files if regex.match(x)] + def _download_file(self, dest, path): base, filename = os.path.split(path) @@ -106,7 +105,7 @@ def _download_file(self, dest, path): print("Downloading {} to {}".format(path, dest_path)) - url = f'{self.URL_ROOT}/{path}' + url = f"{self.URL_ROOT}/{path}" self._download_url(url, dest_path) def _download_url(self, url, dest_path, *, extra_args=None): @@ -128,8 +127,8 @@ def _download_url(self, url, dest_path, *, extra_args=None): delay = attempt * 3 print(f"Waiting {delay} seconds before retrying {url}") time.sleep(delay) - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + proc = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = proc.communicate() if proc.returncode != 0: try: @@ -142,8 +141,9 @@ def _download_url(self, url, dest_path, *, extra_args=None): break else: return - raise Exception(f"Downloading {url} failed\n" - f"stdout: {stdout}\nstderr: {stderr}") + raise Exception( + f"Downloading {url} failed\n" f"stdout: {stdout}\nstderr: {stderr}" + ) def _curl_version(self): cmd = ["curl", "--version"] @@ -152,9 +152,15 @@ def _curl_version(self): return (int(match.group(1)), int(match.group(2)), int(match.group(3))) +class Artifactory(Downloader): + URL_ROOT 
= "https://packages.apache.org/artifactory/arrow" + + class Maven(Downloader): - URL_ROOT = "https://repository.apache.org" + \ - "/content/repositories/staging/org/apache/arrow" + URL_ROOT = ( + "https://repository.apache.org" + + "/content/repositories/staging/org/apache/arrow" + ) class GitHub(Downloader): @@ -170,8 +176,10 @@ def __init__(self, repository, tag): self._token = os.environ.get("GH_TOKEN") def get_file_list(self, prefix, filter=None): - url = (f"https://api.github.com/repos/{self._repository}/" - f"releases/tags/{self._tag}") + url = ( + f"https://api.github.com/repos/{self._repository}/" + f"releases/tags/{self._tag}" + ) print("Fetching release from", url) headers = { "Accept": "application/vnd.github+json", @@ -200,6 +208,10 @@ def get_file_list(self, prefix, filter=None): files.append((asset["name"], url)) return files + def _filter_files(self, files, re_match): + regex = re.compile(re_match) + return [x for x in files if regex.match(x[0])] + def _download_file(self, dest, asset): name, url = asset @@ -222,11 +234,7 @@ def _download_file(self, dest, asset): if self._curl_version() >= (7, 71, 0): # Also retry 403s extra_args.append("--retry-all-errors") - self._download_url( - url, - dest_path, - extra_args=extra_args - ) + self._download_url(url, dest_path, extra_args=extra_args) def parallel_map_terminate_early(f, iterable, num_parallel): @@ -244,74 +252,104 @@ def parallel_map_terminate_early(f, iterable, num_parallel): ARROW_REPOSITORY_PACKAGE_TYPES = [ - 'almalinux', - 'amazon-linux', - 'centos', - 'debian', - 'ubuntu', + "almalinux", + "amazon-linux", + "centos", + "debian", + "ubuntu", ] -ARROW_STANDALONE_PACKAGE_TYPES = ['nuget', 'python'] -ARROW_PACKAGE_TYPES = \ - ARROW_REPOSITORY_PACKAGE_TYPES + \ - ARROW_STANDALONE_PACKAGE_TYPES - - -def download_rc_binaries(version, rc_number, re_match=None, dest=None, - num_parallel=None, target_package_type=None, - repository=None, tag=None): - version_string = '{}-rc{}'.format(version, rc_number) 
- version_pattern = re.compile(r'\d+\.\d+\.\d+') +ARROW_STANDALONE_PACKAGE_TYPES = ["nuget", "python"] +ARROW_PACKAGE_TYPES = ARROW_REPOSITORY_PACKAGE_TYPES + ARROW_STANDALONE_PACKAGE_TYPES + + +def download_rc_binaries( + version, + rc_number, + re_match=None, + dest=None, + num_parallel=None, + target_package_type=None, + repository=None, + tag=None, +): + version_string = "{}-rc{}".format(version, rc_number) + version_pattern = re.compile(r"\d+\.\d+\.\d+") if target_package_type: package_types = [target_package_type] else: package_types = ARROW_PACKAGE_TYPES for package_type in package_types: + def is_target(path): match = version_pattern.search(path) if not match: return True return match[0] == version + filter = is_target - if package_type == 'github' or package_type == 'nuget': + if package_type == "github" or package_type in ARROW_STANDALONE_PACKAGE_TYPES: downloader = GitHub(repository, tag) - prefix = '' + prefix = "" filter = None elif package_type in ARROW_REPOSITORY_PACKAGE_TYPES: - downloader = Maven() - prefix = package_type + downloader = Artifactory() + prefix = f'{package_type}-rc' else: - downloader = Maven() - prefix = f'{package_type}/{version_string}' + downloader = Artifactory() + prefix = f'{package_type}-rc/{version_string}' filter = None files = downloader.get_file_list(prefix, filter=filter) - downloader.download_files(files, re_match=re_match, dest=dest, - num_parallel=num_parallel) + downloader.download_files( + files, re_match=re_match, dest=dest, num_parallel=num_parallel + ) -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser( - description='Download release candidate binaries' + description="Download release candidate binaries") + parser.add_argument("version", type=str, help="The version number") + parser.add_argument( + "rc_number", type=int, help="The release candidate number, e.g. 
0, 1, etc" + ) + parser.add_argument( + "-e", + "--regexp", + type=str, + default=None, + help=( + "Regular expression to match on file names " + "to only download certain files" + ), + ) + parser.add_argument( + "--dest", + type=str, + default=os.getcwd(), + help="The output folder for the downloaded files", + ) + parser.add_argument( + "--num_parallel", + type=int, + default=DEFAULT_PARALLEL_DOWNLOADS, + help="The number of concurrent downloads to do", + ) + parser.add_argument( + "--package_type", + type=str, + default=None, + help="The package type to be downloaded", + ) + parser.add_argument( + "--repository", + type=str, + help=("The repository to pull from " "(required if --package_type=github)"), + ) + parser.add_argument( + "--tag", + type=str, + help=("The release tag to download " "(required if --package_type=github)"), ) - parser.add_argument('version', type=str, help='The version number') - parser.add_argument('rc_number', type=int, - help='The release candidate number, e.g. 
0, 1, etc') - parser.add_argument('-e', '--regexp', type=str, default=None, - help=('Regular expression to match on file names ' - 'to only download certain files')) - parser.add_argument('--dest', type=str, default=os.getcwd(), - help='The output folder for the downloaded files') - parser.add_argument('--num_parallel', type=int, - default=DEFAULT_PARALLEL_DOWNLOADS, - help='The number of concurrent downloads to do') - parser.add_argument('--package_type', type=str, default=None, - help='The package type to be downloaded') - parser.add_argument('--repository', type=str, - help=('The repository to pull from ' - '(required if --package_type=github)')) - parser.add_argument('--tag', type=str, - help=('The release tag to download ' - '(required if --package_type=github)')) args = parser.parse_args() download_rc_binaries( diff --git a/dev/release/post-03-binary.sh b/dev/release/post-03-binary.sh index 1ebc97aac339..d0487585ddd2 100755 --- a/dev/release/post-03-binary.sh +++ b/dev/release/post-03-binary.sh @@ -57,26 +57,26 @@ rake_tasks=() apt_targets=() yum_targets=() if [ "${DEPLOY_ALMALINUX}" -gt 0 ]; then - rake_tasks+=(yum:artifactory:release) + rake_tasks+=(yum:release) yum_targets+=(almalinux) fi if [ "${DEPLOY_AMAZON_LINUX}" -gt 0 ]; then - rake_tasks+=(yum:artifactory:release) + rake_tasks+=(yum:release) yum_targets+=(amazon-linux) fi if [ "${DEPLOY_CENTOS}" -gt 0 ]; then - rake_tasks+=(yum:artifactory:release) + rake_tasks+=(yum:release) yum_targets+=(centos) fi if [ "${DEPLOY_DEBIAN}" -gt 0 ]; then - rake_tasks+=(apt:artifactory:release) + rake_tasks+=(apt:release) apt_targets+=(debian) fi if [ "${DEPLOY_R}" -gt 0 ]; then rake_tasks+=(r:release) fi if [ "${DEPLOY_UBUNTU}" -gt 0 ]; then - rake_tasks+=(apt:artifactory:release) + rake_tasks+=(apt:release) apt_targets+=(ubuntu) fi rake_tasks+=(summary:release) diff --git a/dev/release/verify-apt.sh b/dev/release/verify-apt.sh index 255b414c27a1..02296c2079a7 100755 --- a/dev/release/verify-apt.sh +++ 
b/dev/release/verify-apt.sh @@ -66,11 +66,10 @@ ${APT_INSTALL} \ code_name="$(lsb_release --codename --short)" distribution="$(lsb_release --id --short | tr 'A-Z' 'a-z')" -production_repository_base_url="https://repo1.maven.org/maven2/org/apache/arrow/${distribution}" -staging_repository_base_url="https://repository.apache.org/content/repositories/staging/org/apache/arrow/${distribution}" -repository_base_url="${production_repository_base_url}" +artifactory_base_url="https://packages.apache.org/artifactory/arrow/${distribution}" if [ "${TYPE}" = "rc" ]; then - repository_base_url="${staging_repository_base_url}" + suffix=${TYPE%-release} + artifactory_base_url+="-${suffix}" fi workaround_missing_packages=() @@ -106,7 +105,7 @@ else apt_source_base_name="apache-arrow-apt-source-latest-${code_name}.deb" curl \ --output "${apt_source_base_name}" \ - "${repository_base_url}/${apt_source_base_name}" + "${artifactory_base_url}/${apt_source_base_name}" ${APT_INSTALL} "./${apt_source_base_name}" fi @@ -132,7 +131,7 @@ else rc) sed \ -i"" \ - -e "s,^URIs: ${production_repository_base_url},URIs: ${staging_repository_base_url},g" \ + -e "s,^URIs: \\(.*\\)/,URIs: \\1-${suffix}/,g" \ /etc/apt/sources.list.d/apache-arrow.sources ;; esac diff --git a/dev/release/verify-release-candidate-wheels.bat b/dev/release/verify-release-candidate-wheels.bat index a9a4703fae63..e41d2dbf25b0 100644 --- a/dev/release/verify-release-candidate-wheels.bat +++ b/dev/release/verify-release-candidate-wheels.bat @@ -33,29 +33,18 @@ if not exist %_VERIFICATION_DIR% mkdir %_VERIFICATION_DIR% cd %_VERIFICATION_DIR% -@rem clone Arrow repository to obtain test requirements -set GIT_ENV_PATH=%_VERIFICATION_DIR%\_git -call conda create -p %GIT_ENV_PATH% ^ - --no-shortcuts -f -q -y git ^ - || EXIT /B 1 -call activate %GIT_ENV_PATH% - -git clone https://github.com/apache/arrow.git || EXIT /B 1 -pushd arrow -git submodule update --init -popd - set ARROW_VERSION=%1 set RC_NUMBER=%2 -python 
arrow\dev\release\download_rc_binaries.py %ARROW_VERSION% %RC_NUMBER% ^ - --package_type python ^ +python dev\release\download_rc_binaries.py %ARROW_VERSION% %RC_NUMBER% ^ + --package_type="python" ^ + --repository="apache/arrow" ^ + --dest="%_VERIFICATION_DIR%" ^ + --tag="apache-arrow-%ARROW_VERSION%-rc%RC_NUMBER%" ^ --regex=".*win_amd64.*" || EXIT /B 1 -call deactivate - -set ARROW_TEST_DATA=%cd%\arrow\testing\data -set PARQUET_TEST_DATA=%cd%\arrow\cpp\submodules\parquet-testing\data +set ARROW_TEST_DATA=%cd%\testing\data +set PARQUET_TEST_DATA=%cd%\cpp\submodules\parquet-testing\data CALL :verify_wheel 3.9 @@ -99,13 +88,13 @@ call activate %CONDA_ENV_PATH% set WHEEL_FILENAME=pyarrow-%ARROW_VERSION%-cp%PY_VERSION_NO_PERIOD%-cp%PY_VERSION_NO_PERIOD%%ABI_TAG%-win_amd64.whl -pip install python-rc\%ARROW_VERSION%-rc%RC_NUMBER%\%WHEEL_FILENAME% || EXIT /B 1 +pip install %_VERIFICATION_DIR%\%WHEEL_FILENAME% || EXIT /B 1 python -c "import pyarrow" || EXIT /B 1 python -c "import pyarrow.parquet" || EXIT /B 1 python -c "import pyarrow.flight" || EXIT /B 1 python -c "import pyarrow.dataset" || EXIT /B 1 -pip install -r arrow\python\requirements-test.txt || EXIT /B 1 +pip install -r %_CURRENT_DIR%\python\requirements-test.txt || EXIT /B 1 set PYARROW_TEST_CYTHON=OFF set TZDIR=%CONDA_ENV_PATH%\share\zoneinfo @@ -113,6 +102,6 @@ pytest %CONDA_ENV_PATH%\Lib\site-packages\pyarrow --pdb -v || EXIT /B 1 :done -call deactivate +call conda deactivate EXIT /B 0 diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index d7ffcdb0af01..21afb90d93c7 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -168,6 +168,7 @@ verify_dir_artifact_signatures() { } test_binary() { + # this downloads all artifacts and verifies their checksums and signatures show_header "Testing binary artifacts" maybe_setup_conda @@ -176,7 +177,8 @@ test_binary() { ${PYTHON:-python3} $SOURCE_DIR/download_rc_binaries.py 
$VERSION $RC_NUMBER \ --dest=${download_dir} \ - --repository=${GITHUB_REPOSITORY:-apache/arrow} + --repository=${GITHUB_REPOSITORY:-apache/arrow} \ + --tag="apache-arrow-$VERSION-rc$RC_NUMBER" verify_dir_artifact_signatures ${download_dir} } @@ -1049,11 +1051,13 @@ test_wheels() { $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ --package_type python \ --regex=${filter_regex} \ - --dest=${download_dir} + --dest=${download_dir} \ + --repository=${GITHUB_REPOSITORY:-apache/arrow} \ + --tag="apache-arrow-$VERSION-rc$RC_NUMBER" verify_dir_artifact_signatures ${download_dir} - wheels_dir=${download_dir}/python-rc/${VERSION}-rc${RC_NUMBER} + wheels_dir=${download_dir} fi pushd ${wheels_dir} diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh index 7bc8f4583c33..8691e538ec7d 100755 --- a/dev/release/verify-yum.sh +++ b/dev/release/verify-yum.sh @@ -36,12 +36,7 @@ SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" TOP_SOURCE_DIR="${SOURCE_DIR}/../.." local_prefix="${TOP_SOURCE_DIR}/dev/tasks/linux-packages" -production_repository_base_url="https://repo1.maven.org/maven2/org/apache/arrow" -staging_repository_base_url="https://repository.apache.org/content/repositories/staging/org/apache/arrow" -repository_base_url="${production_repository_base_url}" -if [ "${TYPE}" = "rc" ]; then - repository_base_url="${staging_repository_base_url}" -fi +artifactory_base_url="https://packages.apache.org/artifactory/arrow" distribution=$(. /etc/os-release && echo "${ID}") distribution_version=$(. 
/etc/os-release && echo "${VERSION_ID}" | grep -o "^[0-9]*") @@ -162,29 +157,33 @@ if [ "${TYPE}" = "local" ]; then ${install_command} "${release_path}" else package_version="${VERSION}" + if [ "${TYPE}" = "rc" ]; then + suffix=${TYPE%-release} + distribution_prefix+="-${suffix}" + fi ${install_command} \ - ${repository_base_url}/${distribution_prefix}/${repository_version}/apache-arrow-release-latest.rpm + ${artifactory_base_url}/${distribution_prefix}/${repository_version}/apache-arrow-release-latest.rpm fi if [ "${TYPE}" = "local" ]; then sed \ -i"" \ - -e "s,baseurl=${production_repository_base_url}/,baseurl=file://${local_prefix}/yum/repositories/,g" \ + -e "s,baseurl=https://packages\.apache\.org/artifactory/arrow/,baseurl=file://${local_prefix}/yum/repositories/,g" \ /etc/yum.repos.d/Apache-Arrow.repo keys="${local_prefix}/KEYS" if [ -f "${keys}" ]; then cp "${keys}" /etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow fi else - case "${TYPE}" in - rc) - suffix=${TYPE%-release} - sed \ - -i"" \ - -e "s,baseurl=${production_repository_base_url},baseurl=${staging_repository_base_url},g" \ - /etc/yum.repos.d/Apache-Arrow.repo - ;; - esac + if [ "${TYPE}" = "rc" ]; then + suffix=${TYPE%-release} + sed \ + -i"" \ + -e "s,/almalinux/,/almalinux-${suffix}/,g" \ + -e "s,/centos/,/centos-${suffix}/,g" \ + -e "s,/amazon-linux/,/amazon-linux-${suffix}/,g" \ + /etc/yum.repos.d/Apache-Arrow.repo + fi fi echo "::endgroup::" @@ -299,7 +298,7 @@ fi echo "::group::Test coexistence with old library" ${uninstall_command} apache-arrow-release if ${install_command} \ - ${repository_base_url}/${distribution_prefix}/${repository_version}/apache-arrow-release-latest.rpm; then + ${artifactory_base_url}/${distribution_prefix}/${repository_version}/apache-arrow-release-latest.rpm; then ${clean_command} all if [ "${have_arrow_libs}" = "yes" ]; then ${install_command} ${enablerepo_epel} arrow-libs diff --git a/dev/tasks/docker-tests/github.linux.yml b/dev/tasks/docker-tests/github.linux.yml index 
ee221d6f6d8d..a893432cf941 100644 --- a/dev/tasks/docker-tests/github.linux.yml +++ b/dev/tasks/docker-tests/github.linux.yml @@ -66,7 +66,11 @@ jobs: uses: actions/upload-artifact@v4 with: name: test-output - path: arrow/r/check/arrow.Rcheck/tests/testthat.Rout* + path: | + arrow/r/tests/ + arrow/r/arrow.Rcheck/ + !arrow/r/arrow.Rcheck/00_pkg_src/ + !arrow/r/arrow.Rcheck/arrow/ if-no-files-found: ignore {% if arrow.is_default_branch() %} diff --git a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb index f403d5db87aa..64461c3ee495 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb @@ -29,7 +29,7 @@ class ApacheArrowGlib < Formula desc "GLib bindings for Apache Arrow" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-20.0.0-SNAPSHOT/apache-arrow-20.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-20.0.0/apache-arrow-20.0.0.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git", branch: "main" diff --git a/dev/tasks/homebrew-formulae/apache-arrow.rb b/dev/tasks/homebrew-formulae/apache-arrow.rb index 6e1505859ae6..8fb95dad64d7 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow.rb @@ -29,7 +29,7 @@ class ApacheArrow < Formula desc "Columnar in-memory analytics layer designed to accelerate big data" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-20.0.0-SNAPSHOT/apache-arrow-20.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-20.0.0/apache-arrow-20.0.0.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git", branch: "main" diff --git 
a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog index e780a704a05b..eb915890d22f 100644 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow-apt-source (20.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Jacob Wujciak-Jens Tue, 22 Apr 2025 17:58:15 -0000 + apache-arrow-apt-source (19.0.1-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules index a7d0637bd1e4..1e3be48c3150 100755 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules @@ -24,7 +24,7 @@ override_dh_auto_build: distribution=$$(lsb_release --id --short | tr 'A-Z' 'a-z'); \ code_name=$$(lsb_release --codename --short); \ echo "Types: deb deb-src"; \ - echo "URIs: https://repo1.maven.org/maven2/org/apache/arrow/$${distribution}/"; \ + echo "URIs: https://packages.apache.org/artifactory/arrow/$${distribution}/"; \ echo "Suites: $${code_name}"; \ echo "Components: main"; \ echo "Signed-By: /usr/share/keyrings/apache-arrow-apt-source.asc"; \ diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo b/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo index 7eb76706262d..6a27acf9a2ea 100644 --- a/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo +++ b/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo @@ -17,42 +17,42 @@ [apache-arrow-almalinux] name=Apache Arrow for AlmaLinux $releasever - $basearch -baseurl=https://repo1.maven.org/maven2/org/apache/arrow/almalinux/$releasever/$basearch/ +baseurl=https://packages.apache.org/artifactory/arrow/almalinux/$releasever/$basearch/ gpgcheck=1 enabled=0 
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow [apache-arrow-amazon-linux-2023] name=Apache Arrow for Amazon Linux 2023 - $basearch -baseurl=https://repo1.maven.org/maven2/org/apache/arrow/amazon-linux/2023/$basearch/ +baseurl=https://packages.apache.org/artifactory/arrow/amazon-linux/2023/$basearch/ gpgcheck=1 enabled=0 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow [apache-arrow-centos-stream] name=Apache Arrow for CentOS Stream $releasever - $basearch -baseurl=https://repo1.maven.org/maven2/org/apache/arrow/centos/$stream/$basearch/ +baseurl=https://packages.apache.org/artifactory/arrow/centos/$stream/$basearch/ gpgcheck=1 enabled=0 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow [apache-arrow-centos] name=Apache Arrow for CentOS $releasever - $basearch -baseurl=https://repo1.maven.org/maven2/org/apache/arrow/centos/$releasever/$basearch/ +baseurl=https://packages.apache.org/artifactory/arrow/centos/$releasever/$basearch/ gpgcheck=1 enabled=0 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow [apache-arrow-rhel] name=Apache Arrow for RHEL $releasever - $basearch -baseurl=https://repo1.maven.org/maven2/org/apache/arrow/almalinux/$releasever/$basearch/ +baseurl=https://packages.apache.org/artifactory/arrow/almalinux/$releasever/$basearch/ gpgcheck=1 enabled=0 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow [apache-arrow-rhel7] name=Apache Arrow for RHEL 7 - $basearch -baseurl=https://repo1.maven.org/maven2/org/apache/arrow/centos/7/$basearch/ +baseurl=https://packages.apache.org/artifactory/arrow/centos/7/$basearch/ gpgcheck=1 enabled=0 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in index 612f43a19db6..b6403c921f33 100644 --- a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in +++ 
b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in @@ -102,6 +102,9 @@ else fi %changelog +* Tue Apr 22 2025 Jacob Wujciak-Jens - 20.0.0-1 +- New upstream release. + * Tue Feb 11 2025 Bryce Mecum - 19.0.1-1 - New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/debian/changelog b/dev/tasks/linux-packages/apache-arrow/debian/changelog index 6c41ddcecb3b..466a634976a1 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow (20.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Jacob Wujciak-Jens Tue, 22 Apr 2025 17:58:15 -0000 + apache-arrow (19.0.1-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index b2cff1a26b4c..8557071ee6ce 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -894,6 +894,9 @@ Documentation for Apache Parquet GLib. %endif %changelog +* Tue Apr 22 2025 Jacob Wujciak-Jens - 20.0.0-1 +- New upstream release. + * Tue Feb 11 2025 Bryce Mecum - 19.0.1-1 - New upstream release. diff --git a/dev/tasks/r/github.macos.m1san.yml b/dev/tasks/r/github.macos.m1san.yml new file mode 100644 index 000000000000..4a8b39301c0f --- /dev/null +++ b/dev/tasks/r/github.macos.m1san.yml @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + macos-cran: + name: "m1-san on macOS" + runs-on: macOS-15 + strategy: + fail-fast: false + + steps: + {{ macros.github_checkout_arrow()|indent }} + + - name: Configure dependencies (macos) + run: | + brew install openssl + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + r-version: devel + - name: Install dependencies + uses: r-lib/actions/setup-r-dependencies@v2 + with: + cache: false # cache does not work on across branches + working-directory: arrow/r + extra-packages: | + any::rcmdcheck + any::sys + - name: Setup the sanitizer + run: | + # From rhub/rhub's m1-san: + # https://github.com/r-hub/actions/blob/ab9b2fb5021f43bfd0ee977932e0a14d25f6e59e/run-check/scripts/mac-asan.sh + # Use XCode 16.3 ------------------------------------------------ + sudo rm -f /Applications/Xcode.app + sudo ln -sfF /Applications/Xcode_16.3.app /Applications/Xcode.app + sudo xcode-select -s /Applications/Xcode.app + + # Compile with sanitizers --------------------------------------- + mkdir -p ~/.R + cat >> ~/.R/Makevars <> /tmp/R + cat >> /tmp/R <> /tmp/R + chmod +x /tmp/R + sudo mv /tmp/R ${R} + - name: Install and check + env: + R_BIN: R + LIBARROW_BINARY: FALSE + run: | + export ARROW_SOURCE_HOME=$(pwd)/arrow + export INSTALL_ARGS="--no-test-load" + arrow/ci/scripts/r_sanitize.sh arrow + - name: Save the test output + uses: actions/upload-artifact@v4 + with: + name: test-output + path: | + arrow/r/tests/ + arrow/r/arrow.Rcheck/ + 
!arrow/r/arrow.Rcheck/00_pkg_src/ + !arrow/r/arrow.Rcheck/arrow/ + if: always() diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index dd9418a44d76..9a845c60add5 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -533,12 +533,12 @@ tasks: - r-lib__libarrow__bin__darwin-arm64-openssl-3.0__arrow-{no_rc_r_version}\.zip - r-lib__libarrow__bin__darwin-x86_64-openssl-1.1__arrow-{no_rc_r_version}\.zip - r-lib__libarrow__bin__darwin-x86_64-openssl-3.0__arrow-{no_rc_r_version}\.zip + - r-pkg__bin__windows__contrib__4.5__arrow_{no_rc_r_version}\.zip - r-pkg__bin__windows__contrib__4.4__arrow_{no_rc_r_version}\.zip - - r-pkg__bin__windows__contrib__4.3__arrow_{no_rc_r_version}\.zip + - r-pkg__bin__macosx__big-sur-x86_64__contrib__4.5__arrow_{no_rc_r_version}\.tgz - r-pkg__bin__macosx__big-sur-x86_64__contrib__4.4__arrow_{no_rc_r_version}\.tgz - - r-pkg__bin__macosx__big-sur-x86_64__contrib__4.3__arrow_{no_rc_r_version}\.tgz + - r-pkg__bin__macosx__big-sur-arm64__contrib__4.5__arrow_{no_rc_r_version}\.tgz - r-pkg__bin__macosx__big-sur-arm64__contrib__4.4__arrow_{no_rc_r_version}\.tgz - - r-pkg__bin__macosx__big-sur-arm64__contrib__4.3__arrow_{no_rc_r_version}\.tgz - r-pkg__src__contrib__arrow_{no_rc_r_version}\.tar\.gz {% for which in ["strong", "most"] %} @@ -1087,13 +1087,25 @@ tasks: image: ubuntu-r-sanitizer timeout: 120 - test-r-clang-sanitizer: + test-r-clang-asan: ci: github template: docker-tests/github.linux.yml params: env: R_PRUNE_DEPS: TRUE - image: r-clang-sanitizer + image: r-clang-asan + + test-r-clang-ubsan: + ci: github + template: docker-tests/github.linux.yml + params: + env: + R_PRUNE_DEPS: TRUE + image: r-clang-ubsan + + test-r-m1-san: + ci: github + template: r/github.macos.m1san.yml # be sure to update binary-task.rb when upgrading Debian test-debian-12-docs: diff --git a/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat b/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat index 3b337bb17500..e118ce6f57e9 100644 --- 
a/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat +++ b/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat @@ -27,7 +27,6 @@ call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Too @rem changes in vcpkg vcpkg install ^ - --triplet x64-windows ^ --x-manifest-root cpp ^ --feature-flags=versions ^ --clean-after-build ^ diff --git a/dev/tasks/vcpkg-tests/github.windows.yml b/dev/tasks/vcpkg-tests/github.windows.yml index 0fd4892c0a67..81213d9fc8bf 100644 --- a/dev/tasks/vcpkg-tests/github.windows.yml +++ b/dev/tasks/vcpkg-tests/github.windows.yml @@ -25,6 +25,7 @@ jobs: runs-on: windows-2019 env: VCPKG_BINARY_SOURCES: 'clear;nuget,GitHub,readwrite' + VCPKG_DEFAULT_TRIPLET: 'x64-windows' steps: {{ macros.github_checkout_arrow()|indent }} # CMake 3.29.1 that is pre-installed on the Windows image has a problem. diff --git a/docker-compose.yml b/docker-compose.yml index 596eca7f8dc4..633302491089 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -162,7 +162,8 @@ x-hierarchy: - ubuntu-r-valgrind - ubuntu-swift - ubuntu-verify-rc - - r-clang-sanitizer + - r-clang-asan + - r-clang-ubsan - r - r-revdepcheck # helper services @@ -1717,7 +1718,7 @@ services: /bin/bash -c " /arrow/ci/scripts/r_sanitize.sh /arrow" - r-clang-sanitizer: + r-clang-asan: image: ${REPO}:r-rhub-clang-devel-latest build: context: . @@ -1737,7 +1738,33 @@ services: LIBARROW_DOWNLOAD: "false" ARROW_SOURCE_HOME: "/arrow" ARROW_R_DEV: ${ARROW_R_DEV} - # To test for CRAN release, delete ^^ these two env vars so we download the Apache release + ARROW_USE_PKG_CONFIG: "false" + volumes: + - .:/arrow:delegated + command: > + /bin/bash -c " + /arrow/ci/scripts/r_sanitize.sh /arrow" + + r-clang-ubsan: + image: ${REPO}:r-rhub-clang-ubsan-devel-latest + build: + context: . 
+ dockerfile: ci/docker/linux-r.dockerfile + cache_from: + - ${REPO}:r-rhub-clang-ubsan-devel-latest + args: + base: rhub/clang-ubsan + cmake: ${CMAKE} + r_dev: ${ARROW_R_DEV} + r_bin: R + tz: ${TZ} + r_prune_deps: ${R_PRUNE_DEPS} + shm_size: *shm-size + environment: + <<: *common + LIBARROW_DOWNLOAD: "false" + ARROW_SOURCE_HOME: "/arrow" + ARROW_R_DEV: ${ARROW_R_DEV} ARROW_USE_PKG_CONFIG: "false" volumes: - .:/arrow:delegated diff --git a/docs/source/_static/versions.json b/docs/source/_static/versions.json index 370915c47985..795c05c7ac2a 100644 --- a/docs/source/_static/versions.json +++ b/docs/source/_static/versions.json @@ -1,15 +1,20 @@ [ { - "name": "20.0 (dev)", + "name": "21.0 (dev)", "version": "dev/", "url": "https://arrow.apache.org/docs/dev/" }, { - "name": "19.0 (stable)", + "name": "20.0 (stable)", "version": "", "url": "https://arrow.apache.org/docs/", "preferred": true }, + { + "name": "19.0", + "version": "19.0/", + "url": "https://arrow.apache.org/docs/19.0/" + }, { "name": "18.1", "version": "18.1/", diff --git a/js/package.json b/js/package.json index dd366c431c69..4786c509fcaf 100644 --- a/js/package.json +++ b/js/package.json @@ -120,5 +120,5 @@ "engines": { "node": ">=12.0" }, - "version": "20.0.0-SNAPSHOT" + "version": "20.0.0" } diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt index 07de5a612b39..207bde70c5c0 100644 --- a/matlab/CMakeLists.txt +++ b/matlab/CMakeLists.txt @@ -100,7 +100,7 @@ endfunction() set(CMAKE_CXX_STANDARD 17) -set(MLARROW_VERSION "20.0.0-SNAPSHOT") +set(MLARROW_VERSION "20.0.0") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" MLARROW_BASE_VERSION "${MLARROW_VERSION}") project(mlarrow VERSION "${MLARROW_BASE_VERSION}") diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index 27af19676b73..8dd34f3a61e3 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -24,7 +24,7 @@ 
set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_NAME libmexclass) # libmexclass is accessible for CI without permission issues. set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_REPOSITORY "https://github.com/mathworks/libmexclass.git") # Use a specific Git commit hash to avoid libmexclass version changing unexpectedly. -set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_TAG "cac7c3630a086bd5ba41413af44c833cef189c09") +set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_TAG "2a75a5e9bbb524a044572598e371c994cc715d3d") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_SOURCE_SUBDIR "libmexclass/cpp") # ------------------------------------------ diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index d07edaa6ff64..d75f12ee1e45 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -28,7 +28,7 @@ project(pyarrow) # which in turn meant that Py_GIL_DISABLED was not set. set(CMAKE_NO_SYSTEM_FROM_IMPORTED ON) -set(PYARROW_VERSION "20.0.0-SNAPSHOT") +set(PYARROW_VERSION "20.0.0") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" PYARROW_BASE_VERSION "${PYARROW_VERSION}") # Generate SO version and full SO version diff --git a/python/pyproject.toml b/python/pyproject.toml index 567fc96f78eb..e7c95e07e258 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -81,4 +81,4 @@ root = '..' 
version_file = 'pyarrow/_generated_version.py' version_scheme = 'guess-next-dev' git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' -fallback_version = '20.0.0a0' +fallback_version = '20.0.0' diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 2203e507e42f..3bd71577394c 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,6 +1,6 @@ Package: arrow Title: Integration to 'Apache' 'Arrow' -Version: 19.0.1.9000 +Version: 20.0.0 Authors@R: c( person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = c("aut")), person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")), diff --git a/r/NEWS.md b/r/NEWS.md index 5caec65df25f..420ad8b3320d 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -17,11 +17,25 @@ under the License. --> -# arrow 19.0.1.9000 +# arrow 20.0.0 + +# arrow 19.0.1.1 + +## Minor improvements and fixes + +- Updated internal code to comply with new CRAN requirements on non-API calls ([#45949](https://github.com/apache/arrow/issues/45949)) +- Enable building the bundled third-party libraries under CMake 4.0 ([#45987](https://github.com/apache/arrow/issues/45987)) # arrow 19.0.1 -# arrow 19.0.0 +This release primarily updates the underlying Arrow C++ version used by the +package to version 19.0.1 and includes all changes from the 19.0.0 and 19.0.1 +releases. For what's changed in Arrow C++ 19.0.0, please see the [blog +post](https://arrow.apache.org/blog/2025/01/16/19.0.0-release/) and +[changelog](https://arrow.apache.org/release/19.0.0.html#changelog). +For what's changed in Arrow C++ 19.0.1, please see the [blog +post](https://arrow.apache.org/blog/2025/02/16/19.0.1-release/) and +[changelog](https://arrow.apache.org/release/19.0.1.html#changelog). 
# arrow 18.1.0 @@ -72,7 +86,7 @@ ## Minor improvements and fixes * Dataset and table output printing now truncates schemas longer than 20 items long (#38916) -* Fixed pointer conversion to Python for latest reticulate to ensure data can be passed between Arrow and PyArrow (#39969) +* Fixed pointer conversion to Python for latest reticulate to ensure data can be passed between Arrow and PyArrow (#39969) * Check on macOS if we are using GNU libtool is and ensure we use macOS libtool instead (#40259) * Fix an error where creating a bundled tarball with all dependencies was failing on Windows (@hutch3232, #40232) @@ -86,7 +100,7 @@ data into R (@m-muecke, #38601). * Calling `dimnames` or `colnames` on `Dataset` objects now returns a useful result rather than just `NULL` (#38377). -* The `code()` method on Schema objects now takes an optional `namespace` +* The `code()` method on Schema objects now takes an optional `namespace` argument which, when `TRUE`, prefixes names with `arrow::` which makes the output more portable (@orgadish, #38144). @@ -101,7 +115,7 @@ ND-JSON support added in arrow 13.0.0 (@Divyansh200102, #38258). * To make debugging problems easier when using arrow with AWS S3 (e.g., `s3_bucket`, `S3FileSystem`), the debug log level for S3 can be set - with the `AWS_S3_LOG_LEVEL` environment variable. + with the `AWS_S3_LOG_LEVEL` environment variable. See `?S3FileSystem` for more information. (#38267) * Using arrow with duckdb (i.e., `to_duckdb()`) no longer results in warnings when quitting your R session. (#38495) @@ -143,8 +157,8 @@ * If pkg-config fails to detect the required libraries an additional search without pkg-config is run (#38970). -* Fetch the latest nightly Arrow C++ binary when installing a development - Version (#38236). +* Fetch the latest nightly Arrow C++ binary when installing a development + Version (#38236). 
# arrow 14.0.0.1 @@ -168,7 +182,7 @@ * `schema()` can now be called on `data.frame` objects to retrieve their inferred Arrow schema (#37843). * CSVs with a comma or other character as decimal mark can now be read in - by the dataset reading functions and new function `read_csv2_arrow()` + by the dataset reading functions and new function `read_csv2_arrow()` (#38002). ## Minor improvements and fixes @@ -246,7 +260,7 @@ # arrow 12.0.1 -* Update the version of the date library vendored with Arrow C++ library +* Update the version of the date library vendored with Arrow C++ library for compatibility with tzdb 0.4.0 (#35594, #35612). * Update some tests for compatibility with waldo 0.5.1 (#35131, #35308). diff --git a/r/pkgdown/assets/versions.html b/r/pkgdown/assets/versions.html index db8a97badb97..1e0fa4ffd4e5 100644 --- a/r/pkgdown/assets/versions.html +++ b/r/pkgdown/assets/versions.html @@ -1,7 +1,8 @@ -

19.0.1.9000 (dev)

-

19.0.1 (release)

+

20.0.0.9000 (dev)

+

20.0.0 (release)

+

19.0.1

18.1.0

17.0.0

16.1.0

diff --git a/r/pkgdown/assets/versions.json b/r/pkgdown/assets/versions.json index ea0d663b2a6f..c924e2f3c10c 100644 --- a/r/pkgdown/assets/versions.json +++ b/r/pkgdown/assets/versions.json @@ -1,12 +1,16 @@ [ { - "name": "19.0.1.9000 (dev)", + "name": "20.0.0.9000 (dev)", "version": "dev/" }, { - "name": "19.0.1 (release)", + "name": "20.0.0 (release)", "version": "" }, + { + "name": "19.0.1", + "version": "19.0/" + }, { "name": "18.1.0", "version": "18.1/" diff --git a/r/src/Makevars.in b/r/src/Makevars.in index 7a09d6cff3ae..f91fe503f3cf 100644 --- a/r/src/Makevars.in +++ b/r/src/Makevars.in @@ -31,4 +31,4 @@ PKG_LIBS=@libs@ all: $(SHLIB) purify purify: $(SHLIB) - @rm -rf ../libarrow || true + @rm -rf ../{libarrow,windows} || true diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 90a459e19cb6..a4adf58997c4 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -115,7 +115,7 @@ const std::shared_ptr& GetChunkedArray(SEXP alt) { // materialization is needed. // data2: starts as NULL, and becomes a standard R vector with the same // data if necessary: if materialization is needed, e.g. if we need -// to access its data pointer, with DATAPTR(). +// to access its data pointer, with INTEGER(), REAL(), etc. 
template struct AltrepVectorBase { // store the Array as an external pointer in data1, mark as immutable @@ -220,7 +220,14 @@ struct AltrepVectorPrimitive : public AltrepVectorBase(DATAPTR(copy))); + if constexpr (std::is_same_v) { + Get_region(alt, 0, size, REAL(copy)); + } else if constexpr (std::is_same_v) { + Get_region(alt, 0, size, INTEGER(copy)); + } else { + static_assert(std::is_same_v || std::is_same_v, + "ALTREP not implemented for this c_type"); + } // store as data2, this is now considered materialized SetRepresentation(alt, copy); @@ -269,13 +276,27 @@ struct AltrepVectorPrimitive : public AltrepVectorBase) { + return REAL(Materialize(alt)); + } else if constexpr (std::is_same_v) { + return INTEGER(Materialize(alt)); + } else { + static_assert(std::is_same_v || std::is_same_v, + "ALTREP not implemented for this c_type"); + } } // The value at position i static c_type Elt(SEXP alt, R_xlen_t i) { if (IsMaterialized(alt)) { - return reinterpret_cast(DATAPTR(Representation(alt)))[i]; + if constexpr (std::is_same_v) { + return REAL(Representation(alt))[i]; + } else if constexpr (std::is_same_v) { + return INTEGER(Representation(alt))[i]; + } else { + static_assert(std::is_same_v || std::is_same_v, + "ALTREP not implemented for this c_type"); + } } auto altrep_data = @@ -531,7 +552,7 @@ struct AltrepFactor : public AltrepVectorBase { SEXP copy = PROTECT(Rf_allocVector(INTSXP, size)); // copy the data from the array, through Get_region - Get_region(alt, 0, size, reinterpret_cast(DATAPTR(copy))); + Get_region(alt, 0, size, INTEGER(copy)); // store as data2, this is now considered materialized SetRepresentation(alt, copy); @@ -552,7 +573,7 @@ struct AltrepFactor : public AltrepVectorBase { return nullptr; } - static void* Dataptr(SEXP alt, Rboolean writeable) { return DATAPTR(Materialize(alt)); } + static void* Dataptr(SEXP alt, Rboolean writeable) { return INTEGER(Materialize(alt)); } static SEXP Duplicate(SEXP alt, Rboolean /* deep */) { // the 
representation integer vector @@ -892,7 +913,9 @@ struct AltrepVectorString : public AltrepVectorBase> { return s; } - static void* Dataptr(SEXP alt, Rboolean writeable) { return DATAPTR(Materialize(alt)); } + static void* Dataptr(SEXP alt, Rboolean writeable) { + return const_cast(DATAPTR_RO(Materialize(alt))); + } static SEXP Materialize(SEXP alt) { if (Base::IsMaterialized(alt)) { @@ -931,7 +954,9 @@ struct AltrepVectorString : public AltrepVectorBase> { } static const void* Dataptr_or_null(SEXP alt) { - if (Base::IsMaterialized(alt)) return DATAPTR(Representation(alt)); + if (Base::IsMaterialized(alt)) { + return DATAPTR_RO(alt); + } // otherwise give up return nullptr; @@ -1267,21 +1292,14 @@ sexp test_arrow_altrep_copy_by_dataptr(sexp x) { if (TYPEOF(x) == INTSXP) { cpp11::writable::integers out(Rf_xlength(x)); - int* ptr = reinterpret_cast(DATAPTR(x)); + int* ptr = INTEGER(x); for (R_xlen_t i = 0; i < n; i++) { out[i] = ptr[i]; } return out; } else if (TYPEOF(x) == REALSXP) { cpp11::writable::doubles out(Rf_xlength(x)); - double* ptr = reinterpret_cast(DATAPTR(x)); - for (R_xlen_t i = 0; i < n; i++) { - out[i] = ptr[i]; - } - return out; - } else if (TYPEOF(x) == STRSXP) { - cpp11::writable::strings out(Rf_xlength(x)); - SEXP* ptr = reinterpret_cast(DATAPTR(x)); + double* ptr = REAL(x); for (R_xlen_t i = 0; i < n; i++) { out[i] = ptr[i]; } diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 1595a3c8fb5d..d1882e56daf1 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -173,7 +173,7 @@ template class RBuffer : public MutableBuffer { public: explicit RBuffer(RVector vec) - : MutableBuffer(reinterpret_cast(DATAPTR(vec)), + : MutableBuffer(reinterpret_cast(getDataPointer(vec)), vec.size() * sizeof(typename RVector::value_type), arrow::CPUDevice::memory_manager(gc_memory_pool())), vec_(vec) {} @@ -181,6 +181,24 @@ class RBuffer : public MutableBuffer { private: // vec_ holds the memory RVector vec_; + + static void* getDataPointer(RVector& vec) 
{ + if (TYPEOF(vec) == LGLSXP) { + return LOGICAL(vec); + } else if (TYPEOF(vec) == INTSXP) { + return INTEGER(vec); + } else if (TYPEOF(vec) == REALSXP) { + return REAL(vec); + } else if (TYPEOF(vec) == CPLXSXP) { + return COMPLEX(vec); + } else if (TYPEOF(vec) == STRSXP) { + // We don't want to expose the string data here, so we error + cpp11::stop("Operation not supported for string vectors."); + } else { + // raw + return RAW(vec); + } + } }; std::shared_ptr InferArrowTypeFromFactor(SEXP); diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 3292c44520fb..981bd911fbaf 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -1214,11 +1214,11 @@ bool can_reuse_memory(SEXP x, const std::shared_ptr& type) { // because MakeSimpleArray below will force materialization switch (type->id()) { case Type::INT32: - return TYPEOF(x) == INTSXP && !OBJECT(x); + return TYPEOF(x) == INTSXP && !Rf_isObject(x); case Type::DOUBLE: - return TYPEOF(x) == REALSXP && !OBJECT(x); + return TYPEOF(x) == REALSXP && !Rf_isObject(x); case Type::INT8: - return TYPEOF(x) == RAWSXP && !OBJECT(x); + return TYPEOF(x) == RAWSXP && !Rf_isObject(x); case Type::INT64: return TYPEOF(x) == REALSXP && Rf_inherits(x, "integer64"); default: @@ -1412,17 +1412,17 @@ bool vector_from_r_memory(SEXP x, const std::shared_ptr& type, switch (type->id()) { case Type::INT32: - return TYPEOF(x) == INTSXP && !OBJECT(x) && + return TYPEOF(x) == INTSXP && !Rf_isObject(x) && vector_from_r_memory_impl(x, type, columns, j, tasks); case Type::DOUBLE: - return TYPEOF(x) == REALSXP && !OBJECT(x) && + return TYPEOF(x) == REALSXP && !Rf_isObject(x) && vector_from_r_memory_impl(x, type, columns, j, tasks); case Type::UINT8: - return TYPEOF(x) == RAWSXP && !OBJECT(x) && + return TYPEOF(x) == RAWSXP && !Rf_isObject(x) && vector_from_r_memory_impl(x, type, columns, j, tasks); diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R index bd2908084818..ce2ad34e9f48 100644 --- 
a/r/tests/testthat/helper-skip.R +++ b/r/tests/testthat/helper-skip.R @@ -57,6 +57,7 @@ skip_if_no_pyarrow <- function() { return() } + skip_on_cran() skip_on_linux_devel() skip_on_os("windows") diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R index 50bd40988e55..d1e90b6b59a2 100644 --- a/r/tests/testthat/test-altrep.R +++ b/r/tests/testthat/test-altrep.R @@ -170,7 +170,7 @@ test_that("element access methods for int32 ALTREP with nulls", { expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original) expect_false(test_arrow_altrep_is_materialized(altrep)) - # because there are no nulls, DATAPTR() does not materialize + # because there are nulls, DATAPTR() does materialize expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original) expect_true(test_arrow_altrep_is_materialized(altrep)) @@ -193,7 +193,7 @@ test_that("element access methods for double ALTREP with nulls", { expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original) expect_false(test_arrow_altrep_is_materialized(altrep)) - # because there are no nulls, DATAPTR() does not materialize + # because there are nulls, DATAPTR() does materialize expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original) expect_true(test_arrow_altrep_is_materialized(altrep)) @@ -244,14 +244,13 @@ test_that("element access methods for character ALTREP", { expect_identical(test_arrow_altrep_copy_by_element(altrep), original) expect_false(test_arrow_altrep_is_materialized(altrep)) - # DATAPTR() should always materialize for strings - expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original) + # match() calls DATAPTR() internally which materializes the vector + match(altrep, c("1", "40", "999")) expect_true(test_arrow_altrep_is_materialized(altrep)) # test element access after materialization expect_true(test_arrow_altrep_is_materialized(altrep)) expect_identical(test_arrow_altrep_copy_by_element(altrep), original) - 
expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original) }) test_that("element access methods for character ALTREP from large_utf8()", { @@ -265,14 +264,13 @@ test_that("element access methods for character ALTREP from large_utf8()", { expect_identical(test_arrow_altrep_copy_by_element(altrep), original) expect_false(test_arrow_altrep_is_materialized(altrep)) - # DATAPTR() should always materialize for strings - expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original) + # match() calls DATAPTR() internally which materializes the vector + match(altrep, c("1", "40", "999")) expect_true(test_arrow_altrep_is_materialized(altrep)) # test element access after materialization expect_true(test_arrow_altrep_is_materialized(altrep)) expect_identical(test_arrow_altrep_copy_by_element(altrep), original) - expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original) }) test_that("empty vectors are not altrep", { diff --git a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb index 548622e7400a..5aae383e2b96 100644 --- a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb +++ b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowCUDA - VERSION = "20.0.0-SNAPSHOT" + VERSION = "20.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb index 64f8570f3d20..e72e6828e0e4 100644 --- a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb +++ b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb @@ -16,7 +16,7 @@ # under the License. 
module ArrowDataset - VERSION = "20.0.0-SNAPSHOT" + VERSION = "20.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb b/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb index ddd70bf43e1f..d6508df02082 100644 --- a/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb +++ b/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowFlightSQL - VERSION = "20.0.0-SNAPSHOT" + VERSION = "20.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-flight/lib/arrow-flight/version.rb b/ruby/red-arrow-flight/lib/arrow-flight/version.rb index f3970286b69c..74c23ecc6c12 100644 --- a/ruby/red-arrow-flight/lib/arrow-flight/version.rb +++ b/ruby/red-arrow-flight/lib/arrow-flight/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowFlight - VERSION = "20.0.0-SNAPSHOT" + VERSION = "20.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow/lib/arrow/version.rb b/ruby/red-arrow/lib/arrow/version.rb index eaeec61e7fed..4c5a462fec0a 100644 --- a/ruby/red-arrow/lib/arrow/version.rb +++ b/ruby/red-arrow/lib/arrow/version.rb @@ -16,7 +16,7 @@ # under the License. 
module Arrow - VERSION = "20.0.0-SNAPSHOT" + VERSION = "20.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow/red-arrow.gemspec b/ruby/red-arrow/red-arrow.gemspec index d23962d0961c..53e8d8bc08ae 100644 --- a/ruby/red-arrow/red-arrow.gemspec +++ b/ruby/red-arrow/red-arrow.gemspec @@ -62,7 +62,7 @@ Gem::Specification.new do |spec| spec.add_runtime_dependency("gio2", ">= 4.2.3") spec.add_runtime_dependency("pkg-config") - repository_url_prefix = "https://repo1.maven.org/maven2/org/apache/arrow" + repository_url_prefix = "https://packages.apache.org/artifactory/arrow" [ # Try without additional repository ["amazon_linux", "arrow-glib-devel"], diff --git a/ruby/red-gandiva/lib/gandiva/version.rb b/ruby/red-gandiva/lib/gandiva/version.rb index 657023bed0f6..f7f46ae589b8 100644 --- a/ruby/red-gandiva/lib/gandiva/version.rb +++ b/ruby/red-gandiva/lib/gandiva/version.rb @@ -16,7 +16,7 @@ # under the License. module Gandiva - VERSION = "20.0.0-SNAPSHOT" + VERSION = "20.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-parquet/lib/parquet/version.rb b/ruby/red-parquet/lib/parquet/version.rb index ecc9a975b0b5..494a52182917 100644 --- a/ruby/red-parquet/lib/parquet/version.rb +++ b/ruby/red-parquet/lib/parquet/version.rb @@ -16,7 +16,7 @@ # under the License. module Parquet - VERSION = "20.0.0-SNAPSHOT" + VERSION = "20.0.0" module Version numbers, TAG = VERSION.split("-")