Skip to content

Commit 60875a1

Browse files
committed
Disable some stuff for debugging
1 parent 078c21b commit 60875a1

13 files changed

Lines changed: 145 additions & 120 deletions

File tree

.github/workflows/cpp.yml

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,23 +40,23 @@ on:
4040
- 'compose.yaml'
4141
- 'format/Flight.proto'
4242
- 'testing'
43-
pull_request:
44-
paths:
45-
- '.dockerignore'
46-
- '.github/workflows/cpp.yml'
47-
- 'ci/conda_env_*'
48-
- 'ci/docker/**'
49-
- 'ci/scripts/ccache_setup.sh'
50-
- 'ci/scripts/cpp_*'
51-
- 'ci/scripts/install_azurite.sh'
52-
- 'ci/scripts/install_gcs_testbench.sh'
53-
- 'ci/scripts/install_minio.sh'
54-
- 'ci/scripts/msys2_*'
55-
- 'ci/scripts/util_*'
56-
- 'cpp/**'
57-
- 'compose.yaml'
58-
- 'format/Flight.proto'
59-
- 'testing'
43+
# pull_request:
44+
# paths:
45+
# - '.dockerignore'
46+
# - '.github/workflows/cpp.yml'
47+
# - 'ci/conda_env_*'
48+
# - 'ci/docker/**'
49+
# - 'ci/scripts/ccache_setup.sh'
50+
# - 'ci/scripts/cpp_*'
51+
# - 'ci/scripts/install_azurite.sh'
52+
# - 'ci/scripts/install_gcs_testbench.sh'
53+
# - 'ci/scripts/install_minio.sh'
54+
# - 'ci/scripts/msys2_*'
55+
# - 'ci/scripts/util_*'
56+
# - 'cpp/**'
57+
# - 'compose.yaml'
58+
# - 'format/Flight.proto'
59+
# - 'testing'
6060

6161
concurrency:
6262
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}

.github/workflows/dev.yml

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ on:
2525
- '!dependabot/**'
2626
tags:
2727
- '**'
28-
pull_request:
28+
# pull_request:
2929

3030
concurrency:
3131
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
@@ -50,25 +50,25 @@ jobs:
5050
uses: actions/checkout@v6
5151
with:
5252
fetch-depth: 0
53-
- name: Install pre-commit
54-
run: |
55-
sudo apt update
56-
sudo apt install -y -V \
57-
pre-commit \
58-
r-base \
59-
ruby-dev
60-
- name: Cache pre-commit
61-
uses: actions/cache@v5
62-
with:
63-
path: |
64-
~/.cache/pre-commit
65-
~/.local/share/renv/cache
66-
key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}
67-
- name: Install Air
68-
uses: posit-dev/setup-air@63e80dedb6d275c94a3841e15e5ff8691e1ab237 # v1.0.0
69-
- name: Run pre-commit
70-
run: |
71-
pre-commit run --all-files --color=always --show-diff-on-failure
53+
# - name: Install pre-commit
54+
# run: |
55+
# sudo apt update
56+
# sudo apt install -y -V \
57+
# pre-commit \
58+
# r-base \
59+
# ruby-dev
60+
# - name: Cache pre-commit
61+
# uses: actions/cache@v5
62+
# with:
63+
# path: |
64+
# ~/.cache/pre-commit
65+
# ~/.local/share/renv/cache
66+
# key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}
67+
# - name: Install Air
68+
# uses: posit-dev/setup-air@63e80dedb6d275c94a3841e15e5ff8691e1ab237 # v1.0.0
69+
# - name: Run pre-commit
70+
# run: |
71+
# pre-commit run --all-files --color=always --show-diff-on-failure
7272

7373
release:
7474
name: Source Release and Merge Script on ${{ matrix.runs-on }}

.github/workflows/integration.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,16 @@ on:
3333
- 'integration/**'
3434
- 'cpp/**'
3535
- 'format/**'
36-
pull_request:
37-
paths:
38-
- '.dockerignore'
39-
- '.github/workflows/integration.yml'
40-
- 'ci/**'
41-
- 'dev/archery/**'
42-
- 'compose.yaml'
43-
- 'integration/**'
44-
- 'cpp/**'
45-
- 'format/**'
36+
# pull_request:
37+
# paths:
38+
# - '.dockerignore'
39+
# - '.github/workflows/integration.yml'
40+
# - 'ci/**'
41+
# - 'dev/archery/**'
42+
# - 'compose.yaml'
43+
# - 'integration/**'
44+
# - 'cpp/**'
45+
# - 'format/**'
4646

4747
concurrency:
4848
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}

.github/workflows/matlab.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,12 @@ on:
2929
- 'ci/scripts/matlab*.sh'
3030
- 'matlab/**'
3131
- 'cpp/src/arrow/**'
32-
pull_request:
33-
paths:
34-
- '.github/workflows/matlab.yml'
35-
- 'ci/scripts/matlab*.sh'
36-
- 'matlab/**'
37-
- 'cpp/src/arrow/**'
32+
# pull_request:
33+
# paths:
34+
# - '.github/workflows/matlab.yml'
35+
# - 'ci/scripts/matlab*.sh'
36+
# - 'matlab/**'
37+
# - 'cpp/src/arrow/**'
3838

3939
concurrency:
4040
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}

.github/workflows/python.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,14 @@ on:
3131
- 'cpp/**'
3232
- 'compose.yaml'
3333
- 'python/**'
34-
pull_request:
35-
paths:
36-
- '.dockerignore'
37-
- '.github/workflows/python.yml'
38-
- 'ci/**'
39-
- 'cpp/**'
40-
- 'compose.yaml'
41-
- 'python/**'
34+
# pull_request:
35+
# paths:
36+
# - '.dockerignore'
37+
# - '.github/workflows/python.yml'
38+
# - 'ci/**'
39+
# - 'cpp/**'
40+
# - 'compose.yaml'
41+
# - 'python/**'
4242

4343
concurrency:
4444
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}

.github/workflows/r.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@ on:
5050
- "compose.yaml"
5151
- "r/**"
5252

53-
concurrency:
54-
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
55-
cancel-in-progress: true
53+
# concurrency:
54+
# group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
55+
# cancel-in-progress: true
5656

5757
permissions:
5858
contents: read
@@ -261,6 +261,8 @@ jobs:
261261
env:
262262
ARROW_R_CXXFLAGS: "-Werror"
263263
_R_CHECK_TESTS_NLINES_: 0
264+
OMP_NUM_THREADS: 1
265+
OMP_THREAD_LIMIT: 1
264266
steps:
265267
- run: git config --global core.autocrlf false
266268
- name: Checkout Arrow

.github/workflows/ruby.yml

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -38,21 +38,21 @@ on:
3838
- 'cpp/**'
3939
- 'compose.yaml'
4040
- 'ruby/**'
41-
pull_request:
42-
paths:
43-
- '.dockerignore'
44-
- '.github/workflows/ruby.yml'
45-
- 'ci/docker/**'
46-
- 'ci/scripts/c_glib_*'
47-
- 'ci/scripts/ccache_setup.sh'
48-
- 'ci/scripts/cpp_*'
49-
- 'ci/scripts/msys2_*'
50-
- 'ci/scripts/ruby_*'
51-
- 'ci/scripts/util_*'
52-
- 'c_glib/**'
53-
- 'cpp/**'
54-
- 'compose.yaml'
55-
- 'ruby/**'
41+
# pull_request:
42+
# paths:
43+
# - '.dockerignore'
44+
# - '.github/workflows/ruby.yml'
45+
# - 'ci/docker/**'
46+
# - 'ci/scripts/c_glib_*'
47+
# - 'ci/scripts/ccache_setup.sh'
48+
# - 'ci/scripts/cpp_*'
49+
# - 'ci/scripts/msys2_*'
50+
# - 'ci/scripts/ruby_*'
51+
# - 'ci/scripts/util_*'
52+
# - 'c_glib/**'
53+
# - 'cpp/**'
54+
# - 'compose.yaml'
55+
# - 'ruby/**'
5656

5757
concurrency:
5858
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}

ci/scripts/PKGBUILD

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,15 +114,15 @@ build() {
114114
-DARROW_CSV=ON \
115115
-DARROW_DATASET=ON \
116116
-DARROW_FILESYSTEM=ON \
117-
-DARROW_GCS=ON \
117+
-DARROW_GCS=OFF \
118118
-DARROW_HDFS=OFF \
119119
-DARROW_JEMALLOC=OFF \
120120
-DARROW_JSON=ON \
121121
-DARROW_LZ4_USE_SHARED=OFF \
122122
-DARROW_MIMALLOC=ON \
123123
-DARROW_PACKAGE_PREFIX="${MINGW_PREFIX}" \
124124
-DARROW_PARQUET=ON \
125-
-DARROW_S3=ON \
125+
-DARROW_S3=OFF \
126126
-DARROW_SNAPPY_USE_SHARED=OFF \
127127
-DARROW_USE_GLOG=OFF \
128128
-DARROW_UTF8PROC_USE_SHARED=OFF \
@@ -132,8 +132,8 @@ build() {
132132
-DARROW_WITH_SNAPPY=ON \
133133
-DARROW_WITH_ZLIB=ON \
134134
-DARROW_WITH_ZSTD=ON \
135-
-DARROW_WITH_BROTLI=ON \
136-
-DARROW_WITH_BZ2=ON \
135+
-DARROW_WITH_BROTLI=OFF \
136+
-DARROW_WITH_BZ2=OFF \
137137
-DARROW_ZSTD_USE_SHARED=OFF \
138138
-DARROW_CXXFLAGS="${CPPFLAGS}" \
139139
-DAWSSDK_SOURCE=BUNDLED \
@@ -147,6 +147,7 @@ build() {
147147
popd
148148
}
149149

150+
150151
package() {
151152
make -C ${cpp_build_dir} DESTDIR="${pkgdir}" install
152153

cpp/cmake_modules/SetupCxxFlags.cmake

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,12 @@ if(ARROW_CPU_FLAG STREQUAL "x86")
7474
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
7575
# Check for AVX extensions on 64-bit systems only, as 32-bit support seems iffy
7676
list(JOIN ARROW_AVX2_FLAGS " " ARROW_AVX2_FLAGS_COMMAND_LINE)
77-
check_cxx_compiler_flag("${ARROW_AVX2_FLAGS_COMMAND_LINE}" CXX_SUPPORTS_AVX2)
77+
if(MINGW AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
78+
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
79+
message(STATUS "Disable AVX2 support on gcc / MINGW for now")
80+
else()
81+
check_cxx_compiler_flag("${ARROW_AVX2_FLAGS_COMMAND_LINE}" CXX_SUPPORTS_AVX2)
82+
endif()
7883
if(MINGW)
7984
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65782
8085
message(STATUS "Disable AVX512 support on MINGW for now")

cpp/src/arrow/util/bpacking_dispatch_internal.h

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,11 @@ using SpreadBufferUint = std::conditional_t<
9898
/// stop if it finds a byte aligned value start.
9999
template <int kPackedBitWidth, bool kIsProlog, typename Uint>
100100
int unpack_exact(const uint8_t* in, Uint* out, int batch_size, int bit_offset) {
101+
static_assert(kPackedBitWidth > 0);
102+
101103
// For the epilog we adapt the max spread since better alignment gives shorter spreads
102-
ARROW_DCHECK(kIsProlog || bit_offset == 0);
103-
ARROW_DCHECK(bit_offset >= 0 && bit_offset < 8);
104+
ARROW_CHECK(kIsProlog || bit_offset == 0);
105+
ARROW_CHECK(bit_offset >= 0 && bit_offset < 8);
104106
constexpr int kMaxSpreadBytes = kIsProlog ? PackedMaxSpreadBytes(kPackedBitWidth)
105107
: PackedMaxSpreadBytes(kPackedBitWidth, 0);
106108
using buffer_uint = SpreadBufferUint<kMaxSpreadBytes>;
@@ -112,16 +114,17 @@ int unpack_exact(const uint8_t* in, Uint* out, int batch_size, int bit_offset) {
112114
constexpr buffer_uint kLowMask =
113115
bit_util::LeastSignificantBitMask<buffer_uint, true>(kPackedBitWidth);
114116

115-
ARROW_DCHECK_GE(bit_offset, 0);
116-
ARROW_DCHECK_LE(bit_offset, 8);
117+
ARROW_CHECK_GE(bit_offset, 0);
118+
ARROW_CHECK_LE(bit_offset, 8);
117119

118120
// Looping over values one by one
119121
const int start_bit_term = batch_size * kPackedBitWidth + bit_offset;
120122
int start_bit = bit_offset;
121123
while ((start_bit < start_bit_term) && (!kIsProlog || (start_bit % 8 != 0))) {
122124
const int start_byte = start_bit / 8;
123125
const int spread_bytes = ((start_bit + kPackedBitWidth - 1) / 8) - start_byte + 1;
124-
ARROW_COMPILER_ASSUME(spread_bytes <= kMaxSpreadBytes);
126+
ARROW_CHECK_LE(spread_bytes, kMaxSpreadBytes);
127+
// ARROW_COMPILER_ASSUME(spread_bytes <= kMaxSpreadBytes);
125128

126129
// Reading the bytes for the current value.
127130
// Must be careful not to read out of input bounds.
@@ -130,8 +133,10 @@ int unpack_exact(const uint8_t* in, Uint* out, int batch_size, int bit_offset) {
130133
// We read the max possible bytes in the first pass and handle the rest after.
131134
// Even though the worst spread does not happen on all iterations we can still read
132135
// all bytes because we will mask them.
136+
// ARROW_LOG(INFO) << " > reading " << std::min(kBufferSize, spread_bytes) << " bytes from " <<reinterpret_cast<const void*>(in + start_byte);
133137
std::memcpy(&buffer, in + start_byte, std::min(kBufferSize, spread_bytes));
134138
} else {
139+
// ARROW_LOG(INFO) << " > reading " << spread_bytes << " bytes from " <<reinterpret_cast<const void*>(in + start_byte);
135140
std::memcpy(&buffer, in + start_byte, spread_bytes);
136141
}
137142

@@ -144,6 +149,7 @@ int unpack_exact(const uint8_t* in, Uint* out, int batch_size, int bit_offset) {
144149
if constexpr (kLarge) {
145150
// The oversized bytes do not occur on every iteration
146151
if (spread_bytes > kBufferSize) {
152+
// ARROW_LOG(INFO) << " > reading " << spread_bytes - kBufferSize << " bytes from " <<reinterpret_cast<const void*>(in + start_byte + kBufferSize);
147153
std::memcpy(&buffer, in + start_byte + kBufferSize, spread_bytes - kBufferSize);
148154
buffer = bit_util::FromLittleEndian(buffer);
149155
buffer <<= 8 * kBufferSize - bit_offset;
@@ -156,7 +162,7 @@ int unpack_exact(const uint8_t* in, Uint* out, int batch_size, int bit_offset) {
156162
start_bit += kPackedBitWidth;
157163
}
158164

159-
ARROW_DCHECK((start_bit - bit_offset) % kPackedBitWidth == 0);
165+
ARROW_CHECK((start_bit - bit_offset) % kPackedBitWidth == 0);
160166
return (start_bit - bit_offset) / kPackedBitWidth;
161167
}
162168

@@ -185,16 +191,23 @@ void unpack_width(const uint8_t* in, UnpackedUInt* out, int batch_size, int bit_
185191
bit_util::BytesForBits(batch_size * kPackedBitWidth + bit_offset));
186192
// If specified, max_read_bytes must be greater that the bytes needed to extract the
187193
// number of desired values.
188-
ARROW_DCHECK(max_read_bytes < 0 || bytes_batch <= max_read_bytes);
194+
ARROW_CHECK(max_read_bytes < 0 || bytes_batch <= max_read_bytes);
189195
const uint8_t* in_end = in + (max_read_bytes >= 0 ? max_read_bytes : bytes_batch);
190196

197+
ARROW_LOG(INFO) << "... unpack: width=" << kPackedBitWidth
198+
<< ", in=" <<reinterpret_cast<const void*>(in)
199+
<< ", batch_size = " << batch_size << ", bit_offset = " << bit_offset
200+
<< ", max_read_bytes=" << max_read_bytes
201+
<< " (reading up to" << reinterpret_cast<const void*>(in_end - 1)
202+
<< ")";
203+
191204
// In case of misalignment, we need to run the prolog until aligned.
192205
int extracted = unpack_exact<kPackedBitWidth, true>(in, out, batch_size, bit_offset);
193206
// We either extracted everything or reached a byte-aligned position
194207
const int start_bit = extracted * kPackedBitWidth + bit_offset;
195-
ARROW_DCHECK((extracted == batch_size) || ((start_bit) % 8 == 0));
208+
ARROW_CHECK((extracted == batch_size) || ((start_bit) % 8 == 0));
196209
batch_size -= extracted;
197-
ARROW_DCHECK_GE(batch_size, 0);
210+
ARROW_CHECK_GE(batch_size, 0);
198211
in += start_bit / 8;
199212
out += extracted;
200213

@@ -221,14 +234,22 @@ void unpack_width(const uint8_t* in, UnpackedUInt* out, int batch_size, int bit_
221234
// Performance check making sure we ran the kernel loop as much as possible:
222235
// Either we ran out because we could not pack enough values, or because we would
223236
// overread.
224-
ARROW_DCHECK((batch_size < kValuesUnpacked) || (in_end - in) < kBytesRead);
237+
ARROW_CHECK((batch_size < kValuesUnpacked) || (in_end - in) < kBytesRead);
225238
}
226239

227240
// Running the epilog for the remaining values that don't fit in a kernel
228-
ARROW_DCHECK_GE(batch_size, 0);
241+
const auto epilog_bytes = bit_util::BytesForBits(batch_size * kPackedBitWidth);
242+
ARROW_LOG(INFO) << " > calling unpack_exact: "
243+
<< "in=" << reinterpret_cast<const void*>(in)
244+
<< ", batch_size=" << batch_size
245+
<< ", epilog_bytes=" << epilog_bytes
246+
<< " (expecting to read up to " << reinterpret_cast<const void*>(in + (epilog_bytes - 1))
247+
<< ")";
248+
ARROW_CHECK_GE(batch_size, 0);
229249
ARROW_COMPILER_ASSUME(batch_size >= 0);
230250
unpack_exact<kPackedBitWidth, false>(in, out, batch_size, /* bit_offset= */ 0);
231251
}
252+
ARROW_LOG(INFO) << " > /unpack finished";
232253
}
233254
}
234255

@@ -628,6 +649,6 @@ static void unpack_jump(const uint8_t* in, UnpackedUint* out, const UnpackOption
628649
opt.max_read_bytes);
629650
}
630651
}
631-
ARROW_DCHECK(false) << "Unsupported num_bits " << opt.bit_width;
652+
ARROW_CHECK(false) << "Unsupported num_bits " << opt.bit_width;
632653
}
633654
} // namespace arrow::internal::bpacking

0 commit comments

Comments
 (0)