Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ if(PYBIND11_MASTER_PROJECT)
endif()

set(PYBIND11_HEADERS
include/pybind11/detail/argument_vector.h
include/pybind11/detail/class.h
include/pybind11/detail/common.h
include/pybind11/detail/cpp_conduit.h
Expand Down
9 changes: 7 additions & 2 deletions include/pybind11/cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#pragma once

#include "detail/argument_vector.h"
#include "detail/common.h"
#include "detail/descr.h"
#include "detail/native_enum_data.h"
Expand Down Expand Up @@ -2037,6 +2038,8 @@ using is_pos_only = std::is_same<intrinsic_t<T>, pos_only>;
// forward declaration (definition in attr.h)
struct function_record;

constexpr std::size_t arg_vector_small_size = 6;

/// Internal data associated with a single function call
struct function_call {
function_call(const function_record &f, handle p); // Implementation in attr.h
Expand All @@ -2045,10 +2048,12 @@ struct function_call {
const function_record &func;

/// Arguments passed to the function:
std::vector<handle> args;
/// (Inline size chosen mostly arbitrarily; 6 should pad function_call out to two cache lines
/// (16 pointers) in size.)
argument_vector<arg_vector_small_size> args;

/// The `convert` value the arguments should be loaded with
std::vector<bool> args_convert;
args_convert_vector<arg_vector_small_size> args_convert;

/// Extra references for the optional `py::args` and/or `py::kwargs` arguments (which, if
/// present, are also in `args` but without a reference).
Expand Down
315 changes: 315 additions & 0 deletions include/pybind11/detail/argument_vector.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,315 @@
/*
pybind11/detail/argument_vector.h: small_vector-like containers to
avoid heap allocation of arguments during function call dispatch.

Copyright (c) Meta Platforms, Inc. and affiliates.

All rights reserved. Use of this source code is governed by a
BSD-style license that can be found in the LICENSE file.
*/

#pragma once

#include <pybind11/pytypes.h>

#include "common.h"

#include <algorithm>
#include <array>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <utility>
#include <vector>

PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)

PYBIND11_WARNING_DISABLE_MSVC(4127)

PYBIND11_NAMESPACE_BEGIN(detail)

// Shared implementation utility for our small_vector-like containers.
// We support C++11 and C++14, so we cannot use
// std::variant. Union with the tag packed next to the inline
// array's size is smaller anyway, allowing 1 extra handle of
// inline storage for free. Compare the layouts (1 line per
// size_t/void*):
// With variant, total is N + 2 for N >= 2:
// - variant tag (cannot be packed with the array size)
// - array size (or first pointer of 3 in std::vector)
// - N pointers of inline storage (or 2 remaining pointers of std::vector)
// Custom union, total is N + 1 for N >= 3:
// - variant tag & array size if applicable
// - N pointers of inline storage (or 3 pointers of std::vector)
//
// NOTE: this is a low-level representational convenience; the two
// use cases of this union are materially different and in particular
// have different semantics for inline_array::size. All that is being
// shared is the memory management behavior.
template <typename ArrayT, std::size_t InlineSize, typename VectorT = ArrayT>
union inline_array_or_vector {
struct inline_array {
bool is_inline = true;
std::uint32_t size = 0;
std::array<ArrayT, InlineSize> arr;
};
struct heap_vector {
bool is_inline = false;
std::vector<VectorT> vec;

heap_vector() = default;
heap_vector(std::size_t count, VectorT value) : vec(count, value) {}
};

inline_array array;
heap_vector vector;

static_assert(std::is_trivially_move_constructible<ArrayT>::value,
"ArrayT must be trivially move constructible");
static_assert(std::is_trivially_destructible<ArrayT>::value,
"ArrayT must be trivially destructible");

inline_array_or_vector() : array() {}
~inline_array_or_vector() {
if (!is_inline()) {
vector.~heap_vector();
}
}
inline_array_or_vector(const inline_array_or_vector &) = delete;
inline_array_or_vector &operator=(const inline_array_or_vector &) = delete;

inline_array_or_vector(inline_array_or_vector &&rhs) noexcept {
if (rhs.is_inline()) {
std::memcpy(&array, &rhs.array, sizeof(array));
} else {
new (&vector) heap_vector(std::move(rhs.vector));
}
assert(is_inline() == rhs.is_inline());
}

inline_array_or_vector &operator=(inline_array_or_vector &&rhs) noexcept {
if (this == &rhs) {
return *this;
}

if (rhs.is_inline()) {
if (!is_inline()) {
vector.~heap_vector();
}
std::memcpy(&array, &rhs.array, sizeof(array));
} else {
if (is_inline()) {
new (&vector) heap_vector(std::move(rhs.vector));
} else {
vector = std::move(rhs.vector);
}
}
return *this;
}

bool is_inline() const {
// It is undefined behavior to access the inactive member of a
// union directly. However, it is well-defined to reinterpret_cast any
// pointer into a pointer to char and examine it as an array
// of bytes. See
// https://dev-discuss.pytorch.org/t/unionizing-for-profit-how-to-exploit-the-power-of-unions-in-c/444#the-memcpy-loophole-4
bool result = false;
std::memcpy(&result, reinterpret_cast<const char *>(this), sizeof(bool));
return result;
}
};

// small_vector-like container to avoid heap allocation for N or fewer
// arguments.
template <std::size_t N>
struct argument_vector {
public:
argument_vector() = default;

argument_vector(const argument_vector &) = delete;
argument_vector &operator=(const argument_vector &) = delete;
argument_vector(argument_vector &&) noexcept = default;
argument_vector &operator=(argument_vector &&) noexcept = default;

std::size_t size() const {
if (is_inline()) {
return m_repr.array.size;
}
return m_repr.vector.vec.size();
}

handle &operator[](std::size_t idx) {
assert(idx < size());
if (is_inline()) {
return m_repr.array.arr[idx];
}
return m_repr.vector.vec[idx];
}

handle operator[](std::size_t idx) const {
assert(idx < size());
if (is_inline()) {
return m_repr.array.arr[idx];
}
return m_repr.vector.vec[idx];
}

void push_back(handle x) {
if (is_inline()) {
auto &ha = m_repr.array;
if (ha.size == N) {
move_to_vector_with_reserved_size(N + 1);
m_repr.vector.vec.push_back(x);
} else {
ha.arr[ha.size++] = x;
}
} else {
m_repr.vector.vec.push_back(x);
}
}

template <typename Arg>
void emplace_back(Arg &&x) {
push_back(handle(x));
}

void reserve(std::size_t sz) {
if (is_inline()) {
if (sz > N) {
move_to_vector_with_reserved_size(sz);
}
} else {
m_repr.vector.vec.reserve(sz);
}
}

private:
using repr_type = inline_array_or_vector<handle, N>;
repr_type m_repr;

void move_to_vector_with_reserved_size(std::size_t reserved_size) {
assert(is_inline());
auto &ha = m_repr.array;
using heap_vector = typename repr_type::heap_vector;
heap_vector hv;
hv.vec.reserve(reserved_size);
std::copy(ha.arr.begin(), ha.arr.begin() + ha.size, std::back_inserter(hv.vec));
new (&m_repr.vector) heap_vector(std::move(hv));
}

bool is_inline() const { return m_repr.is_inline(); }
};

// small_vector-like container to avoid heap allocation for N or fewer
// arguments.
template <std::size_t kRequestedInlineSize>
struct args_convert_vector {
private:
public:
args_convert_vector() = default;

args_convert_vector(const args_convert_vector &) = delete;
args_convert_vector &operator=(const args_convert_vector &) = delete;
args_convert_vector(args_convert_vector &&) noexcept = default;
args_convert_vector &operator=(args_convert_vector &&) noexcept = default;

args_convert_vector(std::size_t count, bool value) {
if (count > kInlineSize) {
new (&m_repr.vector) typename repr_type::heap_vector(count, value);
} else {
auto &inline_arr = m_repr.array;
inline_arr.arr.fill(value ? std::size_t(-1) : 0);
inline_arr.size = static_cast<decltype(inline_arr.size)>(count);
}
}

std::size_t size() const {
if (is_inline()) {
return m_repr.array.size;
}
return m_repr.vector.vec.size();
}

void reserve(std::size_t sz) {
if (is_inline()) {
if (sz > kInlineSize) {
move_to_vector_with_reserved_size(sz);
}
} else {
m_repr.vector.vec.reserve(sz);
}
}

bool operator[](std::size_t idx) const {
if (is_inline()) {
return inline_index(idx);
}
assert(idx < m_repr.vector.vec.size());
return m_repr.vector.vec[idx];
}

void push_back(bool b) {
if (is_inline()) {
auto &ha = m_repr.array;
if (ha.size == kInlineSize) {
move_to_vector_with_reserved_size(kInlineSize + 1);
m_repr.vector.vec.push_back(b);
} else {
assert(ha.size < kInlineSize);
const auto wbi = word_and_bit_index(ha.size++);
assert(wbi.word < kWords);
assert(wbi.bit < kBitsPerWord);
if (b) {
ha.arr[wbi.word] |= (std::size_t(1) << wbi.bit);
} else {
ha.arr[wbi.word] &= ~(std::size_t(1) << wbi.bit);
}
assert(operator[](ha.size - 1) == b);
}
} else {
m_repr.vector.vec.push_back(b);
}
}

void swap(args_convert_vector &rhs) noexcept { std::swap(m_repr, rhs.m_repr); }

private:
struct WordAndBitIndex {
std::size_t word;
std::size_t bit;
};

static WordAndBitIndex word_and_bit_index(std::size_t idx) {
return WordAndBitIndex{idx / kBitsPerWord, idx % kBitsPerWord};
}

bool inline_index(std::size_t idx) const {
const auto wbi = word_and_bit_index(idx);
assert(wbi.word < kWords);
assert(wbi.bit < kBitsPerWord);
return m_repr.array.arr[wbi.word] & (std::size_t(1) << wbi.bit);
}

void move_to_vector_with_reserved_size(std::size_t reserved_size) {
auto &inline_arr = m_repr.array;
using heap_vector = typename repr_type::heap_vector;
heap_vector hv;
hv.vec.reserve(reserved_size);
for (std::size_t ii = 0; ii < inline_arr.size; ++ii) {
hv.vec.push_back(inline_index(ii));
}
new (&m_repr.vector) heap_vector(std::move(hv));
}

static constexpr auto kBitsPerWord = 8 * sizeof(std::size_t);
static constexpr auto kWords = (kRequestedInlineSize + kBitsPerWord - 1) / kBitsPerWord;
static constexpr auto kInlineSize = kWords * kBitsPerWord;

using repr_type = inline_array_or_vector<std::size_t, kWords, bool>;
repr_type m_repr;

bool is_inline() const { return m_repr.is_inline(); }
};

PYBIND11_NAMESPACE_END(detail)
PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
7 changes: 4 additions & 3 deletions include/pybind11/pybind11.h
Original file line number Diff line number Diff line change
Expand Up @@ -1048,13 +1048,14 @@ class cpp_function : public function {
}
#endif

std::vector<bool> second_pass_convert;
args_convert_vector<arg_vector_small_size> second_pass_convert;
if (overloaded) {
// We're in the first no-convert pass, so swap out the conversion flags for a
// set of all-false flags. If the call fails, we'll swap the flags back in for
// the conversion-allowed call below.
second_pass_convert.resize(func.nargs, false);
call.args_convert.swap(second_pass_convert);
second_pass_convert = std::move(call.args_convert);
call.args_convert
= args_convert_vector<arg_vector_small_size>(func.nargs, false);
}

// 6. Call the function.
Expand Down
4 changes: 2 additions & 2 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -647,8 +647,8 @@ if(NOT PYBIND11_CUDA_TESTS)
# Test pure C++ code (not depending on Python). Provides the `test_pure_cpp` target.
add_subdirectory(pure_cpp)

# Test embedding the interpreter. Provides the `cpptest` target.
add_subdirectory(test_embed)
# Test C++ code that depends on Python, such as embedding the interpreter. Provides the `cpptest` target.
add_subdirectory(test_with_catch)

# Test CMake build using functions and targets from subdirectory or installed location
add_subdirectory(test_cmake_build)
Expand Down
1 change: 1 addition & 0 deletions tests/extra_python_package/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
}

detail_headers = {
"include/pybind11/detail/argument_vector.h",
"include/pybind11/detail/class.h",
"include/pybind11/detail/common.h",
"include/pybind11/detail/cpp_conduit.h",
Expand Down
Loading
Loading