Browse Source

Revert "Make wrapped C++ functions pickleable (#5580)"

This reverts commit e7e5d6e5bb.

Signed-off-by: Henry Schreiner <henryschreineriii@gmail.com>
henryiii/revert/pycapsule
Henry Schreiner 2 weeks ago
parent
commit
19ddbe5bc3
No known key found for this signature in database
GPG Key ID: B9D0E45146A241E8
  1. 1
      CMakeLists.txt
  2. 1
      include/pybind11/attr.h
  3. 208
      include/pybind11/detail/function_record_pyobject.h
  4. 30
      include/pybind11/detail/internals.h
  5. 11
      include/pybind11/functional.h
  6. 73
      include/pybind11/pybind11.h
  7. 12
      include/pybind11/pytypes.h
  8. 1
      tests/extra_python_package/test_files.py
  9. 46
      tests/test_pickling.py

1
CMakeLists.txt

@ -186,7 +186,6 @@ set(PYBIND11_HEADERS @@ -186,7 +186,6 @@ set(PYBIND11_HEADERS
include/pybind11/detail/descr.h
include/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h
include/pybind11/detail/exception_translation.h
include/pybind11/detail/function_record_pyobject.h
include/pybind11/detail/init.h
include/pybind11/detail/internals.h
include/pybind11/detail/native_enum_data.h

1
include/pybind11/attr.h

@ -193,7 +193,6 @@ struct argument_record { @@ -193,7 +193,6 @@ struct argument_record {
/// Internal data structure which holds metadata about a bound function (signature, overloads,
/// etc.)
#define PYBIND11_DETAIL_FUNCTION_RECORD_ABI_ID "v1" // PLEASE UPDATE if the struct is changed.
struct function_record {
function_record()
: is_constructor(false), is_new_style_constructor(false), is_stateless(false),

208
include/pybind11/detail/function_record_pyobject.h

@ -1,208 +0,0 @@ @@ -1,208 +0,0 @@
// Copyright (c) 2024-2025 The Pybind Development Team.
// All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// For background see the description of PR google/pybind11clif#30099.
#pragma once
#include <pybind11/attr.h>
#include <pybind11/conduit/pybind11_platform_abi_id.h>
#include <pybind11/pytypes.h>
#include "common.h"
#include <cstring>
#include <utility>
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
PYBIND11_NAMESPACE_BEGIN(detail)
struct function_record_PyObject {
PyObject_HEAD
function_record *cpp_func_rec;
};
PYBIND11_NAMESPACE_BEGIN(function_record_PyTypeObject_methods)
PyObject *tp_new_impl(PyTypeObject *type, PyObject *args, PyObject *kwds);
PyObject *tp_alloc_impl(PyTypeObject *type, Py_ssize_t nitems);
int tp_init_impl(PyObject *self, PyObject *args, PyObject *kwds);
void tp_dealloc_impl(PyObject *self);
void tp_free_impl(void *self);
static PyObject *reduce_ex_impl(PyObject *self, PyObject *, PyObject *);
static PyMethodDef tp_methods_impl[]
= {{"__reduce_ex__",
// reduce_ex_impl is a PyCFunctionWithKeywords, but PyMethodDef
// requires a PyCFunction. The cast through void* is safe and
// idiomatic with METH_KEYWORDS, and it successfully sidesteps
// unhelpful compiler warnings.
// NOLINTNEXTLINE(bugprone-casting-through-void)
reinterpret_cast<PyCFunction>(reinterpret_cast<void *>(reduce_ex_impl)),
METH_VARARGS | METH_KEYWORDS,
nullptr},
{nullptr, nullptr, 0, nullptr}};
// Note that this name is versioned.
constexpr char tp_name_impl[]
= "pybind11_detail_function_record_" PYBIND11_DETAIL_FUNCTION_RECORD_ABI_ID
"_" PYBIND11_PLATFORM_ABI_ID;
PYBIND11_NAMESPACE_END(function_record_PyTypeObject_methods)
// Designated initializers are a C++20 feature:
// https://en.cppreference.com/w/cpp/language/aggregate_initialization#Designated_initializers
// MSVC rejects them unless /std:c++20 is used (error code C7555).
PYBIND11_WARNING_PUSH
PYBIND11_WARNING_DISABLE_CLANG("-Wmissing-field-initializers")
#if defined(__GNUC__) && __GNUC__ >= 8
PYBIND11_WARNING_DISABLE_GCC("-Wmissing-field-initializers")
#endif
static PyTypeObject function_record_PyTypeObject = {
PyVarObject_HEAD_INIT(nullptr, 0)
/* const char *tp_name */ function_record_PyTypeObject_methods::tp_name_impl,
/* Py_ssize_t tp_basicsize */ sizeof(function_record_PyObject),
/* Py_ssize_t tp_itemsize */ 0,
/* destructor tp_dealloc */ function_record_PyTypeObject_methods::tp_dealloc_impl,
/* Py_ssize_t tp_vectorcall_offset */ 0,
/* getattrfunc tp_getattr */ nullptr,
/* setattrfunc tp_setattr */ nullptr,
/* PyAsyncMethods *tp_as_async */ nullptr,
/* reprfunc tp_repr */ nullptr,
/* PyNumberMethods *tp_as_number */ nullptr,
/* PySequenceMethods *tp_as_sequence */ nullptr,
/* PyMappingMethods *tp_as_mapping */ nullptr,
/* hashfunc tp_hash */ nullptr,
/* ternaryfunc tp_call */ nullptr,
/* reprfunc tp_str */ nullptr,
/* getattrofunc tp_getattro */ nullptr,
/* setattrofunc tp_setattro */ nullptr,
/* PyBufferProcs *tp_as_buffer */ nullptr,
/* unsigned long tp_flags */ Py_TPFLAGS_DEFAULT,
/* const char *tp_doc */ nullptr,
/* traverseproc tp_traverse */ nullptr,
/* inquiry tp_clear */ nullptr,
/* richcmpfunc tp_richcompare */ nullptr,
/* Py_ssize_t tp_weaklistoffset */ 0,
/* getiterfunc tp_iter */ nullptr,
/* iternextfunc tp_iternext */ nullptr,
/* struct PyMethodDef *tp_methods */ function_record_PyTypeObject_methods::tp_methods_impl,
/* struct PyMemberDef *tp_members */ nullptr,
/* struct PyGetSetDef *tp_getset */ nullptr,
/* struct _typeobject *tp_base */ nullptr,
/* PyObject *tp_dict */ nullptr,
/* descrgetfunc tp_descr_get */ nullptr,
/* descrsetfunc tp_descr_set */ nullptr,
/* Py_ssize_t tp_dictoffset */ 0,
/* initproc tp_init */ function_record_PyTypeObject_methods::tp_init_impl,
/* allocfunc tp_alloc */ function_record_PyTypeObject_methods::tp_alloc_impl,
/* newfunc tp_new */ function_record_PyTypeObject_methods::tp_new_impl,
/* freefunc tp_free */ function_record_PyTypeObject_methods::tp_free_impl,
/* inquiry tp_is_gc */ nullptr,
/* PyObject *tp_bases */ nullptr,
/* PyObject *tp_mro */ nullptr,
/* PyObject *tp_cache */ nullptr,
/* PyObject *tp_subclasses */ nullptr,
/* PyObject *tp_weaklist */ nullptr,
/* destructor tp_del */ nullptr,
/* unsigned int tp_version_tag */ 0,
/* destructor tp_finalize */ nullptr,
/* vectorcallfunc tp_vectorcall */ nullptr,
};
PYBIND11_WARNING_POP
static bool function_record_PyTypeObject_PyType_Ready_first_call = true;
inline void function_record_PyTypeObject_PyType_Ready() {
if (function_record_PyTypeObject_PyType_Ready_first_call) {
if (PyType_Ready(&function_record_PyTypeObject) < 0) {
throw error_already_set();
}
function_record_PyTypeObject_PyType_Ready_first_call = false;
}
}
inline bool is_function_record_PyObject(PyObject *obj) {
if (PyType_Check(obj) != 0) {
return false;
}
PyTypeObject *obj_type = Py_TYPE(obj);
// Fast path (pointer comparison).
if (obj_type == &function_record_PyTypeObject) {
return true;
}
// This works across extension modules. Note that tp_name is versioned.
if (strcmp(obj_type->tp_name, function_record_PyTypeObject.tp_name) == 0) {
return true;
}
return false;
}
inline function_record *function_record_ptr_from_PyObject(PyObject *obj) {
if (is_function_record_PyObject(obj)) {
return ((detail::function_record_PyObject *) obj)->cpp_func_rec;
}
return nullptr;
}
inline object function_record_PyObject_New() {
auto *py_func_rec = PyObject_New(function_record_PyObject, &function_record_PyTypeObject);
if (py_func_rec == nullptr) {
throw error_already_set();
}
py_func_rec->cpp_func_rec = nullptr; // For clarity/purity. Redundant in practice.
return reinterpret_steal<object>((PyObject *) py_func_rec);
}
PYBIND11_NAMESPACE_BEGIN(function_record_PyTypeObject_methods)
// Guard against accidents & oversights, in particular when porting to future Python versions.
inline PyObject *tp_new_impl(PyTypeObject *, PyObject *, PyObject *) {
pybind11_fail("UNEXPECTED CALL OF function_record_PyTypeObject_methods::tp_new_impl");
// return nullptr; // Unreachable.
}
inline PyObject *tp_alloc_impl(PyTypeObject *, Py_ssize_t) {
pybind11_fail("UNEXPECTED CALL OF function_record_PyTypeObject_methods::tp_alloc_impl");
// return nullptr; // Unreachable.
}
inline int tp_init_impl(PyObject *, PyObject *, PyObject *) {
pybind11_fail("UNEXPECTED CALL OF function_record_PyTypeObject_methods::tp_init_impl");
// return -1; // Unreachable.
}
inline void tp_free_impl(void *) {
pybind11_fail("UNEXPECTED CALL OF function_record_PyTypeObject_methods::tp_free_impl");
}
inline PyObject *reduce_ex_impl(PyObject *self, PyObject *, PyObject *) {
// Deliberately ignoring the arguments for simplicity (expected is `protocol: int`).
const function_record *rec = function_record_ptr_from_PyObject(self);
if (rec == nullptr) {
pybind11_fail(
"FATAL: function_record_PyTypeObject reduce_ex_impl(): cannot obtain cpp_func_rec.");
}
if (rec->name != nullptr && rec->name[0] != '\0' && rec->scope
&& PyModule_Check(rec->scope.ptr()) != 0) {
object scope_module = get_scope_module(rec->scope);
if (scope_module) {
auto builtins = reinterpret_borrow<dict>(PyEval_GetBuiltins());
auto builtins_eval = builtins["eval"];
auto reconstruct_args = make_tuple(str("__import__('importlib').import_module('")
+ scope_module + str("')"));
return make_tuple(std::move(builtins_eval), std::move(reconstruct_args))
.release()
.ptr();
}
}
set_error(PyExc_RuntimeError, repr(self) + str(" is not pickleable."));
return nullptr;
}
PYBIND11_NAMESPACE_END(function_record_PyTypeObject_methods)
PYBIND11_NAMESPACE_END(detail)
PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)

30
include/pybind11/detail/internals.h

@ -37,11 +37,11 @@ @@ -37,11 +37,11 @@
/// further ABI-incompatible changes may be made before the ABI is officially
/// changed to the new version.
#ifndef PYBIND11_INTERNALS_VERSION
# define PYBIND11_INTERNALS_VERSION 11
# define PYBIND11_INTERNALS_VERSION 12
#endif
#if PYBIND11_INTERNALS_VERSION < 11
# error "PYBIND11_INTERNALS_VERSION 11 is the minimum for all platforms for pybind11v3."
#if PYBIND11_INTERNALS_VERSION < 12
# error "PYBIND11_INTERNALS_VERSION 12 is the minimum for all platforms for pybind11v3."
#endif
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
@ -254,6 +254,10 @@ struct internals { @@ -254,6 +254,10 @@ struct internals {
// Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined:
PyInterpreterState *istate = nullptr;
// Note that we have to use a std::string to allocate memory to ensure a unique address
// We want unique addresses since we use pointer equality to compare function records
std::string function_record_capsule_name = internals_function_record_capsule_name;
type_map<PyObject *> native_enum_type_map;
internals()
@ -747,6 +751,26 @@ const char *c_str(Args &&...args) { @@ -747,6 +751,26 @@ const char *c_str(Args &&...args) {
return strings.front().c_str();
}
inline const char *get_function_record_capsule_name() {
// On GraalPy, pointer equality of the names is currently not guaranteed
#if !defined(GRAALVM_PYTHON)
return get_internals().function_record_capsule_name.c_str();
#else
return nullptr;
#endif
}
// Determine whether or not the following capsule contains a pybind11 function record.
// Note that we use `internals` to make sure that only ABI compatible records are touched.
//
// This check is currently used in two places:
// - An important optimization in functional.h to avoid overhead in C++ -> Python -> C++
// - The sibling feature of cpp_function to allow overloads
inline bool is_function_record_capsule(const capsule &cap) {
// Pointer equality as we rely on internals() to ensure unique pointers
return cap.name() == get_function_record_capsule_name();
}
PYBIND11_NAMESPACE_END(detail)
/// Returns a named pointer that is shared among all extension modules (using the same

11
include/pybind11/functional.h

@ -91,8 +91,15 @@ public: @@ -91,8 +91,15 @@ public:
auto *cfunc_self = PyCFunction_GET_SELF(cfunc.ptr());
if (cfunc_self == nullptr) {
PyErr_Clear();
} else {
function_record *rec = function_record_ptr_from_PyObject(cfunc_self);
} else if (isinstance<capsule>(cfunc_self)) {
auto c = reinterpret_borrow<capsule>(cfunc_self);
function_record *rec = nullptr;
// Check that we can safely reinterpret the capsule into a function_record
if (detail::is_function_record_capsule(c)) {
rec = c.get_pointer<function_record>();
}
while (rec != nullptr) {
if (rec->is_stateless
&& same_type(typeid(function_type),

73
include/pybind11/pybind11.h

@ -12,7 +12,6 @@ @@ -12,7 +12,6 @@
#include "detail/class.h"
#include "detail/dynamic_raw_ptr_cast_if_possible.h"
#include "detail/exception_translation.h"
#include "detail/function_record_pyobject.h"
#include "detail/init.h"
#include "detail/native_enum_data.h"
#include "detail/using_smart_holder.h"
@ -23,7 +22,6 @@ @@ -23,7 +22,6 @@
#include "trampoline_self_life_support.h"
#include "typing.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <memory>
@ -603,11 +601,20 @@ protected: @@ -603,11 +601,20 @@ protected:
if (rec->sibling) {
if (PyCFunction_Check(rec->sibling.ptr())) {
auto *self = PyCFunction_GET_SELF(rec->sibling.ptr());
chain = detail::function_record_ptr_from_PyObject(self);
if (chain && !chain->scope.is(rec->scope)) {
/* Never append a method to an overload chain of a parent class;
instead, hide the parent's overloads in this case */
if (!isinstance<capsule>(self)) {
chain = nullptr;
} else {
auto rec_capsule = reinterpret_borrow<capsule>(self);
if (detail::is_function_record_capsule(rec_capsule)) {
chain = rec_capsule.get_pointer<detail::function_record>();
/* Never append a method to an overload chain of a parent class;
instead, hide the parent's overloads in this case */
if (!chain->scope.is(rec->scope)) {
chain = nullptr;
}
} else {
chain = nullptr;
}
}
}
// Don't trigger for things like the default __init__, which are wrapper_descriptors
@ -627,14 +634,21 @@ protected: @@ -627,14 +634,21 @@ protected:
= reinterpret_cast<PyCFunction>(reinterpret_cast<void (*)()>(dispatcher));
rec->def->ml_flags = METH_VARARGS | METH_KEYWORDS;
detail::function_record_PyTypeObject_PyType_Ready(); // Call-once initialization.
object py_func_rec = detail::function_record_PyObject_New();
((detail::function_record_PyObject *) py_func_rec.ptr())->cpp_func_rec
= unique_rec.release();
capsule rec_capsule(unique_rec.release(),
detail::get_function_record_capsule_name(),
[](void *ptr) { destruct((detail::function_record *) ptr); });
guarded_strdup.release();
object scope_module = detail::get_scope_module(rec->scope);
m_ptr = PyCFunction_NewEx(rec->def, py_func_rec.ptr(), scope_module.ptr());
object scope_module;
if (rec->scope) {
if (hasattr(rec->scope, "__module__")) {
scope_module = rec->scope.attr("__module__");
} else if (hasattr(rec->scope, "__name__")) {
scope_module = rec->scope.attr("__name__");
}
}
m_ptr = PyCFunction_NewEx(rec->def, rec_capsule.ptr(), scope_module.ptr());
if (!m_ptr) {
pybind11_fail("cpp_function::cpp_function(): Could not allocate function object");
}
@ -663,9 +677,8 @@ protected: @@ -663,9 +677,8 @@ protected:
// chain.
chain_start = rec;
rec->next = chain;
auto *py_func_rec
= (detail::function_record_PyObject *) PyCFunction_GET_SELF(m_ptr);
py_func_rec->cpp_func_rec = unique_rec.release();
auto rec_capsule = reinterpret_borrow<capsule>(PyCFunction_GET_SELF(m_ptr));
rec_capsule.set_pointer(unique_rec.release());
guarded_strdup.release();
} else {
// Or end of chain (normal behavior)
@ -740,8 +753,6 @@ protected: @@ -740,8 +753,6 @@ protected:
}
}
friend void detail::function_record_PyTypeObject_methods::tp_dealloc_impl(PyObject *);
/// When a cpp_function is GCed, release any memory allocated by pybind11
static void destruct(detail::function_record *rec, bool free_strings = true) {
// If on Python 3.9, check the interpreter "MICRO" (patch) version.
@ -791,11 +802,13 @@ protected: @@ -791,11 +802,13 @@ protected:
/// Main dispatch logic for calls to functions bound using pybind11
static PyObject *dispatcher(PyObject *self, PyObject *args_in, PyObject *kwargs_in) {
using namespace detail;
const function_record *overloads = function_record_ptr_from_PyObject(self);
assert(overloads != nullptr);
assert(isinstance<capsule>(self));
/* Iterator over the list of potentially admissible overloads */
const function_record *current_overload = overloads;
const function_record *overloads = reinterpret_cast<function_record *>(
PyCapsule_GetPointer(self, get_function_record_capsule_name())),
*current_overload = overloads;
assert(overloads != nullptr);
/* Need to know how many arguments + keyword arguments there are to pick the right
overload */
@ -1239,17 +1252,6 @@ protected: @@ -1239,17 +1252,6 @@ protected:
PYBIND11_NAMESPACE_BEGIN(detail)
PYBIND11_NAMESPACE_BEGIN(function_record_PyTypeObject_methods)
// This implementation needs the definition of `class cpp_function`.
inline void tp_dealloc_impl(PyObject *self) {
auto *py_func_rec = (function_record_PyObject *) self;
cpp_function::destruct(py_func_rec->cpp_func_rec);
py_func_rec->cpp_func_rec = nullptr;
}
PYBIND11_NAMESPACE_END(function_record_PyTypeObject_methods)
template <>
struct handle_type_name<cpp_function> {
static constexpr auto name = const_name("collections.abc.Callable");
@ -2516,7 +2518,14 @@ private: @@ -2516,7 +2518,14 @@ private:
if (!func_self) {
throw error_already_set();
}
return detail::function_record_ptr_from_PyObject(func_self.ptr());
if (!isinstance<capsule>(func_self)) {
return nullptr;
}
auto cap = reinterpret_borrow<capsule>(func_self);
if (!detail::is_function_record_capsule(cap)) {
return nullptr;
}
return cap.get_pointer<detail::function_record>();
}
};

12
include/pybind11/pytypes.h

@ -1399,18 +1399,6 @@ class simple_collector; @@ -1399,18 +1399,6 @@ class simple_collector;
template <return_value_policy policy = return_value_policy::automatic_reference>
class unpacking_collector;
inline object get_scope_module(handle scope) {
if (scope) {
if (hasattr(scope, "__module__")) {
return scope.attr("__module__");
}
if (hasattr(scope, "__name__")) {
return scope.attr("__name__");
}
}
return object();
}
PYBIND11_NAMESPACE_END(detail)
// TODO: After the deprecated constructors are removed, this macro can be simplified by

1
tests/extra_python_package/test_files.py

@ -81,7 +81,6 @@ detail_headers = { @@ -81,7 +81,6 @@ detail_headers = {
"include/pybind11/detail/cpp_conduit.h",
"include/pybind11/detail/descr.h",
"include/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h",
"include/pybind11/detail/function_record_pyobject.h",
"include/pybind11/detail/init.h",
"include/pybind11/detail/internals.h",
"include/pybind11/detail/native_enum_data.h",

46
tests/test_pickling.py

@ -10,42 +10,18 @@ import env @@ -10,42 +10,18 @@ import env
from pybind11_tests import pickling as m
def all_pickle_protocols():
assert pickle.HIGHEST_PROTOCOL >= 0
return range(pickle.HIGHEST_PROTOCOL + 1)
@pytest.mark.parametrize("protocol", all_pickle_protocols())
def test_pickle_simple_callable(protocol):
def test_pickle_simple_callable():
assert m.simple_callable() == 20220426
serialized = pickle.dumps(m.simple_callable, protocol=protocol)
assert b"pybind11_tests.pickling" in serialized
assert b"simple_callable" in serialized
deserialized = pickle.loads(serialized)
assert deserialized() == 20220426
assert deserialized is m.simple_callable
# UNUSUAL: function record pickle roundtrip returns a module, not a function record object:
if not env.PYPY:
assert (
pickle.loads(pickle.dumps(m.simple_callable.__self__, protocol=protocol))
is m
)
# This is not expected to create issues because the only purpose of
# `m.simple_callable.__self__` is to enable pickling: the only method it has is
# `__reduce_ex__`. Direct access for any other purpose is not supported.
# Note that `repr(m.simple_callable.__self__)` shows, e.g.:
# `<pybind11_detail_function_record_v1__gcc_libstdcpp_cxxabi1018 object at 0x...>`
# It is considered to be as much an implementation detail as the
# `pybind11::detail::function_record` C++ type is.
# @rainwoodman suggested that the unusual pickle roundtrip behavior could be
# avoided by changing `reduce_ex_impl()` to produce, e.g.:
# `"__import__('importlib').import_module('pybind11_tests.pickling').simple_callable.__self__"`
# as the argument for the `eval()` function, and adding a getter to the
# `function_record_PyTypeObject` that returns `self`. However, the additional code complexity
# for this is deemed warranted only if the unusual pickle roundtrip behavior actually
# creates issues.
if env.PYPY:
serialized = pickle.dumps(m.simple_callable)
deserialized = pickle.loads(serialized)
assert deserialized() == 20220426
else:
# To document broken behavior: currently it fails universally with
# all C Python versions.
with pytest.raises(TypeError) as excinfo:
pickle.dumps(m.simple_callable)
assert re.search("can.*t pickle .*[Cc]apsule.* object", str(excinfo.value))
@pytest.mark.parametrize("cls_name", ["Pickleable", "PickleableNew"])

Loading…
Cancel
Save