1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
23 #include "arrow/buffer.h"
24 #include "arrow/python/pyarrow.h"
25 #include "arrow/python/visibility.h"
26 #include "arrow/result.h"
27 #include "arrow/util/macros.h"
37 // Convert current Python error to a Status. The Python error state is cleared
38 // and can be restored with RestorePyError().
39 ARROW_PYTHON_EXPORT Status
ConvertPyError(StatusCode code
= StatusCode::UnknownError
);
40 // Query whether the given Status is a Python error (as wrapped by ConvertPyError()).
41 ARROW_PYTHON_EXPORT
bool IsPyError(const Status
& status
);
42 // Restore a Python error wrapped in a Status.
43 ARROW_PYTHON_EXPORT
void RestorePyError(const Status
& status
);
45 // Catch a pending Python exception and return the corresponding Status.
46 // If no exception is pending, Status::OK() is returned.
47 inline Status
CheckPyError(StatusCode code
= StatusCode::UnknownError
) {
48 if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
51 return ConvertPyError(code
);
// Check for a pending Python exception with CheckPyError() (default status
// code) and propagate the outcome through ARROW_RETURN_NOT_OK.
#define RETURN_IF_PYERROR() ARROW_RETURN_NOT_OK(CheckPyError())

// Same check, but the converted Status uses the status code CODE.
#define PY_RETURN_IF_ERROR(CODE) ARROW_RETURN_NOT_OK(CheckPyError(CODE))
59 // For Cython, as you can't define template C++ functions in Cython, only use them.
60 // This function can set a Python exception. It assumes that T has a (cheap)
61 // default constructor.
63 T
GetResultValue(Result
<T
> result
) {
64 if (ARROW_PREDICT_TRUE(result
.ok())) {
65 return *std::move(result
);
67 int r
= internal::check_status(result
.status()); // takes the GIL
68 assert(r
== -1); // should have errored out
74 // A RAII-style helper that ensures the GIL is acquired inside a lexical block.
75 class ARROW_PYTHON_EXPORT PyAcquireGIL
{
77 PyAcquireGIL() : acquired_gil_(false) { acquire(); }
79 ~PyAcquireGIL() { release(); }
83 state_
= PyGILState_Ensure();
91 PyGILState_Release(state_
);
92 acquired_gil_
= false;
98 PyGILState_STATE state_
;
99 ARROW_DISALLOW_COPY_AND_ASSIGN(PyAcquireGIL
);
102 // A RAII-style helper that releases the GIL until the end of a lexical block
103 class ARROW_PYTHON_EXPORT PyReleaseGIL
{
105 PyReleaseGIL() { saved_state_
= PyEval_SaveThread(); }
107 ~PyReleaseGIL() { PyEval_RestoreThread(saved_state_
); }
110 PyThreadState
* saved_state_
;
111 ARROW_DISALLOW_COPY_AND_ASSIGN(PyReleaseGIL
);
114 // A helper to call safely into the Python interpreter from arbitrary C++ code.
115 // The GIL is acquired, and the current thread's error status is preserved.
116 template <typename Function
>
117 auto SafeCallIntoPython(Function
&& func
) -> decltype(func()) {
121 PyObject
* exc_traceback
;
122 PyErr_Fetch(&exc_type
, &exc_value
, &exc_traceback
);
123 auto maybe_status
= std::forward
<Function
>(func
)();
124 // If the return Status is a "Python error", the current Python error status
125 // describes the error and shouldn't be clobbered.
126 if (!IsPyError(::arrow::internal::GenericToStatus(maybe_status
)) &&
127 exc_type
!= NULLPTR
) {
128 PyErr_Restore(exc_type
, exc_value
, exc_traceback
);
133 // A RAII primitive that DECREFs the underlying PyObject* when it
134 // goes out of scope.
135 class ARROW_PYTHON_EXPORT OwnedRef
{
137 OwnedRef() : obj_(NULLPTR
) {}
138 OwnedRef(OwnedRef
&& other
) : OwnedRef(other
.detach()) {}
139 explicit OwnedRef(PyObject
* obj
) : obj_(obj
) {}
141 OwnedRef
& operator=(OwnedRef
&& other
) {
142 obj_
= other
.detach();
146 ~OwnedRef() { reset(); }
148 void reset(PyObject
* obj
) {
153 void reset() { reset(NULLPTR
); }
156 PyObject
* result
= obj_
;
161 PyObject
* obj() const { return obj_
; }
163 PyObject
** ref() { return &obj_
; }
165 operator bool() const { return obj_
!= NULLPTR
; }
168 ARROW_DISALLOW_COPY_AND_ASSIGN(OwnedRef
);
173 // Same as OwnedRef, but ensures the GIL is taken when it goes out of scope.
174 // This is for situations where the GIL is not always known to be held
175 // (e.g. if it is released in the middle of a function for performance reasons)
176 class ARROW_PYTHON_EXPORT OwnedRefNoGIL
: public OwnedRef
{
178 OwnedRefNoGIL() : OwnedRef() {}
179 OwnedRefNoGIL(OwnedRefNoGIL
&& other
) : OwnedRef(other
.detach()) {}
180 explicit OwnedRefNoGIL(PyObject
* obj
) : OwnedRef(obj
) {}
// Primary template, declared only: BoundFunction is usable solely through its
// partial specializations on function types taking a leading PyObject*.
template <typename Fn>
struct BoundFunction;
191 template <typename
... Args
>
192 struct BoundFunction
<void(PyObject
*, Args
...)> {
193 // We bind `cdef void fn(object, ...)` to get a `Status(...)`
194 // where the Status contains any Python error raised by `fn`
195 using Unbound
= void(PyObject
*, Args
...);
196 using Bound
= Status(Args
...);
198 BoundFunction(Unbound
* unbound
, PyObject
* bound_arg
)
199 : bound_arg_(bound_arg
), unbound_(unbound
) {}
201 Status
Invoke(Args
... args
) const {
203 unbound_(bound_arg_
.obj(), std::forward
<Args
>(args
)...);
209 OwnedRefNoGIL bound_arg_
;
212 template <typename Return
, typename
... Args
>
213 struct BoundFunction
<Return(PyObject
*, Args
...)> {
214 // We bind `cdef Return fn(object, ...)` to get a `Result<Return>(...)`
215 // where the Result contains any Python error raised by `fn` or the
216 // return value from `fn`.
217 using Unbound
= Return(PyObject
*, Args
...);
218 using Bound
= Result
<Return
>(Args
...);
220 BoundFunction(Unbound
* unbound
, PyObject
* bound_arg
)
221 : bound_arg_(bound_arg
), unbound_(unbound
) {}
223 Result
<Return
> Invoke(Args
... args
) const {
225 Return ret
= unbound_(bound_arg_
.obj(), std::forward
<Args
>(args
)...);
231 OwnedRefNoGIL bound_arg_
;
234 template <typename OutFn
, typename Return
, typename
... Args
>
235 std::function
<OutFn
> BindFunction(Return (*unbound
)(PyObject
*, Args
...),
236 PyObject
* bound_arg
) {
237 using Fn
= BoundFunction
<Return(PyObject
*, Args
...)>;
239 static_assert(std::is_same
<typename
Fn::Bound
, OutFn
>::value
,
240 "requested bound function of unsupported type");
242 Py_XINCREF(bound_arg
);
243 auto bound_fn
= std::make_shared
<Fn
>(unbound
, bound_arg
);
245 [bound_fn
](Args
... args
) { return bound_fn
->Invoke(std::forward
<Args
>(args
)...); };
248 // A temporary conversion of a Python object to a bytes area.
254 static Result
<PyBytesView
> FromString(PyObject
* obj
, bool check_utf8
= false) {
256 ARROW_RETURN_NOT_OK(self
.ParseString(obj
, check_utf8
));
257 return std::move(self
);
260 static Result
<PyBytesView
> FromUnicode(PyObject
* obj
) {
262 ARROW_RETURN_NOT_OK(self
.ParseUnicode(obj
));
263 return std::move(self
);
266 static Result
<PyBytesView
> FromBinary(PyObject
* obj
) {
268 ARROW_RETURN_NOT_OK(self
.ParseBinary(obj
));
269 return std::move(self
);
272 // View the given Python object as string-like, i.e. str or (utf8) bytes
273 Status
ParseString(PyObject
* obj
, bool check_utf8
= false) {
274 if (PyUnicode_Check(obj
)) {
275 return ParseUnicode(obj
);
277 ARROW_RETURN_NOT_OK(ParseBinary(obj
));
279 // Check that the bytes are valid UTF-8 by attempting to decode them
280 OwnedRef
decoded(PyUnicode_FromStringAndSize(bytes
, size
));
281 if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
292 // View the given Python object as unicode string
293 Status
ParseUnicode(PyObject
* obj
) {
294 // The utf-8 representation is cached on the unicode object
295 bytes
= PyUnicode_AsUTF8AndSize(obj
, &size
);
301 // View the given Python object as binary-like, i.e. bytes
302 Status
ParseBinary(PyObject
* obj
) {
303 if (PyBytes_Check(obj
)) {
304 bytes
= PyBytes_AS_STRING(obj
);
305 size
= PyBytes_GET_SIZE(obj
);
307 } else if (PyByteArray_Check(obj
)) {
308 bytes
= PyByteArray_AS_STRING(obj
);
309 size
= PyByteArray_GET_SIZE(obj
);
311 } else if (PyMemoryView_Check(obj
)) {
312 PyObject
* ref
= PyMemoryView_GetContiguous(obj
, PyBUF_READ
, 'C');
314 Py_buffer
* buffer
= PyMemoryView_GET_BUFFER(ref
);
315 bytes
= reinterpret_cast<const char*>(buffer
->buf
);
319 return Status::TypeError("Expected bytes, got a '", Py_TYPE(obj
)->tp_name
,
329 class ARROW_PYTHON_EXPORT PyBuffer
: public Buffer
{
331 /// While memoryview objects support multi-dimensional buffers, PyBuffer only supports
332 /// one-dimensional byte buffers.
335 static Result
<std::shared_ptr
<Buffer
>> FromPyObject(PyObject
* obj
);
339 Status
Init(PyObject
*);
344 // Return the common PyArrow memory pool
345 ARROW_PYTHON_EXPORT
void set_default_memory_pool(MemoryPool
* pool
);
346 ARROW_PYTHON_EXPORT MemoryPool
* get_memory_pool();
348 // This is annoying: because C++11 does not allow implicit conversion of string
349 // literals to non-const char*, we need to go through some gymnastics to use
350 // PyObject_CallMethod without a lot of pain (its arguments are non-const char*).
352 template <typename
... ArgTypes
>
353 static inline PyObject
* cpp_PyObject_CallMethod(PyObject
* obj
, const char* method_name
,
354 const char* argspec
, ArgTypes
... args
) {
355 return PyObject_CallMethod(obj
, const_cast<char*>(method_name
),
356 const_cast<char*>(argspec
), args
...);