]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/python/common.h
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / python / common.h
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #pragma once
19
20 #include <memory>
21 #include <utility>
22
23 #include "arrow/buffer.h"
24 #include "arrow/python/pyarrow.h"
25 #include "arrow/python/visibility.h"
26 #include "arrow/result.h"
27 #include "arrow/util/macros.h"
28
29 namespace arrow {
30
31 class MemoryPool;
32 template <class T>
33 class Result;
34
35 namespace py {
36
37 // Convert current Python error to a Status. The Python error state is cleared
38 // and can be restored with RestorePyError().
39 ARROW_PYTHON_EXPORT Status ConvertPyError(StatusCode code = StatusCode::UnknownError);
40 // Query whether the given Status is a Python error (as wrapped by ConvertPyError()).
41 ARROW_PYTHON_EXPORT bool IsPyError(const Status& status);
42 // Restore a Python error wrapped in a Status.
43 ARROW_PYTHON_EXPORT void RestorePyError(const Status& status);
44
45 // Catch a pending Python exception and return the corresponding Status.
46 // If no exception is pending, Status::OK() is returned.
47 inline Status CheckPyError(StatusCode code = StatusCode::UnknownError) {
48 if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
49 return Status::OK();
50 } else {
51 return ConvertPyError(code);
52 }
53 }
54
// Return from the enclosing function with the converted Status if a Python
// exception is pending. CheckPyError is spelled with its full namespace so
// that the macros also expand correctly outside of namespace arrow::py.
#define RETURN_IF_PYERROR() ARROW_RETURN_NOT_OK(::arrow::py::CheckPyError())

// Same as RETURN_IF_PYERROR(), but the StatusCode passed to CheckPyError()
// is given explicitly as CODE.
#define PY_RETURN_IF_ERROR(CODE) ARROW_RETURN_NOT_OK(::arrow::py::CheckPyError(CODE))
58
59 // For Cython, as you can't define template C++ functions in Cython, only use them.
60 // This function can set a Python exception. It assumes that T has a (cheap)
61 // default constructor.
62 template <class T>
63 T GetResultValue(Result<T> result) {
64 if (ARROW_PREDICT_TRUE(result.ok())) {
65 return *std::move(result);
66 } else {
67 int r = internal::check_status(result.status()); // takes the GIL
68 assert(r == -1); // should have errored out
69 ARROW_UNUSED(r);
70 return {};
71 }
72 }
73
74 // A RAII-style helper that ensures the GIL is acquired inside a lexical block.
75 class ARROW_PYTHON_EXPORT PyAcquireGIL {
76 public:
77 PyAcquireGIL() : acquired_gil_(false) { acquire(); }
78
79 ~PyAcquireGIL() { release(); }
80
81 void acquire() {
82 if (!acquired_gil_) {
83 state_ = PyGILState_Ensure();
84 acquired_gil_ = true;
85 }
86 }
87
88 // idempotent
89 void release() {
90 if (acquired_gil_) {
91 PyGILState_Release(state_);
92 acquired_gil_ = false;
93 }
94 }
95
96 private:
97 bool acquired_gil_;
98 PyGILState_STATE state_;
99 ARROW_DISALLOW_COPY_AND_ASSIGN(PyAcquireGIL);
100 };
101
102 // A RAII-style helper that releases the GIL until the end of a lexical block
103 class ARROW_PYTHON_EXPORT PyReleaseGIL {
104 public:
105 PyReleaseGIL() { saved_state_ = PyEval_SaveThread(); }
106
107 ~PyReleaseGIL() { PyEval_RestoreThread(saved_state_); }
108
109 private:
110 PyThreadState* saved_state_;
111 ARROW_DISALLOW_COPY_AND_ASSIGN(PyReleaseGIL);
112 };
113
114 // A helper to call safely into the Python interpreter from arbitrary C++ code.
115 // The GIL is acquired, and the current thread's error status is preserved.
116 template <typename Function>
117 auto SafeCallIntoPython(Function&& func) -> decltype(func()) {
118 PyAcquireGIL lock;
119 PyObject* exc_type;
120 PyObject* exc_value;
121 PyObject* exc_traceback;
122 PyErr_Fetch(&exc_type, &exc_value, &exc_traceback);
123 auto maybe_status = std::forward<Function>(func)();
124 // If the return Status is a "Python error", the current Python error status
125 // describes the error and shouldn't be clobbered.
126 if (!IsPyError(::arrow::internal::GenericToStatus(maybe_status)) &&
127 exc_type != NULLPTR) {
128 PyErr_Restore(exc_type, exc_value, exc_traceback);
129 }
130 return maybe_status;
131 }
132
133 // A RAII primitive that DECREFs the underlying PyObject* when it
134 // goes out of scope.
135 class ARROW_PYTHON_EXPORT OwnedRef {
136 public:
137 OwnedRef() : obj_(NULLPTR) {}
138 OwnedRef(OwnedRef&& other) : OwnedRef(other.detach()) {}
139 explicit OwnedRef(PyObject* obj) : obj_(obj) {}
140
141 OwnedRef& operator=(OwnedRef&& other) {
142 obj_ = other.detach();
143 return *this;
144 }
145
146 ~OwnedRef() { reset(); }
147
148 void reset(PyObject* obj) {
149 Py_XDECREF(obj_);
150 obj_ = obj;
151 }
152
153 void reset() { reset(NULLPTR); }
154
155 PyObject* detach() {
156 PyObject* result = obj_;
157 obj_ = NULLPTR;
158 return result;
159 }
160
161 PyObject* obj() const { return obj_; }
162
163 PyObject** ref() { return &obj_; }
164
165 operator bool() const { return obj_ != NULLPTR; }
166
167 private:
168 ARROW_DISALLOW_COPY_AND_ASSIGN(OwnedRef);
169
170 PyObject* obj_;
171 };
172
173 // Same as OwnedRef, but ensures the GIL is taken when it goes out of scope.
174 // This is for situations where the GIL is not always known to be held
175 // (e.g. if it is released in the middle of a function for performance reasons)
176 class ARROW_PYTHON_EXPORT OwnedRefNoGIL : public OwnedRef {
177 public:
178 OwnedRefNoGIL() : OwnedRef() {}
179 OwnedRefNoGIL(OwnedRefNoGIL&& other) : OwnedRef(other.detach()) {}
180 explicit OwnedRefNoGIL(PyObject* obj) : OwnedRef(obj) {}
181
182 ~OwnedRefNoGIL() {
183 PyAcquireGIL lock;
184 reset();
185 }
186 };
187
188 template <typename Fn>
189 struct BoundFunction;
190
191 template <typename... Args>
192 struct BoundFunction<void(PyObject*, Args...)> {
193 // We bind `cdef void fn(object, ...)` to get a `Status(...)`
194 // where the Status contains any Python error raised by `fn`
195 using Unbound = void(PyObject*, Args...);
196 using Bound = Status(Args...);
197
198 BoundFunction(Unbound* unbound, PyObject* bound_arg)
199 : bound_arg_(bound_arg), unbound_(unbound) {}
200
201 Status Invoke(Args... args) const {
202 PyAcquireGIL lock;
203 unbound_(bound_arg_.obj(), std::forward<Args>(args)...);
204 RETURN_IF_PYERROR();
205 return Status::OK();
206 }
207
208 Unbound* unbound_;
209 OwnedRefNoGIL bound_arg_;
210 };
211
212 template <typename Return, typename... Args>
213 struct BoundFunction<Return(PyObject*, Args...)> {
214 // We bind `cdef Return fn(object, ...)` to get a `Result<Return>(...)`
215 // where the Result contains any Python error raised by `fn` or the
216 // return value from `fn`.
217 using Unbound = Return(PyObject*, Args...);
218 using Bound = Result<Return>(Args...);
219
220 BoundFunction(Unbound* unbound, PyObject* bound_arg)
221 : bound_arg_(bound_arg), unbound_(unbound) {}
222
223 Result<Return> Invoke(Args... args) const {
224 PyAcquireGIL lock;
225 Return ret = unbound_(bound_arg_.obj(), std::forward<Args>(args)...);
226 RETURN_IF_PYERROR();
227 return ret;
228 }
229
230 Unbound* unbound_;
231 OwnedRefNoGIL bound_arg_;
232 };
233
234 template <typename OutFn, typename Return, typename... Args>
235 std::function<OutFn> BindFunction(Return (*unbound)(PyObject*, Args...),
236 PyObject* bound_arg) {
237 using Fn = BoundFunction<Return(PyObject*, Args...)>;
238
239 static_assert(std::is_same<typename Fn::Bound, OutFn>::value,
240 "requested bound function of unsupported type");
241
242 Py_XINCREF(bound_arg);
243 auto bound_fn = std::make_shared<Fn>(unbound, bound_arg);
244 return
245 [bound_fn](Args... args) { return bound_fn->Invoke(std::forward<Args>(args)...); };
246 }
247
248 // A temporary conversion of a Python object to a bytes area.
249 struct PyBytesView {
250 const char* bytes;
251 Py_ssize_t size;
252 bool is_utf8;
253
254 static Result<PyBytesView> FromString(PyObject* obj, bool check_utf8 = false) {
255 PyBytesView self;
256 ARROW_RETURN_NOT_OK(self.ParseString(obj, check_utf8));
257 return std::move(self);
258 }
259
260 static Result<PyBytesView> FromUnicode(PyObject* obj) {
261 PyBytesView self;
262 ARROW_RETURN_NOT_OK(self.ParseUnicode(obj));
263 return std::move(self);
264 }
265
266 static Result<PyBytesView> FromBinary(PyObject* obj) {
267 PyBytesView self;
268 ARROW_RETURN_NOT_OK(self.ParseBinary(obj));
269 return std::move(self);
270 }
271
272 // View the given Python object as string-like, i.e. str or (utf8) bytes
273 Status ParseString(PyObject* obj, bool check_utf8 = false) {
274 if (PyUnicode_Check(obj)) {
275 return ParseUnicode(obj);
276 } else {
277 ARROW_RETURN_NOT_OK(ParseBinary(obj));
278 if (check_utf8) {
279 // Check the bytes are utf8 utf-8
280 OwnedRef decoded(PyUnicode_FromStringAndSize(bytes, size));
281 if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
282 is_utf8 = true;
283 } else {
284 PyErr_Clear();
285 is_utf8 = false;
286 }
287 }
288 return Status::OK();
289 }
290 }
291
292 // View the given Python object as unicode string
293 Status ParseUnicode(PyObject* obj) {
294 // The utf-8 representation is cached on the unicode object
295 bytes = PyUnicode_AsUTF8AndSize(obj, &size);
296 RETURN_IF_PYERROR();
297 is_utf8 = true;
298 return Status::OK();
299 }
300
301 // View the given Python object as binary-like, i.e. bytes
302 Status ParseBinary(PyObject* obj) {
303 if (PyBytes_Check(obj)) {
304 bytes = PyBytes_AS_STRING(obj);
305 size = PyBytes_GET_SIZE(obj);
306 is_utf8 = false;
307 } else if (PyByteArray_Check(obj)) {
308 bytes = PyByteArray_AS_STRING(obj);
309 size = PyByteArray_GET_SIZE(obj);
310 is_utf8 = false;
311 } else if (PyMemoryView_Check(obj)) {
312 PyObject* ref = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C');
313 RETURN_IF_PYERROR();
314 Py_buffer* buffer = PyMemoryView_GET_BUFFER(ref);
315 bytes = reinterpret_cast<const char*>(buffer->buf);
316 size = buffer->len;
317 is_utf8 = false;
318 } else {
319 return Status::TypeError("Expected bytes, got a '", Py_TYPE(obj)->tp_name,
320 "' object");
321 }
322 return Status::OK();
323 }
324
325 protected:
326 OwnedRef ref;
327 };
328
329 class ARROW_PYTHON_EXPORT PyBuffer : public Buffer {
330 public:
331 /// While memoryview objects support multi-dimensional buffers, PyBuffer only supports
332 /// one-dimensional byte buffers.
333 ~PyBuffer();
334
335 static Result<std::shared_ptr<Buffer>> FromPyObject(PyObject* obj);
336
337 private:
338 PyBuffer();
339 Status Init(PyObject*);
340
341 Py_buffer py_buf_;
342 };
343
// Set the common PyArrow memory pool
// NOTE(review): presumably this is the pool subsequently handed out by
// get_memory_pool() -- confirm against the implementation.
ARROW_PYTHON_EXPORT void set_default_memory_pool(MemoryPool* pool);
// Return the common PyArrow memory pool
ARROW_PYTHON_EXPORT MemoryPool* get_memory_pool();
347
348 // This is annoying: because C++11 does not allow implicit conversion of string
349 // literals to non-const char*, we need to go through some gymnastics to use
350 // PyObject_CallMethod without a lot of pain (its arguments are non-const
351 // char*)
352 template <typename... ArgTypes>
353 static inline PyObject* cpp_PyObject_CallMethod(PyObject* obj, const char* method_name,
354 const char* argspec, ArgTypes... args) {
355 return PyObject_CallMethod(obj, const_cast<char*>(method_name),
356 const_cast<char*>(argspec), args...);
357 }
358
359 } // namespace py
360 } // namespace arrow