1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
23 #include "arrow/ipc/options.h"
24 #include "arrow/python/visibility.h"
25 #include "arrow/sparse_tensor.h"
26 #include "arrow/status.h"
28 // Forward declaring PyObject, see
29 // https://mail.python.org/pipermail/python-dev/2003-August/037601.html
32 typedef _object PyObject
;
51 struct ARROW_PYTHON_EXPORT SerializedPyObject
{
52 std::shared_ptr
<RecordBatch
> batch
;
53 std::vector
<std::shared_ptr
<Tensor
>> tensors
;
54 std::vector
<std::shared_ptr
<SparseTensor
>> sparse_tensors
;
55 std::vector
<std::shared_ptr
<Tensor
>> ndarrays
;
56 std::vector
<std::shared_ptr
<Buffer
>> buffers
;
57 ipc::IpcWriteOptions ipc_options
;
61 /// \brief Write serialized Python object to OutputStream
62 /// \param[in,out] dst an OutputStream
64 Status
WriteTo(io::OutputStream
* dst
);
66 /// \brief Convert SerializedPyObject to a dict containing the message
67 /// components as Buffer instances with minimal memory allocation
71 /// 'num_sparse_tensors': N,
76 /// Each tensor is written as two buffers, one for the metadata and one for
77 /// the body. Therefore, the number of buffers in 'data' is 2 * M + 2 * N + K + 1,
78 /// with the first buffer containing the serialized record batch containing
79 /// the UnionArray that describes the whole object
80 Status
GetComponents(MemoryPool
* pool
, PyObject
** out
);
83 /// \brief Serialize Python sequence as a SerializedPyObject.
84 /// \param[in] context Serialization context which contains custom serialization
85 /// and deserialization callbacks. Can be any Python object with a
86 /// _serialize_callback method for serialization and a _deserialize_callback
87 /// method for deserialization. If context is None, no custom serialization
88 /// will be attempted.
89 /// \param[in] sequence A Python sequence object to serialize to Arrow data
91 /// \param[out] out The serialized representation
94 /// Release GIL before calling
96 Status
SerializeObject(PyObject
* context
, PyObject
* sequence
, SerializedPyObject
* out
);
98 /// \brief Serialize an Arrow Tensor as a SerializedPyObject.
99 /// \param[in] tensor Tensor to be serialized
100 /// \param[out] out The serialized representation
103 Status
SerializeTensor(std::shared_ptr
<Tensor
> tensor
, py::SerializedPyObject
* out
);
105 /// \brief Write the Tensor metadata header to an OutputStream.
106 /// \param[in] dtype DataType of the Tensor
107 /// \param[in] shape The shape of the tensor
108 /// \param[in] tensor_num_bytes The length of the Tensor data in bytes
109 /// \param[in] dst The OutputStream to write the Tensor header to
112 Status
WriteNdarrayHeader(std::shared_ptr
<DataType
> dtype
,
113 const std::vector
<int64_t>& shape
, int64_t tensor_num_bytes
,
114 io::OutputStream
* dst
);
121 PY2INT
, // Kept for compatibility