]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/datum.h
bump version to 18.2.4-pve3
[ceph.git] / ceph / src / arrow / cpp / src / arrow / datum.h
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#pragma once
19
20#include <cstdint>
21#include <memory>
22#include <string>
23#include <type_traits>
24#include <utility>
25#include <vector>
26
27#include "arrow/array/data.h"
28#include "arrow/scalar.h"
29#include "arrow/type.h"
30#include "arrow/type_traits.h"
31#include "arrow/util/checked_cast.h"
32#include "arrow/util/macros.h"
33#include "arrow/util/variant.h" // IWYU pragma: export
34#include "arrow/util/visibility.h"
35
36namespace arrow {
37
38class Array;
39class ChunkedArray;
40class RecordBatch;
41class Table;
42
43/// \brief A descriptor type that gives the shape (array or scalar) and
44/// DataType of a Value, but without the data
45struct ARROW_EXPORT ValueDescr {
46 std::shared_ptr<DataType> type;
47 enum Shape {
48 /// \brief Either Array or Scalar
49 ANY,
50
51 /// \brief Array type
52 ARRAY,
53
54 /// \brief Only Scalar arguments supported
55 SCALAR
56 };
57
58 Shape shape;
59
60 ValueDescr() : shape(ANY) {}
61
62 ValueDescr(std::shared_ptr<DataType> type, ValueDescr::Shape shape)
63 : type(std::move(type)), shape(shape) {}
64
65 ValueDescr(std::shared_ptr<DataType> type) // NOLINT implicit conversion
66 : type(std::move(type)), shape(ValueDescr::ANY) {}
67
68 /// \brief Convenience constructor for ANY descr
69 static ValueDescr Any(std::shared_ptr<DataType> type) {
70 return ValueDescr(std::move(type), ANY);
71 }
72
73 /// \brief Convenience constructor for Value::ARRAY descr
74 static ValueDescr Array(std::shared_ptr<DataType> type) {
75 return ValueDescr(std::move(type), ARRAY);
76 }
77
78 /// \brief Convenience constructor for Value::SCALAR descr
79 static ValueDescr Scalar(std::shared_ptr<DataType> type) {
80 return ValueDescr(std::move(type), SCALAR);
81 }
82
83 bool operator==(const ValueDescr& other) const {
84 if (shape != other.shape) return false;
85 if (type == other.type) return true;
86 return type && type->Equals(other.type);
87 }
88
89 bool operator!=(const ValueDescr& other) const { return !(*this == other); }
90
91 std::string ToString() const;
92 static std::string ToString(const std::vector<ValueDescr>&);
93
94 ARROW_EXPORT friend void PrintTo(const ValueDescr&, std::ostream*);
95};
96
/// \brief For use with scalar functions, returns the broadcasted
/// ValueDescr::Shape given a vector of value descriptors. Returns SCALAR
/// unless any value is ARRAY
100ARROW_EXPORT
101ValueDescr::Shape GetBroadcastShape(const std::vector<ValueDescr>& args);
102
103/// \class Datum
104/// \brief Variant type for various Arrow C++ data structures
105struct ARROW_EXPORT Datum {
106 enum Kind { NONE, SCALAR, ARRAY, CHUNKED_ARRAY, RECORD_BATCH, TABLE, COLLECTION };
107
108 struct Empty {};
109
110 // Datums variants may have a length. This special value indicate that the
111 // current variant does not have a length.
112 static constexpr int64_t kUnknownLength = -1;
113
114 util::Variant<Empty, std::shared_ptr<Scalar>, std::shared_ptr<ArrayData>,
115 std::shared_ptr<ChunkedArray>, std::shared_ptr<RecordBatch>,
116 std::shared_ptr<Table>, std::vector<Datum>>
117 value;
118
119 /// \brief Empty datum, to be populated elsewhere
120 Datum() = default;
121
122 Datum(const Datum& other) = default;
123 Datum& operator=(const Datum& other) = default;
124 Datum(Datum&& other) = default;
125 Datum& operator=(Datum&& other) = default;
126
127 Datum(std::shared_ptr<Scalar> value) // NOLINT implicit conversion
128 : value(std::move(value)) {}
129
130 Datum(std::shared_ptr<ArrayData> value) // NOLINT implicit conversion
131 : value(std::move(value)) {}
132
133 Datum(ArrayData arg) // NOLINT implicit conversion
134 : value(std::make_shared<ArrayData>(std::move(arg))) {}
135
136 Datum(const Array& value); // NOLINT implicit conversion
137 Datum(const std::shared_ptr<Array>& value); // NOLINT implicit conversion
138 Datum(std::shared_ptr<ChunkedArray> value); // NOLINT implicit conversion
139 Datum(std::shared_ptr<RecordBatch> value); // NOLINT implicit conversion
140 Datum(std::shared_ptr<Table> value); // NOLINT implicit conversion
141 Datum(std::vector<Datum> value); // NOLINT implicit conversion
142
143 // Explicit constructors from const-refs. Can be expensive, prefer the
144 // shared_ptr constructors
145 explicit Datum(const ChunkedArray& value);
146 explicit Datum(const RecordBatch& value);
147 explicit Datum(const Table& value);
148
149 // Cast from subtypes of Array to Datum
150 template <typename T, typename = enable_if_t<std::is_base_of<Array, T>::value>>
151 Datum(const std::shared_ptr<T>& value) // NOLINT implicit conversion
152 : Datum(std::shared_ptr<Array>(value)) {}
153
154 // Convenience constructors
155 explicit Datum(bool value);
156 explicit Datum(int8_t value);
157 explicit Datum(uint8_t value);
158 explicit Datum(int16_t value);
159 explicit Datum(uint16_t value);
160 explicit Datum(int32_t value);
161 explicit Datum(uint32_t value);
162 explicit Datum(int64_t value);
163 explicit Datum(uint64_t value);
164 explicit Datum(float value);
165 explicit Datum(double value);
166 explicit Datum(std::string value);
167 explicit Datum(const char* value);
168
169 Datum::Kind kind() const {
170 switch (this->value.index()) {
171 case 0:
172 return Datum::NONE;
173 case 1:
174 return Datum::SCALAR;
175 case 2:
176 return Datum::ARRAY;
177 case 3:
178 return Datum::CHUNKED_ARRAY;
179 case 4:
180 return Datum::RECORD_BATCH;
181 case 5:
182 return Datum::TABLE;
183 case 6:
184 return Datum::COLLECTION;
185 default:
186 return Datum::NONE;
187 }
188 }
189
190 const std::shared_ptr<ArrayData>& array() const {
191 return util::get<std::shared_ptr<ArrayData>>(this->value);
192 }
193
194 ArrayData* mutable_array() const { return this->array().get(); }
195
196 std::shared_ptr<Array> make_array() const;
197
198 const std::shared_ptr<ChunkedArray>& chunked_array() const {
199 return util::get<std::shared_ptr<ChunkedArray>>(this->value);
200 }
201
202 const std::shared_ptr<RecordBatch>& record_batch() const {
203 return util::get<std::shared_ptr<RecordBatch>>(this->value);
204 }
205
206 const std::shared_ptr<Table>& table() const {
207 return util::get<std::shared_ptr<Table>>(this->value);
208 }
209
210 const std::vector<Datum>& collection() const {
211 return util::get<std::vector<Datum>>(this->value);
212 }
213
214 const std::shared_ptr<Scalar>& scalar() const {
215 return util::get<std::shared_ptr<Scalar>>(this->value);
216 }
217
218 template <typename ExactType>
219 std::shared_ptr<ExactType> array_as() const {
220 return internal::checked_pointer_cast<ExactType>(this->make_array());
221 }
222
223 template <typename ExactType>
224 const ExactType& scalar_as() const {
225 return internal::checked_cast<const ExactType&>(*this->scalar());
226 }
227
228 bool is_array() const { return this->kind() == Datum::ARRAY; }
229
230 bool is_arraylike() const {
231 return this->kind() == Datum::ARRAY || this->kind() == Datum::CHUNKED_ARRAY;
232 }
233
234 bool is_scalar() const { return this->kind() == Datum::SCALAR; }
235
236 /// \brief True if Datum contains a scalar or array-like data
237 bool is_value() const { return this->is_arraylike() || this->is_scalar(); }
238
239 bool is_collection() const { return this->kind() == Datum::COLLECTION; }
240
241 int64_t null_count() const;
242
243 /// \brief Return the shape (array or scalar) and type for supported kinds
244 /// (ARRAY, CHUNKED_ARRAY, and SCALAR). Debug asserts otherwise
245 ValueDescr descr() const;
246
247 /// \brief Return the shape (array or scalar) for supported kinds (ARRAY,
248 /// CHUNKED_ARRAY, and SCALAR). Debug asserts otherwise
249 ValueDescr::Shape shape() const;
250
251 /// \brief The value type of the variant, if any
252 ///
253 /// \return nullptr if no type
254 const std::shared_ptr<DataType>& type() const;
255
256 /// \brief The schema of the variant, if any
257 ///
258 /// \return nullptr if no schema
259 const std::shared_ptr<Schema>& schema() const;
260
261 /// \brief The value length of the variant, if any
262 ///
263 /// \return kUnknownLength if no type
264 int64_t length() const;
265
266 /// \brief The array chunks of the variant, if any
267 ///
268 /// \return empty if not arraylike
269 ArrayVector chunks() const;
270
271 bool Equals(const Datum& other) const;
272
273 bool operator==(const Datum& other) const { return Equals(other); }
274 bool operator!=(const Datum& other) const { return !Equals(other); }
275
276 std::string ToString() const;
277
278 ARROW_EXPORT friend void PrintTo(const Datum&, std::ostream*);
279};
280
281} // namespace arrow