]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #pragma once | |
19 | ||
20 | #include <cstdint> | |
21 | #include <memory> | |
22 | #include <string> | |
23 | #include <type_traits> | |
24 | #include <utility> | |
25 | #include <vector> | |
26 | ||
27 | #include "arrow/array/data.h" | |
28 | #include "arrow/scalar.h" | |
29 | #include "arrow/type.h" | |
30 | #include "arrow/type_traits.h" | |
31 | #include "arrow/util/checked_cast.h" | |
32 | #include "arrow/util/macros.h" | |
33 | #include "arrow/util/variant.h" // IWYU pragma: export | |
34 | #include "arrow/util/visibility.h" | |
35 | ||
36 | namespace arrow { | |
37 | ||
38 | class Array; | |
39 | class ChunkedArray; | |
40 | class RecordBatch; | |
41 | class Table; | |
42 | ||
43 | /// \brief A descriptor type that gives the shape (array or scalar) and | |
44 | /// DataType of a Value, but without the data | |
45 | struct ARROW_EXPORT ValueDescr { | |
46 | std::shared_ptr<DataType> type; | |
47 | enum Shape { | |
48 | /// \brief Either Array or Scalar | |
49 | ANY, | |
50 | ||
51 | /// \brief Array type | |
52 | ARRAY, | |
53 | ||
54 | /// \brief Only Scalar arguments supported | |
55 | SCALAR | |
56 | }; | |
57 | ||
58 | Shape shape; | |
59 | ||
60 | ValueDescr() : shape(ANY) {} | |
61 | ||
62 | ValueDescr(std::shared_ptr<DataType> type, ValueDescr::Shape shape) | |
63 | : type(std::move(type)), shape(shape) {} | |
64 | ||
65 | ValueDescr(std::shared_ptr<DataType> type) // NOLINT implicit conversion | |
66 | : type(std::move(type)), shape(ValueDescr::ANY) {} | |
67 | ||
68 | /// \brief Convenience constructor for ANY descr | |
69 | static ValueDescr Any(std::shared_ptr<DataType> type) { | |
70 | return ValueDescr(std::move(type), ANY); | |
71 | } | |
72 | ||
73 | /// \brief Convenience constructor for ValueDescr::ARRAY descr | |
74 | static ValueDescr Array(std::shared_ptr<DataType> type) { | |
75 | return ValueDescr(std::move(type), ARRAY); | |
76 | } | |
77 | ||
78 | /// \brief Convenience constructor for ValueDescr::SCALAR descr | |
79 | static ValueDescr Scalar(std::shared_ptr<DataType> type) { | |
80 | return ValueDescr(std::move(type), SCALAR); | |
81 | } | |
82 | ||
83 | bool operator==(const ValueDescr& other) const { | |
84 | if (shape != other.shape) return false; | |
85 | if (type == other.type) return true; | |
86 | return type && type->Equals(other.type); | |
87 | } | |
88 | ||
89 | bool operator!=(const ValueDescr& other) const { return !(*this == other); } | |
90 | ||
91 | std::string ToString() const; | |
92 | static std::string ToString(const std::vector<ValueDescr>&); | |
93 | ||
94 | ARROW_EXPORT friend void PrintTo(const ValueDescr&, std::ostream*); | |
95 | }; | |
96 | ||
97 | /// \brief For use with scalar functions, returns the broadcasted | |
98 | /// ValueDescr::Shape given a vector of value descriptors. Returns SCALAR | |
99 | /// unless any value is ARRAY | |
100 | ARROW_EXPORT | |
101 | ValueDescr::Shape GetBroadcastShape(const std::vector<ValueDescr>& args); | |
102 | ||
103 | /// \class Datum | |
104 | /// \brief Variant type for various Arrow C++ data structures | |
105 | struct ARROW_EXPORT Datum { | |
106 | enum Kind { NONE, SCALAR, ARRAY, CHUNKED_ARRAY, RECORD_BATCH, TABLE, COLLECTION }; | |
107 | ||
108 | struct Empty {}; | |
109 | ||
110 | // Datum variants may have a length. This special value indicates that the | |
111 | // current variant does not have a length. | |
112 | static constexpr int64_t kUnknownLength = -1; | |
113 | ||
114 | util::Variant<Empty, std::shared_ptr<Scalar>, std::shared_ptr<ArrayData>, | |
115 | std::shared_ptr<ChunkedArray>, std::shared_ptr<RecordBatch>, | |
116 | std::shared_ptr<Table>, std::vector<Datum>> | |
117 | value; | |
118 | ||
119 | /// \brief Empty datum, to be populated elsewhere | |
120 | Datum() = default; | |
121 | ||
122 | Datum(const Datum& other) = default; | |
123 | Datum& operator=(const Datum& other) = default; | |
124 | Datum(Datum&& other) = default; | |
125 | Datum& operator=(Datum&& other) = default; | |
126 | ||
127 | Datum(std::shared_ptr<Scalar> value) // NOLINT implicit conversion | |
128 | : value(std::move(value)) {} | |
129 | ||
130 | Datum(std::shared_ptr<ArrayData> value) // NOLINT implicit conversion | |
131 | : value(std::move(value)) {} | |
132 | ||
133 | Datum(ArrayData arg) // NOLINT implicit conversion | |
134 | : value(std::make_shared<ArrayData>(std::move(arg))) {} | |
135 | ||
136 | Datum(const Array& value); // NOLINT implicit conversion | |
137 | Datum(const std::shared_ptr<Array>& value); // NOLINT implicit conversion | |
138 | Datum(std::shared_ptr<ChunkedArray> value); // NOLINT implicit conversion | |
139 | Datum(std::shared_ptr<RecordBatch> value); // NOLINT implicit conversion | |
140 | Datum(std::shared_ptr<Table> value); // NOLINT implicit conversion | |
141 | Datum(std::vector<Datum> value); // NOLINT implicit conversion | |
142 | ||
143 | // Explicit constructors from const-refs. Can be expensive, prefer the | |
144 | // shared_ptr constructors | |
145 | explicit Datum(const ChunkedArray& value); | |
146 | explicit Datum(const RecordBatch& value); | |
147 | explicit Datum(const Table& value); | |
148 | ||
149 | // Cast from subtypes of Array to Datum | |
150 | template <typename T, typename = enable_if_t<std::is_base_of<Array, T>::value>> | |
151 | Datum(const std::shared_ptr<T>& value) // NOLINT implicit conversion | |
152 | : Datum(std::shared_ptr<Array>(value)) {} | |
153 | ||
154 | // Convenience constructors | |
155 | explicit Datum(bool value); | |
156 | explicit Datum(int8_t value); | |
157 | explicit Datum(uint8_t value); | |
158 | explicit Datum(int16_t value); | |
159 | explicit Datum(uint16_t value); | |
160 | explicit Datum(int32_t value); | |
161 | explicit Datum(uint32_t value); | |
162 | explicit Datum(int64_t value); | |
163 | explicit Datum(uint64_t value); | |
164 | explicit Datum(float value); | |
165 | explicit Datum(double value); | |
166 | explicit Datum(std::string value); | |
167 | explicit Datum(const char* value); | |
168 | ||
169 | Datum::Kind kind() const { | |
170 | switch (this->value.index()) { | |
171 | case 0: | |
172 | return Datum::NONE; | |
173 | case 1: | |
174 | return Datum::SCALAR; | |
175 | case 2: | |
176 | return Datum::ARRAY; | |
177 | case 3: | |
178 | return Datum::CHUNKED_ARRAY; | |
179 | case 4: | |
180 | return Datum::RECORD_BATCH; | |
181 | case 5: | |
182 | return Datum::TABLE; | |
183 | case 6: | |
184 | return Datum::COLLECTION; | |
185 | default: | |
186 | return Datum::NONE; | |
187 | } | |
188 | } | |
189 | ||
190 | const std::shared_ptr<ArrayData>& array() const { | |
191 | return util::get<std::shared_ptr<ArrayData>>(this->value); | |
192 | } | |
193 | ||
194 | ArrayData* mutable_array() const { return this->array().get(); } | |
195 | ||
196 | std::shared_ptr<Array> make_array() const; | |
197 | ||
198 | const std::shared_ptr<ChunkedArray>& chunked_array() const { | |
199 | return util::get<std::shared_ptr<ChunkedArray>>(this->value); | |
200 | } | |
201 | ||
202 | const std::shared_ptr<RecordBatch>& record_batch() const { | |
203 | return util::get<std::shared_ptr<RecordBatch>>(this->value); | |
204 | } | |
205 | ||
206 | const std::shared_ptr<Table>& table() const { | |
207 | return util::get<std::shared_ptr<Table>>(this->value); | |
208 | } | |
209 | ||
210 | const std::vector<Datum>& collection() const { | |
211 | return util::get<std::vector<Datum>>(this->value); | |
212 | } | |
213 | ||
214 | const std::shared_ptr<Scalar>& scalar() const { | |
215 | return util::get<std::shared_ptr<Scalar>>(this->value); | |
216 | } | |
217 | ||
218 | template <typename ExactType> | |
219 | std::shared_ptr<ExactType> array_as() const { | |
220 | return internal::checked_pointer_cast<ExactType>(this->make_array()); | |
221 | } | |
222 | ||
223 | template <typename ExactType> | |
224 | const ExactType& scalar_as() const { | |
225 | return internal::checked_cast<const ExactType&>(*this->scalar()); | |
226 | } | |
227 | ||
228 | bool is_array() const { return this->kind() == Datum::ARRAY; } | |
229 | ||
230 | bool is_arraylike() const { | |
231 | return this->kind() == Datum::ARRAY || this->kind() == Datum::CHUNKED_ARRAY; | |
232 | } | |
233 | ||
234 | bool is_scalar() const { return this->kind() == Datum::SCALAR; } | |
235 | ||
236 | /// \brief True if Datum contains scalar or array-like data | |
237 | bool is_value() const { return this->is_arraylike() || this->is_scalar(); } | |
238 | ||
239 | bool is_collection() const { return this->kind() == Datum::COLLECTION; } | |
240 | ||
241 | int64_t null_count() const; | |
242 | ||
243 | /// \brief Return the shape (array or scalar) and type for supported kinds | |
244 | /// (ARRAY, CHUNKED_ARRAY, and SCALAR). Debug asserts otherwise | |
245 | ValueDescr descr() const; | |
246 | ||
247 | /// \brief Return the shape (array or scalar) for supported kinds (ARRAY, | |
248 | /// CHUNKED_ARRAY, and SCALAR). Debug asserts otherwise | |
249 | ValueDescr::Shape shape() const; | |
250 | ||
251 | /// \brief The value type of the variant, if any | |
252 | /// | |
253 | /// \return nullptr if no type | |
254 | const std::shared_ptr<DataType>& type() const; | |
255 | ||
256 | /// \brief The schema of the variant, if any | |
257 | /// | |
258 | /// \return nullptr if no schema | |
259 | const std::shared_ptr<Schema>& schema() const; | |
260 | ||
261 | /// \brief The value length of the variant, if any | |
262 | /// | |
263 | /// \return kUnknownLength if no length | |
264 | int64_t length() const; | |
265 | ||
266 | /// \brief The array chunks of the variant, if any | |
267 | /// | |
268 | /// \return empty if not arraylike | |
269 | ArrayVector chunks() const; | |
270 | ||
271 | bool Equals(const Datum& other) const; | |
272 | ||
273 | bool operator==(const Datum& other) const { return Equals(other); } | |
274 | bool operator!=(const Datum& other) const { return !Equals(other); } | |
275 | ||
276 | std::string ToString() const; | |
277 | ||
278 | ARROW_EXPORT friend void PrintTo(const Datum&, std::ostream*); | |
279 | }; | |
280 | ||
281 | } // namespace arrow |