]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/datum.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / datum.cc
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include "arrow/datum.h"
19
20#include <cstddef>
21#include <memory>
22#include <sstream>
23#include <vector>
24
25#include "arrow/array/array_base.h"
26#include "arrow/array/util.h"
27#include "arrow/chunked_array.h"
28#include "arrow/record_batch.h"
29#include "arrow/scalar.h"
30#include "arrow/table.h"
31#include "arrow/util/logging.h"
32#include "arrow/util/memory.h"
33
34namespace arrow {
35
36static bool CollectionEquals(const std::vector<Datum>& left,
37 const std::vector<Datum>& right) {
38 if (left.size() != right.size()) {
39 return false;
40 }
41
42 for (size_t i = 0; i < left.size(); i++) {
43 if (!left[i].Equals(right[i])) {
44 return false;
45 }
46 }
47 return true;
48}
49
50Datum::Datum(const Array& value) : Datum(value.data()) {}
51
52Datum::Datum(const std::shared_ptr<Array>& value)
53 : Datum(value ? value->data() : NULLPTR) {}
54
55Datum::Datum(std::shared_ptr<ChunkedArray> value) : value(std::move(value)) {}
56Datum::Datum(std::shared_ptr<RecordBatch> value) : value(std::move(value)) {}
57Datum::Datum(std::shared_ptr<Table> value) : value(std::move(value)) {}
58Datum::Datum(std::vector<Datum> value) : value(std::move(value)) {}
59
60Datum::Datum(bool value) : value(std::make_shared<BooleanScalar>(value)) {}
61Datum::Datum(int8_t value) : value(std::make_shared<Int8Scalar>(value)) {}
62Datum::Datum(uint8_t value) : value(std::make_shared<UInt8Scalar>(value)) {}
63Datum::Datum(int16_t value) : value(std::make_shared<Int16Scalar>(value)) {}
64Datum::Datum(uint16_t value) : value(std::make_shared<UInt16Scalar>(value)) {}
65Datum::Datum(int32_t value) : value(std::make_shared<Int32Scalar>(value)) {}
66Datum::Datum(uint32_t value) : value(std::make_shared<UInt32Scalar>(value)) {}
67Datum::Datum(int64_t value) : value(std::make_shared<Int64Scalar>(value)) {}
68Datum::Datum(uint64_t value) : value(std::make_shared<UInt64Scalar>(value)) {}
69Datum::Datum(float value) : value(std::make_shared<FloatScalar>(value)) {}
70Datum::Datum(double value) : value(std::make_shared<DoubleScalar>(value)) {}
71Datum::Datum(std::string value)
72 : value(std::make_shared<StringScalar>(std::move(value))) {}
73Datum::Datum(const char* value) : value(std::make_shared<StringScalar>(value)) {}
74
75Datum::Datum(const ChunkedArray& value)
76 : value(std::make_shared<ChunkedArray>(value.chunks(), value.type())) {}
77
78Datum::Datum(const Table& value)
79 : value(Table::Make(value.schema(), value.columns(), value.num_rows())) {}
80
81Datum::Datum(const RecordBatch& value)
82 : value(RecordBatch::Make(value.schema(), value.num_rows(), value.columns())) {}
83
84std::shared_ptr<Array> Datum::make_array() const {
85 DCHECK_EQ(Datum::ARRAY, this->kind());
86 return MakeArray(util::get<std::shared_ptr<ArrayData>>(this->value));
87}
88
89const std::shared_ptr<DataType>& Datum::type() const {
90 if (this->kind() == Datum::ARRAY) {
91 return util::get<std::shared_ptr<ArrayData>>(this->value)->type;
92 }
93 if (this->kind() == Datum::CHUNKED_ARRAY) {
94 return util::get<std::shared_ptr<ChunkedArray>>(this->value)->type();
95 }
96 if (this->kind() == Datum::SCALAR) {
97 return util::get<std::shared_ptr<Scalar>>(this->value)->type;
98 }
99 static std::shared_ptr<DataType> no_type;
100 return no_type;
101}
102
103const std::shared_ptr<Schema>& Datum::schema() const {
104 if (this->kind() == Datum::RECORD_BATCH) {
105 return util::get<std::shared_ptr<RecordBatch>>(this->value)->schema();
106 }
107 if (this->kind() == Datum::TABLE) {
108 return util::get<std::shared_ptr<Table>>(this->value)->schema();
109 }
110 static std::shared_ptr<Schema> no_schema;
111 return no_schema;
112}
113
114int64_t Datum::length() const {
115 switch (this->kind()) {
116 case Datum::ARRAY:
117 return util::get<std::shared_ptr<ArrayData>>(this->value)->length;
118 case Datum::CHUNKED_ARRAY:
119 return util::get<std::shared_ptr<ChunkedArray>>(this->value)->length();
120 case Datum::RECORD_BATCH:
121 return util::get<std::shared_ptr<RecordBatch>>(this->value)->num_rows();
122 case Datum::TABLE:
123 return util::get<std::shared_ptr<Table>>(this->value)->num_rows();
124 case Datum::SCALAR:
125 return 1;
126 default:
127 return kUnknownLength;
128 }
129}
130
131int64_t Datum::null_count() const {
132 if (this->kind() == Datum::ARRAY) {
133 return util::get<std::shared_ptr<ArrayData>>(this->value)->GetNullCount();
134 } else if (this->kind() == Datum::CHUNKED_ARRAY) {
135 return util::get<std::shared_ptr<ChunkedArray>>(this->value)->null_count();
136 } else if (this->kind() == Datum::SCALAR) {
137 const auto& val = *util::get<std::shared_ptr<Scalar>>(this->value);
138 return val.is_valid ? 0 : 1;
139 } else {
140 DCHECK(false) << "This function only valid for array-like values";
141 return 0;
142 }
143}
144
145ArrayVector Datum::chunks() const {
146 if (!this->is_arraylike()) {
147 return {};
148 }
149 if (this->is_array()) {
150 return {this->make_array()};
151 }
152 return this->chunked_array()->chunks();
153}
154
155bool Datum::Equals(const Datum& other) const {
156 if (this->kind() != other.kind()) return false;
157
158 switch (this->kind()) {
159 case Datum::NONE:
160 return true;
161 case Datum::SCALAR:
162 return internal::SharedPtrEquals(this->scalar(), other.scalar());
163 case Datum::ARRAY:
164 return internal::SharedPtrEquals(this->make_array(), other.make_array());
165 case Datum::CHUNKED_ARRAY:
166 return internal::SharedPtrEquals(this->chunked_array(), other.chunked_array());
167 case Datum::RECORD_BATCH:
168 return internal::SharedPtrEquals(this->record_batch(), other.record_batch());
169 case Datum::TABLE:
170 return internal::SharedPtrEquals(this->table(), other.table());
171 case Datum::COLLECTION:
172 return CollectionEquals(this->collection(), other.collection());
173 default:
174 return false;
175 }
176}
177
178ValueDescr Datum::descr() const {
179 if (this->is_arraylike()) {
180 return ValueDescr(this->type(), ValueDescr::ARRAY);
181 } else if (this->is_scalar()) {
182 return ValueDescr(this->type(), ValueDescr::SCALAR);
183 } else {
184 DCHECK(false) << "Datum is not value-like, this method should not be called";
185 return ValueDescr();
186 }
187}
188
189ValueDescr::Shape Datum::shape() const {
190 if (this->is_arraylike()) {
191 return ValueDescr::ARRAY;
192 } else if (this->is_scalar()) {
193 return ValueDescr::SCALAR;
194 } else {
195 DCHECK(false) << "Datum is not value-like, this method should not be called";
196 return ValueDescr::ANY;
197 }
198}
199
200static std::string FormatValueDescr(const ValueDescr& descr) {
201 std::stringstream ss;
202 switch (descr.shape) {
203 case ValueDescr::ANY:
204 ss << "any";
205 break;
206 case ValueDescr::ARRAY:
207 ss << "array";
208 break;
209 case ValueDescr::SCALAR:
210 ss << "scalar";
211 break;
212 default:
213 DCHECK(false);
214 break;
215 }
216 ss << "[" << descr.type->ToString() << "]";
217 return ss.str();
218}
219
220std::string ValueDescr::ToString() const { return FormatValueDescr(*this); }
221
222std::string ValueDescr::ToString(const std::vector<ValueDescr>& descrs) {
223 std::stringstream ss;
224 ss << "(";
225 for (size_t i = 0; i < descrs.size(); ++i) {
226 if (i > 0) {
227 ss << ", ";
228 }
229 ss << descrs[i].ToString();
230 }
231 ss << ")";
232 return ss.str();
233}
234
235void PrintTo(const ValueDescr& descr, std::ostream* os) { *os << descr.ToString(); }
236
237std::string Datum::ToString() const {
238 switch (this->kind()) {
239 case Datum::NONE:
240 return "nullptr";
241 case Datum::SCALAR:
242 return "Scalar";
243 case Datum::ARRAY:
244 return "Array";
245 case Datum::CHUNKED_ARRAY:
246 return "ChunkedArray";
247 case Datum::RECORD_BATCH:
248 return "RecordBatch";
249 case Datum::TABLE:
250 return "Table";
251 case Datum::COLLECTION: {
252 std::stringstream ss;
253 ss << "Collection(";
254 const auto& values = this->collection();
255 for (size_t i = 0; i < values.size(); ++i) {
256 if (i > 0) {
257 ss << ", ";
258 }
259 ss << values[i].ToString();
260 }
261 ss << ')';
262 return ss.str();
263 }
264 default:
265 DCHECK(false);
266 return "";
267 }
268}
269
270ValueDescr::Shape GetBroadcastShape(const std::vector<ValueDescr>& args) {
271 for (const auto& descr : args) {
272 if (descr.shape == ValueDescr::ARRAY) {
273 return ValueDescr::ARRAY;
274 }
275 }
276 return ValueDescr::SCALAR;
277}
278
279void PrintTo(const Datum& datum, std::ostream* os) {
280 switch (datum.kind()) {
281 case Datum::SCALAR:
282 *os << datum.scalar()->ToString();
283 break;
284 case Datum::ARRAY:
285 *os << datum.make_array()->ToString();
286 break;
287 default:
288 *os << datum.ToString();
289 }
290}
291
292} // namespace arrow