]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include "arrow/datum.h" | |
19 | ||
20 | #include <cstddef> | |
21 | #include <memory> | |
22 | #include <sstream> | |
23 | #include <vector> | |
24 | ||
25 | #include "arrow/array/array_base.h" | |
26 | #include "arrow/array/util.h" | |
27 | #include "arrow/chunked_array.h" | |
28 | #include "arrow/record_batch.h" | |
29 | #include "arrow/scalar.h" | |
30 | #include "arrow/table.h" | |
31 | #include "arrow/util/logging.h" | |
32 | #include "arrow/util/memory.h" | |
33 | ||
34 | namespace arrow { | |
35 | ||
36 | static bool CollectionEquals(const std::vector<Datum>& left, | |
37 | const std::vector<Datum>& right) { | |
38 | if (left.size() != right.size()) { | |
39 | return false; | |
40 | } | |
41 | ||
42 | for (size_t i = 0; i < left.size(); i++) { | |
43 | if (!left[i].Equals(right[i])) { | |
44 | return false; | |
45 | } | |
46 | } | |
47 | return true; | |
48 | } | |
49 | ||
50 | Datum::Datum(const Array& value) : Datum(value.data()) {} | |
51 | ||
52 | Datum::Datum(const std::shared_ptr<Array>& value) | |
53 | : Datum(value ? value->data() : NULLPTR) {} | |
54 | ||
55 | Datum::Datum(std::shared_ptr<ChunkedArray> value) : value(std::move(value)) {} | |
56 | Datum::Datum(std::shared_ptr<RecordBatch> value) : value(std::move(value)) {} | |
57 | Datum::Datum(std::shared_ptr<Table> value) : value(std::move(value)) {} | |
58 | Datum::Datum(std::vector<Datum> value) : value(std::move(value)) {} | |
59 | ||
60 | Datum::Datum(bool value) : value(std::make_shared<BooleanScalar>(value)) {} | |
61 | Datum::Datum(int8_t value) : value(std::make_shared<Int8Scalar>(value)) {} | |
62 | Datum::Datum(uint8_t value) : value(std::make_shared<UInt8Scalar>(value)) {} | |
63 | Datum::Datum(int16_t value) : value(std::make_shared<Int16Scalar>(value)) {} | |
64 | Datum::Datum(uint16_t value) : value(std::make_shared<UInt16Scalar>(value)) {} | |
65 | Datum::Datum(int32_t value) : value(std::make_shared<Int32Scalar>(value)) {} | |
66 | Datum::Datum(uint32_t value) : value(std::make_shared<UInt32Scalar>(value)) {} | |
67 | Datum::Datum(int64_t value) : value(std::make_shared<Int64Scalar>(value)) {} | |
68 | Datum::Datum(uint64_t value) : value(std::make_shared<UInt64Scalar>(value)) {} | |
69 | Datum::Datum(float value) : value(std::make_shared<FloatScalar>(value)) {} | |
70 | Datum::Datum(double value) : value(std::make_shared<DoubleScalar>(value)) {} | |
71 | Datum::Datum(std::string value) | |
72 | : value(std::make_shared<StringScalar>(std::move(value))) {} | |
73 | Datum::Datum(const char* value) : value(std::make_shared<StringScalar>(value)) {} | |
74 | ||
75 | Datum::Datum(const ChunkedArray& value) | |
76 | : value(std::make_shared<ChunkedArray>(value.chunks(), value.type())) {} | |
77 | ||
78 | Datum::Datum(const Table& value) | |
79 | : value(Table::Make(value.schema(), value.columns(), value.num_rows())) {} | |
80 | ||
81 | Datum::Datum(const RecordBatch& value) | |
82 | : value(RecordBatch::Make(value.schema(), value.num_rows(), value.columns())) {} | |
83 | ||
84 | std::shared_ptr<Array> Datum::make_array() const { | |
85 | DCHECK_EQ(Datum::ARRAY, this->kind()); | |
86 | return MakeArray(util::get<std::shared_ptr<ArrayData>>(this->value)); | |
87 | } | |
88 | ||
89 | const std::shared_ptr<DataType>& Datum::type() const { | |
90 | if (this->kind() == Datum::ARRAY) { | |
91 | return util::get<std::shared_ptr<ArrayData>>(this->value)->type; | |
92 | } | |
93 | if (this->kind() == Datum::CHUNKED_ARRAY) { | |
94 | return util::get<std::shared_ptr<ChunkedArray>>(this->value)->type(); | |
95 | } | |
96 | if (this->kind() == Datum::SCALAR) { | |
97 | return util::get<std::shared_ptr<Scalar>>(this->value)->type; | |
98 | } | |
99 | static std::shared_ptr<DataType> no_type; | |
100 | return no_type; | |
101 | } | |
102 | ||
103 | const std::shared_ptr<Schema>& Datum::schema() const { | |
104 | if (this->kind() == Datum::RECORD_BATCH) { | |
105 | return util::get<std::shared_ptr<RecordBatch>>(this->value)->schema(); | |
106 | } | |
107 | if (this->kind() == Datum::TABLE) { | |
108 | return util::get<std::shared_ptr<Table>>(this->value)->schema(); | |
109 | } | |
110 | static std::shared_ptr<Schema> no_schema; | |
111 | return no_schema; | |
112 | } | |
113 | ||
114 | int64_t Datum::length() const { | |
115 | switch (this->kind()) { | |
116 | case Datum::ARRAY: | |
117 | return util::get<std::shared_ptr<ArrayData>>(this->value)->length; | |
118 | case Datum::CHUNKED_ARRAY: | |
119 | return util::get<std::shared_ptr<ChunkedArray>>(this->value)->length(); | |
120 | case Datum::RECORD_BATCH: | |
121 | return util::get<std::shared_ptr<RecordBatch>>(this->value)->num_rows(); | |
122 | case Datum::TABLE: | |
123 | return util::get<std::shared_ptr<Table>>(this->value)->num_rows(); | |
124 | case Datum::SCALAR: | |
125 | return 1; | |
126 | default: | |
127 | return kUnknownLength; | |
128 | } | |
129 | } | |
130 | ||
131 | int64_t Datum::null_count() const { | |
132 | if (this->kind() == Datum::ARRAY) { | |
133 | return util::get<std::shared_ptr<ArrayData>>(this->value)->GetNullCount(); | |
134 | } else if (this->kind() == Datum::CHUNKED_ARRAY) { | |
135 | return util::get<std::shared_ptr<ChunkedArray>>(this->value)->null_count(); | |
136 | } else if (this->kind() == Datum::SCALAR) { | |
137 | const auto& val = *util::get<std::shared_ptr<Scalar>>(this->value); | |
138 | return val.is_valid ? 0 : 1; | |
139 | } else { | |
140 | DCHECK(false) << "This function only valid for array-like values"; | |
141 | return 0; | |
142 | } | |
143 | } | |
144 | ||
145 | ArrayVector Datum::chunks() const { | |
146 | if (!this->is_arraylike()) { | |
147 | return {}; | |
148 | } | |
149 | if (this->is_array()) { | |
150 | return {this->make_array()}; | |
151 | } | |
152 | return this->chunked_array()->chunks(); | |
153 | } | |
154 | ||
155 | bool Datum::Equals(const Datum& other) const { | |
156 | if (this->kind() != other.kind()) return false; | |
157 | ||
158 | switch (this->kind()) { | |
159 | case Datum::NONE: | |
160 | return true; | |
161 | case Datum::SCALAR: | |
162 | return internal::SharedPtrEquals(this->scalar(), other.scalar()); | |
163 | case Datum::ARRAY: | |
164 | return internal::SharedPtrEquals(this->make_array(), other.make_array()); | |
165 | case Datum::CHUNKED_ARRAY: | |
166 | return internal::SharedPtrEquals(this->chunked_array(), other.chunked_array()); | |
167 | case Datum::RECORD_BATCH: | |
168 | return internal::SharedPtrEquals(this->record_batch(), other.record_batch()); | |
169 | case Datum::TABLE: | |
170 | return internal::SharedPtrEquals(this->table(), other.table()); | |
171 | case Datum::COLLECTION: | |
172 | return CollectionEquals(this->collection(), other.collection()); | |
173 | default: | |
174 | return false; | |
175 | } | |
176 | } | |
177 | ||
178 | ValueDescr Datum::descr() const { | |
179 | if (this->is_arraylike()) { | |
180 | return ValueDescr(this->type(), ValueDescr::ARRAY); | |
181 | } else if (this->is_scalar()) { | |
182 | return ValueDescr(this->type(), ValueDescr::SCALAR); | |
183 | } else { | |
184 | DCHECK(false) << "Datum is not value-like, this method should not be called"; | |
185 | return ValueDescr(); | |
186 | } | |
187 | } | |
188 | ||
189 | ValueDescr::Shape Datum::shape() const { | |
190 | if (this->is_arraylike()) { | |
191 | return ValueDescr::ARRAY; | |
192 | } else if (this->is_scalar()) { | |
193 | return ValueDescr::SCALAR; | |
194 | } else { | |
195 | DCHECK(false) << "Datum is not value-like, this method should not be called"; | |
196 | return ValueDescr::ANY; | |
197 | } | |
198 | } | |
199 | ||
200 | static std::string FormatValueDescr(const ValueDescr& descr) { | |
201 | std::stringstream ss; | |
202 | switch (descr.shape) { | |
203 | case ValueDescr::ANY: | |
204 | ss << "any"; | |
205 | break; | |
206 | case ValueDescr::ARRAY: | |
207 | ss << "array"; | |
208 | break; | |
209 | case ValueDescr::SCALAR: | |
210 | ss << "scalar"; | |
211 | break; | |
212 | default: | |
213 | DCHECK(false); | |
214 | break; | |
215 | } | |
216 | ss << "[" << descr.type->ToString() << "]"; | |
217 | return ss.str(); | |
218 | } | |
219 | ||
220 | std::string ValueDescr::ToString() const { return FormatValueDescr(*this); } | |
221 | ||
222 | std::string ValueDescr::ToString(const std::vector<ValueDescr>& descrs) { | |
223 | std::stringstream ss; | |
224 | ss << "("; | |
225 | for (size_t i = 0; i < descrs.size(); ++i) { | |
226 | if (i > 0) { | |
227 | ss << ", "; | |
228 | } | |
229 | ss << descrs[i].ToString(); | |
230 | } | |
231 | ss << ")"; | |
232 | return ss.str(); | |
233 | } | |
234 | ||
235 | void PrintTo(const ValueDescr& descr, std::ostream* os) { *os << descr.ToString(); } | |
236 | ||
237 | std::string Datum::ToString() const { | |
238 | switch (this->kind()) { | |
239 | case Datum::NONE: | |
240 | return "nullptr"; | |
241 | case Datum::SCALAR: | |
242 | return "Scalar"; | |
243 | case Datum::ARRAY: | |
244 | return "Array"; | |
245 | case Datum::CHUNKED_ARRAY: | |
246 | return "ChunkedArray"; | |
247 | case Datum::RECORD_BATCH: | |
248 | return "RecordBatch"; | |
249 | case Datum::TABLE: | |
250 | return "Table"; | |
251 | case Datum::COLLECTION: { | |
252 | std::stringstream ss; | |
253 | ss << "Collection("; | |
254 | const auto& values = this->collection(); | |
255 | for (size_t i = 0; i < values.size(); ++i) { | |
256 | if (i > 0) { | |
257 | ss << ", "; | |
258 | } | |
259 | ss << values[i].ToString(); | |
260 | } | |
261 | ss << ')'; | |
262 | return ss.str(); | |
263 | } | |
264 | default: | |
265 | DCHECK(false); | |
266 | return ""; | |
267 | } | |
268 | } | |
269 | ||
270 | ValueDescr::Shape GetBroadcastShape(const std::vector<ValueDescr>& args) { | |
271 | for (const auto& descr : args) { | |
272 | if (descr.shape == ValueDescr::ARRAY) { | |
273 | return ValueDescr::ARRAY; | |
274 | } | |
275 | } | |
276 | return ValueDescr::SCALAR; | |
277 | } | |
278 | ||
279 | void PrintTo(const Datum& datum, std::ostream* os) { | |
280 | switch (datum.kind()) { | |
281 | case Datum::SCALAR: | |
282 | *os << datum.scalar()->ToString(); | |
283 | break; | |
284 | case Datum::ARRAY: | |
285 | *os << datum.make_array()->ToString(); | |
286 | break; | |
287 | default: | |
288 | *os << datum.ToString(); | |
289 | } | |
290 | } | |
291 | ||
292 | } // namespace arrow |