]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/stl.h
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / stl.h
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#pragma once
19
20#include <algorithm>
21#include <cstddef>
22#include <memory>
23#include <sstream>
24#include <string>
25#include <tuple>
26#include <type_traits>
27#include <utility>
28#include <vector>
29
30#include "arrow/array.h"
31#include "arrow/array/builder_base.h"
32#include "arrow/array/builder_binary.h"
33#include "arrow/array/builder_nested.h"
34#include "arrow/array/builder_primitive.h"
35#include "arrow/chunked_array.h"
36#include "arrow/compute/api.h"
37#include "arrow/status.h"
38#include "arrow/table.h"
39#include "arrow/type_fwd.h"
40#include "arrow/type_traits.h"
41#include "arrow/util/checked_cast.h"
42#include "arrow/util/macros.h"
43
44namespace arrow {
45
46class Schema;
47
48namespace stl {
49
50namespace internal {
51
/// Detects types that behave like an optional value.
///
/// Primary template: reports false. A partial specialization of this trait
/// reports true for the types that satisfy its enable_if condition.
template <typename T, typename = void>
struct is_optional_like : std::false_type {};
54
/// Detects types that can be dereferenced with unary `*`.
///
/// Primary template: reports false. A partial specialization of this trait
/// reports true when the dereference expression is well-formed.
template <typename T, typename = void>
struct is_dereferencable : std::false_type {};
57
58template <typename T>
59struct is_dereferencable<T, arrow::internal::void_t<decltype(*std::declval<T>())>>
60 : public std::true_type {};
61
62template <typename T>
63struct is_optional_like<
64 T, typename std::enable_if<
65 std::is_constructible<bool, T>::value && is_dereferencable<T>::value &&
66 !std::is_array<typename std::remove_reference<T>::type>::value>::type>
67 : public std::true_type {};
68
/// Type of the N-th element of Tuple after std::decay has been applied to it,
/// so references and cv-qualifiers of the element are stripped.
template <std::size_t N, typename Tuple>
using BareTupleElement =
    typename std::decay<typename std::tuple_element<N, Tuple>::type>::type;
72
73} // namespace internal
74
75template <typename T, typename R = void>
76using enable_if_optional_like =
77 typename std::enable_if<internal::is_optional_like<T>::value, R>::type;
78
/// Traits meta class that maps standard C/C++ types to their equivalent Arrow types.
///
/// The primary template is empty; the actual mappings are supplied by
/// specializations. The second template parameter exists so that
/// specializations can be enabled conditionally.
template <typename T, typename Enable = void>
struct ConversionTraits {};
82
83/// Returns builder type for given standard C/C++ type.
84template <typename CType>
85using CBuilderType =
86 typename TypeTraits<typename ConversionTraits<CType>::ArrowType>::BuilderType;
87
88/// Default implementation of AppendListValues.
89///
90/// This function can be specialized by user to take advantage of appending
91/// contiguous ranges while appending. This default implementation will call
92/// ConversionTraits<ValueCType>::AppendRow() for each value in the range.
93template <typename ValueCType, typename Range>
94inline Status AppendListValues(CBuilderType<ValueCType>& value_builder,
95 Range&& cell_range) {
96 for (auto const& value : cell_range) {
97 ARROW_RETURN_NOT_OK(ConversionTraits<ValueCType>::AppendRow(value_builder, value));
98 }
99 return Status::OK();
100}
101
102#define ARROW_STL_CONVERSION(CType_, ArrowType_) \
103 template <> \
104 struct ConversionTraits<CType_> : public CTypeTraits<CType_> { \
105 static Status AppendRow(typename TypeTraits<ArrowType_>::BuilderType& builder, \
106 CType_ cell) { \
107 return builder.Append(cell); \
108 } \
109 static CType_ GetEntry(const typename TypeTraits<ArrowType_>::ArrayType& array, \
110 size_t j) { \
111 return array.Value(j); \
112 } \
113 }; \
114 \
115 template <> \
116 inline Status AppendListValues<CType_, const std::vector<CType_>&>( \
117 typename TypeTraits<ArrowType_>::BuilderType & value_builder, \
118 const std::vector<CType_>& cell_range) { \
119 return value_builder.AppendValues(cell_range); \
120 }
121
122ARROW_STL_CONVERSION(bool, BooleanType)
123ARROW_STL_CONVERSION(int8_t, Int8Type)
124ARROW_STL_CONVERSION(int16_t, Int16Type)
125ARROW_STL_CONVERSION(int32_t, Int32Type)
126ARROW_STL_CONVERSION(int64_t, Int64Type)
127ARROW_STL_CONVERSION(uint8_t, UInt8Type)
128ARROW_STL_CONVERSION(uint16_t, UInt16Type)
129ARROW_STL_CONVERSION(uint32_t, UInt32Type)
130ARROW_STL_CONVERSION(uint64_t, UInt64Type)
131ARROW_STL_CONVERSION(float, FloatType)
132ARROW_STL_CONVERSION(double, DoubleType)
133
134template <>
135struct ConversionTraits<std::string> : public CTypeTraits<std::string> {
136 static Status AppendRow(StringBuilder& builder, const std::string& cell) {
137 return builder.Append(cell);
138 }
139 static std::string GetEntry(const StringArray& array, size_t j) {
140 return array.GetString(j);
141 }
142};
143
144/// Append cell range elements as a single value to the list builder.
145///
146/// Cell range will be added to child builder using AppendListValues<ValueCType>()
147/// if provided. AppendListValues<ValueCType>() has a default implementation, but
148/// it can be specialized by users.
149template <typename ValueCType, typename ListBuilderType, typename Range>
150Status AppendCellRange(ListBuilderType& builder, Range&& cell_range) {
151 constexpr bool is_list_builder = std::is_same<ListBuilderType, ListBuilder>::value;
152 constexpr bool is_large_list_builder =
153 std::is_same<ListBuilderType, LargeListBuilder>::value;
154 static_assert(
155 is_list_builder || is_large_list_builder,
156 "Builder type must be either ListBuilder or LargeListBuilder for appending "
157 "multiple rows.");
158
159 using ChildBuilderType = CBuilderType<ValueCType>;
160 ARROW_RETURN_NOT_OK(builder.Append());
161 auto& value_builder =
162 ::arrow::internal::checked_cast<ChildBuilderType&>(*builder.value_builder());
163
164 // XXX: Remove appended value before returning if status isn't OK?
165 return AppendListValues<ValueCType>(value_builder, std::forward<Range>(cell_range));
166}
167
168template <typename ValueCType>
169struct ConversionTraits<std::vector<ValueCType>>
170 : public CTypeTraits<std::vector<ValueCType>> {
171 static Status AppendRow(ListBuilder& builder, const std::vector<ValueCType>& cell) {
172 return AppendCellRange<ValueCType>(builder, cell);
173 }
174
175 static std::vector<ValueCType> GetEntry(const ListArray& array, size_t j) {
176 using ElementArrayType =
177 typename TypeTraits<typename ConversionTraits<ValueCType>::ArrowType>::ArrayType;
178
179 const ElementArrayType& value_array =
180 ::arrow::internal::checked_cast<const ElementArrayType&>(*array.values());
181
182 std::vector<ValueCType> vec(array.value_length(j));
183 for (int64_t i = 0; i < array.value_length(j); i++) {
184 vec[i] =
185 ConversionTraits<ValueCType>::GetEntry(value_array, array.value_offset(j) + i);
186 }
187 return vec;
188 }
189};
190
191template <typename Optional>
192struct ConversionTraits<Optional, enable_if_optional_like<Optional>>
193 : public CTypeTraits<typename std::decay<decltype(*std::declval<Optional>())>::type> {
194 using OptionalInnerType =
195 typename std::decay<decltype(*std::declval<Optional>())>::type;
196 using typename CTypeTraits<OptionalInnerType>::ArrowType;
197 using CTypeTraits<OptionalInnerType>::type_singleton;
198
199 static Status AppendRow(typename TypeTraits<ArrowType>::BuilderType& builder,
200 const Optional& cell) {
201 if (cell) {
202 return ConversionTraits<OptionalInnerType>::AppendRow(builder, *cell);
203 } else {
204 return builder.AppendNull();
205 }
206 }
207};
208
209/// Build an arrow::Schema based upon the types defined in a std::tuple-like structure.
210///
211/// While the type information is available at compile-time, we still need to add the
212/// column names at runtime, thus these methods are not constexpr.
213template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
214struct SchemaFromTuple {
215 using Element = internal::BareTupleElement<N - 1, Tuple>;
216
217 // Implementations that take a vector-like object for the column names.
218
219 /// Recursively build a vector of arrow::Field from the defined types.
220 ///
221 /// In most cases MakeSchema is the better entrypoint for the Schema creation.
222 static std::vector<std::shared_ptr<Field>> MakeSchemaRecursion(
223 const std::vector<std::string>& names) {
224 std::vector<std::shared_ptr<Field>> ret =
225 SchemaFromTuple<Tuple, N - 1>::MakeSchemaRecursion(names);
226 auto type = ConversionTraits<Element>::type_singleton();
227 ret.push_back(field(names[N - 1], type, internal::is_optional_like<Element>::value));
228 return ret;
229 }
230
231 /// Build a Schema from the types of the tuple-like structure passed in as template
232 /// parameter assign the column names at runtime.
233 ///
234 /// An example usage of this API can look like the following:
235 ///
236 /// \code{.cpp}
237 /// using TupleType = std::tuple<int, std::vector<std::string>>;
238 /// std::shared_ptr<Schema> schema =
239 /// SchemaFromTuple<TupleType>::MakeSchema({"int_column", "list_of_strings_column"});
240 /// \endcode
241 static std::shared_ptr<Schema> MakeSchema(const std::vector<std::string>& names) {
242 return std::make_shared<Schema>(MakeSchemaRecursion(names));
243 }
244
245 // Implementations that take a tuple-like object for the column names.
246
247 /// Recursively build a vector of arrow::Field from the defined types.
248 ///
249 /// In most cases MakeSchema is the better entrypoint for the Schema creation.
250 template <typename NamesTuple>
251 static std::vector<std::shared_ptr<Field>> MakeSchemaRecursionT(
252 const NamesTuple& names) {
253 using std::get;
254
255 std::vector<std::shared_ptr<Field>> ret =
256 SchemaFromTuple<Tuple, N - 1>::MakeSchemaRecursionT(names);
257 std::shared_ptr<DataType> type = ConversionTraits<Element>::type_singleton();
258 ret.push_back(
259 field(get<N - 1>(names), type, internal::is_optional_like<Element>::value));
260 return ret;
261 }
262
263 /// Build a Schema from the types of the tuple-like structure passed in as template
264 /// parameter assign the column names at runtime.
265 ///
266 /// An example usage of this API can look like the following:
267 ///
268 /// \code{.cpp}
269 /// using TupleType = std::tuple<int, std::vector<std::string>>;
270 /// std::shared_ptr<Schema> schema =
271 /// SchemaFromTuple<TupleType>::MakeSchema({"int_column", "list_of_strings_column"});
272 /// \endcode
273 template <typename NamesTuple>
274 static std::shared_ptr<Schema> MakeSchema(const NamesTuple& names) {
275 return std::make_shared<Schema>(MakeSchemaRecursionT<NamesTuple>(names));
276 }
277};
278
279template <typename Tuple>
280struct SchemaFromTuple<Tuple, 0> {
281 static std::vector<std::shared_ptr<Field>> MakeSchemaRecursion(
282 const std::vector<std::string>& names) {
283 std::vector<std::shared_ptr<Field>> ret;
284 ret.reserve(names.size());
285 return ret;
286 }
287
288 template <typename NamesTuple>
289 static std::vector<std::shared_ptr<Field>> MakeSchemaRecursionT(
290 const NamesTuple& names) {
291 std::vector<std::shared_ptr<Field>> ret;
292 ret.reserve(std::tuple_size<NamesTuple>::value);
293 return ret;
294 }
295};
296
297namespace internal {
298
299template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
300struct CreateBuildersRecursive {
301 static Status Make(MemoryPool* pool,
302 std::vector<std::unique_ptr<ArrayBuilder>>* builders) {
303 using Element = BareTupleElement<N - 1, Tuple>;
304 std::shared_ptr<DataType> type = ConversionTraits<Element>::type_singleton();
305 ARROW_RETURN_NOT_OK(MakeBuilder(pool, type, &builders->at(N - 1)));
306
307 return CreateBuildersRecursive<Tuple, N - 1>::Make(pool, builders);
308 }
309};
310
311template <typename Tuple>
312struct CreateBuildersRecursive<Tuple, 0> {
313 static Status Make(MemoryPool*, std::vector<std::unique_ptr<ArrayBuilder>>*) {
314 return Status::OK();
315 }
316};
317
318template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
319struct RowIterator {
320 static Status Append(const std::vector<std::unique_ptr<ArrayBuilder>>& builders,
321 const Tuple& row) {
322 using std::get;
323 using Element = BareTupleElement<N - 1, Tuple>;
324 using BuilderType =
325 typename TypeTraits<typename ConversionTraits<Element>::ArrowType>::BuilderType;
326
327 BuilderType& builder =
328 ::arrow::internal::checked_cast<BuilderType&>(*builders[N - 1]);
329 ARROW_RETURN_NOT_OK(ConversionTraits<Element>::AppendRow(builder, get<N - 1>(row)));
330
331 return RowIterator<Tuple, N - 1>::Append(builders, row);
332 }
333};
334
335template <typename Tuple>
336struct RowIterator<Tuple, 0> {
337 static Status Append(const std::vector<std::unique_ptr<ArrayBuilder>>& builders,
338 const Tuple& row) {
339 return Status::OK();
340 }
341};
342
343template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
344struct EnsureColumnTypes {
345 static Status Cast(const Table& table, std::shared_ptr<Table>* table_owner,
346 const compute::CastOptions& cast_options, compute::ExecContext* ctx,
347 std::reference_wrapper<const ::arrow::Table>* result) {
348 using Element = BareTupleElement<N - 1, Tuple>;
349 std::shared_ptr<DataType> expected_type = ConversionTraits<Element>::type_singleton();
350
351 if (!table.schema()->field(N - 1)->type()->Equals(*expected_type)) {
352 ARROW_ASSIGN_OR_RAISE(
353 Datum casted,
354 compute::Cast(table.column(N - 1), expected_type, cast_options, ctx));
355 auto new_field = table.schema()->field(N - 1)->WithType(expected_type);
356 ARROW_ASSIGN_OR_RAISE(*table_owner,
357 table.SetColumn(N - 1, new_field, casted.chunked_array()));
358 *result = **table_owner;
359 }
360
361 return EnsureColumnTypes<Tuple, N - 1>::Cast(result->get(), table_owner, cast_options,
362 ctx, result);
363 }
364};
365
366template <typename Tuple>
367struct EnsureColumnTypes<Tuple, 0> {
368 static Status Cast(const Table& table, std::shared_ptr<Table>* table_owner,
369 const compute::CastOptions& cast_options, compute::ExecContext* ctx,
370 std::reference_wrapper<const ::arrow::Table>* result) {
371 return Status::OK();
372 }
373};
374
375template <typename Range, typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
376struct TupleSetter {
377 static void Fill(const Table& table, Range* rows) {
378 using std::get;
379 using Element = typename std::tuple_element<N - 1, Tuple>::type;
380 using ArrayType =
381 typename TypeTraits<typename ConversionTraits<Element>::ArrowType>::ArrayType;
382
383 auto iter = rows->begin();
384 const ChunkedArray& chunked_array = *table.column(N - 1);
385 for (int i = 0; i < chunked_array.num_chunks(); i++) {
386 const ArrayType& array =
387 ::arrow::internal::checked_cast<const ArrayType&>(*chunked_array.chunk(i));
388 for (int64_t j = 0; j < array.length(); j++) {
389 get<N - 1>(*iter++) = ConversionTraits<Element>::GetEntry(array, j);
390 }
391 }
392
393 return TupleSetter<Range, Tuple, N - 1>::Fill(table, rows);
394 }
395};
396
397template <typename Range, typename Tuple>
398struct TupleSetter<Range, Tuple, 0> {
399 static void Fill(const Table& table, Range* rows) {}
400};
401
402} // namespace internal
403
404template <typename Range>
405Status TableFromTupleRange(MemoryPool* pool, Range&& rows,
406 const std::vector<std::string>& names,
407 std::shared_ptr<Table>* table) {
408 using row_type = typename std::iterator_traits<decltype(std::begin(rows))>::value_type;
409 constexpr std::size_t n_columns = std::tuple_size<row_type>::value;
410
411 std::shared_ptr<Schema> schema = SchemaFromTuple<row_type>::MakeSchema(names);
412
413 std::vector<std::unique_ptr<ArrayBuilder>> builders(n_columns);
414 ARROW_RETURN_NOT_OK(internal::CreateBuildersRecursive<row_type>::Make(pool, &builders));
415
416 for (auto const& row : rows) {
417 ARROW_RETURN_NOT_OK(internal::RowIterator<row_type>::Append(builders, row));
418 }
419
420 std::vector<std::shared_ptr<Array>> arrays;
421 for (auto const& builder : builders) {
422 std::shared_ptr<Array> array;
423 ARROW_RETURN_NOT_OK(builder->Finish(&array));
424 arrays.emplace_back(array);
425 }
426
427 *table = Table::Make(std::move(schema), std::move(arrays));
428
429 return Status::OK();
430}
431
432template <typename Range>
433Status TupleRangeFromTable(const Table& table, const compute::CastOptions& cast_options,
434 compute::ExecContext* ctx, Range* rows) {
435 using row_type = typename std::decay<decltype(*std::begin(*rows))>::type;
436 constexpr std::size_t n_columns = std::tuple_size<row_type>::value;
437
438 if (table.schema()->num_fields() != n_columns) {
439 std::stringstream ss;
440 ss << "Number of columns in the table does not match the width of the target: ";
441 ss << table.schema()->num_fields() << " != " << n_columns;
442 return Status::Invalid(ss.str());
443 }
444
445 // TODO: Use std::size with C++17
446 if (rows->size() != static_cast<size_t>(table.num_rows())) {
447 std::stringstream ss;
448 ss << "Number of rows in the table does not match the size of the target: ";
449 ss << table.num_rows() << " != " << rows->size();
450 return Status::Invalid(ss.str());
451 }
452
453 // Check that all columns have the correct type, otherwise cast them.
454 std::shared_ptr<Table> table_owner;
455 std::reference_wrapper<const ::arrow::Table> current_table(table);
456
457 ARROW_RETURN_NOT_OK(internal::EnsureColumnTypes<row_type>::Cast(
458 table, &table_owner, cast_options, ctx, &current_table));
459
460 internal::TupleSetter<Range, row_type>::Fill(current_table.get(), rows);
461
462 return Status::OK();
463}
464
465} // namespace stl
466} // namespace arrow