]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/array/builder_union.h
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / array / builder_union.h
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #pragma once
19
20 #include <cstdint>
21 #include <memory>
22 #include <string>
23 #include <vector>
24
25 #include "arrow/array/array_nested.h"
26 #include "arrow/array/builder_base.h"
27 #include "arrow/array/data.h"
28 #include "arrow/buffer_builder.h"
29 #include "arrow/memory_pool.h"
30 #include "arrow/status.h"
31 #include "arrow/type.h"
32 #include "arrow/util/visibility.h"
33
34 namespace arrow {
35
36 /// \brief Base class for union array builds.
37 ///
38 /// Note that while we subclass ArrayBuilder, as union types do not have a
39 /// validity bitmap, the bitmap builder member of ArrayBuilder is not used.
40 class ARROW_EXPORT BasicUnionBuilder : public ArrayBuilder {
41 public:
42 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
43
44 /// \cond FALSE
45 using ArrayBuilder::Finish;
46 /// \endcond
47
48 Status Finish(std::shared_ptr<UnionArray>* out) { return FinishTyped(out); }
49
50 /// \brief Make a new child builder available to the UnionArray
51 ///
52 /// \param[in] new_child the child builder
53 /// \param[in] field_name the name of the field in the union array type
54 /// if type inference is used
55 /// \return child index, which is the "type" argument that needs
56 /// to be passed to the "Append" method to add a new element to
57 /// the union array.
58 int8_t AppendChild(const std::shared_ptr<ArrayBuilder>& new_child,
59 const std::string& field_name = "");
60
61 std::shared_ptr<DataType> type() const override;
62
63 int64_t length() const override { return types_builder_.length(); }
64
65 protected:
66 BasicUnionBuilder(MemoryPool* pool,
67 const std::vector<std::shared_ptr<ArrayBuilder>>& children,
68 const std::shared_ptr<DataType>& type);
69
70 int8_t NextTypeId();
71
72 std::vector<std::shared_ptr<Field>> child_fields_;
73 std::vector<int8_t> type_codes_;
74 UnionMode::type mode_;
75
76 std::vector<ArrayBuilder*> type_id_to_children_;
77 std::vector<int> type_id_to_child_id_;
78 // for all type_id < dense_type_id_, type_id_to_children_[type_id] != nullptr
79 int8_t dense_type_id_ = 0;
80 TypedBufferBuilder<int8_t> types_builder_;
81 };
82
83 /// \class DenseUnionBuilder
84 ///
85 /// This API is EXPERIMENTAL.
86 class ARROW_EXPORT DenseUnionBuilder : public BasicUnionBuilder {
87 public:
88 /// Use this constructor to initialize the UnionBuilder with no child builders,
89 /// allowing type to be inferred. You will need to call AppendChild for each of the
90 /// children builders you want to use.
91 explicit DenseUnionBuilder(MemoryPool* pool)
92 : BasicUnionBuilder(pool, {}, dense_union(FieldVector{})), offsets_builder_(pool) {}
93
94 /// Use this constructor to specify the type explicitly.
95 /// You can still add child builders to the union after using this constructor
96 DenseUnionBuilder(MemoryPool* pool,
97 const std::vector<std::shared_ptr<ArrayBuilder>>& children,
98 const std::shared_ptr<DataType>& type)
99 : BasicUnionBuilder(pool, children, type), offsets_builder_(pool) {}
100
101 Status AppendNull() final {
102 const int8_t first_child_code = type_codes_[0];
103 ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
104 ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
105 ARROW_RETURN_NOT_OK(
106 offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
107 // Append a null arbitrarily to the first child
108 return child_builder->AppendNull();
109 }
110
111 Status AppendNulls(int64_t length) final {
112 const int8_t first_child_code = type_codes_[0];
113 ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
114 ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
115 ARROW_RETURN_NOT_OK(
116 offsets_builder_.Append(length, static_cast<int32_t>(child_builder->length())));
117 // Append just a single null to the first child
118 return child_builder->AppendNull();
119 }
120
121 Status AppendEmptyValue() final {
122 const int8_t first_child_code = type_codes_[0];
123 ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
124 ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
125 ARROW_RETURN_NOT_OK(
126 offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
127 // Append an empty value arbitrarily to the first child
128 return child_builder->AppendEmptyValue();
129 }
130
131 Status AppendEmptyValues(int64_t length) final {
132 const int8_t first_child_code = type_codes_[0];
133 ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
134 ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
135 ARROW_RETURN_NOT_OK(
136 offsets_builder_.Append(length, static_cast<int32_t>(child_builder->length())));
137 // Append just a single empty value to the first child
138 return child_builder->AppendEmptyValue();
139 }
140
141 /// \brief Append an element to the UnionArray. This must be followed
142 /// by an append to the appropriate child builder.
143 ///
144 /// \param[in] next_type type_id of the child to which the next value will be appended.
145 ///
146 /// The corresponding child builder must be appended to independently after this method
147 /// is called.
148 Status Append(int8_t next_type) {
149 ARROW_RETURN_NOT_OK(types_builder_.Append(next_type));
150 if (type_id_to_children_[next_type]->length() == kListMaximumElements) {
151 return Status::CapacityError(
152 "a dense UnionArray cannot contain more than 2^31 - 1 elements from a single "
153 "child");
154 }
155 auto offset = static_cast<int32_t>(type_id_to_children_[next_type]->length());
156 return offsets_builder_.Append(offset);
157 }
158
159 Status AppendArraySlice(const ArrayData& array, int64_t offset,
160 int64_t length) override;
161
162 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
163
164 private:
165 TypedBufferBuilder<int32_t> offsets_builder_;
166 };
167
168 /// \class SparseUnionBuilder
169 ///
170 /// This API is EXPERIMENTAL.
171 class ARROW_EXPORT SparseUnionBuilder : public BasicUnionBuilder {
172 public:
173 /// Use this constructor to initialize the UnionBuilder with no child builders,
174 /// allowing type to be inferred. You will need to call AppendChild for each of the
175 /// children builders you want to use.
176 explicit SparseUnionBuilder(MemoryPool* pool)
177 : BasicUnionBuilder(pool, {}, sparse_union(FieldVector{})) {}
178
179 /// Use this constructor to specify the type explicitly.
180 /// You can still add child builders to the union after using this constructor
181 SparseUnionBuilder(MemoryPool* pool,
182 const std::vector<std::shared_ptr<ArrayBuilder>>& children,
183 const std::shared_ptr<DataType>& type)
184 : BasicUnionBuilder(pool, children, type) {}
185
186 /// \brief Append a null value.
187 ///
188 /// A null is appended to the first child, empty values to the other children.
189 Status AppendNull() final {
190 const auto first_child_code = type_codes_[0];
191 ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
192 ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNull());
193 for (int i = 1; i < static_cast<int>(type_codes_.size()); ++i) {
194 ARROW_RETURN_NOT_OK(type_id_to_children_[type_codes_[i]]->AppendEmptyValue());
195 }
196 return Status::OK();
197 }
198
199 /// \brief Append multiple null values.
200 ///
201 /// Nulls are appended to the first child, empty values to the other children.
202 Status AppendNulls(int64_t length) final {
203 const auto first_child_code = type_codes_[0];
204 ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
205 ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNulls(length));
206 for (int i = 1; i < static_cast<int>(type_codes_.size()); ++i) {
207 ARROW_RETURN_NOT_OK(
208 type_id_to_children_[type_codes_[i]]->AppendEmptyValues(length));
209 }
210 return Status::OK();
211 }
212
213 Status AppendEmptyValue() final {
214 ARROW_RETURN_NOT_OK(types_builder_.Append(type_codes_[0]));
215 for (int8_t code : type_codes_) {
216 ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValue());
217 }
218 return Status::OK();
219 }
220
221 Status AppendEmptyValues(int64_t length) final {
222 ARROW_RETURN_NOT_OK(types_builder_.Append(length, type_codes_[0]));
223 for (int8_t code : type_codes_) {
224 ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValues(length));
225 }
226 return Status::OK();
227 }
228
229 /// \brief Append an element to the UnionArray. This must be followed
230 /// by an append to the appropriate child builder.
231 ///
232 /// \param[in] next_type type_id of the child to which the next value will be appended.
233 ///
234 /// The corresponding child builder must be appended to independently after this method
235 /// is called, and all other child builders must have null or empty value appended.
236 Status Append(int8_t next_type) { return types_builder_.Append(next_type); }
237
238 Status AppendArraySlice(const ArrayData& array, int64_t offset,
239 int64_t length) override;
240 };
241
242 } // namespace arrow