// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
25 #include "arrow/array/array_nested.h"
26 #include "arrow/array/builder_base.h"
27 #include "arrow/array/data.h"
28 #include "arrow/buffer_builder.h"
29 #include "arrow/memory_pool.h"
30 #include "arrow/status.h"
31 #include "arrow/type.h"
32 #include "arrow/util/visibility.h"
36 /// \brief Base class for union array builds.
38 /// Note that while we subclass ArrayBuilder, as union types do not have a
39 /// validity bitmap, the bitmap builder member of ArrayBuilder is not used.
40 class ARROW_EXPORT BasicUnionBuilder
: public ArrayBuilder
{
42 Status
FinishInternal(std::shared_ptr
<ArrayData
>* out
) override
;
45 using ArrayBuilder::Finish
;
48 Status
Finish(std::shared_ptr
<UnionArray
>* out
) { return FinishTyped(out
); }
50 /// \brief Make a new child builder available to the UnionArray
52 /// \param[in] new_child the child builder
53 /// \param[in] field_name the name of the field in the union array type
54 /// if type inference is used
55 /// \return child index, which is the "type" argument that needs
56 /// to be passed to the "Append" method to add a new element to
58 int8_t AppendChild(const std::shared_ptr
<ArrayBuilder
>& new_child
,
59 const std::string
& field_name
= "");
61 std::shared_ptr
<DataType
> type() const override
;
63 int64_t length() const override
{ return types_builder_
.length(); }
66 BasicUnionBuilder(MemoryPool
* pool
,
67 const std::vector
<std::shared_ptr
<ArrayBuilder
>>& children
,
68 const std::shared_ptr
<DataType
>& type
);
72 std::vector
<std::shared_ptr
<Field
>> child_fields_
;
73 std::vector
<int8_t> type_codes_
;
74 UnionMode::type mode_
;
76 std::vector
<ArrayBuilder
*> type_id_to_children_
;
77 std::vector
<int> type_id_to_child_id_
;
78 // for all type_id < dense_type_id_, type_id_to_children_[type_id] != nullptr
79 int8_t dense_type_id_
= 0;
80 TypedBufferBuilder
<int8_t> types_builder_
;
83 /// \class DenseUnionBuilder
85 /// This API is EXPERIMENTAL.
86 class ARROW_EXPORT DenseUnionBuilder
: public BasicUnionBuilder
{
88 /// Use this constructor to initialize the UnionBuilder with no child builders,
89 /// allowing type to be inferred. You will need to call AppendChild for each of the
90 /// children builders you want to use.
91 explicit DenseUnionBuilder(MemoryPool
* pool
)
92 : BasicUnionBuilder(pool
, {}, dense_union(FieldVector
{})), offsets_builder_(pool
) {}
94 /// Use this constructor to specify the type explicitly.
95 /// You can still add child builders to the union after using this constructor
96 DenseUnionBuilder(MemoryPool
* pool
,
97 const std::vector
<std::shared_ptr
<ArrayBuilder
>>& children
,
98 const std::shared_ptr
<DataType
>& type
)
99 : BasicUnionBuilder(pool
, children
, type
), offsets_builder_(pool
) {}
101 Status
AppendNull() final
{
102 const int8_t first_child_code
= type_codes_
[0];
103 ArrayBuilder
* child_builder
= type_id_to_children_
[first_child_code
];
104 ARROW_RETURN_NOT_OK(types_builder_
.Append(first_child_code
));
106 offsets_builder_
.Append(static_cast<int32_t>(child_builder
->length())));
107 // Append a null arbitrarily to the first child
108 return child_builder
->AppendNull();
111 Status
AppendNulls(int64_t length
) final
{
112 const int8_t first_child_code
= type_codes_
[0];
113 ArrayBuilder
* child_builder
= type_id_to_children_
[first_child_code
];
114 ARROW_RETURN_NOT_OK(types_builder_
.Append(length
, first_child_code
));
116 offsets_builder_
.Append(length
, static_cast<int32_t>(child_builder
->length())));
117 // Append just a single null to the first child
118 return child_builder
->AppendNull();
121 Status
AppendEmptyValue() final
{
122 const int8_t first_child_code
= type_codes_
[0];
123 ArrayBuilder
* child_builder
= type_id_to_children_
[first_child_code
];
124 ARROW_RETURN_NOT_OK(types_builder_
.Append(first_child_code
));
126 offsets_builder_
.Append(static_cast<int32_t>(child_builder
->length())));
127 // Append an empty value arbitrarily to the first child
128 return child_builder
->AppendEmptyValue();
131 Status
AppendEmptyValues(int64_t length
) final
{
132 const int8_t first_child_code
= type_codes_
[0];
133 ArrayBuilder
* child_builder
= type_id_to_children_
[first_child_code
];
134 ARROW_RETURN_NOT_OK(types_builder_
.Append(length
, first_child_code
));
136 offsets_builder_
.Append(length
, static_cast<int32_t>(child_builder
->length())));
137 // Append just a single empty value to the first child
138 return child_builder
->AppendEmptyValue();
141 /// \brief Append an element to the UnionArray. This must be followed
142 /// by an append to the appropriate child builder.
144 /// \param[in] next_type type_id of the child to which the next value will be appended.
146 /// The corresponding child builder must be appended to independently after this method
148 Status
Append(int8_t next_type
) {
149 ARROW_RETURN_NOT_OK(types_builder_
.Append(next_type
));
150 if (type_id_to_children_
[next_type
]->length() == kListMaximumElements
) {
151 return Status::CapacityError(
152 "a dense UnionArray cannot contain more than 2^31 - 1 elements from a single "
155 auto offset
= static_cast<int32_t>(type_id_to_children_
[next_type
]->length());
156 return offsets_builder_
.Append(offset
);
159 Status
AppendArraySlice(const ArrayData
& array
, int64_t offset
,
160 int64_t length
) override
;
162 Status
FinishInternal(std::shared_ptr
<ArrayData
>* out
) override
;
165 TypedBufferBuilder
<int32_t> offsets_builder_
;
168 /// \class SparseUnionBuilder
170 /// This API is EXPERIMENTAL.
171 class ARROW_EXPORT SparseUnionBuilder
: public BasicUnionBuilder
{
173 /// Use this constructor to initialize the UnionBuilder with no child builders,
174 /// allowing type to be inferred. You will need to call AppendChild for each of the
175 /// children builders you want to use.
176 explicit SparseUnionBuilder(MemoryPool
* pool
)
177 : BasicUnionBuilder(pool
, {}, sparse_union(FieldVector
{})) {}
179 /// Use this constructor to specify the type explicitly.
180 /// You can still add child builders to the union after using this constructor
181 SparseUnionBuilder(MemoryPool
* pool
,
182 const std::vector
<std::shared_ptr
<ArrayBuilder
>>& children
,
183 const std::shared_ptr
<DataType
>& type
)
184 : BasicUnionBuilder(pool
, children
, type
) {}
186 /// \brief Append a null value.
188 /// A null is appended to the first child, empty values to the other children.
189 Status
AppendNull() final
{
190 const auto first_child_code
= type_codes_
[0];
191 ARROW_RETURN_NOT_OK(types_builder_
.Append(first_child_code
));
192 ARROW_RETURN_NOT_OK(type_id_to_children_
[first_child_code
]->AppendNull());
193 for (int i
= 1; i
< static_cast<int>(type_codes_
.size()); ++i
) {
194 ARROW_RETURN_NOT_OK(type_id_to_children_
[type_codes_
[i
]]->AppendEmptyValue());
199 /// \brief Append multiple null values.
201 /// Nulls are appended to the first child, empty values to the other children.
202 Status
AppendNulls(int64_t length
) final
{
203 const auto first_child_code
= type_codes_
[0];
204 ARROW_RETURN_NOT_OK(types_builder_
.Append(length
, first_child_code
));
205 ARROW_RETURN_NOT_OK(type_id_to_children_
[first_child_code
]->AppendNulls(length
));
206 for (int i
= 1; i
< static_cast<int>(type_codes_
.size()); ++i
) {
208 type_id_to_children_
[type_codes_
[i
]]->AppendEmptyValues(length
));
213 Status
AppendEmptyValue() final
{
214 ARROW_RETURN_NOT_OK(types_builder_
.Append(type_codes_
[0]));
215 for (int8_t code
: type_codes_
) {
216 ARROW_RETURN_NOT_OK(type_id_to_children_
[code
]->AppendEmptyValue());
221 Status
AppendEmptyValues(int64_t length
) final
{
222 ARROW_RETURN_NOT_OK(types_builder_
.Append(length
, type_codes_
[0]));
223 for (int8_t code
: type_codes_
) {
224 ARROW_RETURN_NOT_OK(type_id_to_children_
[code
]->AppendEmptyValues(length
));
229 /// \brief Append an element to the UnionArray. This must be followed
230 /// by an append to the appropriate child builder.
232 /// \param[in] next_type type_id of the child to which the next value will be appended.
234 /// The corresponding child builder must be appended to independently after this method
235 /// is called, and all other child builders must have null or empty value appended.
236 Status
Append(int8_t next_type
) { return types_builder_
.Append(next_type
); }
238 Status
AppendArraySlice(const ArrayData
& array
, int64_t offset
,
239 int64_t length
) override
;