]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/array/builder_nested.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / array / builder_nested.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "arrow/array/builder_nested.h"
19
20 #include <cstddef>
21 #include <cstdint>
22 #include <utility>
23 #include <vector>
24
25 #include "arrow/buffer.h"
26 #include "arrow/status.h"
27 #include "arrow/type.h"
28 #include "arrow/util/checked_cast.h"
29 #include "arrow/util/logging.h"
30
31 namespace arrow {
32
33 // ----------------------------------------------------------------------
34 // MapBuilder
35
36 MapBuilder::MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
37 std::shared_ptr<ArrayBuilder> const& item_builder,
38 const std::shared_ptr<DataType>& type)
39 : ArrayBuilder(pool), key_builder_(key_builder), item_builder_(item_builder) {
40 auto map_type = internal::checked_cast<const MapType*>(type.get());
41 keys_sorted_ = map_type->keys_sorted();
42
43 std::vector<std::shared_ptr<ArrayBuilder>> child_builders{key_builder, item_builder};
44 auto struct_builder =
45 std::make_shared<StructBuilder>(map_type->value_type(), pool, child_builders);
46
47 list_builder_ =
48 std::make_shared<ListBuilder>(pool, struct_builder, struct_builder->type());
49 }
50
51 MapBuilder::MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
52 const std::shared_ptr<ArrayBuilder>& item_builder,
53 bool keys_sorted)
54 : MapBuilder(pool, key_builder, item_builder,
55 map(key_builder->type(), item_builder->type(), keys_sorted)) {}
56
57 MapBuilder::MapBuilder(MemoryPool* pool,
58 const std::shared_ptr<ArrayBuilder>& struct_builder,
59 const std::shared_ptr<DataType>& type)
60 : ArrayBuilder(pool) {
61 auto map_type = internal::checked_cast<const MapType*>(type.get());
62 keys_sorted_ = map_type->keys_sorted();
63 key_builder_ = struct_builder->child_builder(0);
64 item_builder_ = struct_builder->child_builder(1);
65 list_builder_ =
66 std::make_shared<ListBuilder>(pool, struct_builder, struct_builder->type());
67 }
68
69 Status MapBuilder::Resize(int64_t capacity) {
70 RETURN_NOT_OK(list_builder_->Resize(capacity));
71 capacity_ = list_builder_->capacity();
72 return Status::OK();
73 }
74
75 void MapBuilder::Reset() {
76 list_builder_->Reset();
77 ArrayBuilder::Reset();
78 }
79
80 Status MapBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
81 ARROW_CHECK_EQ(item_builder_->length(), key_builder_->length())
82 << "keys and items builders don't have the same size in MapBuilder";
83 RETURN_NOT_OK(AdjustStructBuilderLength());
84 RETURN_NOT_OK(list_builder_->FinishInternal(out));
85 (*out)->type = type();
86 ArrayBuilder::Reset();
87 return Status::OK();
88 }
89
90 Status MapBuilder::AppendValues(const int32_t* offsets, int64_t length,
91 const uint8_t* valid_bytes) {
92 DCHECK_EQ(item_builder_->length(), key_builder_->length());
93 RETURN_NOT_OK(AdjustStructBuilderLength());
94 RETURN_NOT_OK(list_builder_->AppendValues(offsets, length, valid_bytes));
95 length_ = list_builder_->length();
96 null_count_ = list_builder_->null_count();
97 return Status::OK();
98 }
99
100 Status MapBuilder::Append() {
101 DCHECK_EQ(item_builder_->length(), key_builder_->length());
102 RETURN_NOT_OK(AdjustStructBuilderLength());
103 RETURN_NOT_OK(list_builder_->Append());
104 length_ = list_builder_->length();
105 return Status::OK();
106 }
107
108 Status MapBuilder::AppendNull() {
109 DCHECK_EQ(item_builder_->length(), key_builder_->length());
110 RETURN_NOT_OK(AdjustStructBuilderLength());
111 RETURN_NOT_OK(list_builder_->AppendNull());
112 length_ = list_builder_->length();
113 null_count_ = list_builder_->null_count();
114 return Status::OK();
115 }
116
117 Status MapBuilder::AppendNulls(int64_t length) {
118 DCHECK_EQ(item_builder_->length(), key_builder_->length());
119 RETURN_NOT_OK(AdjustStructBuilderLength());
120 RETURN_NOT_OK(list_builder_->AppendNulls(length));
121 length_ = list_builder_->length();
122 null_count_ = list_builder_->null_count();
123 return Status::OK();
124 }
125
126 Status MapBuilder::AppendEmptyValue() {
127 DCHECK_EQ(item_builder_->length(), key_builder_->length());
128 RETURN_NOT_OK(AdjustStructBuilderLength());
129 RETURN_NOT_OK(list_builder_->AppendEmptyValue());
130 length_ = list_builder_->length();
131 null_count_ = list_builder_->null_count();
132 return Status::OK();
133 }
134
135 Status MapBuilder::AppendEmptyValues(int64_t length) {
136 DCHECK_EQ(item_builder_->length(), key_builder_->length());
137 RETURN_NOT_OK(AdjustStructBuilderLength());
138 RETURN_NOT_OK(list_builder_->AppendEmptyValues(length));
139 length_ = list_builder_->length();
140 null_count_ = list_builder_->null_count();
141 return Status::OK();
142 }
143
144 Status MapBuilder::AdjustStructBuilderLength() {
145 // If key/item builders have been appended, adjust struct builder length
146 // to match. Struct and key are non-nullable, append all valid values.
147 auto struct_builder =
148 internal::checked_cast<StructBuilder*>(list_builder_->value_builder());
149 if (struct_builder->length() < key_builder_->length()) {
150 int64_t length_diff = key_builder_->length() - struct_builder->length();
151 RETURN_NOT_OK(struct_builder->AppendValues(length_diff, NULLPTR));
152 }
153 return Status::OK();
154 }
155
156 // ----------------------------------------------------------------------
157 // FixedSizeListBuilder
158
159 FixedSizeListBuilder::FixedSizeListBuilder(
160 MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& value_builder,
161 const std::shared_ptr<DataType>& type)
162 : ArrayBuilder(pool),
163 value_field_(type->field(0)),
164 list_size_(
165 internal::checked_cast<const FixedSizeListType*>(type.get())->list_size()),
166 value_builder_(value_builder) {}
167
168 FixedSizeListBuilder::FixedSizeListBuilder(
169 MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& value_builder,
170 int32_t list_size)
171 : FixedSizeListBuilder(pool, value_builder,
172 fixed_size_list(value_builder->type(), list_size)) {}
173
174 void FixedSizeListBuilder::Reset() {
175 ArrayBuilder::Reset();
176 value_builder_->Reset();
177 }
178
179 Status FixedSizeListBuilder::Append() {
180 RETURN_NOT_OK(Reserve(1));
181 UnsafeAppendToBitmap(true);
182 return Status::OK();
183 }
184
185 Status FixedSizeListBuilder::AppendValues(int64_t length, const uint8_t* valid_bytes) {
186 RETURN_NOT_OK(Reserve(length));
187 UnsafeAppendToBitmap(valid_bytes, length);
188 return Status::OK();
189 }
190
191 Status FixedSizeListBuilder::AppendNull() {
192 RETURN_NOT_OK(Reserve(1));
193 UnsafeAppendToBitmap(false);
194 return value_builder_->AppendNulls(list_size_);
195 }
196
197 Status FixedSizeListBuilder::AppendNulls(int64_t length) {
198 RETURN_NOT_OK(Reserve(length));
199 UnsafeAppendToBitmap(length, false);
200 return value_builder_->AppendNulls(list_size_ * length);
201 }
202
203 Status FixedSizeListBuilder::ValidateOverflow(int64_t new_elements) {
204 auto new_length = value_builder_->length() + new_elements;
205 if (new_elements != list_size_) {
206 return Status::Invalid("Length of item not correct: expected ", list_size_,
207 " but got array of size ", new_elements);
208 }
209 if (new_length > maximum_elements()) {
210 return Status::CapacityError("array cannot contain more than ", maximum_elements(),
211 " elements, have ", new_elements);
212 }
213 return Status::OK();
214 }
215
216 Status FixedSizeListBuilder::AppendEmptyValue() {
217 RETURN_NOT_OK(Reserve(1));
218 UnsafeAppendToBitmap(true);
219 return value_builder_->AppendEmptyValues(list_size_);
220 }
221
222 Status FixedSizeListBuilder::AppendEmptyValues(int64_t length) {
223 RETURN_NOT_OK(Reserve(length));
224 UnsafeAppendToBitmap(length, true);
225 return value_builder_->AppendEmptyValues(list_size_ * length);
226 }
227
228 Status FixedSizeListBuilder::Resize(int64_t capacity) {
229 RETURN_NOT_OK(CheckCapacity(capacity));
230 return ArrayBuilder::Resize(capacity);
231 }
232
233 Status FixedSizeListBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
234 std::shared_ptr<ArrayData> items;
235
236 if (value_builder_->length() == 0) {
237 // Try to make sure we get a non-null values buffer (ARROW-2744)
238 RETURN_NOT_OK(value_builder_->Resize(0));
239 }
240 RETURN_NOT_OK(value_builder_->FinishInternal(&items));
241
242 std::shared_ptr<Buffer> null_bitmap;
243 RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
244 *out = ArrayData::Make(type(), length_, {null_bitmap}, {std::move(items)}, null_count_);
245 Reset();
246 return Status::OK();
247 }
248
249 // ----------------------------------------------------------------------
250 // Struct
251
252 StructBuilder::StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
253 std::vector<std::shared_ptr<ArrayBuilder>> field_builders)
254 : ArrayBuilder(pool), type_(type) {
255 children_ = std::move(field_builders);
256 }
257
258 void StructBuilder::Reset() {
259 ArrayBuilder::Reset();
260 for (const auto& field_builder : children_) {
261 field_builder->Reset();
262 }
263 }
264
265 Status StructBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
266 std::shared_ptr<Buffer> null_bitmap;
267 RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
268
269 std::vector<std::shared_ptr<ArrayData>> child_data(children_.size());
270 for (size_t i = 0; i < children_.size(); ++i) {
271 if (length_ == 0) {
272 // Try to make sure the child buffers are initialized
273 RETURN_NOT_OK(children_[i]->Resize(0));
274 }
275 RETURN_NOT_OK(children_[i]->FinishInternal(&child_data[i]));
276 }
277
278 *out = ArrayData::Make(type(), length_, {null_bitmap}, null_count_);
279 (*out)->child_data = std::move(child_data);
280
281 capacity_ = length_ = null_count_ = 0;
282 return Status::OK();
283 }
284
285 std::shared_ptr<DataType> StructBuilder::type() const {
286 DCHECK_EQ(type_->fields().size(), children_.size());
287 std::vector<std::shared_ptr<Field>> fields(children_.size());
288 for (int i = 0; i < static_cast<int>(fields.size()); ++i) {
289 fields[i] = type_->field(i)->WithType(children_[i]->type());
290 }
291 return struct_(std::move(fields));
292 }
293
294 } // namespace arrow