]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/array/builder_binary.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / array / builder_binary.cc
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include "arrow/array/builder_binary.h"
19
20#include <algorithm>
21#include <cstddef>
22#include <cstdint>
23#include <cstring>
24#include <numeric>
25#include <string>
26#include <utility>
27#include <vector>
28
29#include "arrow/array.h"
30#include "arrow/buffer.h"
31#include "arrow/status.h"
32#include "arrow/type.h"
33#include "arrow/type_traits.h"
34#include "arrow/util/bit_util.h"
35#include "arrow/util/checked_cast.h"
36#include "arrow/util/decimal.h"
37#include "arrow/util/logging.h"
38
39namespace arrow {
40
41using internal::checked_cast;
42
43// ----------------------------------------------------------------------
44// Fixed width binary
45
46FixedSizeBinaryBuilder::FixedSizeBinaryBuilder(const std::shared_ptr<DataType>& type,
47 MemoryPool* pool)
48 : ArrayBuilder(pool),
49 byte_width_(checked_cast<const FixedSizeBinaryType&>(*type).byte_width()),
50 byte_builder_(pool) {}
51
52void FixedSizeBinaryBuilder::CheckValueSize(int64_t size) {
53 DCHECK_EQ(size, byte_width_) << "Appending wrong size to FixedSizeBinaryBuilder";
54}
55
56Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length,
57 const uint8_t* valid_bytes) {
58 RETURN_NOT_OK(Reserve(length));
59 UnsafeAppendToBitmap(valid_bytes, length);
60 return byte_builder_.Append(data, length * byte_width_);
61}
62
63Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length,
64 const uint8_t* validity,
65 int64_t bitmap_offset) {
66 RETURN_NOT_OK(Reserve(length));
67 UnsafeAppendToBitmap(validity, bitmap_offset, length);
68 return byte_builder_.Append(data, length * byte_width_);
69}
70
71Status FixedSizeBinaryBuilder::AppendNull() {
72 RETURN_NOT_OK(Reserve(1));
73 UnsafeAppendNull();
74 return Status::OK();
75}
76
77Status FixedSizeBinaryBuilder::AppendNulls(int64_t length) {
78 RETURN_NOT_OK(Reserve(length));
79 UnsafeAppendToBitmap(length, false);
80 byte_builder_.UnsafeAppend(/*num_copies=*/length * byte_width_, 0);
81 return Status::OK();
82}
83
84Status FixedSizeBinaryBuilder::AppendEmptyValue() {
85 RETURN_NOT_OK(Reserve(1));
86 UnsafeAppendToBitmap(true);
87 byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
88 return Status::OK();
89}
90
91Status FixedSizeBinaryBuilder::AppendEmptyValues(int64_t length) {
92 RETURN_NOT_OK(Reserve(length));
93 UnsafeAppendToBitmap(length, true);
94 byte_builder_.UnsafeAppend(/*num_copies=*/length * byte_width_, 0);
95 return Status::OK();
96}
97
98void FixedSizeBinaryBuilder::Reset() {
99 ArrayBuilder::Reset();
100 byte_builder_.Reset();
101}
102
103Status FixedSizeBinaryBuilder::Resize(int64_t capacity) {
104 RETURN_NOT_OK(CheckCapacity(capacity));
105 RETURN_NOT_OK(byte_builder_.Resize(capacity * byte_width_));
106 return ArrayBuilder::Resize(capacity);
107}
108
109Status FixedSizeBinaryBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
110 std::shared_ptr<Buffer> data;
111 RETURN_NOT_OK(byte_builder_.Finish(&data));
112
113 std::shared_ptr<Buffer> null_bitmap;
114 RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
115 *out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
116
117 capacity_ = length_ = null_count_ = 0;
118 return Status::OK();
119}
120
121const uint8_t* FixedSizeBinaryBuilder::GetValue(int64_t i) const {
122 const uint8_t* data_ptr = byte_builder_.data();
123 return data_ptr + i * byte_width_;
124}
125
126util::string_view FixedSizeBinaryBuilder::GetView(int64_t i) const {
127 const uint8_t* data_ptr = byte_builder_.data();
128 return util::string_view(reinterpret_cast<const char*>(data_ptr + i * byte_width_),
129 byte_width_);
130}
131
132// ----------------------------------------------------------------------
133// ChunkedArray builders
134
135namespace internal {
136
137ChunkedBinaryBuilder::ChunkedBinaryBuilder(int32_t max_chunk_value_length,
138 MemoryPool* pool)
139 : max_chunk_value_length_(max_chunk_value_length), builder_(new BinaryBuilder(pool)) {
140 DCHECK_LE(max_chunk_value_length, kBinaryMemoryLimit);
141}
142
143ChunkedBinaryBuilder::ChunkedBinaryBuilder(int32_t max_chunk_value_length,
144 int32_t max_chunk_length, MemoryPool* pool)
145 : ChunkedBinaryBuilder(max_chunk_value_length, pool) {
146 max_chunk_length_ = max_chunk_length;
147}
148
149Status ChunkedBinaryBuilder::Finish(ArrayVector* out) {
150 if (builder_->length() > 0 || chunks_.size() == 0) {
151 std::shared_ptr<Array> chunk;
152 RETURN_NOT_OK(builder_->Finish(&chunk));
153 chunks_.emplace_back(std::move(chunk));
154 }
155 *out = std::move(chunks_);
156 return Status::OK();
157}
158
159Status ChunkedBinaryBuilder::NextChunk() {
160 std::shared_ptr<Array> chunk;
161 RETURN_NOT_OK(builder_->Finish(&chunk));
162 chunks_.emplace_back(std::move(chunk));
163
164 if (auto capacity = extra_capacity_) {
165 extra_capacity_ = 0;
166 return Reserve(capacity);
167 }
168
169 return Status::OK();
170}
171
172Status ChunkedStringBuilder::Finish(ArrayVector* out) {
173 RETURN_NOT_OK(ChunkedBinaryBuilder::Finish(out));
174
175 // Change data type to string/utf8
176 for (size_t i = 0; i < out->size(); ++i) {
177 std::shared_ptr<ArrayData> data = (*out)[i]->data();
178 data->type = ::arrow::utf8();
179 (*out)[i] = std::make_shared<StringArray>(data);
180 }
181 return Status::OK();
182}
183
184Status ChunkedBinaryBuilder::Reserve(int64_t values) {
185 if (ARROW_PREDICT_FALSE(extra_capacity_ != 0)) {
186 extra_capacity_ += values;
187 return Status::OK();
188 }
189
190 auto current_capacity = builder_->capacity();
191 auto min_capacity = builder_->length() + values;
192 if (current_capacity >= min_capacity) {
193 return Status::OK();
194 }
195
196 auto new_capacity = BufferBuilder::GrowByFactor(current_capacity, min_capacity);
197 if (ARROW_PREDICT_TRUE(new_capacity <= max_chunk_length_)) {
198 return builder_->Resize(new_capacity);
199 }
200
201 extra_capacity_ = new_capacity - max_chunk_length_;
202 return builder_->Resize(max_chunk_length_);
203}
204
205} // namespace internal
206
207} // namespace arrow