]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include "arrow/array/builder_binary.h" | |
19 | ||
20 | #include <algorithm> | |
21 | #include <cstddef> | |
22 | #include <cstdint> | |
23 | #include <cstring> | |
24 | #include <numeric> | |
25 | #include <string> | |
26 | #include <utility> | |
27 | #include <vector> | |
28 | ||
29 | #include "arrow/array.h" | |
30 | #include "arrow/buffer.h" | |
31 | #include "arrow/status.h" | |
32 | #include "arrow/type.h" | |
33 | #include "arrow/type_traits.h" | |
34 | #include "arrow/util/bit_util.h" | |
35 | #include "arrow/util/checked_cast.h" | |
36 | #include "arrow/util/decimal.h" | |
37 | #include "arrow/util/logging.h" | |
38 | ||
39 | namespace arrow { | |
40 | ||
41 | using internal::checked_cast; | |
42 | ||
43 | // ---------------------------------------------------------------------- | |
44 | // Fixed width binary | |
45 | ||
46 | FixedSizeBinaryBuilder::FixedSizeBinaryBuilder(const std::shared_ptr<DataType>& type, | |
47 | MemoryPool* pool) | |
48 | : ArrayBuilder(pool), | |
49 | byte_width_(checked_cast<const FixedSizeBinaryType&>(*type).byte_width()), | |
50 | byte_builder_(pool) {} | |
51 | ||
52 | void FixedSizeBinaryBuilder::CheckValueSize(int64_t size) { | |
53 | DCHECK_EQ(size, byte_width_) << "Appending wrong size to FixedSizeBinaryBuilder"; | |
54 | } | |
55 | ||
56 | Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length, | |
57 | const uint8_t* valid_bytes) { | |
58 | RETURN_NOT_OK(Reserve(length)); | |
59 | UnsafeAppendToBitmap(valid_bytes, length); | |
60 | return byte_builder_.Append(data, length * byte_width_); | |
61 | } | |
62 | ||
63 | Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length, | |
64 | const uint8_t* validity, | |
65 | int64_t bitmap_offset) { | |
66 | RETURN_NOT_OK(Reserve(length)); | |
67 | UnsafeAppendToBitmap(validity, bitmap_offset, length); | |
68 | return byte_builder_.Append(data, length * byte_width_); | |
69 | } | |
70 | ||
71 | Status FixedSizeBinaryBuilder::AppendNull() { | |
72 | RETURN_NOT_OK(Reserve(1)); | |
73 | UnsafeAppendNull(); | |
74 | return Status::OK(); | |
75 | } | |
76 | ||
77 | Status FixedSizeBinaryBuilder::AppendNulls(int64_t length) { | |
78 | RETURN_NOT_OK(Reserve(length)); | |
79 | UnsafeAppendToBitmap(length, false); | |
80 | byte_builder_.UnsafeAppend(/*num_copies=*/length * byte_width_, 0); | |
81 | return Status::OK(); | |
82 | } | |
83 | ||
84 | Status FixedSizeBinaryBuilder::AppendEmptyValue() { | |
85 | RETURN_NOT_OK(Reserve(1)); | |
86 | UnsafeAppendToBitmap(true); | |
87 | byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0); | |
88 | return Status::OK(); | |
89 | } | |
90 | ||
91 | Status FixedSizeBinaryBuilder::AppendEmptyValues(int64_t length) { | |
92 | RETURN_NOT_OK(Reserve(length)); | |
93 | UnsafeAppendToBitmap(length, true); | |
94 | byte_builder_.UnsafeAppend(/*num_copies=*/length * byte_width_, 0); | |
95 | return Status::OK(); | |
96 | } | |
97 | ||
98 | void FixedSizeBinaryBuilder::Reset() { | |
99 | ArrayBuilder::Reset(); | |
100 | byte_builder_.Reset(); | |
101 | } | |
102 | ||
103 | Status FixedSizeBinaryBuilder::Resize(int64_t capacity) { | |
104 | RETURN_NOT_OK(CheckCapacity(capacity)); | |
105 | RETURN_NOT_OK(byte_builder_.Resize(capacity * byte_width_)); | |
106 | return ArrayBuilder::Resize(capacity); | |
107 | } | |
108 | ||
109 | Status FixedSizeBinaryBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) { | |
110 | std::shared_ptr<Buffer> data; | |
111 | RETURN_NOT_OK(byte_builder_.Finish(&data)); | |
112 | ||
113 | std::shared_ptr<Buffer> null_bitmap; | |
114 | RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap)); | |
115 | *out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_); | |
116 | ||
117 | capacity_ = length_ = null_count_ = 0; | |
118 | return Status::OK(); | |
119 | } | |
120 | ||
121 | const uint8_t* FixedSizeBinaryBuilder::GetValue(int64_t i) const { | |
122 | const uint8_t* data_ptr = byte_builder_.data(); | |
123 | return data_ptr + i * byte_width_; | |
124 | } | |
125 | ||
126 | util::string_view FixedSizeBinaryBuilder::GetView(int64_t i) const { | |
127 | const uint8_t* data_ptr = byte_builder_.data(); | |
128 | return util::string_view(reinterpret_cast<const char*>(data_ptr + i * byte_width_), | |
129 | byte_width_); | |
130 | } | |
131 | ||
132 | // ---------------------------------------------------------------------- | |
133 | // ChunkedArray builders | |
134 | ||
135 | namespace internal { | |
136 | ||
137 | ChunkedBinaryBuilder::ChunkedBinaryBuilder(int32_t max_chunk_value_length, | |
138 | MemoryPool* pool) | |
139 | : max_chunk_value_length_(max_chunk_value_length), builder_(new BinaryBuilder(pool)) { | |
140 | DCHECK_LE(max_chunk_value_length, kBinaryMemoryLimit); | |
141 | } | |
142 | ||
143 | ChunkedBinaryBuilder::ChunkedBinaryBuilder(int32_t max_chunk_value_length, | |
144 | int32_t max_chunk_length, MemoryPool* pool) | |
145 | : ChunkedBinaryBuilder(max_chunk_value_length, pool) { | |
146 | max_chunk_length_ = max_chunk_length; | |
147 | } | |
148 | ||
149 | Status ChunkedBinaryBuilder::Finish(ArrayVector* out) { | |
150 | if (builder_->length() > 0 || chunks_.size() == 0) { | |
151 | std::shared_ptr<Array> chunk; | |
152 | RETURN_NOT_OK(builder_->Finish(&chunk)); | |
153 | chunks_.emplace_back(std::move(chunk)); | |
154 | } | |
155 | *out = std::move(chunks_); | |
156 | return Status::OK(); | |
157 | } | |
158 | ||
159 | Status ChunkedBinaryBuilder::NextChunk() { | |
160 | std::shared_ptr<Array> chunk; | |
161 | RETURN_NOT_OK(builder_->Finish(&chunk)); | |
162 | chunks_.emplace_back(std::move(chunk)); | |
163 | ||
164 | if (auto capacity = extra_capacity_) { | |
165 | extra_capacity_ = 0; | |
166 | return Reserve(capacity); | |
167 | } | |
168 | ||
169 | return Status::OK(); | |
170 | } | |
171 | ||
172 | Status ChunkedStringBuilder::Finish(ArrayVector* out) { | |
173 | RETURN_NOT_OK(ChunkedBinaryBuilder::Finish(out)); | |
174 | ||
175 | // Change data type to string/utf8 | |
176 | for (size_t i = 0; i < out->size(); ++i) { | |
177 | std::shared_ptr<ArrayData> data = (*out)[i]->data(); | |
178 | data->type = ::arrow::utf8(); | |
179 | (*out)[i] = std::make_shared<StringArray>(data); | |
180 | } | |
181 | return Status::OK(); | |
182 | } | |
183 | ||
184 | Status ChunkedBinaryBuilder::Reserve(int64_t values) { | |
185 | if (ARROW_PREDICT_FALSE(extra_capacity_ != 0)) { | |
186 | extra_capacity_ += values; | |
187 | return Status::OK(); | |
188 | } | |
189 | ||
190 | auto current_capacity = builder_->capacity(); | |
191 | auto min_capacity = builder_->length() + values; | |
192 | if (current_capacity >= min_capacity) { | |
193 | return Status::OK(); | |
194 | } | |
195 | ||
196 | auto new_capacity = BufferBuilder::GrowByFactor(current_capacity, min_capacity); | |
197 | if (ARROW_PREDICT_TRUE(new_capacity <= max_chunk_length_)) { | |
198 | return builder_->Resize(new_capacity); | |
199 | } | |
200 | ||
201 | extra_capacity_ = new_capacity - max_chunk_length_; | |
202 | return builder_->Resize(max_chunk_length_); | |
203 | } | |
204 | ||
205 | } // namespace internal | |
206 | ||
207 | } // namespace arrow |