]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/array/builder_adaptive.h
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / array / builder_adaptive.h
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #pragma once
19
20 #include <cstdint>
21 #include <cstring>
22 #include <memory>
23 #include <type_traits>
24
25 #include "arrow/array/builder_base.h"
26 #include "arrow/buffer.h"
27 #include "arrow/status.h"
28 #include "arrow/type.h"
29 #include "arrow/util/macros.h"
30 #include "arrow/util/visibility.h"
31
32 namespace arrow {
33
34 namespace internal {
35
36 class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
37 public:
38 AdaptiveIntBuilderBase(uint8_t start_int_size, MemoryPool* pool);
39
40 explicit AdaptiveIntBuilderBase(MemoryPool* pool)
41 : AdaptiveIntBuilderBase(sizeof(uint8_t), pool) {}
42
43 /// \brief Append multiple nulls
44 /// \param[in] length the number of nulls to append
45 Status AppendNulls(int64_t length) final {
46 ARROW_RETURN_NOT_OK(CommitPendingData());
47 ARROW_RETURN_NOT_OK(Reserve(length));
48 memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
49 UnsafeSetNull(length);
50 return Status::OK();
51 }
52
53 Status AppendNull() final {
54 pending_data_[pending_pos_] = 0;
55 pending_valid_[pending_pos_] = 0;
56 pending_has_nulls_ = true;
57 ++pending_pos_;
58 ++length_;
59 ++null_count_;
60
61 if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
62 return CommitPendingData();
63 }
64 return Status::OK();
65 }
66
67 Status AppendEmptyValues(int64_t length) final {
68 ARROW_RETURN_NOT_OK(CommitPendingData());
69 ARROW_RETURN_NOT_OK(Reserve(length));
70 memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
71 UnsafeSetNotNull(length);
72 return Status::OK();
73 }
74
75 Status AppendEmptyValue() final {
76 pending_data_[pending_pos_] = 0;
77 pending_valid_[pending_pos_] = 1;
78 ++pending_pos_;
79 ++length_;
80
81 if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
82 return CommitPendingData();
83 }
84 return Status::OK();
85 }
86
87 void Reset() override;
88 Status Resize(int64_t capacity) override;
89
90 protected:
91 Status AppendInternal(const uint64_t val) {
92 pending_data_[pending_pos_] = val;
93 pending_valid_[pending_pos_] = 1;
94 ++pending_pos_;
95 ++length_;
96
97 if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
98 return CommitPendingData();
99 }
100 return Status::OK();
101 }
102
103 virtual Status CommitPendingData() = 0;
104
105 template <typename new_type, typename old_type>
106 typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
107 ExpandIntSizeInternal();
108 template <typename new_type, typename old_type>
109 typename std::enable_if<(sizeof(old_type) < sizeof(new_type)), Status>::type
110 ExpandIntSizeInternal();
111
112 std::shared_ptr<ResizableBuffer> data_;
113 uint8_t* raw_data_ = NULLPTR;
114
115 const uint8_t start_int_size_;
116 uint8_t int_size_;
117
118 static constexpr int32_t pending_size_ = 1024;
119 uint8_t pending_valid_[pending_size_];
120 uint64_t pending_data_[pending_size_];
121 int32_t pending_pos_ = 0;
122 bool pending_has_nulls_ = false;
123 };
124
125 } // namespace internal
126
127 class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase {
128 public:
129 explicit AdaptiveUIntBuilder(uint8_t start_int_size,
130 MemoryPool* pool = default_memory_pool());
131
132 explicit AdaptiveUIntBuilder(MemoryPool* pool = default_memory_pool())
133 : AdaptiveUIntBuilder(sizeof(uint8_t), pool) {}
134
135 using ArrayBuilder::Advance;
136 using internal::AdaptiveIntBuilderBase::Reset;
137
138 /// Scalar append
139 Status Append(const uint64_t val) { return AppendInternal(val); }
140
141 /// \brief Append a sequence of elements in one shot
142 /// \param[in] values a contiguous C array of values
143 /// \param[in] length the number of values to append
144 /// \param[in] valid_bytes an optional sequence of bytes where non-zero
145 /// indicates a valid (non-null) value
146 /// \return Status
147 Status AppendValues(const uint64_t* values, int64_t length,
148 const uint8_t* valid_bytes = NULLPTR);
149
150 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
151
152 std::shared_ptr<DataType> type() const override;
153
154 protected:
155 Status CommitPendingData() override;
156 Status ExpandIntSize(uint8_t new_int_size);
157
158 Status AppendValuesInternal(const uint64_t* values, int64_t length,
159 const uint8_t* valid_bytes);
160
161 template <typename new_type>
162 Status ExpandIntSizeN();
163 };
164
165 class ARROW_EXPORT AdaptiveIntBuilder : public internal::AdaptiveIntBuilderBase {
166 public:
167 explicit AdaptiveIntBuilder(uint8_t start_int_size,
168 MemoryPool* pool = default_memory_pool());
169
170 explicit AdaptiveIntBuilder(MemoryPool* pool = default_memory_pool())
171 : AdaptiveIntBuilder(sizeof(uint8_t), pool) {}
172
173 using ArrayBuilder::Advance;
174 using internal::AdaptiveIntBuilderBase::Reset;
175
176 /// Scalar append
177 Status Append(const int64_t val) { return AppendInternal(static_cast<uint64_t>(val)); }
178
179 /// \brief Append a sequence of elements in one shot
180 /// \param[in] values a contiguous C array of values
181 /// \param[in] length the number of values to append
182 /// \param[in] valid_bytes an optional sequence of bytes where non-zero
183 /// indicates a valid (non-null) value
184 /// \return Status
185 Status AppendValues(const int64_t* values, int64_t length,
186 const uint8_t* valid_bytes = NULLPTR);
187
188 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
189
190 std::shared_ptr<DataType> type() const override;
191
192 protected:
193 Status CommitPendingData() override;
194 Status ExpandIntSize(uint8_t new_int_size);
195
196 Status AppendValuesInternal(const int64_t* values, int64_t length,
197 const uint8_t* valid_bytes);
198
199 template <typename new_type>
200 Status ExpandIntSizeN();
201 };
202
203 } // namespace arrow