]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/array/array_base.h
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / array / array_base.h
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#pragma once
19
20#include <cstdint>
21#include <iosfwd>
22#include <memory>
23#include <string>
24#include <vector>
25
26#include "arrow/array/data.h"
27#include "arrow/buffer.h"
28#include "arrow/compare.h"
29#include "arrow/result.h"
30#include "arrow/status.h"
31#include "arrow/type.h"
32#include "arrow/util/bit_util.h"
33#include "arrow/util/macros.h"
34#include "arrow/util/visibility.h"
35#include "arrow/visitor.h"
36
37namespace arrow {
38
39// ----------------------------------------------------------------------
40// User array accessor types
41
42/// \brief Array base type
43/// Immutable data array with some logical type and some length.
44///
45/// Any memory is owned by the respective Buffer instance (or its parents).
46///
47/// The base class is only required to have a null bitmap buffer if the null
48/// count is greater than 0
49///
50/// If known, the null count can be provided in the base Array constructor. If
51/// the null count is not known, pass -1 to indicate that the null count is to
52/// be computed on the first call to null_count()
53class ARROW_EXPORT Array {
54 public:
55 virtual ~Array() = default;
56
57 /// \brief Return true if value at index is null. Does not boundscheck
58 bool IsNull(int64_t i) const {
59 return null_bitmap_data_ != NULLPTR
60 ? !BitUtil::GetBit(null_bitmap_data_, i + data_->offset)
61 : data_->null_count == data_->length;
62 }
63
64 /// \brief Return true if value at index is valid (not null). Does not
65 /// boundscheck
66 bool IsValid(int64_t i) const {
67 return null_bitmap_data_ != NULLPTR
68 ? BitUtil::GetBit(null_bitmap_data_, i + data_->offset)
69 : data_->null_count != data_->length;
70 }
71
72 /// \brief Return a Scalar containing the value of this array at i
73 Result<std::shared_ptr<Scalar>> GetScalar(int64_t i) const;
74
75 /// Size in the number of elements this array contains.
76 int64_t length() const { return data_->length; }
77
78 /// A relative position into another array's data, to enable zero-copy
79 /// slicing. This value defaults to zero
80 int64_t offset() const { return data_->offset; }
81
82 /// The number of null entries in the array. If the null count was not known
83 /// at time of construction (and set to a negative value), then the null
84 /// count will be computed and cached on the first invocation of this
85 /// function
86 int64_t null_count() const;
87
88 std::shared_ptr<DataType> type() const { return data_->type; }
89 Type::type type_id() const { return data_->type->id(); }
90
91 /// Buffer for the validity (null) bitmap, if any. Note that Union types
92 /// never have a null bitmap.
93 ///
94 /// Note that for `null_count == 0` or for null type, this will be null.
95 /// This buffer does not account for any slice offset
96 const std::shared_ptr<Buffer>& null_bitmap() const { return data_->buffers[0]; }
97
98 /// Raw pointer to the null bitmap.
99 ///
100 /// Note that for `null_count == 0` or for null type, this will be null.
101 /// This buffer does not account for any slice offset
102 const uint8_t* null_bitmap_data() const { return null_bitmap_data_; }
103
104 /// Equality comparison with another array
105 bool Equals(const Array& arr, const EqualOptions& = EqualOptions::Defaults()) const;
106 bool Equals(const std::shared_ptr<Array>& arr,
107 const EqualOptions& = EqualOptions::Defaults()) const;
108
109 /// \brief Return the formatted unified diff of arrow::Diff between this
110 /// Array and another Array
111 std::string Diff(const Array& other) const;
112
113 /// Approximate equality comparison with another array
114 ///
115 /// epsilon is only used if this is FloatArray or DoubleArray
116 bool ApproxEquals(const std::shared_ptr<Array>& arr,
117 const EqualOptions& = EqualOptions::Defaults()) const;
118 bool ApproxEquals(const Array& arr,
119 const EqualOptions& = EqualOptions::Defaults()) const;
120
121 /// Compare if the range of slots specified are equal for the given array and
122 /// this array. end_idx exclusive. This methods does not bounds check.
123 bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
124 const Array& other,
125 const EqualOptions& = EqualOptions::Defaults()) const;
126 bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
127 const std::shared_ptr<Array>& other,
128 const EqualOptions& = EqualOptions::Defaults()) const;
129 bool RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx,
130 int64_t other_start_idx,
131 const EqualOptions& = EqualOptions::Defaults()) const;
132 bool RangeEquals(const std::shared_ptr<Array>& other, int64_t start_idx,
133 int64_t end_idx, int64_t other_start_idx,
134 const EqualOptions& = EqualOptions::Defaults()) const;
135
136 Status Accept(ArrayVisitor* visitor) const;
137
138 /// Construct a zero-copy view of this array with the given type.
139 ///
140 /// This method checks if the types are layout-compatible.
141 /// Nested types are traversed in depth-first order. Data buffers must have
142 /// the same item sizes, even though the logical types may be different.
143 /// An error is returned if the types are not layout-compatible.
144 Result<std::shared_ptr<Array>> View(const std::shared_ptr<DataType>& type) const;
145
146 /// Construct a zero-copy slice of the array with the indicated offset and
147 /// length
148 ///
149 /// \param[in] offset the position of the first element in the constructed
150 /// slice
151 /// \param[in] length the length of the slice. If there are not enough
152 /// elements in the array, the length will be adjusted accordingly
153 ///
154 /// \return a new object wrapped in std::shared_ptr<Array>
155 std::shared_ptr<Array> Slice(int64_t offset, int64_t length) const;
156
157 /// Slice from offset until end of the array
158 std::shared_ptr<Array> Slice(int64_t offset) const;
159
160 /// Input-checking variant of Array::Slice
161 Result<std::shared_ptr<Array>> SliceSafe(int64_t offset, int64_t length) const;
162 /// Input-checking variant of Array::Slice
163 Result<std::shared_ptr<Array>> SliceSafe(int64_t offset) const;
164
165 const std::shared_ptr<ArrayData>& data() const { return data_; }
166
167 int num_fields() const { return static_cast<int>(data_->child_data.size()); }
168
169 /// \return PrettyPrint representation of array suitable for debugging
170 std::string ToString() const;
171
172 /// \brief Perform cheap validation checks to determine obvious inconsistencies
173 /// within the array's internal data.
174 ///
175 /// This is O(k) where k is the number of descendents.
176 ///
177 /// \return Status
178 Status Validate() const;
179
180 /// \brief Perform extensive validation checks to determine inconsistencies
181 /// within the array's internal data.
182 ///
183 /// This is potentially O(k*n) where k is the number of descendents and n
184 /// is the array length.
185 ///
186 /// \return Status
187 Status ValidateFull() const;
188
189 protected:
190 Array() : null_bitmap_data_(NULLPTR) {}
191
192 std::shared_ptr<ArrayData> data_;
193 const uint8_t* null_bitmap_data_;
194
195 /// Protected method for constructors
196 void SetData(const std::shared_ptr<ArrayData>& data) {
197 if (data->buffers.size() > 0) {
198 null_bitmap_data_ = data->GetValuesSafe<uint8_t>(0, /*offset=*/0);
199 } else {
200 null_bitmap_data_ = NULLPTR;
201 }
202 data_ = data;
203 }
204
205 private:
206 ARROW_DISALLOW_COPY_AND_ASSIGN(Array);
207};
208
209static inline std::ostream& operator<<(std::ostream& os, const Array& x) {
210 os << x.ToString();
211 return os;
212}
213
214/// Base class for non-nested arrays
215class ARROW_EXPORT FlatArray : public Array {
216 protected:
217 using Array::Array;
218};
219
220/// Base class for arrays of fixed-size logical types
221class ARROW_EXPORT PrimitiveArray : public FlatArray {
222 public:
223 PrimitiveArray(const std::shared_ptr<DataType>& type, int64_t length,
224 const std::shared_ptr<Buffer>& data,
225 const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
226 int64_t null_count = kUnknownNullCount, int64_t offset = 0);
227
228 /// Does not account for any slice offset
229 std::shared_ptr<Buffer> values() const { return data_->buffers[1]; }
230
231 protected:
232 PrimitiveArray() : raw_values_(NULLPTR) {}
233
234 void SetData(const std::shared_ptr<ArrayData>& data) {
235 this->Array::SetData(data);
236 raw_values_ = data->GetValuesSafe<uint8_t>(1, /*offset=*/0);
237 }
238
239 explicit PrimitiveArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
240
241 const uint8_t* raw_values_;
242};
243
244/// Degenerate null type Array
245class ARROW_EXPORT NullArray : public FlatArray {
246 public:
247 using TypeClass = NullType;
248
249 explicit NullArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
250 explicit NullArray(int64_t length);
251
252 private:
253 void SetData(const std::shared_ptr<ArrayData>& data) {
254 null_bitmap_data_ = NULLPTR;
255 data->null_count = data->length;
256 data_ = data;
257 }
258};
259
260} // namespace arrow