]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #pragma once | |
19 | ||
20 | #include <cstdint> | |
21 | #include <iosfwd> | |
22 | #include <memory> | |
23 | #include <string> | |
24 | #include <vector> | |
25 | ||
26 | #include "arrow/array/data.h" | |
27 | #include "arrow/buffer.h" | |
28 | #include "arrow/compare.h" | |
29 | #include "arrow/result.h" | |
30 | #include "arrow/status.h" | |
31 | #include "arrow/type.h" | |
32 | #include "arrow/util/bit_util.h" | |
33 | #include "arrow/util/macros.h" | |
34 | #include "arrow/util/visibility.h" | |
35 | #include "arrow/visitor.h" | |
36 | ||
37 | namespace arrow { | |
38 | ||
39 | // ---------------------------------------------------------------------- | |
40 | // User array accessor types | |
41 | ||
42 | /// \brief Array base type | |
43 | /// Immutable data array with some logical type and some length. | |
44 | /// | |
45 | /// Any memory is owned by the respective Buffer instance (or its parents). | |
46 | /// | |
47 | /// The base class is only required to have a null bitmap buffer if the null | |
48 | /// count is greater than 0 | |
49 | /// | |
50 | /// If known, the null count can be provided in the base Array constructor. If | |
51 | /// the null count is not known, pass -1 to indicate that the null count is to | |
52 | /// be computed on the first call to null_count() | |
53 | class ARROW_EXPORT Array { | |
54 | public: | |
55 | virtual ~Array() = default; | |
56 | ||
57 | /// \brief Return true if value at index is null. Does not boundscheck | |
58 | bool IsNull(int64_t i) const { | |
59 | return null_bitmap_data_ != NULLPTR | |
60 | ? !BitUtil::GetBit(null_bitmap_data_, i + data_->offset) | |
61 | : data_->null_count == data_->length; | |
62 | } | |
63 | ||
64 | /// \brief Return true if value at index is valid (not null). Does not | |
65 | /// boundscheck | |
66 | bool IsValid(int64_t i) const { | |
67 | return null_bitmap_data_ != NULLPTR | |
68 | ? BitUtil::GetBit(null_bitmap_data_, i + data_->offset) | |
69 | : data_->null_count != data_->length; | |
70 | } | |
71 | ||
72 | /// \brief Return a Scalar containing the value of this array at i | |
73 | Result<std::shared_ptr<Scalar>> GetScalar(int64_t i) const; | |
74 | ||
75 | /// Size in the number of elements this array contains. | |
76 | int64_t length() const { return data_->length; } | |
77 | ||
78 | /// A relative position into another array's data, to enable zero-copy | |
79 | /// slicing. This value defaults to zero | |
80 | int64_t offset() const { return data_->offset; } | |
81 | ||
82 | /// The number of null entries in the array. If the null count was not known | |
83 | /// at time of construction (and set to a negative value), then the null | |
84 | /// count will be computed and cached on the first invocation of this | |
85 | /// function | |
86 | int64_t null_count() const; | |
87 | ||
88 | std::shared_ptr<DataType> type() const { return data_->type; } | |
89 | Type::type type_id() const { return data_->type->id(); } | |
90 | ||
91 | /// Buffer for the validity (null) bitmap, if any. Note that Union types | |
92 | /// never have a null bitmap. | |
93 | /// | |
94 | /// Note that for `null_count == 0` or for null type, this will be null. | |
95 | /// This buffer does not account for any slice offset | |
96 | const std::shared_ptr<Buffer>& null_bitmap() const { return data_->buffers[0]; } | |
97 | ||
98 | /// Raw pointer to the null bitmap. | |
99 | /// | |
100 | /// Note that for `null_count == 0` or for null type, this will be null. | |
101 | /// This buffer does not account for any slice offset | |
102 | const uint8_t* null_bitmap_data() const { return null_bitmap_data_; } | |
103 | ||
104 | /// Equality comparison with another array | |
105 | bool Equals(const Array& arr, const EqualOptions& = EqualOptions::Defaults()) const; | |
106 | bool Equals(const std::shared_ptr<Array>& arr, | |
107 | const EqualOptions& = EqualOptions::Defaults()) const; | |
108 | ||
109 | /// \brief Return the formatted unified diff of arrow::Diff between this | |
110 | /// Array and another Array | |
111 | std::string Diff(const Array& other) const; | |
112 | ||
113 | /// Approximate equality comparison with another array | |
114 | /// | |
115 | /// epsilon is only used if this is FloatArray or DoubleArray | |
116 | bool ApproxEquals(const std::shared_ptr<Array>& arr, | |
117 | const EqualOptions& = EqualOptions::Defaults()) const; | |
118 | bool ApproxEquals(const Array& arr, | |
119 | const EqualOptions& = EqualOptions::Defaults()) const; | |
120 | ||
121 | /// Compare if the range of slots specified are equal for the given array and | |
122 | /// this array. end_idx exclusive. This methods does not bounds check. | |
123 | bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx, | |
124 | const Array& other, | |
125 | const EqualOptions& = EqualOptions::Defaults()) const; | |
126 | bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx, | |
127 | const std::shared_ptr<Array>& other, | |
128 | const EqualOptions& = EqualOptions::Defaults()) const; | |
129 | bool RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx, | |
130 | int64_t other_start_idx, | |
131 | const EqualOptions& = EqualOptions::Defaults()) const; | |
132 | bool RangeEquals(const std::shared_ptr<Array>& other, int64_t start_idx, | |
133 | int64_t end_idx, int64_t other_start_idx, | |
134 | const EqualOptions& = EqualOptions::Defaults()) const; | |
135 | ||
136 | Status Accept(ArrayVisitor* visitor) const; | |
137 | ||
138 | /// Construct a zero-copy view of this array with the given type. | |
139 | /// | |
140 | /// This method checks if the types are layout-compatible. | |
141 | /// Nested types are traversed in depth-first order. Data buffers must have | |
142 | /// the same item sizes, even though the logical types may be different. | |
143 | /// An error is returned if the types are not layout-compatible. | |
144 | Result<std::shared_ptr<Array>> View(const std::shared_ptr<DataType>& type) const; | |
145 | ||
146 | /// Construct a zero-copy slice of the array with the indicated offset and | |
147 | /// length | |
148 | /// | |
149 | /// \param[in] offset the position of the first element in the constructed | |
150 | /// slice | |
151 | /// \param[in] length the length of the slice. If there are not enough | |
152 | /// elements in the array, the length will be adjusted accordingly | |
153 | /// | |
154 | /// \return a new object wrapped in std::shared_ptr<Array> | |
155 | std::shared_ptr<Array> Slice(int64_t offset, int64_t length) const; | |
156 | ||
157 | /// Slice from offset until end of the array | |
158 | std::shared_ptr<Array> Slice(int64_t offset) const; | |
159 | ||
160 | /// Input-checking variant of Array::Slice | |
161 | Result<std::shared_ptr<Array>> SliceSafe(int64_t offset, int64_t length) const; | |
162 | /// Input-checking variant of Array::Slice | |
163 | Result<std::shared_ptr<Array>> SliceSafe(int64_t offset) const; | |
164 | ||
165 | const std::shared_ptr<ArrayData>& data() const { return data_; } | |
166 | ||
167 | int num_fields() const { return static_cast<int>(data_->child_data.size()); } | |
168 | ||
169 | /// \return PrettyPrint representation of array suitable for debugging | |
170 | std::string ToString() const; | |
171 | ||
172 | /// \brief Perform cheap validation checks to determine obvious inconsistencies | |
173 | /// within the array's internal data. | |
174 | /// | |
175 | /// This is O(k) where k is the number of descendents. | |
176 | /// | |
177 | /// \return Status | |
178 | Status Validate() const; | |
179 | ||
180 | /// \brief Perform extensive validation checks to determine inconsistencies | |
181 | /// within the array's internal data. | |
182 | /// | |
183 | /// This is potentially O(k*n) where k is the number of descendents and n | |
184 | /// is the array length. | |
185 | /// | |
186 | /// \return Status | |
187 | Status ValidateFull() const; | |
188 | ||
189 | protected: | |
190 | Array() : null_bitmap_data_(NULLPTR) {} | |
191 | ||
192 | std::shared_ptr<ArrayData> data_; | |
193 | const uint8_t* null_bitmap_data_; | |
194 | ||
195 | /// Protected method for constructors | |
196 | void SetData(const std::shared_ptr<ArrayData>& data) { | |
197 | if (data->buffers.size() > 0) { | |
198 | null_bitmap_data_ = data->GetValuesSafe<uint8_t>(0, /*offset=*/0); | |
199 | } else { | |
200 | null_bitmap_data_ = NULLPTR; | |
201 | } | |
202 | data_ = data; | |
203 | } | |
204 | ||
205 | private: | |
206 | ARROW_DISALLOW_COPY_AND_ASSIGN(Array); | |
207 | }; | |
208 | ||
209 | static inline std::ostream& operator<<(std::ostream& os, const Array& x) { | |
210 | os << x.ToString(); | |
211 | return os; | |
212 | } | |
213 | ||
214 | /// Base class for non-nested arrays | |
215 | class ARROW_EXPORT FlatArray : public Array { | |
216 | protected: | |
217 | using Array::Array; | |
218 | }; | |
219 | ||
220 | /// Base class for arrays of fixed-size logical types | |
221 | class ARROW_EXPORT PrimitiveArray : public FlatArray { | |
222 | public: | |
223 | PrimitiveArray(const std::shared_ptr<DataType>& type, int64_t length, | |
224 | const std::shared_ptr<Buffer>& data, | |
225 | const std::shared_ptr<Buffer>& null_bitmap = NULLPTR, | |
226 | int64_t null_count = kUnknownNullCount, int64_t offset = 0); | |
227 | ||
228 | /// Does not account for any slice offset | |
229 | std::shared_ptr<Buffer> values() const { return data_->buffers[1]; } | |
230 | ||
231 | protected: | |
232 | PrimitiveArray() : raw_values_(NULLPTR) {} | |
233 | ||
234 | void SetData(const std::shared_ptr<ArrayData>& data) { | |
235 | this->Array::SetData(data); | |
236 | raw_values_ = data->GetValuesSafe<uint8_t>(1, /*offset=*/0); | |
237 | } | |
238 | ||
239 | explicit PrimitiveArray(const std::shared_ptr<ArrayData>& data) { SetData(data); } | |
240 | ||
241 | const uint8_t* raw_values_; | |
242 | }; | |
243 | ||
244 | /// Degenerate null type Array | |
245 | class ARROW_EXPORT NullArray : public FlatArray { | |
246 | public: | |
247 | using TypeClass = NullType; | |
248 | ||
249 | explicit NullArray(const std::shared_ptr<ArrayData>& data) { SetData(data); } | |
250 | explicit NullArray(int64_t length); | |
251 | ||
252 | private: | |
253 | void SetData(const std::shared_ptr<ArrayData>& data) { | |
254 | null_bitmap_data_ = NULLPTR; | |
255 | data->null_count = data->length; | |
256 | data_ = data; | |
257 | } | |
258 | }; | |
259 | ||
260 | } // namespace arrow |