]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/array/array_view_test.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / array / array_view_test.cc
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <memory>
19#include <string>
20
21#include <gtest/gtest.h>
22
23#include "arrow/array/array_base.h"
24#include "arrow/array/array_dict.h"
25#include "arrow/array/array_nested.h"
26#include "arrow/array/data.h"
27#include "arrow/extension_type.h"
28#include "arrow/result.h"
29#include "arrow/status.h"
30#include "arrow/testing/gtest_util.h"
31#include "arrow/type.h"
32#include "arrow/util/endian.h"
33#include "arrow/util/logging.h"
34
35namespace arrow {
36
37void CheckView(const std::shared_ptr<Array>& input,
38 const std::shared_ptr<DataType>& view_type,
39 const std::shared_ptr<Array>& expected) {
40 ASSERT_OK_AND_ASSIGN(auto result, input->View(view_type));
41 ASSERT_OK(result->ValidateFull());
42 AssertArraysEqual(*expected, *result);
43}
44
45void CheckView(const std::shared_ptr<Array>& input,
46 const std::shared_ptr<Array>& expected_view) {
47 CheckView(input, expected_view->type(), expected_view);
48}
49
50void CheckViewFails(const std::shared_ptr<Array>& input,
51 const std::shared_ptr<DataType>& view_type) {
52 ASSERT_RAISES(Invalid, input->View(view_type));
53}
54
55class IPv4Type : public ExtensionType {
56 public:
57 IPv4Type() : ExtensionType(fixed_size_binary(4)) {}
58
59 std::string extension_name() const override { return "ipv4"; }
60
61 bool ExtensionEquals(const ExtensionType& other) const override {
62 return other.extension_name() == this->extension_name();
63 }
64
65 std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override {
66 DCHECK_EQ(data->type->id(), Type::EXTENSION);
67 DCHECK_EQ("ipv4", static_cast<const ExtensionType&>(*data->type).extension_name());
68 return std::make_shared<ExtensionArray>(data);
69 }
70
71 Result<std::shared_ptr<DataType>> Deserialize(
72 std::shared_ptr<DataType> storage_type,
73 const std::string& serialized) const override {
74 return Status::NotImplemented("IPv4Type::Deserialize");
75 }
76
77 std::string Serialize() const override { return ""; }
78};
79
80TEST(TestArrayView, IdentityPrimitive) {
81 auto arr = ArrayFromJSON(int16(), "[0, -1, 42]");
82 CheckView(arr, arr->type(), arr);
83 arr = ArrayFromJSON(int16(), "[0, -1, 42, null]");
84 CheckView(arr, arr->type(), arr);
85 arr = ArrayFromJSON(boolean(), "[true, false, null]");
86 CheckView(arr, arr->type(), arr);
87}
88
89TEST(TestArrayView, IdentityNullType) {
90 auto arr = ArrayFromJSON(null(), "[null, null, null]");
91 CheckView(arr, arr->type(), arr);
92}
93
94TEST(TestArrayView, PrimitiveAsPrimitive) {
95 auto arr = ArrayFromJSON(int16(), "[0, -1, 42]");
96 auto expected = ArrayFromJSON(uint16(), "[0, 65535, 42]");
97 CheckView(arr, expected);
98 CheckView(expected, arr);
99
100 arr = ArrayFromJSON(int32(), "[0, 1069547520, -1071644672, null]");
101 expected = ArrayFromJSON(float32(), "[0.0, 1.5, -2.5, null]");
102 CheckView(arr, expected);
103
104 arr = ArrayFromJSON(timestamp(TimeUnit::SECOND),
105 R"(["1970-01-01","2000-02-29","3989-07-14","1900-02-28"])");
106 expected = ArrayFromJSON(int64(), "[0, 951782400, 63730281600, -2203977600]");
107 CheckView(arr, expected);
108 CheckView(expected, arr);
109}
110
111TEST(TestArrayView, PrimitiveAsFixedSizeBinary) {
112#if ARROW_LITTLE_ENDIAN
113 auto arr = ArrayFromJSON(int32(), "[2020568934, 2054316386, null]");
114#else
115 auto arr = ArrayFromJSON(int32(), "[1718579064, 1650553466, null]");
116#endif
117 auto expected = ArrayFromJSON(fixed_size_binary(4), R"(["foox", "barz", null])");
118 CheckView(arr, expected);
119 CheckView(expected, arr);
120}
121
122TEST(TestArrayView, StringAsBinary) {
123 auto arr = ArrayFromJSON(utf8(), R"(["foox", "barz", null])");
124 auto expected = ArrayFromJSON(binary(), R"(["foox", "barz", null])");
125 CheckView(arr, expected);
126 CheckView(expected, arr);
127}
128
129TEST(TestArrayView, PrimitiveWrongSize) {
130 auto arr = ArrayFromJSON(int16(), "[0, -1, 42]");
131 CheckViewFails(arr, int8());
132 CheckViewFails(arr, fixed_size_binary(3));
133 CheckViewFails(arr, null());
134}
135
136TEST(TestArrayView, StructAsStructSimple) {
137 auto ty1 = struct_({field("a", int8()), field("b", int32())});
138 auto ty2 = struct_({field("c", uint8()), field("d", float32())});
139
140 auto arr = ArrayFromJSON(ty1, "[[0, 0], [1, 1069547520], [-1, -1071644672]]");
141 auto expected = ArrayFromJSON(ty2, "[[0, 0], [1, 1.5], [255, -2.5]]");
142 CheckView(arr, expected);
143 CheckView(expected, arr);
144
145 // With nulls
146 arr = ArrayFromJSON(ty1, "[[0, 0], null, [-1, -1071644672]]");
147 expected = ArrayFromJSON(ty2, "[[0, 0], null, [255, -2.5]]");
148 CheckView(arr, expected);
149 CheckView(expected, arr);
150
151 // With nested nulls
152 arr = ArrayFromJSON(ty1, "[[0, null], null, [-1, -1071644672]]");
153 expected = ArrayFromJSON(ty2, "[[0, null], null, [255, -2.5]]");
154 CheckView(arr, expected);
155 CheckView(expected, arr);
156
157 ty2 = struct_({field("c", uint8()), field("d", fixed_size_binary(4))});
158#if ARROW_LITTLE_ENDIAN
159 arr = ArrayFromJSON(ty1, "[[0, null], null, [-1, 2020568934]]");
160#else
161 arr = ArrayFromJSON(ty1, "[[0, null], null, [-1, 1718579064]]");
162#endif
163 expected = ArrayFromJSON(ty2, R"([[0, null], null, [255, "foox"]])");
164 CheckView(arr, expected);
165 CheckView(expected, arr);
166}
167
168TEST(TestArrayView, StructAsStructNonNullable) {
169 auto ty1 = struct_({field("a", int8()), field("b", int32())});
170 auto ty2 = struct_({field("c", uint8(), /*nullable=*/false), field("d", float32())});
171
172 auto arr = ArrayFromJSON(ty1, "[[0, 0], [1, 1069547520], [-1, -1071644672]]");
173 auto expected = ArrayFromJSON(ty2, "[[0, 0], [1, 1.5], [255, -2.5]]");
174 CheckView(arr, expected);
175 CheckView(expected, arr);
176
177 // With nested nulls
178 arr = ArrayFromJSON(ty1, "[[0, null], [-1, -1071644672]]");
179 expected = ArrayFromJSON(ty2, "[[0, null], [255, -2.5]]");
180 CheckView(arr, expected);
181 CheckView(expected, arr);
182
183 // Nested null cannot be viewed as non-null field
184 arr = ArrayFromJSON(ty1, "[[0, null], [null, -1071644672]]");
185 CheckViewFails(arr, ty2);
186}
187
188TEST(TestArrayView, StructAsStructWrongLayout) {
189 auto ty1 = struct_({field("a", int8()), field("b", int32())});
190 auto arr = ArrayFromJSON(ty1, "[[0, 0], [1, 1069547520], [-1, -1071644672]]");
191
192 auto ty2 = struct_({field("c", int16()), field("d", int32())});
193 CheckViewFails(arr, ty2);
194 ty2 = struct_({field("c", int32()), field("d", int8())});
195 CheckViewFails(arr, ty2);
196 ty2 = struct_({field("c", int8())});
197 CheckViewFails(arr, ty2);
198 ty2 = struct_({field("c", fixed_size_binary(5))});
199 CheckViewFails(arr, ty2);
200}
201
202TEST(TestArrayView, StructAsStructWithNullType) {
203 auto ty1 = struct_({field("a", int8()), field("b", null())});
204 auto ty2 = struct_({field("c", uint8()), field("d", null())});
205
206 auto arr = ArrayFromJSON(ty1, "[[0, null], [1, null], [-1, null]]");
207 auto expected = ArrayFromJSON(ty2, "[[0, null], [1, null], [255, null]]");
208 CheckView(arr, expected);
209 CheckView(expected, arr);
210
211 // With nulls and nested nulls
212 arr = ArrayFromJSON(ty1, "[null, [null, null], [-1, null]]");
213 expected = ArrayFromJSON(ty2, "[null, [null, null], [255, null]]");
214 CheckView(arr, expected);
215 CheckView(expected, arr);
216
217 // Moving the null types around
218 ty2 = struct_({field("c", null()), field("d", uint8())});
219 expected = ArrayFromJSON(ty2, "[null, [null, null], [null, 255]]");
220 CheckView(arr, expected);
221 CheckView(expected, arr);
222
223 // Removing the null type
224 ty2 = struct_({field("c", uint8())});
225 expected = ArrayFromJSON(ty2, "[null, [null], [255]]");
226 CheckView(arr, expected);
227 CheckView(expected, arr);
228}
229
230TEST(TestArrayView, StructAsFlat) {
231 auto ty1 = struct_({field("a", int16())});
232 auto arr = ArrayFromJSON(ty1, "[[0], [1], [-1]]");
233 auto expected = ArrayFromJSON(uint16(), "[0, 1, 65535]");
234 CheckView(arr, expected);
235 CheckView(expected, arr);
236
237 // With nulls
238 arr = ArrayFromJSON(ty1, "[[0], null, [-1]]");
239 expected = ArrayFromJSON(uint16(), "[0, null, 65535]");
240 // CheckView(arr, expected); // XXX currently fails
241 CheckView(expected, arr);
242
243 // With nested nulls => fails
244 arr = ArrayFromJSON(ty1, "[[0], [null], [-1]]");
245 CheckViewFails(arr, uint16());
246}
247
248TEST(TestArrayView, StructAsFlatWithNullType) {
249 auto ty1 = struct_({field("a", null()), field("b", int16()), field("c", null())});
250 auto arr = ArrayFromJSON(ty1, "[[null, 0, null], [null, -1, null]]");
251 auto expected = ArrayFromJSON(uint16(), "[0, 65535]");
252 CheckView(arr, expected);
253 CheckView(expected, arr);
254
255 // With nulls
256 arr = ArrayFromJSON(ty1, "[[null, 0, null], null, [null, -1, null]]");
257 expected = ArrayFromJSON(uint16(), "[0, null, 65535]");
258 // CheckView(arr, expected); // XXX currently fails
259 CheckView(expected, arr);
260
261 // With nested nulls => fails
262 arr = ArrayFromJSON(ty1, "[[null, null, null]]");
263 CheckViewFails(arr, uint16());
264}
265
266TEST(TestArrayView, StructAsStructNested) {
267 // Nesting tree shape need not be identical
268 auto ty1 = struct_({field("a", struct_({field("b", int8())})), field("d", int32())});
269 auto ty2 = struct_({field("a", uint8()), field("b", struct_({field("b", float32())}))});
270 auto arr = ArrayFromJSON(ty1, "[[[0], 1069547520], [[-1], -1071644672]]");
271 auto expected = ArrayFromJSON(ty2, "[[0, [1.5]], [255, [-2.5]]]");
272 CheckView(arr, expected);
273 CheckView(expected, arr);
274
275 // With null types
276 ty1 = struct_({field("a", struct_({field("xx", null()), field("b", int8())})),
277 field("d", int32())});
278 ty2 = struct_({field("a", uint8()),
279 field("b", struct_({field("b", float32()), field("xx", null())}))});
280 arr = ArrayFromJSON(ty1, "[[[null, 0], 1069547520], [[null, -1], -1071644672]]");
281 expected = ArrayFromJSON(ty2, "[[0, [1.5, null]], [255, [-2.5, null]]]");
282 CheckView(arr, expected);
283 CheckView(expected, arr);
284
285 // XXX With nulls (currently fails)
286}
287
288TEST(TestArrayView, ListAsListSimple) {
289 auto arr = ArrayFromJSON(list(int16()), "[[0, -1], [], [42]]");
290 auto expected = ArrayFromJSON(list(uint16()), "[[0, 65535], [], [42]]");
291 CheckView(arr, expected);
292 CheckView(expected, arr);
293
294 // With nulls
295 arr = ArrayFromJSON(list(int16()), "[[0, -1], null, [42]]");
296 expected = ArrayFromJSON(list(uint16()), "[[0, 65535], null, [42]]");
297 CheckView(arr, expected);
298 CheckView(expected, arr);
299
300 // With nested nulls
301 arr = ArrayFromJSON(list(int16()), "[[0, -1], null, [null, 42]]");
302 expected = ArrayFromJSON(list(uint16()), "[[0, 65535], null, [null, 42]]");
303 CheckView(arr, expected);
304 CheckView(expected, arr);
305}
306
307TEST(TestArrayView, FixedSizeListAsFixedSizeList) {
308 auto ty1 = fixed_size_list(int16(), 3);
309 auto ty2 = fixed_size_list(uint16(), 3);
310 auto arr = ArrayFromJSON(ty1, "[[0, -1, 42], [5, 6, -16384]]");
311 auto expected = ArrayFromJSON(ty2, "[[0, 65535, 42], [5, 6, 49152]]");
312 CheckView(arr, expected);
313 CheckView(expected, arr);
314
315 // With nested nulls
316 arr = ArrayFromJSON(ty1, "[[0, -1, null], null, [5, 6, -16384]]");
317 expected = ArrayFromJSON(ty2, "[[0, 65535, null], null, [5, 6, 49152]]");
318 CheckView(arr, expected);
319 CheckView(expected, arr);
320}
321
322TEST(TestArrayView, FixedSizeListAsFlat) {
323 auto ty1 = fixed_size_list(int16(), 3);
324 auto arr = ArrayFromJSON(ty1, "[[0, -1, 42], [5, 6, -16384]]");
325 auto expected = ArrayFromJSON(uint16(), "[0, 65535, 42, 5, 6, 49152]");
326 CheckView(arr, expected);
327 // CheckView(expected, arr); // XXX currently fails
328
329 // XXX With nulls (currently fails)
330}
331
332TEST(TestArrayView, FixedSizeListAsFixedSizeBinary) {
333 auto ty1 = fixed_size_list(int32(), 1);
334#if ARROW_LITTLE_ENDIAN
335 auto arr = ArrayFromJSON(ty1, "[[2020568934], [2054316386]]");
336#else
337 auto arr = ArrayFromJSON(ty1, "[[1718579064], [1650553466]]");
338#endif
339 auto expected = ArrayFromJSON(fixed_size_binary(4), R"(["foox", "barz"])");
340 CheckView(arr, expected);
341}
342
343TEST(TestArrayView, SparseUnionAsStruct) {
344 auto child1 = ArrayFromJSON(int16(), "[0, -1, 42]");
345 auto child2 = ArrayFromJSON(int32(), "[0, 1069547520, -1071644672]");
346 auto indices = ArrayFromJSON(int8(), "[0, 0, 1]");
347 ASSERT_OK_AND_ASSIGN(auto arr, SparseUnionArray::Make(*indices, {child1, child2}));
348 ASSERT_OK(arr->ValidateFull());
349
350 auto ty1 = struct_({field("a", int8()), field("b", uint16()), field("c", float32())});
351 auto expected = ArrayFromJSON(ty1, "[[0, 0, 0], [0, 65535, 1.5], [1, 42, -2.5]]");
352 CheckView(arr, expected);
353 CheckView(expected, arr);
354}
355
356TEST(TestArrayView, DecimalRoundTrip) {
357 auto ty1 = decimal(10, 4);
358 auto arr = ArrayFromJSON(ty1, R"(["123.4567", "-78.9000", null])");
359
360 auto ty2 = fixed_size_binary(16);
361 ASSERT_OK_AND_ASSIGN(auto v, arr->View(ty2));
362 ASSERT_OK(v->ValidateFull());
363 ASSERT_OK_AND_ASSIGN(auto w, v->View(ty1));
364 ASSERT_OK(w->ValidateFull());
365 AssertArraysEqual(*arr, *w);
366}
367
368TEST(TestArrayView, Dictionaries) {
369 // ARROW-6049
370 auto ty1 = dictionary(int8(), float32());
371 auto ty2 = dictionary(int8(), int32());
372
373 auto indices = ArrayFromJSON(int8(), "[0, 2, null, 1]");
374 auto values = ArrayFromJSON(float32(), "[0.0, 1.5, -2.5]");
375
376 ASSERT_OK_AND_ASSIGN(auto expected_dict, values->View(int32()));
377 ASSERT_OK_AND_ASSIGN(auto arr, DictionaryArray::FromArrays(ty1, indices, values));
378 ASSERT_OK_AND_ASSIGN(auto expected,
379 DictionaryArray::FromArrays(ty2, indices, expected_dict));
380
381 CheckView(arr, expected);
382 CheckView(expected, arr);
383
384 // Incompatible index type
385 auto ty3 = dictionary(int16(), int32());
386 CheckViewFails(arr, ty3);
387
388 // Incompatible dictionary type
389 auto ty4 = dictionary(int16(), float64());
390 CheckViewFails(arr, ty4);
391
392 // Check dictionary-encoded child
393 auto offsets = ArrayFromJSON(int32(), "[0, 2, 2, 4]");
394 ASSERT_OK_AND_ASSIGN(auto list_arr, ListArray::FromArrays(*offsets, *arr));
395 ASSERT_OK_AND_ASSIGN(auto expected_list_arr,
396 ListArray::FromArrays(*offsets, *expected));
397 CheckView(list_arr, expected_list_arr);
398 CheckView(expected_list_arr, list_arr);
399}
400
401TEST(TestArrayView, ExtensionType) {
402 auto ty1 = std::make_shared<IPv4Type>();
403 auto data = ArrayFromJSON(ty1->storage_type(), R"(["ABCD", null])")->data();
404 data->type = ty1;
405 auto arr = ty1->MakeArray(data);
406#if ARROW_LITTLE_ENDIAN
407 auto expected = ArrayFromJSON(uint32(), "[1145258561, null]");
408#else
409 auto expected = ArrayFromJSON(uint32(), "[1094861636, null]");
410#endif
411 CheckView(arr, expected);
412 CheckView(expected, arr);
413}
414
415TEST(TestArrayView, NonZeroOffset) {
416 auto arr = ArrayFromJSON(int16(), "[10, 11, 12, 13]");
417
418 ASSERT_OK_AND_ASSIGN(auto expected, arr->View(fixed_size_binary(2)));
419 CheckView(arr->Slice(1), expected->Slice(1));
420}
421
422TEST(TestArrayView, NonZeroNestedOffset) {
423 auto list_values = ArrayFromJSON(int16(), "[10, 11, 12, 13, 14]");
424 auto view_values = ArrayFromJSON(uint16(), "[10, 11, 12, 13, 14]");
425
426 auto list_offsets = ArrayFromJSON(int32(), "[0, 2, 3]");
427
428 ASSERT_OK_AND_ASSIGN(auto arr,
429 ListArray::FromArrays(*list_offsets, *list_values->Slice(2)));
430 ASSERT_OK_AND_ASSIGN(auto expected,
431 ListArray::FromArrays(*list_offsets, *view_values->Slice(2)));
432 ASSERT_OK(arr->ValidateFull());
433 CheckView(arr->Slice(1), expected->Slice(1));
434
435 // Be extra paranoid about checking offsets
436 ASSERT_OK_AND_ASSIGN(auto result, arr->Slice(1)->View(expected->type()));
437 ASSERT_EQ(1, result->offset());
438 ASSERT_EQ(2, static_cast<const ListArray&>(*result).values()->offset());
439}
440
441} // namespace arrow