]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/go/arrow/array/concat_test.go
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / go / arrow / array / concat_test.go
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package array_test
18
19import (
20 "fmt"
21 "math"
22 "sort"
23 "testing"
24
25 "github.com/apache/arrow/go/v6/arrow"
26 "github.com/apache/arrow/go/v6/arrow/array"
27 "github.com/apache/arrow/go/v6/arrow/bitutil"
28 "github.com/apache/arrow/go/v6/arrow/internal/testing/gen"
29 "github.com/apache/arrow/go/v6/arrow/memory"
30 "github.com/stretchr/testify/assert"
31 "github.com/stretchr/testify/suite"
32 "golang.org/x/exp/rand"
33)
34
35func TestConcatenateValueBuffersNull(t *testing.T) {
36 mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
37 defer mem.AssertSize(t, 0)
38
39 inputs := make([]array.Interface, 0)
40
41 bldr := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
42 defer bldr.Release()
43
44 arr := bldr.NewArray()
45 defer arr.Release()
46 inputs = append(inputs, arr)
47
48 bldr.AppendNull()
49 arr = bldr.NewArray()
50 defer arr.Release()
51 inputs = append(inputs, arr)
52
53 actual, err := array.Concatenate(inputs, mem)
54 assert.NoError(t, err)
55 defer actual.Release()
56
57 assert.True(t, array.ArrayEqual(actual, inputs[1]))
58}
59
60func TestConcatenate(t *testing.T) {
61 tests := []struct {
62 dt arrow.DataType
63 }{
64 {arrow.FixedWidthTypes.Boolean},
65 {arrow.PrimitiveTypes.Int8},
66 {arrow.PrimitiveTypes.Uint8},
67 {arrow.PrimitiveTypes.Int16},
68 {arrow.PrimitiveTypes.Uint16},
69 {arrow.PrimitiveTypes.Int32},
70 {arrow.PrimitiveTypes.Uint32},
71 {arrow.PrimitiveTypes.Int64},
72 {arrow.PrimitiveTypes.Uint64},
73 {arrow.PrimitiveTypes.Float32},
74 {arrow.PrimitiveTypes.Float64},
75 {arrow.BinaryTypes.String},
76 {arrow.ListOf(arrow.PrimitiveTypes.Int8)},
77 {arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Int8)},
78 {arrow.StructOf()},
79 {arrow.MapOf(arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8)},
80 }
81
82 for _, tt := range tests {
83 t.Run(tt.dt.Name(), func(t *testing.T) {
84 suite.Run(t, &ConcatTestSuite{
85 seed: 0xdeadbeef,
86 dt: tt.dt,
87 nullProbs: []float64{0.0, 0.1, 0.5, 0.9, 1.0},
88 sizes: []int32{0, 1, 2, 4, 16, 31, 1234},
89 })
90 })
91 }
92}
93
94type ConcatTestSuite struct {
95 suite.Suite
96
97 seed uint64
98 rng gen.RandomArrayGenerator
99 dt arrow.DataType
100
101 nullProbs []float64
102 sizes []int32
103
104 mem *memory.CheckedAllocator
105}
106
107func (cts *ConcatTestSuite) SetupSuite() {
108 cts.mem = memory.NewCheckedAllocator(memory.DefaultAllocator)
109 cts.rng = gen.NewRandomArrayGenerator(cts.seed, cts.mem)
110}
111
112func (cts *ConcatTestSuite) TearDownSuite() {
113 cts.mem.AssertSize(cts.T(), 0)
114}
115
116func (cts *ConcatTestSuite) generateArr(size int64, nullprob float64) array.Interface {
117 switch cts.dt.ID() {
118 case arrow.BOOL:
119 return cts.rng.Boolean(size, 0.5, nullprob)
120 case arrow.INT8:
121 return cts.rng.Int8(size, 0, 127, nullprob)
122 case arrow.UINT8:
123 return cts.rng.Uint8(size, 0, 127, nullprob)
124 case arrow.INT16:
125 return cts.rng.Int16(size, 0, 127, nullprob)
126 case arrow.UINT16:
127 return cts.rng.Uint16(size, 0, 127, nullprob)
128 case arrow.INT32:
129 return cts.rng.Int32(size, 0, 127, nullprob)
130 case arrow.UINT32:
131 return cts.rng.Uint32(size, 0, 127, nullprob)
132 case arrow.INT64:
133 return cts.rng.Int64(size, 0, 127, nullprob)
134 case arrow.UINT64:
135 return cts.rng.Uint64(size, 0, 127, nullprob)
136 case arrow.FLOAT32:
137 return cts.rng.Float32(size, 0, 127, nullprob)
138 case arrow.FLOAT64:
139 return cts.rng.Float64(size, 0, 127, nullprob)
140 case arrow.NULL:
141 return array.NewNull(int(size))
142 case arrow.STRING:
143 return cts.rng.String(size, 0, 15, nullprob)
144 case arrow.LIST:
145 valuesSize := size * 4
146 values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8)
147 defer values.Release()
148 offsetsVector := cts.offsets(int32(valuesSize), int32(size))
149 // ensure the first and last offsets encompass the whole values
150 offsetsVector[0] = 0
151 offsetsVector[len(offsetsVector)-1] = int32(valuesSize)
152
153 bldr := array.NewListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8)
154 defer bldr.Release()
155
156 valid := make([]bool, len(offsetsVector)-1)
157 for i := range valid {
158 valid[i] = true
159 }
160 bldr.AppendValues(offsetsVector, valid)
161 vb := bldr.ValueBuilder().(*array.Int8Builder)
162 for i := 0; i < values.Len(); i++ {
163 if values.IsValid(i) {
164 vb.Append(values.Value(i))
165 } else {
166 vb.AppendNull()
167 }
168 }
169 return bldr.NewArray()
170 case arrow.FIXED_SIZE_LIST:
171 const listsize = 3
172 valuesSize := size * listsize
173 values := cts.rng.Int8(valuesSize, 0, 127, nullprob)
174 defer values.Release()
175
176 data := array.NewData(arrow.FixedSizeListOf(listsize, arrow.PrimitiveTypes.Int8), int(size), []*memory.Buffer{nil}, []*array.Data{values.Data()}, 0, 0)
177 defer data.Release()
178 return array.MakeFromData(data)
179 case arrow.STRUCT:
180 foo := cts.rng.Int8(size, 0, 127, nullprob)
181 defer foo.Release()
182 bar := cts.rng.Float64(size, 0, 127, nullprob)
183 defer bar.Release()
184 baz := cts.rng.Boolean(size, 0.5, nullprob)
185 defer baz.Release()
186
187 data := array.NewData(arrow.StructOf(
188 arrow.Field{Name: "foo", Type: foo.DataType(), Nullable: true},
189 arrow.Field{Name: "bar", Type: bar.DataType(), Nullable: true},
190 arrow.Field{Name: "baz", Type: baz.DataType(), Nullable: true}),
191 int(size), []*memory.Buffer{nil}, []*array.Data{foo.Data(), bar.Data(), baz.Data()}, 0, 0)
192 defer data.Release()
193 return array.NewStructData(data)
194 case arrow.MAP:
195 valuesSize := size * 4
196 keys := cts.rng.Uint16(valuesSize, 0, 127, 0).(*array.Uint16)
197 defer keys.Release()
198 values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8)
199 defer values.Release()
200
201 offsetsVector := cts.offsets(int32(valuesSize), int32(size))
202 offsetsVector[0] = 0
203 offsetsVector[len(offsetsVector)-1] = int32(valuesSize)
204
205 bldr := array.NewMapBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8, false)
206 defer bldr.Release()
207
208 kb := bldr.KeyBuilder().(*array.Uint16Builder)
209 vb := bldr.ItemBuilder().(*array.Int8Builder)
210
211 valid := make([]bool, len(offsetsVector)-1)
212 for i := range valid {
213 valid[i] = true
214 }
215 bldr.AppendValues(offsetsVector, valid)
216 for i := 0; i < int(valuesSize); i++ {
217 kb.Append(keys.Value(i))
218 if values.IsValid(i) {
219 vb.Append(values.Value(i))
220 } else {
221 vb.AppendNull()
222 }
223 }
224 return bldr.NewArray()
225 default:
226 return nil
227 }
228}
229
230func (cts *ConcatTestSuite) slices(arr array.Interface, offsets []int32) []array.Interface {
231 slices := make([]array.Interface, len(offsets)-1)
232 for i := 0; i != len(slices); i++ {
233 slices[i] = array.NewSlice(arr, int64(offsets[i]), int64(offsets[i+1]))
234 }
235 return slices
236}
237
238func (cts *ConcatTestSuite) checkTrailingBitsZeroed(bitmap *memory.Buffer, length int64) {
239 if preceding := bitutil.PrecedingBitmask[length%8]; preceding != 0 {
240 lastByte := bitmap.Bytes()[length/8]
241 cts.Equal(lastByte&preceding, lastByte, length, preceding)
242 }
243}
244
245func (cts *ConcatTestSuite) offsets(length, slicecount int32) []int32 {
246 offsets := make([]int32, slicecount+1)
247 dist := rand.New(rand.NewSource(cts.seed))
248 for i := range offsets {
249 offsets[i] = dist.Int31n(length + 1)
250 }
251 sort.Slice(offsets, func(i, j int) bool { return offsets[i] < offsets[j] })
252 return offsets
253}
254
255func (cts *ConcatTestSuite) TestCheckConcat() {
256 for _, sz := range cts.sizes {
257 cts.Run(fmt.Sprintf("size %d", sz), func() {
258 offsets := cts.offsets(sz, 3)
259 for _, np := range cts.nullProbs {
260 cts.Run(fmt.Sprintf("nullprob %0.2f", np), func() {
261 scopedMem := memory.NewCheckedAllocatorScope(cts.mem)
262 defer scopedMem.CheckSize(cts.T())
263
264 arr := cts.generateArr(int64(sz), np)
265 defer arr.Release()
266 expected := array.NewSlice(arr, int64(offsets[0]), int64(offsets[len(offsets)-1]))
267 defer expected.Release()
268
269 slices := cts.slices(arr, offsets)
270 for _, s := range slices {
271 defer s.Release()
272 }
273
274 actual, err := array.Concatenate(slices, cts.mem)
275 cts.NoError(err)
276 defer actual.Release()
277
278 cts.Truef(array.ArrayEqual(expected, actual), "expected: %s\ngot: %s\n", expected, actual)
279 if len(actual.Data().Buffers()) > 0 {
280 if actual.Data().Buffers()[0] != nil {
281 cts.checkTrailingBitsZeroed(actual.Data().Buffers()[0], int64(actual.Len()))
282 }
283 if actual.DataType().ID() == arrow.BOOL {
284 cts.checkTrailingBitsZeroed(actual.Data().Buffers()[1], int64(actual.Len()))
285 }
286 }
287 })
288 }
289 })
290 }
291}
292
293func TestOffsetOverflow(t *testing.T) {
294 fakeOffsets := memory.NewBufferBytes(arrow.Int32Traits.CastToBytes([]int32{0, math.MaxInt32}))
295 fakeArr := array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, []*memory.Buffer{nil, fakeOffsets, memory.NewBufferBytes([]byte{})}, nil, 0, 0))
296 var err error
297 assert.NotPanics(t, func() {
298 _, err = array.Concatenate([]array.Interface{fakeArr, fakeArr}, memory.DefaultAllocator)
299 })
300 assert.EqualError(t, err, "offset overflow while concatenating arrays")
301}