]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, software | |
12 | // distributed under the License is distributed on an "AS IS" BASIS, | |
13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | // See the License for the specific language governing permissions and | |
15 | // limitations under the License. | |
16 | ||
17 | package array_test | |
18 | ||
19 | import ( | |
20 | "fmt" | |
21 | "math" | |
22 | "sort" | |
23 | "testing" | |
24 | ||
25 | "github.com/apache/arrow/go/v6/arrow" | |
26 | "github.com/apache/arrow/go/v6/arrow/array" | |
27 | "github.com/apache/arrow/go/v6/arrow/bitutil" | |
28 | "github.com/apache/arrow/go/v6/arrow/internal/testing/gen" | |
29 | "github.com/apache/arrow/go/v6/arrow/memory" | |
30 | "github.com/stretchr/testify/assert" | |
31 | "github.com/stretchr/testify/suite" | |
32 | "golang.org/x/exp/rand" | |
33 | ) | |
34 | ||
35 | func TestConcatenateValueBuffersNull(t *testing.T) { | |
36 | mem := memory.NewCheckedAllocator(memory.DefaultAllocator) | |
37 | defer mem.AssertSize(t, 0) | |
38 | ||
39 | inputs := make([]array.Interface, 0) | |
40 | ||
41 | bldr := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) | |
42 | defer bldr.Release() | |
43 | ||
44 | arr := bldr.NewArray() | |
45 | defer arr.Release() | |
46 | inputs = append(inputs, arr) | |
47 | ||
48 | bldr.AppendNull() | |
49 | arr = bldr.NewArray() | |
50 | defer arr.Release() | |
51 | inputs = append(inputs, arr) | |
52 | ||
53 | actual, err := array.Concatenate(inputs, mem) | |
54 | assert.NoError(t, err) | |
55 | defer actual.Release() | |
56 | ||
57 | assert.True(t, array.ArrayEqual(actual, inputs[1])) | |
58 | } | |
59 | ||
60 | func TestConcatenate(t *testing.T) { | |
61 | tests := []struct { | |
62 | dt arrow.DataType | |
63 | }{ | |
64 | {arrow.FixedWidthTypes.Boolean}, | |
65 | {arrow.PrimitiveTypes.Int8}, | |
66 | {arrow.PrimitiveTypes.Uint8}, | |
67 | {arrow.PrimitiveTypes.Int16}, | |
68 | {arrow.PrimitiveTypes.Uint16}, | |
69 | {arrow.PrimitiveTypes.Int32}, | |
70 | {arrow.PrimitiveTypes.Uint32}, | |
71 | {arrow.PrimitiveTypes.Int64}, | |
72 | {arrow.PrimitiveTypes.Uint64}, | |
73 | {arrow.PrimitiveTypes.Float32}, | |
74 | {arrow.PrimitiveTypes.Float64}, | |
75 | {arrow.BinaryTypes.String}, | |
76 | {arrow.ListOf(arrow.PrimitiveTypes.Int8)}, | |
77 | {arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Int8)}, | |
78 | {arrow.StructOf()}, | |
79 | {arrow.MapOf(arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8)}, | |
80 | } | |
81 | ||
82 | for _, tt := range tests { | |
83 | t.Run(tt.dt.Name(), func(t *testing.T) { | |
84 | suite.Run(t, &ConcatTestSuite{ | |
85 | seed: 0xdeadbeef, | |
86 | dt: tt.dt, | |
87 | nullProbs: []float64{0.0, 0.1, 0.5, 0.9, 1.0}, | |
88 | sizes: []int32{0, 1, 2, 4, 16, 31, 1234}, | |
89 | }) | |
90 | }) | |
91 | } | |
92 | } | |
93 | ||
94 | type ConcatTestSuite struct { | |
95 | suite.Suite | |
96 | ||
97 | seed uint64 | |
98 | rng gen.RandomArrayGenerator | |
99 | dt arrow.DataType | |
100 | ||
101 | nullProbs []float64 | |
102 | sizes []int32 | |
103 | ||
104 | mem *memory.CheckedAllocator | |
105 | } | |
106 | ||
107 | func (cts *ConcatTestSuite) SetupSuite() { | |
108 | cts.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) | |
109 | cts.rng = gen.NewRandomArrayGenerator(cts.seed, cts.mem) | |
110 | } | |
111 | ||
112 | func (cts *ConcatTestSuite) TearDownSuite() { | |
113 | cts.mem.AssertSize(cts.T(), 0) | |
114 | } | |
115 | ||
116 | func (cts *ConcatTestSuite) generateArr(size int64, nullprob float64) array.Interface { | |
117 | switch cts.dt.ID() { | |
118 | case arrow.BOOL: | |
119 | return cts.rng.Boolean(size, 0.5, nullprob) | |
120 | case arrow.INT8: | |
121 | return cts.rng.Int8(size, 0, 127, nullprob) | |
122 | case arrow.UINT8: | |
123 | return cts.rng.Uint8(size, 0, 127, nullprob) | |
124 | case arrow.INT16: | |
125 | return cts.rng.Int16(size, 0, 127, nullprob) | |
126 | case arrow.UINT16: | |
127 | return cts.rng.Uint16(size, 0, 127, nullprob) | |
128 | case arrow.INT32: | |
129 | return cts.rng.Int32(size, 0, 127, nullprob) | |
130 | case arrow.UINT32: | |
131 | return cts.rng.Uint32(size, 0, 127, nullprob) | |
132 | case arrow.INT64: | |
133 | return cts.rng.Int64(size, 0, 127, nullprob) | |
134 | case arrow.UINT64: | |
135 | return cts.rng.Uint64(size, 0, 127, nullprob) | |
136 | case arrow.FLOAT32: | |
137 | return cts.rng.Float32(size, 0, 127, nullprob) | |
138 | case arrow.FLOAT64: | |
139 | return cts.rng.Float64(size, 0, 127, nullprob) | |
140 | case arrow.NULL: | |
141 | return array.NewNull(int(size)) | |
142 | case arrow.STRING: | |
143 | return cts.rng.String(size, 0, 15, nullprob) | |
144 | case arrow.LIST: | |
145 | valuesSize := size * 4 | |
146 | values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8) | |
147 | defer values.Release() | |
148 | offsetsVector := cts.offsets(int32(valuesSize), int32(size)) | |
149 | // ensure the first and last offsets encompass the whole values | |
150 | offsetsVector[0] = 0 | |
151 | offsetsVector[len(offsetsVector)-1] = int32(valuesSize) | |
152 | ||
153 | bldr := array.NewListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) | |
154 | defer bldr.Release() | |
155 | ||
156 | valid := make([]bool, len(offsetsVector)-1) | |
157 | for i := range valid { | |
158 | valid[i] = true | |
159 | } | |
160 | bldr.AppendValues(offsetsVector, valid) | |
161 | vb := bldr.ValueBuilder().(*array.Int8Builder) | |
162 | for i := 0; i < values.Len(); i++ { | |
163 | if values.IsValid(i) { | |
164 | vb.Append(values.Value(i)) | |
165 | } else { | |
166 | vb.AppendNull() | |
167 | } | |
168 | } | |
169 | return bldr.NewArray() | |
170 | case arrow.FIXED_SIZE_LIST: | |
171 | const listsize = 3 | |
172 | valuesSize := size * listsize | |
173 | values := cts.rng.Int8(valuesSize, 0, 127, nullprob) | |
174 | defer values.Release() | |
175 | ||
176 | data := array.NewData(arrow.FixedSizeListOf(listsize, arrow.PrimitiveTypes.Int8), int(size), []*memory.Buffer{nil}, []*array.Data{values.Data()}, 0, 0) | |
177 | defer data.Release() | |
178 | return array.MakeFromData(data) | |
179 | case arrow.STRUCT: | |
180 | foo := cts.rng.Int8(size, 0, 127, nullprob) | |
181 | defer foo.Release() | |
182 | bar := cts.rng.Float64(size, 0, 127, nullprob) | |
183 | defer bar.Release() | |
184 | baz := cts.rng.Boolean(size, 0.5, nullprob) | |
185 | defer baz.Release() | |
186 | ||
187 | data := array.NewData(arrow.StructOf( | |
188 | arrow.Field{Name: "foo", Type: foo.DataType(), Nullable: true}, | |
189 | arrow.Field{Name: "bar", Type: bar.DataType(), Nullable: true}, | |
190 | arrow.Field{Name: "baz", Type: baz.DataType(), Nullable: true}), | |
191 | int(size), []*memory.Buffer{nil}, []*array.Data{foo.Data(), bar.Data(), baz.Data()}, 0, 0) | |
192 | defer data.Release() | |
193 | return array.NewStructData(data) | |
194 | case arrow.MAP: | |
195 | valuesSize := size * 4 | |
196 | keys := cts.rng.Uint16(valuesSize, 0, 127, 0).(*array.Uint16) | |
197 | defer keys.Release() | |
198 | values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8) | |
199 | defer values.Release() | |
200 | ||
201 | offsetsVector := cts.offsets(int32(valuesSize), int32(size)) | |
202 | offsetsVector[0] = 0 | |
203 | offsetsVector[len(offsetsVector)-1] = int32(valuesSize) | |
204 | ||
205 | bldr := array.NewMapBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8, false) | |
206 | defer bldr.Release() | |
207 | ||
208 | kb := bldr.KeyBuilder().(*array.Uint16Builder) | |
209 | vb := bldr.ItemBuilder().(*array.Int8Builder) | |
210 | ||
211 | valid := make([]bool, len(offsetsVector)-1) | |
212 | for i := range valid { | |
213 | valid[i] = true | |
214 | } | |
215 | bldr.AppendValues(offsetsVector, valid) | |
216 | for i := 0; i < int(valuesSize); i++ { | |
217 | kb.Append(keys.Value(i)) | |
218 | if values.IsValid(i) { | |
219 | vb.Append(values.Value(i)) | |
220 | } else { | |
221 | vb.AppendNull() | |
222 | } | |
223 | } | |
224 | return bldr.NewArray() | |
225 | default: | |
226 | return nil | |
227 | } | |
228 | } | |
229 | ||
230 | func (cts *ConcatTestSuite) slices(arr array.Interface, offsets []int32) []array.Interface { | |
231 | slices := make([]array.Interface, len(offsets)-1) | |
232 | for i := 0; i != len(slices); i++ { | |
233 | slices[i] = array.NewSlice(arr, int64(offsets[i]), int64(offsets[i+1])) | |
234 | } | |
235 | return slices | |
236 | } | |
237 | ||
238 | func (cts *ConcatTestSuite) checkTrailingBitsZeroed(bitmap *memory.Buffer, length int64) { | |
239 | if preceding := bitutil.PrecedingBitmask[length%8]; preceding != 0 { | |
240 | lastByte := bitmap.Bytes()[length/8] | |
241 | cts.Equal(lastByte&preceding, lastByte, length, preceding) | |
242 | } | |
243 | } | |
244 | ||
245 | func (cts *ConcatTestSuite) offsets(length, slicecount int32) []int32 { | |
246 | offsets := make([]int32, slicecount+1) | |
247 | dist := rand.New(rand.NewSource(cts.seed)) | |
248 | for i := range offsets { | |
249 | offsets[i] = dist.Int31n(length + 1) | |
250 | } | |
251 | sort.Slice(offsets, func(i, j int) bool { return offsets[i] < offsets[j] }) | |
252 | return offsets | |
253 | } | |
254 | ||
255 | func (cts *ConcatTestSuite) TestCheckConcat() { | |
256 | for _, sz := range cts.sizes { | |
257 | cts.Run(fmt.Sprintf("size %d", sz), func() { | |
258 | offsets := cts.offsets(sz, 3) | |
259 | for _, np := range cts.nullProbs { | |
260 | cts.Run(fmt.Sprintf("nullprob %0.2f", np), func() { | |
261 | scopedMem := memory.NewCheckedAllocatorScope(cts.mem) | |
262 | defer scopedMem.CheckSize(cts.T()) | |
263 | ||
264 | arr := cts.generateArr(int64(sz), np) | |
265 | defer arr.Release() | |
266 | expected := array.NewSlice(arr, int64(offsets[0]), int64(offsets[len(offsets)-1])) | |
267 | defer expected.Release() | |
268 | ||
269 | slices := cts.slices(arr, offsets) | |
270 | for _, s := range slices { | |
271 | defer s.Release() | |
272 | } | |
273 | ||
274 | actual, err := array.Concatenate(slices, cts.mem) | |
275 | cts.NoError(err) | |
276 | defer actual.Release() | |
277 | ||
278 | cts.Truef(array.ArrayEqual(expected, actual), "expected: %s\ngot: %s\n", expected, actual) | |
279 | if len(actual.Data().Buffers()) > 0 { | |
280 | if actual.Data().Buffers()[0] != nil { | |
281 | cts.checkTrailingBitsZeroed(actual.Data().Buffers()[0], int64(actual.Len())) | |
282 | } | |
283 | if actual.DataType().ID() == arrow.BOOL { | |
284 | cts.checkTrailingBitsZeroed(actual.Data().Buffers()[1], int64(actual.Len())) | |
285 | } | |
286 | } | |
287 | }) | |
288 | } | |
289 | }) | |
290 | } | |
291 | } | |
292 | ||
293 | func TestOffsetOverflow(t *testing.T) { | |
294 | fakeOffsets := memory.NewBufferBytes(arrow.Int32Traits.CastToBytes([]int32{0, math.MaxInt32})) | |
295 | fakeArr := array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, []*memory.Buffer{nil, fakeOffsets, memory.NewBufferBytes([]byte{})}, nil, 0, 0)) | |
296 | var err error | |
297 | assert.NotPanics(t, func() { | |
298 | _, err = array.Concatenate([]array.Interface{fakeArr, fakeArr}, memory.DefaultAllocator) | |
299 | }) | |
300 | assert.EqualError(t, err, "offset overflow while concatenating arrays") | |
301 | } |