]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/go/parquet/metadata/statistics_test.go
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / go / parquet / metadata / statistics_test.go
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16
17 package metadata_test
18
19 import (
20 "math"
21 "reflect"
22 "testing"
23
24 "github.com/apache/arrow/go/v6/arrow/bitutil"
25 "github.com/apache/arrow/go/v6/arrow/memory"
26 "github.com/apache/arrow/go/v6/parquet"
27 "github.com/apache/arrow/go/v6/parquet/metadata"
28 "github.com/apache/arrow/go/v6/parquet/schema"
29 "github.com/stretchr/testify/assert"
30 )
31
32 // NOTE(zeroshade): tests will be added and updated after merging the "file" package
33 // since the tests that I wrote relied on the file writer/reader for ease of use.
34
35 func TestCheckNaNs(t *testing.T) {
36 const (
37 numvals = 8
38 min = -4.0
39 max = 3.0
40 )
41 nan := math.NaN()
42
43 allNans := []float64{nan, nan, nan, nan, nan, nan, nan, nan}
44 allNansf32 := make([]float32, numvals)
45 for idx, v := range allNans {
46 allNansf32[idx] = float32(v)
47 }
48
49 someNans := []float64{nan, max, -3.0, -1.0, nan, 2.0, min, nan}
50 someNansf32 := make([]float32, numvals)
51 for idx, v := range someNans {
52 someNansf32[idx] = float32(v)
53 }
54
55 validBitmap := []byte{0x7F} // 0b01111111
56 validBitmapNoNaNs := []byte{0x6E} // 0b01101110
57
58 assertUnsetMinMax := func(stats metadata.TypedStatistics, values interface{}, bitmap []byte) {
59 if bitmap == nil {
60 switch s := stats.(type) {
61 case *metadata.Float32Statistics:
62 s.Update(values.([]float32), 0)
63 case *metadata.Float64Statistics:
64 s.Update(values.([]float64), 0)
65 }
66 assert.False(t, stats.HasMinMax())
67 } else {
68 nvalues := reflect.ValueOf(values).Len()
69 nullCount := bitutil.CountSetBits(bitmap, 0, nvalues)
70 switch s := stats.(type) {
71 case *metadata.Float32Statistics:
72 s.UpdateSpaced(values.([]float32), bitmap, 0, int64(nullCount))
73 case *metadata.Float64Statistics:
74 s.UpdateSpaced(values.([]float64), bitmap, 0, int64(nullCount))
75 }
76 assert.False(t, stats.HasMinMax())
77 }
78 }
79
80 assertMinMaxAre := func(stats metadata.TypedStatistics, values interface{}, expectedMin, expectedMax interface{}) {
81 switch s := stats.(type) {
82 case *metadata.Float32Statistics:
83 s.Update(values.([]float32), 0)
84 assert.True(t, stats.HasMinMax())
85 assert.Equal(t, expectedMin, s.Min())
86 assert.Equal(t, expectedMax, s.Max())
87 case *metadata.Float64Statistics:
88 s.Update(values.([]float64), 0)
89 assert.True(t, stats.HasMinMax())
90 assert.Equal(t, expectedMin, s.Min())
91 assert.Equal(t, expectedMax, s.Max())
92 }
93 }
94
95 assertMinMaxAreSpaced := func(stats metadata.TypedStatistics, values interface{}, bitmap []byte, expectedMin, expectedMax interface{}) {
96 nvalues := reflect.ValueOf(values).Len()
97 nullCount := bitutil.CountSetBits(bitmap, 0, nvalues)
98 switch s := stats.(type) {
99 case *metadata.Float32Statistics:
100 s.UpdateSpaced(values.([]float32), bitmap, 0, int64(nullCount))
101 assert.True(t, s.HasMinMax())
102 assert.Equal(t, expectedMin, s.Min())
103 assert.Equal(t, expectedMax, s.Max())
104 case *metadata.Float64Statistics:
105 s.UpdateSpaced(values.([]float64), bitmap, 0, int64(nullCount))
106 assert.True(t, s.HasMinMax())
107 assert.Equal(t, expectedMin, s.Min())
108 assert.Equal(t, expectedMax, s.Max())
109 }
110 }
111
112 f32Col := schema.NewColumn(schema.NewFloat32Node("f", parquet.Repetitions.Optional, -1), 1, 1)
113 f64Col := schema.NewColumn(schema.NewFloat64Node("f", parquet.Repetitions.Optional, -1), 1, 1)
114 // test values
115 someNanStats := metadata.NewStatistics(f64Col, memory.DefaultAllocator)
116 someNanStatsf32 := metadata.NewStatistics(f32Col, memory.DefaultAllocator)
117 // ingesting only nans should not yield a min or max
118 assertUnsetMinMax(someNanStats, allNans, nil)
119 assertUnsetMinMax(someNanStatsf32, allNansf32, nil)
120 // ingesting a mix should yield a valid min/max
121 assertMinMaxAre(someNanStats, someNans, min, max)
122 assertMinMaxAre(someNanStatsf32, someNansf32, float32(min), float32(max))
123 // ingesting only nans after a valid min/max should have no effect
124 assertMinMaxAre(someNanStats, allNans, min, max)
125 assertMinMaxAre(someNanStatsf32, allNansf32, float32(min), float32(max))
126
127 someNanStats = metadata.NewStatistics(f64Col, memory.DefaultAllocator)
128 someNanStatsf32 = metadata.NewStatistics(f32Col, memory.DefaultAllocator)
129 assertUnsetMinMax(someNanStats, allNans, validBitmap)
130 assertUnsetMinMax(someNanStatsf32, allNansf32, validBitmap)
131 // nans should not pollute min/max when excluded via null bitmap
132 assertMinMaxAreSpaced(someNanStats, someNans, validBitmapNoNaNs, min, max)
133 assertMinMaxAreSpaced(someNanStatsf32, someNansf32, validBitmapNoNaNs, float32(min), float32(max))
134 // ingesting nans with a null bitmap should not change the result
135 assertMinMaxAreSpaced(someNanStats, someNans, validBitmap, min, max)
136 assertMinMaxAreSpaced(someNanStatsf32, someNansf32, validBitmap, float32(min), float32(max))
137 }
138
139 func TestCheckNegativeZeroStats(t *testing.T) {
140 assertMinMaxZeroesSign := func(stats metadata.TypedStatistics, values interface{}) {
141 switch s := stats.(type) {
142 case *metadata.Float32Statistics:
143 s.Update(values.([]float32), 0)
144 assert.True(t, s.HasMinMax())
145 var zero float32
146 assert.Equal(t, zero, s.Min())
147 assert.True(t, math.Signbit(float64(s.Min())))
148 assert.Equal(t, zero, s.Max())
149 assert.False(t, math.Signbit(float64(s.Max())))
150 case *metadata.Float64Statistics:
151 s.Update(values.([]float64), 0)
152 assert.True(t, s.HasMinMax())
153 var zero float64
154 assert.Equal(t, zero, s.Min())
155 assert.True(t, math.Signbit(s.Min()))
156 assert.Equal(t, zero, s.Max())
157 assert.False(t, math.Signbit(s.Max()))
158 }
159 }
160
161 fcol := schema.NewColumn(schema.NewFloat32Node("f", parquet.Repetitions.Optional, -1), 1, 1)
162 dcol := schema.NewColumn(schema.NewFloat64Node("d", parquet.Repetitions.Optional, -1), 1, 1)
163
164 var f32zero float32
165 var f64zero float64
166 {
167 fstats := metadata.NewStatistics(fcol, memory.DefaultAllocator)
168 dstats := metadata.NewStatistics(dcol, memory.DefaultAllocator)
169 assertMinMaxZeroesSign(fstats, []float32{-f32zero, f32zero})
170 assertMinMaxZeroesSign(dstats, []float64{-f64zero, f64zero})
171 }
172 {
173 fstats := metadata.NewStatistics(fcol, memory.DefaultAllocator)
174 dstats := metadata.NewStatistics(dcol, memory.DefaultAllocator)
175 assertMinMaxZeroesSign(fstats, []float32{f32zero, -f32zero})
176 assertMinMaxZeroesSign(dstats, []float64{f64zero, -f64zero})
177 }
178 {
179 fstats := metadata.NewStatistics(fcol, memory.DefaultAllocator)
180 dstats := metadata.NewStatistics(dcol, memory.DefaultAllocator)
181 assertMinMaxZeroesSign(fstats, []float32{-f32zero, -f32zero})
182 assertMinMaxZeroesSign(dstats, []float64{-f64zero, -f64zero})
183 }
184 {
185 fstats := metadata.NewStatistics(fcol, memory.DefaultAllocator)
186 dstats := metadata.NewStatistics(dcol, memory.DefaultAllocator)
187 assertMinMaxZeroesSign(fstats, []float32{f32zero, f32zero})
188 assertMinMaxZeroesSign(dstats, []float64{f64zero, f64zero})
189 }
190 }