]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/go/parquet/metadata/statistics_types.gen.go
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / go / parquet / metadata / statistics_types.gen.go
CommitLineData
1d09f67e
TL
1// Code generated by statistics_types.gen.go.tmpl. DO NOT EDIT.
2
3// Licensed to the Apache Software Foundation (ASF) under one
4// or more contributor license agreements. See the NOTICE file
5// distributed with this work for additional information
6// regarding copyright ownership. The ASF licenses this file
7// to you under the Apache License, Version 2.0 (the
8// "License"); you may not use this file except in compliance
9// with the License. You may obtain a copy of the License at
10//
11// http://www.apache.org/licenses/LICENSE-2.0
12//
13// Unless required by applicable law or agreed to in writing, software
14// distributed under the License is distributed on an "AS IS" BASIS,
15// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16// See the License for the specific language governing permissions and
17// limitations under the License.
18
19package metadata
20
21import (
22 "math"
23
24 "github.com/apache/arrow/go/v6/arrow"
25 "github.com/apache/arrow/go/v6/arrow/memory"
26 "github.com/apache/arrow/go/v6/parquet"
27 "github.com/apache/arrow/go/v6/parquet/internal/encoding"
28 "github.com/apache/arrow/go/v6/parquet/internal/utils"
29 "github.com/apache/arrow/go/v6/parquet/schema"
30 "golang.org/x/xerrors"
31)
32
33type minmaxPairInt32 [2]int32
34
35// Int32Statistics is the typed interface for managing stats for a column
36// of Int32 type.
37type Int32Statistics struct {
38 statistics
39 min int32
40 max int32
41
42 bitSetReader utils.SetBitRunReader
43}
44
45// NewInt32Statistics constructs an appropriate stat object type using the
46// given column descriptor and allocator.
47//
48// Panics if the physical type of descr is not parquet.Type.Int32
49func NewInt32Statistics(descr *schema.Column, mem memory.Allocator) *Int32Statistics {
50 if descr.PhysicalType() != parquet.Types.Int32 {
51 panic(xerrors.Errorf("parquet: invalid type %s for constructing a Int32 stat object", descr.PhysicalType()))
52 }
53
54 return &Int32Statistics{
55 statistics: statistics{
56 descr: descr,
57 hasNullCount: true,
58 hasDistinctCount: true,
59 order: descr.SortOrder(),
60 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
61 mem: mem,
62 },
63 }
64}
65
66// NewInt32StatisticsFromEncoded will construct a propertly typed statistics object
67// initializing it with the provided information.
68func NewInt32StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int32Statistics {
69 ret := NewInt32Statistics(descr, mem)
70 ret.nvalues += nvalues
71 if encoded.IsSetNullCount() {
72 ret.incNulls(encoded.GetNullCount())
73 }
74 if encoded.IsSetDistinctCount() {
75 ret.incDistinct(encoded.GetDistinctCount())
76 }
77
78 encodedMin := encoded.GetMin()
79 if encodedMin != nil && len(encodedMin) > 0 {
80 ret.min = ret.plainDecode(encodedMin)
81 }
82 encodedMax := encoded.GetMax()
83 if encodedMax != nil && len(encodedMax) > 0 {
84 ret.max = ret.plainDecode(encodedMax)
85 }
86 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
87 return ret
88}
89
90func (s *Int32Statistics) plainEncode(src int32) []byte {
91 s.encoder.(encoding.Int32Encoder).Put([]int32{src})
92 buf, err := s.encoder.FlushValues()
93 if err != nil {
94 panic(err) // recovered by Encode
95 }
96 defer buf.Release()
97
98 out := make([]byte, buf.Len())
99 copy(out, buf.Bytes())
100 return out
101}
102
103func (s *Int32Statistics) plainDecode(src []byte) int32 {
104 var buf [1]int32
105
106 decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem)
107 decoder.SetData(1, src)
108 decoder.(encoding.Int32Decoder).Decode(buf[:])
109 return buf[0]
110}
111
112func (s *Int32Statistics) minval(a, b int32) int32 {
113 if s.less(a, b) {
114 return a
115 }
116 return b
117}
118
119func (s *Int32Statistics) maxval(a, b int32) int32 {
120 if s.less(a, b) {
121 return b
122 }
123 return a
124}
125
126// MinMaxEqual returns true if both stat objects have the same Min and Max values
127func (s *Int32Statistics) MinMaxEqual(rhs *Int32Statistics) bool {
128 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
129}
130
131// Equals returns true only if both objects are the same type, have the same min and
132// max values, null count, distinct count and number of values.
133func (s *Int32Statistics) Equals(other TypedStatistics) bool {
134 if s.Type() != other.Type() {
135 return false
136 }
137 rhs, ok := other.(*Int32Statistics)
138 if !ok {
139 return false
140 }
141
142 if s.HasMinMax() != rhs.HasMinMax() {
143 return false
144 }
145 return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
146 s.NullCount() == rhs.NullCount() &&
147 s.DistinctCount() == rhs.DistinctCount() &&
148 s.NumValues() == rhs.NumValues()
149}
150
151func (s *Int32Statistics) getMinMax(values []int32) (min, max int32) {
152 if s.order == schema.SortSIGNED {
153 min, max = utils.GetMinMaxInt32(values)
154 } else {
155 umin, umax := utils.GetMinMaxUint32(arrow.Uint32Traits.CastFromBytes(arrow.Int32Traits.CastToBytes(values)))
156 min, max = int32(umin), int32(umax)
157 }
158 return
159}
160
161func (s *Int32Statistics) getMinMaxSpaced(values []int32, validBits []byte, validBitsOffset int64) (min, max int32) {
162 min = s.defaultMin()
163 max = s.defaultMax()
164 var fn func([]int32) (int32, int32)
165 if s.order == schema.SortSIGNED {
166 fn = utils.GetMinMaxInt32
167 } else {
168 fn = func(v []int32) (int32, int32) {
169 umin, umax := utils.GetMinMaxUint32(arrow.Uint32Traits.CastFromBytes(arrow.Int32Traits.CastToBytes(values)))
170 return int32(umin), int32(umax)
171 }
172 }
173
174 if s.bitSetReader == nil {
175 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
176 } else {
177 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
178 }
179
180 for {
181 run := s.bitSetReader.NextRun()
182 if run.Length == 0 {
183 break
184 }
185 localMin, localMax := fn(values[int(run.Pos):int(run.Pos+run.Length)])
186 if min > localMin {
187 min = localMin
188 }
189 if max < localMax {
190 max = localMax
191 }
192 }
193 return
194}
195
196func (s *Int32Statistics) Min() int32 { return s.min }
197func (s *Int32Statistics) Max() int32 { return s.max }
198
199// Merge merges the stats from other into this stat object, updating
200// the null count, distinct count, number of values and the min/max if
201// appropriate.
202func (s *Int32Statistics) Merge(other TypedStatistics) {
203 rhs, ok := other.(*Int32Statistics)
204 if !ok {
205 panic("incompatible stat type merge")
206 }
207
208 s.statistics.merge(rhs)
209 if rhs.HasMinMax() {
210 s.SetMinMax(rhs.Min(), rhs.Max())
211 }
212}
213
214// Update is used to add more values to the current stat object, finding the
215// min and max values etc.
216func (s *Int32Statistics) Update(values []int32, numNull int64) {
217 s.incNulls(numNull)
218 s.nvalues += int64(len(values))
219
220 if len(values) == 0 {
221 return
222 }
223
224 s.SetMinMax(s.getMinMax(values))
225}
226
227// UpdateSpaced is just like Update, but for spaced values using validBits to determine
228// and skip null values.
229func (s *Int32Statistics) UpdateSpaced(values []int32, validBits []byte, validBitsOffset, numNull int64) {
230 s.incNulls(numNull)
231 notnull := int64(len(values)) - numNull
232 s.nvalues += notnull
233
234 if notnull == 0 {
235 return
236 }
237
238 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
239}
240
241// SetMinMax updates the min and max values only if they are not currently set
242// or if argMin is less than the current min / argMax is greater than the current max
243func (s *Int32Statistics) SetMinMax(argMin, argMax int32) {
244 maybeMinMax := s.cleanStat([2]int32{argMin, argMax})
245 if maybeMinMax == nil {
246 return
247 }
248
249 min := (*maybeMinMax)[0]
250 max := (*maybeMinMax)[1]
251
252 if !s.hasMinMax {
253 s.hasMinMax = true
254 s.min = min
255 s.max = max
256 } else {
257 if !s.less(s.min, min) {
258 s.min = min
259 }
260 if s.less(s.max, max) {
261 s.max = max
262 }
263 }
264}
265
266// EncodeMin returns the encoded min value with plain encoding.
267//
268// ByteArray stats do not include the length in the encoding.
269func (s *Int32Statistics) EncodeMin() []byte {
270 if s.HasMinMax() {
271 return s.plainEncode(s.min)
272 }
273 return nil
274}
275
276// EncodeMax returns the current encoded max value with plain encoding
277//
278// ByteArray stats do not include the length in the encoding
279func (s *Int32Statistics) EncodeMax() []byte {
280 if s.HasMinMax() {
281 return s.plainEncode(s.max)
282 }
283 return nil
284}
285
286// Encode returns a populated EncodedStatistics object
287func (s *Int32Statistics) Encode() (enc EncodedStatistics, err error) {
288 defer func() {
289 if r := recover(); r != nil {
290 switch r := r.(type) {
291 case error:
292 err = r
293 case string:
294 err = xerrors.New(r)
295 default:
296 err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
297 }
298 }
299 }()
300 if s.HasMinMax() {
301 enc.SetMax(s.EncodeMax())
302 enc.SetMin(s.EncodeMin())
303 }
304 if s.HasNullCount() {
305 enc.SetNullCount(s.NullCount())
306 }
307 if s.HasDistinctCount() {
308 enc.SetDistinctCount(s.DistinctCount())
309 }
310 return
311}
312
313type minmaxPairInt64 [2]int64
314
315// Int64Statistics is the typed interface for managing stats for a column
316// of Int64 type.
317type Int64Statistics struct {
318 statistics
319 min int64
320 max int64
321
322 bitSetReader utils.SetBitRunReader
323}
324
325// NewInt64Statistics constructs an appropriate stat object type using the
326// given column descriptor and allocator.
327//
328// Panics if the physical type of descr is not parquet.Type.Int64
329func NewInt64Statistics(descr *schema.Column, mem memory.Allocator) *Int64Statistics {
330 if descr.PhysicalType() != parquet.Types.Int64 {
331 panic(xerrors.Errorf("parquet: invalid type %s for constructing a Int64 stat object", descr.PhysicalType()))
332 }
333
334 return &Int64Statistics{
335 statistics: statistics{
336 descr: descr,
337 hasNullCount: true,
338 hasDistinctCount: true,
339 order: descr.SortOrder(),
340 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
341 mem: mem,
342 },
343 }
344}
345
346// NewInt64StatisticsFromEncoded will construct a propertly typed statistics object
347// initializing it with the provided information.
348func NewInt64StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int64Statistics {
349 ret := NewInt64Statistics(descr, mem)
350 ret.nvalues += nvalues
351 if encoded.IsSetNullCount() {
352 ret.incNulls(encoded.GetNullCount())
353 }
354 if encoded.IsSetDistinctCount() {
355 ret.incDistinct(encoded.GetDistinctCount())
356 }
357
358 encodedMin := encoded.GetMin()
359 if encodedMin != nil && len(encodedMin) > 0 {
360 ret.min = ret.plainDecode(encodedMin)
361 }
362 encodedMax := encoded.GetMax()
363 if encodedMax != nil && len(encodedMax) > 0 {
364 ret.max = ret.plainDecode(encodedMax)
365 }
366 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
367 return ret
368}
369
370func (s *Int64Statistics) plainEncode(src int64) []byte {
371 s.encoder.(encoding.Int64Encoder).Put([]int64{src})
372 buf, err := s.encoder.FlushValues()
373 if err != nil {
374 panic(err) // recovered by Encode
375 }
376 defer buf.Release()
377
378 out := make([]byte, buf.Len())
379 copy(out, buf.Bytes())
380 return out
381}
382
383func (s *Int64Statistics) plainDecode(src []byte) int64 {
384 var buf [1]int64
385
386 decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem)
387 decoder.SetData(1, src)
388 decoder.(encoding.Int64Decoder).Decode(buf[:])
389 return buf[0]
390}
391
392func (s *Int64Statistics) minval(a, b int64) int64 {
393 if s.less(a, b) {
394 return a
395 }
396 return b
397}
398
399func (s *Int64Statistics) maxval(a, b int64) int64 {
400 if s.less(a, b) {
401 return b
402 }
403 return a
404}
405
406// MinMaxEqual returns true if both stat objects have the same Min and Max values
407func (s *Int64Statistics) MinMaxEqual(rhs *Int64Statistics) bool {
408 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
409}
410
411// Equals returns true only if both objects are the same type, have the same min and
412// max values, null count, distinct count and number of values.
413func (s *Int64Statistics) Equals(other TypedStatistics) bool {
414 if s.Type() != other.Type() {
415 return false
416 }
417 rhs, ok := other.(*Int64Statistics)
418 if !ok {
419 return false
420 }
421
422 if s.HasMinMax() != rhs.HasMinMax() {
423 return false
424 }
425 return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
426 s.NullCount() == rhs.NullCount() &&
427 s.DistinctCount() == rhs.DistinctCount() &&
428 s.NumValues() == rhs.NumValues()
429}
430
431func (s *Int64Statistics) getMinMax(values []int64) (min, max int64) {
432 if s.order == schema.SortSIGNED {
433 min, max = utils.GetMinMaxInt64(values)
434 } else {
435 umin, umax := utils.GetMinMaxUint64(arrow.Uint64Traits.CastFromBytes(arrow.Int64Traits.CastToBytes(values)))
436 min, max = int64(umin), int64(umax)
437 }
438 return
439}
440
441func (s *Int64Statistics) getMinMaxSpaced(values []int64, validBits []byte, validBitsOffset int64) (min, max int64) {
442 min = s.defaultMin()
443 max = s.defaultMax()
444 var fn func([]int64) (int64, int64)
445 if s.order == schema.SortSIGNED {
446 fn = utils.GetMinMaxInt64
447 } else {
448 fn = func(v []int64) (int64, int64) {
449 umin, umax := utils.GetMinMaxUint64(arrow.Uint64Traits.CastFromBytes(arrow.Int64Traits.CastToBytes(values)))
450 return int64(umin), int64(umax)
451 }
452 }
453
454 if s.bitSetReader == nil {
455 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
456 } else {
457 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
458 }
459
460 for {
461 run := s.bitSetReader.NextRun()
462 if run.Length == 0 {
463 break
464 }
465 localMin, localMax := fn(values[int(run.Pos):int(run.Pos+run.Length)])
466 if min > localMin {
467 min = localMin
468 }
469 if max < localMax {
470 max = localMax
471 }
472 }
473 return
474}
475
476func (s *Int64Statistics) Min() int64 { return s.min }
477func (s *Int64Statistics) Max() int64 { return s.max }
478
479// Merge merges the stats from other into this stat object, updating
480// the null count, distinct count, number of values and the min/max if
481// appropriate.
482func (s *Int64Statistics) Merge(other TypedStatistics) {
483 rhs, ok := other.(*Int64Statistics)
484 if !ok {
485 panic("incompatible stat type merge")
486 }
487
488 s.statistics.merge(rhs)
489 if rhs.HasMinMax() {
490 s.SetMinMax(rhs.Min(), rhs.Max())
491 }
492}
493
494// Update is used to add more values to the current stat object, finding the
495// min and max values etc.
496func (s *Int64Statistics) Update(values []int64, numNull int64) {
497 s.incNulls(numNull)
498 s.nvalues += int64(len(values))
499
500 if len(values) == 0 {
501 return
502 }
503
504 s.SetMinMax(s.getMinMax(values))
505}
506
507// UpdateSpaced is just like Update, but for spaced values using validBits to determine
508// and skip null values.
509func (s *Int64Statistics) UpdateSpaced(values []int64, validBits []byte, validBitsOffset, numNull int64) {
510 s.incNulls(numNull)
511 notnull := int64(len(values)) - numNull
512 s.nvalues += notnull
513
514 if notnull == 0 {
515 return
516 }
517
518 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
519}
520
521// SetMinMax updates the min and max values only if they are not currently set
522// or if argMin is less than the current min / argMax is greater than the current max
523func (s *Int64Statistics) SetMinMax(argMin, argMax int64) {
524 maybeMinMax := s.cleanStat([2]int64{argMin, argMax})
525 if maybeMinMax == nil {
526 return
527 }
528
529 min := (*maybeMinMax)[0]
530 max := (*maybeMinMax)[1]
531
532 if !s.hasMinMax {
533 s.hasMinMax = true
534 s.min = min
535 s.max = max
536 } else {
537 if !s.less(s.min, min) {
538 s.min = min
539 }
540 if s.less(s.max, max) {
541 s.max = max
542 }
543 }
544}
545
546// EncodeMin returns the encoded min value with plain encoding.
547//
548// ByteArray stats do not include the length in the encoding.
549func (s *Int64Statistics) EncodeMin() []byte {
550 if s.HasMinMax() {
551 return s.plainEncode(s.min)
552 }
553 return nil
554}
555
556// EncodeMax returns the current encoded max value with plain encoding
557//
558// ByteArray stats do not include the length in the encoding
559func (s *Int64Statistics) EncodeMax() []byte {
560 if s.HasMinMax() {
561 return s.plainEncode(s.max)
562 }
563 return nil
564}
565
566// Encode returns a populated EncodedStatistics object
567func (s *Int64Statistics) Encode() (enc EncodedStatistics, err error) {
568 defer func() {
569 if r := recover(); r != nil {
570 switch r := r.(type) {
571 case error:
572 err = r
573 case string:
574 err = xerrors.New(r)
575 default:
576 err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
577 }
578 }
579 }()
580 if s.HasMinMax() {
581 enc.SetMax(s.EncodeMax())
582 enc.SetMin(s.EncodeMin())
583 }
584 if s.HasNullCount() {
585 enc.SetNullCount(s.NullCount())
586 }
587 if s.HasDistinctCount() {
588 enc.SetDistinctCount(s.DistinctCount())
589 }
590 return
591}
592
593type minmaxPairInt96 [2]parquet.Int96
594
595// Int96Statistics is the typed interface for managing stats for a column
596// of Int96 type.
597type Int96Statistics struct {
598 statistics
599 min parquet.Int96
600 max parquet.Int96
601
602 bitSetReader utils.SetBitRunReader
603}
604
605// NewInt96Statistics constructs an appropriate stat object type using the
606// given column descriptor and allocator.
607//
608// Panics if the physical type of descr is not parquet.Type.Int96
609func NewInt96Statistics(descr *schema.Column, mem memory.Allocator) *Int96Statistics {
610 if descr.PhysicalType() != parquet.Types.Int96 {
611 panic(xerrors.Errorf("parquet: invalid type %s for constructing a Int96 stat object", descr.PhysicalType()))
612 }
613
614 return &Int96Statistics{
615 statistics: statistics{
616 descr: descr,
617 hasNullCount: true,
618 hasDistinctCount: true,
619 order: descr.SortOrder(),
620 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
621 mem: mem,
622 },
623 }
624}
625
626// NewInt96StatisticsFromEncoded will construct a propertly typed statistics object
627// initializing it with the provided information.
628func NewInt96StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int96Statistics {
629 ret := NewInt96Statistics(descr, mem)
630 ret.nvalues += nvalues
631 if encoded.IsSetNullCount() {
632 ret.incNulls(encoded.GetNullCount())
633 }
634 if encoded.IsSetDistinctCount() {
635 ret.incDistinct(encoded.GetDistinctCount())
636 }
637
638 encodedMin := encoded.GetMin()
639 if encodedMin != nil && len(encodedMin) > 0 {
640 ret.min = ret.plainDecode(encodedMin)
641 }
642 encodedMax := encoded.GetMax()
643 if encodedMax != nil && len(encodedMax) > 0 {
644 ret.max = ret.plainDecode(encodedMax)
645 }
646 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
647 return ret
648}
649
650func (s *Int96Statistics) plainEncode(src parquet.Int96) []byte {
651 s.encoder.(encoding.Int96Encoder).Put([]parquet.Int96{src})
652 buf, err := s.encoder.FlushValues()
653 if err != nil {
654 panic(err) // recovered by Encode
655 }
656 defer buf.Release()
657
658 out := make([]byte, buf.Len())
659 copy(out, buf.Bytes())
660 return out
661}
662
663func (s *Int96Statistics) plainDecode(src []byte) parquet.Int96 {
664 var buf [1]parquet.Int96
665
666 decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem)
667 decoder.SetData(1, src)
668 decoder.(encoding.Int96Decoder).Decode(buf[:])
669 return buf[0]
670}
671
672func (s *Int96Statistics) minval(a, b parquet.Int96) parquet.Int96 {
673 if s.less(a, b) {
674 return a
675 }
676 return b
677}
678
679func (s *Int96Statistics) maxval(a, b parquet.Int96) parquet.Int96 {
680 if s.less(a, b) {
681 return b
682 }
683 return a
684}
685
686// MinMaxEqual returns true if both stat objects have the same Min and Max values
687func (s *Int96Statistics) MinMaxEqual(rhs *Int96Statistics) bool {
688 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
689}
690
691// Equals returns true only if both objects are the same type, have the same min and
692// max values, null count, distinct count and number of values.
693func (s *Int96Statistics) Equals(other TypedStatistics) bool {
694 if s.Type() != other.Type() {
695 return false
696 }
697 rhs, ok := other.(*Int96Statistics)
698 if !ok {
699 return false
700 }
701
702 if s.HasMinMax() != rhs.HasMinMax() {
703 return false
704 }
705 return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
706 s.NullCount() == rhs.NullCount() &&
707 s.DistinctCount() == rhs.DistinctCount() &&
708 s.NumValues() == rhs.NumValues()
709}
710
711func (s *Int96Statistics) getMinMax(values []parquet.Int96) (min, max parquet.Int96) {
712 defMin := s.defaultMin()
713 defMax := s.defaultMax()
714
715 min = defMin
716 max = defMax
717
718 for _, v := range values {
719 min = s.minval(min, v)
720 max = s.maxval(max, v)
721 }
722 return
723}
724
725func (s *Int96Statistics) getMinMaxSpaced(values []parquet.Int96, validBits []byte, validBitsOffset int64) (min, max parquet.Int96) {
726 min = s.defaultMin()
727 max = s.defaultMax()
728
729 if s.bitSetReader == nil {
730 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
731 } else {
732 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
733 }
734
735 for {
736 run := s.bitSetReader.NextRun()
737 if run.Length == 0 {
738 break
739 }
740 for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] {
741 min = s.minval(min, v)
742 max = s.maxval(max, v)
743 }
744 }
745 return
746}
747
748func (s *Int96Statistics) Min() parquet.Int96 { return s.min }
749func (s *Int96Statistics) Max() parquet.Int96 { return s.max }
750
751// Merge merges the stats from other into this stat object, updating
752// the null count, distinct count, number of values and the min/max if
753// appropriate.
754func (s *Int96Statistics) Merge(other TypedStatistics) {
755 rhs, ok := other.(*Int96Statistics)
756 if !ok {
757 panic("incompatible stat type merge")
758 }
759
760 s.statistics.merge(rhs)
761 if rhs.HasMinMax() {
762 s.SetMinMax(rhs.Min(), rhs.Max())
763 }
764}
765
766// Update is used to add more values to the current stat object, finding the
767// min and max values etc.
768func (s *Int96Statistics) Update(values []parquet.Int96, numNull int64) {
769 s.incNulls(numNull)
770 s.nvalues += int64(len(values))
771
772 if len(values) == 0 {
773 return
774 }
775
776 s.SetMinMax(s.getMinMax(values))
777}
778
779// UpdateSpaced is just like Update, but for spaced values using validBits to determine
780// and skip null values.
781func (s *Int96Statistics) UpdateSpaced(values []parquet.Int96, validBits []byte, validBitsOffset, numNull int64) {
782 s.incNulls(numNull)
783 notnull := int64(len(values)) - numNull
784 s.nvalues += notnull
785
786 if notnull == 0 {
787 return
788 }
789
790 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
791}
792
793// SetMinMax updates the min and max values only if they are not currently set
794// or if argMin is less than the current min / argMax is greater than the current max
795func (s *Int96Statistics) SetMinMax(argMin, argMax parquet.Int96) {
796 maybeMinMax := s.cleanStat([2]parquet.Int96{argMin, argMax})
797 if maybeMinMax == nil {
798 return
799 }
800
801 min := (*maybeMinMax)[0]
802 max := (*maybeMinMax)[1]
803
804 if !s.hasMinMax {
805 s.hasMinMax = true
806 s.min = min
807 s.max = max
808 } else {
809 if !s.less(s.min, min) {
810 s.min = min
811 }
812 if s.less(s.max, max) {
813 s.max = max
814 }
815 }
816}
817
818// EncodeMin returns the encoded min value with plain encoding.
819//
820// ByteArray stats do not include the length in the encoding.
821func (s *Int96Statistics) EncodeMin() []byte {
822 if s.HasMinMax() {
823 return s.plainEncode(s.min)
824 }
825 return nil
826}
827
828// EncodeMax returns the current encoded max value with plain encoding
829//
830// ByteArray stats do not include the length in the encoding
831func (s *Int96Statistics) EncodeMax() []byte {
832 if s.HasMinMax() {
833 return s.plainEncode(s.max)
834 }
835 return nil
836}
837
838// Encode returns a populated EncodedStatistics object
839func (s *Int96Statistics) Encode() (enc EncodedStatistics, err error) {
840 defer func() {
841 if r := recover(); r != nil {
842 switch r := r.(type) {
843 case error:
844 err = r
845 case string:
846 err = xerrors.New(r)
847 default:
848 err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
849 }
850 }
851 }()
852 if s.HasMinMax() {
853 enc.SetMax(s.EncodeMax())
854 enc.SetMin(s.EncodeMin())
855 }
856 if s.HasNullCount() {
857 enc.SetNullCount(s.NullCount())
858 }
859 if s.HasDistinctCount() {
860 enc.SetDistinctCount(s.DistinctCount())
861 }
862 return
863}
864
865type minmaxPairFloat32 [2]float32
866
867// Float32Statistics is the typed interface for managing stats for a column
868// of Float32 type.
869type Float32Statistics struct {
870 statistics
871 min float32
872 max float32
873
874 bitSetReader utils.SetBitRunReader
875}
876
877// NewFloat32Statistics constructs an appropriate stat object type using the
878// given column descriptor and allocator.
879//
880// Panics if the physical type of descr is not parquet.Type.Float
881func NewFloat32Statistics(descr *schema.Column, mem memory.Allocator) *Float32Statistics {
882 if descr.PhysicalType() != parquet.Types.Float {
883 panic(xerrors.Errorf("parquet: invalid type %s for constructing a Float32 stat object", descr.PhysicalType()))
884 }
885
886 return &Float32Statistics{
887 statistics: statistics{
888 descr: descr,
889 hasNullCount: true,
890 hasDistinctCount: true,
891 order: descr.SortOrder(),
892 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
893 mem: mem,
894 },
895 }
896}
897
898// NewFloat32StatisticsFromEncoded will construct a propertly typed statistics object
899// initializing it with the provided information.
900func NewFloat32StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Float32Statistics {
901 ret := NewFloat32Statistics(descr, mem)
902 ret.nvalues += nvalues
903 if encoded.IsSetNullCount() {
904 ret.incNulls(encoded.GetNullCount())
905 }
906 if encoded.IsSetDistinctCount() {
907 ret.incDistinct(encoded.GetDistinctCount())
908 }
909
910 encodedMin := encoded.GetMin()
911 if encodedMin != nil && len(encodedMin) > 0 {
912 ret.min = ret.plainDecode(encodedMin)
913 }
914 encodedMax := encoded.GetMax()
915 if encodedMax != nil && len(encodedMax) > 0 {
916 ret.max = ret.plainDecode(encodedMax)
917 }
918 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
919 return ret
920}
921
922func (s *Float32Statistics) plainEncode(src float32) []byte {
923 s.encoder.(encoding.Float32Encoder).Put([]float32{src})
924 buf, err := s.encoder.FlushValues()
925 if err != nil {
926 panic(err) // recovered by Encode
927 }
928 defer buf.Release()
929
930 out := make([]byte, buf.Len())
931 copy(out, buf.Bytes())
932 return out
933}
934
935func (s *Float32Statistics) plainDecode(src []byte) float32 {
936 var buf [1]float32
937
938 decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem)
939 decoder.SetData(1, src)
940 decoder.(encoding.Float32Decoder).Decode(buf[:])
941 return buf[0]
942}
943
944func (s *Float32Statistics) minval(a, b float32) float32 {
945 if s.less(a, b) {
946 return a
947 }
948 return b
949}
950
951func (s *Float32Statistics) maxval(a, b float32) float32 {
952 if s.less(a, b) {
953 return b
954 }
955 return a
956}
957
958// MinMaxEqual returns true if both stat objects have the same Min and Max values
959func (s *Float32Statistics) MinMaxEqual(rhs *Float32Statistics) bool {
960 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
961}
962
963// Equals returns true only if both objects are the same type, have the same min and
964// max values, null count, distinct count and number of values.
965func (s *Float32Statistics) Equals(other TypedStatistics) bool {
966 if s.Type() != other.Type() {
967 return false
968 }
969 rhs, ok := other.(*Float32Statistics)
970 if !ok {
971 return false
972 }
973
974 if s.HasMinMax() != rhs.HasMinMax() {
975 return false
976 }
977 return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
978 s.NullCount() == rhs.NullCount() &&
979 s.DistinctCount() == rhs.DistinctCount() &&
980 s.NumValues() == rhs.NumValues()
981}
982
983func (s *Float32Statistics) coalesce(val, fallback float32) float32 {
984 if math.IsNaN(float64(val)) {
985 return fallback
986 }
987 return val
988}
989
990func (s *Float32Statistics) getMinMax(values []float32) (min, max float32) {
991 defMin := s.defaultMin()
992 defMax := s.defaultMax()
993
994 min = defMin
995 max = defMax
996
997 for _, v := range values {
998 min = s.minval(min, s.coalesce(v, defMin))
999 max = s.maxval(max, s.coalesce(v, defMax))
1000 }
1001 return
1002}
1003
1004func (s *Float32Statistics) getMinMaxSpaced(values []float32, validBits []byte, validBitsOffset int64) (min, max float32) {
1005 min = s.defaultMin()
1006 max = s.defaultMax()
1007
1008 if s.bitSetReader == nil {
1009 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
1010 } else {
1011 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
1012 }
1013
1014 for {
1015 run := s.bitSetReader.NextRun()
1016 if run.Length == 0 {
1017 break
1018 }
1019 for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] {
1020 min = s.minval(min, coalesce(v, s.defaultMin()).(float32))
1021 max = s.maxval(max, coalesce(v, s.defaultMax()).(float32))
1022 }
1023 }
1024 return
1025}
1026
1027func (s *Float32Statistics) Min() float32 { return s.min }
1028func (s *Float32Statistics) Max() float32 { return s.max }
1029
1030// Merge merges the stats from other into this stat object, updating
1031// the null count, distinct count, number of values and the min/max if
1032// appropriate.
1033func (s *Float32Statistics) Merge(other TypedStatistics) {
1034 rhs, ok := other.(*Float32Statistics)
1035 if !ok {
1036 panic("incompatible stat type merge")
1037 }
1038
1039 s.statistics.merge(rhs)
1040 if rhs.HasMinMax() {
1041 s.SetMinMax(rhs.Min(), rhs.Max())
1042 }
1043}
1044
1045// Update is used to add more values to the current stat object, finding the
1046// min and max values etc.
1047func (s *Float32Statistics) Update(values []float32, numNull int64) {
1048 s.incNulls(numNull)
1049 s.nvalues += int64(len(values))
1050
1051 if len(values) == 0 {
1052 return
1053 }
1054
1055 s.SetMinMax(s.getMinMax(values))
1056}
1057
1058// UpdateSpaced is just like Update, but for spaced values using validBits to determine
1059// and skip null values.
1060func (s *Float32Statistics) UpdateSpaced(values []float32, validBits []byte, validBitsOffset, numNull int64) {
1061 s.incNulls(numNull)
1062 notnull := int64(len(values)) - numNull
1063 s.nvalues += notnull
1064
1065 if notnull == 0 {
1066 return
1067 }
1068
1069 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
1070}
1071
1072// SetMinMax updates the min and max values only if they are not currently set
1073// or if argMin is less than the current min / argMax is greater than the current max
1074func (s *Float32Statistics) SetMinMax(argMin, argMax float32) {
1075 maybeMinMax := s.cleanStat([2]float32{argMin, argMax})
1076 if maybeMinMax == nil {
1077 return
1078 }
1079
1080 min := (*maybeMinMax)[0]
1081 max := (*maybeMinMax)[1]
1082
1083 if !s.hasMinMax {
1084 s.hasMinMax = true
1085 s.min = min
1086 s.max = max
1087 } else {
1088 if !s.less(s.min, min) {
1089 s.min = min
1090 }
1091 if s.less(s.max, max) {
1092 s.max = max
1093 }
1094 }
1095}
1096
1097// EncodeMin returns the encoded min value with plain encoding.
1098//
1099// ByteArray stats do not include the length in the encoding.
1100func (s *Float32Statistics) EncodeMin() []byte {
1101 if s.HasMinMax() {
1102 return s.plainEncode(s.min)
1103 }
1104 return nil
1105}
1106
1107// EncodeMax returns the current encoded max value with plain encoding
1108//
1109// ByteArray stats do not include the length in the encoding
1110func (s *Float32Statistics) EncodeMax() []byte {
1111 if s.HasMinMax() {
1112 return s.plainEncode(s.max)
1113 }
1114 return nil
1115}
1116
1117// Encode returns a populated EncodedStatistics object
1118func (s *Float32Statistics) Encode() (enc EncodedStatistics, err error) {
1119 defer func() {
1120 if r := recover(); r != nil {
1121 switch r := r.(type) {
1122 case error:
1123 err = r
1124 case string:
1125 err = xerrors.New(r)
1126 default:
1127 err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
1128 }
1129 }
1130 }()
1131 if s.HasMinMax() {
1132 enc.SetMax(s.EncodeMax())
1133 enc.SetMin(s.EncodeMin())
1134 }
1135 if s.HasNullCount() {
1136 enc.SetNullCount(s.NullCount())
1137 }
1138 if s.HasDistinctCount() {
1139 enc.SetDistinctCount(s.DistinctCount())
1140 }
1141 return
1142}
1143
// minmaxPairFloat64 holds two float64 values.
// NOTE(review): presumably the {min, max} pair type exchanged with cleanStat,
// which is defined outside this section — confirm.
type minmaxPairFloat64 [2]float64

// Float64Statistics is the typed interface for managing stats for a column
// of Float64 type.
type Float64Statistics struct {
	statistics
	// min and max hold the current bounds; they are written by SetMinMax and
	// by NewFloat64StatisticsFromEncoded.
	min float64
	max float64

	// bitSetReader is created on the first getMinMaxSpaced call and reused
	// (via Reset) on later calls.
	bitSetReader utils.SetBitRunReader
}
1155
1156// NewFloat64Statistics constructs an appropriate stat object type using the
1157// given column descriptor and allocator.
1158//
1159// Panics if the physical type of descr is not parquet.Type.Double
1160func NewFloat64Statistics(descr *schema.Column, mem memory.Allocator) *Float64Statistics {
1161 if descr.PhysicalType() != parquet.Types.Double {
1162 panic(xerrors.Errorf("parquet: invalid type %s for constructing a Float64 stat object", descr.PhysicalType()))
1163 }
1164
1165 return &Float64Statistics{
1166 statistics: statistics{
1167 descr: descr,
1168 hasNullCount: true,
1169 hasDistinctCount: true,
1170 order: descr.SortOrder(),
1171 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
1172 mem: mem,
1173 },
1174 }
1175}
1176
1177// NewFloat64StatisticsFromEncoded will construct a propertly typed statistics object
1178// initializing it with the provided information.
1179func NewFloat64StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Float64Statistics {
1180 ret := NewFloat64Statistics(descr, mem)
1181 ret.nvalues += nvalues
1182 if encoded.IsSetNullCount() {
1183 ret.incNulls(encoded.GetNullCount())
1184 }
1185 if encoded.IsSetDistinctCount() {
1186 ret.incDistinct(encoded.GetDistinctCount())
1187 }
1188
1189 encodedMin := encoded.GetMin()
1190 if encodedMin != nil && len(encodedMin) > 0 {
1191 ret.min = ret.plainDecode(encodedMin)
1192 }
1193 encodedMax := encoded.GetMax()
1194 if encodedMax != nil && len(encodedMax) > 0 {
1195 ret.max = ret.plainDecode(encodedMax)
1196 }
1197 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
1198 return ret
1199}
1200
1201func (s *Float64Statistics) plainEncode(src float64) []byte {
1202 s.encoder.(encoding.Float64Encoder).Put([]float64{src})
1203 buf, err := s.encoder.FlushValues()
1204 if err != nil {
1205 panic(err) // recovered by Encode
1206 }
1207 defer buf.Release()
1208
1209 out := make([]byte, buf.Len())
1210 copy(out, buf.Bytes())
1211 return out
1212}
1213
1214func (s *Float64Statistics) plainDecode(src []byte) float64 {
1215 var buf [1]float64
1216
1217 decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem)
1218 decoder.SetData(1, src)
1219 decoder.(encoding.Float64Decoder).Decode(buf[:])
1220 return buf[0]
1221}
1222
1223func (s *Float64Statistics) minval(a, b float64) float64 {
1224 if s.less(a, b) {
1225 return a
1226 }
1227 return b
1228}
1229
1230func (s *Float64Statistics) maxval(a, b float64) float64 {
1231 if s.less(a, b) {
1232 return b
1233 }
1234 return a
1235}
1236
1237// MinMaxEqual returns true if both stat objects have the same Min and Max values
1238func (s *Float64Statistics) MinMaxEqual(rhs *Float64Statistics) bool {
1239 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
1240}
1241
1242// Equals returns true only if both objects are the same type, have the same min and
1243// max values, null count, distinct count and number of values.
1244func (s *Float64Statistics) Equals(other TypedStatistics) bool {
1245 if s.Type() != other.Type() {
1246 return false
1247 }
1248 rhs, ok := other.(*Float64Statistics)
1249 if !ok {
1250 return false
1251 }
1252
1253 if s.HasMinMax() != rhs.HasMinMax() {
1254 return false
1255 }
1256 return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
1257 s.NullCount() == rhs.NullCount() &&
1258 s.DistinctCount() == rhs.DistinctCount() &&
1259 s.NumValues() == rhs.NumValues()
1260}
1261
1262func (s *Float64Statistics) coalesce(val, fallback float64) float64 {
1263 if math.IsNaN(float64(val)) {
1264 return fallback
1265 }
1266 return val
1267}
1268
1269func (s *Float64Statistics) getMinMax(values []float64) (min, max float64) {
1270 defMin := s.defaultMin()
1271 defMax := s.defaultMax()
1272
1273 min = defMin
1274 max = defMax
1275
1276 for _, v := range values {
1277 min = s.minval(min, s.coalesce(v, defMin))
1278 max = s.maxval(max, s.coalesce(v, defMax))
1279 }
1280 return
1281}
1282
1283func (s *Float64Statistics) getMinMaxSpaced(values []float64, validBits []byte, validBitsOffset int64) (min, max float64) {
1284 min = s.defaultMin()
1285 max = s.defaultMax()
1286
1287 if s.bitSetReader == nil {
1288 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
1289 } else {
1290 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
1291 }
1292
1293 for {
1294 run := s.bitSetReader.NextRun()
1295 if run.Length == 0 {
1296 break
1297 }
1298 for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] {
1299 min = s.minval(min, coalesce(v, s.defaultMin()).(float64))
1300 max = s.maxval(max, coalesce(v, s.defaultMax()).(float64))
1301 }
1302 }
1303 return
1304}
1305
1306func (s *Float64Statistics) Min() float64 { return s.min }
1307func (s *Float64Statistics) Max() float64 { return s.max }
1308
1309// Merge merges the stats from other into this stat object, updating
1310// the null count, distinct count, number of values and the min/max if
1311// appropriate.
1312func (s *Float64Statistics) Merge(other TypedStatistics) {
1313 rhs, ok := other.(*Float64Statistics)
1314 if !ok {
1315 panic("incompatible stat type merge")
1316 }
1317
1318 s.statistics.merge(rhs)
1319 if rhs.HasMinMax() {
1320 s.SetMinMax(rhs.Min(), rhs.Max())
1321 }
1322}
1323
1324// Update is used to add more values to the current stat object, finding the
1325// min and max values etc.
1326func (s *Float64Statistics) Update(values []float64, numNull int64) {
1327 s.incNulls(numNull)
1328 s.nvalues += int64(len(values))
1329
1330 if len(values) == 0 {
1331 return
1332 }
1333
1334 s.SetMinMax(s.getMinMax(values))
1335}
1336
1337// UpdateSpaced is just like Update, but for spaced values using validBits to determine
1338// and skip null values.
1339func (s *Float64Statistics) UpdateSpaced(values []float64, validBits []byte, validBitsOffset, numNull int64) {
1340 s.incNulls(numNull)
1341 notnull := int64(len(values)) - numNull
1342 s.nvalues += notnull
1343
1344 if notnull == 0 {
1345 return
1346 }
1347
1348 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
1349}
1350
1351// SetMinMax updates the min and max values only if they are not currently set
1352// or if argMin is less than the current min / argMax is greater than the current max
1353func (s *Float64Statistics) SetMinMax(argMin, argMax float64) {
1354 maybeMinMax := s.cleanStat([2]float64{argMin, argMax})
1355 if maybeMinMax == nil {
1356 return
1357 }
1358
1359 min := (*maybeMinMax)[0]
1360 max := (*maybeMinMax)[1]
1361
1362 if !s.hasMinMax {
1363 s.hasMinMax = true
1364 s.min = min
1365 s.max = max
1366 } else {
1367 if !s.less(s.min, min) {
1368 s.min = min
1369 }
1370 if s.less(s.max, max) {
1371 s.max = max
1372 }
1373 }
1374}
1375
1376// EncodeMin returns the encoded min value with plain encoding.
1377//
1378// ByteArray stats do not include the length in the encoding.
1379func (s *Float64Statistics) EncodeMin() []byte {
1380 if s.HasMinMax() {
1381 return s.plainEncode(s.min)
1382 }
1383 return nil
1384}
1385
1386// EncodeMax returns the current encoded max value with plain encoding
1387//
1388// ByteArray stats do not include the length in the encoding
1389func (s *Float64Statistics) EncodeMax() []byte {
1390 if s.HasMinMax() {
1391 return s.plainEncode(s.max)
1392 }
1393 return nil
1394}
1395
1396// Encode returns a populated EncodedStatistics object
1397func (s *Float64Statistics) Encode() (enc EncodedStatistics, err error) {
1398 defer func() {
1399 if r := recover(); r != nil {
1400 switch r := r.(type) {
1401 case error:
1402 err = r
1403 case string:
1404 err = xerrors.New(r)
1405 default:
1406 err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
1407 }
1408 }
1409 }()
1410 if s.HasMinMax() {
1411 enc.SetMax(s.EncodeMax())
1412 enc.SetMin(s.EncodeMin())
1413 }
1414 if s.HasNullCount() {
1415 enc.SetNullCount(s.NullCount())
1416 }
1417 if s.HasDistinctCount() {
1418 enc.SetDistinctCount(s.DistinctCount())
1419 }
1420 return
1421}
1422
// minmaxPairBoolean holds two bool values.
// NOTE(review): presumably the {min, max} pair type exchanged with cleanStat,
// which is defined outside this section — confirm.
type minmaxPairBoolean [2]bool

// BooleanStatistics is the typed interface for managing stats for a column
// of Boolean type.
type BooleanStatistics struct {
	statistics
	// min and max hold the current bounds; they are written by SetMinMax and
	// by NewBooleanStatisticsFromEncoded.
	min bool
	max bool

	// bitSetReader is created on the first getMinMaxSpaced call and reused
	// (via Reset) on later calls.
	bitSetReader utils.SetBitRunReader
}
1434
1435// NewBooleanStatistics constructs an appropriate stat object type using the
1436// given column descriptor and allocator.
1437//
1438// Panics if the physical type of descr is not parquet.Type.Boolean
1439func NewBooleanStatistics(descr *schema.Column, mem memory.Allocator) *BooleanStatistics {
1440 if descr.PhysicalType() != parquet.Types.Boolean {
1441 panic(xerrors.Errorf("parquet: invalid type %s for constructing a Boolean stat object", descr.PhysicalType()))
1442 }
1443
1444 return &BooleanStatistics{
1445 statistics: statistics{
1446 descr: descr,
1447 hasNullCount: true,
1448 hasDistinctCount: true,
1449 order: descr.SortOrder(),
1450 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
1451 mem: mem,
1452 },
1453 }
1454}
1455
1456// NewBooleanStatisticsFromEncoded will construct a propertly typed statistics object
1457// initializing it with the provided information.
1458func NewBooleanStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *BooleanStatistics {
1459 ret := NewBooleanStatistics(descr, mem)
1460 ret.nvalues += nvalues
1461 if encoded.IsSetNullCount() {
1462 ret.incNulls(encoded.GetNullCount())
1463 }
1464 if encoded.IsSetDistinctCount() {
1465 ret.incDistinct(encoded.GetDistinctCount())
1466 }
1467
1468 encodedMin := encoded.GetMin()
1469 if encodedMin != nil && len(encodedMin) > 0 {
1470 ret.min = ret.plainDecode(encodedMin)
1471 }
1472 encodedMax := encoded.GetMax()
1473 if encodedMax != nil && len(encodedMax) > 0 {
1474 ret.max = ret.plainDecode(encodedMax)
1475 }
1476 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
1477 return ret
1478}
1479
1480func (s *BooleanStatistics) plainEncode(src bool) []byte {
1481 s.encoder.(encoding.BooleanEncoder).Put([]bool{src})
1482 buf, err := s.encoder.FlushValues()
1483 if err != nil {
1484 panic(err) // recovered by Encode
1485 }
1486 defer buf.Release()
1487
1488 out := make([]byte, buf.Len())
1489 copy(out, buf.Bytes())
1490 return out
1491}
1492
1493func (s *BooleanStatistics) plainDecode(src []byte) bool {
1494 var buf [1]bool
1495
1496 decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem)
1497 decoder.SetData(1, src)
1498 decoder.(encoding.BooleanDecoder).Decode(buf[:])
1499 return buf[0]
1500}
1501
1502func (s *BooleanStatistics) minval(a, b bool) bool {
1503 if s.less(a, b) {
1504 return a
1505 }
1506 return b
1507}
1508
1509func (s *BooleanStatistics) maxval(a, b bool) bool {
1510 if s.less(a, b) {
1511 return b
1512 }
1513 return a
1514}
1515
1516// MinMaxEqual returns true if both stat objects have the same Min and Max values
1517func (s *BooleanStatistics) MinMaxEqual(rhs *BooleanStatistics) bool {
1518 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
1519}
1520
1521// Equals returns true only if both objects are the same type, have the same min and
1522// max values, null count, distinct count and number of values.
1523func (s *BooleanStatistics) Equals(other TypedStatistics) bool {
1524 if s.Type() != other.Type() {
1525 return false
1526 }
1527 rhs, ok := other.(*BooleanStatistics)
1528 if !ok {
1529 return false
1530 }
1531
1532 if s.HasMinMax() != rhs.HasMinMax() {
1533 return false
1534 }
1535 return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
1536 s.NullCount() == rhs.NullCount() &&
1537 s.DistinctCount() == rhs.DistinctCount() &&
1538 s.NumValues() == rhs.NumValues()
1539}
1540
1541func (s *BooleanStatistics) getMinMax(values []bool) (min, max bool) {
1542 defMin := s.defaultMin()
1543 defMax := s.defaultMax()
1544
1545 min = defMin
1546 max = defMax
1547
1548 for _, v := range values {
1549 min = s.minval(min, v)
1550 max = s.maxval(max, v)
1551 }
1552 return
1553}
1554
1555func (s *BooleanStatistics) getMinMaxSpaced(values []bool, validBits []byte, validBitsOffset int64) (min, max bool) {
1556 min = s.defaultMin()
1557 max = s.defaultMax()
1558
1559 if s.bitSetReader == nil {
1560 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
1561 } else {
1562 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
1563 }
1564
1565 for {
1566 run := s.bitSetReader.NextRun()
1567 if run.Length == 0 {
1568 break
1569 }
1570 for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] {
1571 min = s.minval(min, v)
1572 max = s.maxval(max, v)
1573 }
1574 }
1575 return
1576}
1577
1578func (s *BooleanStatistics) Min() bool { return s.min }
1579func (s *BooleanStatistics) Max() bool { return s.max }
1580
1581// Merge merges the stats from other into this stat object, updating
1582// the null count, distinct count, number of values and the min/max if
1583// appropriate.
1584func (s *BooleanStatistics) Merge(other TypedStatistics) {
1585 rhs, ok := other.(*BooleanStatistics)
1586 if !ok {
1587 panic("incompatible stat type merge")
1588 }
1589
1590 s.statistics.merge(rhs)
1591 if rhs.HasMinMax() {
1592 s.SetMinMax(rhs.Min(), rhs.Max())
1593 }
1594}
1595
1596// Update is used to add more values to the current stat object, finding the
1597// min and max values etc.
1598func (s *BooleanStatistics) Update(values []bool, numNull int64) {
1599 s.incNulls(numNull)
1600 s.nvalues += int64(len(values))
1601
1602 if len(values) == 0 {
1603 return
1604 }
1605
1606 s.SetMinMax(s.getMinMax(values))
1607}
1608
1609// UpdateSpaced is just like Update, but for spaced values using validBits to determine
1610// and skip null values.
1611func (s *BooleanStatistics) UpdateSpaced(values []bool, validBits []byte, validBitsOffset, numNull int64) {
1612 s.incNulls(numNull)
1613 notnull := int64(len(values)) - numNull
1614 s.nvalues += notnull
1615
1616 if notnull == 0 {
1617 return
1618 }
1619
1620 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
1621}
1622
1623// SetMinMax updates the min and max values only if they are not currently set
1624// or if argMin is less than the current min / argMax is greater than the current max
1625func (s *BooleanStatistics) SetMinMax(argMin, argMax bool) {
1626 maybeMinMax := s.cleanStat([2]bool{argMin, argMax})
1627 if maybeMinMax == nil {
1628 return
1629 }
1630
1631 min := (*maybeMinMax)[0]
1632 max := (*maybeMinMax)[1]
1633
1634 if !s.hasMinMax {
1635 s.hasMinMax = true
1636 s.min = min
1637 s.max = max
1638 } else {
1639 if !s.less(s.min, min) {
1640 s.min = min
1641 }
1642 if s.less(s.max, max) {
1643 s.max = max
1644 }
1645 }
1646}
1647
1648// EncodeMin returns the encoded min value with plain encoding.
1649//
1650// ByteArray stats do not include the length in the encoding.
1651func (s *BooleanStatistics) EncodeMin() []byte {
1652 if s.HasMinMax() {
1653 return s.plainEncode(s.min)
1654 }
1655 return nil
1656}
1657
1658// EncodeMax returns the current encoded max value with plain encoding
1659//
1660// ByteArray stats do not include the length in the encoding
1661func (s *BooleanStatistics) EncodeMax() []byte {
1662 if s.HasMinMax() {
1663 return s.plainEncode(s.max)
1664 }
1665 return nil
1666}
1667
1668// Encode returns a populated EncodedStatistics object
1669func (s *BooleanStatistics) Encode() (enc EncodedStatistics, err error) {
1670 defer func() {
1671 if r := recover(); r != nil {
1672 switch r := r.(type) {
1673 case error:
1674 err = r
1675 case string:
1676 err = xerrors.New(r)
1677 default:
1678 err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
1679 }
1680 }
1681 }()
1682 if s.HasMinMax() {
1683 enc.SetMax(s.EncodeMax())
1684 enc.SetMin(s.EncodeMin())
1685 }
1686 if s.HasNullCount() {
1687 enc.SetNullCount(s.NullCount())
1688 }
1689 if s.HasDistinctCount() {
1690 enc.SetDistinctCount(s.DistinctCount())
1691 }
1692 return
1693}
1694
// minmaxPairByteArray holds two parquet.ByteArray values.
// NOTE(review): presumably the {min, max} pair type exchanged with cleanStat,
// which is defined outside this section — confirm.
type minmaxPairByteArray [2]parquet.ByteArray

// ByteArrayStatistics is the typed interface for managing stats for a column
// of ByteArray type.
type ByteArrayStatistics struct {
	statistics
	// min and max hold the current bounds; they are written by the
	// constructor, SetMinMax and NewByteArrayStatisticsFromEncoded.
	min parquet.ByteArray
	max parquet.ByteArray

	// bitSetReader is created on the first getMinMaxSpaced call and reused
	// (via Reset) on later calls.
	bitSetReader utils.SetBitRunReader
}
1706
1707// NewByteArrayStatistics constructs an appropriate stat object type using the
1708// given column descriptor and allocator.
1709//
1710// Panics if the physical type of descr is not parquet.Type.ByteArray
1711func NewByteArrayStatistics(descr *schema.Column, mem memory.Allocator) *ByteArrayStatistics {
1712 if descr.PhysicalType() != parquet.Types.ByteArray {
1713 panic(xerrors.Errorf("parquet: invalid type %s for constructing a ByteArray stat object", descr.PhysicalType()))
1714 }
1715
1716 return &ByteArrayStatistics{
1717 statistics: statistics{
1718 descr: descr,
1719 hasNullCount: true,
1720 hasDistinctCount: true,
1721 order: descr.SortOrder(),
1722 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
1723 mem: mem,
1724 },
1725
1726 min: make([]byte, 0),
1727 max: make([]byte, 0),
1728 }
1729}
1730
1731// NewByteArrayStatisticsFromEncoded will construct a propertly typed statistics object
1732// initializing it with the provided information.
1733func NewByteArrayStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *ByteArrayStatistics {
1734 ret := NewByteArrayStatistics(descr, mem)
1735 ret.nvalues += nvalues
1736 if encoded.IsSetNullCount() {
1737 ret.incNulls(encoded.GetNullCount())
1738 }
1739 if encoded.IsSetDistinctCount() {
1740 ret.incDistinct(encoded.GetDistinctCount())
1741 }
1742
1743 encodedMin := encoded.GetMin()
1744 if encodedMin != nil && len(encodedMin) > 0 {
1745 ret.min = ret.plainDecode(encodedMin)
1746 }
1747 encodedMax := encoded.GetMax()
1748 if encodedMax != nil && len(encodedMax) > 0 {
1749 ret.max = ret.plainDecode(encodedMax)
1750 }
1751 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
1752 return ret
1753}
1754
1755func (s *ByteArrayStatistics) plainEncode(src parquet.ByteArray) []byte {
1756 return src
1757}
1758
1759func (s *ByteArrayStatistics) plainDecode(src []byte) parquet.ByteArray {
1760 return src
1761}
1762
1763func (s *ByteArrayStatistics) minval(a, b parquet.ByteArray) parquet.ByteArray {
1764 switch {
1765 case a == nil:
1766 return b
1767 case b == nil:
1768 return a
1769 case s.less(a, b):
1770 return a
1771 default:
1772 return b
1773 }
1774}
1775
1776func (s *ByteArrayStatistics) maxval(a, b parquet.ByteArray) parquet.ByteArray {
1777 switch {
1778 case a == nil:
1779 return b
1780 case b == nil:
1781 return a
1782 case s.less(a, b):
1783 return b
1784 default:
1785 return a
1786 }
1787}
1788
1789// MinMaxEqual returns true if both stat objects have the same Min and Max values
1790func (s *ByteArrayStatistics) MinMaxEqual(rhs *ByteArrayStatistics) bool {
1791 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
1792}
1793
1794// Equals returns true only if both objects are the same type, have the same min and
1795// max values, null count, distinct count and number of values.
1796func (s *ByteArrayStatistics) Equals(other TypedStatistics) bool {
1797 if s.Type() != other.Type() {
1798 return false
1799 }
1800 rhs, ok := other.(*ByteArrayStatistics)
1801 if !ok {
1802 return false
1803 }
1804
1805 if s.HasMinMax() != rhs.HasMinMax() {
1806 return false
1807 }
1808 return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
1809 s.NullCount() == rhs.NullCount() &&
1810 s.DistinctCount() == rhs.DistinctCount() &&
1811 s.NumValues() == rhs.NumValues()
1812}
1813
1814func (s *ByteArrayStatistics) getMinMax(values []parquet.ByteArray) (min, max parquet.ByteArray) {
1815 defMin := s.defaultMin()
1816 defMax := s.defaultMax()
1817
1818 min = defMin
1819 max = defMax
1820
1821 for _, v := range values {
1822 min = s.minval(min, v)
1823 max = s.maxval(max, v)
1824 }
1825 return
1826}
1827
1828func (s *ByteArrayStatistics) getMinMaxSpaced(values []parquet.ByteArray, validBits []byte, validBitsOffset int64) (min, max parquet.ByteArray) {
1829 min = s.defaultMin()
1830 max = s.defaultMax()
1831
1832 if s.bitSetReader == nil {
1833 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
1834 } else {
1835 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
1836 }
1837
1838 for {
1839 run := s.bitSetReader.NextRun()
1840 if run.Length == 0 {
1841 break
1842 }
1843 for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] {
1844 min = s.minval(min, v)
1845 max = s.maxval(max, v)
1846 }
1847 }
1848 return
1849}
1850
1851func (s *ByteArrayStatistics) Min() parquet.ByteArray { return s.min }
1852func (s *ByteArrayStatistics) Max() parquet.ByteArray { return s.max }
1853
1854// Merge merges the stats from other into this stat object, updating
1855// the null count, distinct count, number of values and the min/max if
1856// appropriate.
1857func (s *ByteArrayStatistics) Merge(other TypedStatistics) {
1858 rhs, ok := other.(*ByteArrayStatistics)
1859 if !ok {
1860 panic("incompatible stat type merge")
1861 }
1862
1863 s.statistics.merge(rhs)
1864 if rhs.HasMinMax() {
1865 s.SetMinMax(rhs.Min(), rhs.Max())
1866 }
1867}
1868
1869// Update is used to add more values to the current stat object, finding the
1870// min and max values etc.
1871func (s *ByteArrayStatistics) Update(values []parquet.ByteArray, numNull int64) {
1872 s.incNulls(numNull)
1873 s.nvalues += int64(len(values))
1874
1875 if len(values) == 0 {
1876 return
1877 }
1878
1879 s.SetMinMax(s.getMinMax(values))
1880}
1881
1882// UpdateSpaced is just like Update, but for spaced values using validBits to determine
1883// and skip null values.
1884func (s *ByteArrayStatistics) UpdateSpaced(values []parquet.ByteArray, validBits []byte, validBitsOffset, numNull int64) {
1885 s.incNulls(numNull)
1886 notnull := int64(len(values)) - numNull
1887 s.nvalues += notnull
1888
1889 if notnull == 0 {
1890 return
1891 }
1892
1893 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
1894}
1895
1896// SetMinMax updates the min and max values only if they are not currently set
1897// or if argMin is less than the current min / argMax is greater than the current max
1898func (s *ByteArrayStatistics) SetMinMax(argMin, argMax parquet.ByteArray) {
1899 maybeMinMax := s.cleanStat([2]parquet.ByteArray{argMin, argMax})
1900 if maybeMinMax == nil {
1901 return
1902 }
1903
1904 min := (*maybeMinMax)[0]
1905 max := (*maybeMinMax)[1]
1906
1907 if !s.hasMinMax {
1908 s.hasMinMax = true
1909 s.min = min
1910 s.max = max
1911 } else {
1912 if !s.less(s.min, min) {
1913 s.min = min
1914 }
1915 if s.less(s.max, max) {
1916 s.max = max
1917 }
1918 }
1919}
1920
1921// EncodeMin returns the encoded min value with plain encoding.
1922//
1923// ByteArray stats do not include the length in the encoding.
1924func (s *ByteArrayStatistics) EncodeMin() []byte {
1925 if s.HasMinMax() {
1926 return s.plainEncode(s.min)
1927 }
1928 return nil
1929}
1930
1931// EncodeMax returns the current encoded max value with plain encoding
1932//
1933// ByteArray stats do not include the length in the encoding
1934func (s *ByteArrayStatistics) EncodeMax() []byte {
1935 if s.HasMinMax() {
1936 return s.plainEncode(s.max)
1937 }
1938 return nil
1939}
1940
1941// Encode returns a populated EncodedStatistics object
1942func (s *ByteArrayStatistics) Encode() (enc EncodedStatistics, err error) {
1943 defer func() {
1944 if r := recover(); r != nil {
1945 switch r := r.(type) {
1946 case error:
1947 err = r
1948 case string:
1949 err = xerrors.New(r)
1950 default:
1951 err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
1952 }
1953 }
1954 }()
1955 if s.HasMinMax() {
1956 enc.SetMax(s.EncodeMax())
1957 enc.SetMin(s.EncodeMin())
1958 }
1959 if s.HasNullCount() {
1960 enc.SetNullCount(s.NullCount())
1961 }
1962 if s.HasDistinctCount() {
1963 enc.SetDistinctCount(s.DistinctCount())
1964 }
1965 return
1966}
1967
// minmaxPairFixedLenByteArray holds two parquet.FixedLenByteArray values.
// NOTE(review): presumably the {min, max} pair type exchanged with cleanStat,
// which is defined outside this section — confirm.
type minmaxPairFixedLenByteArray [2]parquet.FixedLenByteArray

// FixedLenByteArrayStatistics is the typed interface for managing stats for a column
// of FixedLenByteArray type.
type FixedLenByteArrayStatistics struct {
	statistics
	// min and max hold the current bounds; they are written by SetMinMax and
	// by NewFixedLenByteArrayStatisticsFromEncoded.
	min parquet.FixedLenByteArray
	max parquet.FixedLenByteArray

	// bitSetReader is created on the first getMinMaxSpaced call and reused
	// (via Reset) on later calls.
	bitSetReader utils.SetBitRunReader
}
1979
1980// NewFixedLenByteArrayStatistics constructs an appropriate stat object type using the
1981// given column descriptor and allocator.
1982//
1983// Panics if the physical type of descr is not parquet.Type.FixedLenByteArray
1984func NewFixedLenByteArrayStatistics(descr *schema.Column, mem memory.Allocator) *FixedLenByteArrayStatistics {
1985 if descr.PhysicalType() != parquet.Types.FixedLenByteArray {
1986 panic(xerrors.Errorf("parquet: invalid type %s for constructing a FixedLenByteArray stat object", descr.PhysicalType()))
1987 }
1988
1989 return &FixedLenByteArrayStatistics{
1990 statistics: statistics{
1991 descr: descr,
1992 hasNullCount: true,
1993 hasDistinctCount: true,
1994 order: descr.SortOrder(),
1995 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
1996 mem: mem,
1997 },
1998 }
1999}
2000
2001// NewFixedLenByteArrayStatisticsFromEncoded will construct a propertly typed statistics object
2002// initializing it with the provided information.
2003func NewFixedLenByteArrayStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *FixedLenByteArrayStatistics {
2004 ret := NewFixedLenByteArrayStatistics(descr, mem)
2005 ret.nvalues += nvalues
2006 if encoded.IsSetNullCount() {
2007 ret.incNulls(encoded.GetNullCount())
2008 }
2009 if encoded.IsSetDistinctCount() {
2010 ret.incDistinct(encoded.GetDistinctCount())
2011 }
2012
2013 encodedMin := encoded.GetMin()
2014 if encodedMin != nil && len(encodedMin) > 0 {
2015 ret.min = ret.plainDecode(encodedMin)
2016 }
2017 encodedMax := encoded.GetMax()
2018 if encodedMax != nil && len(encodedMax) > 0 {
2019 ret.max = ret.plainDecode(encodedMax)
2020 }
2021 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
2022 return ret
2023}
2024
2025func (s *FixedLenByteArrayStatistics) plainEncode(src parquet.FixedLenByteArray) []byte {
2026 s.encoder.(encoding.FixedLenByteArrayEncoder).Put([]parquet.FixedLenByteArray{src})
2027 buf, err := s.encoder.FlushValues()
2028 if err != nil {
2029 panic(err) // recovered by Encode
2030 }
2031 defer buf.Release()
2032
2033 out := make([]byte, buf.Len())
2034 copy(out, buf.Bytes())
2035 return out
2036}
2037
2038func (s *FixedLenByteArrayStatistics) plainDecode(src []byte) parquet.FixedLenByteArray {
2039 var buf [1]parquet.FixedLenByteArray
2040
2041 decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem)
2042 decoder.SetData(1, src)
2043 decoder.(encoding.FixedLenByteArrayDecoder).Decode(buf[:])
2044 return buf[0]
2045}
2046
2047func (s *FixedLenByteArrayStatistics) minval(a, b parquet.FixedLenByteArray) parquet.FixedLenByteArray {
2048 switch {
2049 case a == nil:
2050 return b
2051 case b == nil:
2052 return a
2053 case s.less(a, b):
2054 return a
2055 default:
2056 return b
2057 }
2058}
2059
2060func (s *FixedLenByteArrayStatistics) maxval(a, b parquet.FixedLenByteArray) parquet.FixedLenByteArray {
2061 switch {
2062 case a == nil:
2063 return b
2064 case b == nil:
2065 return a
2066 case s.less(a, b):
2067 return b
2068 default:
2069 return a
2070 }
2071}
2072
2073// MinMaxEqual returns true if both stat objects have the same Min and Max values
2074func (s *FixedLenByteArrayStatistics) MinMaxEqual(rhs *FixedLenByteArrayStatistics) bool {
2075 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
2076}
2077
2078// Equals returns true only if both objects are the same type, have the same min and
2079// max values, null count, distinct count and number of values.
2080func (s *FixedLenByteArrayStatistics) Equals(other TypedStatistics) bool {
2081 if s.Type() != other.Type() {
2082 return false
2083 }
2084 rhs, ok := other.(*FixedLenByteArrayStatistics)
2085 if !ok {
2086 return false
2087 }
2088
2089 if s.HasMinMax() != rhs.HasMinMax() {
2090 return false
2091 }
2092 return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
2093 s.NullCount() == rhs.NullCount() &&
2094 s.DistinctCount() == rhs.DistinctCount() &&
2095 s.NumValues() == rhs.NumValues()
2096}
2097
2098func (s *FixedLenByteArrayStatistics) getMinMax(values []parquet.FixedLenByteArray) (min, max parquet.FixedLenByteArray) {
2099 defMin := s.defaultMin()
2100 defMax := s.defaultMax()
2101
2102 min = defMin
2103 max = defMax
2104
2105 for _, v := range values {
2106 min = s.minval(min, v)
2107 max = s.maxval(max, v)
2108 }
2109 return
2110}
2111
2112func (s *FixedLenByteArrayStatistics) getMinMaxSpaced(values []parquet.FixedLenByteArray, validBits []byte, validBitsOffset int64) (min, max parquet.FixedLenByteArray) {
2113 min = s.defaultMin()
2114 max = s.defaultMax()
2115
2116 if s.bitSetReader == nil {
2117 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
2118 } else {
2119 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
2120 }
2121
2122 for {
2123 run := s.bitSetReader.NextRun()
2124 if run.Length == 0 {
2125 break
2126 }
2127 for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] {
2128 min = s.minval(min, v)
2129 max = s.maxval(max, v)
2130 }
2131 }
2132 return
2133}
2134
2135func (s *FixedLenByteArrayStatistics) Min() parquet.FixedLenByteArray { return s.min }
2136func (s *FixedLenByteArrayStatistics) Max() parquet.FixedLenByteArray { return s.max }
2137
2138// Merge merges the stats from other into this stat object, updating
2139// the null count, distinct count, number of values and the min/max if
2140// appropriate.
2141func (s *FixedLenByteArrayStatistics) Merge(other TypedStatistics) {
2142 rhs, ok := other.(*FixedLenByteArrayStatistics)
2143 if !ok {
2144 panic("incompatible stat type merge")
2145 }
2146
2147 s.statistics.merge(rhs)
2148 if rhs.HasMinMax() {
2149 s.SetMinMax(rhs.Min(), rhs.Max())
2150 }
2151}
2152
2153// Update is used to add more values to the current stat object, finding the
2154// min and max values etc.
2155func (s *FixedLenByteArrayStatistics) Update(values []parquet.FixedLenByteArray, numNull int64) {
2156 s.incNulls(numNull)
2157 s.nvalues += int64(len(values))
2158
2159 if len(values) == 0 {
2160 return
2161 }
2162
2163 s.SetMinMax(s.getMinMax(values))
2164}
2165
2166// UpdateSpaced is just like Update, but for spaced values using validBits to determine
2167// and skip null values.
2168func (s *FixedLenByteArrayStatistics) UpdateSpaced(values []parquet.FixedLenByteArray, validBits []byte, validBitsOffset, numNull int64) {
2169 s.incNulls(numNull)
2170 notnull := int64(len(values)) - numNull
2171 s.nvalues += notnull
2172
2173 if notnull == 0 {
2174 return
2175 }
2176
2177 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
2178}
2179
2180// SetMinMax updates the min and max values only if they are not currently set
2181// or if argMin is less than the current min / argMax is greater than the current max
2182func (s *FixedLenByteArrayStatistics) SetMinMax(argMin, argMax parquet.FixedLenByteArray) {
2183 maybeMinMax := s.cleanStat([2]parquet.FixedLenByteArray{argMin, argMax})
2184 if maybeMinMax == nil {
2185 return
2186 }
2187
2188 min := (*maybeMinMax)[0]
2189 max := (*maybeMinMax)[1]
2190
2191 if !s.hasMinMax {
2192 s.hasMinMax = true
2193 s.min = min
2194 s.max = max
2195 } else {
2196 if !s.less(s.min, min) {
2197 s.min = min
2198 }
2199 if s.less(s.max, max) {
2200 s.max = max
2201 }
2202 }
2203}
2204
2205// EncodeMin returns the encoded min value with plain encoding.
2206//
2207// ByteArray stats do not include the length in the encoding.
2208func (s *FixedLenByteArrayStatistics) EncodeMin() []byte {
2209 if s.HasMinMax() {
2210 return s.plainEncode(s.min)
2211 }
2212 return nil
2213}
2214
2215// EncodeMax returns the current encoded max value with plain encoding
2216//
2217// ByteArray stats do not include the length in the encoding
2218func (s *FixedLenByteArrayStatistics) EncodeMax() []byte {
2219 if s.HasMinMax() {
2220 return s.plainEncode(s.max)
2221 }
2222 return nil
2223}
2224
2225// Encode returns a populated EncodedStatistics object
2226func (s *FixedLenByteArrayStatistics) Encode() (enc EncodedStatistics, err error) {
2227 defer func() {
2228 if r := recover(); r != nil {
2229 switch r := r.(type) {
2230 case error:
2231 err = r
2232 case string:
2233 err = xerrors.New(r)
2234 default:
2235 err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
2236 }
2237 }
2238 }()
2239 if s.HasMinMax() {
2240 enc.SetMax(s.EncodeMax())
2241 enc.SetMin(s.EncodeMin())
2242 }
2243 if s.HasNullCount() {
2244 enc.SetNullCount(s.NullCount())
2245 }
2246 if s.HasDistinctCount() {
2247 enc.SetDistinctCount(s.DistinctCount())
2248 }
2249 return
2250}
2251
2252// NewStatistics uses the type in the column descriptor to construct the appropriate
2253// typed stats object. If mem is nil, then memory.DefaultAllocator will be used.
2254func NewStatistics(descr *schema.Column, mem memory.Allocator) TypedStatistics {
2255 if mem == nil {
2256 mem = memory.DefaultAllocator
2257 }
2258 switch descr.PhysicalType() {
2259 case parquet.Types.Int32:
2260 return NewInt32Statistics(descr, mem)
2261 case parquet.Types.Int64:
2262 return NewInt64Statistics(descr, mem)
2263 case parquet.Types.Int96:
2264 return NewInt96Statistics(descr, mem)
2265 case parquet.Types.Float:
2266 return NewFloat32Statistics(descr, mem)
2267 case parquet.Types.Double:
2268 return NewFloat64Statistics(descr, mem)
2269 case parquet.Types.Boolean:
2270 return NewBooleanStatistics(descr, mem)
2271 case parquet.Types.ByteArray:
2272 return NewByteArrayStatistics(descr, mem)
2273 case parquet.Types.FixedLenByteArray:
2274 return NewFixedLenByteArrayStatistics(descr, mem)
2275 default:
2276 panic("not implemented")
2277 }
2278}
2279
2280// NewStatisticsFromEncoded uses the provided information to initialize a typed stat object
2281// by checking the type of the provided column descriptor.
2282//
2283// If mem is nil, then memory.DefaultAllocator is used.
2284func NewStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) TypedStatistics {
2285 if mem == nil {
2286 mem = memory.DefaultAllocator
2287 }
2288 switch descr.PhysicalType() {
2289 case parquet.Types.Int32:
2290 return NewInt32StatisticsFromEncoded(descr, mem, nvalues, encoded)
2291 case parquet.Types.Int64:
2292 return NewInt64StatisticsFromEncoded(descr, mem, nvalues, encoded)
2293 case parquet.Types.Int96:
2294 return NewInt96StatisticsFromEncoded(descr, mem, nvalues, encoded)
2295 case parquet.Types.Float:
2296 return NewFloat32StatisticsFromEncoded(descr, mem, nvalues, encoded)
2297 case parquet.Types.Double:
2298 return NewFloat64StatisticsFromEncoded(descr, mem, nvalues, encoded)
2299 case parquet.Types.Boolean:
2300 return NewBooleanStatisticsFromEncoded(descr, mem, nvalues, encoded)
2301 case parquet.Types.ByteArray:
2302 return NewByteArrayStatisticsFromEncoded(descr, mem, nvalues, encoded)
2303 case parquet.Types.FixedLenByteArray:
2304 return NewFixedLenByteArrayStatisticsFromEncoded(descr, mem, nvalues, encoded)
2305 default:
2306 panic("not implemented")
2307 }
2308}