]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Code generated by statistics_types.gen.go.tmpl. DO NOT EDIT. |
2 | ||
3 | // Licensed to the Apache Software Foundation (ASF) under one | |
4 | // or more contributor license agreements. See the NOTICE file | |
5 | // distributed with this work for additional information | |
6 | // regarding copyright ownership. The ASF licenses this file | |
7 | // to you under the Apache License, Version 2.0 (the | |
8 | // "License"); you may not use this file except in compliance | |
9 | // with the License. You may obtain a copy of the License at | |
10 | // | |
11 | // http://www.apache.org/licenses/LICENSE-2.0 | |
12 | // | |
13 | // Unless required by applicable law or agreed to in writing, software | |
14 | // distributed under the License is distributed on an "AS IS" BASIS, | |
15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
16 | // See the License for the specific language governing permissions and | |
17 | // limitations under the License. | |
18 | ||
19 | package metadata | |
20 | ||
21 | import ( | |
22 | "math" | |
23 | ||
24 | "github.com/apache/arrow/go/v6/arrow" | |
25 | "github.com/apache/arrow/go/v6/arrow/memory" | |
26 | "github.com/apache/arrow/go/v6/parquet" | |
27 | "github.com/apache/arrow/go/v6/parquet/internal/encoding" | |
28 | "github.com/apache/arrow/go/v6/parquet/internal/utils" | |
29 | "github.com/apache/arrow/go/v6/parquet/schema" | |
30 | "golang.org/x/xerrors" | |
31 | ) | |
32 | ||
33 | type minmaxPairInt32 [2]int32 | |
34 | ||
35 | // Int32Statistics is the typed interface for managing stats for a column | |
36 | // of Int32 type. | |
37 | type Int32Statistics struct { | |
38 | statistics | |
39 | min int32 | |
40 | max int32 | |
41 | ||
42 | bitSetReader utils.SetBitRunReader | |
43 | } | |
44 | ||
45 | // NewInt32Statistics constructs an appropriate stat object type using the | |
46 | // given column descriptor and allocator. | |
47 | // | |
48 | // Panics if the physical type of descr is not parquet.Type.Int32 | |
49 | func NewInt32Statistics(descr *schema.Column, mem memory.Allocator) *Int32Statistics { | |
50 | if descr.PhysicalType() != parquet.Types.Int32 { | |
51 | panic(xerrors.Errorf("parquet: invalid type %s for constructing a Int32 stat object", descr.PhysicalType())) | |
52 | } | |
53 | ||
54 | return &Int32Statistics{ | |
55 | statistics: statistics{ | |
56 | descr: descr, | |
57 | hasNullCount: true, | |
58 | hasDistinctCount: true, | |
59 | order: descr.SortOrder(), | |
60 | encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), | |
61 | mem: mem, | |
62 | }, | |
63 | } | |
64 | } | |
65 | ||
66 | // NewInt32StatisticsFromEncoded will construct a propertly typed statistics object | |
67 | // initializing it with the provided information. | |
68 | func NewInt32StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int32Statistics { | |
69 | ret := NewInt32Statistics(descr, mem) | |
70 | ret.nvalues += nvalues | |
71 | if encoded.IsSetNullCount() { | |
72 | ret.incNulls(encoded.GetNullCount()) | |
73 | } | |
74 | if encoded.IsSetDistinctCount() { | |
75 | ret.incDistinct(encoded.GetDistinctCount()) | |
76 | } | |
77 | ||
78 | encodedMin := encoded.GetMin() | |
79 | if encodedMin != nil && len(encodedMin) > 0 { | |
80 | ret.min = ret.plainDecode(encodedMin) | |
81 | } | |
82 | encodedMax := encoded.GetMax() | |
83 | if encodedMax != nil && len(encodedMax) > 0 { | |
84 | ret.max = ret.plainDecode(encodedMax) | |
85 | } | |
86 | ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() | |
87 | return ret | |
88 | } | |
89 | ||
90 | func (s *Int32Statistics) plainEncode(src int32) []byte { | |
91 | s.encoder.(encoding.Int32Encoder).Put([]int32{src}) | |
92 | buf, err := s.encoder.FlushValues() | |
93 | if err != nil { | |
94 | panic(err) // recovered by Encode | |
95 | } | |
96 | defer buf.Release() | |
97 | ||
98 | out := make([]byte, buf.Len()) | |
99 | copy(out, buf.Bytes()) | |
100 | return out | |
101 | } | |
102 | ||
103 | func (s *Int32Statistics) plainDecode(src []byte) int32 { | |
104 | var buf [1]int32 | |
105 | ||
106 | decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem) | |
107 | decoder.SetData(1, src) | |
108 | decoder.(encoding.Int32Decoder).Decode(buf[:]) | |
109 | return buf[0] | |
110 | } | |
111 | ||
112 | func (s *Int32Statistics) minval(a, b int32) int32 { | |
113 | if s.less(a, b) { | |
114 | return a | |
115 | } | |
116 | return b | |
117 | } | |
118 | ||
119 | func (s *Int32Statistics) maxval(a, b int32) int32 { | |
120 | if s.less(a, b) { | |
121 | return b | |
122 | } | |
123 | return a | |
124 | } | |
125 | ||
126 | // MinMaxEqual returns true if both stat objects have the same Min and Max values | |
127 | func (s *Int32Statistics) MinMaxEqual(rhs *Int32Statistics) bool { | |
128 | return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) | |
129 | } | |
130 | ||
131 | // Equals returns true only if both objects are the same type, have the same min and | |
132 | // max values, null count, distinct count and number of values. | |
133 | func (s *Int32Statistics) Equals(other TypedStatistics) bool { | |
134 | if s.Type() != other.Type() { | |
135 | return false | |
136 | } | |
137 | rhs, ok := other.(*Int32Statistics) | |
138 | if !ok { | |
139 | return false | |
140 | } | |
141 | ||
142 | if s.HasMinMax() != rhs.HasMinMax() { | |
143 | return false | |
144 | } | |
145 | return (s.hasMinMax && s.MinMaxEqual(rhs)) && | |
146 | s.NullCount() == rhs.NullCount() && | |
147 | s.DistinctCount() == rhs.DistinctCount() && | |
148 | s.NumValues() == rhs.NumValues() | |
149 | } | |
150 | ||
151 | func (s *Int32Statistics) getMinMax(values []int32) (min, max int32) { | |
152 | if s.order == schema.SortSIGNED { | |
153 | min, max = utils.GetMinMaxInt32(values) | |
154 | } else { | |
155 | umin, umax := utils.GetMinMaxUint32(arrow.Uint32Traits.CastFromBytes(arrow.Int32Traits.CastToBytes(values))) | |
156 | min, max = int32(umin), int32(umax) | |
157 | } | |
158 | return | |
159 | } | |
160 | ||
161 | func (s *Int32Statistics) getMinMaxSpaced(values []int32, validBits []byte, validBitsOffset int64) (min, max int32) { | |
162 | min = s.defaultMin() | |
163 | max = s.defaultMax() | |
164 | var fn func([]int32) (int32, int32) | |
165 | if s.order == schema.SortSIGNED { | |
166 | fn = utils.GetMinMaxInt32 | |
167 | } else { | |
168 | fn = func(v []int32) (int32, int32) { | |
169 | umin, umax := utils.GetMinMaxUint32(arrow.Uint32Traits.CastFromBytes(arrow.Int32Traits.CastToBytes(values))) | |
170 | return int32(umin), int32(umax) | |
171 | } | |
172 | } | |
173 | ||
174 | if s.bitSetReader == nil { | |
175 | s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) | |
176 | } else { | |
177 | s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) | |
178 | } | |
179 | ||
180 | for { | |
181 | run := s.bitSetReader.NextRun() | |
182 | if run.Length == 0 { | |
183 | break | |
184 | } | |
185 | localMin, localMax := fn(values[int(run.Pos):int(run.Pos+run.Length)]) | |
186 | if min > localMin { | |
187 | min = localMin | |
188 | } | |
189 | if max < localMax { | |
190 | max = localMax | |
191 | } | |
192 | } | |
193 | return | |
194 | } | |
195 | ||
196 | func (s *Int32Statistics) Min() int32 { return s.min } | |
197 | func (s *Int32Statistics) Max() int32 { return s.max } | |
198 | ||
199 | // Merge merges the stats from other into this stat object, updating | |
200 | // the null count, distinct count, number of values and the min/max if | |
201 | // appropriate. | |
202 | func (s *Int32Statistics) Merge(other TypedStatistics) { | |
203 | rhs, ok := other.(*Int32Statistics) | |
204 | if !ok { | |
205 | panic("incompatible stat type merge") | |
206 | } | |
207 | ||
208 | s.statistics.merge(rhs) | |
209 | if rhs.HasMinMax() { | |
210 | s.SetMinMax(rhs.Min(), rhs.Max()) | |
211 | } | |
212 | } | |
213 | ||
214 | // Update is used to add more values to the current stat object, finding the | |
215 | // min and max values etc. | |
216 | func (s *Int32Statistics) Update(values []int32, numNull int64) { | |
217 | s.incNulls(numNull) | |
218 | s.nvalues += int64(len(values)) | |
219 | ||
220 | if len(values) == 0 { | |
221 | return | |
222 | } | |
223 | ||
224 | s.SetMinMax(s.getMinMax(values)) | |
225 | } | |
226 | ||
227 | // UpdateSpaced is just like Update, but for spaced values using validBits to determine | |
228 | // and skip null values. | |
229 | func (s *Int32Statistics) UpdateSpaced(values []int32, validBits []byte, validBitsOffset, numNull int64) { | |
230 | s.incNulls(numNull) | |
231 | notnull := int64(len(values)) - numNull | |
232 | s.nvalues += notnull | |
233 | ||
234 | if notnull == 0 { | |
235 | return | |
236 | } | |
237 | ||
238 | s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) | |
239 | } | |
240 | ||
241 | // SetMinMax updates the min and max values only if they are not currently set | |
242 | // or if argMin is less than the current min / argMax is greater than the current max | |
243 | func (s *Int32Statistics) SetMinMax(argMin, argMax int32) { | |
244 | maybeMinMax := s.cleanStat([2]int32{argMin, argMax}) | |
245 | if maybeMinMax == nil { | |
246 | return | |
247 | } | |
248 | ||
249 | min := (*maybeMinMax)[0] | |
250 | max := (*maybeMinMax)[1] | |
251 | ||
252 | if !s.hasMinMax { | |
253 | s.hasMinMax = true | |
254 | s.min = min | |
255 | s.max = max | |
256 | } else { | |
257 | if !s.less(s.min, min) { | |
258 | s.min = min | |
259 | } | |
260 | if s.less(s.max, max) { | |
261 | s.max = max | |
262 | } | |
263 | } | |
264 | } | |
265 | ||
266 | // EncodeMin returns the encoded min value with plain encoding. | |
267 | // | |
268 | // ByteArray stats do not include the length in the encoding. | |
269 | func (s *Int32Statistics) EncodeMin() []byte { | |
270 | if s.HasMinMax() { | |
271 | return s.plainEncode(s.min) | |
272 | } | |
273 | return nil | |
274 | } | |
275 | ||
276 | // EncodeMax returns the current encoded max value with plain encoding | |
277 | // | |
278 | // ByteArray stats do not include the length in the encoding | |
279 | func (s *Int32Statistics) EncodeMax() []byte { | |
280 | if s.HasMinMax() { | |
281 | return s.plainEncode(s.max) | |
282 | } | |
283 | return nil | |
284 | } | |
285 | ||
286 | // Encode returns a populated EncodedStatistics object | |
287 | func (s *Int32Statistics) Encode() (enc EncodedStatistics, err error) { | |
288 | defer func() { | |
289 | if r := recover(); r != nil { | |
290 | switch r := r.(type) { | |
291 | case error: | |
292 | err = r | |
293 | case string: | |
294 | err = xerrors.New(r) | |
295 | default: | |
296 | err = xerrors.Errorf("unknown error type thrown from panic: %v", r) | |
297 | } | |
298 | } | |
299 | }() | |
300 | if s.HasMinMax() { | |
301 | enc.SetMax(s.EncodeMax()) | |
302 | enc.SetMin(s.EncodeMin()) | |
303 | } | |
304 | if s.HasNullCount() { | |
305 | enc.SetNullCount(s.NullCount()) | |
306 | } | |
307 | if s.HasDistinctCount() { | |
308 | enc.SetDistinctCount(s.DistinctCount()) | |
309 | } | |
310 | return | |
311 | } | |
312 | ||
313 | type minmaxPairInt64 [2]int64 | |
314 | ||
315 | // Int64Statistics is the typed interface for managing stats for a column | |
316 | // of Int64 type. | |
317 | type Int64Statistics struct { | |
318 | statistics | |
319 | min int64 | |
320 | max int64 | |
321 | ||
322 | bitSetReader utils.SetBitRunReader | |
323 | } | |
324 | ||
325 | // NewInt64Statistics constructs an appropriate stat object type using the | |
326 | // given column descriptor and allocator. | |
327 | // | |
328 | // Panics if the physical type of descr is not parquet.Type.Int64 | |
329 | func NewInt64Statistics(descr *schema.Column, mem memory.Allocator) *Int64Statistics { | |
330 | if descr.PhysicalType() != parquet.Types.Int64 { | |
331 | panic(xerrors.Errorf("parquet: invalid type %s for constructing a Int64 stat object", descr.PhysicalType())) | |
332 | } | |
333 | ||
334 | return &Int64Statistics{ | |
335 | statistics: statistics{ | |
336 | descr: descr, | |
337 | hasNullCount: true, | |
338 | hasDistinctCount: true, | |
339 | order: descr.SortOrder(), | |
340 | encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), | |
341 | mem: mem, | |
342 | }, | |
343 | } | |
344 | } | |
345 | ||
346 | // NewInt64StatisticsFromEncoded will construct a propertly typed statistics object | |
347 | // initializing it with the provided information. | |
348 | func NewInt64StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int64Statistics { | |
349 | ret := NewInt64Statistics(descr, mem) | |
350 | ret.nvalues += nvalues | |
351 | if encoded.IsSetNullCount() { | |
352 | ret.incNulls(encoded.GetNullCount()) | |
353 | } | |
354 | if encoded.IsSetDistinctCount() { | |
355 | ret.incDistinct(encoded.GetDistinctCount()) | |
356 | } | |
357 | ||
358 | encodedMin := encoded.GetMin() | |
359 | if encodedMin != nil && len(encodedMin) > 0 { | |
360 | ret.min = ret.plainDecode(encodedMin) | |
361 | } | |
362 | encodedMax := encoded.GetMax() | |
363 | if encodedMax != nil && len(encodedMax) > 0 { | |
364 | ret.max = ret.plainDecode(encodedMax) | |
365 | } | |
366 | ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() | |
367 | return ret | |
368 | } | |
369 | ||
370 | func (s *Int64Statistics) plainEncode(src int64) []byte { | |
371 | s.encoder.(encoding.Int64Encoder).Put([]int64{src}) | |
372 | buf, err := s.encoder.FlushValues() | |
373 | if err != nil { | |
374 | panic(err) // recovered by Encode | |
375 | } | |
376 | defer buf.Release() | |
377 | ||
378 | out := make([]byte, buf.Len()) | |
379 | copy(out, buf.Bytes()) | |
380 | return out | |
381 | } | |
382 | ||
383 | func (s *Int64Statistics) plainDecode(src []byte) int64 { | |
384 | var buf [1]int64 | |
385 | ||
386 | decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem) | |
387 | decoder.SetData(1, src) | |
388 | decoder.(encoding.Int64Decoder).Decode(buf[:]) | |
389 | return buf[0] | |
390 | } | |
391 | ||
392 | func (s *Int64Statistics) minval(a, b int64) int64 { | |
393 | if s.less(a, b) { | |
394 | return a | |
395 | } | |
396 | return b | |
397 | } | |
398 | ||
399 | func (s *Int64Statistics) maxval(a, b int64) int64 { | |
400 | if s.less(a, b) { | |
401 | return b | |
402 | } | |
403 | return a | |
404 | } | |
405 | ||
406 | // MinMaxEqual returns true if both stat objects have the same Min and Max values | |
407 | func (s *Int64Statistics) MinMaxEqual(rhs *Int64Statistics) bool { | |
408 | return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) | |
409 | } | |
410 | ||
411 | // Equals returns true only if both objects are the same type, have the same min and | |
412 | // max values, null count, distinct count and number of values. | |
413 | func (s *Int64Statistics) Equals(other TypedStatistics) bool { | |
414 | if s.Type() != other.Type() { | |
415 | return false | |
416 | } | |
417 | rhs, ok := other.(*Int64Statistics) | |
418 | if !ok { | |
419 | return false | |
420 | } | |
421 | ||
422 | if s.HasMinMax() != rhs.HasMinMax() { | |
423 | return false | |
424 | } | |
425 | return (s.hasMinMax && s.MinMaxEqual(rhs)) && | |
426 | s.NullCount() == rhs.NullCount() && | |
427 | s.DistinctCount() == rhs.DistinctCount() && | |
428 | s.NumValues() == rhs.NumValues() | |
429 | } | |
430 | ||
431 | func (s *Int64Statistics) getMinMax(values []int64) (min, max int64) { | |
432 | if s.order == schema.SortSIGNED { | |
433 | min, max = utils.GetMinMaxInt64(values) | |
434 | } else { | |
435 | umin, umax := utils.GetMinMaxUint64(arrow.Uint64Traits.CastFromBytes(arrow.Int64Traits.CastToBytes(values))) | |
436 | min, max = int64(umin), int64(umax) | |
437 | } | |
438 | return | |
439 | } | |
440 | ||
441 | func (s *Int64Statistics) getMinMaxSpaced(values []int64, validBits []byte, validBitsOffset int64) (min, max int64) { | |
442 | min = s.defaultMin() | |
443 | max = s.defaultMax() | |
444 | var fn func([]int64) (int64, int64) | |
445 | if s.order == schema.SortSIGNED { | |
446 | fn = utils.GetMinMaxInt64 | |
447 | } else { | |
448 | fn = func(v []int64) (int64, int64) { | |
449 | umin, umax := utils.GetMinMaxUint64(arrow.Uint64Traits.CastFromBytes(arrow.Int64Traits.CastToBytes(values))) | |
450 | return int64(umin), int64(umax) | |
451 | } | |
452 | } | |
453 | ||
454 | if s.bitSetReader == nil { | |
455 | s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) | |
456 | } else { | |
457 | s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) | |
458 | } | |
459 | ||
460 | for { | |
461 | run := s.bitSetReader.NextRun() | |
462 | if run.Length == 0 { | |
463 | break | |
464 | } | |
465 | localMin, localMax := fn(values[int(run.Pos):int(run.Pos+run.Length)]) | |
466 | if min > localMin { | |
467 | min = localMin | |
468 | } | |
469 | if max < localMax { | |
470 | max = localMax | |
471 | } | |
472 | } | |
473 | return | |
474 | } | |
475 | ||
476 | func (s *Int64Statistics) Min() int64 { return s.min } | |
477 | func (s *Int64Statistics) Max() int64 { return s.max } | |
478 | ||
479 | // Merge merges the stats from other into this stat object, updating | |
480 | // the null count, distinct count, number of values and the min/max if | |
481 | // appropriate. | |
482 | func (s *Int64Statistics) Merge(other TypedStatistics) { | |
483 | rhs, ok := other.(*Int64Statistics) | |
484 | if !ok { | |
485 | panic("incompatible stat type merge") | |
486 | } | |
487 | ||
488 | s.statistics.merge(rhs) | |
489 | if rhs.HasMinMax() { | |
490 | s.SetMinMax(rhs.Min(), rhs.Max()) | |
491 | } | |
492 | } | |
493 | ||
494 | // Update is used to add more values to the current stat object, finding the | |
495 | // min and max values etc. | |
496 | func (s *Int64Statistics) Update(values []int64, numNull int64) { | |
497 | s.incNulls(numNull) | |
498 | s.nvalues += int64(len(values)) | |
499 | ||
500 | if len(values) == 0 { | |
501 | return | |
502 | } | |
503 | ||
504 | s.SetMinMax(s.getMinMax(values)) | |
505 | } | |
506 | ||
507 | // UpdateSpaced is just like Update, but for spaced values using validBits to determine | |
508 | // and skip null values. | |
509 | func (s *Int64Statistics) UpdateSpaced(values []int64, validBits []byte, validBitsOffset, numNull int64) { | |
510 | s.incNulls(numNull) | |
511 | notnull := int64(len(values)) - numNull | |
512 | s.nvalues += notnull | |
513 | ||
514 | if notnull == 0 { | |
515 | return | |
516 | } | |
517 | ||
518 | s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) | |
519 | } | |
520 | ||
521 | // SetMinMax updates the min and max values only if they are not currently set | |
522 | // or if argMin is less than the current min / argMax is greater than the current max | |
523 | func (s *Int64Statistics) SetMinMax(argMin, argMax int64) { | |
524 | maybeMinMax := s.cleanStat([2]int64{argMin, argMax}) | |
525 | if maybeMinMax == nil { | |
526 | return | |
527 | } | |
528 | ||
529 | min := (*maybeMinMax)[0] | |
530 | max := (*maybeMinMax)[1] | |
531 | ||
532 | if !s.hasMinMax { | |
533 | s.hasMinMax = true | |
534 | s.min = min | |
535 | s.max = max | |
536 | } else { | |
537 | if !s.less(s.min, min) { | |
538 | s.min = min | |
539 | } | |
540 | if s.less(s.max, max) { | |
541 | s.max = max | |
542 | } | |
543 | } | |
544 | } | |
545 | ||
546 | // EncodeMin returns the encoded min value with plain encoding. | |
547 | // | |
548 | // ByteArray stats do not include the length in the encoding. | |
549 | func (s *Int64Statistics) EncodeMin() []byte { | |
550 | if s.HasMinMax() { | |
551 | return s.plainEncode(s.min) | |
552 | } | |
553 | return nil | |
554 | } | |
555 | ||
556 | // EncodeMax returns the current encoded max value with plain encoding | |
557 | // | |
558 | // ByteArray stats do not include the length in the encoding | |
559 | func (s *Int64Statistics) EncodeMax() []byte { | |
560 | if s.HasMinMax() { | |
561 | return s.plainEncode(s.max) | |
562 | } | |
563 | return nil | |
564 | } | |
565 | ||
566 | // Encode returns a populated EncodedStatistics object | |
567 | func (s *Int64Statistics) Encode() (enc EncodedStatistics, err error) { | |
568 | defer func() { | |
569 | if r := recover(); r != nil { | |
570 | switch r := r.(type) { | |
571 | case error: | |
572 | err = r | |
573 | case string: | |
574 | err = xerrors.New(r) | |
575 | default: | |
576 | err = xerrors.Errorf("unknown error type thrown from panic: %v", r) | |
577 | } | |
578 | } | |
579 | }() | |
580 | if s.HasMinMax() { | |
581 | enc.SetMax(s.EncodeMax()) | |
582 | enc.SetMin(s.EncodeMin()) | |
583 | } | |
584 | if s.HasNullCount() { | |
585 | enc.SetNullCount(s.NullCount()) | |
586 | } | |
587 | if s.HasDistinctCount() { | |
588 | enc.SetDistinctCount(s.DistinctCount()) | |
589 | } | |
590 | return | |
591 | } | |
592 | ||
593 | type minmaxPairInt96 [2]parquet.Int96 | |
594 | ||
595 | // Int96Statistics is the typed interface for managing stats for a column | |
596 | // of Int96 type. | |
597 | type Int96Statistics struct { | |
598 | statistics | |
599 | min parquet.Int96 | |
600 | max parquet.Int96 | |
601 | ||
602 | bitSetReader utils.SetBitRunReader | |
603 | } | |
604 | ||
605 | // NewInt96Statistics constructs an appropriate stat object type using the | |
606 | // given column descriptor and allocator. | |
607 | // | |
608 | // Panics if the physical type of descr is not parquet.Type.Int96 | |
609 | func NewInt96Statistics(descr *schema.Column, mem memory.Allocator) *Int96Statistics { | |
610 | if descr.PhysicalType() != parquet.Types.Int96 { | |
611 | panic(xerrors.Errorf("parquet: invalid type %s for constructing a Int96 stat object", descr.PhysicalType())) | |
612 | } | |
613 | ||
614 | return &Int96Statistics{ | |
615 | statistics: statistics{ | |
616 | descr: descr, | |
617 | hasNullCount: true, | |
618 | hasDistinctCount: true, | |
619 | order: descr.SortOrder(), | |
620 | encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), | |
621 | mem: mem, | |
622 | }, | |
623 | } | |
624 | } | |
625 | ||
626 | // NewInt96StatisticsFromEncoded will construct a propertly typed statistics object | |
627 | // initializing it with the provided information. | |
628 | func NewInt96StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int96Statistics { | |
629 | ret := NewInt96Statistics(descr, mem) | |
630 | ret.nvalues += nvalues | |
631 | if encoded.IsSetNullCount() { | |
632 | ret.incNulls(encoded.GetNullCount()) | |
633 | } | |
634 | if encoded.IsSetDistinctCount() { | |
635 | ret.incDistinct(encoded.GetDistinctCount()) | |
636 | } | |
637 | ||
638 | encodedMin := encoded.GetMin() | |
639 | if encodedMin != nil && len(encodedMin) > 0 { | |
640 | ret.min = ret.plainDecode(encodedMin) | |
641 | } | |
642 | encodedMax := encoded.GetMax() | |
643 | if encodedMax != nil && len(encodedMax) > 0 { | |
644 | ret.max = ret.plainDecode(encodedMax) | |
645 | } | |
646 | ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() | |
647 | return ret | |
648 | } | |
649 | ||
650 | func (s *Int96Statistics) plainEncode(src parquet.Int96) []byte { | |
651 | s.encoder.(encoding.Int96Encoder).Put([]parquet.Int96{src}) | |
652 | buf, err := s.encoder.FlushValues() | |
653 | if err != nil { | |
654 | panic(err) // recovered by Encode | |
655 | } | |
656 | defer buf.Release() | |
657 | ||
658 | out := make([]byte, buf.Len()) | |
659 | copy(out, buf.Bytes()) | |
660 | return out | |
661 | } | |
662 | ||
663 | func (s *Int96Statistics) plainDecode(src []byte) parquet.Int96 { | |
664 | var buf [1]parquet.Int96 | |
665 | ||
666 | decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem) | |
667 | decoder.SetData(1, src) | |
668 | decoder.(encoding.Int96Decoder).Decode(buf[:]) | |
669 | return buf[0] | |
670 | } | |
671 | ||
672 | func (s *Int96Statistics) minval(a, b parquet.Int96) parquet.Int96 { | |
673 | if s.less(a, b) { | |
674 | return a | |
675 | } | |
676 | return b | |
677 | } | |
678 | ||
679 | func (s *Int96Statistics) maxval(a, b parquet.Int96) parquet.Int96 { | |
680 | if s.less(a, b) { | |
681 | return b | |
682 | } | |
683 | return a | |
684 | } | |
685 | ||
686 | // MinMaxEqual returns true if both stat objects have the same Min and Max values | |
687 | func (s *Int96Statistics) MinMaxEqual(rhs *Int96Statistics) bool { | |
688 | return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) | |
689 | } | |
690 | ||
691 | // Equals returns true only if both objects are the same type, have the same min and | |
692 | // max values, null count, distinct count and number of values. | |
693 | func (s *Int96Statistics) Equals(other TypedStatistics) bool { | |
694 | if s.Type() != other.Type() { | |
695 | return false | |
696 | } | |
697 | rhs, ok := other.(*Int96Statistics) | |
698 | if !ok { | |
699 | return false | |
700 | } | |
701 | ||
702 | if s.HasMinMax() != rhs.HasMinMax() { | |
703 | return false | |
704 | } | |
705 | return (s.hasMinMax && s.MinMaxEqual(rhs)) && | |
706 | s.NullCount() == rhs.NullCount() && | |
707 | s.DistinctCount() == rhs.DistinctCount() && | |
708 | s.NumValues() == rhs.NumValues() | |
709 | } | |
710 | ||
711 | func (s *Int96Statistics) getMinMax(values []parquet.Int96) (min, max parquet.Int96) { | |
712 | defMin := s.defaultMin() | |
713 | defMax := s.defaultMax() | |
714 | ||
715 | min = defMin | |
716 | max = defMax | |
717 | ||
718 | for _, v := range values { | |
719 | min = s.minval(min, v) | |
720 | max = s.maxval(max, v) | |
721 | } | |
722 | return | |
723 | } | |
724 | ||
725 | func (s *Int96Statistics) getMinMaxSpaced(values []parquet.Int96, validBits []byte, validBitsOffset int64) (min, max parquet.Int96) { | |
726 | min = s.defaultMin() | |
727 | max = s.defaultMax() | |
728 | ||
729 | if s.bitSetReader == nil { | |
730 | s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) | |
731 | } else { | |
732 | s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) | |
733 | } | |
734 | ||
735 | for { | |
736 | run := s.bitSetReader.NextRun() | |
737 | if run.Length == 0 { | |
738 | break | |
739 | } | |
740 | for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] { | |
741 | min = s.minval(min, v) | |
742 | max = s.maxval(max, v) | |
743 | } | |
744 | } | |
745 | return | |
746 | } | |
747 | ||
748 | func (s *Int96Statistics) Min() parquet.Int96 { return s.min } | |
749 | func (s *Int96Statistics) Max() parquet.Int96 { return s.max } | |
750 | ||
751 | // Merge merges the stats from other into this stat object, updating | |
752 | // the null count, distinct count, number of values and the min/max if | |
753 | // appropriate. | |
754 | func (s *Int96Statistics) Merge(other TypedStatistics) { | |
755 | rhs, ok := other.(*Int96Statistics) | |
756 | if !ok { | |
757 | panic("incompatible stat type merge") | |
758 | } | |
759 | ||
760 | s.statistics.merge(rhs) | |
761 | if rhs.HasMinMax() { | |
762 | s.SetMinMax(rhs.Min(), rhs.Max()) | |
763 | } | |
764 | } | |
765 | ||
766 | // Update is used to add more values to the current stat object, finding the | |
767 | // min and max values etc. | |
768 | func (s *Int96Statistics) Update(values []parquet.Int96, numNull int64) { | |
769 | s.incNulls(numNull) | |
770 | s.nvalues += int64(len(values)) | |
771 | ||
772 | if len(values) == 0 { | |
773 | return | |
774 | } | |
775 | ||
776 | s.SetMinMax(s.getMinMax(values)) | |
777 | } | |
778 | ||
779 | // UpdateSpaced is just like Update, but for spaced values using validBits to determine | |
780 | // and skip null values. | |
781 | func (s *Int96Statistics) UpdateSpaced(values []parquet.Int96, validBits []byte, validBitsOffset, numNull int64) { | |
782 | s.incNulls(numNull) | |
783 | notnull := int64(len(values)) - numNull | |
784 | s.nvalues += notnull | |
785 | ||
786 | if notnull == 0 { | |
787 | return | |
788 | } | |
789 | ||
790 | s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) | |
791 | } | |
792 | ||
793 | // SetMinMax updates the min and max values only if they are not currently set | |
794 | // or if argMin is less than the current min / argMax is greater than the current max | |
795 | func (s *Int96Statistics) SetMinMax(argMin, argMax parquet.Int96) { | |
796 | maybeMinMax := s.cleanStat([2]parquet.Int96{argMin, argMax}) | |
797 | if maybeMinMax == nil { | |
798 | return | |
799 | } | |
800 | ||
801 | min := (*maybeMinMax)[0] | |
802 | max := (*maybeMinMax)[1] | |
803 | ||
804 | if !s.hasMinMax { | |
805 | s.hasMinMax = true | |
806 | s.min = min | |
807 | s.max = max | |
808 | } else { | |
809 | if !s.less(s.min, min) { | |
810 | s.min = min | |
811 | } | |
812 | if s.less(s.max, max) { | |
813 | s.max = max | |
814 | } | |
815 | } | |
816 | } | |
817 | ||
818 | // EncodeMin returns the encoded min value with plain encoding. | |
819 | // | |
820 | // ByteArray stats do not include the length in the encoding. | |
821 | func (s *Int96Statistics) EncodeMin() []byte { | |
822 | if s.HasMinMax() { | |
823 | return s.plainEncode(s.min) | |
824 | } | |
825 | return nil | |
826 | } | |
827 | ||
828 | // EncodeMax returns the current encoded max value with plain encoding | |
829 | // | |
830 | // ByteArray stats do not include the length in the encoding | |
831 | func (s *Int96Statistics) EncodeMax() []byte { | |
832 | if s.HasMinMax() { | |
833 | return s.plainEncode(s.max) | |
834 | } | |
835 | return nil | |
836 | } | |
837 | ||
838 | // Encode returns a populated EncodedStatistics object | |
839 | func (s *Int96Statistics) Encode() (enc EncodedStatistics, err error) { | |
840 | defer func() { | |
841 | if r := recover(); r != nil { | |
842 | switch r := r.(type) { | |
843 | case error: | |
844 | err = r | |
845 | case string: | |
846 | err = xerrors.New(r) | |
847 | default: | |
848 | err = xerrors.Errorf("unknown error type thrown from panic: %v", r) | |
849 | } | |
850 | } | |
851 | }() | |
852 | if s.HasMinMax() { | |
853 | enc.SetMax(s.EncodeMax()) | |
854 | enc.SetMin(s.EncodeMin()) | |
855 | } | |
856 | if s.HasNullCount() { | |
857 | enc.SetNullCount(s.NullCount()) | |
858 | } | |
859 | if s.HasDistinctCount() { | |
860 | enc.SetDistinctCount(s.DistinctCount()) | |
861 | } | |
862 | return | |
863 | } | |
864 | ||
865 | type minmaxPairFloat32 [2]float32 | |
866 | ||
867 | // Float32Statistics is the typed interface for managing stats for a column | |
868 | // of Float32 type. | |
869 | type Float32Statistics struct { | |
870 | statistics | |
871 | min float32 | |
872 | max float32 | |
873 | ||
874 | bitSetReader utils.SetBitRunReader | |
875 | } | |
876 | ||
877 | // NewFloat32Statistics constructs an appropriate stat object type using the | |
878 | // given column descriptor and allocator. | |
879 | // | |
880 | // Panics if the physical type of descr is not parquet.Type.Float | |
881 | func NewFloat32Statistics(descr *schema.Column, mem memory.Allocator) *Float32Statistics { | |
882 | if descr.PhysicalType() != parquet.Types.Float { | |
883 | panic(xerrors.Errorf("parquet: invalid type %s for constructing a Float32 stat object", descr.PhysicalType())) | |
884 | } | |
885 | ||
886 | return &Float32Statistics{ | |
887 | statistics: statistics{ | |
888 | descr: descr, | |
889 | hasNullCount: true, | |
890 | hasDistinctCount: true, | |
891 | order: descr.SortOrder(), | |
892 | encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), | |
893 | mem: mem, | |
894 | }, | |
895 | } | |
896 | } | |
897 | ||
898 | // NewFloat32StatisticsFromEncoded will construct a propertly typed statistics object | |
899 | // initializing it with the provided information. | |
900 | func NewFloat32StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Float32Statistics { | |
901 | ret := NewFloat32Statistics(descr, mem) | |
902 | ret.nvalues += nvalues | |
903 | if encoded.IsSetNullCount() { | |
904 | ret.incNulls(encoded.GetNullCount()) | |
905 | } | |
906 | if encoded.IsSetDistinctCount() { | |
907 | ret.incDistinct(encoded.GetDistinctCount()) | |
908 | } | |
909 | ||
910 | encodedMin := encoded.GetMin() | |
911 | if encodedMin != nil && len(encodedMin) > 0 { | |
912 | ret.min = ret.plainDecode(encodedMin) | |
913 | } | |
914 | encodedMax := encoded.GetMax() | |
915 | if encodedMax != nil && len(encodedMax) > 0 { | |
916 | ret.max = ret.plainDecode(encodedMax) | |
917 | } | |
918 | ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() | |
919 | return ret | |
920 | } | |
921 | ||
922 | func (s *Float32Statistics) plainEncode(src float32) []byte { | |
923 | s.encoder.(encoding.Float32Encoder).Put([]float32{src}) | |
924 | buf, err := s.encoder.FlushValues() | |
925 | if err != nil { | |
926 | panic(err) // recovered by Encode | |
927 | } | |
928 | defer buf.Release() | |
929 | ||
930 | out := make([]byte, buf.Len()) | |
931 | copy(out, buf.Bytes()) | |
932 | return out | |
933 | } | |
934 | ||
935 | func (s *Float32Statistics) plainDecode(src []byte) float32 { | |
936 | var buf [1]float32 | |
937 | ||
938 | decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem) | |
939 | decoder.SetData(1, src) | |
940 | decoder.(encoding.Float32Decoder).Decode(buf[:]) | |
941 | return buf[0] | |
942 | } | |
943 | ||
944 | func (s *Float32Statistics) minval(a, b float32) float32 { | |
945 | if s.less(a, b) { | |
946 | return a | |
947 | } | |
948 | return b | |
949 | } | |
950 | ||
951 | func (s *Float32Statistics) maxval(a, b float32) float32 { | |
952 | if s.less(a, b) { | |
953 | return b | |
954 | } | |
955 | return a | |
956 | } | |
957 | ||
958 | // MinMaxEqual returns true if both stat objects have the same Min and Max values | |
959 | func (s *Float32Statistics) MinMaxEqual(rhs *Float32Statistics) bool { | |
960 | return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) | |
961 | } | |
962 | ||
963 | // Equals returns true only if both objects are the same type, have the same min and | |
964 | // max values, null count, distinct count and number of values. | |
965 | func (s *Float32Statistics) Equals(other TypedStatistics) bool { | |
966 | if s.Type() != other.Type() { | |
967 | return false | |
968 | } | |
969 | rhs, ok := other.(*Float32Statistics) | |
970 | if !ok { | |
971 | return false | |
972 | } | |
973 | ||
974 | if s.HasMinMax() != rhs.HasMinMax() { | |
975 | return false | |
976 | } | |
977 | return (s.hasMinMax && s.MinMaxEqual(rhs)) && | |
978 | s.NullCount() == rhs.NullCount() && | |
979 | s.DistinctCount() == rhs.DistinctCount() && | |
980 | s.NumValues() == rhs.NumValues() | |
981 | } | |
982 | ||
983 | func (s *Float32Statistics) coalesce(val, fallback float32) float32 { | |
984 | if math.IsNaN(float64(val)) { | |
985 | return fallback | |
986 | } | |
987 | return val | |
988 | } | |
989 | ||
990 | func (s *Float32Statistics) getMinMax(values []float32) (min, max float32) { | |
991 | defMin := s.defaultMin() | |
992 | defMax := s.defaultMax() | |
993 | ||
994 | min = defMin | |
995 | max = defMax | |
996 | ||
997 | for _, v := range values { | |
998 | min = s.minval(min, s.coalesce(v, defMin)) | |
999 | max = s.maxval(max, s.coalesce(v, defMax)) | |
1000 | } | |
1001 | return | |
1002 | } | |
1003 | ||
1004 | func (s *Float32Statistics) getMinMaxSpaced(values []float32, validBits []byte, validBitsOffset int64) (min, max float32) { | |
1005 | min = s.defaultMin() | |
1006 | max = s.defaultMax() | |
1007 | ||
1008 | if s.bitSetReader == nil { | |
1009 | s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) | |
1010 | } else { | |
1011 | s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) | |
1012 | } | |
1013 | ||
1014 | for { | |
1015 | run := s.bitSetReader.NextRun() | |
1016 | if run.Length == 0 { | |
1017 | break | |
1018 | } | |
1019 | for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] { | |
1020 | min = s.minval(min, coalesce(v, s.defaultMin()).(float32)) | |
1021 | max = s.maxval(max, coalesce(v, s.defaultMax()).(float32)) | |
1022 | } | |
1023 | } | |
1024 | return | |
1025 | } | |
1026 | ||
1027 | func (s *Float32Statistics) Min() float32 { return s.min } | |
1028 | func (s *Float32Statistics) Max() float32 { return s.max } | |
1029 | ||
1030 | // Merge merges the stats from other into this stat object, updating | |
1031 | // the null count, distinct count, number of values and the min/max if | |
1032 | // appropriate. | |
1033 | func (s *Float32Statistics) Merge(other TypedStatistics) { | |
1034 | rhs, ok := other.(*Float32Statistics) | |
1035 | if !ok { | |
1036 | panic("incompatible stat type merge") | |
1037 | } | |
1038 | ||
1039 | s.statistics.merge(rhs) | |
1040 | if rhs.HasMinMax() { | |
1041 | s.SetMinMax(rhs.Min(), rhs.Max()) | |
1042 | } | |
1043 | } | |
1044 | ||
1045 | // Update is used to add more values to the current stat object, finding the | |
1046 | // min and max values etc. | |
1047 | func (s *Float32Statistics) Update(values []float32, numNull int64) { | |
1048 | s.incNulls(numNull) | |
1049 | s.nvalues += int64(len(values)) | |
1050 | ||
1051 | if len(values) == 0 { | |
1052 | return | |
1053 | } | |
1054 | ||
1055 | s.SetMinMax(s.getMinMax(values)) | |
1056 | } | |
1057 | ||
1058 | // UpdateSpaced is just like Update, but for spaced values using validBits to determine | |
1059 | // and skip null values. | |
1060 | func (s *Float32Statistics) UpdateSpaced(values []float32, validBits []byte, validBitsOffset, numNull int64) { | |
1061 | s.incNulls(numNull) | |
1062 | notnull := int64(len(values)) - numNull | |
1063 | s.nvalues += notnull | |
1064 | ||
1065 | if notnull == 0 { | |
1066 | return | |
1067 | } | |
1068 | ||
1069 | s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) | |
1070 | } | |
1071 | ||
1072 | // SetMinMax updates the min and max values only if they are not currently set | |
1073 | // or if argMin is less than the current min / argMax is greater than the current max | |
1074 | func (s *Float32Statistics) SetMinMax(argMin, argMax float32) { | |
1075 | maybeMinMax := s.cleanStat([2]float32{argMin, argMax}) | |
1076 | if maybeMinMax == nil { | |
1077 | return | |
1078 | } | |
1079 | ||
1080 | min := (*maybeMinMax)[0] | |
1081 | max := (*maybeMinMax)[1] | |
1082 | ||
1083 | if !s.hasMinMax { | |
1084 | s.hasMinMax = true | |
1085 | s.min = min | |
1086 | s.max = max | |
1087 | } else { | |
1088 | if !s.less(s.min, min) { | |
1089 | s.min = min | |
1090 | } | |
1091 | if s.less(s.max, max) { | |
1092 | s.max = max | |
1093 | } | |
1094 | } | |
1095 | } | |
1096 | ||
1097 | // EncodeMin returns the encoded min value with plain encoding. | |
1098 | // | |
1099 | // ByteArray stats do not include the length in the encoding. | |
1100 | func (s *Float32Statistics) EncodeMin() []byte { | |
1101 | if s.HasMinMax() { | |
1102 | return s.plainEncode(s.min) | |
1103 | } | |
1104 | return nil | |
1105 | } | |
1106 | ||
1107 | // EncodeMax returns the current encoded max value with plain encoding | |
1108 | // | |
1109 | // ByteArray stats do not include the length in the encoding | |
1110 | func (s *Float32Statistics) EncodeMax() []byte { | |
1111 | if s.HasMinMax() { | |
1112 | return s.plainEncode(s.max) | |
1113 | } | |
1114 | return nil | |
1115 | } | |
1116 | ||
1117 | // Encode returns a populated EncodedStatistics object | |
1118 | func (s *Float32Statistics) Encode() (enc EncodedStatistics, err error) { | |
1119 | defer func() { | |
1120 | if r := recover(); r != nil { | |
1121 | switch r := r.(type) { | |
1122 | case error: | |
1123 | err = r | |
1124 | case string: | |
1125 | err = xerrors.New(r) | |
1126 | default: | |
1127 | err = xerrors.Errorf("unknown error type thrown from panic: %v", r) | |
1128 | } | |
1129 | } | |
1130 | }() | |
1131 | if s.HasMinMax() { | |
1132 | enc.SetMax(s.EncodeMax()) | |
1133 | enc.SetMin(s.EncodeMin()) | |
1134 | } | |
1135 | if s.HasNullCount() { | |
1136 | enc.SetNullCount(s.NullCount()) | |
1137 | } | |
1138 | if s.HasDistinctCount() { | |
1139 | enc.SetDistinctCount(s.DistinctCount()) | |
1140 | } | |
1141 | return | |
1142 | } | |
1143 | ||
// minmaxPairFloat64 is a pair of float64 values. SetMinMax treats index 0 of
// such a pair as the minimum and index 1 as the maximum.
// NOTE(review): presumably this is the pair type accepted/returned by
// cleanStat — confirm against its definition.
type minmaxPairFloat64 [2]float64
1145 | ||
1146 | // Float64Statistics is the typed interface for managing stats for a column | |
1147 | // of Float64 type. | |
1148 | type Float64Statistics struct { | |
1149 | statistics | |
1150 | min float64 | |
1151 | max float64 | |
1152 | ||
1153 | bitSetReader utils.SetBitRunReader | |
1154 | } | |
1155 | ||
1156 | // NewFloat64Statistics constructs an appropriate stat object type using the | |
1157 | // given column descriptor and allocator. | |
1158 | // | |
1159 | // Panics if the physical type of descr is not parquet.Type.Double | |
1160 | func NewFloat64Statistics(descr *schema.Column, mem memory.Allocator) *Float64Statistics { | |
1161 | if descr.PhysicalType() != parquet.Types.Double { | |
1162 | panic(xerrors.Errorf("parquet: invalid type %s for constructing a Float64 stat object", descr.PhysicalType())) | |
1163 | } | |
1164 | ||
1165 | return &Float64Statistics{ | |
1166 | statistics: statistics{ | |
1167 | descr: descr, | |
1168 | hasNullCount: true, | |
1169 | hasDistinctCount: true, | |
1170 | order: descr.SortOrder(), | |
1171 | encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), | |
1172 | mem: mem, | |
1173 | }, | |
1174 | } | |
1175 | } | |
1176 | ||
1177 | // NewFloat64StatisticsFromEncoded will construct a propertly typed statistics object | |
1178 | // initializing it with the provided information. | |
1179 | func NewFloat64StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Float64Statistics { | |
1180 | ret := NewFloat64Statistics(descr, mem) | |
1181 | ret.nvalues += nvalues | |
1182 | if encoded.IsSetNullCount() { | |
1183 | ret.incNulls(encoded.GetNullCount()) | |
1184 | } | |
1185 | if encoded.IsSetDistinctCount() { | |
1186 | ret.incDistinct(encoded.GetDistinctCount()) | |
1187 | } | |
1188 | ||
1189 | encodedMin := encoded.GetMin() | |
1190 | if encodedMin != nil && len(encodedMin) > 0 { | |
1191 | ret.min = ret.plainDecode(encodedMin) | |
1192 | } | |
1193 | encodedMax := encoded.GetMax() | |
1194 | if encodedMax != nil && len(encodedMax) > 0 { | |
1195 | ret.max = ret.plainDecode(encodedMax) | |
1196 | } | |
1197 | ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() | |
1198 | return ret | |
1199 | } | |
1200 | ||
1201 | func (s *Float64Statistics) plainEncode(src float64) []byte { | |
1202 | s.encoder.(encoding.Float64Encoder).Put([]float64{src}) | |
1203 | buf, err := s.encoder.FlushValues() | |
1204 | if err != nil { | |
1205 | panic(err) // recovered by Encode | |
1206 | } | |
1207 | defer buf.Release() | |
1208 | ||
1209 | out := make([]byte, buf.Len()) | |
1210 | copy(out, buf.Bytes()) | |
1211 | return out | |
1212 | } | |
1213 | ||
1214 | func (s *Float64Statistics) plainDecode(src []byte) float64 { | |
1215 | var buf [1]float64 | |
1216 | ||
1217 | decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem) | |
1218 | decoder.SetData(1, src) | |
1219 | decoder.(encoding.Float64Decoder).Decode(buf[:]) | |
1220 | return buf[0] | |
1221 | } | |
1222 | ||
1223 | func (s *Float64Statistics) minval(a, b float64) float64 { | |
1224 | if s.less(a, b) { | |
1225 | return a | |
1226 | } | |
1227 | return b | |
1228 | } | |
1229 | ||
1230 | func (s *Float64Statistics) maxval(a, b float64) float64 { | |
1231 | if s.less(a, b) { | |
1232 | return b | |
1233 | } | |
1234 | return a | |
1235 | } | |
1236 | ||
1237 | // MinMaxEqual returns true if both stat objects have the same Min and Max values | |
1238 | func (s *Float64Statistics) MinMaxEqual(rhs *Float64Statistics) bool { | |
1239 | return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) | |
1240 | } | |
1241 | ||
1242 | // Equals returns true only if both objects are the same type, have the same min and | |
1243 | // max values, null count, distinct count and number of values. | |
1244 | func (s *Float64Statistics) Equals(other TypedStatistics) bool { | |
1245 | if s.Type() != other.Type() { | |
1246 | return false | |
1247 | } | |
1248 | rhs, ok := other.(*Float64Statistics) | |
1249 | if !ok { | |
1250 | return false | |
1251 | } | |
1252 | ||
1253 | if s.HasMinMax() != rhs.HasMinMax() { | |
1254 | return false | |
1255 | } | |
1256 | return (s.hasMinMax && s.MinMaxEqual(rhs)) && | |
1257 | s.NullCount() == rhs.NullCount() && | |
1258 | s.DistinctCount() == rhs.DistinctCount() && | |
1259 | s.NumValues() == rhs.NumValues() | |
1260 | } | |
1261 | ||
1262 | func (s *Float64Statistics) coalesce(val, fallback float64) float64 { | |
1263 | if math.IsNaN(float64(val)) { | |
1264 | return fallback | |
1265 | } | |
1266 | return val | |
1267 | } | |
1268 | ||
1269 | func (s *Float64Statistics) getMinMax(values []float64) (min, max float64) { | |
1270 | defMin := s.defaultMin() | |
1271 | defMax := s.defaultMax() | |
1272 | ||
1273 | min = defMin | |
1274 | max = defMax | |
1275 | ||
1276 | for _, v := range values { | |
1277 | min = s.minval(min, s.coalesce(v, defMin)) | |
1278 | max = s.maxval(max, s.coalesce(v, defMax)) | |
1279 | } | |
1280 | return | |
1281 | } | |
1282 | ||
1283 | func (s *Float64Statistics) getMinMaxSpaced(values []float64, validBits []byte, validBitsOffset int64) (min, max float64) { | |
1284 | min = s.defaultMin() | |
1285 | max = s.defaultMax() | |
1286 | ||
1287 | if s.bitSetReader == nil { | |
1288 | s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) | |
1289 | } else { | |
1290 | s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) | |
1291 | } | |
1292 | ||
1293 | for { | |
1294 | run := s.bitSetReader.NextRun() | |
1295 | if run.Length == 0 { | |
1296 | break | |
1297 | } | |
1298 | for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] { | |
1299 | min = s.minval(min, coalesce(v, s.defaultMin()).(float64)) | |
1300 | max = s.maxval(max, coalesce(v, s.defaultMax()).(float64)) | |
1301 | } | |
1302 | } | |
1303 | return | |
1304 | } | |
1305 | ||
1306 | func (s *Float64Statistics) Min() float64 { return s.min } | |
1307 | func (s *Float64Statistics) Max() float64 { return s.max } | |
1308 | ||
1309 | // Merge merges the stats from other into this stat object, updating | |
1310 | // the null count, distinct count, number of values and the min/max if | |
1311 | // appropriate. | |
1312 | func (s *Float64Statistics) Merge(other TypedStatistics) { | |
1313 | rhs, ok := other.(*Float64Statistics) | |
1314 | if !ok { | |
1315 | panic("incompatible stat type merge") | |
1316 | } | |
1317 | ||
1318 | s.statistics.merge(rhs) | |
1319 | if rhs.HasMinMax() { | |
1320 | s.SetMinMax(rhs.Min(), rhs.Max()) | |
1321 | } | |
1322 | } | |
1323 | ||
1324 | // Update is used to add more values to the current stat object, finding the | |
1325 | // min and max values etc. | |
1326 | func (s *Float64Statistics) Update(values []float64, numNull int64) { | |
1327 | s.incNulls(numNull) | |
1328 | s.nvalues += int64(len(values)) | |
1329 | ||
1330 | if len(values) == 0 { | |
1331 | return | |
1332 | } | |
1333 | ||
1334 | s.SetMinMax(s.getMinMax(values)) | |
1335 | } | |
1336 | ||
1337 | // UpdateSpaced is just like Update, but for spaced values using validBits to determine | |
1338 | // and skip null values. | |
1339 | func (s *Float64Statistics) UpdateSpaced(values []float64, validBits []byte, validBitsOffset, numNull int64) { | |
1340 | s.incNulls(numNull) | |
1341 | notnull := int64(len(values)) - numNull | |
1342 | s.nvalues += notnull | |
1343 | ||
1344 | if notnull == 0 { | |
1345 | return | |
1346 | } | |
1347 | ||
1348 | s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) | |
1349 | } | |
1350 | ||
1351 | // SetMinMax updates the min and max values only if they are not currently set | |
1352 | // or if argMin is less than the current min / argMax is greater than the current max | |
1353 | func (s *Float64Statistics) SetMinMax(argMin, argMax float64) { | |
1354 | maybeMinMax := s.cleanStat([2]float64{argMin, argMax}) | |
1355 | if maybeMinMax == nil { | |
1356 | return | |
1357 | } | |
1358 | ||
1359 | min := (*maybeMinMax)[0] | |
1360 | max := (*maybeMinMax)[1] | |
1361 | ||
1362 | if !s.hasMinMax { | |
1363 | s.hasMinMax = true | |
1364 | s.min = min | |
1365 | s.max = max | |
1366 | } else { | |
1367 | if !s.less(s.min, min) { | |
1368 | s.min = min | |
1369 | } | |
1370 | if s.less(s.max, max) { | |
1371 | s.max = max | |
1372 | } | |
1373 | } | |
1374 | } | |
1375 | ||
1376 | // EncodeMin returns the encoded min value with plain encoding. | |
1377 | // | |
1378 | // ByteArray stats do not include the length in the encoding. | |
1379 | func (s *Float64Statistics) EncodeMin() []byte { | |
1380 | if s.HasMinMax() { | |
1381 | return s.plainEncode(s.min) | |
1382 | } | |
1383 | return nil | |
1384 | } | |
1385 | ||
1386 | // EncodeMax returns the current encoded max value with plain encoding | |
1387 | // | |
1388 | // ByteArray stats do not include the length in the encoding | |
1389 | func (s *Float64Statistics) EncodeMax() []byte { | |
1390 | if s.HasMinMax() { | |
1391 | return s.plainEncode(s.max) | |
1392 | } | |
1393 | return nil | |
1394 | } | |
1395 | ||
1396 | // Encode returns a populated EncodedStatistics object | |
1397 | func (s *Float64Statistics) Encode() (enc EncodedStatistics, err error) { | |
1398 | defer func() { | |
1399 | if r := recover(); r != nil { | |
1400 | switch r := r.(type) { | |
1401 | case error: | |
1402 | err = r | |
1403 | case string: | |
1404 | err = xerrors.New(r) | |
1405 | default: | |
1406 | err = xerrors.Errorf("unknown error type thrown from panic: %v", r) | |
1407 | } | |
1408 | } | |
1409 | }() | |
1410 | if s.HasMinMax() { | |
1411 | enc.SetMax(s.EncodeMax()) | |
1412 | enc.SetMin(s.EncodeMin()) | |
1413 | } | |
1414 | if s.HasNullCount() { | |
1415 | enc.SetNullCount(s.NullCount()) | |
1416 | } | |
1417 | if s.HasDistinctCount() { | |
1418 | enc.SetDistinctCount(s.DistinctCount()) | |
1419 | } | |
1420 | return | |
1421 | } | |
1422 | ||
// minmaxPairBoolean is a pair of bool values. SetMinMax treats index 0 of
// such a pair as the minimum and index 1 as the maximum.
// NOTE(review): presumably this is the pair type accepted/returned by
// cleanStat — confirm against its definition.
type minmaxPairBoolean [2]bool
1424 | ||
1425 | // BooleanStatistics is the typed interface for managing stats for a column | |
1426 | // of Boolean type. | |
1427 | type BooleanStatistics struct { | |
1428 | statistics | |
1429 | min bool | |
1430 | max bool | |
1431 | ||
1432 | bitSetReader utils.SetBitRunReader | |
1433 | } | |
1434 | ||
1435 | // NewBooleanStatistics constructs an appropriate stat object type using the | |
1436 | // given column descriptor and allocator. | |
1437 | // | |
1438 | // Panics if the physical type of descr is not parquet.Type.Boolean | |
1439 | func NewBooleanStatistics(descr *schema.Column, mem memory.Allocator) *BooleanStatistics { | |
1440 | if descr.PhysicalType() != parquet.Types.Boolean { | |
1441 | panic(xerrors.Errorf("parquet: invalid type %s for constructing a Boolean stat object", descr.PhysicalType())) | |
1442 | } | |
1443 | ||
1444 | return &BooleanStatistics{ | |
1445 | statistics: statistics{ | |
1446 | descr: descr, | |
1447 | hasNullCount: true, | |
1448 | hasDistinctCount: true, | |
1449 | order: descr.SortOrder(), | |
1450 | encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), | |
1451 | mem: mem, | |
1452 | }, | |
1453 | } | |
1454 | } | |
1455 | ||
1456 | // NewBooleanStatisticsFromEncoded will construct a propertly typed statistics object | |
1457 | // initializing it with the provided information. | |
1458 | func NewBooleanStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *BooleanStatistics { | |
1459 | ret := NewBooleanStatistics(descr, mem) | |
1460 | ret.nvalues += nvalues | |
1461 | if encoded.IsSetNullCount() { | |
1462 | ret.incNulls(encoded.GetNullCount()) | |
1463 | } | |
1464 | if encoded.IsSetDistinctCount() { | |
1465 | ret.incDistinct(encoded.GetDistinctCount()) | |
1466 | } | |
1467 | ||
1468 | encodedMin := encoded.GetMin() | |
1469 | if encodedMin != nil && len(encodedMin) > 0 { | |
1470 | ret.min = ret.plainDecode(encodedMin) | |
1471 | } | |
1472 | encodedMax := encoded.GetMax() | |
1473 | if encodedMax != nil && len(encodedMax) > 0 { | |
1474 | ret.max = ret.plainDecode(encodedMax) | |
1475 | } | |
1476 | ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() | |
1477 | return ret | |
1478 | } | |
1479 | ||
1480 | func (s *BooleanStatistics) plainEncode(src bool) []byte { | |
1481 | s.encoder.(encoding.BooleanEncoder).Put([]bool{src}) | |
1482 | buf, err := s.encoder.FlushValues() | |
1483 | if err != nil { | |
1484 | panic(err) // recovered by Encode | |
1485 | } | |
1486 | defer buf.Release() | |
1487 | ||
1488 | out := make([]byte, buf.Len()) | |
1489 | copy(out, buf.Bytes()) | |
1490 | return out | |
1491 | } | |
1492 | ||
1493 | func (s *BooleanStatistics) plainDecode(src []byte) bool { | |
1494 | var buf [1]bool | |
1495 | ||
1496 | decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem) | |
1497 | decoder.SetData(1, src) | |
1498 | decoder.(encoding.BooleanDecoder).Decode(buf[:]) | |
1499 | return buf[0] | |
1500 | } | |
1501 | ||
1502 | func (s *BooleanStatistics) minval(a, b bool) bool { | |
1503 | if s.less(a, b) { | |
1504 | return a | |
1505 | } | |
1506 | return b | |
1507 | } | |
1508 | ||
1509 | func (s *BooleanStatistics) maxval(a, b bool) bool { | |
1510 | if s.less(a, b) { | |
1511 | return b | |
1512 | } | |
1513 | return a | |
1514 | } | |
1515 | ||
1516 | // MinMaxEqual returns true if both stat objects have the same Min and Max values | |
1517 | func (s *BooleanStatistics) MinMaxEqual(rhs *BooleanStatistics) bool { | |
1518 | return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) | |
1519 | } | |
1520 | ||
1521 | // Equals returns true only if both objects are the same type, have the same min and | |
1522 | // max values, null count, distinct count and number of values. | |
1523 | func (s *BooleanStatistics) Equals(other TypedStatistics) bool { | |
1524 | if s.Type() != other.Type() { | |
1525 | return false | |
1526 | } | |
1527 | rhs, ok := other.(*BooleanStatistics) | |
1528 | if !ok { | |
1529 | return false | |
1530 | } | |
1531 | ||
1532 | if s.HasMinMax() != rhs.HasMinMax() { | |
1533 | return false | |
1534 | } | |
1535 | return (s.hasMinMax && s.MinMaxEqual(rhs)) && | |
1536 | s.NullCount() == rhs.NullCount() && | |
1537 | s.DistinctCount() == rhs.DistinctCount() && | |
1538 | s.NumValues() == rhs.NumValues() | |
1539 | } | |
1540 | ||
1541 | func (s *BooleanStatistics) getMinMax(values []bool) (min, max bool) { | |
1542 | defMin := s.defaultMin() | |
1543 | defMax := s.defaultMax() | |
1544 | ||
1545 | min = defMin | |
1546 | max = defMax | |
1547 | ||
1548 | for _, v := range values { | |
1549 | min = s.minval(min, v) | |
1550 | max = s.maxval(max, v) | |
1551 | } | |
1552 | return | |
1553 | } | |
1554 | ||
1555 | func (s *BooleanStatistics) getMinMaxSpaced(values []bool, validBits []byte, validBitsOffset int64) (min, max bool) { | |
1556 | min = s.defaultMin() | |
1557 | max = s.defaultMax() | |
1558 | ||
1559 | if s.bitSetReader == nil { | |
1560 | s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) | |
1561 | } else { | |
1562 | s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) | |
1563 | } | |
1564 | ||
1565 | for { | |
1566 | run := s.bitSetReader.NextRun() | |
1567 | if run.Length == 0 { | |
1568 | break | |
1569 | } | |
1570 | for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] { | |
1571 | min = s.minval(min, v) | |
1572 | max = s.maxval(max, v) | |
1573 | } | |
1574 | } | |
1575 | return | |
1576 | } | |
1577 | ||
1578 | func (s *BooleanStatistics) Min() bool { return s.min } | |
1579 | func (s *BooleanStatistics) Max() bool { return s.max } | |
1580 | ||
1581 | // Merge merges the stats from other into this stat object, updating | |
1582 | // the null count, distinct count, number of values and the min/max if | |
1583 | // appropriate. | |
1584 | func (s *BooleanStatistics) Merge(other TypedStatistics) { | |
1585 | rhs, ok := other.(*BooleanStatistics) | |
1586 | if !ok { | |
1587 | panic("incompatible stat type merge") | |
1588 | } | |
1589 | ||
1590 | s.statistics.merge(rhs) | |
1591 | if rhs.HasMinMax() { | |
1592 | s.SetMinMax(rhs.Min(), rhs.Max()) | |
1593 | } | |
1594 | } | |
1595 | ||
1596 | // Update is used to add more values to the current stat object, finding the | |
1597 | // min and max values etc. | |
1598 | func (s *BooleanStatistics) Update(values []bool, numNull int64) { | |
1599 | s.incNulls(numNull) | |
1600 | s.nvalues += int64(len(values)) | |
1601 | ||
1602 | if len(values) == 0 { | |
1603 | return | |
1604 | } | |
1605 | ||
1606 | s.SetMinMax(s.getMinMax(values)) | |
1607 | } | |
1608 | ||
1609 | // UpdateSpaced is just like Update, but for spaced values using validBits to determine | |
1610 | // and skip null values. | |
1611 | func (s *BooleanStatistics) UpdateSpaced(values []bool, validBits []byte, validBitsOffset, numNull int64) { | |
1612 | s.incNulls(numNull) | |
1613 | notnull := int64(len(values)) - numNull | |
1614 | s.nvalues += notnull | |
1615 | ||
1616 | if notnull == 0 { | |
1617 | return | |
1618 | } | |
1619 | ||
1620 | s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) | |
1621 | } | |
1622 | ||
1623 | // SetMinMax updates the min and max values only if they are not currently set | |
1624 | // or if argMin is less than the current min / argMax is greater than the current max | |
1625 | func (s *BooleanStatistics) SetMinMax(argMin, argMax bool) { | |
1626 | maybeMinMax := s.cleanStat([2]bool{argMin, argMax}) | |
1627 | if maybeMinMax == nil { | |
1628 | return | |
1629 | } | |
1630 | ||
1631 | min := (*maybeMinMax)[0] | |
1632 | max := (*maybeMinMax)[1] | |
1633 | ||
1634 | if !s.hasMinMax { | |
1635 | s.hasMinMax = true | |
1636 | s.min = min | |
1637 | s.max = max | |
1638 | } else { | |
1639 | if !s.less(s.min, min) { | |
1640 | s.min = min | |
1641 | } | |
1642 | if s.less(s.max, max) { | |
1643 | s.max = max | |
1644 | } | |
1645 | } | |
1646 | } | |
1647 | ||
1648 | // EncodeMin returns the encoded min value with plain encoding. | |
1649 | // | |
1650 | // ByteArray stats do not include the length in the encoding. | |
1651 | func (s *BooleanStatistics) EncodeMin() []byte { | |
1652 | if s.HasMinMax() { | |
1653 | return s.plainEncode(s.min) | |
1654 | } | |
1655 | return nil | |
1656 | } | |
1657 | ||
1658 | // EncodeMax returns the current encoded max value with plain encoding | |
1659 | // | |
1660 | // ByteArray stats do not include the length in the encoding | |
1661 | func (s *BooleanStatistics) EncodeMax() []byte { | |
1662 | if s.HasMinMax() { | |
1663 | return s.plainEncode(s.max) | |
1664 | } | |
1665 | return nil | |
1666 | } | |
1667 | ||
1668 | // Encode returns a populated EncodedStatistics object | |
1669 | func (s *BooleanStatistics) Encode() (enc EncodedStatistics, err error) { | |
1670 | defer func() { | |
1671 | if r := recover(); r != nil { | |
1672 | switch r := r.(type) { | |
1673 | case error: | |
1674 | err = r | |
1675 | case string: | |
1676 | err = xerrors.New(r) | |
1677 | default: | |
1678 | err = xerrors.Errorf("unknown error type thrown from panic: %v", r) | |
1679 | } | |
1680 | } | |
1681 | }() | |
1682 | if s.HasMinMax() { | |
1683 | enc.SetMax(s.EncodeMax()) | |
1684 | enc.SetMin(s.EncodeMin()) | |
1685 | } | |
1686 | if s.HasNullCount() { | |
1687 | enc.SetNullCount(s.NullCount()) | |
1688 | } | |
1689 | if s.HasDistinctCount() { | |
1690 | enc.SetDistinctCount(s.DistinctCount()) | |
1691 | } | |
1692 | return | |
1693 | } | |
1694 | ||
// minmaxPairByteArray is a pair of parquet.ByteArray values.
// NOTE(review): presumably index 0 is the minimum and index 1 the maximum, as
// with the other minmaxPair types in this file — confirm against cleanStat.
type minmaxPairByteArray [2]parquet.ByteArray
1696 | ||
1697 | // ByteArrayStatistics is the typed interface for managing stats for a column | |
1698 | // of ByteArray type. | |
1699 | type ByteArrayStatistics struct { | |
1700 | statistics | |
1701 | min parquet.ByteArray | |
1702 | max parquet.ByteArray | |
1703 | ||
1704 | bitSetReader utils.SetBitRunReader | |
1705 | } | |
1706 | ||
1707 | // NewByteArrayStatistics constructs an appropriate stat object type using the | |
1708 | // given column descriptor and allocator. | |
1709 | // | |
1710 | // Panics if the physical type of descr is not parquet.Type.ByteArray | |
1711 | func NewByteArrayStatistics(descr *schema.Column, mem memory.Allocator) *ByteArrayStatistics { | |
1712 | if descr.PhysicalType() != parquet.Types.ByteArray { | |
1713 | panic(xerrors.Errorf("parquet: invalid type %s for constructing a ByteArray stat object", descr.PhysicalType())) | |
1714 | } | |
1715 | ||
1716 | return &ByteArrayStatistics{ | |
1717 | statistics: statistics{ | |
1718 | descr: descr, | |
1719 | hasNullCount: true, | |
1720 | hasDistinctCount: true, | |
1721 | order: descr.SortOrder(), | |
1722 | encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), | |
1723 | mem: mem, | |
1724 | }, | |
1725 | ||
1726 | min: make([]byte, 0), | |
1727 | max: make([]byte, 0), | |
1728 | } | |
1729 | } | |
1730 | ||
1731 | // NewByteArrayStatisticsFromEncoded will construct a propertly typed statistics object | |
1732 | // initializing it with the provided information. | |
1733 | func NewByteArrayStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *ByteArrayStatistics { | |
1734 | ret := NewByteArrayStatistics(descr, mem) | |
1735 | ret.nvalues += nvalues | |
1736 | if encoded.IsSetNullCount() { | |
1737 | ret.incNulls(encoded.GetNullCount()) | |
1738 | } | |
1739 | if encoded.IsSetDistinctCount() { | |
1740 | ret.incDistinct(encoded.GetDistinctCount()) | |
1741 | } | |
1742 | ||
1743 | encodedMin := encoded.GetMin() | |
1744 | if encodedMin != nil && len(encodedMin) > 0 { | |
1745 | ret.min = ret.plainDecode(encodedMin) | |
1746 | } | |
1747 | encodedMax := encoded.GetMax() | |
1748 | if encodedMax != nil && len(encodedMax) > 0 { | |
1749 | ret.max = ret.plainDecode(encodedMax) | |
1750 | } | |
1751 | ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() | |
1752 | return ret | |
1753 | } | |
1754 | ||
1755 | func (s *ByteArrayStatistics) plainEncode(src parquet.ByteArray) []byte { | |
1756 | return src | |
1757 | } | |
1758 | ||
1759 | func (s *ByteArrayStatistics) plainDecode(src []byte) parquet.ByteArray { | |
1760 | return src | |
1761 | } | |
1762 | ||
1763 | func (s *ByteArrayStatistics) minval(a, b parquet.ByteArray) parquet.ByteArray { | |
1764 | switch { | |
1765 | case a == nil: | |
1766 | return b | |
1767 | case b == nil: | |
1768 | return a | |
1769 | case s.less(a, b): | |
1770 | return a | |
1771 | default: | |
1772 | return b | |
1773 | } | |
1774 | } | |
1775 | ||
1776 | func (s *ByteArrayStatistics) maxval(a, b parquet.ByteArray) parquet.ByteArray { | |
1777 | switch { | |
1778 | case a == nil: | |
1779 | return b | |
1780 | case b == nil: | |
1781 | return a | |
1782 | case s.less(a, b): | |
1783 | return b | |
1784 | default: | |
1785 | return a | |
1786 | } | |
1787 | } | |
1788 | ||
1789 | // MinMaxEqual returns true if both stat objects have the same Min and Max values | |
1790 | func (s *ByteArrayStatistics) MinMaxEqual(rhs *ByteArrayStatistics) bool { | |
1791 | return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) | |
1792 | } | |
1793 | ||
1794 | // Equals returns true only if both objects are the same type, have the same min and | |
1795 | // max values, null count, distinct count and number of values. | |
1796 | func (s *ByteArrayStatistics) Equals(other TypedStatistics) bool { | |
1797 | if s.Type() != other.Type() { | |
1798 | return false | |
1799 | } | |
1800 | rhs, ok := other.(*ByteArrayStatistics) | |
1801 | if !ok { | |
1802 | return false | |
1803 | } | |
1804 | ||
1805 | if s.HasMinMax() != rhs.HasMinMax() { | |
1806 | return false | |
1807 | } | |
1808 | return (s.hasMinMax && s.MinMaxEqual(rhs)) && | |
1809 | s.NullCount() == rhs.NullCount() && | |
1810 | s.DistinctCount() == rhs.DistinctCount() && | |
1811 | s.NumValues() == rhs.NumValues() | |
1812 | } | |
1813 | ||
1814 | func (s *ByteArrayStatistics) getMinMax(values []parquet.ByteArray) (min, max parquet.ByteArray) { | |
1815 | defMin := s.defaultMin() | |
1816 | defMax := s.defaultMax() | |
1817 | ||
1818 | min = defMin | |
1819 | max = defMax | |
1820 | ||
1821 | for _, v := range values { | |
1822 | min = s.minval(min, v) | |
1823 | max = s.maxval(max, v) | |
1824 | } | |
1825 | return | |
1826 | } | |
1827 | ||
1828 | func (s *ByteArrayStatistics) getMinMaxSpaced(values []parquet.ByteArray, validBits []byte, validBitsOffset int64) (min, max parquet.ByteArray) { | |
1829 | min = s.defaultMin() | |
1830 | max = s.defaultMax() | |
1831 | ||
1832 | if s.bitSetReader == nil { | |
1833 | s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) | |
1834 | } else { | |
1835 | s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) | |
1836 | } | |
1837 | ||
1838 | for { | |
1839 | run := s.bitSetReader.NextRun() | |
1840 | if run.Length == 0 { | |
1841 | break | |
1842 | } | |
1843 | for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] { | |
1844 | min = s.minval(min, v) | |
1845 | max = s.maxval(max, v) | |
1846 | } | |
1847 | } | |
1848 | return | |
1849 | } | |
1850 | ||
1851 | func (s *ByteArrayStatistics) Min() parquet.ByteArray { return s.min } | |
1852 | func (s *ByteArrayStatistics) Max() parquet.ByteArray { return s.max } | |
1853 | ||
1854 | // Merge merges the stats from other into this stat object, updating | |
1855 | // the null count, distinct count, number of values and the min/max if | |
1856 | // appropriate. | |
1857 | func (s *ByteArrayStatistics) Merge(other TypedStatistics) { | |
1858 | rhs, ok := other.(*ByteArrayStatistics) | |
1859 | if !ok { | |
1860 | panic("incompatible stat type merge") | |
1861 | } | |
1862 | ||
1863 | s.statistics.merge(rhs) | |
1864 | if rhs.HasMinMax() { | |
1865 | s.SetMinMax(rhs.Min(), rhs.Max()) | |
1866 | } | |
1867 | } | |
1868 | ||
1869 | // Update is used to add more values to the current stat object, finding the | |
1870 | // min and max values etc. | |
1871 | func (s *ByteArrayStatistics) Update(values []parquet.ByteArray, numNull int64) { | |
1872 | s.incNulls(numNull) | |
1873 | s.nvalues += int64(len(values)) | |
1874 | ||
1875 | if len(values) == 0 { | |
1876 | return | |
1877 | } | |
1878 | ||
1879 | s.SetMinMax(s.getMinMax(values)) | |
1880 | } | |
1881 | ||
1882 | // UpdateSpaced is just like Update, but for spaced values using validBits to determine | |
1883 | // and skip null values. | |
1884 | func (s *ByteArrayStatistics) UpdateSpaced(values []parquet.ByteArray, validBits []byte, validBitsOffset, numNull int64) { | |
1885 | s.incNulls(numNull) | |
1886 | notnull := int64(len(values)) - numNull | |
1887 | s.nvalues += notnull | |
1888 | ||
1889 | if notnull == 0 { | |
1890 | return | |
1891 | } | |
1892 | ||
1893 | s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) | |
1894 | } | |
1895 | ||
1896 | // SetMinMax updates the min and max values only if they are not currently set | |
1897 | // or if argMin is less than the current min / argMax is greater than the current max | |
1898 | func (s *ByteArrayStatistics) SetMinMax(argMin, argMax parquet.ByteArray) { | |
1899 | maybeMinMax := s.cleanStat([2]parquet.ByteArray{argMin, argMax}) | |
1900 | if maybeMinMax == nil { | |
1901 | return | |
1902 | } | |
1903 | ||
1904 | min := (*maybeMinMax)[0] | |
1905 | max := (*maybeMinMax)[1] | |
1906 | ||
1907 | if !s.hasMinMax { | |
1908 | s.hasMinMax = true | |
1909 | s.min = min | |
1910 | s.max = max | |
1911 | } else { | |
1912 | if !s.less(s.min, min) { | |
1913 | s.min = min | |
1914 | } | |
1915 | if s.less(s.max, max) { | |
1916 | s.max = max | |
1917 | } | |
1918 | } | |
1919 | } | |
1920 | ||
1921 | // EncodeMin returns the encoded min value with plain encoding. | |
1922 | // | |
1923 | // ByteArray stats do not include the length in the encoding. | |
1924 | func (s *ByteArrayStatistics) EncodeMin() []byte { | |
1925 | if s.HasMinMax() { | |
1926 | return s.plainEncode(s.min) | |
1927 | } | |
1928 | return nil | |
1929 | } | |
1930 | ||
1931 | // EncodeMax returns the current encoded max value with plain encoding | |
1932 | // | |
1933 | // ByteArray stats do not include the length in the encoding | |
1934 | func (s *ByteArrayStatistics) EncodeMax() []byte { | |
1935 | if s.HasMinMax() { | |
1936 | return s.plainEncode(s.max) | |
1937 | } | |
1938 | return nil | |
1939 | } | |
1940 | ||
1941 | // Encode returns a populated EncodedStatistics object | |
1942 | func (s *ByteArrayStatistics) Encode() (enc EncodedStatistics, err error) { | |
1943 | defer func() { | |
1944 | if r := recover(); r != nil { | |
1945 | switch r := r.(type) { | |
1946 | case error: | |
1947 | err = r | |
1948 | case string: | |
1949 | err = xerrors.New(r) | |
1950 | default: | |
1951 | err = xerrors.Errorf("unknown error type thrown from panic: %v", r) | |
1952 | } | |
1953 | } | |
1954 | }() | |
1955 | if s.HasMinMax() { | |
1956 | enc.SetMax(s.EncodeMax()) | |
1957 | enc.SetMin(s.EncodeMin()) | |
1958 | } | |
1959 | if s.HasNullCount() { | |
1960 | enc.SetNullCount(s.NullCount()) | |
1961 | } | |
1962 | if s.HasDistinctCount() { | |
1963 | enc.SetDistinctCount(s.DistinctCount()) | |
1964 | } | |
1965 | return | |
1966 | } | |
1967 | ||
// minmaxPairFixedLenByteArray is a two-element array of FixedLenByteArray
// values.
// NOTE(review): presumably element 0 is the min and element 1 the max, which
// is how SetMinMax builds and reads such pairs — confirm against cleanStat.
type minmaxPairFixedLenByteArray [2]parquet.FixedLenByteArray
1969 | ||
1970 | // FixedLenByteArrayStatistics is the typed interface for managing stats for a column | |
1971 | // of FixedLenByteArray type. | |
1972 | type FixedLenByteArrayStatistics struct { | |
1973 | statistics | |
1974 | min parquet.FixedLenByteArray | |
1975 | max parquet.FixedLenByteArray | |
1976 | ||
1977 | bitSetReader utils.SetBitRunReader | |
1978 | } | |
1979 | ||
1980 | // NewFixedLenByteArrayStatistics constructs an appropriate stat object type using the | |
1981 | // given column descriptor and allocator. | |
1982 | // | |
1983 | // Panics if the physical type of descr is not parquet.Type.FixedLenByteArray | |
1984 | func NewFixedLenByteArrayStatistics(descr *schema.Column, mem memory.Allocator) *FixedLenByteArrayStatistics { | |
1985 | if descr.PhysicalType() != parquet.Types.FixedLenByteArray { | |
1986 | panic(xerrors.Errorf("parquet: invalid type %s for constructing a FixedLenByteArray stat object", descr.PhysicalType())) | |
1987 | } | |
1988 | ||
1989 | return &FixedLenByteArrayStatistics{ | |
1990 | statistics: statistics{ | |
1991 | descr: descr, | |
1992 | hasNullCount: true, | |
1993 | hasDistinctCount: true, | |
1994 | order: descr.SortOrder(), | |
1995 | encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), | |
1996 | mem: mem, | |
1997 | }, | |
1998 | } | |
1999 | } | |
2000 | ||
2001 | // NewFixedLenByteArrayStatisticsFromEncoded will construct a propertly typed statistics object | |
2002 | // initializing it with the provided information. | |
2003 | func NewFixedLenByteArrayStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *FixedLenByteArrayStatistics { | |
2004 | ret := NewFixedLenByteArrayStatistics(descr, mem) | |
2005 | ret.nvalues += nvalues | |
2006 | if encoded.IsSetNullCount() { | |
2007 | ret.incNulls(encoded.GetNullCount()) | |
2008 | } | |
2009 | if encoded.IsSetDistinctCount() { | |
2010 | ret.incDistinct(encoded.GetDistinctCount()) | |
2011 | } | |
2012 | ||
2013 | encodedMin := encoded.GetMin() | |
2014 | if encodedMin != nil && len(encodedMin) > 0 { | |
2015 | ret.min = ret.plainDecode(encodedMin) | |
2016 | } | |
2017 | encodedMax := encoded.GetMax() | |
2018 | if encodedMax != nil && len(encodedMax) > 0 { | |
2019 | ret.max = ret.plainDecode(encodedMax) | |
2020 | } | |
2021 | ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() | |
2022 | return ret | |
2023 | } | |
2024 | ||
2025 | func (s *FixedLenByteArrayStatistics) plainEncode(src parquet.FixedLenByteArray) []byte { | |
2026 | s.encoder.(encoding.FixedLenByteArrayEncoder).Put([]parquet.FixedLenByteArray{src}) | |
2027 | buf, err := s.encoder.FlushValues() | |
2028 | if err != nil { | |
2029 | panic(err) // recovered by Encode | |
2030 | } | |
2031 | defer buf.Release() | |
2032 | ||
2033 | out := make([]byte, buf.Len()) | |
2034 | copy(out, buf.Bytes()) | |
2035 | return out | |
2036 | } | |
2037 | ||
2038 | func (s *FixedLenByteArrayStatistics) plainDecode(src []byte) parquet.FixedLenByteArray { | |
2039 | var buf [1]parquet.FixedLenByteArray | |
2040 | ||
2041 | decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem) | |
2042 | decoder.SetData(1, src) | |
2043 | decoder.(encoding.FixedLenByteArrayDecoder).Decode(buf[:]) | |
2044 | return buf[0] | |
2045 | } | |
2046 | ||
2047 | func (s *FixedLenByteArrayStatistics) minval(a, b parquet.FixedLenByteArray) parquet.FixedLenByteArray { | |
2048 | switch { | |
2049 | case a == nil: | |
2050 | return b | |
2051 | case b == nil: | |
2052 | return a | |
2053 | case s.less(a, b): | |
2054 | return a | |
2055 | default: | |
2056 | return b | |
2057 | } | |
2058 | } | |
2059 | ||
2060 | func (s *FixedLenByteArrayStatistics) maxval(a, b parquet.FixedLenByteArray) parquet.FixedLenByteArray { | |
2061 | switch { | |
2062 | case a == nil: | |
2063 | return b | |
2064 | case b == nil: | |
2065 | return a | |
2066 | case s.less(a, b): | |
2067 | return b | |
2068 | default: | |
2069 | return a | |
2070 | } | |
2071 | } | |
2072 | ||
2073 | // MinMaxEqual returns true if both stat objects have the same Min and Max values | |
2074 | func (s *FixedLenByteArrayStatistics) MinMaxEqual(rhs *FixedLenByteArrayStatistics) bool { | |
2075 | return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) | |
2076 | } | |
2077 | ||
2078 | // Equals returns true only if both objects are the same type, have the same min and | |
2079 | // max values, null count, distinct count and number of values. | |
2080 | func (s *FixedLenByteArrayStatistics) Equals(other TypedStatistics) bool { | |
2081 | if s.Type() != other.Type() { | |
2082 | return false | |
2083 | } | |
2084 | rhs, ok := other.(*FixedLenByteArrayStatistics) | |
2085 | if !ok { | |
2086 | return false | |
2087 | } | |
2088 | ||
2089 | if s.HasMinMax() != rhs.HasMinMax() { | |
2090 | return false | |
2091 | } | |
2092 | return (s.hasMinMax && s.MinMaxEqual(rhs)) && | |
2093 | s.NullCount() == rhs.NullCount() && | |
2094 | s.DistinctCount() == rhs.DistinctCount() && | |
2095 | s.NumValues() == rhs.NumValues() | |
2096 | } | |
2097 | ||
2098 | func (s *FixedLenByteArrayStatistics) getMinMax(values []parquet.FixedLenByteArray) (min, max parquet.FixedLenByteArray) { | |
2099 | defMin := s.defaultMin() | |
2100 | defMax := s.defaultMax() | |
2101 | ||
2102 | min = defMin | |
2103 | max = defMax | |
2104 | ||
2105 | for _, v := range values { | |
2106 | min = s.minval(min, v) | |
2107 | max = s.maxval(max, v) | |
2108 | } | |
2109 | return | |
2110 | } | |
2111 | ||
2112 | func (s *FixedLenByteArrayStatistics) getMinMaxSpaced(values []parquet.FixedLenByteArray, validBits []byte, validBitsOffset int64) (min, max parquet.FixedLenByteArray) { | |
2113 | min = s.defaultMin() | |
2114 | max = s.defaultMax() | |
2115 | ||
2116 | if s.bitSetReader == nil { | |
2117 | s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) | |
2118 | } else { | |
2119 | s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) | |
2120 | } | |
2121 | ||
2122 | for { | |
2123 | run := s.bitSetReader.NextRun() | |
2124 | if run.Length == 0 { | |
2125 | break | |
2126 | } | |
2127 | for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] { | |
2128 | min = s.minval(min, v) | |
2129 | max = s.maxval(max, v) | |
2130 | } | |
2131 | } | |
2132 | return | |
2133 | } | |
2134 | ||
2135 | func (s *FixedLenByteArrayStatistics) Min() parquet.FixedLenByteArray { return s.min } | |
2136 | func (s *FixedLenByteArrayStatistics) Max() parquet.FixedLenByteArray { return s.max } | |
2137 | ||
2138 | // Merge merges the stats from other into this stat object, updating | |
2139 | // the null count, distinct count, number of values and the min/max if | |
2140 | // appropriate. | |
2141 | func (s *FixedLenByteArrayStatistics) Merge(other TypedStatistics) { | |
2142 | rhs, ok := other.(*FixedLenByteArrayStatistics) | |
2143 | if !ok { | |
2144 | panic("incompatible stat type merge") | |
2145 | } | |
2146 | ||
2147 | s.statistics.merge(rhs) | |
2148 | if rhs.HasMinMax() { | |
2149 | s.SetMinMax(rhs.Min(), rhs.Max()) | |
2150 | } | |
2151 | } | |
2152 | ||
2153 | // Update is used to add more values to the current stat object, finding the | |
2154 | // min and max values etc. | |
2155 | func (s *FixedLenByteArrayStatistics) Update(values []parquet.FixedLenByteArray, numNull int64) { | |
2156 | s.incNulls(numNull) | |
2157 | s.nvalues += int64(len(values)) | |
2158 | ||
2159 | if len(values) == 0 { | |
2160 | return | |
2161 | } | |
2162 | ||
2163 | s.SetMinMax(s.getMinMax(values)) | |
2164 | } | |
2165 | ||
2166 | // UpdateSpaced is just like Update, but for spaced values using validBits to determine | |
2167 | // and skip null values. | |
2168 | func (s *FixedLenByteArrayStatistics) UpdateSpaced(values []parquet.FixedLenByteArray, validBits []byte, validBitsOffset, numNull int64) { | |
2169 | s.incNulls(numNull) | |
2170 | notnull := int64(len(values)) - numNull | |
2171 | s.nvalues += notnull | |
2172 | ||
2173 | if notnull == 0 { | |
2174 | return | |
2175 | } | |
2176 | ||
2177 | s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) | |
2178 | } | |
2179 | ||
2180 | // SetMinMax updates the min and max values only if they are not currently set | |
2181 | // or if argMin is less than the current min / argMax is greater than the current max | |
2182 | func (s *FixedLenByteArrayStatistics) SetMinMax(argMin, argMax parquet.FixedLenByteArray) { | |
2183 | maybeMinMax := s.cleanStat([2]parquet.FixedLenByteArray{argMin, argMax}) | |
2184 | if maybeMinMax == nil { | |
2185 | return | |
2186 | } | |
2187 | ||
2188 | min := (*maybeMinMax)[0] | |
2189 | max := (*maybeMinMax)[1] | |
2190 | ||
2191 | if !s.hasMinMax { | |
2192 | s.hasMinMax = true | |
2193 | s.min = min | |
2194 | s.max = max | |
2195 | } else { | |
2196 | if !s.less(s.min, min) { | |
2197 | s.min = min | |
2198 | } | |
2199 | if s.less(s.max, max) { | |
2200 | s.max = max | |
2201 | } | |
2202 | } | |
2203 | } | |
2204 | ||
2205 | // EncodeMin returns the encoded min value with plain encoding. | |
2206 | // | |
2207 | // ByteArray stats do not include the length in the encoding. | |
2208 | func (s *FixedLenByteArrayStatistics) EncodeMin() []byte { | |
2209 | if s.HasMinMax() { | |
2210 | return s.plainEncode(s.min) | |
2211 | } | |
2212 | return nil | |
2213 | } | |
2214 | ||
2215 | // EncodeMax returns the current encoded max value with plain encoding | |
2216 | // | |
2217 | // ByteArray stats do not include the length in the encoding | |
2218 | func (s *FixedLenByteArrayStatistics) EncodeMax() []byte { | |
2219 | if s.HasMinMax() { | |
2220 | return s.plainEncode(s.max) | |
2221 | } | |
2222 | return nil | |
2223 | } | |
2224 | ||
2225 | // Encode returns a populated EncodedStatistics object | |
2226 | func (s *FixedLenByteArrayStatistics) Encode() (enc EncodedStatistics, err error) { | |
2227 | defer func() { | |
2228 | if r := recover(); r != nil { | |
2229 | switch r := r.(type) { | |
2230 | case error: | |
2231 | err = r | |
2232 | case string: | |
2233 | err = xerrors.New(r) | |
2234 | default: | |
2235 | err = xerrors.Errorf("unknown error type thrown from panic: %v", r) | |
2236 | } | |
2237 | } | |
2238 | }() | |
2239 | if s.HasMinMax() { | |
2240 | enc.SetMax(s.EncodeMax()) | |
2241 | enc.SetMin(s.EncodeMin()) | |
2242 | } | |
2243 | if s.HasNullCount() { | |
2244 | enc.SetNullCount(s.NullCount()) | |
2245 | } | |
2246 | if s.HasDistinctCount() { | |
2247 | enc.SetDistinctCount(s.DistinctCount()) | |
2248 | } | |
2249 | return | |
2250 | } | |
2251 | ||
2252 | // NewStatistics uses the type in the column descriptor to construct the appropriate | |
2253 | // typed stats object. If mem is nil, then memory.DefaultAllocator will be used. | |
2254 | func NewStatistics(descr *schema.Column, mem memory.Allocator) TypedStatistics { | |
2255 | if mem == nil { | |
2256 | mem = memory.DefaultAllocator | |
2257 | } | |
2258 | switch descr.PhysicalType() { | |
2259 | case parquet.Types.Int32: | |
2260 | return NewInt32Statistics(descr, mem) | |
2261 | case parquet.Types.Int64: | |
2262 | return NewInt64Statistics(descr, mem) | |
2263 | case parquet.Types.Int96: | |
2264 | return NewInt96Statistics(descr, mem) | |
2265 | case parquet.Types.Float: | |
2266 | return NewFloat32Statistics(descr, mem) | |
2267 | case parquet.Types.Double: | |
2268 | return NewFloat64Statistics(descr, mem) | |
2269 | case parquet.Types.Boolean: | |
2270 | return NewBooleanStatistics(descr, mem) | |
2271 | case parquet.Types.ByteArray: | |
2272 | return NewByteArrayStatistics(descr, mem) | |
2273 | case parquet.Types.FixedLenByteArray: | |
2274 | return NewFixedLenByteArrayStatistics(descr, mem) | |
2275 | default: | |
2276 | panic("not implemented") | |
2277 | } | |
2278 | } | |
2279 | ||
2280 | // NewStatisticsFromEncoded uses the provided information to initialize a typed stat object | |
2281 | // by checking the type of the provided column descriptor. | |
2282 | // | |
2283 | // If mem is nil, then memory.DefaultAllocator is used. | |
2284 | func NewStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) TypedStatistics { | |
2285 | if mem == nil { | |
2286 | mem = memory.DefaultAllocator | |
2287 | } | |
2288 | switch descr.PhysicalType() { | |
2289 | case parquet.Types.Int32: | |
2290 | return NewInt32StatisticsFromEncoded(descr, mem, nvalues, encoded) | |
2291 | case parquet.Types.Int64: | |
2292 | return NewInt64StatisticsFromEncoded(descr, mem, nvalues, encoded) | |
2293 | case parquet.Types.Int96: | |
2294 | return NewInt96StatisticsFromEncoded(descr, mem, nvalues, encoded) | |
2295 | case parquet.Types.Float: | |
2296 | return NewFloat32StatisticsFromEncoded(descr, mem, nvalues, encoded) | |
2297 | case parquet.Types.Double: | |
2298 | return NewFloat64StatisticsFromEncoded(descr, mem, nvalues, encoded) | |
2299 | case parquet.Types.Boolean: | |
2300 | return NewBooleanStatisticsFromEncoded(descr, mem, nvalues, encoded) | |
2301 | case parquet.Types.ByteArray: | |
2302 | return NewByteArrayStatisticsFromEncoded(descr, mem, nvalues, encoded) | |
2303 | case parquet.Types.FixedLenByteArray: | |
2304 | return NewFixedLenByteArrayStatisticsFromEncoded(descr, mem, nvalues, encoded) | |
2305 | default: | |
2306 | panic("not implemented") | |
2307 | } | |
2308 | } |