]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/go/parquet/internal/encoding/boolean_encoder.go
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / go / parquet / internal / encoding / boolean_encoder.go
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package encoding
18
19import (
20 "github.com/apache/arrow/go/v6/arrow/bitutil"
21 "github.com/apache/arrow/go/v6/parquet"
22 "github.com/apache/arrow/go/v6/parquet/internal/utils"
23)
24
const (
	// boolBufSize is the size, in bytes, of the scratch buffer that Put
	// allocates for staging bit-packed booleans.
	boolBufSize = 1024
	// boolsInBuf is the number of bits in that scratch buffer; it is the
	// length handed to the bitmap writer when it is created or reset.
	boolsInBuf = 8 * boolBufSize
)
29
30// PlainBooleanEncoder encodes bools as a bitmap as per the Plain Encoding
31type PlainBooleanEncoder struct {
32 encoder
33 bitsBuffer []byte
34 wr utils.BitmapWriter
35}
36
37// Type for the PlainBooleanEncoder is parquet.Types.Boolean
38func (PlainBooleanEncoder) Type() parquet.Type {
39 return parquet.Types.Boolean
40}
41
42// Put encodes the contents of in into the underlying data buffer.
43func (enc *PlainBooleanEncoder) Put(in []bool) {
44 if enc.bitsBuffer == nil {
45 enc.bitsBuffer = make([]byte, boolBufSize)
46 }
47 if enc.wr == nil {
48 enc.wr = utils.NewBitmapWriter(enc.bitsBuffer, 0, boolsInBuf)
49 }
50
51 n := enc.wr.AppendBools(in)
52 for n < len(in) {
53 enc.wr.Finish()
54 enc.append(enc.bitsBuffer)
55 enc.wr.Reset(0, boolsInBuf)
56 in = in[n:]
57 n = enc.wr.AppendBools(in)
58 }
59}
60
61// PutSpaced will use the validBits bitmap to determine which values are nulls
62// and can be left out from the slice, and the encoded without those nulls.
63func (enc *PlainBooleanEncoder) PutSpaced(in []bool, validBits []byte, validBitsOffset int64) {
64 bufferOut := make([]bool, len(in))
65 nvalid := spacedCompress(in, bufferOut, validBits, validBitsOffset)
66 enc.Put(bufferOut[:nvalid])
67}
68
69// EstimatedDataEncodedSize returns the current number of bytes that have
70// been buffered so far
71func (enc *PlainBooleanEncoder) EstimatedDataEncodedSize() int64 {
72 return int64(enc.sink.Len() + int(bitutil.BytesForBits(int64(enc.wr.Pos()))))
73}
74
75// FlushValues returns the buffered data, the responsibility is on the caller
76// to release the buffer memory
77func (enc *PlainBooleanEncoder) FlushValues() (Buffer, error) {
78 if enc.wr.Pos() > 0 {
79 toFlush := int(enc.wr.Pos())
80 enc.append(enc.bitsBuffer[:bitutil.BytesForBits(int64(toFlush))])
81 }
82
83 return enc.sink.Finish(), nil
84}