]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, software | |
12 | // distributed under the License is distributed on an "AS IS" BASIS, | |
13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | // See the License for the specific language governing permissions and | |
15 | // limitations under the License. | |
16 | ||
17 | package encoding | |
18 | ||
19 | import ( | |
20 | "github.com/apache/arrow/go/v6/arrow/bitutil" | |
21 | "github.com/apache/arrow/go/v6/parquet" | |
22 | "github.com/apache/arrow/go/v6/parquet/internal/utils" | |
23 | ) | |
24 | ||
const (
	// boolBufSize is the size, in bytes, of the scratch bitmap that
	// PlainBooleanEncoder packs bits into before copying them to its sink.
	boolBufSize = 1024
	// boolsInBuf is the number of boolean values (one bit each) that fit in
	// a scratch bitmap of boolBufSize bytes.
	boolsInBuf = boolBufSize * 8
)
29 | ||
30 | // PlainBooleanEncoder encodes bools as a bitmap as per the Plain Encoding | |
31 | type PlainBooleanEncoder struct { | |
32 | encoder | |
33 | bitsBuffer []byte | |
34 | wr utils.BitmapWriter | |
35 | } | |
36 | ||
37 | // Type for the PlainBooleanEncoder is parquet.Types.Boolean | |
38 | func (PlainBooleanEncoder) Type() parquet.Type { | |
39 | return parquet.Types.Boolean | |
40 | } | |
41 | ||
42 | // Put encodes the contents of in into the underlying data buffer. | |
43 | func (enc *PlainBooleanEncoder) Put(in []bool) { | |
44 | if enc.bitsBuffer == nil { | |
45 | enc.bitsBuffer = make([]byte, boolBufSize) | |
46 | } | |
47 | if enc.wr == nil { | |
48 | enc.wr = utils.NewBitmapWriter(enc.bitsBuffer, 0, boolsInBuf) | |
49 | } | |
50 | ||
51 | n := enc.wr.AppendBools(in) | |
52 | for n < len(in) { | |
53 | enc.wr.Finish() | |
54 | enc.append(enc.bitsBuffer) | |
55 | enc.wr.Reset(0, boolsInBuf) | |
56 | in = in[n:] | |
57 | n = enc.wr.AppendBools(in) | |
58 | } | |
59 | } | |
60 | ||
61 | // PutSpaced will use the validBits bitmap to determine which values are nulls | |
62 | // and can be left out from the slice, and the encoded without those nulls. | |
63 | func (enc *PlainBooleanEncoder) PutSpaced(in []bool, validBits []byte, validBitsOffset int64) { | |
64 | bufferOut := make([]bool, len(in)) | |
65 | nvalid := spacedCompress(in, bufferOut, validBits, validBitsOffset) | |
66 | enc.Put(bufferOut[:nvalid]) | |
67 | } | |
68 | ||
69 | // EstimatedDataEncodedSize returns the current number of bytes that have | |
70 | // been buffered so far | |
71 | func (enc *PlainBooleanEncoder) EstimatedDataEncodedSize() int64 { | |
72 | return int64(enc.sink.Len() + int(bitutil.BytesForBits(int64(enc.wr.Pos())))) | |
73 | } | |
74 | ||
75 | // FlushValues returns the buffered data, the responsibility is on the caller | |
76 | // to release the buffer memory | |
77 | func (enc *PlainBooleanEncoder) FlushValues() (Buffer, error) { | |
78 | if enc.wr.Pos() > 0 { | |
79 | toFlush := int(enc.wr.Pos()) | |
80 | enc.append(enc.bitsBuffer[:bitutil.BytesForBits(int64(toFlush))]) | |
81 | } | |
82 | ||
83 | return enc.sink.Finish(), nil | |
84 | } |