]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/go/parquet/internal/utils/bitmap_writer.go
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / go / parquet / internal / utils / bitmap_writer.go
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16
17 package utils
18
19 import (
20 "encoding/binary"
21 "math/bits"
22
23 "github.com/apache/arrow/go/v6/arrow/bitutil"
24 )
25
26 // BitmapWriter is an interface for bitmap writers so that we can use multiple
27 // implementations or swap if necessary.
28 type BitmapWriter interface {
29 // Set sets the current bit that will be written
30 Set()
31 // Clear clears the current bit that will be written
32 Clear()
33 // Next advances to the next bit for the writer
34 Next()
35 // Finish flushes the current byte out to the bitmap slice
36 Finish()
37 // AppendWord takes nbits from word which should be an LSB bitmap and appends them to the bitmap.
38 AppendWord(word uint64, nbits int64)
39 // AppendBools appends the bit representation of the bools slice, returning the number
40 // of bools that were able to fit in the remaining length of the bitmapwriter.
41 AppendBools(in []bool) int
42 // Pos is the current position that will be written next
43 Pos() int
44 // Reset allows reusing the bitmapwriter by resetting Pos to start with length as
45 // the number of bits that the writer can write.
46 Reset(start, length int)
47 }
48
49 type bitmapWriter struct {
50 *bitutil.BitmapWriter
51 }
52
53 func NewBitmapWriter(bitmap []byte, start, length int) BitmapWriter {
54 return &bitmapWriter{bitutil.NewBitmapWriter(bitmap, start, length)}
55 }
56
57 func (b *bitmapWriter) AppendWord(uint64, int64) {
58 panic("unimplemented")
59 }
60
61 type firstTimeBitmapWriter struct {
62 buf []byte
63 pos int64
64 length int64
65
66 curByte uint8
67 bitMask uint8
68 byteOffset int64
69 }
70
71 // NewFirstTimeBitmapWriter creates a bitmap writer that might clobber any bit values
72 // following the bits written to the bitmap, as such it is faster than the bitmapwriter
73 // that is created with NewBitmapWriter
74 func NewFirstTimeBitmapWriter(buf []byte, start, length int64) BitmapWriter {
75 ret := &firstTimeBitmapWriter{
76 buf: buf,
77 byteOffset: start / 8,
78 bitMask: bitutil.BitMask[start%8],
79 length: length,
80 }
81 if length > 0 {
82 ret.curByte = ret.buf[int(ret.byteOffset)] & bitutil.PrecedingBitmask[start%8]
83 }
84 return ret
85 }
86
87 var endianBuffer [8]byte
88
89 func (bw *firstTimeBitmapWriter) Reset(start, length int) {
90 bw.pos = 0
91 bw.byteOffset = int64(start / 8)
92 bw.bitMask = bitutil.BitMask[start%8]
93 bw.length = int64(length)
94 if length > 0 {
95 bw.curByte = bw.buf[int(bw.byteOffset)] & bitutil.PrecedingBitmask[start%8]
96 }
97 }
98
99 func (bw *firstTimeBitmapWriter) Pos() int { return int(bw.pos) }
100 func (bw *firstTimeBitmapWriter) AppendWord(word uint64, nbits int64) {
101 if nbits == 0 {
102 return
103 }
104
105 // location that the first byte needs to be written to for appending
106 appslice := bw.buf[int(bw.byteOffset):]
107
108 // update everything but curByte
109 bw.pos += nbits
110 bitOffset := bits.TrailingZeros32(uint32(bw.bitMask))
111 bw.bitMask = bitutil.BitMask[(int64(bitOffset)+nbits)%8]
112 bw.byteOffset += (int64(bitOffset) + nbits) / 8
113
114 if bitOffset != 0 {
115 // we're in the middle of the byte. Update the byte and shift bits appropriately
116 // so we can just copy the bytes.
117 carry := 8 - bitOffset
118 // Carry over bits from word to curByte. We assume any extra bits in word are unset
119 // so no additional accounting is needed for when nbits < carry
120 bw.curByte |= uint8((word & uint64(bitutil.PrecedingBitmask[carry])) << bitOffset)
121 // check everything was transferred to curByte
122 if nbits < int64(carry) {
123 return
124 }
125 appslice[0] = bw.curByte
126 appslice = appslice[1:]
127 // move the carry bits off of word
128 word = word >> carry
129 nbits -= int64(carry)
130 }
131 bytesForWord := bitutil.BytesForBits(nbits)
132 binary.LittleEndian.PutUint64(endianBuffer[:], word)
133 copy(appslice, endianBuffer[:bytesForWord])
134
135 // at this point, the previous curByte has been written, the new curByte
136 // is either the last relevant byte in word or cleared if the new position
137 // is byte aligned (ie. a fresh byte)
138 if bw.bitMask == 0x1 {
139 bw.curByte = 0
140 } else {
141 bw.curByte = appslice[bytesForWord-1]
142 }
143 }
144
145 func (bw *firstTimeBitmapWriter) Set() {
146 bw.curByte |= bw.bitMask
147 }
148
149 func (bw *firstTimeBitmapWriter) Clear() {}
150
151 func (bw *firstTimeBitmapWriter) Next() {
152 bw.bitMask = uint8(bw.bitMask << 1)
153 bw.pos++
154 if bw.bitMask == 0 {
155 // byte finished, advance to the next one
156 bw.bitMask = 0x1
157 bw.buf[int(bw.byteOffset)] = bw.curByte
158 bw.byteOffset++
159 bw.curByte = 0
160 }
161 }
162
163 func (b *firstTimeBitmapWriter) AppendBools(in []bool) int {
164 panic("Append Bools not yet implemented for firstTimeBitmapWriter")
165 }
166
167 func (bw *firstTimeBitmapWriter) Finish() {
168 // store curByte into the bitmap
169 if bw.length > 0 && bw.bitMask != 0x01 || bw.pos < bw.length {
170 bw.buf[int(bw.byteOffset)] = bw.curByte
171 }
172 }
173
174 func (bw *firstTimeBitmapWriter) Position() int64 { return bw.pos }