]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/go/arrow/bitutil/bitmaps.go
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / go / arrow / bitutil / bitmaps.go
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16
17 package bitutil
18
19 import (
20 "math/bits"
21 "unsafe"
22
23 "github.com/apache/arrow/go/v6/arrow/endian"
24 "github.com/apache/arrow/go/v6/arrow/internal/debug"
25 )
26
27 // BitmapReader is a simple bitmap reader for a byte slice.
28 type BitmapReader struct {
29 bitmap []byte
30 pos int
31 len int
32
33 current byte
34 byteOffset int
35 bitOffset int
36 }
37
38 // NewBitmapReader creates and returns a new bitmap reader for the given bitmap
39 func NewBitmapReader(bitmap []byte, offset, length int) *BitmapReader {
40 curbyte := byte(0)
41 if length > 0 && bitmap != nil {
42 curbyte = bitmap[offset/8]
43 }
44 return &BitmapReader{
45 bitmap: bitmap,
46 byteOffset: offset / 8,
47 bitOffset: offset % 8,
48 current: curbyte,
49 len: length,
50 }
51 }
52
53 // Set returns true if the current bit is set
54 func (b *BitmapReader) Set() bool {
55 return (b.current & (1 << b.bitOffset)) != 0
56 }
57
58 // NotSet returns true if the current bit is not set
59 func (b *BitmapReader) NotSet() bool {
60 return (b.current & (1 << b.bitOffset)) == 0
61 }
62
63 // Next advances the reader to the next bit in the bitmap.
64 func (b *BitmapReader) Next() {
65 b.bitOffset++
66 b.pos++
67 if b.bitOffset == 8 {
68 b.bitOffset = 0
69 b.byteOffset++
70 if b.pos < b.len {
71 b.current = b.bitmap[int(b.byteOffset)]
72 }
73 }
74 }
75
76 // Pos returns the current bit position in the bitmap that the reader is looking at
77 func (b *BitmapReader) Pos() int { return b.pos }
78
79 // Len returns the total number of bits in the bitmap
80 func (b *BitmapReader) Len() int { return b.len }
81
82 // BitmapWriter is a simple writer for writing bitmaps to byte slices
83 type BitmapWriter struct {
84 buf []byte
85 pos int
86 length int
87
88 curByte uint8
89 bitMask uint8
90 byteOffset int
91 }
92
93 // NewBitmapWriter returns a sequential bitwise writer that preserves surrounding
94 // bit values as it writes.
95 func NewBitmapWriter(bitmap []byte, start, length int) *BitmapWriter {
96 ret := &BitmapWriter{
97 buf: bitmap,
98 length: length,
99 byteOffset: start / 8,
100 bitMask: BitMask[start%8],
101 }
102 if length > 0 {
103 ret.curByte = bitmap[int(ret.byteOffset)]
104 }
105 return ret
106 }
107
108 // Reset resets the position and view of the slice to restart writing a bitmap
109 // to the same byte slice.
110 func (b *BitmapWriter) Reset(start, length int) {
111 b.pos = 0
112 b.byteOffset = start / 8
113 b.bitMask = BitMask[start%8]
114 b.length = length
115 if b.length > 0 {
116 b.curByte = b.buf[int(b.byteOffset)]
117 }
118 }
119
120 func (b *BitmapWriter) Pos() int { return b.pos }
121 func (b *BitmapWriter) Set() { b.curByte |= b.bitMask }
122 func (b *BitmapWriter) Clear() { b.curByte &= ^b.bitMask }
123
124 // Next increments the writer to the next bit for writing.
125 func (b *BitmapWriter) Next() {
126 b.bitMask = b.bitMask << 1
127 b.pos++
128 if b.bitMask == 0 {
129 b.bitMask = 0x01
130 b.buf[b.byteOffset] = b.curByte
131 b.byteOffset++
132 if b.pos < b.length {
133 b.curByte = b.buf[int(b.byteOffset)]
134 }
135 }
136 }
137
138 // AppendBools writes a series of booleans to the bitmapwriter and returns
139 // the number of remaining bytes left in the buffer for writing.
140 func (b *BitmapWriter) AppendBools(in []bool) int {
141 space := min(b.length-b.pos, len(in))
142 if space == 0 {
143 return 0
144 }
145
146 bitOffset := bits.TrailingZeros32(uint32(b.bitMask))
147 // location that the first byte needs to be written to for appending
148 appslice := b.buf[int(b.byteOffset) : b.byteOffset+int(BytesForBits(int64(bitOffset+space)))]
149 // update everything but curByte
150 appslice[0] = b.curByte
151 for i, b := range in[:space] {
152 if b {
153 SetBit(appslice, i+bitOffset)
154 } else {
155 ClearBit(appslice, i+bitOffset)
156 }
157 }
158
159 b.pos += space
160 b.bitMask = BitMask[(bitOffset+space)%8]
161 b.byteOffset += (bitOffset + space) / 8
162 b.curByte = appslice[len(appslice)-1]
163
164 return space
165 }
166
167 // Finish flushes the final byte out to the byteslice in case it was not already
168 // on a byte aligned boundary.
169 func (b *BitmapWriter) Finish() {
170 if b.length > 0 && (b.bitMask != 0x01 || b.pos < b.length) {
171 b.buf[int(b.byteOffset)] = b.curByte
172 }
173 }
174
175 // BitmapWordReader is a reader for bitmaps that reads a word at a time (a word being an 8 byte uint64)
176 // and then provides functions to grab the individual trailing bytes after the last word
177 type BitmapWordReader struct {
178 bitmap []byte
179 offset int
180 nwords int
181 trailingBits int
182 trailingBytes int
183 curword uint64
184 }
185
186 // NewBitmapWordReader sets up a word reader, calculates the number of trailing bits and
187 // number of trailing bytes, along with the number of words.
188 func NewBitmapWordReader(bitmap []byte, offset, length int) *BitmapWordReader {
189 bitoffset := offset % 8
190 byteOffset := offset / 8
191 bm := &BitmapWordReader{
192 offset: bitoffset,
193 bitmap: bitmap[byteOffset : byteOffset+int(BytesForBits(int64(bitoffset+length)))],
194 // decrement wordcount by 1 as we may touch two adjacent words in one iteration
195 nwords: length/int(unsafe.Sizeof(uint64(0))*8) - 1,
196 }
197 if bm.nwords < 0 {
198 bm.nwords = 0
199 }
200 bm.trailingBits = length - bm.nwords*int(unsafe.Sizeof(uint64(0)))*8
201 bm.trailingBytes = int(BytesForBits(int64(bm.trailingBits)))
202
203 if bm.nwords > 0 {
204 bm.curword = toFromLEFunc(endian.Native.Uint64(bm.bitmap))
205 } else {
206 setLSB(&bm.curword, bm.bitmap[0])
207 }
208 return bm
209 }
210
211 // NextWord returns the next full word read from the bitmap, should not be called
212 // if Words() is 0 as it will step outside of the bounds of the bitmap slice and panic.
213 //
214 // We don't perform the bounds checking in order to improve performance.
215 func (bm *BitmapWordReader) NextWord() uint64 {
216 bm.bitmap = bm.bitmap[unsafe.Sizeof(bm.curword):]
217 word := bm.curword
218 nextWord := toFromLEFunc(endian.Native.Uint64(bm.bitmap))
219 if bm.offset != 0 {
220 // combine two adjacent words into one word
221 // |<------ next ----->|<---- current ---->|
222 // +-------------+-----+-------------+-----+
223 // | --- | A | B | --- |
224 // +-------------+-----+-------------+-----+
225 // | | offset
226 // v v
227 // +-----+-------------+
228 // | A | B |
229 // +-----+-------------+
230 // |<------ word ----->|
231 word >>= uint64(bm.offset)
232 word |= nextWord << (int64(unsafe.Sizeof(uint64(0))*8) - int64(bm.offset))
233 }
234 bm.curword = nextWord
235 return word
236 }
237
238 // NextTrailingByte returns the next trailing byte of the bitmap after the last word
239 // along with the number of valid bits in that byte. When validBits < 8, that
240 // is the last byte.
241 //
242 // If the bitmap ends on a byte alignment, then the last byte can also return 8 valid bits.
243 // Thus the TrailingBytes function should be used to know how many trailing bytes to read.
244 func (bm *BitmapWordReader) NextTrailingByte() (val byte, validBits int) {
245 debug.Assert(bm.trailingBits > 0, "next trailing byte called with no trailing bits")
246
247 if bm.trailingBits <= 8 {
248 // last byte
249 validBits = bm.trailingBits
250 bm.trailingBits = 0
251 rdr := NewBitmapReader(bm.bitmap, bm.offset, validBits)
252 for i := 0; i < validBits; i++ {
253 val >>= 1
254 if rdr.Set() {
255 val |= 0x80
256 }
257 rdr.Next()
258 }
259 val >>= (8 - validBits)
260 return
261 }
262
263 bm.bitmap = bm.bitmap[1:]
264 nextByte := bm.bitmap[0]
265 val = getLSB(bm.curword)
266 if bm.offset != 0 {
267 val >>= byte(bm.offset)
268 val |= nextByte << (8 - bm.offset)
269 }
270 setLSB(&bm.curword, nextByte)
271 bm.trailingBits -= 8
272 bm.trailingBytes--
273 validBits = 8
274 return
275 }
276
277 func (bm *BitmapWordReader) Words() int { return bm.nwords }
278 func (bm *BitmapWordReader) TrailingBytes() int { return bm.trailingBytes }
279
280 // BitmapWordWriter is a bitmap writer for writing a full word at a time (a word being
281 // a uint64). After the last full word is written, PutNextTrailingByte can be used to
282 // write the remaining trailing bytes.
283 type BitmapWordWriter struct {
284 bitmap []byte
285 offset int
286 len int
287
288 bitMask uint64
289 currentWord uint64
290 }
291
292 // NewBitmapWordWriter initializes a new bitmap word writer which will start writing
293 // into the byte slice at bit offset start, expecting to write len bits.
294 func NewBitmapWordWriter(bitmap []byte, start, len int) *BitmapWordWriter {
295 ret := &BitmapWordWriter{
296 bitmap: bitmap[start/8:],
297 len: len,
298 offset: start % 8,
299 bitMask: (uint64(1) << uint64(start%8)) - 1,
300 }
301
302 if ret.offset != 0 {
303 if ret.len >= int(unsafe.Sizeof(uint64(0))*8) {
304 ret.currentWord = toFromLEFunc(endian.Native.Uint64(ret.bitmap))
305 } else if ret.len > 0 {
306 setLSB(&ret.currentWord, ret.bitmap[0])
307 }
308 }
309 return ret
310 }
311
312 // PutNextWord writes the given word to the bitmap, potentially splitting across
313 // two adjacent words.
314 func (bm *BitmapWordWriter) PutNextWord(word uint64) {
315 sz := int(unsafe.Sizeof(word))
316 if bm.offset != 0 {
317 // split one word into two adjacent words, don't touch unused bits
318 // |<------ word ----->|
319 // +-----+-------------+
320 // | A | B |
321 // +-----+-------------+
322 // | |
323 // v v offset
324 // +-------------+-----+-------------+-----+
325 // | --- | A | B | --- |
326 // +-------------+-----+-------------+-----+
327 // |<------ next ----->|<---- current ---->|
328 word = (word << uint64(bm.offset)) | (word >> (int64(sz*8) - int64(bm.offset)))
329 next := toFromLEFunc(endian.Native.Uint64(bm.bitmap[sz:]))
330 bm.currentWord = (bm.currentWord & bm.bitMask) | (word &^ bm.bitMask)
331 next = (next &^ bm.bitMask) | (word & bm.bitMask)
332 endian.Native.PutUint64(bm.bitmap, toFromLEFunc(bm.currentWord))
333 endian.Native.PutUint64(bm.bitmap[sz:], toFromLEFunc(next))
334 bm.currentWord = next
335 } else {
336 endian.Native.PutUint64(bm.bitmap, toFromLEFunc(word))
337 }
338 bm.bitmap = bm.bitmap[sz:]
339 }
340
341 // PutNextTrailingByte writes the number of bits indicated by validBits from b to
342 // the bitmap.
343 func (bm *BitmapWordWriter) PutNextTrailingByte(b byte, validBits int) {
344 curbyte := getLSB(bm.currentWord)
345 if validBits == 8 {
346 if bm.offset != 0 {
347 b = (b << bm.offset) | (b >> (8 - bm.offset))
348 next := bm.bitmap[1]
349 curbyte = (curbyte & byte(bm.bitMask)) | (b &^ byte(bm.bitMask))
350 next = (next &^ byte(bm.bitMask)) | (b & byte(bm.bitMask))
351 bm.bitmap[0] = curbyte
352 bm.bitmap[1] = next
353 bm.currentWord = uint64(next)
354 } else {
355 bm.bitmap[0] = b
356 }
357 bm.bitmap = bm.bitmap[1:]
358 } else {
359 debug.Assert(validBits > 0 && validBits < 8, "invalid valid bits in bitmap word writer")
360 debug.Assert(BytesForBits(int64(bm.offset+validBits)) <= int64(len(bm.bitmap)), "writing trailiing byte outside of bounds of bitmap")
361 wr := NewBitmapWriter(bm.bitmap, int(bm.offset), validBits)
362 for i := 0; i < validBits; i++ {
363 if b&0x01 != 0 {
364 wr.Set()
365 } else {
366 wr.Clear()
367 }
368 wr.Next()
369 b >>= 1
370 }
371 wr.Finish()
372 }
373 }
374
375 // CopyBitmap copies the bitmap indicated by src, starting at bit offset srcOffset,
376 // and copying length bits into dst, starting at bit offset dstOffset.
377 func CopyBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) {
378 if length == 0 {
379 // if there's nothing to write, end early.
380 return
381 }
382
383 bitOffset := srcOffset % 8
384 destBitOffset := dstOffset % 8
385
386 // slow path, one of the bitmaps are not byte aligned.
387 if bitOffset != 0 || destBitOffset != 0 {
388 rdr := NewBitmapWordReader(src, srcOffset, length)
389 wr := NewBitmapWordWriter(dst, dstOffset, length)
390
391 nwords := rdr.Words()
392 for nwords > 0 {
393 nwords--
394 wr.PutNextWord(rdr.NextWord())
395 }
396 nbytes := rdr.TrailingBytes()
397 for nbytes > 0 {
398 nbytes--
399 bt, validBits := rdr.NextTrailingByte()
400 wr.PutNextTrailingByte(bt, validBits)
401 }
402 return
403 }
404
405 // fast path, both are starting with byte-aligned bitmaps
406 nbytes := int(BytesForBits(int64(length)))
407
408 // shift by its byte offset
409 src = src[srcOffset/8:]
410 dst = dst[dstOffset/8:]
411
412 // Take care of the trailing bits in the last byte
413 // E.g., if trailing_bits = 5, last byte should be
414 // - low 3 bits: new bits from last byte of data buffer
415 // - high 5 bits: old bits from last byte of dest buffer
416 trailingBits := nbytes*8 - length
417 trailMask := byte(uint(1)<<(8-trailingBits)) - 1
418
419 copy(dst, src[:nbytes-1])
420 lastData := src[nbytes-1]
421
422 dst[nbytes-1] &= ^trailMask
423 dst[nbytes-1] |= lastData & trailMask
424 }