1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
23 "github.com/apache/arrow/go/v6/arrow/endian"
24 "github.com/apache/arrow/go/v6/arrow/internal/debug"
27 // BitmapReader is a simple bitmap reader for a byte slice.
28 type BitmapReader struct {
38 // NewBitmapReader creates and returns a new bitmap reader for the given bitmap
39 func NewBitmapReader(bitmap []byte, offset, length int) *BitmapReader {
41 if length > 0 && bitmap != nil {
42 curbyte = bitmap[offset/8]
46 byteOffset: offset / 8,
47 bitOffset: offset % 8,
53 // Set returns true if the current bit is set
54 func (b *BitmapReader) Set() bool {
55 return (b.current & (1 << b.bitOffset)) != 0
58 // NotSet returns true if the current bit is not set
59 func (b *BitmapReader) NotSet() bool {
60 return (b.current & (1 << b.bitOffset)) == 0
63 // Next advances the reader to the next bit in the bitmap.
64 func (b *BitmapReader) Next() {
71 b.current = b.bitmap[int(b.byteOffset)]
76 // Pos returns the current bit position in the bitmap that the reader is looking at
77 func (b *BitmapReader) Pos() int { return b.pos }
79 // Len returns the total number of bits in the bitmap
80 func (b *BitmapReader) Len() int { return b.len }
82 // BitmapWriter is a simple writer for writing bitmaps to byte slices
83 type BitmapWriter struct {
93 // NewBitmapWriter returns a sequential bitwise writer that preserves surrounding
94 // bit values as it writes.
95 func NewBitmapWriter(bitmap []byte, start, length int) *BitmapWriter {
99 byteOffset: start / 8,
100 bitMask: BitMask[start%8],
103 ret.curByte = bitmap[int(ret.byteOffset)]
108 // Reset resets the position and view of the slice to restart writing a bitmap
109 // to the same byte slice.
110 func (b *BitmapWriter) Reset(start, length int) {
112 b.byteOffset = start / 8
113 b.bitMask = BitMask[start%8]
116 b.curByte = b.buf[int(b.byteOffset)]
120 func (b *BitmapWriter) Pos() int { return b.pos }
121 func (b *BitmapWriter) Set() { b.curByte |= b.bitMask }
122 func (b *BitmapWriter) Clear() { b.curByte &= ^b.bitMask }
124 // Next increments the writer to the next bit for writing.
125 func (b *BitmapWriter) Next() {
126 b.bitMask = b.bitMask << 1
130 b.buf[b.byteOffset] = b.curByte
132 if b.pos < b.length {
133 b.curByte = b.buf[int(b.byteOffset)]
138 // AppendBools writes a series of booleans to the bitmapwriter and returns
139 // the number of remaining bytes left in the buffer for writing.
140 func (b *BitmapWriter) AppendBools(in []bool) int {
141 space := min(b.length-b.pos, len(in))
146 bitOffset := bits.TrailingZeros32(uint32(b.bitMask))
147 // location that the first byte needs to be written to for appending
148 appslice := b.buf[int(b.byteOffset) : b.byteOffset+int(BytesForBits(int64(bitOffset+space)))]
149 // update everything but curByte
150 appslice[0] = b.curByte
151 for i, b := range in[:space] {
153 SetBit(appslice, i+bitOffset)
155 ClearBit(appslice, i+bitOffset)
160 b.bitMask = BitMask[(bitOffset+space)%8]
161 b.byteOffset += (bitOffset + space) / 8
162 b.curByte = appslice[len(appslice)-1]
167 // Finish flushes the final byte out to the byteslice in case it was not already
168 // on a byte aligned boundary.
169 func (b *BitmapWriter) Finish() {
170 if b.length > 0 && (b.bitMask != 0x01 || b.pos < b.length) {
171 b.buf[int(b.byteOffset)] = b.curByte
175 // BitmapWordReader is a reader for bitmaps that reads a word at a time (a word being an 8 byte uint64)
176 // and then provides functions to grab the individual trailing bytes after the last word
177 type BitmapWordReader struct {
186 // NewBitmapWordReader sets up a word reader, calculates the number of trailing bits and
187 // number of trailing bytes, along with the number of words.
188 func NewBitmapWordReader(bitmap []byte, offset, length int) *BitmapWordReader {
189 bitoffset := offset % 8
190 byteOffset := offset / 8
191 bm := &BitmapWordReader{
193 bitmap: bitmap[byteOffset : byteOffset+int(BytesForBits(int64(bitoffset+length)))],
194 // decrement wordcount by 1 as we may touch two adjacent words in one iteration
195 nwords: length/int(unsafe.Sizeof(uint64(0))*8) - 1,
200 bm.trailingBits = length - bm.nwords*int(unsafe.Sizeof(uint64(0)))*8
201 bm.trailingBytes = int(BytesForBits(int64(bm.trailingBits)))
204 bm.curword = toFromLEFunc(endian.Native.Uint64(bm.bitmap))
206 setLSB(&bm.curword, bm.bitmap[0])
211 // NextWord returns the next full word read from the bitmap, should not be called
212 // if Words() is 0 as it will step outside of the bounds of the bitmap slice and panic.
214 // We don't perform the bounds checking in order to improve performance.
215 func (bm *BitmapWordReader) NextWord() uint64 {
216 bm.bitmap = bm.bitmap[unsafe.Sizeof(bm.curword):]
218 nextWord := toFromLEFunc(endian.Native.Uint64(bm.bitmap))
220 // combine two adjacent words into one word
221 // |<------ next ----->|<---- current ---->|
222 // +-------------+-----+-------------+-----+
223 // | --- | A | B | --- |
224 // +-------------+-----+-------------+-----+
227 // +-----+-------------+
229 // +-----+-------------+
230 // |<------ word ----->|
231 word >>= uint64(bm.offset)
232 word |= nextWord << (int64(unsafe.Sizeof(uint64(0))*8) - int64(bm.offset))
234 bm.curword = nextWord
238 // NextTrailingByte returns the next trailing byte of the bitmap after the last word
239 // along with the number of valid bits in that byte. When validBits < 8, that
242 // If the bitmap ends on a byte alignment, then the last byte can also return 8 valid bits.
243 // Thus the TrailingBytes function should be used to know how many trailing bytes to read.
244 func (bm *BitmapWordReader) NextTrailingByte() (val byte, validBits int) {
245 debug.Assert(bm.trailingBits > 0, "next trailing byte called with no trailing bits")
247 if bm.trailingBits <= 8 {
249 validBits = bm.trailingBits
251 rdr := NewBitmapReader(bm.bitmap, bm.offset, validBits)
252 for i := 0; i < validBits; i++ {
259 val >>= (8 - validBits)
263 bm.bitmap = bm.bitmap[1:]
264 nextByte := bm.bitmap[0]
265 val = getLSB(bm.curword)
267 val >>= byte(bm.offset)
268 val |= nextByte << (8 - bm.offset)
270 setLSB(&bm.curword, nextByte)
277 func (bm *BitmapWordReader) Words() int { return bm.nwords }
278 func (bm *BitmapWordReader) TrailingBytes() int { return bm.trailingBytes }
280 // BitmapWordWriter is a bitmap writer for writing a full word at a time (a word being
281 // a uint64). After the last full word is written, PutNextTrailingByte can be used to
282 // write the remaining trailing bytes.
283 type BitmapWordWriter struct {
292 // NewBitmapWordWriter initializes a new bitmap word writer which will start writing
293 // into the byte slice at bit offset start, expecting to write len bits.
294 func NewBitmapWordWriter(bitmap []byte, start, len int) *BitmapWordWriter {
295 ret := &BitmapWordWriter{
296 bitmap: bitmap[start/8:],
299 bitMask: (uint64(1) << uint64(start%8)) - 1,
303 if ret.len >= int(unsafe.Sizeof(uint64(0))*8) {
304 ret.currentWord = toFromLEFunc(endian.Native.Uint64(ret.bitmap))
305 } else if ret.len > 0 {
306 setLSB(&ret.currentWord, ret.bitmap[0])
312 // PutNextWord writes the given word to the bitmap, potentially splitting across
313 // two adjacent words.
314 func (bm *BitmapWordWriter) PutNextWord(word uint64) {
315 sz := int(unsafe.Sizeof(word))
317 // split one word into two adjacent words, don't touch unused bits
318 // |<------ word ----->|
319 // +-----+-------------+
321 // +-----+-------------+
324 // +-------------+-----+-------------+-----+
325 // | --- | A | B | --- |
326 // +-------------+-----+-------------+-----+
327 // |<------ next ----->|<---- current ---->|
328 word = (word << uint64(bm.offset)) | (word >> (int64(sz*8) - int64(bm.offset)))
329 next := toFromLEFunc(endian.Native.Uint64(bm.bitmap[sz:]))
330 bm.currentWord = (bm.currentWord & bm.bitMask) | (word &^ bm.bitMask)
331 next = (next &^ bm.bitMask) | (word & bm.bitMask)
332 endian.Native.PutUint64(bm.bitmap, toFromLEFunc(bm.currentWord))
333 endian.Native.PutUint64(bm.bitmap[sz:], toFromLEFunc(next))
334 bm.currentWord = next
336 endian.Native.PutUint64(bm.bitmap, toFromLEFunc(word))
338 bm.bitmap = bm.bitmap[sz:]
341 // PutNextTrailingByte writes the number of bits indicated by validBits from b to
343 func (bm *BitmapWordWriter) PutNextTrailingByte(b byte, validBits int) {
344 curbyte := getLSB(bm.currentWord)
347 b = (b << bm.offset) | (b >> (8 - bm.offset))
349 curbyte = (curbyte & byte(bm.bitMask)) | (b &^ byte(bm.bitMask))
350 next = (next &^ byte(bm.bitMask)) | (b & byte(bm.bitMask))
351 bm.bitmap[0] = curbyte
353 bm.currentWord = uint64(next)
357 bm.bitmap = bm.bitmap[1:]
359 debug.Assert(validBits > 0 && validBits < 8, "invalid valid bits in bitmap word writer")
360 debug.Assert(BytesForBits(int64(bm.offset+validBits)) <= int64(len(bm.bitmap)), "writing trailiing byte outside of bounds of bitmap")
361 wr := NewBitmapWriter(bm.bitmap, int(bm.offset), validBits)
362 for i := 0; i < validBits; i++ {
375 // CopyBitmap copies the bitmap indicated by src, starting at bit offset srcOffset,
376 // and copying length bits into dst, starting at bit offset dstOffset.
377 func CopyBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) {
379 // if there's nothing to write, end early.
383 bitOffset := srcOffset % 8
384 destBitOffset := dstOffset % 8
386 // slow path, one of the bitmaps are not byte aligned.
387 if bitOffset != 0 || destBitOffset != 0 {
388 rdr := NewBitmapWordReader(src, srcOffset, length)
389 wr := NewBitmapWordWriter(dst, dstOffset, length)
391 nwords := rdr.Words()
394 wr.PutNextWord(rdr.NextWord())
396 nbytes := rdr.TrailingBytes()
399 bt, validBits := rdr.NextTrailingByte()
400 wr.PutNextTrailingByte(bt, validBits)
405 // fast path, both are starting with byte-aligned bitmaps
406 nbytes := int(BytesForBits(int64(length)))
408 // shift by its byte offset
409 src = src[srcOffset/8:]
410 dst = dst[dstOffset/8:]
412 // Take care of the trailing bits in the last byte
413 // E.g., if trailing_bits = 5, last byte should be
414 // - low 3 bits: new bits from last byte of data buffer
415 // - high 5 bits: old bits from last byte of dest buffer
416 trailingBits := nbytes*8 - length
417 trailMask := byte(uint(1)<<(8-trailingBits)) - 1
419 copy(dst, src[:nbytes-1])
420 lastData := src[nbytes-1]
422 dst[nbytes-1] &= ^trailMask
423 dst[nbytes-1] |= lastData & trailMask