// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package bitutil import ( "math/bits" "unsafe" "github.com/apache/arrow/go/v6/arrow/endian" "github.com/apache/arrow/go/v6/arrow/internal/debug" ) // BitmapReader is a simple bitmap reader for a byte slice. type BitmapReader struct { bitmap []byte pos int len int current byte byteOffset int bitOffset int } // NewBitmapReader creates and returns a new bitmap reader for the given bitmap func NewBitmapReader(bitmap []byte, offset, length int) *BitmapReader { curbyte := byte(0) if length > 0 && bitmap != nil { curbyte = bitmap[offset/8] } return &BitmapReader{ bitmap: bitmap, byteOffset: offset / 8, bitOffset: offset % 8, current: curbyte, len: length, } } // Set returns true if the current bit is set func (b *BitmapReader) Set() bool { return (b.current & (1 << b.bitOffset)) != 0 } // NotSet returns true if the current bit is not set func (b *BitmapReader) NotSet() bool { return (b.current & (1 << b.bitOffset)) == 0 } // Next advances the reader to the next bit in the bitmap. func (b *BitmapReader) Next() { b.bitOffset++ b.pos++ if b.bitOffset == 8 { b.bitOffset = 0 b.byteOffset++ if b.pos < b.len { b.current = b.bitmap[int(b.byteOffset)] } } } // Pos returns the current bit position in the bitmap that the reader is looking at func (b *BitmapReader) Pos() int { return b.pos } // Len returns the total number of bits in the bitmap func (b *BitmapReader) Len() int { return b.len } // BitmapWriter is a simple writer for writing bitmaps to byte slices type BitmapWriter struct { buf []byte pos int length int curByte uint8 bitMask uint8 byteOffset int } // NewBitmapWriter returns a sequential bitwise writer that preserves surrounding // bit values as it writes. func NewBitmapWriter(bitmap []byte, start, length int) *BitmapWriter { ret := &BitmapWriter{ buf: bitmap, length: length, byteOffset: start / 8, bitMask: BitMask[start%8], } if length > 0 { ret.curByte = bitmap[int(ret.byteOffset)] } return ret } // Reset resets the position and view of the slice to restart writing a bitmap // to the same byte slice. func (b *BitmapWriter) Reset(start, length int) { b.pos = 0 b.byteOffset = start / 8 b.bitMask = BitMask[start%8] b.length = length if b.length > 0 { b.curByte = b.buf[int(b.byteOffset)] } } func (b *BitmapWriter) Pos() int { return b.pos } func (b *BitmapWriter) Set() { b.curByte |= b.bitMask } func (b *BitmapWriter) Clear() { b.curByte &= ^b.bitMask } // Next increments the writer to the next bit for writing. func (b *BitmapWriter) Next() { b.bitMask = b.bitMask << 1 b.pos++ if b.bitMask == 0 { b.bitMask = 0x01 b.buf[b.byteOffset] = b.curByte b.byteOffset++ if b.pos < b.length { b.curByte = b.buf[int(b.byteOffset)] } } } // AppendBools writes a series of booleans to the bitmapwriter and returns // the number of remaining bytes left in the buffer for writing. func (b *BitmapWriter) AppendBools(in []bool) int { space := min(b.length-b.pos, len(in)) if space == 0 { return 0 } bitOffset := bits.TrailingZeros32(uint32(b.bitMask)) // location that the first byte needs to be written to for appending appslice := b.buf[int(b.byteOffset) : b.byteOffset+int(BytesForBits(int64(bitOffset+space)))] // update everything but curByte appslice[0] = b.curByte for i, b := range in[:space] { if b { SetBit(appslice, i+bitOffset) } else { ClearBit(appslice, i+bitOffset) } } b.pos += space b.bitMask = BitMask[(bitOffset+space)%8] b.byteOffset += (bitOffset + space) / 8 b.curByte = appslice[len(appslice)-1] return space } // Finish flushes the final byte out to the byteslice in case it was not already // on a byte aligned boundary. func (b *BitmapWriter) Finish() { if b.length > 0 && (b.bitMask != 0x01 || b.pos < b.length) { b.buf[int(b.byteOffset)] = b.curByte } } // BitmapWordReader is a reader for bitmaps that reads a word at a time (a word being an 8 byte uint64) // and then provides functions to grab the individual trailing bytes after the last word type BitmapWordReader struct { bitmap []byte offset int nwords int trailingBits int trailingBytes int curword uint64 } // NewBitmapWordReader sets up a word reader, calculates the number of trailing bits and // number of trailing bytes, along with the number of words. func NewBitmapWordReader(bitmap []byte, offset, length int) *BitmapWordReader { bitoffset := offset % 8 byteOffset := offset / 8 bm := &BitmapWordReader{ offset: bitoffset, bitmap: bitmap[byteOffset : byteOffset+int(BytesForBits(int64(bitoffset+length)))], // decrement wordcount by 1 as we may touch two adjacent words in one iteration nwords: length/int(unsafe.Sizeof(uint64(0))*8) - 1, } if bm.nwords < 0 { bm.nwords = 0 } bm.trailingBits = length - bm.nwords*int(unsafe.Sizeof(uint64(0)))*8 bm.trailingBytes = int(BytesForBits(int64(bm.trailingBits))) if bm.nwords > 0 { bm.curword = toFromLEFunc(endian.Native.Uint64(bm.bitmap)) } else { setLSB(&bm.curword, bm.bitmap[0]) } return bm } // NextWord returns the next full word read from the bitmap, should not be called // if Words() is 0 as it will step outside of the bounds of the bitmap slice and panic. // // We don't perform the bounds checking in order to improve performance. func (bm *BitmapWordReader) NextWord() uint64 { bm.bitmap = bm.bitmap[unsafe.Sizeof(bm.curword):] word := bm.curword nextWord := toFromLEFunc(endian.Native.Uint64(bm.bitmap)) if bm.offset != 0 { // combine two adjacent words into one word // |<------ next ----->|<---- current ---->| // +-------------+-----+-------------+-----+ // | --- | A | B | --- | // +-------------+-----+-------------+-----+ // | | offset // v v // +-----+-------------+ // | A | B | // +-----+-------------+ // |<------ word ----->| word >>= uint64(bm.offset) word |= nextWord << (int64(unsafe.Sizeof(uint64(0))*8) - int64(bm.offset)) } bm.curword = nextWord return word } // NextTrailingByte returns the next trailing byte of the bitmap after the last word // along with the number of valid bits in that byte. When validBits < 8, that // is the last byte. // // If the bitmap ends on a byte alignment, then the last byte can also return 8 valid bits. // Thus the TrailingBytes function should be used to know how many trailing bytes to read. func (bm *BitmapWordReader) NextTrailingByte() (val byte, validBits int) { debug.Assert(bm.trailingBits > 0, "next trailing byte called with no trailing bits") if bm.trailingBits <= 8 { // last byte validBits = bm.trailingBits bm.trailingBits = 0 rdr := NewBitmapReader(bm.bitmap, bm.offset, validBits) for i := 0; i < validBits; i++ { val >>= 1 if rdr.Set() { val |= 0x80 } rdr.Next() } val >>= (8 - validBits) return } bm.bitmap = bm.bitmap[1:] nextByte := bm.bitmap[0] val = getLSB(bm.curword) if bm.offset != 0 { val >>= byte(bm.offset) val |= nextByte << (8 - bm.offset) } setLSB(&bm.curword, nextByte) bm.trailingBits -= 8 bm.trailingBytes-- validBits = 8 return } func (bm *BitmapWordReader) Words() int { return bm.nwords } func (bm *BitmapWordReader) TrailingBytes() int { return bm.trailingBytes } // BitmapWordWriter is a bitmap writer for writing a full word at a time (a word being // a uint64). After the last full word is written, PutNextTrailingByte can be used to // write the remaining trailing bytes. type BitmapWordWriter struct { bitmap []byte offset int len int bitMask uint64 currentWord uint64 } // NewBitmapWordWriter initializes a new bitmap word writer which will start writing // into the byte slice at bit offset start, expecting to write len bits. func NewBitmapWordWriter(bitmap []byte, start, len int) *BitmapWordWriter { ret := &BitmapWordWriter{ bitmap: bitmap[start/8:], len: len, offset: start % 8, bitMask: (uint64(1) << uint64(start%8)) - 1, } if ret.offset != 0 { if ret.len >= int(unsafe.Sizeof(uint64(0))*8) { ret.currentWord = toFromLEFunc(endian.Native.Uint64(ret.bitmap)) } else if ret.len > 0 { setLSB(&ret.currentWord, ret.bitmap[0]) } } return ret } // PutNextWord writes the given word to the bitmap, potentially splitting across // two adjacent words. func (bm *BitmapWordWriter) PutNextWord(word uint64) { sz := int(unsafe.Sizeof(word)) if bm.offset != 0 { // split one word into two adjacent words, don't touch unused bits // |<------ word ----->| // +-----+-------------+ // | A | B | // +-----+-------------+ // | | // v v offset // +-------------+-----+-------------+-----+ // | --- | A | B | --- | // +-------------+-----+-------------+-----+ // |<------ next ----->|<---- current ---->| word = (word << uint64(bm.offset)) | (word >> (int64(sz*8) - int64(bm.offset))) next := toFromLEFunc(endian.Native.Uint64(bm.bitmap[sz:])) bm.currentWord = (bm.currentWord & bm.bitMask) | (word &^ bm.bitMask) next = (next &^ bm.bitMask) | (word & bm.bitMask) endian.Native.PutUint64(bm.bitmap, toFromLEFunc(bm.currentWord)) endian.Native.PutUint64(bm.bitmap[sz:], toFromLEFunc(next)) bm.currentWord = next } else { endian.Native.PutUint64(bm.bitmap, toFromLEFunc(word)) } bm.bitmap = bm.bitmap[sz:] } // PutNextTrailingByte writes the number of bits indicated by validBits from b to // the bitmap. func (bm *BitmapWordWriter) PutNextTrailingByte(b byte, validBits int) { curbyte := getLSB(bm.currentWord) if validBits == 8 { if bm.offset != 0 { b = (b << bm.offset) | (b >> (8 - bm.offset)) next := bm.bitmap[1] curbyte = (curbyte & byte(bm.bitMask)) | (b &^ byte(bm.bitMask)) next = (next &^ byte(bm.bitMask)) | (b & byte(bm.bitMask)) bm.bitmap[0] = curbyte bm.bitmap[1] = next bm.currentWord = uint64(next) } else { bm.bitmap[0] = b } bm.bitmap = bm.bitmap[1:] } else { debug.Assert(validBits > 0 && validBits < 8, "invalid valid bits in bitmap word writer") debug.Assert(BytesForBits(int64(bm.offset+validBits)) <= int64(len(bm.bitmap)), "writing trailiing byte outside of bounds of bitmap") wr := NewBitmapWriter(bm.bitmap, int(bm.offset), validBits) for i := 0; i < validBits; i++ { if b&0x01 != 0 { wr.Set() } else { wr.Clear() } wr.Next() b >>= 1 } wr.Finish() } } // CopyBitmap copies the bitmap indicated by src, starting at bit offset srcOffset, // and copying length bits into dst, starting at bit offset dstOffset. func CopyBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) { if length == 0 { // if there's nothing to write, end early. return } bitOffset := srcOffset % 8 destBitOffset := dstOffset % 8 // slow path, one of the bitmaps are not byte aligned. if bitOffset != 0 || destBitOffset != 0 { rdr := NewBitmapWordReader(src, srcOffset, length) wr := NewBitmapWordWriter(dst, dstOffset, length) nwords := rdr.Words() for nwords > 0 { nwords-- wr.PutNextWord(rdr.NextWord()) } nbytes := rdr.TrailingBytes() for nbytes > 0 { nbytes-- bt, validBits := rdr.NextTrailingByte() wr.PutNextTrailingByte(bt, validBits) } return } // fast path, both are starting with byte-aligned bitmaps nbytes := int(BytesForBits(int64(length))) // shift by its byte offset src = src[srcOffset/8:] dst = dst[dstOffset/8:] // Take care of the trailing bits in the last byte // E.g., if trailing_bits = 5, last byte should be // - low 3 bits: new bits from last byte of data buffer // - high 5 bits: old bits from last byte of dest buffer trailingBits := nbytes*8 - length trailMask := byte(uint(1)<<(8-trailingBits)) - 1 copy(dst, src[:nbytes-1]) lastData := src[nbytes-1] dst[nbytes-1] &= ^trailMask dst[nbytes-1] |= lastData & trailMask }