// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package utils_test import ( "testing" "github.com/apache/arrow/go/v6/arrow/bitutil" "github.com/apache/arrow/go/v6/arrow/memory" "github.com/apache/arrow/go/v6/parquet/internal/utils" "github.com/stretchr/testify/assert" "golang.org/x/exp/rand" ) const kWordSize = 64 func create(nbytes, offset, length int64) (*memory.Buffer, *utils.BitBlockCounter) { buf := memory.NewResizableBuffer(memory.DefaultAllocator) buf.Resize(int(nbytes)) return buf, utils.NewBitBlockCounter(buf.Bytes(), offset, length) } func TestOneWordBasics(t *testing.T) { const nbytes = 1024 buf, counter := create(nbytes, 0, nbytes*8) defer buf.Release() var bitsScanned int64 for i := 0; i < nbytes/8; i++ { block := counter.NextWord() assert.EqualValues(t, kWordSize, block.Len) assert.EqualValues(t, 0, block.Popcnt) bitsScanned += int64(block.Len) } assert.EqualValues(t, 1024*8, bitsScanned) block := counter.NextWord() assert.Zero(t, block.Len) assert.Zero(t, block.Popcnt) assert.True(t, block.NoneSet()) } func TestFourWordsBasics(t *testing.T) { const nbytes = 1024 buf, counter := create(nbytes, 0, nbytes*8) defer buf.Release() var bitsScanned int64 for i := 0; i < nbytes/32; i++ { block := counter.NextFourWords() assert.EqualValues(t, 4*kWordSize, block.Len) assert.EqualValues(t, 0, block.Popcnt) bitsScanned += int64(block.Len) } assert.EqualValues(t, 1024*8, bitsScanned) block := counter.NextFourWords() assert.Zero(t, block.Len) assert.Zero(t, block.Popcnt) } func TestOneWordWithOffsets(t *testing.T) { checkWithOffset := func(offset int64) { const ( nwords int64 = 4 totalBytes = nwords*8 + 1 ) // Trim a bit from the end of the bitmap so we can check // the remainder bits behavior buf, counter := create(totalBytes, offset, nwords*kWordSize-offset-1) defer buf.Release() memory.Set(buf.Bytes(), byte(0xFF)) block := counter.NextWord() assert.EqualValues(t, kWordSize, block.Len) assert.EqualValues(t, 64, block.Popcnt) // add a false value to the next word bitutil.SetBitTo(buf.Bytes(), kWordSize+int(offset), false) block = counter.NextWord() assert.EqualValues(t, 64, block.Len) assert.EqualValues(t, 63, block.Popcnt) // Set the next word to all false bitutil.SetBitsTo(buf.Bytes(), 2*kWordSize+offset, kWordSize, false) block = counter.NextWord() assert.EqualValues(t, 64, block.Len) assert.Zero(t, block.Popcnt) block = counter.NextWord() assert.EqualValues(t, kWordSize-offset-1, block.Len) assert.EqualValues(t, block.Len, block.Popcnt) assert.True(t, block.AllSet()) // we can keep calling nextword safely block = counter.NextWord() assert.Zero(t, block.Len) assert.Zero(t, block.Popcnt) } for offsetI := int64(0); offsetI < 8; offsetI++ { checkWithOffset(offsetI) } } func TestFourWordsWithOffsets(t *testing.T) { checkWithOffset := func(offset int64) { const ( nwords = 17 totalBytes = nwords*8 + 1 ) // trim a bit from the end of the bitmap so we can check the remainder // bits behavior buf, counter := create(totalBytes, offset, nwords*kWordSize-offset-1) // start with all set memory.Set(buf.Bytes(), 0xFF) block := counter.NextFourWords() assert.EqualValues(t, 4*kWordSize, block.Len) assert.EqualValues(t, block.Len, block.Popcnt) // add some false values to the next 3 shifted words bitutil.ClearBit(buf.Bytes(), int(4*kWordSize+offset)) bitutil.ClearBit(buf.Bytes(), int(5*kWordSize+offset)) bitutil.ClearBit(buf.Bytes(), int(6*kWordSize+offset)) block = counter.NextFourWords() assert.EqualValues(t, 4*kWordSize, block.Len) assert.EqualValues(t, 253, block.Popcnt) // set the next two words to all false bitutil.SetBitsTo(buf.Bytes(), 8*kWordSize+offset, 2*kWordSize, false) // block is half set block = counter.NextFourWords() assert.EqualValues(t, 4*kWordSize, block.Len) assert.EqualValues(t, 128, block.Popcnt) // last full block whether offset or no block = counter.NextFourWords() assert.EqualValues(t, 4*kWordSize, block.Len) assert.EqualValues(t, block.Len, block.Popcnt) // partial block block = counter.NextFourWords() assert.EqualValues(t, kWordSize-offset-1, block.Len) assert.EqualValues(t, block.Len, block.Popcnt) // we can keep calling NextFourWords safely block = counter.NextFourWords() assert.Zero(t, block.Len) assert.Zero(t, block.Popcnt) } for offsetI := int64(0); offsetI < 8; offsetI++ { checkWithOffset(offsetI) } } func TestFourWordsRandomData(t *testing.T) { const ( nbytes = 1024 ) buf := make([]byte, nbytes) r := rand.New(rand.NewSource(0)) r.Read(buf) checkWithOffset := func(offset int64) { counter := utils.NewBitBlockCounter(buf, offset, nbytes*8-offset) for i := 0; i < nbytes/32; i++ { block := counter.NextFourWords() assert.EqualValues(t, bitutil.CountSetBits(buf, i*256+int(offset), int(block.Len)), block.Popcnt) } } for offsetI := int64(0); offsetI < 8; offsetI++ { checkWithOffset(offsetI) } }