1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
23 "github.com/apache/arrow/go/v6/arrow"
24 "github.com/apache/arrow/go/v6/arrow/bitutil"
25 "github.com/apache/arrow/go/v6/arrow/memory"
// minBuilderCapacity is 1 << 5, i.e. 32.
// NOTE(review): it is not referenced in the visible part of this file; presumably it is
// the smallest capacity concrete builders allocate — confirm at the call sites.
29 minBuilderCapacity = 1 << 5
32 // Builder provides an interface to build arrow arrays.
33 type Builder interface {
34 // Retain increases the reference count by 1.
35 // Retain may be called simultaneously from multiple goroutines.
38 // Release decreases the reference count by 1.
41 // Len returns the number of elements in the array builder.
44 // Cap returns the total number of elements that can be stored
45 // without allocating additional memory.
48 // NullN returns the number of null values in the array builder.
51 // AppendNull adds a new null value to the array being built.
54 // Reserve ensures there is enough space for appending n elements
55 // by checking the capacity and calling Resize if necessary.
58 // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
59 // additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
62 // NewArray creates a new array from the memory buffers used
63 // by the builder and resets the Builder so it can be used to build
68 resize(newBits int, init func(int)) // unexported, so it can only be called from inside this package
71 // builder provides common functionality for managing the validity bitmap (nulls) when building arrays.
75 nullBitmap *memory.Buffer // validity bits: a set bit marks a valid element, a cleared bit a null; nil until init or reserve creates it
81 // Retain increases the reference count by 1.
82 // Retain may be called simultaneously from multiple goroutines.
83 func (b *builder) Retain() {
84 atomic.AddInt64(&b.refCount, 1) // atomic add, so concurrent Retain calls do not race on refCount
87 // Len returns the number of elements in the array builder.
88 func (b *builder) Len() int { return b.length }
90 // Cap returns the total number of elements that can be stored without allocating additional memory.
91 func (b *builder) Cap() int { return b.capacity }
93 // NullN returns the number of null values in the array builder.
94 func (b *builder) NullN() int { return b.nulls }
// init creates the validity bitmap buffer with room for capacity elements, records
// capacity on the builder and zero-fills the buffer.
// NOTE(review): b.nullBitmap is assigned unconditionally; a buffer already held there is
// not released by this function.
96 func (b *builder) init(capacity int) {
97 toAlloc := bitutil.CeilByte(capacity) / 8 // bytes needed for capacity bits, rounded up to a whole byte
98 b.nullBitmap = memory.NewResizableBuffer(b.mem)
99 b.nullBitmap.Resize(toAlloc)
100 b.capacity = capacity
101 memory.Set(b.nullBitmap.Buf(), 0) // zero-fill the buffer returned by Buf(), so no bit starts out set
// reset releases the validity bitmap buffer when one is held.
// NOTE(review): the rest of the body is not visible in this excerpt; presumably it also
// clears the builder's remaining bookkeeping fields — confirm.
104 func (b *builder) reset() {
105 if b.nullBitmap != nil {
106 b.nullBitmap.Release()
// resize changes the validity bitmap so that it can hold newBits elements.
// When the bitmap grows, the newly exposed bytes are zeroed; when newBits is smaller than
// the current length, the null count is recomputed from the bits that remain.
// NOTE(review): the body of the nil-bitmap branch (presumably a call to the init callback
// with newBits) and the capacity/length updates are not visible in this excerpt — confirm.
115 func (b *builder) resize(newBits int, init func(int)) {
116 if b.nullBitmap == nil {
121 newBytesN := bitutil.CeilByte(newBits) / 8 // bytes needed for newBits bits
122 oldBytesN := b.nullBitmap.Len() // byte length before the resize
123 b.nullBitmap.Resize(newBytesN)
125 if oldBytesN < newBytesN {
126 // TODO(sgc): necessary?
127 memory.Set(b.nullBitmap.Buf()[oldBytesN:], 0) // grew: zero everything past the old byte length
129 if newBits < b.length {
131 b.nulls = newBits - bitutil.CountSetBits(b.nullBitmap.Buf(), 0, newBits) // shrank: nulls = kept elements minus set (valid) bits
// reserve creates the validity bitmap buffer if it does not exist yet and checks whether
// elements more values fit into the current capacity. If b.length+elements exceeds
// b.capacity, a new capacity is computed as the next power of two of the required size.
// NOTE(review): what happens with newCap (presumably it is passed to the resize callback)
// is not visible in this excerpt — confirm.
135 func (b *builder) reserve(elements int, resize func(int)) {
136 if b.nullBitmap == nil {
137 b.nullBitmap = memory.NewResizableBuffer(b.mem) // create the bitmap buffer on first use
139 if b.length+elements > b.capacity {
140 newCap := bitutil.NextPowerOf2(b.length + elements)
145 // unsafeAppendBoolsToBitmap appends the contents of valid to the validity bitmap.
146 // As an optimization, if the valid slice is empty, the next length bits will be set to valid (not null).
// The bitmap is updated through a working copy of one byte at a time (bitSet), which is
// written back to the buffer as bytes are completed. b.length grows by len(valid).
// NOTE(review): no capacity check is visible here and the bitmap is indexed directly, so
// presumably callers must reserve space first — confirm.
147 func (b *builder) unsafeAppendBoolsToBitmap(valid []bool, length int) {
149 b.unsafeSetValid(length)
153 byteOffset := b.length / 8 // index of the byte that holds the next bit to write
154 bitOffset := byte(b.length % 8) // position of that bit inside the byte
155 nullBitmap := b.nullBitmap.Bytes()
156 bitSet := nullBitmap[byteOffset] // working copy of the current byte
158 for _, v := range valid {
161 nullBitmap[byteOffset] = bitSet // write the working byte back to the buffer
163 bitSet = nullBitmap[byteOffset] // reload the working byte (presumably after byteOffset advances on a line not shown here)
167 bitSet |= bitutil.BitMask[bitOffset] // set the bit: element is valid
169 bitSet &= bitutil.FlippedBitMask[bitOffset] // clear the bit: element is null
176 nullBitmap[byteOffset] = bitSet // store the last, possibly partial, working byte
178 b.length += len(valid)
181 // unsafeSetValid sets the next length bits to valid in the validity bitmap.
// The work is split into three parts: single bits up to a byte boundary, a run of whole
// bytes filled with 0xff, and the remaining bits set one at a time.
// NOTE(review): lines not visible in this excerpt presumably adjust padToByte for the
// already-aligned case and store newLength in b.length — confirm.
182 func (b *builder) unsafeSetValid(length int) {
183 padToByte := min(8-(b.length%8), length) // distance to the next byte boundary (8 when already aligned), capped at length
187 bits := b.nullBitmap.Bytes()
188 for i := b.length; i < b.length+padToByte; i++ {
189 bitutil.SetBit(bits, i)
192 start := (b.length + padToByte) / 8 // first byte of the whole-byte run
193 fastLength := (length - padToByte) / 8 // number of whole bytes that can be filled in one call
194 memory.Set(bits[start:start+fastLength], 0xff)
196 newLength := b.length + length
198 for i := b.length + padToByte + (fastLength * 8); i < newLength; i++ { // bits left over after the last whole byte
199 bitutil.SetBit(bits, i)
// UnsafeAppendBoolToBitmap records the validity of one more element in the validity bitmap.
// The visible statement sets the bit at index b.length, which marks that element as valid.
// NOTE(review): the branch on isValid and the length/null-count bookkeeping are not visible
// in this excerpt, and no capacity check is visible either — presumably callers must
// reserve space before calling; confirm.
205 func (b *builder) UnsafeAppendBoolToBitmap(isValid bool) {
207 bitutil.SetBit(b.nullBitmap.Bytes(), b.length) // set the bit for the element at index b.length
// NewBuilder returns a Builder for the data type dtype; mem is handed to the constructor
// of the selected builder.
//
// The builder is selected by the arrow.XXX case labels below (NOTE(review): the switch
// header is not visible in this excerpt; presumably it switches on dtype.ID() — confirm).
// For parameterized types, dtype is type-asserted to its concrete arrow type and that value
// is passed to the constructor. Most of these assertions are unchecked, so they panic when
// the concrete type does not match. When no case returns a builder, the function panics
// with an "unsupported builder" error.
214 func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder {
215 // FIXME(sbinet): use a type switch on dtype instead?
218 return NewNullBuilder(mem)
220 return NewBooleanBuilder(mem)
222 return NewUint8Builder(mem)
224 return NewInt8Builder(mem)
226 return NewUint16Builder(mem)
228 return NewInt16Builder(mem)
230 return NewUint32Builder(mem)
232 return NewInt32Builder(mem)
234 return NewUint64Builder(mem)
236 return NewInt64Builder(mem)
238 return NewFloat16Builder(mem)
240 return NewFloat32Builder(mem)
242 return NewFloat64Builder(mem)
244 return NewStringBuilder(mem)
246 return NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
247 case arrow.FIXED_SIZE_BINARY:
248 typ := dtype.(*arrow.FixedSizeBinaryType)
249 return NewFixedSizeBinaryBuilder(mem, typ)
251 return NewDate32Builder(mem)
253 return NewDate64Builder(mem)
254 case arrow.TIMESTAMP:
255 typ := dtype.(*arrow.TimestampType)
256 return NewTimestampBuilder(mem, typ)
258 typ := dtype.(*arrow.Time32Type)
259 return NewTime32Builder(mem, typ)
261 typ := dtype.(*arrow.Time64Type)
262 return NewTime64Builder(mem, typ)
264 switch dtype.(type) { // interval builders are picked by the concrete Go type of dtype
265 case *arrow.DayTimeIntervalType:
266 return NewDayTimeIntervalBuilder(mem)
267 case *arrow.MonthIntervalType:
268 return NewMonthIntervalBuilder(mem)
269 case *arrow.MonthDayNanoIntervalType:
270 return NewMonthDayNanoIntervalBuilder(mem)
272 case arrow.INTERVAL_MONTHS:
273 return NewMonthIntervalBuilder(mem)
274 case arrow.INTERVAL_DAY_TIME:
275 return NewDayTimeIntervalBuilder(mem)
276 case arrow.INTERVAL_MONTH_DAY_NANO:
277 return NewMonthDayNanoIntervalBuilder(mem)
278 case arrow.DECIMAL128:
279 if typ, ok := dtype.(*arrow.Decimal128Type); ok { // checked assertion: on a mismatch this case returns no builder
280 return NewDecimal128Builder(mem, typ)
282 case arrow.DECIMAL256:
284 typ := dtype.(*arrow.ListType)
285 return NewListBuilder(mem, typ.Elem())
287 typ := dtype.(*arrow.StructType)
288 return NewStructBuilder(mem, typ)
// The cases below have empty bodies. A Go case does not fall through implicitly, so no
// builder is returned for these types and they end at the "unsupported builder" panic.
289 case arrow.SPARSE_UNION:
290 case arrow.DENSE_UNION:
291 case arrow.DICTIONARY:
292 case arrow.LARGE_STRING:
293 case arrow.LARGE_BINARY:
294 case arrow.LARGE_LIST:
296 typ := dtype.(*arrow.MapType)
297 return NewMapBuilder(mem, typ.KeyType(), typ.ItemType(), typ.KeysSorted)
298 case arrow.EXTENSION:
299 typ := dtype.(arrow.ExtensionType)
300 return NewExtensionBuilder(mem, typ)
301 case arrow.FIXED_SIZE_LIST:
302 typ := dtype.(*arrow.FixedSizeListType)
303 return NewFixedSizeListBuilder(mem, typ.Len(), typ.Elem())
305 typ := dtype.(*arrow.DurationType)
306 return NewDurationBuilder(mem, typ)
308 panic(fmt.Errorf("arrow/array: unsupported builder for %T", dtype)) // reached when no case above returned a builder