]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/go/arrow/array/builder.go
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / go / arrow / array / builder.go
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16
17 package array
18
19 import (
20 "fmt"
21 "sync/atomic"
22
23 "github.com/apache/arrow/go/v6/arrow"
24 "github.com/apache/arrow/go/v6/arrow/bitutil"
25 "github.com/apache/arrow/go/v6/arrow/memory"
26 )
27
// minBuilderCapacity is a capacity floor of 32 (1 << 5) elements.
// NOTE(review): it is not referenced in this file; by its name it is
// presumably the smallest capacity the concrete builders allocate — confirm
// against their Resize implementations.
const minBuilderCapacity = 32
31
// Builder provides an interface to build arrow arrays.
//
// The interface also declares two unexported methods (init and resize), so
// every method below, exported or not, must be provided by an implementation.
type Builder interface {
	// Retain increases the reference count by 1.
	// Retain may be called simultaneously from multiple goroutines.
	Retain()

	// Release decreases the reference count by 1.
	Release()

	// Len returns the number of elements in the array builder.
	Len() int

	// Cap returns the total number of elements that can be stored
	// without allocating additional memory.
	Cap() int

	// NullN returns the number of null values in the array builder.
	NullN() int

	// AppendNull adds a new null value to the array being built.
	AppendNull()

	// Reserve ensures there is enough space for appending n elements
	// by checking the capacity and calling Resize if necessary.
	Reserve(n int)

	// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
	// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
	Resize(n int)

	// NewArray creates a new array from the memory buffers used
	// by the builder and resets the Builder so it can be used to build
	// a new array.
	NewArray() Interface

	// init prepares the builder's storage for capacity elements.
	// NOTE(review): exact semantics are implementation-specific; the shared
	// builder type in this file allocates a zeroed validity bitmap.
	init(capacity int)

	// resize changes the builder's storage to hold newBits elements. The init
	// callback is what the shared builder type invokes (with newBits) when
	// nothing has been allocated yet.
	resize(newBits int, init func(int))
}
70
// builder provides common functionality for managing the validity bitmap (nulls) when building arrays.
type builder struct {
	// refCount is the reference count; Retain increments it atomically.
	// NOTE(review): presumably kept as the first field so that it stays
	// 64-bit aligned for atomic access on 32-bit platforms — confirm before
	// reordering fields.
	refCount int64
	// mem is the allocator used to create the validity bitmap buffer.
	mem memory.Allocator
	// nullBitmap is the validity bitmap: a set bit marks a valid (non-null)
	// element. It is nil until init or reserve runs, and again after reset.
	nullBitmap *memory.Buffer
	// nulls is the number of null elements recorded so far.
	nulls int
	// length is the number of elements recorded so far.
	length int
	// capacity is the number of elements the builder currently has room for.
	capacity int
}
80
81 // Retain increases the reference count by 1.
82 // Retain may be called simultaneously from multiple goroutines.
83 func (b *builder) Retain() {
84 atomic.AddInt64(&b.refCount, 1)
85 }
86
87 // Len returns the number of elements in the array builder.
88 func (b *builder) Len() int { return b.length }
89
90 // Cap returns the total number of elements that can be stored without allocating additional memory.
91 func (b *builder) Cap() int { return b.capacity }
92
93 // NullN returns the number of null values in the array builder.
94 func (b *builder) NullN() int { return b.nulls }
95
96 func (b *builder) init(capacity int) {
97 toAlloc := bitutil.CeilByte(capacity) / 8
98 b.nullBitmap = memory.NewResizableBuffer(b.mem)
99 b.nullBitmap.Resize(toAlloc)
100 b.capacity = capacity
101 memory.Set(b.nullBitmap.Buf(), 0)
102 }
103
104 func (b *builder) reset() {
105 if b.nullBitmap != nil {
106 b.nullBitmap.Release()
107 b.nullBitmap = nil
108 }
109
110 b.nulls = 0
111 b.length = 0
112 b.capacity = 0
113 }
114
115 func (b *builder) resize(newBits int, init func(int)) {
116 if b.nullBitmap == nil {
117 init(newBits)
118 return
119 }
120
121 newBytesN := bitutil.CeilByte(newBits) / 8
122 oldBytesN := b.nullBitmap.Len()
123 b.nullBitmap.Resize(newBytesN)
124 b.capacity = newBits
125 if oldBytesN < newBytesN {
126 // TODO(sgc): necessary?
127 memory.Set(b.nullBitmap.Buf()[oldBytesN:], 0)
128 }
129 if newBits < b.length {
130 b.length = newBits
131 b.nulls = newBits - bitutil.CountSetBits(b.nullBitmap.Buf(), 0, newBits)
132 }
133 }
134
135 func (b *builder) reserve(elements int, resize func(int)) {
136 if b.nullBitmap == nil {
137 b.nullBitmap = memory.NewResizableBuffer(b.mem)
138 }
139 if b.length+elements > b.capacity {
140 newCap := bitutil.NextPowerOf2(b.length + elements)
141 resize(newCap)
142 }
143 }
144
145 // unsafeAppendBoolsToBitmap appends the contents of valid to the validity bitmap.
146 // As an optimization, if the valid slice is empty, the next length bits will be set to valid (not null).
147 func (b *builder) unsafeAppendBoolsToBitmap(valid []bool, length int) {
148 if len(valid) == 0 {
149 b.unsafeSetValid(length)
150 return
151 }
152
153 byteOffset := b.length / 8
154 bitOffset := byte(b.length % 8)
155 nullBitmap := b.nullBitmap.Bytes()
156 bitSet := nullBitmap[byteOffset]
157
158 for _, v := range valid {
159 if bitOffset == 8 {
160 bitOffset = 0
161 nullBitmap[byteOffset] = bitSet
162 byteOffset++
163 bitSet = nullBitmap[byteOffset]
164 }
165
166 if v {
167 bitSet |= bitutil.BitMask[bitOffset]
168 } else {
169 bitSet &= bitutil.FlippedBitMask[bitOffset]
170 b.nulls++
171 }
172 bitOffset++
173 }
174
175 if bitOffset != 0 {
176 nullBitmap[byteOffset] = bitSet
177 }
178 b.length += len(valid)
179 }
180
181 // unsafeSetValid sets the next length bits to valid in the validity bitmap.
182 func (b *builder) unsafeSetValid(length int) {
183 padToByte := min(8-(b.length%8), length)
184 if padToByte == 8 {
185 padToByte = 0
186 }
187 bits := b.nullBitmap.Bytes()
188 for i := b.length; i < b.length+padToByte; i++ {
189 bitutil.SetBit(bits, i)
190 }
191
192 start := (b.length + padToByte) / 8
193 fastLength := (length - padToByte) / 8
194 memory.Set(bits[start:start+fastLength], 0xff)
195
196 newLength := b.length + length
197 // trailing bytes
198 for i := b.length + padToByte + (fastLength * 8); i < newLength; i++ {
199 bitutil.SetBit(bits, i)
200 }
201
202 b.length = newLength
203 }
204
205 func (b *builder) UnsafeAppendBoolToBitmap(isValid bool) {
206 if isValid {
207 bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
208 } else {
209 b.nulls++
210 }
211 b.length++
212 }
213
214 func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder {
215 // FIXME(sbinet): use a type switch on dtype instead?
216 switch dtype.ID() {
217 case arrow.NULL:
218 return NewNullBuilder(mem)
219 case arrow.BOOL:
220 return NewBooleanBuilder(mem)
221 case arrow.UINT8:
222 return NewUint8Builder(mem)
223 case arrow.INT8:
224 return NewInt8Builder(mem)
225 case arrow.UINT16:
226 return NewUint16Builder(mem)
227 case arrow.INT16:
228 return NewInt16Builder(mem)
229 case arrow.UINT32:
230 return NewUint32Builder(mem)
231 case arrow.INT32:
232 return NewInt32Builder(mem)
233 case arrow.UINT64:
234 return NewUint64Builder(mem)
235 case arrow.INT64:
236 return NewInt64Builder(mem)
237 case arrow.FLOAT16:
238 return NewFloat16Builder(mem)
239 case arrow.FLOAT32:
240 return NewFloat32Builder(mem)
241 case arrow.FLOAT64:
242 return NewFloat64Builder(mem)
243 case arrow.STRING:
244 return NewStringBuilder(mem)
245 case arrow.BINARY:
246 return NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
247 case arrow.FIXED_SIZE_BINARY:
248 typ := dtype.(*arrow.FixedSizeBinaryType)
249 return NewFixedSizeBinaryBuilder(mem, typ)
250 case arrow.DATE32:
251 return NewDate32Builder(mem)
252 case arrow.DATE64:
253 return NewDate64Builder(mem)
254 case arrow.TIMESTAMP:
255 typ := dtype.(*arrow.TimestampType)
256 return NewTimestampBuilder(mem, typ)
257 case arrow.TIME32:
258 typ := dtype.(*arrow.Time32Type)
259 return NewTime32Builder(mem, typ)
260 case arrow.TIME64:
261 typ := dtype.(*arrow.Time64Type)
262 return NewTime64Builder(mem, typ)
263 case arrow.INTERVAL:
264 switch dtype.(type) {
265 case *arrow.DayTimeIntervalType:
266 return NewDayTimeIntervalBuilder(mem)
267 case *arrow.MonthIntervalType:
268 return NewMonthIntervalBuilder(mem)
269 case *arrow.MonthDayNanoIntervalType:
270 return NewMonthDayNanoIntervalBuilder(mem)
271 }
272 case arrow.INTERVAL_MONTHS:
273 return NewMonthIntervalBuilder(mem)
274 case arrow.INTERVAL_DAY_TIME:
275 return NewDayTimeIntervalBuilder(mem)
276 case arrow.INTERVAL_MONTH_DAY_NANO:
277 return NewMonthDayNanoIntervalBuilder(mem)
278 case arrow.DECIMAL128:
279 if typ, ok := dtype.(*arrow.Decimal128Type); ok {
280 return NewDecimal128Builder(mem, typ)
281 }
282 case arrow.DECIMAL256:
283 case arrow.LIST:
284 typ := dtype.(*arrow.ListType)
285 return NewListBuilder(mem, typ.Elem())
286 case arrow.STRUCT:
287 typ := dtype.(*arrow.StructType)
288 return NewStructBuilder(mem, typ)
289 case arrow.SPARSE_UNION:
290 case arrow.DENSE_UNION:
291 case arrow.DICTIONARY:
292 case arrow.LARGE_STRING:
293 case arrow.LARGE_BINARY:
294 case arrow.LARGE_LIST:
295 case arrow.MAP:
296 typ := dtype.(*arrow.MapType)
297 return NewMapBuilder(mem, typ.KeyType(), typ.ItemType(), typ.KeysSorted)
298 case arrow.EXTENSION:
299 typ := dtype.(arrow.ExtensionType)
300 return NewExtensionBuilder(mem, typ)
301 case arrow.FIXED_SIZE_LIST:
302 typ := dtype.(*arrow.FixedSizeListType)
303 return NewFixedSizeListBuilder(mem, typ.Len(), typ.Elem())
304 case arrow.DURATION:
305 typ := dtype.(*arrow.DurationType)
306 return NewDurationBuilder(mem, typ)
307 }
308 panic(fmt.Errorf("arrow/array: unsupported builder for %T", dtype))
309 }