1 // Licensed to the Apache Software Foundation (ASF) under one or more
2 // contributor license agreements. See the NOTICE file distributed with
3 // this work for additional information regarding copyright ownership.
4 // The ASF licenses this file to You under the Apache License, Version 2.0
5 // (the "License"); you may not use this file except in compliance with
6 // the License. You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
16 using Apache.Arrow.Types;
18 using System.Collections.Generic;
19 using System.Runtime.CompilerServices;
20 using Apache.Arrow.Memory;
22 namespace Apache.Arrow
24 public class BinaryArray : Array
// Concrete builder for BinaryArray: it supplies the Build(ArrayData) hook that BuilderBase declares.
// NOTE(review): the embedded listing numbers skip 27, 30, 32 and 34-36, so brace lines (and possibly
// other lines) of this class are not shown in this view.
26 public class Builder : BuilderBase<BinaryArray, Builder>
// Parameterless constructor: passes BinaryType.Default to the base builder.
28 public Builder() : base(BinaryType.Default) { }
// Passes a caller-supplied data type to the base builder.
29 public Builder(IArrowType dataType) : base(dataType) { }
// Wraps the supplied ArrayData in a new BinaryArray.
31 protected override BinaryArray Build(ArrayData data)
33 return new BinaryArray(data);
// Constructs a BinaryArray over existing array data, calling EnsureDataType with ArrowTypeId.Binary
// and EnsureBufferCount with 3 on that data.
// NOTE(review): listing numbers 38-39 are not shown, so any base-constructor call is outside this view.
37 public BinaryArray(ArrayData data)
40 data.EnsureDataType(ArrowTypeId.Binary);
// Three buffers: the buffer-taking constructor in this file orders them as
// { null bitmap, value offsets, data }.
41 data.EnsureBufferCount(3);
// Same as the single-argument constructor, except that the type id passed to EnsureDataType is
// supplied by the caller instead of being fixed to ArrowTypeId.Binary.
// NOTE(review): listing numbers 45-46 are not shown, so any base-constructor call is outside this view.
44 public BinaryArray(ArrowTypeId typeId, ArrayData data)
47 data.EnsureDataType(typeId);
// Three buffers, matching the { null bitmap, value offsets, data } ordering used elsewhere in this file.
48 data.EnsureBufferCount(3);
51 public abstract class BuilderBase<TArray, TBuilder> : IArrowArrayBuilder<byte, TArray, TBuilder>
52 where TArray : IArrowArray
53 where TBuilder : class, IArrowArrayBuilder<byte, TArray, TBuilder>
// Arrow data type of this builder; presumably set from the constructor argument (the assignment is
// not visible in this listing — confirm).
55 protected IArrowType DataType { get; }
// This builder viewed as TBuilder. The `as` cast yields null when the runtime type is not a TBuilder.
// Presumably this is what the fluent-style methods return — their return statements are not shown here.
56 protected TBuilder Instance => this as TBuilder;
// Builder for the offsets buffer: one int is appended per recorded offset.
57 protected ArrowBuffer.Builder<int> ValueOffsets { get; }
// Builder for the value bytes of all appended items.
58 protected ArrowBuffer.Builder<byte> ValueBuffer { get; }
// Builder for the validity bitmap: AppendNull appends false, the value-appending methods append true.
59 protected ArrowBuffer.BitmapBuilder ValidityBuffer { get; }
// Running offset into the value bytes; its current value is what gets appended to ValueOffsets
// after each item.
60 protected int Offset { get; set; }
// Null count, read from the validity bitmap builder's UnsetBitCount.
61 protected int NullCount => this.ValidityBuffer.UnsetBitCount;
// Creates the three buffer builders and seeds the offsets builder with the initial offset.
// NOTE(review): listing numbers 64-65 are not shown and no assignment to DataType is visible in this
// view — confirm that dataType is stored.
63 protected BuilderBase(IArrowType dataType)
66 ValueOffsets = new ArrowBuffer.Builder<int>();
67 ValueBuffer = new ArrowBuffer.Builder<byte>();
68 ValidityBuffer = new ArrowBuffer.BitmapBuilder();
// NOTE(review): listing numbers 69-71 and 75-76 are not shown, so the description below is cut off
// mid-sentence. Per the Arrow columnar format, a slot's length is the difference between the offset
// at that slot's index and the subsequent offset.
72 // The offsets buffer contains length + 1 signed integers (either 32-bit or 64-bit, depending on the
73 // logical type), which encode the start position of each slot in the data buffer. The length of the
74 // value in each slot is computed using the difference between the offset at that slot’s index and the
77 // In this builder, we choose to append the first offset (zero) upon construction, and each trailing
78 // offset is then added after each individual item has been appended.
// Nothing visible assigns Offset before this point, so it still holds the int default of zero.
79 ValueOffsets.Append(this.Offset);
// Subclass hook: turns an ArrayData instance into the concrete array type TArray.
82 protected abstract TArray Build(ArrayData data);
85 /// Gets the length of the array built so far.
// One less than the offsets builder's length: the constructor appends an initial offset, and each
// append method adds one more offset per item.
87 public int Length => ValueOffsets.Length - 1;
90 /// Build an Arrow array from the appended contents so far.
92 /// <param name="allocator">Optional memory allocator.</param>
93 /// <returns>Returns an array of type <typeparamref name="TArray"/>.</returns>
94 public TArray Build(MemoryAllocator allocator = default)
// Buffers are listed in the order validity bitmap, value offsets, value bytes. When NullCount is
// zero, ArrowBuffer.Empty stands in for the validity bitmap instead of a built one.
98 NullCount > 0 ? ValidityBuffer.Build(allocator) : ArrowBuffer.Empty,
99 ValueOffsets.Build(allocator),
100 ValueBuffer.Build(allocator),
// NOTE(review): listing numbers 95-97, 101 and 103-112 are not shown, so the declaration that holds
// the buffers above, the ArrayData constructor arguments and the return statement are outside this view.
102 var data = new ArrayData(
113 /// Append a single null value to the array.
115 /// <returns>Returns the builder (for fluent-style composition).</returns>
116 public TBuilder AppendNull()
118 // Do not add to the value buffer in the case of a null.
119 // Note that we do not need to increment the offset as a result.
120 ValidityBuffer.Append(false);
// Re-appending the unchanged offset records the null slot with a zero-length value.
121 ValueOffsets.Append(Offset);
// NOTE(review): no return statement is visible (listing numbers 122-124 are not shown).
126 /// Appends a value, consisting of a single byte, to the array.
128 /// <param name="value">Byte value to append.</param>
129 /// <returns>Returns the builder (for fluent-style composition).</returns>
130 public TBuilder Append(byte value)
132 ValueBuffer.Append(value);
133 ValidityBuffer.Append(true);
// NOTE(review): no update of Offset is visible between appending the byte and recording the offset,
// whereas Append(ReadOnlySpan<byte>) adds the value's length to Offset first. Listing number 134 is
// not shown — confirm that it advances Offset by one; otherwise this value would be recorded with
// zero length.
135 ValueOffsets.Append(Offset);
// NOTE(review): no return statement is visible (listing numbers 136-138 are not shown).
140 /// Append a value, consisting of a span of bytes, to the array.
143 /// Note that a single value is added, which consists of arbitrarily many bytes. If multiple values are
144 /// to be added, use the <see cref="AppendRange"/> method.
146 /// <param name="span">Span of bytes to add.</param>
147 /// <returns>Returns the builder (for fluent-style composition).</returns>
148 public TBuilder Append(ReadOnlySpan<byte> span)
// Copy the bytes, mark the slot valid, then advance the running offset by the value's length and
// record it as this value's end offset. An empty span leaves Offset unchanged.
150 ValueBuffer.Append(span);
151 ValidityBuffer.Append(true);
152 Offset += span.Length;
153 ValueOffsets.Append(Offset);
// NOTE(review): no return statement is visible (listing numbers 154-156 are not shown).
158 /// Append a value, consisting of an enumerable collection of bytes, to the array.
161 /// Note that this method appends a single value, which may consist of arbitrarily many bytes. If multiple
162 /// values are to be added, use the <see cref="AppendRange(IEnumerable{byte})"/> method instead.
164 /// <param name="value">Enumerable collection of bytes to add.</param>
165 /// <returns>Returns the builder (for fluent-style composition).</returns>
166 public TBuilder Append(IEnumerable<byte> value)
// NOTE(review): listing numbers 167-172 are not shown, so any handling of a null `value` is outside
// this view.
173 // Note: by looking at the length of the value buffer before and after, we avoid having to iterate
174 // through the enumerable multiple times to get both length and contents.
175 int priorLength = ValueBuffer.Length;
176 ValueBuffer.AppendRange(value);
// The growth of the value buffer is the number of bytes the enumerable produced.
177 int valueLength = ValueBuffer.Length - priorLength;
178 Offset += valueLength;
179 ValidityBuffer.Append(true);
180 ValueOffsets.Append(Offset);
// NOTE(review): no return statement is visible (listing numbers 181-183 are not shown).
185 /// Append an enumerable collection of single-byte values to the array.
188 /// Note that this method appends multiple values, each of which is a single byte. If a single value is
189 /// to be added, use the <see cref="Append(IEnumerable{byte})"/> method instead.
191 /// <param name="values">Single-byte values to add.</param>
192 /// <returns>Returns the builder (for fluent-style composition).</returns>
193 public TBuilder AppendRange(IEnumerable<byte> values)
// NOTE(review): the condition guarding this throw is not shown (listing numbers 194-196); given the
// exception type and nameof(values), presumably a null check on `values` — confirm.
197 throw new ArgumentNullException(nameof(values));
// NOTE(review): the loop body and the return are not shown (listing numbers 201-207); per the
// description above, each byte is presumably appended as its own value.
200 foreach (byte b in values)
209 /// Append an enumerable collection of values to the array.
211 /// <param name="values">Values to add.</param>
212 /// <returns>Returns the builder (for fluent-style composition).</returns>
213 public TBuilder AppendRange(IEnumerable<byte[]> values)
// NOTE(review): the condition guarding this throw is not shown (listing numbers 214-216); given the
// exception type and nameof(values), presumably a null check on `values` — confirm.
217 throw new ArgumentNullException(nameof(values));
220 foreach (byte[] arr in values)
// NOTE(review): listing numbers 221-227 are not shown, so any special handling of individual
// entries (for example null arrays) before the call below is outside this view.
// The explicit cast selects the ReadOnlySpan<byte> overload of Append, so each array is appended
// as one value.
228 Append((ReadOnlySpan<byte>)arr);
// Forwards a reservation request to all three buffer builders: capacity + 1 for the offsets and
// validity builders, capacity for the value bytes. As the TODO notes, `capacity` is applied to the
// byte buffer as-is even though values can be any number of bytes long.
235 public TBuilder Reserve(int capacity)
237 // TODO: [ARROW-9366] Reserve capacity in the value buffer in a more sensible way.
238 ValueOffsets.Reserve(capacity + 1);
239 ValueBuffer.Reserve(capacity);
240 ValidityBuffer.Reserve(capacity + 1);
// NOTE(review): no return statement is visible (listing numbers 241-242 are not shown).
// Forwards a resize request to all three buffer builders: length + 1 for the offsets and validity
// builders, length for the value bytes. As the TODO notes, the byte buffer is resized to `length`
// rather than to a size derived from the offsets.
244 public TBuilder Resize(int length)
246 // TODO: [ARROW-9366] Resize the value buffer to a safe length based on offsets, not `length`.
247 ValueOffsets.Resize(length + 1);
248 ValueBuffer.Resize(length);
249 ValidityBuffer.Resize(length + 1);
// NOTE(review): no return statement is visible (listing numbers 250-251 are not shown).
// Not implemented for this builder: the only visible statement throws NotImplementedException.
// NOTE(review): listing numbers 254-255 are not shown.
253 public TBuilder Swap(int i, int j)
256 throw new NotImplementedException();
// Not implemented for this builder: the only visible statement throws NotImplementedException.
// NOTE(review): listing numbers 260-261 are not shown.
259 public TBuilder Set(int index, byte value)
262 throw new NotImplementedException();
266 /// Clear all contents appended so far.
268 /// <returns>Returns the builder (for fluent-style composition).</returns>
269 public TBuilder Clear()
271 ValueOffsets.Clear();
// NOTE(review): no call that clears ValueBuffer is visible (listing number 272 is not shown) —
// confirm that the value bytes are cleared as well.
273 ValidityBuffer.Clear();
275 // Always write the first offset before anything has been written.
// NOTE(review): no reset of Offset is visible (listing number 276 is not shown) — confirm that it is
// set back to zero before being appended here; otherwise the first offset would carry over from the
// previous contents.
277 ValueOffsets.Append(Offset);
// NOTE(review): no return statement is visible (listing numbers 278-280 are not shown).
// Convenience constructor: packs the individual buffers into an ArrayData and delegates to the
// ArrayData-taking constructor.
// Note that the parameter order (offsets, data, null bitmap) differs from the order in which the
// buffers are stored: { null bitmap, value offsets, data }.
282 public BinaryArray(IArrowType dataType, int length,
283 ArrowBuffer valueOffsetsBuffer,
284 ArrowBuffer dataBuffer,
285 ArrowBuffer nullBitmapBuffer,
286 int nullCount = 0, int offset = 0)
287 : this(new ArrayData(dataType, length, nullCount, offset,
288 new[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer }))
// Forwards to the two-argument Accept overload, passing this array and the visitor. That overload
// is not defined in the visible part of this file (presumably inherited from Array).
291 public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);
// Raw offsets buffer: buffer index 1 of the underlying array data.
293 public ArrowBuffer ValueOffsetsBuffer => Data.Buffers[1];
// Raw value-bytes buffer: buffer index 2 of the underlying array data.
295 public ArrowBuffer ValueBuffer => Data.Buffers[2];
// Offsets for this array's slots: the offsets buffer viewed as ints, starting at this array's Offset
// and spanning Length + 1 entries (one more offset than there are values).
297 public ReadOnlySpan<int> ValueOffsets => ValueOffsetsBuffer.Span.CastTo<int>().Slice(Offset, Length + 1);
// The entire value buffer as bytes; unlike ValueOffsets, no slicing is applied here.
299 public ReadOnlySpan<byte> Values => ValueBuffer.Span.CastTo<byte>();
// Deprecated accessor for a single entry of ValueOffsets.
// index == Length is accepted because ValueOffsets holds Length + 1 entries; anything outside
// [0, Length] throws ArgumentOutOfRangeException.
301 [MethodImpl(MethodImplOptions.AggressiveInlining)]
302 [Obsolete("This method has been deprecated. Please use ValueOffsets[index] instead.")]
303 public int GetValueOffset(int index)
305 if (index < 0 || index > Length)
307 throw new ArgumentOutOfRangeException(nameof(index));
309 return ValueOffsets[index];
// Returns the byte length of the value at `index`, computed as the difference between the next
// offset and this slot's offset. Throws ArgumentOutOfRangeException when index is outside [0, Length).
312 [MethodImpl(MethodImplOptions.AggressiveInlining)]
313 public int GetValueLength(int index)
315 if (index < 0 || index >= Length)
317 throw new ArgumentOutOfRangeException(nameof(index));
// NOTE(review): listing numbers 318-323 are not shown, so any additional handling between the range
// check and the computation below is outside this view.
// The span is read into a local so the ValueOffsets property is evaluated only once.
324 ReadOnlySpan<int> offsets = ValueOffsets;
325 return offsets[index + 1] - offsets[index];
329 /// Get the collection of bytes, as a read-only span, at a given index in the array.
332 /// Note that this method cannot reliably identify null values, which are indistinguishable from empty byte
333 /// collection values when seen in the context of this method's return type of <see cref="ReadOnlySpan{Byte}"/>.
334 /// Use the <see cref="Array.IsNull"/> method instead to reliably determine null values.
336 /// <param name="index">Index at which to get bytes.</param>
337 /// <returns>Returns a <see cref="ReadOnlySpan{Byte}"/> object.</returns>
338 /// <exception cref="ArgumentOutOfRangeException">If the index is negative or beyond the length of the array.</exception>
340 public ReadOnlySpan<byte> GetBytes(int index)
342 if (index < 0 || index >= Length)
344 throw new ArgumentOutOfRangeException(nameof(index));
// NOTE(review): listing numbers 345-348 are not shown, so the condition guarding the early return
// below is not visible. Given the remarks above, presumably it covers null entries — confirm. As
// listed, the final return would be unreachable.
349 // Note that `return null;` is valid syntax, but would be misleading as `null` in the context of a span
350 // is actually returned as an empty span.
351 return ReadOnlySpan<byte>.Empty;
// Slice the value bytes from this slot's start offset for the slot's length. ValueOffsets is already
// sliced by the array's Offset, so `index` is relative to this array.
354 return ValueBuffer.Span.Slice(ValueOffsets[index], GetValueLength(index));