]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / csharp / src / Apache.Arrow / Arrays / BinaryArray.cs
1 // Licensed to the Apache Software Foundation (ASF) under one or more
2 // contributor license agreements. See the NOTICE file distributed with
3 // this work for additional information regarding copyright ownership.
4 // The ASF licenses this file to You under the Apache License, Version 2.0
5 // (the "License"); you may not use this file except in compliance with
6 // the License. You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15
16 using Apache.Arrow.Types;
17 using System;
18 using System.Collections.Generic;
19 using System.Runtime.CompilerServices;
20 using Apache.Arrow.Memory;
21
22 namespace Apache.Arrow
23 {
24 public class BinaryArray : Array
25 {
/// <summary>
/// Concrete builder that produces <see cref="BinaryArray"/> instances.
/// </summary>
public class Builder : BuilderBase<BinaryArray, Builder>
{
    /// <summary>
    /// Creates a builder that uses <see cref="BinaryType.Default"/> as its data type.
    /// </summary>
    public Builder()
        : base(BinaryType.Default)
    {
    }

    /// <summary>
    /// Creates a builder that uses the supplied data type.
    /// </summary>
    /// <param name="dataType">Data type forwarded to the base builder.</param>
    public Builder(IArrowType dataType)
        : base(dataType)
    {
    }

    /// <summary>
    /// Wraps the assembled array data in a new <see cref="BinaryArray"/>.
    /// </summary>
    /// <param name="data">Array data assembled by the base builder.</param>
    /// <returns>Returns the new <see cref="BinaryArray"/>.</returns>
    protected override BinaryArray Build(ArrayData data) => new BinaryArray(data);
}
36
/// <summary>
/// Creates a binary array over the given array data.
/// </summary>
/// <remarks>
/// Forwards to the <see cref="BinaryArray(ArrowTypeId, ArrayData)"/> constructor with
/// <see cref="ArrowTypeId.Binary"/>, which checks the data type and then the buffer count.
/// </remarks>
/// <param name="data">Array data backing the array.</param>
public BinaryArray(ArrayData data)
    : this(ArrowTypeId.Binary, data)
{
}
43
/// <summary>
/// Creates an array over the given array data, requiring the data to carry a specific type id.
/// </summary>
/// <param name="typeId">Type id that <paramref name="data"/> is checked against.</param>
/// <param name="data">Array data backing the array; it is checked with a buffer count of three.</param>
public BinaryArray(ArrowTypeId typeId, ArrayData data)
    : base(data)
{
    // The type check runs before the buffer-count check.
    data.EnsureDataType(typeId);
    data.EnsureBufferCount(3);
}
50
51 public abstract class BuilderBase<TArray, TBuilder> : IArrowArrayBuilder<byte, TArray, TBuilder>
52 where TArray : IArrowArray
53 where TBuilder : class, IArrowArrayBuilder<byte, TArray, TBuilder>
54 {
/// <summary>Data type passed to the array data when an array is built.</summary>
protected IArrowType DataType { get; }

/// <summary>This builder viewed as <typeparamref name="TBuilder"/>; returned by the fluent methods.</summary>
protected TBuilder Instance
{
    get { return this as TBuilder; }
}

/// <summary>Builder for the offsets buffer (one leading offset plus one offset per appended value).</summary>
protected ArrowBuffer.Builder<int> ValueOffsets { get; }

/// <summary>Builder for the buffer that holds the bytes of all appended values.</summary>
protected ArrowBuffer.Builder<byte> ValueBuffer { get; }

/// <summary>Builder for the validity bitmap (one bit per appended value).</summary>
protected ArrowBuffer.BitmapBuilder ValidityBuffer { get; }

/// <summary>Running offset: the value written to the offsets buffer after each append.</summary>
protected int Offset { get; set; }

/// <summary>Number of unset bits currently recorded in the validity bitmap.</summary>
protected int NullCount
{
    get { return ValidityBuffer.UnsetBitCount; }
}
62
/// <summary>
/// Initialises the three underlying buffer builders and writes the leading offset.
/// </summary>
/// <param name="dataType">Data type assigned to arrays produced by this builder.</param>
protected BuilderBase(IArrowType dataType)
{
    DataType = dataType;

    ValidityBuffer = new ArrowBuffer.BitmapBuilder();
    ValueBuffer = new ArrowBuffer.Builder<byte>();
    ValueOffsets = new ArrowBuffer.Builder<int>();

    // Per the Arrow format documentation, the offsets buffer contains length + 1 signed integers
    // (32-bit or 64-bit, depending on the logical type) that encode the start position of each slot
    // in the data buffer; a slot's length is the difference between its offset and the next one.
    //
    // This builder writes the first offset (zero) here, at construction time, and then adds one
    // trailing offset after each appended item.
    ValueOffsets.Append(Offset);
}
81
/// <summary>
/// Creates the concrete array from fully assembled array data.
/// </summary>
/// <param name="data">Array data assembled by <see cref="Build(MemoryAllocator)"/>.</param>
/// <returns>Returns an array of type <typeparamref name="TArray"/>.</returns>
protected abstract TArray Build(ArrayData data);
83
/// <summary>
/// Gets the number of values appended so far.
/// </summary>
/// <remarks>
/// This is one less than the number of stored offsets, because a leading offset is always present.
/// </remarks>
public int Length
{
    get { return ValueOffsets.Length - 1; }
}
88
/// <summary>
/// Builds an Arrow array from everything appended so far.
/// </summary>
/// <param name="allocator">Optional memory allocator handed to each buffer builder.</param>
/// <returns>Returns an array of type <typeparamref name="TArray"/>.</returns>
public TArray Build(MemoryAllocator allocator = default)
{
    // The validity bitmap is only materialised when at least one null was recorded.
    ArrowBuffer validity = NullCount > 0 ? ValidityBuffer.Build(allocator) : ArrowBuffer.Empty;
    ArrowBuffer offsets = ValueOffsets.Build(allocator);
    ArrowBuffer values = ValueBuffer.Build(allocator);

    // Buffer order: validity bitmap, offsets, value bytes.
    ArrowBuffer[] buffers = new[] { validity, offsets, values };

    ArrayData arrayData = new ArrayData(DataType, Length, NullCount, 0, buffers);
    return Build(arrayData);
}
111
/// <summary>
/// Appends one null value to the array.
/// </summary>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder AppendNull()
{
    // A null contributes no bytes to the value buffer, so the running offset stays where it is
    // and is simply repeated in the offsets buffer.
    ValidityBuffer.Append(false);
    ValueOffsets.Append(Offset);

    return Instance;
}
124
/// <summary>
/// Appends one value made up of a single byte.
/// </summary>
/// <param name="value">Byte value to append.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Append(byte value)
{
    ValueBuffer.Append(value);
    ValidityBuffer.Append(true);

    // Advance the running offset by the one byte just written and record it.
    ValueOffsets.Append(++Offset);
    return Instance;
}
138
/// <summary>
/// Appends one value made up of the bytes in a span.
/// </summary>
/// <remarks>
/// Exactly one value is added, however many bytes the span holds. To add several values, use the
/// <see cref="AppendRange"/> method.
/// </remarks>
/// <param name="span">Span of bytes forming the value.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Append(ReadOnlySpan<byte> span)
{
    ValueBuffer.Append(span);
    ValidityBuffer.Append(true);

    // Move the running offset past the bytes just written and record the new end position.
    int end = Offset + span.Length;
    Offset = end;
    ValueOffsets.Append(end);

    return Instance;
}
156
/// <summary>
/// Appends one value made up of an enumerable collection of bytes.
/// </summary>
/// <remarks>
/// Exactly one value is added, however many bytes the collection yields. To add several single-byte
/// values, use the <see cref="AppendRange(IEnumerable{byte})"/> method instead.
/// </remarks>
/// <param name="value">Bytes forming the value; <c>null</c> appends a null value.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Append(IEnumerable<byte> value)
{
    if (value is null)
    {
        return AppendNull();
    }

    // Measuring the value buffer before and after the copy yields the value's length without
    // enumerating the sequence a second time.
    int lengthBefore = ValueBuffer.Length;
    ValueBuffer.AppendRange(value);
    Offset += ValueBuffer.Length - lengthBefore;

    ValidityBuffer.Append(true);
    ValueOffsets.Append(Offset);
    return Instance;
}
183
/// <summary>
/// Appends several values, each consisting of a single byte.
/// </summary>
/// <remarks>
/// Every byte in the collection becomes its own value. To add the whole collection as one value,
/// use the <see cref="Append(IEnumerable{byte})"/> method instead.
/// </remarks>
/// <param name="values">Single-byte values to add.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
/// <exception cref="ArgumentNullException">If <paramref name="values"/> is <c>null</c>.</exception>
public TBuilder AppendRange(IEnumerable<byte> values)
{
    if (values is null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    foreach (byte single in values)
    {
        Append(single);
    }

    return Instance;
}
207
/// <summary>
/// Appends several values, each given as a byte array.
/// </summary>
/// <param name="values">Values to add; a <c>null</c> element is appended as a null value.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
/// <exception cref="ArgumentNullException">If <paramref name="values"/> is <c>null</c>.</exception>
public TBuilder AppendRange(IEnumerable<byte[]> values)
{
    if (values is null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    foreach (byte[] item in values)
    {
        if (item != null)
        {
            // Convert explicitly so the span overload is the one that gets called.
            ReadOnlySpan<byte> bytes = item;
            Append(bytes);
        }
        else
        {
            AppendNull();
        }
    }

    return Instance;
}
234
/// <summary>
/// Reserves capacity in the underlying buffer builders.
/// </summary>
/// <param name="capacity">
/// Capacity passed as-is to the value buffer and, incremented by one, to the offsets and validity builders.
/// </param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Reserve(int capacity)
{
    int capacityPlusOne = capacity + 1;

    // TODO: [ARROW-9366] Reserve capacity in the value buffer in a more sensible way.
    ValueOffsets.Reserve(capacityPlusOne);
    ValueBuffer.Reserve(capacity);
    ValidityBuffer.Reserve(capacityPlusOne);

    return Instance;
}
243
/// <summary>
/// Resizes the builder so that it holds <paramref name="length"/> values.
/// </summary>
/// <remarks>
/// The offsets buffer is resized to <c>length + 1</c> entries, since it always carries one leading
/// offset in addition to one offset per value. The validity bitmap is resized to exactly
/// <paramref name="length"/> bits, one per value, so that it stays aligned with <see cref="Length"/>.
/// </remarks>
/// <param name="length">Number of values the builder should hold after the call.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Resize(int length)
{
    // TODO: [ARROW-9366] Resize the value buffer to a safe length based on offsets, not `length`.
    ValueOffsets.Resize(length + 1);
    ValueBuffer.Resize(length);

    // One validity bit per value. Resizing to `length + 1` would leave a surplus bit that shifts
    // every subsequently appended validity bit one slot away from its value.
    ValidityBuffer.Resize(length);

    // NOTE(review): the running Offset is not re-synchronised with the resized offsets buffer here;
    // confirm whether callers rely on appending after a Resize.
    return Instance;
}
252
/// <summary>
/// Not implemented yet.
/// </summary>
/// <param name="i">Unused.</param>
/// <param name="j">Unused.</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
// TODO: Implement
public TBuilder Swap(int i, int j) => throw new NotImplementedException();
258
/// <summary>
/// Not implemented yet.
/// </summary>
/// <param name="index">Unused.</param>
/// <param name="value">Unused.</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
// TODO: Implement
public TBuilder Set(int index, byte value) => throw new NotImplementedException();
264
/// <summary>
/// Discards everything appended so far.
/// </summary>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Clear()
{
    ValueOffsets.Clear();
    ValueBuffer.Clear();
    ValidityBuffer.Clear();

    // Return to the freshly constructed state: the running offset is zero and the offsets buffer
    // holds that single leading offset before any value is written.
    Offset = 0;
    ValueOffsets.Append(0);

    return Instance;
}
280 }
281
/// <summary>
/// Creates a binary array from its individual buffers.
/// </summary>
/// <remarks>
/// The buffers are packed into array data in the order validity bitmap, value offsets, value bytes,
/// and the result is passed to the <see cref="BinaryArray(ArrayData)"/> constructor.
/// </remarks>
/// <param name="dataType">Data type of the array.</param>
/// <param name="length">Number of values in the array.</param>
/// <param name="valueOffsetsBuffer">Buffer holding the value offsets.</param>
/// <param name="dataBuffer">Buffer holding the value bytes.</param>
/// <param name="nullBitmapBuffer">Buffer holding the validity bitmap.</param>
/// <param name="nullCount">Number of null values.</param>
/// <param name="offset">Offset into the array data.</param>
public BinaryArray(
    IArrowType dataType,
    int length,
    ArrowBuffer valueOffsetsBuffer,
    ArrowBuffer dataBuffer,
    ArrowBuffer nullBitmapBuffer,
    int nullCount = 0,
    int offset = 0)
    : this(new ArrayData(
        dataType,
        length,
        nullCount,
        offset,
        new[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer }))
{
}
290
/// <summary>
/// Passes this array and the visitor to the shared <c>Accept</c> helper.
/// </summary>
/// <param name="visitor">Visitor to dispatch to.</param>
public override void Accept(IArrowArrayVisitor visitor)
{
    Accept(this, visitor);
}
292
/// <summary>Gets the buffer at index 1 of the array data: the value offsets.</summary>
public ArrowBuffer ValueOffsetsBuffer
{
    get { return Data.Buffers[1]; }
}

/// <summary>Gets the buffer at index 2 of the array data: the value bytes.</summary>
public ArrowBuffer ValueBuffer
{
    get { return Data.Buffers[2]; }
}

/// <summary>
/// Gets the offsets as integers, sliced to start at <c>Offset</c> and to span <c>Length + 1</c> entries.
/// </summary>
public ReadOnlySpan<int> ValueOffsets
{
    get { return ValueOffsetsBuffer.Span.CastTo<int>().Slice(Offset, Length + 1); }
}

/// <summary>Gets the entire value buffer as a span of bytes.</summary>
public ReadOnlySpan<byte> Values
{
    get { return ValueBuffer.Span.CastTo<byte>(); }
}
300
/// <summary>
/// Gets the offset stored at the given position of <see cref="ValueOffsets"/>.
/// </summary>
/// <param name="index">Position to read; may equal <c>Length</c>, which addresses the final offset.</param>
/// <returns>Returns the offset at <paramref name="index"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">If the index is negative or greater than <c>Length</c>.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[Obsolete("This method has been deprecated. Please use ValueOffsets[index] instead.")]
public int GetValueOffset(int index)
{
    bool inRange = index >= 0 && index <= Length;
    if (!inRange)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    return ValueOffsets[index];
}
311
/// <summary>
/// Gets the length, in bytes, of the value at the given index.
/// </summary>
/// <param name="index">Index of the value.</param>
/// <returns>
/// Returns the difference between the following offset and the value's own offset, or zero when the
/// slot is not valid.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">If the index is negative or not less than <c>Length</c>.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int GetValueLength(int index)
{
    if (index < 0 || index >= Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    if (IsValid(index))
    {
        ReadOnlySpan<int> slotOffsets = ValueOffsets;
        int start = slotOffsets[index];
        int end = slotOffsets[index + 1];
        return end - start;
    }

    return 0;
}
327
/// <summary>
/// Gets the bytes of the value at a given index as a read-only span.
/// </summary>
/// <remarks>
/// A null value is returned as an empty span, so this method cannot distinguish a null from an empty
/// value. Use the <see cref="Array.IsNull"/> method to determine null values reliably.
/// </remarks>
/// <param name="index">Index at which to get bytes.</param>
/// <returns>Returns a <see cref="ReadOnlySpan{Byte}"/> object.</returns>
/// <exception cref="ArgumentOutOfRangeException">If the index is negative or beyond the length of the array.
/// </exception>
public ReadOnlySpan<byte> GetBytes(int index)
{
    if (index < 0 || index >= Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    if (IsNull(index))
    {
        // `return null;` would also compile, but it would be misleading: for a span return type it
        // yields an empty span anyway.
        return ReadOnlySpan<byte>.Empty;
    }

    int start = ValueOffsets[index];
    int count = GetValueLength(index);
    return ValueBuffer.Span.Slice(start, count);
}
356
357 }
358 }