]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / csharp / src / Apache.Arrow / Arrays / ArrayDataConcatenator.cs
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one or more
2// contributor license agreements. See the NOTICE file distributed with
3// this work for additional information regarding copyright ownership.
4// The ASF licenses this file to You under the Apache License, Version 2.0
5// (the "License"); you may not use this file except in compliance with
6// the License. You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16using Apache.Arrow.Memory;
17using Apache.Arrow.Types;
18using System;
19using System.Collections.Generic;
20
namespace Apache.Arrow
{
    /// <summary>
    /// Concatenates several <see cref="ArrayData"/> instances of the same Arrow type
    /// into a single <see cref="ArrayData"/>.
    /// </summary>
    /// <remarks>
    /// NOTE(review): every helper below reads its source buffers starting at element 0 and
    /// the results are always created with an offset argument of 0. Inputs that carry a
    /// non-zero offset (slices) are presumably not handled correctly - confirm against callers.
    /// </remarks>
    static class ArrayDataConcatenator
    {
        /// <summary>
        /// Concatenates the given array data, in list order, into one <see cref="ArrayData"/>.
        /// </summary>
        /// <param name="arrayDataList">The inputs to concatenate. The data type of the first element selects the concatenation strategy.</param>
        /// <param name="allocator">Allocator handed to the buffer builders when the concatenated buffers are built.</param>
        /// <returns>
        /// <c>null</c> when <paramref name="arrayDataList"/> is <c>null</c> or empty; the single
        /// element itself (same instance, no copy) when the list has exactly one entry;
        /// otherwise a newly built <see cref="ArrayData"/>.
        /// </returns>
        /// <exception cref="NotImplementedException">The data type of the first element has no concatenation support.</exception>
        internal static ArrayData Concatenate(IReadOnlyList<ArrayData> arrayDataList, MemoryAllocator allocator = default)
        {
            if (arrayDataList == null || arrayDataList.Count == 0)
            {
                return null;
            }

            if (arrayDataList.Count == 1)
            {
                // Nothing to merge: hand back the caller's instance unchanged.
                return arrayDataList[0];
            }

            var arrowArrayConcatenationVisitor = new ArrayDataConcatenationVisitor(arrayDataList, allocator);

            // Dispatch on the type of the first element; CheckData later verifies that
            // every other element has the same type id.
            IArrowType type = arrayDataList[0].DataType;
            type.Accept(arrowArrayConcatenationVisitor);

            return arrowArrayConcatenationVisitor.Result;
        }

        /// <summary>
        /// Type visitor that performs the concatenation for the supported Arrow types
        /// and exposes the outcome through <see cref="Result"/>.
        /// </summary>
        private sealed class ArrayDataConcatenationVisitor :
            IArrowTypeVisitor<BooleanType>,
            IArrowTypeVisitor<FixedWidthType>,
            IArrowTypeVisitor<BinaryType>,
            IArrowTypeVisitor<StringType>,
            IArrowTypeVisitor<ListType>,
            IArrowTypeVisitor<StructType>
        {
            /// <summary>The concatenated data; set by whichever <c>Visit</c> overload ran.</summary>
            public ArrayData Result { get; private set; }

            private readonly IReadOnlyList<ArrayData> _arrayDataList;
            private readonly int _totalLength;
            private readonly int _totalNullCount;
            private readonly MemoryAllocator _allocator;

            /// <summary>
            /// Captures the inputs and pre-computes the summed length and null count.
            /// </summary>
            /// <param name="arrayDataList">The inputs to concatenate, in output order.</param>
            /// <param name="allocator">Allocator passed to every buffer builder.</param>
            public ArrayDataConcatenationVisitor(IReadOnlyList<ArrayData> arrayDataList, MemoryAllocator allocator = default)
            {
                _arrayDataList = arrayDataList;
                _allocator = allocator;

                // NOTE(review): assumes every NullCount is a real, non-negative count -
                // confirm that no "unknown" sentinel value can reach this sum.
                foreach (ArrayData arrayData in _arrayDataList)
                {
                    _totalLength += arrayData.Length;
                    _totalNullCount += arrayData.NullCount;
                }
            }

            /// <summary>Concatenates boolean arrays: a validity bitmap plus a bit-packed value buffer.</summary>
            public void Visit(BooleanType type)
            {
                CheckData(type, 2);
                ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
                ArrowBuffer valueBuffer = ConcatenateBitmapBuffer(1);

                Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, valueBuffer });
            }

            /// <summary>Concatenates fixed-width arrays: a validity bitmap plus a value buffer.</summary>
            public void Visit(FixedWidthType type)
            {
                CheckData(type, 2);
                ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
                ArrowBuffer valueBuffer = ConcatenateFixedWidthTypeValueBuffer(type);

                Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, valueBuffer });
            }

            /// <summary>Concatenates binary arrays (validity, offsets, values).</summary>
            public void Visit(BinaryType type) => ConcatenateVariableBinaryArrayData(type);

            /// <summary>Concatenates string arrays (validity, offsets, values).</summary>
            public void Visit(StringType type) => ConcatenateVariableBinaryArrayData(type);

            /// <summary>
            /// Concatenates list arrays: validity and offsets are merged here, the single
            /// child array is concatenated recursively.
            /// </summary>
            public void Visit(ListType type)
            {
                CheckData(type, 2);
                ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
                ArrowBuffer offsetBuffer = ConcatenateOffsetBuffer();
                ArrayData child = Concatenate(SelectChildren(0), _allocator);

                Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer }, new[] { child });
            }

            /// <summary>
            /// Concatenates struct arrays: the validity bitmap is merged here and each field's
            /// child arrays are concatenated recursively.
            /// </summary>
            public void Visit(StructType type)
            {
                CheckData(type, 1);
                ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
                List<ArrayData> children = new List<ArrayData>(type.Fields.Count);

                for (int i = 0; i < type.Fields.Count; i++)
                {
                    children.Add(Concatenate(SelectChildren(i), _allocator));
                }

                // The struct itself must describe ALL concatenated rows. Using only the first
                // input's length, null count and validity buffer would leave the parent
                // shorter than its (fully concatenated) children.
                Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer }, children);
            }

            /// <summary>Fallback for every type without a dedicated overload.</summary>
            /// <exception cref="NotImplementedException">Always thrown.</exception>
            public void Visit(IArrowType type)
            {
                throw new NotImplementedException($"Concatenation for {type.Name} is not supported yet.");
            }

            /// <summary>
            /// Asks every input to verify that it has the type id of <paramref name="type"/>
            /// and exactly <paramref name="expectedBufferCount"/> buffers.
            /// </summary>
            private void CheckData(IArrowType type, int expectedBufferCount)
            {
                foreach (ArrayData arrayData in _arrayDataList)
                {
                    arrayData.EnsureDataType(type.TypeId);
                    arrayData.EnsureBufferCount(expectedBufferCount);
                }
            }

            /// <summary>
            /// Shared implementation for binary and string arrays, which both use three
            /// buffers: validity, 32-bit offsets and raw value bytes.
            /// </summary>
            private void ConcatenateVariableBinaryArrayData(IArrowType type)
            {
                CheckData(type, 3);
                ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
                ArrowBuffer offsetBuffer = ConcatenateOffsetBuffer();
                ArrowBuffer valueBuffer = ConcatenateVariableBinaryValueBuffer();

                Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer, valueBuffer });
            }

            /// <summary>
            /// Builds the merged validity bitmap from buffer 0 of every input, or returns
            /// <see cref="ArrowBuffer.Empty"/> when the summed null count is zero.
            /// </summary>
            private ArrowBuffer ConcatenateValidityBuffer()
            {
                if (_totalNullCount == 0)
                {
                    return ArrowBuffer.Empty;
                }

                return ConcatenateBitmapBuffer(0);
            }

            /// <summary>
            /// Appends, bit by bit, the first <c>Length</c> bits of buffer
            /// <paramref name="bufferIndex"/> of every input into one bitmap.
            /// </summary>
            /// <param name="bufferIndex">Index of the bitmap buffer inside <c>ArrayData.Buffers</c>.</param>
            private ArrowBuffer ConcatenateBitmapBuffer(int bufferIndex)
            {
                var builder = new ArrowBuffer.BitmapBuilder(_totalLength);

                foreach (ArrayData arrayData in _arrayDataList)
                {
                    int length = arrayData.Length;
                    ReadOnlySpan<byte> span = arrayData.Buffers[bufferIndex].Span;

                    for (int i = 0; i < length; i++)
                    {
                        // An empty bitmap is treated as "every bit set".
                        builder.Append(span.IsEmpty || BitUtility.GetBit(span, i));
                    }
                }

                return builder.Build(_allocator);
            }

            /// <summary>
            /// Appends the first <c>Length * byteWidth</c> bytes of buffer 1 of every input.
            /// </summary>
            /// <param name="type">Supplies the element width in bits; divided by 8 to get bytes.</param>
            private ArrowBuffer ConcatenateFixedWidthTypeValueBuffer(FixedWidthType type)
            {
                int typeByteWidth = type.BitWidth / 8;
                var builder = new ArrowBuffer.Builder<byte>(_totalLength * typeByteWidth);

                foreach (ArrayData arrayData in _arrayDataList)
                {
                    int length = arrayData.Length;
                    int byteLength = length * typeByteWidth;

                    builder.Append(arrayData.Buffers[1].Span.Slice(0, byteLength));
                }

                return builder.Build(_allocator);
            }

            /// <summary>
            /// Appends the used portion of buffer 2 (value bytes) of every input. The used
            /// portion ends at the input's last offset, i.e. entry <c>Length</c> of buffer 1.
            /// </summary>
            private ArrowBuffer ConcatenateVariableBinaryValueBuffer()
            {
                var builder = new ArrowBuffer.Builder<byte>();

                foreach (ArrayData arrayData in _arrayDataList)
                {
                    int lastOffset = arrayData.Buffers[1].Span.CastTo<int>()[arrayData.Length];
                    builder.Append(arrayData.Buffers[2].Span.Slice(0, lastOffset));
                }

                return builder.Build(_allocator);
            }

            /// <summary>
            /// Builds the merged 32-bit offset buffer (<c>_totalLength + 1</c> entries) by
            /// shifting each input's offsets by the end offset of the data appended so far.
            /// </summary>
            private ArrowBuffer ConcatenateOffsetBuffer()
            {
                var builder = new ArrowBuffer.Builder<int>(_totalLength + 1);
                int baseOffset = 0;

                builder.Append(0);

                foreach (ArrayData arrayData in _arrayDataList)
                {
                    if (arrayData.Length == 0)
                    {
                        continue;
                    }

                    // The first offset of each input is assumed to be 0, so it is skipped:
                    // it would only duplicate the last offset already in the builder.
                    ReadOnlySpan<int> span = arrayData.Buffers[1].Span.CastTo<int>().Slice(1, arrayData.Length);

                    foreach (int offset in span)
                    {
                        builder.Append(baseOffset + offset);
                    }

                    // The next input's offsets must start from the current last offset.
                    baseOffset += span[arrayData.Length - 1];
                }

                return builder.Build(_allocator);
            }

            /// <summary>
            /// Collects child number <paramref name="index"/> from every input, in input order.
            /// </summary>
            private List<ArrayData> SelectChildren(int index)
            {
                var children = new List<ArrayData>(_arrayDataList.Count);

                foreach (ArrayData arrayData in _arrayDataList)
                {
                    children.Add(arrayData.Children[index]);
                }

                return children;
            }
        }
    }
}