]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one or more |
2 | // contributor license agreements. See the NOTICE file distributed with | |
3 | // this work for additional information regarding copyright ownership. | |
4 | // The ASF licenses this file to You under the Apache License, Version 2.0 | |
5 | // (the "License"); you may not use this file except in compliance with | |
6 | // the License. You may obtain a copy of the License at | |
7 | // | |
8 | // http://www.apache.org/licenses/LICENSE-2.0 | |
9 | // | |
10 | // Unless required by applicable law or agreed to in writing, software | |
11 | // distributed under the License is distributed on an "AS IS" BASIS, | |
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | // See the License for the specific language governing permissions and | |
14 | // limitations under the License. | |
15 | ||
16 | using Apache.Arrow.Memory; | |
17 | using Apache.Arrow.Types; | |
18 | using System; | |
19 | using System.Collections.Generic; | |
20 | ||
21 | namespace Apache.Arrow | |
22 | { | |
23 | static class ArrayDataConcatenator | |
24 | { | |
/// <summary>
/// Concatenates a list of <see cref="ArrayData"/> instances into one.
/// </summary>
/// <param name="arrayDataList">The inputs, in the order they should appear in the result.</param>
/// <param name="allocator">Allocator handed to the buffer builders that produce the result.</param>
/// <returns>
/// <c>null</c> when the list is null or empty; the single element itself (not a copy)
/// when the list has exactly one entry; otherwise the result produced by the visitor
/// dispatched on the first element's data type.
/// </returns>
internal static ArrayData Concatenate(IReadOnlyList<ArrayData> arrayDataList, MemoryAllocator allocator = default)
{
    int inputCount = arrayDataList?.Count ?? 0;

    switch (inputCount)
    {
        case 0:
            return null;
        case 1:
            return arrayDataList[0];
    }

    var visitor = new ArrayDataConcatenationVisitor(arrayDataList, allocator);

    // The first element decides which Visit overload performs the concatenation.
    arrayDataList[0].DataType.Accept(visitor);

    return visitor.Result;
}
44 | ||
45 | private class ArrayDataConcatenationVisitor : | |
46 | IArrowTypeVisitor<BooleanType>, | |
47 | IArrowTypeVisitor<FixedWidthType>, | |
48 | IArrowTypeVisitor<BinaryType>, | |
49 | IArrowTypeVisitor<StringType>, | |
50 | IArrowTypeVisitor<ListType>, | |
51 | IArrowTypeVisitor<StructType> | |
52 | { | |
/// <summary>The concatenated data; assigned by whichever Visit overload runs.</summary>
public ArrayData Result { get; private set; }

// Inputs to concatenate, in output order.
private readonly IReadOnlyList<ArrayData> _arrayDataList;

// Sum of Length over all inputs.
private readonly int _totalLength;

// Sum of NullCount over all inputs.
private readonly int _totalNullCount;

// Allocator passed to every buffer builder's Build call.
private readonly MemoryAllocator _allocator;

/// <summary>
/// Captures the inputs and pre-computes the total length and total null count.
/// </summary>
/// <param name="arrayDataList">The inputs to concatenate.</param>
/// <param name="allocator">Allocator used when building the output buffers.</param>
public ArrayDataConcatenationVisitor(IReadOnlyList<ArrayData> arrayDataList, MemoryAllocator allocator = default)
{
    int lengthSum = 0;
    int nullSum = 0;

    for (int i = 0; i < arrayDataList.Count; i++)
    {
        ArrayData item = arrayDataList[i];
        lengthSum += item.Length;
        nullSum += item.NullCount;
    }

    _arrayDataList = arrayDataList;
    _allocator = allocator;
    _totalLength = lengthSum;
    _totalNullCount = nullSum;
}
70 | ||
/// <summary>
/// Concatenates boolean arrays: buffer 0 is the validity bitmap and buffer 1 the
/// bit-packed values, so both are merged bit by bit.
/// </summary>
/// <param name="type">The boolean type shared by all inputs.</param>
public void Visit(BooleanType type)
{
    CheckData(type, 2);

    var buffers = new ArrowBuffer[]
    {
        ConcatenateValidityBuffer(),
        ConcatenateBitmapBuffer(1),
    };

    Result = new ArrayData(type, _totalLength, _totalNullCount, 0, buffers);
}
79 | ||
/// <summary>
/// Concatenates fixed-width arrays: a validity bitmap followed by a value buffer
/// whose per-element size is derived from the type's bit width.
/// </summary>
/// <param name="type">The fixed-width type shared by all inputs.</param>
public void Visit(FixedWidthType type)
{
    CheckData(type, 2);

    var buffers = new ArrowBuffer[]
    {
        ConcatenateValidityBuffer(),
        ConcatenateFixedWidthTypeValueBuffer(type),
    };

    Result = new ArrayData(type, _totalLength, _totalNullCount, 0, buffers);
}
88 | ||
/// <summary>Concatenates binary arrays via the shared variable-length routine.</summary>
/// <param name="type">The binary type shared by all inputs.</param>
public void Visit(BinaryType type)
{
    ConcatenateVariableBinaryArrayData(type);
}
90 | ||
/// <summary>Concatenates string arrays via the shared variable-length routine.</summary>
/// <param name="type">The string type shared by all inputs.</param>
public void Visit(StringType type)
{
    ConcatenateVariableBinaryArrayData(type);
}
92 | ||
/// <summary>
/// Concatenates list arrays: merges the validity bitmap and the offsets, and
/// recursively concatenates child 0 of every input.
/// </summary>
/// <param name="type">The list type shared by all inputs.</param>
public void Visit(ListType type)
{
    CheckData(type, 2);

    var buffers = new ArrowBuffer[]
    {
        ConcatenateValidityBuffer(),
        ConcatenateOffsetBuffer(),
    };

    // Each input contributes its child at index 0; these are concatenated recursively.
    ArrayData[] children = { Concatenate(SelectChildren(0), _allocator) };

    Result = new ArrayData(type, _totalLength, _totalNullCount, 0, buffers, children);
}
102 | ||
/// <summary>
/// Concatenates struct arrays: every field's child arrays are concatenated
/// recursively, and the struct-level validity bitmap is merged across all inputs.
/// </summary>
/// <remarks>
/// The result reports the total length and total null count of all inputs.
/// Taking length, null count and buffers from the first input only would make the
/// parent disagree with its concatenated children.
/// </remarks>
/// <param name="type">The struct type shared by all inputs.</param>
public void Visit(StructType type)
{
    CheckData(type, 1);

    int fieldCount = type.Fields.Count;
    var children = new List<ArrayData>(fieldCount);

    for (int i = 0; i < fieldCount; i++)
    {
        // Gather the i-th child of every input and concatenate them as one column.
        children.Add(Concatenate(SelectChildren(i), _allocator));
    }

    // Buffer 0 is the only struct-level buffer; merge it like the other visitors do.
    ArrowBuffer validityBuffer = ConcatenateValidityBuffer();

    Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer }, children);
}
115 | ||
/// <summary>
/// Overload taking the general <see cref="IArrowType"/>; it always throws to signal
/// that concatenation is not implemented for the given type.
/// </summary>
/// <param name="type">The type named in the exception message.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public void Visit(IArrowType type) =>
    throw new NotImplementedException($"Concatenation for {type.Name} is not supported yet.");
120 | ||
/// <summary>
/// Asks every input to verify that it has the given type id and the expected
/// number of buffers.
/// </summary>
/// <param name="type">The type every input must have.</param>
/// <param name="expectedBufferCount">The buffer count every input must have.</param>
private void CheckData(IArrowType type, int expectedBufferCount)
{
    for (int i = 0; i < _arrayDataList.Count; i++)
    {
        ArrayData candidate = _arrayDataList[i];
        candidate.EnsureDataType(type.TypeId);
        candidate.EnsureBufferCount(expectedBufferCount);
    }
}
129 | ||
/// <summary>
/// Shared routine for variable-length binary layouts (three buffers: validity,
/// offsets, values). Stores the concatenated data in <see cref="Result"/>.
/// </summary>
/// <param name="type">The type shared by all inputs.</param>
private void ConcatenateVariableBinaryArrayData(IArrowType type)
{
    CheckData(type, 3);

    var buffers = new ArrowBuffer[]
    {
        ConcatenateValidityBuffer(),
        ConcatenateOffsetBuffer(),
        ConcatenateVariableBinaryValueBuffer(),
    };

    Result = new ArrayData(type, _totalLength, _totalNullCount, 0, buffers);
}
139 | ||
/// <summary>
/// Builds the validity bitmap of the result from buffer 0 of every input.
/// </summary>
/// <returns>
/// <see cref="ArrowBuffer.Empty"/> when the total null count is zero; otherwise the
/// bit-wise concatenation of buffer 0 of every input.
/// </returns>
private ArrowBuffer ConcatenateValidityBuffer()
{
    return _totalNullCount == 0
        ? ArrowBuffer.Empty
        : ConcatenateBitmapBuffer(0);
}
149 | ||
/// <summary>
/// Concatenates the bit-packed buffer at <paramref name="bufferIndex"/> of every
/// input, one bit per element.
/// </summary>
/// <remarks>
/// An input whose buffer is empty contributes a set bit for each of its elements.
/// NOTE(review): bits are read starting at bit 0 of each buffer; the inputs' own
/// offsets are not consulted here — confirm callers only pass zero-offset data.
/// </remarks>
/// <param name="bufferIndex">Index of the bitmap buffer within each input.</param>
/// <returns>A bitmap buffer built with the visitor's allocator.</returns>
private ArrowBuffer ConcatenateBitmapBuffer(int bufferIndex)
{
    var bits = new ArrowBuffer.BitmapBuilder(_totalLength);

    for (int sourceIndex = 0; sourceIndex < _arrayDataList.Count; sourceIndex++)
    {
        ArrayData source = _arrayDataList[sourceIndex];
        ReadOnlySpan<byte> bitmap = source.Buffers[bufferIndex].Span;

        // An absent bitmap stands for "every bit set".
        bool bitmapMissing = bitmap.IsEmpty;
        int elementCount = source.Length;

        for (int bit = 0; bit < elementCount; bit++)
        {
            bits.Append(bitmapMissing || BitUtility.GetBit(bitmap, bit));
        }
    }

    return bits.Build(_allocator);
}
167 | ||
/// <summary>
/// Concatenates the value buffer (buffer 1) of every input for a fixed-width type.
/// </summary>
/// <remarks>
/// The per-element byte width is <c>type.BitWidth / 8</c> using integer division.
/// From each input, the leading <c>Length * width</c> bytes of buffer 1 are copied.
/// </remarks>
/// <param name="type">The fixed-width type providing the bit width.</param>
/// <returns>A byte buffer built with the visitor's allocator.</returns>
private ArrowBuffer ConcatenateFixedWidthTypeValueBuffer(FixedWidthType type)
{
    int bytesPerValue = type.BitWidth / 8;
    var bytes = new ArrowBuffer.Builder<byte>(_totalLength * bytesPerValue);

    for (int i = 0; i < _arrayDataList.Count; i++)
    {
        ArrayData source = _arrayDataList[i];
        ReadOnlySpan<byte> values = source.Buffers[1].Span;

        bytes.Append(values.Slice(0, source.Length * bytesPerValue));
    }

    return bytes.Build(_allocator);
}
183 | ||
/// <summary>
/// Concatenates the value bytes (buffer 2) of every input of a variable-length
/// binary layout.
/// </summary>
/// <remarks>
/// For each input, the number of bytes in use is read from the offsets buffer
/// (buffer 1) at index <c>Length</c>, and that many leading bytes of buffer 2 are
/// copied. Zero-length inputs are skipped, mirroring
/// <c>ConcatenateOffsetBuffer</c>: they contribute no bytes, and their offsets
/// buffer may be empty, in which case indexing it would throw.
/// </remarks>
/// <returns>A byte buffer built with the visitor's allocator.</returns>
private ArrowBuffer ConcatenateVariableBinaryValueBuffer()
{
    var builder = new ArrowBuffer.Builder<byte>();

    foreach (ArrayData arrayData in _arrayDataList)
    {
        if (arrayData.Length == 0)
        {
            // Nothing to copy, and the offsets buffer may hold no entries at all.
            continue;
        }

        // The final offset marks the end of the bytes this input actually uses.
        int lastOffset = arrayData.Buffers[1].Span.CastTo<int>()[arrayData.Length];
        builder.Append(arrayData.Buffers[2].Span.Slice(0, lastOffset));
    }

    return builder.Build(_allocator);
}
196 | ||
/// <summary>
/// Concatenates the offsets buffer (buffer 1) of every input, shifting each
/// input's offsets so they continue from where the previous input ended.
/// </summary>
/// <remarks>
/// The output starts with a single 0. Each input's own leading offset is skipped,
/// and zero-length inputs are skipped entirely.
/// NOTE(review): this relies on each input's first offset being 0 — confirm
/// callers never pass offsets that start elsewhere.
/// </remarks>
/// <returns>An int32 offsets buffer built with the visitor's allocator.</returns>
private ArrowBuffer ConcatenateOffsetBuffer()
{
    var offsets = new ArrowBuffer.Builder<int>(_totalLength + 1);
    offsets.Append(0);

    // End position of everything appended so far; added to each following input's offsets.
    int runningBase = 0;

    foreach (ArrayData source in _arrayDataList)
    {
        int count = source.Length;

        if (count != 0)
        {
            // Skip the input's leading offset: the builder already ends on that position.
            ReadOnlySpan<int> tail = source.Buffers[1].Span.CastTo<int>().Slice(1, count);

            for (int i = 0; i < count; i++)
            {
                offsets.Append(runningBase + tail[i]);
            }

            // The next input continues from this input's final offset.
            runningBase += tail[count - 1];
        }
    }

    return offsets.Build(_allocator);
}
226 | ||
/// <summary>
/// Collects the child at position <paramref name="index"/> from every input,
/// preserving input order.
/// </summary>
/// <param name="index">Position of the child within each input's Children.</param>
/// <returns>A new list with one child per input.</returns>
private List<ArrayData> SelectChildren(int index)
{
    int inputCount = _arrayDataList.Count;
    var selected = new List<ArrayData>(inputCount);

    for (int i = 0; i < inputCount; i++)
    {
        selected.Add(_arrayDataList[i].Children[index]);
    }

    return selected;
}
238 | } | |
239 | } | |
240 | } |