]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one or more |
2 | // contributor license agreements. See the NOTICE file distributed with | |
3 | // this work for additional information regarding copyright ownership. | |
4 | // The ASF licenses this file to You under the Apache License, Version 2.0 | |
5 | // (the "License"); you may not use this file except in compliance with | |
6 | // the License. You may obtain a copy of the License at | |
7 | // | |
8 | // http://www.apache.org/licenses/LICENSE-2.0 | |
9 | // | |
10 | // Unless required by applicable law or agreed to in writing, software | |
11 | // distributed under the License is distributed on an "AS IS" BASIS, | |
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | // See the License for the specific language governing permissions and | |
14 | // limitations under the License. | |
15 | ||
16 | using Apache.Arrow.Arrays; | |
17 | using Apache.Arrow.Types; | |
18 | using System; | |
19 | using System.Collections.Generic; | |
20 | using System.Linq; | |
21 | ||
22 | namespace Apache.Arrow.Tests | |
23 | { | |
24 | public static class TestData | |
25 | { | |
/// <summary>
/// Creates a sample record batch containing a single set of columns.
/// </summary>
/// <param name="length">Number of rows to generate.</param>
/// <param name="createDictionaryArray">
/// Forwarded as <c>createAdvancedTypeArrays</c> to the three-parameter overload.
/// </param>
/// <returns>The generated record batch.</returns>
public static RecordBatch CreateSampleRecordBatch(int length, bool createDictionaryArray = false)
    => CreateSampleRecordBatch(length, columnSetCount: 1, createAdvancedTypeArrays: createDictionaryArray);
30 | ||
/// <summary>
/// Creates a sample record batch whose schema repeats the same set of column types
/// <paramref name="columnSetCount"/> times.
/// </summary>
/// <param name="length">Number of rows to generate.</param>
/// <param name="columnSetCount">How many times the set of columns is repeated.</param>
/// <param name="createAdvancedTypeArrays">
/// When true, each set also gets a dictionary column and a fixed-size binary column.
/// </param>
/// <returns>The generated record batch.</returns>
public static RecordBatch CreateSampleRecordBatch(int length, int columnSetCount, bool createAdvancedTypeArrays)
{
    var schemaBuilder = new Schema.Builder();

    for (int setIndex = 0; setIndex < columnSetCount; setIndex++)
    {
        var structMembers = new List<Field>
        {
            CreateField(StringType.Default, setIndex),
            CreateField(Int32Type.Default, setIndex),
        };

        // Order matters: fields are added to the schema in exactly this order.
        // No field is added here for HalfFloatType, Time32Type or Time64Type.
        var columnTypes = new List<ArrowType>
        {
            new ListType(Int64Type.Default),
            BooleanType.Default,
            UInt8Type.Default,
            Int8Type.Default,
            UInt16Type.Default,
            Int16Type.Default,
            UInt32Type.Default,
            Int32Type.Default,
            UInt64Type.Default,
            Int64Type.Default,
            FloatType.Default,
            DoubleType.Default,
            Date32Type.Default,
            Date64Type.Default,
            TimestampType.Default,
            StringType.Default,
            new StructType(structMembers),
            new Decimal128Type(10, 6),
            new Decimal256Type(16, 8),
        };

        if (createAdvancedTypeArrays)
        {
            columnTypes.Add(new DictionaryType(Int32Type.Default, StringType.Default, false));
            columnTypes.Add(new FixedSizeBinaryType(16));
        }

        foreach (ArrowType columnType in columnTypes)
        {
            schemaBuilder.Field(CreateField(columnType, setIndex));
        }
    }

    return CreateSampleRecordBatch(schemaBuilder.Build(), length);
}
72 | ||
/// <summary>
/// Creates a record batch for <paramref name="schema"/>, generating one array per field.
/// </summary>
/// <param name="schema">Schema describing the columns to generate.</param>
/// <param name="length">Number of rows to generate.</param>
/// <returns>The generated record batch.</returns>
public static RecordBatch CreateSampleRecordBatch(Schema schema, int length)
{
    IEnumerable<IArrowArray> columns = CreateArrays(schema, length);
    var batch = new RecordBatch(schema, columns, length);
    return batch;
}
79 | ||
/// <summary>
/// Creates a non-nullable field named after the type's <c>Name</c> followed by
/// <paramref name="iteration"/>.
/// </summary>
/// <param name="type">Data type of the field.</param>
/// <param name="iteration">Suffix appended to the type name to form the field name.</param>
/// <returns>The new field.</returns>
private static Field CreateField(ArrowType type, int iteration)
{
    string fieldName = type.Name + iteration;
    return new Field(fieldName, type, nullable: false);
}
84 | ||
/// <summary>
/// Generates one array per field of <paramref name="schema"/>, in field-index order.
/// </summary>
/// <param name="schema">Schema whose fields determine the array types.</param>
/// <param name="length">Number of elements to generate for each array.</param>
/// <returns>A fully materialized list of the generated arrays.</returns>
public static IEnumerable<IArrowArray> CreateArrays(Schema schema, int length)
{
    // ToList() keeps the generation eager, so the arrays are built exactly once.
    return Enumerable.Range(0, schema.Fields.Count)
        .Select(index => CreateArray(schema.GetFieldByIndex(index), length))
        .ToList();
}
96 | ||
/// <summary>
/// Generates an array for <paramref name="field"/> by dispatching its data type to an
/// <see cref="ArrayCreator"/> visitor.
/// </summary>
/// <param name="field">Field whose data type selects the kind of array.</param>
/// <param name="length">Number of elements to generate.</param>
/// <returns>The array left on the visitor after the visit.</returns>
private static IArrowArray CreateArray(Field field, int length)
{
    ArrayCreator visitor = new ArrayCreator(length);
    field.DataType.Accept(visitor);
    return visitor.Array;
}
105 | ||
106 | private class ArrayCreator : | |
107 | IArrowTypeVisitor<BooleanType>, | |
108 | IArrowTypeVisitor<Date32Type>, | |
109 | IArrowTypeVisitor<Date64Type>, | |
110 | IArrowTypeVisitor<Int8Type>, | |
111 | IArrowTypeVisitor<Int16Type>, | |
112 | IArrowTypeVisitor<Int32Type>, | |
113 | IArrowTypeVisitor<Int64Type>, | |
114 | IArrowTypeVisitor<UInt8Type>, | |
115 | IArrowTypeVisitor<UInt16Type>, | |
116 | IArrowTypeVisitor<UInt32Type>, | |
117 | IArrowTypeVisitor<UInt64Type>, | |
118 | IArrowTypeVisitor<FloatType>, | |
119 | IArrowTypeVisitor<DoubleType>, | |
120 | IArrowTypeVisitor<TimestampType>, | |
121 | IArrowTypeVisitor<StringType>, | |
122 | IArrowTypeVisitor<ListType>, | |
123 | IArrowTypeVisitor<StructType>, | |
124 | IArrowTypeVisitor<Decimal128Type>, | |
125 | IArrowTypeVisitor<Decimal256Type>, | |
126 | IArrowTypeVisitor<DictionaryType>, | |
127 | IArrowTypeVisitor<FixedSizeBinaryType> | |
128 | { | |
/// <summary>Number of elements (rows) to generate for each array.</summary>
private int Length { get; }

/// <summary>The array produced by the most recent <c>Visit</c> call.</summary>
public IArrowArray Array { get; private set; }

/// <summary>
/// Creates a visitor that generates arrays of <paramref name="length"/> elements.
/// </summary>
/// <param name="length">Number of elements to generate.</param>
public ArrayCreator(int length) => Length = length;
136 | ||
// Primitive visitors: each one hands GenerateArray a fresh builder and a function
// that maps the element index to the element value.

/// <summary>Generates booleans that are true at even indices.</summary>
public void Visit(BooleanType type)
{
    GenerateArray(new BooleanArray.Builder(), index => index % 2 == 0);
}

/// <summary>Generates the index cast to <see cref="sbyte"/>.</summary>
public void Visit(Int8Type type)
{
    GenerateArray(new Int8Array.Builder(), index => (sbyte)index);
}

/// <summary>Generates the index cast to <see cref="short"/>.</summary>
public void Visit(Int16Type type)
{
    GenerateArray(new Int16Array.Builder(), index => (short)index);
}

/// <summary>Generates the index itself.</summary>
public void Visit(Int32Type type)
{
    GenerateArray(new Int32Array.Builder(), index => index);
}

/// <summary>Generates the index widened to a 64-bit value.</summary>
public void Visit(Int64Type type)
{
    GenerateArray(new Int64Array.Builder(), index => index);
}

/// <summary>Generates the index cast to <see cref="byte"/>.</summary>
public void Visit(UInt8Type type)
{
    GenerateArray(new UInt8Array.Builder(), index => (byte)index);
}

/// <summary>Generates the index cast to <see cref="ushort"/>.</summary>
public void Visit(UInt16Type type)
{
    GenerateArray(new UInt16Array.Builder(), index => (ushort)index);
}

/// <summary>Generates the index cast to <see cref="uint"/>.</summary>
public void Visit(UInt32Type type)
{
    GenerateArray(new UInt32Array.Builder(), index => (uint)index);
}

/// <summary>Generates the index cast to <see cref="ulong"/>.</summary>
public void Visit(UInt64Type type)
{
    GenerateArray(new UInt64Array.Builder(), index => (ulong)index);
}

/// <summary>Generates the index divided by <see cref="Length"/> as a single-precision value.</summary>
public void Visit(FloatType type)
{
    GenerateArray(new FloatArray.Builder(), index => ((float)index / Length));
}

/// <summary>Generates the index divided by <see cref="Length"/> as a double-precision value.</summary>
public void Visit(DoubleType type)
{
    GenerateArray(new DoubleArray.Builder(), index => ((double)index / Length));
}
/// <summary>
/// Generates a 128-bit decimal array whose element at each index is the index divided
/// by <see cref="Length"/>.
/// </summary>
/// <param name="type">Decimal type passed to the array builder.</param>
public void Visit(Decimal128Type type)
{
    var decimals = new Decimal128Array.Builder(type).Reserve(Length);
    decimal divisor = Length;

    int index = 0;
    while (index < Length)
    {
        decimals.Append(index / divisor);
        index++;
    }

    Array = decimals.Build();
}
159 | ||
/// <summary>
/// Generates a 256-bit decimal array whose element at each index is the index divided
/// by <see cref="Length"/>.
/// </summary>
/// <param name="type">Decimal type passed to the array builder.</param>
public void Visit(Decimal256Type type)
{
    var decimals = new Decimal256Array.Builder(type).Reserve(Length);
    decimal divisor = Length;

    int index = 0;
    while (index < Length)
    {
        decimals.Append(index / divisor);
        index++;
    }

    Array = decimals.Build();
}
171 | ||
/// <summary>
/// Generates a Date32 array of day-spaced dates that start a capped number of days
/// before the current UTC time; the day offset wraps around once the cap is reached.
/// </summary>
/// <param name="type">The Date32 type being visited (not otherwise used).</param>
public void Visit(Date32Type type)
{
    var dates = new Date32Array.Builder().Reserve(Length);

    // Length may exceed the number of days since DateTime.MinValue, so limit how many
    // days the first date may lie before "now".
    int dayWindow = Math.Min(Length, 100_000);
    DateTimeOffset firstDay = DateTimeOffset.UtcNow.AddDays(-dayWindow);

    for (int index = 0; index < Length; index++)
    {
        int dayOffset = index % dayWindow;
        dates.Append(firstDay.AddDays(dayOffset));
    }

    Array = dates.Build();
}
188 | ||
/// <summary>
/// Generates a Date64 array of second-spaced instants that start <see cref="Length"/>
/// seconds before the current UTC time.
/// </summary>
/// <param name="type">The Date64 type being visited (not otherwise used).</param>
public void Visit(Date64Type type)
{
    var dates = new Date64Array.Builder().Reserve(Length);
    DateTimeOffset start = DateTimeOffset.UtcNow.AddSeconds(-Length);

    int second = 0;
    while (second < Length)
    {
        dates.Append(start.AddSeconds(second));
        second++;
    }

    Array = dates.Build();
}
201 | ||
/// <summary>
/// Generates a timestamp array of millisecond-spaced instants that start
/// <see cref="Length"/> milliseconds before the current UTC time.
/// </summary>
/// <param name="type">The timestamp type being visited (not otherwise used).</param>
public void Visit(TimestampType type)
{
    var timestamps = new TimestampArray.Builder().Reserve(Length);
    DateTimeOffset start = DateTimeOffset.UtcNow.AddMilliseconds(-Length);

    int millisecond = 0;
    while (millisecond < Length)
    {
        timestamps.Append(start.AddMilliseconds(millisecond));
        millisecond++;
    }

    Array = timestamps.Build();
}
214 | ||
/// <summary>
/// Generates a string array in which every element is the same fixed text.
/// </summary>
/// <param name="type">The string type being visited (not otherwise used).</param>
public void Visit(StringType type)
{
    const string Text = "hello";
    var strings = new StringArray.Builder();

    int remaining = Length;
    while (remaining > 0)
    {
        strings.Append(Text);
        remaining--;
    }

    Array = strings.Build();
}
227 | ||
/// <summary>
/// Generates a list array with <see cref="Length"/> lists, each holding one 64-bit
/// value equal to its index, plus one extra trailing value in the values builder.
/// </summary>
/// <param name="type">List type whose value field is passed to the list builder.</param>
public void Visit(ListType type)
{
    var lists = new ListArray.Builder(type.ValueField).Reserve(Length);

    // TODO: support value types other than Int64.
    var int64Values = (Int64Array.Builder)lists.ValueBuilder.Reserve(Length + 1);

    for (int index = 0; index < Length; index++)
    {
        // Start a new list, then add its single element.
        lists.Append();
        int64Values.Append(index);
    }

    // One extra value, to check whether Values.Length can exceed ListArray.Length.
    int64Values.Append(0);

    Array = lists.Build();
}
245 | ||
/// <summary>
/// Generates a struct array: one child array per struct field, with a validity bitmap
/// that marks every row as valid.
/// </summary>
/// <param name="type">Struct type whose fields determine the child arrays.</param>
public void Visit(StructType type)
{
    int childCount = type.Fields.Count;
    var children = new IArrowArray[childCount];
    for (int child = 0; child < childCount; child++)
    {
        children[child] = CreateArray(type.Fields[child], Length);
    }

    // Every row is valid, so append one set bit per row.
    var validity = new ArrowBuffer.BitmapBuilder();
    int row = 0;
    while (row < Length)
    {
        validity.Append(true);
        row++;
    }

    Array = new StructArray(type, Length, children, validity.Build());
}
262 | ||
/// <summary>
/// Generates a dictionary array in which index <c>i</c> refers to a dictionary entry
/// holding the text form of <c>i</c>.
/// </summary>
/// <param name="type">Dictionary type passed to the resulting array.</param>
public void Visit(DictionaryType type)
{
    Int32Array.Builder indices = new Int32Array.Builder().Reserve(Length);
    StringArray.Builder entries = new StringArray.Builder().Reserve(Length);

    int index = 0;
    while (index < Length)
    {
        indices.Append(index);
        entries.Append($"{index}");
        index++;
    }

    IArrowArray indexArray = indices.Build();
    IArrowArray entryArray = entries.Build();
    Array = new DictionaryArray(type, indexArray, entryArray);
}
276 | ||
/// <summary>
/// Generates a fixed-size binary array in which every byte of element <c>i</c> is
/// <c>(byte)i</c>.
/// </summary>
/// <param name="type">Fixed-size binary type that supplies the per-element byte width.</param>
public void Visit(FixedSizeBinaryType type)
{
    ArrowBuffer.Builder<byte> valueBuilder = new ArrowBuffer.Builder<byte>();

    // Each element occupies ByteWidth bytes in the values buffer. BitWidth is the size
    // in bits, so using it here would write eight times too many bytes per element.
    int valueSize = type.ByteWidth;
    for (int i = 0; i < Length; i++)
    {
        valueBuilder.Append(Enumerable.Repeat((byte)i, valueSize).ToArray());
    }

    // An empty validity buffer together with a null count of 0: no element is null.
    ArrowBuffer validityBuffer = ArrowBuffer.Empty;
    ArrowBuffer valueBuffer = valueBuilder.Build(default);

    ArrayData arrayData = new ArrayData(type, Length, 0, 0, new[] { validityBuffer, valueBuffer });
    Array = new FixedSizeBinaryArray(arrayData);
}
293 | ||
/// <summary>
/// Fills <paramref name="builder"/> with <see cref="Length"/> slots and stores the built
/// array in <see cref="Array"/>. The slot at index <c>Length - 2</c> is null; every other
/// slot holds <paramref name="generator"/> applied to its index. When
/// <see cref="Length"/> is below two, no slot matches and no null is appended.
/// </summary>
/// <param name="builder">Builder that receives the values.</param>
/// <param name="generator">Maps an element index to the value stored at that index.</param>
private void GenerateArray<T, TArray, TArrayBuilder>(IArrowArrayBuilder<T, TArray, TArrayBuilder> builder, Func<int, T> generator)
    where TArrayBuilder : IArrowArrayBuilder<T, TArray, TArrayBuilder>
    where TArray : IArrowArray
    where T : struct
{
    int nullIndex = Length - 2;

    for (int index = 0; index < Length; index++)
    {
        if (index != nullIndex)
        {
            builder.Append(generator(index));
            continue;
        }

        builder.AppendNull();
    }

    Array = builder.Build(default);
}
314 | ||
/// <summary>
/// Fallback for types that have no dedicated <c>Visit</c> overload.
/// </summary>
/// <param name="type">The unsupported type.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public void Visit(IArrowType type) => throw new NotImplementedException();
319 | } | |
320 | } | |
321 | } |