]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/js/test/unit/table-tests.ts
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / js / test / unit / table-tests.ts
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 import '../jest-extensions';
19 import {
20 Data, Schema, Field, Table, RecordBatch, Column,
21 Vector, Int32Vector, Float32Vector, Utf8Vector, DictionaryVector,
22 Struct, Float32, Int32, Dictionary, Utf8, Int8
23 } from 'apache-arrow';
24 import { arange } from './utils';
25
// Column names of the test schema, in the order the columns are created.
const NAMES: (keyof TestDataSchema)[] = ['f32', 'i32', 'dictionary'];

// Position of each column's value inside a row of expected values.
const F32 = 0;
const I32 = 1;
const DICT = 2;
/** Builds one expected row; Math.fround coerces the float to float32 precision. */
function expectedRow(f32: number, i32: number, dictionary: string): (string | number)[] {
    return [Math.fround(f32), i32, dictionary];
}

/**
 * Fixtures for the parameterized suite. Each entry pairs a factory that
 * builds a Table with a factory returning the rows that table should hold,
 * ordered as [f32, i32, dictionary].
 */
export const test_data = [
    {
        name: `single record batch`,
        table: getSingleRecordBatchTable,
        values: () => [
            expectedRow(-0.3, -1, 'a'),
            expectedRow(-0.2, 1, 'b'),
            expectedRow(-0.1, -1, 'c'),
            expectedRow(0, 1, 'a'),
            expectedRow(0.1, -1, 'b'),
            expectedRow(0.2, 1, 'c'),
            expectedRow(0.3, -1, 'a')
        ]
    },
    {
        name: `multiple record batches`,
        table: getMultipleRecordBatchesTable,
        values: () => [
            expectedRow(-0.3, -1, 'a'),
            expectedRow(-0.2, 1, 'b'),
            expectedRow(-0.1, -1, 'c'),
            expectedRow(0, 1, 'a'),
            expectedRow(0.1, -1, 'b'),
            expectedRow(0.2, 1, 'c'),
            expectedRow(0.3, -1, 'a'),
            expectedRow(0.2, 1, 'b'),
            expectedRow(0.1, -1, 'c'),
        ]
    },
    {
        name: `struct`,
        // Unwraps the single struct column back into a table of its children.
        table: () => Table.fromStruct(getStructTable().getColumn('struct')!),
        values: () => [
            expectedRow(-0.3, -1, 'a'),
            expectedRow(-0.2, 1, 'b'),
            expectedRow(-0.1, -1, 'c'),
            expectedRow(0, 1, 'a'),
            expectedRow(0.1, -1, 'b'),
            expectedRow(0.2, 1, 'c'),
            expectedRow(0.3, -1, 'a')
        ]
    },
];
71
/**
 * Asserts that `batch` and `table` both contain the rows of `source` that
 * start at `offset` and span `batch.length` rows.
 *
 * @param source table the expected column slices are taken from
 * @param offset index of the first source row expected in `batch`
 * @param batch record batch under test
 * @param table table under test (expected to have the same length as `batch`)
 */
function compareBatchAndTable(source: Table, offset: number, batch: RecordBatch, table: Table) {
    expect(batch).toHaveLength(table.length);
    expect(table.numCols).toEqual(source.numCols);
    expect(batch.numCols).toEqual(source.numCols);

    const columnCount = source.numCols;
    for (let col = 0; col < columnCount; col++) {
        const fieldName = source.schema.fields[col].name;
        const expected = source.getColumnAt(col)!.slice(offset, offset + batch.length);
        const fromBatch = batch.getChildAt(col);
        const fromTable = table.getColumnAt(col);
        // The extra tuple entries label each side in the matcher's input.
        expect([fromBatch, `batch`, fieldName]).toEqualVector([expected, `source`]);
        expect([fromTable, `table`, fieldName]).toEqualVector([expected, `source`]);
    }
}
85
86 describe(`Table`, () => {
// Table.empty() yields a table without rows.
test(`can create an empty table`, () => {
    const table = Table.empty();
    expect(table).toHaveLength(0);
});
// An empty input array yields a table without rows.
test(`Table.from([]) creates an empty table`, () => {
    const table = Table.from([]);
    expect(table).toHaveLength(0);
});
// Calling Table.from without arguments yields a table without rows.
test(`Table.from() creates an empty table`, () => {
    const table = Table.from();
    expect(table).toHaveLength(0);
});
96
97 describe(`new()`, () => {
test(`creates an empty Table with Columns`, () => {
    // Zero-length columns built from zero-length Data.
    const emptyInts = Column.new('i32', Data.new(new Int32(), 0, 0));
    const emptyFloats = Column.new('f32', Data.new(new Float32(), 0, 0));
    const table = Table.new(emptyInts, emptyFloats);

    // Read the columns back by name and check they are still empty and typed.
    const i32 = table.getColumn('i32')!;
    const f32 = table.getColumn('f32')!;
    expect(table).toHaveLength(0);
    expect(i32).toHaveLength(0);
    expect(f32).toHaveLength(0);
    expect(i32.toArray()).toBeInstanceOf(Int32Array);
    expect(f32.toArray()).toBeInstanceOf(Float32Array);
});
110
test(`creates a new Table from a Column`, () => {
    const i32s = new Int32Array(arange(new Array<number>(10)));

    // Same checks are applied to the column before and after it joins the table.
    const expectI32Column = (column: Column) => {
        expect(column.name).toBe('i32');
        expect(column).toHaveLength(i32s.length);
        expect(column.nullable).toBe(true);
        expect(column.nullCount).toBe(0);
    };

    const input = Column.new('i32', Data.Int(new Int32(), 0, i32s.length, 0, null, i32s));
    expectI32Column(input);

    const table = Table.new(input);
    const output = table.getColumnAt(0)!;
    expectI32Column(output);

    expect(output).toEqualVector(Int32Vector.from(i32s));
});
131
test(`creates a new Table from Columns`, () => {
    const i32s = new Int32Array(arange(new Array<number>(10)));
    const f32s = new Float32Array(arange(new Array<number>(10)));

    // Columns as constructed, before they are handed to the Table.
    const i32Input = Column.new('i32', Data.Int(new Int32(), 0, i32s.length, 0, null, i32s));
    const f32Input = Column.new('f32', Data.Float(new Float32(), 0, f32s.length, 0, null, f32s));
    expect(i32Input.name).toBe('i32');
    expect(f32Input.name).toBe('f32');
    expect(i32Input).toHaveLength(i32s.length);
    expect(f32Input).toHaveLength(f32s.length);
    expect(i32Input.nullable).toBe(true);
    expect(f32Input.nullable).toBe(true);
    expect(i32Input.nullCount).toBe(0);
    expect(f32Input.nullCount).toBe(0);

    // Columns as read back from the Table, by position.
    const table = Table.new(i32Input, f32Input);
    const i32Output = table.getColumnAt(0)!;
    const f32Output = table.getColumnAt(1)!;
    expect(i32Output.name).toBe('i32');
    expect(f32Output.name).toBe('f32');
    expect(i32Output).toHaveLength(i32s.length);
    expect(f32Output).toHaveLength(f32s.length);
    expect(i32Output.nullable).toBe(true);
    expect(f32Output.nullable).toBe(true);
    expect(i32Output.nullCount).toBe(0);
    expect(f32Output.nullCount).toBe(0);

    expect(i32Output).toEqualVector(Int32Vector.from(i32s));
    expect(f32Output).toEqualVector(Float32Vector.from(f32s));
});
164
test(`creates a new Table from Columns with different lengths`, () => {
    const i32s = new Int32Array(arange(new Array<number>(20)));
    const f32s = new Float32Array(arange(new Array<number>(8)));

    const i32Input = Column.new('i32', Int32Vector.from(i32s));
    const f32Input = Column.new('f32', Float32Vector.from(f32s));

    expect(i32Input.name).toBe('i32');
    expect(f32Input.name).toBe('f32');
    expect(i32Input).toHaveLength(i32s.length);
    expect(f32Input).toHaveLength(f32s.length);
    expect(i32Input.nullable).toBe(true);
    expect(f32Input.nullable).toBe(true);
    expect(i32Input.nullCount).toBe(0);
    expect(f32Input.nullCount).toBe(0);

    const table = Table.new([i32Input, f32Input]);
    const i32Output = table.getColumnAt(0)!;
    const f32Output = table.getColumnAt(1)!;

    expect(i32Output.name).toBe('i32');
    expect(f32Output.name).toBe('f32');
    expect(i32Output).toHaveLength(i32s.length);
    // The shorter column is expected to be extended to the longest sibling's length...
    expect(f32Output).toHaveLength(i32s.length);
    expect(i32Output.nullable).toBe(true);
    expect(f32Output.nullable).toBe(true);
    expect(i32Output.nullCount).toBe(0);
    // ...with the 12 added slots counted as nulls.
    expect(f32Output.nullCount).toBe(i32s.length - f32s.length);

    // Expected padded column: 20 slots, 12 nulls, and a bitmap whose first byte is 0xFF.
    const paddedLength = i32s.length;
    const paddedNullCount = i32s.length - f32s.length;
    const nullBitmap = new Uint8Array(8).fill(255, 0, 1);
    const f32Expected = Data.Float(f32Output.type, 0, paddedLength, paddedNullCount, nullBitmap, f32s);

    expect(i32Output).toEqualVector(Int32Vector.from(i32s));
    expect(f32Output).toEqualVector(new Float32Vector(f32Expected));
});
203
test(`creates a new Table from Columns with different lengths and number of inner chunks`, () => {
    const i32s = new Int32Array(arange(new Array<number>(20)));
    const f32s = new Float32Array(arange(new Array<number>(16)));

    // The f32 column is built from two 8-element chunks; the i32 column from one.
    const firstChunk = Float32Vector.from(f32s.slice(0, 8));
    const secondChunk = Float32Vector.from(f32s.slice(8, 16));
    const i32Input = Column.new('i32', Int32Vector.from(i32s));
    const f32Input = Column.new('f32', firstChunk, secondChunk);

    expect(i32Input.name).toBe('i32');
    expect(f32Input.name).toBe('f32');
    expect(i32Input).toHaveLength(i32s.length);
    expect(f32Input).toHaveLength(f32s.length);
    expect(i32Input.nullable).toBe(true);
    expect(f32Input.nullable).toBe(true);
    expect(i32Input.nullCount).toBe(0);
    expect(f32Input.nullCount).toBe(0);

    // The object keys are expected to become the column names.
    const table = Table.new({ i32Renamed: i32Input, f32Renamed: f32Input });
    const i32Output = table.getColumn('i32Renamed');
    const f32Output = table.getColumn('f32Renamed');

    expect(i32Output.name).toBe('i32Renamed');
    expect(f32Output.name).toBe('f32Renamed');
    expect(i32Output).toHaveLength(i32s.length);
    // The shorter column is expected to be extended to the longest sibling's length...
    expect(f32Output).toHaveLength(i32s.length);
    expect(i32Output.nullable).toBe(true);
    expect(f32Output.nullable).toBe(true);
    expect(i32Output.nullCount).toBe(0);
    // ...with the 4 added slots counted as nulls.
    expect(f32Output.nullCount).toBe(i32s.length - f32s.length);

    // Expected padded column: 20 slots, 4 nulls, and a bitmap whose first two bytes are 0xFF.
    const paddedLength = i32s.length;
    const paddedNullCount = i32s.length - f32s.length;
    const nullBitmap = new Uint8Array(8).fill(255, 0, 2);
    const f32Expected = Data.Float(f32Output.type, 0, paddedLength, paddedNullCount, nullBitmap, f32s);

    expect(i32Output).toEqualVector(Int32Vector.from(i32s));
    expect(f32Output).toEqualVector(new Float32Vector(f32Expected));
});
242
test(`creates a new Table from Typed Arrays`, () => {
    const rowCount = 10;
    const ints = Int32Array.from({length: rowCount}, (_, i) => i);
    const floats = Float32Array.from({length: rowCount}, (_, i) => i);

    // Columns are looked up below under the object's keys.
    const table = Table.new({ i32s: ints, f32s: floats });
    const i32 = table.getColumn('i32s')!;
    const f32 = table.getColumn('f32s')!;

    expect(table).toHaveLength(10);
    expect(i32).toHaveLength(10);
    expect(f32).toHaveLength(10);
    expect(i32.toArray()).toBeInstanceOf(Int32Array);
    expect(f32.toArray()).toBeInstanceOf(Float32Array);
    expect(i32.toArray()).toEqual(ints);
    expect(f32.toArray()).toEqual(floats);
});
258 });
259
test(`Table.serialize() serializes sliced RecordBatches`, () => {
    const table = getSingleRecordBatchTable();
    const batch = table.chunks[0];
    const half = batch.length / 2 | 0;

    // Wraps a batch in a Table, serializes it, and reads the result back.
    const roundTrip = (b: RecordBatch) => Table.from(new Table(b).serialize());

    // Case 1: slice at the RecordBatch level.
    const firstHalf = batch.slice(0, half);
    const secondHalf = batch.slice(half);

    compareBatchAndTable(table, 0, firstHalf, roundTrip(firstHalf));
    compareBatchAndTable(table, half, secondHalf, roundTrip(secondHalf));

    // Case 2: build each RecordBatch from individually sliced children.
    const firstFromChildren = new RecordBatch(
        firstHalf.schema,
        firstHalf.length,
        firstHalf.schema.fields.map((_, i) => batch.getChildAt(i)!.slice(0, half))
    );
    const secondFromChildren = new RecordBatch(
        secondHalf.schema,
        secondHalf.length,
        secondHalf.schema.fields.map((_, i) => batch.getChildAt(i)!.slice(half))
    );

    compareBatchAndTable(table, 0, firstFromChildren, roundTrip(firstFromChildren));
    compareBatchAndTable(table, half, secondFromChildren, roundTrip(secondFromChildren));
});
283
284 for (let datum of test_data) {
285 describe(datum.name, () => {
// The table's length must match the number of expected rows.
test(`has the correct length`, () => {
    const expectedRowCount = datum.values().length;
    expect(datum.table()).toHaveLength(expectedRowCount);
});
// Random access: every row fetched by index matches the expected row.
test(`gets expected values`, () => {
    const table = datum.table();
    datum.values().forEach((expected, rowIndex) => {
        const row = table.get(rowIndex);
        expect(row.f32).toEqual(expected[F32]);
        expect(row.i32).toEqual(expected[I32]);
        expect(row.dictionary).toEqual(expected[DICT]);
    });
});
// Sequential access: iterating the table yields exactly the expected rows, in order.
test(`iterates expected values`, () => {
    const table = datum.table();
    const values = datum.values();
    let visited = 0;
    for (const row of table) {
        const expected = values[visited++];
        expect(row.f32).toEqual(expected[F32]);
        expect(row.i32).toEqual(expected[I32]);
        expect(row.dictionary).toEqual(expected[DICT]);
    }
    // Without this check an iterator that yields no rows, or stops early,
    // would make the loop above pass vacuously.
    expect(visited).toEqual(values.length);
});
// A table read back from its own serialized form must equal the original.
test(`serialize and de-serialize is a no-op`, () => {
    const original = datum.table();
    const serialized = original.serialize();
    const restored = Table.from(serialized);
    expect(restored).toEqualTable(original);
});
318
// count() must report as many rows as there are expected values.
test(`count() returns the correct length`, () => {
    const rowCount = datum.table().count();
    expect(rowCount).toEqual(datum.values().length);
});
// Each column name resolves to its position in the schema.
test(`getColumnIndex`, () => {
    const table = datum.table();
    const i32Index = table.getColumnIndex('i32');
    const f32Index = table.getColumnIndex('f32');
    const dictionaryIndex = table.getColumnIndex('dictionary');
    expect(i32Index).toEqual(I32);
    expect(f32Index).toEqual(F32);
    expect(dictionaryIndex).toEqual(DICT);
});
330
// select() keeps only the requested columns, in the requested order, and all rows.
test(`table.select() basic tests`, () => {
    // Build the fixtures inside the test rather than in the describe body:
    // describe bodies run while tests are being collected, so a throwing
    // factory there would abort the whole file instead of failing this test.
    const table = datum.table();
    const values = datum.values();

    const selected = table.select('f32', 'dictionary');
    expect(selected.schema.fields).toHaveLength(2);
    expect(selected.schema.fields[0]).toEqual(table.schema.fields[0]);
    expect(selected.schema.fields[1]).toEqual(table.schema.fields[2]);

    expect(selected).toHaveLength(values.length);
    let idx = 0;
    for (const row of selected) {
        const expected = values[idx++];
        expect(row.f32).toEqual(expected[F32]);
        expect(row.dictionary).toEqual(expected[DICT]);
    }
    // Guard against an iterator that yields fewer rows than `length` reports.
    expect(idx).toEqual(values.length);
});
348 });
349 }
350 });
351
/** Column name to Arrow data type mapping shared by every fixture table in this file. */
type TestDataSchema = {
    f32: Float32;
    i32: Int32;
    dictionary: Dictionary<Utf8, Int8>;
};
353
/**
 * Builds the three column vectors used by the fixtures, in NAMES order:
 * float32 values, int32 values, and a dictionary vector over ['a', 'b', 'c'].
 *
 * @param f32Values values for the float32 vector
 * @param i32Values values for the int32 vector
 * @param dictIndices indices into the ['a', 'b', 'c'] dictionary
 */
function getTestVectors(f32Values: number[], i32Values: number[], dictIndices: number[]) {
    const dictionaryValues = Utf8Vector.from(['a', 'b', 'c']);
    const i32Data = Data.Int(new Int32(), 0, i32Values.length, 0, null, i32Values);
    const f32Data = Data.Float(new Float32(), 0, f32Values.length, 0, null, f32Values);

    const f32Vector = Vector.new(f32Data);
    const i32Vector = Vector.new(i32Data);
    const dictionaryVector = DictionaryVector.from(dictionaryValues, new Int8(), dictIndices);

    return [f32Vector, i32Vector, dictionaryVector];
}
362
/** Builds the seven-row fixture table from one set of vectors, with columns named per NAMES. */
function getSingleRecordBatchTable() {
    const f32Values = [-0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3];
    const i32Values = [-1, 1, -1, 1, -1, 1, -1];
    const dictIndices = [0, 1, 2, 0, 1, 2, 0];

    const vectors = getTestVectors(f32Values, i32Values, dictIndices);
    return Table.new<TestDataSchema>(vectors, NAMES);
}
372
/** Builds the nine-row fixture table out of three record batches of three rows each. */
function getMultipleRecordBatchesTable() {
    // Empty vectors are created only to obtain the column types for the schema.
    const types = getTestVectors([], [], []).map((vec) => vec.type);
    const fields = NAMES.map((name, i) => Field.new(name, types[i]));
    const schema = new Schema<TestDataSchema>(fields);

    // Every batch holds three rows and shares the schema above.
    const makeBatch = (f32Values: number[], i32Values: number[], dictIndices: number[]) =>
        new RecordBatch(schema, 3, getTestVectors(f32Values, i32Values, dictIndices));

    const first = makeBatch([-0.3, -0.2, -0.1], [-1, 1, -1], [0, 1, 2]);
    const second = makeBatch([0, 0.1, 0.2], [1, -1, 1], [0, 1, 2]);
    const third = makeBatch([0.3, 0.2, 0.1], [-1, 1, -1], [0, 1, 2]);

    return new Table<TestDataSchema>([first, second, third]);
}
399
/**
 * Wraps the columns of the single-record-batch fixture into one struct
 * vector and returns a table whose only column, 'struct', holds it.
 */
function getStructTable() {
    const source = getSingleRecordBatchTable();
    const { fields } = source.schema;

    const structType = new Struct<TestDataSchema>(fields);
    const childColumns = fields.map((_, i) => source.getColumnAt(i)!);
    const structData = Data.Struct(structType, 0, source.length, 0, null, childColumns);
    const structVector = Vector.new(structData);

    return Table.new<{ struct: Struct<TestDataSchema> }>([structVector], ['struct']);
}