]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/js/test/unit/builders/utils.ts
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / js / test / unit / builders / utils.ts
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 import '../../jest-extensions';
19 import { from, fromDOMStream, toArray } from 'ix/asynciterable';
20 import { fromNodeStream } from 'ix/asynciterable/fromnodestream';
21 import 'ix/Ix.node';
22 import { util } from 'apache-arrow';
23 import { Builder } from 'apache-arrow';
24 import { DataType, Vector, Chunked } from 'apache-arrow';
25 import randstr from 'randomatic';
26
/** Shorthand for `Math.random`, bound so it can be called as a bare function. */
const rand = Math.random.bind(Math);

/**
 * Returns a new array in which every element of `values` is either kept
 * (when a fresh `Math.random()` draw is greater than 0.25) or replaced by the
 * sentinel `n`. The input array is not modified.
 *
 * @param values Elements to copy.
 * @param n Sentinel substituted for the replaced elements; defaults to `null`.
 */
const randnulls = <T, TNull = null>(values: T[], n: TNull = <any> null) => {
    const keepOrReplace = (value: T): T | TNull => {
        if (Math.random() > 0.25) {
            return value;
        }
        return n;
    };
    return values.map(keepOrReplace) as (T | TNull)[];
};
29
/** Returns a `Uint8Array` of `length` elements filled by `fillRandom`. */
export const randomBytes = (length: number) => {
    return fillRandom(Uint8Array, length);
};

/** Options passed to `randomatic`: the custom alphabet strings are drawn from. */
const randomStringOptions = { chars: `abcdefghijklmnopqrstuvwxyz0123456789_` };

/**
 * Returns a random string of `length` characters by calling `randomatic`
 * with the `'?'` pattern and the alphabet above.
 * NOTE(review): `'?'` is presumably randomatic's "custom chars" pattern — confirm.
 */
export const randomString = (length: number) => randstr('?', length, randomStringOptions);
34
/**
 * Builds `length` random strings, each between 1 and 19 characters long.
 *
 * @param length Number of strings to generate (default 20).
 */
export const stringsNoNulls = (length = 20) => {
    const nextString = () => randomString(1 + (Math.random() * 19 | 0));
    return Array.from({ length }, nextString);
};
/**
 * Builds `length` millisecond timestamps that each fall on a whole-day
 * boundary: a day number within +/-10000 days of `now`, multiplied by the
 * number of milliseconds in a day.
 *
 * @param length Number of timestamps to generate (default 20).
 * @param now Reference day number (days since the epoch); defaults to today.
 */
export const timestamp32sNoNulls = (length = 20, now = Date.now() / 86400000 | 0) => {
    const msPerDay = 86400000;
    return Array.from({ length }, () => {
        const magnitude = rand() * 10000;
        const sign = rand() > 0.5 ? -1 : 1;
        // `| 0` truncates the shifted day number to a 32-bit integer.
        const day = (now + magnitude * sign) | 0;
        return day * msPerDay;
    });
};
38
/**
 * Builds `length` 64-bit millisecond timestamps, each encoded as an
 * `Int32Array` of `[low word, high word]`, within +/-31557600000 ms
 * (365.25 days) of `now`.
 *
 * The offset is truncated with `Math.trunc` rather than `| 0`: the bitwise
 * form coerces to a 32-bit integer, which wraps the intended one-year range
 * down to roughly +/-24.8 days. The high word is computed with `Math.floor`
 * so that negative timestamps also round-trip through
 * `4294967296 * hi + (lo >>> 0)`.
 *
 * @param length Number of timestamps to generate (default 20).
 * @param now Reference time in milliseconds since the epoch (default: now).
 */
export const timestamp64sNoNulls = (length = 20, now = Date.now()) => Array.from({ length }, () => {
    const magnitude = Math.trunc(Math.random() * 31557600000);
    const sign = Math.random() > 0.5 ? -1 : 1;
    const ms = now + magnitude * sign;
    const hi = Math.floor(ms / 4294967296);
    // `lo` lies in [0, 2^32); the Int32Array stores its two's-complement bits.
    const lo = ms - hi * 4294967296;
    return new Int32Array([lo, hi]);
});
43
/** `timestamp32sNoNulls(length)` with random entries replaced by `null`. */
export const timestamp32sWithNulls = (length = 20) => {
    const timestamps = timestamp32sNoNulls(length);
    return randnulls(timestamps, null);
};

/** `timestamp64sNoNulls(length)` with random entries replaced by `null`. */
export const timestamp64sWithNulls = (length = 20) => {
    const timestamps = timestamp64sNoNulls(length);
    return randnulls(timestamps, null);
};

/** `timestamp32sNoNulls(length)` with random entries replaced by `0x7fffffff`. */
export const timestamp32sWithMaxInts = (length = 20) => {
    const timestamps = timestamp32sNoNulls(length);
    return randnulls(timestamps, 0x7fffffff);
};

/**
 * `timestamp64sNoNulls(length)` with random entries replaced by a single
 * shared `Int32Array` whose two words are both `0x7fffffff`.
 */
export const timestamp64sWithMaxInts = (length = 20) => {
    const timestamps = timestamp64sNoNulls(length);
    return randnulls(timestamps, new Int32Array([0x7fffffff, 0x7fffffff]));
};
48
/** Builds `length` random booleans (`true` when the draw exceeds 0.5). */
export const boolsNoNulls = (length = 20) => {
    const coinFlip = () => rand() > 0.5;
    return Array.from({ length }, coinFlip);
};

/** Builds `length` `Date`s from the values of `timestamp32sNoNulls`. */
export const date32sNoNulls = (length = 20) => {
    const timestamps = timestamp32sNoNulls(length);
    return timestamps.map((ms) => new Date(ms));
};

/**
 * Builds `length` `Date`s from the `[low, high]` word pairs of
 * `timestamp64sNoNulls`, reading the low word as unsigned.
 */
export const date64sNoNulls = (length = 20) => {
    const pairs = timestamp64sNoNulls(length);
    return pairs.map((pair) => {
        const lowUnsigned = pair[0] >>> 0;
        return new Date(4294967296 * pair[1] + lowUnsigned);
    });
};

/** Builds `length` numbers by viewing random bytes as an `Int8Array`. */
export const int8sNoNulls = (length = 20) => {
    const bytes = randomBytes(length * Int8Array.BYTES_PER_ELEMENT);
    return Array.from(new Int8Array(bytes.buffer));
};

/** Builds `length` numbers by viewing random bytes as an `Int16Array`. */
export const int16sNoNulls = (length = 20) => {
    const bytes = randomBytes(length * Int16Array.BYTES_PER_ELEMENT);
    return Array.from(new Int16Array(bytes.buffer));
};

/** Builds `length` numbers by viewing random bytes as an `Int32Array`. */
export const int32sNoNulls = (length = 20) => {
    const bytes = randomBytes(length * Int32Array.BYTES_PER_ELEMENT);
    return Array.from(new Int32Array(bytes.buffer));
};
/**
 * Builds `length` random 64-bit signed values, cycling by index through the
 * three argument forms the Int64 builder accepts so each appears equally often.
 *
 * @param length Number of values to generate (default 20).
 */
export const int64sNoNulls = (length = 20) => Array.from({ length }, (_, i) => {
    // Eight random bytes viewed as two 32-bit words.
    const bn = util.BN.new(new Int32Array(randomBytes(2 * 4).buffer));
    const form = i % 3;
    if (form === 0) {
        // Int32Array (util.BN is-a Int32Array)
        return bn;
    }
    if (form === 1) {
        // BigInt
        return bn[Symbol.toPrimitive]();
    }
    // number: the first 32-bit word
    return bn[0];
});
69
/** Builds `length` numbers by viewing random bytes as a `Uint8Array`. */
export const uint8sNoNulls = (length = 20) => {
    const bytes = randomBytes(length * Uint8Array.BYTES_PER_ELEMENT);
    return Array.from(new Uint8Array(bytes.buffer));
};

/** Builds `length` numbers by viewing random bytes as a `Uint16Array`. */
export const uint16sNoNulls = (length = 20) => {
    const bytes = randomBytes(length * Uint16Array.BYTES_PER_ELEMENT);
    return Array.from(new Uint16Array(bytes.buffer));
};

/** Builds `length` numbers by viewing random bytes as a `Uint32Array`. */
export const uint32sNoNulls = (length = 20) => {
    const bytes = randomBytes(length * Uint32Array.BYTES_PER_ELEMENT);
    return Array.from(new Uint32Array(bytes.buffer));
};
/**
 * Builds `length` random 64-bit unsigned values, cycling by index through the
 * three argument forms the Uint64 builder accepts so each appears equally often.
 *
 * @param length Number of values to generate (default 20).
 */
export const uint64sNoNulls = (length = 20) => Array.from({ length }, (_, i) => {
    // Eight random bytes viewed as two unsigned 32-bit words.
    const bn = util.BN.new(new Uint32Array(randomBytes(2 * 4).buffer));
    const form = i % 3;
    if (form === 0) {
        // Uint32Array (util.BN is-a Uint32Array)
        return bn;
    }
    if (form === 1) {
        // BigInt
        return bn[Symbol.toPrimitive]();
    }
    // number: the first 32-bit word
    return bn[0];
});
/**
 * Builds `length` numbers by viewing random bytes as a `Uint16Array` and
 * mapping each element through `util.uint16ToFloat64`.
 */
export const float16sNoNulls = (length = 20) => {
    const bytes = randomBytes(length * Uint16Array.BYTES_PER_ELEMENT);
    const halves = Array.from(new Uint16Array(bytes.buffer));
    return halves.map(util.uint16ToFloat64);
};

/** Builds `length` numbers by viewing random bytes as a `Float32Array`. */
export const float32sNoNulls = (length = 20) => {
    const bytes = randomBytes(length * Float32Array.BYTES_PER_ELEMENT);
    return Array.from(new Float32Array(bytes.buffer));
};

/** Builds `length` numbers by viewing random bytes as a `Float64Array`. */
export const float64sNoNulls = (length = 20) => {
    const bytes = randomBytes(length * Float64Array.BYTES_PER_ELEMENT);
    return Array.from(new Float64Array(bytes.buffer));
};
90
/** `stringsNoNulls(length)` with random entries replaced by `'n/a'`. */
export const stringsWithNAs = (length = 20) => {
    const strings = stringsNoNulls(length);
    return randnulls(strings, 'n/a');
};

/** `stringsNoNulls(length)` with random entries replaced by `null`. */
export const stringsWithNulls = (length = 20) => {
    const strings = stringsNoNulls(length);
    return randnulls(strings, null);
};

/** `stringsNoNulls(length)` with random entries replaced by the NUL character `'\0'`. */
export const stringsWithEmpties = (length = 20) => {
    const strings = stringsNoNulls(length);
    return randnulls(strings, '\0');
};
94
// Each generator below produces the matching `*NoNulls(length)` array and
// passes it through `randnulls` with `null` as the replacement sentinel.

/** Random booleans with random `null` entries. */
export const boolsWithNulls = (length = 20) => {
    const values = boolsNoNulls(length);
    return randnulls(values, null);
};

/** Random day-resolution dates with random `null` entries. */
export const date32sWithNulls = (length = 20) => {
    const values = date32sNoNulls(length);
    return randnulls(values, null);
};

/** Random millisecond-resolution dates with random `null` entries. */
export const date64sWithNulls = (length = 20) => {
    const values = date64sNoNulls(length);
    return randnulls(values, null);
};

/** Random int8 values with random `null` entries. */
export const int8sWithNulls = (length = 20) => {
    const values = int8sNoNulls(length);
    return randnulls(values, null);
};

/** Random int16 values with random `null` entries. */
export const int16sWithNulls = (length = 20) => {
    const values = int16sNoNulls(length);
    return randnulls(values, null);
};

/** Random int32 values with random `null` entries. */
export const int32sWithNulls = (length = 20) => {
    const values = int32sNoNulls(length);
    return randnulls(values, null);
};

/** Random int64 values with random `null` entries. */
export const int64sWithNulls = (length = 20) => {
    const values = int64sNoNulls(length);
    return randnulls(values, null);
};

/** Random uint8 values with random `null` entries. */
export const uint8sWithNulls = (length = 20) => {
    const values = uint8sNoNulls(length);
    return randnulls(values, null);
};

/** Random uint16 values with random `null` entries. */
export const uint16sWithNulls = (length = 20) => {
    const values = uint16sNoNulls(length);
    return randnulls(values, null);
};

/** Random uint32 values with random `null` entries. */
export const uint32sWithNulls = (length = 20) => {
    const values = uint32sNoNulls(length);
    return randnulls(values, null);
};

/** Random uint64 values with random `null` entries. */
export const uint64sWithNulls = (length = 20) => {
    const values = uint64sNoNulls(length);
    return randnulls(values, null);
};

/** Random float16 values with random `null` entries. */
export const float16sWithNulls = (length = 20) => {
    const values = float16sNoNulls(length);
    return randnulls(values, null);
};

/** Random float32 values with random `null` entries. */
export const float32sWithNulls = (length = 20) => {
    const values = float32sNoNulls(length);
    return randnulls(values, null);
};

/** Random float64 values with random `null` entries. */
export const float64sWithNulls = (length = 20) => {
    const values = float64sNoNulls(length);
    return randnulls(values, null);
};
109
// Each generator below produces the matching `*NoNulls(length)` array and
// passes it through `randnulls` with a non-null sentinel: `0x7fffffff` for the
// 8/16/32-bit integers, a two-word typed array of `0x7fffffff` for the 64-bit
// integers, and `NaN` for the floats.

/** Random int8 values with random entries replaced by `0x7fffffff`. */
export const int8sWithMaxInts = (length = 20) => {
    const values = int8sNoNulls(length);
    return randnulls(values, 0x7fffffff);
};

/** Random int16 values with random entries replaced by `0x7fffffff`. */
export const int16sWithMaxInts = (length = 20) => {
    const values = int16sNoNulls(length);
    return randnulls(values, 0x7fffffff);
};

/** Random int32 values with random entries replaced by `0x7fffffff`. */
export const int32sWithMaxInts = (length = 20) => {
    const values = int32sNoNulls(length);
    return randnulls(values, 0x7fffffff);
};

/** Random int64 values with random entries replaced by one shared `Int32Array` sentinel. */
export const int64sWithMaxInts = (length = 20) => {
    const values = int64sNoNulls(length);
    return randnulls(values, new Int32Array([0x7fffffff, 0x7fffffff]));
};

/** Random uint8 values with random entries replaced by `0x7fffffff`. */
export const uint8sWithMaxInts = (length = 20) => {
    const values = uint8sNoNulls(length);
    return randnulls(values, 0x7fffffff);
};

/** Random uint16 values with random entries replaced by `0x7fffffff`. */
export const uint16sWithMaxInts = (length = 20) => {
    const values = uint16sNoNulls(length);
    return randnulls(values, 0x7fffffff);
};

/** Random uint32 values with random entries replaced by `0x7fffffff`. */
export const uint32sWithMaxInts = (length = 20) => {
    const values = uint32sNoNulls(length);
    return randnulls(values, 0x7fffffff);
};

/** Random uint64 values with random entries replaced by one shared `Uint32Array` sentinel. */
export const uint64sWithMaxInts = (length = 20) => {
    const values = uint64sNoNulls(length);
    return randnulls(values, new Uint32Array([0x7fffffff, 0x7fffffff]));
};

/** Random float16 values with random entries replaced by `NaN`. */
export const float16sWithNaNs = (length = 20) => {
    const values = float16sNoNulls(length);
    return randnulls(values, NaN);
};

/** Random float32 values with random entries replaced by `NaN`. */
export const float32sWithNaNs = (length = 20) => {
    const values = float32sNoNulls(length);
    return randnulls(values, NaN);
};

/** Random float64 values with random entries replaced by `NaN`. */
export const float64sWithNaNs = (length = 20) => {
    const values = float64sNoNulls(length);
    return randnulls(values, NaN);
};
121
/**
 * Builds an array of `n` entries, each picked uniformly at random (with
 * replacement) from `xs`, so the result generally contains repeats.
 *
 * @param n Number of entries to produce.
 * @param xs Pool of candidate items to draw from.
 * @returns A new array of length `n`.
 */
export const duplicateItems = (n: number, xs: (any | null)[]) => {
    const picks = new Array<string | null>(n);
    const poolSize = xs.length;
    for (let slot = 0; slot < n; slot++) {
        // `| 0` truncates the scaled draw to an integer index into the pool.
        const pickIndex = Math.random() * poolSize | 0;
        picks[slot] = xs[pickIndex];
    }
    return picks;
};
129
/**
 * Creates an encoder that appends every value to a single `Builder` and
 * returns the finished result as a vector.
 *
 * @param typeFactory Called once per encode to produce the Arrow data type.
 * @returns An async function taking the values and, optionally, the list of
 *          values the builder should treat as null.
 */
export function encodeAll<T extends DataType>(typeFactory: () => T) {
    return async function encodeAll<TNull = any>(values: (T['TValue'] | TNull)[], nullValues?: TNull[]) {
        const builder = Builder.new({ type: typeFactory(), nullValues });
        const append = builder.append.bind(builder);
        values.forEach(append);
        const finished = builder.finish();
        return finished.toVector();
    };
}
138
/**
 * Creates an encoder that feeds the values through `Builder.throughIterable`
 * and concatenates the chunks it yields into one `Chunked` vector.
 *
 * @param typeFactory Called once per encode to produce the Arrow data type.
 * @param chunkLen Passed to the builder as `highWaterMark`.
 * @returns An async function taking the values and, optionally, the list of
 *          values the builder should treat as null.
 */
export function encodeEach<T extends DataType>(typeFactory: () => T, chunkLen?: number) {
    return async function encodeEach<TNull = any>(vals: (T['TValue'] | TNull)[], nullValues?: TNull[]) {
        const toChunks = Builder.throughIterable({
            type: typeFactory(),
            nullValues,
            highWaterMark: chunkLen,
        });
        // Drain the iterable fully before concatenating.
        const pieces = Array.from(toChunks(vals));
        return Chunked.concat(...pieces) as Chunked<T>;
    };
}
147
/**
 * Creates an encoder that turns the values into a DOM stream, pipes it
 * through `Builder.throughDOM`, and concatenates the collected chunks into
 * one `Chunked` vector.
 *
 * @param typeFactory Called once per encode to produce the Arrow data type.
 * @param chunkLen Used as `highWaterMark` for both the readable and the
 *                 writable side of the builder transform.
 * @returns An async function taking the values and, optionally, the list of
 *          values the builder should treat as null.
 */
export function encodeEachDOM<T extends DataType>(typeFactory: () => T, chunkLen?: number) {
    return async function encodeEachDOM<TNull = any>(vals: (T['TValue'] | TNull)[], nullValues?: TNull[]) {
        const type = typeFactory();
        // One strategy object is shared by both sides of the transform.
        const queuing = { highWaterMark: chunkLen };
        const input = from(vals).toDOMStream();
        const transform = Builder.throughDOM({
            type,
            nullValues,
            readableStrategy: queuing,
            writableStrategy: queuing,
        });
        const encoded = fromDOMStream(input.pipeThrough(transform));
        const pieces = await encoded.pipe(toArray);
        return Chunked.concat(...pieces) as Chunked<T>;
    };
}
158
/**
 * Creates an encoder that turns the values into an object-mode Node stream,
 * pipes it through `Builder.throughNode`, and concatenates the collected
 * chunks into one `Chunked` vector.
 *
 * Every `null` in both the values and the null-value list is replaced by
 * `undefined` before streaming.
 * NOTE(review): presumably because a `null` chunk would end a Node stream — confirm.
 *
 * @param typeFactory Called once per encode to produce the Arrow data type.
 * @param chunkLen Passed to the builder as `highWaterMark` and to
 *                 `fromNodeStream` as its second argument.
 * @returns An async function taking the values and, optionally, the list of
 *          values the builder should treat as null.
 */
export function encodeEachNode<T extends DataType>(typeFactory: () => T, chunkLen?: number) {
    return async function encodeEachNode<TNull = any>(vals: (T['TValue'] | TNull)[], nullValues?: TNull[]) {
        const type = typeFactory();
        const streamable = vals.map((value) => {
            if (value === null) {
                return undefined;
            }
            return value;
        });
        const input = from(streamable).toNodeStream({ objectMode: true });
        // Leave an absent null-value list untouched; otherwise apply the same substitution.
        const streamableNulls = nullValues
            ? nullValues.map((value) => {
                if (value === null) {
                    return undefined;
                }
                return value;
            })
            : nullValues;
        const transform = Builder.throughNode({ type, nullValues: streamableNulls, highWaterMark: chunkLen });
        const encoded = fromNodeStream(input.pipe(transform), chunkLen);
        const pieces: any[] = await encoded.pipe(toArray);
        return Chunked.concat(...pieces) as Chunked<T>;
    };
}
170
/**
 * Reports whether `x`, when it is an `ArrayBuffer` view, converts (via
 * `util.BN` and its `Symbol.toPrimitive` with the `'default'` hint) to a
 * primitive that is a key of `nulls`. Anything that is not a view is never
 * considered null here.
 *
 * @param nulls Map keyed by the values that stand for null.
 * @param x Candidate value.
 */
const isInt64Null = (nulls: Map<any, any>, x: any) => {
    if (!ArrayBuffer.isView(x)) {
        return false;
    }
    const bn = util.BN.new<Int32Array>(x as Int32Array);
    const primitive = (<any> bn)[Symbol.toPrimitive]('default');
    return nulls.has(primitive);
};
178
/**
 * Walks `vec` and checks each element against the value at the same index
 * in `vals`: positions whose expected value is one of `nullVals` (directly,
 * or as a 64-bit typed-array form per `isInt64Null`) must read back as
 * null, and every other position must satisfy `toArrowCompare`.
 *
 * @param vals Expected values, indexed in step with the vector's iteration.
 * @param vec Vector under test.
 * @param nullVals Values that were encoded as null.
 * @throws Error wrapping the first failure, annotated with the vector name,
 *         the failing index, and the full null/value lists.
 */
export function validateVector<T extends DataType>(vals: (T['TValue'] | null)[], vec: Vector, nullVals: any[]) {
    const nulls = new Map<any, any>();
    nullVals.forEach((nullVal) => nulls.set(nullVal, nullVal));
    let i = 0;
    try {
        for (const actual of vec) {
            const expected: T['TValue'] | null = vals[i];
            const shouldBeNull = nulls.has(expected) || isInt64Null(nulls, expected);
            if (shouldBeNull) {
                expect(actual).toBeNull();
            } else {
                expect(actual).toArrowCompare(expected);
            }
            i++;
        }
    } catch (e) {
        // To debug the failing read, uncomment the next two lines:
        // debugger;
        // vec.get(i);
        const report = [
            `${(vec as any).VectorName}[${i}]: ${e?.stack || e}`,
            `nulls: [${nullVals.join(', ')}]`,
            `values: [${vals.join(', ')}]`,
        ];
        throw new Error(report.join('\n'));
    }
}
204
/**
 * Allocates a typed array of the given kind and assigns every slot a random
 * value of magnitude up to `2^(8 * BYTES_PER_ELEMENT) - 1`, negated on
 * roughly half of the draws. How an assigned value is stored is left to the
 * typed array's own element conversion.
 *
 * @param ArrayType Typed-array constructor to instantiate.
 * @param length Number of elements to allocate and fill.
 * @returns The filled array, typed as an instance of `ArrayType`.
 */
function fillRandom<T extends TypedArrayConstructor>(ArrayType: T, length: number) {
    const bitsPerElement = 8 * ArrayType.BYTES_PER_ELEMENT;
    const array = new ArrayType(length);
    const max = (2 ** bitsPerElement) - 1;
    for (let i = 0; i < length; i++) {
        // Draw the magnitude first, then the sign.
        const magnitude = rand() * max;
        const sign = rand() > 0.5 ? -1 : 1;
        array[i] = magnitude * sign;
    }
    return array as InstanceType<T>;
}
212
/** Union of the numeric typed-array constructors accepted by `fillRandom`. */
type TypedArrayConstructor =
    | typeof Int8Array
    | typeof Int16Array
    | typeof Int32Array
    | typeof Uint8Array
    | typeof Uint16Array
    | typeof Uint32Array
    | typeof Float32Array
    | typeof Float64Array;