]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/js/src/vector/chunked.ts
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / js / src / vector / chunked.ts
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18import { Data } from '../data';
19import { Field } from '../schema';
20import { clampRange } from '../util/vector';
21import { DataType, Dictionary } from '../type';
22import { selectChunkArgs } from '../util/args';
23import { DictionaryVector } from './dictionary';
24import { AbstractVector, Vector } from '../vector';
25import { Clonable, Sliceable, Applicative } from '../vector';
26
27/** @ignore */
28type ChunkedDict<T extends DataType> = T extends Dictionary ? Vector<T['dictionary']> : null | never;
29/** @ignore */
30type ChunkedKeys<T extends DataType> = T extends Dictionary ? Vector<T['indices']> | Chunked<T['indices']> : null | never;
31
32/** @ignore */
33export type SearchContinuation<T extends Chunked> = (column: T, chunkIndex: number, valueIndex: number) => any;
34
35/** @ignore */
36class ChunkedIterator<T extends DataType> implements IterableIterator<T['TValue'] | null> {
37 private chunkIndex = 0;
38 private chunkIterator: IterableIterator<T['TValue'] | null>;
39
40 constructor(
41 private chunks: Vector<T>[],
42 ) {
43 this.chunkIterator = this.getChunkIterator();
44 }
45
46 next(): IteratorResult<T['TValue'] | null> {
47 while (this.chunkIndex < this.chunks.length) {
48 const next = this.chunkIterator.next();
49
50 if (!next.done) {
51 return next;
52 }
53
54 if (++this.chunkIndex < this.chunks.length) {
55 this.chunkIterator = this.getChunkIterator();
56 }
57 }
58
59 return {done: true, value: null};
60 }
61
62 getChunkIterator() {
63 return this.chunks[this.chunkIndex][Symbol.iterator]();
64 }
65
66 [Symbol.iterator]() {
67 return this;
68 }
69}
70
71/** @ignore */
72export class Chunked<T extends DataType = any>
73 extends AbstractVector<T>
74 implements Clonable<Chunked<T>>,
75 Sliceable<Chunked<T>>,
76 Applicative<T, Chunked<T>> {
77
78 /** @nocollapse */
79 public static flatten<T extends DataType>(...vectors: (Vector<T> | Vector<T>[])[]) {
80 return selectChunkArgs<Vector<T>>(Vector, vectors);
81 }
82
83 /** @nocollapse */
84 public static concat<T extends DataType>(...vectors: (Vector<T> | Vector<T>[])[]) {
85 const chunks = Chunked.flatten<T>(...vectors);
86 return new Chunked<T>(chunks[0].type, chunks);
87 }
88
89 protected _type: T;
90 protected _length: number;
91 protected _chunks: Vector<T>[];
92 protected _numChildren: number;
93 protected _children?: Chunked[];
94 protected _nullCount = -1;
95 protected _chunkOffsets: Uint32Array;
96
97 constructor(type: T, chunks: Vector<T>[] = [], offsets = calculateOffsets(chunks)) {
98 super();
99 this._type = type;
100 this._chunks = chunks;
101 this._chunkOffsets = offsets;
102 this._length = offsets[offsets.length - 1];
103 this._numChildren = (this._type.children || []).length;
104 }
105
106 public get type() { return this._type; }
107 public get length() { return this._length; }
108 public get chunks() { return this._chunks; }
109 public get typeId(): T['TType'] { return this._type.typeId; }
110 public get VectorName() { return `Chunked<${this._type}>`; }
111 public get data(): Data<T> {
112 return this._chunks[0] ? this._chunks[0].data : <any> null;
113 }
114
115 public get ArrayType() { return this._type.ArrayType; }
116 public get numChildren() { return this._numChildren; }
117 public get stride() { return this._chunks[0] ? this._chunks[0].stride : 1; }
118 public get byteLength(): number {
119 return this._chunks.reduce((byteLength, chunk) => byteLength + chunk.byteLength, 0);
120 }
121 public get nullCount() {
122 let nullCount = this._nullCount;
123 if (nullCount < 0) {
124 this._nullCount = nullCount = this._chunks.reduce((x, { nullCount }) => x + nullCount, 0);
125 }
126 return nullCount;
127 }
128
129 protected _indices?: ChunkedKeys<T>;
130 public get indices(): ChunkedKeys<T> | null {
131 if (DataType.isDictionary(this._type)) {
132 if (!this._indices) {
133 const chunks = (<any> this._chunks) as DictionaryVector<T, any>[];
134 this._indices = (chunks.length === 1
135 ? chunks[0].indices
136 : Chunked.concat(...chunks.map((x) => x.indices))) as ChunkedKeys<T>;
137 }
138 return this._indices;
139 }
140 return null;
141 }
142 public get dictionary(): ChunkedDict<T> | null {
143 if (DataType.isDictionary(this._type)) {
144 return this._chunks[this._chunks.length - 1].data.dictionary as ChunkedDict<T>;
145 }
146 return null;
147 }
148
149 public [Symbol.iterator](): IterableIterator<T['TValue'] | null> {
150 return new ChunkedIterator(this._chunks);
151 }
152
153 public clone(chunks = this._chunks): Chunked<T> {
154 return new Chunked(this._type, chunks);
155 }
156
157 public concat(...others: Vector<T>[]): Chunked<T> {
158 return this.clone(Chunked.flatten(this, ...others));
159 }
160
161 public slice(begin?: number, end?: number): Chunked<T> {
162 return clampRange(this, begin, end, this._sliceInternal);
163 }
164
165 public getChildAt<R extends DataType = any>(index: number): Chunked<R> | null {
166
167 if (index < 0 || index >= this._numChildren) { return null; }
168
169 const columns = this._children || (this._children = []);
170 let child: Chunked<R>, field: Field<R>, chunks: Vector<R>[];
171
172 if (child = columns[index]) { return child; }
173 if (field = ((this._type.children || [])[index] as Field<R>)) {
174 chunks = this._chunks
175 .map((vector) => vector.getChildAt<R>(index))
176 .filter((vec): vec is Vector<R> => vec != null);
177 if (chunks.length > 0) {
178 return (columns[index] = new Chunked<R>(field.type, chunks));
179 }
180 }
181
182 return null;
183 }
184
185 public search(index: number): [number, number] | null;
186 public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N): ReturnType<N>;
187 public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N) {
188 const idx = index;
189 // binary search to find the child vector and value indices
190 const offsets = this._chunkOffsets;
191 let rhs = offsets.length - 1;
192 // return early if out of bounds, or if there's just one child
193 if (idx < 0 ) { return null; }
194 if (idx >= offsets[rhs]) { return null; }
195 if (rhs <= 1 ) { return then ? then(this, 0, idx) : [0, idx]; }
196 let lhs = 0, pos = 0, mid = 0;
197 do {
198 if (lhs + 1 === rhs) {
199 return then ? then(this, lhs, idx - pos) : [lhs, idx - pos];
200 }
201 mid = lhs + ((rhs - lhs) / 2) | 0;
202 idx >= offsets[mid] ? (lhs = mid) : (rhs = mid);
203 } while (idx < offsets[rhs] && idx >= (pos = offsets[lhs]));
204 return null;
205 }
206
207 public isValid(index: number): boolean {
208 return !!this.search(index, this.isValidInternal);
209 }
210
211 public get(index: number): T['TValue'] | null {
212 return this.search(index, this.getInternal);
213 }
214
215 public set(index: number, value: T['TValue'] | null): void {
216 this.search(index, ({ chunks }, i, j) => chunks[i].set(j, value));
217 }
218
219 public indexOf(element: T['TValue'], offset?: number): number {
220 if (offset && typeof offset === 'number') {
221 return this.search(offset, (self, i, j) => this.indexOfInternal(self, i, j, element))!;
222 }
223 return this.indexOfInternal(this, 0, Math.max(0, offset || 0), element);
224 }
225
226 public toArray(): T['TArray'] {
227 const { chunks } = this;
228 const n = chunks.length;
229 let ArrayType: any = this._type.ArrayType;
230 if (n <= 0) { return new ArrayType(0); }
231 if (n <= 1) { return chunks[0].toArray(); }
232 let len = 0;
233 const src = new Array(n);
234 for (let i = -1; ++i < n;) {
235 len += (src[i] = chunks[i].toArray()).length;
236 }
237 if (ArrayType !== src[0].constructor) {
238 ArrayType = src[0].constructor;
239 }
240 const dst = new ArrayType(len);
241 const set: any = ArrayType === Array ? arraySet : typedSet;
242 for (let i = -1, idx = 0; ++i < n;) {
243 idx = set(src[i], dst, idx);
244 }
245 return dst;
246 }
247
248 protected getInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].get(j); }
249 protected isValidInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].isValid(j); }
250 protected indexOfInternal({ _chunks }: Chunked<T>, chunkIndex: number, fromIndex: number, element: T['TValue']) {
251 let i = chunkIndex - 1;
252 const n = _chunks.length;
253 let start = fromIndex, offset = 0, found = -1;
254 while (++i < n) {
255 if (~(found = _chunks[i].indexOf(element, start))) {
256 return offset + found;
257 }
258 start = 0;
259 offset += _chunks[i].length;
260 }
261 return -1;
262 }
263
264 protected _sliceInternal(self: Chunked<T>, begin: number, end: number) {
265 const slices: Vector<T>[] = [];
266 const { chunks, _chunkOffsets: chunkOffsets } = self;
267 for (let i = -1, n = chunks.length; ++i < n;) {
268 const chunk = chunks[i];
269 const chunkLength = chunk.length;
270 const chunkOffset = chunkOffsets[i];
271 // If the child is to the right of the slice boundary, we can stop
272 if (chunkOffset >= end) { break; }
273 // If the child is to the left of of the slice boundary, exclude
274 if (begin >= chunkOffset + chunkLength) { continue; }
275 // If the child is between both left and right boundaries, include w/o slicing
276 if (chunkOffset >= begin && (chunkOffset + chunkLength) <= end) {
277 slices.push(chunk);
278 continue;
279 }
280 // If the child overlaps one of the slice boundaries, include that slice
281 const from = Math.max(0, begin - chunkOffset);
282 const to = Math.min(end - chunkOffset, chunkLength);
283 slices.push(chunk.slice(from, to) as Vector<T>);
284 }
285 return self.clone(slices);
286 }
287}
288
289/** @ignore */
290function calculateOffsets<T extends DataType>(vectors: Vector<T>[]) {
291 const offsets = new Uint32Array((vectors || []).length + 1);
292 let offset = offsets[0] = 0;
293 const length = offsets.length;
294 for (let index = 0; ++index < length;) {
295 offsets[index] = (offset += vectors[index - 1].length);
296 }
297 return offsets;
298}
299
300/** @ignore */
301const typedSet = (src: TypedArray, dst: TypedArray, offset: number) => {
302 dst.set(src, offset);
303 return (offset + src.length);
304};
305
306/** @ignore */
307const arraySet = (src: any[], dst: any[], offset: number) => {
308 let idx = offset;
309 for (let i = -1, n = src.length; ++i < n;) {
310 dst[idx++] = src[i];
311 }
312 return idx;
313};
314
315/** @ignore */
316interface TypedArray extends ArrayBufferView {
317 readonly length: number;
318 readonly [n: number]: number;
319 set(array: ArrayLike<number>, offset?: number): void;
320}