]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | import { Data } from '../data'; | |
19 | import { Field } from '../schema'; | |
20 | import { clampRange } from '../util/vector'; | |
21 | import { DataType, Dictionary } from '../type'; | |
22 | import { selectChunkArgs } from '../util/args'; | |
23 | import { DictionaryVector } from './dictionary'; | |
24 | import { AbstractVector, Vector } from '../vector'; | |
25 | import { Clonable, Sliceable, Applicative } from '../vector'; | |
26 | ||
27 | /** @ignore */ | |
28 | type ChunkedDict<T extends DataType> = T extends Dictionary ? Vector<T['dictionary']> : null | never; | |
29 | /** @ignore */ | |
30 | type ChunkedKeys<T extends DataType> = T extends Dictionary ? Vector<T['indices']> | Chunked<T['indices']> : null | never; | |
31 | ||
32 | /** @ignore */ | |
33 | export type SearchContinuation<T extends Chunked> = (column: T, chunkIndex: number, valueIndex: number) => any; | |
34 | ||
35 | /** @ignore */ | |
36 | class ChunkedIterator<T extends DataType> implements IterableIterator<T['TValue'] | null> { | |
37 | private chunkIndex = 0; | |
38 | private chunkIterator: IterableIterator<T['TValue'] | null>; | |
39 | ||
40 | constructor( | |
41 | private chunks: Vector<T>[], | |
42 | ) { | |
43 | this.chunkIterator = this.getChunkIterator(); | |
44 | } | |
45 | ||
46 | next(): IteratorResult<T['TValue'] | null> { | |
47 | while (this.chunkIndex < this.chunks.length) { | |
48 | const next = this.chunkIterator.next(); | |
49 | ||
50 | if (!next.done) { | |
51 | return next; | |
52 | } | |
53 | ||
54 | if (++this.chunkIndex < this.chunks.length) { | |
55 | this.chunkIterator = this.getChunkIterator(); | |
56 | } | |
57 | } | |
58 | ||
59 | return {done: true, value: null}; | |
60 | } | |
61 | ||
62 | getChunkIterator() { | |
63 | return this.chunks[this.chunkIndex][Symbol.iterator](); | |
64 | } | |
65 | ||
66 | [Symbol.iterator]() { | |
67 | return this; | |
68 | } | |
69 | } | |
70 | ||
71 | /** @ignore */ | |
72 | export class Chunked<T extends DataType = any> | |
73 | extends AbstractVector<T> | |
74 | implements Clonable<Chunked<T>>, | |
75 | Sliceable<Chunked<T>>, | |
76 | Applicative<T, Chunked<T>> { | |
77 | ||
78 | /** @nocollapse */ | |
79 | public static flatten<T extends DataType>(...vectors: (Vector<T> | Vector<T>[])[]) { | |
80 | return selectChunkArgs<Vector<T>>(Vector, vectors); | |
81 | } | |
82 | ||
83 | /** @nocollapse */ | |
84 | public static concat<T extends DataType>(...vectors: (Vector<T> | Vector<T>[])[]) { | |
85 | const chunks = Chunked.flatten<T>(...vectors); | |
86 | return new Chunked<T>(chunks[0].type, chunks); | |
87 | } | |
88 | ||
89 | protected _type: T; | |
90 | protected _length: number; | |
91 | protected _chunks: Vector<T>[]; | |
92 | protected _numChildren: number; | |
93 | protected _children?: Chunked[]; | |
94 | protected _nullCount = -1; | |
95 | protected _chunkOffsets: Uint32Array; | |
96 | ||
97 | constructor(type: T, chunks: Vector<T>[] = [], offsets = calculateOffsets(chunks)) { | |
98 | super(); | |
99 | this._type = type; | |
100 | this._chunks = chunks; | |
101 | this._chunkOffsets = offsets; | |
102 | this._length = offsets[offsets.length - 1]; | |
103 | this._numChildren = (this._type.children || []).length; | |
104 | } | |
105 | ||
106 | public get type() { return this._type; } | |
107 | public get length() { return this._length; } | |
108 | public get chunks() { return this._chunks; } | |
109 | public get typeId(): T['TType'] { return this._type.typeId; } | |
110 | public get VectorName() { return `Chunked<${this._type}>`; } | |
111 | public get data(): Data<T> { | |
112 | return this._chunks[0] ? this._chunks[0].data : <any> null; | |
113 | } | |
114 | ||
115 | public get ArrayType() { return this._type.ArrayType; } | |
116 | public get numChildren() { return this._numChildren; } | |
117 | public get stride() { return this._chunks[0] ? this._chunks[0].stride : 1; } | |
118 | public get byteLength(): number { | |
119 | return this._chunks.reduce((byteLength, chunk) => byteLength + chunk.byteLength, 0); | |
120 | } | |
121 | public get nullCount() { | |
122 | let nullCount = this._nullCount; | |
123 | if (nullCount < 0) { | |
124 | this._nullCount = nullCount = this._chunks.reduce((x, { nullCount }) => x + nullCount, 0); | |
125 | } | |
126 | return nullCount; | |
127 | } | |
128 | ||
129 | protected _indices?: ChunkedKeys<T>; | |
130 | public get indices(): ChunkedKeys<T> | null { | |
131 | if (DataType.isDictionary(this._type)) { | |
132 | if (!this._indices) { | |
133 | const chunks = (<any> this._chunks) as DictionaryVector<T, any>[]; | |
134 | this._indices = (chunks.length === 1 | |
135 | ? chunks[0].indices | |
136 | : Chunked.concat(...chunks.map((x) => x.indices))) as ChunkedKeys<T>; | |
137 | } | |
138 | return this._indices; | |
139 | } | |
140 | return null; | |
141 | } | |
142 | public get dictionary(): ChunkedDict<T> | null { | |
143 | if (DataType.isDictionary(this._type)) { | |
144 | return this._chunks[this._chunks.length - 1].data.dictionary as ChunkedDict<T>; | |
145 | } | |
146 | return null; | |
147 | } | |
148 | ||
149 | public [Symbol.iterator](): IterableIterator<T['TValue'] | null> { | |
150 | return new ChunkedIterator(this._chunks); | |
151 | } | |
152 | ||
153 | public clone(chunks = this._chunks): Chunked<T> { | |
154 | return new Chunked(this._type, chunks); | |
155 | } | |
156 | ||
157 | public concat(...others: Vector<T>[]): Chunked<T> { | |
158 | return this.clone(Chunked.flatten(this, ...others)); | |
159 | } | |
160 | ||
161 | public slice(begin?: number, end?: number): Chunked<T> { | |
162 | return clampRange(this, begin, end, this._sliceInternal); | |
163 | } | |
164 | ||
165 | public getChildAt<R extends DataType = any>(index: number): Chunked<R> | null { | |
166 | ||
167 | if (index < 0 || index >= this._numChildren) { return null; } | |
168 | ||
169 | const columns = this._children || (this._children = []); | |
170 | let child: Chunked<R>, field: Field<R>, chunks: Vector<R>[]; | |
171 | ||
172 | if (child = columns[index]) { return child; } | |
173 | if (field = ((this._type.children || [])[index] as Field<R>)) { | |
174 | chunks = this._chunks | |
175 | .map((vector) => vector.getChildAt<R>(index)) | |
176 | .filter((vec): vec is Vector<R> => vec != null); | |
177 | if (chunks.length > 0) { | |
178 | return (columns[index] = new Chunked<R>(field.type, chunks)); | |
179 | } | |
180 | } | |
181 | ||
182 | return null; | |
183 | } | |
184 | ||
185 | public search(index: number): [number, number] | null; | |
186 | public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N): ReturnType<N>; | |
187 | public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N) { | |
188 | const idx = index; | |
189 | // binary search to find the child vector and value indices | |
190 | const offsets = this._chunkOffsets; | |
191 | let rhs = offsets.length - 1; | |
192 | // return early if out of bounds, or if there's just one child | |
193 | if (idx < 0 ) { return null; } | |
194 | if (idx >= offsets[rhs]) { return null; } | |
195 | if (rhs <= 1 ) { return then ? then(this, 0, idx) : [0, idx]; } | |
196 | let lhs = 0, pos = 0, mid = 0; | |
197 | do { | |
198 | if (lhs + 1 === rhs) { | |
199 | return then ? then(this, lhs, idx - pos) : [lhs, idx - pos]; | |
200 | } | |
201 | mid = lhs + ((rhs - lhs) / 2) | 0; | |
202 | idx >= offsets[mid] ? (lhs = mid) : (rhs = mid); | |
203 | } while (idx < offsets[rhs] && idx >= (pos = offsets[lhs])); | |
204 | return null; | |
205 | } | |
206 | ||
207 | public isValid(index: number): boolean { | |
208 | return !!this.search(index, this.isValidInternal); | |
209 | } | |
210 | ||
211 | public get(index: number): T['TValue'] | null { | |
212 | return this.search(index, this.getInternal); | |
213 | } | |
214 | ||
215 | public set(index: number, value: T['TValue'] | null): void { | |
216 | this.search(index, ({ chunks }, i, j) => chunks[i].set(j, value)); | |
217 | } | |
218 | ||
219 | public indexOf(element: T['TValue'], offset?: number): number { | |
220 | if (offset && typeof offset === 'number') { | |
221 | return this.search(offset, (self, i, j) => this.indexOfInternal(self, i, j, element))!; | |
222 | } | |
223 | return this.indexOfInternal(this, 0, Math.max(0, offset || 0), element); | |
224 | } | |
225 | ||
226 | public toArray(): T['TArray'] { | |
227 | const { chunks } = this; | |
228 | const n = chunks.length; | |
229 | let ArrayType: any = this._type.ArrayType; | |
230 | if (n <= 0) { return new ArrayType(0); } | |
231 | if (n <= 1) { return chunks[0].toArray(); } | |
232 | let len = 0; | |
233 | const src = new Array(n); | |
234 | for (let i = -1; ++i < n;) { | |
235 | len += (src[i] = chunks[i].toArray()).length; | |
236 | } | |
237 | if (ArrayType !== src[0].constructor) { | |
238 | ArrayType = src[0].constructor; | |
239 | } | |
240 | const dst = new ArrayType(len); | |
241 | const set: any = ArrayType === Array ? arraySet : typedSet; | |
242 | for (let i = -1, idx = 0; ++i < n;) { | |
243 | idx = set(src[i], dst, idx); | |
244 | } | |
245 | return dst; | |
246 | } | |
247 | ||
248 | protected getInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].get(j); } | |
249 | protected isValidInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].isValid(j); } | |
250 | protected indexOfInternal({ _chunks }: Chunked<T>, chunkIndex: number, fromIndex: number, element: T['TValue']) { | |
251 | let i = chunkIndex - 1; | |
252 | const n = _chunks.length; | |
253 | let start = fromIndex, offset = 0, found = -1; | |
254 | while (++i < n) { | |
255 | if (~(found = _chunks[i].indexOf(element, start))) { | |
256 | return offset + found; | |
257 | } | |
258 | start = 0; | |
259 | offset += _chunks[i].length; | |
260 | } | |
261 | return -1; | |
262 | } | |
263 | ||
264 | protected _sliceInternal(self: Chunked<T>, begin: number, end: number) { | |
265 | const slices: Vector<T>[] = []; | |
266 | const { chunks, _chunkOffsets: chunkOffsets } = self; | |
267 | for (let i = -1, n = chunks.length; ++i < n;) { | |
268 | const chunk = chunks[i]; | |
269 | const chunkLength = chunk.length; | |
270 | const chunkOffset = chunkOffsets[i]; | |
271 | // If the child is to the right of the slice boundary, we can stop | |
272 | if (chunkOffset >= end) { break; } | |
273 | // If the child is to the left of of the slice boundary, exclude | |
274 | if (begin >= chunkOffset + chunkLength) { continue; } | |
275 | // If the child is between both left and right boundaries, include w/o slicing | |
276 | if (chunkOffset >= begin && (chunkOffset + chunkLength) <= end) { | |
277 | slices.push(chunk); | |
278 | continue; | |
279 | } | |
280 | // If the child overlaps one of the slice boundaries, include that slice | |
281 | const from = Math.max(0, begin - chunkOffset); | |
282 | const to = Math.min(end - chunkOffset, chunkLength); | |
283 | slices.push(chunk.slice(from, to) as Vector<T>); | |
284 | } | |
285 | return self.clone(slices); | |
286 | } | |
287 | } | |
288 | ||
289 | /** @ignore */ | |
290 | function calculateOffsets<T extends DataType>(vectors: Vector<T>[]) { | |
291 | const offsets = new Uint32Array((vectors || []).length + 1); | |
292 | let offset = offsets[0] = 0; | |
293 | const length = offsets.length; | |
294 | for (let index = 0; ++index < length;) { | |
295 | offsets[index] = (offset += vectors[index - 1].length); | |
296 | } | |
297 | return offsets; | |
298 | } | |
299 | ||
300 | /** @ignore */ | |
301 | const typedSet = (src: TypedArray, dst: TypedArray, offset: number) => { | |
302 | dst.set(src, offset); | |
303 | return (offset + src.length); | |
304 | }; | |
305 | ||
306 | /** @ignore */ | |
307 | const arraySet = (src: any[], dst: any[], offset: number) => { | |
308 | let idx = offset; | |
309 | for (let i = -1, n = src.length; ++i < n;) { | |
310 | dst[idx++] = src[i]; | |
311 | } | |
312 | return idx; | |
313 | }; | |
314 | ||
315 | /** @ignore */ | |
316 | interface TypedArray extends ArrayBufferView { | |
317 | readonly length: number; | |
318 | readonly [n: number]: number; | |
319 | set(array: ArrayLike<number>, offset?: number): void; | |
320 | } |