]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | import { BN } from '../util/bn'; | |
19 | import { Column } from '../column'; | |
20 | import { Vector } from '../vector'; | |
21 | import { Visitor } from '../visitor'; | |
22 | import { BufferType } from '../enum'; | |
23 | import { RecordBatch } from '../recordbatch'; | |
24 | import { VectorType as V } from '../interfaces'; | |
25 | import { UnionMode, DateUnit, TimeUnit } from '../enum'; | |
26 | import { BitIterator, getBit, getBool } from '../util/bit'; | |
27 | import { selectColumnChildrenArgs } from '../util/args'; | |
28 | import { | |
29 | DataType, | |
30 | Float, Int, Date_, Interval, Time, Timestamp, Union, | |
31 | Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, | |
32 | } from '../type'; | |
33 | ||
/**
 * Typed overloads for the visitor methods implemented by the
 * `JSONVectorAssembler` class of the same name. Each `visitX` signature
 * describes the fields that the matching method contributes to the
 * assembled JSON object for a column of that data type.
 *
 * @ignore
 */
export interface JSONVectorAssembler extends Visitor {

    /** Assembles a single column into its JSON object form. */
    visit<T extends Column>(node: T): Record<string, unknown>;

    /** Assembles several columns, returning one JSON object per column. */
    visitMany<T extends Column>(cols: T[]): Record<string, unknown>[];

    /** Looks up the assembling function to use for the given column. */
    getVisitFn<T extends DataType>(node: Column<T>): (column: Column<T>) => {
        name: string;
        count: number;
        VALIDITY: (0 | 1)[];
        DATA?: any[];
        OFFSET?: number[];
        TYPE?: number[];
        children?: any[];
    };

    // Types with no value payload.
    visitNull<T extends Null>(vector: V<T>): Record<string, never>;

    // Types that contribute a DATA array (plus OFFSET for variable-width values).
    visitBool<T extends Bool>(vector: V<T>): { DATA: boolean[] };
    visitInt<T extends Int>(vector: V<T>): { DATA: (number | string)[] };
    visitFloat<T extends Float>(vector: V<T>): { DATA: number[] };
    visitUtf8<T extends Utf8>(vector: V<T>): { DATA: string[]; OFFSET: number[] };
    visitBinary<T extends Binary>(vector: V<T>): { DATA: string[]; OFFSET: number[] };
    visitFixedSizeBinary<T extends FixedSizeBinary>(vector: V<T>): { DATA: string[] };
    visitDate<T extends Date_>(vector: V<T>): { DATA: number[] };
    visitTimestamp<T extends Timestamp>(vector: V<T>): { DATA: string[] };
    visitTime<T extends Time>(vector: V<T>): { DATA: number[] };
    visitDecimal<T extends Decimal>(vector: V<T>): { DATA: string[] };

    // Nested types: children are assembled recursively.
    visitList<T extends List>(vector: V<T>): { children: any[]; OFFSET: number[] };
    visitStruct<T extends Struct>(vector: V<T>): { children: any[] };
    visitUnion<T extends Union>(vector: V<T>): { children: any[]; TYPE: number[] };
    visitInterval<T extends Interval>(vector: V<T>): { DATA: number[] };
    visitFixedSizeList<T extends FixedSizeList>(vector: V<T>): { children: any[] };
    visitMap<T extends Map_>(vector: V<T>): { children: any[] };
}
59 | ||
/**
 * Visitor that turns columns into plain JSON-serializable objects carrying
 * `name`, `count`, `VALIDITY` and the per-type payload fields (`DATA`,
 * `OFFSET`, `TYPE`, `children`).
 *
 * @ignore
 */
export class JSONVectorAssembler extends Visitor {

    /**
     * Assembles every column selected from `args` (columns, record batches,
     * or arrays of either) into its JSON object form.
     *
     * @param args Columns and/or record batches, optionally nested in arrays.
     * @returns One assembled object per selected column.
     * @nocollapse
     */
    public static assemble<T extends Column | RecordBatch>(...args: (T | T[])[]) {
        const assembler = new JSONVectorAssembler();
        const columns = selectColumnChildrenArgs(RecordBatch, args);
        return assembler.visitMany(columns);
    }

    /**
     * Assembles one column: emits its name, row count and validity flags,
     * then merges in the fields produced by the type-specific visit method.
     *
     * @param column The column to assemble.
     * @returns The assembled object for this column.
     */
    public visit<T extends Column>(column: T) {
        const { data: columnData, name: columnName, length: rowCount } = column;
        const { offset, nullCount, nullBitmap } = columnData;

        // Dictionary columns are assembled through the type of their indices.
        const valueType = DataType.isDictionary(column.type) ? column.type.indices : column.type;

        // Copy the buffer list with the validity slot cleared; validity is
        // reported separately through the 'VALIDITY' field below.
        const valueBuffers = Object.assign([], columnData.buffers, { [BufferType.VALIDITY]: undefined });

        // Null-typed columns carry no validity; columns without nulls get
        // all ones; otherwise the bitmap is expanded bit by bit.
        const validity = DataType.isNull(valueType) ? undefined
            : nullCount <= 0 ? Array.from({ length: rowCount }, () => 1)
            : [...new BitIterator(nullBitmap, offset, rowCount, null, getBit)];

        // NOTE(review): the literal 0 passed to clone() is presumably the
        // null count of the copy -- confirm against Data#clone.
        const payload = super.visit(Vector.new(columnData.clone(valueType, offset, rowCount, 0, valueBuffers)));

        return {
            'name': columnName,
            'count': rowCount,
            'VALIDITY': validity,
            ...payload
        };
    }

    /** Null vectors contribute no additional fields. */
    public visitNull() {
        return {};
    }

    /** Expands the packed boolean values into one entry per row. */
    public visitBool<T extends Bool>({ values, offset, length }: V<T>) {
        const bools = new BitIterator(values, offset, length, null, getBool);
        return { 'DATA': [...bools] };
    }

    /**
     * Integers narrower than 64 bits are emitted as numbers; 64-bit
     * integers are emitted as strings built from pairs of 32-bit words.
     */
    public visitInt<T extends Int>(vector: V<T>) {
        const data = vector.type.bitWidth < 64
            ? [...vector.values]
            : [...bigNumsToStrings(vector.values as (Int32Array | Uint32Array), 2)];
        return { 'DATA': data };
    }

    /** Emits the raw float values as numbers. */
    public visitFloat<T extends Float>(vector: V<T>) {
        const data = [...vector.values];
        return { 'DATA': data };
    }

    /** Emits the values yielded by iterating the vector, plus the value offsets. */
    public visitUtf8<T extends Utf8>(vector: V<T>) {
        const data = [...vector];
        const offsets = [...vector.valueOffsets];
        return { 'DATA': data, 'OFFSET': offsets };
    }

    /** Emits each binary value as an upper-case hex string, plus the value offsets. */
    public visitBinary<T extends Binary>(vector: V<T>) {
        const data = [...binaryToString(vector)];
        const offsets = [...vector.valueOffsets];
        return { 'DATA': data, OFFSET: offsets };
    }

    /** Emits each fixed-size binary value as an upper-case hex string. */
    public visitFixedSizeBinary<T extends FixedSizeBinary>(vector: V<T>) {
        const data = [...binaryToString(vector)];
        return { 'DATA': data };
    }

    /**
     * Day-unit dates are emitted as numbers; other units are emitted as
     * strings built from pairs of 32-bit words.
     */
    public visitDate<T extends Date_>(vector: V<T>) {
        const data = vector.type.unit === DateUnit.DAY
            ? [...vector.values]
            : [...bigNumsToStrings(vector.values, 2)];
        return { 'DATA': data };
    }

    /** Timestamps are always emitted as strings built from pairs of 32-bit words. */
    public visitTimestamp<T extends Timestamp>(vector: V<T>) {
        const data = [...bigNumsToStrings(vector.values, 2)];
        return { 'DATA': data };
    }

    /**
     * Units ordered before MICROSECOND are emitted as numbers; the
     * remaining units are emitted as strings built from pairs of 32-bit words.
     */
    public visitTime<T extends Time>(vector: V<T>) {
        const data = vector.type.unit < TimeUnit.MICROSECOND
            ? [...vector.values]
            : [...bigNumsToStrings(vector.values, 2)];
        return { 'DATA': data };
    }

    /** Decimals are emitted as strings built from groups of four 32-bit words. */
    public visitDecimal<T extends Decimal>(vector: V<T>) {
        const data = [...bigNumsToStrings(vector.values, 4)];
        return { 'DATA': data };
    }

    /** Emits the list offsets and recursively assembles each child field. */
    public visitList<T extends List>(vector: V<T>) {
        const offsets = [...vector.valueOffsets];
        const children = vector.type.children.map((field, index) => {
            return this.visit(new Column(field, [vector.getChildAt(index)!]));
        });
        return { 'OFFSET': offsets, 'children': children };
    }

    /** Recursively assembles each child field of the struct. */
    public visitStruct<T extends Struct>(vector: V<T>) {
        const children = vector.type.children.map((field, index) => {
            return this.visit(new Column(field, [vector.getChildAt(index)!]));
        });
        return { 'children': children };
    }

    /**
     * Emits the type ids, the value offsets (dense unions only, otherwise
     * `undefined`) and the recursively assembled children.
     */
    public visitUnion<T extends Union>(vector: V<T>) {
        const typeIds = [...vector.typeIds];
        const offsets = vector.type.mode === UnionMode.Dense ? [...vector.valueOffsets] : undefined;
        const children = vector.type.children.map((field, index) => {
            return this.visit(new Column(field, [vector.getChildAt(index)!]));
        });
        return { 'TYPE': typeIds, 'OFFSET': offsets, 'children': children };
    }

    /** Emits the raw interval values as numbers. */
    public visitInterval<T extends Interval>(vector: V<T>) {
        const data = [...vector.values];
        return { 'DATA': data };
    }

    /** Recursively assembles each child field of the fixed-size list. */
    public visitFixedSizeList<T extends FixedSizeList>(vector: V<T>) {
        const children = vector.type.children.map((field, index) => {
            return this.visit(new Column(field, [vector.getChildAt(index)!]));
        });
        return { 'children': children };
    }

    /** Emits the map offsets and recursively assembles each child field. */
    public visitMap<T extends Map_>(vector: V<T>) {
        const offsets = [...vector.valueOffsets];
        const children = vector.type.children.map((field, index) => {
            return this.visit(new Column(field, [vector.getChildAt(index)!]));
        });
        return { 'OFFSET': offsets, 'children': children };
    }
}
162 | ||
/**
 * Lazily converts each byte sequence yielded by `vector` into an upper-case
 * hexadecimal string, two digits per byte (an empty sequence yields `''`).
 *
 * @param vector A binary or fixed-size-binary vector, iterated as byte arrays.
 * @returns A generator producing one hex string per iterated value.
 * @ignore
 */
function* binaryToString(vector: Vector<Binary> | Vector<FixedSizeBinary>) {
    for (const octets of vector as Iterable<Uint8Array>) {
        let hex = '';
        for (let position = 0; position < octets.length; ++position) {
            const digits = (octets[position] & 0xFF).toString(16);
            // Left-pad single-digit values so every byte occupies two characters.
            hex += digits.length < 2 ? `0${digits}` : digits;
        }
        yield hex.toUpperCase();
    }
}
171 | ||
/**
 * Lazily renders consecutive groups of `stride` 32-bit words from `values`
 * as strings, by wrapping each group with `BN.new(words, false)` and
 * interpolating the result into a template string.
 *
 * @param values The packed 32-bit words holding the wide numbers.
 * @param stride How many 32-bit words make up one number.
 * @returns A generator producing one string per group of words.
 * @ignore
 */
function* bigNumsToStrings(values: Uint32Array | Int32Array, stride: number) {
    const groupCount = values.length / stride;
    for (let group = 0; group < groupCount; group++) {
        const begin = group * stride;
        const words = values.subarray(begin, begin + stride);
        yield `${BN.new(words, false)}`;
    }
}
177 | } |