]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/js/perf/index.ts
9f6cb8f79a0933097a6cdab70591e62176802ec8
[ceph.git] / ceph / src / arrow / js / perf / index.ts
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 // Alternatively, use bundles for performance tests
19 // import * as Arrow from '../targets/es5/umd';
20 // import * as Arrow from '../targets/es5/cjs';
21 // import * as Arrow from '../targets/es2015/umd';
22 // import * as Arrow from '../targets/es2015/cjs';
23
24 import * as Arrow from '../src/Arrow';
25
26 import config from './config';
27 import b from 'benny';
28 import { CaseResult, Summary } from 'benny/lib/internal/common-types';
29 import kleur from 'kleur';
30
// Arrow APIs exercised by the benchmarks below.
const predicate = Arrow.predicate;
const Table = Arrow.Table;
const RecordBatchReader = Arrow.RecordBatchReader;
const col = predicate.col;

// Command-line arguments with the first two `process.argv` entries dropped.
const args = process.argv.slice(2);
// JSON mode is active only when `--json` is the very first argument.
const json = args[0] === '--json';
37
// Shared number formatter using the runtime's default locale.
const formatter = new Intl.NumberFormat();

/**
 * Formats a number for display in benchmark labels and results.
 *
 * Values greater than `precision * 10` are rounded to the nearest integer;
 * smaller values are reduced to `precision` significant digits instead.
 *
 * @param number the value to format
 * @param precision number of significant digits kept for small values; any
 *   value below 1 (including the default of 0) means "always round to an integer"
 * @returns the locale-formatted string
 */
function formatNumber(number: number, precision = 0): string {
    // Number.prototype.toPrecision() throws a RangeError for a precision
    // below 1, so zero and negative inputs must not reach it when no
    // precision was requested.
    const roundToInteger = precision < 1 || number > precision * 10;
    const rounded = roundToInteger
        ? Math.round(number)
        : parseFloat(number.toPrecision(precision));
    return formatter.format(rounded);
}
43
// Case results accumulated for the JSON report; only filled when `json` is set.
const results: CaseResult[] = [];

/**
 * Per-case callback handed to `b.cycle`: optionally records the result for
 * the JSON report and always prints a one-line summary to the console.
 *
 * @param result the finished benchmark case
 * @param _summary summary of the suite the case belongs to; its name is
 *   stored on the result when JSON output is enabled
 */
function cycle(result: CaseResult, _summary: Summary) {
    // Median sample time scaled by 1000; printed below with an "ms" suffix.
    const medianMs = result.details.median * 1000;

    if (json) {
        result.suite = _summary.name;
        results.push(result);
    }

    const label = kleur.cyan(result.name);
    const throughput = formatNumber(result.ops, 3);
    const margin = result.margin.toPrecision(2);
    const elapsed = formatNumber(medianMs, 2);
    const sampleInfo = kleur.gray(result.samples + ' samples');

    console.log(`${label} ${throughput} ops/s ±${margin}%, ${elapsed} ms, ${sampleInfo}`);
}
56
// For every configured dataset, register the "Parse" suite and then one
// suite per vector operation, each with one case per schema field.
for (const { name, ipc, df } of config) {
    const parseCases = [
        b.add(`dataset: ${name}, function: Table.from`, () => {
            Table.from(ipc);
        }),

        // Drains the reader; the batches themselves are intentionally unused.
        b.add(`dataset: ${name}, function: readBatches`, () => {
            for (const _recordBatch of RecordBatchReader.from(ipc)) {}
        }),

        b.add(`dataset: ${name}, function: serialize`, () => {
            df.serialize();
        }),
    ];

    b.suite('Parse', ...parseCases, b.cycle(cycle));

    const { schema } = df;

    // Each entry pairs a suite title with the operation applied to a column.
    type ColumnOp = (vector: Arrow.Column<any>) => void;
    const columnOps: [string, ColumnOp][] = [
        ['Get values by index', (vector) => {
            // The length is read once, before the loop starts.
            const count = vector.length;
            for (let i = 0; i < count; i++) {
                vector.get(i);
            }
        }],
        ['Iterate vectors', (vector) => { for (const _value of vector) {} }],
        ['Slice toArray vectors', (vector) => { vector.slice().toArray(); }],
        ['Slice vectors', (vector) => { vector.slice(); }],
    ];

    for (const [title, run] of columnOps) {
        // One benchmark case per schema field, in schema order.
        const cases = schema.fields.map((field, position) => {
            const column = df.getColumnAt(position)!;
            return b.add(`dataset: ${name}, column: ${field.name}, length: ${formatNumber(column.length)}, type: ${column.type}`, () => {
                run(column);
            });
        });

        b.suite(title, ...cases, b.cycle(cycle));
    }
}
111
112
// For every configured dataset, register the DataFrame suites: plain
// iteration, count-by, filter + count, filter + iterate, and a hand-written
// "direct" count that walks the record batches without the filter API.
for (const { name, df, countBys, counts } of config) {
    /** One entry of the dataset's `counts` list, as consumed by the suites below. */
    type CountCase = { column: string; test: 'gt' | 'eq'; value: number | string };

    /** Returns the schema field called `column`, failing loudly when it does not exist. */
    const fieldOf = (column: string) => {
        const field = df.schema.fields.find((c) => c.name === column);
        if (field === undefined) {
            throw new Error(`Column "${column}" not found in dataset "${name}"`);
        }
        return field;
    };

    /** Benchmark label shared by every per-column case. */
    const columnLabel = (column: string): string =>
        `dataset: ${name}, column: ${column}, length: ${formatNumber(df.length)}, type: ${fieldOf(column).type}`;

    /** Benchmark label for a filter/count case: the column label plus the test and value. */
    const countLabel = ({ column, test, value }: CountCase): string =>
        `${columnLabel(column)}, test: ${test}, value: ${value}`;

    /** Builds the filtered DataFrame for a count case; throws on an unknown test. */
    const buildFilter = ({ column, test, value }: CountCase): Arrow.FilteredDataFrame => {
        switch (test) {
            case 'gt':
                return df.filter(col(column).gt(value));
            case 'eq':
                return df.filter(col(column).eq(value));
            default:
                throw new Error(`Unrecognized test "${test}"`);
        }
    };

    b.suite(
        `DataFrame Iterate`,

        b.add(`dataset: ${name}, length: ${formatNumber(df.length)}`, () => {
            for (const _value of df) {}
        }),

        b.cycle(cycle)
    );

    b.suite(
        `DataFrame Count By`,

        ...countBys.map((column: string) => b.add(
            columnLabel(column),
            () => df.countBy(column)
        )),

        b.cycle(cycle)
    );

    b.suite(
        `DataFrame Filter-Scan Count`,

        ...counts.map((countCase: CountCase) => b.add(
            countLabel(countCase),
            () => {
                // Setup: the filter is built once, outside the returned function.
                const filteredDf = buildFilter(countCase);

                return () => filteredDf.count();
            }
        )),

        b.cycle(cycle)
    );

    b.suite(
        `DataFrame Filter-Iterate`,

        ...counts.map((countCase: CountCase) => b.add(
            countLabel(countCase),
            () => {
                // Setup: the filter is built once, outside the returned function.
                const filteredDf = buildFilter(countCase);

                return () => {
                    for (const _value of filteredDf) {}
                };
            }
        )),

        b.cycle(cycle)
    );

    b.suite(
        `DataFrame Direct Count`,

        ...counts.map((countCase: CountCase) => b.add(
            countLabel(countCase),
            () => {
                const { column, test, value } = countCase;
                const colidx = df.schema.fields.indexOf(fieldOf(column));

                // The two loops are intentionally kept as separate closures so the
                // comparison stays inline in the loop instead of going through an
                // extra predicate call per row.
                switch (test) {
                    case 'gt':
                        return () => {
                            let sum = 0;
                            const batches = df.chunks;
                            const numBatches = batches.length;
                            for (let batchIndex = -1; ++batchIndex < numBatches;) {
                                // load batches
                                const batch = batches[batchIndex];
                                const vector = batch.getChildAt(colidx)!;
                                // yield all indices
                                for (let index = -1, length = batch.length; ++index < length;) {
                                    // Strictly greater-than, matching the `gt` label
                                    // and the `col(column).gt(value)` filter suites.
                                    sum += (vector.get(index) > value) ? 1 : 0;
                                }
                            }
                            return sum;
                        };
                    case 'eq':
                        return () => {
                            let sum = 0;
                            const batches = df.chunks;
                            const numBatches = batches.length;
                            for (let batchIndex = -1; ++batchIndex < numBatches;) {
                                // load batches
                                const batch = batches[batchIndex];
                                const vector = batch.getChildAt(colidx)!;
                                // yield all indices
                                for (let index = -1, length = batch.length; ++index < length;) {
                                    sum += (vector.get(index) === value) ? 1 : 0;
                                }
                            }
                            return sum;
                        };
                    default:
                        throw new Error(`Unrecognized test "${test}"`);
                }
            }
        )),

        b.cycle(cycle),

        b.complete(() => {
            // Dump everything collected by `cycle` as JSON on stderr.
            // NOTE(review): this handler is registered inside the dataset loop,
            // i.e. once per dataset; with more than one dataset the accumulated
            // results are presumably written more than once. Confirm against
            // the contents of `config`.
            if (json) {
                process.stderr.write(JSON.stringify(results, null, 2));
            }
        })
    );
}