git.proxmox.com Git - ceph.git/blobdiff - ceph/src/arrow/js/perf/index.ts
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / js / perf / index.ts
diff --git a/ceph/src/arrow/js/perf/index.ts b/ceph/src/arrow/js/perf/index.ts
new file mode 100644 (file)
index 0000000..9f6cb8f
--- /dev/null
@@ -0,0 +1,234 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Alternatively, use bundles for performance tests
+// import * as Arrow from '../targets/es5/umd';
+// import * as Arrow from '../targets/es5/cjs';
+// import * as Arrow from '../targets/es2015/umd';
+// import * as Arrow from '../targets/es2015/cjs';
+
+import * as Arrow from '../src/Arrow';
+
+import config from './config';
+import b from 'benny';
+import { CaseResult, Summary } from 'benny/lib/internal/common-types';
+import kleur from 'kleur';
+
+// Arrow exports used by the benchmarks in this file.
+const predicate = Arrow.predicate;
+const Table = Arrow.Table;
+const RecordBatchReader = Arrow.RecordBatchReader;
+// Used below as `col(column).gt(value)` / `col(column).eq(value)` to build the argument of `df.filter`.
+const col = predicate.col;
+
+
+// Everything after the first two entries of process.argv.
+const args = process.argv.slice(2);
+// JSON mode is enabled only when the first argument is exactly `--json`.
+const json = args[0] === '--json';
+
+// Formatter constructed without an explicit locale or options; used by formatNumber.
+const formatter = new Intl.NumberFormat();
+/**
+ * Formats a number for display using the shared `formatter`.
+ *
+ * Values greater than `precision * 10` are rounded to the nearest integer; other values
+ * are reduced to `precision` significant digits instead.
+ *
+ * @param number Value to format.
+ * @param precision Significant digits kept for small values. Anything below 1 (including
+ *     the default of 0) means "always round to an integer", because `toPrecision` throws a
+ *     RangeError when asked for fewer than 1 digit (e.g. when formatting a length of 0).
+ * @returns The string produced by `formatter.format` for the rounded value.
+ */
+function formatNumber(number: number, precision = 0) {
+    // Guard first so that non-positive numbers with precision 0 never reach toPrecision(0).
+    const rounded = precision < 1 || number > precision * 10
+        ? Math.round(number)
+        : parseFloat(number.toPrecision(precision));
+    return formatter.format(rounded);
+}
+
+// Case results collected by `cycle` while running in JSON mode.
+const results: CaseResult[] = [];
+
+/**
+ * Handler passed to `b.cycle(...)` in every suite of this file: in JSON mode it records the
+ * case result, and it always prints a one-line summary of the case to the console.
+ *
+ * @param result The case result to report.
+ * @param _summary Summary whose `name` is stored on the result as `suite` in JSON mode.
+ */
+function cycle(result: CaseResult, _summary: Summary) {
+    // `details.median` is multiplied by 1000 and printed with an "ms" label.
+    const medianMs = result.details.median * 1000;
+
+    if (json) {
+        result.suite = _summary.name;
+        results.push(result);
+    }
+
+    const label = kleur.cyan(result.name);
+    const opsPerSecond = formatNumber(result.ops, 3);
+    const margin = result.margin.toPrecision(2);
+    const elapsed = formatNumber(medianMs, 2);
+    const sampleCount = kleur.gray(result.samples + ' samples');
+
+    console.log(`${label} ${opsPerSecond} ops/s ±${margin}%, ${elapsed} ms, ${sampleCount}`);
+}
+
+// For each configured dataset: benchmark parsing/serialization of its IPC payload, then run
+// four per-column suites over every column of its DataFrame.
+for (const { name, ipc, df } of config) {
+    b.suite(
+        `Parse`,
+
+        b.add(`dataset: ${name}, function: Table.from`, () => {
+            Table.from(ipc);
+        }),
+
+        b.add(`dataset: ${name}, function: readBatches`, () => {
+            // Drain the reader; the record batches themselves are discarded.
+            for (const _recordBatch of RecordBatchReader.from(ipc)) {}
+        }),
+
+        b.add(`dataset: ${name}, function: serialize`, () => {
+            df.serialize();
+        }),
+
+        b.cycle(cycle)
+    );
+
+    const fields = df.schema.fields;
+
+    // Each entry pairs a suite title with the operation applied to one column.
+    // The loop bodies are the code being timed, so they are kept in their exact form.
+    const columnSuites = [
+        {
+            title: `Get values by index`,
+            run: (vector: Arrow.Column<any>) => {
+                for (let i = -1, n = vector.length; ++i < n;) {
+                    vector.get(i);
+                }
+            },
+        },
+        {
+            title: `Iterate vectors`,
+            run: (vector: Arrow.Column<any>) => { for (const _value of vector) {} },
+        },
+        {
+            title: `Slice toArray vectors`,
+            run: (vector: Arrow.Column<any>) => { vector.slice().toArray(); },
+        },
+        {
+            title: `Slice vectors`,
+            run: (vector: Arrow.Column<any>) => { vector.slice(); },
+        },
+    ];
+
+    for (const { title, run } of columnSuites) {
+        // One benchmark case per schema field, bound to the column at the same index.
+        const cases = fields.map((field, columnIndex) => {
+            const vector = df.getColumnAt(columnIndex)!;
+            const label = `dataset: ${name}, column: ${field.name}, length: ${formatNumber(vector.length)}, type: ${vector.type}`;
+            return b.add(label, () => {
+                run(vector);
+            });
+        });
+
+        b.suite(title, ...cases, b.cycle(cycle));
+    }
+}
+
+
+// For each configured dataset, benchmark DataFrame-level operations: plain iteration,
+// countBy, predicate filters (count and iterate), and a hand-written counting loop.
+for (const { name, df, countBys, counts } of config) {
+    // Shape of one entry of `counts`.
+    type CountSpec = { column: string; test: 'gt' | 'eq'; value: number | string };
+
+    // Case label shared by the three suites that are driven by `counts`.
+    const describeCount = ({ column, test, value }: CountSpec) =>
+        `dataset: ${name}, column: ${column}, length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`;
+
+    // Builds the filtered DataFrame for one `counts` entry; throws on an unrecognized test.
+    const filterFor = ({ column, test, value }: CountSpec): Arrow.FilteredDataFrame => {
+        if (test === 'gt') {
+            return df.filter(col(column).gt(value));
+        } else if (test === 'eq') {
+            return df.filter(col(column).eq(value));
+        }
+        throw new Error(`Unrecognized test "${test}"`);
+    };
+
+    b.suite(
+        `DataFrame Iterate`,
+
+        b.add(`dataset: ${name}, length: ${formatNumber(df.length)}`, () => {
+            for (const _value of df) {}
+        }),
+
+        b.cycle(cycle)
+    );
+
+    b.suite(
+        `DataFrame Count By`,
+
+        ...countBys.map((column: string) => b.add(
+            `dataset: ${name}, column: ${column}, length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}`,
+            () => df.countBy(column)
+        )),
+
+        b.cycle(cycle)
+    );
+
+    b.suite(
+        `DataFrame Filter-Scan Count`,
+
+        ...counts.map((spec: CountSpec) => b.add(
+            describeCount(spec),
+            () => {
+                // The filter is built once here; only the returned function calls count().
+                // NOTE(review): presumably benny times the returned function and treats this
+                // outer body as setup - confirm against the benny documentation.
+                const filteredDf = filterFor(spec);
+                return () => filteredDf.count();
+            }
+        )),
+
+        b.cycle(cycle)
+    );
+
+    b.suite(
+        `DataFrame Filter-Iterate`,
+
+        ...counts.map((spec: CountSpec) => b.add(
+            describeCount(spec),
+            () => {
+                const filteredDf = filterFor(spec);
+                return () => {
+                    for (const _value of filteredDf) {}
+                };
+            }
+        )),
+
+        b.cycle(cycle)
+    );
+
+    b.suite(
+        `DataFrame Direct Count`,
+
+        ...counts.map((spec: CountSpec) => b.add(
+            describeCount(spec),
+            () => {
+                const { column, test, value } = spec;
+                const colidx = df.schema.fields.findIndex((c)=> c.name === column);
+
+                // The two loops are intentionally written out separately so the comparison
+                // stays inline in the inner loop.
+                if (test === 'gt') {
+                    return () => {
+                        let sum = 0;
+                        const batches = df.chunks;
+                        const numBatches = batches.length;
+                        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+                            // Look up this batch's vector for the column under test.
+                            const batch = batches[batchIndex];
+                            const vector = batch.getChildAt(colidx)!;
+                            // Strictly greater-than, matching the 'gt' test name and the
+                            // `.gt(value)` predicate used by the filter suites.
+                            for (let index = -1, length = batch.length; ++index < length;) {
+                                sum += (vector.get(index) > value) ? 1 : 0;
+                            }
+                        }
+                        return sum;
+                    };
+                } else if (test === 'eq') {
+                    return () => {
+                        let sum = 0;
+                        const batches = df.chunks;
+                        const numBatches = batches.length;
+                        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+                            // Look up this batch's vector for the column under test.
+                            const batch = batches[batchIndex];
+                            const vector = batch.getChildAt(colidx)!;
+                            // Count the rows whose value is strictly equal to `value`.
+                            for (let index = -1, length = batch.length; ++index < length;) {
+                                sum += (vector.get(index) === value) ?  1 : 0;
+                            }
+                        }
+                        return sum;
+                    };
+                } else {
+                    throw new Error(`Unrecognized test "${test}"`);
+                }
+            }
+        )),
+
+        b.cycle(cycle),
+
+        b.complete(() => {
+            // Attached to the final suite of each dataset: in JSON mode, write everything
+            // collected in `results` so far to stderr.
+            if (json) {
+                process.stderr.write(JSON.stringify(results, null, 2));
+            }
+        })
+    );
+}