ceph/src/arrow/dev/archery/archery/benchmark/jmh.py

   1 # Licensed to the Apache Software Foundation (ASF) under one
   2 # or more contributor license agreements.  See the NOTICE file
   3 # distributed with this work for additional information
   4 # regarding copyright ownership.  The ASF licenses this file
   5 # to you under the Apache License, Version 2.0 (the
   6 # "License"); you may not use this file except in compliance
   7 # with the License.  You may obtain a copy of the License at
   8 #
   9 #   http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing,
  12 # software distributed under the License is distributed on an
  13 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14 # KIND, either express or implied.  See the License for the
  15 # specific language governing permissions and limitations
  16 # under the License.
  17
  18 from itertools import filterfalse, groupby, tee
  19 import json
  20 import subprocess
  21 from tempfile import NamedTemporaryFile
  22
  23 from .core import Benchmark
  24 from ..utils.command import Command
  25 from ..utils.maven import Maven
  26
  27
  28 def partition(pred, iterable):
  29     # adapted from python's examples
  30     t1, t2 = tee(iterable)
  31     return list(filter(pred, t1)), list(filterfalse(pred, t2))
  32
  33
  34 class JavaMicrobenchmarkHarnessCommand(Command):
  35     """ Run a Java Micro Benchmark Harness
  36
  37     This assumes the binary supports the standard command line options,
  38     notably `-Dbenchmark_filter`
  39     """
  40
  41     def __init__(self, build, benchmark_filter=None):
  42         self.benchmark_filter = benchmark_filter
  43         self.build = build
  44         self.maven = Maven()
  45
  46     """ Extract benchmark names from output between "Benchmarks:" and "[INFO]".
  47     Assume the following output:
  48       ...
  49       Benchmarks:
  50       org.apache.arrow.vector.IntBenchmarks.setIntDirectly
  51       ...
  52       org.apache.arrow.vector.IntBenchmarks.setWithValueHolder
  53       org.apache.arrow.vector.IntBenchmarks.setWithWriter
  54       ...
  55       [INFO]
  56     """
  57
  58     def list_benchmarks(self):
  59         argv = []
  60         if self.benchmark_filter:
  61             argv.append("-Dbenchmark.filter={}".format(self.benchmark_filter))
  62         result = self.build.list(
  63             *argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  64
  65         lists = []
  66         benchmarks = False
  67         for line in str.splitlines(result.stdout.decode("utf-8")):
  68             if not benchmarks:
  69                 if line.startswith("Benchmarks:"):
  70                     benchmarks = True
  71             else:
  72                 if line.startswith("org.apache.arrow"):
  73                     lists.append(line)
  74                 if line.startswith("[INFO]"):
  75                     break
  76         return lists
  77
  78     def results(self, repetitions):
  79         with NamedTemporaryFile(suffix=".json") as out:
  80             argv = ["-Dbenchmark.runs={}".format(repetitions),
  81                     "-Dbenchmark.resultfile={}".format(out.name),
  82                     "-Dbenchmark.resultformat=json"]
  83             if self.benchmark_filter:
  84                 argv.append(
  85                     "-Dbenchmark.filter={}".format(self.benchmark_filter)
  86                 )
  87
  88             self.build.benchmark(*argv, check=True)
  89             return json.load(out)
  90
  91
  92 class JavaMicrobenchmarkHarnessObservation:
  93     """ Represents one run of a single Java Microbenchmark Harness
  94     """
  95
  96     def __init__(self, benchmark, primaryMetric,
  97                  forks, warmupIterations, measurementIterations, **counters):
  98         self.name = benchmark
  99         self.primaryMetric = primaryMetric
 100         self.score = primaryMetric["score"]
 101         self.score_unit = primaryMetric["scoreUnit"]
 102         self.forks = forks
 103         self.warmups = warmupIterations
 104         self.runs = measurementIterations
 105         self.counters = {
 106             "mode": counters["mode"],
 107             "threads": counters["threads"],
 108             "warmups": warmupIterations,
 109             "warmupTime": counters["warmupTime"],
 110             "measurements": measurementIterations,
 111             "measurementTime": counters["measurementTime"],
 112             "jvmArgs": counters["jvmArgs"]
 113         }
 114         self.reciprocal_value = True if self.score_unit.endswith(
 115             "/op") else False
 116         if self.score_unit.startswith("ops/"):
 117             idx = self.score_unit.find("/")
 118             self.normalizePerSec(self.score_unit[idx+1:])
 119         elif self.score_unit.endswith("/op"):
 120             idx = self.score_unit.find("/")
 121             self.normalizePerSec(self.score_unit[:idx])
 122         else:
 123             self.normalizeFactor = 1
 124
 125     @property
 126     def value(self):
 127         """ Return the benchmark value."""
 128         val = 1 / self.score if self.reciprocal_value else self.score
 129         return val * self.normalizeFactor
 130
 131     def normalizePerSec(self, unit):
 132         if unit == "ns":
 133             self.normalizeFactor = 1000 * 1000 * 1000
 134         elif unit == "us":
 135             self.normalizeFactor = 1000 * 1000
 136         elif unit == "ms":
 137             self.normalizeFactor = 1000
 138         elif unit == "min":
 139             self.normalizeFactor = 1 / 60
 140         elif unit == "hr":
 141             self.normalizeFactor = 1 / (60 * 60)
 142         elif unit == "day":
 143             self.normalizeFactor = 1 / (60 * 60 * 24)
 144         else:
 145             self.normalizeFactor = 1
 146
 147     @property
 148     def unit(self):
 149         if self.score_unit.startswith("ops/"):
 150             return "items_per_second"
 151         elif self.score_unit.endswith("/op"):
 152             return "items_per_second"
 153         else:
 154             return "?"
 155
 156     def __repr__(self):
 157         return str(self.value)
 158
 159
 160 class JavaMicrobenchmarkHarness(Benchmark):
 161     """ A set of JavaMicrobenchmarkHarnessObservations. """
 162
 163     def __init__(self, name, runs):
 164         """ Initialize a JavaMicrobenchmarkHarness.
 165
 166         Parameters
 167         ----------
 168         name: str
 169               Name of the benchmark
 170         forks: int
 171         warmups: int
 172         runs: int
 173         runs: list(JavaMicrobenchmarkHarnessObservation)
 174               Repetitions of JavaMicrobenchmarkHarnessObservation run.
 175
 176         """
 177         self.name = name
 178         self.runs = sorted(runs, key=lambda b: b.value)
 179         unit = self.runs[0].unit
 180         time_unit = "N/A"
 181         less_is_better = not unit.endswith("per_second")
 182         values = [b.value for b in self.runs]
 183         times = []
 184         # Slight kludge to extract the UserCounters for each benchmark
 185         counters = self.runs[0].counters
 186         super().__init__(name, unit, less_is_better, values, time_unit, times,
 187                          counters)
 188
 189     def __repr__(self):
 190         return "JavaMicrobenchmark[name={},runs={}]".format(
 191             self.name, self.runs)
 192
 193     @classmethod
 194     def from_json(cls, payload):
 195         def group_key(x):
 196             return x.name
 197
 198         benchmarks = map(
 199             lambda x: JavaMicrobenchmarkHarnessObservation(**x), payload)
 200         groups = groupby(sorted(benchmarks, key=group_key), group_key)
 201         return [cls(k, list(bs)) for k, bs in groups]