]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | # Licensed to the Apache Software Foundation (ASF) under one |
2 | # or more contributor license agreements. See the NOTICE file | |
3 | # distributed with this work for additional information | |
4 | # regarding copyright ownership. The ASF licenses this file | |
5 | # to you under the Apache License, Version 2.0 (the | |
6 | # "License"); you may not use this file except in compliance | |
7 | # with the License. You may obtain a copy of the License at | |
8 | # | |
9 | # http://www.apache.org/licenses/LICENSE-2.0 | |
10 | # | |
11 | # Unless required by applicable law or agreed to in writing, | |
12 | # software distributed under the License is distributed on an | |
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | # KIND, either express or implied. See the License for the | |
15 | # specific language governing permissions and limitations | |
16 | # under the License. | |
17 | ||
18 | from itertools import filterfalse, groupby, tee | |
19 | import json | |
20 | import subprocess | |
21 | from tempfile import NamedTemporaryFile | |
22 | ||
23 | from .core import Benchmark | |
24 | from ..utils.command import Command | |
25 | from ..utils.maven import Maven | |
26 | ||
27 | ||
def partition(pred, iterable):
    """ Split *iterable* into two lists: (items for which *pred* is
    truthy, items for which it is falsy). Order is preserved.
    """
    # tee() lets us traverse the input twice even if it is a one-shot
    # iterator; pred is evaluated once per element on each copy.
    truthy, falsy = tee(iterable)
    return ([item for item in truthy if pred(item)],
            [item for item in falsy if not pred(item)])
32 | ||
33 | ||
class JavaMicrobenchmarkHarnessCommand(Command):
    """ Run a Java Micro Benchmark Harness

    This assumes the binary supports the standard command line options,
    notably `-Dbenchmark.filter`
    """

    def __init__(self, build, benchmark_filter=None):
        # NOTE(review): base-class __init__ is not called here, matching the
        # original code — presumably Command needs no initialization; confirm.
        self.benchmark_filter = benchmark_filter
        self.build = build
        self.maven = Maven()

    def list_benchmarks(self):
        """ Extract benchmark names from output between "Benchmarks:" and "[INFO]".

        Assume the following output:
        ...
        Benchmarks:
        org.apache.arrow.vector.IntBenchmarks.setIntDirectly
        ...
        org.apache.arrow.vector.IntBenchmarks.setWithValueHolder
        org.apache.arrow.vector.IntBenchmarks.setWithWriter
        ...
        [INFO]
        """
        argv = []
        if self.benchmark_filter:
            argv.append("-Dbenchmark.filter={}".format(self.benchmark_filter))
        result = self.build.list(
            *argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        lists = []
        benchmarks = False
        for line in result.stdout.decode("utf-8").splitlines():
            if not benchmarks:
                # Skip everything until the "Benchmarks:" marker line.
                if line.startswith("Benchmarks:"):
                    benchmarks = True
            else:
                if line.startswith("org.apache.arrow"):
                    lists.append(line)
                # "[INFO]" marks the end of the benchmark listing.
                if line.startswith("[INFO]"):
                    break
        return lists

    def results(self, repetitions):
        """ Run the benchmarks and return the parsed JMH JSON results.

        Parameters
        ----------
        repetitions: int
            Number of measurement iterations (-Dbenchmark.runs).
        """
        with NamedTemporaryFile(suffix=".json") as out:
            argv = ["-Dbenchmark.runs={}".format(repetitions),
                    "-Dbenchmark.resultfile={}".format(out.name),
                    "-Dbenchmark.resultformat=json"]
            if self.benchmark_filter:
                argv.append(
                    "-Dbenchmark.filter={}".format(self.benchmark_filter)
                )

            self.build.benchmark(*argv, check=True)
            # The benchmark subprocess wrote to out.name; our handle is still
            # positioned at offset 0, so json.load reads the fresh content.
            return json.load(out)
90 | ||
91 | ||
class JavaMicrobenchmarkHarnessObservation:
    """ Represents one run of a single Java Microbenchmark Harness
    """

    def __init__(self, benchmark, primaryMetric,
                 forks, warmupIterations, measurementIterations, **counters):
        """ Initialize from one entry of JMH's JSON result output.

        Parameters
        ----------
        benchmark: str
            Fully qualified benchmark name.
        primaryMetric: dict
            JMH primary metric; must contain "score" and "scoreUnit".
        forks: int
        warmupIterations: int
        measurementIterations: int
        counters: dict
            Remaining JMH fields; "mode", "threads", "warmupTime",
            "measurementTime" and "jvmArgs" are required.
        """
        self.name = benchmark
        self.primaryMetric = primaryMetric
        self.score = primaryMetric["score"]
        self.score_unit = primaryMetric["scoreUnit"]
        self.forks = forks
        self.warmups = warmupIterations
        self.runs = measurementIterations
        self.counters = {
            "mode": counters["mode"],
            "threads": counters["threads"],
            "warmups": warmupIterations,
            "warmupTime": counters["warmupTime"],
            "measurements": measurementIterations,
            "measurementTime": counters["measurementTime"],
            "jvmArgs": counters["jvmArgs"]
        }
        # JMH reports either a throughput ("ops/<unit>") or a latency
        # ("<unit>/op"); latencies are inverted in `value` so both become
        # a per-second rate.
        self.reciprocal_value = self.score_unit.endswith("/op")
        if self.score_unit.startswith("ops/"):
            idx = self.score_unit.find("/")
            self.normalizePerSec(self.score_unit[idx+1:])
        elif self.score_unit.endswith("/op"):
            idx = self.score_unit.find("/")
            self.normalizePerSec(self.score_unit[:idx])
        else:
            # Unknown unit shape: leave the score untouched.
            self.normalizeFactor = 1

    @property
    def value(self):
        """ Return the benchmark value."""
        val = 1 / self.score if self.reciprocal_value else self.score
        return val * self.normalizeFactor

    def normalizePerSec(self, unit):
        """ Set `normalizeFactor` converting a per-`unit` rate to per-second;
        unknown units fall back to a factor of 1.
        """
        factors = {
            "ns": 1000 * 1000 * 1000,
            "us": 1000 * 1000,
            "ms": 1000,
            "min": 1 / 60,
            "hr": 1 / (60 * 60),
            "day": 1 / (60 * 60 * 24),
        }
        self.normalizeFactor = factors.get(unit, 1)

    @property
    def unit(self):
        # Both throughput and latency scores are normalized to a rate.
        if self.score_unit.startswith("ops/"):
            return "items_per_second"
        elif self.score_unit.endswith("/op"):
            return "items_per_second"
        else:
            return "?"

    def __repr__(self):
        return str(self.value)
158 | ||
159 | ||
class JavaMicrobenchmarkHarness(Benchmark):
    """ A set of JavaMicrobenchmarkHarnessObservations. """

    def __init__(self, name, runs):
        """ Initialize a JavaMicrobenchmarkHarness.

        Parameters
        ----------
        name: str
            Name of the benchmark
        runs: list(JavaMicrobenchmarkHarnessObservation)
            Repetitions of JavaMicrobenchmarkHarnessObservation run.
        """
        self.name = name
        self.runs = sorted(runs, key=lambda b: b.value)
        unit = self.runs[0].unit
        time_unit = "N/A"
        less_is_better = not unit.endswith("per_second")
        values = [b.value for b in self.runs]
        times = []
        # Slight kludge to extract the UserCounters for each benchmark
        counters = self.runs[0].counters
        super().__init__(name, unit, less_is_better, values, time_unit, times,
                         counters)

    def __repr__(self):
        return "JavaMicrobenchmark[name={},runs={}]".format(
            self.name, self.runs)

    @classmethod
    def from_json(cls, payload):
        """ Build one JavaMicrobenchmarkHarness per benchmark name found in
        the decoded JMH JSON result `payload`.
        """
        def group_key(x):
            return x.name

        benchmarks = map(
            lambda x: JavaMicrobenchmarkHarnessObservation(**x), payload)
        # groupby requires its input sorted by the same key.
        groups = groupby(sorted(benchmarks, key=group_key), group_key)
        return [cls(k, list(bs)) for k, bs in groups]