[ceph.git] / ceph / src / jaegertracing / opentelemetry-cpp / api / test / common / spinlock_benchmark.cc

// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

#include "opentelemetry/common/spin_lock_mutex.h"

#include <benchmark/benchmark.h>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <mutex>
#include <thread>
#include <vector>

namespace
{
using opentelemetry::common::SpinLockMutex;

constexpr int TightLoopLocks = 10000;

// Runs one contention ("thrash") workload per benchmark iteration: spawns
// `s.range(0)` threads, each of which performs `TightLoopLocks` rounds of
// lock -> increment a shared counter -> unlock, and then joins all of them.
//
// Thread creation and joining happen inside the timing loop, so they are part
// of what gets measured.
//
// s:        Benchmark state; `s.range(0)` supplies the number of threads.
// spinlock: The lock object shared by every worker thread.
// lock:     A callable denoting how to lock.   Invoked as `lock(spinlock)`.
// unlock:   A callable denoting how to unlock. Invoked as `unlock(spinlock)`.
template <typename SpinLockType, typename LockF, typename UnlockF>
inline void SpinThrash(benchmark::State &s, SpinLockType &spinlock, LockF lock, UnlockF unlock)
{
  auto num_threads = s.range(0);
  // Value we will increment, fighting over a spinlock.
  // The contention is meant to be brief, as close to our expected
  // use cases of "updating pointers" or "pushing an event onto a buffer".
  std::int64_t value = 0;

  std::vector<std::thread> threads;
  if (num_threads > 0)
  {
    // Explicit cast: the thread count is signed, reserve() takes an unsigned size.
    threads.reserve(static_cast<std::size_t>(num_threads));
  }

  // Timing loop
  for (auto _ : s)
  {
    // The index uses the same type as `num_threads` to avoid a mixed-width comparison.
    for (decltype(num_threads) spawned = 0; spawned < num_threads; ++spawned)
    {
      // Capturing by reference is safe: every thread is joined before any of
      // the captured objects goes out of scope.
      threads.emplace_back([&] {
        // Increment value once each time the lock is acquired.  Spin a few times
        // to ensure maximum thread contention.
        for (int round = 0; round < TightLoopLocks; ++round)
        {
          lock(spinlock);
          value++;
          unlock(spinlock);
        }
      });
    }
    // Join threads
    for (auto &thread : threads)
    {
      thread.join();
    }
    // clear() keeps the capacity, so the next iteration does not reallocate.
    threads.clear();
  }

  // The counter is otherwise never read; make it observable so the compiler
  // cannot treat the increments above as dead stores.  Done after the timing
  // loop so it does not affect the measurement.
  benchmark::DoNotOptimize(value);
}

// Benchmark of the full spin-lock implementation: the workload goes straight
// through SpinLockMutex::lock() and SpinLockMutex::unlock().
static void BM_SpinLockThrashing(benchmark::State &s)
{
  SpinLockMutex spinlock;
  const auto acquire = [](SpinLockMutex &m) { m.lock(); };
  const auto release = [](SpinLockMutex &m) { m.unlock(); };
  SpinThrash(s, spinlock, acquire, release);
}

// Naive lock: busy-wait on `try_lock()` until it succeeds, with nothing done
// between failed attempts.
static void BM_NaiveSpinLockThrashing(benchmark::State &s)
{
  SpinLockMutex spinlock;
  const auto busy_wait_lock = [](SpinLockMutex &m) {
    for (;;)
    {
      if (m.try_lock())
      {
        return;
      }
    }
  };
  const auto release = [](SpinLockMutex &m) { m.unlock(); };
  SpinThrash(s, spinlock, busy_wait_lock, release);
}

// Simple `while(!try_lock()) { yield-processor }`: like the naive spin, but a
// CPU-level pause/yield hint is issued after every failed attempt.
//
// The hint is selected at compile time:
//   - MSVC:             YieldProcessor()
//   - x86 / x86-64:     _mm_pause() under clang, __builtin_ia32_pause() otherwise
//   - ARM and AArch64:  the `yield` instruction
// On any other target no hint is emitted and this benchmark behaves exactly
// like the naive one.
//
// NOTE(review): this file does not include the headers declaring
// YieldProcessor / _mm_pause itself; presumably they arrive through
// spin_lock_mutex.h -- confirm.
static void BM_ProcYieldSpinLockThrashing(benchmark::State &s)
{
  SpinLockMutex spinlock;
  SpinThrash<SpinLockMutex>(
      s, spinlock,
      [](SpinLockMutex &m) {
        while (!m.try_lock())
        {
#if defined(_MSC_VER)
          YieldProcessor();
#elif defined(__i386__) || defined(__x86_64__)
#  if defined(__clang__)
          _mm_pause();
#  else
          __builtin_ia32_pause();
#  endif
#elif defined(__arm__) || defined(__aarch64__)
          // 64-bit ARM compilers define __aarch64__ rather than __arm__, so it
          // must be tested too or the hint is silently dropped there.
          __asm__ volatile("yield" ::: "memory");
#endif
        }
      },
      [](SpinLockMutex &m) { m.unlock(); });
}

// Spin lock built directly on a std::atomic_flag whose lock path calls
// std::this_thread::yield() while waiting.
static void BM_ThreadYieldSpinLockThrashing(benchmark::State &s)
{
  std::atomic_flag mutex = ATOMIC_FLAG_INIT;

  const auto yielding_lock = [](std::atomic_flag &l) {
    uint32_t failed_attempts = 0;
    // test_and_set() returns the previous state: `true` means the flag was
    // already set, i.e. this attempt did not acquire it.
    while (l.test_and_set(std::memory_order_acq_rel))
    {
      // NOTE(review): as written this yields on every failed attempt EXCEPT
      // each 32nd one.  If "yield once every 32 attempts" was intended, the
      // test should be `== 0` -- confirm before changing, since it alters
      // what the benchmark measures.
      if (++failed_attempts % 32 != 0)
      {
        std::this_thread::yield();
      }
    }
    // NOTE(review): this yield runs after the flag has been acquired, i.e.
    // while the lock is held -- confirm that this is intentional.
    std::this_thread::yield();
  };
  const auto release = [](std::atomic_flag &l) { l.clear(std::memory_order_release); };

  SpinThrash<std::atomic_flag>(s, mutex, yielding_lock, release);
}

// Upper bound of the thread-count sweep used by every benchmark below.
//
// std::thread::hardware_concurrency() is allowed to return 0 when the value
// cannot be determined.  Fall back to a single thread in that case so the
// range handed to Range() never has an upper bound below its lower bound of 1.
inline unsigned MaxThrashThreads()
{
  const unsigned detected = std::thread::hardware_concurrency();
  return detected == 0 ? 1u : detected;
}

// Run each benchmark with the thread count (`state.range(0)`) swept from 1 up
// to the detected hardware concurrency, multiplying by 2 between steps, and
// measure the amount of time to thrash around.
//
// MeasureProcessCPUTime(): account CPU time for the whole process, so the
// worker threads are included rather than only the thread running the
// benchmark loop.
// UseRealTime(): report and drive iteration counts from wall-clock time.
BENCHMARK(BM_SpinLockThrashing)
    ->RangeMultiplier(2)
    ->Range(1, MaxThrashThreads())
    ->MeasureProcessCPUTime()
    ->UseRealTime()
    ->Unit(benchmark::kMillisecond);
BENCHMARK(BM_ProcYieldSpinLockThrashing)
    ->RangeMultiplier(2)
    ->Range(1, MaxThrashThreads())
    ->MeasureProcessCPUTime()
    ->UseRealTime()
    ->Unit(benchmark::kMillisecond);
BENCHMARK(BM_NaiveSpinLockThrashing)
    ->RangeMultiplier(2)
    ->Range(1, MaxThrashThreads())
    ->MeasureProcessCPUTime()
    ->UseRealTime()
    ->Unit(benchmark::kMillisecond);
BENCHMARK(BM_ThreadYieldSpinLockThrashing)
    ->RangeMultiplier(2)
    ->Range(1, MaxThrashThreads())
    ->MeasureProcessCPUTime()
    ->UseRealTime()
    ->Unit(benchmark::kMillisecond);

}  // namespace

BENCHMARK_MAIN();
Commit	Line	Data
1e59de90 TL	1	// Copyright The OpenTelemetry Authors
	2	// SPDX-License-Identifier: Apache-2.0
	3
	4	#include "opentelemetry/common/spin_lock_mutex.h"
	5
	6	#include <benchmark/benchmark.h>
	7	#include <mutex>
	8
	9	namespace
	10	{
	11	using opentelemetry::common::SpinLockMutex;
	12
	13	constexpr int TightLoopLocks = 10000;
	14
	15	// Runs a thrash-test where we spin up N threads, each of which will
	16	// attempt to lock-mutate-unlock a total of `TightLoopLocks` times.
	17	//
	18	// lock: A lambda denoting how to lock. Accepts a reference to `SpinLockType`.
	19	// unlock: A lambda denoting how to unlock. Accepts a reference to `SpinLockType`.
	20	template <typename SpinLockType, typename LockF, typename UnlockF>
	21	inline void SpinThrash(benchmark::State &s, SpinLockType &spinlock, LockF lock, UnlockF unlock)
	22	{
	23	auto num_threads = s.range(0);
	24	// Value we will increment, fighting over a spinlock.
	25	// The contention is meant to be brief, as close to our expected
	26	// use cases of "updating pointers" or "pushing an event onto a buffer".
	27	std::int64_t value = 0;
	28
	29	std::vector<std::thread> threads;
	30	threads.reserve(num_threads);
	31
	32	// Timing loop
	33	for (auto _ : s)
	34	{
	35	for (auto i = 0; i < num_threads; i++)
	36	{
	37	threads.emplace_back([&] {
	38	// Increment value once each time the lock is acquired. Spin a few times
	39	// to ensure maximum thread contention.
	40	for (int i = 0; i < TightLoopLocks; i++)
	41	{
	42	lock(spinlock);
	43	value++;
	44	unlock(spinlock);
	45	}
	46	});
	47	}
	48	// Join threads
	49	for (auto &thread : threads)
	50	thread.join();
	51	threads.clear();
	52	}
	53	}
	54
	55	// Benchmark of full spin-lock implementation.
	56	static void BM_SpinLockThrashing(benchmark::State &s)
	57	{
	58	SpinLockMutex spinlock;
	59	SpinThrash(
	60	s, spinlock, [](SpinLockMutex &m) { m.lock(); }, [](SpinLockMutex &m) { m.unlock(); });
	61	}
	62
	63	// Naive `while(try_lock()) {}` implementation of lock.
	64	static void BM_NaiveSpinLockThrashing(benchmark::State &s)
65	{
66	SpinLockMutex spinlock;
67	SpinThrash(
68	s, spinlock,
69	[](SpinLockMutex &m) {
70	while (!m.try_lock())
71	{
72	// Left this comment to keep the same format on old and new versions of clang-format
73	}
74	},
75	[](SpinLockMutex &m) { m.unlock(); });
76	}
77
78	// Simple `while(try_lock()) { yield-processor }`
79	static void BM_ProcYieldSpinLockThrashing(benchmark::State &s)
80	{
81	SpinLockMutex spinlock;
82	SpinThrash<SpinLockMutex>(
83	s, spinlock,
84	[](SpinLockMutex &m) {
85	while (!m.try_lock())
86	{
87	#if defined(_MSC_VER)
88	YieldProcessor();
	89	#elif defined(__i386__) || defined(__x86_64__)
90	# if defined(__clang__)
91	_mm_pause();
92	# else
93	__builtin_ia32_pause();
94	# endif
95	#elif defined(__arm__)
96	__asm__ volatile("yield" ::: "memory");
97	#endif
98	}
99	},
100	[](SpinLockMutex &m) { m.unlock(); });
101	}
102
103	// SpinLock thrashing with thread::yield().
104	static void BM_ThreadYieldSpinLockThrashing(benchmark::State &s)
105	{
106	std::atomic_flag mutex = ATOMIC_FLAG_INIT;
107	SpinThrash<std::atomic_flag>(
108	s, mutex,
109	[](std::atomic_flag &l) {
110	uint32_t try_count = 0;
111	while (l.test_and_set(std::memory_order_acq_rel))
112	{
113	++try_count;
114	if (try_count % 32)
115	{
116	std::this_thread::yield();
117	}
118	}
119	std::this_thread::yield();
120	},
121	[](std::atomic_flag &l) { l.clear(std::memory_order_release); });
122	}
123
124	// Run the benchmarks at 2x thread/core and measure the amount of time to thrash around.
125	BENCHMARK(BM_SpinLockThrashing)
126	->RangeMultiplier(2)
127	->Range(1, std::thread::hardware_concurrency())
128	->MeasureProcessCPUTime()
129	->UseRealTime()
130	->Unit(benchmark::kMillisecond);
131	BENCHMARK(BM_ProcYieldSpinLockThrashing)
132	->RangeMultiplier(2)
133	->Range(1, std::thread::hardware_concurrency())
134	->MeasureProcessCPUTime()
135	->UseRealTime()
136	->Unit(benchmark::kMillisecond);
137	BENCHMARK(BM_NaiveSpinLockThrashing)
138	->RangeMultiplier(2)
139	->Range(1, std::thread::hardware_concurrency())
140	->MeasureProcessCPUTime()
141	->UseRealTime()
142	->Unit(benchmark::kMillisecond);
143	BENCHMARK(BM_ThreadYieldSpinLockThrashing)
144	->RangeMultiplier(2)
145	->Range(1, std::thread::hardware_concurrency())
146	->MeasureProcessCPUTime()
147	->UseRealTime()
148	->Unit(benchmark::kMillisecond);
149
150	} // namespace
151
152	BENCHMARK_MAIN();