]>
Commit | Line | Data |
---|---|---|
1e59de90 TL |
1 | // Copyright The OpenTelemetry Authors |
2 | // SPDX-License-Identifier: Apache-2.0 | |
3 | ||
#include "opentelemetry/common/spin_lock_mutex.h"

#include <benchmark/benchmark.h>
#include <atomic>
#include <cstdint>
#include <mutex>
#include <thread>
#include <vector>
8 | ||
9 | namespace | |
10 | { | |
11 | using opentelemetry::common::SpinLockMutex; | |
12 | ||
13 | constexpr int TightLoopLocks = 10000; | |
14 | ||
15 | // Runs a thrash-test where we spin up N threads, each of which will | |
16 | // attempt to lock-mutate-unlock a total of `TightLoopLocks` times. | |
17 | // | |
18 | // lock: A lambda denoting how to lock. Accepts a reference to `SpinLockType`. | |
19 | // unlock: A lambda denoting how to unlock. Accepts a reference to `SpinLockType`. | |
20 | template <typename SpinLockType, typename LockF, typename UnlockF> | |
21 | inline void SpinThrash(benchmark::State &s, SpinLockType &spinlock, LockF lock, UnlockF unlock) | |
22 | { | |
23 | auto num_threads = s.range(0); | |
24 | // Value we will increment, fighting over a spinlock. | |
25 | // The contention is meant to be brief, as close to our expected | |
26 | // use cases of "updating pointers" or "pushing an event onto a buffer". | |
27 | std::int64_t value = 0; | |
28 | ||
29 | std::vector<std::thread> threads; | |
30 | threads.reserve(num_threads); | |
31 | ||
32 | // Timing loop | |
33 | for (auto _ : s) | |
34 | { | |
35 | for (auto i = 0; i < num_threads; i++) | |
36 | { | |
37 | threads.emplace_back([&] { | |
38 | // Increment value once each time the lock is acquired. Spin a few times | |
39 | // to ensure maximum thread contention. | |
40 | for (int i = 0; i < TightLoopLocks; i++) | |
41 | { | |
42 | lock(spinlock); | |
43 | value++; | |
44 | unlock(spinlock); | |
45 | } | |
46 | }); | |
47 | } | |
48 | // Join threads | |
49 | for (auto &thread : threads) | |
50 | thread.join(); | |
51 | threads.clear(); | |
52 | } | |
53 | } | |
54 | ||
55 | // Benchmark of full spin-lock implementation. | |
56 | static void BM_SpinLockThrashing(benchmark::State &s) | |
57 | { | |
58 | SpinLockMutex spinlock; | |
59 | SpinThrash( | |
60 | s, spinlock, [](SpinLockMutex &m) { m.lock(); }, [](SpinLockMutex &m) { m.unlock(); }); | |
61 | } | |
62 | ||
63 | // Naive `while(try_lock()) {}` implementation of lock. | |
64 | static void BM_NaiveSpinLockThrashing(benchmark::State &s) | |
65 | { | |
66 | SpinLockMutex spinlock; | |
67 | SpinThrash( | |
68 | s, spinlock, | |
69 | [](SpinLockMutex &m) { | |
70 | while (!m.try_lock()) | |
71 | { | |
72 | // Left this comment to keep the same format on old and new versions of clang-format | |
73 | } | |
74 | }, | |
75 | [](SpinLockMutex &m) { m.unlock(); }); | |
76 | } | |
77 | ||
78 | // Simple `while(try_lock()) { yield-processor }` | |
79 | static void BM_ProcYieldSpinLockThrashing(benchmark::State &s) | |
80 | { | |
81 | SpinLockMutex spinlock; | |
82 | SpinThrash<SpinLockMutex>( | |
83 | s, spinlock, | |
84 | [](SpinLockMutex &m) { | |
85 | while (!m.try_lock()) | |
86 | { | |
87 | #if defined(_MSC_VER) | |
88 | YieldProcessor(); | |
89 | #elif defined(__i386__) || defined(__x86_64__) | |
90 | # if defined(__clang__) | |
91 | _mm_pause(); | |
92 | # else | |
93 | __builtin_ia32_pause(); | |
94 | # endif | |
95 | #elif defined(__arm__) | |
96 | __asm__ volatile("yield" ::: "memory"); | |
97 | #endif | |
98 | } | |
99 | }, | |
100 | [](SpinLockMutex &m) { m.unlock(); }); | |
101 | } | |
102 | ||
103 | // SpinLock thrashing with thread::yield(). | |
104 | static void BM_ThreadYieldSpinLockThrashing(benchmark::State &s) | |
105 | { | |
106 | std::atomic_flag mutex = ATOMIC_FLAG_INIT; | |
107 | SpinThrash<std::atomic_flag>( | |
108 | s, mutex, | |
109 | [](std::atomic_flag &l) { | |
110 | uint32_t try_count = 0; | |
111 | while (l.test_and_set(std::memory_order_acq_rel)) | |
112 | { | |
113 | ++try_count; | |
114 | if (try_count % 32) | |
115 | { | |
116 | std::this_thread::yield(); | |
117 | } | |
118 | } | |
119 | std::this_thread::yield(); | |
120 | }, | |
121 | [](std::atomic_flag &l) { l.clear(std::memory_order_release); }); | |
122 | } | |
123 | ||
124 | // Run the benchmarks at 2x thread/core and measure the amount of time to thrash around. | |
125 | BENCHMARK(BM_SpinLockThrashing) | |
126 | ->RangeMultiplier(2) | |
127 | ->Range(1, std::thread::hardware_concurrency()) | |
128 | ->MeasureProcessCPUTime() | |
129 | ->UseRealTime() | |
130 | ->Unit(benchmark::kMillisecond); | |
131 | BENCHMARK(BM_ProcYieldSpinLockThrashing) | |
132 | ->RangeMultiplier(2) | |
133 | ->Range(1, std::thread::hardware_concurrency()) | |
134 | ->MeasureProcessCPUTime() | |
135 | ->UseRealTime() | |
136 | ->Unit(benchmark::kMillisecond); | |
137 | BENCHMARK(BM_NaiveSpinLockThrashing) | |
138 | ->RangeMultiplier(2) | |
139 | ->Range(1, std::thread::hardware_concurrency()) | |
140 | ->MeasureProcessCPUTime() | |
141 | ->UseRealTime() | |
142 | ->Unit(benchmark::kMillisecond); | |
143 | BENCHMARK(BM_ThreadYieldSpinLockThrashing) | |
144 | ->RangeMultiplier(2) | |
145 | ->Range(1, std::thread::hardware_concurrency()) | |
146 | ->MeasureProcessCPUTime() | |
147 | ->UseRealTime() | |
148 | ->Unit(benchmark::kMillisecond); | |
149 | ||
150 | } // namespace | |
151 | ||
152 | BENCHMARK_MAIN(); |