]> git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/include/seastar/core/internal/stall_detector.hh
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / seastar / include / seastar / core / internal / stall_detector.hh
1
2 /*
3 * This file is open source software, licensed to you under the terms
4 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
5 * distributed with this work for additional information regarding copyright
6 * ownership. You may not use this file except in compliance with the License.
7 *
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 /*
20 * Copyright (C) 2018 ScyllaDB
21 */
22
23 #pragma once
24
25 #include <signal.h>
26 #include <atomic>
27 #include <limits>
28 #include <chrono>
29 #include <functional>
30 #include <memory>
31 #include <seastar/core/posix.hh>
32 #include <seastar/core/metrics_registration.hh>
33 #include <seastar/core/scheduling.hh>
34 #include <linux/perf_event.h>
35
36 namespace seastar {
37
38 class reactor;
39 class thread_cputime_clock;
40
41 namespace internal {
42
// Tunables handed to a cpu_stall_detector. Every field has a default, so a
// value-initialized config ({}) can be passed as-is.
struct cpu_stall_detector_config {
    // Stall threshold expressed in (fractional) seconds; defaults to 2 s.
    std::chrono::duration<double> threshold{2.0};
    // Report budget per minute; defaults to a single report.
    unsigned stall_detector_reports_per_minute{1};
    // Fraction of threshold that we're allowed to overshoot.
    float slack{0.3};
    // Print a simplified backtrace on a single line.
    bool oneline{true};
    // Alternative reporting function for tests; empty unless assigned.
    std::function<void ()> report{};
};
50
51 // Detects stalls in continuations that run for too long
52 class cpu_stall_detector {
53 protected:
54 std::atomic<uint64_t> _last_tasks_processed_seen{};
55 unsigned _stall_detector_reports_per_minute;
56 std::atomic<uint64_t> _stall_detector_missed_ticks = { 0 };
57 unsigned _reported = 0;
58 unsigned _total_reported = 0;
59 unsigned _max_reports_per_minute;
60 unsigned _shard_id;
61 unsigned _thread_id;
62 unsigned _report_at{};
63 sched_clock::time_point _minute_mark{};
64 sched_clock::time_point _rearm_timer_at{};
65 sched_clock::time_point _run_started_at{};
66 sched_clock::duration _threshold;
67 sched_clock::duration _slack;
68 cpu_stall_detector_config _config;
69 seastar::metrics::metric_groups _metrics;
70 friend reactor;
71 virtual bool is_spurious_signal() {
72 return false;
73 }
74 virtual void maybe_report_kernel_trace() {}
75 private:
76 void maybe_report();
77 virtual void arm_timer() = 0;
78 void report_suppressions(sched_clock::time_point now);
79 void reset_suppression_state(sched_clock::time_point now);
80 public:
81 using clock_type = thread_cputime_clock;
82 public:
83 explicit cpu_stall_detector(cpu_stall_detector_config cfg = {});
84 virtual ~cpu_stall_detector() = default;
85 static int signal_number() { return SIGRTMIN + 1; }
86 void start_task_run(sched_clock::time_point now);
87 void end_task_run(sched_clock::time_point now);
88 void generate_trace();
89 void update_config(cpu_stall_detector_config cfg);
90 cpu_stall_detector_config get_config() const;
91 void on_signal();
92 virtual void start_sleep() = 0;
93 void end_sleep();
94 };
95
96 class cpu_stall_detector_posix_timer : public cpu_stall_detector {
97 timer_t _timer;
98 public:
99 explicit cpu_stall_detector_posix_timer(cpu_stall_detector_config cfg = {});
100 virtual ~cpu_stall_detector_posix_timer() override;
101 private:
102 virtual void arm_timer() override;
103 virtual void start_sleep() override;
104 };
105
106 class cpu_stall_detector_linux_perf_event : public cpu_stall_detector {
107 file_desc _fd;
108 bool _enabled = false;
109 uint64_t _current_period = 0;
110 struct ::perf_event_mmap_page* _mmap;
111 char* _data_area;
112 size_t _data_area_mask;
113 // after the detector has been armed (i.e., _enabled is true), this
114 // is the moment at or after which the next signal is expected to occur
115 // and can be used for detecting spurious signals
116 sched_clock::time_point _next_signal_time{};
117 private:
118 class data_area_reader {
119 cpu_stall_detector_linux_perf_event& _p;
120 const char* _data_area;
121 size_t _data_area_mask;
122 uint64_t _head;
123 uint64_t _tail;
124 public:
125 explicit data_area_reader(cpu_stall_detector_linux_perf_event& p)
126 : _p(p)
127 , _data_area(p._data_area)
128 , _data_area_mask(p._data_area_mask) {
129 _head = _p._mmap->data_head;
130 _tail = _p._mmap->data_tail;
131 std::atomic_thread_fence(std::memory_order_acquire); // required after reading data_head
132 }
133 ~data_area_reader() {
134 std::atomic_thread_fence(std::memory_order_release); // not documented, but probably required before writing data_tail
135 _p._mmap->data_tail = _tail;
136 }
137 uint64_t read_u64() {
138 uint64_t ret;
139 // We cannot wrap around if the 8-byte unit is aligned
140 std::copy_n(_data_area + (_tail & _data_area_mask), 8, reinterpret_cast<char*>(&ret));
141 _tail += 8;
142 return ret;
143 }
144 template <typename S>
145 S read_struct() {
146 static_assert(sizeof(S) % 8 == 0);
147 S ret;
148 char* p = reinterpret_cast<char*>(&ret);
149 for (size_t i = 0; i != sizeof(S); i += 8) {
150 uint64_t w = read_u64();
151 std::copy_n(reinterpret_cast<const char*>(&w), 8, p + i);
152 }
153 return ret;
154 }
155 void skip(uint64_t bytes_to_skip) {
156 _tail += bytes_to_skip;
157 }
158 // skip all the remaining data in the buffer, as-if calling read until
159 // have_data returns false (but much faster)
160 void skip_all() {
161 _tail = _head;
162 }
163 bool have_data() const {
164 return _head != _tail;
165 }
166 };
167 public:
168 static std::unique_ptr<cpu_stall_detector_linux_perf_event> try_make(cpu_stall_detector_config cfg = {});
169 explicit cpu_stall_detector_linux_perf_event(file_desc fd, cpu_stall_detector_config cfg = {});
170 ~cpu_stall_detector_linux_perf_event();
171 virtual void arm_timer() override;
172 virtual void start_sleep() override;
173 virtual bool is_spurious_signal() override;
174 virtual void maybe_report_kernel_trace() override;
175 };
176
177 std::unique_ptr<cpu_stall_detector> make_cpu_stall_detector(cpu_stall_detector_config cfg = {});
178
179 }
180 }