]> git.proxmox.com Git - ceph.git/blame - ceph/src/seastar/src/core/stall_detector.hh
import 15.2.0 Octopus source
[ceph.git] / ceph / src / seastar / src / core / stall_detector.hh
CommitLineData
11fdf7f2
TL
1
2/*
3 * This file is open source software, licensed to you under the terms
4 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
5 * distributed with this work for additional information regarding copyright
6 * ownership. You may not use this file except in compliance with the License.
7 *
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19/*
20 * Copyright (C) 2018 ScyllaDB
21 */
22
23#pragma once
24
25#include <signal.h>
26#include <limits>
27#include <chrono>
28#include <functional>
29#include <seastar/core/posix.hh>
9f95a23c 30#include <seastar/core/metrics_registration.hh>
11fdf7f2
TL
31
32namespace seastar {
33
34class reactor;
35
36namespace internal {
37
// Tunable settings handed to cpu_stall_detector.
//
// This is a plain aggregate; the member order is part of its interface
// (positional/aggregate initialization), so do not reorder the fields.
struct cpu_stall_detector_config {
    // Stall threshold, stored in (fractional) seconds. Defaults to 2 seconds.
    std::chrono::duration<double> threshold = std::chrono::seconds{2};

    // Reporting limit, per minute. Defaults to 1.
    unsigned stall_detector_reports_per_minute = 1u;

    // Fraction of threshold that we're allowed to overshoot.
    float slack = 0.3f;

    // Alternative reporting function for tests. Empty by default.
    std::function<void ()> report{};
};
44
45// Detects stalls in continuations that run for too long
46class cpu_stall_detector {
47 reactor* _r;
48 timer_t _timer;
9f95a23c 49 std::atomic<uint64_t> _last_tasks_processed_seen{};
11fdf7f2
TL
50 unsigned _stall_detector_reports_per_minute;
51 std::atomic<uint64_t> _stall_detector_missed_ticks = { 0 };
52 unsigned _reported = 0;
9f95a23c 53 unsigned _total_reported = 0;
11fdf7f2
TL
54 unsigned _max_reports_per_minute;
55 unsigned _shard_id;
56 unsigned _thread_id;
57 unsigned _report_at{};
58 std::chrono::steady_clock::time_point _minute_mark{};
59 std::chrono::steady_clock::time_point _rearm_timer_at{};
60 std::chrono::steady_clock::time_point _run_started_at{};
61 std::chrono::steady_clock::duration _threshold;
62 std::chrono::steady_clock::duration _slack;
63 cpu_stall_detector_config _config;
9f95a23c 64 seastar::metrics::metric_groups _metrics;
11fdf7f2
TL
65 friend reactor;
66private:
67 void maybe_report();
68 void arm_timer();
69 void report_suppressions(std::chrono::steady_clock::time_point now);
70public:
71 cpu_stall_detector(reactor* r, cpu_stall_detector_config cfg = {});
72 ~cpu_stall_detector();
73 static int signal_number() { return SIGRTMIN + 1; }
74 void start_task_run(std::chrono::steady_clock::time_point now);
75 void end_task_run(std::chrono::steady_clock::time_point now);
76 void generate_trace();
77 void update_config(cpu_stall_detector_config cfg);
78 cpu_stall_detector_config get_config() const;
79 void on_signal();
80 void start_sleep();
81 void end_sleep();
82};
83
84}
85}