]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | |
2 | /* | |
3 | * This file is open source software, licensed to you under the terms | |
4 | * of the Apache License, Version 2.0 (the "License"). See the NOTICE file | |
5 | * distributed with this work for additional information regarding copyright | |
6 | * ownership. You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You may obtain a copy of the License at | |
9 | * | |
10 | * http://www.apache.org/licenses/LICENSE-2.0 | |
11 | * | |
12 | * Unless required by applicable law or agreed to in writing, | |
13 | * software distributed under the License is distributed on an | |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
15 | * KIND, either express or implied. See the License for the | |
16 | * specific language governing permissions and limitations | |
17 | * under the License. | |
18 | */ | |
19 | /* | |
20 | * Copyright (C) 2018 ScyllaDB | |
21 | */ | |
22 | ||
23 | #pragma once | |
24 | ||
#include <signal.h>

#include <atomic>
#include <chrono>
#include <cstdint>
#include <functional>
#include <limits>

#include <seastar/core/posix.hh>
#include <seastar/core/metrics_registration.hh>
11fdf7f2 TL |
31 | |
32 | namespace seastar { | |
33 | ||
34 | class reactor; | |
35 | ||
36 | namespace internal { | |
37 | ||
// Tunables for cpu_stall_detector. Every field has a default, so a
// default-constructed config is usable as-is.
struct cpu_stall_detector_config {
    // Stall threshold; defaults to two seconds.
    std::chrono::duration<double> threshold = std::chrono::seconds(2);
    // Defaults to one report per minute.
    unsigned stall_detector_reports_per_minute = 1;
    // Fraction of threshold that we're allowed to overshoot.
    float slack = 0.3f;
    // Alternative reporting function for tests; empty by default.
    std::function<void ()> report;
};
44 | ||
45 | // Detects stalls in continuations that run for too long | |
46 | class cpu_stall_detector { | |
47 | reactor* _r; | |
48 | timer_t _timer; | |
9f95a23c | 49 | std::atomic<uint64_t> _last_tasks_processed_seen{}; |
11fdf7f2 TL |
50 | unsigned _stall_detector_reports_per_minute; |
51 | std::atomic<uint64_t> _stall_detector_missed_ticks = { 0 }; | |
52 | unsigned _reported = 0; | |
9f95a23c | 53 | unsigned _total_reported = 0; |
11fdf7f2 TL |
54 | unsigned _max_reports_per_minute; |
55 | unsigned _shard_id; | |
56 | unsigned _thread_id; | |
57 | unsigned _report_at{}; | |
58 | std::chrono::steady_clock::time_point _minute_mark{}; | |
59 | std::chrono::steady_clock::time_point _rearm_timer_at{}; | |
60 | std::chrono::steady_clock::time_point _run_started_at{}; | |
61 | std::chrono::steady_clock::duration _threshold; | |
62 | std::chrono::steady_clock::duration _slack; | |
63 | cpu_stall_detector_config _config; | |
9f95a23c | 64 | seastar::metrics::metric_groups _metrics; |
11fdf7f2 TL |
65 | friend reactor; |
66 | private: | |
67 | void maybe_report(); | |
68 | void arm_timer(); | |
69 | void report_suppressions(std::chrono::steady_clock::time_point now); | |
70 | public: | |
71 | cpu_stall_detector(reactor* r, cpu_stall_detector_config cfg = {}); | |
72 | ~cpu_stall_detector(); | |
73 | static int signal_number() { return SIGRTMIN + 1; } | |
74 | void start_task_run(std::chrono::steady_clock::time_point now); | |
75 | void end_task_run(std::chrono::steady_clock::time_point now); | |
76 | void generate_trace(); | |
77 | void update_config(cpu_stall_detector_config cfg); | |
78 | cpu_stall_detector_config get_config() const; | |
79 | void on_signal(); | |
80 | void start_sleep(); | |
81 | void end_sleep(); | |
82 | }; | |
83 | ||
84 | } | |
85 | } |