]> git.proxmox.com Git - mirror_corosync.git/commitdiff
Detect big scheduling pauses
authorJan Friesse <jfriesse@redhat.com>
Mon, 8 Apr 2013 07:57:25 +0000 (09:57 +0200)
committerJan Friesse <jfriesse@redhat.com>
Mon, 8 Apr 2013 07:58:42 +0000 (09:58 +0200)
Add poll timer scheduler to be called 3 times per token timeout.
If poll timer was not called for more then 0.8 * token timeout, it means
corosync process was not scheduled and ether token_timeout should be
increased or load should be reduced (useful for VM, where host is
overcommitted so VM is not scheduled as expected).

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Fabio M. Di Nitto <fdinitto@redhat.com>
exec/main.c

index 8f3e30d78e50a252b8716f24fe037f0c607aa868..5a9ae975d2025f1e5cc7ebcc36933adb29edd41b 100644 (file)
@@ -786,6 +786,51 @@ void message_source_set (
        source->conn = conn;
 }
 
+struct scheduler_pause_timeout_data {
+       struct totem_config *totem_config;
+       qb_loop_timer_handle handle;
+       unsigned long long tv_prev;
+       unsigned long long max_tv_diff;
+};
+
+static void timer_function_scheduler_timeout (void *data)
+{
+       struct scheduler_pause_timeout_data *timeout_data = (struct scheduler_pause_timeout_data *)data;
+       unsigned long long tv_current;
+       unsigned long long tv_diff;
+
+       tv_current = qb_util_nano_current_get ();
+
+       if (timeout_data->tv_prev == 0) {
+               /*
+                * Initial call -> just pretent everything is ok
+                */
+               timeout_data->tv_prev = tv_current;
+               timeout_data->max_tv_diff = 0;
+       }
+
+       tv_diff = tv_current - timeout_data->tv_prev;
+       timeout_data->tv_prev = tv_current;
+
+       if (tv_diff > timeout_data->max_tv_diff) {
+               log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled for %0.4f ms "
+                   "(threshold is %0.4f ms). Consider token timeout increase.",
+                   (float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC);
+       }
+
+       /*
+        * Set next threshold, because token_timeout can change
+        */
+       timeout_data->max_tv_diff = timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC * 0.8;
+       qb_loop_timer_add (corosync_poll_handle,
+               QB_LOOP_MED,
+               timeout_data->totem_config->token_timeout / 3,
+               timeout_data,
+               timer_function_scheduler_timeout,
+               &timeout_data->handle);
+}
+
+
 static void corosync_setscheduler (void)
 {
 #if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER)
@@ -1038,6 +1083,7 @@ int main (int argc, char **argv, char **envp)
        char corosync_lib_dir[PATH_MAX];
        enum e_corosync_done flock_err;
        uint64_t totem_config_warnings;
+       struct scheduler_pause_timeout_data scheduler_pause_timeout_data;
 
        /* default configuration
         */
@@ -1194,6 +1240,10 @@ int main (int argc, char **argv, char **envp)
 
        corosync_poll_handle = qb_loop_create ();
 
+       memset(&scheduler_pause_timeout_data, 0, sizeof(scheduler_pause_timeout_data));
+       scheduler_pause_timeout_data.totem_config = &totem_config;
+       timer_function_scheduler_timeout (&scheduler_pause_timeout_data);
+
        qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW,
                SIGUSR2, NULL, sig_diag_handler, NULL);
        qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,