(strcmp(path, "totem.token") == 0) ||
(strcmp(path, "totem.token_coefficient") == 0) ||
(strcmp(path, "totem.token_retransmit") == 0) ||
+ (strcmp(path, "totem.token_warning") == 0) ||
(strcmp(path, "totem.hold") == 0) ||
(strcmp(path, "totem.token_retransmits_before_loss_const") == 0) ||
(strcmp(path, "totem.join") == 0) ||
stats->srp->avg_backlog_calc = (total_backlog_calc / token_count);
}
+ stats->srp->time_since_token_last_received = qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC -
+ stats->srp->token[stats->srp->latest_token].rx;
+
stats_trigger_trackers();
api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
{ STAT_SRP, "recovery_token_lost", offsetof(totemsrp_stats_t, recovery_token_lost), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "consensus_timeouts", offsetof(totemsrp_stats_t, consensus_timeouts), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "rx_msg_dropped", offsetof(totemsrp_stats_t, rx_msg_dropped), ICMAP_VALUETYPE_UINT64},
+ { STAT_SRP, "time_since_token_last_received", offsetof(totemsrp_stats_t, time_since_token_last_received), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "continuous_gather", offsetof(totemsrp_stats_t, continuous_gather), ICMAP_VALUETYPE_UINT32},
{ STAT_SRP, "continuous_sendmsg_failures", offsetof(totemsrp_stats_t, continuous_sendmsg_failures), ICMAP_VALUETYPE_UINT32},
{ STAT_SRP, "firewall_enabled_or_nic_failure", offsetof(totemsrp_stats_t, firewall_enabled_or_nic_failure), ICMAP_VALUETYPE_UINT8},
#define TOKEN_RETRANSMITS_BEFORE_LOSS_CONST 4
#define TOKEN_TIMEOUT 1000 /* value supplied for "totem.token" (token timeout, logged in ms) */
+#define TOKEN_WARNING 75 /* value supplied for "totem.token_warning": percent of the token timeout, 0 disables warnings */
#define TOKEN_COEFFICIENT 650 /* initial value used before "totem.token_coefficient" is looked up */
#define JOIN_TIMEOUT 50
#define MERGE_TIMEOUT 200
{
if (strcmp(param_name, "totem.token") == 0)
return &totem_config->token_timeout;
+ if (strcmp(param_name, "totem.token_warning") == 0)
+ return &totem_config->token_warning;
if (strcmp(param_name, "totem.token_retransmit") == 0)
return &totem_config->token_retransmit_timeout;
if (strcmp(param_name, "totem.hold") == 0)
totem_volatile_config_set_uint32_value(totem_config, "totem.token", deleted_key, TOKEN_TIMEOUT, 0);
+ totem_volatile_config_set_uint32_value(totem_config, "totem.token_warning", deleted_key, TOKEN_WARNING, 1);
+
if (totem_config->interfaces[0].member_count > 2) {
u32 = TOKEN_COEFFICIENT;
icmap_get_uint32("totem.token_coefficient", &u32);
goto parse_error;
}
+ if (totem_config->token_warning > 100 || totem_config->token_warning < 0) {
+ snprintf (local_error_reason, sizeof(local_error_reason),
+ "The token warning parameter (%d%%) must be between 0 (disabled) and 100.",
+ totem_config->token_warning);
+ goto parse_error;
+ }
+
if (totem_config->token_retransmit_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The token retransmit timeout parameter (%d ms) may not be less than (%d ms).",
log_printf(LOGSYS_LEVEL_DEBUG, "Token Timeout (%d ms) retransmit timeout (%d ms)",
totem_config->token_timeout, totem_config->token_retransmit_timeout);
+ if (totem_config->token_warning) {
+ uint32_t token_warning_ms = totem_config->token_warning * totem_config->token_timeout / 100;
+ log_printf(LOGSYS_LEVEL_DEBUG, "Token warning every %d ms (%d%% of Token Timeout)",
+ token_warning_ms, totem_config->token_warning);
+ if (token_warning_ms < totem_config->token_retransmit_timeout)
+ log_printf (LOGSYS_LEVEL_DEBUG,
+ "The token warning interval (%d ms) is less than the token retransmit timeout (%d ms) "
+ "which can lead to spurious token warnings. Consider increasing the token_warning parameter.",
+ token_warning_ms, totem_config->token_retransmit_timeout);
+
+ } else
+ log_printf(LOGSYS_LEVEL_DEBUG, "Token warnings disabled");
log_printf(LOGSYS_LEVEL_DEBUG, "token hold (%d ms) retransmits before loss (%d retrans)",
totem_config->token_hold_timeout, totem_config->token_retransmits_before_loss_const);
log_printf(LOGSYS_LEVEL_DEBUG, "join (%d ms) send_join (%d ms) consensus (%d ms) merge (%d ms)",
qb_loop_timer_handle timer_orf_token_timeout;
+ qb_loop_timer_handle timer_orf_token_warning;
+
qb_loop_timer_handle timer_orf_token_retransmit_timeout;
qb_loop_timer_handle timer_orf_token_hold_retransmit_timeout;
struct memb_merge_detect *out);
static void srp_addr_copy_endian_convert (struct srp_addr *out, const struct srp_addr *in);
static void timer_function_orf_token_timeout (void *data);
+static void timer_function_orf_token_warning (void *data);
static void timer_function_pause_timeout (void *data);
static void timer_function_heartbeat_timeout (void *data);
static void timer_function_token_retransmit_timeout (void *data);
log_printf (instance->totemsrp_log_level_debug,
"Token Timeout (%d ms) retransmit timeout (%d ms)",
totem_config->token_timeout, totem_config->token_retransmit_timeout);
+ if (totem_config->token_warning) {
+ uint32_t token_warning_ms = totem_config->token_warning * totem_config->token_timeout / 100;
+ log_printf(instance->totemsrp_log_level_debug,
+ "Token warning every %d ms (%d%% of Token Timeout)",
+ token_warning_ms, totem_config->token_warning);
+ if (token_warning_ms < totem_config->token_retransmit_timeout)
+ log_printf (LOGSYS_LEVEL_DEBUG,
+ "The token warning interval (%d ms) is less than the token retransmit timeout (%d ms) "
+ "which can lead to spurious token warnings. Consider increasing the token_warning parameter.",
+ token_warning_ms, totem_config->token_retransmit_timeout);
+ } else {
+ log_printf(instance->totemsrp_log_level_debug,
+ "Token warnings disabled");
+ }
log_printf (instance->totemsrp_log_level_debug,
"token hold (%d ms) retransmits before loss (%d retrans)",
totem_config->token_hold_timeout, totem_config->token_retransmits_before_loss_const);
}
}
+/*
+ * (Re)arm the token warning timer.
+ *
+ * Deletes the timer identified by timer_orf_token_warning and adds a new one
+ * on the poll loop that calls timer_function_orf_token_warning after
+ * token_warning percent of token_timeout (both read from totem_config; the
+ * result is in ms and converted to ns for the timer). A failure to add the
+ * timer is logged at error level and otherwise ignored.
+ *
+ * The callers visible in this file only invoke this when token_warning is
+ * non-zero.
+ */
+static void reset_token_warning (struct totemsrp_instance *instance) {
+	int32_t res;
+	uint64_t warning_ms;
+
+	qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_warning);
+
+	/*
+	 * Widen before multiplying: percent * timeout can exceed 32 bits for
+	 * very large token timeouts and would otherwise wrap silently.
+	 */
+	warning_ms = (uint64_t)instance->totem_config->token_warning *
+		instance->totem_config->token_timeout / 100;
+
+	res = qb_loop_timer_add (instance->totemsrp_poll_handle,
+		QB_LOOP_MED,
+		warning_ms * QB_TIME_NS_IN_MSEC,
+		(void *)instance,
+		timer_function_orf_token_warning,
+		&instance->timer_orf_token_warning);
+	if (res != 0) {
+		log_printf(instance->totemsrp_log_level_error, "reset_token_warning - qb_loop_timer_add error : %d", res);
+	}
+}
+
static void reset_token_timeout (struct totemsrp_instance *instance) {
int32_t res;
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "reset_token_timeout - qb_loop_timer_add error : %d", res);
}
+
+ if (instance->totem_config->token_warning)
+ reset_token_warning(instance);
}
static void reset_heartbeat_timeout (struct totemsrp_instance *instance) {
}
+/*
+ * Delete the token warning timer (timer_orf_token_warning) from the
+ * instance's poll loop.
+ */
+static void cancel_token_warning (struct totemsrp_instance *instance)
+{
+	qb_loop_timer_del (instance->totemsrp_poll_handle,
+		instance->timer_orf_token_warning);
+}
+
/* Delete the token timeout timer and, when token_warning is non-zero, the token warning timer as well. */
static void cancel_token_timeout (struct totemsrp_instance *instance) {
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_timeout);
+
+ if (instance->totem_config->token_warning) /* a token_warning of 0 means warnings are disabled */
+ cancel_token_warning(instance);
}
static void cancel_heartbeat_timeout (struct totemsrp_instance *instance) {
instance->stats.recovery_token_lost++;
}
+/*
+ * Timer callback for the token warning timer.
+ *
+ * data is the struct totemsrp_instance that was registered with the timer.
+ * While token_warning is non-zero, log at notice level how many milliseconds
+ * separate the current time from the rx timestamp of the latest token stats
+ * entry, then re-arm the warning timer. If token_warning is zero, delete the
+ * warning timer instead.
+ */
+static void timer_function_orf_token_warning (void *data)
+{
+	struct totemsrp_instance *instance = data;
+	uint64_t tv_diff;
+
+	/* need to protect against the case where token_warning is set to 0 dynamically */
+	if (!instance->totem_config->token_warning) {
+		cancel_token_warning(instance);
+		return;
+	}
+
+	tv_diff = qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC -
+		instance->stats.token[instance->stats.latest_token].rx;
+
+	/*
+	 * tv_diff is 64-bit and unsigned; format it as such so that large
+	 * differences are neither truncated nor shown as negative numbers.
+	 */
+	log_printf (instance->totemsrp_log_level_notice,
+		"Token has not been received in %llu ms ", (unsigned long long) tv_diff);
+	reset_token_warning(instance);
+}
+
static void timer_function_orf_token_timeout (void *data)
{
struct totemsrp_instance *instance = data;
*/
unsigned int token_timeout;
+ unsigned int token_warning;
+
unsigned int token_retransmit_timeout;
unsigned int token_hold_timeout;
uint64_t rx_msg_dropped;
uint32_t continuous_gather;
uint32_t continuous_sendmsg_failures;
+ uint64_t time_since_token_last_received; // relative time
uint8_t firewall_enabled_or_nic_failure;
uint32_t mtt_rx_token;
The default is 1000 milliseconds.
+.TP
+token_warning
+Specifies the interval between warnings that the token has not been received. The
+value is a percentage of the token timeout and can be set to 0 to disable
+warnings.
+
+The default is 75%.
+
.TP
token_coefficient
This value is used only when