.description = "The Event subsystem has detected a slow process, this typically indicates that FRR is having trouble completing work in a timely manner. This can be either a misconfiguration, bug or some combination thereof. In this case total WALL time was over 5 seconds. Which indicates that FRR might be having trouble being scheduled or some system call is delaying",
.suggestion = "Gather log data and open an Issue",
},
+ {
+ .code = EC_LIB_STARVE_THREAD,
+ .title = "The Event subsystem has detected a thread starvation issue",
+ .description = "The event subsystem has detected a thread starvation issue. This typically indicates that the system FRR is running on is heavily loaded and this load might be impacting FRR's ability to handle events in a timely fashion",
+ .suggestion = "Gather log data and open an Issue",
+ },
{
.code = EC_LIB_NO_THREAD,
.title = "The Event subsystem has detected an internal FD problem",
static unsigned int thread_process_timers(struct thread_master *m,
struct timeval *timenow)
{
+ struct timeval prev = *timenow;
+ bool displayed = false;
struct thread *thread;
unsigned int ready = 0;
while ((thread = thread_timer_list_first(&m->timer))) {
if (timercmp(timenow, &thread->u.sands, <))
break;
+ prev = thread->u.sands;
+ prev.tv_sec += 4;
+ /*
+ * If the timer would have popped 4 seconds in the
+ * past then we are in a situation where we are
+ * really getting behind on handling of events.
+ * Let's log it and do the right thing with it.
+ */
+ if (timercmp(timenow, &prev, >)) {
+ if (!displayed)
+ flog_warn(
+ EC_LIB_STARVE_THREAD,
+ "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
+ thread);
+ displayed = true;
+ }
+
thread_timer_list_pop(&m->timer);
thread->type = THREAD_READY;
thread_list_add_tail(&m->ready, thread);