// SPDX-License-Identifier: GPL-2.0-or-later
/* Thread management routine
 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
 */

#include <sys/resource.h>

#include "frratomic.h"
#include "frr_pthread.h"
#include "lib_errors.h"
#include "libfrr_trace.h"

DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
DEFINE_MTYPE_STATIC(LIB, EVENT_MASTER, "Thread master");
DEFINE_MTYPE_STATIC(LIB, EVENT_POLL, "Thread Poll Info");
DEFINE_MTYPE_STATIC(LIB, EVENT_STATS, "Thread stats");

DECLARE_LIST(event_list, struct event, eventitem);

/* Cancellation request; the fields are referenced throughout the
 * cancellation handling below. */
struct cancel_req {
	int flags;
	struct event *thread;
	void *eventobj;
	struct event **threadref;
};

/* Flags for task cancellation */
#define EVENT_CANCEL_FLAG_READY 0x01

static int event_timer_cmp(const struct event *a, const struct event *b)
{
	if (a->u.sands.tv_sec < b->u.sands.tv_sec)
		return -1;
	if (a->u.sands.tv_sec > b->u.sands.tv_sec)
		return 1;
	if (a->u.sands.tv_usec < b->u.sands.tv_usec)
		return -1;
	if (a->u.sands.tv_usec > b->u.sands.tv_usec)
		return 1;
	return 0;
}

DECLARE_HEAP(event_timer_list, struct event, timeritem, event_timer_cmp);

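/* The comparator above orders the timer heap by absolute expiration time
 * (u.sands), so event_timer_list_first() always yields the soonest deadline;
 * event_fetch() below relies on this when computing poll() timeouts. */
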
#if defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif

/* Pipe poker: wake the owning pthread out of poll() by writing one byte
 * into its io_pipe. */
#define AWAKEN(m)                                                              \
	do {                                                                   \
		const unsigned char wakebyte = 0x01;                           \
		write(m->io_pipe[1], &wakebyte, 1);                            \
	} while (0)

/* control variable for initializer */
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
pthread_key_t thread_current;

static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *masters;

static void thread_free(struct event_loop *master, struct event *thread);

#ifndef EXCLUDE_CPU_TIME
#define EXCLUDE_CPU_TIME 0
#endif
#ifndef CONSUMED_TIME_CHECK
#define CONSUMED_TIME_CHECK 5000000
#endif

bool cputime_enabled = !EXCLUDE_CPU_TIME;
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
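
/* Both thresholds are kept in microseconds; the "service cputime-warning"
 * and "service walltime-warning" commands below take milliseconds and
 * multiply by 1000.  A threshold of 0 disables the respective warning. */
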
/* CLI start ---------------------------------------------------------------- */
#include "lib/event_clippy.c"

static unsigned int cpu_record_hash_key(const struct cpu_event_history *a)
{
	int size = sizeof(a->func);

	return jhash(&a->func, size, 0);
}

static bool cpu_record_hash_cmp(const struct cpu_event_history *a,
				const struct cpu_event_history *b)
{
	return a->func == b->func;
}

static void *cpu_record_hash_alloc(struct cpu_event_history *a)
{
	struct cpu_event_history *new;

	new = XCALLOC(MTYPE_EVENT_STATS, sizeof(struct cpu_event_history));
	new->func = a->func;
	new->funcname = a->funcname;
	return new;
}

static void cpu_record_hash_free(void *a)
{
	struct cpu_event_history *hist = a;

	XFREE(MTYPE_EVENT_STATS, hist);
}

static void vty_out_cpu_event_history(struct vty *vty,
				      struct cpu_event_history *a)
{
	vty_out(vty,
		"%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
		a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
		a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
		(a->real.total / a->total_calls), a->real.max,
		a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
	vty_out(vty, " %c%c%c%c%c %s\n",
		a->types & (1 << EVENT_READ) ? 'R' : ' ',
		a->types & (1 << EVENT_WRITE) ? 'W' : ' ',
		a->types & (1 << EVENT_TIMER) ? 'T' : ' ',
		a->types & (1 << EVENT_EVENT) ? 'E' : ' ',
		a->types & (1 << EVENT_EXECUTE) ? 'X' : ' ', a->funcname);
}

static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
{
	struct cpu_event_history *totals = args[0];
	struct cpu_event_history copy;
	struct vty *vty = args[1];
	uint8_t *filter = args[2];

	struct cpu_event_history *a = bucket->data;

	copy.total_active =
		atomic_load_explicit(&a->total_active, memory_order_seq_cst);
	copy.total_calls =
		atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
	copy.total_cpu_warn =
		atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
	copy.total_wall_warn =
		atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
	copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
						     memory_order_seq_cst);
	copy.cpu.total =
		atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
	copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
	copy.real.total =
		atomic_load_explicit(&a->real.total, memory_order_seq_cst);
	copy.real.max =
		atomic_load_explicit(&a->real.max, memory_order_seq_cst);
	copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
	copy.funcname = a->funcname;

	if (!(copy.types & *filter))
		return;

	vty_out_cpu_event_history(vty, &copy);
	totals->total_active += copy.total_active;
	totals->total_calls += copy.total_calls;
	totals->total_cpu_warn += copy.total_cpu_warn;
	totals->total_wall_warn += copy.total_wall_warn;
	totals->total_starv_warn += copy.total_starv_warn;
	totals->real.total += copy.real.total;
	if (totals->real.max < copy.real.max)
		totals->real.max = copy.real.max;
	totals->cpu.total += copy.cpu.total;
	if (totals->cpu.max < copy.cpu.max)
		totals->cpu.max = copy.cpu.max;
}

static void cpu_record_print(struct vty *vty, uint8_t filter)
{
	struct cpu_event_history tmp;
	void *args[3] = {&tmp, vty, &filter};
	struct event_loop *m;
	struct listnode *ln;

	if (!cputime_enabled)
		vty_out(vty,
			"Collecting CPU time statistics is currently disabled. Following statistics\n"
			"will be zero or may display data from when collection was enabled. Use the\n"
			" \"service cputime-stats\" command to start collecting data.\n"
			"\nCounters and wallclock times are always maintained and should be accurate.\n");

	memset(&tmp, 0, sizeof(tmp));
	tmp.funcname = "TOTAL";

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			const char *name = m->name ? m->name : "main";
			char underline[strlen(name) + 1];

			memset(underline, '-', sizeof(underline));
			underline[sizeof(underline) - 1] = '\0';

			vty_out(vty, "\nShowing statistics for pthread %s\n",
				name);
			vty_out(vty, "-------------------------------%s\n",
				underline);
			vty_out(vty, "%30s %18s %18s\n", "",
				"CPU (user+system):", "Real (wall-clock):");
			vty_out(vty,
				"Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
			vty_out(vty, " Avg uSec Max uSecs");
			vty_out(vty,
				"  CPU_Warn Wall_Warn Starv_Warn   Type  Thread\n");

			if (m->cpu_record->count)
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_print,
					args);
			else
				vty_out(vty, "No data to display yet.\n");

			vty_out(vty, "\n");
		}
	}

	vty_out(vty, "Total thread statistics\n");
	vty_out(vty, "-------------------------\n");
	vty_out(vty, "%30s %18s %18s\n", "",
		"CPU (user+system):", "Real (wall-clock):");
	vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
	vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
	vty_out(vty, "   Type  Thread\n");

	if (tmp.total_calls > 0)
		vty_out_cpu_event_history(vty, &tmp);
}

static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
{
	uint8_t *filter = args[0];
	struct hash *cpu_record = args[1];

	struct cpu_event_history *a = bucket->data;

	if (!(a->types & *filter))
		return;

	hash_release(cpu_record, bucket->data);
}

static void cpu_record_clear(uint8_t filter)
{
	uint8_t *tmp = &filter;
	struct event_loop *m;
	struct listnode *ln;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			frr_with_mutex (&m->mtx) {
				void *args[2] = {tmp, m->cpu_record};

				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_clear,
					args);
			}
		}
	}
}

static uint8_t parse_filter(const char *filterstr)
{
	int i = 0;
	int filter = 0;

	while (filterstr[i] != '\0') {
		switch (filterstr[i]) {
		case 'r':
		case 'R':
			filter |= (1 << EVENT_READ);
			break;
		case 'w':
		case 'W':
			filter |= (1 << EVENT_WRITE);
			break;
		case 't':
		case 'T':
			filter |= (1 << EVENT_TIMER);
			break;
		case 'e':
		case 'E':
			filter |= (1 << EVENT_EVENT);
			break;
		case 'x':
		case 'X':
			filter |= (1 << EVENT_EXECUTE);
			break;
		default:
			break;
		}
		++i;
	}

	return filter;
}
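
/* For example, parse_filter("wt") yields
 * (1 << EVENT_WRITE) | (1 << EVENT_TIMER); characters outside the rwtex set
 * are ignored, so a fully invalid string returns 0, which the CLI handlers
 * below report as an error. */
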
DEFUN_NOSH (show_thread_cpu,
	    show_thread_cpu_cmd,
	    "show thread cpu [FILTER]",
	    SHOW_STR
	    "Thread information\n"
	    "Thread CPU usage\n"
	    "Display filter (rwtex)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_print(vty, filter);
	return CMD_SUCCESS;
}

DEFPY (service_cputime_stats,
       service_cputime_stats_cmd,
       "[no] service cputime-stats",
       NO_STR
       "Set up miscellaneous service\n"
       "Collect CPU usage statistics\n")
{
	cputime_enabled = !no;
	return CMD_SUCCESS;
}

DEFPY (service_cputime_warning,
       service_cputime_warning_cmd,
       "[no] service cputime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		cputime_threshold = 0;
	else
		cputime_threshold = cputime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_cputime_warning,
       no_service_cputime_warning_cmd,
       "no service cputime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n")

DEFPY (service_walltime_warning,
       service_walltime_warning_cmd,
       "[no] service walltime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		walltime_threshold = 0;
	else
		walltime_threshold = walltime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_walltime_warning,
       no_service_walltime_warning_cmd,
       "no service walltime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n")

static void show_thread_poll_helper(struct vty *vty, struct event_loop *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct event *thread;
	uint32_t i;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing poll FD's for %s\n", name);
	vty_out(vty, "----------------------%s\n", underline);
	vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
		m->fd_limit);
	for (i = 0; i < m->handler.pfdcount; i++) {
		vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
			m->handler.pfds[i].fd, m->handler.pfds[i].events,
			m->handler.pfds[i].revents);

		if (m->handler.pfds[i].events & POLLIN) {
			thread = m->read[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR ");
			else
				vty_out(vty, "%s ", thread->xref->funcname);
		} else
			vty_out(vty, " ");

		if (m->handler.pfds[i].events & POLLOUT) {
			thread = m->write[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR\n");
			else
				vty_out(vty, "%s\n", thread->xref->funcname);
		} else
			vty_out(vty, "\n");
	}
}

DEFUN_NOSH (show_thread_poll,
	    show_thread_poll_cmd,
	    "show thread poll",
	    SHOW_STR
	    "Thread information\n"
	    "Show poll FD's and information\n")
{
	struct listnode *node;
	struct event_loop *m;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m))
			show_thread_poll_helper(vty, m);
	}

	return CMD_SUCCESS;
}

DEFUN (clear_thread_cpu,
       clear_thread_cpu_cmd,
       "clear thread cpu [FILTER]",
       "Clear stored data in all pthreads\n"
       "Thread information\n"
       "Thread CPU usage\n"
       "Display filter (rwtexb)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_clear(filter);
	return CMD_SUCCESS;
}

static void show_thread_timers_helper(struct vty *vty, struct event_loop *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct event *thread;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing timers for %s\n", name);
	vty_out(vty, "-------------------%s\n", underline);

	frr_each (event_timer_list, &m->timer, thread) {
		vty_out(vty, "  %-50s%pTH\n", thread->hist->funcname, thread);
	}
}

DEFPY_NOSH (show_thread_timers,
	    show_thread_timers_cmd,
	    "show thread timers",
	    SHOW_STR
	    "Thread information\n"
	    "Show all timers and how long they have left in the system\n")
{
	struct listnode *node;
	struct event_loop *m;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m))
			show_thread_timers_helper(vty, m);
	}

	return CMD_SUCCESS;
}

void event_cmd_init(void)
{
	install_element(VIEW_NODE, &show_thread_cpu_cmd);
	install_element(VIEW_NODE, &show_thread_poll_cmd);
	install_element(ENABLE_NODE, &clear_thread_cpu_cmd);

	install_element(CONFIG_NODE, &service_cputime_stats_cmd);
	install_element(CONFIG_NODE, &service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &service_walltime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);

	install_element(VIEW_NODE, &show_thread_timers_cmd);
}
/* CLI end ------------------------------------------------------------------ */

static void cancelreq_del(void *cr)
{
	XFREE(MTYPE_TMP, cr);
}

/* initializer, only ever called once */
static void initializer(void)
{
	pthread_key_create(&thread_current, NULL);
}

struct event_loop *event_master_create(const char *name)
{
	struct event_loop *rv;
	struct rlimit limit;

	pthread_once(&init_once, &initializer);

	rv = XCALLOC(MTYPE_EVENT_MASTER, sizeof(struct event_loop));

	/* Initialize master mutex */
	pthread_mutex_init(&rv->mtx, NULL);
	pthread_cond_init(&rv->cancel_cond, NULL);

	/* Set name */
	name = name ? name : "default";
	rv->name = XSTRDUP(MTYPE_EVENT_MASTER, name);

	/* Initialize I/O task data structures */

	/* Use configured limit if present, ulimit otherwise. */
	rv->fd_limit = frr_get_fd_limit();
	if (rv->fd_limit == 0) {
		getrlimit(RLIMIT_NOFILE, &limit);
		rv->fd_limit = (int)limit.rlim_cur;
	}

	rv->read = XCALLOC(MTYPE_EVENT_POLL,
			   sizeof(struct event *) * rv->fd_limit);

	rv->write = XCALLOC(MTYPE_EVENT_POLL,
			    sizeof(struct event *) * rv->fd_limit);

	char tmhashname[strlen(name) + 32];

	snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
		 name);
	rv->cpu_record = hash_create_size(
		8, (unsigned int (*)(const void *))cpu_record_hash_key,
		(bool (*)(const void *, const void *))cpu_record_hash_cmp,
		tmhashname);

	event_list_init(&rv->event);
	event_list_init(&rv->ready);
	event_list_init(&rv->unuse);
	event_timer_list_init(&rv->timer);

	/* Initialize event_fetch() settings */
	rv->spin = true;
	rv->handle_signals = true;

	/* Set pthread owner, should be updated by actual owner */
	rv->owner = pthread_self();
	rv->cancel_req = list_new();
	rv->cancel_req->del = cancelreq_del;
	rv->canceled = true;

	/* Initialize pipe poker */
	pipe(rv->io_pipe);
	set_nonblocking(rv->io_pipe[0]);
	set_nonblocking(rv->io_pipe[1]);

	/* Initialize data structures for poll() */
	rv->handler.pfdsize = rv->fd_limit;
	rv->handler.pfdcount = 0;
	rv->handler.pfds = XCALLOC(MTYPE_EVENT_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);
	rv->handler.copy = XCALLOC(MTYPE_EVENT_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);

	/* add to list of threadmasters */
	frr_with_mutex (&masters_mtx) {
		if (!masters)
			masters = list_new();

		listnode_add(masters, rv);
	}

	return rv;
}
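
/* Usage sketch (illustrative only, not part of this file): a daemon's main
 * pthread typically creates one loop, schedules an initial task against it
 * and then runs the fetch/call cycle.  `my_timer_cb` is a hypothetical
 * handler.
 *
 *	struct event_loop *loop = event_master_create("example");
 *	struct event ev, *t;
 *
 *	event_add_timer(loop, my_timer_cb, NULL, 10, NULL);
 *	while ((t = event_fetch(loop, &ev)))
 *		event_call(t);
 */
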
void event_master_set_name(struct event_loop *master, const char *name)
{
	frr_with_mutex (&master->mtx) {
		XFREE(MTYPE_EVENT_MASTER, master->name);
		master->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
	}
}

#define EVENT_UNUSED_DEPTH 10

/* Move thread to unuse list. */
static void thread_add_unuse(struct event_loop *m, struct event *thread)
{
	pthread_mutex_t mtxc = thread->mtx;

	assert(m != NULL && thread != NULL);

	thread->hist->total_active--;
	memset(thread, 0, sizeof(struct event));
	thread->type = EVENT_UNUSED;

	/* Restore the thread mutex context. */
	thread->mtx = mtxc;

	if (event_list_count(&m->unuse) < EVENT_UNUSED_DEPTH) {
		event_list_add_tail(&m->unuse, thread);
		return;
	}

	thread_free(m, thread);
}

/* Free all unused threads. */
static void thread_list_free(struct event_loop *m, struct event_list_head *list)
{
	struct event *t;

	while ((t = event_list_pop(list)))
		thread_free(m, t);
}

static void thread_array_free(struct event_loop *m, struct event **thread_array)
{
	struct event *t;
	int index;

	for (index = 0; index < m->fd_limit; ++index) {
		t = thread_array[index];
		if (t) {
			thread_array[index] = NULL;
			thread_free(m, t);
		}
	}

	XFREE(MTYPE_EVENT_POLL, thread_array);
}

/*
 * event_master_free_unused
 *
 * As threads are finished with, they are put on the unuse list for later
 * reuse.  If we are shutting down, free up the unused threads so we can see
 * if we forgot to shut anything off.
 */
void event_master_free_unused(struct event_loop *m)
{
	frr_with_mutex (&m->mtx) {
		struct event *t;

		while ((t = event_list_pop(&m->unuse)))
			thread_free(m, t);
	}
}

/* Stop thread scheduler. */
void event_master_free(struct event_loop *m)
{
	struct event *t;

	frr_with_mutex (&masters_mtx) {
		listnode_delete(masters, m);
		if (masters->count == 0)
			list_delete(&masters);
	}

	thread_array_free(m, m->read);
	thread_array_free(m, m->write);
	while ((t = event_timer_list_pop(&m->timer)))
		thread_free(m, t);
	thread_list_free(m, &m->event);
	thread_list_free(m, &m->ready);
	thread_list_free(m, &m->unuse);
	pthread_mutex_destroy(&m->mtx);
	pthread_cond_destroy(&m->cancel_cond);
	close(m->io_pipe[0]);
	close(m->io_pipe[1]);
	list_delete(&m->cancel_req);
	m->cancel_req = NULL;

	hash_clean_and_free(&m->cpu_record, cpu_record_hash_free);

	XFREE(MTYPE_EVENT_MASTER, m->name);
	XFREE(MTYPE_EVENT_MASTER, m->handler.pfds);
	XFREE(MTYPE_EVENT_MASTER, m->handler.copy);
	XFREE(MTYPE_EVENT_MASTER, m);
}

/* Return remaining time in milliseconds. */
unsigned long event_timer_remain_msec(struct event *thread)
{
	int64_t remain;

	if (!event_is_scheduled(thread))
		return 0;

	frr_with_mutex (&thread->mtx) {
		remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
	}

	return remain < 0 ? 0 : remain;
}

/* Return remaining time in seconds. */
unsigned long event_timer_remain_second(struct event *thread)
{
	return event_timer_remain_msec(thread) / 1000LL;
}

struct timeval event_timer_remain(struct event *thread)
{
	struct timeval remain;

	frr_with_mutex (&thread->mtx) {
		monotime_until(&thread->u.sands, &remain);
	}

	return remain;
}

static int time_hhmmss(char *buf, int buf_size, long sec)
{
	long hh;
	long mm;
	int wr;

	assert(buf_size >= 8);

	hh = sec / 3600;
	sec %= 3600;
	mm = sec / 60;
	sec %= 60;

	wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);

	return wr != 8;
}

char *event_timer_to_hhmmss(char *buf, int buf_size, struct event *t_timer)
{
	if (t_timer)
		time_hhmmss(buf, buf_size, event_timer_remain_second(t_timer));
	else
		snprintf(buf, buf_size, "--:--:--");

	return buf;
}

/* Get new thread. */
static struct event *thread_get(struct event_loop *m, uint8_t type,
				void (*func)(struct event *), void *arg,
				const struct xref_eventsched *xref)
{
	struct event *thread = event_list_pop(&m->unuse);
	struct cpu_event_history tmp;

	if (!thread) {
		thread = XCALLOC(MTYPE_THREAD, sizeof(struct event));
		/* mutex only needs to be initialized at struct creation. */
		pthread_mutex_init(&thread->mtx, NULL);
		m->alloc++;
	}

	thread->type = type;
	thread->add_type = type;
	thread->master = m;
	thread->arg = arg;
	thread->yield = EVENT_YIELD_TIME_SLOT; /* default */
	thread->ref = NULL;
	thread->ignore_timer_late = false;

	/*
	 * So if the passed-in funcname is not what we have
	 * stored, that means the thread->hist needs to be
	 * updated.  We keep the last one around in unused
	 * under the assumption that we are probably
	 * going to immediately allocate the same
	 * one up again.
	 *
	 * This hopefully saves us some serious
	 * hash_get lookups.
	 */
	if ((thread->xref && thread->xref->funcname != xref->funcname)
	    || thread->func != func) {
		tmp.func = func;
		tmp.funcname = xref->funcname;
		thread->hist =
			hash_get(m->cpu_record, &tmp,
				 (void *(*)(void *))cpu_record_hash_alloc);
	}
	thread->hist->total_active++;
	thread->func = func;
	thread->xref = xref;

	return thread;
}

static void thread_free(struct event_loop *master, struct event *thread)
{
	/* Update statistics. */
	assert(master->alloc > 0);
	master->alloc--;

	/* Free allocated resources. */
	pthread_mutex_destroy(&thread->mtx);
	XFREE(MTYPE_THREAD, thread);
}

static int fd_poll(struct event_loop *m, const struct timeval *timer_wait,
		   bool *eintr_p)
{
	sigset_t origsigs;
	unsigned char trash[64];
	nfds_t count = m->handler.copycount;

	/*
	 * If timer_wait is null here, that means poll() should block
	 * indefinitely, unless the event_master has overridden it by setting
	 * ->selectpoll_timeout.
	 *
	 * If the value is positive, it specifies the maximum number of
	 * milliseconds to wait. If the timeout is -1, it specifies that
	 * we should never wait and always return immediately even if no
	 * event is detected. If the value is zero, the behavior is default.
	 */
	int timeout = -1;

	/* number of file descriptors with events */
	int num;

	if (timer_wait != NULL && m->selectpoll_timeout == 0) {
		/* use the default value */
		timeout = (timer_wait->tv_sec * 1000)
			  + (timer_wait->tv_usec / 1000);
	} else if (m->selectpoll_timeout > 0) {
		/* use the user's timeout */
		timeout = m->selectpoll_timeout;
	} else if (m->selectpoll_timeout < 0) {
		/* effect a poll (return immediately) */
		timeout = 0;
	}

	zlog_tls_buffer_flush();
	rcu_read_unlock();
	rcu_assert_read_unlocked();

	/* add poll pipe poker */
	assert(count + 1 < m->handler.pfdsize);
	m->handler.copy[count].fd = m->io_pipe[0];
	m->handler.copy[count].events = POLLIN;
	m->handler.copy[count].revents = 0x00;

	/* We need to deal with a signal-handling race here: we
	 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
	 * that may arrive just before we enter poll(). We will block the
	 * key signals, then check whether any have arrived - if so, we return
	 * before calling poll(). If not, we'll re-enable the signals
	 * in the ppoll() call.
	 */

	sigemptyset(&origsigs);
	if (m->handle_signals) {
		/* Main pthread that handles the app signals */
		if (frr_sigevent_check(&origsigs)) {
			/* Signal to process - restore signal mask and return */
			pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
			num = -1;
			*eintr_p = true;
			goto done;
		}
	} else {
		/* Don't make any changes for the non-main pthreads */
		pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
	}

#if defined(HAVE_PPOLL)
	struct timespec ts, *tsp;

	if (timeout >= 0) {
		ts.tv_sec = timeout / 1000;
		ts.tv_nsec = (timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
#else
	/* Not ideal - there is a race after we restore the signal mask */
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
	num = poll(m->handler.copy, count + 1, timeout);
#endif

done:

	if (num < 0 && errno == EINTR)
		*eintr_p = true;

	if (num > 0 && m->handler.copy[count].revents != 0 && num--)
		while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
			;

	rcu_read_lock();

	return num;
}

/* Add new read/write task to the event loop. */
void _event_add_read_write(const struct xref_eventsched *xref,
			   struct event_loop *m, void (*func)(struct event *),
			   void *arg, int fd, struct event **t_ptr)
{
	int dir = xref->event_type;
	struct event *thread = NULL;
	struct event **thread_array;

	if (dir == EVENT_READ)
		frrtrace(9, frr_libfrr, schedule_read, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);
	else
		frrtrace(9, frr_libfrr, schedule_write, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);

	if (fd >= m->fd_limit)
		assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");

	frr_with_mutex (&m->mtx) {
		/* Thread is already scheduled; don't reschedule */
		if (t_ptr && *t_ptr)
			break;

		/* default to a new pollfd */
		nfds_t queuepos = m->handler.pfdcount;

		if (dir == EVENT_READ)
			thread_array = m->read;
		else
			thread_array = m->write;

		/*
		 * if we already have a pollfd for our file descriptor, find and
		 * use it
		 */
		for (nfds_t i = 0; i < m->handler.pfdcount; i++)
			if (m->handler.pfds[i].fd == fd) {
				queuepos = i;

				/*
				 * What happens if we have a thread already
				 * created for this event?
				 */
				if (thread_array[fd])
					assert(!"Thread already scheduled for file descriptor");

				break;
			}

		/* make sure we have room for this fd + pipe poker fd */
		assert(queuepos + 1 < m->handler.pfdsize);

		thread = thread_get(m, dir, func, arg, xref);

		m->handler.pfds[queuepos].fd = fd;
		m->handler.pfds[queuepos].events |=
			(dir == EVENT_READ ? POLLIN : POLLOUT);

		if (queuepos == m->handler.pfdcount)
			m->handler.pfdcount++;

		if (thread) {
			frr_with_mutex (&thread->mtx) {
				thread->u.fd = fd;
				thread_array[thread->u.fd] = thread;
			}

			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		AWAKEN(m);
	}
}
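
/* Callers normally go through the event_add_read()/event_add_write()
 * wrapper macros in event.h rather than invoking this directly; a
 * hypothetical reader registration might look like:
 *
 *	event_add_read(master, my_read_cb, conn, conn->fd, &conn->t_read);
 *
 * where `my_read_cb`, `conn` and its fields are illustrative only. */
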
static void _event_add_timer_timeval(const struct xref_eventsched *xref,
				     struct event_loop *m,
				     void (*func)(struct event *), void *arg,
				     struct timeval *time_relative,
				     struct event **t_ptr)
{
	struct event *thread = NULL;
	struct timeval t;

	assert(m != NULL);
	assert(time_relative);

	frrtrace(9, frr_libfrr, schedule_timer, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);

	/* Compute expiration/deadline time. */
	monotime(&t);
	timeradd(&t, time_relative, &t);

	frr_with_mutex (&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			break;

		thread = thread_get(m, EVENT_TIMER, func, arg, xref);

		frr_with_mutex (&thread->mtx) {
			thread->u.sands = t;
			event_timer_list_add(&m->timer, thread);
			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		/* The timer list is sorted - if this new timer
		 * might change the time we'll wait for, give the pthread
		 * a chance to re-compute.
		 */
		if (event_timer_list_first(&m->timer) == thread)
			AWAKEN(m);
	}
#define ONEYEAR2SEC (60 * 60 * 24 * 365)
	if (time_relative->tv_sec > ONEYEAR2SEC)
		flog_err(
			EC_LIB_TIMER_TOO_LONG,
			"Timer: %pTHD is created with an expiration that is greater than 1 year",
			thread);
}

/* Add timer event thread. */
void _event_add_timer(const struct xref_eventsched *xref, struct event_loop *m,
		      void (*func)(struct event *), void *arg, long timer,
		      struct event **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer;
	trel.tv_usec = 0;

	_event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}
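
/* Sketch of typical use (hypothetical callback and argument): schedule
 * `my_timer_cb` to run in 30 seconds, keeping a back-reference so the task
 * can later be cancelled through `t_my_timer`:
 *
 *	event_add_timer(master, my_timer_cb, my_arg, 30, &t_my_timer);
 */
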
/* Add timer event thread with "millisecond" resolution */
void _event_add_timer_msec(const struct xref_eventsched *xref,
			   struct event_loop *m, void (*func)(struct event *),
			   void *arg, long timer, struct event **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer / 1000;
	trel.tv_usec = 1000 * (timer % 1000);

	_event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "timeval" resolution */
void _event_add_timer_tv(const struct xref_eventsched *xref,
			 struct event_loop *m, void (*func)(struct event *),
			 void *arg, struct timeval *tv, struct event **t_ptr)
{
	_event_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}

/* Add simple event thread. */
void _event_add_event(const struct xref_eventsched *xref, struct event_loop *m,
		      void (*func)(struct event *), void *arg, int val,
		      struct event **t_ptr)
{
	struct event *thread = NULL;

	frrtrace(9, frr_libfrr, schedule_event, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, val, arg, 0);

	assert(m != NULL);

	frr_with_mutex (&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			break;

		thread = thread_get(m, EVENT_EVENT, func, arg, xref);
		frr_with_mutex (&thread->mtx) {
			thread->u.val = val;
			event_list_add_tail(&m->event, thread);
		}

		if (t_ptr) {
			*t_ptr = thread;
			thread->ref = t_ptr;
		}

		AWAKEN(m);
	}
}

1161 * NOT's out the .events field of pollfd corresponding to the given file
1162 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1164 * This needs to happen for both copies of pollfd's. See 'event_fetch'
1165 * implementation for details.
1169 * @param state the event to cancel. One or more (OR'd together) of the
1174 static void event_cancel_rw(struct event_loop
*master
, int fd
, short state
,
1179 /* find the index of corresponding pollfd */
1182 /* Cancel POLLHUP too just in case some bozo set it */
1185 /* Some callers know the index of the pfd already */
1186 if (idx_hint
>= 0) {
1190 /* Have to look for the fd in the pfd array */
1191 for (i
= 0; i
< master
->handler
.pfdcount
; i
++)
1192 if (master
->handler
.pfds
[i
].fd
== fd
) {
1200 "[!] Received cancellation request for nonexistent rw job");
1201 zlog_debug("[!] threadmaster: %s | fd: %d",
1202 master
->name
? master
->name
: "", fd
);
1206 /* NOT out event. */
1207 master
->handler
.pfds
[i
].events
&= ~(state
);
1209 /* If all events are canceled, delete / resize the pollfd array. */
1210 if (master
->handler
.pfds
[i
].events
== 0) {
1211 memmove(master
->handler
.pfds
+ i
, master
->handler
.pfds
+ i
+ 1,
1212 (master
->handler
.pfdcount
- i
- 1)
1213 * sizeof(struct pollfd
));
1214 master
->handler
.pfdcount
--;
1215 master
->handler
.pfds
[master
->handler
.pfdcount
].fd
= 0;
1216 master
->handler
.pfds
[master
->handler
.pfdcount
].events
= 0;
1220 * If we have the same pollfd in the copy, perform the same operations,
1223 if (i
>= master
->handler
.copycount
)
1226 master
->handler
.copy
[i
].events
&= ~(state
);
1228 if (master
->handler
.copy
[i
].events
== 0) {
1229 memmove(master
->handler
.copy
+ i
, master
->handler
.copy
+ i
+ 1,
1230 (master
->handler
.copycount
- i
- 1)
1231 * sizeof(struct pollfd
));
1232 master
->handler
.copycount
--;
1233 master
->handler
.copy
[master
->handler
.copycount
].fd
= 0;
1234 master
->handler
.copy
[master
->handler
.copycount
].events
= 0;
/*
 * Process task cancellation given a task argument: iterate through the
 * various lists of tasks, looking for any that match the argument.
 */
static void cancel_arg_helper(struct event_loop *master,
			      const struct cancel_req *cr)
{
	struct event *t;
	nfds_t i;
	int fd;
	struct pollfd *pfd;

	/* We're only processing arg-based cancellations here. */
	if (cr->eventobj == NULL)
		return;

	/* First process the ready lists. */
	frr_each_safe (event_list, &master->event, t) {
		if (t->arg != cr->eventobj)
			continue;
		event_list_del(&master->event, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	frr_each_safe (event_list, &master->ready, t) {
		if (t->arg != cr->eventobj)
			continue;
		event_list_del(&master->ready, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	/* If requested, stop here and ignore io and timers */
	if (CHECK_FLAG(cr->flags, EVENT_CANCEL_FLAG_READY))
		return;

	/* Check the io tasks */
	for (i = 0; i < master->handler.pfdcount;) {
		pfd = master->handler.pfds + i;

		if (pfd->events & POLLIN)
			t = master->read[pfd->fd];
		else
			t = master->write[pfd->fd];

		if (t && t->arg == cr->eventobj) {
			fd = pfd->fd;

			/* Found a match to cancel: clean up fd arrays */
			event_cancel_rw(master, pfd->fd, pfd->events, i);

			/* Clean up thread arrays */
			master->read[fd] = NULL;
			master->write[fd] = NULL;

			/* Clear caller's ref */
			if (t->ref)
				*t->ref = NULL;

			thread_add_unuse(master, t);

			/* Don't increment 'i' since the cancellation will have
			 * removed the entry from the pfd array
			 */
		} else
			i++;
	}

	/* Check the timer tasks */
	t = event_timer_list_first(&master->timer);
	while (t) {
		struct event *t_next;

		t_next = event_timer_list_next(&master->timer, t);

		if (t->arg == cr->eventobj) {
			event_timer_list_del(&master->timer, t);
			if (t->ref)
				*t->ref = NULL;
			thread_add_unuse(master, t);
		}

		t = t_next;
	}
}

/*
 * Process cancellation requests.
 *
 * This may only be run from the pthread which owns the event_master.
 *
 * @param master the thread master to process
 * @REQUIRE master->mtx
 */
static void do_event_cancel(struct event_loop *master)
{
	struct event_list_head *list = NULL;
	struct event **thread_array = NULL;
	struct event *thread;
	struct cancel_req *cr;
	struct listnode *ln;

	for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
		/*
		 * If this is an event object cancellation, search
		 * through task lists deleting any tasks which have the
		 * specified argument - use this handy helper function.
		 */
		if (cr->eventobj) {
			cancel_arg_helper(master, cr);
			continue;
		}

		/*
		 * The pointer varies depending on whether the cancellation
		 * request was made asynchronously or not. If it was, we
		 * need to check whether the thread even exists anymore
		 * before cancelling it.
		 */
		thread = (cr->thread) ? cr->thread : *cr->threadref;

		if (!thread)
			continue;

		list = NULL;
		thread_array = NULL;

		/* Determine the appropriate queue to cancel the thread from */
		switch (thread->type) {
		case EVENT_READ:
			event_cancel_rw(master, thread->u.fd, POLLIN, -1);
			thread_array = master->read;
			break;
		case EVENT_WRITE:
			event_cancel_rw(master, thread->u.fd, POLLOUT, -1);
			thread_array = master->write;
			break;
		case EVENT_TIMER:
			event_timer_list_del(&master->timer, thread);
			break;
		case EVENT_EVENT:
			list = &master->event;
			break;
		case EVENT_READY:
			list = &master->ready;
			break;
		default:
			continue;
		}

		if (list)
			event_list_del(list, thread);
		else if (thread_array)
			thread_array[thread->u.fd] = NULL;

		if (thread->ref)
			*thread->ref = NULL;

		thread_add_unuse(thread->master, thread);
	}

	/* Delete and free all cancellation requests */
	if (master->cancel_req)
		list_delete_all_node(master->cancel_req);

	/* Wake up any threads which may be blocked in event_cancel_async() */
	master->canceled = true;
	pthread_cond_broadcast(&master->cancel_cond);
}

/*
 * Helper function used for multiple flavors of arg-based cancellation.
 */
static void cancel_event_helper(struct event_loop *m, void *arg, int flags)
{
	struct cancel_req *cr;

	assert(m->owner == pthread_self());

	/* Only worth anything if caller supplies an arg. */
	if (arg == NULL)
		return;

	cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));

	cr->flags = flags;
	cr->eventobj = arg;

	frr_with_mutex (&m->mtx) {
		listnode_add(m->cancel_req, cr);
		do_event_cancel(m);
	}
}

/*
 * Cancel any events which have the specified argument.
 *
 * @param master the event_master to cancel from
 * @param arg the argument passed when creating the event
 */
void event_cancel_event(struct event_loop *master, void *arg)
{
	cancel_event_helper(master, arg, 0);
}

/*
 * Cancel ready tasks with an arg matching 'arg'.
 *
 * @param m the event_master to cancel from
 * @param arg the argument passed when creating the event
 */
void event_cancel_event_ready(struct event_loop *m, void *arg)
{
	/* Only cancel ready/event tasks */
	cancel_event_helper(m, arg, EVENT_CANCEL_FLAG_READY);
}

/*
 * Cancel a specific task.
 *
 * @param thread task to cancel
 */
void event_cancel(struct event **thread)
{
	struct event_loop *master;

	if (thread == NULL || *thread == NULL)
		return;

	master = (*thread)->master;

	frrtrace(9, frr_libfrr, event_cancel, master, (*thread)->xref->funcname,
		 (*thread)->xref->xref.file, (*thread)->xref->xref.line, NULL,
		 (*thread)->u.fd, (*thread)->u.val, (*thread)->arg,
		 (*thread)->u.sands.tv_sec);

	assert(master->owner == pthread_self());

	frr_with_mutex (&master->mtx) {
		struct cancel_req *cr =
			XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
		cr->thread = *thread;
		listnode_add(master->cancel_req, cr);
		do_event_cancel(master);
	}

	*thread = NULL;
}
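
/* In daemon code this is usually reached through the EVENT_OFF() style
 * convenience wrapper in event.h (assumption: wrapper name per current FRR
 * headers), which passes the stored back-reference and leaves it NULLed:
 *
 *	event_cancel(&conn->t_read);	// conn->t_read becomes NULL
 */
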
/*
 * Asynchronous cancellation.
 *
 * Called with either a struct event ** or void * to an event argument,
 * this function posts the correct cancellation request and blocks until it is
 * serviced.
 *
 * If the thread is currently running, execution blocks until it completes.
 *
 * The last two parameters are mutually exclusive, i.e. if you pass one the
 * other must be NULL.
 *
 * When the cancellation procedure executes on the target event_master, the
 * thread * provided is checked for nullity. If it is null, the thread is
 * assumed to no longer exist and the cancellation request is a no-op. Thus
 * users of this API must pass a back-reference when scheduling the original
 * task.
 *
 * @param master the thread master with the relevant event / task
 * @param thread pointer to thread to cancel
 * @param eventobj the event
 */
void event_cancel_async(struct event_loop *master, struct event **thread,
			void *eventobj)
{
	assert(!(thread && eventobj) && (thread || eventobj));

	if (thread && *thread)
		frrtrace(9, frr_libfrr, event_cancel_async, master,
			 (*thread)->xref->funcname, (*thread)->xref->xref.file,
			 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
			 (*thread)->u.val, (*thread)->arg,
			 (*thread)->u.sands.tv_sec);
	else
		frrtrace(9, frr_libfrr, event_cancel_async, master, NULL, NULL,
			 0, NULL, 0, 0, eventobj, 0);

	assert(master->owner != pthread_self());

	frr_with_mutex (&master->mtx) {
		master->canceled = false;

		if (thread) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->threadref = thread;
			listnode_add(master->cancel_req, cr);
		} else if (eventobj) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->eventobj = eventobj;
			listnode_add(master->cancel_req, cr);
		}
		AWAKEN(master);

		while (!master->canceled)
			pthread_cond_wait(&master->cancel_cond, &master->mtx);
	}

	if (thread)
		*thread = NULL;
}
/* ------------------------------------------------------------------------- */

static struct timeval *thread_timer_wait(struct event_timer_list_head *timers,
					 struct timeval *timer_val)
{
	if (!event_timer_list_count(timers))
		return NULL;

	struct event *next_timer = event_timer_list_first(timers);

	monotime_until(&next_timer->u.sands, timer_val);
	return timer_val;
}

static struct event *thread_run(struct event_loop *m, struct event *thread,
				struct event *fetch)
{
	*fetch = *thread;
	thread_add_unuse(m, thread);
	return fetch;
}

static int thread_process_io_helper(struct event_loop *m, struct event *thread,
				    short state, short actual_state, int pos)
{
	struct event **thread_array;

	/*
	 * poll() clears the .events field, but the pollfd array we
	 * pass to poll() is a copy of the one used to schedule threads.
	 * We need to synchronize state between the two here by applying
	 * the same changes poll() made on the copy of the "real" pollfd
	 * array.
	 *
	 * This cleans up a possible infinite loop where we refuse
	 * to respond to a poll event but poll is insistent that
	 * we should.
	 */
	m->handler.pfds[pos].events &= ~(state);

	if (!thread) {
		if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
			flog_err(EC_LIB_NO_THREAD,
				 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
				 m->handler.pfds[pos].fd, actual_state);
		return 0;
	}

	if (thread->type == EVENT_READ)
		thread_array = m->read;
	else
		thread_array = m->write;

	thread_array[thread->u.fd] = NULL;
	event_list_add_tail(&m->ready, thread);
	thread->type = EVENT_READY;

	return 1;
}

/**
 * Process I/O events.
 *
 * Walks through file descriptor array looking for those pollfds whose .revents
 * field has something interesting. Deletes any invalid file descriptors.
 *
 * @param m the thread master
 * @param num the number of active file descriptors (return value of poll())
 */
static void thread_process_io(struct event_loop *m, unsigned int num)
{
	unsigned int ready = 0;
	struct pollfd *pfds = m->handler.copy;

	for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
		/* no event for current fd? immediately continue */
		if (pfds[i].revents == 0)
			continue;

		ready++;

		/*
		 * Unless someone has called event_cancel from another
		 * pthread, the only thing that could have changed in
		 * m->handler.pfds while we were asleep is the .events
		 * field in a given pollfd. Barring event_cancel() that
		 * value should be a superset of the values we have in our
		 * copy, so there's no need to update it. Similarly,
		 * barring deletion, the fd should still be a valid index
		 * into the master's pfds.
		 *
		 * We are including POLLERR here to do a READ event
		 * this is because the read should fail and the
		 * read function should handle it appropriately
		 */
		if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
			thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
						 pfds[i].revents, i);
		}
		if (pfds[i].revents & POLLOUT)
			thread_process_io_helper(m, m->write[pfds[i].fd],
						 POLLOUT, pfds[i].revents, i);

		/*
		 * if one of our file descriptors is garbage, remove the same
		 * from both pfds + update sizes and index
		 */
		if (pfds[i].revents & POLLNVAL) {
			memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
				(m->handler.pfdcount - i - 1)
					* sizeof(struct pollfd));
			m->handler.pfdcount--;
			m->handler.pfds[m->handler.pfdcount].fd = 0;
			m->handler.pfds[m->handler.pfdcount].events = 0;

			memmove(pfds + i, pfds + i + 1,
				(m->handler.copycount - i - 1)
					* sizeof(struct pollfd));
			m->handler.copycount--;
			m->handler.copy[m->handler.copycount].fd = 0;
			m->handler.copy[m->handler.copycount].events = 0;

			i--;
		}
	}
}

/* Add all timers that have popped to the ready list. */
static unsigned int thread_process_timers(struct event_loop *m,
					  struct timeval *timenow)
{
	struct timeval prev = *timenow;
	bool displayed = false;
	struct event *thread;
	unsigned int ready = 0;

	while ((thread = event_timer_list_first(&m->timer))) {
		if (timercmp(timenow, &thread->u.sands, <))
			break;

		prev = thread->u.sands;
		prev.tv_sec += 4;
		/*
		 * If the timer would have popped 4 seconds in the
		 * past then we are in a situation where we are
		 * really getting behind on handling of events.
		 * Let's log it and do the right thing with it.
		 */
		if (timercmp(timenow, &prev, >)) {
			atomic_fetch_add_explicit(
				&thread->hist->total_starv_warn, 1,
				memory_order_seq_cst);
			if (!displayed && !thread->ignore_timer_late) {
				flog_warn(
					EC_LIB_STARVE_THREAD,
					"Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
					thread);
				displayed = true;
			}
		}

		event_timer_list_pop(&m->timer);
		thread->type = EVENT_READY;
		event_list_add_tail(&m->ready, thread);
		ready++;
	}

	return ready;
}

/* process a list en masse, e.g. for event thread lists */
static unsigned int thread_process(struct event_list_head *list)
{
	struct event *thread;
	unsigned int ready = 0;

	while ((thread = event_list_pop(list))) {
		thread->type = EVENT_READY;
		event_list_add_tail(&thread->master->ready, thread);
		ready++;
	}

	return ready;
}

/* Fetch next ready thread. */
struct event *event_fetch(struct event_loop *m, struct event *fetch)
{
	struct event *thread = NULL;
	struct timeval now;
	struct timeval zerotime = {0, 0};
	struct timeval tv;
	struct timeval *tw = NULL;
	bool eintr_p = false;
	int num = 0;

	do {
		/* Handle signals if any */
		if (m->handle_signals)
			frr_sigevent_process();

		pthread_mutex_lock(&m->mtx);

		/* Process any pending cancellation requests */
		do_event_cancel(m);

		/*
		 * Attempt to flush ready queue before going into poll().
		 * This is performance-critical. Think twice before modifying.
		 */
		if ((thread = event_list_pop(&m->ready))) {
			fetch = thread_run(m, thread, fetch);
			if (fetch->ref)
				*fetch->ref = NULL;
			pthread_mutex_unlock(&m->mtx);
			if (!m->ready_run_loop)
				GETRUSAGE(&m->last_getrusage);
			m->ready_run_loop = true;
			break;
		}

		m->ready_run_loop = false;
		/* otherwise, tick through scheduling sequence */

		/*
		 * Post events to ready queue. This must come before the
		 * following block since events should occur immediately
		 */
		thread_process(&m->event);

		/*
		 * If there are no tasks on the ready queue, we will poll()
		 * until a timer expires or we receive I/O, whichever comes
		 * first. The strategy for doing this is:
		 *
		 * - If there are events pending, set the poll() timeout to zero
		 * - If there are no events pending, but there are timers
		 *   pending, set the timeout to the smallest remaining time on
		 *   any timer.
		 * - If there are neither timers nor events pending, but there
		 *   are file descriptors pending, block indefinitely in poll()
		 * - If nothing is pending, it's time for the application to die
		 *
		 * In every case except the last, we need to hit poll() at least
		 * once per loop to avoid starvation by events
		 */
		if (!event_list_count(&m->ready))
			tw = thread_timer_wait(&m->timer, &tv);

		if (event_list_count(&m->ready) ||
		    (tw && !timercmp(tw, &zerotime, >)))
			tw = &zerotime;

		if (!tw && m->handler.pfdcount == 0) { /* die */
			pthread_mutex_unlock(&m->mtx);
			fetch = NULL;
			break;
		}

		/*
		 * Copy pollfd array + # active pollfds in it. Not necessary to
		 * copy the array size as this is fixed.
		 */
		m->handler.copycount = m->handler.pfdcount;
		memcpy(m->handler.copy, m->handler.pfds,
		       m->handler.copycount * sizeof(struct pollfd));

		pthread_mutex_unlock(&m->mtx);
		eintr_p = false;
		num = fd_poll(m, tw, &eintr_p);
		pthread_mutex_lock(&m->mtx);

		/* Handle any errors received in poll() */
		if (num < 0) {
			if (eintr_p) {
				pthread_mutex_unlock(&m->mtx);
				/* loop around to signal handler */
				continue;
			}

			/* else die */
			flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
				 safe_strerror(errno));
			pthread_mutex_unlock(&m->mtx);
			fetch = NULL;
			break;
		}

		/* Post timers to ready queue. */
		monotime(&now);
		thread_process_timers(m, &now);

		/* Post I/O to ready queue. */
		if (num > 0)
			thread_process_io(m, num);

		pthread_mutex_unlock(&m->mtx);

	} while (!thread && m->spin);

	return fetch;
}
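
/* Note: most daemons do not drive event_fetch()/event_call() by hand; they
 * hand the loop to the library (frr_run() in libfrr), which runs this same
 * fetch/call cycle until there is nothing left to schedule. */
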
static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
{
	return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
		+ (a.tv_usec - b.tv_usec));
}

unsigned long event_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
				  unsigned long *cputime)
{
#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID

	/*
	 * FreeBSD appears to have an issue when calling clock_gettime
	 * with CLOCK_THREAD_CPUTIME_ID really close to each other:
	 * occasionally the now time will be before the start time.
	 * This is not good and FRR is ending up with CPU HOGs
	 * when the subtraction wraps to very large numbers.
	 *
	 * What we are going to do here is cheat a little bit
	 * and notice that this is a problem and just correct
	 * it so that it is impossible to happen.
	 */
	if (start->cpu.tv_sec == now->cpu.tv_sec &&
	    start->cpu.tv_nsec > now->cpu.tv_nsec)
		now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
	else if (start->cpu.tv_sec > now->cpu.tv_sec) {
		now->cpu.tv_sec = start->cpu.tv_sec;
		now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
	}

	*cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
		   + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
#else
	/* This is 'user + sys' time. */
	*cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
		   + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
#endif
	return timeval_elapsed(now->real, start->real);
}

/*
 * We should aim to yield after yield milliseconds, which defaults
 * to EVENT_YIELD_TIME_SLOT.
 * Note: we are using real (wall clock) time for this calculation.
 * It could be argued that CPU time may make more sense in certain
 * contexts. The things to consider are whether the thread may have
 * blocked (in which case wall time increases, but CPU time does not),
 * or whether the system is heavily loaded with other processes competing
 * for CPU time. On balance, wall clock time seems to make sense.
 * Plus it has the added benefit that gettimeofday should be faster
 * than calling getrusage.
 */
int event_should_yield(struct event *thread)
{
	int result;

	frr_with_mutex (&thread->mtx) {
		result = monotime_since(&thread->real, NULL)
			 > (int64_t)thread->yield;
	}
	return result;
}
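
/* Illustrative pattern (hypothetical helpers): a long-running handler can
 * split its work across invocations by checking event_should_yield() and
 * rescheduling itself:
 *
 *	static void chunked_work(struct event *t)
 *	{
 *		struct work *w = EVENT_ARG(t);
 *
 *		while (work_remains(w)) {
 *			process_one_item(w);
 *			if (event_should_yield(t)) {
 *				event_add_event(t->master, chunked_work, w,
 *						0, NULL);
 *				return;
 *			}
 *		}
 *	}
 */
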
void event_set_yield_time(struct event *thread, unsigned long yield_time)
{
	frr_with_mutex (&thread->mtx) {
		thread->yield = yield_time;
	}
}

void event_getrusage(RUSAGE_T *r)
{
	monotime(&r->real);
	if (!cputime_enabled) {
		memset(&r->cpu, 0, sizeof(r->cpu));
		return;
	}

#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
	/* not currently implemented in Linux's vDSO, but maybe at some point
	 * in the future?
	 */
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
#if defined RUSAGE_THREAD
#define FRR_RUSAGE RUSAGE_THREAD
#else
#define FRR_RUSAGE RUSAGE_SELF
#endif
	getrusage(FRR_RUSAGE, &(r->cpu));
#endif
}

/*
 * This function will atomically update the thread's usage history. At present
 * this is the only spot where usage history is written. Nevertheless the code
 * has been written such that the introduction of writers in the future should
 * not need to update it provided the writers atomically perform only the
 * operations done here, i.e. updating the total and maximum times. In
 * particular, the maximum real and cpu times must be monotonically increasing
 * or this code is not correct.
 */
void event_call(struct event *thread)
{
	RUSAGE_T before, after;

	/* if the thread being called is the CLI, it may change cputime_enabled
	 * ("service cputime-stats" command), which can result in nonsensical
	 * and very confusing warnings
	 */
	bool cputime_enabled_here = cputime_enabled;

	if (thread->master->ready_run_loop)
		before = thread->master->last_getrusage;
	else
		GETRUSAGE(&before);

	thread->real = before.real;

	frrtrace(9, frr_libfrr, event_call, thread->master,
		 thread->xref->funcname, thread->xref->xref.file,
		 thread->xref->xref.line, NULL, thread->u.fd, thread->u.val,
		 thread->arg, thread->u.sands.tv_sec);

	pthread_setspecific(thread_current, thread);
	(*thread->func)(thread);
	pthread_setspecific(thread_current, NULL);

	GETRUSAGE(&after);
	thread->master->last_getrusage = after;

	unsigned long walltime, cputime;
	unsigned long exp;

	walltime = event_consumed_time(&after, &before, &cputime);

	/* update walltime */
	atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
				  memory_order_seq_cst);
	exp = atomic_load_explicit(&thread->hist->real.max,
				   memory_order_seq_cst);
	while (exp < walltime
	       && !atomic_compare_exchange_weak_explicit(
		       &thread->hist->real.max, &exp, walltime,
		       memory_order_seq_cst, memory_order_seq_cst))
		;

	if (cputime_enabled_here && cputime_enabled) {
		/* update cputime */
		atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
					  memory_order_seq_cst);
		exp = atomic_load_explicit(&thread->hist->cpu.max,
					   memory_order_seq_cst);
		while (exp < cputime
		       && !atomic_compare_exchange_weak_explicit(
			       &thread->hist->cpu.max, &exp, cputime,
			       memory_order_seq_cst, memory_order_seq_cst))
			;
	}

	atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
				  memory_order_seq_cst);
	atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
				 memory_order_seq_cst);

	if (cputime_enabled_here && cputime_enabled && cputime_threshold
	    && cputime > cputime_threshold) {
		/*
		 * We have a CPU Hog on our hands.  The time FRR has spent
		 * doing actual work (not sleeping) is greater than 5 seconds.
		 * Whinge about it now, so we're aware this is yet another task
		 * to fix.
		 */
		atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
					  1, memory_order_seq_cst);
		flog_warn(
			EC_LIB_SLOW_THREAD_CPU,
			"CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
			thread->xref->funcname, (unsigned long)thread->func,
			walltime / 1000, cputime / 1000);
	} else if (walltime_threshold && walltime > walltime_threshold) {
		/*
		 * The runtime for a task is greater than 5 seconds, but the
		 * cpu time is under 5 seconds.  Let's whine about this because
		 * this could imply some sort of scheduling issue.
		 */
		atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
					  1, memory_order_seq_cst);
		flog_warn(
			EC_LIB_SLOW_THREAD_WALL,
			"STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
			thread->xref->funcname, (unsigned long)thread->func,
			walltime / 1000, cputime / 1000);
	}
}

/* Execute thread */
void _event_execute(const struct xref_eventsched *xref, struct event_loop *m,
		    void (*func)(struct event *), void *arg, int val)
{
	struct event *thread;

	/* Get or allocate new thread to execute. */
	frr_with_mutex (&m->mtx) {
		thread = thread_get(m, EVENT_EVENT, func, arg, xref);

		/* Set its event value. */
		frr_with_mutex (&thread->mtx) {
			thread->add_type = EVENT_EXECUTE;
			thread->u.val = val;
			thread->ref = &thread;
		}
	}

	/* Execute thread doing all accounting. */
	event_call(thread);

	/* Give back or free thread. */
	thread_add_unuse(m, thread);
}

/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
void debug_signals(const sigset_t *sigs)
{
	int i, found;
	sigset_t tmpsigs;
	char buf[300];

	/*
	 * We're only looking at the non-realtime signals here, so we need
	 * some limit value. Platform differences mean at some point we just
	 * need to pick a reasonable value.
	 */
#if defined SIGRTMIN
#  define LAST_SIGNAL SIGRTMIN
#else
#  define LAST_SIGNAL 32
#endif

	if (sigs == NULL) {
		sigemptyset(&tmpsigs);
		pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
		sigs = &tmpsigs;
	}

	found = 0;
	buf[0] = '\0';

	for (i = 0; i < LAST_SIGNAL; i++) {
		char tmp[20];

		if (sigismember(sigs, i) > 0) {
			if (found > 0)
				strlcat(buf, ",", sizeof(buf));
			snprintf(tmp, sizeof(tmp), "%d", i);
			strlcat(buf, tmp, sizeof(buf));
			found++;
		}
	}

	if (found == 0)
		snprintf(buf, sizeof(buf), "<none>");

	zlog_debug("%s: %s", __func__, buf);
}

static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
				   const struct event *thread)
{
	static const char *const types[] = {
		[EVENT_READ] = "read", [EVENT_WRITE] = "write",
		[EVENT_TIMER] = "timer", [EVENT_EVENT] = "event",
		[EVENT_READY] = "ready", [EVENT_UNUSED] = "unused",
		[EVENT_EXECUTE] = "exec",
	};
	ssize_t rv = 0;
	char info[16] = "";

	if (!thread)
		return bputs(buf, "{(thread *)NULL}");

	rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);

	if (thread->type < array_size(types) && types[thread->type])
		rv += bprintfrr(buf, " %-6s", types[thread->type]);
	else
		rv += bprintfrr(buf, " INVALID(%u)", thread->type);

	switch (thread->type) {
	case EVENT_READ:
	case EVENT_WRITE:
		snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
		break;

	case EVENT_TIMER:
		snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
		break;
	}

	rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
			thread->xref->funcname, thread->xref->dest,
			thread->xref->xref.file, thread->xref->xref.line);
	return rv;
}

);
2183 static ssize_t
printfrr_thread(struct fbuf
*buf
, struct printfrr_eargs
*ea
,
2186 const struct event
*thread
= ptr
;
2187 struct timespec remain
= {};
2189 if (ea
->fmt
[0] == 'D') {
2191 return printfrr_thread_dbg(buf
, ea
, thread
);
2195 /* need to jump over time formatting flag characters in the
2196 * input format string, i.e. adjust ea->fmt!
2198 printfrr_time(buf
, ea
, &remain
,
2199 TIMEFMT_TIMER_DEADLINE
| TIMEFMT_SKIP
);
2200 return bputch(buf
, '-');
2203 TIMEVAL_TO_TIMESPEC(&thread
->u
.sands
, &remain
);
2204 return printfrr_time(buf
, ea
, &remain
, TIMEFMT_TIMER_DEADLINE
);
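
/* With the extension registered above, event pointers can be formatted
 * directly: "%pTH" prints the remaining time on a timer task (as used by the
 * "show thread timers" output earlier in this file), and "%pTHD" prints the
 * verbose debug form, e.g.:
 *
 *	zlog_debug("timer state: %pTHD", thread);
 */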