// SPDX-License-Identifier: GPL-2.0-or-later
/* Thread management routine
 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
 */
#include <zebra.h>
#include <sys/resource.h>

#include "thread.h"
#include "memory.h"
#include "frrcu.h"
#include "log.h"
#include "hash.h"
#include "command.h"
#include "sigevent.h"
#include "network.h"
#include "jhash.h"
#include "frratomic.h"
#include "frr_pthread.h"
#include "lib_errors.h"
#include "libfrr_trace.h"
#include "libfrr.h"
DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");

DECLARE_LIST(thread_list, struct thread, threaditem);
struct cancel_req {
	int flags;
	struct thread *thread;
	void *eventobj;
	struct thread **threadref;
};

/* Flags for task cancellation */
#define THREAD_CANCEL_FLAG_READY 0x01
static int thread_timer_cmp(const struct thread *a, const struct thread *b)
{
	if (a->u.sands.tv_sec < b->u.sands.tv_sec)
		return -1;
	if (a->u.sands.tv_sec > b->u.sands.tv_sec)
		return 1;
	if (a->u.sands.tv_usec < b->u.sands.tv_usec)
		return -1;
	if (a->u.sands.tv_usec > b->u.sands.tv_usec)
		return 1;
	return 0;
}

DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);
#if defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif

#define AWAKEN(m)                                                              \
	do {                                                                   \
		const unsigned char wakebyte = 0x01;                           \
		write(m->io_pipe[1], &wakebyte, 1);                            \
	} while (0)
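/*
 * Usage sketch (illustration, not part of the original file): AWAKEN() is
 * the classic self-pipe wakeup.  fd_poll() below always polls m->io_pipe[0]
 * for POLLIN and drains it afterwards, so writing one byte to m->io_pipe[1]
 * from any pthread forces a blocked poll()/ppoll() to return:
 *
 *	AWAKEN(master);		// kick the owning pthread out of poll()
 */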
/* control variable for initializer */
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
pthread_key_t thread_current;

static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *masters;
static void thread_free(struct thread_master *master, struct thread *thread);
#ifndef EXCLUDE_CPU_TIME
#define EXCLUDE_CPU_TIME 0
#endif
#ifndef CONSUMED_TIME_CHECK
#define CONSUMED_TIME_CHECK 5000000
#endif

bool cputime_enabled = !EXCLUDE_CPU_TIME;
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
/* CLI start ---------------------------------------------------------------- */
#include "lib/thread_clippy.c"

static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
{
	int size = sizeof(a->func);

	return jhash(&a->func, size, 0);
}
static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
				const struct cpu_thread_history *b)
{
	return a->func == b->func;
}
static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
{
	struct cpu_thread_history *new;

	new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
	new->func = a->func;
	new->funcname = a->funcname;
	return new;
}
static void cpu_record_hash_free(void *a)
{
	struct cpu_thread_history *hist = a;

	XFREE(MTYPE_THREAD_STATS, hist);
}
static void vty_out_cpu_thread_history(struct vty *vty,
				       struct cpu_thread_history *a)
{
	vty_out(vty,
		"%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
		a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
		a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
		(a->real.total / a->total_calls), a->real.max,
		a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
	vty_out(vty, " %c%c%c%c%c %s\n",
		a->types & (1 << THREAD_READ) ? 'R' : ' ',
		a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
		a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
		a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
		a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
}
static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
{
	struct cpu_thread_history *totals = args[0];
	struct cpu_thread_history copy;
	struct vty *vty = args[1];
	uint8_t *filter = args[2];

	struct cpu_thread_history *a = bucket->data;

	copy.total_active =
		atomic_load_explicit(&a->total_active, memory_order_seq_cst);
	copy.total_calls =
		atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
	copy.total_cpu_warn =
		atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
	copy.total_wall_warn =
		atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
	copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
						     memory_order_seq_cst);
	copy.cpu.total =
		atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
	copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
	copy.real.total =
		atomic_load_explicit(&a->real.total, memory_order_seq_cst);
	copy.real.max =
		atomic_load_explicit(&a->real.max, memory_order_seq_cst);
	copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
	copy.funcname = a->funcname;

	if (!(copy.types & *filter))
		return;

	vty_out_cpu_thread_history(vty, &copy);
	totals->total_active += copy.total_active;
	totals->total_calls += copy.total_calls;
	totals->total_cpu_warn += copy.total_cpu_warn;
	totals->total_wall_warn += copy.total_wall_warn;
	totals->total_starv_warn += copy.total_starv_warn;
	totals->real.total += copy.real.total;
	if (totals->real.max < copy.real.max)
		totals->real.max = copy.real.max;
	totals->cpu.total += copy.cpu.total;
	if (totals->cpu.max < copy.cpu.max)
		totals->cpu.max = copy.cpu.max;
}
static void cpu_record_print(struct vty *vty, uint8_t filter)
{
	struct cpu_thread_history tmp;
	void *args[3] = {&tmp, vty, &filter};
	struct thread_master *m;
	struct listnode *ln;

	if (!cputime_enabled)
		vty_out(vty,
			"\n"
			"Collecting CPU time statistics is currently disabled. Following statistics\n"
			"will be zero or may display data from when collection was enabled. Use the\n"
			"  \"service cputime-stats\"  command to start collecting data.\n"
			"\nCounters and wallclock times are always maintained and should be accurate.\n");

	memset(&tmp, 0, sizeof(tmp));
	tmp.funcname = "TOTAL";
	tmp.types = filter;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			const char *name = m->name ? m->name : "main";

			char underline[strlen(name) + 1];
			memset(underline, '-', sizeof(underline));
			underline[sizeof(underline) - 1] = '\0';

			vty_out(vty, "\n");
			vty_out(vty, "Showing statistics for pthread %s\n",
				name);
			vty_out(vty, "-------------------------------%s\n",
				underline);
			vty_out(vty, "%30s %18s %18s\n", "",
				"CPU (user+system):", "Real (wall-clock):");
			vty_out(vty,
				"Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
			vty_out(vty, " Avg uSec Max uSecs");
			vty_out(vty,
				"  CPU_Warn Wall_Warn Starv_Warn   Type  Thread\n");

			if (m->cpu_record->count)
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_print,
					args);
			else
				vty_out(vty, "No data to display yet.\n");

			vty_out(vty, "\n");
		}
	}

	vty_out(vty, "\n");
	vty_out(vty, "Total thread statistics\n");
	vty_out(vty, "-------------------------\n");
	vty_out(vty, "%30s %18s %18s\n", "",
		"CPU (user+system):", "Real (wall-clock):");
	vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
	vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
	vty_out(vty, "   Type  Thread\n");

	if (tmp.total_calls > 0)
		vty_out_cpu_thread_history(vty, &tmp);
}
static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
{
	uint8_t *filter = args[0];
	struct hash *cpu_record = args[1];

	struct cpu_thread_history *a = bucket->data;

	if (!(a->types & *filter))
		return;

	hash_release(cpu_record, bucket->data);
}
static void cpu_record_clear(uint8_t filter)
{
	uint8_t *tmp = &filter;
	struct thread_master *m;
	struct listnode *ln;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			frr_with_mutex (&m->mtx) {
				void *args[2] = {tmp, m->cpu_record};

				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_clear,
					args);
			}
		}
	}
}
static uint8_t parse_filter(const char *filterstr)
{
	int i = 0;
	int filter = 0;

	while (filterstr[i] != '\0') {
		switch (filterstr[i]) {
		case 'r':
		case 'R':
			filter |= (1 << THREAD_READ);
			break;
		case 'w':
		case 'W':
			filter |= (1 << THREAD_WRITE);
			break;
		case 't':
		case 'T':
			filter |= (1 << THREAD_TIMER);
			break;
		case 'e':
		case 'E':
			filter |= (1 << THREAD_EVENT);
			break;
		case 'x':
		case 'X':
			filter |= (1 << THREAD_EXECUTE);
			break;
		default:
			break;
		}
		++i;
	}
	return filter;
}
DEFUN_NOSH (show_thread_cpu,
	    show_thread_cpu_cmd,
	    "show thread cpu [FILTER]",
	    SHOW_STR
	    "Thread information\n"
	    "Thread CPU usage\n"
	    "Display filter (rwtex)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_print(vty, filter);
	return CMD_SUCCESS;
}
DEFPY (service_cputime_stats,
       service_cputime_stats_cmd,
       "[no] service cputime-stats",
       NO_STR
       "Set up miscellaneous service\n"
       "Collect CPU usage statistics\n")
{
	cputime_enabled = !no;
	return CMD_SUCCESS;
}
DEFPY (service_cputime_warning,
       service_cputime_warning_cmd,
       "[no] service cputime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		cputime_threshold = 0;
	else
		cputime_threshold = cputime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_cputime_warning,
       no_service_cputime_warning_cmd,
       "no service cputime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n")
DEFPY (service_walltime_warning,
       service_walltime_warning_cmd,
       "[no] service walltime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		walltime_threshold = 0;
	else
		walltime_threshold = walltime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_walltime_warning,
       no_service_walltime_warning_cmd,
       "no service walltime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n")
static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct thread *thread;
	uint32_t i;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing poll FD's for %s\n", name);
	vty_out(vty, "----------------------%s\n", underline);
	vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
		m->fd_limit);
	for (i = 0; i < m->handler.pfdcount; i++) {
		vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
			m->handler.pfds[i].fd, m->handler.pfds[i].events,
			m->handler.pfds[i].revents);

		if (m->handler.pfds[i].events & POLLIN) {
			thread = m->read[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR ");
			else
				vty_out(vty, "%s ", thread->xref->funcname);
		} else
			vty_out(vty, " ");

		if (m->handler.pfds[i].events & POLLOUT) {
			thread = m->write[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR\n");
			else
				vty_out(vty, "%s\n", thread->xref->funcname);
		} else
			vty_out(vty, "\n");
	}
}

DEFUN_NOSH (show_thread_poll,
	    show_thread_poll_cmd,
	    "show thread poll",
	    SHOW_STR
	    "Thread information\n"
	    "Show poll FD's and information\n")
{
	struct listnode *node;
	struct thread_master *m;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
			show_thread_poll_helper(vty, m);
		}
	}

	return CMD_SUCCESS;
}
DEFUN (clear_thread_cpu,
       clear_thread_cpu_cmd,
       "clear thread cpu [FILTER]",
       "Clear stored data in all pthreads\n"
       "Thread information\n"
       "Thread CPU usage\n"
       "Display filter (rwtexb)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_clear(filter);
	return CMD_SUCCESS;
}
static void show_thread_timers_helper(struct vty *vty, struct thread_master *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct thread *thread;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing timers for %s\n", name);
	vty_out(vty, "-------------------%s\n", underline);

	frr_each (thread_timer_list, &m->timer, thread) {
		vty_out(vty, "  %-50s%pTH\n", thread->hist->funcname, thread);
	}
}

DEFPY_NOSH (show_thread_timers,
	    show_thread_timers_cmd,
	    "show thread timers",
	    SHOW_STR
	    "Thread information\n"
	    "Show all timers and how long they have in the system\n")
{
	struct listnode *node;
	struct thread_master *m;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m))
			show_thread_timers_helper(vty, m);
	}

	return CMD_SUCCESS;
}
void thread_cmd_init(void)
{
	install_element(VIEW_NODE, &show_thread_cpu_cmd);
	install_element(VIEW_NODE, &show_thread_poll_cmd);
	install_element(ENABLE_NODE, &clear_thread_cpu_cmd);

	install_element(CONFIG_NODE, &service_cputime_stats_cmd);
	install_element(CONFIG_NODE, &service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &service_walltime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);

	install_element(VIEW_NODE, &show_thread_timers_cmd);
}
/* CLI end ------------------------------------------------------------------ */
static void cancelreq_del(void *cr)
{
	XFREE(MTYPE_TMP, cr);
}

/* initializer, only ever called once */
static void initializer(void)
{
	pthread_key_create(&thread_current, NULL);
}
struct thread_master *thread_master_create(const char *name)
{
	struct thread_master *rv;
	struct rlimit limit;

	pthread_once(&init_once, &initializer);

	rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));

	/* Initialize master mutex */
	pthread_mutex_init(&rv->mtx, NULL);
	pthread_cond_init(&rv->cancel_cond, NULL);

	/* Set name */
	name = name ? name : "default";
	rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);

	/* Initialize I/O task data structures */

	/* Use configured limit if present, ulimit otherwise. */
	rv->fd_limit = frr_get_fd_limit();
	if (rv->fd_limit == 0) {
		getrlimit(RLIMIT_NOFILE, &limit);
		rv->fd_limit = (int)limit.rlim_cur;
	}

	rv->read = XCALLOC(MTYPE_THREAD_POLL,
			   sizeof(struct thread *) * rv->fd_limit);

	rv->write = XCALLOC(MTYPE_THREAD_POLL,
			    sizeof(struct thread *) * rv->fd_limit);

	char tmhashname[strlen(name) + 32];

	snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
		 name);
	rv->cpu_record = hash_create_size(
		8, (unsigned int (*)(const void *))cpu_record_hash_key,
		(bool (*)(const void *, const void *))cpu_record_hash_cmp,
		tmhashname);

	thread_list_init(&rv->event);
	thread_list_init(&rv->ready);
	thread_list_init(&rv->unuse);
	thread_timer_list_init(&rv->timer);

	/* Initialize thread_fetch() settings */
	rv->spin = true;
	rv->handle_signals = true;

	/* Set pthread owner, should be updated by actual owner */
	rv->owner = pthread_self();
	rv->cancel_req = list_new();
	rv->cancel_req->del = cancelreq_del;
	rv->canceled = true;

	/* Initialize pipe poker */
	pipe(rv->io_pipe);
	set_nonblocking(rv->io_pipe[0]);
	set_nonblocking(rv->io_pipe[1]);

	/* Initialize data structures for poll() */
	rv->handler.pfdsize = rv->fd_limit;
	rv->handler.pfdcount = 0;
	rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);
	rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);

	/* add to list of threadmasters */
	frr_with_mutex (&masters_mtx) {
		if (!masters)
			masters = list_new();

		listnode_add(masters, rv);
	}

	return rv;
}
void thread_master_set_name(struct thread_master *master, const char *name)
{
	frr_with_mutex (&master->mtx) {
		XFREE(MTYPE_THREAD_MASTER, master->name);
		master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
	}
}
#define THREAD_UNUSED_DEPTH 10

/* Move thread to unuse list. */
static void thread_add_unuse(struct thread_master *m, struct thread *thread)
{
	pthread_mutex_t mtxc = thread->mtx;

	assert(m != NULL && thread != NULL);

	thread->hist->total_active--;
	memset(thread, 0, sizeof(struct thread));
	thread->type = THREAD_UNUSED;

	/* Restore the thread mutex context. */
	thread->mtx = mtxc;

	if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
		thread_list_add_tail(&m->unuse, thread);
		return;
	}

	thread_free(m, thread);
}
/* Free all unused thread. */
static void thread_list_free(struct thread_master *m,
			     struct thread_list_head *list)
{
	struct thread *t;

	while ((t = thread_list_pop(list)))
		thread_free(m, t);
}

static void thread_array_free(struct thread_master *m,
			      struct thread **thread_array)
{
	struct thread *t;
	int index;

	for (index = 0; index < m->fd_limit; ++index) {
		t = thread_array[index];
		if (t) {
			thread_array[index] = NULL;
			thread_free(m, t);
		}
	}
	XFREE(MTYPE_THREAD_POLL, thread_array);
}
/*
 * thread_master_free_unused
 *
 * As threads are finished with, they are put on the
 * unuse list for later reuse.
 * If we are shutting down, free up unused threads
 * so we can see if we forgot to shut anything off.
 */
void thread_master_free_unused(struct thread_master *m)
{
	frr_with_mutex (&m->mtx) {
		struct thread *t;

		while ((t = thread_list_pop(&m->unuse)))
			thread_free(m, t);
	}
}
/* Stop thread scheduler. */
void thread_master_free(struct thread_master *m)
{
	struct thread *t;

	frr_with_mutex (&masters_mtx) {
		listnode_delete(masters, m);
		if (masters->count == 0) {
			list_delete(&masters);
		}
	}

	thread_array_free(m, m->read);
	thread_array_free(m, m->write);
	while ((t = thread_timer_list_pop(&m->timer)))
		thread_free(m, t);
	thread_list_free(m, &m->event);
	thread_list_free(m, &m->ready);
	thread_list_free(m, &m->unuse);
	pthread_mutex_destroy(&m->mtx);
	pthread_cond_destroy(&m->cancel_cond);
	close(m->io_pipe[0]);
	close(m->io_pipe[1]);
	list_delete(&m->cancel_req);
	m->cancel_req = NULL;

	hash_clean(m->cpu_record, cpu_record_hash_free);
	hash_free(m->cpu_record);
	m->cpu_record = NULL;

	XFREE(MTYPE_THREAD_MASTER, m->name);
	XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
	XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
	XFREE(MTYPE_THREAD_MASTER, m);
}
/* Return remain time in milliseconds. */
unsigned long thread_timer_remain_msec(struct thread *thread)
{
	int64_t remain;

	if (!thread_is_scheduled(thread))
		return 0;

	frr_with_mutex (&thread->mtx) {
		remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
	}

	return remain < 0 ? 0 : remain;
}

/* Return remain time in seconds. */
unsigned long thread_timer_remain_second(struct thread *thread)
{
	return thread_timer_remain_msec(thread) / 1000LL;
}

struct timeval thread_timer_remain(struct thread *thread)
{
	struct timeval remain;

	frr_with_mutex (&thread->mtx) {
		monotime_until(&thread->u.sands, &remain);
	}
	return remain;
}

static int time_hhmmss(char *buf, int buf_size, long sec)
{
	long hh;
	long mm;
	int wr;

	assert(buf_size >= 8);

	hh = sec / 3600;
	sec %= 3600;
	mm = sec / 60;
	sec %= 60;

	wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);

	return wr != 8;
}

char *thread_timer_to_hhmmss(char *buf, int buf_size,
			     struct thread *t_timer)
{
	if (t_timer) {
		time_hhmmss(buf, buf_size,
			    thread_timer_remain_second(t_timer));
	} else {
		snprintf(buf, buf_size, "--:--:--");
	}
	return buf;
}
/* Get new thread. */
static struct thread *thread_get(struct thread_master *m, uint8_t type,
				 void (*func)(struct thread *), void *arg,
				 const struct xref_threadsched *xref)
{
	struct thread *thread = thread_list_pop(&m->unuse);
	struct cpu_thread_history tmp;

	if (!thread) {
		thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
		/* mutex only needs to be initialized at struct creation. */
		pthread_mutex_init(&thread->mtx, NULL);
		m->alloc++;
	}

	thread->type = type;
	thread->add_type = type;
	thread->master = m;
	thread->arg = arg;
	thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
	thread->ref = NULL;
	thread->ignore_timer_late = false;

	/*
	 * So if the passed in funcname is not what we have
	 * stored that means the thread->hist needs to be
	 * updated.  We keep the last one around in unused
	 * under the assumption that we are probably
	 * going to immediately allocate the same
	 * one up again.  This hopefully saves us some serious
	 * hash_get lookups.
	 */
	if ((thread->xref && thread->xref->funcname != xref->funcname)
	    || thread->func != func) {
		tmp.func = func;
		tmp.funcname = xref->funcname;
		thread->hist =
			hash_get(m->cpu_record, &tmp,
				 (void *(*)(void *))cpu_record_hash_alloc);
	}
	thread->hist->total_active++;
	thread->func = func;
	thread->xref = xref;

	return thread;
}
static void thread_free(struct thread_master *master, struct thread *thread)
{
	/* Update statistics. */
	assert(master->alloc > 0);
	master->alloc--;

	/* Free allocated resources. */
	pthread_mutex_destroy(&thread->mtx);
	XFREE(MTYPE_THREAD, thread);
}
static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
		   bool *eintr_p)
{
	sigset_t origsigs;
	unsigned char trash[64];
	nfds_t count = m->handler.copycount;

	/*
	 * If timer_wait is null here, that means poll() should block
	 * indefinitely, unless the thread_master has overridden it by setting
	 * ->selectpoll_timeout.
	 *
	 * If the value is positive, it specifies the maximum number of
	 * milliseconds to wait. If the timeout is -1, it specifies that
	 * we should never wait and always return immediately even if no
	 * event is detected. If the value is zero, the behavior is default.
	 */
	int timeout = -1;

	/* number of file descriptors with events */
	int num;

	if (timer_wait != NULL
	    && m->selectpoll_timeout == 0) // use the default value
		timeout = (timer_wait->tv_sec * 1000)
			  + (timer_wait->tv_usec / 1000);
	else if (m->selectpoll_timeout > 0) // use the user's timeout
		timeout = m->selectpoll_timeout;
	else if (m->selectpoll_timeout
		 < 0) // effect a poll (return immediately)
		timeout = 0;

	zlog_tls_buffer_flush();
	rcu_read_unlock();
	rcu_assert_read_unlocked();

	/* add poll pipe poker */
	assert(count + 1 < m->handler.pfdsize);
	m->handler.copy[count].fd = m->io_pipe[0];
	m->handler.copy[count].events = POLLIN;
	m->handler.copy[count].revents = 0x00;

	/* We need to deal with a signal-handling race here: we
	 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
	 * that may arrive just before we enter poll(). We will block the
	 * key signals, then check whether any have arrived - if so, we return
	 * before calling poll(). If not, we'll re-enable the signals
	 * in the ppoll() call.
	 */

	sigemptyset(&origsigs);
	if (m->handle_signals) {
		/* Main pthread that handles the app signals */
		if (frr_sigevent_check(&origsigs)) {
			/* Signal to process - restore signal mask and return */
			pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
			num = -1;
			*eintr_p = true;
			goto done;
		}
	} else {
		/* Don't make any changes for the non-main pthreads */
		pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
	}

#if defined(HAVE_PPOLL)
	struct timespec ts, *tsp;

	if (timeout >= 0) {
		ts.tv_sec = timeout / 1000;
		ts.tv_nsec = (timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
#else
	/* Not ideal - there is a race after we restore the signal mask */
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
	num = poll(m->handler.copy, count + 1, timeout);
#endif

done:

	if (num < 0 && errno == EINTR)
		*eintr_p = true;

	if (num > 0 && m->handler.copy[count].revents != 0 && num--)
		while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
			;

	rcu_read_lock();

	return num;
}
/* Add new read thread. */
void _thread_add_read_write(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg, int fd,
			    struct thread **t_ptr)
{
	int dir = xref->thread_type;
	struct thread *thread = NULL;
	struct thread **thread_array;

	if (dir == THREAD_READ)
		frrtrace(9, frr_libfrr, schedule_read, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);
	else
		frrtrace(9, frr_libfrr, schedule_write, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);

	if (fd >= m->fd_limit)
		assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");

	frr_with_mutex (&m->mtx) {
		if (t_ptr && *t_ptr)
			// thread is already scheduled; don't reschedule
			break;

		/* default to a new pollfd */
		nfds_t queuepos = m->handler.pfdcount;

		if (dir == THREAD_READ)
			thread_array = m->read;
		else
			thread_array = m->write;

		/* if we already have a pollfd for our file descriptor, find and
		 * use it */
		for (nfds_t i = 0; i < m->handler.pfdcount; i++)
			if (m->handler.pfds[i].fd == fd) {
				queuepos = i;

#ifdef DEV_BUILD
				/*
				 * What happens if we have a thread already
				 * created for this event?
				 */
				if (thread_array[fd])
					assert(!"Thread already scheduled for file descriptor");
#endif
				break;
			}

		/* make sure we have room for this fd + pipe poker fd */
		assert(queuepos + 1 < m->handler.pfdsize);

		thread = thread_get(m, dir, func, arg, xref);

		m->handler.pfds[queuepos].fd = fd;
		m->handler.pfds[queuepos].events |=
			(dir == THREAD_READ ? POLLIN : POLLOUT);

		if (queuepos == m->handler.pfdcount)
			m->handler.pfdcount++;

		if (thread) {
			frr_with_mutex (&thread->mtx) {
				thread->u.fd = fd;
				thread_array[thread->u.fd] = thread;
			}

			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		AWAKEN(m);
	}
}
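/*
 * Usage sketch (illustration only, using the thread_add_read() convenience
 * macro and the THREAD_ARG()/THREAD_FD() accessors from thread.h): read
 * tasks are one-shot, so a handler typically re-arms itself.  The
 * back-reference (&sock->t_read; 'struct my_sock' is hypothetical) is what
 * lets cancellation clear the pointer and lets the "already scheduled"
 * check above avoid double-scheduling:
 *
 *	static void sock_read(struct thread *t)
 *	{
 *		struct my_sock *sock = THREAD_ARG(t);
 *
 *		// ... read from THREAD_FD(t) ...
 *		thread_add_read(t->master, sock_read, sock, THREAD_FD(t),
 *				&sock->t_read);
 *	}
 */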
static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
				      struct thread_master *m,
				      void (*func)(struct thread *), void *arg,
				      struct timeval *time_relative,
				      struct thread **t_ptr)
{
	struct thread *thread;
	struct timeval t;

	assert(m != NULL);

	assert(time_relative);

	frrtrace(9, frr_libfrr, schedule_timer, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);

	/* Compute expiration/deadline time. */
	monotime(&t);
	timeradd(&t, time_relative, &t);

	frr_with_mutex (&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			return;

		thread = thread_get(m, THREAD_TIMER, func, arg, xref);

		frr_with_mutex (&thread->mtx) {
			thread->u.sands = t;
			thread_timer_list_add(&m->timer, thread);
			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		/* The timer list is sorted - if this new timer
		 * might change the time we'll wait for, give the pthread
		 * a chance to re-compute.
		 */
		if (thread_timer_list_first(&m->timer) == thread)
			AWAKEN(m);
	}
#define ONEYEAR2SEC (60 * 60 * 24 * 365)
	if (time_relative->tv_sec > ONEYEAR2SEC)
		flog_err(
			EC_LIB_TIMER_TOO_LONG,
			"Timer: %pTHD is created with an expiration that is greater than 1 year",
			thread);
}
/* Add timer event thread. */
void _thread_add_timer(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer;
	trel.tv_usec = 0;

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "millisecond" resolution */
void _thread_add_timer_msec(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg,
			    long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer / 1000;
	trel.tv_usec = 1000 * (timer % 1000);

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "timeval" resolution */
void _thread_add_timer_tv(const struct xref_threadsched *xref,
			  struct thread_master *m,
			  void (*func)(struct thread *), void *arg,
			  struct timeval *tv, struct thread **t_ptr)
{
	_thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}
/* Add simple event thread. */
void _thread_add_event(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, int val, struct thread **t_ptr)
{
	struct thread *thread = NULL;

	frrtrace(9, frr_libfrr, schedule_event, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, val, arg, 0);

	assert(m != NULL);

	frr_with_mutex (&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			break;

		thread = thread_get(m, THREAD_EVENT, func, arg, xref);
		frr_with_mutex (&thread->mtx) {
			thread->u.val = val;
			thread_list_add_tail(&m->event, thread);
		}

		if (t_ptr) {
			*t_ptr = thread;
			thread->ref = t_ptr;
		}

		AWAKEN(m);
	}
}
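/*
 * Usage sketch (illustration only, via the thread_add_timer() /
 * thread_add_event() wrapper macros from thread.h): passing a back-reference
 * is what makes the "already scheduled" checks above and asynchronous
 * cancellation work.  'peer', 'holdtime_expire' and 't_holdtime' are
 * hypothetical names:
 *
 *	thread_add_timer(master, holdtime_expire, peer, 30,
 *			 &peer->t_holdtime);	// no-op if already pending
 *	thread_add_event(master, process_queue, peer, 0, NULL);
 */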
/* Thread cancellation ------------------------------------------------------ */

/**
 * NOT's out the .events field of pollfd corresponding to the given file
 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
 *
 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
 * implementation for details.
 *
 * @param master
 * @param fd
 * @param state the event to cancel. One or more (OR'd together) of the
 * following:
 *   - POLLIN
 *   - POLLOUT
 */
static void thread_cancel_rw(struct thread_master *master, int fd, short state,
			     int idx_hint)
{
	bool found = false;

	/* find the index of corresponding pollfd */
	nfds_t i;

	/* Cancel POLLHUP too just in case some bozo set it */
	state |= POLLHUP;

	/* Some callers know the index of the pfd already */
	if (idx_hint >= 0) {
		i = idx_hint;
		found = true;
	} else {
		/* Have to look for the fd in the pfd array */
		for (i = 0; i < master->handler.pfdcount; i++)
			if (master->handler.pfds[i].fd == fd) {
				found = true;
				break;
			}
	}

	if (!found) {
		zlog_debug(
			"[!] Received cancellation request for nonexistent rw job");
		zlog_debug("[!] threadmaster: %s | fd: %d",
			   master->name ? master->name : "", fd);
		return;
	}

	/* NOT out event. */
	master->handler.pfds[i].events &= ~(state);

	/* If all events are canceled, delete / resize the pollfd array. */
	if (master->handler.pfds[i].events == 0) {
		memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
			(master->handler.pfdcount - i - 1)
				* sizeof(struct pollfd));
		master->handler.pfdcount--;
		master->handler.pfds[master->handler.pfdcount].fd = 0;
		master->handler.pfds[master->handler.pfdcount].events = 0;
	}

	/* If we have the same pollfd in the copy, perform the same operations,
	 * otherwise return.
	 */
	if (i >= master->handler.copycount)
		return;

	master->handler.copy[i].events &= ~(state);

	if (master->handler.copy[i].events == 0) {
		memmove(master->handler.copy + i, master->handler.copy + i + 1,
			(master->handler.copycount - i - 1)
				* sizeof(struct pollfd));
		master->handler.copycount--;
		master->handler.copy[master->handler.copycount].fd = 0;
		master->handler.copy[master->handler.copycount].events = 0;
	}
}
/*
 * Process task cancellation given a task argument: iterate through the
 * various lists of tasks, looking for any that match the argument.
 */
static void cancel_arg_helper(struct thread_master *master,
			      const struct cancel_req *cr)
{
	struct thread *t;
	nfds_t i;
	int fd;
	struct pollfd *pfd;

	/* We're only processing arg-based cancellations here. */
	if (cr->eventobj == NULL)
		return;

	/* First process the ready lists. */
	frr_each_safe (thread_list, &master->event, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->event, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	frr_each_safe (thread_list, &master->ready, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->ready, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	/* If requested, stop here and ignore io and timers */
	if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
		return;

	/* Check the io tasks */
	for (i = 0; i < master->handler.pfdcount;) {
		pfd = master->handler.pfds + i;

		if (pfd->events & POLLIN)
			t = master->read[pfd->fd];
		else
			t = master->write[pfd->fd];

		if (t && t->arg == cr->eventobj) {
			fd = pfd->fd;

			/* Found a match to cancel: clean up fd arrays */
			thread_cancel_rw(master, pfd->fd, pfd->events, i);

			/* Clean up thread arrays */
			master->read[fd] = NULL;
			master->write[fd] = NULL;

			/* Clear caller's ref */
			if (t->ref)
				*t->ref = NULL;

			thread_add_unuse(master, t);

			/* Don't increment 'i' since the cancellation will have
			 * removed the entry from the pfd array
			 */
		} else
			i++;
	}

	/* Check the timer tasks */
	t = thread_timer_list_first(&master->timer);
	while (t) {
		struct thread *t_next;

		t_next = thread_timer_list_next(&master->timer, t);

		if (t->arg == cr->eventobj) {
			thread_timer_list_del(&master->timer, t);
			if (t->ref)
				*t->ref = NULL;
			thread_add_unuse(master, t);
		}

		t = t_next;
	}
}
/**
 * Process cancellation requests.
 *
 * This may only be run from the pthread which owns the thread_master.
 *
 * @param master the thread master to process
 * @REQUIRE master->mtx
 */
static void do_thread_cancel(struct thread_master *master)
{
	struct thread_list_head *list = NULL;
	struct thread **thread_array = NULL;
	struct thread *thread;
	struct cancel_req *cr;
	struct listnode *ln;

	for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
		/*
		 * If this is an event object cancellation, search
		 * through task lists deleting any tasks which have the
		 * specified argument - use this handy helper function.
		 */
		if (cr->eventobj) {
			cancel_arg_helper(master, cr);
			continue;
		}

		/*
		 * The pointer varies depending on whether the cancellation
		 * request was made asynchronously or not. If it was, we
		 * need to check whether the thread even exists anymore
		 * before cancelling it.
		 */
		thread = (cr->thread) ? cr->thread : *cr->threadref;

		if (!thread)
			continue;

		list = NULL;
		thread_array = NULL;

		/* Determine the appropriate queue to cancel the thread from */
		switch (thread->type) {
		case THREAD_READ:
			thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
			thread_array = master->read;
			break;
		case THREAD_WRITE:
			thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
			thread_array = master->write;
			break;
		case THREAD_TIMER:
			thread_timer_list_del(&master->timer, thread);
			break;
		case THREAD_EVENT:
			list = &master->event;
			break;
		case THREAD_READY:
			list = &master->ready;
			break;
		default:
			continue;
		}

		if (list) {
			thread_list_del(list, thread);
		} else if (thread_array) {
			thread_array[thread->u.fd] = NULL;
		}

		if (thread->ref)
			*thread->ref = NULL;

		thread_add_unuse(thread->master, thread);
	}

	/* Delete and free all cancellation requests */
	if (master->cancel_req)
		list_delete_all_node(master->cancel_req);

	/* Wake up any threads which may be blocked in thread_cancel_async() */
	master->canceled = true;
	pthread_cond_broadcast(&master->cancel_cond);
}
/*
 * Helper function used for multiple flavors of arg-based cancellation.
 */
static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
{
	struct cancel_req *cr;

	assert(m->owner == pthread_self());

	/* Only worth anything if caller supplies an arg. */
	if (arg == NULL)
		return;

	cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));

	cr->flags = flags;

	frr_with_mutex (&m->mtx) {
		cr->eventobj = arg;
		listnode_add(m->cancel_req, cr);
		do_thread_cancel(m);
	}
}
/*
 * Cancel any events which have the specified argument.
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event(struct thread_master *master, void *arg)
{
	cancel_event_helper(master, arg, 0);
}

/*
 * Cancel ready tasks with an arg matching 'arg'
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event_ready(struct thread_master *m, void *arg)
{

	/* Only cancel ready/event tasks */
	cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
}
/*
 * Cancel a specific task.
 *
 * MT-Unsafe
 *
 * @param thread task to cancel
 */
void thread_cancel(struct thread **thread)
{
	struct thread_master *master;

	if (thread == NULL || *thread == NULL)
		return;

	master = (*thread)->master;

	frrtrace(9, frr_libfrr, thread_cancel, master,
		 (*thread)->xref->funcname, (*thread)->xref->xref.file,
		 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
		 (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);

	assert(master->owner == pthread_self());

	frr_with_mutex (&master->mtx) {
		struct cancel_req *cr =
			XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
		cr->thread = *thread;
		listnode_add(master->cancel_req, cr);
		do_thread_cancel(master);
	}

	*thread = NULL;
}
/*
 * Asynchronous cancellation.
 *
 * Called with either a struct thread ** or void * to an event argument,
 * this function posts the correct cancellation request and blocks until it is
 * serviced.
 *
 * If the thread is currently running, execution blocks until it completes.
 *
 * The last two parameters are mutually exclusive, i.e. if you pass one the
 * other must be NULL.
 *
 * When the cancellation procedure executes on the target thread_master, the
 * thread * provided is checked for nullity. If it is null, the thread is
 * assumed to no longer exist and the cancellation request is a no-op. Thus
 * users of this API must pass a back-reference when scheduling the original
 * task.
 *
 * MT-Safe
 *
 * @param master the thread master with the relevant event / task
 * @param thread pointer to thread to cancel
 * @param eventobj the event
 */
void thread_cancel_async(struct thread_master *master, struct thread **thread,
			 void *eventobj)
{
	assert(!(thread && eventobj) && (thread || eventobj));

	if (thread && *thread)
		frrtrace(9, frr_libfrr, thread_cancel_async, master,
			 (*thread)->xref->funcname, (*thread)->xref->xref.file,
			 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
			 (*thread)->u.val, (*thread)->arg,
			 (*thread)->u.sands.tv_sec);
	else
		frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
			 0, NULL, 0, 0, eventobj, 0);

	assert(master->owner != pthread_self());

	frr_with_mutex (&master->mtx) {
		master->canceled = false;

		if (thread) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->threadref = thread;
			listnode_add(master->cancel_req, cr);
		} else if (eventobj) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->eventobj = eventobj;
			listnode_add(master->cancel_req, cr);
		}
		AWAKEN(master);

		while (!master->canceled)
			pthread_cond_wait(&master->cancel_cond, &master->mtx);
	}

	if (thread)
		*thread = NULL;
}
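/*
 * Usage sketch (illustration only): from a pthread that does NOT own
 * 'master', cancel a task that was scheduled with a back-reference and
 * block until the owning pthread has serviced the request
 * ('peer->t_holdtime' is a hypothetical field):
 *
 *	thread_cancel_async(master, &peer->t_holdtime, NULL);
 *	// on return, peer->t_holdtime is NULL and the task will not run
 */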
/* ------------------------------------------------------------------------- */

static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
					 struct timeval *timer_val)
{
	if (!thread_timer_list_count(timers))
		return NULL;

	struct thread *next_timer = thread_timer_list_first(timers);
	monotime_until(&next_timer->u.sands, timer_val);
	return timer_val;
}
static struct thread *thread_run(struct thread_master *m, struct thread *thread,
				 struct thread *fetch)
{
	*fetch = *thread;
	thread_add_unuse(m, thread);
	return fetch;
}
static int thread_process_io_helper(struct thread_master *m,
				    struct thread *thread, short state,
				    short actual_state, int pos)
{
	struct thread **thread_array;

	/*
	 * poll() clears the .events field, but the pollfd array we
	 * pass to poll() is a copy of the one used to schedule threads.
	 * We need to synchronize state between the two here by applying
	 * the same changes poll() made on the copy of the "real" pollfd
	 * array.
	 *
	 * This cleans up a possible infinite loop where we refuse
	 * to respond to a poll event but poll is insistent that
	 * we should.
	 */
	m->handler.pfds[pos].events &= ~(state);

	if (!thread) {
		if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
			flog_err(EC_LIB_NO_THREAD,
				 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
				 m->handler.pfds[pos].fd, actual_state);
		return 0;
	}

	if (thread->type == THREAD_READ)
		thread_array = m->read;
	else
		thread_array = m->write;

	thread_array[thread->u.fd] = NULL;
	thread_list_add_tail(&m->ready, thread);
	thread->type = THREAD_READY;

	return 1;
}
/**
 * Process I/O events.
 *
 * Walks through file descriptor array looking for those pollfds whose .revents
 * field has something interesting. Deletes any invalid file descriptors.
 *
 * @param m the thread master
 * @param num the number of active file descriptors (return value of poll())
 */
static void thread_process_io(struct thread_master *m, unsigned int num)
{
	unsigned int ready = 0;
	struct pollfd *pfds = m->handler.copy;

	for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
		/* no event for current fd? immediately continue */
		if (pfds[i].revents == 0)
			continue;

		ready++;

		/*
		 * Unless someone has called thread_cancel from another
		 * pthread, the only thing that could have changed in
		 * m->handler.pfds while we were asleep is the .events
		 * field in a given pollfd. Barring thread_cancel() that
		 * value should be a superset of the values we have in our
		 * copy, so there's no need to update it. Similarly,
		 * barring deletion, the fd should still be a valid index
		 * into the master's pfds.
		 *
		 * We are including POLLERR here to do a READ event
		 * this is because the read should fail and the
		 * read function should handle it appropriately
		 */
		if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
			thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
						 pfds[i].revents, i);
		}
		if (pfds[i].revents & POLLOUT)
			thread_process_io_helper(m, m->write[pfds[i].fd],
						 POLLOUT, pfds[i].revents, i);

		/* if one of our file descriptors is garbage, remove the same
		 * from both pfds + update sizes and index */
		if (pfds[i].revents & POLLNVAL) {
			memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
				(m->handler.pfdcount - i - 1)
					* sizeof(struct pollfd));
			m->handler.pfdcount--;
			m->handler.pfds[m->handler.pfdcount].fd = 0;
			m->handler.pfds[m->handler.pfdcount].events = 0;

			memmove(pfds + i, pfds + i + 1,
				(m->handler.copycount - i - 1)
					* sizeof(struct pollfd));
			m->handler.copycount--;
			m->handler.copy[m->handler.copycount].fd = 0;
			m->handler.copy[m->handler.copycount].events = 0;

			i--;
		}
	}
}
/* Add all timers that have popped to the ready list. */
static unsigned int thread_process_timers(struct thread_master *m,
					  struct timeval *timenow)
{
	struct timeval prev = *timenow;
	bool displayed = false;
	struct thread *thread;
	unsigned int ready = 0;

	while ((thread = thread_timer_list_first(&m->timer))) {
		if (timercmp(timenow, &thread->u.sands, <))
			break;
		prev = thread->u.sands;
		prev.tv_sec += 4;
		/*
		 * If the timer would have popped 4 seconds in the
		 * past then we are in a situation where we are
		 * really getting behind on handling of events.
		 * Let's log it and do the right thing with it.
		 */
		if (timercmp(timenow, &prev, >)) {
			atomic_fetch_add_explicit(
				&thread->hist->total_starv_warn, 1,
				memory_order_seq_cst);
			if (!displayed && !thread->ignore_timer_late) {
				flog_warn(
					EC_LIB_STARVE_THREAD,
					"Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
					thread);
				displayed = true;
			}
		}

		thread_timer_list_pop(&m->timer);
		thread->type = THREAD_READY;
		thread_list_add_tail(&m->ready, thread);
		ready++;
	}

	return ready;
}
/* process a list en masse, e.g. for event thread lists */
static unsigned int thread_process(struct thread_list_head *list)
{
	struct thread *thread;
	unsigned int ready = 0;

	while ((thread = thread_list_pop(list))) {
		thread->type = THREAD_READY;
		thread_list_add_tail(&thread->master->ready, thread);
		ready++;
	}
	return ready;
}
/* Fetch next ready thread. */
struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
{
	struct thread *thread = NULL;
	struct timeval now;
	struct timeval zerotime = {0, 0};
	struct timeval tv;
	struct timeval *tw = NULL;
	bool eintr_p = false;
	int num = 0;

	do {
		/* Handle signals if any */
		if (m->handle_signals)
			frr_sigevent_process();

		pthread_mutex_lock(&m->mtx);

		/* Process any pending cancellation requests */
		do_thread_cancel(m);

		/*
		 * Attempt to flush ready queue before going into poll().
		 * This is performance-critical. Think twice before modifying.
		 */
		if ((thread = thread_list_pop(&m->ready))) {
			fetch = thread_run(m, thread, fetch);
			if (fetch->ref)
				*fetch->ref = NULL;
			pthread_mutex_unlock(&m->mtx);
			if (!m->ready_run_loop)
				GETRUSAGE(&m->last_getrusage);
			m->ready_run_loop = true;
			break;
		}

		m->ready_run_loop = false;
		/* otherwise, tick through scheduling sequence */

		/*
		 * Post events to ready queue. This must come before the
		 * following block since events should occur immediately
		 */
		thread_process(&m->event);

		/*
		 * If there are no tasks on the ready queue, we will poll()
		 * until a timer expires or we receive I/O, whichever comes
		 * first. The strategy for doing this is:
		 *
		 * - If there are events pending, set the poll() timeout to zero
		 * - If there are no events pending, but there are timers
		 *   pending, set the timeout to the smallest remaining time on
		 *   any timer.
		 * - If there are neither timers nor events pending, but there
		 *   are file descriptors pending, block indefinitely in poll()
		 * - If nothing is pending, it's time for the application to die
		 *
		 * In every case except the last, we need to hit poll() at least
		 * once per loop to avoid starvation by events
		 */
		if (!thread_list_count(&m->ready))
			tw = thread_timer_wait(&m->timer, &tv);

		if (thread_list_count(&m->ready) ||
		    (tw && !timercmp(tw, &zerotime, >)))
			tw = &zerotime;

		if (!tw && m->handler.pfdcount == 0) { /* die */
			pthread_mutex_unlock(&m->mtx);
			fetch = NULL;
			break;
		}

		/*
		 * Copy pollfd array + # active pollfds in it. Not necessary to
		 * copy the array size as this is fixed.
		 */
		m->handler.copycount = m->handler.pfdcount;
		memcpy(m->handler.copy, m->handler.pfds,
		       m->handler.copycount * sizeof(struct pollfd));

		pthread_mutex_unlock(&m->mtx);
		{
			eintr_p = false;
			num = fd_poll(m, tw, &eintr_p);
		}
		pthread_mutex_lock(&m->mtx);

		/* Handle any errors received in poll() */
		if (num < 0) {
			if (eintr_p) {
				pthread_mutex_unlock(&m->mtx);
				/* loop around to signal handler */
				continue;
			}

			/* else die */
			flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
				 safe_strerror(errno));
			pthread_mutex_unlock(&m->mtx);
			fetch = NULL;
			break;
		}

		/* Post timers to ready queue. */
		monotime(&now);
		thread_process_timers(m, &now);

		/* Post I/O to ready queue. */
		if (num > 0)
			thread_process_io(m, num);

		pthread_mutex_unlock(&m->mtx);

	} while (!thread && m->spin);

	return fetch;
}
static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
{
	return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
		+ (a.tv_usec - b.tv_usec));
}
unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
				   unsigned long *cputime)
{
#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID

	/*
	 * FreeBSD appears to have an issue when calling clock_gettime
	 * with CLOCK_THREAD_CPUTIME_ID really close to each other;
	 * occasionally the now time will be before the start time.
	 * This is not good and FRR is ending up with CPU HOG's
	 * when the subtraction wraps to very large numbers.
	 *
	 * What we are going to do here is cheat a little bit
	 * and notice that this is a problem and just correct
	 * it so that it is impossible to happen.
	 */
	if (start->cpu.tv_sec == now->cpu.tv_sec &&
	    start->cpu.tv_nsec > now->cpu.tv_nsec)
		now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
	else if (start->cpu.tv_sec > now->cpu.tv_sec) {
		now->cpu.tv_sec = start->cpu.tv_sec;
		now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
	}

	*cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
		   + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
#else
	/* This is 'user + sys' time. */
	*cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
		   + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
#endif
	return timeval_elapsed(now->real, start->real);
}
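/*
 * Worked example (illustration only): both the return value and *cputime
 * are in microseconds.  Assuming the default CONSUMED_TIME_CHECK of
 * 5000000us above, a task that consumed 6.2s of CPU yields
 * *cputime = 6200000, which exceeds cputime_threshold and trips the
 * "CPU HOG" warning in thread_call() below.
 */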
/*
 * We should aim to yield after yield milliseconds, which defaults
 * to THREAD_YIELD_TIME_SLOT.
 * Note: we are using real (wall clock) time for this calculation.
 * It could be argued that CPU time may make more sense in certain
 * contexts.  The things to consider are whether the thread may have
 * blocked (in which case wall time increases, but CPU time does not),
 * or whether the system is heavily loaded with other processes competing
 * for CPU time.  On balance, wall clock time seems to make sense.
 * Plus it has the added benefit that gettimeofday should be faster
 * than calling getrusage.
 */
int thread_should_yield(struct thread *thread)
{
	int result;

	frr_with_mutex (&thread->mtx) {
		result = monotime_since(&thread->real, NULL)
			 > (int64_t)thread->yield;
	}
	return result;
}
void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
{
	frr_with_mutex (&thread->mtx) {
		thread->yield = yield_time;
	}
}
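/*
 * Usage sketch (illustration only): a long-running background job checks
 * thread_should_yield() periodically and reschedules itself as an event so
 * other tasks can run.  'struct wq' and its helpers are hypothetical:
 *
 *	static void work_queue_run(struct thread *t)
 *	{
 *		struct wq *wq = THREAD_ARG(t);
 *
 *		while (wq_has_items(wq)) {
 *			wq_process_one(wq);
 *			if (thread_should_yield(t)) {
 *				thread_add_event(t->master, work_queue_run,
 *						 wq, 0, &wq->t_run);
 *				return;
 *			}
 *		}
 *	}
 */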
void thread_getrusage(RUSAGE_T *r)
{
	monotime(&r->real);
	if (!cputime_enabled) {
		memset(&r->cpu, 0, sizeof(r->cpu));
		return;
	}

#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
	/* not currently implemented in Linux's vDSO, but maybe at some point
	 * in the future?
	 */
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
#if defined RUSAGE_THREAD
#define FRR_RUSAGE RUSAGE_THREAD
#else
#define FRR_RUSAGE RUSAGE_SELF
#endif
	getrusage(FRR_RUSAGE, &(r->cpu));
#endif
}
/*
 * This function will atomically update the thread's usage history. At present
 * this is the only spot where usage history is written. Nevertheless the code
 * has been written such that the introduction of writers in the future should
 * not need to update it provided the writers atomically perform only the
 * operations done here, i.e. updating the total and maximum times. In
 * particular, the maximum real and cpu times must be monotonically increasing
 * or this code is not correct.
 */
void thread_call(struct thread *thread)
{
	RUSAGE_T before, after;

	/* if the thread being called is the CLI, it may change cputime_enabled
	 * ("service cputime-stats" command), which can result in nonsensical
	 * and very confusing warnings
	 */
	bool cputime_enabled_here = cputime_enabled;

	if (thread->master->ready_run_loop)
		before = thread->master->last_getrusage;
	else
		GETRUSAGE(&before);

	thread->real = before.real;

	frrtrace(9, frr_libfrr, thread_call, thread->master,
		 thread->xref->funcname, thread->xref->xref.file,
		 thread->xref->xref.line, NULL, thread->u.fd,
		 thread->u.val, thread->arg, thread->u.sands.tv_sec);

	pthread_setspecific(thread_current, thread);
	(*thread->func)(thread);
	pthread_setspecific(thread_current, NULL);

	GETRUSAGE(&after);
	thread->master->last_getrusage = after;

	unsigned long walltime, cputime;
	unsigned long exp;

	walltime = thread_consumed_time(&after, &before, &cputime);

	/* update walltime */
	atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
				  memory_order_seq_cst);
	exp = atomic_load_explicit(&thread->hist->real.max,
				   memory_order_seq_cst);
	while (exp < walltime
	       && !atomic_compare_exchange_weak_explicit(
		       &thread->hist->real.max, &exp, walltime,
		       memory_order_seq_cst, memory_order_seq_cst))
		;

	if (cputime_enabled_here && cputime_enabled) {
		/* update cputime */
		atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
					  memory_order_seq_cst);
		exp = atomic_load_explicit(&thread->hist->cpu.max,
					   memory_order_seq_cst);
		while (exp < cputime
		       && !atomic_compare_exchange_weak_explicit(
			       &thread->hist->cpu.max, &exp, cputime,
			       memory_order_seq_cst, memory_order_seq_cst))
			;
	}

	atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
				  memory_order_seq_cst);
	atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
				 memory_order_seq_cst);

	if (cputime_enabled_here && cputime_enabled && cputime_threshold
	    && cputime > cputime_threshold) {
		/*
		 * We have a CPU Hog on our hands.  The time FRR has spent
		 * doing actual work (not sleeping) is greater than 5 seconds.
		 * Whinge about it now, so we're aware this is yet another task
		 * to fix.
		 */
		atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
					  1, memory_order_seq_cst);
		flog_warn(
			EC_LIB_SLOW_THREAD_CPU,
			"CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
			thread->xref->funcname, (unsigned long)thread->func,
			walltime / 1000, cputime / 1000);
	} else if (walltime_threshold && walltime > walltime_threshold) {
		/*
		 * The runtime for a task is greater than 5 seconds, but the
		 * cpu time is under 5 seconds.  Let's whine about this because
		 * this could imply some sort of scheduling issue.
		 */
		atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
					  1, memory_order_seq_cst);
		flog_warn(
			EC_LIB_SLOW_THREAD_WALL,
			"STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
			thread->xref->funcname, (unsigned long)thread->func,
			walltime / 1000, cputime / 1000);
	}
}
/* Execute thread */
void _thread_execute(const struct xref_threadsched *xref,
		     struct thread_master *m, void (*func)(struct thread *),
		     void *arg, int val)
{
	struct thread *thread;

	/* Get or allocate new thread to execute. */
	frr_with_mutex (&m->mtx) {
		thread = thread_get(m, THREAD_EVENT, func, arg, xref);

		/* Set its event value. */
		frr_with_mutex (&thread->mtx) {
			thread->add_type = THREAD_EXECUTE;
			thread->u.val = val;
			thread->ref = &thread;
		}
	}

	/* Execute thread doing all accounting. */
	thread_call(thread);

	/* Give back or free thread. */
	thread_add_unuse(m, thread);
}
/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
void debug_signals(const sigset_t *sigs)
{
	int i, found;
	sigset_t tmpsigs;
	char buf[300];

	/*
	 * We're only looking at the non-realtime signals here, so we need
	 * some limit value. Platform differences mean at some point we just
	 * need to pick a reasonable value.
	 */
#if defined SIGRTMIN
#  define LAST_SIGNAL SIGRTMIN
#else
#  define LAST_SIGNAL 32
#endif

	if (sigs == NULL) {
		sigemptyset(&tmpsigs);
		pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
		sigs = &tmpsigs;
	}

	found = 0;
	buf[0] = '\0';

	for (i = 0; i < LAST_SIGNAL; i++) {
		char tmp[20];

		if (sigismember(sigs, i) > 0) {
			if (found > 0)
				strlcat(buf, ",", sizeof(buf));
			snprintf(tmp, sizeof(tmp), "%d", i);
			strlcat(buf, tmp, sizeof(buf));
			found++;
		}
	}

	if (found == 0)
		snprintf(buf, sizeof(buf), "<none>");

	zlog_debug("%s: %s", __func__, buf);
}
static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
				   const struct thread *thread)
{
	static const char * const types[] = {
		[THREAD_READ] = "read",
		[THREAD_WRITE] = "write",
		[THREAD_TIMER] = "timer",
		[THREAD_EVENT] = "event",
		[THREAD_READY] = "ready",
		[THREAD_UNUSED] = "unused",
		[THREAD_EXECUTE] = "exec",
	};
	ssize_t rv = 0;
	char info[16] = "";

	if (!thread)
		return bputs(buf, "{(thread *)NULL}");

	rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);

	if (thread->type < array_size(types) && types[thread->type])
		rv += bprintfrr(buf, " %-6s", types[thread->type]);
	else
		rv += bprintfrr(buf, " INVALID(%u)", thread->type);

	switch (thread->type) {
	case THREAD_READ:
	case THREAD_WRITE:
		snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
		break;

	case THREAD_TIMER:
		snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
		break;
	}

	rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
			thread->xref->funcname, thread->xref->dest,
			thread->xref->xref.file, thread->xref->xref.line);
	return rv;
}
);
2178 static ssize_t
printfrr_thread(struct fbuf
*buf
, struct printfrr_eargs
*ea
,
2181 const struct thread
*thread
= ptr
;
2182 struct timespec remain
= {};
2184 if (ea
->fmt
[0] == 'D') {
2186 return printfrr_thread_dbg(buf
, ea
, thread
);
2190 /* need to jump over time formatting flag characters in the
2191 * input format string, i.e. adjust ea->fmt!
2193 printfrr_time(buf
, ea
, &remain
,
2194 TIMEFMT_TIMER_DEADLINE
| TIMEFMT_SKIP
);
2195 return bputch(buf
, '-');
2198 TIMEVAL_TO_TIMESPEC(&thread
->u
.sands
, &remain
);
2199 return printfrr_time(buf
, ea
, &remain
, TIMEFMT_TIMER_DEADLINE
);