1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Thread management routine
3 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
9 #include <sys/resource.h>
20 #include "frratomic.h"
21 #include "frr_pthread.h"
22 #include "lib_errors.h"
23 #include "libfrr_trace.h"
26 DEFINE_MTYPE_STATIC(LIB
, THREAD
, "Thread");
27 DEFINE_MTYPE_STATIC(LIB
, THREAD_MASTER
, "Thread master");
28 DEFINE_MTYPE_STATIC(LIB
, THREAD_POLL
, "Thread Poll Info");
29 DEFINE_MTYPE_STATIC(LIB
, THREAD_STATS
, "Thread stats");
31 DECLARE_LIST(thread_list
, struct event
, threaditem
);
37 struct event
**threadref
;
40 /* Flags for task cancellation */
41 #define THREAD_CANCEL_FLAG_READY 0x01
43 static int thread_timer_cmp(const struct event
*a
, const struct event
*b
)
45 if (a
->u
.sands
.tv_sec
< b
->u
.sands
.tv_sec
)
47 if (a
->u
.sands
.tv_sec
> b
->u
.sands
.tv_sec
)
49 if (a
->u
.sands
.tv_usec
< b
->u
.sands
.tv_usec
)
51 if (a
->u
.sands
.tv_usec
> b
->u
.sands
.tv_usec
)
56 DECLARE_HEAP(thread_timer_list
, struct event
, timeritem
, thread_timer_cmp
);
58 #if defined(__APPLE__)
59 #include <mach/mach.h>
60 #include <mach/mach_time.h>
65 const unsigned char wakebyte = 0x01; \
66 write(m->io_pipe[1], &wakebyte, 1); \
69 /* control variable for initializer */
70 static pthread_once_t init_once
= PTHREAD_ONCE_INIT
;
71 pthread_key_t thread_current
;
73 static pthread_mutex_t masters_mtx
= PTHREAD_MUTEX_INITIALIZER
;
74 static struct list
*masters
;
76 static void thread_free(struct thread_master
*master
, struct event
*thread
);
78 #ifndef EXCLUDE_CPU_TIME
79 #define EXCLUDE_CPU_TIME 0
81 #ifndef CONSUMED_TIME_CHECK
82 #define CONSUMED_TIME_CHECK 0
85 bool cputime_enabled
= !EXCLUDE_CPU_TIME
;
86 unsigned long cputime_threshold
= CONSUMED_TIME_CHECK
;
87 unsigned long walltime_threshold
= CONSUMED_TIME_CHECK
;
89 /* CLI start ---------------------------------------------------------------- */
90 #include "lib/event_clippy.c"
92 static unsigned int cpu_record_hash_key(const struct cpu_thread_history
*a
)
94 int size
= sizeof(a
->func
);
96 return jhash(&a
->func
, size
, 0);
99 static bool cpu_record_hash_cmp(const struct cpu_thread_history
*a
,
100 const struct cpu_thread_history
*b
)
102 return a
->func
== b
->func
;
105 static void *cpu_record_hash_alloc(struct cpu_thread_history
*a
)
107 struct cpu_thread_history
*new;
108 new = XCALLOC(MTYPE_THREAD_STATS
, sizeof(struct cpu_thread_history
));
110 new->funcname
= a
->funcname
;
114 static void cpu_record_hash_free(void *a
)
116 struct cpu_thread_history
*hist
= a
;
118 XFREE(MTYPE_THREAD_STATS
, hist
);
121 static void vty_out_cpu_thread_history(struct vty
*vty
,
122 struct cpu_thread_history
*a
)
125 "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
126 a
->total_active
, a
->cpu
.total
/ 1000, a
->cpu
.total
% 1000,
127 a
->total_calls
, (a
->cpu
.total
/ a
->total_calls
), a
->cpu
.max
,
128 (a
->real
.total
/ a
->total_calls
), a
->real
.max
,
129 a
->total_cpu_warn
, a
->total_wall_warn
, a
->total_starv_warn
);
130 vty_out(vty
, " %c%c%c%c%c %s\n",
131 a
->types
& (1 << THREAD_READ
) ? 'R' : ' ',
132 a
->types
& (1 << THREAD_WRITE
) ? 'W' : ' ',
133 a
->types
& (1 << THREAD_TIMER
) ? 'T' : ' ',
134 a
->types
& (1 << THREAD_EVENT
) ? 'E' : ' ',
135 a
->types
& (1 << THREAD_EXECUTE
) ? 'X' : ' ', a
->funcname
);
138 static void cpu_record_hash_print(struct hash_bucket
*bucket
, void *args
[])
140 struct cpu_thread_history
*totals
= args
[0];
141 struct cpu_thread_history copy
;
142 struct vty
*vty
= args
[1];
143 uint8_t *filter
= args
[2];
145 struct cpu_thread_history
*a
= bucket
->data
;
148 atomic_load_explicit(&a
->total_active
, memory_order_seq_cst
);
150 atomic_load_explicit(&a
->total_calls
, memory_order_seq_cst
);
151 copy
.total_cpu_warn
=
152 atomic_load_explicit(&a
->total_cpu_warn
, memory_order_seq_cst
);
153 copy
.total_wall_warn
=
154 atomic_load_explicit(&a
->total_wall_warn
, memory_order_seq_cst
);
155 copy
.total_starv_warn
= atomic_load_explicit(&a
->total_starv_warn
,
156 memory_order_seq_cst
);
158 atomic_load_explicit(&a
->cpu
.total
, memory_order_seq_cst
);
159 copy
.cpu
.max
= atomic_load_explicit(&a
->cpu
.max
, memory_order_seq_cst
);
161 atomic_load_explicit(&a
->real
.total
, memory_order_seq_cst
);
163 atomic_load_explicit(&a
->real
.max
, memory_order_seq_cst
);
164 copy
.types
= atomic_load_explicit(&a
->types
, memory_order_seq_cst
);
165 copy
.funcname
= a
->funcname
;
167 if (!(copy
.types
& *filter
))
170 vty_out_cpu_thread_history(vty
, ©
);
171 totals
->total_active
+= copy
.total_active
;
172 totals
->total_calls
+= copy
.total_calls
;
173 totals
->total_cpu_warn
+= copy
.total_cpu_warn
;
174 totals
->total_wall_warn
+= copy
.total_wall_warn
;
175 totals
->total_starv_warn
+= copy
.total_starv_warn
;
176 totals
->real
.total
+= copy
.real
.total
;
177 if (totals
->real
.max
< copy
.real
.max
)
178 totals
->real
.max
= copy
.real
.max
;
179 totals
->cpu
.total
+= copy
.cpu
.total
;
180 if (totals
->cpu
.max
< copy
.cpu
.max
)
181 totals
->cpu
.max
= copy
.cpu
.max
;
184 static void cpu_record_print(struct vty
*vty
, uint8_t filter
)
186 struct cpu_thread_history tmp
;
187 void *args
[3] = {&tmp
, vty
, &filter
};
188 struct thread_master
*m
;
191 if (!cputime_enabled
)
194 "Collecting CPU time statistics is currently disabled. Following statistics\n"
195 "will be zero or may display data from when collection was enabled. Use the\n"
196 " \"service cputime-stats\" command to start collecting data.\n"
197 "\nCounters and wallclock times are always maintained and should be accurate.\n");
199 memset(&tmp
, 0, sizeof(tmp
));
200 tmp
.funcname
= "TOTAL";
203 frr_with_mutex (&masters_mtx
) {
204 for (ALL_LIST_ELEMENTS_RO(masters
, ln
, m
)) {
205 const char *name
= m
->name
? m
->name
: "main";
207 char underline
[strlen(name
) + 1];
208 memset(underline
, '-', sizeof(underline
));
209 underline
[sizeof(underline
) - 1] = '\0';
212 vty_out(vty
, "Showing statistics for pthread %s\n",
214 vty_out(vty
, "-------------------------------%s\n",
216 vty_out(vty
, "%30s %18s %18s\n", "",
217 "CPU (user+system):", "Real (wall-clock):");
219 "Active Runtime(ms) Invoked Avg uSec Max uSecs");
220 vty_out(vty
, " Avg uSec Max uSecs");
222 " CPU_Warn Wall_Warn Starv_Warn Type Thread\n");
224 if (m
->cpu_record
->count
)
227 (void (*)(struct hash_bucket
*,
228 void *))cpu_record_hash_print
,
231 vty_out(vty
, "No data to display yet.\n");
238 vty_out(vty
, "Total thread statistics\n");
239 vty_out(vty
, "-------------------------\n");
240 vty_out(vty
, "%30s %18s %18s\n", "",
241 "CPU (user+system):", "Real (wall-clock):");
242 vty_out(vty
, "Active Runtime(ms) Invoked Avg uSec Max uSecs");
243 vty_out(vty
, " Avg uSec Max uSecs CPU_Warn Wall_Warn");
244 vty_out(vty
, " Type Thread\n");
246 if (tmp
.total_calls
> 0)
247 vty_out_cpu_thread_history(vty
, &tmp
);
250 static void cpu_record_hash_clear(struct hash_bucket
*bucket
, void *args
[])
252 uint8_t *filter
= args
[0];
253 struct hash
*cpu_record
= args
[1];
255 struct cpu_thread_history
*a
= bucket
->data
;
257 if (!(a
->types
& *filter
))
260 hash_release(cpu_record
, bucket
->data
);
263 static void cpu_record_clear(uint8_t filter
)
265 uint8_t *tmp
= &filter
;
266 struct thread_master
*m
;
269 frr_with_mutex (&masters_mtx
) {
270 for (ALL_LIST_ELEMENTS_RO(masters
, ln
, m
)) {
271 frr_with_mutex (&m
->mtx
) {
272 void *args
[2] = {tmp
, m
->cpu_record
};
275 (void (*)(struct hash_bucket
*,
276 void *))cpu_record_hash_clear
,
283 static uint8_t parse_filter(const char *filterstr
)
288 while (filterstr
[i
] != '\0') {
289 switch (filterstr
[i
]) {
292 filter
|= (1 << THREAD_READ
);
296 filter
|= (1 << THREAD_WRITE
);
300 filter
|= (1 << THREAD_TIMER
);
304 filter
|= (1 << THREAD_EVENT
);
308 filter
|= (1 << THREAD_EXECUTE
);
318 DEFUN_NOSH (show_thread_cpu
,
320 "show thread cpu [FILTER]",
322 "Thread information\n"
324 "Display filter (rwtex)\n")
326 uint8_t filter
= (uint8_t)-1U;
329 if (argv_find(argv
, argc
, "FILTER", &idx
)) {
330 filter
= parse_filter(argv
[idx
]->arg
);
333 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
339 cpu_record_print(vty
, filter
);
343 DEFPY (service_cputime_stats
,
344 service_cputime_stats_cmd
,
345 "[no] service cputime-stats",
347 "Set up miscellaneous service\n"
348 "Collect CPU usage statistics\n")
350 cputime_enabled
= !no
;
354 DEFPY (service_cputime_warning
,
355 service_cputime_warning_cmd
,
356 "[no] service cputime-warning (1-4294967295)",
358 "Set up miscellaneous service\n"
359 "Warn for tasks exceeding CPU usage threshold\n"
360 "Warning threshold in milliseconds\n")
363 cputime_threshold
= 0;
365 cputime_threshold
= cputime_warning
* 1000;
369 ALIAS (service_cputime_warning
,
370 no_service_cputime_warning_cmd
,
371 "no service cputime-warning",
373 "Set up miscellaneous service\n"
374 "Warn for tasks exceeding CPU usage threshold\n")
376 DEFPY (service_walltime_warning
,
377 service_walltime_warning_cmd
,
378 "[no] service walltime-warning (1-4294967295)",
380 "Set up miscellaneous service\n"
381 "Warn for tasks exceeding total wallclock threshold\n"
382 "Warning threshold in milliseconds\n")
385 walltime_threshold
= 0;
387 walltime_threshold
= walltime_warning
* 1000;
391 ALIAS (service_walltime_warning
,
392 no_service_walltime_warning_cmd
,
393 "no service walltime-warning",
395 "Set up miscellaneous service\n"
396 "Warn for tasks exceeding total wallclock threshold\n")
398 static void show_thread_poll_helper(struct vty
*vty
, struct thread_master
*m
)
400 const char *name
= m
->name
? m
->name
: "main";
401 char underline
[strlen(name
) + 1];
402 struct event
*thread
;
405 memset(underline
, '-', sizeof(underline
));
406 underline
[sizeof(underline
) - 1] = '\0';
408 vty_out(vty
, "\nShowing poll FD's for %s\n", name
);
409 vty_out(vty
, "----------------------%s\n", underline
);
410 vty_out(vty
, "Count: %u/%d\n", (uint32_t)m
->handler
.pfdcount
,
412 for (i
= 0; i
< m
->handler
.pfdcount
; i
++) {
413 vty_out(vty
, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i
,
414 m
->handler
.pfds
[i
].fd
, m
->handler
.pfds
[i
].events
,
415 m
->handler
.pfds
[i
].revents
);
417 if (m
->handler
.pfds
[i
].events
& POLLIN
) {
418 thread
= m
->read
[m
->handler
.pfds
[i
].fd
];
421 vty_out(vty
, "ERROR ");
423 vty_out(vty
, "%s ", thread
->xref
->funcname
);
427 if (m
->handler
.pfds
[i
].events
& POLLOUT
) {
428 thread
= m
->write
[m
->handler
.pfds
[i
].fd
];
431 vty_out(vty
, "ERROR\n");
433 vty_out(vty
, "%s\n", thread
->xref
->funcname
);
439 DEFUN_NOSH (show_thread_poll
,
440 show_thread_poll_cmd
,
443 "Thread information\n"
444 "Show poll FD's and information\n")
446 struct listnode
*node
;
447 struct thread_master
*m
;
449 frr_with_mutex (&masters_mtx
) {
450 for (ALL_LIST_ELEMENTS_RO(masters
, node
, m
)) {
451 show_thread_poll_helper(vty
, m
);
459 DEFUN (clear_thread_cpu
,
460 clear_thread_cpu_cmd
,
461 "clear thread cpu [FILTER]",
462 "Clear stored data in all pthreads\n"
463 "Thread information\n"
465 "Display filter (rwtexb)\n")
467 uint8_t filter
= (uint8_t)-1U;
470 if (argv_find(argv
, argc
, "FILTER", &idx
)) {
471 filter
= parse_filter(argv
[idx
]->arg
);
474 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
480 cpu_record_clear(filter
);
484 static void show_thread_timers_helper(struct vty
*vty
, struct thread_master
*m
)
486 const char *name
= m
->name
? m
->name
: "main";
487 char underline
[strlen(name
) + 1];
488 struct event
*thread
;
490 memset(underline
, '-', sizeof(underline
));
491 underline
[sizeof(underline
) - 1] = '\0';
493 vty_out(vty
, "\nShowing timers for %s\n", name
);
494 vty_out(vty
, "-------------------%s\n", underline
);
496 frr_each (thread_timer_list
, &m
->timer
, thread
) {
497 vty_out(vty
, " %-50s%pTH\n", thread
->hist
->funcname
, thread
);
501 DEFPY_NOSH (show_thread_timers
,
502 show_thread_timers_cmd
,
503 "show thread timers",
505 "Thread information\n"
506 "Show all timers and how long they have in the system\n")
508 struct listnode
*node
;
509 struct thread_master
*m
;
511 frr_with_mutex (&masters_mtx
) {
512 for (ALL_LIST_ELEMENTS_RO(masters
, node
, m
))
513 show_thread_timers_helper(vty
, m
);
519 void thread_cmd_init(void)
521 install_element(VIEW_NODE
, &show_thread_cpu_cmd
);
522 install_element(VIEW_NODE
, &show_thread_poll_cmd
);
523 install_element(ENABLE_NODE
, &clear_thread_cpu_cmd
);
525 install_element(CONFIG_NODE
, &service_cputime_stats_cmd
);
526 install_element(CONFIG_NODE
, &service_cputime_warning_cmd
);
527 install_element(CONFIG_NODE
, &no_service_cputime_warning_cmd
);
528 install_element(CONFIG_NODE
, &service_walltime_warning_cmd
);
529 install_element(CONFIG_NODE
, &no_service_walltime_warning_cmd
);
531 install_element(VIEW_NODE
, &show_thread_timers_cmd
);
533 /* CLI end ------------------------------------------------------------------ */
536 static void cancelreq_del(void *cr
)
538 XFREE(MTYPE_TMP
, cr
);
541 /* initializer, only ever called once */
542 static void initializer(void)
544 pthread_key_create(&thread_current
, NULL
);
547 struct thread_master
*thread_master_create(const char *name
)
549 struct thread_master
*rv
;
552 pthread_once(&init_once
, &initializer
);
554 rv
= XCALLOC(MTYPE_THREAD_MASTER
, sizeof(struct thread_master
));
556 /* Initialize master mutex */
557 pthread_mutex_init(&rv
->mtx
, NULL
);
558 pthread_cond_init(&rv
->cancel_cond
, NULL
);
561 name
= name
? name
: "default";
562 rv
->name
= XSTRDUP(MTYPE_THREAD_MASTER
, name
);
564 /* Initialize I/O task data structures */
566 /* Use configured limit if present, ulimit otherwise. */
567 rv
->fd_limit
= frr_get_fd_limit();
568 if (rv
->fd_limit
== 0) {
569 getrlimit(RLIMIT_NOFILE
, &limit
);
570 rv
->fd_limit
= (int)limit
.rlim_cur
;
573 rv
->read
= XCALLOC(MTYPE_THREAD_POLL
,
574 sizeof(struct event
*) * rv
->fd_limit
);
576 rv
->write
= XCALLOC(MTYPE_THREAD_POLL
,
577 sizeof(struct event
*) * rv
->fd_limit
);
579 char tmhashname
[strlen(name
) + 32];
580 snprintf(tmhashname
, sizeof(tmhashname
), "%s - threadmaster event hash",
582 rv
->cpu_record
= hash_create_size(
583 8, (unsigned int (*)(const void *))cpu_record_hash_key
,
584 (bool (*)(const void *, const void *))cpu_record_hash_cmp
,
587 thread_list_init(&rv
->event
);
588 thread_list_init(&rv
->ready
);
589 thread_list_init(&rv
->unuse
);
590 thread_timer_list_init(&rv
->timer
);
592 /* Initialize thread_fetch() settings */
594 rv
->handle_signals
= true;
596 /* Set pthread owner, should be updated by actual owner */
597 rv
->owner
= pthread_self();
598 rv
->cancel_req
= list_new();
599 rv
->cancel_req
->del
= cancelreq_del
;
602 /* Initialize pipe poker */
604 set_nonblocking(rv
->io_pipe
[0]);
605 set_nonblocking(rv
->io_pipe
[1]);
607 /* Initialize data structures for poll() */
608 rv
->handler
.pfdsize
= rv
->fd_limit
;
609 rv
->handler
.pfdcount
= 0;
610 rv
->handler
.pfds
= XCALLOC(MTYPE_THREAD_MASTER
,
611 sizeof(struct pollfd
) * rv
->handler
.pfdsize
);
612 rv
->handler
.copy
= XCALLOC(MTYPE_THREAD_MASTER
,
613 sizeof(struct pollfd
) * rv
->handler
.pfdsize
);
615 /* add to list of threadmasters */
616 frr_with_mutex (&masters_mtx
) {
618 masters
= list_new();
620 listnode_add(masters
, rv
);
626 void thread_master_set_name(struct thread_master
*master
, const char *name
)
628 frr_with_mutex (&master
->mtx
) {
629 XFREE(MTYPE_THREAD_MASTER
, master
->name
);
630 master
->name
= XSTRDUP(MTYPE_THREAD_MASTER
, name
);
634 #define THREAD_UNUSED_DEPTH 10
636 /* Move thread to unuse list. */
637 static void thread_add_unuse(struct thread_master
*m
, struct event
*thread
)
639 pthread_mutex_t mtxc
= thread
->mtx
;
641 assert(m
!= NULL
&& thread
!= NULL
);
643 thread
->hist
->total_active
--;
644 memset(thread
, 0, sizeof(struct event
));
645 thread
->type
= THREAD_UNUSED
;
647 /* Restore the thread mutex context. */
650 if (thread_list_count(&m
->unuse
) < THREAD_UNUSED_DEPTH
) {
651 thread_list_add_tail(&m
->unuse
, thread
);
655 thread_free(m
, thread
);
/* Drain a task list, freeing every task that was queued on it. */
static void thread_list_free(struct thread_master *m,
			     struct thread_list_head *list)
{
	struct event *t;

	for (t = thread_list_pop(list); t; t = thread_list_pop(list))
		thread_free(m, t);
}
668 static void thread_array_free(struct thread_master
*m
,
669 struct event
**thread_array
)
674 for (index
= 0; index
< m
->fd_limit
; ++index
) {
675 t
= thread_array
[index
];
677 thread_array
[index
] = NULL
;
681 XFREE(MTYPE_THREAD_POLL
, thread_array
);
685 * thread_master_free_unused
687 * As threads are finished with they are put on the
688 * unuse list for later reuse.
689 * If we are shutting down, Free up unused threads
690 * So we can see if we forget to shut anything off
692 void thread_master_free_unused(struct thread_master
*m
)
694 frr_with_mutex (&m
->mtx
) {
696 while ((t
= thread_list_pop(&m
->unuse
)))
701 /* Stop thread scheduler. */
702 void thread_master_free(struct thread_master
*m
)
706 frr_with_mutex (&masters_mtx
) {
707 listnode_delete(masters
, m
);
708 if (masters
->count
== 0) {
709 list_delete(&masters
);
713 thread_array_free(m
, m
->read
);
714 thread_array_free(m
, m
->write
);
715 while ((t
= thread_timer_list_pop(&m
->timer
)))
717 thread_list_free(m
, &m
->event
);
718 thread_list_free(m
, &m
->ready
);
719 thread_list_free(m
, &m
->unuse
);
720 pthread_mutex_destroy(&m
->mtx
);
721 pthread_cond_destroy(&m
->cancel_cond
);
722 close(m
->io_pipe
[0]);
723 close(m
->io_pipe
[1]);
724 list_delete(&m
->cancel_req
);
725 m
->cancel_req
= NULL
;
727 hash_clean_and_free(&m
->cpu_record
, cpu_record_hash_free
);
729 XFREE(MTYPE_THREAD_MASTER
, m
->name
);
730 XFREE(MTYPE_THREAD_MASTER
, m
->handler
.pfds
);
731 XFREE(MTYPE_THREAD_MASTER
, m
->handler
.copy
);
732 XFREE(MTYPE_THREAD_MASTER
, m
);
735 /* Return remain time in milliseconds. */
736 unsigned long thread_timer_remain_msec(struct event
*thread
)
740 if (!thread_is_scheduled(thread
))
743 frr_with_mutex (&thread
->mtx
) {
744 remain
= monotime_until(&thread
->u
.sands
, NULL
) / 1000LL;
747 return remain
< 0 ? 0 : remain
;
/*
 * Return the time left until a timer task expires, in whole seconds
 * (the millisecond value from thread_timer_remain_msec() divided by 1000).
 */
unsigned long thread_timer_remain_second(struct event *thread)
{
	unsigned long msec = thread_timer_remain_msec(thread);

	return msec / 1000LL;
}
756 struct timeval
thread_timer_remain(struct event
*thread
)
758 struct timeval remain
;
759 frr_with_mutex (&thread
->mtx
) {
760 monotime_until(&thread
->u
.sands
, &remain
);
/*
 * Format a duration given in seconds as "hh:mm:ss" into buf.
 *
 * @param buf       destination buffer
 * @param buf_size  size of buf in bytes; at least 8 is asserted, 9 are
 *                  needed to hold the full string plus terminator
 * @param sec       duration in seconds
 * @return 0 when exactly "hh:mm:ss" (8 characters) was stored in full;
 *         non-zero when the text has a different length (e.g. 100 hours
 *         or more) or did not fit into buf and was truncated
 */
static int time_hhmmss(char *buf, int buf_size, long sec)
{
	long hh;
	long mm;
	int wr;

	assert(buf_size >= 8);

	hh = sec / 3600;
	sec %= 3600;
	mm = sec / 60;
	sec %= 60;

	wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);

	/*
	 * snprintf() returns the length the full text would have had, so a
	 * value of 8 alone does not prove the text fit: an 8-byte buffer
	 * passes the assert above yet loses the last digit.
	 */
	return wr != 8 || wr >= buf_size;
}
/*
 * Render the time remaining on a timer task as "hh:mm:ss" into buf.
 * When t_timer is NULL the placeholder "--:--:--" is written instead.
 * Always returns buf.
 */
char *thread_timer_to_hhmmss(char *buf, int buf_size, struct event *t_timer)
{
	if (!t_timer) {
		snprintf(buf, buf_size, "--:--:--");
		return buf;
	}

	time_hhmmss(buf, buf_size, thread_timer_remain_second(t_timer));
	return buf;
}
794 /* Get new thread. */
795 static struct event
*thread_get(struct thread_master
*m
, uint8_t type
,
796 void (*func
)(struct event
*), void *arg
,
797 const struct xref_threadsched
*xref
)
799 struct event
*thread
= thread_list_pop(&m
->unuse
);
800 struct cpu_thread_history tmp
;
803 thread
= XCALLOC(MTYPE_THREAD
, sizeof(struct event
));
804 /* mutex only needs to be initialized at struct creation. */
805 pthread_mutex_init(&thread
->mtx
, NULL
);
810 thread
->add_type
= type
;
813 thread
->yield
= THREAD_YIELD_TIME_SLOT
; /* default */
815 thread
->ignore_timer_late
= false;
818 * So if the passed in funcname is not what we have
819 * stored that means the thread->hist needs to be
820 * updated. We keep the last one around in unused
821 * under the assumption that we are probably
822 * going to immediately allocate the same
824 * This hopefully saves us some serious
827 if ((thread
->xref
&& thread
->xref
->funcname
!= xref
->funcname
)
828 || thread
->func
!= func
) {
830 tmp
.funcname
= xref
->funcname
;
832 hash_get(m
->cpu_record
, &tmp
,
833 (void *(*)(void *))cpu_record_hash_alloc
);
835 thread
->hist
->total_active
++;
842 static void thread_free(struct thread_master
*master
, struct event
*thread
)
844 /* Update statistics. */
845 assert(master
->alloc
> 0);
848 /* Free allocated resources. */
849 pthread_mutex_destroy(&thread
->mtx
);
850 XFREE(MTYPE_THREAD
, thread
);
853 static int fd_poll(struct thread_master
*m
, const struct timeval
*timer_wait
,
857 unsigned char trash
[64];
858 nfds_t count
= m
->handler
.copycount
;
861 * If timer_wait is null here, that means poll() should block
862 * indefinitely, unless the thread_master has overridden it by setting
863 * ->selectpoll_timeout.
865 * If the value is positive, it specifies the maximum number of
866 * milliseconds to wait. If the timeout is -1, it specifies that
867 * we should never wait and always return immediately even if no
868 * event is detected. If the value is zero, the behavior is default.
872 /* number of file descriptors with events */
875 if (timer_wait
!= NULL
876 && m
->selectpoll_timeout
== 0) // use the default value
877 timeout
= (timer_wait
->tv_sec
* 1000)
878 + (timer_wait
->tv_usec
/ 1000);
879 else if (m
->selectpoll_timeout
> 0) // use the user's timeout
880 timeout
= m
->selectpoll_timeout
;
881 else if (m
->selectpoll_timeout
882 < 0) // effect a poll (return immediately)
885 zlog_tls_buffer_flush();
887 rcu_assert_read_unlocked();
889 /* add poll pipe poker */
890 assert(count
+ 1 < m
->handler
.pfdsize
);
891 m
->handler
.copy
[count
].fd
= m
->io_pipe
[0];
892 m
->handler
.copy
[count
].events
= POLLIN
;
893 m
->handler
.copy
[count
].revents
= 0x00;
895 /* We need to deal with a signal-handling race here: we
896 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
897 * that may arrive just before we enter poll(). We will block the
898 * key signals, then check whether any have arrived - if so, we return
899 * before calling poll(). If not, we'll re-enable the signals
900 * in the ppoll() call.
903 sigemptyset(&origsigs
);
904 if (m
->handle_signals
) {
905 /* Main pthread that handles the app signals */
906 if (frr_sigevent_check(&origsigs
)) {
907 /* Signal to process - restore signal mask and return */
908 pthread_sigmask(SIG_SETMASK
, &origsigs
, NULL
);
914 /* Don't make any changes for the non-main pthreads */
915 pthread_sigmask(SIG_SETMASK
, NULL
, &origsigs
);
918 #if defined(HAVE_PPOLL)
919 struct timespec ts
, *tsp
;
922 ts
.tv_sec
= timeout
/ 1000;
923 ts
.tv_nsec
= (timeout
% 1000) * 1000000;
928 num
= ppoll(m
->handler
.copy
, count
+ 1, tsp
, &origsigs
);
929 pthread_sigmask(SIG_SETMASK
, &origsigs
, NULL
);
931 /* Not ideal - there is a race after we restore the signal mask */
932 pthread_sigmask(SIG_SETMASK
, &origsigs
, NULL
);
933 num
= poll(m
->handler
.copy
, count
+ 1, timeout
);
938 if (num
< 0 && errno
== EINTR
)
941 if (num
> 0 && m
->handler
.copy
[count
].revents
!= 0 && num
--)
942 while (read(m
->io_pipe
[0], &trash
, sizeof(trash
)) > 0)
950 /* Add new read thread. */
951 void _event_add_read_write(const struct xref_threadsched
*xref
,
952 struct thread_master
*m
,
953 void (*func
)(struct event
*), void *arg
, int fd
,
954 struct event
**t_ptr
)
956 int dir
= xref
->thread_type
;
957 struct event
*thread
= NULL
;
958 struct event
**thread_array
;
960 if (dir
== THREAD_READ
)
961 frrtrace(9, frr_libfrr
, schedule_read
, m
,
962 xref
->funcname
, xref
->xref
.file
, xref
->xref
.line
,
963 t_ptr
, fd
, 0, arg
, 0);
965 frrtrace(9, frr_libfrr
, schedule_write
, m
,
966 xref
->funcname
, xref
->xref
.file
, xref
->xref
.line
,
967 t_ptr
, fd
, 0, arg
, 0);
970 if (fd
>= m
->fd_limit
)
971 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
973 frr_with_mutex (&m
->mtx
) {
975 // thread is already scheduled; don't reschedule
978 /* default to a new pollfd */
979 nfds_t queuepos
= m
->handler
.pfdcount
;
981 if (dir
== THREAD_READ
)
982 thread_array
= m
->read
;
984 thread_array
= m
->write
;
986 /* if we already have a pollfd for our file descriptor, find and
988 for (nfds_t i
= 0; i
< m
->handler
.pfdcount
; i
++)
989 if (m
->handler
.pfds
[i
].fd
== fd
) {
994 * What happens if we have a thread already
995 * created for this event?
997 if (thread_array
[fd
])
998 assert(!"Thread already scheduled for file descriptor");
1003 /* make sure we have room for this fd + pipe poker fd */
1004 assert(queuepos
+ 1 < m
->handler
.pfdsize
);
1006 thread
= thread_get(m
, dir
, func
, arg
, xref
);
1008 m
->handler
.pfds
[queuepos
].fd
= fd
;
1009 m
->handler
.pfds
[queuepos
].events
|=
1010 (dir
== THREAD_READ
? POLLIN
: POLLOUT
);
1012 if (queuepos
== m
->handler
.pfdcount
)
1013 m
->handler
.pfdcount
++;
1016 frr_with_mutex (&thread
->mtx
) {
1018 thread_array
[thread
->u
.fd
] = thread
;
1023 thread
->ref
= t_ptr
;
1031 static void _event_add_timer_timeval(const struct xref_threadsched
*xref
,
1032 struct thread_master
*m
,
1033 void (*func
)(struct event
*), void *arg
,
1034 struct timeval
*time_relative
,
1035 struct event
**t_ptr
)
1037 struct event
*thread
;
1042 assert(time_relative
);
1044 frrtrace(9, frr_libfrr
, schedule_timer
, m
,
1045 xref
->funcname
, xref
->xref
.file
, xref
->xref
.line
,
1046 t_ptr
, 0, 0, arg
, (long)time_relative
->tv_sec
);
1048 /* Compute expiration/deadline time. */
1050 timeradd(&t
, time_relative
, &t
);
1052 frr_with_mutex (&m
->mtx
) {
1053 if (t_ptr
&& *t_ptr
)
1054 /* thread is already scheduled; don't reschedule */
1057 thread
= thread_get(m
, THREAD_TIMER
, func
, arg
, xref
);
1059 frr_with_mutex (&thread
->mtx
) {
1060 thread
->u
.sands
= t
;
1061 thread_timer_list_add(&m
->timer
, thread
);
1064 thread
->ref
= t_ptr
;
1068 /* The timer list is sorted - if this new timer
1069 * might change the time we'll wait for, give the pthread
1070 * a chance to re-compute.
1072 if (thread_timer_list_first(&m
->timer
) == thread
)
1075 #define ONEYEAR2SEC (60 * 60 * 24 * 365)
1076 if (time_relative
->tv_sec
> ONEYEAR2SEC
)
1078 EC_LIB_TIMER_TOO_LONG
,
1079 "Timer: %pTHD is created with an expiration that is greater than 1 year",
1084 /* Add timer event thread. */
1085 void _event_add_timer(const struct xref_threadsched
*xref
,
1086 struct thread_master
*m
, void (*func
)(struct event
*),
1087 void *arg
, long timer
, struct event
**t_ptr
)
1089 struct timeval trel
;
1093 trel
.tv_sec
= timer
;
1096 _event_add_timer_timeval(xref
, m
, func
, arg
, &trel
, t_ptr
);
1099 /* Add timer event thread with "millisecond" resolution */
1100 void _event_add_timer_msec(const struct xref_threadsched
*xref
,
1101 struct thread_master
*m
,
1102 void (*func
)(struct event
*), void *arg
, long timer
,
1103 struct event
**t_ptr
)
1105 struct timeval trel
;
1109 trel
.tv_sec
= timer
/ 1000;
1110 trel
.tv_usec
= 1000 * (timer
% 1000);
1112 _event_add_timer_timeval(xref
, m
, func
, arg
, &trel
, t_ptr
);
/*
 * Add a timer task whose relative expiry is given directly as a
 * struct timeval; a thin public wrapper around
 * _event_add_timer_timeval().
 */
void _event_add_timer_tv(const struct xref_threadsched *xref,
			 struct thread_master *m, void (*func)(struct event *),
			 void *arg, struct timeval *tv, struct event **t_ptr)
{
	_event_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}
1123 /* Add simple event thread. */
1124 void _event_add_event(const struct xref_threadsched
*xref
,
1125 struct thread_master
*m
, void (*func
)(struct event
*),
1126 void *arg
, int val
, struct event
**t_ptr
)
1128 struct event
*thread
= NULL
;
1130 frrtrace(9, frr_libfrr
, schedule_event
, m
,
1131 xref
->funcname
, xref
->xref
.file
, xref
->xref
.line
,
1132 t_ptr
, 0, val
, arg
, 0);
1136 frr_with_mutex (&m
->mtx
) {
1137 if (t_ptr
&& *t_ptr
)
1138 /* thread is already scheduled; don't reschedule */
1141 thread
= thread_get(m
, THREAD_EVENT
, func
, arg
, xref
);
1142 frr_with_mutex (&thread
->mtx
) {
1143 thread
->u
.val
= val
;
1144 thread_list_add_tail(&m
->event
, thread
);
1149 thread
->ref
= t_ptr
;
1156 /* Thread cancellation ------------------------------------------------------ */
1159 * NOT's out the .events field of pollfd corresponding to the given file
1160 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1162 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
1163 * implementation for details.
1167 * @param state the event to cancel. One or more (OR'd together) of the
1172 static void thread_cancel_rw(struct thread_master
*master
, int fd
, short state
,
1177 /* find the index of corresponding pollfd */
1180 /* Cancel POLLHUP too just in case some bozo set it */
1183 /* Some callers know the index of the pfd already */
1184 if (idx_hint
>= 0) {
1188 /* Have to look for the fd in the pfd array */
1189 for (i
= 0; i
< master
->handler
.pfdcount
; i
++)
1190 if (master
->handler
.pfds
[i
].fd
== fd
) {
1198 "[!] Received cancellation request for nonexistent rw job");
1199 zlog_debug("[!] threadmaster: %s | fd: %d",
1200 master
->name
? master
->name
: "", fd
);
1204 /* NOT out event. */
1205 master
->handler
.pfds
[i
].events
&= ~(state
);
1207 /* If all events are canceled, delete / resize the pollfd array. */
1208 if (master
->handler
.pfds
[i
].events
== 0) {
1209 memmove(master
->handler
.pfds
+ i
, master
->handler
.pfds
+ i
+ 1,
1210 (master
->handler
.pfdcount
- i
- 1)
1211 * sizeof(struct pollfd
));
1212 master
->handler
.pfdcount
--;
1213 master
->handler
.pfds
[master
->handler
.pfdcount
].fd
= 0;
1214 master
->handler
.pfds
[master
->handler
.pfdcount
].events
= 0;
1217 /* If we have the same pollfd in the copy, perform the same operations,
1218 * otherwise return. */
1219 if (i
>= master
->handler
.copycount
)
1222 master
->handler
.copy
[i
].events
&= ~(state
);
1224 if (master
->handler
.copy
[i
].events
== 0) {
1225 memmove(master
->handler
.copy
+ i
, master
->handler
.copy
+ i
+ 1,
1226 (master
->handler
.copycount
- i
- 1)
1227 * sizeof(struct pollfd
));
1228 master
->handler
.copycount
--;
1229 master
->handler
.copy
[master
->handler
.copycount
].fd
= 0;
1230 master
->handler
.copy
[master
->handler
.copycount
].events
= 0;
1235 * Process task cancellation given a task argument: iterate through the
1236 * various lists of tasks, looking for any that match the argument.
1238 static void cancel_arg_helper(struct thread_master
*master
,
1239 const struct cancel_req
*cr
)
1246 /* We're only processing arg-based cancellations here. */
1247 if (cr
->eventobj
== NULL
)
1250 /* First process the ready lists. */
1251 frr_each_safe(thread_list
, &master
->event
, t
) {
1252 if (t
->arg
!= cr
->eventobj
)
1254 thread_list_del(&master
->event
, t
);
1257 thread_add_unuse(master
, t
);
1260 frr_each_safe(thread_list
, &master
->ready
, t
) {
1261 if (t
->arg
!= cr
->eventobj
)
1263 thread_list_del(&master
->ready
, t
);
1266 thread_add_unuse(master
, t
);
1269 /* If requested, stop here and ignore io and timers */
1270 if (CHECK_FLAG(cr
->flags
, THREAD_CANCEL_FLAG_READY
))
1273 /* Check the io tasks */
1274 for (i
= 0; i
< master
->handler
.pfdcount
;) {
1275 pfd
= master
->handler
.pfds
+ i
;
1277 if (pfd
->events
& POLLIN
)
1278 t
= master
->read
[pfd
->fd
];
1280 t
= master
->write
[pfd
->fd
];
1282 if (t
&& t
->arg
== cr
->eventobj
) {
1285 /* Found a match to cancel: clean up fd arrays */
1286 thread_cancel_rw(master
, pfd
->fd
, pfd
->events
, i
);
1288 /* Clean up thread arrays */
1289 master
->read
[fd
] = NULL
;
1290 master
->write
[fd
] = NULL
;
1292 /* Clear caller's ref */
1296 thread_add_unuse(master
, t
);
1298 /* Don't increment 'i' since the cancellation will have
1299 * removed the entry from the pfd array
1305 /* Check the timer tasks */
1306 t
= thread_timer_list_first(&master
->timer
);
1308 struct event
*t_next
;
1310 t_next
= thread_timer_list_next(&master
->timer
, t
);
1312 if (t
->arg
== cr
->eventobj
) {
1313 thread_timer_list_del(&master
->timer
, t
);
1316 thread_add_unuse(master
, t
);
1324 * Process cancellation requests.
1326 * This may only be run from the pthread which owns the thread_master.
1328 * @param master the thread master to process
1329 * @REQUIRE master->mtx
1331 static void do_thread_cancel(struct thread_master
*master
)
1333 struct thread_list_head
*list
= NULL
;
1334 struct event
**thread_array
= NULL
;
1335 struct event
*thread
;
1336 struct cancel_req
*cr
;
1337 struct listnode
*ln
;
1339 for (ALL_LIST_ELEMENTS_RO(master
->cancel_req
, ln
, cr
)) {
1341 * If this is an event object cancellation, search
1342 * through task lists deleting any tasks which have the
1343 * specified argument - use this handy helper function.
1346 cancel_arg_helper(master
, cr
);
1351 * The pointer varies depending on whether the cancellation
1352 * request was made asynchronously or not. If it was, we
1353 * need to check whether the thread even exists anymore
1354 * before cancelling it.
1356 thread
= (cr
->thread
) ? cr
->thread
: *cr
->threadref
;
1362 thread_array
= NULL
;
1364 /* Determine the appropriate queue to cancel the thread from */
1365 switch (thread
->type
) {
1367 thread_cancel_rw(master
, thread
->u
.fd
, POLLIN
, -1);
1368 thread_array
= master
->read
;
1371 thread_cancel_rw(master
, thread
->u
.fd
, POLLOUT
, -1);
1372 thread_array
= master
->write
;
1375 thread_timer_list_del(&master
->timer
, thread
);
1378 list
= &master
->event
;
1381 list
= &master
->ready
;
1389 thread_list_del(list
, thread
);
1390 } else if (thread_array
) {
1391 thread_array
[thread
->u
.fd
] = NULL
;
1395 *thread
->ref
= NULL
;
1397 thread_add_unuse(thread
->master
, thread
);
1400 /* Delete and free all cancellation requests */
1401 if (master
->cancel_req
)
1402 list_delete_all_node(master
->cancel_req
);
1404 /* Wake up any threads which may be blocked in thread_cancel_async() */
1405 master
->canceled
= true;
1406 pthread_cond_broadcast(&master
->cancel_cond
);
1410 * Helper function used for multiple flavors of arg-based cancellation.
1412 static void cancel_event_helper(struct thread_master
*m
, void *arg
, int flags
)
1414 struct cancel_req
*cr
;
1416 assert(m
->owner
== pthread_self());
1418 /* Only worth anything if caller supplies an arg. */
1422 cr
= XCALLOC(MTYPE_TMP
, sizeof(struct cancel_req
));
1426 frr_with_mutex (&m
->mtx
) {
1428 listnode_add(m
->cancel_req
, cr
);
1429 do_thread_cancel(m
);
/**
 * Cancel any events which have the specified argument.
 *
 * MT-Unsafe: cancel_event_helper() asserts that the caller is the pthread
 * owning the master.
 *
 * @param master the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event(struct thread_master *master, void *arg)
{
	/* flags == 0: search every queue, not only the ready/event lists */
	cancel_event_helper(master, arg, 0);
}
1447 * Cancel ready tasks with an arg matching 'arg'
1451 * @param m the thread_master to cancel from
1452 * @param arg the argument passed when creating the event
1454 void thread_cancel_event_ready(struct thread_master
*m
, void *arg
)
1457 /* Only cancel ready/event tasks */
1458 cancel_event_helper(m
, arg
, THREAD_CANCEL_FLAG_READY
);
1462 * Cancel a specific task.
1466 * @param thread task to cancel
1468 void thread_cancel(struct event
**thread
)
1470 struct thread_master
*master
;
1472 if (thread
== NULL
|| *thread
== NULL
)
1475 master
= (*thread
)->master
;
1477 frrtrace(9, frr_libfrr
, thread_cancel
, master
,
1478 (*thread
)->xref
->funcname
, (*thread
)->xref
->xref
.file
,
1479 (*thread
)->xref
->xref
.line
, NULL
, (*thread
)->u
.fd
,
1480 (*thread
)->u
.val
, (*thread
)->arg
, (*thread
)->u
.sands
.tv_sec
);
1482 assert(master
->owner
== pthread_self());
1484 frr_with_mutex (&master
->mtx
) {
1485 struct cancel_req
*cr
=
1486 XCALLOC(MTYPE_TMP
, sizeof(struct cancel_req
));
1487 cr
->thread
= *thread
;
1488 listnode_add(master
->cancel_req
, cr
);
1489 do_thread_cancel(master
);
1496 * Asynchronous cancellation.
1498 * Called with either a struct event ** or void * to an event argument,
1499 * this function posts the correct cancellation request and blocks until it is
1502 * If the thread is currently running, execution blocks until it completes.
1504 * The last two parameters are mutually exclusive, i.e. if you pass one the
1505 * other must be NULL.
1507 * When the cancellation procedure executes on the target thread_master, the
1508 * thread * provided is checked for nullity. If it is null, the thread is
1509 * assumed to no longer exist and the cancellation request is a no-op. Thus
1510 * users of this API must pass a back-reference when scheduling the original
1515 * @param master the thread master with the relevant event / task
1516 * @param thread pointer to thread to cancel
1517 * @param eventobj the event
1519 void thread_cancel_async(struct thread_master
*master
, struct event
**thread
,
1522 assert(!(thread
&& eventobj
) && (thread
|| eventobj
));
1524 if (thread
&& *thread
)
1525 frrtrace(9, frr_libfrr
, thread_cancel_async
, master
,
1526 (*thread
)->xref
->funcname
, (*thread
)->xref
->xref
.file
,
1527 (*thread
)->xref
->xref
.line
, NULL
, (*thread
)->u
.fd
,
1528 (*thread
)->u
.val
, (*thread
)->arg
,
1529 (*thread
)->u
.sands
.tv_sec
);
1531 frrtrace(9, frr_libfrr
, thread_cancel_async
, master
, NULL
, NULL
,
1532 0, NULL
, 0, 0, eventobj
, 0);
1534 assert(master
->owner
!= pthread_self());
1536 frr_with_mutex (&master
->mtx
) {
1537 master
->canceled
= false;
1540 struct cancel_req
*cr
=
1541 XCALLOC(MTYPE_TMP
, sizeof(struct cancel_req
));
1542 cr
->threadref
= thread
;
1543 listnode_add(master
->cancel_req
, cr
);
1544 } else if (eventobj
) {
1545 struct cancel_req
*cr
=
1546 XCALLOC(MTYPE_TMP
, sizeof(struct cancel_req
));
1547 cr
->eventobj
= eventobj
;
1548 listnode_add(master
->cancel_req
, cr
);
1552 while (!master
->canceled
)
1553 pthread_cond_wait(&master
->cancel_cond
, &master
->mtx
);
1559 /* ------------------------------------------------------------------------- */
1561 static struct timeval
*thread_timer_wait(struct thread_timer_list_head
*timers
,
1562 struct timeval
*timer_val
)
1564 if (!thread_timer_list_count(timers
))
1567 struct event
*next_timer
= thread_timer_list_first(timers
);
1568 monotime_until(&next_timer
->u
.sands
, timer_val
);
1572 static struct event
*thread_run(struct thread_master
*m
, struct event
*thread
,
1573 struct event
*fetch
)
1576 thread_add_unuse(m
, thread
);
1580 static int thread_process_io_helper(struct thread_master
*m
,
1581 struct event
*thread
, short state
,
1582 short actual_state
, int pos
)
1584 struct event
**thread_array
;
1587 * poll() clears the .events field, but the pollfd array we
1588 * pass to poll() is a copy of the one used to schedule threads.
1589 * We need to synchronize state between the two here by applying
1590 * the same changes poll() made on the copy of the "real" pollfd
1593 * This cleans up a possible infinite loop where we refuse
1594 * to respond to a poll event but poll is insistent that
1597 m
->handler
.pfds
[pos
].events
&= ~(state
);
1600 if ((actual_state
& (POLLHUP
|POLLIN
)) != POLLHUP
)
1601 flog_err(EC_LIB_NO_THREAD
,
1602 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
1603 m
->handler
.pfds
[pos
].fd
, actual_state
);
1607 if (thread
->type
== THREAD_READ
)
1608 thread_array
= m
->read
;
1610 thread_array
= m
->write
;
1612 thread_array
[thread
->u
.fd
] = NULL
;
1613 thread_list_add_tail(&m
->ready
, thread
);
1614 thread
->type
= THREAD_READY
;
1620 * Process I/O events.
1622 * Walks through file descriptor array looking for those pollfds whose .revents
1623 * field has something interesting. Deletes any invalid file descriptors.
1625 * @param m the thread master
1626 * @param num the number of active file descriptors (return value of poll())
1628 static void thread_process_io(struct thread_master
*m
, unsigned int num
)
1630 unsigned int ready
= 0;
1631 struct pollfd
*pfds
= m
->handler
.copy
;
1633 for (nfds_t i
= 0; i
< m
->handler
.copycount
&& ready
< num
; ++i
) {
1634 /* no event for current fd? immediately continue */
1635 if (pfds
[i
].revents
== 0)
1641 * Unless someone has called thread_cancel from another
1642 * pthread, the only thing that could have changed in
1643 * m->handler.pfds while we were asleep is the .events
1644 * field in a given pollfd. Barring thread_cancel() that
1645 * value should be a superset of the values we have in our
1646 * copy, so there's no need to update it. Similarily,
1647 * barring deletion, the fd should still be a valid index
1648 * into the master's pfds.
1650 * We are including POLLERR here to do a READ event
1651 * this is because the read should fail and the
1652 * read function should handle it appropriately
1654 if (pfds
[i
].revents
& (POLLIN
| POLLHUP
| POLLERR
)) {
1655 thread_process_io_helper(m
, m
->read
[pfds
[i
].fd
], POLLIN
,
1656 pfds
[i
].revents
, i
);
1658 if (pfds
[i
].revents
& POLLOUT
)
1659 thread_process_io_helper(m
, m
->write
[pfds
[i
].fd
],
1660 POLLOUT
, pfds
[i
].revents
, i
);
1662 /* if one of our file descriptors is garbage, remove the same
1664 * both pfds + update sizes and index */
1665 if (pfds
[i
].revents
& POLLNVAL
) {
1666 memmove(m
->handler
.pfds
+ i
, m
->handler
.pfds
+ i
+ 1,
1667 (m
->handler
.pfdcount
- i
- 1)
1668 * sizeof(struct pollfd
));
1669 m
->handler
.pfdcount
--;
1670 m
->handler
.pfds
[m
->handler
.pfdcount
].fd
= 0;
1671 m
->handler
.pfds
[m
->handler
.pfdcount
].events
= 0;
1673 memmove(pfds
+ i
, pfds
+ i
+ 1,
1674 (m
->handler
.copycount
- i
- 1)
1675 * sizeof(struct pollfd
));
1676 m
->handler
.copycount
--;
1677 m
->handler
.copy
[m
->handler
.copycount
].fd
= 0;
1678 m
->handler
.copy
[m
->handler
.copycount
].events
= 0;
1685 /* Add all timers that have popped to the ready list. */
1686 static unsigned int thread_process_timers(struct thread_master
*m
,
1687 struct timeval
*timenow
)
1689 struct timeval prev
= *timenow
;
1690 bool displayed
= false;
1691 struct event
*thread
;
1692 unsigned int ready
= 0;
1694 while ((thread
= thread_timer_list_first(&m
->timer
))) {
1695 if (timercmp(timenow
, &thread
->u
.sands
, <))
1697 prev
= thread
->u
.sands
;
1700 * If the timer would have popped 4 seconds in the
1701 * past then we are in a situation where we are
1702 * really getting behind on handling of events.
1703 * Let's log it and do the right thing with it.
1705 if (timercmp(timenow
, &prev
, >)) {
1706 atomic_fetch_add_explicit(
1707 &thread
->hist
->total_starv_warn
, 1,
1708 memory_order_seq_cst
);
1709 if (!displayed
&& !thread
->ignore_timer_late
) {
1711 EC_LIB_STARVE_THREAD
,
1712 "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1718 thread_timer_list_pop(&m
->timer
);
1719 thread
->type
= THREAD_READY
;
1720 thread_list_add_tail(&m
->ready
, thread
);
1727 /* process a list en masse, e.g. for event thread lists */
1728 static unsigned int thread_process(struct thread_list_head
*list
)
1730 struct event
*thread
;
1731 unsigned int ready
= 0;
1733 while ((thread
= thread_list_pop(list
))) {
1734 thread
->type
= THREAD_READY
;
1735 thread_list_add_tail(&thread
->master
->ready
, thread
);
1742 /* Fetch next ready thread. */
1743 struct event
*thread_fetch(struct thread_master
*m
, struct event
*fetch
)
1745 struct event
*thread
= NULL
;
1747 struct timeval zerotime
= {0, 0};
1749 struct timeval
*tw
= NULL
;
1750 bool eintr_p
= false;
1754 /* Handle signals if any */
1755 if (m
->handle_signals
)
1756 frr_sigevent_process();
1758 pthread_mutex_lock(&m
->mtx
);
1760 /* Process any pending cancellation requests */
1761 do_thread_cancel(m
);
1764 * Attempt to flush ready queue before going into poll().
1765 * This is performance-critical. Think twice before modifying.
1767 if ((thread
= thread_list_pop(&m
->ready
))) {
1768 fetch
= thread_run(m
, thread
, fetch
);
1771 pthread_mutex_unlock(&m
->mtx
);
1772 if (!m
->ready_run_loop
)
1773 GETRUSAGE(&m
->last_getrusage
);
1774 m
->ready_run_loop
= true;
1778 m
->ready_run_loop
= false;
1779 /* otherwise, tick through scheduling sequence */
1782 * Post events to ready queue. This must come before the
1783 * following block since events should occur immediately
1785 thread_process(&m
->event
);
1788 * If there are no tasks on the ready queue, we will poll()
1789 * until a timer expires or we receive I/O, whichever comes
1790 * first. The strategy for doing this is:
1792 * - If there are events pending, set the poll() timeout to zero
1793 * - If there are no events pending, but there are timers
1794 * pending, set the timeout to the smallest remaining time on
1796 * - If there are neither timers nor events pending, but there
1797 * are file descriptors pending, block indefinitely in poll()
1798 * - If nothing is pending, it's time for the application to die
1800 * In every case except the last, we need to hit poll() at least
1801 * once per loop to avoid starvation by events
1803 if (!thread_list_count(&m
->ready
))
1804 tw
= thread_timer_wait(&m
->timer
, &tv
);
1806 if (thread_list_count(&m
->ready
) ||
1807 (tw
&& !timercmp(tw
, &zerotime
, >)))
1810 if (!tw
&& m
->handler
.pfdcount
== 0) { /* die */
1811 pthread_mutex_unlock(&m
->mtx
);
1817 * Copy pollfd array + # active pollfds in it. Not necessary to
1818 * copy the array size as this is fixed.
1820 m
->handler
.copycount
= m
->handler
.pfdcount
;
1821 memcpy(m
->handler
.copy
, m
->handler
.pfds
,
1822 m
->handler
.copycount
* sizeof(struct pollfd
));
1824 pthread_mutex_unlock(&m
->mtx
);
1827 num
= fd_poll(m
, tw
, &eintr_p
);
1829 pthread_mutex_lock(&m
->mtx
);
1831 /* Handle any errors received in poll() */
1834 pthread_mutex_unlock(&m
->mtx
);
1835 /* loop around to signal handler */
1840 flog_err(EC_LIB_SYSTEM_CALL
, "poll() error: %s",
1841 safe_strerror(errno
));
1842 pthread_mutex_unlock(&m
->mtx
);
1847 /* Post timers to ready queue. */
1849 thread_process_timers(m
, &now
);
1851 /* Post I/O to ready queue. */
1853 thread_process_io(m
, num
);
1855 pthread_mutex_unlock(&m
->mtx
);
1857 } while (!thread
&& m
->spin
);
1862 static unsigned long timeval_elapsed(struct timeval a
, struct timeval b
)
1864 return (((a
.tv_sec
- b
.tv_sec
) * TIMER_SECOND_MICRO
)
1865 + (a
.tv_usec
- b
.tv_usec
));
1868 unsigned long thread_consumed_time(RUSAGE_T
*now
, RUSAGE_T
*start
,
1869 unsigned long *cputime
)
1871 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1875 * FreeBSD appears to have an issue when calling clock_gettime
1876 * with CLOCK_THREAD_CPUTIME_ID really close to each other
1877 * occassionally the now time will be before the start time.
1878 * This is not good and FRR is ending up with CPU HOG's
1879 * when the subtraction wraps to very large numbers
1881 * What we are going to do here is cheat a little bit
1882 * and notice that this is a problem and just correct
1883 * it so that it is impossible to happen
1885 if (start
->cpu
.tv_sec
== now
->cpu
.tv_sec
&&
1886 start
->cpu
.tv_nsec
> now
->cpu
.tv_nsec
)
1887 now
->cpu
.tv_nsec
= start
->cpu
.tv_nsec
+ 1;
1888 else if (start
->cpu
.tv_sec
> now
->cpu
.tv_sec
) {
1889 now
->cpu
.tv_sec
= start
->cpu
.tv_sec
;
1890 now
->cpu
.tv_nsec
= start
->cpu
.tv_nsec
+ 1;
1893 *cputime
= (now
->cpu
.tv_sec
- start
->cpu
.tv_sec
) * TIMER_SECOND_MICRO
1894 + (now
->cpu
.tv_nsec
- start
->cpu
.tv_nsec
) / 1000;
1896 /* This is 'user + sys' time. */
1897 *cputime
= timeval_elapsed(now
->cpu
.ru_utime
, start
->cpu
.ru_utime
)
1898 + timeval_elapsed(now
->cpu
.ru_stime
, start
->cpu
.ru_stime
);
1900 return timeval_elapsed(now
->real
, start
->real
);
1903 /* We should aim to yield after yield milliseconds, which defaults
1904 to THREAD_YIELD_TIME_SLOT .
1905 Note: we are using real (wall clock) time for this calculation.
1906 It could be argued that CPU time may make more sense in certain
1907 contexts. The things to consider are whether the thread may have
1908 blocked (in which case wall time increases, but CPU time does not),
1909 or whether the system is heavily loaded with other processes competing
1910 for CPU time. On balance, wall clock time seems to make sense.
1911 Plus it has the added benefit that gettimeofday should be faster
1912 than calling getrusage. */
1913 int thread_should_yield(struct event
*thread
)
1916 frr_with_mutex (&thread
->mtx
) {
1917 result
= monotime_since(&thread
->real
, NULL
)
1918 > (int64_t)thread
->yield
;
1923 void thread_set_yield_time(struct event
*thread
, unsigned long yield_time
)
1925 frr_with_mutex (&thread
->mtx
) {
1926 thread
->yield
= yield_time
;
1930 void thread_getrusage(RUSAGE_T
*r
)
1933 if (!cputime_enabled
) {
1934 memset(&r
->cpu
, 0, sizeof(r
->cpu
));
1938 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1939 /* not currently implemented in Linux's vDSO, but maybe at some point
1942 clock_gettime(CLOCK_THREAD_CPUTIME_ID
, &r
->cpu
);
1943 #else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
1944 #if defined RUSAGE_THREAD
1945 #define FRR_RUSAGE RUSAGE_THREAD
1947 #define FRR_RUSAGE RUSAGE_SELF
1949 getrusage(FRR_RUSAGE
, &(r
->cpu
));
1956 * This function will atomically update the thread's usage history. At present
1957 * this is the only spot where usage history is written. Nevertheless the code
1958 * has been written such that the introduction of writers in the future should
1959 * not need to update it provided the writers atomically perform only the
1960 * operations done here, i.e. updating the total and maximum times. In
1961 * particular, the maximum real and cpu times must be monotonically increasing
1962 * or this code is not correct.
1964 void thread_call(struct event
*thread
)
1966 RUSAGE_T before
, after
;
1968 /* if the thread being called is the CLI, it may change cputime_enabled
1969 * ("service cputime-stats" command), which can result in nonsensical
1970 * and very confusing warnings
1972 bool cputime_enabled_here
= cputime_enabled
;
1974 if (thread
->master
->ready_run_loop
)
1975 before
= thread
->master
->last_getrusage
;
1979 thread
->real
= before
.real
;
1981 frrtrace(9, frr_libfrr
, thread_call
, thread
->master
,
1982 thread
->xref
->funcname
, thread
->xref
->xref
.file
,
1983 thread
->xref
->xref
.line
, NULL
, thread
->u
.fd
,
1984 thread
->u
.val
, thread
->arg
, thread
->u
.sands
.tv_sec
);
1986 pthread_setspecific(thread_current
, thread
);
1987 (*thread
->func
)(thread
);
1988 pthread_setspecific(thread_current
, NULL
);
1991 thread
->master
->last_getrusage
= after
;
1993 unsigned long walltime
, cputime
;
1996 walltime
= thread_consumed_time(&after
, &before
, &cputime
);
1998 /* update walltime */
1999 atomic_fetch_add_explicit(&thread
->hist
->real
.total
, walltime
,
2000 memory_order_seq_cst
);
2001 exp
= atomic_load_explicit(&thread
->hist
->real
.max
,
2002 memory_order_seq_cst
);
2003 while (exp
< walltime
2004 && !atomic_compare_exchange_weak_explicit(
2005 &thread
->hist
->real
.max
, &exp
, walltime
,
2006 memory_order_seq_cst
, memory_order_seq_cst
))
2009 if (cputime_enabled_here
&& cputime_enabled
) {
2010 /* update cputime */
2011 atomic_fetch_add_explicit(&thread
->hist
->cpu
.total
, cputime
,
2012 memory_order_seq_cst
);
2013 exp
= atomic_load_explicit(&thread
->hist
->cpu
.max
,
2014 memory_order_seq_cst
);
2015 while (exp
< cputime
2016 && !atomic_compare_exchange_weak_explicit(
2017 &thread
->hist
->cpu
.max
, &exp
, cputime
,
2018 memory_order_seq_cst
, memory_order_seq_cst
))
2022 atomic_fetch_add_explicit(&thread
->hist
->total_calls
, 1,
2023 memory_order_seq_cst
);
2024 atomic_fetch_or_explicit(&thread
->hist
->types
, 1 << thread
->add_type
,
2025 memory_order_seq_cst
);
2027 if (cputime_enabled_here
&& cputime_enabled
&& cputime_threshold
2028 && cputime
> cputime_threshold
) {
2030 * We have a CPU Hog on our hands. The time FRR has spent
2031 * doing actual work (not sleeping) is greater than 5 seconds.
2032 * Whinge about it now, so we're aware this is yet another task
2035 atomic_fetch_add_explicit(&thread
->hist
->total_cpu_warn
,
2036 1, memory_order_seq_cst
);
2038 EC_LIB_SLOW_THREAD_CPU
,
2039 "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2040 thread
->xref
->funcname
, (unsigned long)thread
->func
,
2041 walltime
/ 1000, cputime
/ 1000);
2043 } else if (walltime_threshold
&& walltime
> walltime_threshold
) {
2045 * The runtime for a task is greater than 5 seconds, but the
2046 * cpu time is under 5 seconds. Let's whine about this because
2047 * this could imply some sort of scheduling issue.
2049 atomic_fetch_add_explicit(&thread
->hist
->total_wall_warn
,
2050 1, memory_order_seq_cst
);
2052 EC_LIB_SLOW_THREAD_WALL
,
2053 "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
2054 thread
->xref
->funcname
, (unsigned long)thread
->func
,
2055 walltime
/ 1000, cputime
/ 1000);
2059 /* Execute thread */
2060 void _thread_execute(const struct xref_threadsched
*xref
,
2061 struct thread_master
*m
, void (*func
)(struct event
*),
2064 struct event
*thread
;
2066 /* Get or allocate new thread to execute. */
2067 frr_with_mutex (&m
->mtx
) {
2068 thread
= thread_get(m
, THREAD_EVENT
, func
, arg
, xref
);
2070 /* Set its event value. */
2071 frr_with_mutex (&thread
->mtx
) {
2072 thread
->add_type
= THREAD_EXECUTE
;
2073 thread
->u
.val
= val
;
2074 thread
->ref
= &thread
;
2078 /* Execute thread doing all accounting. */
2079 thread_call(thread
);
2081 /* Give back or free thread. */
2082 thread_add_unuse(m
, thread
);
2085 /* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2086 void debug_signals(const sigset_t
*sigs
)
2093 * We're only looking at the non-realtime signals here, so we need
2094 * some limit value. Platform differences mean at some point we just
2095 * need to pick a reasonable value.
2097 #if defined SIGRTMIN
2098 # define LAST_SIGNAL SIGRTMIN
2100 # define LAST_SIGNAL 32
2105 sigemptyset(&tmpsigs
);
2106 pthread_sigmask(SIG_BLOCK
, NULL
, &tmpsigs
);
2113 for (i
= 0; i
< LAST_SIGNAL
; i
++) {
2116 if (sigismember(sigs
, i
) > 0) {
2118 strlcat(buf
, ",", sizeof(buf
));
2119 snprintf(tmp
, sizeof(tmp
), "%d", i
);
2120 strlcat(buf
, tmp
, sizeof(buf
));
2126 snprintf(buf
, sizeof(buf
), "<none>");
2128 zlog_debug("%s: %s", __func__
, buf
);
2131 static ssize_t
printfrr_thread_dbg(struct fbuf
*buf
, struct printfrr_eargs
*ea
,
2132 const struct event
*thread
)
2134 static const char * const types
[] = {
2135 [THREAD_READ
] = "read",
2136 [THREAD_WRITE
] = "write",
2137 [THREAD_TIMER
] = "timer",
2138 [THREAD_EVENT
] = "event",
2139 [THREAD_READY
] = "ready",
2140 [THREAD_UNUSED
] = "unused",
2141 [THREAD_EXECUTE
] = "exec",
2147 return bputs(buf
, "{(thread *)NULL}");
2149 rv
+= bprintfrr(buf
, "{(thread *)%p arg=%p", thread
, thread
->arg
);
2151 if (thread
->type
< array_size(types
) && types
[thread
->type
])
2152 rv
+= bprintfrr(buf
, " %-6s", types
[thread
->type
]);
2154 rv
+= bprintfrr(buf
, " INVALID(%u)", thread
->type
);
2156 switch (thread
->type
) {
2159 snprintfrr(info
, sizeof(info
), "fd=%d", thread
->u
.fd
);
2163 snprintfrr(info
, sizeof(info
), "r=%pTVMud", &thread
->u
.sands
);
2167 rv
+= bprintfrr(buf
, " %-12s %s() %s from %s:%d}", info
,
2168 thread
->xref
->funcname
, thread
->xref
->dest
,
2169 thread
->xref
->xref
.file
, thread
->xref
->xref
.line
);
2173 printfrr_ext_autoreg_p("TH", printfrr_thread
);
2174 static ssize_t
printfrr_thread(struct fbuf
*buf
, struct printfrr_eargs
*ea
,
2177 const struct event
*thread
= ptr
;
2178 struct timespec remain
= {};
2180 if (ea
->fmt
[0] == 'D') {
2182 return printfrr_thread_dbg(buf
, ea
, thread
);
2186 /* need to jump over time formatting flag characters in the
2187 * input format string, i.e. adjust ea->fmt!
2189 printfrr_time(buf
, ea
, &remain
,
2190 TIMEFMT_TIMER_DEADLINE
| TIMEFMT_SKIP
);
2191 return bputch(buf
, '-');
2194 TIMEVAL_TO_TIMESPEC(&thread
->u
.sands
, &remain
);
2195 return printfrr_time(buf
, ea
, &remain
, TIMEFMT_TIMER_DEADLINE
);