/* Thread management routine
 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <zebra.h>
#include <sys/resource.h>

#include "thread.h"
#include "memory.h"
#include "frrcu.h"
#include "log.h"
#include "hash.h"
#include "command.h"
#include "sigevent.h"
#include "network.h"
#include "jhash.h"
#include "frratomic.h"
#include "frr_pthread.h"
#include "lib_errors.h"
#include "libfrr_trace.h"
#include "libfrr.h"
DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");

DECLARE_LIST(thread_list, struct thread, threaditem);

struct cancel_req {
	int flags;
	struct thread *thread;
	void *eventobj;
	struct thread **threadref;
};

/* Flags for task cancellation */
#define THREAD_CANCEL_FLAG_READY 0x01
static int thread_timer_cmp(const struct thread *a, const struct thread *b)
{
	if (a->u.sands.tv_sec < b->u.sands.tv_sec)
		return -1;
	if (a->u.sands.tv_sec > b->u.sands.tv_sec)
		return 1;
	if (a->u.sands.tv_usec < b->u.sands.tv_usec)
		return -1;
	if (a->u.sands.tv_usec > b->u.sands.tv_usec)
		return 1;
	return 0;
}

DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);
#if defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif

#define AWAKEN(m)                                                              \
	do {                                                                   \
		const unsigned char wakebyte = 0x01;                           \
		write(m->io_pipe[1], &wakebyte, 1);                            \
	} while (0)
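/*
 * Illustrative note (sketch, not part of the original file): AWAKEN() is the
 * classic self-pipe wakeup.  Any pthread can force a blocked poll()/ppoll()
 * in fd_poll() to return early by writing one byte into the pipe, because
 * io_pipe[0] is always part of the polled fd set.  The consumer side then
 * drains the pipe, roughly:
 *
 *	unsigned char trash[64];
 *	if (revents & POLLIN)
 *		while (read(m->io_pipe[0], trash, sizeof(trash)) > 0)
 *			;	// discard wake bytes, then rescan the queues
 */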
/* control variable for initializer */
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
pthread_key_t thread_current;

static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *masters;

static void thread_free(struct thread_master *master, struct thread *thread);
#ifndef EXCLUDE_CPU_TIME
#define EXCLUDE_CPU_TIME 0
#endif
#ifndef CONSUMED_TIME_CHECK
#define CONSUMED_TIME_CHECK 5000000
#endif
bool cputime_enabled = !EXCLUDE_CPU_TIME;
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;

/* CLI start ---------------------------------------------------------------- */
#ifndef VTYSH_EXTRACT_PL
#include "lib/thread_clippy.c"
#endif
static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
{
	int size = sizeof(a->func);

	return jhash(&a->func, size, 0);
}

static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
				const struct cpu_thread_history *b)
{
	return a->func == b->func;
}

static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
{
	struct cpu_thread_history *new;

	new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
	new->func = a->func;
	new->funcname = a->funcname;
	return new;
}

static void cpu_record_hash_free(void *a)
{
	struct cpu_thread_history *hist = a;

	XFREE(MTYPE_THREAD_STATS, hist);
}
static void vty_out_cpu_thread_history(struct vty *vty,
				       struct cpu_thread_history *a)
{
	vty_out(vty,
		"%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
		a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
		a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
		(a->real.total / a->total_calls), a->real.max,
		a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
	vty_out(vty, " %c%c%c%c%c %s\n",
		a->types & (1 << THREAD_READ) ? 'R' : ' ',
		a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
		a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
		a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
		a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
}
static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
{
	struct cpu_thread_history *totals = args[0];
	struct cpu_thread_history copy;
	struct vty *vty = args[1];
	uint8_t *filter = args[2];

	struct cpu_thread_history *a = bucket->data;

	copy.total_active =
		atomic_load_explicit(&a->total_active, memory_order_seq_cst);
	copy.total_calls =
		atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
	copy.total_cpu_warn =
		atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
	copy.total_wall_warn =
		atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
	copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
						     memory_order_seq_cst);
	copy.cpu.total =
		atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
	copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
	copy.real.total =
		atomic_load_explicit(&a->real.total, memory_order_seq_cst);
	copy.real.max =
		atomic_load_explicit(&a->real.max, memory_order_seq_cst);
	copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
	copy.funcname = a->funcname;

	if (!(copy.types & *filter))
		return;

	vty_out_cpu_thread_history(vty, &copy);
	totals->total_active += copy.total_active;
	totals->total_calls += copy.total_calls;
	totals->total_cpu_warn += copy.total_cpu_warn;
	totals->total_wall_warn += copy.total_wall_warn;
	totals->total_starv_warn += copy.total_starv_warn;
	totals->real.total += copy.real.total;
	if (totals->real.max < copy.real.max)
		totals->real.max = copy.real.max;
	totals->cpu.total += copy.cpu.total;
	if (totals->cpu.max < copy.cpu.max)
		totals->cpu.max = copy.cpu.max;
}
static void cpu_record_print(struct vty *vty, uint8_t filter)
{
	struct cpu_thread_history tmp;
	void *args[3] = {&tmp, vty, &filter};
	struct thread_master *m;
	struct listnode *ln;

	if (!cputime_enabled)
		vty_out(vty,
			"\n"
			"Collecting CPU time statistics is currently disabled.  Following statistics\n"
			"will be zero or may display data from when collection was enabled.  Use the\n"
			"  \"service cputime-stats\"  command to start collecting data.\n"
			"\nCounters and wallclock times are always maintained and should be accurate.\n");

	memset(&tmp, 0, sizeof(tmp));
	tmp.funcname = "TOTAL";
	tmp.types = filter;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			const char *name = m->name ? m->name : "main";

			char underline[strlen(name) + 1];
			memset(underline, '-', sizeof(underline));
			underline[sizeof(underline) - 1] = '\0';

			vty_out(vty, "\n");
			vty_out(vty, "Showing statistics for pthread %s\n",
				name);
			vty_out(vty, "-------------------------------%s\n",
				underline);
			vty_out(vty, "%30s %18s %18s\n", "",
				"CPU (user+system):", "Real (wall-clock):");
			vty_out(vty,
				"Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
			vty_out(vty, " Avg uSec Max uSecs");
			vty_out(vty,
				"  CPU_Warn Wall_Warn Starv_Warn   Type  Thread\n");

			if (m->cpu_record->count)
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_print,
					args);
			else
				vty_out(vty, "No data to display yet.\n");

			vty_out(vty, "\n");
		}
	}

	vty_out(vty, "\n");
	vty_out(vty, "Total thread statistics\n");
	vty_out(vty, "-------------------------\n");
	vty_out(vty, "%30s %18s %18s\n", "",
		"CPU (user+system):", "Real (wall-clock):");
	vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
	vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
	vty_out(vty, "   Type  Thread\n");

	if (tmp.total_calls > 0)
		vty_out_cpu_thread_history(vty, &tmp);
}
static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
{
	uint8_t *filter = args[0];
	struct hash *cpu_record = args[1];

	struct cpu_thread_history *a = bucket->data;

	if (!(a->types & *filter))
		return;

	hash_release(cpu_record, bucket->data);
}

static void cpu_record_clear(uint8_t filter)
{
	uint8_t *tmp = &filter;
	struct thread_master *m;
	struct listnode *ln;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			frr_with_mutex(&m->mtx) {
				void *args[2] = {tmp, m->cpu_record};

				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_clear,
					args);
			}
		}
	}
}
static uint8_t parse_filter(const char *filterstr)
{
	int i = 0;
	int filter = 0;

	while (filterstr[i] != '\0') {
		switch (filterstr[i]) {
		case 'r':
		case 'R':
			filter |= (1 << THREAD_READ);
			break;
		case 'w':
		case 'W':
			filter |= (1 << THREAD_WRITE);
			break;
		case 't':
		case 'T':
			filter |= (1 << THREAD_TIMER);
			break;
		case 'e':
		case 'E':
			filter |= (1 << THREAD_EVENT);
			break;
		case 'x':
		case 'X':
			filter |= (1 << THREAD_EXECUTE);
			break;
		default:
			break;
		}
		++i;
	}

	return filter;
}
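/*
 * Illustrative example (not in the original file): parse_filter("rw")
 * returns (1 << THREAD_READ) | (1 << THREAD_WRITE), so "show thread cpu rw"
 * restricts the statistics table to read and write tasks.  A string with
 * none of the letters [rwtexRWTEX] yields 0, which the callers below
 * reject as an invalid filter.
 */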
DEFUN_NOSH (show_thread_cpu,
	    show_thread_cpu_cmd,
	    "show thread cpu [FILTER]",
	    SHOW_STR
	    "Thread information\n"
	    "Thread CPU usage\n"
	    "Display filter (rwtex)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_print(vty, filter);
	return CMD_SUCCESS;
}
DEFPY (service_cputime_stats,
       service_cputime_stats_cmd,
       "[no] service cputime-stats",
       NO_STR
       "Set up miscellaneous service\n"
       "Collect CPU usage statistics\n")
{
	cputime_enabled = !no;
	return CMD_SUCCESS;
}

DEFPY (service_cputime_warning,
       service_cputime_warning_cmd,
       "[no] service cputime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		cputime_threshold = 0;
	else
		cputime_threshold = cputime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_cputime_warning,
       no_service_cputime_warning_cmd,
       "no service cputime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n")

DEFPY (service_walltime_warning,
       service_walltime_warning_cmd,
       "[no] service walltime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		walltime_threshold = 0;
	else
		walltime_threshold = walltime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_walltime_warning,
       no_service_walltime_warning_cmd,
       "no service walltime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n")
static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct thread *thread;
	uint32_t i;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing poll FD's for %s\n", name);
	vty_out(vty, "----------------------%s\n", underline);
	vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
		m->fd_limit);
	for (i = 0; i < m->handler.pfdcount; i++) {
		vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
			m->handler.pfds[i].fd, m->handler.pfds[i].events,
			m->handler.pfds[i].revents);

		if (m->handler.pfds[i].events & POLLIN) {
			thread = m->read[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR ");
			else
				vty_out(vty, "%s ", thread->xref->funcname);
		} else
			vty_out(vty, " ");

		if (m->handler.pfds[i].events & POLLOUT) {
			thread = m->write[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR\n");
			else
				vty_out(vty, "%s\n", thread->xref->funcname);
		} else
			vty_out(vty, "\n");
	}
}
DEFUN_NOSH (show_thread_poll,
	    show_thread_poll_cmd,
	    "show thread poll",
	    SHOW_STR
	    "Thread information\n"
	    "Show poll FD's and information\n")
{
	struct listnode *node;
	struct thread_master *m;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
			show_thread_poll_helper(vty, m);
		}
	}

	return CMD_SUCCESS;
}
DEFUN (clear_thread_cpu,
       clear_thread_cpu_cmd,
       "clear thread cpu [FILTER]",
       "Clear stored data in all pthreads\n"
       "Thread information\n"
       "Thread CPU usage\n"
       "Display filter (rwtexb)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_clear(filter);
	return CMD_SUCCESS;
}
static void show_thread_timers_helper(struct vty *vty, struct thread_master *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct thread *thread;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing timers for %s\n", name);
	vty_out(vty, "-------------------%s\n", underline);

	frr_each (thread_timer_list, &m->timer, thread) {
		vty_out(vty, "  %-50s%pTH\n", thread->hist->funcname, thread);
	}
}

DEFPY_NOSH (show_thread_timers,
	    show_thread_timers_cmd,
	    "show thread timers",
	    SHOW_STR
	    "Thread information\n"
	    "Show all timers and how long they have remaining\n")
{
	struct listnode *node;
	struct thread_master *m;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m))
			show_thread_timers_helper(vty, m);
	}

	return CMD_SUCCESS;
}
void thread_cmd_init(void)
{
	install_element(VIEW_NODE, &show_thread_cpu_cmd);
	install_element(VIEW_NODE, &show_thread_poll_cmd);
	install_element(ENABLE_NODE, &clear_thread_cpu_cmd);

	install_element(CONFIG_NODE, &service_cputime_stats_cmd);
	install_element(CONFIG_NODE, &service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &service_walltime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);

	install_element(VIEW_NODE, &show_thread_timers_cmd);
}
/* CLI end ------------------------------------------------------------------ */
static void cancelreq_del(void *cr)
{
	XFREE(MTYPE_TMP, cr);
}

/* initializer, only ever called once */
static void initializer(void)
{
	pthread_key_create(&thread_current, NULL);
}
struct thread_master *thread_master_create(const char *name)
{
	struct thread_master *rv;
	struct rlimit limit;

	pthread_once(&init_once, &initializer);

	rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));

	/* Initialize master mutex */
	pthread_mutex_init(&rv->mtx, NULL);
	pthread_cond_init(&rv->cancel_cond, NULL);

	/* Set name */
	name = name ? name : "default";
	rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);

	/* Initialize I/O task data structures */

	/* Use configured limit if present, ulimit otherwise. */
	rv->fd_limit = frr_get_fd_limit();
	if (rv->fd_limit == 0) {
		getrlimit(RLIMIT_NOFILE, &limit);
		rv->fd_limit = (int)limit.rlim_cur;
	}

	rv->read = XCALLOC(MTYPE_THREAD_POLL,
			   sizeof(struct thread *) * rv->fd_limit);

	rv->write = XCALLOC(MTYPE_THREAD_POLL,
			    sizeof(struct thread *) * rv->fd_limit);

	char tmhashname[strlen(name) + 32];

	snprintf(tmhashname, sizeof(tmhashname),
		 "%s - threadmaster event hash", name);
	rv->cpu_record = hash_create_size(
		8, (unsigned int (*)(const void *))cpu_record_hash_key,
		(bool (*)(const void *, const void *))cpu_record_hash_cmp,
		tmhashname);

	thread_list_init(&rv->event);
	thread_list_init(&rv->ready);
	thread_list_init(&rv->unuse);
	thread_timer_list_init(&rv->timer);

	/* Initialize thread_fetch() settings */
	rv->spin = true;
	rv->handle_signals = true;

	/* Set pthread owner, should be updated by actual owner */
	rv->owner = pthread_self();
	rv->cancel_req = list_new();
	rv->cancel_req->del = cancelreq_del;
	rv->canceled = true;

	/* Initialize pipe poker */
	pipe(rv->io_pipe);
	set_nonblocking(rv->io_pipe[0]);
	set_nonblocking(rv->io_pipe[1]);

	/* Initialize data structures for poll() */
	rv->handler.pfdsize = rv->fd_limit;
	rv->handler.pfdcount = 0;
	rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);
	rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);

	/* add to list of threadmasters */
	frr_with_mutex(&masters_mtx) {
		if (!masters)
			masters = list_new();

		listnode_add(masters, rv);
	}

	return rv;
}
void thread_master_set_name(struct thread_master *master, const char *name)
{
	frr_with_mutex(&master->mtx) {
		XFREE(MTYPE_THREAD_MASTER, master->name);
		master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
	}
}

#define THREAD_UNUSED_DEPTH 10
/* Move thread to unuse list. */
static void thread_add_unuse(struct thread_master *m, struct thread *thread)
{
	pthread_mutex_t mtxc = thread->mtx;

	assert(m != NULL && thread != NULL);

	thread->hist->total_active--;
	memset(thread, 0, sizeof(struct thread));
	thread->type = THREAD_UNUSED;

	/* Restore the thread mutex context. */
	thread->mtx = mtxc;

	if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
		thread_list_add_tail(&m->unuse, thread);
		return;
	}

	thread_free(m, thread);
}
/* Free all unused thread. */
static void thread_list_free(struct thread_master *m,
			     struct thread_list_head *list)
{
	struct thread *t;

	while ((t = thread_list_pop(list)))
		thread_free(m, t);
}

static void thread_array_free(struct thread_master *m,
			      struct thread **thread_array)
{
	struct thread *t;
	int index;

	for (index = 0; index < m->fd_limit; ++index) {
		t = thread_array[index];
		if (t) {
			thread_array[index] = NULL;
			thread_free(m, t);
		}
	}
	XFREE(MTYPE_THREAD_POLL, thread_array);
}
/*
 * thread_master_free_unused
 *
 * As threads are finished with, they are put on the
 * unuse list for later reuse.
 * If we are shutting down, free up the unused threads
 * so we can see if we forgot to shut anything off.
 */
void thread_master_free_unused(struct thread_master *m)
{
	frr_with_mutex(&m->mtx) {
		struct thread *t;

		while ((t = thread_list_pop(&m->unuse)))
			thread_free(m, t);
	}
}
/* Stop thread scheduler. */
void thread_master_free(struct thread_master *m)
{
	struct thread *t;

	frr_with_mutex(&masters_mtx) {
		listnode_delete(masters, m);
		if (masters->count == 0) {
			list_delete(&masters);
		}
	}

	thread_array_free(m, m->read);
	thread_array_free(m, m->write);
	while ((t = thread_timer_list_pop(&m->timer)))
		thread_free(m, t);
	thread_list_free(m, &m->event);
	thread_list_free(m, &m->ready);
	thread_list_free(m, &m->unuse);
	pthread_mutex_destroy(&m->mtx);
	pthread_cond_destroy(&m->cancel_cond);
	close(m->io_pipe[0]);
	close(m->io_pipe[1]);
	list_delete(&m->cancel_req);
	m->cancel_req = NULL;

	hash_clean(m->cpu_record, cpu_record_hash_free);
	hash_free(m->cpu_record);
	m->cpu_record = NULL;

	XFREE(MTYPE_THREAD_MASTER, m->name);
	XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
	XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
	XFREE(MTYPE_THREAD_MASTER, m);
}
/* Return remain time in milliseconds. */
unsigned long thread_timer_remain_msec(struct thread *thread)
{
	int64_t remain;

	frr_with_mutex(&thread->mtx) {
		remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
	}

	return remain < 0 ? 0 : remain;
}

/* Return remain time in seconds. */
unsigned long thread_timer_remain_second(struct thread *thread)
{
	return thread_timer_remain_msec(thread) / 1000LL;
}

struct timeval thread_timer_remain(struct thread *thread)
{
	struct timeval remain;

	frr_with_mutex(&thread->mtx) {
		monotime_until(&thread->u.sands, &remain);
	}
	return remain;
}

static int time_hhmmss(char *buf, int buf_size, long sec)
{
	long hh;
	long mm;
	int wr;

	assert(buf_size >= 8);

	hh = sec / 3600;
	sec %= 3600;
	mm = sec / 60;
	sec %= 60;

	wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);

	return wr != 8;
}

char *thread_timer_to_hhmmss(char *buf, int buf_size,
			     struct thread *t_timer)
{
	if (t_timer) {
		time_hhmmss(buf, buf_size,
			    thread_timer_remain_second(t_timer));
	} else {
		snprintf(buf, buf_size, "--:--:--");
	}
	return buf;
}
/* Get new thread.  */
static struct thread *thread_get(struct thread_master *m, uint8_t type,
				 void (*func)(struct thread *), void *arg,
				 const struct xref_threadsched *xref)
{
	struct thread *thread = thread_list_pop(&m->unuse);
	struct cpu_thread_history tmp;

	if (!thread) {
		thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
		/* mutex only needs to be initialized at struct creation. */
		pthread_mutex_init(&thread->mtx, NULL);
		m->alloc++;
	}

	thread->type = type;
	thread->add_type = type;
	thread->master = m;
	thread->arg = arg;
	thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
	thread->ref = NULL;
	thread->ignore_timer_late = false;

	/*
	 * So if the passed in funcname is not what we have
	 * stored that means the thread->hist needs to be
	 * updated.  We keep the last one around in unused
	 * under the assumption that we are probably
	 * going to immediately allocate the same
	 * type of thread.
	 * This hopefully saves us some serious
	 * hash_get lookups.
	 */
	if ((thread->xref && thread->xref->funcname != xref->funcname)
	    || thread->func != func) {
		tmp.func = func;
		tmp.funcname = xref->funcname;
		thread->hist =
			hash_get(m->cpu_record, &tmp,
				 (void *(*)(void *))cpu_record_hash_alloc);
	}
	thread->hist->total_active++;
	thread->func = func;
	thread->xref = xref;

	return thread;
}
static void thread_free(struct thread_master *master, struct thread *thread)
{
	/* Update statistics. */
	assert(master->alloc > 0);
	master->alloc--;

	/* Free allocated resources. */
	pthread_mutex_destroy(&thread->mtx);
	XFREE(MTYPE_THREAD, thread);
}
static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
		   bool *eintr_p)
{
	sigset_t origsigs;
	unsigned char trash[64];
	nfds_t count = m->handler.copycount;

	/*
	 * If timer_wait is null here, that means poll() should block
	 * indefinitely, unless the thread_master has overridden it by setting
	 * ->selectpoll_timeout.
	 *
	 * If the value is positive, it specifies the maximum number of
	 * milliseconds to wait. If the timeout is -1, it specifies that
	 * we should never wait and always return immediately even if no
	 * event is detected. If the value is zero, the behavior is default.
	 */
	int timeout = -1;

	/* number of file descriptors with events */
	int num;

	if (timer_wait != NULL
	    && m->selectpoll_timeout == 0) // use the default value
		timeout = (timer_wait->tv_sec * 1000)
			  + (timer_wait->tv_usec / 1000);
	else if (m->selectpoll_timeout > 0) // use the user's timeout
		timeout = m->selectpoll_timeout;
	else if (m->selectpoll_timeout
		 < 0) // effect a poll (return immediately)
		timeout = 0;

	zlog_tls_buffer_flush();
	rcu_read_unlock();
	rcu_assert_read_unlocked();

	/* add poll pipe poker */
	assert(count + 1 < m->handler.pfdsize);
	m->handler.copy[count].fd = m->io_pipe[0];
	m->handler.copy[count].events = POLLIN;
	m->handler.copy[count].revents = 0x00;

	/* We need to deal with a signal-handling race here: we
	 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
	 * that may arrive just before we enter poll(). We will block the
	 * key signals, then check whether any have arrived - if so, we return
	 * before calling poll(). If not, we'll re-enable the signals
	 * in the ppoll() call.
	 */

	sigemptyset(&origsigs);
	if (m->handle_signals) {
		/* Main pthread that handles the app signals */
		if (frr_sigevent_check(&origsigs)) {
			/* Signal to process - restore signal mask and return */
			pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
			num = -1;
			*eintr_p = true;
			goto done;
		}
	} else {
		/* Don't make any changes for the non-main pthreads */
		pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
	}

#if defined(HAVE_PPOLL)
	struct timespec ts, *tsp;

	if (timeout >= 0) {
		ts.tv_sec = timeout / 1000;
		ts.tv_nsec = (timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
#else
	/* Not ideal - there is a race after we restore the signal mask */
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
	num = poll(m->handler.copy, count + 1, timeout);
#endif

done:

	if (num < 0 && errno == EINTR)
		*eintr_p = true;

	if (num > 0 && m->handler.copy[count].revents != 0 && num--)
		while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
			;

	rcu_read_lock();

	return num;
}
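/*
 * Illustrative summary (not part of the original file) of how the timeout
 * above is derived, per the comment in fd_poll():
 *
 *	m->selectpoll_timeout == 0  ->  use timer_wait; block if it is NULL
 *	m->selectpoll_timeout  > 0  ->  wait at most that many milliseconds
 *	m->selectpoll_timeout  < 0  ->  timeout = 0, i.e. non-blocking poll
 */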
/* Add new read thread. */
void _thread_add_read_write(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg, int fd,
			    struct thread **t_ptr)
{
	int dir = xref->thread_type;
	struct thread *thread = NULL;
	struct thread **thread_array;

	if (dir == THREAD_READ)
		frrtrace(9, frr_libfrr, schedule_read, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);
	else
		frrtrace(9, frr_libfrr, schedule_write, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);

	if (fd >= m->fd_limit)
		assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			// thread is already scheduled; don't reschedule
			break;

		/* default to a new pollfd */
		nfds_t queuepos = m->handler.pfdcount;

		if (dir == THREAD_READ)
			thread_array = m->read;
		else
			thread_array = m->write;

		/* if we already have a pollfd for our file descriptor, find
		 * and use it */
		for (nfds_t i = 0; i < m->handler.pfdcount; i++)
			if (m->handler.pfds[i].fd == fd) {
				queuepos = i;

#ifdef DEV_BUILD
				/*
				 * What happens if we have a thread already
				 * created for this event?
				 */
				if (thread_array[fd])
					assert(!"Thread already scheduled for file descriptor");
#endif
				break;
			}

		/* make sure we have room for this fd + pipe poker fd */
		assert(queuepos + 1 < m->handler.pfdsize);

		thread = thread_get(m, dir, func, arg, xref);

		m->handler.pfds[queuepos].fd = fd;
		m->handler.pfds[queuepos].events |=
			(dir == THREAD_READ ? POLLIN : POLLOUT);

		if (queuepos == m->handler.pfdcount)
			m->handler.pfdcount++;

		if (thread) {
			frr_with_mutex(&thread->mtx) {
				thread->u.fd = fd;
				thread_array[thread->u.fd] = thread;
			}

			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		AWAKEN(m);
	}
}
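/*
 * Usage sketch (illustrative, hypothetical names; callers use the
 * thread_add_read()/thread_add_write() macros from thread.h, which
 * supply the xref argument):
 *
 *	static void sock_readable(struct thread *t)
 *	{
 *		int fd = THREAD_FD(t);
 *		...consume data...
 *		// read tasks are one-shot: re-add to keep listening
 *		thread_add_read(t->master, sock_readable, THREAD_ARG(t),
 *				fd, NULL);
 *	}
 *
 *	struct thread *t_read = NULL;
 *	thread_add_read(master, sock_readable, ctx, sock, &t_read);
 */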
static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
				      struct thread_master *m,
				      void (*func)(struct thread *), void *arg,
				      struct timeval *time_relative,
				      struct thread **t_ptr)
{
	struct thread *thread;
	struct timeval t;

	assert(m != NULL);
	assert(time_relative);

	frrtrace(9, frr_libfrr, schedule_timer, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);

	/* Compute expiration/deadline time. */
	monotime(&t);
	timeradd(&t, time_relative, &t);

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			return;

		thread = thread_get(m, THREAD_TIMER, func, arg, xref);

		frr_with_mutex(&thread->mtx) {
			thread->u.sands = t;
			thread_timer_list_add(&m->timer, thread);
			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		/* The timer list is sorted - if this new timer
		 * might change the time we'll wait for, give the pthread
		 * a chance to re-compute.
		 */
		if (thread_timer_list_first(&m->timer) == thread)
			AWAKEN(m);
	}
#define ONEYEAR2SEC (60 * 60 * 24 * 365)
	if (time_relative->tv_sec > ONEYEAR2SEC)
		flog_err(
			EC_LIB_TIMER_TOO_LONG,
			"Timer: %pTHD is created with an expiration that is greater than 1 year",
			thread);
}
/* Add timer event thread. */
void _thread_add_timer(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer;
	trel.tv_usec = 0;

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "millisecond" resolution */
void _thread_add_timer_msec(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg,
			    long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer / 1000;
	trel.tv_usec = 1000 * (timer % 1000);

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "timeval" resolution */
void _thread_add_timer_tv(const struct xref_threadsched *xref,
			  struct thread_master *m,
			  void (*func)(struct thread *), void *arg,
			  struct timeval *tv, struct thread **t_ptr)
{
	_thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}
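/*
 * Usage sketch (illustrative, hypothetical names).  thread_add_timer()
 * takes whole seconds, thread_add_timer_msec() milliseconds; both funnel
 * into _thread_add_timer_timeval() above:
 *
 *	struct thread *t_hello = NULL;
 *	thread_add_timer_msec(master, send_hello, nbr, 250, &t_hello);
 *
 * Passing &t_hello both prevents double-scheduling (the t_ptr && *t_ptr
 * check) and lets the library NULL the pointer when the timer pops or is
 * cancelled.
 */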
/* Add simple event thread. */
void _thread_add_event(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, int val, struct thread **t_ptr)
{
	struct thread *thread = NULL;

	frrtrace(9, frr_libfrr, schedule_event, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, val, arg, 0);

	assert(m != NULL);

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			break;

		thread = thread_get(m, THREAD_EVENT, func, arg, xref);
		frr_with_mutex(&thread->mtx) {
			thread->u.val = val;
			thread_list_add_tail(&m->event, thread);
		}

		if (t_ptr) {
			*t_ptr = thread;
			thread->ref = t_ptr;
		}

		AWAKEN(m);
	}
}
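/*
 * Usage sketch (illustrative, hypothetical names): an event is in effect
 * a timer that expires immediately and never touches poll():
 *
 *	thread_add_event(master, process_queue, peer, 0, &peer->t_process);
 *
 * The int argument is stored in thread->u.val and can be read back in the
 * callback via THREAD_VAL(thread).
 */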
/* Thread cancellation ------------------------------------------------------ */

/**
 * NOT's out the .events field of pollfd corresponding to the given file
 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
 *
 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
 * implementation for details.
 *
 * @param master
 * @param fd
 * @param state the event to cancel. One or more (OR'd together) of the
 * following:
 *   - POLLIN
 *   - POLLOUT
 */
static void thread_cancel_rw(struct thread_master *master, int fd, short state,
			     int idx_hint)
{
	bool found = false;

	/* find the index of corresponding pollfd */
	nfds_t i;

	/* Cancel POLLHUP too just in case some bozo set it */
	state |= POLLHUP;

	/* Some callers know the index of the pfd already */
	if (idx_hint >= 0) {
		i = idx_hint;
		found = true;
	} else {
		/* Have to look for the fd in the pfd array */
		for (i = 0; i < master->handler.pfdcount; i++)
			if (master->handler.pfds[i].fd == fd) {
				found = true;
				break;
			}
	}

	if (!found) {
		zlog_debug(
			"[!] Received cancellation request for nonexistent rw job");
		zlog_debug("[!] threadmaster: %s | fd: %d",
			   master->name ? master->name : "", fd);
		return;
	}

	/* NOT out event. */
	master->handler.pfds[i].events &= ~(state);

	/* If all events are canceled, delete / resize the pollfd array. */
	if (master->handler.pfds[i].events == 0) {
		memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
			(master->handler.pfdcount - i - 1)
				* sizeof(struct pollfd));
		master->handler.pfdcount--;
		master->handler.pfds[master->handler.pfdcount].fd = 0;
		master->handler.pfds[master->handler.pfdcount].events = 0;
	}

	/* If we have the same pollfd in the copy, perform the same operations,
	 * otherwise return.
	 */
	if (i >= master->handler.copycount)
		return;

	master->handler.copy[i].events &= ~(state);

	if (master->handler.copy[i].events == 0) {
		memmove(master->handler.copy + i, master->handler.copy + i + 1,
			(master->handler.copycount - i - 1)
				* sizeof(struct pollfd));
		master->handler.copycount--;
		master->handler.copy[master->handler.copycount].fd = 0;
		master->handler.copy[master->handler.copycount].events = 0;
	}
}
/*
 * Process task cancellation given a task argument: iterate through the
 * various lists of tasks, looking for any that match the argument.
 */
static void cancel_arg_helper(struct thread_master *master,
			      const struct cancel_req *cr)
{
	struct thread *t;
	nfds_t i;
	int fd;
	struct pollfd *pfd;

	/* We're only processing arg-based cancellations here. */
	if (cr->eventobj == NULL)
		return;

	/* First process the ready lists. */
	frr_each_safe(thread_list, &master->event, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->event, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	frr_each_safe(thread_list, &master->ready, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->ready, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	/* If requested, stop here and ignore io and timers */
	if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
		return;

	/* Check the io tasks */
	for (i = 0; i < master->handler.pfdcount;) {
		pfd = master->handler.pfds + i;

		if (pfd->events & POLLIN)
			t = master->read[pfd->fd];
		else
			t = master->write[pfd->fd];

		if (t && t->arg == cr->eventobj) {
			fd = pfd->fd;

			/* Found a match to cancel: clean up fd arrays */
			thread_cancel_rw(master, pfd->fd, pfd->events, i);

			/* Clean up thread arrays */
			master->read[fd] = NULL;
			master->write[fd] = NULL;

			/* Clear caller's ref */
			if (t->ref)
				*t->ref = NULL;

			thread_add_unuse(master, t);

			/* Don't increment 'i' since the cancellation will have
			 * removed the entry from the pfd array
			 */
		} else
			i++;
	}

	/* Check the timer tasks */
	t = thread_timer_list_first(&master->timer);
	while (t) {
		struct thread *t_next;

		t_next = thread_timer_list_next(&master->timer, t);

		if (t->arg == cr->eventobj) {
			thread_timer_list_del(&master->timer, t);
			if (t->ref)
				*t->ref = NULL;
			thread_add_unuse(master, t);
		}

		t = t_next;
	}
}
/**
 * Process cancellation requests.
 *
 * This may only be run from the pthread which owns the thread_master.
 *
 * @param master the thread master to process
 * @REQUIRE master->mtx
 */
static void do_thread_cancel(struct thread_master *master)
{
	struct thread_list_head *list = NULL;
	struct thread **thread_array = NULL;
	struct thread *thread;

	struct cancel_req *cr;
	struct listnode *ln;

	for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
		/*
		 * If this is an event object cancellation, search
		 * through task lists deleting any tasks which have the
		 * specified argument - use this handy helper function.
		 */
		if (cr->eventobj) {
			cancel_arg_helper(master, cr);
			continue;
		}

		/*
		 * The pointer varies depending on whether the cancellation
		 * request was made asynchronously or not. If it was, we
		 * need to check whether the thread even exists anymore
		 * before cancelling it.
		 */
		thread = (cr->thread) ? cr->thread : *cr->threadref;

		if (!thread)
			continue;

		/* Determine the appropriate queue to cancel the thread from */
		switch (thread->type) {
		case THREAD_READ:
			thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
			thread_array = master->read;
			break;
		case THREAD_WRITE:
			thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
			thread_array = master->write;
			break;
		case THREAD_TIMER:
			thread_timer_list_del(&master->timer, thread);
			break;
		case THREAD_EVENT:
			list = &master->event;
			break;
		case THREAD_READY:
			list = &master->ready;
			break;
		default:
			continue;
		}

		if (list) {
			thread_list_del(list, thread);
		} else if (thread_array) {
			thread_array[thread->u.fd] = NULL;
		}

		if (thread->ref)
			*thread->ref = NULL;

		thread_add_unuse(thread->master, thread);
	}

	/* Delete and free all cancellation requests */
	if (master->cancel_req)
		list_delete_all_node(master->cancel_req);

	/* Wake up any threads which may be blocked in thread_cancel_async() */
	master->canceled = true;
	pthread_cond_broadcast(&master->cancel_cond);
}
/*
 * Helper function used for multiple flavors of arg-based cancellation.
 */
static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
{
	struct cancel_req *cr;

	assert(m->owner == pthread_self());

	/* Only worth anything if caller supplies an arg. */
	if (arg == NULL)
		return;

	cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));

	cr->flags = flags;

	frr_with_mutex(&m->mtx) {
		cr->eventobj = arg;
		listnode_add(m->cancel_req, cr);
		do_thread_cancel(m);
	}
}

/*
 * Cancel any events which have the specified argument.
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event(struct thread_master *master, void *arg)
{
	cancel_event_helper(master, arg, 0);
}

/*
 * Cancel ready tasks with an arg matching 'arg'
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event_ready(struct thread_master *m, void *arg)
{
	/* Only cancel ready/event tasks */
	cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
}
/*
 * Cancel a specific task.
 *
 * MT-Unsafe
 *
 * @param thread task to cancel
 */
void thread_cancel(struct thread **thread)
{
	struct thread_master *master;

	if (thread == NULL || *thread == NULL)
		return;

	master = (*thread)->master;

	frrtrace(9, frr_libfrr, thread_cancel, master,
		 (*thread)->xref->funcname, (*thread)->xref->xref.file,
		 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
		 (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);

	assert(master->owner == pthread_self());

	frr_with_mutex(&master->mtx) {
		struct cancel_req *cr =
			XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
		cr->thread = *thread;
		listnode_add(master->cancel_req, cr);
		do_thread_cancel(master);
	}

	*thread = NULL;
}
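/*
 * Usage note (illustrative): most callers go through the THREAD_OFF()
 * convenience macro from thread.h instead of calling thread_cancel()
 * directly, e.g.
 *
 *	THREAD_OFF(peer->t_connect);
 *
 * Because thread_cancel() NULLs *thread, cancelling an already-cancelled
 * task is harmless.
 */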
/**
 * Asynchronous cancellation.
 *
 * Called with either a struct thread ** or void * to an event argument,
 * this function posts the correct cancellation request and blocks until it is
 * serviced.
 *
 * If the thread is currently running, execution blocks until it completes.
 *
 * The last two parameters are mutually exclusive, i.e. if you pass one the
 * other must be NULL.
 *
 * When the cancellation procedure executes on the target thread_master, the
 * thread * provided is checked for nullity. If it is null, the thread is
 * assumed to no longer exist and the cancellation request is a no-op. Thus
 * users of this API must pass a back-reference when scheduling the original
 * task.
 *
 * MT-Safe
 *
 * @param master the thread master with the relevant event / task
 * @param thread pointer to thread to cancel
 * @param eventobj the event
 */
void thread_cancel_async(struct thread_master *master, struct thread **thread,
			 void *eventobj)
{
	assert(!(thread && eventobj) && (thread || eventobj));

	if (thread && *thread)
		frrtrace(9, frr_libfrr, thread_cancel_async, master,
			 (*thread)->xref->funcname, (*thread)->xref->xref.file,
			 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
			 (*thread)->u.val, (*thread)->arg,
			 (*thread)->u.sands.tv_sec);
	else
		frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
			 0, NULL, 0, 0, eventobj, 0);

	assert(master->owner != pthread_self());

	frr_with_mutex(&master->mtx) {
		master->canceled = false;

		if (thread) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->threadref = thread;
			listnode_add(master->cancel_req, cr);
		} else if (eventobj) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->eventobj = eventobj;
			listnode_add(master->cancel_req, cr);
		}
		AWAKEN(master);

		while (!master->canceled)
			pthread_cond_wait(&master->cancel_cond, &master->mtx);

		if (thread)
			*thread = NULL;
	}
}
/* ------------------------------------------------------------------------- */
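/*
 * Illustrative sketch (hypothetical names) of the back-reference rule
 * described above: the scheduling call and the asynchronous cancel must
 * share the same struct thread **.
 *
 *	// on the owning pthread
 *	thread_add_read(a->master, handle_io, a, a->sock, &a->t_io);
 *
 *	// later, from a different pthread
 *	thread_cancel_async(a->master, &a->t_io, NULL);  // blocks until done
 *
 * Had &a->t_io not been passed at scheduling time, the library could not
 * NULL it out, and the canceller might act on a stale pointer.
 */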
static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
					 struct timeval *timer_val)
{
	if (!thread_timer_list_count(timers))
		return NULL;

	struct thread *next_timer = thread_timer_list_first(timers);

	monotime_until(&next_timer->u.sands, timer_val);
	return timer_val;
}

static struct thread *thread_run(struct thread_master *m, struct thread *thread,
				 struct thread *fetch)
{
	*fetch = *thread;
	thread_add_unuse(m, thread);
	return fetch;
}
static int thread_process_io_helper(struct thread_master *m,
				    struct thread *thread, short state,
				    short actual_state, int pos)
{
	struct thread **thread_array;

	/*
	 * poll() clears the .events field, but the pollfd array we
	 * pass to poll() is a copy of the one used to schedule threads.
	 * We need to synchronize state between the two here by applying
	 * the same changes poll() made on the copy of the "real" pollfd
	 * array.
	 *
	 * This cleans up a possible infinite loop where we refuse
	 * to respond to a poll event but poll is insistent that
	 * we should.
	 */
	m->handler.pfds[pos].events &= ~(state);

	if (!thread) {
		if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
			flog_err(EC_LIB_NO_THREAD,
				 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
				 m->handler.pfds[pos].fd, actual_state);
		return 0;
	}

	if (thread->type == THREAD_READ)
		thread_array = m->read;
	else
		thread_array = m->write;

	thread_array[thread->u.fd] = NULL;
	thread_list_add_tail(&m->ready, thread);
	thread->type = THREAD_READY;

	return 1;
}
/**
 * Process I/O events.
 *
 * Walks through file descriptor array looking for those pollfds whose .revents
 * field has something interesting. Deletes any invalid file descriptors.
 *
 * @param m the thread master
 * @param num the number of active file descriptors (return value of poll())
 */
static void thread_process_io(struct thread_master *m, unsigned int num)
{
	unsigned int ready = 0;
	struct pollfd *pfds = m->handler.copy;

	for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
		/* no event for current fd? immediately continue */
		if (pfds[i].revents == 0)
			continue;

		ready++;

		/*
		 * Unless someone has called thread_cancel from another
		 * pthread, the only thing that could have changed in
		 * m->handler.pfds while we were asleep is the .events
		 * field in a given pollfd. Barring thread_cancel() that
		 * value should be a superset of the values we have in our
		 * copy, so there's no need to update it. Similarly,
		 * barring deletion, the fd should still be a valid index
		 * into the master's pfds.
		 *
		 * We are including POLLERR here to do a READ event
		 * this is because the read should fail and the
		 * read function should handle it appropriately
		 */
		if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
			thread_process_io_helper(m, m->read[pfds[i].fd],
						 POLLIN, pfds[i].revents, i);
		}
		if (pfds[i].revents & POLLOUT)
			thread_process_io_helper(m, m->write[pfds[i].fd],
						 POLLOUT, pfds[i].revents, i);

		/* if one of our file descriptors is garbage, remove the same
		 * from both pfds + update sizes and index */
		if (pfds[i].revents & POLLNVAL) {
			memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
				(m->handler.pfdcount - i - 1)
					* sizeof(struct pollfd));
			m->handler.pfdcount--;
			m->handler.pfds[m->handler.pfdcount].fd = 0;
			m->handler.pfds[m->handler.pfdcount].events = 0;

			memmove(pfds + i, pfds + i + 1,
				(m->handler.copycount - i - 1)
					* sizeof(struct pollfd));
			m->handler.copycount--;
			m->handler.copy[m->handler.copycount].fd = 0;
			m->handler.copy[m->handler.copycount].events = 0;

			i--;
		}
	}
}
1701 static unsigned int thread_process_timers(struct thread_master
*m
,
1702 struct timeval
*timenow
)
1704 struct timeval prev
= *timenow
;
1705 bool displayed
= false;
1706 struct thread
*thread
;
1707 unsigned int ready
= 0;
1709 while ((thread
= thread_timer_list_first(&m
->timer
))) {
1710 if (timercmp(timenow
, &thread
->u
.sands
, <))
1712 prev
= thread
->u
.sands
;
1715 * If the timer would have popped 4 seconds in the
1716 * past then we are in a situation where we are
1717 * really getting behind on handling of events.
1718 * Let's log it and do the right thing with it.
1720 if (timercmp(timenow
, &prev
, >)) {
1721 atomic_fetch_add_explicit(
1722 &thread
->hist
->total_starv_warn
, 1,
1723 memory_order_seq_cst
);
1724 if (!displayed
&& !thread
->ignore_timer_late
) {
1726 EC_LIB_STARVE_THREAD
,
1727 "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1733 thread_timer_list_pop(&m
->timer
);
1734 thread
->type
= THREAD_READY
;
1735 thread_list_add_tail(&m
->ready
, thread
);
1742 /* process a list en masse, e.g. for event thread lists */
1743 static unsigned int thread_process(struct thread_list_head
*list
)
1745 struct thread
*thread
;
1746 unsigned int ready
= 0;
1748 while ((thread
= thread_list_pop(list
))) {
1749 thread
->type
= THREAD_READY
;
1750 thread_list_add_tail(&thread
->master
->ready
, thread
);
/* Fetch next ready thread. */
struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
{
	struct thread *thread = NULL;
	struct timeval now;
	struct timeval zerotime = {0, 0};
	struct timeval tv;
	struct timeval *tw = NULL;
	bool eintr_p = false;
	int num = 0;

	do {
		/* Handle signals if any */
		if (m->handle_signals)
			frr_sigevent_process();

		pthread_mutex_lock(&m->mtx);

		/* Process any pending cancellation requests */
		do_thread_cancel(m);

		/*
		 * Attempt to flush ready queue before going into poll().
		 * This is performance-critical. Think twice before modifying.
		 */
		if ((thread = thread_list_pop(&m->ready))) {
			fetch = thread_run(m, thread, fetch);
			if (fetch->ref)
				*fetch->ref = NULL;
			pthread_mutex_unlock(&m->mtx);
			if (!m->ready_run_loop)
				GETRUSAGE(&m->last_getrusage);
			m->ready_run_loop = true;
			break;
		}

		m->ready_run_loop = false;
		/* otherwise, tick through scheduling sequence */

		/*
		 * Post events to ready queue. This must come before the
		 * following block since events should occur immediately
		 */
		thread_process(&m->event);

		/*
		 * If there are no tasks on the ready queue, we will poll()
		 * until a timer expires or we receive I/O, whichever comes
		 * first. The strategy for doing this is:
		 *
		 * - If there are events pending, set the poll() timeout to
		 *   zero
		 * - If there are no events pending, but there are timers
		 *   pending, set the timeout to the smallest remaining time
		 *   on any timer
		 * - If there are neither timers nor events pending, but there
		 *   are file descriptors pending, block indefinitely in poll()
		 * - If nothing is pending, it's time for the application to
		 *   die
		 *
		 * In every case except the last, we need to hit poll() at
		 * least once per loop to avoid starvation by events
		 */
		if (!thread_list_count(&m->ready))
			tw = thread_timer_wait(&m->timer, &tv);

		if (thread_list_count(&m->ready) ||
		    (tw && !timercmp(tw, &zerotime, >)))
			tw = &zerotime;

		if (!tw && m->handler.pfdcount == 0) { /* die */
			pthread_mutex_unlock(&m->mtx);
			fetch = NULL;
			break;
		}

		/*
		 * Copy pollfd array + # active pollfds in it. Not necessary to
		 * copy the array size as this is fixed.
		 */
		m->handler.copycount = m->handler.pfdcount;
		memcpy(m->handler.copy, m->handler.pfds,
		       m->handler.copycount * sizeof(struct pollfd));

		pthread_mutex_unlock(&m->mtx);
		eintr_p = false;
		num = fd_poll(m, tw, &eintr_p);
		pthread_mutex_lock(&m->mtx);

		/* Handle any errors received in poll() */
		if (num < 0) {
			if (eintr_p) {
				pthread_mutex_unlock(&m->mtx);
				/* loop around to signal handler */
				continue;
			}

			/* else die */
			flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
				 safe_strerror(errno));
			pthread_mutex_unlock(&m->mtx);
			fetch = NULL;
			break;
		}

		/* Post timers to ready queue. */
		monotime(&now);
		thread_process_timers(m, &now);

		/* Post I/O to ready queue. */
		if (num > 0)
			thread_process_io(m, num);

		pthread_mutex_unlock(&m->mtx);

	} while (!thread && m->spin);

	return fetch;
}
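/*
 * Usage sketch (illustrative): thread_fetch() is the heart of a daemon's
 * main loop; the canonical consumer looks roughly like
 *
 *	struct thread task;
 *
 *	while (thread_fetch(master, &task))
 *		thread_call(&task);
 *
 * thread_fetch() returns NULL only when nothing at all is pending, which
 * is the loop's cue to exit.
 */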
static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
{
	return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
		+ (a.tv_usec - b.tv_usec));
}
unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
				   unsigned long *cputime)
{
#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
	/*
	 * FreeBSD appears to have an issue when calling clock_gettime
	 * with CLOCK_THREAD_CPUTIME_ID really close to each other:
	 * occasionally the now time will be before the start time.
	 * This is not good and FRR is ending up with CPU HOGs
	 * when the subtraction wraps to very large numbers.
	 *
	 * What we are going to do here is cheat a little bit
	 * and notice that this is a problem and just correct
	 * it so that it is impossible to happen.
	 */
	if (start->cpu.tv_sec == now->cpu.tv_sec &&
	    start->cpu.tv_nsec > now->cpu.tv_nsec)
		now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
	else if (start->cpu.tv_sec > now->cpu.tv_sec) {
		now->cpu.tv_sec = start->cpu.tv_sec;
		now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
	}

	*cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
		   + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
#else
	/* This is 'user + sys' time. */
	*cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
		   + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
#endif
	return timeval_elapsed(now->real, start->real);
}
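/*
 * Worked example (illustrative): both results are in microseconds.  With
 * start->real = 10.900000s and now->real = 12.150000s the wall time is
 * (12 - 10) * 1000000 + (150000 - 900000) = 1250000us, i.e. 1250ms; the
 * negative tv_usec delta is absorbed by the seconds term, so no special
 * casing is needed.
 */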
/* We should aim to yield after yield milliseconds, which defaults
   to THREAD_YIELD_TIME_SLOT.
   Note: we are using real (wall clock) time for this calculation.
   It could be argued that CPU time may make more sense in certain
   contexts.  The things to consider are whether the thread may have
   blocked (in which case wall time increases, but CPU time does not),
   or whether the system is heavily loaded with other processes competing
   for CPU time.  On balance, wall clock time seems to make sense.
   Plus it has the added benefit that gettimeofday should be faster
   than calling getrusage. */
int thread_should_yield(struct thread *thread)
{
	int result;

	frr_with_mutex(&thread->mtx) {
		result = monotime_since(&thread->real, NULL)
			 > (int64_t)thread->yield;
	}
	return result;
}

void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
{
	frr_with_mutex(&thread->mtx) {
		thread->yield = yield_time;
	}
}
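/*
 * Cooperative-yield sketch (illustrative, hypothetical names): a long
 * job checks thread_should_yield() periodically and reschedules itself
 * instead of monopolizing the event loop:
 *
 *	static void table_walk(struct thread *t)
 *	{
 *		struct walk_ctx *ctx = THREAD_ARG(t);
 *
 *		while (ctx->node) {
 *			process(ctx->node);
 *			ctx->node = next(ctx->node);
 *			if (thread_should_yield(t)) {
 *				thread_add_event(t->master, table_walk,
 *						 ctx, 0, &ctx->t_walk);
 *				return;
 *			}
 *		}
 *	}
 */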
void thread_getrusage(RUSAGE_T *r)
{
	monotime(&r->real);
	if (!cputime_enabled) {
		memset(&r->cpu, 0, sizeof(r->cpu));
		return;
	}

#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
	/* not currently implemented in Linux's vDSO, but maybe at some point
	 * in the future?
	 */
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
#if defined RUSAGE_THREAD
#define FRR_RUSAGE RUSAGE_THREAD
#else
#define FRR_RUSAGE RUSAGE_SELF
#endif
	getrusage(FRR_RUSAGE, &(r->cpu));
#endif
}
/*
 * Call a thread.
 *
 * This function will atomically update the thread's usage history. At present
 * this is the only spot where usage history is written. Nevertheless the code
 * has been written such that the introduction of writers in the future should
 * not need to update it provided the writers atomically perform only the
 * operations done here, i.e. updating the total and maximum times. In
 * particular, the maximum real and cpu times must be monotonically increasing
 * or this code is not correct.
 */
void thread_call(struct thread *thread)
{
	RUSAGE_T before, after;

	/* if the thread being called is the CLI, it may change cputime_enabled
	 * ("service cputime-stats" command), which can result in nonsensical
	 * and very confusing warnings
	 */
	bool cputime_enabled_here = cputime_enabled;

	if (thread->master->ready_run_loop)
		before = thread->master->last_getrusage;
	else
		GETRUSAGE(&before);

	thread->real = before.real;

	frrtrace(9, frr_libfrr, thread_call, thread->master,
		 thread->xref->funcname, thread->xref->xref.file,
		 thread->xref->xref.line, NULL, thread->u.fd,
		 thread->u.val, thread->arg, thread->u.sands.tv_sec);

	pthread_setspecific(thread_current, thread);
	(*thread->func)(thread);
	pthread_setspecific(thread_current, NULL);

	GETRUSAGE(&after);
	thread->master->last_getrusage = after;

	unsigned long walltime, cputime;
	unsigned long exp;

	walltime = thread_consumed_time(&after, &before, &cputime);

	/* update walltime */
	atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
				  memory_order_seq_cst);
	exp = atomic_load_explicit(&thread->hist->real.max,
				   memory_order_seq_cst);
	while (exp < walltime
	       && !atomic_compare_exchange_weak_explicit(
		       &thread->hist->real.max, &exp, walltime,
		       memory_order_seq_cst, memory_order_seq_cst))
		;

	if (cputime_enabled_here && cputime_enabled) {
		/* update cputime */
		atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
					  memory_order_seq_cst);
		exp = atomic_load_explicit(&thread->hist->cpu.max,
					   memory_order_seq_cst);
		while (exp < cputime
		       && !atomic_compare_exchange_weak_explicit(
			       &thread->hist->cpu.max, &exp, cputime,
			       memory_order_seq_cst, memory_order_seq_cst))
			;
	}

	atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
				  memory_order_seq_cst);
	atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
				 memory_order_seq_cst);

	if (cputime_enabled_here && cputime_enabled && cputime_threshold
	    && cputime > cputime_threshold) {
		/*
		 * We have a CPU Hog on our hands.  The time FRR has spent
		 * doing actual work (not sleeping) is greater than 5 seconds.
		 * Whinge about it now, so we're aware this is yet another task
		 * to fix.
		 */
		atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
					  1, memory_order_seq_cst);
		flog_warn(
			EC_LIB_SLOW_THREAD_CPU,
			"CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
			thread->xref->funcname, (unsigned long)thread->func,
			walltime / 1000, cputime / 1000);

	} else if (walltime_threshold && walltime > walltime_threshold) {
		/*
		 * The runtime for a task is greater than 5 seconds, but the
		 * cpu time is under 5 seconds.  Let's whine about this because
		 * this could imply some sort of scheduling issue.
		 */
		atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
					  1, memory_order_seq_cst);
		flog_warn(
			EC_LIB_SLOW_THREAD_WALL,
			"STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
			thread->xref->funcname, (unsigned long)thread->func,
			walltime / 1000, cputime / 1000);
	}
}
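/*
 * Note (illustrative): the compare-exchange loops above implement a
 * lock-free "atomic max": reload the current maximum and retry until our
 * sample is no longer larger or the swap succeeds.  In isolation:
 *
 *	exp = atomic_load_explicit(&max, memory_order_seq_cst);
 *	while (exp < sample
 *	       && !atomic_compare_exchange_weak_explicit(
 *		       &max, &exp, sample, memory_order_seq_cst,
 *		       memory_order_seq_cst))
 *		;	// exp now holds the observed value; retry
 *
 * This is why the usage-history comment above requires the maxima to be
 * monotonically increasing.
 */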
/* Execute thread */
void _thread_execute(const struct xref_threadsched *xref,
		     struct thread_master *m, void (*func)(struct thread *),
		     void *arg, int val)
{
	struct thread *thread;

	/* Get or allocate new thread to execute. */
	frr_with_mutex(&m->mtx) {
		thread = thread_get(m, THREAD_EVENT, func, arg, xref);

		/* Set its event value. */
		frr_with_mutex(&thread->mtx) {
			thread->add_type = THREAD_EXECUTE;
			thread->u.val = val;
			thread->ref = &thread;
		}
	}

	/* Execute thread doing all accounting. */
	thread_call(thread);

	/* Give back or free thread. */
	thread_add_unuse(m, thread);
}
/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
void debug_signals(const sigset_t *sigs)
{
	int i, found;
	sigset_t tmpsigs;
	char buf[300];

	/*
	 * We're only looking at the non-realtime signals here, so we need
	 * some limit value. Platform differences mean at some point we just
	 * need to pick a reasonable value.
	 */
#if defined SIGRTMIN
#  define LAST_SIGNAL SIGRTMIN
#else
#  define LAST_SIGNAL 32
#endif

	if (sigs == NULL) {
		sigemptyset(&tmpsigs);
		pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
		sigs = &tmpsigs;
	}

	found = 0;
	buf[0] = '\0';

	for (i = 0; i < LAST_SIGNAL; i++) {
		char tmp[20];

		if (sigismember(sigs, i) > 0) {
			if (found > 0)
				strlcat(buf, ",", sizeof(buf));
			snprintf(tmp, sizeof(tmp), "%d", i);
			strlcat(buf, tmp, sizeof(buf));
			found++;
		}
	}

	if (found == 0)
		snprintf(buf, sizeof(buf), "<none>");

	zlog_debug("%s: %s", __func__, buf);
}
bool thread_is_scheduled(struct thread *thread)
{
	if (thread == NULL)
		return false;

	return true;
}
static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
				   const struct thread *thread)
{
	static const char * const types[] = {
		[THREAD_READ] = "read",
		[THREAD_WRITE] = "write",
		[THREAD_TIMER] = "timer",
		[THREAD_EVENT] = "event",
		[THREAD_READY] = "ready",
		[THREAD_UNUSED] = "unused",
		[THREAD_EXECUTE] = "exec",
	};
	ssize_t rv = 0;
	char info[16] = "";

	if (!thread)
		return bputs(buf, "{(thread *)NULL}");

	rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);

	if (thread->type < array_size(types) && types[thread->type])
		rv += bprintfrr(buf, " %-6s", types[thread->type]);
	else
		rv += bprintfrr(buf, " INVALID(%u)", thread->type);

	switch (thread->type) {
	case THREAD_READ:
	case THREAD_WRITE:
		snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
		break;

	case THREAD_TIMER:
		snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
		break;
	}

	rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
			thread->xref->funcname, thread->xref->dest,
			thread->xref->xref.file, thread->xref->xref.line);
	return rv;
}
);
2197 static ssize_t
printfrr_thread(struct fbuf
*buf
, struct printfrr_eargs
*ea
,
2200 const struct thread
*thread
= ptr
;
2201 struct timespec remain
= {};
2203 if (ea
->fmt
[0] == 'D') {
2205 return printfrr_thread_dbg(buf
, ea
, thread
);
2209 /* need to jump over time formatting flag characters in the
2210 * input format string, i.e. adjust ea->fmt!
2212 printfrr_time(buf
, ea
, &remain
,
2213 TIMEFMT_TIMER_DEADLINE
| TIMEFMT_SKIP
);
2214 return bputch(buf
, '-');
2217 TIMEVAL_TO_TIMESPEC(&thread
->u
.sands
, &remain
);
2218 return printfrr_time(buf
, ea
, &remain
, TIMEFMT_TIMER_DEADLINE
);