/* Thread management routine
 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <zebra.h>
#include <sys/resource.h>

#include "thread.h"
#include "memory.h"
#include "frrcu.h"
#include "log.h"
#include "hash.h"
#include "command.h"
#include "sigevent.h"
#include "network.h"
#include "jhash.h"
#include "frratomic.h"
#include "frr_pthread.h"
#include "lib_errors.h"
#include "libfrr_trace.h"
#include "printfrr.h"
DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");

DECLARE_LIST(thread_list, struct thread, threaditem);
struct cancel_req {
	int flags;
	struct thread *thread;
	void *eventobj;
	struct thread **threadref;
};

/* Flags for task cancellation */
#define THREAD_CANCEL_FLAG_READY 0x01
static int thread_timer_cmp(const struct thread *a, const struct thread *b)
{
	if (a->u.sands.tv_sec < b->u.sands.tv_sec)
		return -1;
	if (a->u.sands.tv_sec > b->u.sands.tv_sec)
		return 1;
	if (a->u.sands.tv_usec < b->u.sands.tv_usec)
		return -1;
	if (a->u.sands.tv_usec > b->u.sands.tv_usec)
		return 1;
	return 0;
}

DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);
#if defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif

#define AWAKEN(m)                                                              \
	do {                                                                   \
		const unsigned char wakebyte = 0x01;                           \
		write(m->io_pipe[1], &wakebyte, 1);                            \
	} while (0)
/* control variable for initializer */
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
pthread_key_t thread_current;

static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *masters;

static void thread_free(struct thread_master *master, struct thread *thread);

#ifndef EXCLUDE_CPU_TIME
#define EXCLUDE_CPU_TIME 0
#endif
#ifndef CONSUMED_TIME_CHECK
#define CONSUMED_TIME_CHECK 0
#endif

bool cputime_enabled = !EXCLUDE_CPU_TIME;
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
/* CLI start ---------------------------------------------------------------- */
#include "lib/thread_clippy.c"
static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
{
	int size = sizeof(a->func);

	return jhash(&a->func, size, 0);
}
static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
				const struct cpu_thread_history *b)
{
	return a->func == b->func;
}
static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
{
	struct cpu_thread_history *new;
	new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
	new->func = a->func;
	new->funcname = a->funcname;
	return new;
}
static void cpu_record_hash_free(void *a)
{
	struct cpu_thread_history *hist = a;

	XFREE(MTYPE_THREAD_STATS, hist);
}
static void vty_out_cpu_thread_history(struct vty *vty,
				       struct cpu_thread_history *a)
{
	vty_out(vty,
		"%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
		a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
		a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
		(a->real.total / a->total_calls), a->real.max,
		a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
	vty_out(vty, " %c%c%c%c%c %s\n",
		a->types & (1 << THREAD_READ) ? 'R' : ' ',
		a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
		a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
		a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
		a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
}
static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
{
	struct cpu_thread_history *totals = args[0];
	struct cpu_thread_history copy;
	struct vty *vty = args[1];
	uint8_t *filter = args[2];

	struct cpu_thread_history *a = bucket->data;

	copy.total_active =
		atomic_load_explicit(&a->total_active, memory_order_seq_cst);
	copy.total_calls =
		atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
	copy.total_cpu_warn =
		atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
	copy.total_wall_warn =
		atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
	copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
						      memory_order_seq_cst);
	copy.cpu.total =
		atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
	copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
	copy.real.total =
		atomic_load_explicit(&a->real.total, memory_order_seq_cst);
	copy.real.max =
		atomic_load_explicit(&a->real.max, memory_order_seq_cst);
	copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
	copy.funcname = a->funcname;

	if (!(copy.types & *filter))
		return;

	vty_out_cpu_thread_history(vty, &copy);
	totals->total_active += copy.total_active;
	totals->total_calls += copy.total_calls;
	totals->total_cpu_warn += copy.total_cpu_warn;
	totals->total_wall_warn += copy.total_wall_warn;
	totals->total_starv_warn += copy.total_starv_warn;
	totals->real.total += copy.real.total;
	if (totals->real.max < copy.real.max)
		totals->real.max = copy.real.max;
	totals->cpu.total += copy.cpu.total;
	if (totals->cpu.max < copy.cpu.max)
		totals->cpu.max = copy.cpu.max;
}
static void cpu_record_print(struct vty *vty, uint8_t filter)
{
	struct cpu_thread_history tmp;
	void *args[3] = {&tmp, vty, &filter};
	struct thread_master *m;
	struct listnode *ln;

	if (!cputime_enabled)
		vty_out(vty,
			"Collecting CPU time statistics is currently disabled. Following statistics\n"
			"will be zero or may display data from when collection was enabled. Use the\n"
			"  \"service cputime-stats\" command to start collecting data.\n"
			"\nCounters and wallclock times are always maintained and should be accurate.\n");

	memset(&tmp, 0, sizeof(tmp));
	tmp.funcname = "TOTAL";
	tmp.types = filter;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			const char *name = m->name ? m->name : "main";
			char underline[strlen(name) + 1];

			memset(underline, '-', sizeof(underline));
			underline[sizeof(underline) - 1] = '\0';

			vty_out(vty, "\n");
			vty_out(vty, "Showing statistics for pthread %s\n",
				name);
			vty_out(vty, "-------------------------------%s\n",
				underline);
			vty_out(vty, "%30s %18s %18s\n", "",
				"CPU (user+system):", "Real (wall-clock):");
			vty_out(vty,
				"Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
			vty_out(vty, " Avg uSec Max uSecs");
			vty_out(vty,
				"  CPU_Warn Wall_Warn Starv_Warn  Type  Thread\n");

			if (m->cpu_record->count)
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_print,
					args);
			else
				vty_out(vty, "No data to display yet.\n");

			vty_out(vty, "\n");
		}
	}

	vty_out(vty, "\n");
	vty_out(vty, "Total thread statistics\n");
	vty_out(vty, "-------------------------\n");
	vty_out(vty, "%30s %18s %18s\n", "",
		"CPU (user+system):", "Real (wall-clock):");
	vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
	vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
	vty_out(vty, "  Type  Thread\n");

	if (tmp.total_calls > 0)
		vty_out_cpu_thread_history(vty, &tmp);
}
static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
{
	uint8_t *filter = args[0];
	struct hash *cpu_record = args[1];

	struct cpu_thread_history *a = bucket->data;

	if (!(a->types & *filter))
		return;

	hash_release(cpu_record, bucket->data);
}
static void cpu_record_clear(uint8_t filter)
{
	uint8_t *tmp = &filter;
	struct thread_master *m;
	struct listnode *ln;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			frr_with_mutex (&m->mtx) {
				void *args[2] = {tmp, m->cpu_record};
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_clear,
					args);
			}
		}
	}
}
static uint8_t parse_filter(const char *filterstr)
{
	int i = 0;
	int filter = 0;

	while (filterstr[i] != '\0') {
		switch (filterstr[i]) {
		case 'r':
		case 'R':
			filter |= (1 << THREAD_READ);
			break;
		case 'w':
		case 'W':
			filter |= (1 << THREAD_WRITE);
			break;
		case 't':
		case 'T':
			filter |= (1 << THREAD_TIMER);
			break;
		case 'e':
		case 'E':
			filter |= (1 << THREAD_EVENT);
			break;
		case 'x':
		case 'X':
			filter |= (1 << THREAD_EXECUTE);
			break;
		default:
			break;
		}
		++i;
	}
	return filter;
}
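/*
 * Illustrative note (a sketch, not compiled into this file): parse_filter()
 * maps each letter of a CLI filter string to a THREAD_* type bit, so e.g.
 * parse_filter("rw") yields (1 << THREAD_READ) | (1 << THREAD_WRITE), while a
 * string with no recognized letters yields 0, which the CLI handlers below
 * treat as an invalid filter.
 */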
DEFUN_NOSH (show_thread_cpu,
	    show_thread_cpu_cmd,
	    "show thread cpu [FILTER]",
	    SHOW_STR
	    "Thread information\n"
	    "Thread CPU usage\n"
	    "Display filter (rwtex)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_print(vty, filter);
	return CMD_SUCCESS;
}
DEFPY (service_cputime_stats,
       service_cputime_stats_cmd,
       "[no] service cputime-stats",
       NO_STR
       "Set up miscellaneous service\n"
       "Collect CPU usage statistics\n")
{
	cputime_enabled = !no;
	return CMD_SUCCESS;
}
DEFPY (service_cputime_warning,
       service_cputime_warning_cmd,
       "[no] service cputime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		cputime_threshold = 0;
	else
		cputime_threshold = cputime_warning * 1000;
	return CMD_SUCCESS;
}
ALIAS (service_cputime_warning,
       no_service_cputime_warning_cmd,
       "no service cputime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n")
DEFPY (service_walltime_warning,
       service_walltime_warning_cmd,
       "[no] service walltime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		walltime_threshold = 0;
	else
		walltime_threshold = walltime_warning * 1000;
	return CMD_SUCCESS;
}
ALIAS (service_walltime_warning,
       no_service_walltime_warning_cmd,
       "no service walltime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n")
static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct thread *thread;
	uint32_t i;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing poll FD's for %s\n", name);
	vty_out(vty, "----------------------%s\n", underline);
	vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
		m->fd_limit);
	for (i = 0; i < m->handler.pfdcount; i++) {
		vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
			m->handler.pfds[i].fd, m->handler.pfds[i].events,
			m->handler.pfds[i].revents);

		if (m->handler.pfds[i].events & POLLIN) {
			thread = m->read[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR ");
			else
				vty_out(vty, "%s ", thread->xref->funcname);
		} else
			vty_out(vty, " ");

		if (m->handler.pfds[i].events & POLLOUT) {
			thread = m->write[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR\n");
			else
				vty_out(vty, "%s\n", thread->xref->funcname);
		} else
			vty_out(vty, "\n");
	}
}
DEFUN_NOSH (show_thread_poll,
	    show_thread_poll_cmd,
	    "show thread poll",
	    SHOW_STR
	    "Thread information\n"
	    "Show poll FD's and information\n")
{
	struct listnode *node;
	struct thread_master *m;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
			show_thread_poll_helper(vty, m);
		}
	}

	return CMD_SUCCESS;
}
DEFUN (clear_thread_cpu,
       clear_thread_cpu_cmd,
       "clear thread cpu [FILTER]",
       "Clear stored data in all pthreads\n"
       "Thread information\n"
       "Thread CPU usage\n"
       "Display filter (rwtexb)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_clear(filter);
	return CMD_SUCCESS;
}
static void show_thread_timers_helper(struct vty *vty, struct thread_master *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct thread *thread;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing timers for %s\n", name);
	vty_out(vty, "-------------------%s\n", underline);

	frr_each (thread_timer_list, &m->timer, thread) {
		vty_out(vty, "  %-50s%pTH\n", thread->hist->funcname, thread);
	}
}
DEFPY_NOSH (show_thread_timers,
	    show_thread_timers_cmd,
	    "show thread timers",
	    SHOW_STR
	    "Thread information\n"
	    "Show all timers and how long they have in the system\n")
{
	struct listnode *node;
	struct thread_master *m;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m))
			show_thread_timers_helper(vty, m);
	}

	return CMD_SUCCESS;
}
void thread_cmd_init(void)
{
	install_element(VIEW_NODE, &show_thread_cpu_cmd);
	install_element(VIEW_NODE, &show_thread_poll_cmd);
	install_element(ENABLE_NODE, &clear_thread_cpu_cmd);

	install_element(CONFIG_NODE, &service_cputime_stats_cmd);
	install_element(CONFIG_NODE, &service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &service_walltime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);

	install_element(VIEW_NODE, &show_thread_timers_cmd);
}
/* CLI end ------------------------------------------------------------------ */
static void cancelreq_del(void *cr)
{
	XFREE(MTYPE_TMP, cr);
}

/* initializer, only ever called once */
static void initializer(void)
{
	pthread_key_create(&thread_current, NULL);
}
struct thread_master *thread_master_create(const char *name)
{
	struct thread_master *rv;
	struct rlimit limit;

	pthread_once(&init_once, &initializer);

	rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));

	/* Initialize master mutex */
	pthread_mutex_init(&rv->mtx, NULL);
	pthread_cond_init(&rv->cancel_cond, NULL);

	/* Set name */
	name = name ? name : "default";
	rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);

	/* Initialize I/O task data structures */

	/* Use configured limit if present, ulimit otherwise. */
	rv->fd_limit = frr_get_fd_limit();
	if (rv->fd_limit == 0) {
		getrlimit(RLIMIT_NOFILE, &limit);
		rv->fd_limit = (int)limit.rlim_cur;
	}

	rv->read = XCALLOC(MTYPE_THREAD_POLL,
			   sizeof(struct thread *) * rv->fd_limit);

	rv->write = XCALLOC(MTYPE_THREAD_POLL,
			    sizeof(struct thread *) * rv->fd_limit);

	char tmhashname[strlen(name) + 32];
	snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
		 name);
	rv->cpu_record = hash_create_size(
		8, (unsigned int (*)(const void *))cpu_record_hash_key,
		(bool (*)(const void *, const void *))cpu_record_hash_cmp,
		tmhashname);

	thread_list_init(&rv->event);
	thread_list_init(&rv->ready);
	thread_list_init(&rv->unuse);
	thread_timer_list_init(&rv->timer);

	/* Initialize thread_fetch() settings */
	rv->spin = true;
	rv->handle_signals = true;

	/* Set pthread owner, should be updated by actual owner */
	rv->owner = pthread_self();
	rv->cancel_req = list_new();
	rv->cancel_req->del = cancelreq_del;
	rv->canceled = true;

	/* Initialize pipe poker */
	pipe(rv->io_pipe);
	set_nonblocking(rv->io_pipe[0]);
	set_nonblocking(rv->io_pipe[1]);

	/* Initialize data structures for poll() */
	rv->handler.pfdsize = rv->fd_limit;
	rv->handler.pfdcount = 0;
	rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);
	rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);

	/* add to list of threadmasters */
	frr_with_mutex (&masters_mtx) {
		if (!masters)
			masters = list_new();

		listnode_add(masters, rv);
	}

	return rv;
}
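/*
 * Illustrative lifecycle sketch (not compiled here; a real daemon's main loop
 * is more involved, and "demo_timer" is a hypothetical callback):
 *
 *	struct thread_master *master = thread_master_create("demo");
 *	struct thread t;
 *
 *	thread_add_timer(master, demo_timer, NULL, 5, NULL);
 *	while (thread_fetch(master, &t))
 *		thread_call(&t);
 *	thread_master_free(master);
 */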
void thread_master_set_name(struct thread_master *master, const char *name)
{
	frr_with_mutex (&master->mtx) {
		XFREE(MTYPE_THREAD_MASTER, master->name);
		master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
	}
}
#define THREAD_UNUSED_DEPTH 10

/* Move thread to unuse list. */
static void thread_add_unuse(struct thread_master *m, struct thread *thread)
{
	pthread_mutex_t mtxc = thread->mtx;

	assert(m != NULL && thread != NULL);

	thread->hist->total_active--;
	memset(thread, 0, sizeof(struct thread));
	thread->type = THREAD_UNUSED;

	/* Restore the thread mutex context. */
	thread->mtx = mtxc;

	if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
		thread_list_add_tail(&m->unuse, thread);
		return;
	}

	thread_free(m, thread);
}
/* Free all unused thread. */
static void thread_list_free(struct thread_master *m,
			     struct thread_list_head *list)
{
	struct thread *t;

	while ((t = thread_list_pop(list)))
		thread_free(m, t);
}
static void thread_array_free(struct thread_master *m,
			      struct thread **thread_array)
{
	struct thread *t;
	int index;

	for (index = 0; index < m->fd_limit; ++index) {
		t = thread_array[index];
		if (t) {
			thread_array[index] = NULL;
			thread_free(m, t);
		}
	}
	XFREE(MTYPE_THREAD_POLL, thread_array);
}
/*
 * thread_master_free_unused
 *
 * As threads are finished with, they are put on the
 * unuse list for later reuse.
 * If we are shutting down, free up unused threads
 * so we can see if we forgot to shut anything off.
 */
void thread_master_free_unused(struct thread_master *m)
{
	frr_with_mutex (&m->mtx) {
		struct thread *t;
		while ((t = thread_list_pop(&m->unuse)))
			thread_free(m, t);
	}
}
/* Stop thread scheduler. */
void thread_master_free(struct thread_master *m)
{
	struct thread *t;

	frr_with_mutex (&masters_mtx) {
		listnode_delete(masters, m);
		if (masters->count == 0) {
			list_delete(&masters);
		}
	}

	thread_array_free(m, m->read);
	thread_array_free(m, m->write);
	while ((t = thread_timer_list_pop(&m->timer)))
		thread_free(m, t);
	thread_list_free(m, &m->event);
	thread_list_free(m, &m->ready);
	thread_list_free(m, &m->unuse);
	pthread_mutex_destroy(&m->mtx);
	pthread_cond_destroy(&m->cancel_cond);
	close(m->io_pipe[0]);
	close(m->io_pipe[1]);
	list_delete(&m->cancel_req);
	m->cancel_req = NULL;

	hash_clean(m->cpu_record, cpu_record_hash_free);
	hash_free(m->cpu_record);
	m->cpu_record = NULL;

	XFREE(MTYPE_THREAD_MASTER, m->name);
	XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
	XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
	XFREE(MTYPE_THREAD_MASTER, m);
}
/* Return remain time in milliseconds. */
unsigned long thread_timer_remain_msec(struct thread *thread)
{
	int64_t remain;

	if (!thread_is_scheduled(thread))
		return 0;

	frr_with_mutex (&thread->mtx) {
		remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
	}

	return remain < 0 ? 0 : remain;
}
/* Return remain time in seconds. */
unsigned long thread_timer_remain_second(struct thread *thread)
{
	return thread_timer_remain_msec(thread) / 1000LL;
}
struct timeval thread_timer_remain(struct thread *thread)
{
	struct timeval remain;
	frr_with_mutex (&thread->mtx) {
		monotime_until(&thread->u.sands, &remain);
	}
	return remain;
}
static int time_hhmmss(char *buf, int buf_size, long sec)
{
	long hh;
	long mm;
	int wr;

	assert(buf_size >= 8);

	hh = sec / 3600;
	sec %= 3600;
	mm = sec / 60;
	sec %= 60;

	wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);

	return wr != 8;
}
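/*
 * Example (a sketch, not compiled): time_hhmmss(buf, sizeof(buf), 3723)
 * writes "01:02:03" into buf; the return value is non-zero only when the
 * formatted string is not the expected 8 characters, e.g. when the hour
 * field overflows two digits.
 */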
char *thread_timer_to_hhmmss(char *buf, int buf_size,
			     struct thread *t_timer)
{
	if (t_timer) {
		time_hhmmss(buf, buf_size,
			    thread_timer_remain_second(t_timer));
	} else {
		snprintf(buf, buf_size, "--:--:--");
	}
	return buf;
}
813 static struct thread
*thread_get(struct thread_master
*m
, uint8_t type
,
814 void (*func
)(struct thread
*), void *arg
,
815 const struct xref_threadsched
*xref
)
817 struct thread
*thread
= thread_list_pop(&m
->unuse
);
818 struct cpu_thread_history tmp
;
821 thread
= XCALLOC(MTYPE_THREAD
, sizeof(struct thread
));
822 /* mutex only needs to be initialized at struct creation. */
823 pthread_mutex_init(&thread
->mtx
, NULL
);
828 thread
->add_type
= type
;
831 thread
->yield
= THREAD_YIELD_TIME_SLOT
; /* default */
833 thread
->ignore_timer_late
= false;
836 * So if the passed in funcname is not what we have
837 * stored that means the thread->hist needs to be
838 * updated. We keep the last one around in unused
839 * under the assumption that we are probably
840 * going to immediately allocate the same
842 * This hopefully saves us some serious
845 if ((thread
->xref
&& thread
->xref
->funcname
!= xref
->funcname
)
846 || thread
->func
!= func
) {
848 tmp
.funcname
= xref
->funcname
;
850 hash_get(m
->cpu_record
, &tmp
,
851 (void *(*)(void *))cpu_record_hash_alloc
);
853 thread
->hist
->total_active
++;
860 static void thread_free(struct thread_master
*master
, struct thread
*thread
)
862 /* Update statistics. */
863 assert(master
->alloc
> 0);
866 /* Free allocated resources. */
867 pthread_mutex_destroy(&thread
->mtx
);
868 XFREE(MTYPE_THREAD
, thread
);
static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
		   bool *eintr_p)
{
	sigset_t origsigs;
	unsigned char trash[64];
	nfds_t count = m->handler.copycount;

	/*
	 * If timer_wait is null here, that means poll() should block
	 * indefinitely, unless the thread_master has overridden it by setting
	 * ->selectpoll_timeout.
	 *
	 * If the value is positive, it specifies the maximum number of
	 * milliseconds to wait. If the timeout is -1, it specifies that
	 * we should never wait and always return immediately even if no
	 * event is detected. If the value is zero, the behavior is default.
	 */
	int timeout = -1;

	/* number of file descriptors with events */
	int num;

	if (timer_wait != NULL
	    && m->selectpoll_timeout == 0) // use the default value
		timeout = (timer_wait->tv_sec * 1000)
			  + (timer_wait->tv_usec / 1000);
	else if (m->selectpoll_timeout > 0) // use the user's timeout
		timeout = m->selectpoll_timeout;
	else if (m->selectpoll_timeout
		 < 0) // effect a poll (return immediately)
		timeout = 0;

	zlog_tls_buffer_flush();
	rcu_read_unlock();
	rcu_assert_read_unlocked();

	/* add poll pipe poker */
	assert(count + 1 < m->handler.pfdsize);
	m->handler.copy[count].fd = m->io_pipe[0];
	m->handler.copy[count].events = POLLIN;
	m->handler.copy[count].revents = 0x00;

	/* We need to deal with a signal-handling race here: we
	 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
	 * that may arrive just before we enter poll(). We will block the
	 * key signals, then check whether any have arrived - if so, we return
	 * before calling poll(). If not, we'll re-enable the signals
	 * in the ppoll() call.
	 */

	sigemptyset(&origsigs);
	if (m->handle_signals) {
		/* Main pthread that handles the app signals */
		if (frr_sigevent_check(&origsigs)) {
			/* Signal to process - restore signal mask and return */
			pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
			num = -1;
			*eintr_p = true;
			goto done;
		}
	} else {
		/* Don't make any changes for the non-main pthreads */
		pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
	}

#if defined(HAVE_PPOLL)
	struct timespec ts, *tsp;

	if (timeout >= 0) {
		ts.tv_sec = timeout / 1000;
		ts.tv_nsec = (timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
#else
	/* Not ideal - there is a race after we restore the signal mask */
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
	num = poll(m->handler.copy, count + 1, timeout);
#endif

done:

	if (num < 0 && errno == EINTR)
		*eintr_p = true;

	if (num > 0 && m->handler.copy[count].revents != 0 && num--)
		while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
			;

	rcu_read_lock();

	return num;
}
/* Add new read thread. */
void _thread_add_read_write(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg, int fd,
			    struct thread **t_ptr)
{
	int dir = xref->thread_type;
	struct thread *thread = NULL;
	struct thread **thread_array;

	if (dir == THREAD_READ)
		frrtrace(9, frr_libfrr, schedule_read, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);
	else
		frrtrace(9, frr_libfrr, schedule_write, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);

	if (fd >= m->fd_limit)
		assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");

	frr_with_mutex (&m->mtx) {
		if (t_ptr && *t_ptr)
			// thread is already scheduled; don't reschedule
			break;

		/* default to a new pollfd */
		nfds_t queuepos = m->handler.pfdcount;

		if (dir == THREAD_READ)
			thread_array = m->read;
		else
			thread_array = m->write;

		/* if we already have a pollfd for our file descriptor, find and
		 * use it */
		for (nfds_t i = 0; i < m->handler.pfdcount; i++)
			if (m->handler.pfds[i].fd == fd) {
				queuepos = i;

#ifdef DEV_BUILD
				/*
				 * What happens if we have a thread already
				 * created for this event?
				 */
				if (thread_array[fd])
					assert(!"Thread already scheduled for file descriptor");
#endif
				break;
			}

		/* make sure we have room for this fd + pipe poker fd */
		assert(queuepos + 1 < m->handler.pfdsize);

		thread = thread_get(m, dir, func, arg, xref);

		m->handler.pfds[queuepos].fd = fd;
		m->handler.pfds[queuepos].events |=
			(dir == THREAD_READ ? POLLIN : POLLOUT);

		if (queuepos == m->handler.pfdcount)
			m->handler.pfdcount++;

		if (thread) {
			frr_with_mutex (&thread->mtx) {
				thread->u.fd = fd;
				thread_array[thread->u.fd] = thread;
			}

			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		AWAKEN(m);
	}
}
static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
				      struct thread_master *m,
				      void (*func)(struct thread *), void *arg,
				      struct timeval *time_relative,
				      struct thread **t_ptr)
{
	struct thread *thread;
	struct timeval t;

	assert(m != NULL);

	assert(time_relative);

	frrtrace(9, frr_libfrr, schedule_timer, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);

	/* Compute expiration/deadline time. */
	monotime(&t);
	timeradd(&t, time_relative, &t);

	frr_with_mutex (&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			return;

		thread = thread_get(m, THREAD_TIMER, func, arg, xref);

		frr_with_mutex (&thread->mtx) {
			thread->u.sands = t;
			thread_timer_list_add(&m->timer, thread);
			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		/* The timer list is sorted - if this new timer
		 * might change the time we'll wait for, give the pthread
		 * a chance to re-compute.
		 */
		if (thread_timer_list_first(&m->timer) == thread)
			AWAKEN(m);
	}
#define ONEYEAR2SEC (60 * 60 * 24 * 365)
	if (time_relative->tv_sec > ONEYEAR2SEC)
		flog_err(
			EC_LIB_TIMER_TOO_LONG,
			"Timer: %pTHD is created with an expiration that is greater than 1 year",
			thread);
}
/* Add timer event thread. */
void _thread_add_timer(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer;
	trel.tv_usec = 0;

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}
/* Add timer event thread with "millisecond" resolution */
void _thread_add_timer_msec(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg,
			    long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer / 1000;
	trel.tv_usec = 1000 * (timer % 1000);

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}
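/*
 * Callers normally go through the thread_add_timer_msec() wrapper macro from
 * thread.h, which supplies the xref argument. A hedged usage sketch, with a
 * hypothetical callback and back-reference:
 *
 *	static struct thread *t_refresh;
 *
 *	thread_add_timer_msec(master, refresh_cb, ctx, 250, &t_refresh);
 */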
/* Add timer event thread with "timeval" resolution */
void _thread_add_timer_tv(const struct xref_threadsched *xref,
			  struct thread_master *m,
			  void (*func)(struct thread *), void *arg,
			  struct timeval *tv, struct thread **t_ptr)
{
	_thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}
/* Add simple event thread. */
void _thread_add_event(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, int val, struct thread **t_ptr)
{
	struct thread *thread = NULL;

	frrtrace(9, frr_libfrr, schedule_event, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, val, arg, 0);

	assert(m != NULL);

	frr_with_mutex (&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			break;

		thread = thread_get(m, THREAD_EVENT, func, arg, xref);
		frr_with_mutex (&thread->mtx) {
			thread->u.val = val;
			thread_list_add_tail(&m->event, thread);
		}

		if (t_ptr) {
			*t_ptr = thread;
			thread->ref = t_ptr;
		}

		AWAKEN(m);
	}
}
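/*
 * Sketch of the thread_add_event() wrapper macro in use ("handle_cb" and
 * "ctx" are hypothetical); the event fires on the next scheduling pass:
 *
 *	thread_add_event(master, handle_cb, ctx, 0, NULL);
 */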
/* Thread cancellation ------------------------------------------------------ */

/**
 * NOT's out the .events field of pollfd corresponding to the given file
 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
 *
 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
 * implementation for details.
 *
 * @param master
 * @param fd
 * @param state the event to cancel. One or more (OR'd together) of the
 * following: POLLIN, POLLOUT
 */
static void thread_cancel_rw(struct thread_master *master, int fd, short state,
			     int idx_hint)
{
	bool found = false;

	/* find the index of corresponding pollfd */
	nfds_t i;

	/* Cancel POLLHUP too just in case some bozo set it */
	state |= POLLHUP;

	/* Some callers know the index of the pfd already */
	if (idx_hint >= 0) {
		i = idx_hint;
		found = true;
	} else {
		/* Have to look for the fd in the pfd array */
		for (i = 0; i < master->handler.pfdcount; i++)
			if (master->handler.pfds[i].fd == fd) {
				found = true;
				break;
			}
	}

	if (!found) {
		zlog_debug(
			"[!] Received cancellation request for nonexistent rw job");
		zlog_debug("[!] threadmaster: %s | fd: %d",
			   master->name ? master->name : "", fd);
		return;
	}

	/* NOT out event. */
	master->handler.pfds[i].events &= ~(state);

	/* If all events are canceled, delete / resize the pollfd array. */
	if (master->handler.pfds[i].events == 0) {
		memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
			(master->handler.pfdcount - i - 1)
				* sizeof(struct pollfd));
		master->handler.pfdcount--;
		master->handler.pfds[master->handler.pfdcount].fd = 0;
		master->handler.pfds[master->handler.pfdcount].events = 0;
	}

	/* If we have the same pollfd in the copy, perform the same operations,
	 * otherwise return. */
	if (i >= master->handler.copycount)
		return;

	master->handler.copy[i].events &= ~(state);

	if (master->handler.copy[i].events == 0) {
		memmove(master->handler.copy + i, master->handler.copy + i + 1,
			(master->handler.copycount - i - 1)
				* sizeof(struct pollfd));
		master->handler.copycount--;
		master->handler.copy[master->handler.copycount].fd = 0;
		master->handler.copy[master->handler.copycount].events = 0;
	}
}
/*
 * Process task cancellation given a task argument: iterate through the
 * various lists of tasks, looking for any that match the argument.
 */
static void cancel_arg_helper(struct thread_master *master,
			      const struct cancel_req *cr)
{
	struct thread *t;
	nfds_t i;
	int fd;
	struct pollfd *pfd;

	/* We're only processing arg-based cancellations here. */
	if (cr->eventobj == NULL)
		return;

	/* First process the ready lists. */
	frr_each_safe(thread_list, &master->event, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->event, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	frr_each_safe(thread_list, &master->ready, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->ready, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	/* If requested, stop here and ignore io and timers */
	if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
		return;

	/* Check the io tasks */
	for (i = 0; i < master->handler.pfdcount;) {
		pfd = master->handler.pfds + i;

		if (pfd->events & POLLIN)
			t = master->read[pfd->fd];
		else
			t = master->write[pfd->fd];

		if (t && t->arg == cr->eventobj) {
			fd = pfd->fd;

			/* Found a match to cancel: clean up fd arrays */
			thread_cancel_rw(master, pfd->fd, pfd->events, i);

			/* Clean up thread arrays */
			master->read[fd] = NULL;
			master->write[fd] = NULL;

			/* Clear caller's ref */
			if (t->ref)
				*t->ref = NULL;

			thread_add_unuse(master, t);

			/* Don't increment 'i' since the cancellation will have
			 * removed the entry from the pfd array
			 */
		} else
			i++;
	}

	/* Check the timer tasks */
	t = thread_timer_list_first(&master->timer);
	while (t) {
		struct thread *t_next;

		t_next = thread_timer_list_next(&master->timer, t);

		if (t->arg == cr->eventobj) {
			thread_timer_list_del(&master->timer, t);
			if (t->ref)
				*t->ref = NULL;
			thread_add_unuse(master, t);
		}

		t = t_next;
	}
}
/**
 * Process cancellation requests.
 *
 * This may only be run from the pthread which owns the thread_master.
 *
 * @param master the thread master to process
 * @REQUIRE master->mtx
 */
static void do_thread_cancel(struct thread_master *master)
{
	struct thread_list_head *list = NULL;
	struct thread **thread_array = NULL;
	struct thread *thread;
	struct cancel_req *cr;
	struct listnode *ln;

	for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
		/*
		 * If this is an event object cancellation, search
		 * through task lists deleting any tasks which have the
		 * specified argument - use this handy helper function.
		 */
		if (cr->eventobj) {
			cancel_arg_helper(master, cr);
			continue;
		}

		/*
		 * The pointer varies depending on whether the cancellation
		 * request was made asynchronously or not. If it was, we
		 * need to check whether the thread even exists anymore
		 * before cancelling it.
		 */
		thread = (cr->thread) ? cr->thread : *cr->threadref;

		if (!thread)
			continue;

		list = NULL;
		thread_array = NULL;

		/* Determine the appropriate queue to cancel the thread from */
		switch (thread->type) {
		case THREAD_READ:
			thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
			thread_array = master->read;
			break;
		case THREAD_WRITE:
			thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
			thread_array = master->write;
			break;
		case THREAD_TIMER:
			thread_timer_list_del(&master->timer, thread);
			break;
		case THREAD_EVENT:
			list = &master->event;
			break;
		case THREAD_READY:
			list = &master->ready;
			break;
		default:
			continue;
		}

		if (list) {
			thread_list_del(list, thread);
		} else if (thread_array) {
			thread_array[thread->u.fd] = NULL;
		}

		if (thread->ref)
			*thread->ref = NULL;

		thread_add_unuse(thread->master, thread);
	}

	/* Delete and free all cancellation requests */
	if (master->cancel_req)
		list_delete_all_node(master->cancel_req);

	/* Wake up any threads which may be blocked in thread_cancel_async() */
	master->canceled = true;
	pthread_cond_broadcast(&master->cancel_cond);
}
/*
 * Helper function used for multiple flavors of arg-based cancellation.
 */
static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
{
	struct cancel_req *cr;

	assert(m->owner == pthread_self());

	/* Only worth anything if caller supplies an arg. */
	if (arg == NULL)
		return;

	cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));

	cr->flags = flags;

	frr_with_mutex (&m->mtx) {
		cr->eventobj = arg;
		listnode_add(m->cancel_req, cr);
		do_thread_cancel(m);
	}
}
/**
 * Cancel any events which have the specified argument.
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event(struct thread_master *master, void *arg)
{
	cancel_event_helper(master, arg, 0);
}
/*
 * Cancel ready tasks with an arg matching 'arg'
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event_ready(struct thread_master *m, void *arg)
{

	/* Only cancel ready/event tasks */
	cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
}
/**
 * Cancel a specific task.
 *
 * MT-Unsafe
 *
 * @param thread task to cancel
 */
void thread_cancel(struct thread **thread)
{
	struct thread_master *master;

	if (thread == NULL || *thread == NULL)
		return;

	master = (*thread)->master;

	frrtrace(9, frr_libfrr, thread_cancel, master,
		 (*thread)->xref->funcname, (*thread)->xref->xref.file,
		 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
		 (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);

	assert(master->owner == pthread_self());

	frr_with_mutex (&master->mtx) {
		struct cancel_req *cr =
			XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
		cr->thread = *thread;
		listnode_add(master->cancel_req, cr);
		do_thread_cancel(master);
	}

	*thread = NULL;
}
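/*
 * Because thread_cancel() NULLs the caller's pointer, the usual pattern is to
 * keep the back-reference used at scheduling time. A sketch with hypothetical
 * names:
 *
 *	static struct thread *t_io;
 *
 *	thread_add_read(master, read_cb, ctx, fd, &t_io);
 *	...
 *	thread_cancel(&t_io);	// safe even if t_io already fired and is NULL
 */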
/**
 * Asynchronous cancellation.
 *
 * Called with either a struct thread ** or void * to an event argument,
 * this function posts the correct cancellation request and blocks until it is
 * serviced.
 *
 * If the thread is currently running, execution blocks until it completes.
 *
 * The last two parameters are mutually exclusive, i.e. if you pass one the
 * other must be NULL.
 *
 * When the cancellation procedure executes on the target thread_master, the
 * thread * provided is checked for nullity. If it is null, the thread is
 * assumed to no longer exist and the cancellation request is a no-op. Thus
 * users of this API must pass a back-reference when scheduling the original
 * task.
 *
 * MT-Safe
 *
 * @param master the thread master with the relevant event / task
 * @param thread pointer to thread to cancel
 * @param eventobj the event
 */
void thread_cancel_async(struct thread_master *master, struct thread **thread,
			 void *eventobj)
{
	assert(!(thread && eventobj) && (thread || eventobj));

	if (thread && *thread)
		frrtrace(9, frr_libfrr, thread_cancel_async, master,
			 (*thread)->xref->funcname, (*thread)->xref->xref.file,
			 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
			 (*thread)->u.val, (*thread)->arg,
			 (*thread)->u.sands.tv_sec);
	else
		frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
			 0, NULL, 0, 0, eventobj, 0);

	assert(master->owner != pthread_self());

	frr_with_mutex (&master->mtx) {
		master->canceled = false;

		if (thread) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->threadref = thread;
			listnode_add(master->cancel_req, cr);
		} else if (eventobj) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->eventobj = eventobj;
			listnode_add(master->cancel_req, cr);
		}
		AWAKEN(master);

		while (!master->canceled)
			pthread_cond_wait(&master->cancel_cond, &master->mtx);
	}

	if (thread)
		*thread = NULL;
}
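/*
 * Sketch: from a pthread other than the one running 'master', either form may
 * be used, but never both at once ("t_io" and "ctx" are hypothetical):
 *
 *	thread_cancel_async(master, &t_io, NULL);	// by back-reference
 *	thread_cancel_async(master, NULL, ctx);		// by event argument
 *
 * Both block until the owning pthread has serviced the request.
 */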
/* ------------------------------------------------------------------------- */

static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
					 struct timeval *timer_val)
{
	if (!thread_timer_list_count(timers))
		return NULL;

	struct thread *next_timer = thread_timer_list_first(timers);
	monotime_until(&next_timer->u.sands, timer_val);
	return timer_val;
}
static struct thread *thread_run(struct thread_master *m, struct thread *thread,
				 struct thread *fetch)
{
	*fetch = *thread;
	thread_add_unuse(m, thread);
	return fetch;
}
static int thread_process_io_helper(struct thread_master *m,
				    struct thread *thread, short state,
				    short actual_state, int pos)
{
	struct thread **thread_array;

	/*
	 * poll() clears the .events field, but the pollfd array we
	 * pass to poll() is a copy of the one used to schedule threads.
	 * We need to synchronize state between the two here by applying
	 * the same changes poll() made on the copy of the "real" pollfd
	 * array.
	 *
	 * This cleans up a possible infinite loop where we refuse
	 * to respond to a poll event but poll is insistent that
	 * we should.
	 */
	m->handler.pfds[pos].events &= ~(state);

	if (!thread) {
		if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
			flog_err(EC_LIB_NO_THREAD,
				 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
				 m->handler.pfds[pos].fd, actual_state);
		return 0;
	}

	if (thread->type == THREAD_READ)
		thread_array = m->read;
	else
		thread_array = m->write;

	thread_array[thread->u.fd] = NULL;
	thread_list_add_tail(&m->ready, thread);
	thread->type = THREAD_READY;

	return 1;
}
/**
 * Process I/O events.
 *
 * Walks through file descriptor array looking for those pollfds whose .revents
 * field has something interesting. Deletes any invalid file descriptors.
 *
 * @param m the thread master
 * @param num the number of active file descriptors (return value of poll())
 */
static void thread_process_io(struct thread_master *m, unsigned int num)
{
	unsigned int ready = 0;
	struct pollfd *pfds = m->handler.copy;

	for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
		/* no event for current fd? immediately continue */
		if (pfds[i].revents == 0)
			continue;

		ready++;

		/*
		 * Unless someone has called thread_cancel from another
		 * pthread, the only thing that could have changed in
		 * m->handler.pfds while we were asleep is the .events
		 * field in a given pollfd. Barring thread_cancel() that
		 * value should be a superset of the values we have in our
		 * copy, so there's no need to update it. Similarly,
		 * barring deletion, the fd should still be a valid index
		 * into the master's pfds.
		 *
		 * We are including POLLERR here to do a READ event
		 * this is because the read should fail and the
		 * read function should handle it appropriately
		 */
		if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
			thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
						 pfds[i].revents, i);
		}
		if (pfds[i].revents & POLLOUT)
			thread_process_io_helper(m, m->write[pfds[i].fd],
						 POLLOUT, pfds[i].revents, i);

		/* if one of our file descriptors is garbage, remove the same
		 * from both pfds + update sizes and index */
		if (pfds[i].revents & POLLNVAL) {
			memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
				(m->handler.pfdcount - i - 1)
					* sizeof(struct pollfd));
			m->handler.pfdcount--;
			m->handler.pfds[m->handler.pfdcount].fd = 0;
			m->handler.pfds[m->handler.pfdcount].events = 0;

			memmove(pfds + i, pfds + i + 1,
				(m->handler.copycount - i - 1)
					* sizeof(struct pollfd));
			m->handler.copycount--;
			m->handler.copy[m->handler.copycount].fd = 0;
			m->handler.copy[m->handler.copycount].events = 0;

			i--;
		}
	}
}
/* Add all timers that have popped to the ready list. */
static unsigned int thread_process_timers(struct thread_master *m,
					  struct timeval *timenow)
{
	struct timeval prev = *timenow;
	bool displayed = false;
	struct thread *thread;
	unsigned int ready = 0;

	while ((thread = thread_timer_list_first(&m->timer))) {
		if (timercmp(timenow, &thread->u.sands, <))
			break;
		prev = thread->u.sands;
		prev.tv_sec += 4;
		/*
		 * If the timer would have popped 4 seconds in the
		 * past then we are in a situation where we are
		 * really getting behind on handling of events.
		 * Let's log it and do the right thing with it.
		 */
		if (timercmp(timenow, &prev, >)) {
			atomic_fetch_add_explicit(
				&thread->hist->total_starv_warn, 1,
				memory_order_seq_cst);
			if (!displayed && !thread->ignore_timer_late) {
				flog_warn(
					EC_LIB_STARVE_THREAD,
					"Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
					thread);
				displayed = true;
			}
		}

		thread_timer_list_pop(&m->timer);
		thread->type = THREAD_READY;
		thread_list_add_tail(&m->ready, thread);
		ready++;
	}

	return ready;
}
1747 static unsigned int thread_process(struct thread_list_head
*list
)
1749 struct thread
*thread
;
1750 unsigned int ready
= 0;
1752 while ((thread
= thread_list_pop(list
))) {
1753 thread
->type
= THREAD_READY
;
1754 thread_list_add_tail(&thread
->master
->ready
, thread
);
/* Fetch next ready thread. */
struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
{
	struct thread *thread = NULL;
	struct timeval now;
	struct timeval zerotime = {0, 0};
	struct timeval tv;
	struct timeval *tw = NULL;
	bool eintr_p = false;
	int num = 0;

	do {
		/* Handle signals if any */
		if (m->handle_signals)
			frr_sigevent_process();

		pthread_mutex_lock(&m->mtx);

		/* Process any pending cancellation requests */
		do_thread_cancel(m);

		/*
		 * Attempt to flush ready queue before going into poll().
		 * This is performance-critical. Think twice before modifying.
		 */
		if ((thread = thread_list_pop(&m->ready))) {
			fetch = thread_run(m, thread, fetch);
			if (fetch->ref)
				*fetch->ref = NULL;
			pthread_mutex_unlock(&m->mtx);
			if (!m->ready_run_loop)
				GETRUSAGE(&m->last_getrusage);
			m->ready_run_loop = true;
			break;
		}

		m->ready_run_loop = false;
		/* otherwise, tick through scheduling sequence */

		/*
		 * Post events to ready queue. This must come before the
		 * following block since events should occur immediately
		 */
		thread_process(&m->event);

		/*
		 * If there are no tasks on the ready queue, we will poll()
		 * until a timer expires or we receive I/O, whichever comes
		 * first. The strategy for doing this is:
		 *
		 * - If there are events pending, set the poll() timeout to zero
		 * - If there are no events pending, but there are timers
		 * pending, set the timeout to the smallest remaining time on
		 * any timer.
		 * - If there are neither timers nor events pending, but there
		 * are file descriptors pending, block indefinitely in poll()
		 * - If nothing is pending, it's time for the application to die
		 *
		 * In every case except the last, we need to hit poll() at least
		 * once per loop to avoid starvation by events
		 */
		if (!thread_list_count(&m->ready))
			tw = thread_timer_wait(&m->timer, &tv);

		if (thread_list_count(&m->ready) ||
		    (tw && !timercmp(tw, &zerotime, >)))
			tw = &zerotime;

		if (!tw && m->handler.pfdcount == 0) { /* die */
			pthread_mutex_unlock(&m->mtx);
			fetch = NULL;
			break;
		}

		/*
		 * Copy pollfd array + # active pollfds in it. Not necessary to
		 * copy the array size as this is fixed.
		 */
		m->handler.copycount = m->handler.pfdcount;
		memcpy(m->handler.copy, m->handler.pfds,
		       m->handler.copycount * sizeof(struct pollfd));

		pthread_mutex_unlock(&m->mtx);
		{
			eintr_p = false;
			num = fd_poll(m, tw, &eintr_p);
		}
		pthread_mutex_lock(&m->mtx);

		/* Handle any errors received in poll() */
		if (num < 0) {
			if (eintr_p) {
				pthread_mutex_unlock(&m->mtx);
				/* loop around to signal handler */
				continue;
			}

			/* else die */
			flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
				 safe_strerror(errno));
			pthread_mutex_unlock(&m->mtx);
			fetch = NULL;
			break;
		}

		/* Post timers to ready queue. */
		monotime(&now);
		thread_process_timers(m, &now);

		/* Post I/O to ready queue. */
		if (num > 0)
			thread_process_io(m, num);

		pthread_mutex_unlock(&m->mtx);

	} while (!thread && m->spin);

	return fetch;
}
static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
{
	return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
		+ (a.tv_usec - b.tv_usec));
}
unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
				   unsigned long *cputime)
{
#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID

	/*
	 * FreeBSD appears to have an issue when calling clock_gettime
	 * with CLOCK_THREAD_CPUTIME_ID really close to each other:
	 * occasionally the now time will be before the start time.
	 * This is not good and FRR is ending up with CPU HOG's
	 * when the subtraction wraps to very large numbers.
	 *
	 * What we are going to do here is cheat a little bit
	 * and notice that this is a problem and just correct
	 * it so that it is impossible to happen.
	 */
	if (start->cpu.tv_sec == now->cpu.tv_sec &&
	    start->cpu.tv_nsec > now->cpu.tv_nsec)
		now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
	else if (start->cpu.tv_sec > now->cpu.tv_sec) {
		now->cpu.tv_sec = start->cpu.tv_sec;
		now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
	}

	*cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
		   + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
#else
	/* This is 'user + sys' time. */
	*cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
		   + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
#endif
	return timeval_elapsed(now->real, start->real);
}
/* We should aim to yield after yield milliseconds, which defaults
   to THREAD_YIELD_TIME_SLOT.
   Note: we are using real (wall clock) time for this calculation.
   It could be argued that CPU time may make more sense in certain
   contexts. The things to consider are whether the thread may have
   blocked (in which case wall time increases, but CPU time does not),
   or whether the system is heavily loaded with other processes competing
   for CPU time. On balance, wall clock time seems to make sense.
   Plus it has the added benefit that gettimeofday should be faster
   than calling getrusage. */
int thread_should_yield(struct thread *thread)
{
	int result;
	frr_with_mutex (&thread->mtx) {
		result = monotime_since(&thread->real, NULL)
			 > (int64_t)thread->yield;
	}
	return result;
}
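/*
 * Cooperative-yield sketch for a long-running task ("work_cb", "do_some_work"
 * and "work_remains" are hypothetical): do a bounded chunk of work, and if
 * the time slot is exhausted, reschedule yourself instead of hogging the
 * event loop:
 *
 *	static void work_cb(struct thread *thread)
 *	{
 *		struct work *w = THREAD_ARG(thread);
 *
 *		while (work_remains(w)) {
 *			do_some_work(w);
 *			if (thread_should_yield(thread)) {
 *				thread_add_event(thread->master, work_cb, w,
 *						 0, NULL);
 *				return;
 *			}
 *		}
 *	}
 */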
void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
{
	frr_with_mutex (&thread->mtx) {
		thread->yield = yield_time;
	}
}
void thread_getrusage(RUSAGE_T *r)
{
	monotime(&r->real);
	if (!cputime_enabled) {
		memset(&r->cpu, 0, sizeof(r->cpu));
		return;
	}

#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
	/* not currently implemented in Linux's vDSO, but maybe at some point
	 * in the future?
	 */
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
#if defined RUSAGE_THREAD
#define FRR_RUSAGE RUSAGE_THREAD
#else
#define FRR_RUSAGE RUSAGE_SELF
#endif
	getrusage(FRR_RUSAGE, &(r->cpu));
#endif
}
/*
 * This function will atomically update the thread's usage history. At present
 * this is the only spot where usage history is written. Nevertheless the code
 * has been written such that the introduction of writers in the future should
 * not need to update it provided the writers atomically perform only the
 * operations done here, i.e. updating the total and maximum times. In
 * particular, the maximum real and cpu times must be monotonically increasing
 * or this code is not correct.
 */
void thread_call(struct thread *thread)
{
	RUSAGE_T before, after;

	/* if the thread being called is the CLI, it may change cputime_enabled
	 * ("service cputime-stats" command), which can result in nonsensical
	 * and very confusing warnings
	 */
	bool cputime_enabled_here = cputime_enabled;

	if (thread->master->ready_run_loop)
		before = thread->master->last_getrusage;
	else
		GETRUSAGE(&before);

	thread->real = before.real;

	frrtrace(9, frr_libfrr, thread_call, thread->master,
		 thread->xref->funcname, thread->xref->xref.file,
		 thread->xref->xref.line, NULL, thread->u.fd,
		 thread->u.val, thread->arg, thread->u.sands.tv_sec);

	pthread_setspecific(thread_current, thread);
	(*thread->func)(thread);
	pthread_setspecific(thread_current, NULL);

	GETRUSAGE(&after);
	thread->master->last_getrusage = after;

	unsigned long walltime, cputime;
	unsigned long exp;

	walltime = thread_consumed_time(&after, &before, &cputime);

	/* update walltime */
	atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
				  memory_order_seq_cst);
	exp = atomic_load_explicit(&thread->hist->real.max,
				   memory_order_seq_cst);
	while (exp < walltime
	       && !atomic_compare_exchange_weak_explicit(
		       &thread->hist->real.max, &exp, walltime,
		       memory_order_seq_cst, memory_order_seq_cst))
		;

	if (cputime_enabled_here && cputime_enabled) {
		/* update cputime */
		atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
					  memory_order_seq_cst);
		exp = atomic_load_explicit(&thread->hist->cpu.max,
					   memory_order_seq_cst);
		while (exp < cputime
		       && !atomic_compare_exchange_weak_explicit(
			       &thread->hist->cpu.max, &exp, cputime,
			       memory_order_seq_cst, memory_order_seq_cst))
			;
	}

	atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
				  memory_order_seq_cst);
	atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
				 memory_order_seq_cst);

	if (cputime_enabled_here && cputime_enabled && cputime_threshold
	    && cputime > cputime_threshold) {
		/*
		 * We have a CPU Hog on our hands. The time FRR has spent
		 * doing actual work (not sleeping) is greater than 5 seconds.
		 * Whinge about it now, so we're aware this is yet another task
		 * to fix.
		 */
		atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
					  1, memory_order_seq_cst);
		flog_warn(
			EC_LIB_SLOW_THREAD_CPU,
			"CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
			thread->xref->funcname, (unsigned long)thread->func,
			walltime / 1000, cputime / 1000);

	} else if (walltime_threshold && walltime > walltime_threshold) {
		/*
		 * The runtime for a task is greater than 5 seconds, but the
		 * cpu time is under 5 seconds. Let's whine about this because
		 * this could imply some sort of scheduling issue.
		 */
		atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
					  1, memory_order_seq_cst);
		flog_warn(
			EC_LIB_SLOW_THREAD_WALL,
			"STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
			thread->xref->funcname, (unsigned long)thread->func,
			walltime / 1000, cputime / 1000);
	}
}
/* Execute thread */
void _thread_execute(const struct xref_threadsched *xref,
		     struct thread_master *m, void (*func)(struct thread *),
		     void *arg, int val)
{
	struct thread *thread;

	/* Get or allocate new thread to execute. */
	frr_with_mutex (&m->mtx) {
		thread = thread_get(m, THREAD_EVENT, func, arg, xref);

		/* Set its event value. */
		frr_with_mutex (&thread->mtx) {
			thread->add_type = THREAD_EXECUTE;
			thread->u.val = val;
			thread->ref = &thread;
		}
	}

	/* Execute thread doing all accounting. */
	thread_call(thread);

	/* Give back or free thread. */
	thread_add_unuse(m, thread);
}
/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
void debug_signals(const sigset_t *sigs)
{
	int i, found;
	sigset_t tmpsigs;
	char buf[300];

	/*
	 * We're only looking at the non-realtime signals here, so we need
	 * some limit value. Platform differences mean at some point we just
	 * need to pick a reasonable value.
	 */
#if defined SIGRTMIN
#  define LAST_SIGNAL SIGRTMIN
#else
#  define LAST_SIGNAL 32
#endif

	if (sigs == NULL) {
		sigemptyset(&tmpsigs);
		pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
		sigs = &tmpsigs;
	}

	found = 0;
	buf[0] = '\0';

	for (i = 0; i < LAST_SIGNAL; i++) {
		char tmp[20];

		if (sigismember(sigs, i) > 0) {
			if (found > 0)
				strlcat(buf, ",", sizeof(buf));
			snprintf(tmp, sizeof(tmp), "%d", i);
			strlcat(buf, tmp, sizeof(buf));
			found++;
		}
	}

	if (found == 0)
		snprintf(buf, sizeof(buf), "<none>");

	zlog_debug("%s: %s", __func__, buf);
}
static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
				   const struct thread *thread)
{
	static const char * const types[] = {
		[THREAD_READ] = "read",
		[THREAD_WRITE] = "write",
		[THREAD_TIMER] = "timer",
		[THREAD_EVENT] = "event",
		[THREAD_READY] = "ready",
		[THREAD_UNUSED] = "unused",
		[THREAD_EXECUTE] = "exec",
	};
	ssize_t rv = 0;
	char info[16] = "";

	if (!thread)
		return bputs(buf, "{(thread *)NULL}");

	rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);

	if (thread->type < array_size(types) && types[thread->type])
		rv += bprintfrr(buf, " %-6s", types[thread->type]);
	else
		rv += bprintfrr(buf, " INVALID(%u)", thread->type);

	switch (thread->type) {
	case THREAD_READ:
	case THREAD_WRITE:
		snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
		break;

	case THREAD_TIMER:
		snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
		break;
	}

	rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
			thread->xref->funcname, thread->xref->dest,
			thread->xref->xref.file, thread->xref->xref.line);
	return rv;
}
);
2193 static ssize_t
printfrr_thread(struct fbuf
*buf
, struct printfrr_eargs
*ea
,
2196 const struct thread
*thread
= ptr
;
2197 struct timespec remain
= {};
2199 if (ea
->fmt
[0] == 'D') {
2201 return printfrr_thread_dbg(buf
, ea
, thread
);
2205 /* need to jump over time formatting flag characters in the
2206 * input format string, i.e. adjust ea->fmt!
2208 printfrr_time(buf
, ea
, &remain
,
2209 TIMEFMT_TIMER_DEADLINE
| TIMEFMT_SKIP
);
2210 return bputch(buf
, '-');
2213 TIMEVAL_TO_TIMESPEC(&thread
->u
.sands
, &remain
);
2214 return printfrr_time(buf
, ea
, &remain
, TIMEFMT_TIMER_DEADLINE
);
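/*
 * Usage sketch: with the extension registered above, zlog/vty format strings
 * can print tasks directly, e.g.
 *
 *	zlog_debug("task: %pTHD", thread);	// debug dump (type, fd, xref)
 *	vty_out(vty, "%pTH\n", thread);		// timer deadline, or '-'
 */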