/* Thread management routine
 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <zebra.h>
#include <sys/resource.h>

#include "thread.h"
#include "memory.h"
#include "frrcu.h"
#include "log.h"
#include "hash.h"
#include "command.h"
#include "sigevent.h"
#include "network.h"
#include "jhash.h"
#include "frratomic.h"
#include "frr_pthread.h"
#include "lib_errors.h"
#include "libfrr_trace.h"
#include "libfrr.h"
DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");

DECLARE_LIST(thread_list, struct thread, threaditem);
struct cancel_req {
	int flags;
	struct thread *thread;
	void *eventobj;
	struct thread **threadref;
};

/* Flags for task cancellation */
#define THREAD_CANCEL_FLAG_READY 0x01
static int thread_timer_cmp(const struct thread *a, const struct thread *b)
{
	if (a->u.sands.tv_sec < b->u.sands.tv_sec)
		return -1;
	if (a->u.sands.tv_sec > b->u.sands.tv_sec)
		return 1;
	if (a->u.sands.tv_usec < b->u.sands.tv_usec)
		return -1;
	if (a->u.sands.tv_usec > b->u.sands.tv_usec)
		return 1;
	return 0;
}

DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);
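
/*
 * A quick orientation note: the comparator above yields a
 * soonest-deadline-first ordering, so thread_timer_list_first() is always
 * the next timer due to pop.  A minimal sketch of that property (field
 * values illustrative only):
 *
 *	struct thread soon = { .u.sands = { .tv_sec = 1 } };
 *	struct thread late = { .u.sands = { .tv_sec = 2 } };
 *	assert(thread_timer_cmp(&soon, &late) < 0);
 */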
#if defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif

#define AWAKEN(m)                                                              \
	do {                                                                   \
		const unsigned char wakebyte = 0x01;                           \
		write(m->io_pipe[1], &wakebyte, 1);                            \
	} while (0)
/* control variable for initializer */
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
pthread_key_t thread_current;

static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *masters;
static void thread_free(struct thread_master *master, struct thread *thread);
#ifndef EXCLUDE_CPU_TIME
#define EXCLUDE_CPU_TIME 0
#endif
#ifndef CONSUMED_TIME_CHECK
#define CONSUMED_TIME_CHECK 5000000
#endif

bool cputime_enabled = !EXCLUDE_CPU_TIME;
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
/* CLI start ---------------------------------------------------------------- */
#ifndef VTYSH_EXTRACT_PL
#include "lib/thread_clippy.c"
#endif
static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
{
	int size = sizeof(a->func);

	return jhash(&a->func, size, 0);
}
static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
				const struct cpu_thread_history *b)
{
	return a->func == b->func;
}
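
/*
 * Note on the two hash callbacks above: history entries are keyed and
 * compared purely on the callback function pointer, so all scheduled
 * instances of one handler share a single cpu_thread_history record.
 */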
static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
{
	struct cpu_thread_history *new;

	new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
	new->func = a->func;
	new->funcname = a->funcname;
	return new;
}
static void cpu_record_hash_free(void *a)
{
	struct cpu_thread_history *hist = a;

	XFREE(MTYPE_THREAD_STATS, hist);
}
static void vty_out_cpu_thread_history(struct vty *vty,
				       struct cpu_thread_history *a)
{
	vty_out(vty,
		"%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
		a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
		a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
		(a->real.total / a->total_calls), a->real.max,
		a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
	vty_out(vty, " %c%c%c%c%c %s\n",
		a->types & (1 << THREAD_READ) ? 'R' : ' ',
		a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
		a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
		a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
		a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
}
static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
{
	struct cpu_thread_history *totals = args[0];
	struct cpu_thread_history copy;
	struct vty *vty = args[1];
	uint8_t *filter = args[2];

	struct cpu_thread_history *a = bucket->data;

	copy.total_active =
		atomic_load_explicit(&a->total_active, memory_order_seq_cst);
	copy.total_calls =
		atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
	copy.total_cpu_warn =
		atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
	copy.total_wall_warn =
		atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
	copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
						     memory_order_seq_cst);
	copy.cpu.total =
		atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
	copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
	copy.real.total =
		atomic_load_explicit(&a->real.total, memory_order_seq_cst);
	copy.real.max =
		atomic_load_explicit(&a->real.max, memory_order_seq_cst);
	copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
	copy.funcname = a->funcname;

	if (!(copy.types & *filter))
		return;

	vty_out_cpu_thread_history(vty, &copy);
	totals->total_active += copy.total_active;
	totals->total_calls += copy.total_calls;
	totals->total_cpu_warn += copy.total_cpu_warn;
	totals->total_wall_warn += copy.total_wall_warn;
	totals->total_starv_warn += copy.total_starv_warn;
	totals->real.total += copy.real.total;
	if (totals->real.max < copy.real.max)
		totals->real.max = copy.real.max;
	totals->cpu.total += copy.cpu.total;
	if (totals->cpu.max < copy.cpu.max)
		totals->cpu.max = copy.cpu.max;
}
static void cpu_record_print(struct vty *vty, uint8_t filter)
{
	struct cpu_thread_history tmp;
	void *args[3] = {&tmp, vty, &filter};
	struct thread_master *m;
	struct listnode *ln;

	if (!cputime_enabled)
		vty_out(vty,
			"\n"
			"Collecting CPU time statistics is currently disabled. Following statistics\n"
			"will be zero or may display data from when collection was enabled. Use the\n"
			"  \"service cputime-stats\"  command to start collecting data.\n"
			"\nCounters and wallclock times are always maintained and should be accurate.\n");

	memset(&tmp, 0, sizeof(tmp));
	tmp.funcname = "TOTAL";
	tmp.types = filter;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			const char *name = m->name ? m->name : "main";

			char underline[strlen(name) + 1];
			memset(underline, '-', sizeof(underline));
			underline[sizeof(underline) - 1] = '\0';

			vty_out(vty, "\n");
			vty_out(vty, "Showing statistics for pthread %s\n",
				name);
			vty_out(vty, "-------------------------------%s\n",
				underline);
			vty_out(vty, "%30s %18s %18s\n", "",
				"CPU (user+system):", "Real (wall-clock):");
			vty_out(vty,
				"Active Runtime(ms) Invoked Avg uSec Max uSecs");
			vty_out(vty, " Avg uSec Max uSecs");
			vty_out(vty,
				" CPU_Warn Wall_Warn Starv_Warn Type Thread\n");

			if (m->cpu_record->count)
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_print,
					args);
			else
				vty_out(vty, "No data to display yet.\n");

			vty_out(vty, "\n");
		}
	}

	vty_out(vty, "\n");
	vty_out(vty, "Total thread statistics\n");
	vty_out(vty, "-------------------------\n");
	vty_out(vty, "%30s %18s %18s\n", "",
		"CPU (user+system):", "Real (wall-clock):");
	vty_out(vty, "Active Runtime(ms) Invoked Avg uSec Max uSecs");
	vty_out(vty, " Avg uSec Max uSecs CPU_Warn Wall_Warn");
	vty_out(vty, " Type Thread\n");

	if (tmp.total_calls > 0)
		vty_out_cpu_thread_history(vty, &tmp);
}
static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
{
	uint8_t *filter = args[0];
	struct hash *cpu_record = args[1];

	struct cpu_thread_history *a = bucket->data;

	if (!(a->types & *filter))
		return;

	hash_release(cpu_record, bucket->data);
}
static void cpu_record_clear(uint8_t filter)
{
	uint8_t *tmp = &filter;
	struct thread_master *m;
	struct listnode *ln;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			frr_with_mutex(&m->mtx) {
				void *args[2] = {tmp, m->cpu_record};

				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_clear,
					args);
			}
		}
	}
}
static uint8_t parse_filter(const char *filterstr)
{
	int i = 0;
	int filter = 0;

	while (filterstr[i] != '\0') {
		switch (filterstr[i]) {
		case 'r':
		case 'R':
			filter |= (1 << THREAD_READ);
			break;
		case 'w':
		case 'W':
			filter |= (1 << THREAD_WRITE);
			break;
		case 't':
		case 'T':
			filter |= (1 << THREAD_TIMER);
			break;
		case 'e':
		case 'E':
			filter |= (1 << THREAD_EVENT);
			break;
		case 'x':
		case 'X':
			filter |= (1 << THREAD_EXECUTE);
			break;
		default:
			break;
		}
		++i;
	}

	return filter;
}
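
/*
 * Consumption sketch for the parser above (values illustrative):
 * parse_filter("rw") returns (1 << THREAD_READ) | (1 << THREAD_WRITE),
 * which cpu_record_print()/cpu_record_clear() then match against each
 * history entry's ->types bitfield:
 *
 *	uint8_t filter = parse_filter("rw");
 *	cpu_record_print(vty, filter);	// show read/write handlers only
 */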
DEFUN_NOSH (show_thread_cpu,
	    show_thread_cpu_cmd,
	    "show thread cpu [FILTER]",
	    SHOW_STR
	    "Thread information\n"
	    "Thread CPU usage\n"
	    "Display filter (rwtex)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_print(vty, filter);
	return CMD_SUCCESS;
}
DEFPY (service_cputime_stats,
       service_cputime_stats_cmd,
       "[no] service cputime-stats",
       NO_STR
       "Set up miscellaneous service\n"
       "Collect CPU usage statistics\n")
{
	cputime_enabled = !no;
	return CMD_SUCCESS;
}
DEFPY (service_cputime_warning,
       service_cputime_warning_cmd,
       "[no] service cputime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		cputime_threshold = 0;
	else
		cputime_threshold = cputime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_cputime_warning,
       no_service_cputime_warning_cmd,
       "no service cputime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n")
DEFPY (service_walltime_warning,
       service_walltime_warning_cmd,
       "[no] service walltime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		walltime_threshold = 0;
	else
		walltime_threshold = walltime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_walltime_warning,
       no_service_walltime_warning_cmd,
       "no service walltime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n")
static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct thread *thread;
	uint32_t i;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing poll FD's for %s\n", name);
	vty_out(vty, "----------------------%s\n", underline);
	vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
		m->fd_limit);
	for (i = 0; i < m->handler.pfdcount; i++) {
		vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
			m->handler.pfds[i].fd, m->handler.pfds[i].events,
			m->handler.pfds[i].revents);

		if (m->handler.pfds[i].events & POLLIN) {
			thread = m->read[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR ");
			else
				vty_out(vty, "%s ", thread->xref->funcname);
		} else
			vty_out(vty, " ");

		if (m->handler.pfds[i].events & POLLOUT) {
			thread = m->write[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR\n");
			else
				vty_out(vty, "%s\n", thread->xref->funcname);
		} else
			vty_out(vty, "\n");
	}
}
DEFUN_NOSH (show_thread_poll,
	    show_thread_poll_cmd,
	    "show thread poll",
	    SHOW_STR
	    "Thread information\n"
	    "Show poll FD's and information\n")
{
	struct listnode *node;
	struct thread_master *m;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
			show_thread_poll_helper(vty, m);
		}
	}

	return CMD_SUCCESS;
}
DEFUN (clear_thread_cpu,
       clear_thread_cpu_cmd,
       "clear thread cpu [FILTER]",
       "Clear stored data in all pthreads\n"
       "Thread information\n"
       "Thread CPU usage\n"
       "Display filter (rwtexb)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_clear(filter);
	return CMD_SUCCESS;
}
static void show_thread_timers_helper(struct vty *vty, struct thread_master *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct thread *thread;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing timers for %s\n", name);
	vty_out(vty, "-------------------%s\n", underline);

	frr_each (thread_timer_list, &m->timer, thread) {
		vty_out(vty, " %-50s%pTH\n", thread->hist->funcname, thread);
	}
}
DEFPY_NOSH (show_thread_timers,
	    show_thread_timers_cmd,
	    "show thread timers",
	    SHOW_STR
	    "Thread information\n"
	    "Show all timers and how long they have in the system\n")
{
	struct listnode *node;
	struct thread_master *m;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m))
			show_thread_timers_helper(vty, m);
	}

	return CMD_SUCCESS;
}
void thread_cmd_init(void)
{
	install_element(VIEW_NODE, &show_thread_cpu_cmd);
	install_element(VIEW_NODE, &show_thread_poll_cmd);
	install_element(ENABLE_NODE, &clear_thread_cpu_cmd);

	install_element(CONFIG_NODE, &service_cputime_stats_cmd);
	install_element(CONFIG_NODE, &service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &service_walltime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);

	install_element(VIEW_NODE, &show_thread_timers_cmd);
}
/* CLI end ------------------------------------------------------------------ */
static void cancelreq_del(void *cr)
{
	XFREE(MTYPE_TMP, cr);
}

/* initializer, only ever called once */
static void initializer(void)
{
	pthread_key_create(&thread_current, NULL);
}
struct thread_master *thread_master_create(const char *name)
{
	struct thread_master *rv;
	struct rlimit limit;

	pthread_once(&init_once, &initializer);

	rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));

	/* Initialize master mutex */
	pthread_mutex_init(&rv->mtx, NULL);
	pthread_cond_init(&rv->cancel_cond, NULL);

	/* Set name */
	name = name ? name : "default";
	rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);

	/* Initialize I/O task data structures */

	/* Use configured limit if present, ulimit otherwise. */
	rv->fd_limit = frr_get_fd_limit();
	if (rv->fd_limit == 0) {
		getrlimit(RLIMIT_NOFILE, &limit);
		rv->fd_limit = (int)limit.rlim_cur;
	}

	rv->read = XCALLOC(MTYPE_THREAD_POLL,
			   sizeof(struct thread *) * rv->fd_limit);

	rv->write = XCALLOC(MTYPE_THREAD_POLL,
			    sizeof(struct thread *) * rv->fd_limit);

	char tmhashname[strlen(name) + 32];

	snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
		 name);
	rv->cpu_record = hash_create_size(
		8, (unsigned int (*)(const void *))cpu_record_hash_key,
		(bool (*)(const void *, const void *))cpu_record_hash_cmp,
		tmhashname);

	thread_list_init(&rv->event);
	thread_list_init(&rv->ready);
	thread_list_init(&rv->unuse);
	thread_timer_list_init(&rv->timer);

	/* Initialize thread_fetch() settings */
	rv->spin = true;
	rv->handle_signals = true;

	/* Set pthread owner, should be updated by actual owner */
	rv->owner = pthread_self();
	rv->cancel_req = list_new();
	rv->cancel_req->del = cancelreq_del;
	rv->canceled = true;

	/* Initialize pipe poker */
	pipe(rv->io_pipe);
	set_nonblocking(rv->io_pipe[0]);
	set_nonblocking(rv->io_pipe[1]);

	/* Initialize data structures for poll() */
	rv->handler.pfdsize = rv->fd_limit;
	rv->handler.pfdcount = 0;
	rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);
	rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);

	/* add to list of threadmasters */
	frr_with_mutex(&masters_mtx) {
		if (!masters)
			masters = list_new();

		listnode_add(masters, rv);
	}

	return rv;
}
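
/*
 * Lifecycle sketch for the constructor above: a master is normally paired
 * with thread_master_free() at shutdown, optionally draining the recycling
 * list in between via thread_master_free_unused():
 *
 *	struct thread_master *m = thread_master_create("my-daemon");
 *	...schedule tasks, run the fetch/call loop...
 *	thread_master_free(m);
 *
 * ("my-daemon" is an illustrative name; passing NULL yields "default".)
 */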
void thread_master_set_name(struct thread_master *master, const char *name)
{
	frr_with_mutex(&master->mtx) {
		XFREE(MTYPE_THREAD_MASTER, master->name);
		master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
	}
}
#define THREAD_UNUSED_DEPTH 10

/* Move thread to unuse list. */
static void thread_add_unuse(struct thread_master *m, struct thread *thread)
{
	pthread_mutex_t mtxc = thread->mtx;

	assert(m != NULL && thread != NULL);

	thread->hist->total_active--;
	memset(thread, 0, sizeof(struct thread));
	thread->type = THREAD_UNUSED;

	/* Restore the thread mutex context. */
	thread->mtx = mtxc;

	if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
		thread_list_add_tail(&m->unuse, thread);
		return;
	}

	thread_free(m, thread);
}
/* Free all unused thread. */
static void thread_list_free(struct thread_master *m,
			     struct thread_list_head *list)
{
	struct thread *t;

	while ((t = thread_list_pop(list)))
		thread_free(m, t);
}
static void thread_array_free(struct thread_master *m,
			      struct thread **thread_array)
{
	struct thread *t;
	int index;

	for (index = 0; index < m->fd_limit; ++index) {
		t = thread_array[index];
		if (t) {
			thread_array[index] = NULL;
			thread_free(m, t);
		}
	}
	XFREE(MTYPE_THREAD_POLL, thread_array);
}
/*
 * thread_master_free_unused
 *
 * As threads are finished with, they are put on the
 * unuse list for later reuse.
 * If we are shutting down, free up unused threads
 * so we can see if we forgot to shut anything off.
 */
void thread_master_free_unused(struct thread_master *m)
{
	frr_with_mutex(&m->mtx) {
		struct thread *t;

		while ((t = thread_list_pop(&m->unuse)))
			thread_free(m, t);
	}
}
/* Stop thread scheduler. */
void thread_master_free(struct thread_master *m)
{
	struct thread *t;

	frr_with_mutex(&masters_mtx) {
		listnode_delete(masters, m);
		if (masters->count == 0) {
			list_delete(&masters);
		}
	}

	thread_array_free(m, m->read);
	thread_array_free(m, m->write);
	while ((t = thread_timer_list_pop(&m->timer)))
		thread_free(m, t);
	thread_list_free(m, &m->event);
	thread_list_free(m, &m->ready);
	thread_list_free(m, &m->unuse);
	pthread_mutex_destroy(&m->mtx);
	pthread_cond_destroy(&m->cancel_cond);
	close(m->io_pipe[0]);
	close(m->io_pipe[1]);
	list_delete(&m->cancel_req);
	m->cancel_req = NULL;

	hash_clean(m->cpu_record, cpu_record_hash_free);
	hash_free(m->cpu_record);
	m->cpu_record = NULL;

	XFREE(MTYPE_THREAD_MASTER, m->name);
	XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
	XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
	XFREE(MTYPE_THREAD_MASTER, m);
}
/* Return remaining time in milliseconds. */
unsigned long thread_timer_remain_msec(struct thread *thread)
{
	int64_t remain;

	if (!thread_is_scheduled(thread))
		return 0;

	frr_with_mutex(&thread->mtx) {
		remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
	}

	return remain < 0 ? 0 : remain;
}
/* Return remaining time in seconds. */
unsigned long thread_timer_remain_second(struct thread *thread)
{
	return thread_timer_remain_msec(thread) / 1000LL;
}

struct timeval thread_timer_remain(struct thread *thread)
{
	struct timeval remain;

	frr_with_mutex(&thread->mtx) {
		monotime_until(&thread->u.sands, &remain);
	}
	return remain;
}
static int time_hhmmss(char *buf, int buf_size, long sec)
{
	long hh;
	long mm;
	int wr;

	assert(buf_size >= 8);

	hh = sec / 3600;
	sec %= 3600;
	mm = sec / 60;
	sec %= 60;

	wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);

	return wr != 8;
}
char *thread_timer_to_hhmmss(char *buf, int buf_size,
			     struct thread *t_timer)
{
	if (t_timer)
		time_hhmmss(buf, buf_size,
			    thread_timer_remain_second(t_timer));
	else
		snprintf(buf, buf_size, "--:--:--");

	return buf;
}
815 static struct thread
*thread_get(struct thread_master
*m
, uint8_t type
,
816 void (*func
)(struct thread
*), void *arg
,
817 const struct xref_threadsched
*xref
)
819 struct thread
*thread
= thread_list_pop(&m
->unuse
);
820 struct cpu_thread_history tmp
;
823 thread
= XCALLOC(MTYPE_THREAD
, sizeof(struct thread
));
824 /* mutex only needs to be initialized at struct creation. */
825 pthread_mutex_init(&thread
->mtx
, NULL
);
830 thread
->add_type
= type
;
833 thread
->yield
= THREAD_YIELD_TIME_SLOT
; /* default */
835 thread
->ignore_timer_late
= false;
838 * So if the passed in funcname is not what we have
839 * stored that means the thread->hist needs to be
840 * updated. We keep the last one around in unused
841 * under the assumption that we are probably
842 * going to immediately allocate the same
844 * This hopefully saves us some serious
847 if ((thread
->xref
&& thread
->xref
->funcname
!= xref
->funcname
)
848 || thread
->func
!= func
) {
850 tmp
.funcname
= xref
->funcname
;
852 hash_get(m
->cpu_record
, &tmp
,
853 (void *(*)(void *))cpu_record_hash_alloc
);
855 thread
->hist
->total_active
++;
862 static void thread_free(struct thread_master
*master
, struct thread
*thread
)
864 /* Update statistics. */
865 assert(master
->alloc
> 0);
868 /* Free allocated resources. */
869 pthread_mutex_destroy(&thread
->mtx
);
870 XFREE(MTYPE_THREAD
, thread
);
static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
		   bool *eintr_p)
{
	sigset_t origsigs;
	unsigned char trash[64];
	nfds_t count = m->handler.copycount;

	/*
	 * If timer_wait is null here, that means poll() should block
	 * indefinitely, unless the thread_master has overridden it by setting
	 * ->selectpoll_timeout.
	 *
	 * If the value is positive, it specifies the maximum number of
	 * milliseconds to wait. If the timeout is -1, it specifies that
	 * we should never wait and always return immediately even if no
	 * event is detected. If the value is zero, the behavior is default.
	 */
	int timeout = -1;

	/* number of file descriptors with events */
	int num;

	if (timer_wait != NULL
	    && m->selectpoll_timeout == 0) // use the default value
		timeout = (timer_wait->tv_sec * 1000)
			  + (timer_wait->tv_usec / 1000);
	else if (m->selectpoll_timeout > 0) // use the user's timeout
		timeout = m->selectpoll_timeout;
	else if (m->selectpoll_timeout
		 < 0) // effect a poll (return immediately)
		timeout = 0;

	zlog_tls_buffer_flush();
	rcu_read_unlock();
	rcu_assert_read_unlocked();

	/* add poll pipe poker */
	assert(count + 1 < m->handler.pfdsize);
	m->handler.copy[count].fd = m->io_pipe[0];
	m->handler.copy[count].events = POLLIN;
	m->handler.copy[count].revents = 0x00;

	/* We need to deal with a signal-handling race here: we
	 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
	 * that may arrive just before we enter poll(). We will block the
	 * key signals, then check whether any have arrived - if so, we return
	 * before calling poll(). If not, we'll re-enable the signals
	 * in the ppoll() call.
	 */

	sigemptyset(&origsigs);
	if (m->handle_signals) {
		/* Main pthread that handles the app signals */
		if (frr_sigevent_check(&origsigs)) {
			/* Signal to process - restore signal mask and return */
			pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
			num = -1;
			*eintr_p = true;
			goto done;
		}
	} else {
		/* Don't make any changes for the non-main pthreads */
		pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
	}

#if defined(HAVE_PPOLL)
	struct timespec ts, *tsp;

	if (timeout >= 0) {
		ts.tv_sec = timeout / 1000;
		ts.tv_nsec = (timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
#else
	/* Not ideal - there is a race after we restore the signal mask */
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
	num = poll(m->handler.copy, count + 1, timeout);
#endif

done:

	if (num < 0 && errno == EINTR)
		*eintr_p = true;

	if (num > 0 && m->handler.copy[count].revents != 0 && num--)
		while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
			;

	rcu_read_lock();

	return num;
}
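
/*
 * Why fd_poll() reserves the extra pollfd slot: scheduling paths that can
 * change what poll() should wait for finish with AWAKEN(), which writes a
 * single byte into m->io_pipe[1].  Because m->io_pipe[0] is always polled
 * as the last entry, another pthread can force this poll() to return early
 * and re-evaluate timers/events; the trash buffer above merely drains
 * those wakeup bytes.
 */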
/* Add new read thread. */
void _thread_add_read_write(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg, int fd,
			    struct thread **t_ptr)
{
	int dir = xref->thread_type;
	struct thread *thread = NULL;
	struct thread **thread_array;

	if (dir == THREAD_READ)
		frrtrace(9, frr_libfrr, schedule_read, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);
	else
		frrtrace(9, frr_libfrr, schedule_write, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);

	if (fd >= m->fd_limit)
		assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			// thread is already scheduled; don't reschedule
			break;

		/* default to a new pollfd */
		nfds_t queuepos = m->handler.pfdcount;

		if (dir == THREAD_READ)
			thread_array = m->read;
		else
			thread_array = m->write;

		/* if we already have a pollfd for our file descriptor, find and
		 * use it */
		for (nfds_t i = 0; i < m->handler.pfdcount; i++)
			if (m->handler.pfds[i].fd == fd) {
				queuepos = i;

#ifdef DEV_BUILD
				/*
				 * What happens if we have a thread already
				 * created for this event?
				 */
				if (thread_array[fd])
					assert(!"Thread already scheduled for file descriptor");
#endif
				break;
			}

		/* make sure we have room for this fd + pipe poker fd */
		assert(queuepos + 1 < m->handler.pfdsize);

		thread = thread_get(m, dir, func, arg, xref);

		m->handler.pfds[queuepos].fd = fd;
		m->handler.pfds[queuepos].events |=
			(dir == THREAD_READ ? POLLIN : POLLOUT);

		if (queuepos == m->handler.pfdcount)
			m->handler.pfdcount++;

		if (thread) {
			frr_with_mutex(&thread->mtx) {
				thread->u.fd = fd;
				thread_array[thread->u.fd] = thread;
			}

			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		AWAKEN(m);
	}
}
static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
				      struct thread_master *m,
				      void (*func)(struct thread *), void *arg,
				      struct timeval *time_relative,
				      struct thread **t_ptr)
{
	struct thread *thread;
	struct timeval t;

	assert(m != NULL);
	assert(time_relative);

	frrtrace(9, frr_libfrr, schedule_timer, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);

	/* Compute expiration/deadline time. */
	monotime(&t);
	timeradd(&t, time_relative, &t);

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			return;

		thread = thread_get(m, THREAD_TIMER, func, arg, xref);

		frr_with_mutex(&thread->mtx) {
			thread->u.sands = t;
			thread_timer_list_add(&m->timer, thread);
			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		/* The timer list is sorted - if this new timer
		 * might change the time we'll wait for, give the pthread
		 * a chance to re-compute.
		 */
		if (thread_timer_list_first(&m->timer) == thread)
			AWAKEN(m);
	}
#define ONEYEAR2SEC (60 * 60 * 24 * 365)
	if (time_relative->tv_sec > ONEYEAR2SEC)
		flog_err(
			EC_LIB_TIMER_TOO_LONG,
			"Timer: %pTHD is created with an expiration that is greater than 1 year",
			thread);
}
1105 void _thread_add_timer(const struct xref_threadsched
*xref
,
1106 struct thread_master
*m
, void (*func
)(struct thread
*),
1107 void *arg
, long timer
, struct thread
**t_ptr
)
1109 struct timeval trel
;
1113 trel
.tv_sec
= timer
;
1116 _thread_add_timer_timeval(xref
, m
, func
, arg
, &trel
, t_ptr
);
/* Add timer event thread with "millisecond" resolution */
void _thread_add_timer_msec(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg,
			    long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer / 1000;
	trel.tv_usec = 1000 * (timer % 1000);

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}
/* Add timer event thread with "timeval" resolution */
void _thread_add_timer_tv(const struct xref_threadsched *xref,
			  struct thread_master *m,
			  void (*func)(struct thread *), void *arg,
			  struct timeval *tv, struct thread **t_ptr)
{
	_thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}
/* Add simple event thread. */
void _thread_add_event(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, int val, struct thread **t_ptr)
{
	struct thread *thread = NULL;

	frrtrace(9, frr_libfrr, schedule_event, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, val, arg, 0);

	assert(m != NULL);

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			break;

		thread = thread_get(m, THREAD_EVENT, func, arg, xref);
		frr_with_mutex(&thread->mtx) {
			thread->u.val = val;
			thread_list_add_tail(&m->event, thread);
		}

		if (t_ptr) {
			*t_ptr = thread;
			thread->ref = t_ptr;
		}

		AWAKEN(m);
	}
}
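
/*
 * Callers normally reach the _thread_add_* functions above through the
 * convenience macros in thread.h (thread_add_read, thread_add_timer_msec,
 * thread_add_event, ...), which capture the call site in an
 * xref_threadsched.  A scheduling sketch, with my_handler/my_ctx/sock_fd
 * as illustrative names only:
 *
 *	struct thread *t_read = NULL, *t_expire = NULL;
 *
 *	thread_add_read(master, my_handler, my_ctx, sock_fd, &t_read);
 *	thread_add_timer_msec(master, my_handler, my_ctx, 500, &t_expire);
 *	thread_add_event(master, my_handler, my_ctx, 0, NULL);
 *
 * Passing a back-reference (&t_read etc.) lets the library NULL it out
 * once the task runs or is cancelled.
 */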
/* Thread cancellation ------------------------------------------------------ */

/**
 * NOT's out the .events field of pollfd corresponding to the given file
 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
 *
 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
 * implementation for details.
 *
 * @param master
 * @param fd
 * @param state the event to cancel. One or more (OR'd together) of the
 * following:
 *   - POLLIN
 *   - POLLOUT
 */
static void thread_cancel_rw(struct thread_master *master, int fd, short state,
			     int idx_hint)
{
	bool found = false;

	/* find the index of corresponding pollfd */
	nfds_t i;

	/* Cancel POLLHUP too just in case some bozo set it */
	state |= POLLHUP;

	/* Some callers know the index of the pfd already */
	if (idx_hint >= 0) {
		i = idx_hint;
		found = true;
	} else {
		/* Have to look for the fd in the pfd array */
		for (i = 0; i < master->handler.pfdcount; i++)
			if (master->handler.pfds[i].fd == fd) {
				found = true;
				break;
			}
	}

	if (!found) {
		zlog_debug(
			"[!] Received cancellation request for nonexistent rw job");
		zlog_debug("[!] threadmaster: %s | fd: %d",
			   master->name ? master->name : "", fd);
		return;
	}

	/* NOT out event. */
	master->handler.pfds[i].events &= ~(state);

	/* If all events are canceled, delete / resize the pollfd array. */
	if (master->handler.pfds[i].events == 0) {
		memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
			(master->handler.pfdcount - i - 1)
				* sizeof(struct pollfd));
		master->handler.pfdcount--;
		master->handler.pfds[master->handler.pfdcount].fd = 0;
		master->handler.pfds[master->handler.pfdcount].events = 0;
	}

	/* If we have the same pollfd in the copy, perform the same operations,
	 * otherwise return. */
	if (i >= master->handler.copycount)
		return;

	master->handler.copy[i].events &= ~(state);

	if (master->handler.copy[i].events == 0) {
		memmove(master->handler.copy + i, master->handler.copy + i + 1,
			(master->handler.copycount - i - 1)
				* sizeof(struct pollfd));
		master->handler.copycount--;
		master->handler.copy[master->handler.copycount].fd = 0;
		master->handler.copy[master->handler.copycount].events = 0;
	}
}
/*
 * Process task cancellation given a task argument: iterate through the
 * various lists of tasks, looking for any that match the argument.
 */
static void cancel_arg_helper(struct thread_master *master,
			      const struct cancel_req *cr)
{
	struct thread *t;
	nfds_t i;
	int fd;
	struct pollfd *pfd;

	/* We're only processing arg-based cancellations here. */
	if (cr->eventobj == NULL)
		return;

	/* First process the ready lists. */
	frr_each_safe(thread_list, &master->event, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->event, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	frr_each_safe(thread_list, &master->ready, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->ready, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	/* If requested, stop here and ignore io and timers */
	if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
		return;

	/* Check the io tasks */
	for (i = 0; i < master->handler.pfdcount;) {
		pfd = master->handler.pfds + i;

		if (pfd->events & POLLIN)
			t = master->read[pfd->fd];
		else
			t = master->write[pfd->fd];

		if (t && t->arg == cr->eventobj) {
			fd = pfd->fd;

			/* Found a match to cancel: clean up fd arrays */
			thread_cancel_rw(master, pfd->fd, pfd->events, i);

			/* Clean up thread arrays */
			master->read[fd] = NULL;
			master->write[fd] = NULL;

			/* Clear caller's ref */
			if (t->ref)
				*t->ref = NULL;

			thread_add_unuse(master, t);

			/* Don't increment 'i' since the cancellation will have
			 * removed the entry from the pfd array
			 */
		} else
			i++;
	}

	/* Check the timer tasks */
	t = thread_timer_list_first(&master->timer);
	while (t) {
		struct thread *t_next;

		t_next = thread_timer_list_next(&master->timer, t);

		if (t->arg == cr->eventobj) {
			thread_timer_list_del(&master->timer, t);
			if (t->ref)
				*t->ref = NULL;
			thread_add_unuse(master, t);
		}

		t = t_next;
	}
}
/**
 * Process cancellation requests.
 *
 * This may only be run from the pthread which owns the thread_master.
 *
 * @param master the thread master to process
 * @REQUIRE master->mtx
 */
static void do_thread_cancel(struct thread_master *master)
{
	struct thread_list_head *list = NULL;
	struct thread **thread_array = NULL;
	struct thread *thread;

	struct cancel_req *cr;
	struct listnode *ln;

	for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
		/*
		 * If this is an event object cancellation, search
		 * through task lists deleting any tasks which have the
		 * specified argument - use this handy helper function.
		 */
		if (cr->eventobj) {
			cancel_arg_helper(master, cr);
			continue;
		}

		/*
		 * The pointer varies depending on whether the cancellation
		 * request was made asynchronously or not. If it was, we
		 * need to check whether the thread even exists anymore
		 * before cancelling it.
		 */
		thread = (cr->thread) ? cr->thread : *cr->threadref;

		if (!thread)
			continue;

		list = NULL;
		thread_array = NULL;

		/* Determine the appropriate queue to cancel the thread from */
		switch (thread->type) {
		case THREAD_READ:
			thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
			thread_array = master->read;
			break;
		case THREAD_WRITE:
			thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
			thread_array = master->write;
			break;
		case THREAD_TIMER:
			thread_timer_list_del(&master->timer, thread);
			break;
		case THREAD_EVENT:
			list = &master->event;
			break;
		case THREAD_READY:
			list = &master->ready;
			break;
		default:
			continue;
		}

		if (list) {
			thread_list_del(list, thread);
		} else if (thread_array) {
			thread_array[thread->u.fd] = NULL;
		}

		if (thread->ref)
			*thread->ref = NULL;

		thread_add_unuse(thread->master, thread);
	}

	/* Delete and free all cancellation requests */
	if (master->cancel_req)
		list_delete_all_node(master->cancel_req);

	/* Wake up any threads which may be blocked in thread_cancel_async() */
	master->canceled = true;
	pthread_cond_broadcast(&master->cancel_cond);
}
/*
 * Helper function used for multiple flavors of arg-based cancellation.
 */
static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
{
	struct cancel_req *cr;

	assert(m->owner == pthread_self());

	/* Only worth anything if caller supplies an arg. */
	if (arg == NULL)
		return;

	cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));

	cr->flags = flags;

	frr_with_mutex(&m->mtx) {
		cr->eventobj = arg;
		listnode_add(m->cancel_req, cr);
		do_thread_cancel(m);
	}
}
/*
 * Cancel any events which have the specified argument.
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event(struct thread_master *master, void *arg)
{
	cancel_event_helper(master, arg, 0);
}

/*
 * Cancel ready tasks with an arg matching 'arg'
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event_ready(struct thread_master *m, void *arg)
{
	/* Only cancel ready/event tasks */
	cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
}
/*
 * Cancel a specific task.
 *
 * MT-Unsafe
 *
 * @param thread task to cancel
 */
void thread_cancel(struct thread **thread)
{
	struct thread_master *master;

	if (thread == NULL || *thread == NULL)
		return;

	master = (*thread)->master;

	frrtrace(9, frr_libfrr, thread_cancel, master,
		 (*thread)->xref->funcname, (*thread)->xref->xref.file,
		 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
		 (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);

	assert(master->owner == pthread_self());

	frr_with_mutex(&master->mtx) {
		struct cancel_req *cr =
			XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
		cr->thread = *thread;
		listnode_add(master->cancel_req, cr);
		do_thread_cancel(master);
	}

	*thread = NULL;
}
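
/*
 * Cancellation usage sketch, building on the back-references from the
 * scheduling example earlier (t_read is illustrative): the synchronous
 * form may only run on the owning pthread, while any other pthread must
 * use the asynchronous variant below, which blocks until serviced:
 *
 *	thread_cancel(&t_read);				// owning pthread
 *	thread_cancel_async(master, &t_read, NULL);	// other pthreads
 */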
/**
 * Asynchronous cancellation.
 *
 * Called with either a struct thread ** or void * to an event argument,
 * this function posts the correct cancellation request and blocks until it is
 * serviced.
 *
 * If the thread is currently running, execution blocks until it completes.
 *
 * The last two parameters are mutually exclusive, i.e. if you pass one the
 * other must be NULL.
 *
 * When the cancellation procedure executes on the target thread_master, the
 * thread * provided is checked for nullity. If it is null, the thread is
 * assumed to no longer exist and the cancellation request is a no-op. Thus
 * users of this API must pass a back-reference when scheduling the original
 * task.
 *
 * MT-Safe
 *
 * @param master the thread master with the relevant event / task
 * @param thread pointer to thread to cancel
 * @param eventobj the event
 */
void thread_cancel_async(struct thread_master *master, struct thread **thread,
			 void *eventobj)
{
	assert(!(thread && eventobj) && (thread || eventobj));

	if (thread && *thread)
		frrtrace(9, frr_libfrr, thread_cancel_async, master,
			 (*thread)->xref->funcname, (*thread)->xref->xref.file,
			 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
			 (*thread)->u.val, (*thread)->arg,
			 (*thread)->u.sands.tv_sec);
	else
		frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
			 0, NULL, 0, 0, eventobj, 0);

	assert(master->owner != pthread_self());

	frr_with_mutex(&master->mtx) {
		master->canceled = false;

		if (thread) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->threadref = thread;
			listnode_add(master->cancel_req, cr);
		} else if (eventobj) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->eventobj = eventobj;
			listnode_add(master->cancel_req, cr);
		}
		AWAKEN(master);

		while (!master->canceled)
			pthread_cond_wait(&master->cancel_cond, &master->mtx);
	}

	if (thread)
		*thread = NULL;
}
1579 static struct timeval
*thread_timer_wait(struct thread_timer_list_head
*timers
,
1580 struct timeval
*timer_val
)
1582 if (!thread_timer_list_count(timers
))
1585 struct thread
*next_timer
= thread_timer_list_first(timers
);
1586 monotime_until(&next_timer
->u
.sands
, timer_val
);
static struct thread *thread_run(struct thread_master *m, struct thread *thread,
				 struct thread *fetch)
{
	*fetch = *thread;
	thread_add_unuse(m, thread);
	return fetch;
}
static int thread_process_io_helper(struct thread_master *m,
				    struct thread *thread, short state,
				    short actual_state, int pos)
{
	struct thread **thread_array;

	/*
	 * poll() clears the .events field, but the pollfd array we
	 * pass to poll() is a copy of the one used to schedule threads.
	 * We need to synchronize state between the two here by applying
	 * the same changes poll() made on the copy of the "real" pollfd
	 * array.
	 *
	 * This cleans up a possible infinite loop where we refuse
	 * to respond to a poll event but poll is insistent that
	 * we should.
	 */
	m->handler.pfds[pos].events &= ~(state);

	if (!thread) {
		if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
			flog_err(EC_LIB_NO_THREAD,
				 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
				 m->handler.pfds[pos].fd, actual_state);
		return 0;
	}

	if (thread->type == THREAD_READ)
		thread_array = m->read;
	else
		thread_array = m->write;

	thread_array[thread->u.fd] = NULL;
	thread_list_add_tail(&m->ready, thread);
	thread->type = THREAD_READY;

	return 1;
}
/**
 * Process I/O events.
 *
 * Walks through file descriptor array looking for those pollfds whose .revents
 * field has something interesting. Deletes any invalid file descriptors.
 *
 * @param m the thread master
 * @param num the number of active file descriptors (return value of poll())
 */
static void thread_process_io(struct thread_master *m, unsigned int num)
{
	unsigned int ready = 0;
	struct pollfd *pfds = m->handler.copy;

	for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
		/* no event for current fd? immediately continue */
		if (pfds[i].revents == 0)
			continue;

		ready++;

		/*
		 * Unless someone has called thread_cancel from another
		 * pthread, the only thing that could have changed in
		 * m->handler.pfds while we were asleep is the .events
		 * field in a given pollfd. Barring thread_cancel() that
		 * value should be a superset of the values we have in our
		 * copy, so there's no need to update it. Similarly,
		 * barring deletion, the fd should still be a valid index
		 * into the master's pfds.
		 *
		 * We are including POLLERR here to do a READ event
		 * this is because the read should fail and the
		 * read function should handle it appropriately
		 */
		if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
			thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
						 pfds[i].revents, i);
		}
		if (pfds[i].revents & POLLOUT)
			thread_process_io_helper(m, m->write[pfds[i].fd],
						 POLLOUT, pfds[i].revents, i);

		/* if one of our file descriptors is garbage, remove the same
		 * from both pfds + update sizes and index */
		if (pfds[i].revents & POLLNVAL) {
			memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
				(m->handler.pfdcount - i - 1)
					* sizeof(struct pollfd));
			m->handler.pfdcount--;
			m->handler.pfds[m->handler.pfdcount].fd = 0;
			m->handler.pfds[m->handler.pfdcount].events = 0;

			memmove(pfds + i, pfds + i + 1,
				(m->handler.copycount - i - 1)
					* sizeof(struct pollfd));
			m->handler.copycount--;
			m->handler.copy[m->handler.copycount].fd = 0;
			m->handler.copy[m->handler.copycount].events = 0;

			i--;
		}
	}
}
/* Add all timers that have popped to the ready list. */
static unsigned int thread_process_timers(struct thread_master *m,
					  struct timeval *timenow)
{
	struct timeval prev = *timenow;
	bool displayed = false;
	struct thread *thread;
	unsigned int ready = 0;

	while ((thread = thread_timer_list_first(&m->timer))) {
		if (timercmp(timenow, &thread->u.sands, <))
			break;
		prev = thread->u.sands;
		prev.tv_sec += 4;
		/*
		 * If the timer would have popped 4 seconds in the
		 * past then we are in a situation where we are
		 * really getting behind on handling of events.
		 * Let's log it and do the right thing with it.
		 */
		if (timercmp(timenow, &prev, >)) {
			atomic_fetch_add_explicit(
				&thread->hist->total_starv_warn, 1,
				memory_order_seq_cst);
			if (!displayed && !thread->ignore_timer_late) {
				flog_warn(
					EC_LIB_STARVE_THREAD,
					"Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
					thread);
				displayed = true;
			}
		}

		thread_timer_list_pop(&m->timer);
		thread->type = THREAD_READY;
		thread_list_add_tail(&m->ready, thread);
		ready++;
	}

	return ready;
}
/* process a list en masse, e.g. for event thread lists */
static unsigned int thread_process(struct thread_list_head *list)
{
	struct thread *thread;
	unsigned int ready = 0;

	while ((thread = thread_list_pop(list))) {
		thread->type = THREAD_READY;
		thread_list_add_tail(&thread->master->ready, thread);
		ready++;
	}
	return ready;
}
/* Fetch next ready thread. */
struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
{
	struct thread *thread = NULL;
	struct timeval now;
	struct timeval zerotime = {0, 0};
	struct timeval tv;
	struct timeval *tw = NULL;
	bool eintr_p = false;
	int num = 0;

	do {
		/* Handle signals if any */
		if (m->handle_signals)
			frr_sigevent_process();

		pthread_mutex_lock(&m->mtx);

		/* Process any pending cancellation requests */
		do_thread_cancel(m);

		/*
		 * Attempt to flush ready queue before going into poll().
		 * This is performance-critical. Think twice before modifying.
		 */
		if ((thread = thread_list_pop(&m->ready))) {
			fetch = thread_run(m, thread, fetch);
			if (fetch->ref)
				*fetch->ref = NULL;
			pthread_mutex_unlock(&m->mtx);
			if (!m->ready_run_loop)
				GETRUSAGE(&m->last_getrusage);
			m->ready_run_loop = true;
			break;
		}

		m->ready_run_loop = false;
		/* otherwise, tick through scheduling sequence */

		/*
		 * Post events to ready queue. This must come before the
		 * following block since events should occur immediately
		 */
		thread_process(&m->event);

		/*
		 * If there are no tasks on the ready queue, we will poll()
		 * until a timer expires or we receive I/O, whichever comes
		 * first. The strategy for doing this is:
		 *
		 * - If there are events pending, set the poll() timeout to zero
		 * - If there are no events pending, but there are timers
		 *   pending, set the timeout to the smallest remaining time on
		 *   any timer.
		 * - If there are neither timers nor events pending, but there
		 *   are file descriptors pending, block indefinitely in poll()
		 * - If nothing is pending, it's time for the application to die
		 *
		 * In every case except the last, we need to hit poll() at least
		 * once per loop to avoid starvation by events
		 */
		if (!thread_list_count(&m->ready))
			tw = thread_timer_wait(&m->timer, &tv);

		if (thread_list_count(&m->ready) ||
		    (tw && !timercmp(tw, &zerotime, >)))
			tw = &zerotime;

		if (!tw && m->handler.pfdcount == 0) { /* die */
			pthread_mutex_unlock(&m->mtx);
			fetch = NULL;
			break;
		}

		/*
		 * Copy pollfd array + # active pollfds in it. Not necessary to
		 * copy the array size as this is fixed.
		 */
		m->handler.copycount = m->handler.pfdcount;
		memcpy(m->handler.copy, m->handler.pfds,
		       m->handler.copycount * sizeof(struct pollfd));

		pthread_mutex_unlock(&m->mtx);
		eintr_p = false;
		num = fd_poll(m, tw, &eintr_p);
		pthread_mutex_lock(&m->mtx);

		/* Handle any errors received in poll() */
		if (num < 0) {
			if (eintr_p) {
				pthread_mutex_unlock(&m->mtx);
				/* loop around to signal handler */
				continue;
			}

			/* else die */
			flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
				 safe_strerror(errno));
			pthread_mutex_unlock(&m->mtx);
			fetch = NULL;
			break;
		}

		/* Post timers to ready queue. */
		monotime(&now);
		thread_process_timers(m, &now);

		/* Post I/O to ready queue. */
		if (num > 0)
			thread_process_io(m, num);

		pthread_mutex_unlock(&m->mtx);

	} while (!thread && m->spin);

	return fetch;
}
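
/*
 * The canonical consumer of thread_fetch() is a fetch/call loop; FRR
 * daemons normally get this loop via frr_run(), but a minimal sketch
 * looks like:
 *
 *	struct thread task;
 *
 *	while (thread_fetch(m, &task))
 *		thread_call(&task);
 *
 * thread_fetch() returns NULL only in the "time for the application to
 * die" case documented above.
 */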
static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
{
	return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
		+ (a.tv_usec - b.tv_usec));
}
unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
				   unsigned long *cputime)
{
#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID

	/*
	 * FreeBSD appears to have an issue when calling clock_gettime
	 * with CLOCK_THREAD_CPUTIME_ID really close to each other:
	 * occasionally the now time will be before the start time.
	 * This is not good and FRR is ending up with CPU HOG's
	 * when the subtraction wraps to very large numbers.
	 *
	 * What we are going to do here is cheat a little bit
	 * and notice that this is a problem and just correct
	 * it so that it is impossible to happen.
	 */
	if (start->cpu.tv_sec == now->cpu.tv_sec &&
	    start->cpu.tv_nsec > now->cpu.tv_nsec)
		now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
	else if (start->cpu.tv_sec > now->cpu.tv_sec) {
		now->cpu.tv_sec = start->cpu.tv_sec;
		now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
	}

	*cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
		   + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
#else
	/* This is 'user + sys' time. */
	*cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
		   + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
#endif
	return timeval_elapsed(now->real, start->real);
}
/* We should aim to yield after yield milliseconds, which defaults
   to THREAD_YIELD_TIME_SLOT.
   Note: we are using real (wall clock) time for this calculation.
   It could be argued that CPU time may make more sense in certain
   contexts.  The things to consider are whether the thread may have
   blocked (in which case wall time increases, but CPU time does not),
   or whether the system is heavily loaded with other processes competing
   for CPU time.  On balance, wall clock time seems to make sense.
   Plus it has the added benefit that gettimeofday should be faster
   than calling getrusage. */
int thread_should_yield(struct thread *thread)
{
	int result;

	frr_with_mutex(&thread->mtx) {
		result = monotime_since(&thread->real, NULL)
			 > (int64_t)thread->yield;
	}
	return result;
}
void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
{
	frr_with_mutex(&thread->mtx) {
		thread->yield = yield_time;
	}
}
*r
)
1951 if (!cputime_enabled
) {
1952 memset(&r
->cpu
, 0, sizeof(r
->cpu
));
1956 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1957 /* not currently implemented in Linux's vDSO, but maybe at some point
1960 clock_gettime(CLOCK_THREAD_CPUTIME_ID
, &r
->cpu
);
1961 #else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
1962 #if defined RUSAGE_THREAD
1963 #define FRR_RUSAGE RUSAGE_THREAD
1965 #define FRR_RUSAGE RUSAGE_SELF
1967 getrusage(FRR_RUSAGE
, &(r
->cpu
));
/*
 * This function will atomically update the thread's usage history. At present
 * this is the only spot where usage history is written. Nevertheless the code
 * has been written such that the introduction of writers in the future should
 * not need to update it provided the writers atomically perform only the
 * operations done here, i.e. updating the total and maximum times. In
 * particular, the maximum real and cpu times must be monotonically increasing
 * or this code is not correct.
 */
void thread_call(struct thread *thread)
{
	RUSAGE_T before, after;

	/* if the thread being called is the CLI, it may change cputime_enabled
	 * ("service cputime-stats" command), which can result in nonsensical
	 * and very confusing warnings
	 */
	bool cputime_enabled_here = cputime_enabled;

	if (thread->master->ready_run_loop)
		before = thread->master->last_getrusage;
	else
		GETRUSAGE(&before);

	thread->real = before.real;

	frrtrace(9, frr_libfrr, thread_call, thread->master,
		 thread->xref->funcname, thread->xref->xref.file,
		 thread->xref->xref.line, NULL, thread->u.fd,
		 thread->u.val, thread->arg, thread->u.sands.tv_sec);

	pthread_setspecific(thread_current, thread);
	(*thread->func)(thread);
	pthread_setspecific(thread_current, NULL);

	GETRUSAGE(&after);
	thread->master->last_getrusage = after;

	unsigned long walltime, cputime;
	unsigned long exp;

	walltime = thread_consumed_time(&after, &before, &cputime);

	/* update walltime */
	atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
				  memory_order_seq_cst);
	exp = atomic_load_explicit(&thread->hist->real.max,
				   memory_order_seq_cst);
	while (exp < walltime
	       && !atomic_compare_exchange_weak_explicit(
		       &thread->hist->real.max, &exp, walltime,
		       memory_order_seq_cst, memory_order_seq_cst))
		;

	if (cputime_enabled_here && cputime_enabled) {
		/* update cputime */
		atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
					  memory_order_seq_cst);
		exp = atomic_load_explicit(&thread->hist->cpu.max,
					   memory_order_seq_cst);
		while (exp < cputime
		       && !atomic_compare_exchange_weak_explicit(
			       &thread->hist->cpu.max, &exp, cputime,
			       memory_order_seq_cst, memory_order_seq_cst))
			;
	}

	atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
				  memory_order_seq_cst);
	atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
				 memory_order_seq_cst);

	if (cputime_enabled_here && cputime_enabled && cputime_threshold
	    && cputime > cputime_threshold) {
		/*
		 * We have a CPU Hog on our hands.  The time FRR has spent
		 * doing actual work (not sleeping) is greater than 5 seconds.
		 * Whinge about it now, so we're aware this is yet another task
		 * to fix.
		 */
		atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
					  1, memory_order_seq_cst);
		flog_warn(
			EC_LIB_SLOW_THREAD_CPU,
			"CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
			thread->xref->funcname, (unsigned long)thread->func,
			walltime / 1000, cputime / 1000);
	} else if (walltime_threshold && walltime > walltime_threshold) {
		/*
		 * The runtime for a task is greater than 5 seconds, but the
		 * cpu time is under 5 seconds.  Let's whine about this because
		 * this could imply some sort of scheduling issue.
		 */
		atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
					  1, memory_order_seq_cst);
		flog_warn(
			EC_LIB_SLOW_THREAD_WALL,
			"STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
			thread->xref->funcname, (unsigned long)thread->func,
			walltime / 1000, cputime / 1000);
	}
}
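
/*
 * The two warning branches above are driven by the CLI-configurable
 * thresholds near the top of this file: "service cputime-warning N" sets
 * cputime_threshold and "service walltime-warning N" sets
 * walltime_threshold (configured in milliseconds, stored in microseconds).
 * The "no" forms zero the threshold, silencing the corresponding
 * CPU HOG / STARVATION warning.
 */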
/* Execute thread */
void _thread_execute(const struct xref_threadsched *xref,
		     struct thread_master *m, void (*func)(struct thread *),
		     void *arg, int val)
{
	struct thread *thread;

	/* Get or allocate new thread to execute. */
	frr_with_mutex(&m->mtx) {
		thread = thread_get(m, THREAD_EVENT, func, arg, xref);

		/* Set its event value. */
		frr_with_mutex(&thread->mtx) {
			thread->add_type = THREAD_EXECUTE;
			thread->u.val = val;
			thread->ref = &thread;
		}
	}

	/* Execute thread doing all accounting. */
	thread_call(thread);

	/* Give back or free thread. */
	thread_add_unuse(m, thread);
}
/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
void debug_signals(const sigset_t *sigs)
{
	int i, found;
	sigset_t tmpsigs;
	char buf[300];

	/*
	 * We're only looking at the non-realtime signals here, so we need
	 * some limit value. Platform differences mean at some point we just
	 * need to pick a reasonable value.
	 */
#if defined SIGRTMIN
# define LAST_SIGNAL SIGRTMIN
#else
# define LAST_SIGNAL 32
#endif

	if (sigs == NULL) {
		sigemptyset(&tmpsigs);
		pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
		sigs = &tmpsigs;
	}

	found = 0;
	buf[0] = '\0';

	for (i = 0; i < LAST_SIGNAL; i++) {
		char tmp[20];

		if (sigismember(sigs, i) > 0) {
			if (found > 0)
				strlcat(buf, ",", sizeof(buf));
			snprintf(tmp, sizeof(tmp), "%d", i);
			strlcat(buf, tmp, sizeof(buf));
			found++;
		}
	}

	if (found == 0)
		snprintf(buf, sizeof(buf), "<none>");

	zlog_debug("%s: %s", __func__, buf);
}
static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
				   const struct thread *thread)
{
	static const char * const types[] = {
		[THREAD_READ] = "read",
		[THREAD_WRITE] = "write",
		[THREAD_TIMER] = "timer",
		[THREAD_EVENT] = "event",
		[THREAD_READY] = "ready",
		[THREAD_UNUSED] = "unused",
		[THREAD_EXECUTE] = "exec",
	};
	ssize_t rv = 0;
	char info[16] = "";

	if (!thread)
		return bputs(buf, "{(thread *)NULL}");

	rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);

	if (thread->type < array_size(types) && types[thread->type])
		rv += bprintfrr(buf, " %-6s", types[thread->type]);
	else
		rv += bprintfrr(buf, " INVALID(%u)", thread->type);

	switch (thread->type) {
	case THREAD_READ:
	case THREAD_WRITE:
		snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
		break;

	case THREAD_TIMER:
		snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
		break;
	}

	rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
			thread->xref->funcname, thread->xref->dest,
			thread->xref->xref.file, thread->xref->xref.line);
	return rv;
}
);
2192 static ssize_t
printfrr_thread(struct fbuf
*buf
, struct printfrr_eargs
*ea
,
2195 const struct thread
*thread
= ptr
;
2196 struct timespec remain
= {};
2198 if (ea
->fmt
[0] == 'D') {
2200 return printfrr_thread_dbg(buf
, ea
, thread
);
2204 /* need to jump over time formatting flag characters in the
2205 * input format string, i.e. adjust ea->fmt!
2207 printfrr_time(buf
, ea
, &remain
,
2208 TIMEFMT_TIMER_DEADLINE
| TIMEFMT_SKIP
);
2209 return bputch(buf
, '-');
2212 TIMEVAL_TO_TIMESPEC(&thread
->u
.sands
, &remain
);
2213 return printfrr_time(buf
, ea
, &remain
, TIMEFMT_TIMER_DEADLINE
);