/* Thread management routine
 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <zebra.h>
#include <sys/resource.h>

#include "thread.h"
#include "memory.h"
#include "log.h"
#include "hash.h"
#include "command.h"
#include "sigevent.h"
#include "network.h"
#include "jhash.h"
#include "frratomic.h"
#include "frr_pthread.h"
#include "lib_errors.h"
#include "libfrr_trace.h"
#include "libfrr.h"
DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");

DECLARE_LIST(thread_list, struct thread, threaditem);
struct cancel_req {
        int flags;
        struct thread *thread;
        void *eventobj;
        struct thread **threadref;
};

/* Flags for task cancellation */
#define THREAD_CANCEL_FLAG_READY 0x01
static int thread_timer_cmp(const struct thread *a, const struct thread *b)
{
        if (a->u.sands.tv_sec < b->u.sands.tv_sec)
                return -1;
        if (a->u.sands.tv_sec > b->u.sands.tv_sec)
                return 1;
        if (a->u.sands.tv_usec < b->u.sands.tv_usec)
                return -1;
        if (a->u.sands.tv_usec > b->u.sands.tv_usec)
                return 1;
        return 0;
}

DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);
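
/* The timer heap above keeps the task with the earliest deadline at the top,
 * so thread_timer_wait() further down only ever needs to inspect the first
 * element to compute the next poll() timeout.
 */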
#if defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif

#define AWAKEN(m)                                                              \
        do {                                                                   \
                const unsigned char wakebyte = 0x01;                           \
                write(m->io_pipe[1], &wakebyte, 1);                            \
        } while (0)
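
/* AWAKEN() implements the classic self-pipe trick: writing one byte to
 * m->io_pipe[1] makes the io_pipe[0] end readable, which wakes a pthread
 * sleeping in poll()/ppoll() so it can pick up newly scheduled tasks or
 * pending cancellation requests.
 */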
/* control variable for initializer */
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
pthread_key_t thread_current;

static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *masters;

static void thread_free(struct thread_master *master, struct thread *thread);
#ifndef EXCLUDE_CPU_TIME
#define EXCLUDE_CPU_TIME 0
#endif
#ifndef CONSUMED_TIME_CHECK
#define CONSUMED_TIME_CHECK 0
#endif

bool cputime_enabled = !EXCLUDE_CPU_TIME;
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;

/* CLI start ---------------------------------------------------------------- */
#ifndef VTYSH_EXTRACT_PL
#include "lib/thread_clippy.c"
#endif
static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
{
        int size = sizeof(a->func);

        return jhash(&a->func, size, 0);
}
static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
                                const struct cpu_thread_history *b)
{
        return a->func == b->func;
}
static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
{
        struct cpu_thread_history *new;

        new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
        new->func = a->func;
        new->funcname = a->funcname;
        return new;
}
static void cpu_record_hash_free(void *a)
{
        struct cpu_thread_history *hist = a;

        XFREE(MTYPE_THREAD_STATS, hist);
}
static void vty_out_cpu_thread_history(struct vty *vty,
                                       struct cpu_thread_history *a)
{
        vty_out(vty, "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu",
                a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
                a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
                (a->real.total / a->total_calls), a->real.max,
                a->total_cpu_warn, a->total_wall_warn);
        vty_out(vty, " %c%c%c%c%c %s\n",
                a->types & (1 << THREAD_READ) ? 'R' : ' ',
                a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
                a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
                a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
                a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
}
static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
{
        struct cpu_thread_history *totals = args[0];
        struct cpu_thread_history copy;
        struct vty *vty = args[1];
        uint8_t *filter = args[2];

        struct cpu_thread_history *a = bucket->data;

        copy.total_active =
                atomic_load_explicit(&a->total_active, memory_order_seq_cst);
        copy.total_calls =
                atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
        copy.total_cpu_warn =
                atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
        copy.total_wall_warn =
                atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
        copy.cpu.total =
                atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
        copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
        copy.real.total =
                atomic_load_explicit(&a->real.total, memory_order_seq_cst);
        copy.real.max =
                atomic_load_explicit(&a->real.max, memory_order_seq_cst);
        copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
        copy.funcname = a->funcname;

        if (!(copy.types & *filter))
                return;

        vty_out_cpu_thread_history(vty, &copy);
        totals->total_active += copy.total_active;
        totals->total_calls += copy.total_calls;
        totals->total_cpu_warn += copy.total_cpu_warn;
        totals->total_wall_warn += copy.total_wall_warn;
        totals->real.total += copy.real.total;
        if (totals->real.max < copy.real.max)
                totals->real.max = copy.real.max;
        totals->cpu.total += copy.cpu.total;
        if (totals->cpu.max < copy.cpu.max)
                totals->cpu.max = copy.cpu.max;
}
static void cpu_record_print(struct vty *vty, uint8_t filter)
{
        struct cpu_thread_history tmp;
        void *args[3] = {&tmp, vty, &filter};
        struct thread_master *m;
        struct listnode *ln;

        if (!cputime_enabled)
                vty_out(vty,
                        "Collecting CPU time statistics is currently disabled. Following statistics\n"
                        "will be zero or may display data from when collection was enabled. Use the\n"
                        "  \"service cputime-stats\" command to start collecting data.\n"
                        "\nCounters and wallclock times are always maintained and should be accurate.\n");

        memset(&tmp, 0, sizeof(tmp));
        tmp.funcname = "TOTAL";
        tmp.types = filter;

        frr_with_mutex(&masters_mtx) {
                for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
                        const char *name = m->name ? m->name : "main";

                        char underline[strlen(name) + 1];
                        memset(underline, '-', sizeof(underline));
                        underline[sizeof(underline) - 1] = '\0';

                        vty_out(vty, "\n");
                        vty_out(vty, "Showing statistics for pthread %s\n",
                                name);
                        vty_out(vty, "-------------------------------%s\n",
                                underline);
                        vty_out(vty, "%30s %18s %18s\n", "",
                                "CPU (user+system):", "Real (wall-clock):");
                        vty_out(vty,
                                "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
                        vty_out(vty, " Avg uSec Max uSecs");
                        vty_out(vty, "  CPU_Warn Wall_Warn  Type  Thread\n");

                        if (m->cpu_record->count)
                                hash_iterate(
                                        m->cpu_record,
                                        (void (*)(struct hash_bucket *,
                                                  void *))cpu_record_hash_print,
                                        args);
                        else
                                vty_out(vty, "No data to display yet.\n");

                        vty_out(vty, "\n");
                }
        }

        vty_out(vty, "\n");
        vty_out(vty, "Total thread statistics\n");
        vty_out(vty, "-------------------------\n");
        vty_out(vty, "%30s %18s %18s\n", "",
                "CPU (user+system):", "Real (wall-clock):");
        vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
        vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
        vty_out(vty, "  Type  Thread\n");

        if (tmp.total_calls > 0)
                vty_out_cpu_thread_history(vty, &tmp);
}
static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
{
        uint8_t *filter = args[0];
        struct hash *cpu_record = args[1];

        struct cpu_thread_history *a = bucket->data;

        if (!(a->types & *filter))
                return;

        hash_release(cpu_record, bucket->data);
}
static void cpu_record_clear(uint8_t filter)
{
        uint8_t *tmp = &filter;
        struct thread_master *m;
        struct listnode *ln;

        frr_with_mutex(&masters_mtx) {
                for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
                        frr_with_mutex(&m->mtx) {
                                void *args[2] = {tmp, m->cpu_record};
                                hash_iterate(
                                        m->cpu_record,
                                        (void (*)(struct hash_bucket *,
                                                  void *))cpu_record_hash_clear,
                                        args);
                        }
                }
        }
}
static uint8_t parse_filter(const char *filterstr)
{
        int i = 0;
        int filter = 0;

        while (filterstr[i] != '\0') {
                switch (filterstr[i]) {
                case 'r':
                case 'R':
                        filter |= (1 << THREAD_READ);
                        break;
                case 'w':
                case 'W':
                        filter |= (1 << THREAD_WRITE);
                        break;
                case 't':
                case 'T':
                        filter |= (1 << THREAD_TIMER);
                        break;
                case 'e':
                case 'E':
                        filter |= (1 << THREAD_EVENT);
                        break;
                case 'x':
                case 'X':
                        filter |= (1 << THREAD_EXECUTE);
                        break;
                default:
                        break;
                }
                ++i;
        }
        return filter;
}
DEFUN_NOSH (show_thread_cpu,
            show_thread_cpu_cmd,
            "show thread cpu [FILTER]",
            SHOW_STR
            "Thread information\n"
            "Thread CPU usage\n"
            "Display filter (rwtex)\n")
{
        uint8_t filter = (uint8_t)-1U;
        int idx = 0;

        if (argv_find(argv, argc, "FILTER", &idx)) {
                filter = parse_filter(argv[idx]->arg);
                if (!filter) {
                        vty_out(vty,
                                "Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
                                argv[idx]->arg);
                        return CMD_WARNING;
                }
        }

        cpu_record_print(vty, filter);
        return CMD_SUCCESS;
}
DEFPY (service_cputime_stats,
       service_cputime_stats_cmd,
       "[no] service cputime-stats",
       NO_STR
       "Set up miscellaneous service\n"
       "Collect CPU usage statistics\n")
{
        cputime_enabled = !no;
        return CMD_SUCCESS;
}
DEFPY (service_cputime_warning,
       service_cputime_warning_cmd,
       "[no] service cputime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n"
       "Warning threshold in milliseconds\n")
{
        if (no)
                cputime_threshold = 0;
        else
                cputime_threshold = cputime_warning * 1000;
        return CMD_SUCCESS;
}
ALIAS (service_cputime_warning,
       no_service_cputime_warning_cmd,
       "no service cputime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n")
DEFPY (service_walltime_warning,
       service_walltime_warning_cmd,
       "[no] service walltime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n"
       "Warning threshold in milliseconds\n")
{
        if (no)
                walltime_threshold = 0;
        else
                walltime_threshold = walltime_warning * 1000;
        return CMD_SUCCESS;
}
ALIAS (service_walltime_warning,
       no_service_walltime_warning_cmd,
       "no service walltime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n")
static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
{
        const char *name = m->name ? m->name : "main";
        char underline[strlen(name) + 1];
        struct thread *thread;
        uint32_t i;

        memset(underline, '-', sizeof(underline));
        underline[sizeof(underline) - 1] = '\0';

        vty_out(vty, "\nShowing poll FD's for %s\n", name);
        vty_out(vty, "----------------------%s\n", underline);
        vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
                m->fd_limit);
        for (i = 0; i < m->handler.pfdcount; i++) {
                vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
                        m->handler.pfds[i].fd, m->handler.pfds[i].events,
                        m->handler.pfds[i].revents);

                if (m->handler.pfds[i].events & POLLIN) {
                        thread = m->read[m->handler.pfds[i].fd];

                        if (!thread)
                                vty_out(vty, "ERROR ");
                        else
                                vty_out(vty, "%s ", thread->xref->funcname);
                } else
                        vty_out(vty, " ");

                if (m->handler.pfds[i].events & POLLOUT) {
                        thread = m->write[m->handler.pfds[i].fd];

                        if (!thread)
                                vty_out(vty, "ERROR\n");
                        else
                                vty_out(vty, "%s\n", thread->xref->funcname);
                } else
                        vty_out(vty, "\n");
        }
}
DEFUN_NOSH (show_thread_poll,
            show_thread_poll_cmd,
            "show thread poll",
            SHOW_STR
            "Thread information\n"
            "Show poll FD's and information\n")
{
        struct listnode *node;
        struct thread_master *m;

        frr_with_mutex(&masters_mtx) {
                for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
                        show_thread_poll_helper(vty, m);
                }
        }

        return CMD_SUCCESS;
}
DEFUN (clear_thread_cpu,
       clear_thread_cpu_cmd,
       "clear thread cpu [FILTER]",
       "Clear stored data in all pthreads\n"
       "Thread information\n"
       "Thread CPU usage\n"
       "Display filter (rwtexb)\n")
{
        uint8_t filter = (uint8_t)-1U;
        int idx = 0;

        if (argv_find(argv, argc, "FILTER", &idx)) {
                filter = parse_filter(argv[idx]->arg);
                if (!filter) {
                        vty_out(vty,
                                "Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
                                argv[idx]->arg);
                        return CMD_WARNING;
                }
        }

        cpu_record_clear(filter);
        return CMD_SUCCESS;
}
void thread_cmd_init(void)
{
        install_element(VIEW_NODE, &show_thread_cpu_cmd);
        install_element(VIEW_NODE, &show_thread_poll_cmd);
        install_element(ENABLE_NODE, &clear_thread_cpu_cmd);

        install_element(CONFIG_NODE, &service_cputime_stats_cmd);
        install_element(CONFIG_NODE, &service_cputime_warning_cmd);
        install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
        install_element(CONFIG_NODE, &service_walltime_warning_cmd);
        install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
}
/* CLI end ------------------------------------------------------------------ */

static void cancelreq_del(void *cr)
{
        XFREE(MTYPE_TMP, cr);
}
/* initializer, only ever called once */
static void initializer(void)
{
        pthread_key_create(&thread_current, NULL);
}
struct thread_master *thread_master_create(const char *name)
{
        struct thread_master *rv;
        struct rlimit limit;

        pthread_once(&init_once, &initializer);

        rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));

        /* Initialize master mutex */
        pthread_mutex_init(&rv->mtx, NULL);
        pthread_cond_init(&rv->cancel_cond, NULL);

        /* Set name */
        name = name ? name : "default";
        rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);

        /* Initialize I/O task data structures */

        /* Use configured limit if present, ulimit otherwise. */
        rv->fd_limit = frr_get_fd_limit();
        if (rv->fd_limit == 0) {
                getrlimit(RLIMIT_NOFILE, &limit);
                rv->fd_limit = (int)limit.rlim_cur;
        }

        rv->read = XCALLOC(MTYPE_THREAD_POLL,
                           sizeof(struct thread *) * rv->fd_limit);

        rv->write = XCALLOC(MTYPE_THREAD_POLL,
                            sizeof(struct thread *) * rv->fd_limit);

        char tmhashname[strlen(name) + 32];
        snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
                 name);
        rv->cpu_record = hash_create_size(
                8, (unsigned int (*)(const void *))cpu_record_hash_key,
                (bool (*)(const void *, const void *))cpu_record_hash_cmp,
                tmhashname);

        thread_list_init(&rv->event);
        thread_list_init(&rv->ready);
        thread_list_init(&rv->unuse);
        thread_timer_list_init(&rv->timer);

        /* Initialize thread_fetch() settings */
        rv->spin = true;
        rv->handle_signals = true;

        /* Set pthread owner, should be updated by actual owner */
        rv->owner = pthread_self();
        rv->cancel_req = list_new();
        rv->cancel_req->del = cancelreq_del;
        rv->canceled = true;

        /* Initialize pipe poker */
        pipe(rv->io_pipe);
        set_nonblocking(rv->io_pipe[0]);
        set_nonblocking(rv->io_pipe[1]);

        /* Initialize data structures for poll() */
        rv->handler.pfdsize = rv->fd_limit;
        rv->handler.pfdcount = 0;
        rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
                                   sizeof(struct pollfd) * rv->handler.pfdsize);
        rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
                                   sizeof(struct pollfd) * rv->handler.pfdsize);

        /* add to list of threadmasters */
        frr_with_mutex(&masters_mtx) {
                if (!masters)
                        masters = list_new();

                listnode_add(masters, rv);
        }

        return rv;
}
void thread_master_set_name(struct thread_master *master, const char *name)
{
        frr_with_mutex(&master->mtx) {
                XFREE(MTYPE_THREAD_MASTER, master->name);
                master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
        }
}
#define THREAD_UNUSED_DEPTH 10

/* Move thread to unuse list. */
static void thread_add_unuse(struct thread_master *m, struct thread *thread)
{
        pthread_mutex_t mtxc = thread->mtx;

        assert(m != NULL && thread != NULL);

        thread->hist->total_active--;
        memset(thread, 0, sizeof(struct thread));
        thread->type = THREAD_UNUSED;

        /* Restore the thread mutex context. */
        thread->mtx = mtxc;

        if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
                thread_list_add_tail(&m->unuse, thread);
                return;
        }

        thread_free(m, thread);
}
/* Free all unused thread. */
static void thread_list_free(struct thread_master *m,
                             struct thread_list_head *list)
{
        struct thread *t;

        while ((t = thread_list_pop(list)))
                thread_free(m, t);
}
static void thread_array_free(struct thread_master *m,
                              struct thread **thread_array)
{
        struct thread *t;
        int index;

        for (index = 0; index < m->fd_limit; ++index) {
                t = thread_array[index];
                if (t) {
                        thread_array[index] = NULL;
                        thread_free(m, t);
                }
        }
        XFREE(MTYPE_THREAD_POLL, thread_array);
}
/*
 * thread_master_free_unused
 *
 * As threads are finished with they are put on the
 * unuse list for later reuse.
 * If we are shutting down, Free up unused threads
 * So we can see if we forget to shut anything off
 */
void thread_master_free_unused(struct thread_master *m)
{
        frr_with_mutex(&m->mtx) {
                struct thread *t;

                while ((t = thread_list_pop(&m->unuse)))
                        thread_free(m, t);
        }
}
/* Stop thread scheduler. */
void thread_master_free(struct thread_master *m)
{
        struct thread *t;

        frr_with_mutex(&masters_mtx) {
                listnode_delete(masters, m);
                if (masters->count == 0) {
                        list_delete(&masters);
                }
        }

        thread_array_free(m, m->read);
        thread_array_free(m, m->write);
        while ((t = thread_timer_list_pop(&m->timer)))
                thread_free(m, t);
        thread_list_free(m, &m->event);
        thread_list_free(m, &m->ready);
        thread_list_free(m, &m->unuse);
        pthread_mutex_destroy(&m->mtx);
        pthread_cond_destroy(&m->cancel_cond);
        close(m->io_pipe[0]);
        close(m->io_pipe[1]);
        list_delete(&m->cancel_req);
        m->cancel_req = NULL;

        hash_clean(m->cpu_record, cpu_record_hash_free);
        hash_free(m->cpu_record);
        m->cpu_record = NULL;

        XFREE(MTYPE_THREAD_MASTER, m->name);
        XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
        XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
        XFREE(MTYPE_THREAD_MASTER, m);
}
/* Return remain time in milliseconds. */
unsigned long thread_timer_remain_msec(struct thread *thread)
{
        int64_t remain;

        frr_with_mutex(&thread->mtx) {
                remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
        }

        return remain < 0 ? 0 : remain;
}

/* Return remain time in seconds. */
unsigned long thread_timer_remain_second(struct thread *thread)
{
        return thread_timer_remain_msec(thread) / 1000LL;
}
struct timeval thread_timer_remain(struct thread *thread)
{
        struct timeval remain;

        frr_with_mutex(&thread->mtx) {
                monotime_until(&thread->u.sands, &remain);
        }
        return remain;
}
static int time_hhmmss(char *buf, int buf_size, long sec)
{
        long hh;
        long mm;
        int wr;

        assert(buf_size >= 8);

        hh = sec / 3600;
        sec %= 3600;
        mm = sec / 60;
        sec %= 60;

        wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);

        return wr != 8;
}
char *thread_timer_to_hhmmss(char *buf, int buf_size,
                             struct thread *t_timer)
{
        if (t_timer) {
                time_hhmmss(buf, buf_size,
                            thread_timer_remain_second(t_timer));
        } else {
                snprintf(buf, buf_size, "--:--:--");
        }
        return buf;
}
/* Get new thread.  */
static struct thread *thread_get(struct thread_master *m, uint8_t type,
                                 int (*func)(struct thread *), void *arg,
                                 const struct xref_threadsched *xref)
{
        struct thread *thread = thread_list_pop(&m->unuse);
        struct cpu_thread_history tmp;

        if (!thread) {
                thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
                /* mutex only needs to be initialized at struct creation. */
                pthread_mutex_init(&thread->mtx, NULL);
                m->alloc++;
        }

        thread->type = type;
        thread->add_type = type;
        thread->master = m;
        thread->arg = arg;
        thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
        thread->ref = NULL;
        thread->ignore_timer_late = false;

        /*
         * So if the passed in funcname is not what we have
         * stored that means the thread->hist needs to be
         * updated.  We keep the last one around in unused
         * under the assumption that we are probably
         * going to immediately allocate the same
         * one up again, so we can save ourselves the lookup.
         *
         * This hopefully saves us some serious
         * hash_get lookups.
         */
        if ((thread->xref && thread->xref->funcname != xref->funcname)
            || thread->func != func) {
                tmp.func = func;
                tmp.funcname = xref->funcname;
                thread->hist =
                        hash_get(m->cpu_record, &tmp,
                                 (void *(*)(void *))cpu_record_hash_alloc);
        }
        thread->hist->total_active++;
        thread->func = func;
        thread->xref = xref;

        return thread;
}
static void thread_free(struct thread_master *master, struct thread *thread)
{
        /* Update statistics. */
        assert(master->alloc > 0);
        master->alloc--;

        /* Free allocated resources. */
        pthread_mutex_destroy(&thread->mtx);
        XFREE(MTYPE_THREAD, thread);
}
static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
                   bool *eintr_p)
{
        sigset_t origsigs;
        unsigned char trash[64];
        nfds_t count = m->handler.copycount;

        /*
         * If timer_wait is null here, that means poll() should block
         * indefinitely, unless the thread_master has overridden it by setting
         * ->selectpoll_timeout.
         *
         * If the value is positive, it specifies the maximum number of
         * milliseconds to wait. If the timeout is -1, it specifies that
         * we should never wait and always return immediately even if no
         * event is detected. If the value is zero, the behavior is default.
         */
        int timeout = -1;

        /* number of file descriptors with events */
        int num;

        if (timer_wait != NULL
            && m->selectpoll_timeout == 0) // use the default value
                timeout = (timer_wait->tv_sec * 1000)
                          + (timer_wait->tv_usec / 1000);
        else if (m->selectpoll_timeout > 0) // use the user's timeout
                timeout = m->selectpoll_timeout;
        else if (m->selectpoll_timeout
                 < 0) // effect a poll (return immediately)
                timeout = 0;

        zlog_tls_buffer_flush();
        rcu_read_unlock();
        rcu_assert_read_unlocked();

        /* add poll pipe poker */
        assert(count + 1 < m->handler.pfdsize);
        m->handler.copy[count].fd = m->io_pipe[0];
        m->handler.copy[count].events = POLLIN;
        m->handler.copy[count].revents = 0x00;

        /* We need to deal with a signal-handling race here: we
         * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
         * that may arrive just before we enter poll(). We will block the
         * key signals, then check whether any have arrived - if so, we return
         * before calling poll(). If not, we'll re-enable the signals
         * in the ppoll() call.
         */

        sigemptyset(&origsigs);
        if (m->handle_signals) {
                /* Main pthread that handles the app signals */
                if (frr_sigevent_check(&origsigs)) {
                        /* Signal to process - restore signal mask and return */
                        pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
                        num = -1;
                        *eintr_p = true;
                        goto done;
                }
        } else {
                /* Don't make any changes for the non-main pthreads */
                pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
        }

#if defined(HAVE_PPOLL)
        struct timespec ts, *tsp;

        if (timeout >= 0) {
                ts.tv_sec = timeout / 1000;
                ts.tv_nsec = (timeout % 1000) * 1000000;
                tsp = &ts;
        } else
                tsp = NULL;

        num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
        pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
#else
        /* Not ideal - there is a race after we restore the signal mask */
        pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
        num = poll(m->handler.copy, count + 1, timeout);
#endif

done:

        if (num < 0 && errno == EINTR)
                *eintr_p = true;

        if (num > 0 && m->handler.copy[count].revents != 0 && num--)
                while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
                        ;

        rcu_read_lock();

        return num;
}
/* Add new read thread. */
void _thread_add_read_write(const struct xref_threadsched *xref,
                            struct thread_master *m,
                            int (*func)(struct thread *), void *arg, int fd,
                            struct thread **t_ptr)
{
        int dir = xref->thread_type;
        struct thread *thread = NULL;
        struct thread **thread_array;

        if (dir == THREAD_READ)
                frrtrace(9, frr_libfrr, schedule_read, m,
                         xref->funcname, xref->xref.file, xref->xref.line,
                         t_ptr, fd, 0, arg, 0);
        else
                frrtrace(9, frr_libfrr, schedule_write, m,
                         xref->funcname, xref->xref.file, xref->xref.line,
                         t_ptr, fd, 0, arg, 0);

        if (fd >= m->fd_limit)
                assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");

        frr_with_mutex(&m->mtx) {
                if (t_ptr && *t_ptr)
                        // thread is already scheduled; don't reschedule
                        break;

                /* default to a new pollfd */
                nfds_t queuepos = m->handler.pfdcount;

                if (dir == THREAD_READ)
                        thread_array = m->read;
                else
                        thread_array = m->write;

                /* if we already have a pollfd for our file descriptor, find and
                 * use it */
                for (nfds_t i = 0; i < m->handler.pfdcount; i++)
                        if (m->handler.pfds[i].fd == fd) {
                                queuepos = i;

#ifdef DEV_BUILD
                                /*
                                 * What happens if we have a thread already
                                 * created for this event?
                                 */
                                if (thread_array[fd])
                                        assert(!"Thread already scheduled for file descriptor");
#endif
                                break;
                        }

                /* make sure we have room for this fd + pipe poker fd */
                assert(queuepos + 1 < m->handler.pfdsize);

                thread = thread_get(m, dir, func, arg, xref);

                m->handler.pfds[queuepos].fd = fd;
                m->handler.pfds[queuepos].events |=
                        (dir == THREAD_READ ? POLLIN : POLLOUT);

                if (queuepos == m->handler.pfdcount)
                        m->handler.pfdcount++;

                if (thread) {
                        frr_with_mutex(&thread->mtx) {
                                thread->u.fd = fd;
                                thread_array[thread->u.fd] = thread;
                        }

                        if (t_ptr) {
                                *t_ptr = thread;
                                thread->ref = t_ptr;
                        }
                }

                AWAKEN(m);
        }
}
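
/* Illustrative usage sketch (not compiled here): scheduling a read task via
 * the thread_add_read() convenience macro from thread.h, which routes to
 * _thread_add_read_write() above.  The back-reference t_read lets the task be
 * cancelled safely later.
 */
#if 0
static struct thread *t_read;

static int my_read_handler(struct thread *thread)
{
        int fd = THREAD_FD(thread);

        /* ... consume data from fd, then reschedule for the next event ... */
        thread_add_read(thread->master, my_read_handler, THREAD_ARG(thread),
                        fd, &t_read);
        return 0;
}
#endif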
static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
                                      struct thread_master *m,
                                      int (*func)(struct thread *), void *arg,
                                      struct timeval *time_relative,
                                      struct thread **t_ptr)
{
        struct thread *thread;
        struct timeval t;

        assert(m != NULL);
        assert(time_relative);

        frrtrace(9, frr_libfrr, schedule_timer, m,
                 xref->funcname, xref->xref.file, xref->xref.line,
                 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);

        /* Compute expiration/deadline time. */
        monotime(&t);
        timeradd(&t, time_relative, &t);

        frr_with_mutex(&m->mtx) {
                if (t_ptr && *t_ptr)
                        /* thread is already scheduled; don't reschedule */
                        return;

                thread = thread_get(m, THREAD_TIMER, func, arg, xref);

                frr_with_mutex(&thread->mtx) {
                        thread->u.sands = t;
                        thread_timer_list_add(&m->timer, thread);
                        if (t_ptr) {
                                *t_ptr = thread;
                                thread->ref = t_ptr;
                        }
                }

                /* The timer list is sorted - if this new timer
                 * might change the time we'll wait for, give the pthread
                 * a chance to re-compute.
                 */
                if (thread_timer_list_first(&m->timer) == thread)
                        AWAKEN(m);
        }
}
/* Add timer event thread. */
void _thread_add_timer(const struct xref_threadsched *xref,
                       struct thread_master *m, int (*func)(struct thread *),
                       void *arg, long timer, struct thread **t_ptr)
{
        struct timeval trel;

        assert(m != NULL);

        trel.tv_sec = timer;
        trel.tv_usec = 0;

        _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}
/* Add timer event thread with "millisecond" resolution */
void _thread_add_timer_msec(const struct xref_threadsched *xref,
                            struct thread_master *m,
                            int (*func)(struct thread *), void *arg, long timer,
                            struct thread **t_ptr)
{
        struct timeval trel;

        assert(m != NULL);

        trel.tv_sec = timer / 1000;
        trel.tv_usec = 1000 * (timer % 1000);

        _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}
/* Add timer event thread with "timeval" resolution */
void _thread_add_timer_tv(const struct xref_threadsched *xref,
                          struct thread_master *m, int (*func)(struct thread *),
                          void *arg, struct timeval *tv, struct thread **t_ptr)
{
        _thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}
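
/* Example: thread_add_timer_msec(m, cb, arg, 2500, &ref) converts 2500 ms to
 * { .tv_sec = 2, .tv_usec = 500000 } and lands in
 * _thread_add_timer_timeval(), which adds the relative value to the current
 * monotime to obtain the absolute deadline stored in thread->u.sands.
 */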
/* Add simple event thread. */
void _thread_add_event(const struct xref_threadsched *xref,
                       struct thread_master *m, int (*func)(struct thread *),
                       void *arg, int val, struct thread **t_ptr)
{
        struct thread *thread = NULL;

        frrtrace(9, frr_libfrr, schedule_event, m,
                 xref->funcname, xref->xref.file, xref->xref.line,
                 t_ptr, 0, val, arg, 0);

        assert(m != NULL);

        frr_with_mutex(&m->mtx) {
                if (t_ptr && *t_ptr)
                        /* thread is already scheduled; don't reschedule */
                        break;

                thread = thread_get(m, THREAD_EVENT, func, arg, xref);
                frr_with_mutex(&thread->mtx) {
                        thread->u.val = val;
                        thread_list_add_tail(&m->event, thread);
                }

                if (t_ptr) {
                        *t_ptr = thread;
                        thread->ref = t_ptr;
                }

                AWAKEN(m);
        }
}
/* Thread cancellation ------------------------------------------------------ */

/**
 * NOT's out the .events field of pollfd corresponding to the given file
 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
 *
 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
 * implementation for details.
 *
 * @param master
 * @param fd
 * @param state the event to cancel. One or more (OR'd together) of the
 * following: POLLIN, POLLOUT
 */
static void thread_cancel_rw(struct thread_master *master, int fd, short state,
                             int idx_hint)
{
        bool found = false;

        /* find the index of corresponding pollfd */
        nfds_t i;

        /* Cancel POLLHUP too just in case some bozo set it */
        state |= POLLHUP;

        /* Some callers know the index of the pfd already */
        if (idx_hint >= 0) {
                i = idx_hint;
                found = true;
        } else {
                /* Have to look for the fd in the pfd array */
                for (i = 0; i < master->handler.pfdcount; i++)
                        if (master->handler.pfds[i].fd == fd) {
                                found = true;
                                break;
                        }
        }

        if (!found) {
                zlog_debug(
                        "[!] Received cancellation request for nonexistent rw job");
                zlog_debug("[!] threadmaster: %s | fd: %d",
                           master->name ? master->name : "", fd);
                return;
        }

        /* NOT out event. */
        master->handler.pfds[i].events &= ~(state);

        /* If all events are canceled, delete / resize the pollfd array. */
        if (master->handler.pfds[i].events == 0) {
                memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
                        (master->handler.pfdcount - i - 1)
                                * sizeof(struct pollfd));
                master->handler.pfdcount--;
                master->handler.pfds[master->handler.pfdcount].fd = 0;
                master->handler.pfds[master->handler.pfdcount].events = 0;
        }

        /* If we have the same pollfd in the copy, perform the same operations,
         * otherwise return. */
        if (i >= master->handler.copycount)
                return;

        master->handler.copy[i].events &= ~(state);

        if (master->handler.copy[i].events == 0) {
                memmove(master->handler.copy + i, master->handler.copy + i + 1,
                        (master->handler.copycount - i - 1)
                                * sizeof(struct pollfd));
                master->handler.copycount--;
                master->handler.copy[master->handler.copycount].fd = 0;
                master->handler.copy[master->handler.copycount].events = 0;
        }
}
/*
 * Process task cancellation given a task argument: iterate through the
 * various lists of tasks, looking for any that match the argument.
 */
static void cancel_arg_helper(struct thread_master *master,
                              const struct cancel_req *cr)
{
        struct thread *t;
        nfds_t i;
        int fd;
        struct pollfd *pfd;

        /* We're only processing arg-based cancellations here. */
        if (cr->eventobj == NULL)
                return;

        /* First process the ready lists. */
        frr_each_safe(thread_list, &master->event, t) {
                if (t->arg != cr->eventobj)
                        continue;
                thread_list_del(&master->event, t);
                if (t->ref)
                        *t->ref = NULL;
                thread_add_unuse(master, t);
        }

        frr_each_safe(thread_list, &master->ready, t) {
                if (t->arg != cr->eventobj)
                        continue;
                thread_list_del(&master->ready, t);
                if (t->ref)
                        *t->ref = NULL;
                thread_add_unuse(master, t);
        }

        /* If requested, stop here and ignore io and timers */
        if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
                return;

        /* Check the io tasks */
        for (i = 0; i < master->handler.pfdcount;) {
                pfd = master->handler.pfds + i;

                if (pfd->events & POLLIN)
                        t = master->read[pfd->fd];
                else
                        t = master->write[pfd->fd];

                if (t && t->arg == cr->eventobj) {
                        fd = pfd->fd;

                        /* Found a match to cancel: clean up fd arrays */
                        thread_cancel_rw(master, pfd->fd, pfd->events, i);

                        /* Clean up thread arrays */
                        master->read[fd] = NULL;
                        master->write[fd] = NULL;

                        /* Clear caller's ref */
                        if (t->ref)
                                *t->ref = NULL;

                        thread_add_unuse(master, t);

                        /* Don't increment 'i' since the cancellation will have
                         * removed the entry from the pfd array
                         */
                } else
                        i++;
        }

        /* Check the timer tasks */
        t = thread_timer_list_first(&master->timer);
        while (t) {
                struct thread *t_next;

                t_next = thread_timer_list_next(&master->timer, t);

                if (t->arg == cr->eventobj) {
                        thread_timer_list_del(&master->timer, t);
                        if (t->ref)
                                *t->ref = NULL;
                        thread_add_unuse(master, t);
                }

                t = t_next;
        }
}
/**
 * Process cancellation requests.
 *
 * This may only be run from the pthread which owns the thread_master.
 *
 * @param master the thread master to process
 * @REQUIRE master->mtx
 */
static void do_thread_cancel(struct thread_master *master)
{
        struct thread_list_head *list = NULL;
        struct thread **thread_array = NULL;
        struct thread *thread;

        struct cancel_req *cr;
        struct listnode *ln;

        for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
                /*
                 * If this is an event object cancellation, search
                 * through task lists deleting any tasks which have the
                 * specified argument - use this handy helper function.
                 */
                if (cr->eventobj) {
                        cancel_arg_helper(master, cr);
                        continue;
                }

                /*
                 * The pointer varies depending on whether the cancellation
                 * request was made asynchronously or not. If it was, we
                 * need to check whether the thread even exists anymore
                 * before cancelling it.
                 */
                thread = (cr->thread) ? cr->thread : *cr->threadref;

                if (!thread)
                        continue;

                /* Determine the appropriate queue to cancel the thread from */
                switch (thread->type) {
                case THREAD_READ:
                        thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
                        thread_array = master->read;
                        break;
                case THREAD_WRITE:
                        thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
                        thread_array = master->write;
                        break;
                case THREAD_TIMER:
                        thread_timer_list_del(&master->timer, thread);
                        break;
                case THREAD_EVENT:
                        list = &master->event;
                        break;
                case THREAD_READY:
                        list = &master->ready;
                        break;
                default:
                        continue;
                }

                if (list) {
                        thread_list_del(list, thread);
                } else if (thread_array) {
                        thread_array[thread->u.fd] = NULL;
                }

                if (thread->ref)
                        *thread->ref = NULL;

                thread_add_unuse(thread->master, thread);
        }

        /* Delete and free all cancellation requests */
        if (master->cancel_req)
                list_delete_all_node(master->cancel_req);

        /* Wake up any threads which may be blocked in thread_cancel_async() */
        master->canceled = true;
        pthread_cond_broadcast(&master->cancel_cond);
}
/*
 * Helper function used for multiple flavors of arg-based cancellation.
 */
static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
{
        struct cancel_req *cr;

        assert(m->owner == pthread_self());

        /* Only worth anything if caller supplies an arg. */
        if (arg == NULL)
                return;

        cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));

        cr->flags = flags;

        frr_with_mutex(&m->mtx) {
                cr->eventobj = arg;
                listnode_add(m->cancel_req, cr);
                do_thread_cancel(m);
        }
}
/*
 * Cancel any events which have the specified argument.
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event(struct thread_master *master, void *arg)
{
        cancel_event_helper(master, arg, 0);
}

/*
 * Cancel ready tasks with an arg matching 'arg'
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event_ready(struct thread_master *m, void *arg)
{
        /* Only cancel ready/event tasks */
        cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
}
/*
 * Cancel a specific task.
 *
 * MT-Unsafe
 *
 * @param thread task to cancel
 */
void thread_cancel(struct thread **thread)
{
        struct thread_master *master;

        if (thread == NULL || *thread == NULL)
                return;

        master = (*thread)->master;

        frrtrace(9, frr_libfrr, thread_cancel, master,
                 (*thread)->xref->funcname, (*thread)->xref->xref.file,
                 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
                 (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);

        assert(master->owner == pthread_self());

        frr_with_mutex(&master->mtx) {
                struct cancel_req *cr =
                        XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
                cr->thread = *thread;
                listnode_add(master->cancel_req, cr);
                do_thread_cancel(master);
        }

        *thread = NULL;
}
/*
 * Asynchronous cancellation.
 *
 * Called with either a struct thread ** or void * to an event argument,
 * this function posts the correct cancellation request and blocks until it is
 * serviced.
 *
 * If the thread is currently running, execution blocks until it completes.
 *
 * The last two parameters are mutually exclusive, i.e. if you pass one the
 * other must be NULL.
 *
 * When the cancellation procedure executes on the target thread_master, the
 * thread * provided is checked for nullity. If it is null, the thread is
 * assumed to no longer exist and the cancellation request is a no-op. Thus
 * users of this API must pass a back-reference when scheduling the original
 * task.
 *
 * MT-Safe
 *
 * @param master the thread master with the relevant event / task
 * @param thread pointer to thread to cancel
 * @param eventobj the event
 */
void thread_cancel_async(struct thread_master *master, struct thread **thread,
                         void *eventobj)
{
        assert(!(thread && eventobj) && (thread || eventobj));

        if (thread && *thread)
                frrtrace(9, frr_libfrr, thread_cancel_async, master,
                         (*thread)->xref->funcname, (*thread)->xref->xref.file,
                         (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
                         (*thread)->u.val, (*thread)->arg,
                         (*thread)->u.sands.tv_sec);
        else
                frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
                         0, NULL, 0, 0, eventobj, 0);

        assert(master->owner != pthread_self());

        frr_with_mutex(&master->mtx) {
                master->canceled = false;

                if (thread) {
                        struct cancel_req *cr =
                                XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
                        cr->threadref = thread;
                        listnode_add(master->cancel_req, cr);
                } else if (eventobj) {
                        struct cancel_req *cr =
                                XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
                        cr->eventobj = eventobj;
                        listnode_add(master->cancel_req, cr);
                }
                AWAKEN(master);

                while (!master->canceled)
                        pthread_cond_wait(&master->cancel_cond, &master->mtx);
        }

        if (thread)
                *thread = NULL;
}
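
/* Note the ownership asserts above: thread_cancel() may only run on the
 * pthread that owns the thread_master, while thread_cancel_async() may only
 * be called from a *different* pthread and blocks on cancel_cond until the
 * owner services the request in do_thread_cancel().
 */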
/* ------------------------------------------------------------------------- */

static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
                                         struct timeval *timer_val)
{
        if (!thread_timer_list_count(timers))
                return NULL;

        struct thread *next_timer = thread_timer_list_first(timers);

        monotime_until(&next_timer->u.sands, timer_val);
        return timer_val;
}
1538 static struct thread
*thread_run(struct thread_master
*m
, struct thread
*thread
,
1539 struct thread
*fetch
)
1542 thread_add_unuse(m
, thread
);
static int thread_process_io_helper(struct thread_master *m,
                                    struct thread *thread, short state,
                                    short actual_state, int pos)
{
        struct thread **thread_array;

        /*
         * poll() clears the .events field, but the pollfd array we
         * pass to poll() is a copy of the one used to schedule threads.
         * We need to synchronize state between the two here by applying
         * the same changes poll() made on the copy of the "real" pollfd
         * array.
         *
         * This cleans up a possible infinite loop where we refuse
         * to respond to a poll event but poll is insistent that
         * we should.
         */
        m->handler.pfds[pos].events &= ~(state);

        if (!thread) {
                if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
                        flog_err(EC_LIB_NO_THREAD,
                                 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
                                 m->handler.pfds[pos].fd, actual_state);
                return 0;
        }

        if (thread->type == THREAD_READ)
                thread_array = m->read;
        else
                thread_array = m->write;

        thread_array[thread->u.fd] = NULL;
        thread_list_add_tail(&m->ready, thread);
        thread->type = THREAD_READY;

        return 1;
}
/**
 * Process I/O events.
 *
 * Walks through file descriptor array looking for those pollfds whose .revents
 * field has something interesting. Deletes any invalid file descriptors.
 *
 * @param m the thread master
 * @param num the number of active file descriptors (return value of poll())
 */
static void thread_process_io(struct thread_master *m, unsigned int num)
{
        unsigned int ready = 0;
        struct pollfd *pfds = m->handler.copy;

        for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
                /* no event for current fd? immediately continue */
                if (pfds[i].revents == 0)
                        continue;

                ready++;

                /*
                 * Unless someone has called thread_cancel from another
                 * pthread, the only thing that could have changed in
                 * m->handler.pfds while we were asleep is the .events
                 * field in a given pollfd. Barring thread_cancel() that
                 * value should be a superset of the values we have in our
                 * copy, so there's no need to update it. Similarly,
                 * barring deletion, the fd should still be a valid index
                 * into the master's pfds.
                 *
                 * We are including POLLERR here to do a READ event
                 * this is because the read should fail and the
                 * read function should handle it appropriately
                 */
                if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
                        thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
                                                 pfds[i].revents, i);
                }
                if (pfds[i].revents & POLLOUT)
                        thread_process_io_helper(m, m->write[pfds[i].fd],
                                                 POLLOUT, pfds[i].revents, i);

                /* if one of our file descriptors is garbage, remove the same
                 * from both pfds + update sizes and index */
                if (pfds[i].revents & POLLNVAL) {
                        memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
                                (m->handler.pfdcount - i - 1)
                                        * sizeof(struct pollfd));
                        m->handler.pfdcount--;
                        m->handler.pfds[m->handler.pfdcount].fd = 0;
                        m->handler.pfds[m->handler.pfdcount].events = 0;

                        memmove(pfds + i, pfds + i + 1,
                                (m->handler.copycount - i - 1)
                                        * sizeof(struct pollfd));
                        m->handler.copycount--;
                        m->handler.copy[m->handler.copycount].fd = 0;
                        m->handler.copy[m->handler.copycount].events = 0;

                        i--;
                }
        }
}
/* Add all timers that have popped to the ready list. */
static unsigned int thread_process_timers(struct thread_master *m,
                                          struct timeval *timenow)
{
        struct timeval prev = *timenow;
        bool displayed = false;
        struct thread *thread;
        unsigned int ready = 0;

        while ((thread = thread_timer_list_first(&m->timer))) {
                if (timercmp(timenow, &thread->u.sands, <))
                        break;
                prev = thread->u.sands;
                prev.tv_sec += 4;
                /*
                 * If the timer would have popped 4 seconds in the
                 * past then we are in a situation where we are
                 * really getting behind on handling of events.
                 * Let's log it and do the right thing with it.
                 */
                if (!displayed && !thread->ignore_timer_late &&
                    timercmp(timenow, &prev, >)) {
                        flog_warn(
                                EC_LIB_STARVE_THREAD,
                                "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
                                thread);
                        displayed = true;
                }

                thread_timer_list_pop(&m->timer);
                thread->type = THREAD_READY;
                thread_list_add_tail(&m->ready, thread);
                ready++;
        }

        return ready;
}
/* process a list en masse, e.g. for event thread lists */
static unsigned int thread_process(struct thread_list_head *list)
{
        struct thread *thread;
        unsigned int ready = 0;

        while ((thread = thread_list_pop(list))) {
                thread->type = THREAD_READY;
                thread_list_add_tail(&thread->master->ready, thread);
                ready++;
        }
        return ready;
}
/* Fetch next ready thread. */
struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
{
        struct thread *thread = NULL;
        struct timeval now;
        struct timeval zerotime = {0, 0};
        struct timeval tv;
        struct timeval *tw = NULL;
        bool eintr_p = false;
        int num = 0;

        do {
                /* Handle signals if any */
                if (m->handle_signals)
                        frr_sigevent_process();

                pthread_mutex_lock(&m->mtx);

                /* Process any pending cancellation requests */
                do_thread_cancel(m);

                /*
                 * Attempt to flush ready queue before going into poll().
                 * This is performance-critical. Think twice before modifying.
                 */
                if ((thread = thread_list_pop(&m->ready))) {
                        fetch = thread_run(m, thread, fetch);
                        if (fetch->ref)
                                *fetch->ref = NULL;
                        pthread_mutex_unlock(&m->mtx);
                        if (!m->ready_run_loop)
                                GETRUSAGE(&m->last_getrusage);
                        m->ready_run_loop = true;
                        break;
                }

                m->ready_run_loop = false;
                /* otherwise, tick through scheduling sequence */

                /*
                 * Post events to ready queue. This must come before the
                 * following block since events should occur immediately
                 */
                thread_process(&m->event);

                /*
                 * If there are no tasks on the ready queue, we will poll()
                 * until a timer expires or we receive I/O, whichever comes
                 * first. The strategy for doing this is:
                 *
                 * - If there are events pending, set the poll() timeout to zero
                 * - If there are no events pending, but there are timers
                 *   pending, set the timeout to the smallest remaining time on
                 *   any timer.
                 * - If there are neither timers nor events pending, but there
                 *   are file descriptors pending, block indefinitely in poll()
                 * - If nothing is pending, it's time for the application to die
                 *
                 * In every case except the last, we need to hit poll() at least
                 * once per loop to avoid starvation by events
                 */
                if (!thread_list_count(&m->ready))
                        tw = thread_timer_wait(&m->timer, &tv);

                if (thread_list_count(&m->ready) ||
                    (tw && !timercmp(tw, &zerotime, >)))
                        tw = &zerotime;

                if (!tw && m->handler.pfdcount == 0) { /* die */
                        pthread_mutex_unlock(&m->mtx);
                        fetch = NULL;
                        break;
                }

                /*
                 * Copy pollfd array + # active pollfds in it. Not necessary to
                 * copy the array size as this is fixed.
                 */
                m->handler.copycount = m->handler.pfdcount;
                memcpy(m->handler.copy, m->handler.pfds,
                       m->handler.copycount * sizeof(struct pollfd));

                pthread_mutex_unlock(&m->mtx);
                {
                        eintr_p = false;
                        num = fd_poll(m, tw, &eintr_p);
                }
                pthread_mutex_lock(&m->mtx);

                /* Handle any errors received in poll() */
                if (num < 0) {
                        if (eintr_p) {
                                pthread_mutex_unlock(&m->mtx);
                                /* loop around to signal handler */
                                continue;
                        }

                        /* else die */
                        flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
                                 safe_strerror(errno));
                        pthread_mutex_unlock(&m->mtx);
                        fetch = NULL;
                        break;
                }

                /* Post timers to ready queue. */
                monotime(&now);
                thread_process_timers(m, &now);

                /* Post I/O to ready queue. */
                if (num > 0)
                        thread_process_io(m, num);

                pthread_mutex_unlock(&m->mtx);

        } while (!thread && m->spin);

        return fetch;
}
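
/* Illustrative sketch (not compiled here) of the canonical FRR main loop
 * built on thread_fetch()/thread_call(): fetch blocks until a task is ready
 * and returns NULL only once nothing at all remains scheduled.
 */
#if 0
void event_loop(struct thread_master *master)
{
        struct thread thread;

        while (thread_fetch(master, &thread))
                thread_call(&thread);
}
#endif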
static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
{
        return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
                + (a.tv_usec - b.tv_usec));
}
unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
                                   unsigned long *cputime)
{
#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
        *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
                   + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
#else
        /* This is 'user + sys' time. */
        *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
                   + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
#endif
        return timeval_elapsed(now->real, start->real);
}
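
/* Worked example: a handler that ran for 2.5 s of wall-clock time yields
 * timeval_elapsed() == 2 * TIMER_SECOND_MICRO + 500000 = 2500000 us;
 * thread_call() below compares such values against cputime_threshold and
 * walltime_threshold, which are likewise in microseconds.
 */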
/* We should aim to yield after yield milliseconds, which defaults
   to THREAD_YIELD_TIME_SLOT .
   Note: we are using real (wall clock) time for this calculation.
   It could be argued that CPU time may make more sense in certain
   contexts.  The things to consider are whether the thread may have
   blocked (in which case wall time increases, but CPU time does not),
   or whether the system is heavily loaded with other processes competing
   for CPU time.  On balance, wall clock time seems to make sense.
   Plus it has the added benefit that gettimeofday should be faster
   than calling getrusage. */
int thread_should_yield(struct thread *thread)
{
        int result;

        frr_with_mutex(&thread->mtx) {
                result = monotime_since(&thread->real, NULL)
                         > (int64_t)thread->yield;
        }
        return result;
}
void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
{
        frr_with_mutex(&thread->mtx) {
                thread->yield = yield_time;
        }
}
void thread_getrusage(RUSAGE_T *r)
{
        monotime(&r->real);
        if (!cputime_enabled) {
                memset(&r->cpu, 0, sizeof(r->cpu));
                return;
        }

#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
        /* not currently implemented in Linux's vDSO, but maybe at some point
         * in the future?
         */
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
#if defined RUSAGE_THREAD
#define FRR_RUSAGE RUSAGE_THREAD
#else
#define FRR_RUSAGE RUSAGE_SELF
#endif
        getrusage(FRR_RUSAGE, &(r->cpu));
#endif
}
/*
 * This function will atomically update the thread's usage history. At present
 * this is the only spot where usage history is written. Nevertheless the code
 * has been written such that the introduction of writers in the future should
 * not need to update it provided the writers atomically perform only the
 * operations done here, i.e. updating the total and maximum times. In
 * particular, the maximum real and cpu times must be monotonically increasing
 * or this code is not correct.
 */
void thread_call(struct thread *thread)
{
        RUSAGE_T before, after;

        /* if the thread being called is the CLI, it may change cputime_enabled
         * ("service cputime-stats" command), which can result in nonsensical
         * and very confusing warnings
         */
        bool cputime_enabled_here = cputime_enabled;

        if (thread->master->ready_run_loop)
                before = thread->master->last_getrusage;
        else
                GETRUSAGE(&before);

        thread->real = before.real;

        frrtrace(9, frr_libfrr, thread_call, thread->master,
                 thread->xref->funcname, thread->xref->xref.file,
                 thread->xref->xref.line, NULL, thread->u.fd,
                 thread->u.val, thread->arg, thread->u.sands.tv_sec);

        pthread_setspecific(thread_current, thread);
        (*thread->func)(thread);
        pthread_setspecific(thread_current, NULL);

        GETRUSAGE(&after);
        thread->master->last_getrusage = after;

        unsigned long walltime, cputime;
        unsigned long exp;

        walltime = thread_consumed_time(&after, &before, &cputime);

        /* update walltime */
        atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
                                  memory_order_seq_cst);
        exp = atomic_load_explicit(&thread->hist->real.max,
                                   memory_order_seq_cst);
        while (exp < walltime
               && !atomic_compare_exchange_weak_explicit(
                       &thread->hist->real.max, &exp, walltime,
                       memory_order_seq_cst, memory_order_seq_cst))
                ;

        if (cputime_enabled_here && cputime_enabled) {
                /* update cputime */
                atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
                                          memory_order_seq_cst);
                exp = atomic_load_explicit(&thread->hist->cpu.max,
                                           memory_order_seq_cst);
                while (exp < cputime
                       && !atomic_compare_exchange_weak_explicit(
                               &thread->hist->cpu.max, &exp, cputime,
                               memory_order_seq_cst, memory_order_seq_cst))
                        ;
        }

        atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
                                  memory_order_seq_cst);
        atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
                                 memory_order_seq_cst);

        if (cputime_enabled_here && cputime_enabled && cputime_threshold
            && cputime > cputime_threshold) {
                /*
                 * We have a CPU Hog on our hands.  The time FRR has spent
                 * doing actual work (not sleeping) is greater than 5 seconds.
                 * Whinge about it now, so we're aware this is yet another task
                 * to fix.
                 */
                atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
                                          1, memory_order_seq_cst);
                flog_warn(
                        EC_LIB_SLOW_THREAD_CPU,
                        "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
                        thread->xref->funcname, (unsigned long)thread->func,
                        walltime / 1000, cputime / 1000);
        } else if (walltime_threshold && walltime > walltime_threshold) {
                /*
                 * The runtime for a task is greater than 5 seconds, but the
                 * cpu time is under 5 seconds.  Let's whine about this because
                 * this could imply some sort of scheduling issue.
                 */
                atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
                                          1, memory_order_seq_cst);
                flog_warn(
                        EC_LIB_SLOW_THREAD_WALL,
                        "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
                        thread->xref->funcname, (unsigned long)thread->func,
                        walltime / 1000, cputime / 1000);
        }
}
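
/* The compare-exchange loops above implement a lock-free "atomic max":
 * reload the current maximum and retry until either our sample is no longer
 * larger or the CAS succeeds, so the recorded maximum never decreases even
 * with concurrent readers.
 */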
/* Execute thread */
void _thread_execute(const struct xref_threadsched *xref,
                     struct thread_master *m, int (*func)(struct thread *),
                     void *arg, int val)
{
        struct thread *thread;

        /* Get or allocate new thread to execute. */
        frr_with_mutex(&m->mtx) {
                thread = thread_get(m, THREAD_EVENT, func, arg, xref);

                /* Set its event value. */
                frr_with_mutex(&thread->mtx) {
                        thread->add_type = THREAD_EXECUTE;
                        thread->u.val = val;
                        thread->ref = &thread;
                }
        }

        /* Execute thread doing all accounting. */
        thread_call(thread);

        /* Give back or free thread. */
        thread_add_unuse(m, thread);
}
/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
void debug_signals(const sigset_t *sigs)
{
        int i, found;
        sigset_t tmpsigs;
        char buf[300];

        /*
         * We're only looking at the non-realtime signals here, so we need
         * some limit value. Platform differences mean at some point we just
         * need to pick a reasonable value.
         */
#if defined SIGRTMIN
#  define LAST_SIGNAL SIGRTMIN
#else
#  define LAST_SIGNAL 32
#endif

        if (sigs == NULL) {
                sigemptyset(&tmpsigs);
                pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
                sigs = &tmpsigs;
        }

        found = 0;
        buf[0] = '\0';

        for (i = 0; i < LAST_SIGNAL; i++) {
                char tmp[20];

                if (sigismember(sigs, i) > 0) {
                        if (found > 0)
                                strlcat(buf, ",", sizeof(buf));
                        snprintf(tmp, sizeof(tmp), "%d", i);
                        strlcat(buf, tmp, sizeof(buf));
                        found++;
                }
        }

        if (found == 0)
                snprintf(buf, sizeof(buf), "<none>");

        zlog_debug("%s: %s", __func__, buf);
}
bool thread_is_scheduled(struct thread *thread)
{
        if (thread == NULL)
                return false;

        return true;
}
static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
                                   const struct thread *thread)
{
        static const char * const types[] = {
                [THREAD_READ] = "read",
                [THREAD_WRITE] = "write",
                [THREAD_TIMER] = "timer",
                [THREAD_EVENT] = "event",
                [THREAD_READY] = "ready",
                [THREAD_UNUSED] = "unused",
                [THREAD_EXECUTE] = "exec",
        };
        ssize_t rv = 0;
        char info[16] = "";

        if (!thread)
                return bputs(buf, "{(thread *)NULL}");

        rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);

        if (thread->type < array_size(types) && types[thread->type])
                rv += bprintfrr(buf, " %-6s", types[thread->type]);
        else
                rv += bprintfrr(buf, " INVALID(%u)", thread->type);

        switch (thread->type) {
        case THREAD_READ:
        case THREAD_WRITE:
                snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
                break;

        case THREAD_TIMER:
                snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
                break;
        }

        rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
                        thread->xref->funcname, thread->xref->dest,
                        thread->xref->xref.file, thread->xref->xref.line);
        return rv;
}
2122 printfrr_ext_autoreg_p("TH", printfrr_thread
);
2123 static ssize_t
printfrr_thread(struct fbuf
*buf
, struct printfrr_eargs
*ea
,
2126 const struct thread
*thread
= ptr
;
2127 struct timespec remain
= {};
2129 if (ea
->fmt
[0] == 'D') {
2131 return printfrr_thread_dbg(buf
, ea
, thread
);
2135 /* need to jump over time formatting flag characters in the
2136 * input format string, i.e. adjust ea->fmt!
2138 printfrr_time(buf
, ea
, &remain
,
2139 TIMEFMT_TIMER_DEADLINE
| TIMEFMT_SKIP
);
2140 return bputch(buf
, '-');
2143 TIMEVAL_TO_TIMESPEC(&thread
->u
.sands
, &remain
);
2144 return printfrr_time(buf
, ea
, &remain
, TIMEFMT_TIMER_DEADLINE
);