/* Thread management routine
 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <zebra.h>
#include <sys/resource.h>

#include "thread.h"
#include "memory.h"
#include "frrcu.h"
#include "log.h"
#include "hash.h"
#include "command.h"
#include "sigevent.h"
#include "network.h"
#include "jhash.h"
#include "frratomic.h"
#include "frr_pthread.h"
#include "lib_errors.h"
#include "libfrr_trace.h"
#include "libfrr.h"
DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");

DECLARE_LIST(thread_list, struct thread, threaditem);

struct cancel_req {
	int flags;
	struct thread *thread;
	void *eventobj;
	struct thread **threadref;
};

/* Flags for task cancellation */
#define THREAD_CANCEL_FLAG_READY 0x01
static int thread_timer_cmp(const struct thread *a, const struct thread *b)
{
	if (a->u.sands.tv_sec < b->u.sands.tv_sec)
		return -1;
	if (a->u.sands.tv_sec > b->u.sands.tv_sec)
		return 1;
	if (a->u.sands.tv_usec < b->u.sands.tv_usec)
		return -1;
	if (a->u.sands.tv_usec > b->u.sands.tv_usec)
		return 1;
	return 0;
}

DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);
#if defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif

#define AWAKEN(m)                                                              \
	do {                                                                   \
		const unsigned char wakebyte = 0x01;                           \
		write(m->io_pipe[1], &wakebyte, 1);                            \
	} while (0)
/* control variable for initializer */
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
pthread_key_t thread_current;

static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *masters;

static void thread_free(struct thread_master *master, struct thread *thread);

#ifndef EXCLUDE_CPU_TIME
#define EXCLUDE_CPU_TIME 0
#endif
#ifndef CONSUMED_TIME_CHECK
#define CONSUMED_TIME_CHECK 0
#endif

bool cputime_enabled = !EXCLUDE_CPU_TIME;
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
/* CLI start ---------------------------------------------------------------- */
#ifndef VTYSH_EXTRACT_PL
#include "lib/thread_clippy.c"
#endif
static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
{
	int size = sizeof(a->func);

	return jhash(&a->func, size, 0);
}

static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
				const struct cpu_thread_history *b)
{
	return a->func == b->func;
}

static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
{
	struct cpu_thread_history *new;
	new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
	new->func = a->func;
	new->funcname = a->funcname;
	return new;
}

static void cpu_record_hash_free(void *a)
{
	struct cpu_thread_history *hist = a;

	XFREE(MTYPE_THREAD_STATS, hist);
}
static void vty_out_cpu_thread_history(struct vty *vty,
				       struct cpu_thread_history *a)
{
	vty_out(vty,
		"%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
		a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
		a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
		(a->real.total / a->total_calls), a->real.max,
		a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
	vty_out(vty, " %c%c%c%c%c %s\n",
		a->types & (1 << THREAD_READ) ? 'R' : ' ',
		a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
		a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
		a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
		a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
}
static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
{
	struct cpu_thread_history *totals = args[0];
	struct cpu_thread_history copy;
	struct vty *vty = args[1];
	uint8_t *filter = args[2];

	struct cpu_thread_history *a = bucket->data;

	copy.total_active =
		atomic_load_explicit(&a->total_active, memory_order_seq_cst);
	copy.total_calls =
		atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
	copy.total_cpu_warn =
		atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
	copy.total_wall_warn =
		atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
	copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
						     memory_order_seq_cst);
	copy.cpu.total =
		atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
	copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
	copy.real.total =
		atomic_load_explicit(&a->real.total, memory_order_seq_cst);
	copy.real.max =
		atomic_load_explicit(&a->real.max, memory_order_seq_cst);
	copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
	copy.funcname = a->funcname;

	if (!(copy.types & *filter))
		return;

	vty_out_cpu_thread_history(vty, &copy);
	totals->total_active += copy.total_active;
	totals->total_calls += copy.total_calls;
	totals->total_cpu_warn += copy.total_cpu_warn;
	totals->total_wall_warn += copy.total_wall_warn;
	totals->total_starv_warn += copy.total_starv_warn;
	totals->real.total += copy.real.total;
	if (totals->real.max < copy.real.max)
		totals->real.max = copy.real.max;
	totals->cpu.total += copy.cpu.total;
	if (totals->cpu.max < copy.cpu.max)
		totals->cpu.max = copy.cpu.max;
}
static void cpu_record_print(struct vty *vty, uint8_t filter)
{
	struct cpu_thread_history tmp;
	void *args[3] = {&tmp, vty, &filter};
	struct thread_master *m;
	struct listnode *ln;

	if (!cputime_enabled)
		vty_out(vty,
			"\n"
			"Collecting CPU time statistics is currently disabled. Following statistics\n"
			"will be zero or may display data from when collection was enabled. Use the\n"
			" \"service cputime-stats\" command to start collecting data.\n"
			"\nCounters and wallclock times are always maintained and should be accurate.\n");

	memset(&tmp, 0, sizeof(tmp));
	tmp.funcname = "TOTAL";
	tmp.types = filter;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			const char *name = m->name ? m->name : "main";

			char underline[strlen(name) + 1];
			memset(underline, '-', sizeof(underline));
			underline[sizeof(underline) - 1] = '\0';

			vty_out(vty, "\n");
			vty_out(vty, "Showing statistics for pthread %s\n",
				name);
			vty_out(vty, "-------------------------------%s\n",
				underline);
			vty_out(vty, "%30s %18s %18s\n", "",
				"CPU (user+system):", "Real (wall-clock):");
			vty_out(vty,
				"Active Runtime(ms) Invoked Avg uSec Max uSecs");
			vty_out(vty, " Avg uSec Max uSecs");
			vty_out(vty,
				" CPU_Warn Wall_Warn Starv_Warn Type Thread\n");

			if (m->cpu_record->count)
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_print,
					args);
			else
				vty_out(vty, "No data to display yet.\n");

			vty_out(vty, "\n");
		}
	}

	vty_out(vty, "\n");
	vty_out(vty, "Total thread statistics\n");
	vty_out(vty, "-------------------------\n");
	vty_out(vty, "%30s %18s %18s\n", "",
		"CPU (user+system):", "Real (wall-clock):");
	vty_out(vty, "Active Runtime(ms) Invoked Avg uSec Max uSecs");
	vty_out(vty, " Avg uSec Max uSecs CPU_Warn Wall_Warn");
	vty_out(vty, " Type Thread\n");

	if (tmp.total_calls > 0)
		vty_out_cpu_thread_history(vty, &tmp);
}
static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
{
	uint8_t *filter = args[0];
	struct hash *cpu_record = args[1];

	struct cpu_thread_history *a = bucket->data;

	if (!(a->types & *filter))
		return;

	hash_release(cpu_record, bucket->data);
}
static void cpu_record_clear(uint8_t filter)
{
	uint8_t *tmp = &filter;
	struct thread_master *m;
	struct listnode *ln;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			frr_with_mutex(&m->mtx) {
				void *args[2] = {tmp, m->cpu_record};
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_clear,
					args);
			}
		}
	}
}
static uint8_t parse_filter(const char *filterstr)
{
	int i = 0;
	int filter = 0;

	while (filterstr[i] != '\0') {
		switch (filterstr[i]) {
		case 'r':
		case 'R':
			filter |= (1 << THREAD_READ);
			break;
		case 'w':
		case 'W':
			filter |= (1 << THREAD_WRITE);
			break;
		case 't':
		case 'T':
			filter |= (1 << THREAD_TIMER);
			break;
		case 'e':
		case 'E':
			filter |= (1 << THREAD_EVENT);
			break;
		case 'x':
		case 'X':
			filter |= (1 << THREAD_EXECUTE);
			break;
		default:
			break;
		}
		++i;
	}

	return filter;
}
DEFUN_NOSH (show_thread_cpu,
	    show_thread_cpu_cmd,
	    "show thread cpu [FILTER]",
	    SHOW_STR
	    "Thread information\n"
	    "Thread CPU usage\n"
	    "Display filter (rwtex)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_print(vty, filter);
	return CMD_SUCCESS;
}
DEFPY (service_cputime_stats,
       service_cputime_stats_cmd,
       "[no] service cputime-stats",
       NO_STR
       "Set up miscellaneous service\n"
       "Collect CPU usage statistics\n")
{
	cputime_enabled = !no;
	return CMD_SUCCESS;
}

DEFPY (service_cputime_warning,
       service_cputime_warning_cmd,
       "[no] service cputime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		cputime_threshold = 0;
	else
		cputime_threshold = cputime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_cputime_warning,
       no_service_cputime_warning_cmd,
       "no service cputime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n")

DEFPY (service_walltime_warning,
       service_walltime_warning_cmd,
       "[no] service walltime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		walltime_threshold = 0;
	else
		walltime_threshold = walltime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_walltime_warning,
       no_service_walltime_warning_cmd,
       "no service walltime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n")
static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct thread *thread;
	uint32_t i;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing poll FD's for %s\n", name);
	vty_out(vty, "----------------------%s\n", underline);
	vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
		m->fd_limit);
	for (i = 0; i < m->handler.pfdcount; i++) {
		vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
			m->handler.pfds[i].fd, m->handler.pfds[i].events,
			m->handler.pfds[i].revents);

		if (m->handler.pfds[i].events & POLLIN) {
			thread = m->read[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR ");
			else
				vty_out(vty, "%s ", thread->xref->funcname);
		} else
			vty_out(vty, " ");

		if (m->handler.pfds[i].events & POLLOUT) {
			thread = m->write[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR\n");
			else
				vty_out(vty, "%s\n", thread->xref->funcname);
		} else
			vty_out(vty, "\n");
	}
}
DEFUN_NOSH (show_thread_poll,
	    show_thread_poll_cmd,
	    "show thread poll",
	    SHOW_STR
	    "Thread information\n"
	    "Show poll FD's and information\n")
{
	struct listnode *node;
	struct thread_master *m;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
			show_thread_poll_helper(vty, m);
		}
	}

	return CMD_SUCCESS;
}
DEFUN (clear_thread_cpu,
       clear_thread_cpu_cmd,
       "clear thread cpu [FILTER]",
       "Clear stored data in all pthreads\n"
       "Thread information\n"
       "Thread CPU usage\n"
       "Display filter (rwtexb)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_clear(filter);
	return CMD_SUCCESS;
}
void thread_cmd_init(void)
{
	install_element(VIEW_NODE, &show_thread_cpu_cmd);
	install_element(VIEW_NODE, &show_thread_poll_cmd);
	install_element(ENABLE_NODE, &clear_thread_cpu_cmd);

	install_element(CONFIG_NODE, &service_cputime_stats_cmd);
	install_element(CONFIG_NODE, &service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &service_walltime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
}
/* CLI end ------------------------------------------------------------------ */
static void cancelreq_del(void *cr)
{
	XFREE(MTYPE_TMP, cr);
}

/* initializer, only ever called once */
static void initializer(void)
{
	pthread_key_create(&thread_current, NULL);
}
struct thread_master *thread_master_create(const char *name)
{
	struct thread_master *rv;
	struct rlimit limit;

	pthread_once(&init_once, &initializer);

	rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));

	/* Initialize master mutex */
	pthread_mutex_init(&rv->mtx, NULL);
	pthread_cond_init(&rv->cancel_cond, NULL);

	/* Set name */
	name = name ? name : "default";
	rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);

	/* Initialize I/O task data structures */

	/* Use configured limit if present, ulimit otherwise. */
	rv->fd_limit = frr_get_fd_limit();
	if (rv->fd_limit == 0) {
		getrlimit(RLIMIT_NOFILE, &limit);
		rv->fd_limit = (int)limit.rlim_cur;
	}

	rv->read = XCALLOC(MTYPE_THREAD_POLL,
			   sizeof(struct thread *) * rv->fd_limit);

	rv->write = XCALLOC(MTYPE_THREAD_POLL,
			    sizeof(struct thread *) * rv->fd_limit);

	char tmhashname[strlen(name) + 32];
	snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
		 name);
	rv->cpu_record = hash_create_size(
		8, (unsigned int (*)(const void *))cpu_record_hash_key,
		(bool (*)(const void *, const void *))cpu_record_hash_cmp,
		tmhashname);

	thread_list_init(&rv->event);
	thread_list_init(&rv->ready);
	thread_list_init(&rv->unuse);
	thread_timer_list_init(&rv->timer);

	/* Initialize thread_fetch() settings */
	rv->spin = true;
	rv->handle_signals = true;

	/* Set pthread owner, should be updated by actual owner */
	rv->owner = pthread_self();
	rv->cancel_req = list_new();
	rv->cancel_req->del = cancelreq_del;
	rv->canceled = true;

	/* Initialize pipe poker */
	pipe(rv->io_pipe);
	set_nonblocking(rv->io_pipe[0]);
	set_nonblocking(rv->io_pipe[1]);

	/* Initialize data structures for poll() */
	rv->handler.pfdsize = rv->fd_limit;
	rv->handler.pfdcount = 0;
	rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);
	rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);

	/* add to list of threadmasters */
	frr_with_mutex(&masters_mtx) {
		if (!masters)
			masters = list_new();

		listnode_add(masters, rv);
	}

	return rv;
}
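
/*
 * Illustrative sketch (not part of the original file): the typical lifecycle
 * of a thread_master as a daemon would drive it.  In real FRR daemons,
 * frr_init()/frr_run() wrap these steps; "my_startup_event" and "ctx" here
 * are hypothetical names used only for the example.
 */
#if 0
void example_main_loop(void *ctx)
{
	struct thread_master *master = thread_master_create("example");
	struct thread thread;

	/* seed the loop with one immediately-ready task */
	thread_add_event(master, my_startup_event, ctx, 0, NULL);

	/* thread_fetch() blocks until a task is ready; thread_call() runs it
	 * and records its CPU/wallclock statistics.
	 */
	while (thread_fetch(master, &thread))
		thread_call(&thread);

	thread_master_free(master);
}
#endif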
void thread_master_set_name(struct thread_master *master, const char *name)
{
	frr_with_mutex(&master->mtx) {
		XFREE(MTYPE_THREAD_MASTER, master->name);
		master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
	}
}
#define THREAD_UNUSED_DEPTH 10

/* Move thread to unuse list. */
static void thread_add_unuse(struct thread_master *m, struct thread *thread)
{
	pthread_mutex_t mtxc = thread->mtx;

	assert(m != NULL && thread != NULL);

	thread->hist->total_active--;
	memset(thread, 0, sizeof(struct thread));
	thread->type = THREAD_UNUSED;

	/* Restore the thread mutex context. */
	thread->mtx = mtxc;

	if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
		thread_list_add_tail(&m->unuse, thread);
		return;
	}

	thread_free(m, thread);
}
/* Free all unused threads. */
static void thread_list_free(struct thread_master *m,
			     struct thread_list_head *list)
{
	struct thread *t;

	while ((t = thread_list_pop(list)))
		thread_free(m, t);
}

static void thread_array_free(struct thread_master *m,
			      struct thread **thread_array)
{
	struct thread *t;
	int index;

	for (index = 0; index < m->fd_limit; ++index) {
		t = thread_array[index];
		if (t) {
			thread_array[index] = NULL;
			thread_free(m, t);
		}
	}
	XFREE(MTYPE_THREAD_POLL, thread_array);
}
/*
 * thread_master_free_unused
 *
 * As threads finish, they are put on the unuse list for later reuse.
 * If we are shutting down, free up the unused threads so we can see
 * whether anything was forgotten and left running.
 */
void thread_master_free_unused(struct thread_master *m)
{
	frr_with_mutex(&m->mtx) {
		struct thread *t;
		while ((t = thread_list_pop(&m->unuse)))
			thread_free(m, t);
	}
}
/* Stop thread scheduler. */
void thread_master_free(struct thread_master *m)
{
	struct thread *t;

	frr_with_mutex(&masters_mtx) {
		listnode_delete(masters, m);
		if (masters->count == 0) {
			list_delete(&masters);
		}
	}

	thread_array_free(m, m->read);
	thread_array_free(m, m->write);
	while ((t = thread_timer_list_pop(&m->timer)))
		thread_free(m, t);
	thread_list_free(m, &m->event);
	thread_list_free(m, &m->ready);
	thread_list_free(m, &m->unuse);
	pthread_mutex_destroy(&m->mtx);
	pthread_cond_destroy(&m->cancel_cond);
	close(m->io_pipe[0]);
	close(m->io_pipe[1]);
	list_delete(&m->cancel_req);
	m->cancel_req = NULL;

	hash_clean(m->cpu_record, cpu_record_hash_free);
	hash_free(m->cpu_record);
	m->cpu_record = NULL;

	XFREE(MTYPE_THREAD_MASTER, m->name);
	XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
	XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
	XFREE(MTYPE_THREAD_MASTER, m);
}
/* Return remaining time in milliseconds. */
unsigned long thread_timer_remain_msec(struct thread *thread)
{
	int64_t remain;

	frr_with_mutex(&thread->mtx) {
		remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
	}

	return remain < 0 ? 0 : remain;
}

/* Return remaining time in seconds. */
unsigned long thread_timer_remain_second(struct thread *thread)
{
	return thread_timer_remain_msec(thread) / 1000LL;
}

struct timeval thread_timer_remain(struct thread *thread)
{
	struct timeval remain;
	frr_with_mutex(&thread->mtx) {
		monotime_until(&thread->u.sands, &remain);
	}
	return remain;
}
static int time_hhmmss(char *buf, int buf_size, long sec)
{
	long hh;
	long mm;
	int wr;

	assert(buf_size >= 8);

	hh = sec / 3600;
	sec %= 3600;
	mm = sec / 60;
	sec %= 60;

	wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);

	return wr != 8;
}

char *thread_timer_to_hhmmss(char *buf, int buf_size,
			     struct thread *t_timer)
{
	if (t_timer) {
		time_hhmmss(buf, buf_size,
			    thread_timer_remain_second(t_timer));
	} else {
		snprintf(buf, buf_size, "--:--:--");
	}
	return buf;
}
/* Get new thread.  */
static struct thread *thread_get(struct thread_master *m, uint8_t type,
				 void (*func)(struct thread *), void *arg,
				 const struct xref_threadsched *xref)
{
	struct thread *thread = thread_list_pop(&m->unuse);
	struct cpu_thread_history tmp;

	if (!thread) {
		thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
		/* mutex only needs to be initialized at struct creation. */
		pthread_mutex_init(&thread->mtx, NULL);
		m->alloc++;
	}

	thread->type = type;
	thread->add_type = type;
	thread->master = m;
	thread->arg = arg;
	thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
	thread->ref = NULL;
	thread->ignore_timer_late = false;

	/*
	 * So if the passed in funcname is not what we have
	 * stored that means the thread->hist needs to be
	 * updated.  We keep the last one around in unused
	 * under the assumption that we are probably
	 * going to immediately allocate the same
	 * one up again.
	 *
	 * This hopefully saves us some serious
	 * hash_get lookups.
	 */
	if ((thread->xref && thread->xref->funcname != xref->funcname)
	    || thread->func != func) {
		tmp.func = func;
		tmp.funcname = xref->funcname;
		thread->hist =
			hash_get(m->cpu_record, &tmp,
				 (void *(*)(void *))cpu_record_hash_alloc);
	}
	thread->hist->total_active++;
	thread->func = func;
	thread->xref = xref;

	return thread;
}
static void thread_free(struct thread_master *master, struct thread *thread)
{
	/* Update statistics. */
	assert(master->alloc > 0);
	master->alloc--;

	/* Free allocated resources. */
	pthread_mutex_destroy(&thread->mtx);
	XFREE(MTYPE_THREAD, thread);
}
static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
		   bool *eintr_p)
{
	sigset_t origsigs;
	unsigned char trash[64];
	nfds_t count = m->handler.copycount;

	/*
	 * If timer_wait is null here, that means poll() should block
	 * indefinitely, unless the thread_master has overridden it by setting
	 * ->selectpoll_timeout.
	 *
	 * If the value is positive, it specifies the maximum number of
	 * milliseconds to wait. If the timeout is -1, it specifies that
	 * we should never wait and always return immediately even if no
	 * event is detected. If the value is zero, the behavior is default.
	 */
	int timeout = -1;

	/* number of file descriptors with events */
	int num;

	if (timer_wait != NULL
	    && m->selectpoll_timeout == 0) // use the default value
		timeout = (timer_wait->tv_sec * 1000)
			  + (timer_wait->tv_usec / 1000);
	else if (m->selectpoll_timeout > 0) // use the user's timeout
		timeout = m->selectpoll_timeout;
	else if (m->selectpoll_timeout
		 < 0) // effect a poll (return immediately)
		timeout = 0;

	zlog_tls_buffer_flush();
	rcu_read_unlock();
	rcu_assert_read_unlocked();

	/* add poll pipe poker */
	assert(count + 1 < m->handler.pfdsize);
	m->handler.copy[count].fd = m->io_pipe[0];
	m->handler.copy[count].events = POLLIN;
	m->handler.copy[count].revents = 0x00;

	/* We need to deal with a signal-handling race here: we
	 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
	 * that may arrive just before we enter poll(). We will block the
	 * key signals, then check whether any have arrived - if so, we return
	 * before calling poll(). If not, we'll re-enable the signals
	 * in the ppoll() call.
	 */

	sigemptyset(&origsigs);
	if (m->handle_signals) {
		/* Main pthread that handles the app signals */
		if (frr_sigevent_check(&origsigs)) {
			/* Signal to process - restore signal mask and return */
			pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
			num = -1;
			*eintr_p = true;
			goto done;
		}
	} else {
		/* Don't make any changes for the non-main pthreads */
		pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
	}

#if defined(HAVE_PPOLL)
	struct timespec ts, *tsp;

	if (timeout >= 0) {
		ts.tv_sec = timeout / 1000;
		ts.tv_nsec = (timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
#else
	/* Not ideal - there is a race after we restore the signal mask */
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
	num = poll(m->handler.copy, count + 1, timeout);
#endif

done:

	if (num < 0 && errno == EINTR)
		*eintr_p = true;

	if (num > 0 && m->handler.copy[count].revents != 0 && num--)
		while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
			;

	rcu_read_lock();

	return num;
}
/* Add new read thread. */
void _thread_add_read_write(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg, int fd,
			    struct thread **t_ptr)
{
	int dir = xref->thread_type;
	struct thread *thread = NULL;
	struct thread **thread_array;

	if (dir == THREAD_READ)
		frrtrace(9, frr_libfrr, schedule_read, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);
	else
		frrtrace(9, frr_libfrr, schedule_write, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);

	assert(fd >= 0);
	if (fd >= m->fd_limit)
		assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			// thread is already scheduled; don't reschedule
			break;

		/* default to a new pollfd */
		nfds_t queuepos = m->handler.pfdcount;

		if (dir == THREAD_READ)
			thread_array = m->read;
		else
			thread_array = m->write;

		/* if we already have a pollfd for our file descriptor, find and
		 * use it */
		for (nfds_t i = 0; i < m->handler.pfdcount; i++)
			if (m->handler.pfds[i].fd == fd) {
				queuepos = i;

#ifdef DEV_BUILD
				/*
				 * What happens if we have a thread already
				 * created for this event?
				 */
				if (thread_array[fd])
					assert(!"Thread already scheduled for file descriptor");
#endif
				break;
			}

		/* make sure we have room for this fd + pipe poker fd */
		assert(queuepos + 1 < m->handler.pfdsize);

		thread = thread_get(m, dir, func, arg, xref);

		m->handler.pfds[queuepos].fd = fd;
		m->handler.pfds[queuepos].events |=
			(dir == THREAD_READ ? POLLIN : POLLOUT);

		if (queuepos == m->handler.pfdcount)
			m->handler.pfdcount++;

		if (thread) {
			frr_with_mutex(&thread->mtx) {
				thread->u.fd = fd;
				thread_array[thread->u.fd] = thread;
			}

			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		AWAKEN(m);
	}
}
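
/*
 * Illustrative sketch (not part of the original file): callers reach this
 * function through the thread_add_read()/thread_add_write() wrapper macros
 * from thread.h, which supply the xref.  "sock_read", "peer" and its fields
 * are hypothetical.  Because a read task fires only once and the library
 * clears the back-reference when the task runs, re-arming from inside the
 * handler is the usual pattern.
 */
#if 0
static void sock_read(struct thread *t)
{
	struct peer *peer = THREAD_ARG(t);
	int fd = THREAD_FD(t);

	/* ... read from fd, process data ... */

	/* reschedule ourselves for the next readable event */
	thread_add_read(t->master, sock_read, peer, fd, &peer->t_read);
}

void example_arm_read(struct thread_master *m, struct peer *peer)
{
	thread_add_read(m, sock_read, peer, peer->fd, &peer->t_read);
}
#endif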
static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
				      struct thread_master *m,
				      void (*func)(struct thread *), void *arg,
				      struct timeval *time_relative,
				      struct thread **t_ptr)
{
	struct thread *thread;
	struct timeval t;

	assert(m != NULL);

	assert(time_relative);

	frrtrace(9, frr_libfrr, schedule_timer, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);

	/* Compute expiration/deadline time. */
	monotime(&t);
	timeradd(&t, time_relative, &t);

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			return;

		thread = thread_get(m, THREAD_TIMER, func, arg, xref);

		frr_with_mutex(&thread->mtx) {
			thread->u.sands = t;
			thread_timer_list_add(&m->timer, thread);
			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		/* The timer list is sorted - if this new timer
		 * might change the time we'll wait for, give the pthread
		 * a chance to re-compute.
		 */
		if (thread_timer_list_first(&m->timer) == thread)
			AWAKEN(m);
	}
#define ONEYEAR2SEC (60 * 60 * 24 * 365)
	if (time_relative->tv_sec > ONEYEAR2SEC)
		flog_err(
			EC_LIB_TIMER_TOO_LONG,
			"Timer: %pTHD is created with an expiration that is greater than 1 year",
			thread);
}
/* Add timer event thread. */
void _thread_add_timer(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer;
	trel.tv_usec = 0;

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "millisecond" resolution */
void _thread_add_timer_msec(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg,
			    long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer / 1000;
	trel.tv_usec = 1000 * (timer % 1000);

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "timeval" resolution */
void _thread_add_timer_tv(const struct xref_threadsched *xref,
			  struct thread_master *m,
			  void (*func)(struct thread *), void *arg,
			  struct timeval *tv, struct thread **t_ptr)
{
	_thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}
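
/*
 * Illustrative sketch (not part of the original file): the thread_add_timer()
 * and thread_add_timer_msec() wrapper macros from thread.h land in the
 * functions above.  "hello_send" and "iface" are hypothetical.  Since a
 * non-NULL *t_ptr suppresses rescheduling, a periodic timer conventionally
 * re-adds itself from its own handler, after the library has already cleared
 * the back-reference.
 */
#if 0
static void hello_send(struct thread *t)
{
	struct iface *iface = THREAD_ARG(t);

	/* ... send a hello packet ... */

	/* re-arm: fire again in 10 seconds */
	thread_add_timer(t->master, hello_send, iface, 10, &iface->t_hello);
}

void example_start_hello(struct thread_master *m, struct iface *iface)
{
	/* first pop after 10000 milliseconds */
	thread_add_timer_msec(m, hello_send, iface, 10 * 1000,
			      &iface->t_hello);
}
#endif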
/* Add simple event thread. */
void _thread_add_event(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, int val, struct thread **t_ptr)
{
	struct thread *thread = NULL;

	frrtrace(9, frr_libfrr, schedule_event, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, val, arg, 0);

	assert(m != NULL);

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			break;

		thread = thread_get(m, THREAD_EVENT, func, arg, xref);
		frr_with_mutex(&thread->mtx) {
			thread->u.val = val;
			thread_list_add_tail(&m->event, thread);
		}

		if (t_ptr) {
			*t_ptr = thread;
			thread->ref = t_ptr;
		}

		AWAKEN(m);
	}
}
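
/*
 * Illustrative sketch (not part of the original file): thread_add_event()
 * schedules a task that becomes ready immediately; it is the usual way to
 * defer work onto the owning pthread or to break up a long computation.
 * "process_queue" and "q" are hypothetical.
 */
#if 0
void example_defer_work(struct thread_master *m, struct workqueue *q)
{
	/* runs on m's pthread on the next pass through the event loop */
	thread_add_event(m, process_queue, q, 0, &q->t_process);
}
#endif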
/* Thread cancellation ------------------------------------------------------ */

/**
 * NOT's out the .events field of pollfd corresponding to the given file
 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
 *
 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
 * implementation for details.
 *
 * @param master
 * @param fd
 * @param state the event to cancel. One or more (OR'd together) of the
 * following:
 *   - POLLIN
 *   - POLLOUT
 */
static void thread_cancel_rw(struct thread_master *master, int fd, short state,
			     int idx_hint)
{
	bool found = false;

	/* find the index of corresponding pollfd */
	nfds_t i;

	/* Cancel POLLHUP too just in case some bozo set it */
	state |= POLLHUP;

	/* Some callers know the index of the pfd already */
	if (idx_hint >= 0) {
		i = idx_hint;
		found = true;
	} else {
		/* Have to look for the fd in the pfd array */
		for (i = 0; i < master->handler.pfdcount; i++)
			if (master->handler.pfds[i].fd == fd) {
				found = true;
				break;
			}
	}

	if (!found) {
		zlog_debug(
			"[!] Received cancellation request for nonexistent rw job");
		zlog_debug("[!] threadmaster: %s | fd: %d",
			   master->name ? master->name : "", fd);
		return;
	}

	/* NOT out event. */
	master->handler.pfds[i].events &= ~(state);

	/* If all events are canceled, delete / resize the pollfd array. */
	if (master->handler.pfds[i].events == 0) {
		memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
			(master->handler.pfdcount - i - 1)
				* sizeof(struct pollfd));
		master->handler.pfdcount--;
		master->handler.pfds[master->handler.pfdcount].fd = 0;
		master->handler.pfds[master->handler.pfdcount].events = 0;
	}

	/* If we have the same pollfd in the copy, perform the same operations,
	 * otherwise return. */
	if (i >= master->handler.copycount)
		return;

	master->handler.copy[i].events &= ~(state);

	if (master->handler.copy[i].events == 0) {
		memmove(master->handler.copy + i, master->handler.copy + i + 1,
			(master->handler.copycount - i - 1)
				* sizeof(struct pollfd));
		master->handler.copycount--;
		master->handler.copy[master->handler.copycount].fd = 0;
		master->handler.copy[master->handler.copycount].events = 0;
	}
}
/*
 * Process task cancellation given a task argument: iterate through the
 * various lists of tasks, looking for any that match the argument.
 */
static void cancel_arg_helper(struct thread_master *master,
			      const struct cancel_req *cr)
{
	struct thread *t;
	nfds_t i;
	int fd;
	struct pollfd *pfd;

	/* We're only processing arg-based cancellations here. */
	if (cr->eventobj == NULL)
		return;

	/* First process the ready lists. */
	frr_each_safe(thread_list, &master->event, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->event, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	frr_each_safe(thread_list, &master->ready, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->ready, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	/* If requested, stop here and ignore io and timers */
	if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
		return;

	/* Check the io tasks */
	for (i = 0; i < master->handler.pfdcount;) {
		pfd = master->handler.pfds + i;

		if (pfd->events & POLLIN)
			t = master->read[pfd->fd];
		else
			t = master->write[pfd->fd];

		if (t && t->arg == cr->eventobj) {
			fd = pfd->fd;

			/* Found a match to cancel: clean up fd arrays */
			thread_cancel_rw(master, pfd->fd, pfd->events, i);

			/* Clean up thread arrays */
			master->read[fd] = NULL;
			master->write[fd] = NULL;

			/* Clear caller's ref */
			if (t->ref)
				*t->ref = NULL;

			thread_add_unuse(master, t);

			/* Don't increment 'i' since the cancellation will have
			 * removed the entry from the pfd array
			 */
		} else
			i++;
	}

	/* Check the timer tasks */
	t = thread_timer_list_first(&master->timer);
	while (t) {
		struct thread *t_next;

		t_next = thread_timer_list_next(&master->timer, t);

		if (t->arg == cr->eventobj) {
			thread_timer_list_del(&master->timer, t);
			if (t->ref)
				*t->ref = NULL;
			thread_add_unuse(master, t);
		}

		t = t_next;
	}
}
/**
 * Process cancellation requests.
 *
 * This may only be run from the pthread which owns the thread_master.
 *
 * @param master the thread master to process
 * @REQUIRE master->mtx
 */
static void do_thread_cancel(struct thread_master *master)
{
	struct thread_list_head *list = NULL;
	struct thread **thread_array = NULL;
	struct thread *thread;

	struct cancel_req *cr;
	struct listnode *ln;
	for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
		/* Reset per request; a stale value from a previous iteration
		 * must not leak into the THREAD_TIMER case below.
		 */
		list = NULL;
		thread_array = NULL;

		/*
		 * If this is an event object cancellation, search
		 * through task lists deleting any tasks which have the
		 * specified argument - use this handy helper function.
		 */
		if (cr->eventobj) {
			cancel_arg_helper(master, cr);
			continue;
		}

		/*
		 * The pointer varies depending on whether the cancellation
		 * request was made asynchronously or not. If it was, we
		 * need to check whether the thread even exists anymore
		 * before cancelling it.
		 */
		thread = (cr->thread) ? cr->thread : *cr->threadref;

		if (!thread)
			continue;

		/* Determine the appropriate queue to cancel the thread from */
		switch (thread->type) {
		case THREAD_READ:
			thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
			thread_array = master->read;
			break;
		case THREAD_WRITE:
			thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
			thread_array = master->write;
			break;
		case THREAD_TIMER:
			thread_timer_list_del(&master->timer, thread);
			break;
		case THREAD_EVENT:
			list = &master->event;
			break;
		case THREAD_READY:
			list = &master->ready;
			break;
		default:
			continue;
		}

		if (list) {
			thread_list_del(list, thread);
		} else if (thread_array) {
			thread_array[thread->u.fd] = NULL;
		}

		if (thread->ref)
			*thread->ref = NULL;

		thread_add_unuse(thread->master, thread);
	}

	/* Delete and free all cancellation requests */
	if (master->cancel_req)
		list_delete_all_node(master->cancel_req);

	/* Wake up any threads which may be blocked in thread_cancel_async() */
	master->canceled = true;
	pthread_cond_broadcast(&master->cancel_cond);
}
/*
 * Helper function used for multiple flavors of arg-based cancellation.
 */
static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
{
	struct cancel_req *cr;

	assert(m->owner == pthread_self());

	/* Only worth anything if caller supplies an arg. */
	if (arg == NULL)
		return;

	cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));

	cr->flags = flags;

	frr_with_mutex(&m->mtx) {
		cr->eventobj = arg;
		listnode_add(m->cancel_req, cr);
		do_thread_cancel(m);
	}
}

/*
 * Cancel any events which have the specified argument.
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event(struct thread_master *master, void *arg)
{
	cancel_event_helper(master, arg, 0);
}

/*
 * Cancel ready tasks with an arg matching 'arg'
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event_ready(struct thread_master *m, void *arg)
{

	/* Only cancel ready/event tasks */
	cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
}
/*
 * Cancel a specific task.
 *
 * MT-Unsafe
 *
 * @param thread task to cancel
 */
void thread_cancel(struct thread **thread)
{
	struct thread_master *master;

	if (thread == NULL || *thread == NULL)
		return;

	master = (*thread)->master;

	frrtrace(9, frr_libfrr, thread_cancel, master,
		 (*thread)->xref->funcname, (*thread)->xref->xref.file,
		 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
		 (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);

	assert(master->owner == pthread_self());

	frr_with_mutex(&master->mtx) {
		struct cancel_req *cr =
			XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
		cr->thread = *thread;
		listnode_add(master->cancel_req, cr);
		do_thread_cancel(master);
	}

	*thread = NULL;
}
/*
 * Asynchronous cancellation.
 *
 * Called with either a struct thread ** or void * to an event argument,
 * this function posts the correct cancellation request and blocks until it is
 * serviced.
 *
 * If the thread is currently running, execution blocks until it completes.
 *
 * The last two parameters are mutually exclusive, i.e. if you pass one the
 * other must be NULL.
 *
 * When the cancellation procedure executes on the target thread_master, the
 * thread * provided is checked for nullity. If it is null, the thread is
 * assumed to no longer exist and the cancellation request is a no-op. Thus
 * users of this API must pass a back-reference when scheduling the original
 * task.
 *
 * MT-Safe
 *
 * @param master the thread master with the relevant event / task
 * @param thread pointer to thread to cancel
 * @param eventobj the event
 */
void thread_cancel_async(struct thread_master *master, struct thread **thread,
			 void *eventobj)
{
	assert(!(thread && eventobj) && (thread || eventobj));

	if (thread && *thread)
		frrtrace(9, frr_libfrr, thread_cancel_async, master,
			 (*thread)->xref->funcname, (*thread)->xref->xref.file,
			 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
			 (*thread)->u.val, (*thread)->arg,
			 (*thread)->u.sands.tv_sec);
	else
		frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
			 0, NULL, 0, 0, eventobj, 0);

	assert(master->owner != pthread_self());

	frr_with_mutex(&master->mtx) {
		master->canceled = false;

		if (thread) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->threadref = thread;
			listnode_add(master->cancel_req, cr);
		} else if (eventobj) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->eventobj = eventobj;
			listnode_add(master->cancel_req, cr);
		}
		AWAKEN(master);

		while (!master->canceled)
			pthread_cond_wait(&master->cancel_cond, &master->mtx);
	}

	if (thread)
		*thread = NULL;
}
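
/*
 * Illustrative sketch (not part of the original file): the two cancellation
 * flavors.  thread_cancel() is for the pthread that owns the master;
 * thread_cancel_async() is for any other pthread and blocks until the owner
 * has serviced the request.  "peer" with its t_read back-reference is
 * hypothetical.
 */
#if 0
void example_cancel(struct thread_master *m, struct peer *peer)
{
	/* from the owning pthread; also sets peer->t_read = NULL */
	thread_cancel(&peer->t_read);

	/* from a different pthread; blocks until serviced */
	thread_cancel_async(m, &peer->t_read, NULL);

	/* or: drop every task whose arg is 'peer' (owning pthread only) */
	thread_cancel_event(m, peer);
}
#endif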
/* ------------------------------------------------------------------------- */

static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
					 struct timeval *timer_val)
{
	if (!thread_timer_list_count(timers))
		return NULL;

	struct thread *next_timer = thread_timer_list_first(timers);
	monotime_until(&next_timer->u.sands, timer_val);
	return timer_val;
}
static struct thread *thread_run(struct thread_master *m, struct thread *thread,
				 struct thread *fetch)
{
	*fetch = *thread;
	thread_add_unuse(m, thread);
	return fetch;
}
static int thread_process_io_helper(struct thread_master *m,
				    struct thread *thread, short state,
				    short actual_state, int pos)
{
	struct thread **thread_array;

	/*
	 * poll() clears the .events field, but the pollfd array we
	 * pass to poll() is a copy of the one used to schedule threads.
	 * We need to synchronize state between the two here by applying
	 * the same changes poll() made on the copy of the "real" pollfd
	 * array.
	 *
	 * This cleans up a possible infinite loop where we refuse
	 * to respond to a poll event but poll is insistent that
	 * we should.
	 */
	m->handler.pfds[pos].events &= ~(state);

	if (!thread) {
		if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
			flog_err(EC_LIB_NO_THREAD,
				 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
				 m->handler.pfds[pos].fd, actual_state);
		return 0;
	}

	if (thread->type == THREAD_READ)
		thread_array = m->read;
	else
		thread_array = m->write;

	thread_array[thread->u.fd] = NULL;
	thread_list_add_tail(&m->ready, thread);
	thread->type = THREAD_READY;

	return 1;
}
/**
 * Process I/O events.
 *
 * Walks through file descriptor array looking for those pollfds whose .revents
 * field has something interesting. Deletes any invalid file descriptors.
 *
 * @param m the thread master
 * @param num the number of active file descriptors (return value of poll())
 */
static void thread_process_io(struct thread_master *m, unsigned int num)
{
	unsigned int ready = 0;
	struct pollfd *pfds = m->handler.copy;

	for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
		/* no event for current fd? immediately continue */
		if (pfds[i].revents == 0)
			continue;

		ready++;

		/*
		 * Unless someone has called thread_cancel from another
		 * pthread, the only thing that could have changed in
		 * m->handler.pfds while we were asleep is the .events
		 * field in a given pollfd. Barring thread_cancel() that
		 * value should be a superset of the values we have in our
		 * copy, so there's no need to update it. Similarly,
		 * barring deletion, the fd should still be a valid index
		 * into the master's pfds.
		 *
		 * We are including POLLERR here to do a READ event
		 * this is because the read should fail and the
		 * read function should handle it appropriately
		 */
		if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
			thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
						 pfds[i].revents, i);
		}
		if (pfds[i].revents & POLLOUT)
			thread_process_io_helper(m, m->write[pfds[i].fd],
						 POLLOUT, pfds[i].revents, i);

		/* if one of our file descriptors is garbage, remove the same
		 * from both pfds + update sizes and index */
		if (pfds[i].revents & POLLNVAL) {
			memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
				(m->handler.pfdcount - i - 1)
					* sizeof(struct pollfd));
			m->handler.pfdcount--;
			m->handler.pfds[m->handler.pfdcount].fd = 0;
			m->handler.pfds[m->handler.pfdcount].events = 0;

			memmove(pfds + i, pfds + i + 1,
				(m->handler.copycount - i - 1)
					* sizeof(struct pollfd));
			m->handler.copycount--;
			m->handler.copy[m->handler.copycount].fd = 0;
			m->handler.copy[m->handler.copycount].events = 0;

			i--;
		}
	}
}
/* Add all timers that have popped to the ready list. */
static unsigned int thread_process_timers(struct thread_master *m,
					  struct timeval *timenow)
{
	struct timeval prev = *timenow;
	bool displayed = false;
	struct thread *thread;
	unsigned int ready = 0;

	while ((thread = thread_timer_list_first(&m->timer))) {
		if (timercmp(timenow, &thread->u.sands, <))
			break;
		prev = thread->u.sands;
		prev.tv_sec += 4;
		/*
		 * If the timer would have popped 4 seconds in the
		 * past then we are in a situation where we are
		 * really getting behind on handling of events.
		 * Let's log it and do the right thing with it.
		 */
		if (timercmp(timenow, &prev, >)) {
			atomic_fetch_add_explicit(
				&thread->hist->total_starv_warn, 1,
				memory_order_seq_cst);
			if (!displayed && !thread->ignore_timer_late) {
				flog_warn(
					EC_LIB_STARVE_THREAD,
					"Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
					thread);
				displayed = true;
			}
		}

		thread_timer_list_pop(&m->timer);
		thread->type = THREAD_READY;
		thread_list_add_tail(&m->ready, thread);
		ready++;
	}

	return ready;
}
1706 static unsigned int thread_process(struct thread_list_head
*list
)
1708 struct thread
*thread
;
1709 unsigned int ready
= 0;
1711 while ((thread
= thread_list_pop(list
))) {
1712 thread
->type
= THREAD_READY
;
1713 thread_list_add_tail(&thread
->master
->ready
, thread
);
1720 /* Fetch next ready thread. */
1721 struct thread
*thread_fetch(struct thread_master
*m
, struct thread
*fetch
)
1723 struct thread
*thread
= NULL
;
1725 struct timeval zerotime
= {0, 0};
1727 struct timeval
*tw
= NULL
;
1728 bool eintr_p
= false;
1732 /* Handle signals if any */
1733 if (m
->handle_signals
)
1734 frr_sigevent_process();
1736 pthread_mutex_lock(&m
->mtx
);
1738 /* Process any pending cancellation requests */
1739 do_thread_cancel(m
);
1742 * Attempt to flush ready queue before going into poll().
1743 * This is performance-critical. Think twice before modifying.
1745 if ((thread
= thread_list_pop(&m
->ready
))) {
1746 fetch
= thread_run(m
, thread
, fetch
);
1749 pthread_mutex_unlock(&m
->mtx
);
1750 if (!m
->ready_run_loop
)
1751 GETRUSAGE(&m
->last_getrusage
);
1752 m
->ready_run_loop
= true;
1756 m
->ready_run_loop
= false;
1757 /* otherwise, tick through scheduling sequence */
1760 * Post events to ready queue. This must come before the
1761 * following block since events should occur immediately
1763 thread_process(&m
->event
);
1766 * If there are no tasks on the ready queue, we will poll()
1767 * until a timer expires or we receive I/O, whichever comes
1768 * first. The strategy for doing this is:
1770 * - If there are events pending, set the poll() timeout to zero
1771 * - If there are no events pending, but there are timers
1772 * pending, set the timeout to the smallest remaining time on
1774 * - If there are neither timers nor events pending, but there
1775 * are file descriptors pending, block indefinitely in poll()
1776 * - If nothing is pending, it's time for the application to die
1778 * In every case except the last, we need to hit poll() at least
1779 * once per loop to avoid starvation by events
1781 if (!thread_list_count(&m
->ready
))
1782 tw
= thread_timer_wait(&m
->timer
, &tv
);
1784 if (thread_list_count(&m
->ready
) ||
1785 (tw
&& !timercmp(tw
, &zerotime
, >)))
1788 if (!tw
&& m
->handler
.pfdcount
== 0) { /* die */
1789 pthread_mutex_unlock(&m
->mtx
);
1795 * Copy pollfd array + # active pollfds in it. Not necessary to
1796 * copy the array size as this is fixed.
1798 m
->handler
.copycount
= m
->handler
.pfdcount
;
1799 memcpy(m
->handler
.copy
, m
->handler
.pfds
,
1800 m
->handler
.copycount
* sizeof(struct pollfd
));
1802 pthread_mutex_unlock(&m
->mtx
);
1805 num
= fd_poll(m
, tw
, &eintr_p
);
1807 pthread_mutex_lock(&m
->mtx
);
1809 /* Handle any errors received in poll() */
1812 pthread_mutex_unlock(&m
->mtx
);
1813 /* loop around to signal handler */
1818 flog_err(EC_LIB_SYSTEM_CALL
, "poll() error: %s",
1819 safe_strerror(errno
));
1820 pthread_mutex_unlock(&m
->mtx
);
1825 /* Post timers to ready queue. */
1827 thread_process_timers(m
, &now
);
1829 /* Post I/O to ready queue. */
1831 thread_process_io(m
, num
);
1833 pthread_mutex_unlock(&m
->mtx
);
1835 } while (!thread
&& m
->spin
);
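
/*
 * Illustrative sketch (not part of the original file): the fetch/call loop
 * that drives the scheduling sequence above; FRR's frr_run() implements this
 * same pattern for daemons.
 */
#if 0
void example_run(struct thread_master *master)
{
	struct thread thread;

	/* thread_fetch() returns NULL only once nothing is left to wait on */
	while (thread_fetch(master, &thread))
		thread_call(&thread);
}
#endif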
static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
{
	return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
		+ (a.tv_usec - b.tv_usec));
}

unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
				   unsigned long *cputime)
{
#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
	*cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
		   + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
#else
	/* This is 'user + sys' time. */
	*cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
		   + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
#endif
	return timeval_elapsed(now->real, start->real);
}
/* We should aim to yield after yield milliseconds, which defaults
   to THREAD_YIELD_TIME_SLOT .
   Note: we are using real (wall clock) time for this calculation.
   It could be argued that CPU time may make more sense in certain
   contexts.  The things to consider are whether the thread may have
   blocked (in which case wall time increases, but CPU time does not),
   or whether the system is heavily loaded with other processes competing
   for CPU time.  On balance, wall clock time seems to make sense.
   Plus it has the added benefit that gettimeofday should be faster
   than calling getrusage. */
int thread_should_yield(struct thread *thread)
{
	int result;
	frr_with_mutex(&thread->mtx) {
		result = monotime_since(&thread->real, NULL)
			 > (int64_t)thread->yield;
	}
	return result;
}
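
/*
 * Illustrative sketch (not part of the original file): a long-running task
 * using thread_should_yield() to cooperate with the single-threaded event
 * loop.  "job_ctx", "more_work" and "do_one_unit" are hypothetical.
 */
#if 0
static void long_job(struct thread *t)
{
	struct job_ctx *ctx = THREAD_ARG(t);

	while (more_work(ctx)) {
		do_one_unit(ctx);

		if (thread_should_yield(t)) {
			/* pick the remainder up on the next loop pass */
			thread_add_event(t->master, long_job, ctx, 0, NULL);
			return;
		}
	}
}
#endif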
void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
{
	frr_with_mutex(&thread->mtx) {
		thread->yield = yield_time;
	}
}
void thread_getrusage(RUSAGE_T *r)
{
	monotime(&r->real);
	if (!cputime_enabled) {
		memset(&r->cpu, 0, sizeof(r->cpu));
		return;
	}

#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
	/* not currently implemented in Linux's vDSO, but maybe at some point
	 * in the future?
	 */
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
#if defined RUSAGE_THREAD
#define FRR_RUSAGE RUSAGE_THREAD
#else
#define FRR_RUSAGE RUSAGE_SELF
#endif
	getrusage(FRR_RUSAGE, &(r->cpu));
#endif
}
/*
 * Call a thread.
 *
 * This function will atomically update the thread's usage history. At present
 * this is the only spot where usage history is written. Nevertheless the code
 * has been written such that the introduction of writers in the future should
 * not need to update it provided the writers atomically perform only the
 * operations done here, i.e. updating the total and maximum times. In
 * particular, the maximum real and cpu times must be monotonically increasing
 * or this code is not correct.
 */
void thread_call(struct thread *thread)
{
	RUSAGE_T before, after;

	/* if the thread being called is the CLI, it may change cputime_enabled
	 * ("service cputime-stats" command), which can result in nonsensical
	 * and very confusing warnings
	 */
	bool cputime_enabled_here = cputime_enabled;

	if (thread->master->ready_run_loop)
		before = thread->master->last_getrusage;
	else
		GETRUSAGE(&before);

	thread->real = before.real;

	frrtrace(9, frr_libfrr, thread_call, thread->master,
		 thread->xref->funcname, thread->xref->xref.file,
		 thread->xref->xref.line, NULL, thread->u.fd,
		 thread->u.val, thread->arg, thread->u.sands.tv_sec);

	pthread_setspecific(thread_current, thread);
	(*thread->func)(thread);
	pthread_setspecific(thread_current, NULL);

	GETRUSAGE(&after);
	thread->master->last_getrusage = after;

	unsigned long walltime, cputime;
	unsigned long exp;

	walltime = thread_consumed_time(&after, &before, &cputime);

	/* update walltime */
	atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
				  memory_order_seq_cst);
	exp = atomic_load_explicit(&thread->hist->real.max,
				   memory_order_seq_cst);
	while (exp < walltime
	       && !atomic_compare_exchange_weak_explicit(
		       &thread->hist->real.max, &exp, walltime,
		       memory_order_seq_cst, memory_order_seq_cst))
		;

	if (cputime_enabled_here && cputime_enabled) {
		/* update cputime */
		atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
					  memory_order_seq_cst);
		exp = atomic_load_explicit(&thread->hist->cpu.max,
					   memory_order_seq_cst);
		while (exp < cputime
		       && !atomic_compare_exchange_weak_explicit(
			       &thread->hist->cpu.max, &exp, cputime,
			       memory_order_seq_cst, memory_order_seq_cst))
			;
	}

	atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
				  memory_order_seq_cst);
	atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
				 memory_order_seq_cst);

	if (cputime_enabled_here && cputime_enabled && cputime_threshold
	    && cputime > cputime_threshold) {
		/*
		 * We have a CPU Hog on our hands.  The time FRR has spent
		 * doing actual work (not sleeping) exceeds the configured
		 * threshold (5 seconds by default).  Whinge about it now,
		 * so we're aware this is yet another task to fix.
		 */
		atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
					  1, memory_order_seq_cst);
		flog_warn(
			EC_LIB_SLOW_THREAD_CPU,
			"CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
			thread->xref->funcname, (unsigned long)thread->func,
			walltime / 1000, cputime / 1000);
	} else if (walltime_threshold && walltime > walltime_threshold) {
		/*
		 * The runtime for a task exceeds the wallclock threshold, but
		 * the cpu time is under the cpu threshold.  Let's whine about
		 * this because this could imply some sort of scheduling issue.
		 */
		atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
					  1, memory_order_seq_cst);
		flog_warn(
			EC_LIB_SLOW_THREAD_WALL,
			"STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
			thread->xref->funcname, (unsigned long)thread->func,
			walltime / 1000, cputime / 1000);
	}
}
/* Execute thread */
void _thread_execute(const struct xref_threadsched *xref,
		     struct thread_master *m, void (*func)(struct thread *),
		     void *arg, int val)
{
	struct thread *thread;

	/* Get or allocate new thread to execute. */
	frr_with_mutex(&m->mtx) {
		thread = thread_get(m, THREAD_EVENT, func, arg, xref);

		/* Set its event value. */
		frr_with_mutex(&thread->mtx) {
			thread->add_type = THREAD_EXECUTE;
			thread->u.val = val;
			thread->ref = &thread;
		}
	}

	/* Execute thread doing all accounting. */
	thread_call(thread);

	/* Give back or free thread. */
	thread_add_unuse(m, thread);
}
/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
void debug_signals(const sigset_t *sigs)
{
	int i, found;
	sigset_t tmpsigs;
	char buf[300];

	/*
	 * We're only looking at the non-realtime signals here, so we need
	 * some limit value. Platform differences mean at some point we just
	 * need to pick a reasonable value.
	 */
#if defined SIGRTMIN
#  define LAST_SIGNAL SIGRTMIN
#else
#  define LAST_SIGNAL 32
#endif

	if (sigs == NULL) {
		sigemptyset(&tmpsigs);
		pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
		sigs = &tmpsigs;
	}

	found = 0;
	buf[0] = '\0';

	for (i = 0; i < LAST_SIGNAL; i++) {
		char tmp[20];

		if (sigismember(sigs, i) > 0) {
			if (found > 0)
				strlcat(buf, ",", sizeof(buf));
			snprintf(tmp, sizeof(tmp), "%d", i);
			strlcat(buf, tmp, sizeof(buf));
			found++;
		}
	}

	if (found == 0)
		snprintf(buf, sizeof(buf), "<none>");

	zlog_debug("%s: %s", __func__, buf);
}
bool thread_is_scheduled(struct thread *thread)
{
	if (thread == NULL)
		return false;

	return true;
}
static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
				   const struct thread *thread)
{
	static const char * const types[] = {
		[THREAD_READ] = "read",
		[THREAD_WRITE] = "write",
		[THREAD_TIMER] = "timer",
		[THREAD_EVENT] = "event",
		[THREAD_READY] = "ready",
		[THREAD_UNUSED] = "unused",
		[THREAD_EXECUTE] = "exec",
	};
	ssize_t rv = 0;
	char info[16] = "";

	if (!thread)
		return bputs(buf, "{(thread *)NULL}");

	rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);

	if (thread->type < array_size(types) && types[thread->type])
		rv += bprintfrr(buf, " %-6s", types[thread->type]);
	else
		rv += bprintfrr(buf, " INVALID(%u)", thread->type);

	switch (thread->type) {
	case THREAD_READ:
	case THREAD_WRITE:
		snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
		break;

	case THREAD_TIMER:
		snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
		break;
	}

	rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
			thread->xref->funcname, thread->xref->dest,
			thread->xref->xref.file, thread->xref->xref.line);
	return rv;
}
);
2139 static ssize_t
printfrr_thread(struct fbuf
*buf
, struct printfrr_eargs
*ea
,
2142 const struct thread
*thread
= ptr
;
2143 struct timespec remain
= {};
2145 if (ea
->fmt
[0] == 'D') {
2147 return printfrr_thread_dbg(buf
, ea
, thread
);
2151 /* need to jump over time formatting flag characters in the
2152 * input format string, i.e. adjust ea->fmt!
2154 printfrr_time(buf
, ea
, &remain
,
2155 TIMEFMT_TIMER_DEADLINE
| TIMEFMT_SKIP
);
2156 return bputch(buf
, '-');
2159 TIMEVAL_TO_TIMESPEC(&thread
->u
.sands
, &remain
);
2160 return printfrr_time(buf
, ea
, &remain
, TIMEFMT_TIMER_DEADLINE
);
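
/*
 * Illustrative sketch (not part of the original file): with the extension
 * registered above, "%pTH" prints the time remaining until a timer task's
 * deadline and "%pTHD" prints a full debug description of any task (the
 * EC_LIB_TIMER_TOO_LONG warning earlier in this file uses the latter).
 */
#if 0
void example_log(struct thread *t_timer)
{
	zlog_debug("timer expires in %pTH", t_timer);
	zlog_debug("task: %pTHD", t_timer);
}
#endif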