/* Thread management routine
 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* #define DEBUG */

#include <zebra.h>
#include <sys/resource.h>

#include "thread.h"
#include "memory.h"
#include "frrcu.h"
#include "log.h"
#include "hash.h"
#include "command.h"
#include "sigevent.h"
#include "network.h"
#include "jhash.h"
#include "frratomic.h"
#include "frr_pthread.h"
#include "lib_errors.h"
#include "libfrr_trace.h"
#include "libfrr.h"

DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");

DECLARE_LIST(thread_list, struct thread, threaditem);

struct cancel_req {
	int flags;
	struct thread *thread;
	void *eventobj;
	struct thread **threadref;
};

/* Flags for task cancellation */
#define THREAD_CANCEL_FLAG_READY 0x01

static int thread_timer_cmp(const struct thread *a, const struct thread *b)
{
	if (a->u.sands.tv_sec < b->u.sands.tv_sec)
		return -1;
	if (a->u.sands.tv_sec > b->u.sands.tv_sec)
		return 1;
	if (a->u.sands.tv_usec < b->u.sands.tv_usec)
		return -1;
	if (a->u.sands.tv_usec > b->u.sands.tv_usec)
		return 1;
	return 0;
}

DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);

#if defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif

#define AWAKEN(m)                                                              \
	do {                                                                   \
		const unsigned char wakebyte = 0x01;                           \
		write(m->io_pipe[1], &wakebyte, 1);                            \
	} while (0);

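/*
 * Illustrative sketch (hypothetical caller, not part of this file's API):
 * AWAKEN() is the "pipe poker" - writing one byte to m->io_pipe[1] makes
 * io_pipe[0] readable, so a poll()/ppoll() blocked in fd_poll() returns
 * immediately. A caller that just queued work from another pthread would do:
 *
 *	frr_with_mutex(&m->mtx) {
 *		thread_list_add_tail(&m->event, thread);
 *		AWAKEN(m);
 *	}
 *
 * The scheduling functions below (_thread_add_event() etc.) follow exactly
 * this pattern.
 */
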
/* control variable for initializer */
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
pthread_key_t thread_current;

static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *masters;

static void thread_free(struct thread_master *master, struct thread *thread);

#ifndef EXCLUDE_CPU_TIME
#define EXCLUDE_CPU_TIME 0
#endif
#ifndef CONSUMED_TIME_CHECK
#define CONSUMED_TIME_CHECK 0
#endif

bool cputime_enabled = !EXCLUDE_CPU_TIME;
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;

/* CLI start ---------------------------------------------------------------- */
#ifndef VTYSH_EXTRACT_PL
#include "lib/thread_clippy.c"
#endif

static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
{
	int size = sizeof(a->func);

	return jhash(&a->func, size, 0);
}

static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
				const struct cpu_thread_history *b)
{
	return a->func == b->func;
}

static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
{
	struct cpu_thread_history *new;
	new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
	new->func = a->func;
	new->funcname = a->funcname;
	return new;
}

static void cpu_record_hash_free(void *a)
{
	struct cpu_thread_history *hist = a;

	XFREE(MTYPE_THREAD_STATS, hist);
}

static void vty_out_cpu_thread_history(struct vty *vty,
				       struct cpu_thread_history *a)
{
	vty_out(vty,
		"%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
		a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
		a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
		(a->real.total / a->total_calls), a->real.max,
		a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
	vty_out(vty, " %c%c%c%c%c %s\n",
		a->types & (1 << THREAD_READ) ? 'R' : ' ',
		a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
		a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
		a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
		a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
}

static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
{
	struct cpu_thread_history *totals = args[0];
	struct cpu_thread_history copy;
	struct vty *vty = args[1];
	uint8_t *filter = args[2];

	struct cpu_thread_history *a = bucket->data;

	copy.total_active =
		atomic_load_explicit(&a->total_active, memory_order_seq_cst);
	copy.total_calls =
		atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
	copy.total_cpu_warn =
		atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
	copy.total_wall_warn =
		atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
	copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
						     memory_order_seq_cst);
	copy.cpu.total =
		atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
	copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
	copy.real.total =
		atomic_load_explicit(&a->real.total, memory_order_seq_cst);
	copy.real.max =
		atomic_load_explicit(&a->real.max, memory_order_seq_cst);
	copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
	copy.funcname = a->funcname;

	if (!(copy.types & *filter))
		return;

	vty_out_cpu_thread_history(vty, &copy);
	totals->total_active += copy.total_active;
	totals->total_calls += copy.total_calls;
	totals->total_cpu_warn += copy.total_cpu_warn;
	totals->total_wall_warn += copy.total_wall_warn;
	totals->total_starv_warn += copy.total_starv_warn;
	totals->real.total += copy.real.total;
	if (totals->real.max < copy.real.max)
		totals->real.max = copy.real.max;
	totals->cpu.total += copy.cpu.total;
	if (totals->cpu.max < copy.cpu.max)
		totals->cpu.max = copy.cpu.max;
}

static void cpu_record_print(struct vty *vty, uint8_t filter)
{
	struct cpu_thread_history tmp;
	void *args[3] = {&tmp, vty, &filter};
	struct thread_master *m;
	struct listnode *ln;

	if (!cputime_enabled)
		vty_out(vty,
			"\n"
			"Collecting CPU time statistics is currently disabled. Following statistics\n"
			"will be zero or may display data from when collection was enabled. Use the\n"
			"  \"service cputime-stats\" command to start collecting data.\n"
			"\nCounters and wallclock times are always maintained and should be accurate.\n");

	memset(&tmp, 0, sizeof(tmp));
	tmp.funcname = "TOTAL";
	tmp.types = filter;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			const char *name = m->name ? m->name : "main";

			char underline[strlen(name) + 1];
			memset(underline, '-', sizeof(underline));
			underline[sizeof(underline) - 1] = '\0';

			vty_out(vty, "\n");
			vty_out(vty, "Showing statistics for pthread %s\n",
				name);
			vty_out(vty, "-------------------------------%s\n",
				underline);
			vty_out(vty, "%30s %18s %18s\n", "",
				"CPU (user+system):", "Real (wall-clock):");
			vty_out(vty,
				"Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
			vty_out(vty, " Avg uSec Max uSecs");
			vty_out(vty,
				"  CPU_Warn Wall_Warn Starv_Warn   Type  Thread\n");

			if (m->cpu_record->count)
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_print,
					args);
			else
				vty_out(vty, "No data to display yet.\n");

			vty_out(vty, "\n");
		}
	}

	vty_out(vty, "\n");
	vty_out(vty, "Total thread statistics\n");
	vty_out(vty, "-------------------------\n");
	vty_out(vty, "%30s %18s %18s\n", "",
		"CPU (user+system):", "Real (wall-clock):");
	vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
	vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
	vty_out(vty, "   Type  Thread\n");

	if (tmp.total_calls > 0)
		vty_out_cpu_thread_history(vty, &tmp);
}

static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
{
	uint8_t *filter = args[0];
	struct hash *cpu_record = args[1];

	struct cpu_thread_history *a = bucket->data;

	if (!(a->types & *filter))
		return;

	hash_release(cpu_record, bucket->data);
}

static void cpu_record_clear(uint8_t filter)
{
	uint8_t *tmp = &filter;
	struct thread_master *m;
	struct listnode *ln;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			frr_with_mutex(&m->mtx) {
				void *args[2] = {tmp, m->cpu_record};
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_clear,
					args);
			}
		}
	}
}

static uint8_t parse_filter(const char *filterstr)
{
	int i = 0;
	int filter = 0;

	while (filterstr[i] != '\0') {
		switch (filterstr[i]) {
		case 'r':
		case 'R':
			filter |= (1 << THREAD_READ);
			break;
		case 'w':
		case 'W':
			filter |= (1 << THREAD_WRITE);
			break;
		case 't':
		case 'T':
			filter |= (1 << THREAD_TIMER);
			break;
		case 'e':
		case 'E':
			filter |= (1 << THREAD_EVENT);
			break;
		case 'x':
		case 'X':
			filter |= (1 << THREAD_EXECUTE);
			break;
		default:
			break;
		}
		++i;
	}
	return filter;
}

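/*
 * Illustrative example (assumed values, derived directly from the cases
 * above): parse_filter("rw") returns
 * (1 << THREAD_READ) | (1 << THREAD_WRITE); parse_filter("RW") is
 * equivalent, and parse_filter("q") returns 0, which the CLI handlers
 * below treat as an invalid filter.
 */
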
DEFUN_NOSH (show_thread_cpu,
	    show_thread_cpu_cmd,
	    "show thread cpu [FILTER]",
	    SHOW_STR
	    "Thread information\n"
	    "Thread CPU usage\n"
	    "Display filter (rwtex)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEX'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_print(vty, filter);
	return CMD_SUCCESS;
}

DEFPY (service_cputime_stats,
       service_cputime_stats_cmd,
       "[no] service cputime-stats",
       NO_STR
       "Set up miscellaneous service\n"
       "Collect CPU usage statistics\n")
{
	cputime_enabled = !no;
	return CMD_SUCCESS;
}

DEFPY (service_cputime_warning,
       service_cputime_warning_cmd,
       "[no] service cputime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		cputime_threshold = 0;
	else
		cputime_threshold = cputime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_cputime_warning,
       no_service_cputime_warning_cmd,
       "no service cputime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n")

DEFPY (service_walltime_warning,
       service_walltime_warning_cmd,
       "[no] service walltime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		walltime_threshold = 0;
	else
		walltime_threshold = walltime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_walltime_warning,
       no_service_walltime_warning_cmd,
       "no service walltime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n")

static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct thread *thread;
	uint32_t i;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing poll FD's for %s\n", name);
	vty_out(vty, "----------------------%s\n", underline);
	vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
		m->fd_limit);
	for (i = 0; i < m->handler.pfdcount; i++) {
		vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
			m->handler.pfds[i].fd, m->handler.pfds[i].events,
			m->handler.pfds[i].revents);

		if (m->handler.pfds[i].events & POLLIN) {
			thread = m->read[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR ");
			else
				vty_out(vty, "%s ", thread->xref->funcname);
		} else
			vty_out(vty, " ");

		if (m->handler.pfds[i].events & POLLOUT) {
			thread = m->write[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR\n");
			else
				vty_out(vty, "%s\n", thread->xref->funcname);
		} else
			vty_out(vty, "\n");
	}
}

DEFUN_NOSH (show_thread_poll,
	    show_thread_poll_cmd,
	    "show thread poll",
	    SHOW_STR
	    "Thread information\n"
	    "Show poll FD's and information\n")
{
	struct listnode *node;
	struct thread_master *m;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
			show_thread_poll_helper(vty, m);
		}
	}

	return CMD_SUCCESS;
}


DEFUN (clear_thread_cpu,
       clear_thread_cpu_cmd,
       "clear thread cpu [FILTER]",
       "Clear stored data in all pthreads\n"
       "Thread information\n"
       "Thread CPU usage\n"
       "Display filter (rwtex)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEX'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_clear(filter);
	return CMD_SUCCESS;
}

void thread_cmd_init(void)
{
	install_element(VIEW_NODE, &show_thread_cpu_cmd);
	install_element(VIEW_NODE, &show_thread_poll_cmd);
	install_element(ENABLE_NODE, &clear_thread_cpu_cmd);

	install_element(CONFIG_NODE, &service_cputime_stats_cmd);
	install_element(CONFIG_NODE, &service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &service_walltime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
}
/* CLI end ------------------------------------------------------------------ */


static void cancelreq_del(void *cr)
{
	XFREE(MTYPE_TMP, cr);
}

/* initializer, only ever called once */
static void initializer(void)
{
	pthread_key_create(&thread_current, NULL);
}

struct thread_master *thread_master_create(const char *name)
{
	struct thread_master *rv;
	struct rlimit limit;

	pthread_once(&init_once, &initializer);

	rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));

	/* Initialize master mutex */
	pthread_mutex_init(&rv->mtx, NULL);
	pthread_cond_init(&rv->cancel_cond, NULL);

	/* Set name */
	name = name ? name : "default";
	rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);

	/* Initialize I/O task data structures */

	/* Use configured limit if present, ulimit otherwise. */
	rv->fd_limit = frr_get_fd_limit();
	if (rv->fd_limit == 0) {
		getrlimit(RLIMIT_NOFILE, &limit);
		rv->fd_limit = (int)limit.rlim_cur;
	}

	rv->read = XCALLOC(MTYPE_THREAD_POLL,
			   sizeof(struct thread *) * rv->fd_limit);

	rv->write = XCALLOC(MTYPE_THREAD_POLL,
			    sizeof(struct thread *) * rv->fd_limit);

	char tmhashname[strlen(name) + 32];
	snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
		 name);
	rv->cpu_record = hash_create_size(
		8, (unsigned int (*)(const void *))cpu_record_hash_key,
		(bool (*)(const void *, const void *))cpu_record_hash_cmp,
		tmhashname);

	thread_list_init(&rv->event);
	thread_list_init(&rv->ready);
	thread_list_init(&rv->unuse);
	thread_timer_list_init(&rv->timer);

	/* Initialize thread_fetch() settings */
	rv->spin = true;
	rv->handle_signals = true;

	/* Set pthread owner, should be updated by actual owner */
	rv->owner = pthread_self();
	rv->cancel_req = list_new();
	rv->cancel_req->del = cancelreq_del;
	rv->canceled = true;

	/* Initialize pipe poker */
	pipe(rv->io_pipe);
	set_nonblocking(rv->io_pipe[0]);
	set_nonblocking(rv->io_pipe[1]);

	/* Initialize data structures for poll() */
	rv->handler.pfdsize = rv->fd_limit;
	rv->handler.pfdcount = 0;
	rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);
	rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);

	/* add to list of threadmasters */
	frr_with_mutex(&masters_mtx) {
		if (!masters)
			masters = list_new();

		listnode_add(masters, rv);
	}

	return rv;
}
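
/*
 * Minimal usage sketch (hypothetical daemon setup, not code from this
 * library): a typical FRR-style process creates one master per pthread and
 * hands it to the scheduling calls below.
 *
 *	struct thread_master *master = thread_master_create("main");
 *	// ... thread_add_read()/thread_add_timer() registrations ...
 *	// ... fetch/run loop (see thread_fetch() below) ...
 *	thread_master_free(master);
 */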

void thread_master_set_name(struct thread_master *master, const char *name)
{
	frr_with_mutex(&master->mtx) {
		XFREE(MTYPE_THREAD_MASTER, master->name);
		master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
	}
}

#define THREAD_UNUSED_DEPTH 10

/* Move thread to unuse list. */
static void thread_add_unuse(struct thread_master *m, struct thread *thread)
{
	pthread_mutex_t mtxc = thread->mtx;

	assert(m != NULL && thread != NULL);

	thread->hist->total_active--;
	memset(thread, 0, sizeof(struct thread));
	thread->type = THREAD_UNUSED;

	/* Restore the thread mutex context. */
	thread->mtx = mtxc;

	if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
		thread_list_add_tail(&m->unuse, thread);
		return;
	}

	thread_free(m, thread);
}

/* Free all unused threads. */
static void thread_list_free(struct thread_master *m,
			     struct thread_list_head *list)
{
	struct thread *t;

	while ((t = thread_list_pop(list)))
		thread_free(m, t);
}

static void thread_array_free(struct thread_master *m,
			      struct thread **thread_array)
{
	struct thread *t;
	int index;

	for (index = 0; index < m->fd_limit; ++index) {
		t = thread_array[index];
		if (t) {
			thread_array[index] = NULL;
			thread_free(m, t);
		}
	}
	XFREE(MTYPE_THREAD_POLL, thread_array);
}

/*
 * thread_master_free_unused
 *
 * As threads are finished with, they are put on the unuse list for later
 * reuse. If we are shutting down, free up the unused threads so we can
 * see whether we forgot to shut anything off.
 */
void thread_master_free_unused(struct thread_master *m)
{
	frr_with_mutex(&m->mtx) {
		struct thread *t;
		while ((t = thread_list_pop(&m->unuse)))
			thread_free(m, t);
	}
}

/* Stop thread scheduler. */
void thread_master_free(struct thread_master *m)
{
	struct thread *t;

	frr_with_mutex(&masters_mtx) {
		listnode_delete(masters, m);
		if (masters->count == 0) {
			list_delete(&masters);
		}
	}

	thread_array_free(m, m->read);
	thread_array_free(m, m->write);
	while ((t = thread_timer_list_pop(&m->timer)))
		thread_free(m, t);
	thread_list_free(m, &m->event);
	thread_list_free(m, &m->ready);
	thread_list_free(m, &m->unuse);
	pthread_mutex_destroy(&m->mtx);
	pthread_cond_destroy(&m->cancel_cond);
	close(m->io_pipe[0]);
	close(m->io_pipe[1]);
	list_delete(&m->cancel_req);
	m->cancel_req = NULL;

	hash_clean(m->cpu_record, cpu_record_hash_free);
	hash_free(m->cpu_record);
	m->cpu_record = NULL;

	XFREE(MTYPE_THREAD_MASTER, m->name);
	XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
	XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
	XFREE(MTYPE_THREAD_MASTER, m);
}

/* Return remaining time in milliseconds. */
unsigned long thread_timer_remain_msec(struct thread *thread)
{
	int64_t remain;

	frr_with_mutex(&thread->mtx) {
		remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
	}

	return remain < 0 ? 0 : remain;
}

/* Return remaining time in seconds. */
unsigned long thread_timer_remain_second(struct thread *thread)
{
	return thread_timer_remain_msec(thread) / 1000LL;
}

struct timeval thread_timer_remain(struct thread *thread)
{
	struct timeval remain;
	frr_with_mutex(&thread->mtx) {
		monotime_until(&thread->u.sands, &remain);
	}
	return remain;
}

static int time_hhmmss(char *buf, int buf_size, long sec)
{
	long hh;
	long mm;
	int wr;

	assert(buf_size >= 8);

	hh = sec / 3600;
	sec %= 3600;
	mm = sec / 60;
	sec %= 60;

	wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);

	return wr != 8;
}

char *thread_timer_to_hhmmss(char *buf, int buf_size,
			     struct thread *t_timer)
{
	if (t_timer) {
		time_hhmmss(buf, buf_size,
			    thread_timer_remain_second(t_timer));
	} else {
		snprintf(buf, buf_size, "--:--:--");
	}
	return buf;
}

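/*
 * Worked example (assumed values): for a timer with 3725 seconds left,
 * time_hhmmss() computes hh = 3725 / 3600 = 1, mm = 125 / 60 = 2 and
 * sec = 5, so the buffer receives "01:02:05"; a NULL timer yields
 * "--:--:--".
 */
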
/* Get new thread. */
static struct thread *thread_get(struct thread_master *m, uint8_t type,
				 void (*func)(struct thread *), void *arg,
				 const struct xref_threadsched *xref)
{
	struct thread *thread = thread_list_pop(&m->unuse);
	struct cpu_thread_history tmp;

	if (!thread) {
		thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
		/* mutex only needs to be initialized at struct creation. */
		pthread_mutex_init(&thread->mtx, NULL);
		m->alloc++;
	}

	thread->type = type;
	thread->add_type = type;
	thread->master = m;
	thread->arg = arg;
	thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
	thread->ref = NULL;
	thread->ignore_timer_late = false;

	/*
	 * If the passed-in funcname is not what we have stored, then
	 * thread->hist needs to be updated. We keep the last one around
	 * in unused on the assumption that we are probably going to
	 * immediately allocate the same type of thread; this hopefully
	 * saves us some serious hash_get lookups.
	 */
	if ((thread->xref && thread->xref->funcname != xref->funcname)
	    || thread->func != func) {
		tmp.func = func;
		tmp.funcname = xref->funcname;
		thread->hist =
			hash_get(m->cpu_record, &tmp,
				 (void *(*)(void *))cpu_record_hash_alloc);
	}
	thread->hist->total_active++;
	thread->func = func;
	thread->xref = xref;

	return thread;
}

static void thread_free(struct thread_master *master, struct thread *thread)
{
	/* Update statistics. */
	assert(master->alloc > 0);
	master->alloc--;

	/* Free allocated resources. */
	pthread_mutex_destroy(&thread->mtx);
	XFREE(MTYPE_THREAD, thread);
}

static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
		   bool *eintr_p)
{
	sigset_t origsigs;
	unsigned char trash[64];
	nfds_t count = m->handler.copycount;

	/*
	 * If timer_wait is null here, that means poll() should block
	 * indefinitely, unless the thread_master has overridden it by setting
	 * ->selectpoll_timeout.
	 *
	 * If the value is positive, it specifies the maximum number of
	 * milliseconds to wait. If the timeout is -1, it specifies that
	 * we should never wait and always return immediately even if no
	 * event is detected. If the value is zero, the behavior is default.
	 */
	int timeout = -1;

	/* number of file descriptors with events */
	int num;

	if (timer_wait != NULL
	    && m->selectpoll_timeout == 0) // use the default value
		timeout = (timer_wait->tv_sec * 1000)
			  + (timer_wait->tv_usec / 1000);
	else if (m->selectpoll_timeout > 0) // use the user's timeout
		timeout = m->selectpoll_timeout;
	else if (m->selectpoll_timeout
		 < 0) // effect a poll (return immediately)
		timeout = 0;

	zlog_tls_buffer_flush();
	rcu_read_unlock();
	rcu_assert_read_unlocked();

	/* add poll pipe poker */
	assert(count + 1 < m->handler.pfdsize);
	m->handler.copy[count].fd = m->io_pipe[0];
	m->handler.copy[count].events = POLLIN;
	m->handler.copy[count].revents = 0x00;

	/* We need to deal with a signal-handling race here: we
	 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
	 * that may arrive just before we enter poll(). We will block the
	 * key signals, then check whether any have arrived - if so, we return
	 * before calling poll(). If not, we'll re-enable the signals
	 * in the ppoll() call.
	 */

	sigemptyset(&origsigs);
	if (m->handle_signals) {
		/* Main pthread that handles the app signals */
		if (frr_sigevent_check(&origsigs)) {
			/* Signal to process - restore signal mask and return */
			pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
			num = -1;
			*eintr_p = true;
			goto done;
		}
	} else {
		/* Don't make any changes for the non-main pthreads */
		pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
	}

#if defined(HAVE_PPOLL)
	struct timespec ts, *tsp;

	if (timeout >= 0) {
		ts.tv_sec = timeout / 1000;
		ts.tv_nsec = (timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
#else
	/* Not ideal - there is a race after we restore the signal mask */
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
	num = poll(m->handler.copy, count + 1, timeout);
#endif

done:

	if (num < 0 && errno == EINTR)
		*eintr_p = true;

	if (num > 0 && m->handler.copy[count].revents != 0 && num--)
		while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
			;

	rcu_read_lock();

	return num;
}

/* Add new read thread. */
void _thread_add_read_write(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg, int fd,
			    struct thread **t_ptr)
{
	int dir = xref->thread_type;
	struct thread *thread = NULL;
	struct thread **thread_array;

	if (dir == THREAD_READ)
		frrtrace(9, frr_libfrr, schedule_read, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);
	else
		frrtrace(9, frr_libfrr, schedule_write, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);

	assert(fd >= 0);
	if (fd >= m->fd_limit)
		assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			// thread is already scheduled; don't reschedule
			break;

		/* default to a new pollfd */
		nfds_t queuepos = m->handler.pfdcount;

		if (dir == THREAD_READ)
			thread_array = m->read;
		else
			thread_array = m->write;

		/* if we already have a pollfd for our file descriptor, find and
		 * use it */
		for (nfds_t i = 0; i < m->handler.pfdcount; i++)
			if (m->handler.pfds[i].fd == fd) {
				queuepos = i;

#ifdef DEV_BUILD
				/*
				 * What happens if we have a thread already
				 * created for this event?
				 */
				if (thread_array[fd])
					assert(!"Thread already scheduled for file descriptor");
#endif
				break;
			}

		/* make sure we have room for this fd + pipe poker fd */
		assert(queuepos + 1 < m->handler.pfdsize);

		thread = thread_get(m, dir, func, arg, xref);

		m->handler.pfds[queuepos].fd = fd;
		m->handler.pfds[queuepos].events |=
			(dir == THREAD_READ ? POLLIN : POLLOUT);

		if (queuepos == m->handler.pfdcount)
			m->handler.pfdcount++;

		if (thread) {
			frr_with_mutex(&thread->mtx) {
				thread->u.fd = fd;
				thread_array[thread->u.fd] = thread;
			}

			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		AWAKEN(m);
	}
}

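/*
 * Usage sketch (hypothetical caller; thread_add_read() is the wrapper macro
 * from thread.h that fills in the xref argument):
 *
 *	static void my_sock_readable(struct thread *t)
 *	{
 *		int fd = THREAD_FD(t);
 *		// ... read from fd; re-schedule if more data is expected ...
 *	}
 *
 *	struct thread *t_read = NULL;
 *	thread_add_read(master, my_sock_readable, NULL, sock, &t_read);
 *
 * Passing &t_read both prevents accidental double-scheduling (see the
 * t_ptr check above) and gives thread_cancel() a safe back-reference.
 */
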
static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
				      struct thread_master *m,
				      void (*func)(struct thread *), void *arg,
				      struct timeval *time_relative,
				      struct thread **t_ptr)
{
	struct thread *thread;
	struct timeval t;

	assert(m != NULL);

	assert(time_relative);

	frrtrace(9, frr_libfrr, schedule_timer, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);

	/* Compute expiration/deadline time. */
	monotime(&t);
	timeradd(&t, time_relative, &t);

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			return;

		thread = thread_get(m, THREAD_TIMER, func, arg, xref);

		frr_with_mutex(&thread->mtx) {
			thread->u.sands = t;
			thread_timer_list_add(&m->timer, thread);
			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		/* The timer list is sorted - if this new timer
		 * might change the time we'll wait for, give the pthread
		 * a chance to re-compute.
		 */
		if (thread_timer_list_first(&m->timer) == thread)
			AWAKEN(m);
	}
}


/* Add timer event thread. */
void _thread_add_timer(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer;
	trel.tv_usec = 0;

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "millisecond" resolution */
void _thread_add_timer_msec(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg,
			    long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer / 1000;
	trel.tv_usec = 1000 * (timer % 1000);

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "timeval" resolution */
void _thread_add_timer_tv(const struct xref_threadsched *xref,
			  struct thread_master *m,
			  void (*func)(struct thread *), void *arg,
			  struct timeval *tv, struct thread **t_ptr)
{
	_thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}

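/*
 * Usage sketch (hypothetical caller; thread_add_timer() and
 * thread_add_timer_msec() are the wrapper macros from thread.h):
 *
 *	static void my_expire_cb(struct thread *t)
 *	{
 *		// timers are one-shot: re-add here if it should repeat
 *	}
 *
 *	struct thread *t_expire = NULL;
 *	thread_add_timer(master, my_expire_cb, NULL, 5, &t_expire);
 *	// or, at millisecond granularity:
 *	thread_add_timer_msec(master, my_expire_cb, NULL, 250, &t_expire);
 */
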
/* Add simple event thread. */
void _thread_add_event(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, int val, struct thread **t_ptr)
{
	struct thread *thread = NULL;

	frrtrace(9, frr_libfrr, schedule_event, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, val, arg, 0);

	assert(m != NULL);

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			break;

		thread = thread_get(m, THREAD_EVENT, func, arg, xref);
		frr_with_mutex(&thread->mtx) {
			thread->u.val = val;
			thread_list_add_tail(&m->event, thread);
		}

		if (t_ptr) {
			*t_ptr = thread;
			thread->ref = t_ptr;
		}

		AWAKEN(m);
	}
}

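/*
 * Usage sketch (hypothetical caller; thread_add_event() is the wrapper
 * macro from thread.h). Events bypass poll() entirely: they are moved to
 * the ready queue on the next scheduling pass, so they run "immediately":
 *
 *	thread_add_event(master, my_event_cb, my_ctx, 0, NULL);
 *
 * my_event_cb and my_ctx are assumed names; the callback has the same
 * void (*)(struct thread *) signature as every task in this file.
 */
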
/* Thread cancellation ------------------------------------------------------ */

/**
 * NOT's out the .events field of pollfd corresponding to the given file
 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
 *
 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
 * implementation for details.
 *
 * @param master
 * @param fd
 * @param state the event to cancel. One or more (OR'd together) of the
 * following:
 *   - POLLIN
 *   - POLLOUT
 */
static void thread_cancel_rw(struct thread_master *master, int fd, short state,
			     int idx_hint)
{
	bool found = false;

	/* find the index of corresponding pollfd */
	nfds_t i;

	/* Cancel POLLHUP too just in case some bozo set it */
	state |= POLLHUP;

	/* Some callers know the index of the pfd already */
	if (idx_hint >= 0) {
		i = idx_hint;
		found = true;
	} else {
		/* Have to look for the fd in the pfd array */
		for (i = 0; i < master->handler.pfdcount; i++)
			if (master->handler.pfds[i].fd == fd) {
				found = true;
				break;
			}
	}

	if (!found) {
		zlog_debug(
			"[!] Received cancellation request for nonexistent rw job");
		zlog_debug("[!] threadmaster: %s | fd: %d",
			   master->name ? master->name : "", fd);
		return;
	}

	/* NOT out event. */
	master->handler.pfds[i].events &= ~(state);

	/* If all events are canceled, delete / resize the pollfd array. */
	if (master->handler.pfds[i].events == 0) {
		memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
			(master->handler.pfdcount - i - 1)
				* sizeof(struct pollfd));
		master->handler.pfdcount--;
		master->handler.pfds[master->handler.pfdcount].fd = 0;
		master->handler.pfds[master->handler.pfdcount].events = 0;
	}

	/* If we have the same pollfd in the copy, perform the same operations,
	 * otherwise return. */
	if (i >= master->handler.copycount)
		return;

	master->handler.copy[i].events &= ~(state);

	if (master->handler.copy[i].events == 0) {
		memmove(master->handler.copy + i, master->handler.copy + i + 1,
			(master->handler.copycount - i - 1)
				* sizeof(struct pollfd));
		master->handler.copycount--;
		master->handler.copy[master->handler.copycount].fd = 0;
		master->handler.copy[master->handler.copycount].events = 0;
	}
}

/*
 * Process task cancellation given a task argument: iterate through the
 * various lists of tasks, looking for any that match the argument.
 */
static void cancel_arg_helper(struct thread_master *master,
			      const struct cancel_req *cr)
{
	struct thread *t;
	nfds_t i;
	int fd;
	struct pollfd *pfd;

	/* We're only processing arg-based cancellations here. */
	if (cr->eventobj == NULL)
		return;

	/* First process the ready lists. */
	frr_each_safe(thread_list, &master->event, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->event, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	frr_each_safe(thread_list, &master->ready, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->ready, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	/* If requested, stop here and ignore io and timers */
	if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
		return;

	/* Check the io tasks */
	for (i = 0; i < master->handler.pfdcount;) {
		pfd = master->handler.pfds + i;

		if (pfd->events & POLLIN)
			t = master->read[pfd->fd];
		else
			t = master->write[pfd->fd];

		if (t && t->arg == cr->eventobj) {
			fd = pfd->fd;

			/* Found a match to cancel: clean up fd arrays */
			thread_cancel_rw(master, pfd->fd, pfd->events, i);

			/* Clean up thread arrays */
			master->read[fd] = NULL;
			master->write[fd] = NULL;

			/* Clear caller's ref */
			if (t->ref)
				*t->ref = NULL;

			thread_add_unuse(master, t);

			/* Don't increment 'i' since the cancellation will have
			 * removed the entry from the pfd array
			 */
		} else
			i++;
	}

	/* Check the timer tasks */
	t = thread_timer_list_first(&master->timer);
	while (t) {
		struct thread *t_next;

		t_next = thread_timer_list_next(&master->timer, t);

		if (t->arg == cr->eventobj) {
			thread_timer_list_del(&master->timer, t);
			if (t->ref)
				*t->ref = NULL;
			thread_add_unuse(master, t);
		}

		t = t_next;
	}
}

/**
 * Process cancellation requests.
 *
 * This may only be run from the pthread which owns the thread_master.
 *
 * @param master the thread master to process
 * @REQUIRE master->mtx
 */
static void do_thread_cancel(struct thread_master *master)
{
	struct thread_list_head *list = NULL;
	struct thread **thread_array = NULL;
	struct thread *thread;

	struct cancel_req *cr;
	struct listnode *ln;
	for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
		/*
		 * If this is an event object cancellation, search
		 * through task lists deleting any tasks which have the
		 * specified argument - use this handy helper function.
		 */
		if (cr->eventobj) {
			cancel_arg_helper(master, cr);
			continue;
		}

		/*
		 * The pointer varies depending on whether the cancellation
		 * request was made asynchronously or not. If it was, we
		 * need to check whether the thread even exists anymore
		 * before cancelling it.
		 */
		thread = (cr->thread) ? cr->thread : *cr->threadref;

		if (!thread)
			continue;

		/* Determine the appropriate queue to cancel the thread from */
		switch (thread->type) {
		case THREAD_READ:
			thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
			thread_array = master->read;
			break;
		case THREAD_WRITE:
			thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
			thread_array = master->write;
			break;
		case THREAD_TIMER:
			thread_timer_list_del(&master->timer, thread);
			break;
		case THREAD_EVENT:
			list = &master->event;
			break;
		case THREAD_READY:
			list = &master->ready;
			break;
		default:
			continue;
			break;
		}

		if (list) {
			thread_list_del(list, thread);
		} else if (thread_array) {
			thread_array[thread->u.fd] = NULL;
		}

		if (thread->ref)
			*thread->ref = NULL;

		thread_add_unuse(thread->master, thread);
	}

	/* Delete and free all cancellation requests */
	if (master->cancel_req)
		list_delete_all_node(master->cancel_req);

	/* Wake up any threads which may be blocked in thread_cancel_async() */
	master->canceled = true;
	pthread_cond_broadcast(&master->cancel_cond);
}

/*
 * Helper function used for multiple flavors of arg-based cancellation.
 */
static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
{
	struct cancel_req *cr;

	assert(m->owner == pthread_self());

	/* Only worth anything if caller supplies an arg. */
	if (arg == NULL)
		return;

	cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));

	cr->flags = flags;

	frr_with_mutex(&m->mtx) {
		cr->eventobj = arg;
		listnode_add(m->cancel_req, cr);
		do_thread_cancel(m);
	}
}

/**
 * Cancel any events which have the specified argument.
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event(struct thread_master *master, void *arg)
{
	cancel_event_helper(master, arg, 0);
}

/*
 * Cancel ready tasks with an arg matching 'arg'
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event_ready(struct thread_master *m, void *arg)
{

	/* Only cancel ready/event tasks */
	cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
}

/**
 * Cancel a specific task.
 *
 * MT-Unsafe
 *
 * @param thread task to cancel
 */
void thread_cancel(struct thread **thread)
{
	struct thread_master *master;

	if (thread == NULL || *thread == NULL)
		return;

	master = (*thread)->master;

	frrtrace(9, frr_libfrr, thread_cancel, master,
		 (*thread)->xref->funcname, (*thread)->xref->xref.file,
		 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
		 (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);

	assert(master->owner == pthread_self());

	frr_with_mutex(&master->mtx) {
		struct cancel_req *cr =
			XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
		cr->thread = *thread;
		listnode_add(master->cancel_req, cr);
		do_thread_cancel(master);
	}

	*thread = NULL;
}

/**
 * Asynchronous cancellation.
 *
 * Called with either a struct thread ** or void * to an event argument,
 * this function posts the correct cancellation request and blocks until it is
 * serviced.
 *
 * If the thread is currently running, execution blocks until it completes.
 *
 * The last two parameters are mutually exclusive, i.e. if you pass one the
 * other must be NULL.
 *
 * When the cancellation procedure executes on the target thread_master, the
 * thread * provided is checked for nullity. If it is null, the thread is
 * assumed to no longer exist and the cancellation request is a no-op. Thus
 * users of this API must pass a back-reference when scheduling the original
 * task.
 *
 * MT-Safe
 *
 * @param master the thread master with the relevant event / task
 * @param thread pointer to thread to cancel
 * @param eventobj the event
 */
void thread_cancel_async(struct thread_master *master, struct thread **thread,
			 void *eventobj)
{
	assert(!(thread && eventobj) && (thread || eventobj));

	if (thread && *thread)
		frrtrace(9, frr_libfrr, thread_cancel_async, master,
			 (*thread)->xref->funcname, (*thread)->xref->xref.file,
			 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
			 (*thread)->u.val, (*thread)->arg,
			 (*thread)->u.sands.tv_sec);
	else
		frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
			 0, NULL, 0, 0, eventobj, 0);

	assert(master->owner != pthread_self());

	frr_with_mutex(&master->mtx) {
		master->canceled = false;

		if (thread) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->threadref = thread;
			listnode_add(master->cancel_req, cr);
		} else if (eventobj) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->eventobj = eventobj;
			listnode_add(master->cancel_req, cr);
		}
		AWAKEN(master);

		while (!master->canceled)
			pthread_cond_wait(&master->cancel_cond, &master->mtx);
	}

	if (thread)
		*thread = NULL;
}
/* ------------------------------------------------------------------------- */
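
/*
 * Cancellation pattern sketch (hypothetical caller): because a scheduled
 * task stores a back-reference when &t was passed at add time, cancelling
 * through the same pointer is safe even if the task already ran (the
 * library NULLed the reference) - see the nullity checks above:
 *
 *	struct thread *t = NULL;
 *	thread_add_timer(master, my_expire_cb, NULL, 5, &t);
 *	// ... later, from the owning pthread:
 *	thread_cancel(&t);
 *	// ... or, from a different pthread:
 *	thread_cancel_async(master, &t, NULL);
 */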

static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
					 struct timeval *timer_val)
{
	if (!thread_timer_list_count(timers))
		return NULL;

	struct thread *next_timer = thread_timer_list_first(timers);
	monotime_until(&next_timer->u.sands, timer_val);
	return timer_val;
}

static struct thread *thread_run(struct thread_master *m, struct thread *thread,
				 struct thread *fetch)
{
	*fetch = *thread;
	thread_add_unuse(m, thread);
	return fetch;
}

static int thread_process_io_helper(struct thread_master *m,
				    struct thread *thread, short state,
				    short actual_state, int pos)
{
	struct thread **thread_array;

	/*
	 * poll() clears the .events field, but the pollfd array we
	 * pass to poll() is a copy of the one used to schedule threads.
	 * We need to synchronize state between the two here by applying
	 * the same changes poll() made on the copy of the "real" pollfd
	 * array.
	 *
	 * This cleans up a possible infinite loop where we refuse
	 * to respond to a poll event but poll is insistent that
	 * we should.
	 */
	m->handler.pfds[pos].events &= ~(state);

	if (!thread) {
		if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
			flog_err(EC_LIB_NO_THREAD,
				 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
				 m->handler.pfds[pos].fd, actual_state);
		return 0;
	}

	if (thread->type == THREAD_READ)
		thread_array = m->read;
	else
		thread_array = m->write;

	thread_array[thread->u.fd] = NULL;
	thread_list_add_tail(&m->ready, thread);
	thread->type = THREAD_READY;

	return 1;
}

/**
 * Process I/O events.
 *
 * Walks through file descriptor array looking for those pollfds whose .revents
 * field has something interesting. Deletes any invalid file descriptors.
 *
 * @param m the thread master
 * @param num the number of active file descriptors (return value of poll())
 */
static void thread_process_io(struct thread_master *m, unsigned int num)
{
	unsigned int ready = 0;
	struct pollfd *pfds = m->handler.copy;

	for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
		/* no event for current fd? immediately continue */
		if (pfds[i].revents == 0)
			continue;

		ready++;

		/*
		 * Unless someone has called thread_cancel from another
		 * pthread, the only thing that could have changed in
		 * m->handler.pfds while we were asleep is the .events
		 * field in a given pollfd. Barring thread_cancel() that
		 * value should be a superset of the values we have in our
		 * copy, so there's no need to update it. Similarly,
		 * barring deletion, the fd should still be a valid index
		 * into the master's pfds.
		 *
		 * We include POLLERR in the READ event here because the
		 * subsequent read should fail, and the read handler is
		 * expected to deal with that failure appropriately.
		 */
		if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
			thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
						 pfds[i].revents, i);
		}
		if (pfds[i].revents & POLLOUT)
			thread_process_io_helper(m, m->write[pfds[i].fd],
						 POLLOUT, pfds[i].revents, i);

		/* if one of our file descriptors is garbage, remove the same
		 * from both pfds + update sizes and index */
		if (pfds[i].revents & POLLNVAL) {
			memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
				(m->handler.pfdcount - i - 1)
					* sizeof(struct pollfd));
			m->handler.pfdcount--;
			m->handler.pfds[m->handler.pfdcount].fd = 0;
			m->handler.pfds[m->handler.pfdcount].events = 0;

			memmove(pfds + i, pfds + i + 1,
				(m->handler.copycount - i - 1)
					* sizeof(struct pollfd));
			m->handler.copycount--;
			m->handler.copy[m->handler.copycount].fd = 0;
			m->handler.copy[m->handler.copycount].events = 0;

			i--;
		}
	}
}

/* Add all timers that have popped to the ready list. */
static unsigned int thread_process_timers(struct thread_master *m,
					  struct timeval *timenow)
{
	struct timeval prev = *timenow;
	bool displayed = false;
	struct thread *thread;
	unsigned int ready = 0;

	while ((thread = thread_timer_list_first(&m->timer))) {
		if (timercmp(timenow, &thread->u.sands, <))
			break;
		prev = thread->u.sands;
		prev.tv_sec += 4;
		/*
		 * If the timer would have popped 4 seconds in the
		 * past then we are in a situation where we are
		 * really getting behind on handling of events.
		 * Let's log it and do the right thing with it.
		 */
		if (timercmp(timenow, &prev, >)) {
			atomic_fetch_add_explicit(
				&thread->hist->total_starv_warn, 1,
				memory_order_seq_cst);
			if (!displayed && !thread->ignore_timer_late) {
				flog_warn(
					EC_LIB_STARVE_THREAD,
					"Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
					thread);
				displayed = true;
			}
		}

		thread_timer_list_pop(&m->timer);
		thread->type = THREAD_READY;
		thread_list_add_tail(&m->ready, thread);
		ready++;
	}

	return ready;
}

/* process a list en masse, e.g. for event thread lists */
static unsigned int thread_process(struct thread_list_head *list)
{
	struct thread *thread;
	unsigned int ready = 0;

	while ((thread = thread_list_pop(list))) {
		thread->type = THREAD_READY;
		thread_list_add_tail(&thread->master->ready, thread);
		ready++;
	}
	return ready;
}


718e3744 1714/* Fetch next ready thread. */
d62a17ae 1715struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
718e3744 1716{
d62a17ae 1717 struct thread *thread = NULL;
1718 struct timeval now;
1719 struct timeval zerotime = {0, 0};
1720 struct timeval tv;
1721 struct timeval *tw = NULL;
d81ca9a3 1722 bool eintr_p = false;
d62a17ae 1723 int num = 0;
1724
1725 do {
1726 /* Handle signals if any */
1727 if (m->handle_signals)
7cc91e67 1728 frr_sigevent_process();
d62a17ae 1729
1730 pthread_mutex_lock(&m->mtx);
1731
1732 /* Process any pending cancellation requests */
1733 do_thread_cancel(m);
1734
e3c9529e
QY
1735 /*
1736 * Attempt to flush ready queue before going into poll().
1737 * This is performance-critical. Think twice before modifying.
1738 */
c284542b 1739 if ((thread = thread_list_pop(&m->ready))) {
e3c9529e
QY
1740 fetch = thread_run(m, thread, fetch);
1741 if (fetch->ref)
1742 *fetch->ref = NULL;
1743 pthread_mutex_unlock(&m->mtx);
5e822957
DS
1744 if (!m->ready_run_loop)
1745 GETRUSAGE(&m->last_getrusage);
1746 m->ready_run_loop = true;
e3c9529e
QY
1747 break;
1748 }
1749
5e822957 1750 m->ready_run_loop = false;
e3c9529e
QY
1751 /* otherwise, tick through scheduling sequence */
1752
bca37d17
QY
1753 /*
1754 * Post events to ready queue. This must come before the
1755 * following block since events should occur immediately
1756 */
d62a17ae 1757 thread_process(&m->event);
1758
bca37d17
QY
1759 /*
1760 * If there are no tasks on the ready queue, we will poll()
1761 * until a timer expires or we receive I/O, whichever comes
1762 * first. The strategy for doing this is:
d62a17ae 1763 *
1764 * - If there are events pending, set the poll() timeout to zero
1765 * - If there are no events pending, but there are timers
d279ef57
DS
1766 * pending, set the timeout to the smallest remaining time on
1767 * any timer.
d62a17ae 1768 * - If there are neither timers nor events pending, but there
d279ef57 1769 * are file descriptors pending, block indefinitely in poll()
d62a17ae 1770 * - If nothing is pending, it's time for the application to die
1771 *
1772 * In every case except the last, we need to hit poll() at least
bca37d17
QY
1773 * once per loop to avoid starvation by events
1774 */
c284542b 1775 if (!thread_list_count(&m->ready))
27d29ced 1776 tw = thread_timer_wait(&m->timer, &tv);
d62a17ae 1777
c284542b
DL
1778 if (thread_list_count(&m->ready) ||
1779 (tw && !timercmp(tw, &zerotime, >)))
d62a17ae 1780 tw = &zerotime;
1781
1782 if (!tw && m->handler.pfdcount == 0) { /* die */
1783 pthread_mutex_unlock(&m->mtx);
1784 fetch = NULL;
1785 break;
1786 }
1787
bca37d17
QY
1788 /*
1789 * Copy pollfd array + # active pollfds in it. Not necessary to
1790 * copy the array size as this is fixed.
1791 */
d62a17ae 1792 m->handler.copycount = m->handler.pfdcount;
1793 memcpy(m->handler.copy, m->handler.pfds,
1794 m->handler.copycount * sizeof(struct pollfd));
1795
e3c9529e
QY
1796 pthread_mutex_unlock(&m->mtx);
1797 {
d81ca9a3
MS
1798 eintr_p = false;
1799 num = fd_poll(m, tw, &eintr_p);
e3c9529e
QY
1800 }
1801 pthread_mutex_lock(&m->mtx);
d764d2cc 1802
e3c9529e
QY
1803 /* Handle any errors received in poll() */
1804 if (num < 0) {
d81ca9a3 1805 if (eintr_p) {
d62a17ae 1806 pthread_mutex_unlock(&m->mtx);
e3c9529e
QY
1807 /* loop around to signal handler */
1808 continue;
d62a17ae 1809 }
1810
e3c9529e 1811 /* else die */
450971aa 1812 flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
9ef9495e 1813 safe_strerror(errno));
e3c9529e
QY
1814 pthread_mutex_unlock(&m->mtx);
1815 fetch = NULL;
1816 break;
bca37d17 1817 }
d62a17ae 1818
1819 /* Post timers to ready queue. */
1820 monotime(&now);
e7d9e44b 1821 thread_process_timers(m, &now);
d62a17ae 1822
1823 /* Post I/O to ready queue. */
1824 if (num > 0)
1825 thread_process_io(m, num);
1826
d62a17ae 1827 pthread_mutex_unlock(&m->mtx);
1828
1829 } while (!thread && m->spin);
1830
1831 return fetch;
718e3744 1832}
1833
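/*
 * For context, a sketch of the caller's side: a daemon main loop drives
 * thread_fetch() with a stack-allocated scratch thread and runs each
 * fetched task until NULL signals shutdown.  (FRR's frr_run() follows
 * this exact shape.)
 */
static void example_main_loop(struct thread_master *master)
{
	struct thread thread;

	while (thread_fetch(master, &thread))
		thread_call(&thread);
}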
d62a17ae 1834static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
62f44022 1835{
d62a17ae 1836 return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1837 + (a.tv_usec - b.tv_usec));
62f44022
QY
1838}
1839
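/*
 * Worked example: with a = {5, 200000} and b = {3, 700000},
 * timeval_elapsed() returns (5 - 3) * 1000000 + (200000 - 700000)
 * = 1500000, i.e. 1.5 seconds expressed in microseconds.
 */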
d62a17ae 1840unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1841 unsigned long *cputime)
718e3744 1842{
6418e2d3
DL
1843#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1844 *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1845 + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1846#else
d62a17ae 1847 /* This is 'user + sys' time. */
1848 *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1849 + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
6418e2d3 1850#endif
d62a17ae 1851 return timeval_elapsed(now->real, start->real);
8b70d0b0 1852}
1853
50596be0
DS
 1854/* We should aim to yield after 'yield' microseconds, which defaults
 1855 to THREAD_YIELD_TIME_SLOT (10ms).
8b70d0b0 1856 Note: we are using real (wall clock) time for this calculation.
 1857 It could be argued that CPU time may make more sense in certain
 1858 contexts. The things to consider are whether the thread may have
 1859 blocked (in which case wall time increases, but CPU time does not),
 1860 or whether the system is heavily loaded with other processes competing
d62a17ae 1861 for CPU time. On balance, wall clock time seems to make sense.
8b70d0b0 1862 Plus it has the added benefit that reading the monotonic clock
 1863 should be faster than calling getrusage. */
d62a17ae 1864int thread_should_yield(struct thread *thread)
718e3744 1865{
d62a17ae 1866 int result;
00dffa8c 1867 frr_with_mutex(&thread->mtx) {
d62a17ae 1868 result = monotime_since(&thread->real, NULL)
1869 > (int64_t)thread->yield;
1870 }
d62a17ae 1871 return result;
50596be0
DS
1872}
1873
d62a17ae 1874void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
50596be0 1875{
00dffa8c 1876 frr_with_mutex(&thread->mtx) {
d62a17ae 1877 thread->yield = yield_time;
1878 }
718e3744 1879}
1880
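/*
 * Sketch of the cooperative-yield pattern these two helpers exist for
 * (illustrative only; the "work" here is a dummy counter): do bounded
 * work, then reschedule instead of hogging the event loop.
 */
static void example_background_task(struct thread *t)
{
	long *remaining = THREAD_ARG(t);

	while (*remaining > 0) {
		(*remaining)--;	/* stand-in for one unit of real work */
		if (thread_should_yield(t)) {
			/* time slot used up: run again on a fresh event */
			thread_add_event(t->master, example_background_task,
					 remaining, 0, NULL);
			return;
		}
	}
}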
d62a17ae 1881void thread_getrusage(RUSAGE_T *r)
db9c0df9 1882{
6418e2d3
DL
1883 monotime(&r->real);
1884 if (!cputime_enabled) {
1885 memset(&r->cpu, 0, sizeof(r->cpu));
1886 return;
1887 }
1888
1889#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1890 /* not currently implemented in Linux's vDSO, but maybe at some point
1891 * in the future?
1892 */
1893 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1894#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
231db9a6
DS
1895#if defined RUSAGE_THREAD
1896#define FRR_RUSAGE RUSAGE_THREAD
1897#else
1898#define FRR_RUSAGE RUSAGE_SELF
1899#endif
6418e2d3
DL
1900 getrusage(FRR_RUSAGE, &(r->cpu));
1901#endif
db9c0df9
PJ
1902}
1903
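/*
 * Sketch: bracketing a code section the same way thread_call() below
 * does, pairing thread_getrusage() samples with thread_consumed_time().
 * Both results are in microseconds.
 */
static void example_measure_section(void)
{
	RUSAGE_T before, after;
	unsigned long walltime, cputime;

	thread_getrusage(&before);
	/* ... the work to be measured ... */
	thread_getrusage(&after);

	walltime = thread_consumed_time(&after, &before, &cputime);
	zlog_debug("section: %luus wall, %luus cpu", walltime, cputime);
}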
fbcac826
QY
1904/*
1905 * Call a thread.
1906 *
 1907 * This function will atomically update the thread's usage history. At present
 1908 * this is the only spot where usage history is written. Nevertheless the code
 1909 * has been written such that additional writers could be introduced in the
 1910 * future without changes here, provided they atomically perform only the
 1911 * operations done here, i.e. updating the total and maximum times. In
 1912 * particular, the maximum real and cpu times must be monotonically increasing
 1913 * or this code is not correct.
1914 */
d62a17ae 1915void thread_call(struct thread *thread)
718e3744 1916{
d62a17ae 1917 RUSAGE_T before, after;
cc8b13a0 1918
45f01188
DL
1919 /* if the thread being called is the CLI, it may change cputime_enabled
1920 * ("service cputime-stats" command), which can result in nonsensical
1921 * and very confusing warnings
1922 */
1923 bool cputime_enabled_here = cputime_enabled;
1924
5e822957
DS
1925 if (thread->master->ready_run_loop)
1926 before = thread->master->last_getrusage;
1927 else
1928 GETRUSAGE(&before);
1929
d62a17ae 1930 thread->real = before.real;
718e3744 1931
6c3aa850
DL
1932 frrtrace(9, frr_libfrr, thread_call, thread->master,
1933 thread->xref->funcname, thread->xref->xref.file,
1934 thread->xref->xref.line, NULL, thread->u.fd,
c7bb4f00 1935 thread->u.val, thread->arg, thread->u.sands.tv_sec);
abf96a87 1936
d62a17ae 1937 pthread_setspecific(thread_current, thread);
1938 (*thread->func)(thread);
1939 pthread_setspecific(thread_current, NULL);
718e3744 1940
d62a17ae 1941 GETRUSAGE(&after);
5e822957 1942 thread->master->last_getrusage = after;
718e3744 1943
45f01188
DL
1944 unsigned long walltime, cputime;
1945 unsigned long exp;
fbcac826 1946
45f01188
DL
1947 walltime = thread_consumed_time(&after, &before, &cputime);
1948
1949 /* update walltime */
1950 atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
fbcac826
QY
1951 memory_order_seq_cst);
1952 exp = atomic_load_explicit(&thread->hist->real.max,
1953 memory_order_seq_cst);
45f01188 1954 while (exp < walltime
fbcac826 1955 && !atomic_compare_exchange_weak_explicit(
45f01188
DL
1956 &thread->hist->real.max, &exp, walltime,
1957 memory_order_seq_cst, memory_order_seq_cst))
fbcac826
QY
1958 ;
1959
45f01188
DL
1960 if (cputime_enabled_here && cputime_enabled) {
1961 /* update cputime */
1962 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
1963 memory_order_seq_cst);
1964 exp = atomic_load_explicit(&thread->hist->cpu.max,
1965 memory_order_seq_cst);
1966 while (exp < cputime
1967 && !atomic_compare_exchange_weak_explicit(
1968 &thread->hist->cpu.max, &exp, cputime,
1969 memory_order_seq_cst, memory_order_seq_cst))
1970 ;
1971 }
fbcac826
QY
1972
1973 atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
1974 memory_order_seq_cst);
1975 atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
1976 memory_order_seq_cst);
718e3744 1977
45f01188
DL
1978 if (cputime_enabled_here && cputime_enabled && cputime_threshold
1979 && cputime > cputime_threshold) {
d62a17ae 1980 /*
45f01188
DL
 1981 * We have a CPU hog on our hands: the time FRR has spent
 1982 * doing actual work (not sleeping) exceeds the configured
d62a17ae 1983 * cputime threshold (5 seconds by default). Whinge about it
 1984 * now, so we're aware this is yet another task to fix.
1985 */
9b8e01ca
DS
1986 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
1987 1, memory_order_seq_cst);
9ef9495e 1988 flog_warn(
039d547f
DS
1989 EC_LIB_SLOW_THREAD_CPU,
1990 "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
1991 thread->xref->funcname, (unsigned long)thread->func,
45f01188
DL
1992 walltime / 1000, cputime / 1000);
1993
1994 } else if (walltime_threshold && walltime > walltime_threshold) {
039d547f 1995 /*
45f01188
DL
 1996 * The task's wall-clock runtime exceeded the walltime
 1997 * threshold (5 seconds by default) while its cpu time stayed
 1998 * below it. Whine about this, as it could imply a scheduling issue.
039d547f 1999 */
9b8e01ca
DS
2000 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2001 1, memory_order_seq_cst);
039d547f
DS
2002 flog_warn(
2003 EC_LIB_SLOW_THREAD_WALL,
2004 "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
60a3efec 2005 thread->xref->funcname, (unsigned long)thread->func,
45f01188 2006 walltime / 1000, cputime / 1000);
d62a17ae 2007 }
718e3744 2008}
2009
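/*
 * The compare-exchange loops above implement a lock-free "atomic max"
 * over the usage history.  The same pattern in isolation, as a sketch
 * using <stdatomic.h> names:
 */
static void example_atomic_max(_Atomic unsigned long *max,
			       unsigned long observed)
{
	unsigned long exp = atomic_load_explicit(max, memory_order_seq_cst);

	/* retry until a larger value is already present or ours wins */
	while (exp < observed
	       && !atomic_compare_exchange_weak_explicit(
		       max, &exp, observed, memory_order_seq_cst,
		       memory_order_seq_cst))
		;
}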
2010/* Execute thread */
60a3efec 2011void _thread_execute(const struct xref_threadsched *xref,
cc9f21da 2012 struct thread_master *m, void (*func)(struct thread *),
60a3efec 2013 void *arg, int val)
718e3744 2014{
c4345fbf 2015 struct thread *thread;
718e3744 2016
c4345fbf 2017 /* Get or allocate new thread to execute. */
00dffa8c 2018 frr_with_mutex(&m->mtx) {
60a3efec 2019 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
9c7753e4 2020
c4345fbf 2021 /* Set its event value. */
00dffa8c 2022 frr_with_mutex(&thread->mtx) {
c4345fbf
RZ
2023 thread->add_type = THREAD_EXECUTE;
2024 thread->u.val = val;
2025 thread->ref = &thread;
2026 }
c4345fbf 2027 }
f7c62e11 2028
c4345fbf
RZ
2029 /* Execute thread doing all accounting. */
2030 thread_call(thread);
9c7753e4 2031
c4345fbf
RZ
2032 /* Give back or free thread. */
2033 thread_add_unuse(m, thread);
718e3744 2034}
1543c387
MS
2035
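/*
 * Callers normally reach _thread_execute() through the thread_execute()
 * convenience macro in thread.h, which supplies the xref; e.g. to run a
 * handler synchronously with full accounting:
 *
 *	thread_execute(master, handler_func, handler_arg, 0);
 */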
2036/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2037void debug_signals(const sigset_t *sigs)
2038{
2039 int i, found;
2040 sigset_t tmpsigs;
2041 char buf[300];
2042
2043 /*
2044 * We're only looking at the non-realtime signals here, so we need
2045 * some limit value. Platform differences mean at some point we just
2046 * need to pick a reasonable value.
2047 */
2048#if defined SIGRTMIN
2049# define LAST_SIGNAL SIGRTMIN
2050#else
2051# define LAST_SIGNAL 32
2052#endif
2053
2054
2055 if (sigs == NULL) {
2056 sigemptyset(&tmpsigs);
2057 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2058 sigs = &tmpsigs;
2059 }
2060
2061 found = 0;
2062 buf[0] = '\0';
2063
2064 for (i = 0; i < LAST_SIGNAL; i++) {
2065 char tmp[20];
2066
2067 if (sigismember(sigs, i) > 0) {
2068 if (found > 0)
2069 strlcat(buf, ",", sizeof(buf));
2070 snprintf(tmp, sizeof(tmp), "%d", i);
2071 strlcat(buf, tmp, sizeof(buf));
2072 found++;
2073 }
2074 }
2075
2076 if (found == 0)
2077 snprintf(buf, sizeof(buf), "<none>");
2078
2079 zlog_debug("%s: %s", __func__, buf);
2080}
a505383d
DS
2081
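/*
 * Example use (illustrative): pass NULL to log the caller's current
 * effective mask, or hand in an explicit set.
 */
static void example_debug_signals(void)
{
	sigset_t blocked;

	debug_signals(NULL);		/* current effective mask */

	sigemptyset(&blocked);
	sigaddset(&blocked, SIGTERM);
	debug_signals(&blocked);	/* logs SIGTERM's number (15 on Linux) */
}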
2082bool thread_is_scheduled(struct thread *thread)
2083{
2084 if (thread == NULL)
2085 return false;
2086
2087 return true;
2088}
f59e6882
DL
2089
2090static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2091 const struct thread *thread)
2092{
2093 static const char * const types[] = {
2094 [THREAD_READ] = "read",
2095 [THREAD_WRITE] = "write",
2096 [THREAD_TIMER] = "timer",
2097 [THREAD_EVENT] = "event",
2098 [THREAD_READY] = "ready",
2099 [THREAD_UNUSED] = "unused",
2100 [THREAD_EXECUTE] = "exec",
2101 };
2102 ssize_t rv = 0;
2103 char info[16] = "";
2104
2105 if (!thread)
2106 return bputs(buf, "{(thread *)NULL}");
2107
2108 rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2109
2110 if (thread->type < array_size(types) && types[thread->type])
2111 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2112 else
2113 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2114
2115 switch (thread->type) {
2116 case THREAD_READ:
2117 case THREAD_WRITE:
2118 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2119 break;
2120
2121 case THREAD_TIMER:
2122 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2123 break;
2124 }
2125
2126 rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2127 thread->xref->funcname, thread->xref->dest,
2128 thread->xref->xref.file, thread->xref->xref.line);
2129 return rv;
2130}
2131
54929fd3 2132printfrr_ext_autoreg_p("TH", printfrr_thread);
f59e6882
DL
2133static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2134 const void *ptr)
2135{
2136 const struct thread *thread = ptr;
2137 struct timespec remain = {};
2138
2139 if (ea->fmt[0] == 'D') {
2140 ea->fmt++;
2141 return printfrr_thread_dbg(buf, ea, thread);
2142 }
2143
2144 if (!thread) {
2145 /* need to jump over time formatting flag characters in the
2146 * input format string, i.e. adjust ea->fmt!
2147 */
2148 printfrr_time(buf, ea, &remain,
2149 TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2150 return bputch(buf, '-');
2151 }
2152
2153 TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2154 return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2155}
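/*
 * Usage sketch for the "%pTH" extension registered above: the plain
 * form renders the remaining time until a timer task's deadline
 * ("-" for NULL), while the 'D' flag selects the verbose debug form
 * used e.g. by the starvation warning in thread_process_timers().
 */
static void example_log_timer(const struct thread *timer_task)
{
	zlog_debug("pops in %pTH", timer_task);
	zlog_debug("details: %pTHD", timer_task);
}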