/* Thread management routine
 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* #define DEBUG */

#include <zebra.h>
#include <sys/resource.h>

#include "thread.h"
#include "memory.h"
#include "frrcu.h"
#include "log.h"
#include "hash.h"
#include "command.h"
#include "sigevent.h"
#include "network.h"
#include "jhash.h"
#include "frratomic.h"
#include "frr_pthread.h"
#include "lib_errors.h"
#include "libfrr_trace.h"
#include "libfrr.h"

DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");

DECLARE_LIST(thread_list, struct thread, threaditem);

struct cancel_req {
	int flags;
	struct thread *thread;
	void *eventobj;
	struct thread **threadref;
};

/* Flags for task cancellation */
#define THREAD_CANCEL_FLAG_READY 0x01

static int thread_timer_cmp(const struct thread *a, const struct thread *b)
{
	if (a->u.sands.tv_sec < b->u.sands.tv_sec)
		return -1;
	if (a->u.sands.tv_sec > b->u.sands.tv_sec)
		return 1;
	if (a->u.sands.tv_usec < b->u.sands.tv_usec)
		return -1;
	if (a->u.sands.tv_usec > b->u.sands.tv_usec)
		return 1;
	return 0;
}

DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);

#if defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif

#define AWAKEN(m)                                                              \
	do {                                                                   \
		const unsigned char wakebyte = 0x01;                           \
		write(m->io_pipe[1], &wakebyte, 1);                            \
	} while (0);
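/*
 * AWAKEN() pokes the thread_master's self-pipe: fd_poll() always watches
 * io_pipe[0], so writing one byte to io_pipe[1] makes a blocking poll()
 * return and re-evaluate the task queues (used when another pthread
 * schedules or cancels a task).  The byte is drained again in fd_poll().
 */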

/* control variable for initializer */
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
pthread_key_t thread_current;

static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *masters;

static void thread_free(struct thread_master *master, struct thread *thread);

#ifndef EXCLUDE_CPU_TIME
#define EXCLUDE_CPU_TIME 0
#endif
#ifndef CONSUMED_TIME_CHECK
#define CONSUMED_TIME_CHECK 0
#endif

bool cputime_enabled = !EXCLUDE_CPU_TIME;
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;

/* CLI start ---------------------------------------------------------------- */
#ifndef VTYSH_EXTRACT_PL
#include "lib/thread_clippy.c"
#endif

static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
{
	int size = sizeof(a->func);

	return jhash(&a->func, size, 0);
}

static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
				const struct cpu_thread_history *b)
{
	return a->func == b->func;
}

static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
{
	struct cpu_thread_history *new;
	new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
	new->func = a->func;
	new->funcname = a->funcname;
	return new;
}

static void cpu_record_hash_free(void *a)
{
	struct cpu_thread_history *hist = a;

	XFREE(MTYPE_THREAD_STATS, hist);
}

static void vty_out_cpu_thread_history(struct vty *vty,
				       struct cpu_thread_history *a)
{
	vty_out(vty,
		"%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
		a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
		a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
		(a->real.total / a->total_calls), a->real.max,
		a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
	vty_out(vty, " %c%c%c%c%c %s\n",
		a->types & (1 << THREAD_READ) ? 'R' : ' ',
		a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
		a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
		a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
		a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
}

static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
{
	struct cpu_thread_history *totals = args[0];
	struct cpu_thread_history copy;
	struct vty *vty = args[1];
	uint8_t *filter = args[2];

	struct cpu_thread_history *a = bucket->data;

	copy.total_active =
		atomic_load_explicit(&a->total_active, memory_order_seq_cst);
	copy.total_calls =
		atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
	copy.total_cpu_warn =
		atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
	copy.total_wall_warn =
		atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
	copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
						     memory_order_seq_cst);
	copy.cpu.total =
		atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
	copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
	copy.real.total =
		atomic_load_explicit(&a->real.total, memory_order_seq_cst);
	copy.real.max =
		atomic_load_explicit(&a->real.max, memory_order_seq_cst);
	copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
	copy.funcname = a->funcname;

	if (!(copy.types & *filter))
		return;

	vty_out_cpu_thread_history(vty, &copy);
	totals->total_active += copy.total_active;
	totals->total_calls += copy.total_calls;
	totals->total_cpu_warn += copy.total_cpu_warn;
	totals->total_wall_warn += copy.total_wall_warn;
	totals->total_starv_warn += copy.total_starv_warn;
	totals->real.total += copy.real.total;
	if (totals->real.max < copy.real.max)
		totals->real.max = copy.real.max;
	totals->cpu.total += copy.cpu.total;
	if (totals->cpu.max < copy.cpu.max)
		totals->cpu.max = copy.cpu.max;
}

static void cpu_record_print(struct vty *vty, uint8_t filter)
{
	struct cpu_thread_history tmp;
	void *args[3] = {&tmp, vty, &filter};
	struct thread_master *m;
	struct listnode *ln;

	if (!cputime_enabled)
		vty_out(vty,
			"\n"
			"Collecting CPU time statistics is currently disabled. Following statistics\n"
			"will be zero or may display data from when collection was enabled. Use the\n"
			" \"service cputime-stats\" command to start collecting data.\n"
			"\nCounters and wallclock times are always maintained and should be accurate.\n");

	memset(&tmp, 0, sizeof(tmp));
	tmp.funcname = "TOTAL";
	tmp.types = filter;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			const char *name = m->name ? m->name : "main";

			char underline[strlen(name) + 1];
			memset(underline, '-', sizeof(underline));
			underline[sizeof(underline) - 1] = '\0';

			vty_out(vty, "\n");
			vty_out(vty, "Showing statistics for pthread %s\n",
				name);
			vty_out(vty, "-------------------------------%s\n",
				underline);
			vty_out(vty, "%30s %18s %18s\n", "",
				"CPU (user+system):", "Real (wall-clock):");
			vty_out(vty,
				"Active Runtime(ms) Invoked Avg uSec Max uSecs");
			vty_out(vty, " Avg uSec Max uSecs");
			vty_out(vty,
				" CPU_Warn Wall_Warn Starv_Warn Type Thread\n");

			if (m->cpu_record->count)
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_print,
					args);
			else
				vty_out(vty, "No data to display yet.\n");

			vty_out(vty, "\n");
		}
	}

	vty_out(vty, "\n");
	vty_out(vty, "Total thread statistics\n");
	vty_out(vty, "-------------------------\n");
	vty_out(vty, "%30s %18s %18s\n", "",
		"CPU (user+system):", "Real (wall-clock):");
	vty_out(vty, "Active Runtime(ms) Invoked Avg uSec Max uSecs");
	vty_out(vty, " Avg uSec Max uSecs CPU_Warn Wall_Warn");
	vty_out(vty, " Type Thread\n");

	if (tmp.total_calls > 0)
		vty_out_cpu_thread_history(vty, &tmp);
}

static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
{
	uint8_t *filter = args[0];
	struct hash *cpu_record = args[1];

	struct cpu_thread_history *a = bucket->data;

	if (!(a->types & *filter))
		return;

	hash_release(cpu_record, bucket->data);
}

static void cpu_record_clear(uint8_t filter)
{
	uint8_t *tmp = &filter;
	struct thread_master *m;
	struct listnode *ln;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			frr_with_mutex(&m->mtx) {
				void *args[2] = {tmp, m->cpu_record};
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_clear,
					args);
			}
		}
	}
}

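/* Map a "show/clear thread cpu" filter string such as "rwtex" onto the
 * corresponding THREAD_* type bitmask. */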
static uint8_t parse_filter(const char *filterstr)
{
	int i = 0;
	int filter = 0;

	while (filterstr[i] != '\0') {
		switch (filterstr[i]) {
		case 'r':
		case 'R':
			filter |= (1 << THREAD_READ);
			break;
		case 'w':
		case 'W':
			filter |= (1 << THREAD_WRITE);
			break;
		case 't':
		case 'T':
			filter |= (1 << THREAD_TIMER);
			break;
		case 'e':
		case 'E':
			filter |= (1 << THREAD_EVENT);
			break;
		case 'x':
		case 'X':
			filter |= (1 << THREAD_EXECUTE);
			break;
		default:
			break;
		}
		++i;
	}
	return filter;
}

DEFUN_NOSH (show_thread_cpu,
	    show_thread_cpu_cmd,
	    "show thread cpu [FILTER]",
	    SHOW_STR
	    "Thread information\n"
	    "Thread CPU usage\n"
	    "Display filter (rwtex)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_print(vty, filter);
	return CMD_SUCCESS;
}

DEFPY (service_cputime_stats,
       service_cputime_stats_cmd,
       "[no] service cputime-stats",
       NO_STR
       "Set up miscellaneous service\n"
       "Collect CPU usage statistics\n")
{
	cputime_enabled = !no;
	return CMD_SUCCESS;
}

DEFPY (service_cputime_warning,
       service_cputime_warning_cmd,
       "[no] service cputime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		cputime_threshold = 0;
	else
		cputime_threshold = cputime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_cputime_warning,
       no_service_cputime_warning_cmd,
       "no service cputime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n")

DEFPY (service_walltime_warning,
       service_walltime_warning_cmd,
       "[no] service walltime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		walltime_threshold = 0;
	else
		walltime_threshold = walltime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_walltime_warning,
       no_service_walltime_warning_cmd,
       "no service walltime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n")

static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct thread *thread;
	uint32_t i;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing poll FD's for %s\n", name);
	vty_out(vty, "----------------------%s\n", underline);
	vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
		m->fd_limit);
	for (i = 0; i < m->handler.pfdcount; i++) {
		vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
			m->handler.pfds[i].fd, m->handler.pfds[i].events,
			m->handler.pfds[i].revents);

		if (m->handler.pfds[i].events & POLLIN) {
			thread = m->read[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR ");
			else
				vty_out(vty, "%s ", thread->xref->funcname);
		} else
			vty_out(vty, " ");

		if (m->handler.pfds[i].events & POLLOUT) {
			thread = m->write[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR\n");
			else
				vty_out(vty, "%s\n", thread->xref->funcname);
		} else
			vty_out(vty, "\n");
	}
}

DEFUN_NOSH (show_thread_poll,
	    show_thread_poll_cmd,
	    "show thread poll",
	    SHOW_STR
	    "Thread information\n"
	    "Show poll FD's and information\n")
{
	struct listnode *node;
	struct thread_master *m;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
			show_thread_poll_helper(vty, m);
		}
	}

	return CMD_SUCCESS;
}


DEFUN (clear_thread_cpu,
       clear_thread_cpu_cmd,
       "clear thread cpu [FILTER]",
       "Clear stored data in all pthreads\n"
       "Thread information\n"
       "Thread CPU usage\n"
       "Display filter (rwtexb)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_clear(filter);
	return CMD_SUCCESS;
}

void thread_cmd_init(void)
{
	install_element(VIEW_NODE, &show_thread_cpu_cmd);
	install_element(VIEW_NODE, &show_thread_poll_cmd);
	install_element(ENABLE_NODE, &clear_thread_cpu_cmd);

	install_element(CONFIG_NODE, &service_cputime_stats_cmd);
	install_element(CONFIG_NODE, &service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &service_walltime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
}
/* CLI end ------------------------------------------------------------------ */


static void cancelreq_del(void *cr)
{
	XFREE(MTYPE_TMP, cr);
}

/* initializer, only ever called once */
static void initializer(void)
{
	pthread_key_create(&thread_current, NULL);
}

struct thread_master *thread_master_create(const char *name)
{
	struct thread_master *rv;
	struct rlimit limit;

	pthread_once(&init_once, &initializer);

	rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));

	/* Initialize master mutex */
	pthread_mutex_init(&rv->mtx, NULL);
	pthread_cond_init(&rv->cancel_cond, NULL);

	/* Set name */
	name = name ? name : "default";
	rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);

	/* Initialize I/O task data structures */

	/* Use configured limit if present, ulimit otherwise. */
	rv->fd_limit = frr_get_fd_limit();
	if (rv->fd_limit == 0) {
		getrlimit(RLIMIT_NOFILE, &limit);
		rv->fd_limit = (int)limit.rlim_cur;
	}

	rv->read = XCALLOC(MTYPE_THREAD_POLL,
			   sizeof(struct thread *) * rv->fd_limit);

	rv->write = XCALLOC(MTYPE_THREAD_POLL,
			    sizeof(struct thread *) * rv->fd_limit);

	char tmhashname[strlen(name) + 32];
	snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
		 name);
	rv->cpu_record = hash_create_size(
		8, (unsigned int (*)(const void *))cpu_record_hash_key,
		(bool (*)(const void *, const void *))cpu_record_hash_cmp,
		tmhashname);

	thread_list_init(&rv->event);
	thread_list_init(&rv->ready);
	thread_list_init(&rv->unuse);
	thread_timer_list_init(&rv->timer);

	/* Initialize thread_fetch() settings */
	rv->spin = true;
	rv->handle_signals = true;

	/* Set pthread owner, should be updated by actual owner */
	rv->owner = pthread_self();
	rv->cancel_req = list_new();
	rv->cancel_req->del = cancelreq_del;
	rv->canceled = true;

	/* Initialize pipe poker */
	pipe(rv->io_pipe);
	set_nonblocking(rv->io_pipe[0]);
	set_nonblocking(rv->io_pipe[1]);

	/* Initialize data structures for poll() */
	rv->handler.pfdsize = rv->fd_limit;
	rv->handler.pfdcount = 0;
	rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);
	rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);

	/* add to list of threadmasters */
	frr_with_mutex(&masters_mtx) {
		if (!masters)
			masters = list_new();

		listnode_add(masters, rv);
	}

	return rv;
}

void thread_master_set_name(struct thread_master *master, const char *name)
{
	frr_with_mutex(&master->mtx) {
		XFREE(MTYPE_THREAD_MASTER, master->name);
		master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
	}
}

#define THREAD_UNUSED_DEPTH 10

/* Move thread to unuse list. */
static void thread_add_unuse(struct thread_master *m, struct thread *thread)
{
	pthread_mutex_t mtxc = thread->mtx;

	assert(m != NULL && thread != NULL);

	thread->hist->total_active--;
	memset(thread, 0, sizeof(struct thread));
	thread->type = THREAD_UNUSED;

	/* Restore the thread mutex context. */
	thread->mtx = mtxc;

	if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
		thread_list_add_tail(&m->unuse, thread);
		return;
	}

	thread_free(m, thread);
}

/* Free all unused thread. */
static void thread_list_free(struct thread_master *m,
			     struct thread_list_head *list)
{
	struct thread *t;

	while ((t = thread_list_pop(list)))
		thread_free(m, t);
}

static void thread_array_free(struct thread_master *m,
			      struct thread **thread_array)
{
	struct thread *t;
	int index;

	for (index = 0; index < m->fd_limit; ++index) {
		t = thread_array[index];
		if (t) {
			thread_array[index] = NULL;
			thread_free(m, t);
		}
	}
	XFREE(MTYPE_THREAD_POLL, thread_array);
}

/*
 * thread_master_free_unused
 *
 * As threads are finished with, they are put on the
 * unuse list for later reuse.
 * If we are shutting down, free up the unused threads
 * so we can see if we forgot to shut anything off.
 */
void thread_master_free_unused(struct thread_master *m)
{
	frr_with_mutex(&m->mtx) {
		struct thread *t;
		while ((t = thread_list_pop(&m->unuse)))
			thread_free(m, t);
	}
}

/* Stop thread scheduler. */
void thread_master_free(struct thread_master *m)
{
	struct thread *t;

	frr_with_mutex(&masters_mtx) {
		listnode_delete(masters, m);
		if (masters->count == 0) {
			list_delete(&masters);
		}
	}

	thread_array_free(m, m->read);
	thread_array_free(m, m->write);
	while ((t = thread_timer_list_pop(&m->timer)))
		thread_free(m, t);
	thread_list_free(m, &m->event);
	thread_list_free(m, &m->ready);
	thread_list_free(m, &m->unuse);
	pthread_mutex_destroy(&m->mtx);
	pthread_cond_destroy(&m->cancel_cond);
	close(m->io_pipe[0]);
	close(m->io_pipe[1]);
	list_delete(&m->cancel_req);
	m->cancel_req = NULL;

	hash_clean(m->cpu_record, cpu_record_hash_free);
	hash_free(m->cpu_record);
	m->cpu_record = NULL;

	XFREE(MTYPE_THREAD_MASTER, m->name);
	XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
	XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
	XFREE(MTYPE_THREAD_MASTER, m);
}

/* Return remaining time in milliseconds. */
unsigned long thread_timer_remain_msec(struct thread *thread)
{
	int64_t remain;

	frr_with_mutex(&thread->mtx) {
		remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
	}

	return remain < 0 ? 0 : remain;
}

/* Return remaining time in seconds. */
unsigned long thread_timer_remain_second(struct thread *thread)
{
	return thread_timer_remain_msec(thread) / 1000LL;
}

struct timeval thread_timer_remain(struct thread *thread)
{
	struct timeval remain;
	frr_with_mutex(&thread->mtx) {
		monotime_until(&thread->u.sands, &remain);
	}
	return remain;
}

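/* Render a duration given in seconds as "HH:MM:SS" into buf. */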
static int time_hhmmss(char *buf, int buf_size, long sec)
{
	long hh;
	long mm;
	int wr;

	assert(buf_size >= 8);

	hh = sec / 3600;
	sec %= 3600;
	mm = sec / 60;
	sec %= 60;

	wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);

	return wr != 8;
}

char *thread_timer_to_hhmmss(char *buf, int buf_size,
			     struct thread *t_timer)
{
	if (t_timer) {
		time_hhmmss(buf, buf_size,
			    thread_timer_remain_second(t_timer));
	} else {
		snprintf(buf, buf_size, "--:--:--");
	}
	return buf;
}

/* Get new thread. */
static struct thread *thread_get(struct thread_master *m, uint8_t type,
				 void (*func)(struct thread *), void *arg,
				 const struct xref_threadsched *xref)
{
	struct thread *thread = thread_list_pop(&m->unuse);
	struct cpu_thread_history tmp;

	if (!thread) {
		thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
		/* mutex only needs to be initialized at struct creation. */
		pthread_mutex_init(&thread->mtx, NULL);
		m->alloc++;
	}

	thread->type = type;
	thread->add_type = type;
	thread->master = m;
	thread->arg = arg;
	thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
	thread->ref = NULL;
	thread->ignore_timer_late = false;

	/*
	 * So if the passed in funcname is not what we have
	 * stored that means the thread->hist needs to be
	 * updated.  We keep the last one around in unused
	 * under the assumption that we are probably
	 * going to immediately allocate the same
	 * type of thread.
	 * This hopefully saves us some serious
	 * hash_get lookups.
	 */
	if ((thread->xref && thread->xref->funcname != xref->funcname)
	    || thread->func != func) {
		tmp.func = func;
		tmp.funcname = xref->funcname;
		thread->hist =
			hash_get(m->cpu_record, &tmp,
				 (void *(*)(void *))cpu_record_hash_alloc);
	}
	thread->hist->total_active++;
	thread->func = func;
	thread->xref = xref;

	return thread;
}

static void thread_free(struct thread_master *master, struct thread *thread)
{
	/* Update statistics. */
	assert(master->alloc > 0);
	master->alloc--;

	/* Free allocated resources. */
	pthread_mutex_destroy(&thread->mtx);
	XFREE(MTYPE_THREAD, thread);
}

static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
		   bool *eintr_p)
{
	sigset_t origsigs;
	unsigned char trash[64];
	nfds_t count = m->handler.copycount;

	/*
	 * If timer_wait is null here, that means poll() should block
	 * indefinitely, unless the thread_master has overridden it by setting
	 * ->selectpoll_timeout.
	 *
	 * If the value is positive, it specifies the maximum number of
	 * milliseconds to wait. If the timeout is -1, it specifies that
	 * we should never wait and always return immediately even if no
	 * event is detected. If the value is zero, the behavior is default.
	 */
	int timeout = -1;

	/* number of file descriptors with events */
	int num;

	if (timer_wait != NULL
	    && m->selectpoll_timeout == 0) // use the default value
		timeout = (timer_wait->tv_sec * 1000)
			  + (timer_wait->tv_usec / 1000);
	else if (m->selectpoll_timeout > 0) // use the user's timeout
		timeout = m->selectpoll_timeout;
	else if (m->selectpoll_timeout
		 < 0) // effect a poll (return immediately)
		timeout = 0;

	zlog_tls_buffer_flush();
	rcu_read_unlock();
	rcu_assert_read_unlocked();

	/* add poll pipe poker */
	assert(count + 1 < m->handler.pfdsize);
	m->handler.copy[count].fd = m->io_pipe[0];
	m->handler.copy[count].events = POLLIN;
	m->handler.copy[count].revents = 0x00;

	/* We need to deal with a signal-handling race here: we
	 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
	 * that may arrive just before we enter poll(). We will block the
	 * key signals, then check whether any have arrived - if so, we return
	 * before calling poll(). If not, we'll re-enable the signals
	 * in the ppoll() call.
	 */

	sigemptyset(&origsigs);
	if (m->handle_signals) {
		/* Main pthread that handles the app signals */
		if (frr_sigevent_check(&origsigs)) {
			/* Signal to process - restore signal mask and return */
			pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
			num = -1;
			*eintr_p = true;
			goto done;
		}
	} else {
		/* Don't make any changes for the non-main pthreads */
		pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
	}

#if defined(HAVE_PPOLL)
	struct timespec ts, *tsp;

	if (timeout >= 0) {
		ts.tv_sec = timeout / 1000;
		ts.tv_nsec = (timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
#else
	/* Not ideal - there is a race after we restore the signal mask */
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
	num = poll(m->handler.copy, count + 1, timeout);
#endif

done:

	if (num < 0 && errno == EINTR)
		*eintr_p = true;

	if (num > 0 && m->handler.copy[count].revents != 0 && num--)
		while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
			;

	rcu_read_lock();

	return num;
}

/* Add new read thread. */
void _thread_add_read_write(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg, int fd,
			    struct thread **t_ptr)
{
	int dir = xref->thread_type;
	struct thread *thread = NULL;
	struct thread **thread_array;

	if (dir == THREAD_READ)
		frrtrace(9, frr_libfrr, schedule_read, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);
	else
		frrtrace(9, frr_libfrr, schedule_write, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);

	assert(fd >= 0);
	if (fd >= m->fd_limit)
		assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			// thread is already scheduled; don't reschedule
			break;

		/* default to a new pollfd */
		nfds_t queuepos = m->handler.pfdcount;

		if (dir == THREAD_READ)
			thread_array = m->read;
		else
			thread_array = m->write;

		/* if we already have a pollfd for our file descriptor, find and
		 * use it */
		for (nfds_t i = 0; i < m->handler.pfdcount; i++)
			if (m->handler.pfds[i].fd == fd) {
				queuepos = i;

#ifdef DEV_BUILD
				/*
				 * What happens if we have a thread already
				 * created for this event?
				 */
				if (thread_array[fd])
					assert(!"Thread already scheduled for file descriptor");
#endif
				break;
			}

		/* make sure we have room for this fd + pipe poker fd */
		assert(queuepos + 1 < m->handler.pfdsize);

		thread = thread_get(m, dir, func, arg, xref);

		m->handler.pfds[queuepos].fd = fd;
		m->handler.pfds[queuepos].events |=
			(dir == THREAD_READ ? POLLIN : POLLOUT);

		if (queuepos == m->handler.pfdcount)
			m->handler.pfdcount++;

		if (thread) {
			frr_with_mutex(&thread->mtx) {
				thread->u.fd = fd;
				thread_array[thread->u.fd] = thread;
			}

			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		AWAKEN(m);
	}
}
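
/*
 * Illustrative caller usage (a sketch, not code from this file): daemons
 * normally schedule I/O through the thread_add_read()/thread_add_write()
 * wrappers in thread.h, which supply the xref argument, e.g.
 *
 *     static struct thread *t_read;
 *     thread_add_read(master, handle_readable, ctx, sock_fd, &t_read);
 *
 * 'handle_readable', 'ctx' and 'sock_fd' are placeholder names.  Passing
 * &t_read lets the library clear the reference when the task runs or is
 * cancelled, and makes a second call a no-op while the task is still
 * scheduled (the "t_ptr && *t_ptr" check above).
 */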

static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
				      struct thread_master *m,
				      void (*func)(struct thread *), void *arg,
				      struct timeval *time_relative,
				      struct thread **t_ptr)
{
	struct thread *thread;
	struct timeval t;

	assert(m != NULL);

	assert(time_relative);

	frrtrace(9, frr_libfrr, schedule_timer, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);

	/* Compute expiration/deadline time. */
	monotime(&t);
	timeradd(&t, time_relative, &t);

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			return;

		thread = thread_get(m, THREAD_TIMER, func, arg, xref);

		frr_with_mutex(&thread->mtx) {
			thread->u.sands = t;
			thread_timer_list_add(&m->timer, thread);
			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		/* The timer list is sorted - if this new timer
		 * might change the time we'll wait for, give the pthread
		 * a chance to re-compute.
		 */
		if (thread_timer_list_first(&m->timer) == thread)
			AWAKEN(m);
	}
#define ONEYEAR2SEC (60 * 60 * 24 * 365)
	if (time_relative->tv_sec > ONEYEAR2SEC)
		flog_err(
			EC_LIB_TIMER_TOO_LONG,
			"Timer: %pTHD is created with an expiration that is greater than 1 year",
			thread);
}


/* Add timer event thread. */
void _thread_add_timer(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer;
	trel.tv_usec = 0;

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "millisecond" resolution */
void _thread_add_timer_msec(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg,
			    long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer / 1000;
	trel.tv_usec = 1000 * (timer % 1000);

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "timeval" resolution */
void _thread_add_timer_tv(const struct xref_threadsched *xref,
			  struct thread_master *m,
			  void (*func)(struct thread *), void *arg,
			  struct timeval *tv, struct thread **t_ptr)
{
	_thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}

/* Add simple event thread. */
void _thread_add_event(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, int val, struct thread **t_ptr)
{
	struct thread *thread = NULL;

	frrtrace(9, frr_libfrr, schedule_event, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, val, arg, 0);

	assert(m != NULL);

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			break;

		thread = thread_get(m, THREAD_EVENT, func, arg, xref);
		frr_with_mutex(&thread->mtx) {
			thread->u.val = val;
			thread_list_add_tail(&m->event, thread);
		}

		if (t_ptr) {
			*t_ptr = thread;
			thread->ref = t_ptr;
		}

		AWAKEN(m);
	}
}
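
/*
 * Illustrative timer/event scheduling (sketch only; the callback and
 * argument names below are placeholders), again via the thread.h wrapper
 * macros:
 *
 *     thread_add_timer(master, resend_update, peer, 30, &peer->t_resend);
 *     thread_add_timer_msec(master, flush_queue, q, 250, &q->t_flush);
 *     thread_add_event(master, process_work, ctx, 0, NULL);
 *
 * When a back-reference is supplied and already points at a scheduled
 * task, the call returns without rescheduling; AWAKEN() is issued so the
 * owning pthread re-computes its poll() timeout when a new timer becomes
 * the earliest deadline.
 */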

/* Thread cancellation ------------------------------------------------------ */

/**
 * NOT's out the .events field of pollfd corresponding to the given file
 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
 *
 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
 * implementation for details.
 *
 * @param master
 * @param fd
 * @param state the event to cancel. One or more (OR'd together) of the
 * following:
 *   - POLLIN
 *   - POLLOUT
 */
static void thread_cancel_rw(struct thread_master *master, int fd, short state,
			     int idx_hint)
{
	bool found = false;

	/* find the index of corresponding pollfd */
	nfds_t i;

	/* Cancel POLLHUP too just in case some bozo set it */
	state |= POLLHUP;

	/* Some callers know the index of the pfd already */
	if (idx_hint >= 0) {
		i = idx_hint;
		found = true;
	} else {
		/* Have to look for the fd in the pfd array */
		for (i = 0; i < master->handler.pfdcount; i++)
			if (master->handler.pfds[i].fd == fd) {
				found = true;
				break;
			}
	}

	if (!found) {
		zlog_debug(
			"[!] Received cancellation request for nonexistent rw job");
		zlog_debug("[!] threadmaster: %s | fd: %d",
			   master->name ? master->name : "", fd);
		return;
	}

	/* NOT out event. */
	master->handler.pfds[i].events &= ~(state);

	/* If all events are canceled, delete / resize the pollfd array. */
	if (master->handler.pfds[i].events == 0) {
		memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
			(master->handler.pfdcount - i - 1)
				* sizeof(struct pollfd));
		master->handler.pfdcount--;
		master->handler.pfds[master->handler.pfdcount].fd = 0;
		master->handler.pfds[master->handler.pfdcount].events = 0;
	}

	/* If we have the same pollfd in the copy, perform the same operations,
	 * otherwise return. */
	if (i >= master->handler.copycount)
		return;

	master->handler.copy[i].events &= ~(state);

	if (master->handler.copy[i].events == 0) {
		memmove(master->handler.copy + i, master->handler.copy + i + 1,
			(master->handler.copycount - i - 1)
				* sizeof(struct pollfd));
		master->handler.copycount--;
		master->handler.copy[master->handler.copycount].fd = 0;
		master->handler.copy[master->handler.copycount].events = 0;
	}
}

/*
 * Process task cancellation given a task argument: iterate through the
 * various lists of tasks, looking for any that match the argument.
 */
static void cancel_arg_helper(struct thread_master *master,
			      const struct cancel_req *cr)
{
	struct thread *t;
	nfds_t i;
	int fd;
	struct pollfd *pfd;

	/* We're only processing arg-based cancellations here. */
	if (cr->eventobj == NULL)
		return;

	/* First process the ready lists. */
	frr_each_safe(thread_list, &master->event, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->event, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	frr_each_safe(thread_list, &master->ready, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->ready, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	/* If requested, stop here and ignore io and timers */
	if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
		return;

	/* Check the io tasks */
	for (i = 0; i < master->handler.pfdcount;) {
		pfd = master->handler.pfds + i;

		if (pfd->events & POLLIN)
			t = master->read[pfd->fd];
		else
			t = master->write[pfd->fd];

		if (t && t->arg == cr->eventobj) {
			fd = pfd->fd;

			/* Found a match to cancel: clean up fd arrays */
			thread_cancel_rw(master, pfd->fd, pfd->events, i);

			/* Clean up thread arrays */
			master->read[fd] = NULL;
			master->write[fd] = NULL;

			/* Clear caller's ref */
			if (t->ref)
				*t->ref = NULL;

			thread_add_unuse(master, t);

			/* Don't increment 'i' since the cancellation will have
			 * removed the entry from the pfd array
			 */
		} else
			i++;
	}

	/* Check the timer tasks */
	t = thread_timer_list_first(&master->timer);
	while (t) {
		struct thread *t_next;

		t_next = thread_timer_list_next(&master->timer, t);

		if (t->arg == cr->eventobj) {
			thread_timer_list_del(&master->timer, t);
			if (t->ref)
				*t->ref = NULL;
			thread_add_unuse(master, t);
		}

		t = t_next;
	}
}

/**
 * Process cancellation requests.
 *
 * This may only be run from the pthread which owns the thread_master.
 *
 * @param master the thread master to process
 * @REQUIRE master->mtx
 */
static void do_thread_cancel(struct thread_master *master)
{
	struct thread_list_head *list = NULL;
	struct thread **thread_array = NULL;
	struct thread *thread;

	struct cancel_req *cr;
	struct listnode *ln;
	for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
		/*
		 * If this is an event object cancellation, search
		 * through task lists deleting any tasks which have the
		 * specified argument - use this handy helper function.
		 */
		if (cr->eventobj) {
			cancel_arg_helper(master, cr);
			continue;
		}

		/*
		 * The pointer varies depending on whether the cancellation
		 * request was made asynchronously or not. If it was, we
		 * need to check whether the thread even exists anymore
		 * before cancelling it.
		 */
		thread = (cr->thread) ? cr->thread : *cr->threadref;

		if (!thread)
			continue;

		/* Determine the appropriate queue to cancel the thread from */
		switch (thread->type) {
		case THREAD_READ:
			thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
			thread_array = master->read;
			break;
		case THREAD_WRITE:
			thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
			thread_array = master->write;
			break;
		case THREAD_TIMER:
			thread_timer_list_del(&master->timer, thread);
			break;
		case THREAD_EVENT:
			list = &master->event;
			break;
		case THREAD_READY:
			list = &master->ready;
			break;
		default:
			continue;
			break;
		}

		if (list) {
			thread_list_del(list, thread);
		} else if (thread_array) {
			thread_array[thread->u.fd] = NULL;
		}

		if (thread->ref)
			*thread->ref = NULL;

		thread_add_unuse(thread->master, thread);
	}

	/* Delete and free all cancellation requests */
	if (master->cancel_req)
		list_delete_all_node(master->cancel_req);

	/* Wake up any threads which may be blocked in thread_cancel_async() */
	master->canceled = true;
	pthread_cond_broadcast(&master->cancel_cond);
}

/*
 * Helper function used for multiple flavors of arg-based cancellation.
 */
static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
{
	struct cancel_req *cr;

	assert(m->owner == pthread_self());

	/* Only worth anything if caller supplies an arg. */
	if (arg == NULL)
		return;

	cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));

	cr->flags = flags;

	frr_with_mutex(&m->mtx) {
		cr->eventobj = arg;
		listnode_add(m->cancel_req, cr);
		do_thread_cancel(m);
	}
}

/**
 * Cancel any events which have the specified argument.
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event(struct thread_master *master, void *arg)
{
	cancel_event_helper(master, arg, 0);
}

/*
 * Cancel ready tasks with an arg matching 'arg'
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event_ready(struct thread_master *m, void *arg)
{

	/* Only cancel ready/event tasks */
	cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
}

/**
 * Cancel a specific task.
 *
 * MT-Unsafe
 *
 * @param thread task to cancel
 */
void thread_cancel(struct thread **thread)
{
	struct thread_master *master;

	if (thread == NULL || *thread == NULL)
		return;

	master = (*thread)->master;

	frrtrace(9, frr_libfrr, thread_cancel, master,
		 (*thread)->xref->funcname, (*thread)->xref->xref.file,
		 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
		 (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);

	assert(master->owner == pthread_self());

	frr_with_mutex(&master->mtx) {
		struct cancel_req *cr =
			XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
		cr->thread = *thread;
		listnode_add(master->cancel_req, cr);
		do_thread_cancel(master);
	}

	*thread = NULL;
}

/**
 * Asynchronous cancellation.
 *
 * Called with either a struct thread ** or void * to an event argument,
 * this function posts the correct cancellation request and blocks until it is
 * serviced.
 *
 * If the thread is currently running, execution blocks until it completes.
 *
 * The last two parameters are mutually exclusive, i.e. if you pass one the
 * other must be NULL.
 *
 * When the cancellation procedure executes on the target thread_master, the
 * thread * provided is checked for nullity. If it is null, the thread is
 * assumed to no longer exist and the cancellation request is a no-op. Thus
 * users of this API must pass a back-reference when scheduling the original
 * task.
 *
 * MT-Safe
 *
 * @param master the thread master with the relevant event / task
 * @param thread pointer to thread to cancel
 * @param eventobj the event
 */
void thread_cancel_async(struct thread_master *master, struct thread **thread,
			 void *eventobj)
{
	assert(!(thread && eventobj) && (thread || eventobj));

	if (thread && *thread)
		frrtrace(9, frr_libfrr, thread_cancel_async, master,
			 (*thread)->xref->funcname, (*thread)->xref->xref.file,
			 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
			 (*thread)->u.val, (*thread)->arg,
			 (*thread)->u.sands.tv_sec);
	else
		frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
			 0, NULL, 0, 0, eventobj, 0);

	assert(master->owner != pthread_self());

	frr_with_mutex(&master->mtx) {
		master->canceled = false;

		if (thread) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->threadref = thread;
			listnode_add(master->cancel_req, cr);
		} else if (eventobj) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->eventobj = eventobj;
			listnode_add(master->cancel_req, cr);
		}
		AWAKEN(master);

		while (!master->canceled)
			pthread_cond_wait(&master->cancel_cond, &master->mtx);
	}

	if (thread)
		*thread = NULL;
}
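
/*
 * Illustrative asynchronous cancellation from a pthread that does not own
 * the target thread_master (sketch only; 'peer' and its fields are
 * placeholders):
 *
 *     thread_cancel_async(peer->master, &peer->t_keepalive, NULL);
 *     // or, by scheduling argument:
 *     thread_cancel_async(peer->master, NULL, peer);
 *
 * Both forms block until the owning pthread has serviced the request, so
 * the cancelled callback is guaranteed not to run afterwards.
 */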
63ccb9cb 1537/* ------------------------------------------------------------------------- */
718e3744 1538
27d29ced 1539static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
d62a17ae 1540 struct timeval *timer_val)
718e3744 1541{
27d29ced
DL
1542 if (!thread_timer_list_count(timers))
1543 return NULL;
1544
1545 struct thread *next_timer = thread_timer_list_first(timers);
1546 monotime_until(&next_timer->u.sands, timer_val);
1547 return timer_val;
718e3744 1548}
718e3744 1549
d62a17ae 1550static struct thread *thread_run(struct thread_master *m, struct thread *thread,
1551 struct thread *fetch)
718e3744 1552{
d62a17ae 1553 *fetch = *thread;
1554 thread_add_unuse(m, thread);
1555 return fetch;
718e3744 1556}
1557
d62a17ae 1558static int thread_process_io_helper(struct thread_master *m,
45f3d590
DS
1559 struct thread *thread, short state,
1560 short actual_state, int pos)
5d4ccd4e 1561{
d62a17ae 1562 struct thread **thread_array;
1563
45f3d590
DS
1564 /*
1565 * poll() clears the .events field, but the pollfd array we
1566 * pass to poll() is a copy of the one used to schedule threads.
1567 * We need to synchronize state between the two here by applying
1568 * the same changes poll() made on the copy of the "real" pollfd
1569 * array.
1570 *
1571 * This cleans up a possible infinite loop where we refuse
1572 * to respond to a poll event but poll is insistent that
1573 * we should.
1574 */
1575 m->handler.pfds[pos].events &= ~(state);
1576
1577 if (!thread) {
1578 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1579 flog_err(EC_LIB_NO_THREAD,
1d5453d6 1580 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
45f3d590 1581 m->handler.pfds[pos].fd, actual_state);
d62a17ae 1582 return 0;
45f3d590 1583 }
d62a17ae 1584
1585 if (thread->type == THREAD_READ)
1586 thread_array = m->read;
1587 else
1588 thread_array = m->write;
1589
1590 thread_array[thread->u.fd] = NULL;
c284542b 1591 thread_list_add_tail(&m->ready, thread);
d62a17ae 1592 thread->type = THREAD_READY;
45f3d590 1593
d62a17ae 1594 return 1;
5d4ccd4e
DS
1595}
1596
8797240e
QY
1597/**
1598 * Process I/O events.
1599 *
1600 * Walks through file descriptor array looking for those pollfds whose .revents
1601 * field has something interesting. Deletes any invalid file descriptors.
1602 *
1603 * @param m the thread master
1604 * @param num the number of active file descriptors (return value of poll())
1605 */
d62a17ae 1606static void thread_process_io(struct thread_master *m, unsigned int num)
0a95a0d0 1607{
d62a17ae 1608 unsigned int ready = 0;
1609 struct pollfd *pfds = m->handler.copy;
1610
1611 for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1612 /* no event for current fd? immediately continue */
1613 if (pfds[i].revents == 0)
1614 continue;
1615
1616 ready++;
1617
d279ef57
DS
1618 /*
1619 * Unless someone has called thread_cancel from another
1620 * pthread, the only thing that could have changed in
1621 * m->handler.pfds while we were asleep is the .events
1622 * field in a given pollfd. Barring thread_cancel() that
1623 * value should be a superset of the values we have in our
1624 * copy, so there's no need to update it. Similarily,
1625 * barring deletion, the fd should still be a valid index
1626 * into the master's pfds.
d142453d
DS
1627 *
1628 * We are including POLLERR here to do a READ event
1629 * this is because the read should fail and the
1630 * read function should handle it appropriately
d279ef57 1631 */
d142453d 1632 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
d62a17ae 1633 thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
45f3d590
DS
1634 pfds[i].revents, i);
1635 }
d62a17ae 1636 if (pfds[i].revents & POLLOUT)
1637 thread_process_io_helper(m, m->write[pfds[i].fd],
45f3d590 1638 POLLOUT, pfds[i].revents, i);
d62a17ae 1639
1640 /* if one of our file descriptors is garbage, remove the same
1641 * from
1642 * both pfds + update sizes and index */
1643 if (pfds[i].revents & POLLNVAL) {
1644 memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1645 (m->handler.pfdcount - i - 1)
1646 * sizeof(struct pollfd));
1647 m->handler.pfdcount--;
e985cda0
S
1648 m->handler.pfds[m->handler.pfdcount].fd = 0;
1649 m->handler.pfds[m->handler.pfdcount].events = 0;
d62a17ae 1650
1651 memmove(pfds + i, pfds + i + 1,
1652 (m->handler.copycount - i - 1)
1653 * sizeof(struct pollfd));
1654 m->handler.copycount--;
e985cda0
S
1655 m->handler.copy[m->handler.copycount].fd = 0;
1656 m->handler.copy[m->handler.copycount].events = 0;
d62a17ae 1657
1658 i--;
1659 }
1660 }
718e3744 1661}
1662
8b70d0b0 1663/* Add all timers that have popped to the ready list. */
e7d9e44b 1664static unsigned int thread_process_timers(struct thread_master *m,
d62a17ae 1665 struct timeval *timenow)
a48b4e6d 1666{
ab01a001
DS
1667 struct timeval prev = *timenow;
1668 bool displayed = false;
d62a17ae 1669 struct thread *thread;
1670 unsigned int ready = 0;
1671
e7d9e44b 1672 while ((thread = thread_timer_list_first(&m->timer))) {
d62a17ae 1673 if (timercmp(timenow, &thread->u.sands, <))
e7d9e44b 1674 break;
ab01a001
DS
1675 prev = thread->u.sands;
1676 prev.tv_sec += 4;
1677 /*
1678 * If the timer would have popped 4 seconds in the
1679 * past then we are in a situation where we are
1680 * really getting behind on handling of events.
1681 * Let's log it and do the right thing with it.
1682 */
1dd08c22
DS
1683 if (timercmp(timenow, &prev, >)) {
1684 atomic_fetch_add_explicit(
1685 &thread->hist->total_starv_warn, 1,
1686 memory_order_seq_cst);
1687 if (!displayed && !thread->ignore_timer_late) {
1688 flog_warn(
1689 EC_LIB_STARVE_THREAD,
1690 "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1691 thread);
1692 displayed = true;
1693 }
ab01a001
DS
1694 }
1695
e7d9e44b 1696 thread_timer_list_pop(&m->timer);
d62a17ae 1697 thread->type = THREAD_READY;
e7d9e44b 1698 thread_list_add_tail(&m->ready, thread);
d62a17ae 1699 ready++;
1700 }
e7d9e44b 1701
d62a17ae 1702 return ready;
a48b4e6d 1703}
1704
2613abe6 1705/* process a list en masse, e.g. for event thread lists */
c284542b 1706static unsigned int thread_process(struct thread_list_head *list)
2613abe6 1707{
d62a17ae 1708 struct thread *thread;
d62a17ae 1709 unsigned int ready = 0;
1710
c284542b 1711 while ((thread = thread_list_pop(list))) {
d62a17ae 1712 thread->type = THREAD_READY;
c284542b 1713 thread_list_add_tail(&thread->master->ready, thread);
d62a17ae 1714 ready++;
1715 }
1716 return ready;
2613abe6
PJ
1717}
1718
1719
718e3744 1720/* Fetch next ready thread. */
d62a17ae 1721struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
718e3744 1722{
d62a17ae 1723 struct thread *thread = NULL;
1724 struct timeval now;
1725 struct timeval zerotime = {0, 0};
1726 struct timeval tv;
1727 struct timeval *tw = NULL;
d81ca9a3 1728 bool eintr_p = false;
d62a17ae 1729 int num = 0;
1730
1731 do {
1732 /* Handle signals if any */
1733 if (m->handle_signals)
7cc91e67 1734 frr_sigevent_process();
d62a17ae 1735
1736 pthread_mutex_lock(&m->mtx);
1737
1738 /* Process any pending cancellation requests */
1739 do_thread_cancel(m);
1740
e3c9529e
QY
1741 /*
1742 * Attempt to flush ready queue before going into poll().
1743 * This is performance-critical. Think twice before modifying.
1744 */
c284542b 1745 if ((thread = thread_list_pop(&m->ready))) {
e3c9529e
QY
1746 fetch = thread_run(m, thread, fetch);
1747 if (fetch->ref)
1748 *fetch->ref = NULL;
1749 pthread_mutex_unlock(&m->mtx);
5e822957
DS
1750 if (!m->ready_run_loop)
1751 GETRUSAGE(&m->last_getrusage);
1752 m->ready_run_loop = true;
e3c9529e
QY
1753 break;
1754 }
1755
5e822957 1756 m->ready_run_loop = false;
e3c9529e
QY
1757 /* otherwise, tick through scheduling sequence */
1758
bca37d17
QY
1759 /*
1760 * Post events to ready queue. This must come before the
1761 * following block since events should occur immediately
1762 */
d62a17ae 1763 thread_process(&m->event);
1764
bca37d17
QY
1765 /*
1766 * If there are no tasks on the ready queue, we will poll()
1767 * until a timer expires or we receive I/O, whichever comes
1768 * first. The strategy for doing this is:
d62a17ae 1769 *
1770 * - If there are events pending, set the poll() timeout to zero
1771 * - If there are no events pending, but there are timers
d279ef57
DS
1772 * pending, set the timeout to the smallest remaining time on
1773 * any timer.
d62a17ae 1774 * - If there are neither timers nor events pending, but there
d279ef57 1775 * are file descriptors pending, block indefinitely in poll()
d62a17ae 1776 * - If nothing is pending, it's time for the application to die
1777 *
1778 * In every case except the last, we need to hit poll() at least
bca37d17
QY
1779 * once per loop to avoid starvation by events
1780 */
c284542b 1781 if (!thread_list_count(&m->ready))
27d29ced 1782 tw = thread_timer_wait(&m->timer, &tv);
d62a17ae 1783
c284542b
DL
1784 if (thread_list_count(&m->ready) ||
1785 (tw && !timercmp(tw, &zerotime, >)))
d62a17ae 1786 tw = &zerotime;
1787
1788 if (!tw && m->handler.pfdcount == 0) { /* die */
1789 pthread_mutex_unlock(&m->mtx);
1790 fetch = NULL;
1791 break;
1792 }
1793
bca37d17
QY
1794 /*
1795 * Copy pollfd array + # active pollfds in it. Not necessary to
1796 * copy the array size as this is fixed.
1797 */
d62a17ae 1798 m->handler.copycount = m->handler.pfdcount;
1799 memcpy(m->handler.copy, m->handler.pfds,
1800 m->handler.copycount * sizeof(struct pollfd));
1801
e3c9529e
QY
1802 pthread_mutex_unlock(&m->mtx);
1803 {
d81ca9a3
MS
1804 eintr_p = false;
1805 num = fd_poll(m, tw, &eintr_p);
e3c9529e
QY
1806 }
1807 pthread_mutex_lock(&m->mtx);
d764d2cc 1808
e3c9529e
QY
1809 /* Handle any errors received in poll() */
1810 if (num < 0) {
d81ca9a3 1811 if (eintr_p) {
d62a17ae 1812 pthread_mutex_unlock(&m->mtx);
e3c9529e
QY
1813 /* loop around to signal handler */
1814 continue;
d62a17ae 1815 }
1816
e3c9529e 1817 /* else die */
450971aa 1818 flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
9ef9495e 1819 safe_strerror(errno));
e3c9529e
QY
1820 pthread_mutex_unlock(&m->mtx);
1821 fetch = NULL;
1822 break;
bca37d17 1823 }
d62a17ae 1824
1825 /* Post timers to ready queue. */
1826 monotime(&now);
e7d9e44b 1827 thread_process_timers(m, &now);
d62a17ae 1828
1829 /* Post I/O to ready queue. */
1830 if (num > 0)
1831 thread_process_io(m, num);
1832
d62a17ae 1833 pthread_mutex_unlock(&m->mtx);
1834
1835 } while (!thread && m->spin);
1836
1837 return fetch;
718e3744 1838}
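/*
 * Illustrative sketch (not part of the original file): the fetch/call
 * loop a daemon's main pthread runs to drive this scheduler; libfrr's
 * frr_run() works essentially this way.  thread_fetch() blocks in
 * poll() until a task is runnable and returns NULL only once the
 * master has nothing left to do.
 */
static void run_loop_example(struct thread_master *master)
{
	struct thread thread;

	while (thread_fetch(master, &thread))
		thread_call(&thread);
}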
1839
d62a17ae 1840static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
62f44022 1841{
d62a17ae 1842 return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1843 + (a.tv_usec - b.tv_usec));
62f44022
QY
1844}
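/*
 * Worked example for timeval_elapsed(): with a = {5, 200000} and
 * b = {3, 900000}, the result is (5 - 3) * 1000000 +
 * (200000 - 900000) = 2000000 - 700000 = 1300000 microseconds,
 * i.e. 1.3 seconds.
 */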
1845
d62a17ae 1846unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1847 unsigned long *cputime)
718e3744 1848{
6418e2d3
DL
1849#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1850 *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1851 + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1852#else
d62a17ae 1853 /* This is 'user + sys' time. */
1854 *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1855 + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
6418e2d3 1856#endif
d62a17ae 1857 return timeval_elapsed(now->real, start->real);
8b70d0b0 1858}
1859
50596be0
DS
 1860/* We should aim to yield after the thread's configured yield time
 1861 (in microseconds), which defaults to THREAD_YIELD_TIME_SLOT.
8b70d0b0 1862 Note: we are using real (wall clock) time for this calculation.
1863 It could be argued that CPU time may make more sense in certain
1864 contexts. The things to consider are whether the thread may have
1865 blocked (in which case wall time increases, but CPU time does not),
1866 or whether the system is heavily loaded with other processes competing
d62a17ae 1867 for CPU time. On balance, wall clock time seems to make sense.
8b70d0b0 1868 Plus it has the added benefit that reading the monotonic clock
 1869 should be faster than calling getrusage. */
d62a17ae 1870int thread_should_yield(struct thread *thread)
718e3744 1871{
d62a17ae 1872 int result;
00dffa8c 1873 frr_with_mutex(&thread->mtx) {
d62a17ae 1874 result = monotime_since(&thread->real, NULL)
1875 > (int64_t)thread->yield;
1876 }
d62a17ae 1877 return result;
50596be0
DS
1878}
1879
d62a17ae 1880void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
50596be0 1881{
00dffa8c 1882 frr_with_mutex(&thread->mtx) {
d62a17ae 1883 thread->yield = yield_time;
1884 }
718e3744 1885}
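/*
 * Illustrative sketch (not part of the original file): a long-running
 * task that yields cooperatively.  Once thread_should_yield() reports
 * the time slot is used up, the task reschedules itself as an event
 * and returns so other tasks get a turn.  more_work_to_do() and
 * do_one_unit_of_work() are hypothetical helpers declared here only
 * for the example.
 */
extern bool more_work_to_do(void *ctx);
extern void do_one_unit_of_work(void *ctx);

static struct thread *t_background;

static void background_work(struct thread *thread)
{
	void *ctx = THREAD_ARG(thread);

	while (more_work_to_do(ctx)) {
		do_one_unit_of_work(ctx);

		if (thread_should_yield(thread)) {
			/* slot exhausted; resume on a later pass */
			thread_add_event(thread->master, background_work,
					 ctx, 0, &t_background);
			return;
		}
	}
}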
1886
d62a17ae 1887void thread_getrusage(RUSAGE_T *r)
db9c0df9 1888{
6418e2d3
DL
1889 monotime(&r->real);
1890 if (!cputime_enabled) {
1891 memset(&r->cpu, 0, sizeof(r->cpu));
1892 return;
1893 }
1894
1895#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1896 /* not currently implemented in Linux's vDSO, but maybe at some point
1897 * in the future?
1898 */
1899 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1900#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
231db9a6
DS
1901#if defined RUSAGE_THREAD
1902#define FRR_RUSAGE RUSAGE_THREAD
1903#else
1904#define FRR_RUSAGE RUSAGE_SELF
1905#endif
6418e2d3
DL
1906 getrusage(FRR_RUSAGE, &(r->cpu));
1907#endif
db9c0df9
PJ
1908}
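/*
 * Illustrative sketch (not part of the original file): timing a section
 * of code the same way thread_call() does, via the GETRUSAGE macro and
 * thread_consumed_time().  The cputime result is 0 while
 * "service cputime-stats" is disabled (cputime_enabled == false).
 */
static void measure_example(void)
{
	RUSAGE_T before, after;
	unsigned long walltime, cputime;

	GETRUSAGE(&before);
	/* ... the work being measured ... */
	GETRUSAGE(&after);

	walltime = thread_consumed_time(&after, &before, &cputime);
	zlog_debug("section took %lu us wall, %lu us cpu", walltime, cputime);
}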
1909
fbcac826
QY
1910/*
1911 * Call a thread.
1912 *
1913 * This function will atomically update the thread's usage history. At present
1914 * this is the only spot where usage history is written. Nevertheless the code
1915 * has been written such that the introduction of writers in the future should
1916 * not need to update it provided the writers atomically perform only the
1917 * operations done here, i.e. updating the total and maximum times. In
1918 * particular, the maximum real and cpu times must be monotonically increasing
1919 * or this code is not correct.
1920 */
d62a17ae 1921void thread_call(struct thread *thread)
718e3744 1922{
d62a17ae 1923 RUSAGE_T before, after;
cc8b13a0 1924
45f01188
DL
1925 /* if the thread being called is the CLI, it may change cputime_enabled
1926 * ("service cputime-stats" command), which can result in nonsensical
1927 * and very confusing warnings
1928 */
1929 bool cputime_enabled_here = cputime_enabled;
1930
5e822957
DS
1931 if (thread->master->ready_run_loop)
1932 before = thread->master->last_getrusage;
1933 else
1934 GETRUSAGE(&before);
1935
d62a17ae 1936 thread->real = before.real;
718e3744 1937
6c3aa850
DL
1938 frrtrace(9, frr_libfrr, thread_call, thread->master,
1939 thread->xref->funcname, thread->xref->xref.file,
1940 thread->xref->xref.line, NULL, thread->u.fd,
c7bb4f00 1941 thread->u.val, thread->arg, thread->u.sands.tv_sec);
abf96a87 1942
d62a17ae 1943 pthread_setspecific(thread_current, thread);
1944 (*thread->func)(thread);
1945 pthread_setspecific(thread_current, NULL);
718e3744 1946
d62a17ae 1947 GETRUSAGE(&after);
5e822957 1948 thread->master->last_getrusage = after;
718e3744 1949
45f01188
DL
1950 unsigned long walltime, cputime;
1951 unsigned long exp;
fbcac826 1952
45f01188
DL
1953 walltime = thread_consumed_time(&after, &before, &cputime);
1954
1955 /* update walltime */
1956 atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
fbcac826
QY
1957 memory_order_seq_cst);
1958 exp = atomic_load_explicit(&thread->hist->real.max,
1959 memory_order_seq_cst);
45f01188 1960 while (exp < walltime
fbcac826 1961 && !atomic_compare_exchange_weak_explicit(
45f01188
DL
1962 &thread->hist->real.max, &exp, walltime,
1963 memory_order_seq_cst, memory_order_seq_cst))
fbcac826
QY
1964 ;
1965
45f01188
DL
1966 if (cputime_enabled_here && cputime_enabled) {
1967 /* update cputime */
1968 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
1969 memory_order_seq_cst);
1970 exp = atomic_load_explicit(&thread->hist->cpu.max,
1971 memory_order_seq_cst);
1972 while (exp < cputime
1973 && !atomic_compare_exchange_weak_explicit(
1974 &thread->hist->cpu.max, &exp, cputime,
1975 memory_order_seq_cst, memory_order_seq_cst))
1976 ;
1977 }
fbcac826
QY
1978
1979 atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
1980 memory_order_seq_cst);
1981 atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
1982 memory_order_seq_cst);
718e3744 1983
45f01188
DL
1984 if (cputime_enabled_here && cputime_enabled && cputime_threshold
1985 && cputime > cputime_threshold) {
d62a17ae 1986 /*
45f01188
DL
 1987 * We have a CPU hog on our hands: the time FRR has spent
 1988 * doing actual work (not sleeping) exceeds the configured
d62a17ae 1989 * cpu-time threshold (5 seconds by default). Warn about it
 1990 * now, so we're aware this is yet another task to fix.
1991 */
9b8e01ca
DS
1992 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
1993 1, memory_order_seq_cst);
9ef9495e 1994 flog_warn(
039d547f
DS
1995 EC_LIB_SLOW_THREAD_CPU,
1996 "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
1997 thread->xref->funcname, (unsigned long)thread->func,
45f01188
DL
1998 walltime / 1000, cputime / 1000);
1999
2000 } else if (walltime_threshold && walltime > walltime_threshold) {
039d547f 2001 /*
45f01188
DL
 2002 * The wall-clock runtime for this task exceeds the walltime
 2003 * threshold (5 seconds by default), but its cpu time did not.
 2004 * Warn about it, since this may indicate a scheduling issue.
039d547f 2005 */
9b8e01ca
DS
2006 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2007 1, memory_order_seq_cst);
039d547f
DS
2008 flog_warn(
2009 EC_LIB_SLOW_THREAD_WALL,
2010 "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
60a3efec 2011 thread->xref->funcname, (unsigned long)thread->func,
45f01188 2012 walltime / 1000, cputime / 1000);
d62a17ae 2013 }
718e3744 2014}
2015
2016/* Execute thread */
60a3efec 2017void _thread_execute(const struct xref_threadsched *xref,
cc9f21da 2018 struct thread_master *m, void (*func)(struct thread *),
60a3efec 2019 void *arg, int val)
718e3744 2020{
c4345fbf 2021 struct thread *thread;
718e3744 2022
c4345fbf 2023 /* Get or allocate new thread to execute. */
00dffa8c 2024 frr_with_mutex(&m->mtx) {
60a3efec 2025 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
9c7753e4 2026
c4345fbf 2027 /* Set its event value. */
00dffa8c 2028 frr_with_mutex(&thread->mtx) {
c4345fbf
RZ
2029 thread->add_type = THREAD_EXECUTE;
2030 thread->u.val = val;
2031 thread->ref = &thread;
2032 }
c4345fbf 2033 }
f7c62e11 2034
c4345fbf
RZ
2035 /* Execute thread doing all accounting. */
2036 thread_call(thread);
9c7753e4 2037
c4345fbf
RZ
2038 /* Give back or free thread. */
2039 thread_add_unuse(m, thread);
718e3744 2040}
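/*
 * Illustrative sketch (not part of the original file): callers normally
 * reach _thread_execute() through the thread_execute() wrapper macro,
 * which supplies the xref argument.  The callback runs synchronously,
 * with full accounting, before the call returns.  run_now_cb is a
 * hypothetical callback.
 */
static void run_now_cb(struct thread *thread)
{
	/* executed immediately on the calling pthread */
}

static void execute_example(struct thread_master *master)
{
	thread_execute(master, run_now_cb, NULL, 0);
}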
1543c387
MS
2041
2042/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2043void debug_signals(const sigset_t *sigs)
2044{
2045 int i, found;
2046 sigset_t tmpsigs;
2047 char buf[300];
2048
2049 /*
2050 * We're only looking at the non-realtime signals here, so we need
2051 * some limit value. Platform differences mean at some point we just
2052 * need to pick a reasonable value.
2053 */
2054#if defined SIGRTMIN
2055# define LAST_SIGNAL SIGRTMIN
2056#else
2057# define LAST_SIGNAL 32
2058#endif
2059
2060
2061 if (sigs == NULL) {
2062 sigemptyset(&tmpsigs);
2063 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2064 sigs = &tmpsigs;
2065 }
2066
2067 found = 0;
2068 buf[0] = '\0';
2069
2070 for (i = 0; i < LAST_SIGNAL; i++) {
2071 char tmp[20];
2072
2073 if (sigismember(sigs, i) > 0) {
2074 if (found > 0)
2075 strlcat(buf, ",", sizeof(buf));
2076 snprintf(tmp, sizeof(tmp), "%d", i);
2077 strlcat(buf, tmp, sizeof(buf));
2078 found++;
2079 }
2080 }
2081
2082 if (found == 0)
2083 snprintf(buf, sizeof(buf), "<none>");
2084
2085 zlog_debug("%s: %s", __func__, buf);
2086}
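/*
 * Illustrative usage (not part of the original file): dump the calling
 * pthread's current effective signal mask, e.g. while chasing a
 * signal-delivery problem.
 */
static void dump_my_sigmask_example(void)
{
	/* NULL means "report the current effective mask" */
	debug_signals(NULL);
}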
a505383d
DS
2087
2088bool thread_is_scheduled(struct thread *thread)
2089{
2090 if (thread == NULL)
2091 return false;
2092
2093 return true;
2094}
f59e6882
DL
2095
2096static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2097 const struct thread *thread)
2098{
2099 static const char * const types[] = {
2100 [THREAD_READ] = "read",
2101 [THREAD_WRITE] = "write",
2102 [THREAD_TIMER] = "timer",
2103 [THREAD_EVENT] = "event",
2104 [THREAD_READY] = "ready",
2105 [THREAD_UNUSED] = "unused",
2106 [THREAD_EXECUTE] = "exec",
2107 };
2108 ssize_t rv = 0;
2109 char info[16] = "";
2110
2111 if (!thread)
2112 return bputs(buf, "{(thread *)NULL}");
2113
2114 rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2115
2116 if (thread->type < array_size(types) && types[thread->type])
2117 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2118 else
2119 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2120
2121 switch (thread->type) {
2122 case THREAD_READ:
2123 case THREAD_WRITE:
2124 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2125 break;
2126
2127 case THREAD_TIMER:
2128 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2129 break;
2130 }
2131
2132 rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2133 thread->xref->funcname, thread->xref->dest,
2134 thread->xref->xref.file, thread->xref->xref.line);
2135 return rv;
2136}
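/*
 * Illustrative sketch (not part of the original file): with the "TH"
 * printfrr extension registered just below, "%pTHD" prints the debug
 * dump produced by printfrr_thread_dbg() above, while plain "%pTH"
 * prints the remaining time until a timer thread's deadline.
 */
static void log_timer_state_example(const struct thread *t)
{
	zlog_debug("timer %pTHD fires in %pTH", t, t);
}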
2137
54929fd3 2138printfrr_ext_autoreg_p("TH", printfrr_thread);
f59e6882
DL
2139static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2140 const void *ptr)
2141{
2142 const struct thread *thread = ptr;
2143 struct timespec remain = {};
2144
2145 if (ea->fmt[0] == 'D') {
2146 ea->fmt++;
2147 return printfrr_thread_dbg(buf, ea, thread);
2148 }
2149
2150 if (!thread) {
2151 /* need to jump over time formatting flag characters in the
2152 * input format string, i.e. adjust ea->fmt!
2153 */
2154 printfrr_time(buf, ea, &remain,
2155 TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2156 return bputch(buf, '-');
2157 }
2158
2159 TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2160 return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2161}