]> git.proxmox.com Git - mirror_frr.git/blame - lib/thread.c
doc: Add `show ipv6 rpf X:X::X:X` command to docs
[mirror_frr.git] / lib / thread.c
CommitLineData
718e3744 1/* Thread management routine
2 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
896014f4
DL
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
718e3744 19 */
20
21/* #define DEBUG */
22
23#include <zebra.h>
308d14ae 24#include <sys/resource.h>
718e3744 25
26#include "thread.h"
27#include "memory.h"
3e41733f 28#include "frrcu.h"
718e3744 29#include "log.h"
e04ab74d 30#include "hash.h"
31#include "command.h"
05c447dd 32#include "sigevent.h"
3bf2673b 33#include "network.h"
bd74dc61 34#include "jhash.h"
fbcac826 35#include "frratomic.h"
00dffa8c 36#include "frr_pthread.h"
9ef9495e 37#include "lib_errors.h"
912d45a1 38#include "libfrr_trace.h"
1a9f340b 39#include "libfrr.h"
d6be5fb9 40
bf8d3d6a
DL
41DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
42DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
43DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
44DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");
4a1ab8e4 45
960b9a53 46DECLARE_LIST(thread_list, struct thread, threaditem);
c284542b 47
aea25d1e
MS
48struct cancel_req {
49 int flags;
50 struct thread *thread;
51 void *eventobj;
52 struct thread **threadref;
53};
54
55/* Flags for task cancellation */
56#define THREAD_CANCEL_FLAG_READY 0x01
57
27d29ced
DL
58static int thread_timer_cmp(const struct thread *a, const struct thread *b)
59{
60 if (a->u.sands.tv_sec < b->u.sands.tv_sec)
61 return -1;
62 if (a->u.sands.tv_sec > b->u.sands.tv_sec)
63 return 1;
64 if (a->u.sands.tv_usec < b->u.sands.tv_usec)
65 return -1;
66 if (a->u.sands.tv_usec > b->u.sands.tv_usec)
67 return 1;
68 return 0;
69}
70
960b9a53 71DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);
27d29ced 72
3b96b781
HT
73#if defined(__APPLE__)
74#include <mach/mach.h>
75#include <mach/mach_time.h>
76#endif
77
d62a17ae 78#define AWAKEN(m) \
79 do { \
2b64873d 80 const unsigned char wakebyte = 0x01; \
d62a17ae 81 write(m->io_pipe[1], &wakebyte, 1); \
82 } while (0);
3bf2673b 83
62f44022 84/* control variable for initializer */
c17faa4b 85static pthread_once_t init_once = PTHREAD_ONCE_INIT;
e0bebc7c 86pthread_key_t thread_current;
6b0655a2 87
c17faa4b 88static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
62f44022
QY
89static struct list *masters;
90
6655966d 91static void thread_free(struct thread_master *master, struct thread *thread);
6b0655a2 92
45f01188
DL
93#ifndef EXCLUDE_CPU_TIME
94#define EXCLUDE_CPU_TIME 0
95#endif
96#ifndef CONSUMED_TIME_CHECK
97#define CONSUMED_TIME_CHECK 0
98#endif
99
100bool cputime_enabled = !EXCLUDE_CPU_TIME;
101unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
102unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
103
62f44022 104/* CLI start ---------------------------------------------------------------- */
45f01188 105#include "lib/thread_clippy.c"
45f01188 106
d8b87afe 107static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
e04ab74d 108{
883cc51d 109 int size = sizeof(a->func);
bd74dc61
DS
110
111 return jhash(&a->func, size, 0);
e04ab74d 112}
113
74df8d6d 114static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
d62a17ae 115 const struct cpu_thread_history *b)
e04ab74d 116{
d62a17ae 117 return a->func == b->func;
e04ab74d 118}
119
d62a17ae 120static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
e04ab74d 121{
d62a17ae 122 struct cpu_thread_history *new;
123 new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
124 new->func = a->func;
125 new->funcname = a->funcname;
126 return new;
e04ab74d 127}
128
d62a17ae 129static void cpu_record_hash_free(void *a)
228da428 130{
d62a17ae 131 struct cpu_thread_history *hist = a;
132
133 XFREE(MTYPE_THREAD_STATS, hist);
228da428
CC
134}
135
d62a17ae 136static void vty_out_cpu_thread_history(struct vty *vty,
137 struct cpu_thread_history *a)
e04ab74d 138{
1dd08c22
DS
139 vty_out(vty,
140 "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
72327cf3 141 a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
9b8e01ca
DS
142 a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
143 (a->real.total / a->total_calls), a->real.max,
1dd08c22 144 a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
9b8e01ca 145 vty_out(vty, " %c%c%c%c%c %s\n",
d62a17ae 146 a->types & (1 << THREAD_READ) ? 'R' : ' ',
147 a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
148 a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
149 a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
150 a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
e04ab74d 151}
152
e3b78da8 153static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
e04ab74d 154{
d62a17ae 155 struct cpu_thread_history *totals = args[0];
fbcac826 156 struct cpu_thread_history copy;
d62a17ae 157 struct vty *vty = args[1];
fbcac826 158 uint8_t *filter = args[2];
d62a17ae 159
160 struct cpu_thread_history *a = bucket->data;
161
fbcac826
QY
162 copy.total_active =
163 atomic_load_explicit(&a->total_active, memory_order_seq_cst);
164 copy.total_calls =
165 atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
9b8e01ca
DS
166 copy.total_cpu_warn =
167 atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
168 copy.total_wall_warn =
169 atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
1dd08c22
DS
170 copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
171 memory_order_seq_cst);
fbcac826
QY
172 copy.cpu.total =
173 atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
174 copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
175 copy.real.total =
176 atomic_load_explicit(&a->real.total, memory_order_seq_cst);
177 copy.real.max =
178 atomic_load_explicit(&a->real.max, memory_order_seq_cst);
179 copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
180 copy.funcname = a->funcname;
181
182 if (!(copy.types & *filter))
d62a17ae 183 return;
fbcac826
QY
184
185 vty_out_cpu_thread_history(vty, &copy);
186 totals->total_active += copy.total_active;
187 totals->total_calls += copy.total_calls;
9b8e01ca
DS
188 totals->total_cpu_warn += copy.total_cpu_warn;
189 totals->total_wall_warn += copy.total_wall_warn;
1dd08c22 190 totals->total_starv_warn += copy.total_starv_warn;
fbcac826
QY
191 totals->real.total += copy.real.total;
192 if (totals->real.max < copy.real.max)
193 totals->real.max = copy.real.max;
194 totals->cpu.total += copy.cpu.total;
195 if (totals->cpu.max < copy.cpu.max)
196 totals->cpu.max = copy.cpu.max;
e04ab74d 197}
198
fbcac826 199static void cpu_record_print(struct vty *vty, uint8_t filter)
e04ab74d 200{
d62a17ae 201 struct cpu_thread_history tmp;
202 void *args[3] = {&tmp, vty, &filter};
203 struct thread_master *m;
204 struct listnode *ln;
205
45f01188
DL
206 if (!cputime_enabled)
207 vty_out(vty,
208 "\n"
209 "Collecting CPU time statistics is currently disabled. Following statistics\n"
210 "will be zero or may display data from when collection was enabled. Use the\n"
211 " \"service cputime-stats\" command to start collecting data.\n"
212 "\nCounters and wallclock times are always maintained and should be accurate.\n");
213
0d6f7fd6 214 memset(&tmp, 0, sizeof(tmp));
d62a17ae 215 tmp.funcname = "TOTAL";
216 tmp.types = filter;
217
cb1991af 218 frr_with_mutex (&masters_mtx) {
d62a17ae 219 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
220 const char *name = m->name ? m->name : "main";
221
222 char underline[strlen(name) + 1];
223 memset(underline, '-', sizeof(underline));
4f113d60 224 underline[sizeof(underline) - 1] = '\0';
d62a17ae 225
226 vty_out(vty, "\n");
227 vty_out(vty, "Showing statistics for pthread %s\n",
228 name);
229 vty_out(vty, "-------------------------------%s\n",
230 underline);
84d951d0 231 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 232 "CPU (user+system):", "Real (wall-clock):");
233 vty_out(vty,
234 "Active Runtime(ms) Invoked Avg uSec Max uSecs");
235 vty_out(vty, " Avg uSec Max uSecs");
1dd08c22
DS
236 vty_out(vty,
237 " CPU_Warn Wall_Warn Starv_Warn Type Thread\n");
d62a17ae 238
239 if (m->cpu_record->count)
240 hash_iterate(
241 m->cpu_record,
e3b78da8 242 (void (*)(struct hash_bucket *,
d62a17ae 243 void *))cpu_record_hash_print,
244 args);
245 else
246 vty_out(vty, "No data to display yet.\n");
247
248 vty_out(vty, "\n");
249 }
250 }
d62a17ae 251
252 vty_out(vty, "\n");
253 vty_out(vty, "Total thread statistics\n");
254 vty_out(vty, "-------------------------\n");
84d951d0 255 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 256 "CPU (user+system):", "Real (wall-clock):");
257 vty_out(vty, "Active Runtime(ms) Invoked Avg uSec Max uSecs");
9b8e01ca 258 vty_out(vty, " Avg uSec Max uSecs CPU_Warn Wall_Warn");
d62a17ae 259 vty_out(vty, " Type Thread\n");
260
261 if (tmp.total_calls > 0)
262 vty_out_cpu_thread_history(vty, &tmp);
e04ab74d 263}
264
e3b78da8 265static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
e276eb82 266{
fbcac826 267 uint8_t *filter = args[0];
d62a17ae 268 struct hash *cpu_record = args[1];
269
270 struct cpu_thread_history *a = bucket->data;
62f44022 271
d62a17ae 272 if (!(a->types & *filter))
273 return;
f48f65d2 274
d62a17ae 275 hash_release(cpu_record, bucket->data);
e276eb82
PJ
276}
277
fbcac826 278static void cpu_record_clear(uint8_t filter)
e276eb82 279{
fbcac826 280 uint8_t *tmp = &filter;
d62a17ae 281 struct thread_master *m;
282 struct listnode *ln;
283
cb1991af 284 frr_with_mutex (&masters_mtx) {
d62a17ae 285 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
cb1991af 286 frr_with_mutex (&m->mtx) {
d62a17ae 287 void *args[2] = {tmp, m->cpu_record};
288 hash_iterate(
289 m->cpu_record,
e3b78da8 290 (void (*)(struct hash_bucket *,
d62a17ae 291 void *))cpu_record_hash_clear,
292 args);
293 }
d62a17ae 294 }
295 }
62f44022
QY
296}
297
fbcac826 298static uint8_t parse_filter(const char *filterstr)
62f44022 299{
d62a17ae 300 int i = 0;
301 int filter = 0;
302
303 while (filterstr[i] != '\0') {
304 switch (filterstr[i]) {
305 case 'r':
306 case 'R':
307 filter |= (1 << THREAD_READ);
308 break;
309 case 'w':
310 case 'W':
311 filter |= (1 << THREAD_WRITE);
312 break;
313 case 't':
314 case 'T':
315 filter |= (1 << THREAD_TIMER);
316 break;
317 case 'e':
318 case 'E':
319 filter |= (1 << THREAD_EVENT);
320 break;
321 case 'x':
322 case 'X':
323 filter |= (1 << THREAD_EXECUTE);
324 break;
325 default:
326 break;
327 }
328 ++i;
329 }
330 return filter;
62f44022
QY
331}
332
ee4dcee8
DL
333DEFUN_NOSH (show_thread_cpu,
334 show_thread_cpu_cmd,
335 "show thread cpu [FILTER]",
336 SHOW_STR
337 "Thread information\n"
338 "Thread CPU usage\n"
339 "Display filter (rwtex)\n")
62f44022 340{
fbcac826 341 uint8_t filter = (uint8_t)-1U;
d62a17ae 342 int idx = 0;
343
344 if (argv_find(argv, argc, "FILTER", &idx)) {
345 filter = parse_filter(argv[idx]->arg);
346 if (!filter) {
347 vty_out(vty,
3efd0893 348 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
d62a17ae 349 argv[idx]->arg);
350 return CMD_WARNING;
351 }
352 }
353
354 cpu_record_print(vty, filter);
355 return CMD_SUCCESS;
e276eb82 356}
45f01188
DL
357
358DEFPY (service_cputime_stats,
359 service_cputime_stats_cmd,
360 "[no] service cputime-stats",
361 NO_STR
362 "Set up miscellaneous service\n"
363 "Collect CPU usage statistics\n")
364{
365 cputime_enabled = !no;
366 return CMD_SUCCESS;
367}
368
369DEFPY (service_cputime_warning,
370 service_cputime_warning_cmd,
371 "[no] service cputime-warning (1-4294967295)",
372 NO_STR
373 "Set up miscellaneous service\n"
374 "Warn for tasks exceeding CPU usage threshold\n"
375 "Warning threshold in milliseconds\n")
376{
377 if (no)
378 cputime_threshold = 0;
379 else
380 cputime_threshold = cputime_warning * 1000;
381 return CMD_SUCCESS;
382}
383
384ALIAS (service_cputime_warning,
385 no_service_cputime_warning_cmd,
386 "no service cputime-warning",
387 NO_STR
388 "Set up miscellaneous service\n"
389 "Warn for tasks exceeding CPU usage threshold\n")
390
391DEFPY (service_walltime_warning,
392 service_walltime_warning_cmd,
393 "[no] service walltime-warning (1-4294967295)",
394 NO_STR
395 "Set up miscellaneous service\n"
396 "Warn for tasks exceeding total wallclock threshold\n"
397 "Warning threshold in milliseconds\n")
398{
399 if (no)
400 walltime_threshold = 0;
401 else
402 walltime_threshold = walltime_warning * 1000;
403 return CMD_SUCCESS;
404}
405
406ALIAS (service_walltime_warning,
407 no_service_walltime_warning_cmd,
408 "no service walltime-warning",
409 NO_STR
410 "Set up miscellaneous service\n"
411 "Warn for tasks exceeding total wallclock threshold\n")
e276eb82 412
8872626b
DS
413static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
414{
415 const char *name = m->name ? m->name : "main";
416 char underline[strlen(name) + 1];
a0b36ae6 417 struct thread *thread;
8872626b
DS
418 uint32_t i;
419
420 memset(underline, '-', sizeof(underline));
421 underline[sizeof(underline) - 1] = '\0';
422
423 vty_out(vty, "\nShowing poll FD's for %s\n", name);
424 vty_out(vty, "----------------------%s\n", underline);
6c19478a
DS
425 vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
426 m->fd_limit);
a0b36ae6
DS
427 for (i = 0; i < m->handler.pfdcount; i++) {
428 vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
429 m->handler.pfds[i].fd, m->handler.pfds[i].events,
8872626b 430 m->handler.pfds[i].revents);
a0b36ae6
DS
431
432 if (m->handler.pfds[i].events & POLLIN) {
433 thread = m->read[m->handler.pfds[i].fd];
434
435 if (!thread)
436 vty_out(vty, "ERROR ");
437 else
60a3efec 438 vty_out(vty, "%s ", thread->xref->funcname);
a0b36ae6
DS
439 } else
440 vty_out(vty, " ");
441
442 if (m->handler.pfds[i].events & POLLOUT) {
443 thread = m->write[m->handler.pfds[i].fd];
444
445 if (!thread)
446 vty_out(vty, "ERROR\n");
447 else
60a3efec 448 vty_out(vty, "%s\n", thread->xref->funcname);
a0b36ae6
DS
449 } else
450 vty_out(vty, "\n");
451 }
8872626b
DS
452}
453
ee4dcee8
DL
454DEFUN_NOSH (show_thread_poll,
455 show_thread_poll_cmd,
456 "show thread poll",
457 SHOW_STR
458 "Thread information\n"
459 "Show poll FD's and information\n")
8872626b
DS
460{
461 struct listnode *node;
462 struct thread_master *m;
463
cb1991af 464 frr_with_mutex (&masters_mtx) {
8872626b
DS
465 for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
466 show_thread_poll_helper(vty, m);
467 }
468 }
8872626b
DS
469
470 return CMD_SUCCESS;
471}
472
473
49d41a26
DS
474DEFUN (clear_thread_cpu,
475 clear_thread_cpu_cmd,
476 "clear thread cpu [FILTER]",
62f44022 477 "Clear stored data in all pthreads\n"
49d41a26
DS
478 "Thread information\n"
479 "Thread CPU usage\n"
480 "Display filter (rwtexb)\n")
e276eb82 481{
fbcac826 482 uint8_t filter = (uint8_t)-1U;
d62a17ae 483 int idx = 0;
484
485 if (argv_find(argv, argc, "FILTER", &idx)) {
486 filter = parse_filter(argv[idx]->arg);
487 if (!filter) {
488 vty_out(vty,
3efd0893 489 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
d62a17ae 490 argv[idx]->arg);
491 return CMD_WARNING;
492 }
493 }
494
495 cpu_record_clear(filter);
496 return CMD_SUCCESS;
e276eb82 497}
6b0655a2 498
22f31b8c
DS
499static void show_thread_timers_helper(struct vty *vty, struct thread_master *m)
500{
501 const char *name = m->name ? m->name : "main";
502 char underline[strlen(name) + 1];
503 struct thread *thread;
504
505 memset(underline, '-', sizeof(underline));
506 underline[sizeof(underline) - 1] = '\0';
507
508 vty_out(vty, "\nShowing timers for %s\n", name);
509 vty_out(vty, "-------------------%s\n", underline);
510
511 frr_each (thread_timer_list, &m->timer, thread) {
512 vty_out(vty, " %-50s%pTH\n", thread->hist->funcname, thread);
513 }
514}
515
516DEFPY_NOSH (show_thread_timers,
517 show_thread_timers_cmd,
518 "show thread timers",
519 SHOW_STR
520 "Thread information\n"
521 "Show all timers and how long they have in the system\n")
522{
523 struct listnode *node;
524 struct thread_master *m;
525
526 frr_with_mutex (&masters_mtx) {
527 for (ALL_LIST_ELEMENTS_RO(masters, node, m))
528 show_thread_timers_helper(vty, m);
529 }
530
531 return CMD_SUCCESS;
532}
533
d62a17ae 534void thread_cmd_init(void)
0b84f294 535{
d62a17ae 536 install_element(VIEW_NODE, &show_thread_cpu_cmd);
8872626b 537 install_element(VIEW_NODE, &show_thread_poll_cmd);
d62a17ae 538 install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
45f01188
DL
539
540 install_element(CONFIG_NODE, &service_cputime_stats_cmd);
541 install_element(CONFIG_NODE, &service_cputime_warning_cmd);
542 install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
543 install_element(CONFIG_NODE, &service_walltime_warning_cmd);
544 install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
22f31b8c
DS
545
546 install_element(VIEW_NODE, &show_thread_timers_cmd);
0b84f294 547}
62f44022
QY
548/* CLI end ------------------------------------------------------------------ */
549
0b84f294 550
d62a17ae 551static void cancelreq_del(void *cr)
63ccb9cb 552{
d62a17ae 553 XFREE(MTYPE_TMP, cr);
63ccb9cb
QY
554}
555
e0bebc7c 556/* initializer, only ever called once */
4d762f26 557static void initializer(void)
e0bebc7c 558{
d62a17ae 559 pthread_key_create(&thread_current, NULL);
e0bebc7c
QY
560}
561
d62a17ae 562struct thread_master *thread_master_create(const char *name)
718e3744 563{
d62a17ae 564 struct thread_master *rv;
565 struct rlimit limit;
566
567 pthread_once(&init_once, &initializer);
568
569 rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));
d62a17ae 570
571 /* Initialize master mutex */
572 pthread_mutex_init(&rv->mtx, NULL);
573 pthread_cond_init(&rv->cancel_cond, NULL);
574
575 /* Set name */
7ffcd8bd
QY
576 name = name ? name : "default";
577 rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
d62a17ae 578
579 /* Initialize I/O task data structures */
1a9f340b
MS
580
581 /* Use configured limit if present, ulimit otherwise. */
582 rv->fd_limit = frr_get_fd_limit();
583 if (rv->fd_limit == 0) {
584 getrlimit(RLIMIT_NOFILE, &limit);
585 rv->fd_limit = (int)limit.rlim_cur;
586 }
587
a6f235f3
DS
588 rv->read = XCALLOC(MTYPE_THREAD_POLL,
589 sizeof(struct thread *) * rv->fd_limit);
590
591 rv->write = XCALLOC(MTYPE_THREAD_POLL,
592 sizeof(struct thread *) * rv->fd_limit);
d62a17ae 593
7ffcd8bd
QY
594 char tmhashname[strlen(name) + 32];
595 snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
596 name);
bd74dc61 597 rv->cpu_record = hash_create_size(
d8b87afe 598 8, (unsigned int (*)(const void *))cpu_record_hash_key,
74df8d6d 599 (bool (*)(const void *, const void *))cpu_record_hash_cmp,
7ffcd8bd 600 tmhashname);
d62a17ae 601
c284542b
DL
602 thread_list_init(&rv->event);
603 thread_list_init(&rv->ready);
604 thread_list_init(&rv->unuse);
27d29ced 605 thread_timer_list_init(&rv->timer);
d62a17ae 606
607 /* Initialize thread_fetch() settings */
608 rv->spin = true;
609 rv->handle_signals = true;
610
611 /* Set pthread owner, should be updated by actual owner */
612 rv->owner = pthread_self();
613 rv->cancel_req = list_new();
614 rv->cancel_req->del = cancelreq_del;
615 rv->canceled = true;
616
617 /* Initialize pipe poker */
618 pipe(rv->io_pipe);
619 set_nonblocking(rv->io_pipe[0]);
620 set_nonblocking(rv->io_pipe[1]);
621
622 /* Initialize data structures for poll() */
623 rv->handler.pfdsize = rv->fd_limit;
624 rv->handler.pfdcount = 0;
625 rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
626 sizeof(struct pollfd) * rv->handler.pfdsize);
627 rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
628 sizeof(struct pollfd) * rv->handler.pfdsize);
629
eff09c66 630 /* add to list of threadmasters */
cb1991af 631 frr_with_mutex (&masters_mtx) {
eff09c66
QY
632 if (!masters)
633 masters = list_new();
634
d62a17ae 635 listnode_add(masters, rv);
636 }
d62a17ae 637
638 return rv;
718e3744 639}
640
d8a8a8de
QY
641void thread_master_set_name(struct thread_master *master, const char *name)
642{
cb1991af 643 frr_with_mutex (&master->mtx) {
0a22ddfb 644 XFREE(MTYPE_THREAD_MASTER, master->name);
d8a8a8de
QY
645 master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
646 }
d8a8a8de
QY
647}
648
6ed04aa2
DS
649#define THREAD_UNUSED_DEPTH 10
650
718e3744 651/* Move thread to unuse list. */
d62a17ae 652static void thread_add_unuse(struct thread_master *m, struct thread *thread)
718e3744 653{
6655966d
RZ
654 pthread_mutex_t mtxc = thread->mtx;
655
d62a17ae 656 assert(m != NULL && thread != NULL);
d62a17ae 657
d62a17ae 658 thread->hist->total_active--;
6ed04aa2
DS
659 memset(thread, 0, sizeof(struct thread));
660 thread->type = THREAD_UNUSED;
661
6655966d
RZ
662 /* Restore the thread mutex context. */
663 thread->mtx = mtxc;
664
c284542b
DL
665 if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
666 thread_list_add_tail(&m->unuse, thread);
6655966d
RZ
667 return;
668 }
669
670 thread_free(m, thread);
718e3744 671}
672
673/* Free all unused thread. */
c284542b
DL
674static void thread_list_free(struct thread_master *m,
675 struct thread_list_head *list)
718e3744 676{
d62a17ae 677 struct thread *t;
d62a17ae 678
c284542b 679 while ((t = thread_list_pop(list)))
6655966d 680 thread_free(m, t);
718e3744 681}
682
d62a17ae 683static void thread_array_free(struct thread_master *m,
684 struct thread **thread_array)
308d14ae 685{
d62a17ae 686 struct thread *t;
687 int index;
688
689 for (index = 0; index < m->fd_limit; ++index) {
690 t = thread_array[index];
691 if (t) {
692 thread_array[index] = NULL;
6655966d 693 thread_free(m, t);
d62a17ae 694 }
695 }
a6f235f3 696 XFREE(MTYPE_THREAD_POLL, thread_array);
308d14ae
DV
697}
698
495f0b13
DS
699/*
700 * thread_master_free_unused
701 *
702 * As threads are finished with they are put on the
703 * unuse list for later reuse.
704 * If we are shutting down, Free up unused threads
705 * So we can see if we forget to shut anything off
706 */
d62a17ae 707void thread_master_free_unused(struct thread_master *m)
495f0b13 708{
cb1991af 709 frr_with_mutex (&m->mtx) {
d62a17ae 710 struct thread *t;
c284542b 711 while ((t = thread_list_pop(&m->unuse)))
6655966d 712 thread_free(m, t);
d62a17ae 713 }
495f0b13
DS
714}
715
718e3744 716/* Stop thread scheduler. */
d62a17ae 717void thread_master_free(struct thread_master *m)
718e3744 718{
27d29ced
DL
719 struct thread *t;
720
cb1991af 721 frr_with_mutex (&masters_mtx) {
d62a17ae 722 listnode_delete(masters, m);
eff09c66 723 if (masters->count == 0) {
6a154c88 724 list_delete(&masters);
eff09c66 725 }
d62a17ae 726 }
d62a17ae 727
728 thread_array_free(m, m->read);
729 thread_array_free(m, m->write);
27d29ced
DL
730 while ((t = thread_timer_list_pop(&m->timer)))
731 thread_free(m, t);
d62a17ae 732 thread_list_free(m, &m->event);
733 thread_list_free(m, &m->ready);
734 thread_list_free(m, &m->unuse);
735 pthread_mutex_destroy(&m->mtx);
33844bbe 736 pthread_cond_destroy(&m->cancel_cond);
d62a17ae 737 close(m->io_pipe[0]);
738 close(m->io_pipe[1]);
6a154c88 739 list_delete(&m->cancel_req);
1a0a92ea 740 m->cancel_req = NULL;
d62a17ae 741
742 hash_clean(m->cpu_record, cpu_record_hash_free);
743 hash_free(m->cpu_record);
744 m->cpu_record = NULL;
745
0a22ddfb 746 XFREE(MTYPE_THREAD_MASTER, m->name);
d62a17ae 747 XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
748 XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
749 XFREE(MTYPE_THREAD_MASTER, m);
718e3744 750}
751
94202742 752/* Return remain time in milliseconds. */
78ca0342 753unsigned long thread_timer_remain_msec(struct thread *thread)
718e3744 754{
d62a17ae 755 int64_t remain;
1189d95f 756
13bcc010
DA
757 if (!thread_is_scheduled(thread))
758 return 0;
759
cb1991af 760 frr_with_mutex (&thread->mtx) {
78ca0342 761 remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
d62a17ae 762 }
1189d95f 763
d62a17ae 764 return remain < 0 ? 0 : remain;
718e3744 765}
766
78ca0342
CF
767/* Return remain time in seconds. */
768unsigned long thread_timer_remain_second(struct thread *thread)
769{
770 return thread_timer_remain_msec(thread) / 1000LL;
771}
772
d62a17ae 773struct timeval thread_timer_remain(struct thread *thread)
6ac44687 774{
d62a17ae 775 struct timeval remain;
cb1991af 776 frr_with_mutex (&thread->mtx) {
d62a17ae 777 monotime_until(&thread->u.sands, &remain);
778 }
d62a17ae 779 return remain;
6ac44687
CF
780}
781
0447957e
AK
782static int time_hhmmss(char *buf, int buf_size, long sec)
783{
784 long hh;
785 long mm;
786 int wr;
787
642ac49d 788 assert(buf_size >= 8);
0447957e
AK
789
790 hh = sec / 3600;
791 sec %= 3600;
792 mm = sec / 60;
793 sec %= 60;
794
795 wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
796
797 return wr != 8;
798}
799
800char *thread_timer_to_hhmmss(char *buf, int buf_size,
801 struct thread *t_timer)
802{
803 if (t_timer) {
804 time_hhmmss(buf, buf_size,
805 thread_timer_remain_second(t_timer));
806 } else {
807 snprintf(buf, buf_size, "--:--:--");
808 }
809 return buf;
810}
811
718e3744 812/* Get new thread. */
d7c0a89a 813static struct thread *thread_get(struct thread_master *m, uint8_t type,
cc9f21da 814 void (*func)(struct thread *), void *arg,
60a3efec 815 const struct xref_threadsched *xref)
718e3744 816{
c284542b 817 struct thread *thread = thread_list_pop(&m->unuse);
d62a17ae 818 struct cpu_thread_history tmp;
819
820 if (!thread) {
821 thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
822 /* mutex only needs to be initialized at struct creation. */
823 pthread_mutex_init(&thread->mtx, NULL);
824 m->alloc++;
825 }
826
827 thread->type = type;
828 thread->add_type = type;
829 thread->master = m;
830 thread->arg = arg;
d62a17ae 831 thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
832 thread->ref = NULL;
e8b3a2f7 833 thread->ignore_timer_late = false;
d62a17ae 834
835 /*
836 * So if the passed in funcname is not what we have
837 * stored that means the thread->hist needs to be
838 * updated. We keep the last one around in unused
839 * under the assumption that we are probably
840 * going to immediately allocate the same
841 * type of thread.
842 * This hopefully saves us some serious
843 * hash_get lookups.
844 */
60a3efec
DL
845 if ((thread->xref && thread->xref->funcname != xref->funcname)
846 || thread->func != func) {
d62a17ae 847 tmp.func = func;
60a3efec 848 tmp.funcname = xref->funcname;
d62a17ae 849 thread->hist =
850 hash_get(m->cpu_record, &tmp,
851 (void *(*)(void *))cpu_record_hash_alloc);
852 }
853 thread->hist->total_active++;
854 thread->func = func;
60a3efec 855 thread->xref = xref;
d62a17ae 856
857 return thread;
718e3744 858}
859
6655966d
RZ
860static void thread_free(struct thread_master *master, struct thread *thread)
861{
862 /* Update statistics. */
863 assert(master->alloc > 0);
864 master->alloc--;
865
866 /* Free allocated resources. */
867 pthread_mutex_destroy(&thread->mtx);
868 XFREE(MTYPE_THREAD, thread);
869}
870
d81ca9a3
MS
871static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
872 bool *eintr_p)
209a72a6 873{
d81ca9a3
MS
874 sigset_t origsigs;
875 unsigned char trash[64];
876 nfds_t count = m->handler.copycount;
877
d279ef57
DS
878 /*
879 * If timer_wait is null here, that means poll() should block
880 * indefinitely, unless the thread_master has overridden it by setting
d62a17ae 881 * ->selectpoll_timeout.
d279ef57 882 *
d62a17ae 883 * If the value is positive, it specifies the maximum number of
d279ef57
DS
884 * milliseconds to wait. If the timeout is -1, it specifies that
885 * we should never wait and always return immediately even if no
886 * event is detected. If the value is zero, the behavior is default.
887 */
d62a17ae 888 int timeout = -1;
889
890 /* number of file descriptors with events */
891 int num;
892
893 if (timer_wait != NULL
894 && m->selectpoll_timeout == 0) // use the default value
895 timeout = (timer_wait->tv_sec * 1000)
896 + (timer_wait->tv_usec / 1000);
897 else if (m->selectpoll_timeout > 0) // use the user's timeout
898 timeout = m->selectpoll_timeout;
899 else if (m->selectpoll_timeout
900 < 0) // effect a poll (return immediately)
901 timeout = 0;
902
0bdeb5e5 903 zlog_tls_buffer_flush();
3e41733f
DL
904 rcu_read_unlock();
905 rcu_assert_read_unlocked();
906
d62a17ae 907 /* add poll pipe poker */
d81ca9a3
MS
908 assert(count + 1 < m->handler.pfdsize);
909 m->handler.copy[count].fd = m->io_pipe[0];
910 m->handler.copy[count].events = POLLIN;
911 m->handler.copy[count].revents = 0x00;
912
913 /* We need to deal with a signal-handling race here: we
914 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
915 * that may arrive just before we enter poll(). We will block the
916 * key signals, then check whether any have arrived - if so, we return
917 * before calling poll(). If not, we'll re-enable the signals
918 * in the ppoll() call.
919 */
920
921 sigemptyset(&origsigs);
922 if (m->handle_signals) {
923 /* Main pthread that handles the app signals */
924 if (frr_sigevent_check(&origsigs)) {
925 /* Signal to process - restore signal mask and return */
926 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
927 num = -1;
928 *eintr_p = true;
929 goto done;
930 }
931 } else {
932 /* Don't make any changes for the non-main pthreads */
933 pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
934 }
d62a17ae 935
d81ca9a3
MS
936#if defined(HAVE_PPOLL)
937 struct timespec ts, *tsp;
938
939 if (timeout >= 0) {
940 ts.tv_sec = timeout / 1000;
941 ts.tv_nsec = (timeout % 1000) * 1000000;
942 tsp = &ts;
943 } else
944 tsp = NULL;
945
946 num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
947 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
948#else
949 /* Not ideal - there is a race after we restore the signal mask */
950 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
951 num = poll(m->handler.copy, count + 1, timeout);
952#endif
d62a17ae 953
d81ca9a3
MS
954done:
955
956 if (num < 0 && errno == EINTR)
957 *eintr_p = true;
958
959 if (num > 0 && m->handler.copy[count].revents != 0 && num--)
d62a17ae 960 while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
961 ;
962
3e41733f
DL
963 rcu_read_lock();
964
d62a17ae 965 return num;
209a72a6
DS
966}
967
718e3744 968/* Add new read thread. */
ee1455dd
IR
969void _thread_add_read_write(const struct xref_threadsched *xref,
970 struct thread_master *m,
cc9f21da 971 void (*func)(struct thread *), void *arg, int fd,
ee1455dd 972 struct thread **t_ptr)
718e3744 973{
60a3efec 974 int dir = xref->thread_type;
d62a17ae 975 struct thread *thread = NULL;
1ef14bee 976 struct thread **thread_array;
d62a17ae 977
abf96a87 978 if (dir == THREAD_READ)
6c3aa850
DL
979 frrtrace(9, frr_libfrr, schedule_read, m,
980 xref->funcname, xref->xref.file, xref->xref.line,
981 t_ptr, fd, 0, arg, 0);
abf96a87 982 else
6c3aa850
DL
983 frrtrace(9, frr_libfrr, schedule_write, m,
984 xref->funcname, xref->xref.file, xref->xref.line,
985 t_ptr, fd, 0, arg, 0);
abf96a87 986
188acbb9
DS
987 assert(fd >= 0);
988 if (fd >= m->fd_limit)
989 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
990
cb1991af 991 frr_with_mutex (&m->mtx) {
00dffa8c
DL
992 if (t_ptr && *t_ptr)
993 // thread is already scheduled; don't reschedule
994 break;
d62a17ae 995
996 /* default to a new pollfd */
997 nfds_t queuepos = m->handler.pfdcount;
998
1ef14bee
DS
999 if (dir == THREAD_READ)
1000 thread_array = m->read;
1001 else
1002 thread_array = m->write;
1003
d62a17ae 1004 /* if we already have a pollfd for our file descriptor, find and
1005 * use it */
1006 for (nfds_t i = 0; i < m->handler.pfdcount; i++)
1007 if (m->handler.pfds[i].fd == fd) {
1008 queuepos = i;
1ef14bee
DS
1009
1010#ifdef DEV_BUILD
1011 /*
1012 * What happens if we have a thread already
1013 * created for this event?
1014 */
1015 if (thread_array[fd])
1016 assert(!"Thread already scheduled for file descriptor");
1017#endif
d62a17ae 1018 break;
1019 }
1020
1021 /* make sure we have room for this fd + pipe poker fd */
1022 assert(queuepos + 1 < m->handler.pfdsize);
1023
60a3efec 1024 thread = thread_get(m, dir, func, arg, xref);
d62a17ae 1025
1026 m->handler.pfds[queuepos].fd = fd;
1027 m->handler.pfds[queuepos].events |=
1028 (dir == THREAD_READ ? POLLIN : POLLOUT);
1029
1030 if (queuepos == m->handler.pfdcount)
1031 m->handler.pfdcount++;
1032
1033 if (thread) {
cb1991af 1034 frr_with_mutex (&thread->mtx) {
d62a17ae 1035 thread->u.fd = fd;
1ef14bee 1036 thread_array[thread->u.fd] = thread;
d62a17ae 1037 }
d62a17ae 1038
1039 if (t_ptr) {
1040 *t_ptr = thread;
1041 thread->ref = t_ptr;
1042 }
1043 }
1044
1045 AWAKEN(m);
1046 }
718e3744 1047}
1048
ee1455dd
IR
1049static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
1050 struct thread_master *m,
cc9f21da 1051 void (*func)(struct thread *), void *arg,
ee1455dd
IR
1052 struct timeval *time_relative,
1053 struct thread **t_ptr)
718e3744 1054{
d62a17ae 1055 struct thread *thread;
96fe578a 1056 struct timeval t;
d62a17ae 1057
1058 assert(m != NULL);
1059
d62a17ae 1060 assert(time_relative);
1061
6c3aa850
DL
1062 frrtrace(9, frr_libfrr, schedule_timer, m,
1063 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1064 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
abf96a87 1065
96fe578a
MS
1066 /* Compute expiration/deadline time. */
1067 monotime(&t);
1068 timeradd(&t, time_relative, &t);
1069
cb1991af 1070 frr_with_mutex (&m->mtx) {
00dffa8c 1071 if (t_ptr && *t_ptr)
d279ef57 1072 /* thread is already scheduled; don't reschedule */
ee1455dd 1073 return;
d62a17ae 1074
4322dea7 1075 thread = thread_get(m, THREAD_TIMER, func, arg, xref);
d62a17ae 1076
cb1991af 1077 frr_with_mutex (&thread->mtx) {
96fe578a 1078 thread->u.sands = t;
27d29ced 1079 thread_timer_list_add(&m->timer, thread);
d62a17ae 1080 if (t_ptr) {
1081 *t_ptr = thread;
1082 thread->ref = t_ptr;
1083 }
1084 }
d62a17ae 1085
96fe578a
MS
1086 /* The timer list is sorted - if this new timer
1087 * might change the time we'll wait for, give the pthread
1088 * a chance to re-compute.
1089 */
1090 if (thread_timer_list_first(&m->timer) == thread)
1091 AWAKEN(m);
d62a17ae 1092 }
e2eff5c3
DS
1093#define ONEYEAR2SEC (60 * 60 * 24 * 365)
1094 if (time_relative->tv_sec > ONEYEAR2SEC)
1095 flog_err(
1096 EC_LIB_TIMER_TOO_LONG,
1097 "Timer: %pTHD is created with an expiration that is greater than 1 year",
1098 thread);
9e867fe6 1099}
1100
98c91ac6 1101
1102/* Add timer event thread. */
ee1455dd 1103void _thread_add_timer(const struct xref_threadsched *xref,
cc9f21da 1104 struct thread_master *m, void (*func)(struct thread *),
ee1455dd 1105 void *arg, long timer, struct thread **t_ptr)
9e867fe6 1106{
d62a17ae 1107 struct timeval trel;
9e867fe6 1108
d62a17ae 1109 assert(m != NULL);
9e867fe6 1110
d62a17ae 1111 trel.tv_sec = timer;
1112 trel.tv_usec = 0;
9e867fe6 1113
ee1455dd 1114 _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
98c91ac6 1115}
9e867fe6 1116
98c91ac6 1117/* Add timer event thread with "millisecond" resolution */
ee1455dd
IR
1118void _thread_add_timer_msec(const struct xref_threadsched *xref,
1119 struct thread_master *m,
cc9f21da
DS
1120 void (*func)(struct thread *), void *arg,
1121 long timer, struct thread **t_ptr)
98c91ac6 1122{
d62a17ae 1123 struct timeval trel;
9e867fe6 1124
d62a17ae 1125 assert(m != NULL);
718e3744 1126
d62a17ae 1127 trel.tv_sec = timer / 1000;
1128 trel.tv_usec = 1000 * (timer % 1000);
98c91ac6 1129
ee1455dd 1130 _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
a48b4e6d 1131}
1132
4322dea7 1133/* Add timer event thread with "timeval" resolution */
ee1455dd 1134void _thread_add_timer_tv(const struct xref_threadsched *xref,
cc9f21da
DS
1135 struct thread_master *m,
1136 void (*func)(struct thread *), void *arg,
1137 struct timeval *tv, struct thread **t_ptr)
d03c4cbd 1138{
ee1455dd 1139 _thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
d03c4cbd
DL
1140}
1141
718e3744 1142/* Add simple event thread. */
ee1455dd 1143void _thread_add_event(const struct xref_threadsched *xref,
cc9f21da 1144 struct thread_master *m, void (*func)(struct thread *),
ee1455dd 1145 void *arg, int val, struct thread **t_ptr)
718e3744 1146{
00dffa8c 1147 struct thread *thread = NULL;
d62a17ae 1148
6c3aa850
DL
1149 frrtrace(9, frr_libfrr, schedule_event, m,
1150 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1151 t_ptr, 0, val, arg, 0);
abf96a87 1152
d62a17ae 1153 assert(m != NULL);
1154
cb1991af 1155 frr_with_mutex (&m->mtx) {
00dffa8c 1156 if (t_ptr && *t_ptr)
d279ef57 1157 /* thread is already scheduled; don't reschedule */
00dffa8c 1158 break;
d62a17ae 1159
60a3efec 1160 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
cb1991af 1161 frr_with_mutex (&thread->mtx) {
d62a17ae 1162 thread->u.val = val;
c284542b 1163 thread_list_add_tail(&m->event, thread);
d62a17ae 1164 }
d62a17ae 1165
1166 if (t_ptr) {
1167 *t_ptr = thread;
1168 thread->ref = t_ptr;
1169 }
1170
1171 AWAKEN(m);
1172 }
718e3744 1173}
1174
63ccb9cb
QY
1175/* Thread cancellation ------------------------------------------------------ */
1176
8797240e
QY
1177/**
1178 * NOT's out the .events field of pollfd corresponding to the given file
1179 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1180 *
1181 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
1182 * implementation for details.
1183 *
1184 * @param master
1185 * @param fd
1186 * @param state the event to cancel. One or more (OR'd together) of the
1187 * following:
1188 * - POLLIN
1189 * - POLLOUT
1190 */
a9318a32
MS
1191static void thread_cancel_rw(struct thread_master *master, int fd, short state,
1192 int idx_hint)
0a95a0d0 1193{
42d74538
QY
1194 bool found = false;
1195
d62a17ae 1196 /* find the index of corresponding pollfd */
1197 nfds_t i;
1198
a9318a32
MS
1199 /* Cancel POLLHUP too just in case some bozo set it */
1200 state |= POLLHUP;
1201
1202 /* Some callers know the index of the pfd already */
1203 if (idx_hint >= 0) {
1204 i = idx_hint;
1205 found = true;
1206 } else {
1207 /* Have to look for the fd in the pfd array */
1208 for (i = 0; i < master->handler.pfdcount; i++)
1209 if (master->handler.pfds[i].fd == fd) {
1210 found = true;
1211 break;
1212 }
1213 }
42d74538
QY
1214
1215 if (!found) {
1216 zlog_debug(
1217 "[!] Received cancellation request for nonexistent rw job");
1218 zlog_debug("[!] threadmaster: %s | fd: %d",
996c9314 1219 master->name ? master->name : "", fd);
42d74538
QY
1220 return;
1221 }
d62a17ae 1222
1223 /* NOT out event. */
1224 master->handler.pfds[i].events &= ~(state);
1225
1226 /* If all events are canceled, delete / resize the pollfd array. */
1227 if (master->handler.pfds[i].events == 0) {
1228 memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1229 (master->handler.pfdcount - i - 1)
1230 * sizeof(struct pollfd));
1231 master->handler.pfdcount--;
e985cda0
S
1232 master->handler.pfds[master->handler.pfdcount].fd = 0;
1233 master->handler.pfds[master->handler.pfdcount].events = 0;
d62a17ae 1234 }
1235
1236 /* If we have the same pollfd in the copy, perform the same operations,
1237 * otherwise return. */
1238 if (i >= master->handler.copycount)
1239 return;
1240
1241 master->handler.copy[i].events &= ~(state);
1242
1243 if (master->handler.copy[i].events == 0) {
1244 memmove(master->handler.copy + i, master->handler.copy + i + 1,
1245 (master->handler.copycount - i - 1)
1246 * sizeof(struct pollfd));
1247 master->handler.copycount--;
e985cda0
S
1248 master->handler.copy[master->handler.copycount].fd = 0;
1249 master->handler.copy[master->handler.copycount].events = 0;
d62a17ae 1250 }
0a95a0d0
DS
1251}
1252
a9318a32
MS
1253/*
1254 * Process task cancellation given a task argument: iterate through the
1255 * various lists of tasks, looking for any that match the argument.
1256 */
1257static void cancel_arg_helper(struct thread_master *master,
1258 const struct cancel_req *cr)
1259{
1260 struct thread *t;
1261 nfds_t i;
1262 int fd;
1263 struct pollfd *pfd;
1264
1265 /* We're only processing arg-based cancellations here. */
1266 if (cr->eventobj == NULL)
1267 return;
1268
1269 /* First process the ready lists. */
1270 frr_each_safe(thread_list, &master->event, t) {
1271 if (t->arg != cr->eventobj)
1272 continue;
1273 thread_list_del(&master->event, t);
1274 if (t->ref)
1275 *t->ref = NULL;
1276 thread_add_unuse(master, t);
1277 }
1278
1279 frr_each_safe(thread_list, &master->ready, t) {
1280 if (t->arg != cr->eventobj)
1281 continue;
1282 thread_list_del(&master->ready, t);
1283 if (t->ref)
1284 *t->ref = NULL;
1285 thread_add_unuse(master, t);
1286 }
1287
1288 /* If requested, stop here and ignore io and timers */
1289 if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
1290 return;
1291
1292 /* Check the io tasks */
1293 for (i = 0; i < master->handler.pfdcount;) {
1294 pfd = master->handler.pfds + i;
1295
1296 if (pfd->events & POLLIN)
1297 t = master->read[pfd->fd];
1298 else
1299 t = master->write[pfd->fd];
1300
1301 if (t && t->arg == cr->eventobj) {
1302 fd = pfd->fd;
1303
1304 /* Found a match to cancel: clean up fd arrays */
1305 thread_cancel_rw(master, pfd->fd, pfd->events, i);
1306
1307 /* Clean up thread arrays */
1308 master->read[fd] = NULL;
1309 master->write[fd] = NULL;
1310
1311 /* Clear caller's ref */
1312 if (t->ref)
1313 *t->ref = NULL;
1314
1315 thread_add_unuse(master, t);
1316
1317 /* Don't increment 'i' since the cancellation will have
1318 * removed the entry from the pfd array
1319 */
1320 } else
1321 i++;
1322 }
1323
1324 /* Check the timer tasks */
1325 t = thread_timer_list_first(&master->timer);
1326 while (t) {
1327 struct thread *t_next;
1328
1329 t_next = thread_timer_list_next(&master->timer, t);
1330
1331 if (t->arg == cr->eventobj) {
1332 thread_timer_list_del(&master->timer, t);
1333 if (t->ref)
1334 *t->ref = NULL;
1335 thread_add_unuse(master, t);
1336 }
1337
1338 t = t_next;
1339 }
1340}
1341
1189d95f 1342/**
63ccb9cb 1343 * Process cancellation requests.
1189d95f 1344 *
63ccb9cb
QY
1345 * This may only be run from the pthread which owns the thread_master.
1346 *
1347 * @param master the thread master to process
1348 * @REQUIRE master->mtx
1189d95f 1349 */
d62a17ae 1350static void do_thread_cancel(struct thread_master *master)
718e3744 1351{
c284542b 1352 struct thread_list_head *list = NULL;
d62a17ae 1353 struct thread **thread_array = NULL;
1354 struct thread *thread;
d62a17ae 1355 struct cancel_req *cr;
1356 struct listnode *ln;
7e93a54c 1357
d62a17ae 1358 for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
d279ef57 1359 /*
a9318a32
MS
1360 * If this is an event object cancellation, search
1361 * through task lists deleting any tasks which have the
1362 * specified argument - use this handy helper function.
d279ef57 1363 */
d62a17ae 1364 if (cr->eventobj) {
a9318a32 1365 cancel_arg_helper(master, cr);
d62a17ae 1366 continue;
1367 }
1368
d279ef57
DS
1369 /*
1370 * The pointer varies depending on whether the cancellation
1371 * request was made asynchronously or not. If it was, we
1372 * need to check whether the thread even exists anymore
1373 * before cancelling it.
1374 */
d62a17ae 1375 thread = (cr->thread) ? cr->thread : *cr->threadref;
1376
1377 if (!thread)
1378 continue;
1379
7e93a54c
MS
1380 list = NULL;
1381 thread_array = NULL;
1382
d62a17ae 1383 /* Determine the appropriate queue to cancel the thread from */
1384 switch (thread->type) {
1385 case THREAD_READ:
a9318a32 1386 thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
d62a17ae 1387 thread_array = master->read;
1388 break;
1389 case THREAD_WRITE:
a9318a32 1390 thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
d62a17ae 1391 thread_array = master->write;
1392 break;
1393 case THREAD_TIMER:
27d29ced 1394 thread_timer_list_del(&master->timer, thread);
d62a17ae 1395 break;
1396 case THREAD_EVENT:
1397 list = &master->event;
1398 break;
1399 case THREAD_READY:
1400 list = &master->ready;
1401 break;
1402 default:
1403 continue;
1404 break;
1405 }
1406
27d29ced 1407 if (list) {
c284542b 1408 thread_list_del(list, thread);
d62a17ae 1409 } else if (thread_array) {
1410 thread_array[thread->u.fd] = NULL;
d62a17ae 1411 }
1412
1413 if (thread->ref)
1414 *thread->ref = NULL;
1415
1416 thread_add_unuse(thread->master, thread);
1417 }
1418
1419 /* Delete and free all cancellation requests */
41b21bfa
MS
1420 if (master->cancel_req)
1421 list_delete_all_node(master->cancel_req);
d62a17ae 1422
1423 /* Wake up any threads which may be blocked in thread_cancel_async() */
1424 master->canceled = true;
1425 pthread_cond_broadcast(&master->cancel_cond);
718e3744 1426}
1427
a9318a32
MS
1428/*
1429 * Helper function used for multiple flavors of arg-based cancellation.
1430 */
1431static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
1432{
1433 struct cancel_req *cr;
1434
1435 assert(m->owner == pthread_self());
1436
1437 /* Only worth anything if caller supplies an arg. */
1438 if (arg == NULL)
1439 return;
1440
1441 cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1442
1443 cr->flags = flags;
1444
cb1991af 1445 frr_with_mutex (&m->mtx) {
a9318a32
MS
1446 cr->eventobj = arg;
1447 listnode_add(m->cancel_req, cr);
1448 do_thread_cancel(m);
1449 }
1450}
1451
63ccb9cb
QY
1452/**
1453 * Cancel any events which have the specified argument.
1454 *
1455 * MT-Unsafe
1456 *
1457 * @param m the thread_master to cancel from
1458 * @param arg the argument passed when creating the event
1459 */
d62a17ae 1460void thread_cancel_event(struct thread_master *master, void *arg)
718e3744 1461{
a9318a32
MS
1462 cancel_event_helper(master, arg, 0);
1463}
d62a17ae 1464
a9318a32
MS
1465/*
1466 * Cancel ready tasks with an arg matching 'arg'
1467 *
1468 * MT-Unsafe
1469 *
1470 * @param m the thread_master to cancel from
1471 * @param arg the argument passed when creating the event
1472 */
1473void thread_cancel_event_ready(struct thread_master *m, void *arg)
1474{
1475
1476 /* Only cancel ready/event tasks */
1477 cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
63ccb9cb 1478}
1189d95f 1479
63ccb9cb
QY
1480/**
1481 * Cancel a specific task.
1482 *
1483 * MT-Unsafe
1484 *
1485 * @param thread task to cancel
1486 */
b3d6bc6e 1487void thread_cancel(struct thread **thread)
63ccb9cb 1488{
b3d6bc6e
MS
1489 struct thread_master *master;
1490
1491 if (thread == NULL || *thread == NULL)
1492 return;
1493
1494 master = (*thread)->master;
d62a17ae 1495
6c3aa850
DL
1496 frrtrace(9, frr_libfrr, thread_cancel, master,
1497 (*thread)->xref->funcname, (*thread)->xref->xref.file,
1498 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
b4d6e855 1499 (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);
abf96a87 1500
6ed04aa2
DS
1501 assert(master->owner == pthread_self());
1502
cb1991af 1503 frr_with_mutex (&master->mtx) {
d62a17ae 1504 struct cancel_req *cr =
1505 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
b3d6bc6e 1506 cr->thread = *thread;
6ed04aa2
DS
1507 listnode_add(master->cancel_req, cr);
1508 do_thread_cancel(master);
d62a17ae 1509 }
b3d6bc6e
MS
1510
1511 *thread = NULL;
63ccb9cb 1512}
1189d95f 1513
63ccb9cb
QY
1514/**
1515 * Asynchronous cancellation.
1516 *
8797240e
QY
1517 * Called with either a struct thread ** or void * to an event argument,
1518 * this function posts the correct cancellation request and blocks until it is
1519 * serviced.
63ccb9cb
QY
1520 *
1521 * If the thread is currently running, execution blocks until it completes.
1522 *
8797240e
QY
1523 * The last two parameters are mutually exclusive, i.e. if you pass one the
1524 * other must be NULL.
1525 *
1526 * When the cancellation procedure executes on the target thread_master, the
1527 * thread * provided is checked for nullity. If it is null, the thread is
1528 * assumed to no longer exist and the cancellation request is a no-op. Thus
1529 * users of this API must pass a back-reference when scheduling the original
1530 * task.
1531 *
63ccb9cb
QY
1532 * MT-Safe
1533 *
8797240e
QY
1534 * @param master the thread master with the relevant event / task
1535 * @param thread pointer to thread to cancel
1536 * @param eventobj the event
63ccb9cb 1537 */
d62a17ae 1538void thread_cancel_async(struct thread_master *master, struct thread **thread,
1539 void *eventobj)
63ccb9cb 1540{
d62a17ae 1541 assert(!(thread && eventobj) && (thread || eventobj));
abf96a87
QY
1542
1543 if (thread && *thread)
c7bb4f00 1544 frrtrace(9, frr_libfrr, thread_cancel_async, master,
6c3aa850
DL
1545 (*thread)->xref->funcname, (*thread)->xref->xref.file,
1546 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
c7bb4f00
QY
1547 (*thread)->u.val, (*thread)->arg,
1548 (*thread)->u.sands.tv_sec);
abf96a87 1549 else
c7bb4f00
QY
1550 frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
1551 0, NULL, 0, 0, eventobj, 0);
abf96a87 1552
d62a17ae 1553 assert(master->owner != pthread_self());
1554
cb1991af 1555 frr_with_mutex (&master->mtx) {
d62a17ae 1556 master->canceled = false;
1557
1558 if (thread) {
1559 struct cancel_req *cr =
1560 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1561 cr->threadref = thread;
1562 listnode_add(master->cancel_req, cr);
1563 } else if (eventobj) {
1564 struct cancel_req *cr =
1565 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1566 cr->eventobj = eventobj;
1567 listnode_add(master->cancel_req, cr);
1568 }
1569 AWAKEN(master);
1570
1571 while (!master->canceled)
1572 pthread_cond_wait(&master->cancel_cond, &master->mtx);
1573 }
50478845
MS
1574
1575 if (thread)
1576 *thread = NULL;
718e3744 1577}
63ccb9cb 1578/* ------------------------------------------------------------------------- */
718e3744 1579
27d29ced 1580static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
d62a17ae 1581 struct timeval *timer_val)
718e3744 1582{
27d29ced
DL
1583 if (!thread_timer_list_count(timers))
1584 return NULL;
1585
1586 struct thread *next_timer = thread_timer_list_first(timers);
1587 monotime_until(&next_timer->u.sands, timer_val);
1588 return timer_val;
718e3744 1589}
718e3744 1590
d62a17ae 1591static struct thread *thread_run(struct thread_master *m, struct thread *thread,
1592 struct thread *fetch)
718e3744 1593{
d62a17ae 1594 *fetch = *thread;
1595 thread_add_unuse(m, thread);
1596 return fetch;
718e3744 1597}
1598
d62a17ae 1599static int thread_process_io_helper(struct thread_master *m,
45f3d590
DS
1600 struct thread *thread, short state,
1601 short actual_state, int pos)
5d4ccd4e 1602{
d62a17ae 1603 struct thread **thread_array;
1604
45f3d590
DS
1605 /*
1606 * poll() clears the .events field, but the pollfd array we
1607 * pass to poll() is a copy of the one used to schedule threads.
1608 * We need to synchronize state between the two here by applying
1609 * the same changes poll() made on the copy of the "real" pollfd
1610 * array.
1611 *
1612 * This cleans up a possible infinite loop where we refuse
1613 * to respond to a poll event but poll is insistent that
1614 * we should.
1615 */
1616 m->handler.pfds[pos].events &= ~(state);
1617
1618 if (!thread) {
1619 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1620 flog_err(EC_LIB_NO_THREAD,
1d5453d6 1621 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
45f3d590 1622 m->handler.pfds[pos].fd, actual_state);
d62a17ae 1623 return 0;
45f3d590 1624 }
d62a17ae 1625
1626 if (thread->type == THREAD_READ)
1627 thread_array = m->read;
1628 else
1629 thread_array = m->write;
1630
1631 thread_array[thread->u.fd] = NULL;
c284542b 1632 thread_list_add_tail(&m->ready, thread);
d62a17ae 1633 thread->type = THREAD_READY;
45f3d590 1634
d62a17ae 1635 return 1;
5d4ccd4e
DS
1636}
1637
8797240e
QY
1638/**
1639 * Process I/O events.
1640 *
1641 * Walks through file descriptor array looking for those pollfds whose .revents
1642 * field has something interesting. Deletes any invalid file descriptors.
1643 *
1644 * @param m the thread master
1645 * @param num the number of active file descriptors (return value of poll())
1646 */
d62a17ae 1647static void thread_process_io(struct thread_master *m, unsigned int num)
0a95a0d0 1648{
d62a17ae 1649 unsigned int ready = 0;
1650 struct pollfd *pfds = m->handler.copy;
1651
1652 for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1653 /* no event for current fd? immediately continue */
1654 if (pfds[i].revents == 0)
1655 continue;
1656
1657 ready++;
1658
d279ef57
DS
1659 /*
1660 * Unless someone has called thread_cancel from another
1661 * pthread, the only thing that could have changed in
1662 * m->handler.pfds while we were asleep is the .events
1663 * field in a given pollfd. Barring thread_cancel() that
1664 * value should be a superset of the values we have in our
1665 * copy, so there's no need to update it. Similarily,
1666 * barring deletion, the fd should still be a valid index
1667 * into the master's pfds.
d142453d
DS
1668 *
1669 * We are including POLLERR here to do a READ event
1670 * this is because the read should fail and the
1671 * read function should handle it appropriately
d279ef57 1672 */
d142453d 1673 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
d62a17ae 1674 thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
45f3d590
DS
1675 pfds[i].revents, i);
1676 }
d62a17ae 1677 if (pfds[i].revents & POLLOUT)
1678 thread_process_io_helper(m, m->write[pfds[i].fd],
45f3d590 1679 POLLOUT, pfds[i].revents, i);
d62a17ae 1680
1681 /* if one of our file descriptors is garbage, remove the same
1682 * from
1683 * both pfds + update sizes and index */
1684 if (pfds[i].revents & POLLNVAL) {
1685 memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1686 (m->handler.pfdcount - i - 1)
1687 * sizeof(struct pollfd));
1688 m->handler.pfdcount--;
e985cda0
S
1689 m->handler.pfds[m->handler.pfdcount].fd = 0;
1690 m->handler.pfds[m->handler.pfdcount].events = 0;
d62a17ae 1691
1692 memmove(pfds + i, pfds + i + 1,
1693 (m->handler.copycount - i - 1)
1694 * sizeof(struct pollfd));
1695 m->handler.copycount--;
e985cda0
S
1696 m->handler.copy[m->handler.copycount].fd = 0;
1697 m->handler.copy[m->handler.copycount].events = 0;
d62a17ae 1698
1699 i--;
1700 }
1701 }
718e3744 1702}
1703
8b70d0b0 1704/* Add all timers that have popped to the ready list. */
e7d9e44b 1705static unsigned int thread_process_timers(struct thread_master *m,
d62a17ae 1706 struct timeval *timenow)
a48b4e6d 1707{
ab01a001
DS
1708 struct timeval prev = *timenow;
1709 bool displayed = false;
d62a17ae 1710 struct thread *thread;
1711 unsigned int ready = 0;
1712
e7d9e44b 1713 while ((thread = thread_timer_list_first(&m->timer))) {
d62a17ae 1714 if (timercmp(timenow, &thread->u.sands, <))
e7d9e44b 1715 break;
ab01a001
DS
1716 prev = thread->u.sands;
1717 prev.tv_sec += 4;
1718 /*
1719 * If the timer would have popped 4 seconds in the
1720 * past then we are in a situation where we are
1721 * really getting behind on handling of events.
1722 * Let's log it and do the right thing with it.
1723 */
1dd08c22
DS
1724 if (timercmp(timenow, &prev, >)) {
1725 atomic_fetch_add_explicit(
1726 &thread->hist->total_starv_warn, 1,
1727 memory_order_seq_cst);
1728 if (!displayed && !thread->ignore_timer_late) {
1729 flog_warn(
1730 EC_LIB_STARVE_THREAD,
1731 "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1732 thread);
1733 displayed = true;
1734 }
ab01a001
DS
1735 }
1736
e7d9e44b 1737 thread_timer_list_pop(&m->timer);
d62a17ae 1738 thread->type = THREAD_READY;
e7d9e44b 1739 thread_list_add_tail(&m->ready, thread);
d62a17ae 1740 ready++;
1741 }
e7d9e44b 1742
d62a17ae 1743 return ready;
a48b4e6d 1744}
1745
2613abe6 1746/* process a list en masse, e.g. for event thread lists */
c284542b 1747static unsigned int thread_process(struct thread_list_head *list)
2613abe6 1748{
d62a17ae 1749 struct thread *thread;
d62a17ae 1750 unsigned int ready = 0;
1751
c284542b 1752 while ((thread = thread_list_pop(list))) {
d62a17ae 1753 thread->type = THREAD_READY;
c284542b 1754 thread_list_add_tail(&thread->master->ready, thread);
d62a17ae 1755 ready++;
1756 }
1757 return ready;
2613abe6
PJ
1758}
1759
1760
718e3744 1761/* Fetch next ready thread. */
d62a17ae 1762struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
718e3744 1763{
d62a17ae 1764 struct thread *thread = NULL;
1765 struct timeval now;
1766 struct timeval zerotime = {0, 0};
1767 struct timeval tv;
1768 struct timeval *tw = NULL;
d81ca9a3 1769 bool eintr_p = false;
d62a17ae 1770 int num = 0;
1771
1772 do {
1773 /* Handle signals if any */
1774 if (m->handle_signals)
7cc91e67 1775 frr_sigevent_process();
d62a17ae 1776
1777 pthread_mutex_lock(&m->mtx);
1778
1779 /* Process any pending cancellation requests */
1780 do_thread_cancel(m);
1781
e3c9529e
QY
1782 /*
1783 * Attempt to flush ready queue before going into poll().
1784 * This is performance-critical. Think twice before modifying.
1785 */
c284542b 1786 if ((thread = thread_list_pop(&m->ready))) {
e3c9529e
QY
1787 fetch = thread_run(m, thread, fetch);
1788 if (fetch->ref)
1789 *fetch->ref = NULL;
1790 pthread_mutex_unlock(&m->mtx);
5e822957
DS
1791 if (!m->ready_run_loop)
1792 GETRUSAGE(&m->last_getrusage);
1793 m->ready_run_loop = true;
e3c9529e
QY
1794 break;
1795 }
1796
5e822957 1797 m->ready_run_loop = false;
e3c9529e
QY
1798 /* otherwise, tick through scheduling sequence */
1799
bca37d17
QY
1800 /*
1801 * Post events to ready queue. This must come before the
1802 * following block since events should occur immediately
1803 */
d62a17ae 1804 thread_process(&m->event);
1805
bca37d17
QY
1806 /*
1807 * If there are no tasks on the ready queue, we will poll()
1808 * until a timer expires or we receive I/O, whichever comes
1809 * first. The strategy for doing this is:
d62a17ae 1810 *
1811 * - If there are events pending, set the poll() timeout to zero
1812 * - If there are no events pending, but there are timers
d279ef57
DS
1813 * pending, set the timeout to the smallest remaining time on
1814 * any timer.
d62a17ae 1815 * - If there are neither timers nor events pending, but there
d279ef57 1816 * are file descriptors pending, block indefinitely in poll()
d62a17ae 1817 * - If nothing is pending, it's time for the application to die
1818 *
1819 * In every case except the last, we need to hit poll() at least
bca37d17
QY
1820 * once per loop to avoid starvation by events
1821 */
c284542b 1822 if (!thread_list_count(&m->ready))
27d29ced 1823 tw = thread_timer_wait(&m->timer, &tv);
d62a17ae 1824
c284542b
DL
1825 if (thread_list_count(&m->ready) ||
1826 (tw && !timercmp(tw, &zerotime, >)))
d62a17ae 1827 tw = &zerotime;
1828
1829 if (!tw && m->handler.pfdcount == 0) { /* die */
1830 pthread_mutex_unlock(&m->mtx);
1831 fetch = NULL;
1832 break;
1833 }
1834
bca37d17
QY
1835 /*
1836 * Copy pollfd array + # active pollfds in it. Not necessary to
1837 * copy the array size as this is fixed.
1838 */
d62a17ae 1839 m->handler.copycount = m->handler.pfdcount;
1840 memcpy(m->handler.copy, m->handler.pfds,
1841 m->handler.copycount * sizeof(struct pollfd));
1842
e3c9529e
QY
1843 pthread_mutex_unlock(&m->mtx);
1844 {
d81ca9a3
MS
1845 eintr_p = false;
1846 num = fd_poll(m, tw, &eintr_p);
e3c9529e
QY
1847 }
1848 pthread_mutex_lock(&m->mtx);
d764d2cc 1849
e3c9529e
QY
1850 /* Handle any errors received in poll() */
1851 if (num < 0) {
d81ca9a3 1852 if (eintr_p) {
d62a17ae 1853 pthread_mutex_unlock(&m->mtx);
e3c9529e
QY
1854 /* loop around to signal handler */
1855 continue;
d62a17ae 1856 }
1857
e3c9529e 1858 /* else die */
450971aa 1859 flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
9ef9495e 1860 safe_strerror(errno));
e3c9529e
QY
1861 pthread_mutex_unlock(&m->mtx);
1862 fetch = NULL;
1863 break;
bca37d17 1864 }
d62a17ae 1865
1866 /* Post timers to ready queue. */
1867 monotime(&now);
e7d9e44b 1868 thread_process_timers(m, &now);
d62a17ae 1869
1870 /* Post I/O to ready queue. */
1871 if (num > 0)
1872 thread_process_io(m, num);
1873
d62a17ae 1874 pthread_mutex_unlock(&m->mtx);
1875
1876 } while (!thread && m->spin);
1877
1878 return fetch;
718e3744 1879}
1880
d62a17ae 1881static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
62f44022 1882{
d62a17ae 1883 return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1884 + (a.tv_usec - b.tv_usec));
62f44022
QY
1885}
1886
d62a17ae 1887unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1888 unsigned long *cputime)
718e3744 1889{
6418e2d3 1890#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
4e2839de
DS
1891
1892#ifdef __FreeBSD__
1893 /*
1894 * FreeBSD appears to have an issue when calling clock_gettime
1895 * with CLOCK_THREAD_CPUTIME_ID really close to each other
1896 * occassionally the now time will be before the start time.
1897 * This is not good and FRR is ending up with CPU HOG's
1898 * when the subtraction wraps to very large numbers
1899 *
1900 * What we are going to do here is cheat a little bit
1901 * and notice that this is a problem and just correct
1902 * it so that it is impossible to happen
1903 */
1904 if (start->cpu.tv_sec == now->cpu.tv_sec &&
1905 start->cpu.tv_nsec > now->cpu.tv_nsec)
1906 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1907 else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1908 now->cpu.tv_sec = start->cpu.tv_sec;
1909 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1910 }
1911#endif
6418e2d3
DL
1912 *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1913 + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1914#else
d62a17ae 1915 /* This is 'user + sys' time. */
1916 *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1917 + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
6418e2d3 1918#endif
d62a17ae 1919 return timeval_elapsed(now->real, start->real);
8b70d0b0 1920}
1921
50596be0
DS
1922/* We should aim to yield after yield milliseconds, which defaults
1923 to THREAD_YIELD_TIME_SLOT .
8b70d0b0 1924 Note: we are using real (wall clock) time for this calculation.
1925 It could be argued that CPU time may make more sense in certain
1926 contexts. The things to consider are whether the thread may have
1927 blocked (in which case wall time increases, but CPU time does not),
1928 or whether the system is heavily loaded with other processes competing
d62a17ae 1929 for CPU time. On balance, wall clock time seems to make sense.
8b70d0b0 1930 Plus it has the added benefit that gettimeofday should be faster
1931 than calling getrusage. */
d62a17ae 1932int thread_should_yield(struct thread *thread)
718e3744 1933{
d62a17ae 1934 int result;
cb1991af 1935 frr_with_mutex (&thread->mtx) {
d62a17ae 1936 result = monotime_since(&thread->real, NULL)
1937 > (int64_t)thread->yield;
1938 }
d62a17ae 1939 return result;
50596be0
DS
1940}
1941
d62a17ae 1942void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
50596be0 1943{
cb1991af 1944 frr_with_mutex (&thread->mtx) {
d62a17ae 1945 thread->yield = yield_time;
1946 }
718e3744 1947}
1948
d62a17ae 1949void thread_getrusage(RUSAGE_T *r)
db9c0df9 1950{
6418e2d3
DL
1951 monotime(&r->real);
1952 if (!cputime_enabled) {
1953 memset(&r->cpu, 0, sizeof(r->cpu));
1954 return;
1955 }
1956
1957#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1958 /* not currently implemented in Linux's vDSO, but maybe at some point
1959 * in the future?
1960 */
1961 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1962#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
231db9a6
DS
1963#if defined RUSAGE_THREAD
1964#define FRR_RUSAGE RUSAGE_THREAD
1965#else
1966#define FRR_RUSAGE RUSAGE_SELF
1967#endif
6418e2d3
DL
1968 getrusage(FRR_RUSAGE, &(r->cpu));
1969#endif
db9c0df9
PJ
1970}
1971
fbcac826
QY
1972/*
1973 * Call a thread.
1974 *
1975 * This function will atomically update the thread's usage history. At present
1976 * this is the only spot where usage history is written. Nevertheless the code
1977 * has been written such that the introduction of writers in the future should
1978 * not need to update it provided the writers atomically perform only the
1979 * operations done here, i.e. updating the total and maximum times. In
1980 * particular, the maximum real and cpu times must be monotonically increasing
1981 * or this code is not correct.
1982 */
d62a17ae 1983void thread_call(struct thread *thread)
718e3744 1984{
d62a17ae 1985 RUSAGE_T before, after;
cc8b13a0 1986
45f01188
DL
1987 /* if the thread being called is the CLI, it may change cputime_enabled
1988 * ("service cputime-stats" command), which can result in nonsensical
1989 * and very confusing warnings
1990 */
1991 bool cputime_enabled_here = cputime_enabled;
1992
5e822957
DS
1993 if (thread->master->ready_run_loop)
1994 before = thread->master->last_getrusage;
1995 else
1996 GETRUSAGE(&before);
1997
d62a17ae 1998 thread->real = before.real;
718e3744 1999
6c3aa850
DL
2000 frrtrace(9, frr_libfrr, thread_call, thread->master,
2001 thread->xref->funcname, thread->xref->xref.file,
2002 thread->xref->xref.line, NULL, thread->u.fd,
c7bb4f00 2003 thread->u.val, thread->arg, thread->u.sands.tv_sec);
abf96a87 2004
d62a17ae 2005 pthread_setspecific(thread_current, thread);
2006 (*thread->func)(thread);
2007 pthread_setspecific(thread_current, NULL);
718e3744 2008
d62a17ae 2009 GETRUSAGE(&after);
5e822957 2010 thread->master->last_getrusage = after;
718e3744 2011
45f01188
DL
2012 unsigned long walltime, cputime;
2013 unsigned long exp;
fbcac826 2014
45f01188
DL
2015 walltime = thread_consumed_time(&after, &before, &cputime);
2016
2017 /* update walltime */
2018 atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
fbcac826
QY
2019 memory_order_seq_cst);
2020 exp = atomic_load_explicit(&thread->hist->real.max,
2021 memory_order_seq_cst);
45f01188 2022 while (exp < walltime
fbcac826 2023 && !atomic_compare_exchange_weak_explicit(
45f01188
DL
2024 &thread->hist->real.max, &exp, walltime,
2025 memory_order_seq_cst, memory_order_seq_cst))
fbcac826
QY
2026 ;
2027
45f01188
DL
2028 if (cputime_enabled_here && cputime_enabled) {
2029 /* update cputime */
2030 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2031 memory_order_seq_cst);
2032 exp = atomic_load_explicit(&thread->hist->cpu.max,
2033 memory_order_seq_cst);
2034 while (exp < cputime
2035 && !atomic_compare_exchange_weak_explicit(
2036 &thread->hist->cpu.max, &exp, cputime,
2037 memory_order_seq_cst, memory_order_seq_cst))
2038 ;
2039 }
fbcac826
QY
2040
2041 atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2042 memory_order_seq_cst);
2043 atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2044 memory_order_seq_cst);
718e3744 2045
45f01188
DL
2046 if (cputime_enabled_here && cputime_enabled && cputime_threshold
2047 && cputime > cputime_threshold) {
d62a17ae 2048 /*
45f01188
DL
2049 * We have a CPU Hog on our hands. The time FRR has spent
2050 * doing actual work (not sleeping) is greater than 5 seconds.
d62a17ae 2051 * Whinge about it now, so we're aware this is yet another task
2052 * to fix.
2053 */
9b8e01ca
DS
2054 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2055 1, memory_order_seq_cst);
9ef9495e 2056 flog_warn(
039d547f
DS
2057 EC_LIB_SLOW_THREAD_CPU,
2058 "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2059 thread->xref->funcname, (unsigned long)thread->func,
45f01188
DL
2060 walltime / 1000, cputime / 1000);
2061
2062 } else if (walltime_threshold && walltime > walltime_threshold) {
039d547f 2063 /*
45f01188
DL
2064 * The runtime for a task is greater than 5 seconds, but the
2065 * cpu time is under 5 seconds. Let's whine about this because
2066 * this could imply some sort of scheduling issue.
039d547f 2067 */
9b8e01ca
DS
2068 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2069 1, memory_order_seq_cst);
039d547f
DS
2070 flog_warn(
2071 EC_LIB_SLOW_THREAD_WALL,
2072 "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
60a3efec 2073 thread->xref->funcname, (unsigned long)thread->func,
45f01188 2074 walltime / 1000, cputime / 1000);
d62a17ae 2075 }
718e3744 2076}
2077
2078/* Execute thread */
60a3efec 2079void _thread_execute(const struct xref_threadsched *xref,
cc9f21da 2080 struct thread_master *m, void (*func)(struct thread *),
60a3efec 2081 void *arg, int val)
718e3744 2082{
c4345fbf 2083 struct thread *thread;
718e3744 2084
c4345fbf 2085 /* Get or allocate new thread to execute. */
cb1991af 2086 frr_with_mutex (&m->mtx) {
60a3efec 2087 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
9c7753e4 2088
c4345fbf 2089 /* Set its event value. */
cb1991af 2090 frr_with_mutex (&thread->mtx) {
c4345fbf
RZ
2091 thread->add_type = THREAD_EXECUTE;
2092 thread->u.val = val;
2093 thread->ref = &thread;
2094 }
c4345fbf 2095 }
f7c62e11 2096
c4345fbf
RZ
2097 /* Execute thread doing all accounting. */
2098 thread_call(thread);
9c7753e4 2099
c4345fbf
RZ
2100 /* Give back or free thread. */
2101 thread_add_unuse(m, thread);
718e3744 2102}
1543c387
MS
2103
2104/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2105void debug_signals(const sigset_t *sigs)
2106{
2107 int i, found;
2108 sigset_t tmpsigs;
2109 char buf[300];
2110
2111 /*
2112 * We're only looking at the non-realtime signals here, so we need
2113 * some limit value. Platform differences mean at some point we just
2114 * need to pick a reasonable value.
2115 */
2116#if defined SIGRTMIN
2117# define LAST_SIGNAL SIGRTMIN
2118#else
2119# define LAST_SIGNAL 32
2120#endif
2121
2122
2123 if (sigs == NULL) {
2124 sigemptyset(&tmpsigs);
2125 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2126 sigs = &tmpsigs;
2127 }
2128
2129 found = 0;
2130 buf[0] = '\0';
2131
2132 for (i = 0; i < LAST_SIGNAL; i++) {
2133 char tmp[20];
2134
2135 if (sigismember(sigs, i) > 0) {
2136 if (found > 0)
2137 strlcat(buf, ",", sizeof(buf));
2138 snprintf(tmp, sizeof(tmp), "%d", i);
2139 strlcat(buf, tmp, sizeof(buf));
2140 found++;
2141 }
2142 }
2143
2144 if (found == 0)
2145 snprintf(buf, sizeof(buf), "<none>");
2146
2147 zlog_debug("%s: %s", __func__, buf);
2148}
a505383d 2149
f59e6882
DL
2150static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2151 const struct thread *thread)
2152{
2153 static const char * const types[] = {
2154 [THREAD_READ] = "read",
2155 [THREAD_WRITE] = "write",
2156 [THREAD_TIMER] = "timer",
2157 [THREAD_EVENT] = "event",
2158 [THREAD_READY] = "ready",
2159 [THREAD_UNUSED] = "unused",
2160 [THREAD_EXECUTE] = "exec",
2161 };
2162 ssize_t rv = 0;
2163 char info[16] = "";
2164
2165 if (!thread)
2166 return bputs(buf, "{(thread *)NULL}");
2167
2168 rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2169
2170 if (thread->type < array_size(types) && types[thread->type])
2171 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2172 else
2173 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2174
2175 switch (thread->type) {
2176 case THREAD_READ:
2177 case THREAD_WRITE:
2178 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2179 break;
2180
2181 case THREAD_TIMER:
2182 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2183 break;
2184 }
2185
2186 rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2187 thread->xref->funcname, thread->xref->dest,
2188 thread->xref->xref.file, thread->xref->xref.line);
2189 return rv;
2190}
2191
54929fd3 2192printfrr_ext_autoreg_p("TH", printfrr_thread);
f59e6882
DL
2193static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2194 const void *ptr)
2195{
2196 const struct thread *thread = ptr;
2197 struct timespec remain = {};
2198
2199 if (ea->fmt[0] == 'D') {
2200 ea->fmt++;
2201 return printfrr_thread_dbg(buf, ea, thread);
2202 }
2203
2204 if (!thread) {
2205 /* need to jump over time formatting flag characters in the
2206 * input format string, i.e. adjust ea->fmt!
2207 */
2208 printfrr_time(buf, ea, &remain,
2209 TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2210 return bputch(buf, '-');
2211 }
2212
2213 TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2214 return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2215}