acddc0ed 1// SPDX-License-Identifier: GPL-2.0-or-later
718e3744 2/* Thread management routine
3 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
718e3744 4 */
5
6/* #define DEBUG */
7
8#include <zebra.h>
308d14ae 9#include <sys/resource.h>
718e3744 10
11#include "thread.h"
12#include "memory.h"
3e41733f 13#include "frrcu.h"
718e3744 14#include "log.h"
e04ab74d 15#include "hash.h"
16#include "command.h"
05c447dd 17#include "sigevent.h"
3bf2673b 18#include "network.h"
bd74dc61 19#include "jhash.h"
fbcac826 20#include "frratomic.h"
00dffa8c 21#include "frr_pthread.h"
9ef9495e 22#include "lib_errors.h"
912d45a1 23#include "libfrr_trace.h"
1a9f340b 24#include "libfrr.h"
d6be5fb9 25
26DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
27DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
28DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
29DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");
4a1ab8e4 30
960b9a53 31DECLARE_LIST(thread_list, struct thread, threaditem);
c284542b 32
33struct cancel_req {
34 int flags;
35 struct thread *thread;
36 void *eventobj;
37 struct thread **threadref;
38};
39
40/* Flags for task cancellation */
41#define THREAD_CANCEL_FLAG_READY 0x01
42
43static int thread_timer_cmp(const struct thread *a, const struct thread *b)
44{
45 if (a->u.sands.tv_sec < b->u.sands.tv_sec)
46 return -1;
47 if (a->u.sands.tv_sec > b->u.sands.tv_sec)
48 return 1;
49 if (a->u.sands.tv_usec < b->u.sands.tv_usec)
50 return -1;
51 if (a->u.sands.tv_usec > b->u.sands.tv_usec)
52 return 1;
53 return 0;
54}
55
960b9a53 56DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);
27d29ced 57
58#if defined(__APPLE__)
59#include <mach/mach.h>
60#include <mach/mach_time.h>
61#endif
62
d62a17ae 63#define AWAKEN(m) \
64 do { \
2b64873d 65 const unsigned char wakebyte = 0x01; \
d62a17ae 66 write(m->io_pipe[1], &wakebyte, 1); \
67 } while (0);
3bf2673b 68
62f44022 69/* control variable for initializer */
c17faa4b 70static pthread_once_t init_once = PTHREAD_ONCE_INIT;
e0bebc7c 71pthread_key_t thread_current;
6b0655a2 72
c17faa4b 73static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
74static struct list *masters;
75
6655966d 76static void thread_free(struct thread_master *master, struct thread *thread);
6b0655a2 77
78#ifndef EXCLUDE_CPU_TIME
79#define EXCLUDE_CPU_TIME 0
80#endif
81#ifndef CONSUMED_TIME_CHECK
82#define CONSUMED_TIME_CHECK 0
83#endif
84
85bool cputime_enabled = !EXCLUDE_CPU_TIME;
86unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
87unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
88
62f44022 89/* CLI start ---------------------------------------------------------------- */
45f01188 90#include "lib/thread_clippy.c"
45f01188 91
d8b87afe 92static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
e04ab74d 93{
883cc51d 94 int size = sizeof(a->func);
95
96 return jhash(&a->func, size, 0);
e04ab74d 97}
98
74df8d6d 99static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
d62a17ae 100 const struct cpu_thread_history *b)
e04ab74d 101{
d62a17ae 102 return a->func == b->func;
e04ab74d 103}
104
d62a17ae 105static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
e04ab74d 106{
d62a17ae 107 struct cpu_thread_history *new;
108 new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
109 new->func = a->func;
110 new->funcname = a->funcname;
111 return new;
e04ab74d 112}
113
d62a17ae 114static void cpu_record_hash_free(void *a)
228da428 115{
d62a17ae 116 struct cpu_thread_history *hist = a;
117
118 XFREE(MTYPE_THREAD_STATS, hist);
119}
120
d62a17ae 121static void vty_out_cpu_thread_history(struct vty *vty,
122 struct cpu_thread_history *a)
e04ab74d 123{
124 vty_out(vty,
125 "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
72327cf3 126 a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
127 a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
128 (a->real.total / a->total_calls), a->real.max,
1dd08c22 129 a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
9b8e01ca 130 vty_out(vty, " %c%c%c%c%c %s\n",
d62a17ae 131 a->types & (1 << THREAD_READ) ? 'R' : ' ',
132 a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
133 a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
134 a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
135 a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
e04ab74d 136}
137
e3b78da8 138static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
e04ab74d 139{
d62a17ae 140 struct cpu_thread_history *totals = args[0];
fbcac826 141 struct cpu_thread_history copy;
d62a17ae 142 struct vty *vty = args[1];
fbcac826 143 uint8_t *filter = args[2];
d62a17ae 144
145 struct cpu_thread_history *a = bucket->data;
146
147 copy.total_active =
148 atomic_load_explicit(&a->total_active, memory_order_seq_cst);
149 copy.total_calls =
150 atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
151 copy.total_cpu_warn =
152 atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
153 copy.total_wall_warn =
154 atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
155 copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
156 memory_order_seq_cst);
157 copy.cpu.total =
158 atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
159 copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
160 copy.real.total =
161 atomic_load_explicit(&a->real.total, memory_order_seq_cst);
162 copy.real.max =
163 atomic_load_explicit(&a->real.max, memory_order_seq_cst);
164 copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
165 copy.funcname = a->funcname;
166
167 if (!(copy.types & *filter))
d62a17ae 168 return;
169
170 vty_out_cpu_thread_history(vty, &copy);
171 totals->total_active += copy.total_active;
172 totals->total_calls += copy.total_calls;
173 totals->total_cpu_warn += copy.total_cpu_warn;
174 totals->total_wall_warn += copy.total_wall_warn;
1dd08c22 175 totals->total_starv_warn += copy.total_starv_warn;
176 totals->real.total += copy.real.total;
177 if (totals->real.max < copy.real.max)
178 totals->real.max = copy.real.max;
179 totals->cpu.total += copy.cpu.total;
180 if (totals->cpu.max < copy.cpu.max)
181 totals->cpu.max = copy.cpu.max;
e04ab74d 182}
183
fbcac826 184static void cpu_record_print(struct vty *vty, uint8_t filter)
e04ab74d 185{
d62a17ae 186 struct cpu_thread_history tmp;
187 void *args[3] = {&tmp, vty, &filter};
188 struct thread_master *m;
189 struct listnode *ln;
190
191 if (!cputime_enabled)
192 vty_out(vty,
193 "\n"
194 "Collecting CPU time statistics is currently disabled. Following statistics\n"
195 "will be zero or may display data from when collection was enabled. Use the\n"
196 " \"service cputime-stats\" command to start collecting data.\n"
197 "\nCounters and wallclock times are always maintained and should be accurate.\n");
198
0d6f7fd6 199 memset(&tmp, 0, sizeof(tmp));
d62a17ae 200 tmp.funcname = "TOTAL";
201 tmp.types = filter;
202
cb1991af 203 frr_with_mutex (&masters_mtx) {
d62a17ae 204 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
205 const char *name = m->name ? m->name : "main";
206
207 char underline[strlen(name) + 1];
208 memset(underline, '-', sizeof(underline));
4f113d60 209 underline[sizeof(underline) - 1] = '\0';
d62a17ae 210
211 vty_out(vty, "\n");
212 vty_out(vty, "Showing statistics for pthread %s\n",
213 name);
214 vty_out(vty, "-------------------------------%s\n",
215 underline);
84d951d0 216 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 217 "CPU (user+system):", "Real (wall-clock):");
218 vty_out(vty,
219 "Active Runtime(ms) Invoked Avg uSec Max uSecs");
220 vty_out(vty, " Avg uSec Max uSecs");
221 vty_out(vty,
222 " CPU_Warn Wall_Warn Starv_Warn Type Thread\n");
d62a17ae 223
224 if (m->cpu_record->count)
225 hash_iterate(
226 m->cpu_record,
e3b78da8 227 (void (*)(struct hash_bucket *,
d62a17ae 228 void *))cpu_record_hash_print,
229 args);
230 else
231 vty_out(vty, "No data to display yet.\n");
232
233 vty_out(vty, "\n");
234 }
235 }
d62a17ae 236
237 vty_out(vty, "\n");
238 vty_out(vty, "Total thread statistics\n");
239 vty_out(vty, "-------------------------\n");
84d951d0 240 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 241 "CPU (user+system):", "Real (wall-clock):");
242 vty_out(vty, "Active Runtime(ms) Invoked Avg uSec Max uSecs");
9b8e01ca 243 vty_out(vty, " Avg uSec Max uSecs CPU_Warn Wall_Warn");
d62a17ae 244 vty_out(vty, " Type Thread\n");
245
246 if (tmp.total_calls > 0)
247 vty_out_cpu_thread_history(vty, &tmp);
e04ab74d 248}
249
e3b78da8 250static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
e276eb82 251{
fbcac826 252 uint8_t *filter = args[0];
d62a17ae 253 struct hash *cpu_record = args[1];
254
255 struct cpu_thread_history *a = bucket->data;
62f44022 256
d62a17ae 257 if (!(a->types & *filter))
258 return;
f48f65d2 259
d62a17ae 260 hash_release(cpu_record, bucket->data);
261}
262
fbcac826 263static void cpu_record_clear(uint8_t filter)
e276eb82 264{
fbcac826 265 uint8_t *tmp = &filter;
d62a17ae 266 struct thread_master *m;
267 struct listnode *ln;
268
cb1991af 269 frr_with_mutex (&masters_mtx) {
d62a17ae 270 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
cb1991af 271 frr_with_mutex (&m->mtx) {
d62a17ae 272 void *args[2] = {tmp, m->cpu_record};
273 hash_iterate(
274 m->cpu_record,
e3b78da8 275 (void (*)(struct hash_bucket *,
d62a17ae 276 void *))cpu_record_hash_clear,
277 args);
278 }
d62a17ae 279 }
280 }
281}
282
fbcac826 283static uint8_t parse_filter(const char *filterstr)
62f44022 284{
d62a17ae 285 int i = 0;
286 int filter = 0;
287
288 while (filterstr[i] != '\0') {
289 switch (filterstr[i]) {
290 case 'r':
291 case 'R':
292 filter |= (1 << THREAD_READ);
293 break;
294 case 'w':
295 case 'W':
296 filter |= (1 << THREAD_WRITE);
297 break;
298 case 't':
299 case 'T':
300 filter |= (1 << THREAD_TIMER);
301 break;
302 case 'e':
303 case 'E':
304 filter |= (1 << THREAD_EVENT);
305 break;
306 case 'x':
307 case 'X':
308 filter |= (1 << THREAD_EXECUTE);
309 break;
310 default:
311 break;
312 }
313 ++i;
314 }
315 return filter;
316}
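
/*
 * Illustrative sketch (not part of the original source): parse_filter() maps
 * the characters r/w/t/e/x (case-insensitive) onto THREAD_* type bits, so the
 * CLI handlers below can do, for example:
 *
 *   uint8_t filter = parse_filter("WT");
 *   // filter == (1 << THREAD_WRITE) | (1 << THREAD_TIMER)
 *   if (!filter)
 *           ...reject the argument, as the handlers below do...
 */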
317
318DEFUN_NOSH (show_thread_cpu,
319 show_thread_cpu_cmd,
320 "show thread cpu [FILTER]",
321 SHOW_STR
322 "Thread information\n"
323 "Thread CPU usage\n"
324 "Display filter (rwtex)\n")
62f44022 325{
fbcac826 326 uint8_t filter = (uint8_t)-1U;
d62a17ae 327 int idx = 0;
328
329 if (argv_find(argv, argc, "FILTER", &idx)) {
330 filter = parse_filter(argv[idx]->arg);
331 if (!filter) {
332 vty_out(vty,
3efd0893 333 "Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
d62a17ae 334 argv[idx]->arg);
335 return CMD_WARNING;
336 }
337 }
338
339 cpu_record_print(vty, filter);
340 return CMD_SUCCESS;
e276eb82 341}
342
343DEFPY (service_cputime_stats,
344 service_cputime_stats_cmd,
345 "[no] service cputime-stats",
346 NO_STR
347 "Set up miscellaneous service\n"
348 "Collect CPU usage statistics\n")
349{
350 cputime_enabled = !no;
351 return CMD_SUCCESS;
352}
353
354DEFPY (service_cputime_warning,
355 service_cputime_warning_cmd,
356 "[no] service cputime-warning (1-4294967295)",
357 NO_STR
358 "Set up miscellaneous service\n"
359 "Warn for tasks exceeding CPU usage threshold\n"
360 "Warning threshold in milliseconds\n")
361{
362 if (no)
363 cputime_threshold = 0;
364 else
365 cputime_threshold = cputime_warning * 1000;
366 return CMD_SUCCESS;
367}
368
369ALIAS (service_cputime_warning,
370 no_service_cputime_warning_cmd,
371 "no service cputime-warning",
372 NO_STR
373 "Set up miscellaneous service\n"
374 "Warn for tasks exceeding CPU usage threshold\n")
375
376DEFPY (service_walltime_warning,
377 service_walltime_warning_cmd,
378 "[no] service walltime-warning (1-4294967295)",
379 NO_STR
380 "Set up miscellaneous service\n"
381 "Warn for tasks exceeding total wallclock threshold\n"
382 "Warning threshold in milliseconds\n")
383{
384 if (no)
385 walltime_threshold = 0;
386 else
387 walltime_threshold = walltime_warning * 1000;
388 return CMD_SUCCESS;
389}
390
391ALIAS (service_walltime_warning,
392 no_service_walltime_warning_cmd,
393 "no service walltime-warning",
394 NO_STR
395 "Set up miscellaneous service\n"
396 "Warn for tasks exceeding total wallclock threshold\n")
e276eb82 397
398static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
399{
400 const char *name = m->name ? m->name : "main";
401 char underline[strlen(name) + 1];
a0b36ae6 402 struct thread *thread;
403 uint32_t i;
404
405 memset(underline, '-', sizeof(underline));
406 underline[sizeof(underline) - 1] = '\0';
407
408 vty_out(vty, "\nShowing poll FD's for %s\n", name);
409 vty_out(vty, "----------------------%s\n", underline);
410 vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
411 m->fd_limit);
412 for (i = 0; i < m->handler.pfdcount; i++) {
413 vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
414 m->handler.pfds[i].fd, m->handler.pfds[i].events,
8872626b 415 m->handler.pfds[i].revents);
416
417 if (m->handler.pfds[i].events & POLLIN) {
418 thread = m->read[m->handler.pfds[i].fd];
419
420 if (!thread)
421 vty_out(vty, "ERROR ");
422 else
60a3efec 423 vty_out(vty, "%s ", thread->xref->funcname);
424 } else
425 vty_out(vty, " ");
426
427 if (m->handler.pfds[i].events & POLLOUT) {
428 thread = m->write[m->handler.pfds[i].fd];
429
430 if (!thread)
431 vty_out(vty, "ERROR\n");
432 else
60a3efec 433 vty_out(vty, "%s\n", thread->xref->funcname);
434 } else
435 vty_out(vty, "\n");
436 }
437}
438
439DEFUN_NOSH (show_thread_poll,
440 show_thread_poll_cmd,
441 "show thread poll",
442 SHOW_STR
443 "Thread information\n"
444 "Show poll FD's and information\n")
445{
446 struct listnode *node;
447 struct thread_master *m;
448
cb1991af 449 frr_with_mutex (&masters_mtx) {
450 for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
451 show_thread_poll_helper(vty, m);
452 }
453 }
454
455 return CMD_SUCCESS;
456}
457
458
459DEFUN (clear_thread_cpu,
460 clear_thread_cpu_cmd,
461 "clear thread cpu [FILTER]",
62f44022 462 "Clear stored data in all pthreads\n"
463 "Thread information\n"
464 "Thread CPU usage\n"
465 "Display filter (rwtexb)\n")
e276eb82 466{
fbcac826 467 uint8_t filter = (uint8_t)-1U;
d62a17ae 468 int idx = 0;
469
470 if (argv_find(argv, argc, "FILTER", &idx)) {
471 filter = parse_filter(argv[idx]->arg);
472 if (!filter) {
473 vty_out(vty,
3efd0893 474 "Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
d62a17ae 475 argv[idx]->arg);
476 return CMD_WARNING;
477 }
478 }
479
480 cpu_record_clear(filter);
481 return CMD_SUCCESS;
e276eb82 482}
6b0655a2 483
484static void show_thread_timers_helper(struct vty *vty, struct thread_master *m)
485{
486 const char *name = m->name ? m->name : "main";
487 char underline[strlen(name) + 1];
488 struct thread *thread;
489
490 memset(underline, '-', sizeof(underline));
491 underline[sizeof(underline) - 1] = '\0';
492
493 vty_out(vty, "\nShowing timers for %s\n", name);
494 vty_out(vty, "-------------------%s\n", underline);
495
496 frr_each (thread_timer_list, &m->timer, thread) {
497 vty_out(vty, " %-50s%pTH\n", thread->hist->funcname, thread);
498 }
499}
500
501DEFPY_NOSH (show_thread_timers,
502 show_thread_timers_cmd,
503 "show thread timers",
504 SHOW_STR
505 "Thread information\n"
506 "Show all timers and how long they have in the system\n")
507{
508 struct listnode *node;
509 struct thread_master *m;
510
511 frr_with_mutex (&masters_mtx) {
512 for (ALL_LIST_ELEMENTS_RO(masters, node, m))
513 show_thread_timers_helper(vty, m);
514 }
515
516 return CMD_SUCCESS;
517}
518
d62a17ae 519void thread_cmd_init(void)
0b84f294 520{
d62a17ae 521 install_element(VIEW_NODE, &show_thread_cpu_cmd);
8872626b 522 install_element(VIEW_NODE, &show_thread_poll_cmd);
d62a17ae 523 install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
524
525 install_element(CONFIG_NODE, &service_cputime_stats_cmd);
526 install_element(CONFIG_NODE, &service_cputime_warning_cmd);
527 install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
528 install_element(CONFIG_NODE, &service_walltime_warning_cmd);
529 install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
530
531 install_element(VIEW_NODE, &show_thread_timers_cmd);
0b84f294 532}
533/* CLI end ------------------------------------------------------------------ */
534
0b84f294 535
d62a17ae 536static void cancelreq_del(void *cr)
63ccb9cb 537{
d62a17ae 538 XFREE(MTYPE_TMP, cr);
63ccb9cb
QY
539}
540
e0bebc7c 541/* initializer, only ever called once */
4d762f26 542static void initializer(void)
e0bebc7c 543{
d62a17ae 544 pthread_key_create(&thread_current, NULL);
545}
546
d62a17ae 547struct thread_master *thread_master_create(const char *name)
718e3744 548{
d62a17ae 549 struct thread_master *rv;
550 struct rlimit limit;
551
552 pthread_once(&init_once, &initializer);
553
554 rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));
d62a17ae 555
556 /* Initialize master mutex */
557 pthread_mutex_init(&rv->mtx, NULL);
558 pthread_cond_init(&rv->cancel_cond, NULL);
559
560 /* Set name */
561 name = name ? name : "default";
562 rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
d62a17ae 563
564 /* Initialize I/O task data structures */
565
566 /* Use configured limit if present, ulimit otherwise. */
567 rv->fd_limit = frr_get_fd_limit();
568 if (rv->fd_limit == 0) {
569 getrlimit(RLIMIT_NOFILE, &limit);
570 rv->fd_limit = (int)limit.rlim_cur;
571 }
572
573 rv->read = XCALLOC(MTYPE_THREAD_POLL,
574 sizeof(struct thread *) * rv->fd_limit);
575
576 rv->write = XCALLOC(MTYPE_THREAD_POLL,
577 sizeof(struct thread *) * rv->fd_limit);
d62a17ae 578
579 char tmhashname[strlen(name) + 32];
580 snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
581 name);
bd74dc61 582 rv->cpu_record = hash_create_size(
d8b87afe 583 8, (unsigned int (*)(const void *))cpu_record_hash_key,
74df8d6d 584 (bool (*)(const void *, const void *))cpu_record_hash_cmp,
7ffcd8bd 585 tmhashname);
d62a17ae 586
587 thread_list_init(&rv->event);
588 thread_list_init(&rv->ready);
589 thread_list_init(&rv->unuse);
27d29ced 590 thread_timer_list_init(&rv->timer);
d62a17ae 591
592 /* Initialize thread_fetch() settings */
593 rv->spin = true;
594 rv->handle_signals = true;
595
596 /* Set pthread owner, should be updated by actual owner */
597 rv->owner = pthread_self();
598 rv->cancel_req = list_new();
599 rv->cancel_req->del = cancelreq_del;
600 rv->canceled = true;
601
602 /* Initialize pipe poker */
603 pipe(rv->io_pipe);
604 set_nonblocking(rv->io_pipe[0]);
605 set_nonblocking(rv->io_pipe[1]);
606
607 /* Initialize data structures for poll() */
608 rv->handler.pfdsize = rv->fd_limit;
609 rv->handler.pfdcount = 0;
610 rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
611 sizeof(struct pollfd) * rv->handler.pfdsize);
612 rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
613 sizeof(struct pollfd) * rv->handler.pfdsize);
614
eff09c66 615 /* add to list of threadmasters */
cb1991af 616 frr_with_mutex (&masters_mtx) {
617 if (!masters)
618 masters = list_new();
619
d62a17ae 620 listnode_add(masters, rv);
621 }
d62a17ae 622
623 return rv;
718e3744 624}
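
/*
 * Typical lifecycle sketch (illustrative; the handler functions, ctx and
 * sock_fd are placeholders, and the thread_add_* names are the public
 * wrappers from thread.h):
 *
 *   struct thread_master *master = thread_master_create("main");
 *
 *   thread_add_read(master, handle_read, ctx, sock_fd, &ctx->t_read);
 *   thread_add_timer(master, handle_periodic, ctx, 10, &ctx->t_periodic);
 *
 *   ...run the thread_fetch()/thread_call() loop sketched near
 *   thread_fetch() further below...
 *
 *   thread_master_free(master);
 */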
625
626void thread_master_set_name(struct thread_master *master, const char *name)
627{
cb1991af 628 frr_with_mutex (&master->mtx) {
0a22ddfb 629 XFREE(MTYPE_THREAD_MASTER, master->name);
630 master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
631 }
632}
633
634#define THREAD_UNUSED_DEPTH 10
635
718e3744 636/* Move thread to unuse list. */
d62a17ae 637static void thread_add_unuse(struct thread_master *m, struct thread *thread)
718e3744 638{
639 pthread_mutex_t mtxc = thread->mtx;
640
d62a17ae 641 assert(m != NULL && thread != NULL);
d62a17ae 642
d62a17ae 643 thread->hist->total_active--;
644 memset(thread, 0, sizeof(struct thread));
645 thread->type = THREAD_UNUSED;
646
647 /* Restore the thread mutex context. */
648 thread->mtx = mtxc;
649
650 if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
651 thread_list_add_tail(&m->unuse, thread);
652 return;
653 }
654
655 thread_free(m, thread);
718e3744 656}
657
658/* Free all unused threads. */
659static void thread_list_free(struct thread_master *m,
660 struct thread_list_head *list)
718e3744 661{
d62a17ae 662 struct thread *t;
d62a17ae 663
c284542b 664 while ((t = thread_list_pop(list)))
6655966d 665 thread_free(m, t);
718e3744 666}
667
d62a17ae 668static void thread_array_free(struct thread_master *m,
669 struct thread **thread_array)
308d14ae 670{
d62a17ae 671 struct thread *t;
672 int index;
673
674 for (index = 0; index < m->fd_limit; ++index) {
675 t = thread_array[index];
676 if (t) {
677 thread_array[index] = NULL;
6655966d 678 thread_free(m, t);
d62a17ae 679 }
680 }
a6f235f3 681 XFREE(MTYPE_THREAD_POLL, thread_array);
682}
683
684/*
685 * thread_master_free_unused
686 *
687 * As threads are finished with, they are put on the
688 * unuse list for later reuse.
689 * If we are shutting down, free up the unused threads
690 * so we can see if we forgot to shut anything off.
691 */
d62a17ae 692void thread_master_free_unused(struct thread_master *m)
495f0b13 693{
cb1991af 694 frr_with_mutex (&m->mtx) {
d62a17ae 695 struct thread *t;
c284542b 696 while ((t = thread_list_pop(&m->unuse)))
6655966d 697 thread_free(m, t);
d62a17ae 698 }
699}
700
718e3744 701/* Stop thread scheduler. */
d62a17ae 702void thread_master_free(struct thread_master *m)
718e3744 703{
704 struct thread *t;
705
cb1991af 706 frr_with_mutex (&masters_mtx) {
d62a17ae 707 listnode_delete(masters, m);
eff09c66 708 if (masters->count == 0) {
6a154c88 709 list_delete(&masters);
eff09c66 710 }
d62a17ae 711 }
d62a17ae 712
713 thread_array_free(m, m->read);
714 thread_array_free(m, m->write);
715 while ((t = thread_timer_list_pop(&m->timer)))
716 thread_free(m, t);
d62a17ae 717 thread_list_free(m, &m->event);
718 thread_list_free(m, &m->ready);
719 thread_list_free(m, &m->unuse);
720 pthread_mutex_destroy(&m->mtx);
33844bbe 721 pthread_cond_destroy(&m->cancel_cond);
d62a17ae 722 close(m->io_pipe[0]);
723 close(m->io_pipe[1]);
6a154c88 724 list_delete(&m->cancel_req);
1a0a92ea 725 m->cancel_req = NULL;
d62a17ae 726
727 hash_clean(m->cpu_record, cpu_record_hash_free);
728 hash_free(m->cpu_record);
729 m->cpu_record = NULL;
730
0a22ddfb 731 XFREE(MTYPE_THREAD_MASTER, m->name);
d62a17ae 732 XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
733 XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
734 XFREE(MTYPE_THREAD_MASTER, m);
718e3744 735}
736
94202742 737/* Return remaining time in milliseconds. */
78ca0342 738unsigned long thread_timer_remain_msec(struct thread *thread)
718e3744 739{
d62a17ae 740 int64_t remain;
1189d95f 741
742 if (!thread_is_scheduled(thread))
743 return 0;
744
cb1991af 745 frr_with_mutex (&thread->mtx) {
78ca0342 746 remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
d62a17ae 747 }
1189d95f 748
d62a17ae 749 return remain < 0 ? 0 : remain;
718e3744 750}
751
752/* Return remaining time in seconds. */
753unsigned long thread_timer_remain_second(struct thread *thread)
754{
755 return thread_timer_remain_msec(thread) / 1000LL;
756}
757
d62a17ae 758struct timeval thread_timer_remain(struct thread *thread)
6ac44687 759{
d62a17ae 760 struct timeval remain;
cb1991af 761 frr_with_mutex (&thread->mtx) {
d62a17ae 762 monotime_until(&thread->u.sands, &remain);
763 }
d62a17ae 764 return remain;
765}
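
/*
 * Usage sketch (illustrative; ctx->t_periodic is a placeholder back-reference
 * to a scheduled timer task): the remaining time can be queried at several
 * granularities, e.g.
 *
 *   unsigned long msec = thread_timer_remain_msec(ctx->t_periodic);
 *   unsigned long sec = thread_timer_remain_second(ctx->t_periodic);
 *   struct timeval tv = thread_timer_remain(ctx->t_periodic);
 *
 * thread_timer_remain_msec() returns 0 for a timer that is not scheduled or
 * that has already expired.
 */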
766
767static int time_hhmmss(char *buf, int buf_size, long sec)
768{
769 long hh;
770 long mm;
771 int wr;
772
642ac49d 773 assert(buf_size >= 8);
774
775 hh = sec / 3600;
776 sec %= 3600;
777 mm = sec / 60;
778 sec %= 60;
779
780 wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
781
782 return wr != 8;
783}
784
785char *thread_timer_to_hhmmss(char *buf, int buf_size,
786 struct thread *t_timer)
787{
788 if (t_timer) {
789 time_hhmmss(buf, buf_size,
790 thread_timer_remain_second(t_timer));
791 } else {
792 snprintf(buf, buf_size, "--:--:--");
793 }
794 return buf;
795}
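
/*
 * Usage sketch (illustrative; t_timer is a placeholder): format a timer's
 * remaining time as a fixed-width "HH:MM:SS" string, e.g.
 *
 *   char buf[32];
 *
 *   vty_out(vty, "expires in %s\n",
 *           thread_timer_to_hhmmss(buf, sizeof(buf), t_timer));
 *
 * A NULL timer is rendered as "--:--:--".
 */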
796
718e3744 797/* Get new thread. */
d7c0a89a 798static struct thread *thread_get(struct thread_master *m, uint8_t type,
cc9f21da 799 void (*func)(struct thread *), void *arg,
60a3efec 800 const struct xref_threadsched *xref)
718e3744 801{
c284542b 802 struct thread *thread = thread_list_pop(&m->unuse);
d62a17ae 803 struct cpu_thread_history tmp;
804
805 if (!thread) {
806 thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
807 /* mutex only needs to be initialized at struct creation. */
808 pthread_mutex_init(&thread->mtx, NULL);
809 m->alloc++;
810 }
811
812 thread->type = type;
813 thread->add_type = type;
814 thread->master = m;
815 thread->arg = arg;
d62a17ae 816 thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
817 thread->ref = NULL;
e8b3a2f7 818 thread->ignore_timer_late = false;
d62a17ae 819
820 /*
821 * So if the passed in funcname is not what we have
822 * stored that means the thread->hist needs to be
823 * updated. We keep the last one around in unused
824 * under the assumption that we are probably
825 * going to immediately allocate the same
826 * type of thread.
827 * This hopefully saves us some serious
828 * hash_get lookups.
829 */
60a3efec
DL
830 if ((thread->xref && thread->xref->funcname != xref->funcname)
831 || thread->func != func) {
d62a17ae 832 tmp.func = func;
60a3efec 833 tmp.funcname = xref->funcname;
d62a17ae 834 thread->hist =
835 hash_get(m->cpu_record, &tmp,
836 (void *(*)(void *))cpu_record_hash_alloc);
837 }
838 thread->hist->total_active++;
839 thread->func = func;
60a3efec 840 thread->xref = xref;
d62a17ae 841
842 return thread;
718e3744 843}
844
845static void thread_free(struct thread_master *master, struct thread *thread)
846{
847 /* Update statistics. */
848 assert(master->alloc > 0);
849 master->alloc--;
850
851 /* Free allocated resources. */
852 pthread_mutex_destroy(&thread->mtx);
853 XFREE(MTYPE_THREAD, thread);
854}
855
856static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
857 bool *eintr_p)
209a72a6 858{
859 sigset_t origsigs;
860 unsigned char trash[64];
861 nfds_t count = m->handler.copycount;
862
863 /*
864 * If timer_wait is null here, that means poll() should block
865 * indefinitely, unless the thread_master has overridden it by setting
d62a17ae 866 * ->selectpoll_timeout.
d279ef57 867 *
d62a17ae 868 * If the value is positive, it specifies the maximum number of
869 * milliseconds to wait. If the timeout is -1, it specifies that
870 * we should never wait and always return immediately even if no
871 * event is detected. If the value is zero, the behavior is default.
872 */
d62a17ae 873 int timeout = -1;
874
875 /* number of file descriptors with events */
876 int num;
877
878 if (timer_wait != NULL
879 && m->selectpoll_timeout == 0) // use the default value
880 timeout = (timer_wait->tv_sec * 1000)
881 + (timer_wait->tv_usec / 1000);
882 else if (m->selectpoll_timeout > 0) // use the user's timeout
883 timeout = m->selectpoll_timeout;
884 else if (m->selectpoll_timeout
885 < 0) // effect a poll (return immediately)
886 timeout = 0;
887
0bdeb5e5 888 zlog_tls_buffer_flush();
889 rcu_read_unlock();
890 rcu_assert_read_unlocked();
891
d62a17ae 892 /* add poll pipe poker */
893 assert(count + 1 < m->handler.pfdsize);
894 m->handler.copy[count].fd = m->io_pipe[0];
895 m->handler.copy[count].events = POLLIN;
896 m->handler.copy[count].revents = 0x00;
897
898 /* We need to deal with a signal-handling race here: we
899 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
900 * that may arrive just before we enter poll(). We will block the
901 * key signals, then check whether any have arrived - if so, we return
902 * before calling poll(). If not, we'll re-enable the signals
903 * in the ppoll() call.
904 */
905
906 sigemptyset(&origsigs);
907 if (m->handle_signals) {
908 /* Main pthread that handles the app signals */
909 if (frr_sigevent_check(&origsigs)) {
910 /* Signal to process - restore signal mask and return */
911 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
912 num = -1;
913 *eintr_p = true;
914 goto done;
915 }
916 } else {
917 /* Don't make any changes for the non-main pthreads */
918 pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
919 }
d62a17ae 920
921#if defined(HAVE_PPOLL)
922 struct timespec ts, *tsp;
923
924 if (timeout >= 0) {
925 ts.tv_sec = timeout / 1000;
926 ts.tv_nsec = (timeout % 1000) * 1000000;
927 tsp = &ts;
928 } else
929 tsp = NULL;
930
931 num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
932 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
933#else
934 /* Not ideal - there is a race after we restore the signal mask */
935 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
936 num = poll(m->handler.copy, count + 1, timeout);
937#endif
d62a17ae 938
939done:
940
941 if (num < 0 && errno == EINTR)
942 *eintr_p = true;
943
944 if (num > 0 && m->handler.copy[count].revents != 0 && num--)
d62a17ae 945 while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
946 ;
947
948 rcu_read_lock();
949
d62a17ae 950 return num;
951}
952
718e3744 953/* Add new read thread. */
954void _thread_add_read_write(const struct xref_threadsched *xref,
955 struct thread_master *m,
cc9f21da 956 void (*func)(struct thread *), void *arg, int fd,
ee1455dd 957 struct thread **t_ptr)
718e3744 958{
60a3efec 959 int dir = xref->thread_type;
d62a17ae 960 struct thread *thread = NULL;
1ef14bee 961 struct thread **thread_array;
d62a17ae 962
abf96a87 963 if (dir == THREAD_READ)
964 frrtrace(9, frr_libfrr, schedule_read, m,
965 xref->funcname, xref->xref.file, xref->xref.line,
966 t_ptr, fd, 0, arg, 0);
abf96a87 967 else
968 frrtrace(9, frr_libfrr, schedule_write, m,
969 xref->funcname, xref->xref.file, xref->xref.line,
970 t_ptr, fd, 0, arg, 0);
abf96a87 971
972 assert(fd >= 0);
973 if (fd >= m->fd_limit)
974 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
975
cb1991af 976 frr_with_mutex (&m->mtx) {
977 if (t_ptr && *t_ptr)
978 // thread is already scheduled; don't reschedule
979 break;
d62a17ae 980
981 /* default to a new pollfd */
982 nfds_t queuepos = m->handler.pfdcount;
983
984 if (dir == THREAD_READ)
985 thread_array = m->read;
986 else
987 thread_array = m->write;
988
d62a17ae 989 /* if we already have a pollfd for our file descriptor, find and
990 * use it */
991 for (nfds_t i = 0; i < m->handler.pfdcount; i++)
992 if (m->handler.pfds[i].fd == fd) {
993 queuepos = i;
994
995#ifdef DEV_BUILD
996 /*
997 * What happens if we have a thread already
998 * created for this event?
999 */
1000 if (thread_array[fd])
1001 assert(!"Thread already scheduled for file descriptor");
1002#endif
d62a17ae 1003 break;
1004 }
1005
1006 /* make sure we have room for this fd + pipe poker fd */
1007 assert(queuepos + 1 < m->handler.pfdsize);
1008
60a3efec 1009 thread = thread_get(m, dir, func, arg, xref);
d62a17ae 1010
1011 m->handler.pfds[queuepos].fd = fd;
1012 m->handler.pfds[queuepos].events |=
1013 (dir == THREAD_READ ? POLLIN : POLLOUT);
1014
1015 if (queuepos == m->handler.pfdcount)
1016 m->handler.pfdcount++;
1017
1018 if (thread) {
cb1991af 1019 frr_with_mutex (&thread->mtx) {
d62a17ae 1020 thread->u.fd = fd;
1ef14bee 1021 thread_array[thread->u.fd] = thread;
d62a17ae 1022 }
d62a17ae 1023
1024 if (t_ptr) {
1025 *t_ptr = thread;
1026 thread->ref = t_ptr;
1027 }
1028 }
1029
1030 AWAKEN(m);
1031 }
718e3744 1032}
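
/*
 * Usage sketch (illustrative; handle_read and ctx are placeholders): callers
 * normally use the thread_add_read()/thread_add_write() wrappers from
 * thread.h, which supply the xref and thread_type for this function:
 *
 *   thread_add_read(master, handle_read, ctx, fd, &ctx->t_read);
 *
 * Passing &ctx->t_read as the back-reference makes a second call a no-op
 * while the task is still scheduled, and the library clears the pointer when
 * the task is fetched to run or is cancelled.
 */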
1033
1034static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
1035 struct thread_master *m,
cc9f21da 1036 void (*func)(struct thread *), void *arg,
1037 struct timeval *time_relative,
1038 struct thread **t_ptr)
718e3744 1039{
d62a17ae 1040 struct thread *thread;
96fe578a 1041 struct timeval t;
d62a17ae 1042
1043 assert(m != NULL);
1044
d62a17ae 1045 assert(time_relative);
1046
1047 frrtrace(9, frr_libfrr, schedule_timer, m,
1048 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1049 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
abf96a87 1050
1051 /* Compute expiration/deadline time. */
1052 monotime(&t);
1053 timeradd(&t, time_relative, &t);
1054
cb1991af 1055 frr_with_mutex (&m->mtx) {
00dffa8c 1056 if (t_ptr && *t_ptr)
d279ef57 1057 /* thread is already scheduled; don't reschedule */
ee1455dd 1058 return;
d62a17ae 1059
4322dea7 1060 thread = thread_get(m, THREAD_TIMER, func, arg, xref);
d62a17ae 1061
cb1991af 1062 frr_with_mutex (&thread->mtx) {
96fe578a 1063 thread->u.sands = t;
27d29ced 1064 thread_timer_list_add(&m->timer, thread);
d62a17ae 1065 if (t_ptr) {
1066 *t_ptr = thread;
1067 thread->ref = t_ptr;
1068 }
1069 }
d62a17ae 1070
1071 /* The timer list is sorted - if this new timer
1072 * might change the time we'll wait for, give the pthread
1073 * a chance to re-compute.
1074 */
1075 if (thread_timer_list_first(&m->timer) == thread)
1076 AWAKEN(m);
d62a17ae 1077 }
1078#define ONEYEAR2SEC (60 * 60 * 24 * 365)
1079 if (time_relative->tv_sec > ONEYEAR2SEC)
1080 flog_err(
1081 EC_LIB_TIMER_TOO_LONG,
1082 "Timer: %pTHD is created with an expiration that is greater than 1 year",
1083 thread);
9e867fe6 1084}
1085
98c91ac6 1086
1087/* Add timer event thread. */
ee1455dd 1088void _thread_add_timer(const struct xref_threadsched *xref,
cc9f21da 1089 struct thread_master *m, void (*func)(struct thread *),
ee1455dd 1090 void *arg, long timer, struct thread **t_ptr)
9e867fe6 1091{
d62a17ae 1092 struct timeval trel;
9e867fe6 1093
d62a17ae 1094 assert(m != NULL);
9e867fe6 1095
d62a17ae 1096 trel.tv_sec = timer;
1097 trel.tv_usec = 0;
9e867fe6 1098
ee1455dd 1099 _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
98c91ac6 1100}
9e867fe6 1101
98c91ac6 1102/* Add timer event thread with "millisecond" resolution */
1103void _thread_add_timer_msec(const struct xref_threadsched *xref,
1104 struct thread_master *m,
1105 void (*func)(struct thread *), void *arg,
1106 long timer, struct thread **t_ptr)
98c91ac6 1107{
d62a17ae 1108 struct timeval trel;
9e867fe6 1109
d62a17ae 1110 assert(m != NULL);
718e3744 1111
d62a17ae 1112 trel.tv_sec = timer / 1000;
1113 trel.tv_usec = 1000 * (timer % 1000);
98c91ac6 1114
ee1455dd 1115 _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
a48b4e6d 1116}
1117
4322dea7 1118/* Add timer event thread with "timeval" resolution */
ee1455dd 1119void _thread_add_timer_tv(const struct xref_threadsched *xref,
1120 struct thread_master *m,
1121 void (*func)(struct thread *), void *arg,
1122 struct timeval *tv, struct thread **t_ptr)
d03c4cbd 1123{
ee1455dd 1124 _thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
1125}
1126
718e3744 1127/* Add simple event thread. */
ee1455dd 1128void _thread_add_event(const struct xref_threadsched *xref,
cc9f21da 1129 struct thread_master *m, void (*func)(struct thread *),
ee1455dd 1130 void *arg, int val, struct thread **t_ptr)
718e3744 1131{
00dffa8c 1132 struct thread *thread = NULL;
d62a17ae 1133
1134 frrtrace(9, frr_libfrr, schedule_event, m,
1135 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1136 t_ptr, 0, val, arg, 0);
abf96a87 1137
d62a17ae 1138 assert(m != NULL);
1139
cb1991af 1140 frr_with_mutex (&m->mtx) {
00dffa8c 1141 if (t_ptr && *t_ptr)
d279ef57 1142 /* thread is already scheduled; don't reschedule */
00dffa8c 1143 break;
d62a17ae 1144
60a3efec 1145 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
cb1991af 1146 frr_with_mutex (&thread->mtx) {
d62a17ae 1147 thread->u.val = val;
c284542b 1148 thread_list_add_tail(&m->event, thread);
d62a17ae 1149 }
d62a17ae 1150
1151 if (t_ptr) {
1152 *t_ptr = thread;
1153 thread->ref = t_ptr;
1154 }
1155
1156 AWAKEN(m);
1157 }
718e3744 1158}
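
/*
 * Usage sketch (illustrative; the handler names and ctx are placeholders):
 * events fire on the next pass through the loop, while timers go onto the
 * sorted timer heap, e.g.
 *
 *   thread_add_event(master, handle_change, ctx, 0, &ctx->t_event);
 *   thread_add_timer_msec(master, handle_timeout, ctx, 250, &ctx->t_timeout);
 *
 * Both calls are no-ops if the task referenced by the last argument is still
 * scheduled.
 */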
1159
1160/* Thread cancellation ------------------------------------------------------ */
1161
1162/**
1163 * NOT's out the .events field of pollfd corresponding to the given file
1164 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1165 *
1166 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
1167 * implementation for details.
1168 *
1169 * @param master
1170 * @param fd
1171 * @param state the event to cancel. One or more (OR'd together) of the
1172 * following:
1173 * - POLLIN
1174 * - POLLOUT
1175 */
1176static void thread_cancel_rw(struct thread_master *master, int fd, short state,
1177 int idx_hint)
0a95a0d0 1178{
1179 bool found = false;
1180
d62a17ae 1181 /* find the index of corresponding pollfd */
1182 nfds_t i;
1183
1184 /* Cancel POLLHUP too just in case some bozo set it */
1185 state |= POLLHUP;
1186
1187 /* Some callers know the index of the pfd already */
1188 if (idx_hint >= 0) {
1189 i = idx_hint;
1190 found = true;
1191 } else {
1192 /* Have to look for the fd in the pfd array */
1193 for (i = 0; i < master->handler.pfdcount; i++)
1194 if (master->handler.pfds[i].fd == fd) {
1195 found = true;
1196 break;
1197 }
1198 }
1199
1200 if (!found) {
1201 zlog_debug(
1202 "[!] Received cancellation request for nonexistent rw job");
1203 zlog_debug("[!] threadmaster: %s | fd: %d",
996c9314 1204 master->name ? master->name : "", fd);
1205 return;
1206 }
d62a17ae 1207
1208 /* NOT out event. */
1209 master->handler.pfds[i].events &= ~(state);
1210
1211 /* If all events are canceled, delete / resize the pollfd array. */
1212 if (master->handler.pfds[i].events == 0) {
1213 memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1214 (master->handler.pfdcount - i - 1)
1215 * sizeof(struct pollfd));
1216 master->handler.pfdcount--;
1217 master->handler.pfds[master->handler.pfdcount].fd = 0;
1218 master->handler.pfds[master->handler.pfdcount].events = 0;
d62a17ae 1219 }
1220
1221 /* If we have the same pollfd in the copy, perform the same operations,
1222 * otherwise return. */
1223 if (i >= master->handler.copycount)
1224 return;
1225
1226 master->handler.copy[i].events &= ~(state);
1227
1228 if (master->handler.copy[i].events == 0) {
1229 memmove(master->handler.copy + i, master->handler.copy + i + 1,
1230 (master->handler.copycount - i - 1)
1231 * sizeof(struct pollfd));
1232 master->handler.copycount--;
1233 master->handler.copy[master->handler.copycount].fd = 0;
1234 master->handler.copy[master->handler.copycount].events = 0;
d62a17ae 1235 }
1236}
1237
1238/*
1239 * Process task cancellation given a task argument: iterate through the
1240 * various lists of tasks, looking for any that match the argument.
1241 */
1242static void cancel_arg_helper(struct thread_master *master,
1243 const struct cancel_req *cr)
1244{
1245 struct thread *t;
1246 nfds_t i;
1247 int fd;
1248 struct pollfd *pfd;
1249
1250 /* We're only processing arg-based cancellations here. */
1251 if (cr->eventobj == NULL)
1252 return;
1253
1254 /* First process the ready lists. */
1255 frr_each_safe(thread_list, &master->event, t) {
1256 if (t->arg != cr->eventobj)
1257 continue;
1258 thread_list_del(&master->event, t);
1259 if (t->ref)
1260 *t->ref = NULL;
1261 thread_add_unuse(master, t);
1262 }
1263
1264 frr_each_safe(thread_list, &master->ready, t) {
1265 if (t->arg != cr->eventobj)
1266 continue;
1267 thread_list_del(&master->ready, t);
1268 if (t->ref)
1269 *t->ref = NULL;
1270 thread_add_unuse(master, t);
1271 }
1272
1273 /* If requested, stop here and ignore io and timers */
1274 if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
1275 return;
1276
1277 /* Check the io tasks */
1278 for (i = 0; i < master->handler.pfdcount;) {
1279 pfd = master->handler.pfds + i;
1280
1281 if (pfd->events & POLLIN)
1282 t = master->read[pfd->fd];
1283 else
1284 t = master->write[pfd->fd];
1285
1286 if (t && t->arg == cr->eventobj) {
1287 fd = pfd->fd;
1288
1289 /* Found a match to cancel: clean up fd arrays */
1290 thread_cancel_rw(master, pfd->fd, pfd->events, i);
1291
1292 /* Clean up thread arrays */
1293 master->read[fd] = NULL;
1294 master->write[fd] = NULL;
1295
1296 /* Clear caller's ref */
1297 if (t->ref)
1298 *t->ref = NULL;
1299
1300 thread_add_unuse(master, t);
1301
1302 /* Don't increment 'i' since the cancellation will have
1303 * removed the entry from the pfd array
1304 */
1305 } else
1306 i++;
1307 }
1308
1309 /* Check the timer tasks */
1310 t = thread_timer_list_first(&master->timer);
1311 while (t) {
1312 struct thread *t_next;
1313
1314 t_next = thread_timer_list_next(&master->timer, t);
1315
1316 if (t->arg == cr->eventobj) {
1317 thread_timer_list_del(&master->timer, t);
1318 if (t->ref)
1319 *t->ref = NULL;
1320 thread_add_unuse(master, t);
1321 }
1322
1323 t = t_next;
1324 }
1325}
1326
1189d95f 1327/**
63ccb9cb 1328 * Process cancellation requests.
1189d95f 1329 *
1330 * This may only be run from the pthread which owns the thread_master.
1331 *
1332 * @param master the thread master to process
1333 * @REQUIRE master->mtx
1189d95f 1334 */
d62a17ae 1335static void do_thread_cancel(struct thread_master *master)
718e3744 1336{
c284542b 1337 struct thread_list_head *list = NULL;
d62a17ae 1338 struct thread **thread_array = NULL;
1339 struct thread *thread;
d62a17ae 1340 struct cancel_req *cr;
1341 struct listnode *ln;
7e93a54c 1342
d62a17ae 1343 for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
d279ef57 1344 /*
1345 * If this is an event object cancellation, search
1346 * through task lists deleting any tasks which have the
1347 * specified argument - use this handy helper function.
d279ef57 1348 */
d62a17ae 1349 if (cr->eventobj) {
a9318a32 1350 cancel_arg_helper(master, cr);
d62a17ae 1351 continue;
1352 }
1353
1354 /*
1355 * The pointer varies depending on whether the cancellation
1356 * request was made asynchronously or not. If it was, we
1357 * need to check whether the thread even exists anymore
1358 * before cancelling it.
1359 */
d62a17ae 1360 thread = (cr->thread) ? cr->thread : *cr->threadref;
1361
1362 if (!thread)
1363 continue;
1364
1365 list = NULL;
1366 thread_array = NULL;
1367
d62a17ae 1368 /* Determine the appropriate queue to cancel the thread from */
1369 switch (thread->type) {
1370 case THREAD_READ:
a9318a32 1371 thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
d62a17ae 1372 thread_array = master->read;
1373 break;
1374 case THREAD_WRITE:
a9318a32 1375 thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
d62a17ae 1376 thread_array = master->write;
1377 break;
1378 case THREAD_TIMER:
27d29ced 1379 thread_timer_list_del(&master->timer, thread);
d62a17ae 1380 break;
1381 case THREAD_EVENT:
1382 list = &master->event;
1383 break;
1384 case THREAD_READY:
1385 list = &master->ready;
1386 break;
1387 default:
1388 continue;
1389 break;
1390 }
1391
27d29ced 1392 if (list) {
c284542b 1393 thread_list_del(list, thread);
d62a17ae 1394 } else if (thread_array) {
1395 thread_array[thread->u.fd] = NULL;
d62a17ae 1396 }
1397
1398 if (thread->ref)
1399 *thread->ref = NULL;
1400
1401 thread_add_unuse(thread->master, thread);
1402 }
1403
1404 /* Delete and free all cancellation requests */
1405 if (master->cancel_req)
1406 list_delete_all_node(master->cancel_req);
d62a17ae 1407
1408 /* Wake up any threads which may be blocked in thread_cancel_async() */
1409 master->canceled = true;
1410 pthread_cond_broadcast(&master->cancel_cond);
718e3744 1411}
1412
1413/*
1414 * Helper function used for multiple flavors of arg-based cancellation.
1415 */
1416static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
1417{
1418 struct cancel_req *cr;
1419
1420 assert(m->owner == pthread_self());
1421
1422 /* Only worth anything if caller supplies an arg. */
1423 if (arg == NULL)
1424 return;
1425
1426 cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1427
1428 cr->flags = flags;
1429
cb1991af 1430 frr_with_mutex (&m->mtx) {
1431 cr->eventobj = arg;
1432 listnode_add(m->cancel_req, cr);
1433 do_thread_cancel(m);
1434 }
1435}
1436
1437/**
1438 * Cancel any events which have the specified argument.
1439 *
1440 * MT-Unsafe
1441 *
1442 * @param m the thread_master to cancel from
1443 * @param arg the argument passed when creating the event
1444 */
d62a17ae 1445void thread_cancel_event(struct thread_master *master, void *arg)
718e3744 1446{
1447 cancel_event_helper(master, arg, 0);
1448}
d62a17ae 1449
1450/*
1451 * Cancel ready tasks with an arg matching 'arg'
1452 *
1453 * MT-Unsafe
1454 *
1455 * @param m the thread_master to cancel from
1456 * @param arg the argument passed when creating the event
1457 */
1458void thread_cancel_event_ready(struct thread_master *m, void *arg)
1459{
1460
1461 /* Only cancel ready/event tasks */
1462 cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
63ccb9cb 1463}
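
/*
 * Usage sketch (illustrative; ctx is a placeholder): cancelling by argument
 * removes every matching task, which is convenient when tearing an object
 * down:
 *
 *   thread_cancel_event(master, ctx);        // all task types
 *   thread_cancel_event_ready(master, ctx);  // ready/event tasks only
 *
 * Both must be called from the pthread that owns the thread_master.
 */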
1189d95f 1464
1465/**
1466 * Cancel a specific task.
1467 *
1468 * MT-Unsafe
1469 *
1470 * @param thread task to cancel
1471 */
b3d6bc6e 1472void thread_cancel(struct thread **thread)
63ccb9cb 1473{
1474 struct thread_master *master;
1475
1476 if (thread == NULL || *thread == NULL)
1477 return;
1478
1479 master = (*thread)->master;
d62a17ae 1480
1481 frrtrace(9, frr_libfrr, thread_cancel, master,
1482 (*thread)->xref->funcname, (*thread)->xref->xref.file,
1483 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
b4d6e855 1484 (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);
abf96a87 1485
1486 assert(master->owner == pthread_self());
1487
cb1991af 1488 frr_with_mutex (&master->mtx) {
d62a17ae 1489 struct cancel_req *cr =
1490 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
b3d6bc6e 1491 cr->thread = *thread;
1492 listnode_add(master->cancel_req, cr);
1493 do_thread_cancel(master);
d62a17ae 1494 }
1495
1496 *thread = NULL;
63ccb9cb 1497}
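
/*
 * Usage sketch (illustrative; ctx->t_timeout is a placeholder): a single task
 * is cancelled through the same back-reference used when it was scheduled,
 * from the owning pthread:
 *
 *   thread_cancel(&ctx->t_timeout);
 *
 * The call is a no-op if the pointer is already NULL, and the pointer is
 * cleared on return, so repeated calls are harmless.
 */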
1189d95f 1498
1499/**
1500 * Asynchronous cancellation.
1501 *
1502 * Called with either a struct thread ** or void * to an event argument,
1503 * this function posts the correct cancellation request and blocks until it is
1504 * serviced.
1505 *
1506 * If the thread is currently running, execution blocks until it completes.
1507 *
1508 * The last two parameters are mutually exclusive, i.e. if you pass one the
1509 * other must be NULL.
1510 *
1511 * When the cancellation procedure executes on the target thread_master, the
1512 * thread * provided is checked for nullity. If it is null, the thread is
1513 * assumed to no longer exist and the cancellation request is a no-op. Thus
1514 * users of this API must pass a back-reference when scheduling the original
1515 * task.
1516 *
1517 * MT-Safe
1518 *
1519 * @param master the thread master with the relevant event / task
1520 * @param thread pointer to thread to cancel
1521 * @param eventobj the event
63ccb9cb 1522 */
d62a17ae 1523void thread_cancel_async(struct thread_master *master, struct thread **thread,
1524 void *eventobj)
63ccb9cb 1525{
d62a17ae 1526 assert(!(thread && eventobj) && (thread || eventobj));
1527
1528 if (thread && *thread)
c7bb4f00 1529 frrtrace(9, frr_libfrr, thread_cancel_async, master,
1530 (*thread)->xref->funcname, (*thread)->xref->xref.file,
1531 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
1532 (*thread)->u.val, (*thread)->arg,
1533 (*thread)->u.sands.tv_sec);
abf96a87 1534 else
1535 frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
1536 0, NULL, 0, 0, eventobj, 0);
abf96a87 1537
d62a17ae 1538 assert(master->owner != pthread_self());
1539
cb1991af 1540 frr_with_mutex (&master->mtx) {
d62a17ae 1541 master->canceled = false;
1542
1543 if (thread) {
1544 struct cancel_req *cr =
1545 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1546 cr->threadref = thread;
1547 listnode_add(master->cancel_req, cr);
1548 } else if (eventobj) {
1549 struct cancel_req *cr =
1550 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1551 cr->eventobj = eventobj;
1552 listnode_add(master->cancel_req, cr);
1553 }
1554 AWAKEN(master);
1555
1556 while (!master->canceled)
1557 pthread_cond_wait(&master->cancel_cond, &master->mtx);
1558 }
1559
1560 if (thread)
1561 *thread = NULL;
718e3744 1562}
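
/*
 * Usage sketch (illustrative; ctx is a placeholder): cancelling from another
 * pthread. Exactly one of the last two arguments may be non-NULL, and the
 * call blocks until the owning pthread has serviced the request:
 *
 *   thread_cancel_async(master, &ctx->t_read, NULL);  // by task reference
 *   thread_cancel_async(master, NULL, ctx);           // by event argument
 */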
63ccb9cb 1563/* ------------------------------------------------------------------------- */
718e3744 1564
27d29ced 1565static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
d62a17ae 1566 struct timeval *timer_val)
718e3744 1567{
1568 if (!thread_timer_list_count(timers))
1569 return NULL;
1570
1571 struct thread *next_timer = thread_timer_list_first(timers);
1572 monotime_until(&next_timer->u.sands, timer_val);
1573 return timer_val;
718e3744 1574}
718e3744 1575
d62a17ae 1576static struct thread *thread_run(struct thread_master *m, struct thread *thread,
1577 struct thread *fetch)
718e3744 1578{
d62a17ae 1579 *fetch = *thread;
1580 thread_add_unuse(m, thread);
1581 return fetch;
718e3744 1582}
1583
d62a17ae 1584static int thread_process_io_helper(struct thread_master *m,
1585 struct thread *thread, short state,
1586 short actual_state, int pos)
5d4ccd4e 1587{
d62a17ae 1588 struct thread **thread_array;
1589
45f3d590
DS
1590 /*
1591 * poll() clears the .events field, but the pollfd array we
1592 * pass to poll() is a copy of the one used to schedule threads.
1593 * We need to synchronize state between the two here by applying
1594 * the same changes poll() made on the copy of the "real" pollfd
1595 * array.
1596 *
1597 * This cleans up a possible infinite loop where we refuse
1598 * to respond to a poll event but poll is insistent that
1599 * we should.
1600 */
1601 m->handler.pfds[pos].events &= ~(state);
1602
1603 if (!thread) {
1604 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1605 flog_err(EC_LIB_NO_THREAD,
1d5453d6 1606 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
45f3d590 1607 m->handler.pfds[pos].fd, actual_state);
d62a17ae 1608 return 0;
45f3d590 1609 }
d62a17ae 1610
1611 if (thread->type == THREAD_READ)
1612 thread_array = m->read;
1613 else
1614 thread_array = m->write;
1615
1616 thread_array[thread->u.fd] = NULL;
c284542b 1617 thread_list_add_tail(&m->ready, thread);
d62a17ae 1618 thread->type = THREAD_READY;
45f3d590 1619
d62a17ae 1620 return 1;
1621}
1622
1623/**
1624 * Process I/O events.
1625 *
1626 * Walks through file descriptor array looking for those pollfds whose .revents
1627 * field has something interesting. Deletes any invalid file descriptors.
1628 *
1629 * @param m the thread master
1630 * @param num the number of active file descriptors (return value of poll())
1631 */
d62a17ae 1632static void thread_process_io(struct thread_master *m, unsigned int num)
0a95a0d0 1633{
d62a17ae 1634 unsigned int ready = 0;
1635 struct pollfd *pfds = m->handler.copy;
1636
1637 for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1638 /* no event for current fd? immediately continue */
1639 if (pfds[i].revents == 0)
1640 continue;
1641
1642 ready++;
1643
1644 /*
1645 * Unless someone has called thread_cancel from another
1646 * pthread, the only thing that could have changed in
1647 * m->handler.pfds while we were asleep is the .events
1648 * field in a given pollfd. Barring thread_cancel() that
1649 * value should be a superset of the values we have in our
1650 * copy, so there's no need to update it. Similarily,
1651 * barring deletion, the fd should still be a valid index
1652 * into the master's pfds.
1653 *
1654 * We are including POLLERR here to do a READ event
1655 * this is because the read should fail and the
1656 * read function should handle it appropriately
d279ef57 1657 */
d142453d 1658 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
d62a17ae 1659 thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
1660 pfds[i].revents, i);
1661 }
d62a17ae 1662 if (pfds[i].revents & POLLOUT)
1663 thread_process_io_helper(m, m->write[pfds[i].fd],
45f3d590 1664 POLLOUT, pfds[i].revents, i);
d62a17ae 1665
1666 /* if one of our file descriptors is garbage, remove the same
1667 * from
1668 * both pfds + update sizes and index */
1669 if (pfds[i].revents & POLLNVAL) {
1670 memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1671 (m->handler.pfdcount - i - 1)
1672 * sizeof(struct pollfd));
1673 m->handler.pfdcount--;
1674 m->handler.pfds[m->handler.pfdcount].fd = 0;
1675 m->handler.pfds[m->handler.pfdcount].events = 0;
d62a17ae 1676
1677 memmove(pfds + i, pfds + i + 1,
1678 (m->handler.copycount - i - 1)
1679 * sizeof(struct pollfd));
1680 m->handler.copycount--;
1681 m->handler.copy[m->handler.copycount].fd = 0;
1682 m->handler.copy[m->handler.copycount].events = 0;
d62a17ae 1683
1684 i--;
1685 }
1686 }
718e3744 1687}
1688
8b70d0b0 1689/* Add all timers that have popped to the ready list. */
e7d9e44b 1690static unsigned int thread_process_timers(struct thread_master *m,
d62a17ae 1691 struct timeval *timenow)
a48b4e6d 1692{
1693 struct timeval prev = *timenow;
1694 bool displayed = false;
d62a17ae 1695 struct thread *thread;
1696 unsigned int ready = 0;
1697
e7d9e44b 1698 while ((thread = thread_timer_list_first(&m->timer))) {
d62a17ae 1699 if (timercmp(timenow, &thread->u.sands, <))
e7d9e44b 1700 break;
1701 prev = thread->u.sands;
1702 prev.tv_sec += 4;
1703 /*
1704 * If the timer would have popped 4 seconds in the
1705 * past then we are in a situation where we are
1706 * really getting behind on handling of events.
1707 * Let's log it and do the right thing with it.
1708 */
1709 if (timercmp(timenow, &prev, >)) {
1710 atomic_fetch_add_explicit(
1711 &thread->hist->total_starv_warn, 1,
1712 memory_order_seq_cst);
1713 if (!displayed && !thread->ignore_timer_late) {
1714 flog_warn(
1715 EC_LIB_STARVE_THREAD,
1716 "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1717 thread);
1718 displayed = true;
1719 }
1720 }
1721
e7d9e44b 1722 thread_timer_list_pop(&m->timer);
d62a17ae 1723 thread->type = THREAD_READY;
e7d9e44b 1724 thread_list_add_tail(&m->ready, thread);
d62a17ae 1725 ready++;
1726 }
e7d9e44b 1727
d62a17ae 1728 return ready;
a48b4e6d 1729}
1730
2613abe6 1731/* process a list en masse, e.g. for event thread lists */
c284542b 1732static unsigned int thread_process(struct thread_list_head *list)
2613abe6 1733{
d62a17ae 1734 struct thread *thread;
d62a17ae 1735 unsigned int ready = 0;
1736
c284542b 1737 while ((thread = thread_list_pop(list))) {
d62a17ae 1738 thread->type = THREAD_READY;
c284542b 1739 thread_list_add_tail(&thread->master->ready, thread);
d62a17ae 1740 ready++;
1741 }
1742 return ready;
1743}
1744
1745
718e3744 1746/* Fetch next ready thread. */
d62a17ae 1747struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
718e3744 1748{
d62a17ae 1749 struct thread *thread = NULL;
1750 struct timeval now;
1751 struct timeval zerotime = {0, 0};
1752 struct timeval tv;
1753 struct timeval *tw = NULL;
d81ca9a3 1754 bool eintr_p = false;
d62a17ae 1755 int num = 0;
1756
1757 do {
1758 /* Handle signals if any */
1759 if (m->handle_signals)
7cc91e67 1760 frr_sigevent_process();
d62a17ae 1761
1762 pthread_mutex_lock(&m->mtx);
1763
1764 /* Process any pending cancellation requests */
1765 do_thread_cancel(m);
1766
1767 /*
1768 * Attempt to flush ready queue before going into poll().
1769 * This is performance-critical. Think twice before modifying.
1770 */
c284542b 1771 if ((thread = thread_list_pop(&m->ready))) {
1772 fetch = thread_run(m, thread, fetch);
1773 if (fetch->ref)
1774 *fetch->ref = NULL;
1775 pthread_mutex_unlock(&m->mtx);
1776 if (!m->ready_run_loop)
1777 GETRUSAGE(&m->last_getrusage);
1778 m->ready_run_loop = true;
1779 break;
1780 }
1781
5e822957 1782 m->ready_run_loop = false;
e3c9529e
QY
1783 /* otherwise, tick through scheduling sequence */
1784
bca37d17
QY
1785 /*
1786 * Post events to ready queue. This must come before the
1787 * following block since events should occur immediately
1788 */
d62a17ae 1789 thread_process(&m->event);
1790
bca37d17
QY
1791 /*
1792 * If there are no tasks on the ready queue, we will poll()
1793 * until a timer expires or we receive I/O, whichever comes
1794 * first. The strategy for doing this is:
d62a17ae 1795 *
1796 * - If there are events pending, set the poll() timeout to zero
1797 * - If there are no events pending, but there are timers
d279ef57
DS
1798 * pending, set the timeout to the smallest remaining time on
1799 * any timer.
d62a17ae 1800 * - If there are neither timers nor events pending, but there
d279ef57 1801 * are file descriptors pending, block indefinitely in poll()
d62a17ae 1802 * - If nothing is pending, it's time for the application to die
1803 *
1804 * In every case except the last, we need to hit poll() at least
bca37d17
QY
1805 * once per loop to avoid starvation by events
1806 */
c284542b 1807 if (!thread_list_count(&m->ready))
27d29ced 1808 tw = thread_timer_wait(&m->timer, &tv);
d62a17ae 1809
c284542b
DL
1810 if (thread_list_count(&m->ready) ||
1811 (tw && !timercmp(tw, &zerotime, >)))
d62a17ae 1812 tw = &zerotime;
1813
1814 if (!tw && m->handler.pfdcount == 0) { /* die */
1815 pthread_mutex_unlock(&m->mtx);
1816 fetch = NULL;
1817 break;
1818 }
1819
bca37d17
QY
1820 /*
1821 * Copy pollfd array + # active pollfds in it. Not necessary to
1822 * copy the array size as this is fixed.
1823 */
d62a17ae 1824 m->handler.copycount = m->handler.pfdcount;
1825 memcpy(m->handler.copy, m->handler.pfds,
1826 m->handler.copycount * sizeof(struct pollfd));
1827
e3c9529e
QY
1828 pthread_mutex_unlock(&m->mtx);
1829 {
d81ca9a3
MS
1830 eintr_p = false;
1831 num = fd_poll(m, tw, &eintr_p);
e3c9529e
QY
1832 }
1833 pthread_mutex_lock(&m->mtx);
d764d2cc 1834
e3c9529e
QY
1835 /* Handle any errors received in poll() */
1836 if (num < 0) {
d81ca9a3 1837 if (eintr_p) {
d62a17ae 1838 pthread_mutex_unlock(&m->mtx);
e3c9529e
QY
1839 /* loop around to signal handler */
1840 continue;
d62a17ae 1841 }
1842
e3c9529e 1843 /* else die */
450971aa 1844 flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
9ef9495e 1845 safe_strerror(errno));
e3c9529e
QY
1846 pthread_mutex_unlock(&m->mtx);
1847 fetch = NULL;
1848 break;
bca37d17 1849 }
d62a17ae 1850
1851 /* Post timers to ready queue. */
1852 monotime(&now);
e7d9e44b 1853 thread_process_timers(m, &now);
d62a17ae 1854
1855 /* Post I/O to ready queue. */
1856 if (num > 0)
1857 thread_process_io(m, num);
1858
d62a17ae 1859 pthread_mutex_unlock(&m->mtx);
1860
1861 } while (!thread && m->spin);
1862
1863 return fetch;
718e3744 1864}
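
/*
 * Usage sketch (illustrative, not part of this file): a daemon's main
 * loop drives the scheduler by pairing thread_fetch() with
 * thread_call(); in FRR daemons this loop normally lives inside
 * frr_run().  The "master" pointer below stands for the daemon's
 * struct thread_master.
 *
 *	struct thread thread;
 *
 *	while (thread_fetch(master, &thread))
 *		thread_call(&thread);
 *
 * thread_fetch() returns NULL once nothing is left to schedule (no
 * ready tasks, timers or file descriptors), which ends the loop and
 * lets the daemon exit.
 */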
1865
d62a17ae 1866static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
62f44022 1867{
d62a17ae 1868 return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1869 + (a.tv_usec - b.tv_usec));
62f44022
QY
1870}
1871
d62a17ae 1872unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1873 unsigned long *cputime)
718e3744 1874{
6418e2d3 1875#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
4e2839de
DS
1876
1877#ifdef __FreeBSD__
1878 /*
1879	 * FreeBSD appears to have an issue where two clock_gettime
1880	 * calls with CLOCK_THREAD_CPUTIME_ID made very close together
1881	 * occasionally return a 'now' time that is earlier than the
1882	 * start time. When that happens the subtraction wraps to a
1883	 * very large number and FRR reports spurious CPU HOGs.
1884	 *
1885	 * Cheat a little here: notice when this has happened and
1886	 * correct it so that the elapsed time can never go
1887	 * negative.
1888	 */
1889 if (start->cpu.tv_sec == now->cpu.tv_sec &&
1890 start->cpu.tv_nsec > now->cpu.tv_nsec)
1891 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1892 else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1893 now->cpu.tv_sec = start->cpu.tv_sec;
1894 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1895 }
1896#endif
6418e2d3
DL
1897 *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1898 + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1899#else
d62a17ae 1900 /* This is 'user + sys' time. */
1901 *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1902 + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
6418e2d3 1903#endif
d62a17ae 1904 return timeval_elapsed(now->real, start->real);
8b70d0b0 1905}
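
/*
 * Return convention, with made-up numbers for illustration: if a task
 * ran for 12ms of wall-clock time but only 3ms of CPU time,
 *
 *	unsigned long cputime;
 *	unsigned long walltime =
 *		thread_consumed_time(&after, &before, &cputime);
 *
 * leaves walltime == 12000 and cputime == 3000; both values are in
 * microseconds (TIMER_SECOND_MICRO units).
 */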
1906
50596be0
DS
1907/* We should aim to yield after 'yield' milliseconds, which defaults
1908 to THREAD_YIELD_TIME_SLOT.
8b70d0b0 1909 Note: we are using real (wall clock) time for this calculation.
1910 It could be argued that CPU time may make more sense in certain
1911 contexts. The things to consider are whether the thread may have
1912 blocked (in which case wall time increases, but CPU time does not),
1913 or whether the system is heavily loaded with other processes competing
d62a17ae 1914 for CPU time. On balance, wall clock time seems to make sense.
8b70d0b0 1915 Plus it has the added benefit that gettimeofday should be faster
1916 than calling getrusage. */
d62a17ae 1917int thread_should_yield(struct thread *thread)
718e3744 1918{
d62a17ae 1919 int result;
cb1991af 1920 frr_with_mutex (&thread->mtx) {
d62a17ae 1921 result = monotime_since(&thread->real, NULL)
1922 > (int64_t)thread->yield;
1923 }
d62a17ae 1924 return result;
50596be0
DS
1925}
1926
d62a17ae 1927void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
50596be0 1928{
cb1991af 1929 frr_with_mutex (&thread->mtx) {
d62a17ae 1930 thread->yield = yield_time;
1931 }
718e3744 1932}
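
/*
 * Cooperation sketch (all names here are hypothetical): a long-running
 * task can avoid monopolizing the event loop by checking
 * thread_should_yield() and rescheduling itself for the leftover work.
 *
 *	static void queue_drain(struct thread *thread)
 *	{
 *		struct drain_state *ds = THREAD_ARG(thread);
 *
 *		while (drain_has_work(ds)) {
 *			drain_one(ds);
 *			if (thread_should_yield(thread))
 *				break;
 *		}
 *		if (drain_has_work(ds))
 *			thread_add_event(thread->master, queue_drain, ds, 0,
 *					 &ds->t_drain);
 *	}
 *
 * thread_set_yield_time() can then raise or lower the per-task slot
 * from the default THREAD_YIELD_TIME_SLOT when a task needs a
 * different time budget.
 */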
1933
d62a17ae 1934void thread_getrusage(RUSAGE_T *r)
db9c0df9 1935{
6418e2d3
DL
1936 monotime(&r->real);
1937 if (!cputime_enabled) {
1938 memset(&r->cpu, 0, sizeof(r->cpu));
1939 return;
1940 }
1941
1942#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1943 /* not currently implemented in Linux's vDSO, but maybe at some point
1944 * in the future?
1945 */
1946 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1947#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
231db9a6
DS
1948#if defined RUSAGE_THREAD
1949#define FRR_RUSAGE RUSAGE_THREAD
1950#else
1951#define FRR_RUSAGE RUSAGE_SELF
1952#endif
6418e2d3
DL
1953 getrusage(FRR_RUSAGE, &(r->cpu));
1954#endif
db9c0df9
PJ
1955}
1956
fbcac826
QY
1957/*
1958 * Call a thread.
1959 *
1960 * This function will atomically update the thread's usage history. At present
1961 * this is the only spot where usage history is written. Nevertheless the code
1962 * has been written so that introducing additional writers in the future should
1963 * not require changes here, provided those writers atomically perform only the
1964 * operations done here, i.e. updating the total and maximum times. In
1965 * particular, the maximum real and cpu times must be monotonically increasing
1966 * or this code is not correct.
1967 */
d62a17ae 1968void thread_call(struct thread *thread)
718e3744 1969{
d62a17ae 1970 RUSAGE_T before, after;
cc8b13a0 1971
45f01188
DL
1972 /* if the thread being called is the CLI, it may change cputime_enabled
1973 * ("service cputime-stats" command), which can result in nonsensical
1974 * and very confusing warnings
1975 */
1976 bool cputime_enabled_here = cputime_enabled;
1977
5e822957
DS
1978 if (thread->master->ready_run_loop)
1979 before = thread->master->last_getrusage;
1980 else
1981 GETRUSAGE(&before);
1982
d62a17ae 1983 thread->real = before.real;
718e3744 1984
6c3aa850
DL
1985 frrtrace(9, frr_libfrr, thread_call, thread->master,
1986 thread->xref->funcname, thread->xref->xref.file,
1987 thread->xref->xref.line, NULL, thread->u.fd,
c7bb4f00 1988 thread->u.val, thread->arg, thread->u.sands.tv_sec);
abf96a87 1989
d62a17ae 1990 pthread_setspecific(thread_current, thread);
1991 (*thread->func)(thread);
1992 pthread_setspecific(thread_current, NULL);
718e3744 1993
d62a17ae 1994 GETRUSAGE(&after);
5e822957 1995 thread->master->last_getrusage = after;
718e3744 1996
45f01188
DL
1997 unsigned long walltime, cputime;
1998 unsigned long exp;
fbcac826 1999
45f01188
DL
2000 walltime = thread_consumed_time(&after, &before, &cputime);
2001
2002 /* update walltime */
2003 atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
fbcac826
QY
2004 memory_order_seq_cst);
2005 exp = atomic_load_explicit(&thread->hist->real.max,
2006 memory_order_seq_cst);
45f01188 2007 while (exp < walltime
fbcac826 2008 && !atomic_compare_exchange_weak_explicit(
45f01188
DL
2009 &thread->hist->real.max, &exp, walltime,
2010 memory_order_seq_cst, memory_order_seq_cst))
fbcac826
QY
2011 ;
2012
45f01188
DL
2013 if (cputime_enabled_here && cputime_enabled) {
2014 /* update cputime */
2015 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2016 memory_order_seq_cst);
2017 exp = atomic_load_explicit(&thread->hist->cpu.max,
2018 memory_order_seq_cst);
2019 while (exp < cputime
2020 && !atomic_compare_exchange_weak_explicit(
2021 &thread->hist->cpu.max, &exp, cputime,
2022 memory_order_seq_cst, memory_order_seq_cst))
2023 ;
2024 }
fbcac826
QY
2025
2026 atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2027 memory_order_seq_cst);
2028 atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2029 memory_order_seq_cst);
718e3744 2030
45f01188
DL
2031 if (cputime_enabled_here && cputime_enabled && cputime_threshold
2032 && cputime > cputime_threshold) {
d62a17ae 2033 /*
45f01188
DL
2034	 * We have a CPU hog on our hands: the CPU time this task
2035	 * spent doing actual work (not sleeping) exceeds the
2036	 * configured warning threshold. Whinge about it now, so
2037	 * we're aware this is yet another task to fix.
2038 */
9b8e01ca
DS
2039 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2040 1, memory_order_seq_cst);
9ef9495e 2041 flog_warn(
039d547f
DS
2042 EC_LIB_SLOW_THREAD_CPU,
2043 "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2044 thread->xref->funcname, (unsigned long)thread->func,
45f01188
DL
2045 walltime / 1000, cputime / 1000);
2046
2047 } else if (walltime_threshold && walltime > walltime_threshold) {
039d547f 2048 /*
45f01188
DL
2049	 * The task's wall-clock runtime exceeds the warning
2050	 * threshold, but its CPU time does not. Whine about this
2051	 * because it could imply some sort of scheduling issue.
039d547f 2052 */
9b8e01ca
DS
2053 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2054 1, memory_order_seq_cst);
039d547f
DS
2055 flog_warn(
2056 EC_LIB_SLOW_THREAD_WALL,
2057 "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
60a3efec 2058 thread->xref->funcname, (unsigned long)thread->func,
45f01188 2059 walltime / 1000, cputime / 1000);
d62a17ae 2060 }
718e3744 2061}
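
/*
 * Note on the warnings above: the CPU HOG warning (and CPU-time
 * accounting in general) only runs when cputime_enabled is set, i.e.
 * when the daemon has been configured with something like
 *
 *	(config)# service cputime-stats
 *
 * whereas the wall-clock STARVATION warning fires whenever
 * walltime_threshold is non-zero, independent of that setting.
 */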
2062
2063/* Execute thread */
60a3efec 2064void _thread_execute(const struct xref_threadsched *xref,
cc9f21da 2065 struct thread_master *m, void (*func)(struct thread *),
60a3efec 2066 void *arg, int val)
718e3744 2067{
c4345fbf 2068 struct thread *thread;
718e3744 2069
c4345fbf 2070 /* Get or allocate new thread to execute. */
cb1991af 2071 frr_with_mutex (&m->mtx) {
60a3efec 2072 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
9c7753e4 2073
c4345fbf 2074 /* Set its event value. */
cb1991af 2075 frr_with_mutex (&thread->mtx) {
c4345fbf
RZ
2076 thread->add_type = THREAD_EXECUTE;
2077 thread->u.val = val;
2078 thread->ref = &thread;
2079 }
c4345fbf 2080 }
f7c62e11 2081
c4345fbf
RZ
2082 /* Execute thread doing all accounting. */
2083 thread_call(thread);
9c7753e4 2084
c4345fbf
RZ
2085 /* Give back or free thread. */
2086 thread_add_unuse(m, thread);
718e3744 2087}
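
/*
 * Callers normally reach this through the thread_execute() convenience
 * macro in thread.h, which supplies the xref. Illustrative call (the
 * callback and argument are hypothetical):
 *
 *	thread_execute(master, refresh_cb, vrf, 0);
 *
 * runs refresh_cb() synchronously with full CPU accounting and then
 * recycles the struct thread before returning.
 */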
1543c387
MS
2088
2089/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2090void debug_signals(const sigset_t *sigs)
2091{
2092 int i, found;
2093 sigset_t tmpsigs;
2094 char buf[300];
2095
2096 /*
2097 * We're only looking at the non-realtime signals here, so we need
2098 * some limit value. Platform differences mean at some point we just
2099 * need to pick a reasonable value.
2100 */
2101#if defined SIGRTMIN
2102# define LAST_SIGNAL SIGRTMIN
2103#else
2104# define LAST_SIGNAL 32
2105#endif
2106
2107
2108 if (sigs == NULL) {
2109 sigemptyset(&tmpsigs);
2110 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2111 sigs = &tmpsigs;
2112 }
2113
2114 found = 0;
2115 buf[0] = '\0';
2116
2117 for (i = 0; i < LAST_SIGNAL; i++) {
2118 char tmp[20];
2119
2120 if (sigismember(sigs, i) > 0) {
2121 if (found > 0)
2122 strlcat(buf, ",", sizeof(buf));
2123 snprintf(tmp, sizeof(tmp), "%d", i);
2124 strlcat(buf, tmp, sizeof(buf));
2125 found++;
2126 }
2127 }
2128
2129 if (found == 0)
2130 snprintf(buf, sizeof(buf), "<none>");
2131
2132 zlog_debug("%s: %s", __func__, buf);
2133}
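
/*
 * Illustrative use: passing NULL logs the calling thread's currently
 * blocked signal mask, e.g.
 *
 *	debug_signals(NULL);
 *
 * emits something like "debug_signals: 1,17" (the exact numbers depend
 * on the platform and the daemon's signal setup).
 */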
a505383d 2134
f59e6882
DL
2135static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2136 const struct thread *thread)
2137{
2138 static const char * const types[] = {
2139 [THREAD_READ] = "read",
2140 [THREAD_WRITE] = "write",
2141 [THREAD_TIMER] = "timer",
2142 [THREAD_EVENT] = "event",
2143 [THREAD_READY] = "ready",
2144 [THREAD_UNUSED] = "unused",
2145 [THREAD_EXECUTE] = "exec",
2146 };
2147 ssize_t rv = 0;
2148 char info[16] = "";
2149
2150 if (!thread)
2151 return bputs(buf, "{(thread *)NULL}");
2152
2153 rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2154
2155 if (thread->type < array_size(types) && types[thread->type])
2156 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2157 else
2158 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2159
2160 switch (thread->type) {
2161 case THREAD_READ:
2162 case THREAD_WRITE:
2163 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2164 break;
2165
2166 case THREAD_TIMER:
2167 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2168 break;
2169 }
2170
2171 rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2172 thread->xref->funcname, thread->xref->dest,
2173 thread->xref->xref.file, thread->xref->xref.line);
2174 return rv;
2175}
2176
54929fd3 2177printfrr_ext_autoreg_p("TH", printfrr_thread);
f59e6882
DL
2178static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2179 const void *ptr)
2180{
2181 const struct thread *thread = ptr;
2182 struct timespec remain = {};
2183
2184 if (ea->fmt[0] == 'D') {
2185 ea->fmt++;
2186 return printfrr_thread_dbg(buf, ea, thread);
2187 }
2188
2189 if (!thread) {
2190 /* need to jump over time formatting flag characters in the
2191 * input format string, i.e. adjust ea->fmt!
2192 */
2193 printfrr_time(buf, ea, &remain,
2194 TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2195 return bputch(buf, '-');
2196 }
2197
2198 TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2199 return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2200}
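
/*
 * Illustrative use of the "%pTH" printfrr extension registered above:
 *
 *	zlog_debug("deadline: %pTH", thread);
 *	zlog_debug("task: %pTHD", thread);
 *
 * The plain form prints the remaining time until the timer deadline;
 * the "D" form prints the debug dump built by printfrr_thread_dbg(),
 * including type, fd or timer info, and the scheduling location. A
 * NULL pointer prints "-" and "{(thread *)NULL}" respectively.
 */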