git.proxmox.com Git - mirror_frr.git/blame - lib/event.c
*: Convert struct thread_master to struct event_master and it's ilk
[mirror_frr.git] / lib / event.c
acddc0ed 1// SPDX-License-Identifier: GPL-2.0-or-later
718e3744 2/* Thread management routine
3 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
718e3744 4 */
5
6/* #define DEBUG */
7
8#include <zebra.h>
308d14ae 9#include <sys/resource.h>
718e3744 10
cb37cb33 11#include "event.h"
718e3744 12#include "memory.h"
3e41733f 13#include "frrcu.h"
718e3744 14#include "log.h"
e04ab74d 15#include "hash.h"
16#include "command.h"
05c447dd 17#include "sigevent.h"
3bf2673b 18#include "network.h"
bd74dc61 19#include "jhash.h"
fbcac826 20#include "frratomic.h"
00dffa8c 21#include "frr_pthread.h"
9ef9495e 22#include "lib_errors.h"
912d45a1 23#include "libfrr_trace.h"
1a9f340b 24#include "libfrr.h"
d6be5fb9 25
bf8d3d6a 26DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
2ccccdf5
DS
27DEFINE_MTYPE_STATIC(LIB, EVENT_MASTER, "Thread master");
28DEFINE_MTYPE_STATIC(LIB, EVENT_POLL, "Thread Poll Info");
29DEFINE_MTYPE_STATIC(LIB, EVENT_STATS, "Thread stats");
4a1ab8e4 30
e6685141 31DECLARE_LIST(thread_list, struct event, threaditem);
c284542b 32
aea25d1e
MS
33struct cancel_req {
34 int flags;
e6685141 35 struct event *thread;
aea25d1e 36 void *eventobj;
e6685141 37 struct event **threadref;
aea25d1e
MS
38};
39
40/* Flags for task cancellation */
332beb64 41#define EVENT_CANCEL_FLAG_READY 0x01
aea25d1e 42
e6685141 43static int thread_timer_cmp(const struct event *a, const struct event *b)
27d29ced
DL
44{
45 if (a->u.sands.tv_sec < b->u.sands.tv_sec)
46 return -1;
47 if (a->u.sands.tv_sec > b->u.sands.tv_sec)
48 return 1;
49 if (a->u.sands.tv_usec < b->u.sands.tv_usec)
50 return -1;
51 if (a->u.sands.tv_usec > b->u.sands.tv_usec)
52 return 1;
53 return 0;
54}
55
e6685141 56DECLARE_HEAP(thread_timer_list, struct event, timeritem, thread_timer_cmp);
27d29ced 57
3b96b781
HT
58#if defined(__APPLE__)
59#include <mach/mach.h>
60#include <mach/mach_time.h>
61#endif
62
d62a17ae 63#define AWAKEN(m) \
64 do { \
2b64873d 65 const unsigned char wakebyte = 0x01; \
d62a17ae 66 write(m->io_pipe[1], &wakebyte, 1); \
67 } while (0);
3bf2673b 68
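/*
 * Editor's note (sketch, not part of the original file): AWAKEN() is what
 * makes scheduling from another pthread take effect promptly.  The control
 * flow, using only names that appear in this file, is roughly:
 *
 *	_event_add_event(...)            (called on some other pthread)
 *	    -> AWAKEN(m)                 write 0x01 to m->io_pipe[1]
 *	event_fetch() -> fd_poll()       owning pthread wakes from poll()
 *	    -> read(m->io_pipe[0], ...)  drain the poker bytes and rescan
 */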
62f44022 69/* control variable for initializer */
c17faa4b 70static pthread_once_t init_once = PTHREAD_ONCE_INIT;
e0bebc7c 71pthread_key_t thread_current;
6b0655a2 72
c17faa4b 73static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
62f44022
QY
74static struct list *masters;
75
2453d15d 76static void thread_free(struct event_master *master, struct event *thread);
6b0655a2 77
45f01188
DL
78#ifndef EXCLUDE_CPU_TIME
79#define EXCLUDE_CPU_TIME 0
80#endif
81#ifndef CONSUMED_TIME_CHECK
82#define CONSUMED_TIME_CHECK 0
83#endif
84
85bool cputime_enabled = !EXCLUDE_CPU_TIME;
86unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
87unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
88
62f44022 89/* CLI start ---------------------------------------------------------------- */
cb37cb33 90#include "lib/event_clippy.c"
45f01188 91
d8b87afe 92static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
e04ab74d 93{
883cc51d 94 int size = sizeof(a->func);
bd74dc61
DS
95
96 return jhash(&a->func, size, 0);
e04ab74d 97}
98
74df8d6d 99static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
d62a17ae 100 const struct cpu_thread_history *b)
e04ab74d 101{
d62a17ae 102 return a->func == b->func;
e04ab74d 103}
104
d62a17ae 105static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
e04ab74d 106{
d62a17ae 107 struct cpu_thread_history *new;
2ccccdf5 108 new = XCALLOC(MTYPE_EVENT_STATS, sizeof(struct cpu_thread_history));
d62a17ae 109 new->func = a->func;
110 new->funcname = a->funcname;
111 return new;
e04ab74d 112}
113
d62a17ae 114static void cpu_record_hash_free(void *a)
228da428 115{
d62a17ae 116 struct cpu_thread_history *hist = a;
117
2ccccdf5 118 XFREE(MTYPE_EVENT_STATS, hist);
228da428
CC
119}
120
d62a17ae 121static void vty_out_cpu_thread_history(struct vty *vty,
122 struct cpu_thread_history *a)
e04ab74d 123{
1dd08c22
DS
124 vty_out(vty,
125 "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
72327cf3 126 a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
9b8e01ca
DS
127 a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
128 (a->real.total / a->total_calls), a->real.max,
1dd08c22 129 a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
9b8e01ca 130 vty_out(vty, " %c%c%c%c%c %s\n",
2ccccdf5
DS
131 a->types & (1 << EVENT_READ) ? 'R' : ' ',
132 a->types & (1 << EVENT_WRITE) ? 'W' : ' ',
133 a->types & (1 << EVENT_TIMER) ? 'T' : ' ',
134 a->types & (1 << EVENT_EVENT) ? 'E' : ' ',
135 a->types & (1 << EVENT_EXECUTE) ? 'X' : ' ', a->funcname);
e04ab74d 136}
137
e3b78da8 138static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
e04ab74d 139{
d62a17ae 140 struct cpu_thread_history *totals = args[0];
fbcac826 141 struct cpu_thread_history copy;
d62a17ae 142 struct vty *vty = args[1];
fbcac826 143 uint8_t *filter = args[2];
d62a17ae 144
145 struct cpu_thread_history *a = bucket->data;
146
fbcac826
QY
147 copy.total_active =
148 atomic_load_explicit(&a->total_active, memory_order_seq_cst);
149 copy.total_calls =
150 atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
9b8e01ca
DS
151 copy.total_cpu_warn =
152 atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
153 copy.total_wall_warn =
154 atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
1dd08c22
DS
155 copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
156 memory_order_seq_cst);
fbcac826
QY
157 copy.cpu.total =
158 atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
159 copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
160 copy.real.total =
161 atomic_load_explicit(&a->real.total, memory_order_seq_cst);
162 copy.real.max =
163 atomic_load_explicit(&a->real.max, memory_order_seq_cst);
164 copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
165 copy.funcname = a->funcname;
166
167 if (!(copy.types & *filter))
d62a17ae 168 return;
fbcac826
QY
169
170 vty_out_cpu_thread_history(vty, &copy);
171 totals->total_active += copy.total_active;
172 totals->total_calls += copy.total_calls;
9b8e01ca
DS
173 totals->total_cpu_warn += copy.total_cpu_warn;
174 totals->total_wall_warn += copy.total_wall_warn;
1dd08c22 175 totals->total_starv_warn += copy.total_starv_warn;
fbcac826
QY
176 totals->real.total += copy.real.total;
177 if (totals->real.max < copy.real.max)
178 totals->real.max = copy.real.max;
179 totals->cpu.total += copy.cpu.total;
180 if (totals->cpu.max < copy.cpu.max)
181 totals->cpu.max = copy.cpu.max;
e04ab74d 182}
183
fbcac826 184static void cpu_record_print(struct vty *vty, uint8_t filter)
e04ab74d 185{
d62a17ae 186 struct cpu_thread_history tmp;
187 void *args[3] = {&tmp, vty, &filter};
2453d15d 188 struct event_master *m;
d62a17ae 189 struct listnode *ln;
190
45f01188
DL
191 if (!cputime_enabled)
192 vty_out(vty,
193 "\n"
194 "Collecting CPU time statistics is currently disabled. Following statistics\n"
195 "will be zero or may display data from when collection was enabled. Use the\n"
196 " \"service cputime-stats\" command to start collecting data.\n"
197 "\nCounters and wallclock times are always maintained and should be accurate.\n");
198
0d6f7fd6 199 memset(&tmp, 0, sizeof(tmp));
d62a17ae 200 tmp.funcname = "TOTAL";
201 tmp.types = filter;
202
cb1991af 203 frr_with_mutex (&masters_mtx) {
d62a17ae 204 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
205 const char *name = m->name ? m->name : "main";
206
207 char underline[strlen(name) + 1];
208 memset(underline, '-', sizeof(underline));
4f113d60 209 underline[sizeof(underline) - 1] = '\0';
d62a17ae 210
211 vty_out(vty, "\n");
212 vty_out(vty, "Showing statistics for pthread %s\n",
213 name);
214 vty_out(vty, "-------------------------------%s\n",
215 underline);
84d951d0 216 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 217 "CPU (user+system):", "Real (wall-clock):");
218 vty_out(vty,
219 "Active Runtime(ms) Invoked Avg uSec Max uSecs");
220 vty_out(vty, " Avg uSec Max uSecs");
1dd08c22
DS
221 vty_out(vty,
222 " CPU_Warn Wall_Warn Starv_Warn Type Thread\n");
d62a17ae 223
224 if (m->cpu_record->count)
225 hash_iterate(
226 m->cpu_record,
e3b78da8 227 (void (*)(struct hash_bucket *,
d62a17ae 228 void *))cpu_record_hash_print,
229 args);
230 else
231 vty_out(vty, "No data to display yet.\n");
232
233 vty_out(vty, "\n");
234 }
235 }
d62a17ae 236
237 vty_out(vty, "\n");
238 vty_out(vty, "Total thread statistics\n");
239 vty_out(vty, "-------------------------\n");
84d951d0 240 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 241 "CPU (user+system):", "Real (wall-clock):");
242 vty_out(vty, "Active Runtime(ms) Invoked Avg uSec Max uSecs");
9b8e01ca 243 vty_out(vty, " Avg uSec Max uSecs CPU_Warn Wall_Warn");
d62a17ae 244 vty_out(vty, " Type Thread\n");
245
246 if (tmp.total_calls > 0)
247 vty_out_cpu_thread_history(vty, &tmp);
e04ab74d 248}
249
e3b78da8 250static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
e276eb82 251{
fbcac826 252 uint8_t *filter = args[0];
d62a17ae 253 struct hash *cpu_record = args[1];
254
255 struct cpu_thread_history *a = bucket->data;
62f44022 256
d62a17ae 257 if (!(a->types & *filter))
258 return;
f48f65d2 259
d62a17ae 260 hash_release(cpu_record, bucket->data);
e276eb82
PJ
261}
262
fbcac826 263static void cpu_record_clear(uint8_t filter)
e276eb82 264{
fbcac826 265 uint8_t *tmp = &filter;
2453d15d 266 struct event_master *m;
d62a17ae 267 struct listnode *ln;
268
cb1991af 269 frr_with_mutex (&masters_mtx) {
d62a17ae 270 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
cb1991af 271 frr_with_mutex (&m->mtx) {
d62a17ae 272 void *args[2] = {tmp, m->cpu_record};
273 hash_iterate(
274 m->cpu_record,
e3b78da8 275 (void (*)(struct hash_bucket *,
d62a17ae 276 void *))cpu_record_hash_clear,
277 args);
278 }
d62a17ae 279 }
280 }
62f44022
QY
281}
282
fbcac826 283static uint8_t parse_filter(const char *filterstr)
62f44022 284{
d62a17ae 285 int i = 0;
286 int filter = 0;
287
288 while (filterstr[i] != '\0') {
289 switch (filterstr[i]) {
290 case 'r':
291 case 'R':
2ccccdf5 292 filter |= (1 << EVENT_READ);
d62a17ae 293 break;
294 case 'w':
295 case 'W':
2ccccdf5 296 filter |= (1 << EVENT_WRITE);
d62a17ae 297 break;
298 case 't':
299 case 'T':
2ccccdf5 300 filter |= (1 << EVENT_TIMER);
d62a17ae 301 break;
302 case 'e':
303 case 'E':
2ccccdf5 304 filter |= (1 << EVENT_EVENT);
d62a17ae 305 break;
306 case 'x':
307 case 'X':
2ccccdf5 308 filter |= (1 << EVENT_EXECUTE);
d62a17ae 309 break;
310 default:
311 break;
312 }
313 ++i;
314 }
315 return filter;
62f44022
QY
316}
317
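/*
 * Editor's example (not part of the original file): parse_filter() maps the
 * FILTER argument of "show thread cpu" / "clear thread cpu" to a type
 * bitmask, case-insensitively:
 *
 *	parse_filter("wt")  == (1 << EVENT_WRITE) | (1 << EVENT_TIMER)
 *	parse_filter("RX")  == (1 << EVENT_READ)  | (1 << EVENT_EXECUTE)
 *	parse_filter("zzz") == 0   (unknown letters are silently ignored)
 */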
ee4dcee8
DL
318DEFUN_NOSH (show_thread_cpu,
319 show_thread_cpu_cmd,
320 "show thread cpu [FILTER]",
321 SHOW_STR
322 "Thread information\n"
323 "Thread CPU usage\n"
324 "Display filter (rwtex)\n")
62f44022 325{
fbcac826 326 uint8_t filter = (uint8_t)-1U;
d62a17ae 327 int idx = 0;
328
329 if (argv_find(argv, argc, "FILTER", &idx)) {
330 filter = parse_filter(argv[idx]->arg);
331 if (!filter) {
332 vty_out(vty,
3efd0893 333 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
d62a17ae 334 argv[idx]->arg);
335 return CMD_WARNING;
336 }
337 }
338
339 cpu_record_print(vty, filter);
340 return CMD_SUCCESS;
e276eb82 341}
45f01188
DL
342
343DEFPY (service_cputime_stats,
344 service_cputime_stats_cmd,
345 "[no] service cputime-stats",
346 NO_STR
347 "Set up miscellaneous service\n"
348 "Collect CPU usage statistics\n")
349{
350 cputime_enabled = !no;
351 return CMD_SUCCESS;
352}
353
354DEFPY (service_cputime_warning,
355 service_cputime_warning_cmd,
356 "[no] service cputime-warning (1-4294967295)",
357 NO_STR
358 "Set up miscellaneous service\n"
359 "Warn for tasks exceeding CPU usage threshold\n"
360 "Warning threshold in milliseconds\n")
361{
362 if (no)
363 cputime_threshold = 0;
364 else
365 cputime_threshold = cputime_warning * 1000;
366 return CMD_SUCCESS;
367}
368
369ALIAS (service_cputime_warning,
370 no_service_cputime_warning_cmd,
371 "no service cputime-warning",
372 NO_STR
373 "Set up miscellaneous service\n"
374 "Warn for tasks exceeding CPU usage threshold\n")
375
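/*
 * Editor's configuration example (sketch): the warning thresholds above are
 * entered in milliseconds and stored multiplied by 1000, matching the "uSec"
 * columns of "show thread cpu":
 *
 *	service cputime-warning 5000      ->  cputime_threshold = 5000000
 *	no service cputime-warning        ->  cputime_threshold = 0
 */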
376DEFPY (service_walltime_warning,
377 service_walltime_warning_cmd,
378 "[no] service walltime-warning (1-4294967295)",
379 NO_STR
380 "Set up miscellaneous service\n"
381 "Warn for tasks exceeding total wallclock threshold\n"
382 "Warning threshold in milliseconds\n")
383{
384 if (no)
385 walltime_threshold = 0;
386 else
387 walltime_threshold = walltime_warning * 1000;
388 return CMD_SUCCESS;
389}
390
391ALIAS (service_walltime_warning,
392 no_service_walltime_warning_cmd,
393 "no service walltime-warning",
394 NO_STR
395 "Set up miscellaneous service\n"
396 "Warn for tasks exceeding total wallclock threshold\n")
e276eb82 397
2453d15d 398static void show_thread_poll_helper(struct vty *vty, struct event_master *m)
8872626b
DS
399{
400 const char *name = m->name ? m->name : "main";
401 char underline[strlen(name) + 1];
e6685141 402 struct event *thread;
8872626b
DS
403 uint32_t i;
404
405 memset(underline, '-', sizeof(underline));
406 underline[sizeof(underline) - 1] = '\0';
407
408 vty_out(vty, "\nShowing poll FD's for %s\n", name);
409 vty_out(vty, "----------------------%s\n", underline);
6c19478a
DS
410 vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
411 m->fd_limit);
a0b36ae6
DS
412 for (i = 0; i < m->handler.pfdcount; i++) {
413 vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
414 m->handler.pfds[i].fd, m->handler.pfds[i].events,
8872626b 415 m->handler.pfds[i].revents);
a0b36ae6
DS
416
417 if (m->handler.pfds[i].events & POLLIN) {
418 thread = m->read[m->handler.pfds[i].fd];
419
420 if (!thread)
421 vty_out(vty, "ERROR ");
422 else
60a3efec 423 vty_out(vty, "%s ", thread->xref->funcname);
a0b36ae6
DS
424 } else
425 vty_out(vty, " ");
426
427 if (m->handler.pfds[i].events & POLLOUT) {
428 thread = m->write[m->handler.pfds[i].fd];
429
430 if (!thread)
431 vty_out(vty, "ERROR\n");
432 else
60a3efec 433 vty_out(vty, "%s\n", thread->xref->funcname);
a0b36ae6
DS
434 } else
435 vty_out(vty, "\n");
436 }
8872626b
DS
437}
438
ee4dcee8
DL
439DEFUN_NOSH (show_thread_poll,
440 show_thread_poll_cmd,
441 "show thread poll",
442 SHOW_STR
443 "Thread information\n"
444 "Show poll FD's and information\n")
8872626b
DS
445{
446 struct listnode *node;
2453d15d 447 struct event_master *m;
8872626b 448
cb1991af 449 frr_with_mutex (&masters_mtx) {
8872626b
DS
450 for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
451 show_thread_poll_helper(vty, m);
452 }
453 }
8872626b
DS
454
455 return CMD_SUCCESS;
456}
457
458
49d41a26
DS
459DEFUN (clear_thread_cpu,
460 clear_thread_cpu_cmd,
461 "clear thread cpu [FILTER]",
62f44022 462 "Clear stored data in all pthreads\n"
49d41a26
DS
463 "Thread information\n"
464 "Thread CPU usage\n"
465 "Display filter (rwtexb)\n")
e276eb82 466{
fbcac826 467 uint8_t filter = (uint8_t)-1U;
d62a17ae 468 int idx = 0;
469
470 if (argv_find(argv, argc, "FILTER", &idx)) {
471 filter = parse_filter(argv[idx]->arg);
472 if (!filter) {
473 vty_out(vty,
3efd0893 474 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
d62a17ae 475 argv[idx]->arg);
476 return CMD_WARNING;
477 }
478 }
479
480 cpu_record_clear(filter);
481 return CMD_SUCCESS;
e276eb82 482}
6b0655a2 483
2453d15d 484static void show_thread_timers_helper(struct vty *vty, struct event_master *m)
22f31b8c
DS
485{
486 const char *name = m->name ? m->name : "main";
487 char underline[strlen(name) + 1];
e6685141 488 struct event *thread;
22f31b8c
DS
489
490 memset(underline, '-', sizeof(underline));
491 underline[sizeof(underline) - 1] = '\0';
492
493 vty_out(vty, "\nShowing timers for %s\n", name);
494 vty_out(vty, "-------------------%s\n", underline);
495
496 frr_each (thread_timer_list, &m->timer, thread) {
497 vty_out(vty, " %-50s%pTH\n", thread->hist->funcname, thread);
498 }
499}
500
501DEFPY_NOSH (show_thread_timers,
502 show_thread_timers_cmd,
503 "show thread timers",
504 SHOW_STR
505 "Thread information\n"
506 "Show all timers and how long they have in the system\n")
507{
508 struct listnode *node;
2453d15d 509 struct event_master *m;
22f31b8c
DS
510
511 frr_with_mutex (&masters_mtx) {
512 for (ALL_LIST_ELEMENTS_RO(masters, node, m))
513 show_thread_timers_helper(vty, m);
514 }
515
516 return CMD_SUCCESS;
517}
518
5f6eaa9b 519void event_cmd_init(void)
0b84f294 520{
d62a17ae 521 install_element(VIEW_NODE, &show_thread_cpu_cmd);
8872626b 522 install_element(VIEW_NODE, &show_thread_poll_cmd);
d62a17ae 523 install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
45f01188
DL
524
525 install_element(CONFIG_NODE, &service_cputime_stats_cmd);
526 install_element(CONFIG_NODE, &service_cputime_warning_cmd);
527 install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
528 install_element(CONFIG_NODE, &service_walltime_warning_cmd);
529 install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
22f31b8c
DS
530
531 install_element(VIEW_NODE, &show_thread_timers_cmd);
0b84f294 532}
62f44022
QY
533/* CLI end ------------------------------------------------------------------ */
534
0b84f294 535
d62a17ae 536static void cancelreq_del(void *cr)
63ccb9cb 537{
d62a17ae 538 XFREE(MTYPE_TMP, cr);
63ccb9cb
QY
539}
540
e0bebc7c 541/* initializer, only ever called once */
4d762f26 542static void initializer(void)
e0bebc7c 543{
d62a17ae 544 pthread_key_create(&thread_current, NULL);
e0bebc7c
QY
545}
546
2453d15d 547struct event_master *thread_master_create(const char *name)
718e3744 548{
2453d15d 549 struct event_master *rv;
d62a17ae 550 struct rlimit limit;
551
552 pthread_once(&init_once, &initializer);
553
2453d15d 554 rv = XCALLOC(MTYPE_EVENT_MASTER, sizeof(struct event_master));
d62a17ae 555
556 /* Initialize master mutex */
557 pthread_mutex_init(&rv->mtx, NULL);
558 pthread_cond_init(&rv->cancel_cond, NULL);
559
560 /* Set name */
7ffcd8bd 561 name = name ? name : "default";
2ccccdf5 562 rv->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
d62a17ae 563
564 /* Initialize I/O task data structures */
1a9f340b
MS
565
566 /* Use configured limit if present, ulimit otherwise. */
567 rv->fd_limit = frr_get_fd_limit();
568 if (rv->fd_limit == 0) {
569 getrlimit(RLIMIT_NOFILE, &limit);
570 rv->fd_limit = (int)limit.rlim_cur;
571 }
572
2ccccdf5 573 rv->read = XCALLOC(MTYPE_EVENT_POLL,
e6685141 574 sizeof(struct event *) * rv->fd_limit);
a6f235f3 575
2ccccdf5 576 rv->write = XCALLOC(MTYPE_EVENT_POLL,
e6685141 577 sizeof(struct event *) * rv->fd_limit);
d62a17ae 578
7ffcd8bd
QY
579 char tmhashname[strlen(name) + 32];
580 snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
581 name);
bd74dc61 582 rv->cpu_record = hash_create_size(
d8b87afe 583 8, (unsigned int (*)(const void *))cpu_record_hash_key,
74df8d6d 584 (bool (*)(const void *, const void *))cpu_record_hash_cmp,
7ffcd8bd 585 tmhashname);
d62a17ae 586
c284542b
DL
587 thread_list_init(&rv->event);
588 thread_list_init(&rv->ready);
589 thread_list_init(&rv->unuse);
27d29ced 590 thread_timer_list_init(&rv->timer);
d62a17ae 591
de2754be 592 /* Initialize event_fetch() settings */
d62a17ae 593 rv->spin = true;
594 rv->handle_signals = true;
595
596 /* Set pthread owner, should be updated by actual owner */
597 rv->owner = pthread_self();
598 rv->cancel_req = list_new();
599 rv->cancel_req->del = cancelreq_del;
600 rv->canceled = true;
601
602 /* Initialize pipe poker */
603 pipe(rv->io_pipe);
604 set_nonblocking(rv->io_pipe[0]);
605 set_nonblocking(rv->io_pipe[1]);
606
607 /* Initialize data structures for poll() */
608 rv->handler.pfdsize = rv->fd_limit;
609 rv->handler.pfdcount = 0;
2ccccdf5 610 rv->handler.pfds = XCALLOC(MTYPE_EVENT_MASTER,
d62a17ae 611 sizeof(struct pollfd) * rv->handler.pfdsize);
2ccccdf5 612 rv->handler.copy = XCALLOC(MTYPE_EVENT_MASTER,
d62a17ae 613 sizeof(struct pollfd) * rv->handler.pfdsize);
614
eff09c66 615 /* add to list of threadmasters */
cb1991af 616 frr_with_mutex (&masters_mtx) {
eff09c66
QY
617 if (!masters)
618 masters = list_new();
619
d62a17ae 620 listnode_add(masters, rv);
621 }
d62a17ae 622
623 return rv;
718e3744 624}
625
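/*
 * Editor's sketch (not part of the original file): typical lifecycle of an
 * event_master.  The dispatch helper is assumed to be named event_call() at
 * this point in the thread->event rename; it lives later in this file:
 *
 *	struct event_master *master = thread_master_create("example");
 *	struct event ev;
 *
 *	// ... schedule initial tasks here ...
 *	while (event_fetch(master, &ev))
 *		event_call(&ev);        // assumed name of the dispatch helper
 *	thread_master_free(master);
 */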
2453d15d 626void thread_master_set_name(struct event_master *master, const char *name)
d8a8a8de 627{
cb1991af 628 frr_with_mutex (&master->mtx) {
2ccccdf5
DS
629 XFREE(MTYPE_EVENT_MASTER, master->name);
630 master->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
d8a8a8de 631 }
d8a8a8de
QY
632}
633
2ccccdf5 634#define EVENT_UNUSED_DEPTH 10
6ed04aa2 635
718e3744 636/* Move thread to unuse list. */
2453d15d 637static void thread_add_unuse(struct event_master *m, struct event *thread)
718e3744 638{
6655966d
RZ
639 pthread_mutex_t mtxc = thread->mtx;
640
d62a17ae 641 assert(m != NULL && thread != NULL);
d62a17ae 642
d62a17ae 643 thread->hist->total_active--;
e6685141 644 memset(thread, 0, sizeof(struct event));
2ccccdf5 645 thread->type = EVENT_UNUSED;
6ed04aa2 646
6655966d
RZ
647 /* Restore the thread mutex context. */
648 thread->mtx = mtxc;
649
2ccccdf5 650 if (thread_list_count(&m->unuse) < EVENT_UNUSED_DEPTH) {
c284542b 651 thread_list_add_tail(&m->unuse, thread);
6655966d
RZ
652 return;
653 }
654
655 thread_free(m, thread);
718e3744 656}
657
658/* Free all unused thread. */
2453d15d
DS
659static void thread_list_free(struct event_master *m,
660 struct thread_list_head *list)
718e3744 661{
e6685141 662 struct event *t;
d62a17ae 663
c284542b 664 while ((t = thread_list_pop(list)))
6655966d 665 thread_free(m, t);
718e3744 666}
667
2453d15d 668static void thread_array_free(struct event_master *m,
e6685141 669 struct event **thread_array)
308d14ae 670{
e6685141 671 struct event *t;
d62a17ae 672 int index;
673
674 for (index = 0; index < m->fd_limit; ++index) {
675 t = thread_array[index];
676 if (t) {
677 thread_array[index] = NULL;
6655966d 678 thread_free(m, t);
d62a17ae 679 }
680 }
2ccccdf5 681 XFREE(MTYPE_EVENT_POLL, thread_array);
308d14ae
DV
682}
683
495f0b13
DS
684/*
685 * thread_master_free_unused
686 *
687 * As threads are finished with, they are put on the
688 * unuse list for later reuse.
689 * If we are shutting down, free up unused threads
690 * so we can see if we forgot to shut anything off.
691 */
2453d15d 692void thread_master_free_unused(struct event_master *m)
495f0b13 693{
cb1991af 694 frr_with_mutex (&m->mtx) {
e6685141 695 struct event *t;
c284542b 696 while ((t = thread_list_pop(&m->unuse)))
6655966d 697 thread_free(m, t);
d62a17ae 698 }
495f0b13
DS
699}
700
718e3744 701/* Stop thread scheduler. */
2453d15d 702void thread_master_free(struct event_master *m)
718e3744 703{
e6685141 704 struct event *t;
27d29ced 705
cb1991af 706 frr_with_mutex (&masters_mtx) {
d62a17ae 707 listnode_delete(masters, m);
eff09c66 708 if (masters->count == 0) {
6a154c88 709 list_delete(&masters);
eff09c66 710 }
d62a17ae 711 }
d62a17ae 712
713 thread_array_free(m, m->read);
714 thread_array_free(m, m->write);
27d29ced
DL
715 while ((t = thread_timer_list_pop(&m->timer)))
716 thread_free(m, t);
d62a17ae 717 thread_list_free(m, &m->event);
718 thread_list_free(m, &m->ready);
719 thread_list_free(m, &m->unuse);
720 pthread_mutex_destroy(&m->mtx);
33844bbe 721 pthread_cond_destroy(&m->cancel_cond);
d62a17ae 722 close(m->io_pipe[0]);
723 close(m->io_pipe[1]);
6a154c88 724 list_delete(&m->cancel_req);
1a0a92ea 725 m->cancel_req = NULL;
d62a17ae 726
d8bc11a5 727 hash_clean_and_free(&m->cpu_record, cpu_record_hash_free);
d62a17ae 728
2ccccdf5
DS
729 XFREE(MTYPE_EVENT_MASTER, m->name);
730 XFREE(MTYPE_EVENT_MASTER, m->handler.pfds);
731 XFREE(MTYPE_EVENT_MASTER, m->handler.copy);
732 XFREE(MTYPE_EVENT_MASTER, m);
718e3744 733}
734
94202742 735/* Return remaining time in milliseconds. */
4f830a07 736unsigned long event_timer_remain_msec(struct event *thread)
718e3744 737{
d62a17ae 738 int64_t remain;
1189d95f 739
5f6eaa9b 740 if (!event_is_scheduled(thread))
13bcc010
DA
741 return 0;
742
cb1991af 743 frr_with_mutex (&thread->mtx) {
78ca0342 744 remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
d62a17ae 745 }
1189d95f 746
d62a17ae 747 return remain < 0 ? 0 : remain;
718e3744 748}
749
78ca0342 750/* Return remaining time in seconds. */
4f830a07 751unsigned long event_timer_remain_second(struct event *thread)
78ca0342 752{
4f830a07 753 return event_timer_remain_msec(thread) / 1000LL;
78ca0342
CF
754}
755
4f830a07 756struct timeval event_timer_remain(struct event *thread)
6ac44687 757{
d62a17ae 758 struct timeval remain;
cb1991af 759 frr_with_mutex (&thread->mtx) {
d62a17ae 760 monotime_until(&thread->u.sands, &remain);
761 }
d62a17ae 762 return remain;
6ac44687
CF
763}
764
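/*
 * Editor's example (sketch): querying how long a scheduled timer still has
 * to run.  event_timer_remain_msec() returns 0 both when the timer has
 * already expired and when it is not currently scheduled; 't_reconnect' is
 * a hypothetical back-reference:
 *
 *	unsigned long msec = t_reconnect ? event_timer_remain_msec(t_reconnect) : 0;
 *	zlog_debug("reconnect timer fires in %lu ms", msec);
 */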
0447957e
AK
765static int time_hhmmss(char *buf, int buf_size, long sec)
766{
767 long hh;
768 long mm;
769 int wr;
770
642ac49d 771 assert(buf_size >= 8);
0447957e
AK
772
773 hh = sec / 3600;
774 sec %= 3600;
775 mm = sec / 60;
776 sec %= 60;
777
778 wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
779
780 return wr != 8;
781}
782
5f6eaa9b 783char *event_timer_to_hhmmss(char *buf, int buf_size, struct event *t_timer)
0447957e
AK
784{
785 if (t_timer) {
4f830a07 786 time_hhmmss(buf, buf_size, event_timer_remain_second(t_timer));
0447957e
AK
787 } else {
788 snprintf(buf, buf_size, "--:--:--");
789 }
790 return buf;
791}
792
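/*
 * Editor's example (sketch): event_timer_to_hhmmss() renders the remaining
 * time of a timer as HH:MM:SS given a sufficiently large buffer.  A timer
 * with 3723 seconds left yields "01:02:03"; a NULL timer yields "--:--:--".
 * 't_expiry' is a hypothetical timer reference:
 *
 *	char buf[32];
 *	zlog_debug("expires in %s", event_timer_to_hhmmss(buf, sizeof(buf), t_expiry));
 */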
718e3744 793/* Get new thread. */
2453d15d 794static struct event *thread_get(struct event_master *m, uint8_t type,
e6685141
DS
795 void (*func)(struct event *), void *arg,
796 const struct xref_threadsched *xref)
718e3744 797{
e6685141 798 struct event *thread = thread_list_pop(&m->unuse);
d62a17ae 799 struct cpu_thread_history tmp;
800
801 if (!thread) {
e6685141 802 thread = XCALLOC(MTYPE_THREAD, sizeof(struct event));
d62a17ae 803 /* mutex only needs to be initialized at struct creation. */
804 pthread_mutex_init(&thread->mtx, NULL);
805 m->alloc++;
806 }
807
808 thread->type = type;
809 thread->add_type = type;
810 thread->master = m;
811 thread->arg = arg;
ba7d2705 812 thread->yield = EVENT_YIELD_TIME_SLOT; /* default */
d62a17ae 813 thread->ref = NULL;
e8b3a2f7 814 thread->ignore_timer_late = false;
d62a17ae 815
816 /*
817 * So if the passed in funcname is not what we have
818 * stored that means the thread->hist needs to be
819 * updated. We keep the last one around in unused
820 * under the assumption that we are probably
821 * going to immediately allocate the same
822 * type of thread.
823 * This hopefully saves us some serious
824 * hash_get lookups.
825 */
60a3efec
DL
826 if ((thread->xref && thread->xref->funcname != xref->funcname)
827 || thread->func != func) {
d62a17ae 828 tmp.func = func;
60a3efec 829 tmp.funcname = xref->funcname;
d62a17ae 830 thread->hist =
831 hash_get(m->cpu_record, &tmp,
832 (void *(*)(void *))cpu_record_hash_alloc);
833 }
834 thread->hist->total_active++;
835 thread->func = func;
60a3efec 836 thread->xref = xref;
d62a17ae 837
838 return thread;
718e3744 839}
840
2453d15d 841static void thread_free(struct event_master *master, struct event *thread)
6655966d
RZ
842{
843 /* Update statistics. */
844 assert(master->alloc > 0);
845 master->alloc--;
846
847 /* Free allocated resources. */
848 pthread_mutex_destroy(&thread->mtx);
849 XFREE(MTYPE_THREAD, thread);
850}
851
2453d15d 852static int fd_poll(struct event_master *m, const struct timeval *timer_wait,
d81ca9a3 853 bool *eintr_p)
209a72a6 854{
d81ca9a3
MS
855 sigset_t origsigs;
856 unsigned char trash[64];
857 nfds_t count = m->handler.copycount;
858
d279ef57
DS
859 /*
860 * If timer_wait is null here, that means poll() should block
861 * indefinitely, unless the thread_master has overridden it by setting
d62a17ae 862 * ->selectpoll_timeout.
d279ef57 863 *
d62a17ae 864 * If the value is positive, it specifies the maximum number of
d279ef57
DS
865 * milliseconds to wait. If the timeout is -1, it specifies that
866 * we should never wait and always return immediately even if no
867 * event is detected. If the value is zero, the behavior is default.
868 */
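	/*
	 * Editor's worked example: with timer_wait = { 2, 500000 } and
	 * selectpoll_timeout == 0, timeout becomes 2 * 1000 + 500000 / 1000
	 * = 2500 ms.  With selectpoll_timeout < 0 the computed value is 0,
	 * i.e. poll() returns immediately.
	 */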
d62a17ae 869 int timeout = -1;
870
871 /* number of file descriptors with events */
872 int num;
873
874 if (timer_wait != NULL
875 && m->selectpoll_timeout == 0) // use the default value
876 timeout = (timer_wait->tv_sec * 1000)
877 + (timer_wait->tv_usec / 1000);
878 else if (m->selectpoll_timeout > 0) // use the user's timeout
879 timeout = m->selectpoll_timeout;
880 else if (m->selectpoll_timeout
881 < 0) // effect a poll (return immediately)
882 timeout = 0;
883
0bdeb5e5 884 zlog_tls_buffer_flush();
3e41733f
DL
885 rcu_read_unlock();
886 rcu_assert_read_unlocked();
887
d62a17ae 888 /* add poll pipe poker */
d81ca9a3
MS
889 assert(count + 1 < m->handler.pfdsize);
890 m->handler.copy[count].fd = m->io_pipe[0];
891 m->handler.copy[count].events = POLLIN;
892 m->handler.copy[count].revents = 0x00;
893
894 /* We need to deal with a signal-handling race here: we
895 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
896 * that may arrive just before we enter poll(). We will block the
897 * key signals, then check whether any have arrived - if so, we return
898 * before calling poll(). If not, we'll re-enable the signals
899 * in the ppoll() call.
900 */
901
902 sigemptyset(&origsigs);
903 if (m->handle_signals) {
904 /* Main pthread that handles the app signals */
905 if (frr_sigevent_check(&origsigs)) {
906 /* Signal to process - restore signal mask and return */
907 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
908 num = -1;
909 *eintr_p = true;
910 goto done;
911 }
912 } else {
913 /* Don't make any changes for the non-main pthreads */
914 pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
915 }
d62a17ae 916
d81ca9a3
MS
917#if defined(HAVE_PPOLL)
918 struct timespec ts, *tsp;
919
920 if (timeout >= 0) {
921 ts.tv_sec = timeout / 1000;
922 ts.tv_nsec = (timeout % 1000) * 1000000;
923 tsp = &ts;
924 } else
925 tsp = NULL;
926
927 num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
928 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
929#else
930 /* Not ideal - there is a race after we restore the signal mask */
931 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
932 num = poll(m->handler.copy, count + 1, timeout);
933#endif
d62a17ae 934
d81ca9a3
MS
935done:
936
937 if (num < 0 && errno == EINTR)
938 *eintr_p = true;
939
940 if (num > 0 && m->handler.copy[count].revents != 0 && num--)
d62a17ae 941 while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
942 ;
943
3e41733f
DL
944 rcu_read_lock();
945
d62a17ae 946 return num;
209a72a6
DS
947}
948
718e3744 949/* Add new read thread. */
907a2395 950void _event_add_read_write(const struct xref_threadsched *xref,
2453d15d
DS
951 struct event_master *m, void (*func)(struct event *),
952 void *arg, int fd, struct event **t_ptr)
718e3744 953{
2ccccdf5 954 int dir = xref->event_type;
e6685141
DS
955 struct event *thread = NULL;
956 struct event **thread_array;
d62a17ae 957
2ccccdf5 958 if (dir == EVENT_READ)
6c3aa850
DL
959 frrtrace(9, frr_libfrr, schedule_read, m,
960 xref->funcname, xref->xref.file, xref->xref.line,
961 t_ptr, fd, 0, arg, 0);
abf96a87 962 else
6c3aa850
DL
963 frrtrace(9, frr_libfrr, schedule_write, m,
964 xref->funcname, xref->xref.file, xref->xref.line,
965 t_ptr, fd, 0, arg, 0);
abf96a87 966
188acbb9
DS
967 assert(fd >= 0);
968 if (fd >= m->fd_limit)
969 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
970
cb1991af 971 frr_with_mutex (&m->mtx) {
00dffa8c
DL
972 if (t_ptr && *t_ptr)
973 // thread is already scheduled; don't reschedule
974 break;
d62a17ae 975
976 /* default to a new pollfd */
977 nfds_t queuepos = m->handler.pfdcount;
978
2ccccdf5 979 if (dir == EVENT_READ)
1ef14bee
DS
980 thread_array = m->read;
981 else
982 thread_array = m->write;
983
d62a17ae 984 /* if we already have a pollfd for our file descriptor, find and
985 * use it */
986 for (nfds_t i = 0; i < m->handler.pfdcount; i++)
987 if (m->handler.pfds[i].fd == fd) {
988 queuepos = i;
1ef14bee
DS
989
990#ifdef DEV_BUILD
991 /*
992 * What happens if we have a thread already
993 * created for this event?
994 */
995 if (thread_array[fd])
996 assert(!"Thread already scheduled for file descriptor");
997#endif
d62a17ae 998 break;
999 }
1000
1001 /* make sure we have room for this fd + pipe poker fd */
1002 assert(queuepos + 1 < m->handler.pfdsize);
1003
60a3efec 1004 thread = thread_get(m, dir, func, arg, xref);
d62a17ae 1005
1006 m->handler.pfds[queuepos].fd = fd;
1007 m->handler.pfds[queuepos].events |=
2ccccdf5 1008 (dir == EVENT_READ ? POLLIN : POLLOUT);
d62a17ae 1009
1010 if (queuepos == m->handler.pfdcount)
1011 m->handler.pfdcount++;
1012
1013 if (thread) {
cb1991af 1014 frr_with_mutex (&thread->mtx) {
d62a17ae 1015 thread->u.fd = fd;
1ef14bee 1016 thread_array[thread->u.fd] = thread;
d62a17ae 1017 }
d62a17ae 1018
1019 if (t_ptr) {
1020 *t_ptr = thread;
1021 thread->ref = t_ptr;
1022 }
1023 }
1024
1025 AWAKEN(m);
1026 }
718e3744 1027}
1028
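/*
 * Editor's usage sketch (not part of the original file): scheduling a read
 * task with a back-reference so it can later be cancelled.  This assumes the
 * event_add_read() convenience macro from event.h expands to
 * _event_add_read_write() with EVENT_READ; 'vty_read_cb', 'vty' and 'sock'
 * are hypothetical:
 *
 *	struct event *t_read = NULL;
 *
 *	event_add_read(master, vty_read_cb, vty, sock, &t_read);
 *	...
 *	event_cancel(&t_read);   // also resets t_read back to NULL
 */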
907a2395 1029static void _event_add_timer_timeval(const struct xref_threadsched *xref,
2453d15d 1030 struct event_master *m,
907a2395
DS
1031 void (*func)(struct event *), void *arg,
1032 struct timeval *time_relative,
1033 struct event **t_ptr)
718e3744 1034{
e6685141 1035 struct event *thread;
96fe578a 1036 struct timeval t;
d62a17ae 1037
1038 assert(m != NULL);
1039
d62a17ae 1040 assert(time_relative);
1041
6c3aa850
DL
1042 frrtrace(9, frr_libfrr, schedule_timer, m,
1043 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1044 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
abf96a87 1045
96fe578a
MS
1046 /* Compute expiration/deadline time. */
1047 monotime(&t);
1048 timeradd(&t, time_relative, &t);
1049
cb1991af 1050 frr_with_mutex (&m->mtx) {
00dffa8c 1051 if (t_ptr && *t_ptr)
d279ef57 1052 /* thread is already scheduled; don't reschedule */
ee1455dd 1053 return;
d62a17ae 1054
2ccccdf5 1055 thread = thread_get(m, EVENT_TIMER, func, arg, xref);
d62a17ae 1056
cb1991af 1057 frr_with_mutex (&thread->mtx) {
96fe578a 1058 thread->u.sands = t;
27d29ced 1059 thread_timer_list_add(&m->timer, thread);
d62a17ae 1060 if (t_ptr) {
1061 *t_ptr = thread;
1062 thread->ref = t_ptr;
1063 }
1064 }
d62a17ae 1065
96fe578a
MS
1066 /* The timer list is sorted - if this new timer
1067 * might change the time we'll wait for, give the pthread
1068 * a chance to re-compute.
1069 */
1070 if (thread_timer_list_first(&m->timer) == thread)
1071 AWAKEN(m);
d62a17ae 1072 }
e2eff5c3
DS
1073#define ONEYEAR2SEC (60 * 60 * 24 * 365)
1074 if (time_relative->tv_sec > ONEYEAR2SEC)
1075 flog_err(
1076 EC_LIB_TIMER_TOO_LONG,
1077 "Timer: %pTHD is created with an expiration that is greater than 1 year",
1078 thread);
9e867fe6 1079}
1080
98c91ac6 1081
1082/* Add timer event thread. */
907a2395 1083void _event_add_timer(const struct xref_threadsched *xref,
2453d15d 1084 struct event_master *m, void (*func)(struct event *),
907a2395 1085 void *arg, long timer, struct event **t_ptr)
9e867fe6 1086{
d62a17ae 1087 struct timeval trel;
9e867fe6 1088
d62a17ae 1089 assert(m != NULL);
9e867fe6 1090
d62a17ae 1091 trel.tv_sec = timer;
1092 trel.tv_usec = 0;
9e867fe6 1093
907a2395 1094 _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
98c91ac6 1095}
9e867fe6 1096
98c91ac6 1097/* Add timer event thread with "millisecond" resolution */
907a2395 1098void _event_add_timer_msec(const struct xref_threadsched *xref,
2453d15d
DS
1099 struct event_master *m, void (*func)(struct event *),
1100 void *arg, long timer, struct event **t_ptr)
98c91ac6 1101{
d62a17ae 1102 struct timeval trel;
9e867fe6 1103
d62a17ae 1104 assert(m != NULL);
718e3744 1105
d62a17ae 1106 trel.tv_sec = timer / 1000;
1107 trel.tv_usec = 1000 * (timer % 1000);
98c91ac6 1108
907a2395 1109 _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
a48b4e6d 1110}
1111
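/*
 * Editor's usage sketch: arming a one-shot timer with millisecond
 * resolution, assuming an event_add_timer_msec() macro wrapping
 * _event_add_timer_msec().  The handler re-arms itself to get periodic
 * behaviour; 'struct peer', 'keepalive_send' and 't_keepalive' are
 * hypothetical:
 *
 *	static void keepalive_send(struct event *e)
 *	{
 *		struct peer *peer = e->arg;
 *
 *		// ... send the keepalive ...
 *		event_add_timer_msec(e->master, keepalive_send, peer, 500,
 *				     &peer->t_keepalive);
 *	}
 */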
4322dea7 1112/* Add timer event thread with "timeval" resolution */
907a2395 1113void _event_add_timer_tv(const struct xref_threadsched *xref,
2453d15d 1114 struct event_master *m, void (*func)(struct event *),
907a2395 1115 void *arg, struct timeval *tv, struct event **t_ptr)
d03c4cbd 1116{
907a2395 1117 _event_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
d03c4cbd
DL
1118}
1119
718e3744 1120/* Add simple event thread. */
907a2395 1121void _event_add_event(const struct xref_threadsched *xref,
2453d15d 1122 struct event_master *m, void (*func)(struct event *),
907a2395 1123 void *arg, int val, struct event **t_ptr)
718e3744 1124{
e6685141 1125 struct event *thread = NULL;
d62a17ae 1126
6c3aa850
DL
1127 frrtrace(9, frr_libfrr, schedule_event, m,
1128 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1129 t_ptr, 0, val, arg, 0);
abf96a87 1130
d62a17ae 1131 assert(m != NULL);
1132
cb1991af 1133 frr_with_mutex (&m->mtx) {
00dffa8c 1134 if (t_ptr && *t_ptr)
d279ef57 1135 /* thread is already scheduled; don't reschedule */
00dffa8c 1136 break;
d62a17ae 1137
2ccccdf5 1138 thread = thread_get(m, EVENT_EVENT, func, arg, xref);
cb1991af 1139 frr_with_mutex (&thread->mtx) {
d62a17ae 1140 thread->u.val = val;
c284542b 1141 thread_list_add_tail(&m->event, thread);
d62a17ae 1142 }
d62a17ae 1143
1144 if (t_ptr) {
1145 *t_ptr = thread;
1146 thread->ref = t_ptr;
1147 }
1148
1149 AWAKEN(m);
1150 }
718e3744 1151}
1152
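/*
 * Editor's usage sketch: _event_add_event() (via an assumed event_add_event()
 * macro) queues a task that runs on the next pass through the ready queue,
 * the usual way to defer work out of the current callback.  'process_queue'
 * and 'ctx' are hypothetical; the integer argument lands in thread->u.val:
 *
 *	event_add_event(master, process_queue, ctx, 0, &ctx->t_process);
 */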
63ccb9cb
QY
1153/* Thread cancellation ------------------------------------------------------ */
1154
8797240e
QY
1155/**
1156 * NOT's out the .events field of pollfd corresponding to the given file
1157 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1158 *
de2754be 1159 * This needs to happen for both copies of pollfd's. See 'event_fetch'
8797240e
QY
1160 * implementation for details.
1161 *
1162 * @param master
1163 * @param fd
1164 * @param state the event to cancel. One or more (OR'd together) of the
1165 * following:
1166 * - POLLIN
1167 * - POLLOUT
1168 */
2453d15d 1169static void event_cancel_rw(struct event_master *master, int fd, short state,
332beb64 1170 int idx_hint)
0a95a0d0 1171{
42d74538
QY
1172 bool found = false;
1173
d62a17ae 1174 /* find the index of corresponding pollfd */
1175 nfds_t i;
1176
a9318a32
MS
1177 /* Cancel POLLHUP too just in case some bozo set it */
1178 state |= POLLHUP;
1179
1180 /* Some callers know the index of the pfd already */
1181 if (idx_hint >= 0) {
1182 i = idx_hint;
1183 found = true;
1184 } else {
1185 /* Have to look for the fd in the pfd array */
1186 for (i = 0; i < master->handler.pfdcount; i++)
1187 if (master->handler.pfds[i].fd == fd) {
1188 found = true;
1189 break;
1190 }
1191 }
42d74538
QY
1192
1193 if (!found) {
1194 zlog_debug(
1195 "[!] Received cancellation request for nonexistent rw job");
1196 zlog_debug("[!] threadmaster: %s | fd: %d",
996c9314 1197 master->name ? master->name : "", fd);
42d74538
QY
1198 return;
1199 }
d62a17ae 1200
1201 /* NOT out event. */
1202 master->handler.pfds[i].events &= ~(state);
1203
1204 /* If all events are canceled, delete / resize the pollfd array. */
1205 if (master->handler.pfds[i].events == 0) {
1206 memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1207 (master->handler.pfdcount - i - 1)
1208 * sizeof(struct pollfd));
1209 master->handler.pfdcount--;
e985cda0
S
1210 master->handler.pfds[master->handler.pfdcount].fd = 0;
1211 master->handler.pfds[master->handler.pfdcount].events = 0;
d62a17ae 1212 }
1213
1214 /* If we have the same pollfd in the copy, perform the same operations,
1215 * otherwise return. */
1216 if (i >= master->handler.copycount)
1217 return;
1218
1219 master->handler.copy[i].events &= ~(state);
1220
1221 if (master->handler.copy[i].events == 0) {
1222 memmove(master->handler.copy + i, master->handler.copy + i + 1,
1223 (master->handler.copycount - i - 1)
1224 * sizeof(struct pollfd));
1225 master->handler.copycount--;
e985cda0
S
1226 master->handler.copy[master->handler.copycount].fd = 0;
1227 master->handler.copy[master->handler.copycount].events = 0;
d62a17ae 1228 }
0a95a0d0
DS
1229}
1230
a9318a32
MS
1231/*
1232 * Process task cancellation given a task argument: iterate through the
1233 * various lists of tasks, looking for any that match the argument.
1234 */
2453d15d 1235static void cancel_arg_helper(struct event_master *master,
a9318a32
MS
1236 const struct cancel_req *cr)
1237{
e6685141 1238 struct event *t;
a9318a32
MS
1239 nfds_t i;
1240 int fd;
1241 struct pollfd *pfd;
1242
1243 /* We're only processing arg-based cancellations here. */
1244 if (cr->eventobj == NULL)
1245 return;
1246
1247 /* First process the ready lists. */
1248 frr_each_safe(thread_list, &master->event, t) {
1249 if (t->arg != cr->eventobj)
1250 continue;
1251 thread_list_del(&master->event, t);
1252 if (t->ref)
1253 *t->ref = NULL;
1254 thread_add_unuse(master, t);
1255 }
1256
1257 frr_each_safe(thread_list, &master->ready, t) {
1258 if (t->arg != cr->eventobj)
1259 continue;
1260 thread_list_del(&master->ready, t);
1261 if (t->ref)
1262 *t->ref = NULL;
1263 thread_add_unuse(master, t);
1264 }
1265
1266 /* If requested, stop here and ignore io and timers */
332beb64 1267 if (CHECK_FLAG(cr->flags, EVENT_CANCEL_FLAG_READY))
a9318a32
MS
1268 return;
1269
1270 /* Check the io tasks */
1271 for (i = 0; i < master->handler.pfdcount;) {
1272 pfd = master->handler.pfds + i;
1273
1274 if (pfd->events & POLLIN)
1275 t = master->read[pfd->fd];
1276 else
1277 t = master->write[pfd->fd];
1278
1279 if (t && t->arg == cr->eventobj) {
1280 fd = pfd->fd;
1281
1282 /* Found a match to cancel: clean up fd arrays */
332beb64 1283 event_cancel_rw(master, pfd->fd, pfd->events, i);
a9318a32
MS
1284
1285 /* Clean up thread arrays */
1286 master->read[fd] = NULL;
1287 master->write[fd] = NULL;
1288
1289 /* Clear caller's ref */
1290 if (t->ref)
1291 *t->ref = NULL;
1292
1293 thread_add_unuse(master, t);
1294
1295 /* Don't increment 'i' since the cancellation will have
1296 * removed the entry from the pfd array
1297 */
1298 } else
1299 i++;
1300 }
1301
1302 /* Check the timer tasks */
1303 t = thread_timer_list_first(&master->timer);
1304 while (t) {
e6685141 1305 struct event *t_next;
a9318a32
MS
1306
1307 t_next = thread_timer_list_next(&master->timer, t);
1308
1309 if (t->arg == cr->eventobj) {
1310 thread_timer_list_del(&master->timer, t);
1311 if (t->ref)
1312 *t->ref = NULL;
1313 thread_add_unuse(master, t);
1314 }
1315
1316 t = t_next;
1317 }
1318}
1319
1189d95f 1320/**
63ccb9cb 1321 * Process cancellation requests.
1189d95f 1322 *
63ccb9cb
QY
1323 * This may only be run from the pthread which owns the thread_master.
1324 *
1325 * @param master the thread master to process
1326 * @REQUIRE master->mtx
1189d95f 1327 */
2453d15d 1328static void do_event_cancel(struct event_master *master)
718e3744 1329{
c284542b 1330 struct thread_list_head *list = NULL;
e6685141
DS
1331 struct event **thread_array = NULL;
1332 struct event *thread;
d62a17ae 1333 struct cancel_req *cr;
1334 struct listnode *ln;
7e93a54c 1335
d62a17ae 1336 for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
d279ef57 1337 /*
a9318a32
MS
1338 * If this is an event object cancellation, search
1339 * through task lists deleting any tasks which have the
1340 * specified argument - use this handy helper function.
d279ef57 1341 */
d62a17ae 1342 if (cr->eventobj) {
a9318a32 1343 cancel_arg_helper(master, cr);
d62a17ae 1344 continue;
1345 }
1346
d279ef57
DS
1347 /*
1348 * The pointer varies depending on whether the cancellation
1349 * request was made asynchronously or not. If it was, we
1350 * need to check whether the thread even exists anymore
1351 * before cancelling it.
1352 */
d62a17ae 1353 thread = (cr->thread) ? cr->thread : *cr->threadref;
1354
1355 if (!thread)
1356 continue;
1357
7e93a54c
MS
1358 list = NULL;
1359 thread_array = NULL;
1360
d62a17ae 1361 /* Determine the appropriate queue to cancel the thread from */
1362 switch (thread->type) {
2ccccdf5 1363 case EVENT_READ:
332beb64 1364 event_cancel_rw(master, thread->u.fd, POLLIN, -1);
d62a17ae 1365 thread_array = master->read;
1366 break;
2ccccdf5 1367 case EVENT_WRITE:
332beb64 1368 event_cancel_rw(master, thread->u.fd, POLLOUT, -1);
d62a17ae 1369 thread_array = master->write;
1370 break;
2ccccdf5 1371 case EVENT_TIMER:
27d29ced 1372 thread_timer_list_del(&master->timer, thread);
d62a17ae 1373 break;
2ccccdf5 1374 case EVENT_EVENT:
d62a17ae 1375 list = &master->event;
1376 break;
2ccccdf5 1377 case EVENT_READY:
d62a17ae 1378 list = &master->ready;
1379 break;
2ccccdf5
DS
1380 case EVENT_UNUSED:
1381 case EVENT_EXECUTE:
d62a17ae 1382 continue;
1383 break;
1384 }
1385
27d29ced 1386 if (list) {
c284542b 1387 thread_list_del(list, thread);
d62a17ae 1388 } else if (thread_array) {
1389 thread_array[thread->u.fd] = NULL;
d62a17ae 1390 }
1391
1392 if (thread->ref)
1393 *thread->ref = NULL;
1394
1395 thread_add_unuse(thread->master, thread);
1396 }
1397
1398 /* Delete and free all cancellation requests */
41b21bfa
MS
1399 if (master->cancel_req)
1400 list_delete_all_node(master->cancel_req);
d62a17ae 1401
332beb64 1402 /* Wake up any threads which may be blocked in event_cancel_async() */
d62a17ae 1403 master->canceled = true;
1404 pthread_cond_broadcast(&master->cancel_cond);
718e3744 1405}
1406
a9318a32
MS
1407/*
1408 * Helper function used for multiple flavors of arg-based cancellation.
1409 */
2453d15d 1410static void cancel_event_helper(struct event_master *m, void *arg, int flags)
a9318a32
MS
1411{
1412 struct cancel_req *cr;
1413
1414 assert(m->owner == pthread_self());
1415
1416 /* Only worth anything if caller supplies an arg. */
1417 if (arg == NULL)
1418 return;
1419
1420 cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1421
1422 cr->flags = flags;
1423
cb1991af 1424 frr_with_mutex (&m->mtx) {
a9318a32
MS
1425 cr->eventobj = arg;
1426 listnode_add(m->cancel_req, cr);
332beb64 1427 do_event_cancel(m);
a9318a32
MS
1428 }
1429}
1430
63ccb9cb
QY
1431/**
1432 * Cancel any events which have the specified argument.
1433 *
1434 * MT-Unsafe
1435 *
1436 * @param m the thread_master to cancel from
1437 * @param arg the argument passed when creating the event
1438 */
2453d15d 1439void event_cancel_event(struct event_master *master, void *arg)
718e3744 1440{
a9318a32
MS
1441 cancel_event_helper(master, arg, 0);
1442}
d62a17ae 1443
a9318a32
MS
1444/*
1445 * Cancel ready tasks with an arg matching 'arg'
1446 *
1447 * MT-Unsafe
1448 *
1449 * @param m the thread_master to cancel from
1450 * @param arg the argument passed when creating the event
1451 */
2453d15d 1452void event_cancel_event_ready(struct event_master *m, void *arg)
a9318a32
MS
1453{
1454
1455 /* Only cancel ready/event tasks */
332beb64 1456 cancel_event_helper(m, arg, EVENT_CANCEL_FLAG_READY);
63ccb9cb 1457}
1189d95f 1458
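/*
 * Editor's note (sketch): the two arg-based cancellations differ only in the
 * EVENT_CANCEL_FLAG_READY flag, and both must run on the owning pthread.
 * For a hypothetical session object 'sess':
 *
 *	event_cancel_event(master, sess);        // drops io, timers and events whose arg == sess
 *	event_cancel_event_ready(master, sess);  // drops only event/ready-queue tasks; io and timers stay
 */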
63ccb9cb
QY
1459/**
1460 * Cancel a specific task.
1461 *
1462 * MT-Unsafe
1463 *
1464 * @param thread task to cancel
1465 */
332beb64 1466void event_cancel(struct event **thread)
63ccb9cb 1467{
2453d15d 1468 struct event_master *master;
b3d6bc6e
MS
1469
1470 if (thread == NULL || *thread == NULL)
1471 return;
1472
1473 master = (*thread)->master;
d62a17ae 1474
332beb64
DS
1475 frrtrace(9, frr_libfrr, event_cancel, master, (*thread)->xref->funcname,
1476 (*thread)->xref->xref.file, (*thread)->xref->xref.line, NULL,
1477 (*thread)->u.fd, (*thread)->u.val, (*thread)->arg,
1478 (*thread)->u.sands.tv_sec);
abf96a87 1479
6ed04aa2
DS
1480 assert(master->owner == pthread_self());
1481
cb1991af 1482 frr_with_mutex (&master->mtx) {
d62a17ae 1483 struct cancel_req *cr =
1484 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
b3d6bc6e 1485 cr->thread = *thread;
6ed04aa2 1486 listnode_add(master->cancel_req, cr);
332beb64 1487 do_event_cancel(master);
d62a17ae 1488 }
b3d6bc6e
MS
1489
1490 *thread = NULL;
63ccb9cb 1491}
1189d95f 1492
63ccb9cb
QY
1493/**
1494 * Asynchronous cancellation.
1495 *
e6685141 1496 * Called with either a struct event ** or void * to an event argument,
8797240e
QY
1497 * this function posts the correct cancellation request and blocks until it is
1498 * serviced.
63ccb9cb
QY
1499 *
1500 * If the thread is currently running, execution blocks until it completes.
1501 *
8797240e
QY
1502 * The last two parameters are mutually exclusive, i.e. if you pass one the
1503 * other must be NULL.
1504 *
1505 * When the cancellation procedure executes on the target thread_master, the
1506 * thread * provided is checked for nullity. If it is null, the thread is
1507 * assumed to no longer exist and the cancellation request is a no-op. Thus
1508 * users of this API must pass a back-reference when scheduling the original
1509 * task.
1510 *
63ccb9cb
QY
1511 * MT-Safe
1512 *
8797240e
QY
1513 * @param master the thread master with the relevant event / task
1514 * @param thread pointer to thread to cancel
1515 * @param eventobj the event
63ccb9cb 1516 */
2453d15d 1517void event_cancel_async(struct event_master *master, struct event **thread,
332beb64 1518 void *eventobj)
63ccb9cb 1519{
d62a17ae 1520 assert(!(thread && eventobj) && (thread || eventobj));
abf96a87
QY
1521
1522 if (thread && *thread)
332beb64 1523 frrtrace(9, frr_libfrr, event_cancel_async, master,
6c3aa850
DL
1524 (*thread)->xref->funcname, (*thread)->xref->xref.file,
1525 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
c7bb4f00
QY
1526 (*thread)->u.val, (*thread)->arg,
1527 (*thread)->u.sands.tv_sec);
abf96a87 1528 else
332beb64 1529 frrtrace(9, frr_libfrr, event_cancel_async, master, NULL, NULL,
c7bb4f00 1530 0, NULL, 0, 0, eventobj, 0);
abf96a87 1531
d62a17ae 1532 assert(master->owner != pthread_self());
1533
cb1991af 1534 frr_with_mutex (&master->mtx) {
d62a17ae 1535 master->canceled = false;
1536
1537 if (thread) {
1538 struct cancel_req *cr =
1539 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1540 cr->threadref = thread;
1541 listnode_add(master->cancel_req, cr);
1542 } else if (eventobj) {
1543 struct cancel_req *cr =
1544 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1545 cr->eventobj = eventobj;
1546 listnode_add(master->cancel_req, cr);
1547 }
1548 AWAKEN(master);
1549
1550 while (!master->canceled)
1551 pthread_cond_wait(&master->cancel_cond, &master->mtx);
1552 }
50478845
MS
1553
1554 if (thread)
1555 *thread = NULL;
718e3744 1556}
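/*
 * Editor's usage sketch: cancelling from a pthread other than the one that
 * owns the event_master.  Exactly one of the last two arguments may be
 * non-NULL; the call blocks until the owning pthread has serviced the
 * request.  'peer' and its 't_read' back-reference are hypothetical:
 *
 *	event_cancel_async(peer->master, &peer->t_read, NULL);  // by task; *thread is NULL on return
 *	event_cancel_async(peer->master, NULL, peer);           // by event argument
 */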
63ccb9cb 1557/* ------------------------------------------------------------------------- */
718e3744 1558
27d29ced 1559static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
d62a17ae 1560 struct timeval *timer_val)
718e3744 1561{
27d29ced
DL
1562 if (!thread_timer_list_count(timers))
1563 return NULL;
1564
e6685141 1565 struct event *next_timer = thread_timer_list_first(timers);
27d29ced
DL
1566 monotime_until(&next_timer->u.sands, timer_val);
1567 return timer_val;
718e3744 1568}
718e3744 1569
2453d15d 1570static struct event *thread_run(struct event_master *m, struct event *thread,
e6685141 1571 struct event *fetch)
718e3744 1572{
d62a17ae 1573 *fetch = *thread;
1574 thread_add_unuse(m, thread);
1575 return fetch;
718e3744 1576}
1577
2453d15d 1578static int thread_process_io_helper(struct event_master *m,
e6685141 1579 struct event *thread, short state,
45f3d590 1580 short actual_state, int pos)
5d4ccd4e 1581{
e6685141 1582 struct event **thread_array;
d62a17ae 1583
45f3d590
DS
1584 /*
1585 * poll() clears the .events field, but the pollfd array we
1586 * pass to poll() is a copy of the one used to schedule threads.
1587 * We need to synchronize state between the two here by applying
1588 * the same changes poll() made on the copy of the "real" pollfd
1589 * array.
1590 *
1591 * This cleans up a possible infinite loop where we refuse
1592 * to respond to a poll event but poll is insistent that
1593 * we should.
1594 */
1595 m->handler.pfds[pos].events &= ~(state);
1596
1597 if (!thread) {
1598 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1599 flog_err(EC_LIB_NO_THREAD,
1d5453d6 1600 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
45f3d590 1601 m->handler.pfds[pos].fd, actual_state);
d62a17ae 1602 return 0;
45f3d590 1603 }
d62a17ae 1604
2ccccdf5 1605 if (thread->type == EVENT_READ)
d62a17ae 1606 thread_array = m->read;
1607 else
1608 thread_array = m->write;
1609
1610 thread_array[thread->u.fd] = NULL;
c284542b 1611 thread_list_add_tail(&m->ready, thread);
2ccccdf5 1612 thread->type = EVENT_READY;
45f3d590 1613
d62a17ae 1614 return 1;
5d4ccd4e
DS
1615}
1616
8797240e
QY
1617/**
1618 * Process I/O events.
1619 *
1620 * Walks through file descriptor array looking for those pollfds whose .revents
1621 * field has something interesting. Deletes any invalid file descriptors.
1622 *
1623 * @param m the thread master
1624 * @param num the number of active file descriptors (return value of poll())
1625 */
2453d15d 1626static void thread_process_io(struct event_master *m, unsigned int num)
0a95a0d0 1627{
d62a17ae 1628 unsigned int ready = 0;
1629 struct pollfd *pfds = m->handler.copy;
1630
1631 for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1632 /* no event for current fd? immediately continue */
1633 if (pfds[i].revents == 0)
1634 continue;
1635
1636 ready++;
1637
d279ef57 1638 /*
332beb64 1639 * Unless someone has called event_cancel from another
d279ef57
DS
1640 * pthread, the only thing that could have changed in
1641 * m->handler.pfds while we were asleep is the .events
332beb64 1642 * field in a given pollfd. Barring event_cancel() that
d279ef57
DS
1643 * value should be a superset of the values we have in our
1644 * copy, so there's no need to update it. Similarly,
1645 * barring deletion, the fd should still be a valid index
1646 * into the master's pfds.
d142453d
DS
1647 *
1648 * We are including POLLERR here to do a READ event:
1649 * the read should fail and the read function is
1650 * expected to handle it appropriately.
d279ef57 1651 */
d142453d 1652 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
d62a17ae 1653 thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
45f3d590
DS
1654 pfds[i].revents, i);
1655 }
d62a17ae 1656 if (pfds[i].revents & POLLOUT)
1657 thread_process_io_helper(m, m->write[pfds[i].fd],
45f3d590 1658 POLLOUT, pfds[i].revents, i);
d62a17ae 1659
1660 /* if one of our file descriptors is garbage, remove the same
1661 * from
1662 * both pfds + update sizes and index */
1663 if (pfds[i].revents & POLLNVAL) {
1664 memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1665 (m->handler.pfdcount - i - 1)
1666 * sizeof(struct pollfd));
1667 m->handler.pfdcount--;
e985cda0
S
1668 m->handler.pfds[m->handler.pfdcount].fd = 0;
1669 m->handler.pfds[m->handler.pfdcount].events = 0;
d62a17ae 1670
1671 memmove(pfds + i, pfds + i + 1,
1672 (m->handler.copycount - i - 1)
1673 * sizeof(struct pollfd));
1674 m->handler.copycount--;
e985cda0
S
1675 m->handler.copy[m->handler.copycount].fd = 0;
1676 m->handler.copy[m->handler.copycount].events = 0;
d62a17ae 1677
1678 i--;
1679 }
1680 }
718e3744 1681}
1682
8b70d0b0 1683/* Add all timers that have popped to the ready list. */
2453d15d 1684static unsigned int thread_process_timers(struct event_master *m,
d62a17ae 1685 struct timeval *timenow)
a48b4e6d 1686{
ab01a001
DS
1687 struct timeval prev = *timenow;
1688 bool displayed = false;
e6685141 1689 struct event *thread;
d62a17ae 1690 unsigned int ready = 0;
1691
e7d9e44b 1692 while ((thread = thread_timer_list_first(&m->timer))) {
d62a17ae 1693 if (timercmp(timenow, &thread->u.sands, <))
e7d9e44b 1694 break;
ab01a001
DS
1695 prev = thread->u.sands;
1696 prev.tv_sec += 4;
1697 /*
1698 * If the timer would have popped 4 seconds in the
1699 * past then we are in a situation where we are
1700 * really getting behind on handling of events.
1701 * Let's log it and do the right thing with it.
1702 */
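		/*
		 * Editor's worked example: a timer due at t = 95s that is
		 * processed at timenow = 100s gives prev = 99s < timenow,
		 * so total_starv_warn is bumped and (unless the task set
		 * ignore_timer_late) one EC_LIB_STARVE_THREAD warning is
		 * logged per call to this function.
		 */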
1dd08c22
DS
1703 if (timercmp(timenow, &prev, >)) {
1704 atomic_fetch_add_explicit(
1705 &thread->hist->total_starv_warn, 1,
1706 memory_order_seq_cst);
1707 if (!displayed && !thread->ignore_timer_late) {
1708 flog_warn(
1709 EC_LIB_STARVE_THREAD,
1710 "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1711 thread);
1712 displayed = true;
1713 }
ab01a001
DS
1714 }
1715
e7d9e44b 1716 thread_timer_list_pop(&m->timer);
2ccccdf5 1717 thread->type = EVENT_READY;
e7d9e44b 1718 thread_list_add_tail(&m->ready, thread);
d62a17ae 1719 ready++;
1720 }
e7d9e44b 1721
d62a17ae 1722 return ready;
a48b4e6d 1723}
1724
2613abe6 1725/* process a list en masse, e.g. for event thread lists */
c284542b 1726static unsigned int thread_process(struct thread_list_head *list)
2613abe6 1727{
e6685141 1728 struct event *thread;
d62a17ae 1729 unsigned int ready = 0;
1730
c284542b 1731 while ((thread = thread_list_pop(list))) {
2ccccdf5 1732 thread->type = EVENT_READY;
c284542b 1733 thread_list_add_tail(&thread->master->ready, thread);
d62a17ae 1734 ready++;
1735 }
1736 return ready;
2613abe6
PJ
1737}
1738
1739
718e3744 1740/* Fetch next ready thread. */
2453d15d 1741struct event *event_fetch(struct event_master *m, struct event *fetch)
718e3744 1742{
e6685141 1743 struct event *thread = NULL;
d62a17ae 1744 struct timeval now;
1745 struct timeval zerotime = {0, 0};
1746 struct timeval tv;
1747 struct timeval *tw = NULL;
d81ca9a3 1748 bool eintr_p = false;
d62a17ae 1749 int num = 0;
1750
1751 do {
1752 /* Handle signals if any */
1753 if (m->handle_signals)
7cc91e67 1754 frr_sigevent_process();
d62a17ae 1755
1756 pthread_mutex_lock(&m->mtx);
1757
1758 /* Process any pending cancellation requests */
332beb64 1759 do_event_cancel(m);
d62a17ae 1760
e3c9529e
QY
1761 /*
1762 * Attempt to flush ready queue before going into poll().
1763 * This is performance-critical. Think twice before modifying.
1764 */
c284542b 1765 if ((thread = thread_list_pop(&m->ready))) {
e3c9529e
QY
1766 fetch = thread_run(m, thread, fetch);
1767 if (fetch->ref)
1768 *fetch->ref = NULL;
1769 pthread_mutex_unlock(&m->mtx);
5e822957
DS
1770 if (!m->ready_run_loop)
1771 GETRUSAGE(&m->last_getrusage);
1772 m->ready_run_loop = true;
e3c9529e
QY
1773 break;
1774 }
1775
5e822957 1776 m->ready_run_loop = false;
e3c9529e
QY
1777 /* otherwise, tick through scheduling sequence */
1778
bca37d17
QY
1779 /*
1780 * Post events to ready queue. This must come before the
1781 * following block since events should occur immediately
1782 */
d62a17ae 1783 thread_process(&m->event);
1784
bca37d17
QY
1785 /*
1786 * If there are no tasks on the ready queue, we will poll()
1787 * until a timer expires or we receive I/O, whichever comes
1788 * first. The strategy for doing this is:
d62a17ae 1789 *
1790 * - If there are events pending, set the poll() timeout to zero
1791 * - If there are no events pending, but there are timers
d279ef57
DS
1792 * pending, set the timeout to the smallest remaining time on
1793 * any timer.
d62a17ae 1794 * - If there are neither timers nor events pending, but there
d279ef57 1795 * are file descriptors pending, block indefinitely in poll()
d62a17ae 1796 * - If nothing is pending, it's time for the application to die
1797 *
1798 * In every case except the last, we need to hit poll() at least
bca37d17
QY
1799 * once per loop to avoid starvation by events
1800 */
c284542b 1801 if (!thread_list_count(&m->ready))
27d29ced 1802 tw = thread_timer_wait(&m->timer, &tv);
d62a17ae 1803
c284542b
DL
1804 if (thread_list_count(&m->ready) ||
1805 (tw && !timercmp(tw, &zerotime, >)))
d62a17ae 1806 tw = &zerotime;
1807
1808 if (!tw && m->handler.pfdcount == 0) { /* die */
1809 pthread_mutex_unlock(&m->mtx);
1810 fetch = NULL;
1811 break;
1812 }
1813
bca37d17
QY
1814 /*
1815 * Copy pollfd array + # active pollfds in it. Not necessary to
1816 * copy the array size as this is fixed.
1817 */
d62a17ae 1818 m->handler.copycount = m->handler.pfdcount;
1819 memcpy(m->handler.copy, m->handler.pfds,
1820 m->handler.copycount * sizeof(struct pollfd));
1821
e3c9529e
QY
1822 pthread_mutex_unlock(&m->mtx);
1823 {
d81ca9a3
MS
1824 eintr_p = false;
1825 num = fd_poll(m, tw, &eintr_p);
e3c9529e
QY
1826 }
1827 pthread_mutex_lock(&m->mtx);
d764d2cc 1828
e3c9529e
QY
1829 /* Handle any errors received in poll() */
1830 if (num < 0) {
d81ca9a3 1831 if (eintr_p) {
d62a17ae 1832 pthread_mutex_unlock(&m->mtx);
e3c9529e
QY
1833 /* loop around to signal handler */
1834 continue;
d62a17ae 1835 }
1836
e3c9529e 1837 /* else die */
450971aa 1838 flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
9ef9495e 1839 safe_strerror(errno));
e3c9529e
QY
1840 pthread_mutex_unlock(&m->mtx);
1841 fetch = NULL;
1842 break;
bca37d17 1843 }
d62a17ae 1844
1845 /* Post timers to ready queue. */
1846 monotime(&now);
e7d9e44b 1847 thread_process_timers(m, &now);
d62a17ae 1848
1849 /* Post I/O to ready queue. */
1850 if (num > 0)
1851 thread_process_io(m, num);
1852
d62a17ae 1853 pthread_mutex_unlock(&m->mtx);
1854
1855 } while (!thread && m->spin);
1856
1857 return fetch;
718e3744 1858}
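
The poll-timeout strategy spelled out in the comment inside event_fetch() can be condensed into a small decision helper. This is only a sketch of that logic, not library API; choose_poll_timeout and its parameters are illustrative:

#include <stddef.h>
#include <sys/time.h>

/* Decide what timeout to hand to poll():
 *   - a zero timeout when ready tasks exist or a timer already expired,
 *   - the earliest timer otherwise (wait at most until it pops),
 *   - NULL when no timers are pending (block indefinitely on I/O).
 * The separate "nothing left at all -> time to die" case is the caller's
 * check, as in event_fetch() above. */
static struct timeval *choose_poll_timeout(size_t ready_count,
					   struct timeval *earliest_timer,
					   struct timeval *zerotime)
{
	if (ready_count
	    || (earliest_timer && !timercmp(earliest_timer, zerotime, >)))
		return zerotime;

	return earliest_timer;	/* may be NULL */
}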
1859
d62a17ae 1860static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
62f44022 1861{
d62a17ae 1862 return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1863 + (a.tv_usec - b.tv_usec));
62f44022
QY
1864}
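
As a quick worked example of timeval_elapsed(): with a = {5, 200000} and b = {3, 900000}, the result is (5 - 3) * 1000000 + (200000 - 900000) = 1300000 microseconds, i.e. 1.3 seconds. A self-check sketch, assuming TIMER_SECOND_MICRO has its usual value of 1000000:

#include <assert.h>
#include <sys/time.h>

static void timeval_elapsed_selfcheck(void)
{
	struct timeval a = {5, 200000};
	struct timeval b = {3, 900000};

	/* 2s of seconds minus 0.7s of microseconds -> 1.3s total */
	assert(timeval_elapsed(a, b) == 1300000UL);
}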
1865
5f6eaa9b
DS
1866unsigned long event_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1867 unsigned long *cputime)
718e3744 1868{
6418e2d3 1869#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
4e2839de
DS
1870
1871#ifdef __FreeBSD__
1872 /*
1873 * FreeBSD appears to have an issue when clock_gettime is
1874 * called with CLOCK_THREAD_CPUTIME_ID twice in quick
1875 * succession: occasionally the "now" time will be before
1876 * the "start" time. This is bad, because the subtraction
1877 * then wraps to very large numbers and FRR reports
1878 * spurious CPU HOGs.
1879 *
1880 * Cheat a little bit here: notice when this has happened
1881 * and correct the values so that it is impossible.
1882 */
1883 if (start->cpu.tv_sec == now->cpu.tv_sec &&
1884 start->cpu.tv_nsec > now->cpu.tv_nsec)
1885 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1886 else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1887 now->cpu.tv_sec = start->cpu.tv_sec;
1888 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1889 }
1890#endif
6418e2d3
DL
1891 *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1892 + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1893#else
d62a17ae 1894 /* This is 'user + sys' time. */
1895 *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1896 + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
6418e2d3 1897#endif
d62a17ae 1898 return timeval_elapsed(now->real, start->real);
8b70d0b0 1899}
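
For reference, the same elapsed-CPU-time arithmetic on two CLOCK_THREAD_CPUTIME_ID samples, reduced to a standalone helper returning microseconds (the function name is illustrative; the FreeBSD workaround above is omitted here):

#include <time.h>

#define USEC_PER_SEC 1000000L

/* Per-thread CPU microseconds elapsed between two samples taken with
 * clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...). */
static unsigned long cputime_elapsed_usec(const struct timespec *start,
					  const struct timespec *now)
{
	return (now->tv_sec - start->tv_sec) * USEC_PER_SEC
	       + (now->tv_nsec - start->tv_nsec) / 1000;
}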
1900
50596be0 1901 /* We should aim to yield after the task's configured yield time, which
2ccccdf5 1902    defaults to EVENT_YIELD_TIME_SLOT.
8b70d0b0 1903 Note: we are using real (wall clock) time for this calculation.
1904 It could be argued that CPU time may make more sense in certain
1905 contexts. The things to consider are whether the thread may have
1906 blocked (in which case wall time increases, but CPU time does not),
1907 or whether the system is heavily loaded with other processes competing
d62a17ae 1908 for CPU time. On balance, wall clock time seems to make sense.
8b70d0b0 1909 Plus it has the added benefit that gettimeofday should be faster
1910 than calling getrusage. */
70c35c11 1911int event_should_yield(struct event *thread)
718e3744 1912{
d62a17ae 1913 int result;
cb1991af 1914 frr_with_mutex (&thread->mtx) {
d62a17ae 1915 result = monotime_since(&thread->real, NULL)
1916 > (int64_t)thread->yield;
1917 }
d62a17ae 1918 return result;
50596be0
DS
1919}
1920
70c35c11 1921void event_set_yield_time(struct event *thread, unsigned long yield_time)
50596be0 1922{
cb1991af 1923 frr_with_mutex (&thread->mtx) {
d62a17ae 1924 thread->yield = yield_time;
1925 }
718e3744 1926}
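
A hedged sketch of how a long-running callback typically cooperates with this yield machinery: do a bounded chunk of work, check event_should_yield(), and reschedule itself rather than monopolizing the event loop. The work-queue context and helpers here are made up, and the rescheduling call is assumed to be the library's event_add_event():

struct my_ctx {			/* illustrative work-queue context */
	int items_left;
};

static void process_one_item(struct my_ctx *ctx)
{
	ctx->items_left--;	/* stand-in for real per-item work */
}

static void my_background_work(struct event *thread)
{
	struct my_ctx *ctx = thread->arg;

	while (ctx->items_left > 0) {
		process_one_item(ctx);

		/* once our time slot is used up, put ourselves back on the
		 * event queue and return so other tasks can run */
		if (event_should_yield(thread)) {
			event_add_event(thread->master, my_background_work,
					ctx, 0, NULL);
			return;
		}
	}
}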
1927
5f6eaa9b 1928void event_getrusage(RUSAGE_T *r)
db9c0df9 1929{
6418e2d3
DL
1930 monotime(&r->real);
1931 if (!cputime_enabled) {
1932 memset(&r->cpu, 0, sizeof(r->cpu));
1933 return;
1934 }
1935
1936#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1937 /* not currently implemented in Linux's vDSO, but maybe at some point
1938 * in the future?
1939 */
1940 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1941#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
231db9a6
DS
1942#if defined RUSAGE_THREAD
1943#define FRR_RUSAGE RUSAGE_THREAD
1944#else
1945#define FRR_RUSAGE RUSAGE_SELF
1946#endif
6418e2d3
DL
1947 getrusage(FRR_RUSAGE, &(r->cpu));
1948#endif
db9c0df9
PJ
1949}
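
Putting event_getrusage() and event_consumed_time() together gives the sample-run-sample pattern used by event_call() below; a hedged usage sketch relying only on functions defined in this file (time_a_callback is illustrative, and the file's existing includes are assumed):

static void time_a_callback(void (*fn)(void *), void *arg)
{
	RUSAGE_T before, after;
	unsigned long walltime, cputime;

	event_getrusage(&before);
	fn(arg);
	event_getrusage(&after);

	walltime = event_consumed_time(&after, &before, &cputime);
	zlog_debug("callback took %lu us wall, %lu us cpu",
		   walltime, cputime);
}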
1950
fbcac826
QY
1951/*
1952 * Call a thread.
1953 *
1954 * This function will atomically update the thread's usage history. At present
1955 * this is the only spot where usage history is written. Nevertheless the code
1956 * has been written such that additional writers can be introduced in the
1957 * future without requiring changes here, provided those writers atomically
1958 * perform only the operations done here, i.e. update the total and maximum times. In
1959 * particular, the maximum real and cpu times must be monotonically increasing
1960 * or this code is not correct.
1961 */
de2754be 1962void event_call(struct event *thread)
718e3744 1963{
d62a17ae 1964 RUSAGE_T before, after;
cc8b13a0 1965
45f01188
DL
1966 /* if the thread being called is the CLI, it may change cputime_enabled
1967 * ("service cputime-stats" command), which can result in nonsensical
1968 * and very confusing warnings
1969 */
1970 bool cputime_enabled_here = cputime_enabled;
1971
5e822957
DS
1972 if (thread->master->ready_run_loop)
1973 before = thread->master->last_getrusage;
1974 else
1975 GETRUSAGE(&before);
1976
d62a17ae 1977 thread->real = before.real;
718e3744 1978
de2754be 1979 frrtrace(9, frr_libfrr, event_call, thread->master,
6c3aa850 1980 thread->xref->funcname, thread->xref->xref.file,
de2754be
DS
1981 thread->xref->xref.line, NULL, thread->u.fd, thread->u.val,
1982 thread->arg, thread->u.sands.tv_sec);
abf96a87 1983
d62a17ae 1984 pthread_setspecific(thread_current, thread);
1985 (*thread->func)(thread);
1986 pthread_setspecific(thread_current, NULL);
718e3744 1987
d62a17ae 1988 GETRUSAGE(&after);
5e822957 1989 thread->master->last_getrusage = after;
718e3744 1990
45f01188
DL
1991 unsigned long walltime, cputime;
1992 unsigned long exp;
fbcac826 1993
5f6eaa9b 1994 walltime = event_consumed_time(&after, &before, &cputime);
45f01188
DL
1995
1996 /* update walltime */
1997 atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
fbcac826
QY
1998 memory_order_seq_cst);
1999 exp = atomic_load_explicit(&thread->hist->real.max,
2000 memory_order_seq_cst);
45f01188 2001 while (exp < walltime
fbcac826 2002 && !atomic_compare_exchange_weak_explicit(
45f01188
DL
2003 &thread->hist->real.max, &exp, walltime,
2004 memory_order_seq_cst, memory_order_seq_cst))
fbcac826
QY
2005 ;
2006
45f01188
DL
2007 if (cputime_enabled_here && cputime_enabled) {
2008 /* update cputime */
2009 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2010 memory_order_seq_cst);
2011 exp = atomic_load_explicit(&thread->hist->cpu.max,
2012 memory_order_seq_cst);
2013 while (exp < cputime
2014 && !atomic_compare_exchange_weak_explicit(
2015 &thread->hist->cpu.max, &exp, cputime,
2016 memory_order_seq_cst, memory_order_seq_cst))
2017 ;
2018 }
fbcac826
QY
2019
2020 atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2021 memory_order_seq_cst);
2022 atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2023 memory_order_seq_cst);
718e3744 2024
45f01188
DL
2025 if (cputime_enabled_here && cputime_enabled && cputime_threshold
2026 && cputime > cputime_threshold) {
d62a17ae 2027 /*
45f01188
DL
2028 * We have a CPU hog on our hands: the CPU time this task spent
2029 * doing actual work (not sleeping) exceeded the warning threshold.
d62a17ae 2030 * Whinge about it now, so we're aware this is yet another task
2031 * to fix.
2032 */
9b8e01ca
DS
2033 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2034 1, memory_order_seq_cst);
9ef9495e 2035 flog_warn(
039d547f
DS
2036 EC_LIB_SLOW_THREAD_CPU,
2037 "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2038 thread->xref->funcname, (unsigned long)thread->func,
45f01188
DL
2039 walltime / 1000, cputime / 1000);
2040
2041 } else if (walltime_threshold && walltime > walltime_threshold) {
039d547f 2042 /*
45f01188
DL
2043 * The wall-clock runtime of the task exceeded the warning
2044 * threshold, but its cpu time did not. Whine about this too,
2045 * because it could imply some sort of scheduling issue.
039d547f 2046 */
9b8e01ca
DS
2047 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2048 1, memory_order_seq_cst);
039d547f
DS
2049 flog_warn(
2050 EC_LIB_SLOW_THREAD_WALL,
2051 "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
60a3efec 2052 thread->xref->funcname, (unsigned long)thread->func,
45f01188 2053 walltime / 1000, cputime / 1000);
d62a17ae 2054 }
718e3744 2055}
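
The history update in event_call() uses the standard lock-free "store the maximum" idiom: load the current max, then CAS in the new value only while it is still larger. A minimal sketch of the same pattern with plain C11 atomics (the frratomic wrappers used above are assumed to behave equivalently):

#include <stdatomic.h>

/* Raise *max to at least 'value'.  Safe against concurrent updaters as
 * long as every writer uses this same monotonic pattern. */
static void atomic_raise_max(_Atomic unsigned long *max, unsigned long value)
{
	unsigned long expected =
		atomic_load_explicit(max, memory_order_seq_cst);

	while (expected < value
	       && !atomic_compare_exchange_weak_explicit(
		       max, &expected, value, memory_order_seq_cst,
		       memory_order_seq_cst))
		;	/* a failed CAS refreshed 'expected'; retry */
}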
2056
2057/* Execute thread */
2453d15d
DS
2058void _event_execute(const struct xref_threadsched *xref, struct event_master *m,
2059 void (*func)(struct event *), void *arg, int val)
718e3744 2060{
e6685141 2061 struct event *thread;
718e3744 2062
c4345fbf 2063 /* Get or allocate new thread to execute. */
cb1991af 2064 frr_with_mutex (&m->mtx) {
2ccccdf5 2065 thread = thread_get(m, EVENT_EVENT, func, arg, xref);
9c7753e4 2066
c4345fbf 2067 /* Set its event value. */
cb1991af 2068 frr_with_mutex (&thread->mtx) {
2ccccdf5 2069 thread->add_type = EVENT_EXECUTE;
c4345fbf
RZ
2070 thread->u.val = val;
2071 thread->ref = &thread;
2072 }
c4345fbf 2073 }
f7c62e11 2074
c4345fbf 2075 /* Execute thread doing all accounting. */
de2754be 2076 event_call(thread);
9c7753e4 2077
c4345fbf
RZ
2078 /* Give back or free thread. */
2079 thread_add_unuse(m, thread);
718e3744 2080}
1543c387
MS
2081
2082/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2083void debug_signals(const sigset_t *sigs)
2084{
2085 int i, found;
2086 sigset_t tmpsigs;
2087 char buf[300];
2088
2089 /*
2090 * We're only looking at the non-realtime signals here, so we need
2091 * some limit value. Platform differences mean at some point we just
2092 * need to pick a reasonable value.
2093 */
2094#if defined SIGRTMIN
2095# define LAST_SIGNAL SIGRTMIN
2096#else
2097# define LAST_SIGNAL 32
2098#endif
2099
2100
2101 if (sigs == NULL) {
2102 sigemptyset(&tmpsigs);
2103 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2104 sigs = &tmpsigs;
2105 }
2106
2107 found = 0;
2108 buf[0] = '\0';
2109
2110 for (i = 0; i < LAST_SIGNAL; i++) {
2111 char tmp[20];
2112
2113 if (sigismember(sigs, i) > 0) {
2114 if (found > 0)
2115 strlcat(buf, ",", sizeof(buf));
2116 snprintf(tmp, sizeof(tmp), "%d", i);
2117 strlcat(buf, tmp, sizeof(buf));
2118 found++;
2119 }
2120 }
2121
2122 if (found == 0)
2123 snprintf(buf, sizeof(buf), "<none>");
2124
2125 zlog_debug("%s: %s", __func__, buf);
2126}
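
A hedged usage sketch for debug_signals(): dump the calling thread's current effective mask, then an explicitly constructed one (the chosen signals are arbitrary):

#include <signal.h>

static void show_masks_example(void)
{
	sigset_t sigs;

	/* NULL means "use the current effective mask" */
	debug_signals(NULL);

	/* an explicit mask containing just SIGUSR1 and SIGTERM */
	sigemptyset(&sigs);
	sigaddset(&sigs, SIGUSR1);
	sigaddset(&sigs, SIGTERM);
	debug_signals(&sigs);
}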
a505383d 2127
f59e6882 2128static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
e6685141 2129 const struct event *thread)
f59e6882 2130{
2ccccdf5
DS
2131 static const char *const types[] = {
2132 [EVENT_READ] = "read", [EVENT_WRITE] = "write",
2133 [EVENT_TIMER] = "timer", [EVENT_EVENT] = "event",
2134 [EVENT_READY] = "ready", [EVENT_UNUSED] = "unused",
2135 [EVENT_EXECUTE] = "exec",
f59e6882
DL
2136 };
2137 ssize_t rv = 0;
2138 char info[16] = "";
2139
2140 if (!thread)
2141 return bputs(buf, "{(thread *)NULL}");
2142
2143 rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2144
2145 if (thread->type < array_size(types) && types[thread->type])
2146 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2147 else
2148 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2149
2150 switch (thread->type) {
2ccccdf5
DS
2151 case EVENT_READ:
2152 case EVENT_WRITE:
f59e6882
DL
2153 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2154 break;
2155
2ccccdf5 2156 case EVENT_TIMER:
f59e6882
DL
2157 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2158 break;
2ccccdf5
DS
2159 case EVENT_READY:
2160 case EVENT_EVENT:
2161 case EVENT_UNUSED:
2162 case EVENT_EXECUTE:
2163 break;
f59e6882
DL
2164 }
2165
2166 rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2167 thread->xref->funcname, thread->xref->dest,
2168 thread->xref->xref.file, thread->xref->xref.line);
2169 return rv;
2170}
2171
54929fd3 2172printfrr_ext_autoreg_p("TH", printfrr_thread);
f59e6882
DL
2173static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2174 const void *ptr)
2175{
e6685141 2176 const struct event *thread = ptr;
f59e6882
DL
2177 struct timespec remain = {};
2178
2179 if (ea->fmt[0] == 'D') {
2180 ea->fmt++;
2181 return printfrr_thread_dbg(buf, ea, thread);
2182 }
2183
2184 if (!thread) {
2185 /* need to jump over time formatting flag characters in the
2186 * input format string, i.e. adjust ea->fmt!
2187 */
2188 printfrr_time(buf, ea, &remain,
2189 TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2190 return bputch(buf, '-');
2191 }
2192
2193 TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2194 return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2195}
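
With the "TH" printfrr extension registered above, a struct event pointer can be formatted directly by the logging macros: %pTH prints the remaining time until a timer pops, and %pTHD prints the full debug form produced by printfrr_thread_dbg(). A hedged usage sketch (the surrounding function is illustrative):

static void log_timer_state(struct event *t)
{
	/* remaining time until the timer fires, deadline-style formatting */
	zlog_debug("timer will fire in %pTH", t);

	/* full dump: type, fd/deadline, callback and scheduling location */
	zlog_debug("task details: %pTHD", t);
}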