git.proxmox.com Git - mirror_frr.git/blame - lib/event.c
Commit: *: Convert a bunch of thread_XX to event_XX
acddc0ed 1// SPDX-License-Identifier: GPL-2.0-or-later
718e3744 2/* Thread management routine
3 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
718e3744 4 */
5
6/* #define DEBUG */
7
8#include <zebra.h>
308d14ae 9#include <sys/resource.h>
718e3744 10
cb37cb33 11#include "event.h"
718e3744 12#include "memory.h"
3e41733f 13#include "frrcu.h"
718e3744 14#include "log.h"
e04ab74d 15#include "hash.h"
16#include "command.h"
05c447dd 17#include "sigevent.h"
3bf2673b 18#include "network.h"
bd74dc61 19#include "jhash.h"
fbcac826 20#include "frratomic.h"
00dffa8c 21#include "frr_pthread.h"
9ef9495e 22#include "lib_errors.h"
912d45a1 23#include "libfrr_trace.h"
1a9f340b 24#include "libfrr.h"
d6be5fb9 25
bf8d3d6a 26DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
2ccccdf5
DS
27DEFINE_MTYPE_STATIC(LIB, EVENT_MASTER, "Thread master");
28DEFINE_MTYPE_STATIC(LIB, EVENT_POLL, "Thread Poll Info");
29DEFINE_MTYPE_STATIC(LIB, EVENT_STATS, "Thread stats");
4a1ab8e4 30
e6685141 31DECLARE_LIST(thread_list, struct event, threaditem);
c284542b 32
aea25d1e
MS
33struct cancel_req {
34 int flags;
e6685141 35 struct event *thread;
aea25d1e 36 void *eventobj;
e6685141 37 struct event **threadref;
aea25d1e
MS
38};
39
40/* Flags for task cancellation */
332beb64 41#define EVENT_CANCEL_FLAG_READY 0x01
aea25d1e 42
e6685141 43static int thread_timer_cmp(const struct event *a, const struct event *b)
27d29ced
DL
44{
45 if (a->u.sands.tv_sec < b->u.sands.tv_sec)
46 return -1;
47 if (a->u.sands.tv_sec > b->u.sands.tv_sec)
48 return 1;
49 if (a->u.sands.tv_usec < b->u.sands.tv_usec)
50 return -1;
51 if (a->u.sands.tv_usec > b->u.sands.tv_usec)
52 return 1;
53 return 0;
54}
55
e6685141 56DECLARE_HEAP(thread_timer_list, struct event, timeritem, thread_timer_cmp);
27d29ced 57
3b96b781
HT
58#if defined(__APPLE__)
59#include <mach/mach.h>
60#include <mach/mach_time.h>
61#endif
62
d62a17ae 63#define AWAKEN(m) \
64 do { \
2b64873d 65 const unsigned char wakebyte = 0x01; \
d62a17ae 66 write(m->io_pipe[1], &wakebyte, 1); \
67 } while (0);
3bf2673b 68
62f44022 69/* control variable for initializer */
c17faa4b 70static pthread_once_t init_once = PTHREAD_ONCE_INIT;
e0bebc7c 71pthread_key_t thread_current;
6b0655a2 72
c17faa4b 73static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
62f44022
QY
74static struct list *masters;
75
e6685141 76static void thread_free(struct thread_master *master, struct event *thread);
6b0655a2 77
45f01188
DL
78#ifndef EXCLUDE_CPU_TIME
79#define EXCLUDE_CPU_TIME 0
80#endif
81#ifndef CONSUMED_TIME_CHECK
82#define CONSUMED_TIME_CHECK 0
83#endif
84
85bool cputime_enabled = !EXCLUDE_CPU_TIME;
86unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
87unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
88
62f44022 89/* CLI start ---------------------------------------------------------------- */
cb37cb33 90#include "lib/event_clippy.c"
45f01188 91
d8b87afe 92static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
e04ab74d 93{
883cc51d 94 int size = sizeof(a->func);
bd74dc61
DS
95
96 return jhash(&a->func, size, 0);
e04ab74d 97}
98
74df8d6d 99static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
d62a17ae 100 const struct cpu_thread_history *b)
e04ab74d 101{
d62a17ae 102 return a->func == b->func;
e04ab74d 103}
104
d62a17ae 105static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
e04ab74d 106{
d62a17ae 107 struct cpu_thread_history *new;
2ccccdf5 108 new = XCALLOC(MTYPE_EVENT_STATS, sizeof(struct cpu_thread_history));
d62a17ae 109 new->func = a->func;
110 new->funcname = a->funcname;
111 return new;
e04ab74d 112}
113
d62a17ae 114static void cpu_record_hash_free(void *a)
228da428 115{
d62a17ae 116 struct cpu_thread_history *hist = a;
117
2ccccdf5 118 XFREE(MTYPE_EVENT_STATS, hist);
228da428
CC
119}
120
d62a17ae 121static void vty_out_cpu_thread_history(struct vty *vty,
122 struct cpu_thread_history *a)
e04ab74d 123{
1dd08c22
DS
124 vty_out(vty,
125 "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
72327cf3 126 a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
9b8e01ca
DS
127 a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
128 (a->real.total / a->total_calls), a->real.max,
1dd08c22 129 a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
9b8e01ca 130 vty_out(vty, " %c%c%c%c%c %s\n",
2ccccdf5
DS
131 a->types & (1 << EVENT_READ) ? 'R' : ' ',
132 a->types & (1 << EVENT_WRITE) ? 'W' : ' ',
133 a->types & (1 << EVENT_TIMER) ? 'T' : ' ',
134 a->types & (1 << EVENT_EVENT) ? 'E' : ' ',
135 a->types & (1 << EVENT_EXECUTE) ? 'X' : ' ', a->funcname);
e04ab74d 136}
137
e3b78da8 138static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
e04ab74d 139{
d62a17ae 140 struct cpu_thread_history *totals = args[0];
fbcac826 141 struct cpu_thread_history copy;
d62a17ae 142 struct vty *vty = args[1];
fbcac826 143 uint8_t *filter = args[2];
d62a17ae 144
145 struct cpu_thread_history *a = bucket->data;
146
fbcac826
QY
147 copy.total_active =
148 atomic_load_explicit(&a->total_active, memory_order_seq_cst);
149 copy.total_calls =
150 atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
9b8e01ca
DS
151 copy.total_cpu_warn =
152 atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
153 copy.total_wall_warn =
154 atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
1dd08c22
DS
155 copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
156 memory_order_seq_cst);
fbcac826
QY
157 copy.cpu.total =
158 atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
159 copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
160 copy.real.total =
161 atomic_load_explicit(&a->real.total, memory_order_seq_cst);
162 copy.real.max =
163 atomic_load_explicit(&a->real.max, memory_order_seq_cst);
164 copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
165 copy.funcname = a->funcname;
166
167 if (!(copy.types & *filter))
d62a17ae 168 return;
fbcac826
QY
169
170 vty_out_cpu_thread_history(vty, &copy);
171 totals->total_active += copy.total_active;
172 totals->total_calls += copy.total_calls;
9b8e01ca
DS
173 totals->total_cpu_warn += copy.total_cpu_warn;
174 totals->total_wall_warn += copy.total_wall_warn;
1dd08c22 175 totals->total_starv_warn += copy.total_starv_warn;
fbcac826
QY
176 totals->real.total += copy.real.total;
177 if (totals->real.max < copy.real.max)
178 totals->real.max = copy.real.max;
179 totals->cpu.total += copy.cpu.total;
180 if (totals->cpu.max < copy.cpu.max)
181 totals->cpu.max = copy.cpu.max;
e04ab74d 182}
183
fbcac826 184static void cpu_record_print(struct vty *vty, uint8_t filter)
e04ab74d 185{
d62a17ae 186 struct cpu_thread_history tmp;
187 void *args[3] = {&tmp, vty, &filter};
188 struct thread_master *m;
189 struct listnode *ln;
190
45f01188
DL
191 if (!cputime_enabled)
192 vty_out(vty,
193 "\n"
194 "Collecting CPU time statistics is currently disabled. Following statistics\n"
195 "will be zero or may display data from when collection was enabled. Use the\n"
196 " \"service cputime-stats\" command to start collecting data.\n"
197 "\nCounters and wallclock times are always maintained and should be accurate.\n");
198
0d6f7fd6 199 memset(&tmp, 0, sizeof(tmp));
d62a17ae 200 tmp.funcname = "TOTAL";
201 tmp.types = filter;
202
cb1991af 203 frr_with_mutex (&masters_mtx) {
d62a17ae 204 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
205 const char *name = m->name ? m->name : "main";
206
207 char underline[strlen(name) + 1];
208 memset(underline, '-', sizeof(underline));
4f113d60 209 underline[sizeof(underline) - 1] = '\0';
d62a17ae 210
211 vty_out(vty, "\n");
212 vty_out(vty, "Showing statistics for pthread %s\n",
213 name);
214 vty_out(vty, "-------------------------------%s\n",
215 underline);
84d951d0 216 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 217 "CPU (user+system):", "Real (wall-clock):");
218 vty_out(vty,
219 "Active Runtime(ms) Invoked Avg uSec Max uSecs");
220 vty_out(vty, " Avg uSec Max uSecs");
1dd08c22
DS
221 vty_out(vty,
222 " CPU_Warn Wall_Warn Starv_Warn Type Thread\n");
d62a17ae 223
224 if (m->cpu_record->count)
225 hash_iterate(
226 m->cpu_record,
e3b78da8 227 (void (*)(struct hash_bucket *,
d62a17ae 228 void *))cpu_record_hash_print,
229 args);
230 else
231 vty_out(vty, "No data to display yet.\n");
232
233 vty_out(vty, "\n");
234 }
235 }
d62a17ae 236
237 vty_out(vty, "\n");
238 vty_out(vty, "Total thread statistics\n");
239 vty_out(vty, "-------------------------\n");
84d951d0 240 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 241 "CPU (user+system):", "Real (wall-clock):");
242 vty_out(vty, "Active Runtime(ms) Invoked Avg uSec Max uSecs");
9b8e01ca 243 vty_out(vty, " Avg uSec Max uSecs CPU_Warn Wall_Warn");
d62a17ae 244 vty_out(vty, " Type Thread\n");
245
246 if (tmp.total_calls > 0)
247 vty_out_cpu_thread_history(vty, &tmp);
e04ab74d 248}
249
e3b78da8 250static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
e276eb82 251{
fbcac826 252 uint8_t *filter = args[0];
d62a17ae 253 struct hash *cpu_record = args[1];
254
255 struct cpu_thread_history *a = bucket->data;
62f44022 256
d62a17ae 257 if (!(a->types & *filter))
258 return;
f48f65d2 259
d62a17ae 260 hash_release(cpu_record, bucket->data);
e276eb82
PJ
261}
262
fbcac826 263static void cpu_record_clear(uint8_t filter)
e276eb82 264{
fbcac826 265 uint8_t *tmp = &filter;
d62a17ae 266 struct thread_master *m;
267 struct listnode *ln;
268
cb1991af 269 frr_with_mutex (&masters_mtx) {
d62a17ae 270 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
cb1991af 271 frr_with_mutex (&m->mtx) {
d62a17ae 272 void *args[2] = {tmp, m->cpu_record};
273 hash_iterate(
274 m->cpu_record,
e3b78da8 275 (void (*)(struct hash_bucket *,
d62a17ae 276 void *))cpu_record_hash_clear,
277 args);
278 }
d62a17ae 279 }
280 }
62f44022
QY
281}
282
fbcac826 283static uint8_t parse_filter(const char *filterstr)
62f44022 284{
d62a17ae 285 int i = 0;
286 int filter = 0;
287
288 while (filterstr[i] != '\0') {
289 switch (filterstr[i]) {
290 case 'r':
291 case 'R':
2ccccdf5 292 filter |= (1 << EVENT_READ);
d62a17ae 293 break;
294 case 'w':
295 case 'W':
2ccccdf5 296 filter |= (1 << EVENT_WRITE);
d62a17ae 297 break;
298 case 't':
299 case 'T':
2ccccdf5 300 filter |= (1 << EVENT_TIMER);
d62a17ae 301 break;
302 case 'e':
303 case 'E':
2ccccdf5 304 filter |= (1 << EVENT_EVENT);
d62a17ae 305 break;
306 case 'x':
307 case 'X':
2ccccdf5 308 filter |= (1 << EVENT_EXECUTE);
d62a17ae 309 break;
310 default:
311 break;
312 }
313 ++i;
314 }
315 return filter;
62f44022
QY
316}
317
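/*
 * Editor's note (illustrative, not part of the original source): the filter
 * string is case-insensitive, each recognized letter sets one type bit, and
 * unknown letters are silently ignored.  For example:
 *
 *     parse_filter("rt")  == (1 << EVENT_READ) | (1 << EVENT_TIMER)
 *     parse_filter("X")   == (1 << EVENT_EXECUTE)
 *     parse_filter("zzz") == 0   (treated as invalid by the CLI handlers below)
 */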
ee4dcee8
DL
318DEFUN_NOSH (show_thread_cpu,
319 show_thread_cpu_cmd,
320 "show thread cpu [FILTER]",
321 SHOW_STR
322 "Thread information\n"
323 "Thread CPU usage\n"
324 "Display filter (rwtex)\n")
62f44022 325{
fbcac826 326 uint8_t filter = (uint8_t)-1U;
d62a17ae 327 int idx = 0;
328
329 if (argv_find(argv, argc, "FILTER", &idx)) {
330 filter = parse_filter(argv[idx]->arg);
331 if (!filter) {
332 vty_out(vty,
3efd0893 333 "Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
d62a17ae 334 argv[idx]->arg);
335 return CMD_WARNING;
336 }
337 }
338
339 cpu_record_print(vty, filter);
340 return CMD_SUCCESS;
e276eb82 341}
45f01188
DL
342
343DEFPY (service_cputime_stats,
344 service_cputime_stats_cmd,
345 "[no] service cputime-stats",
346 NO_STR
347 "Set up miscellaneous service\n"
348 "Collect CPU usage statistics\n")
349{
350 cputime_enabled = !no;
351 return CMD_SUCCESS;
352}
353
354DEFPY (service_cputime_warning,
355 service_cputime_warning_cmd,
356 "[no] service cputime-warning (1-4294967295)",
357 NO_STR
358 "Set up miscellaneous service\n"
359 "Warn for tasks exceeding CPU usage threshold\n"
360 "Warning threshold in milliseconds\n")
361{
362 if (no)
363 cputime_threshold = 0;
364 else
365 cputime_threshold = cputime_warning * 1000;
366 return CMD_SUCCESS;
367}
368
369ALIAS (service_cputime_warning,
370 no_service_cputime_warning_cmd,
371 "no service cputime-warning",
372 NO_STR
373 "Set up miscellaneous service\n"
374 "Warn for tasks exceeding CPU usage threshold\n")
375
376DEFPY (service_walltime_warning,
377 service_walltime_warning_cmd,
378 "[no] service walltime-warning (1-4294967295)",
379 NO_STR
380 "Set up miscellaneous service\n"
381 "Warn for tasks exceeding total wallclock threshold\n"
382 "Warning threshold in milliseconds\n")
383{
384 if (no)
385 walltime_threshold = 0;
386 else
387 walltime_threshold = walltime_warning * 1000;
388 return CMD_SUCCESS;
389}
390
391ALIAS (service_walltime_warning,
392 no_service_walltime_warning_cmd,
393 "no service walltime-warning",
394 NO_STR
395 "Set up miscellaneous service\n"
396 "Warn for tasks exceeding total wallclock threshold\n")
e276eb82 397
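/*
 * Editor's note (illustrative, not part of the original source): the DEFPYs
 * above correspond to daemon configuration lines such as
 *
 *     service cputime-stats
 *     service cputime-warning 5000
 *     no service walltime-warning
 *
 * The warning thresholds are entered in milliseconds; the values stored in
 * cputime_threshold / walltime_threshold are the input multiplied by 1000.
 */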
8872626b
DS
398static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
399{
400 const char *name = m->name ? m->name : "main";
401 char underline[strlen(name) + 1];
e6685141 402 struct event *thread;
8872626b
DS
403 uint32_t i;
404
405 memset(underline, '-', sizeof(underline));
406 underline[sizeof(underline) - 1] = '\0';
407
408 vty_out(vty, "\nShowing poll FD's for %s\n", name);
409 vty_out(vty, "----------------------%s\n", underline);
6c19478a
DS
410 vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
411 m->fd_limit);
a0b36ae6
DS
412 for (i = 0; i < m->handler.pfdcount; i++) {
413 vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
414 m->handler.pfds[i].fd, m->handler.pfds[i].events,
8872626b 415 m->handler.pfds[i].revents);
a0b36ae6
DS
416
417 if (m->handler.pfds[i].events & POLLIN) {
418 thread = m->read[m->handler.pfds[i].fd];
419
420 if (!thread)
421 vty_out(vty, "ERROR ");
422 else
60a3efec 423 vty_out(vty, "%s ", thread->xref->funcname);
a0b36ae6
DS
424 } else
425 vty_out(vty, " ");
426
427 if (m->handler.pfds[i].events & POLLOUT) {
428 thread = m->write[m->handler.pfds[i].fd];
429
430 if (!thread)
431 vty_out(vty, "ERROR\n");
432 else
60a3efec 433 vty_out(vty, "%s\n", thread->xref->funcname);
a0b36ae6
DS
434 } else
435 vty_out(vty, "\n");
436 }
8872626b
DS
437}
438
ee4dcee8
DL
439DEFUN_NOSH (show_thread_poll,
440 show_thread_poll_cmd,
441 "show thread poll",
442 SHOW_STR
443 "Thread information\n"
444 "Show poll FD's and information\n")
8872626b
DS
445{
446 struct listnode *node;
447 struct thread_master *m;
448
cb1991af 449 frr_with_mutex (&masters_mtx) {
8872626b
DS
450 for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
451 show_thread_poll_helper(vty, m);
452 }
453 }
8872626b
DS
454
455 return CMD_SUCCESS;
456}
457
458
49d41a26
DS
459DEFUN (clear_thread_cpu,
460 clear_thread_cpu_cmd,
461 "clear thread cpu [FILTER]",
62f44022 462 "Clear stored data in all pthreads\n"
49d41a26
DS
463 "Thread information\n"
464 "Thread CPU usage\n"
465 "Display filter (rwtexb)\n")
e276eb82 466{
fbcac826 467 uint8_t filter = (uint8_t)-1U;
d62a17ae 468 int idx = 0;
469
470 if (argv_find(argv, argc, "FILTER", &idx)) {
471 filter = parse_filter(argv[idx]->arg);
472 if (!filter) {
473 vty_out(vty,
3efd0893 474 "Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
d62a17ae 475 argv[idx]->arg);
476 return CMD_WARNING;
477 }
478 }
479
480 cpu_record_clear(filter);
481 return CMD_SUCCESS;
e276eb82 482}
6b0655a2 483
22f31b8c
DS
484static void show_thread_timers_helper(struct vty *vty, struct thread_master *m)
485{
486 const char *name = m->name ? m->name : "main";
487 char underline[strlen(name) + 1];
e6685141 488 struct event *thread;
22f31b8c
DS
489
490 memset(underline, '-', sizeof(underline));
491 underline[sizeof(underline) - 1] = '\0';
492
493 vty_out(vty, "\nShowing timers for %s\n", name);
494 vty_out(vty, "-------------------%s\n", underline);
495
496 frr_each (thread_timer_list, &m->timer, thread) {
497 vty_out(vty, " %-50s%pTH\n", thread->hist->funcname, thread);
498 }
499}
500
501DEFPY_NOSH (show_thread_timers,
502 show_thread_timers_cmd,
503 "show thread timers",
504 SHOW_STR
505 "Thread information\n"
506 "Show all timers and how long until they expire\n")
507{
508 struct listnode *node;
509 struct thread_master *m;
510
511 frr_with_mutex (&masters_mtx) {
512 for (ALL_LIST_ELEMENTS_RO(masters, node, m))
513 show_thread_timers_helper(vty, m);
514 }
515
516 return CMD_SUCCESS;
517}
518
5f6eaa9b 519void event_cmd_init(void)
0b84f294 520{
d62a17ae 521 install_element(VIEW_NODE, &show_thread_cpu_cmd);
8872626b 522 install_element(VIEW_NODE, &show_thread_poll_cmd);
d62a17ae 523 install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
45f01188
DL
524
525 install_element(CONFIG_NODE, &service_cputime_stats_cmd);
526 install_element(CONFIG_NODE, &service_cputime_warning_cmd);
527 install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
528 install_element(CONFIG_NODE, &service_walltime_warning_cmd);
529 install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
22f31b8c
DS
530
531 install_element(VIEW_NODE, &show_thread_timers_cmd);
0b84f294 532}
62f44022
QY
533/* CLI end ------------------------------------------------------------------ */
534
0b84f294 535
d62a17ae 536static void cancelreq_del(void *cr)
63ccb9cb 537{
d62a17ae 538 XFREE(MTYPE_TMP, cr);
63ccb9cb
QY
539}
540
e0bebc7c 541/* initializer, only ever called once */
4d762f26 542static void initializer(void)
e0bebc7c 543{
d62a17ae 544 pthread_key_create(&thread_current, NULL);
e0bebc7c
QY
545}
546
d62a17ae 547struct thread_master *thread_master_create(const char *name)
718e3744 548{
d62a17ae 549 struct thread_master *rv;
550 struct rlimit limit;
551
552 pthread_once(&init_once, &initializer);
553
2ccccdf5 554 rv = XCALLOC(MTYPE_EVENT_MASTER, sizeof(struct thread_master));
d62a17ae 555
556 /* Initialize master mutex */
557 pthread_mutex_init(&rv->mtx, NULL);
558 pthread_cond_init(&rv->cancel_cond, NULL);
559
560 /* Set name */
7ffcd8bd 561 name = name ? name : "default";
2ccccdf5 562 rv->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
d62a17ae 563
564 /* Initialize I/O task data structures */
1a9f340b
MS
565
566 /* Use configured limit if present, ulimit otherwise. */
567 rv->fd_limit = frr_get_fd_limit();
568 if (rv->fd_limit == 0) {
569 getrlimit(RLIMIT_NOFILE, &limit);
570 rv->fd_limit = (int)limit.rlim_cur;
571 }
572
2ccccdf5 573 rv->read = XCALLOC(MTYPE_EVENT_POLL,
e6685141 574 sizeof(struct event *) * rv->fd_limit);
a6f235f3 575
2ccccdf5 576 rv->write = XCALLOC(MTYPE_EVENT_POLL,
e6685141 577 sizeof(struct event *) * rv->fd_limit);
d62a17ae 578
7ffcd8bd
QY
579 char tmhashname[strlen(name) + 32];
580 snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
581 name);
bd74dc61 582 rv->cpu_record = hash_create_size(
d8b87afe 583 8, (unsigned int (*)(const void *))cpu_record_hash_key,
74df8d6d 584 (bool (*)(const void *, const void *))cpu_record_hash_cmp,
7ffcd8bd 585 tmhashname);
d62a17ae 586
c284542b
DL
587 thread_list_init(&rv->event);
588 thread_list_init(&rv->ready);
589 thread_list_init(&rv->unuse);
27d29ced 590 thread_timer_list_init(&rv->timer);
d62a17ae 591
de2754be 592 /* Initialize event_fetch() settings */
d62a17ae 593 rv->spin = true;
594 rv->handle_signals = true;
595
596 /* Set pthread owner, should be updated by actual owner */
597 rv->owner = pthread_self();
598 rv->cancel_req = list_new();
599 rv->cancel_req->del = cancelreq_del;
600 rv->canceled = true;
601
602 /* Initialize pipe poker */
603 pipe(rv->io_pipe);
604 set_nonblocking(rv->io_pipe[0]);
605 set_nonblocking(rv->io_pipe[1]);
606
607 /* Initialize data structures for poll() */
608 rv->handler.pfdsize = rv->fd_limit;
609 rv->handler.pfdcount = 0;
2ccccdf5 610 rv->handler.pfds = XCALLOC(MTYPE_EVENT_MASTER,
d62a17ae 611 sizeof(struct pollfd) * rv->handler.pfdsize);
2ccccdf5 612 rv->handler.copy = XCALLOC(MTYPE_EVENT_MASTER,
d62a17ae 613 sizeof(struct pollfd) * rv->handler.pfdsize);
614
eff09c66 615 /* add to list of threadmasters */
cb1991af 616 frr_with_mutex (&masters_mtx) {
eff09c66
QY
617 if (!masters)
618 masters = list_new();
619
d62a17ae 620 listnode_add(masters, rv);
621 }
d62a17ae 622
623 return rv;
718e3744 624}
625
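/*
 * Usage sketch (editor's illustration, assuming the event_fetch()/event_call()
 * pair exposed by this library; not part of the original file): the typical
 * daemon lifecycle built on a thread_master.
 *
 *     struct thread_master *master = thread_master_create("mydaemon");
 *     struct event ev;
 *
 *     // schedule initial timers / I/O handlers here, then:
 *
 *     while (event_fetch(master, &ev))   // NULL once nothing is left to run
 *             event_call(&ev);           // runs ev.func(&ev), records stats
 *
 *     thread_master_free(master);
 */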
d8a8a8de
QY
626void thread_master_set_name(struct thread_master *master, const char *name)
627{
cb1991af 628 frr_with_mutex (&master->mtx) {
2ccccdf5
DS
629 XFREE(MTYPE_EVENT_MASTER, master->name);
630 master->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
d8a8a8de 631 }
d8a8a8de
QY
632}
633
2ccccdf5 634#define EVENT_UNUSED_DEPTH 10
6ed04aa2 635
718e3744 636/* Move thread to unuse list. */
e6685141 637static void thread_add_unuse(struct thread_master *m, struct event *thread)
718e3744 638{
6655966d
RZ
639 pthread_mutex_t mtxc = thread->mtx;
640
d62a17ae 641 assert(m != NULL && thread != NULL);
d62a17ae 642
d62a17ae 643 thread->hist->total_active--;
e6685141 644 memset(thread, 0, sizeof(struct event));
2ccccdf5 645 thread->type = EVENT_UNUSED;
6ed04aa2 646
6655966d
RZ
647 /* Restore the thread mutex context. */
648 thread->mtx = mtxc;
649
2ccccdf5 650 if (thread_list_count(&m->unuse) < EVENT_UNUSED_DEPTH) {
c284542b 651 thread_list_add_tail(&m->unuse, thread);
6655966d
RZ
652 return;
653 }
654
655 thread_free(m, thread);
718e3744 656}
657
658/* Free all unused thread. */
c284542b
DL
659static void thread_list_free(struct thread_master *m,
660 struct thread_list_head *list)
718e3744 661{
e6685141 662 struct event *t;
d62a17ae 663
c284542b 664 while ((t = thread_list_pop(list)))
6655966d 665 thread_free(m, t);
718e3744 666}
667
d62a17ae 668static void thread_array_free(struct thread_master *m,
e6685141 669 struct event **thread_array)
308d14ae 670{
e6685141 671 struct event *t;
d62a17ae 672 int index;
673
674 for (index = 0; index < m->fd_limit; ++index) {
675 t = thread_array[index];
676 if (t) {
677 thread_array[index] = NULL;
6655966d 678 thread_free(m, t);
d62a17ae 679 }
680 }
2ccccdf5 681 XFREE(MTYPE_EVENT_POLL, thread_array);
308d14ae
DV
682}
683
495f0b13
DS
684/*
685 * thread_master_free_unused
686 *
687 * As threads finish, they are put on the
688 * unuse list for later reuse.
689 * If we are shutting down, free up the unused threads
690 * so we can see if we forgot to shut anything off.
691 */
d62a17ae 692void thread_master_free_unused(struct thread_master *m)
495f0b13 693{
cb1991af 694 frr_with_mutex (&m->mtx) {
e6685141 695 struct event *t;
c284542b 696 while ((t = thread_list_pop(&m->unuse)))
6655966d 697 thread_free(m, t);
d62a17ae 698 }
495f0b13
DS
699}
700
718e3744 701/* Stop thread scheduler. */
d62a17ae 702void thread_master_free(struct thread_master *m)
718e3744 703{
e6685141 704 struct event *t;
27d29ced 705
cb1991af 706 frr_with_mutex (&masters_mtx) {
d62a17ae 707 listnode_delete(masters, m);
eff09c66 708 if (masters->count == 0) {
6a154c88 709 list_delete(&masters);
eff09c66 710 }
d62a17ae 711 }
d62a17ae 712
713 thread_array_free(m, m->read);
714 thread_array_free(m, m->write);
27d29ced
DL
715 while ((t = thread_timer_list_pop(&m->timer)))
716 thread_free(m, t);
d62a17ae 717 thread_list_free(m, &m->event);
718 thread_list_free(m, &m->ready);
719 thread_list_free(m, &m->unuse);
720 pthread_mutex_destroy(&m->mtx);
33844bbe 721 pthread_cond_destroy(&m->cancel_cond);
d62a17ae 722 close(m->io_pipe[0]);
723 close(m->io_pipe[1]);
6a154c88 724 list_delete(&m->cancel_req);
1a0a92ea 725 m->cancel_req = NULL;
d62a17ae 726
d8bc11a5 727 hash_clean_and_free(&m->cpu_record, cpu_record_hash_free);
d62a17ae 728
2ccccdf5
DS
729 XFREE(MTYPE_EVENT_MASTER, m->name);
730 XFREE(MTYPE_EVENT_MASTER, m->handler.pfds);
731 XFREE(MTYPE_EVENT_MASTER, m->handler.copy);
732 XFREE(MTYPE_EVENT_MASTER, m);
718e3744 733}
734
94202742 735/* Return remaining time in milliseconds. */
4f830a07 736unsigned long event_timer_remain_msec(struct event *thread)
718e3744 737{
d62a17ae 738 int64_t remain;
1189d95f 739
5f6eaa9b 740 if (!event_is_scheduled(thread))
13bcc010
DA
741 return 0;
742
cb1991af 743 frr_with_mutex (&thread->mtx) {
78ca0342 744 remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
d62a17ae 745 }
1189d95f 746
d62a17ae 747 return remain < 0 ? 0 : remain;
718e3744 748}
749
78ca0342 750/* Return remaining time in seconds. */
4f830a07 751unsigned long event_timer_remain_second(struct event *thread)
78ca0342 752{
4f830a07 753 return event_timer_remain_msec(thread) / 1000LL;
78ca0342
CF
754}
755
4f830a07 756struct timeval event_timer_remain(struct event *thread)
6ac44687 757{
d62a17ae 758 struct timeval remain;
cb1991af 759 frr_with_mutex (&thread->mtx) {
d62a17ae 760 monotime_until(&thread->u.sands, &remain);
761 }
d62a17ae 762 return remain;
6ac44687
CF
763}
764
0447957e
AK
765static int time_hhmmss(char *buf, int buf_size, long sec)
766{
767 long hh;
768 long mm;
769 int wr;
770
642ac49d 771 assert(buf_size >= 8);
0447957e
AK
772
773 hh = sec / 3600;
774 sec %= 3600;
775 mm = sec / 60;
776 sec %= 60;
777
778 wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
779
780 return wr != 8;
781}
782
5f6eaa9b 783char *event_timer_to_hhmmss(char *buf, int buf_size, struct event *t_timer)
0447957e
AK
784{
785 if (t_timer) {
4f830a07 786 time_hhmmss(buf, buf_size, event_timer_remain_second(t_timer));
0447957e
AK
787 } else {
788 snprintf(buf, buf_size, "--:--:--");
789 }
790 return buf;
791}
792
718e3744 793/* Get new thread. */
e6685141
DS
794static struct event *thread_get(struct thread_master *m, uint8_t type,
795 void (*func)(struct event *), void *arg,
796 const struct xref_threadsched *xref)
718e3744 797{
e6685141 798 struct event *thread = thread_list_pop(&m->unuse);
d62a17ae 799 struct cpu_thread_history tmp;
800
801 if (!thread) {
e6685141 802 thread = XCALLOC(MTYPE_THREAD, sizeof(struct event));
d62a17ae 803 /* mutex only needs to be initialized at struct creation. */
804 pthread_mutex_init(&thread->mtx, NULL);
805 m->alloc++;
806 }
807
808 thread->type = type;
809 thread->add_type = type;
810 thread->master = m;
811 thread->arg = arg;
ba7d2705 812 thread->yield = EVENT_YIELD_TIME_SLOT; /* default */
d62a17ae 813 thread->ref = NULL;
e8b3a2f7 814 thread->ignore_timer_late = false;
d62a17ae 815
816 /*
817 * So if the passed in funcname is not what we have
818 * stored that means the thread->hist needs to be
819 * updated. We keep the last one around in unused
820 * under the assumption that we are probably
821 * going to immediately allocate the same
822 * type of thread.
823 * This hopefully saves us some serious
824 * hash_get lookups.
825 */
60a3efec
DL
826 if ((thread->xref && thread->xref->funcname != xref->funcname)
827 || thread->func != func) {
d62a17ae 828 tmp.func = func;
60a3efec 829 tmp.funcname = xref->funcname;
d62a17ae 830 thread->hist =
831 hash_get(m->cpu_record, &tmp,
832 (void *(*)(void *))cpu_record_hash_alloc);
833 }
834 thread->hist->total_active++;
835 thread->func = func;
60a3efec 836 thread->xref = xref;
d62a17ae 837
838 return thread;
718e3744 839}
840
e6685141 841static void thread_free(struct thread_master *master, struct event *thread)
6655966d
RZ
842{
843 /* Update statistics. */
844 assert(master->alloc > 0);
845 master->alloc--;
846
847 /* Free allocated resources. */
848 pthread_mutex_destroy(&thread->mtx);
849 XFREE(MTYPE_THREAD, thread);
850}
851
d81ca9a3
MS
852static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
853 bool *eintr_p)
209a72a6 854{
d81ca9a3
MS
855 sigset_t origsigs;
856 unsigned char trash[64];
857 nfds_t count = m->handler.copycount;
858
d279ef57
DS
859 /*
860 * If timer_wait is null here, that means poll() should block
861 * indefinitely, unless the thread_master has overridden it by setting
d62a17ae 862 * ->selectpoll_timeout.
d279ef57 863 *
d62a17ae 864 * If the value is positive, it specifies the maximum number of
d279ef57
DS
865 * milliseconds to wait. If the timeout is -1, it specifies that
866 * we should never wait and always return immediately even if no
867 * event is detected. If the value is zero, the behavior is default.
868 */
d62a17ae 869 int timeout = -1;
870
871 /* number of file descriptors with events */
872 int num;
873
874 if (timer_wait != NULL
875 && m->selectpoll_timeout == 0) // use the default value
876 timeout = (timer_wait->tv_sec * 1000)
877 + (timer_wait->tv_usec / 1000);
878 else if (m->selectpoll_timeout > 0) // use the user's timeout
879 timeout = m->selectpoll_timeout;
880 else if (m->selectpoll_timeout
881 < 0) // effect a poll (return immediately)
882 timeout = 0;
883
0bdeb5e5 884 zlog_tls_buffer_flush();
3e41733f
DL
885 rcu_read_unlock();
886 rcu_assert_read_unlocked();
887
d62a17ae 888 /* add poll pipe poker */
d81ca9a3
MS
889 assert(count + 1 < m->handler.pfdsize);
890 m->handler.copy[count].fd = m->io_pipe[0];
891 m->handler.copy[count].events = POLLIN;
892 m->handler.copy[count].revents = 0x00;
893
894 /* We need to deal with a signal-handling race here: we
895 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
896 * that may arrive just before we enter poll(). We will block the
897 * key signals, then check whether any have arrived - if so, we return
898 * before calling poll(). If not, we'll re-enable the signals
899 * in the ppoll() call.
900 */
901
902 sigemptyset(&origsigs);
903 if (m->handle_signals) {
904 /* Main pthread that handles the app signals */
905 if (frr_sigevent_check(&origsigs)) {
906 /* Signal to process - restore signal mask and return */
907 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
908 num = -1;
909 *eintr_p = true;
910 goto done;
911 }
912 } else {
913 /* Don't make any changes for the non-main pthreads */
914 pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
915 }
d62a17ae 916
d81ca9a3
MS
917#if defined(HAVE_PPOLL)
918 struct timespec ts, *tsp;
919
920 if (timeout >= 0) {
921 ts.tv_sec = timeout / 1000;
922 ts.tv_nsec = (timeout % 1000) * 1000000;
923 tsp = &ts;
924 } else
925 tsp = NULL;
926
927 num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
928 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
929#else
930 /* Not ideal - there is a race after we restore the signal mask */
931 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
932 num = poll(m->handler.copy, count + 1, timeout);
933#endif
d62a17ae 934
d81ca9a3
MS
935done:
936
937 if (num < 0 && errno == EINTR)
938 *eintr_p = true;
939
940 if (num > 0 && m->handler.copy[count].revents != 0 && num--)
d62a17ae 941 while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
942 ;
943
3e41733f
DL
944 rcu_read_lock();
945
d62a17ae 946 return num;
209a72a6
DS
947}
948
718e3744 949/* Add new read thread. */
907a2395
DS
950void _event_add_read_write(const struct xref_threadsched *xref,
951 struct thread_master *m,
952 void (*func)(struct event *), void *arg, int fd,
953 struct event **t_ptr)
718e3744 954{
2ccccdf5 955 int dir = xref->event_type;
e6685141
DS
956 struct event *thread = NULL;
957 struct event **thread_array;
d62a17ae 958
2ccccdf5 959 if (dir == EVENT_READ)
6c3aa850
DL
960 frrtrace(9, frr_libfrr, schedule_read, m,
961 xref->funcname, xref->xref.file, xref->xref.line,
962 t_ptr, fd, 0, arg, 0);
abf96a87 963 else
6c3aa850
DL
964 frrtrace(9, frr_libfrr, schedule_write, m,
965 xref->funcname, xref->xref.file, xref->xref.line,
966 t_ptr, fd, 0, arg, 0);
abf96a87 967
188acbb9
DS
968 assert(fd >= 0);
969 if (fd >= m->fd_limit)
970 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
971
cb1991af 972 frr_with_mutex (&m->mtx) {
00dffa8c
DL
973 if (t_ptr && *t_ptr)
974 // thread is already scheduled; don't reschedule
975 break;
d62a17ae 976
977 /* default to a new pollfd */
978 nfds_t queuepos = m->handler.pfdcount;
979
2ccccdf5 980 if (dir == EVENT_READ)
1ef14bee
DS
981 thread_array = m->read;
982 else
983 thread_array = m->write;
984
d62a17ae 985 /* if we already have a pollfd for our file descriptor, find and
986 * use it */
987 for (nfds_t i = 0; i < m->handler.pfdcount; i++)
988 if (m->handler.pfds[i].fd == fd) {
989 queuepos = i;
1ef14bee
DS
990
991#ifdef DEV_BUILD
992 /*
993 * What happens if we have a thread already
994 * created for this event?
995 */
996 if (thread_array[fd])
997 assert(!"Thread already scheduled for file descriptor");
998#endif
d62a17ae 999 break;
1000 }
1001
1002 /* make sure we have room for this fd + pipe poker fd */
1003 assert(queuepos + 1 < m->handler.pfdsize);
1004
60a3efec 1005 thread = thread_get(m, dir, func, arg, xref);
d62a17ae 1006
1007 m->handler.pfds[queuepos].fd = fd;
1008 m->handler.pfds[queuepos].events |=
2ccccdf5 1009 (dir == EVENT_READ ? POLLIN : POLLOUT);
d62a17ae 1010
1011 if (queuepos == m->handler.pfdcount)
1012 m->handler.pfdcount++;
1013
1014 if (thread) {
cb1991af 1015 frr_with_mutex (&thread->mtx) {
d62a17ae 1016 thread->u.fd = fd;
1ef14bee 1017 thread_array[thread->u.fd] = thread;
d62a17ae 1018 }
d62a17ae 1019
1020 if (t_ptr) {
1021 *t_ptr = thread;
1022 thread->ref = t_ptr;
1023 }
1024 }
1025
1026 AWAKEN(m);
1027 }
718e3744 1028}
1029
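/*
 * Usage sketch (editor's illustration; event_add_read() is assumed to be the
 * lib/event.h convenience macro wrapping _event_add_read_write(), and
 * EVENT_FD()/EVENT_ARG() the accessor macros - none of this is part of the
 * original file).  Read/write tasks are one-shot: the handler must re-arm
 * itself if it wants further events on the fd.
 *
 *     static void sock_read(struct event *e)
 *     {
 *             struct conn *conn = EVENT_ARG(e);   // 'struct conn' is hypothetical
 *             int fd = EVENT_FD(e);
 *
 *             // read from fd and process the data, then re-arm:
 *             event_add_read(conn->master, sock_read, conn, fd, &conn->t_read);
 *     }
 *
 *     event_add_read(master, sock_read, conn, conn->fd, &conn->t_read);
 */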
907a2395
DS
1030static void _event_add_timer_timeval(const struct xref_threadsched *xref,
1031 struct thread_master *m,
1032 void (*func)(struct event *), void *arg,
1033 struct timeval *time_relative,
1034 struct event **t_ptr)
718e3744 1035{
e6685141 1036 struct event *thread;
96fe578a 1037 struct timeval t;
d62a17ae 1038
1039 assert(m != NULL);
1040
d62a17ae 1041 assert(time_relative);
1042
6c3aa850
DL
1043 frrtrace(9, frr_libfrr, schedule_timer, m,
1044 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1045 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
abf96a87 1046
96fe578a
MS
1047 /* Compute expiration/deadline time. */
1048 monotime(&t);
1049 timeradd(&t, time_relative, &t);
1050
cb1991af 1051 frr_with_mutex (&m->mtx) {
00dffa8c 1052 if (t_ptr && *t_ptr)
d279ef57 1053 /* thread is already scheduled; don't reschedule */
ee1455dd 1054 return;
d62a17ae 1055
2ccccdf5 1056 thread = thread_get(m, EVENT_TIMER, func, arg, xref);
d62a17ae 1057
cb1991af 1058 frr_with_mutex (&thread->mtx) {
96fe578a 1059 thread->u.sands = t;
27d29ced 1060 thread_timer_list_add(&m->timer, thread);
d62a17ae 1061 if (t_ptr) {
1062 *t_ptr = thread;
1063 thread->ref = t_ptr;
1064 }
1065 }
d62a17ae 1066
96fe578a
MS
1067 /* The timer list is sorted - if this new timer
1068 * might change the time we'll wait for, give the pthread
1069 * a chance to re-compute.
1070 */
1071 if (thread_timer_list_first(&m->timer) == thread)
1072 AWAKEN(m);
d62a17ae 1073 }
e2eff5c3
DS
1074#define ONEYEAR2SEC (60 * 60 * 24 * 365)
1075 if (time_relative->tv_sec > ONEYEAR2SEC)
1076 flog_err(
1077 EC_LIB_TIMER_TOO_LONG,
1078 "Timer: %pTHD is created with an expiration that is greater than 1 year",
1079 thread);
9e867fe6 1080}
1081
98c91ac6 1082
1083/* Add timer event thread. */
907a2395
DS
1084void _event_add_timer(const struct xref_threadsched *xref,
1085 struct thread_master *m, void (*func)(struct event *),
1086 void *arg, long timer, struct event **t_ptr)
9e867fe6 1087{
d62a17ae 1088 struct timeval trel;
9e867fe6 1089
d62a17ae 1090 assert(m != NULL);
9e867fe6 1091
d62a17ae 1092 trel.tv_sec = timer;
1093 trel.tv_usec = 0;
9e867fe6 1094
907a2395 1095 _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
98c91ac6 1096}
9e867fe6 1097
98c91ac6 1098/* Add timer event thread with "millisecond" resolution */
907a2395
DS
1099void _event_add_timer_msec(const struct xref_threadsched *xref,
1100 struct thread_master *m,
1101 void (*func)(struct event *), void *arg, long timer,
1102 struct event **t_ptr)
98c91ac6 1103{
d62a17ae 1104 struct timeval trel;
9e867fe6 1105
d62a17ae 1106 assert(m != NULL);
718e3744 1107
d62a17ae 1108 trel.tv_sec = timer / 1000;
1109 trel.tv_usec = 1000 * (timer % 1000);
98c91ac6 1110
907a2395 1111 _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
a48b4e6d 1112}
1113
4322dea7 1114/* Add timer event thread with "timeval" resolution */
907a2395
DS
1115void _event_add_timer_tv(const struct xref_threadsched *xref,
1116 struct thread_master *m, void (*func)(struct event *),
1117 void *arg, struct timeval *tv, struct event **t_ptr)
d03c4cbd 1118{
907a2395 1119 _event_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
d03c4cbd
DL
1120}
1121
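/*
 * Usage sketch (editor's illustration, assuming the event_add_timer() /
 * event_add_timer_msec() convenience macros from lib/event.h that wrap the
 * _event_add_timer*() functions above; not part of the original file).  The
 * back-reference prevents double-scheduling and is cleared when the timer
 * fires or is cancelled.
 *
 *     static struct event *t_refresh;
 *
 *     static void refresh_timer(struct event *e)
 *     {
 *             struct peer *peer = EVENT_ARG(e);   // 'struct peer' is hypothetical
 *
 *             // periodic work, then reschedule 30 seconds out
 *             event_add_timer(peer->master, refresh_timer, peer, 30, &t_refresh);
 *     }
 *
 *     event_add_timer(master, refresh_timer, peer, 30, &t_refresh);
 */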
718e3744 1122/* Add simple event thread. */
907a2395
DS
1123void _event_add_event(const struct xref_threadsched *xref,
1124 struct thread_master *m, void (*func)(struct event *),
1125 void *arg, int val, struct event **t_ptr)
718e3744 1126{
e6685141 1127 struct event *thread = NULL;
d62a17ae 1128
6c3aa850
DL
1129 frrtrace(9, frr_libfrr, schedule_event, m,
1130 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1131 t_ptr, 0, val, arg, 0);
abf96a87 1132
d62a17ae 1133 assert(m != NULL);
1134
cb1991af 1135 frr_with_mutex (&m->mtx) {
00dffa8c 1136 if (t_ptr && *t_ptr)
d279ef57 1137 /* thread is already scheduled; don't reschedule */
00dffa8c 1138 break;
d62a17ae 1139
2ccccdf5 1140 thread = thread_get(m, EVENT_EVENT, func, arg, xref);
cb1991af 1141 frr_with_mutex (&thread->mtx) {
d62a17ae 1142 thread->u.val = val;
c284542b 1143 thread_list_add_tail(&m->event, thread);
d62a17ae 1144 }
d62a17ae 1145
1146 if (t_ptr) {
1147 *t_ptr = thread;
1148 thread->ref = t_ptr;
1149 }
1150
1151 AWAKEN(m);
1152 }
718e3744 1153}
1154
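/*
 * Usage sketch (editor's illustration, assuming the event_add_event()
 * convenience macro from lib/event.h wrapping _event_add_event(); not part of
 * the original file).  A common pattern is to defer work onto an immediate
 * event so it runs from the main loop instead of deep inside a call chain:
 *
 *     static void process_queue(struct event *e)
 *     {
 *             struct workqueue *wq = EVENT_ARG(e);   // hypothetical type
 *             // drain wq here
 *     }
 *
 *     event_add_event(master, process_queue, wq, 0, &wq->t_process);
 */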
63ccb9cb
QY
1155/* Thread cancellation ------------------------------------------------------ */
1156
8797240e
QY
1157/**
1158 * NOT's out the .events field of pollfd corresponding to the given file
1159 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1160 *
de2754be 1161 * This needs to happen for both copies of pollfd's. See 'event_fetch'
8797240e
QY
1162 * implementation for details.
1163 *
1164 * @param master
1165 * @param fd
1166 * @param state the event to cancel. One or more (OR'd together) of the
1167 * following:
1168 * - POLLIN
1169 * - POLLOUT
1170 */
332beb64
DS
1171static void event_cancel_rw(struct thread_master *master, int fd, short state,
1172 int idx_hint)
0a95a0d0 1173{
42d74538
QY
1174 bool found = false;
1175
d62a17ae 1176 /* find the index of corresponding pollfd */
1177 nfds_t i;
1178
a9318a32
MS
1179 /* Cancel POLLHUP too just in case some bozo set it */
1180 state |= POLLHUP;
1181
1182 /* Some callers know the index of the pfd already */
1183 if (idx_hint >= 0) {
1184 i = idx_hint;
1185 found = true;
1186 } else {
1187 /* Have to look for the fd in the pfd array */
1188 for (i = 0; i < master->handler.pfdcount; i++)
1189 if (master->handler.pfds[i].fd == fd) {
1190 found = true;
1191 break;
1192 }
1193 }
42d74538
QY
1194
1195 if (!found) {
1196 zlog_debug(
1197 "[!] Received cancellation request for nonexistent rw job");
1198 zlog_debug("[!] threadmaster: %s | fd: %d",
996c9314 1199 master->name ? master->name : "", fd);
42d74538
QY
1200 return;
1201 }
d62a17ae 1202
1203 /* NOT out event. */
1204 master->handler.pfds[i].events &= ~(state);
1205
1206 /* If all events are canceled, delete / resize the pollfd array. */
1207 if (master->handler.pfds[i].events == 0) {
1208 memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1209 (master->handler.pfdcount - i - 1)
1210 * sizeof(struct pollfd));
1211 master->handler.pfdcount--;
e985cda0
S
1212 master->handler.pfds[master->handler.pfdcount].fd = 0;
1213 master->handler.pfds[master->handler.pfdcount].events = 0;
d62a17ae 1214 }
1215
1216 /* If we have the same pollfd in the copy, perform the same operations,
1217 * otherwise return. */
1218 if (i >= master->handler.copycount)
1219 return;
1220
1221 master->handler.copy[i].events &= ~(state);
1222
1223 if (master->handler.copy[i].events == 0) {
1224 memmove(master->handler.copy + i, master->handler.copy + i + 1,
1225 (master->handler.copycount - i - 1)
1226 * sizeof(struct pollfd));
1227 master->handler.copycount--;
e985cda0
S
1228 master->handler.copy[master->handler.copycount].fd = 0;
1229 master->handler.copy[master->handler.copycount].events = 0;
d62a17ae 1230 }
0a95a0d0
DS
1231}
1232
a9318a32
MS
1233/*
1234 * Process task cancellation given a task argument: iterate through the
1235 * various lists of tasks, looking for any that match the argument.
1236 */
1237static void cancel_arg_helper(struct thread_master *master,
1238 const struct cancel_req *cr)
1239{
e6685141 1240 struct event *t;
a9318a32
MS
1241 nfds_t i;
1242 int fd;
1243 struct pollfd *pfd;
1244
1245 /* We're only processing arg-based cancellations here. */
1246 if (cr->eventobj == NULL)
1247 return;
1248
1249 /* First process the ready lists. */
1250 frr_each_safe(thread_list, &master->event, t) {
1251 if (t->arg != cr->eventobj)
1252 continue;
1253 thread_list_del(&master->event, t);
1254 if (t->ref)
1255 *t->ref = NULL;
1256 thread_add_unuse(master, t);
1257 }
1258
1259 frr_each_safe(thread_list, &master->ready, t) {
1260 if (t->arg != cr->eventobj)
1261 continue;
1262 thread_list_del(&master->ready, t);
1263 if (t->ref)
1264 *t->ref = NULL;
1265 thread_add_unuse(master, t);
1266 }
1267
1268 /* If requested, stop here and ignore io and timers */
332beb64 1269 if (CHECK_FLAG(cr->flags, EVENT_CANCEL_FLAG_READY))
a9318a32
MS
1270 return;
1271
1272 /* Check the io tasks */
1273 for (i = 0; i < master->handler.pfdcount;) {
1274 pfd = master->handler.pfds + i;
1275
1276 if (pfd->events & POLLIN)
1277 t = master->read[pfd->fd];
1278 else
1279 t = master->write[pfd->fd];
1280
1281 if (t && t->arg == cr->eventobj) {
1282 fd = pfd->fd;
1283
1284 /* Found a match to cancel: clean up fd arrays */
332beb64 1285 event_cancel_rw(master, pfd->fd, pfd->events, i);
a9318a32
MS
1286
1287 /* Clean up thread arrays */
1288 master->read[fd] = NULL;
1289 master->write[fd] = NULL;
1290
1291 /* Clear caller's ref */
1292 if (t->ref)
1293 *t->ref = NULL;
1294
1295 thread_add_unuse(master, t);
1296
1297 /* Don't increment 'i' since the cancellation will have
1298 * removed the entry from the pfd array
1299 */
1300 } else
1301 i++;
1302 }
1303
1304 /* Check the timer tasks */
1305 t = thread_timer_list_first(&master->timer);
1306 while (t) {
e6685141 1307 struct event *t_next;
a9318a32
MS
1308
1309 t_next = thread_timer_list_next(&master->timer, t);
1310
1311 if (t->arg == cr->eventobj) {
1312 thread_timer_list_del(&master->timer, t);
1313 if (t->ref)
1314 *t->ref = NULL;
1315 thread_add_unuse(master, t);
1316 }
1317
1318 t = t_next;
1319 }
1320}
1321
1189d95f 1322/**
63ccb9cb 1323 * Process cancellation requests.
1189d95f 1324 *
63ccb9cb
QY
1325 * This may only be run from the pthread which owns the thread_master.
1326 *
1327 * @param master the thread master to process
1328 * @REQUIRE master->mtx
1189d95f 1329 */
332beb64 1330static void do_event_cancel(struct thread_master *master)
718e3744 1331{
c284542b 1332 struct thread_list_head *list = NULL;
e6685141
DS
1333 struct event **thread_array = NULL;
1334 struct event *thread;
d62a17ae 1335 struct cancel_req *cr;
1336 struct listnode *ln;
7e93a54c 1337
d62a17ae 1338 for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
d279ef57 1339 /*
a9318a32
MS
1340 * If this is an event object cancellation, search
1341 * through task lists deleting any tasks which have the
1342 * specified argument - use this handy helper function.
d279ef57 1343 */
d62a17ae 1344 if (cr->eventobj) {
a9318a32 1345 cancel_arg_helper(master, cr);
d62a17ae 1346 continue;
1347 }
1348
d279ef57
DS
1349 /*
1350 * The pointer varies depending on whether the cancellation
1351 * request was made asynchronously or not. If it was, we
1352 * need to check whether the thread even exists anymore
1353 * before cancelling it.
1354 */
d62a17ae 1355 thread = (cr->thread) ? cr->thread : *cr->threadref;
1356
1357 if (!thread)
1358 continue;
1359
7e93a54c
MS
1360 list = NULL;
1361 thread_array = NULL;
1362
d62a17ae 1363 /* Determine the appropriate queue to cancel the thread from */
1364 switch (thread->type) {
2ccccdf5 1365 case EVENT_READ:
332beb64 1366 event_cancel_rw(master, thread->u.fd, POLLIN, -1);
d62a17ae 1367 thread_array = master->read;
1368 break;
2ccccdf5 1369 case EVENT_WRITE:
332beb64 1370 event_cancel_rw(master, thread->u.fd, POLLOUT, -1);
d62a17ae 1371 thread_array = master->write;
1372 break;
2ccccdf5 1373 case EVENT_TIMER:
27d29ced 1374 thread_timer_list_del(&master->timer, thread);
d62a17ae 1375 break;
2ccccdf5 1376 case EVENT_EVENT:
d62a17ae 1377 list = &master->event;
1378 break;
2ccccdf5 1379 case EVENT_READY:
d62a17ae 1380 list = &master->ready;
1381 break;
2ccccdf5
DS
1382 case EVENT_UNUSED:
1383 case EVENT_EXECUTE:
d62a17ae 1384 continue;
1385 break;
1386 }
1387
27d29ced 1388 if (list) {
c284542b 1389 thread_list_del(list, thread);
d62a17ae 1390 } else if (thread_array) {
1391 thread_array[thread->u.fd] = NULL;
d62a17ae 1392 }
1393
1394 if (thread->ref)
1395 *thread->ref = NULL;
1396
1397 thread_add_unuse(thread->master, thread);
1398 }
1399
1400 /* Delete and free all cancellation requests */
41b21bfa
MS
1401 if (master->cancel_req)
1402 list_delete_all_node(master->cancel_req);
d62a17ae 1403
332beb64 1404 /* Wake up any threads which may be blocked in event_cancel_async() */
d62a17ae 1405 master->canceled = true;
1406 pthread_cond_broadcast(&master->cancel_cond);
718e3744 1407}
1408
a9318a32
MS
1409/*
1410 * Helper function used for multiple flavors of arg-based cancellation.
1411 */
1412static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
1413{
1414 struct cancel_req *cr;
1415
1416 assert(m->owner == pthread_self());
1417
1418 /* Only worth anything if caller supplies an arg. */
1419 if (arg == NULL)
1420 return;
1421
1422 cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1423
1424 cr->flags = flags;
1425
cb1991af 1426 frr_with_mutex (&m->mtx) {
a9318a32
MS
1427 cr->eventobj = arg;
1428 listnode_add(m->cancel_req, cr);
332beb64 1429 do_event_cancel(m);
a9318a32
MS
1430 }
1431}
1432
63ccb9cb
QY
1433/**
1434 * Cancel any events which have the specified argument.
1435 *
1436 * MT-Unsafe
1437 *
1438 * @param m the thread_master to cancel from
1439 * @param arg the argument passed when creating the event
1440 */
332beb64 1441void event_cancel_event(struct thread_master *master, void *arg)
718e3744 1442{
a9318a32
MS
1443 cancel_event_helper(master, arg, 0);
1444}
d62a17ae 1445
a9318a32
MS
1446/*
1447 * Cancel ready tasks with an arg matching 'arg'
1448 *
1449 * MT-Unsafe
1450 *
1451 * @param m the thread_master to cancel from
1452 * @param arg the argument passed when creating the event
1453 */
332beb64 1454void event_cancel_event_ready(struct thread_master *m, void *arg)
a9318a32
MS
1455{
1456
1457 /* Only cancel ready/event tasks */
332beb64 1458 cancel_event_helper(m, arg, EVENT_CANCEL_FLAG_READY);
63ccb9cb 1459}
1189d95f 1460
63ccb9cb
QY
1461/**
1462 * Cancel a specific task.
1463 *
1464 * MT-Unsafe
1465 *
1466 * @param thread task to cancel
1467 */
332beb64 1468void event_cancel(struct event **thread)
63ccb9cb 1469{
b3d6bc6e
MS
1470 struct thread_master *master;
1471
1472 if (thread == NULL || *thread == NULL)
1473 return;
1474
1475 master = (*thread)->master;
d62a17ae 1476
332beb64
DS
1477 frrtrace(9, frr_libfrr, event_cancel, master, (*thread)->xref->funcname,
1478 (*thread)->xref->xref.file, (*thread)->xref->xref.line, NULL,
1479 (*thread)->u.fd, (*thread)->u.val, (*thread)->arg,
1480 (*thread)->u.sands.tv_sec);
abf96a87 1481
6ed04aa2
DS
1482 assert(master->owner == pthread_self());
1483
cb1991af 1484 frr_with_mutex (&master->mtx) {
d62a17ae 1485 struct cancel_req *cr =
1486 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
b3d6bc6e 1487 cr->thread = *thread;
6ed04aa2 1488 listnode_add(master->cancel_req, cr);
332beb64 1489 do_event_cancel(master);
d62a17ae 1490 }
b3d6bc6e
MS
1491
1492 *thread = NULL;
63ccb9cb 1493}
1189d95f 1494
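/*
 * Example (editor's sketch, not part of the original source; event_add_timer()
 * is the assumed lib/event.h scheduling macro): cancelling a task through its
 * back-reference.  Because the library clears the reference both when the task
 * runs and when it is cancelled, calling this with an already-NULL reference
 * is a harmless no-op.
 *
 *     event_add_timer(master, refresh_timer, peer, 30, &t_refresh);
 *     ...
 *     event_cancel(&t_refresh);    // t_refresh is NULL afterwards
 */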
63ccb9cb
QY
1495/**
1496 * Asynchronous cancellation.
1497 *
e6685141 1498 * Called with either a struct event ** or void * to an event argument,
8797240e
QY
1499 * this function posts the correct cancellation request and blocks until it is
1500 * serviced.
63ccb9cb
QY
1501 *
1502 * If the thread is currently running, execution blocks until it completes.
1503 *
8797240e
QY
1504 * The last two parameters are mutually exclusive, i.e. if you pass one the
1505 * other must be NULL.
1506 *
1507 * When the cancellation procedure executes on the target thread_master, the
1508 * thread * provided is checked for nullity. If it is null, the thread is
1509 * assumed to no longer exist and the cancellation request is a no-op. Thus
1510 * users of this API must pass a back-reference when scheduling the original
1511 * task.
1512 *
63ccb9cb
QY
1513 * MT-Safe
1514 *
8797240e
QY
1515 * @param master the thread master with the relevant event / task
1516 * @param thread pointer to thread to cancel
1517 * @param eventobj the event
63ccb9cb 1518 */
332beb64
DS
1519void event_cancel_async(struct thread_master *master, struct event **thread,
1520 void *eventobj)
63ccb9cb 1521{
d62a17ae 1522 assert(!(thread && eventobj) && (thread || eventobj));
abf96a87
QY
1523
1524 if (thread && *thread)
332beb64 1525 frrtrace(9, frr_libfrr, event_cancel_async, master,
6c3aa850
DL
1526 (*thread)->xref->funcname, (*thread)->xref->xref.file,
1527 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
c7bb4f00
QY
1528 (*thread)->u.val, (*thread)->arg,
1529 (*thread)->u.sands.tv_sec);
abf96a87 1530 else
332beb64 1531 frrtrace(9, frr_libfrr, event_cancel_async, master, NULL, NULL,
c7bb4f00 1532 0, NULL, 0, 0, eventobj, 0);
abf96a87 1533
d62a17ae 1534 assert(master->owner != pthread_self());
1535
cb1991af 1536 frr_with_mutex (&master->mtx) {
d62a17ae 1537 master->canceled = false;
1538
1539 if (thread) {
1540 struct cancel_req *cr =
1541 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1542 cr->threadref = thread;
1543 listnode_add(master->cancel_req, cr);
1544 } else if (eventobj) {
1545 struct cancel_req *cr =
1546 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1547 cr->eventobj = eventobj;
1548 listnode_add(master->cancel_req, cr);
1549 }
1550 AWAKEN(master);
1551
1552 while (!master->canceled)
1553 pthread_cond_wait(&master->cancel_cond, &master->mtx);
1554 }
50478845
MS
1555
1556 if (thread)
1557 *thread = NULL;
718e3744 1558}
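/*
 * Example (editor's sketch, not part of the original source; 'peer_master' and
 * 'peer' are hypothetical).  Exactly one of the last two arguments may be
 * non-NULL, and the call blocks until the owning pthread has serviced the
 * request.
 *
 *     // cancel one specific task via its back-reference
 *     event_cancel_async(peer_master, &peer->t_read, NULL);
 *
 *     // or cancel every task whose argument is 'peer'
 *     event_cancel_async(peer_master, NULL, peer);
 */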
63ccb9cb 1559/* ------------------------------------------------------------------------- */
718e3744 1560
27d29ced 1561static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
d62a17ae 1562 struct timeval *timer_val)
718e3744 1563{
27d29ced
DL
1564 if (!thread_timer_list_count(timers))
1565 return NULL;
1566
e6685141 1567 struct event *next_timer = thread_timer_list_first(timers);
27d29ced
DL
1568 monotime_until(&next_timer->u.sands, timer_val);
1569 return timer_val;
718e3744 1570}
718e3744 1571
e6685141
DS
1572static struct event *thread_run(struct thread_master *m, struct event *thread,
1573 struct event *fetch)
718e3744 1574{
d62a17ae 1575 *fetch = *thread;
1576 thread_add_unuse(m, thread);
1577 return fetch;
718e3744 1578}
1579
d62a17ae 1580static int thread_process_io_helper(struct thread_master *m,
e6685141 1581 struct event *thread, short state,
45f3d590 1582 short actual_state, int pos)
5d4ccd4e 1583{
e6685141 1584 struct event **thread_array;
d62a17ae 1585
45f3d590
DS
1586 /*
1587 * poll() clears the .events field, but the pollfd array we
1588 * pass to poll() is a copy of the one used to schedule threads.
1589 * We need to synchronize state between the two here by applying
1590 * the same changes poll() made on the copy of the "real" pollfd
1591 * array.
1592 *
1593 * This cleans up a possible infinite loop where we refuse
1594 * to respond to a poll event but poll is insistent that
1595 * we should.
1596 */
1597 m->handler.pfds[pos].events &= ~(state);
1598
1599 if (!thread) {
1600 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1601 flog_err(EC_LIB_NO_THREAD,
1d5453d6 1602 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
45f3d590 1603 m->handler.pfds[pos].fd, actual_state);
d62a17ae 1604 return 0;
45f3d590 1605 }
d62a17ae 1606
2ccccdf5 1607 if (thread->type == EVENT_READ)
d62a17ae 1608 thread_array = m->read;
1609 else
1610 thread_array = m->write;
1611
1612 thread_array[thread->u.fd] = NULL;
c284542b 1613 thread_list_add_tail(&m->ready, thread);
2ccccdf5 1614 thread->type = EVENT_READY;
45f3d590 1615
d62a17ae 1616 return 1;
5d4ccd4e
DS
1617}
1618
8797240e
QY
1619/**
1620 * Process I/O events.
1621 *
1622 * Walks through file descriptor array looking for those pollfds whose .revents
1623 * field has something interesting. Deletes any invalid file descriptors.
1624 *
1625 * @param m the thread master
1626 * @param num the number of active file descriptors (return value of poll())
1627 */
d62a17ae 1628static void thread_process_io(struct thread_master *m, unsigned int num)
0a95a0d0 1629{
d62a17ae 1630 unsigned int ready = 0;
1631 struct pollfd *pfds = m->handler.copy;
1632
1633 for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1634 /* no event for current fd? immediately continue */
1635 if (pfds[i].revents == 0)
1636 continue;
1637
1638 ready++;
1639
d279ef57 1640 /*
332beb64 1641 * Unless someone has called event_cancel from another
d279ef57
DS
1642 * pthread, the only thing that could have changed in
1643 * m->handler.pfds while we were asleep is the .events
332beb64 1644 * field in a given pollfd. Barring event_cancel() that
d279ef57
DS
1645 * value should be a superset of the values we have in our
1646 * copy, so there's no need to update it. Similarly,
1647 * barring deletion, the fd should still be a valid index
1648 * into the master's pfds.
d142453d
DS
1649 *
1650 * We are including POLLERR here to do a READ event
1651 * this is because the read should fail and the
1652 * read function should handle it appropriately
d279ef57 1653 */
d142453d 1654 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
d62a17ae 1655 thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
45f3d590
DS
1656 pfds[i].revents, i);
1657 }
d62a17ae 1658 if (pfds[i].revents & POLLOUT)
1659 thread_process_io_helper(m, m->write[pfds[i].fd],
45f3d590 1660 POLLOUT, pfds[i].revents, i);
d62a17ae 1661
1662 /* if one of our file descriptors is garbage, remove the same
1663 * from
1664 * both pfds + update sizes and index */
1665 if (pfds[i].revents & POLLNVAL) {
1666 memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1667 (m->handler.pfdcount - i - 1)
1668 * sizeof(struct pollfd));
1669 m->handler.pfdcount--;
e985cda0
S
1670 m->handler.pfds[m->handler.pfdcount].fd = 0;
1671 m->handler.pfds[m->handler.pfdcount].events = 0;
d62a17ae 1672
1673 memmove(pfds + i, pfds + i + 1,
1674 (m->handler.copycount - i - 1)
1675 * sizeof(struct pollfd));
1676 m->handler.copycount--;
e985cda0
S
1677 m->handler.copy[m->handler.copycount].fd = 0;
1678 m->handler.copy[m->handler.copycount].events = 0;
d62a17ae 1679
1680 i--;
1681 }
1682 }
718e3744 1683}
1684
8b70d0b0 1685/* Add all timers that have popped to the ready list. */
e7d9e44b 1686static unsigned int thread_process_timers(struct thread_master *m,
d62a17ae 1687 struct timeval *timenow)
a48b4e6d 1688{
ab01a001
DS
1689 struct timeval prev = *timenow;
1690 bool displayed = false;
e6685141 1691 struct event *thread;
d62a17ae 1692 unsigned int ready = 0;
1693
e7d9e44b 1694 while ((thread = thread_timer_list_first(&m->timer))) {
d62a17ae 1695 if (timercmp(timenow, &thread->u.sands, <))
e7d9e44b 1696 break;
ab01a001
DS
1697 prev = thread->u.sands;
1698 prev.tv_sec += 4;
1699 /*
1700 * If the timer would have popped 4 seconds in the
1701 * past then we are in a situation where we are
1702 * really getting behind on handling of events.
1703 * Let's log it and do the right thing with it.
1704 */
1dd08c22
DS
1705 if (timercmp(timenow, &prev, >)) {
1706 atomic_fetch_add_explicit(
1707 &thread->hist->total_starv_warn, 1,
1708 memory_order_seq_cst);
1709 if (!displayed && !thread->ignore_timer_late) {
1710 flog_warn(
1711 EC_LIB_STARVE_THREAD,
1712 "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1713 thread);
1714 displayed = true;
1715 }
ab01a001
DS
1716 }
1717
e7d9e44b 1718 thread_timer_list_pop(&m->timer);
2ccccdf5 1719 thread->type = EVENT_READY;
e7d9e44b 1720 thread_list_add_tail(&m->ready, thread);
d62a17ae 1721 ready++;
1722 }
e7d9e44b 1723
d62a17ae 1724 return ready;
a48b4e6d 1725}
1726
2613abe6 1727/* process a list en masse, e.g. for event thread lists */
c284542b 1728static unsigned int thread_process(struct thread_list_head *list)
2613abe6 1729{
e6685141 1730 struct event *thread;
d62a17ae 1731 unsigned int ready = 0;
1732
c284542b 1733 while ((thread = thread_list_pop(list))) {
2ccccdf5 1734 thread->type = EVENT_READY;
c284542b 1735 thread_list_add_tail(&thread->master->ready, thread);
d62a17ae 1736 ready++;
1737 }
1738 return ready;
2613abe6
PJ
1739}
1740
1741
718e3744 1742/* Fetch next ready thread. */
de2754be 1743struct event *event_fetch(struct thread_master *m, struct event *fetch)
718e3744 1744{
e6685141 1745 struct event *thread = NULL;
d62a17ae 1746 struct timeval now;
1747 struct timeval zerotime = {0, 0};
1748 struct timeval tv;
1749 struct timeval *tw = NULL;
d81ca9a3 1750 bool eintr_p = false;
d62a17ae 1751 int num = 0;
1752
1753 do {
1754 /* Handle signals if any */
1755 if (m->handle_signals)
7cc91e67 1756 frr_sigevent_process();
d62a17ae 1757
1758 pthread_mutex_lock(&m->mtx);
1759
1760 /* Process any pending cancellation requests */
332beb64 1761 do_event_cancel(m);
d62a17ae 1762
e3c9529e
QY
1763 /*
1764 * Attempt to flush ready queue before going into poll().
1765 * This is performance-critical. Think twice before modifying.
1766 */
c284542b 1767 if ((thread = thread_list_pop(&m->ready))) {
e3c9529e
QY
1768 fetch = thread_run(m, thread, fetch);
1769 if (fetch->ref)
1770 *fetch->ref = NULL;
1771 pthread_mutex_unlock(&m->mtx);
5e822957
DS
1772 if (!m->ready_run_loop)
1773 GETRUSAGE(&m->last_getrusage);
1774 m->ready_run_loop = true;
e3c9529e
QY
1775 break;
1776 }
1777
5e822957 1778 m->ready_run_loop = false;
e3c9529e
QY
1779 /* otherwise, tick through scheduling sequence */
1780
bca37d17
QY
1781 /*
1782 * Post events to ready queue. This must come before the
1783 * following block since events should occur immediately
1784 */
d62a17ae 1785 thread_process(&m->event);
1786
bca37d17
QY
1787 /*
1788 * If there are no tasks on the ready queue, we will poll()
1789 * until a timer expires or we receive I/O, whichever comes
1790 * first. The strategy for doing this is:
d62a17ae 1791 *
1792 * - If there are events pending, set the poll() timeout to zero
1793 * - If there are no events pending, but there are timers
d279ef57
DS
1794 * pending, set the timeout to the smallest remaining time on
1795 * any timer.
d62a17ae 1796 * - If there are neither timers nor events pending, but there
d279ef57 1797 * are file descriptors pending, block indefinitely in poll()
d62a17ae 1798 * - If nothing is pending, it's time for the application to die
1799 *
1800 * In every case except the last, we need to hit poll() at least
bca37d17
QY
1801 * once per loop to avoid starvation by events
1802 */
c284542b 1803 if (!thread_list_count(&m->ready))
27d29ced 1804 tw = thread_timer_wait(&m->timer, &tv);
d62a17ae 1805
c284542b
DL
1806 if (thread_list_count(&m->ready) ||
1807 (tw && !timercmp(tw, &zerotime, >)))
d62a17ae 1808 tw = &zerotime;
1809
1810 if (!tw && m->handler.pfdcount == 0) { /* die */
1811 pthread_mutex_unlock(&m->mtx);
1812 fetch = NULL;
1813 break;
1814 }
1815
bca37d17
QY
1816 /*
1817 * Copy pollfd array + # active pollfds in it. Not necessary to
1818 * copy the array size as this is fixed.
1819 */
d62a17ae 1820 m->handler.copycount = m->handler.pfdcount;
1821 memcpy(m->handler.copy, m->handler.pfds,
1822 m->handler.copycount * sizeof(struct pollfd));
1823
e3c9529e
QY
1824 pthread_mutex_unlock(&m->mtx);
1825 {
d81ca9a3
MS
1826 eintr_p = false;
1827 num = fd_poll(m, tw, &eintr_p);
e3c9529e
QY
1828 }
1829 pthread_mutex_lock(&m->mtx);
d764d2cc 1830
e3c9529e
QY
1831 /* Handle any errors received in poll() */
1832 if (num < 0) {
d81ca9a3 1833 if (eintr_p) {
d62a17ae 1834 pthread_mutex_unlock(&m->mtx);
e3c9529e
QY
1835 /* loop around to signal handler */
1836 continue;
d62a17ae 1837 }
1838
e3c9529e 1839 /* else die */
450971aa 1840 flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
9ef9495e 1841 safe_strerror(errno));
e3c9529e
QY
1842 pthread_mutex_unlock(&m->mtx);
1843 fetch = NULL;
1844 break;
bca37d17 1845 }
d62a17ae 1846
1847 /* Post timers to ready queue. */
1848 monotime(&now);
e7d9e44b 1849 thread_process_timers(m, &now);
d62a17ae 1850
1851 /* Post I/O to ready queue. */
1852 if (num > 0)
1853 thread_process_io(m, num);
1854
d62a17ae 1855 pthread_mutex_unlock(&m->mtx);
1856
1857 } while (!thread && m->spin);
1858
1859 return fetch;
718e3744 1860}
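/*
 * Editor's sketch: the fetch/call loop a daemon typically runs around
 * this function (in FRR proper this loop lives in libfrr's frr_run()).
 * "master" is assumed to come from the daemon's initialization.
 */
static void example_main_loop(struct thread_master *master)
{
	struct event thread;

	while (event_fetch(master, &thread))
		event_call(&thread);
}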
1861
d62a17ae 1862static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
62f44022 1863{
d62a17ae 1864 return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1865 + (a.tv_usec - b.tv_usec));
62f44022
QY
1866}
1867
5f6eaa9b
DS
1868unsigned long event_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1869 unsigned long *cputime)
718e3744 1870{
6418e2d3 1871#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
4e2839de
DS
1872
1873#ifdef __FreeBSD__
1874 /*
 1875 * FreeBSD appears to have an issue when clock_gettime is called
 1876 * twice with CLOCK_THREAD_CPUTIME_ID in quick succession:
 1877 * occasionally the now time will be before the start time.
 1878 * This is not good, and FRR ends up reporting CPU HOGs
 1879 * when the subtraction wraps to very large numbers.
 1880 *
 1881 * What we are going to do here is cheat a little bit:
 1882 * detect when this has happened and correct the values
 1883 * so that it cannot occur.
1884 */
1885 if (start->cpu.tv_sec == now->cpu.tv_sec &&
1886 start->cpu.tv_nsec > now->cpu.tv_nsec)
1887 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1888 else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1889 now->cpu.tv_sec = start->cpu.tv_sec;
1890 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1891 }
1892#endif
6418e2d3
DL
1893 *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1894 + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1895#else
d62a17ae 1896 /* This is 'user + sys' time. */
1897 *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1898 + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
6418e2d3 1899#endif
d62a17ae 1900 return timeval_elapsed(now->real, start->real);
8b70d0b0 1901}
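/*
 * Editor's sketch: measuring an arbitrary code section the same way
 * event_call() below does, via the GETRUSAGE wrapper and
 * event_consumed_time(). do_expensive_work() is hypothetical.
 */
static void example_measure(void)
{
	RUSAGE_T before, after;
	unsigned long walltime, cputime;

	GETRUSAGE(&before);
	do_expensive_work();
	GETRUSAGE(&after);

	/* returns wall-clock microseconds, fills in cpu microseconds */
	walltime = event_consumed_time(&after, &before, &cputime);
	zlog_debug("work took %luus wall, %luus cpu", walltime, cputime);
}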
1902
50596be0 1903/* We should aim to yield after 'yield' milliseconds, which defaults
2ccccdf5 1904 to EVENT_YIELD_TIME_SLOT.
8b70d0b0 1905 Note: we are using real (wall clock) time for this calculation.
1906 It could be argued that CPU time may make more sense in certain
1907 contexts. The things to consider are whether the thread may have
1908 blocked (in which case wall time increases, but CPU time does not),
1909 or whether the system is heavily loaded with other processes competing
d62a17ae 1910 for CPU time. On balance, wall clock time seems to make sense.
8b70d0b0 1911 Plus it has the added benefit that gettimeofday should be faster
1912 than calling getrusage. */
70c35c11 1913int event_should_yield(struct event *thread)
718e3744 1914{
d62a17ae 1915 int result;
cb1991af 1916 frr_with_mutex (&thread->mtx) {
d62a17ae 1917 result = monotime_since(&thread->real, NULL)
1918 > (int64_t)thread->yield;
1919 }
d62a17ae 1920 return result;
50596be0
DS
1921}
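/*
 * Editor's sketch: a chunked background task that yields cooperatively.
 * event_add_event() and EVENT_ARG() are assumed from event.h;
 * struct work_state, work_remaining() and process_one_item() are
 * hypothetical.
 */
static void bulk_work(struct event *thread)
{
	struct work_state *ws = EVENT_ARG(thread);

	while (work_remaining(ws)) {
		process_one_item(ws);

		/* past the yield time (EVENT_YIELD_TIME_SLOT by default)?
		 * Reschedule and return control to the main loop.
		 */
		if (event_should_yield(thread)) {
			event_add_event(thread->master, bulk_work, ws, 0,
					NULL);
			return;
		}
	}
}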
1922
70c35c11 1923void event_set_yield_time(struct event *thread, unsigned long yield_time)
50596be0 1924{
cb1991af 1925 frr_with_mutex (&thread->mtx) {
d62a17ae 1926 thread->yield = yield_time;
1927 }
718e3744 1928}
1929
5f6eaa9b 1930void event_getrusage(RUSAGE_T *r)
db9c0df9 1931{
6418e2d3
DL
1932 monotime(&r->real);
1933 if (!cputime_enabled) {
1934 memset(&r->cpu, 0, sizeof(r->cpu));
1935 return;
1936 }
1937
1938#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1939 /* not currently implemented in Linux's vDSO, but maybe at some point
1940 * in the future?
1941 */
1942 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1943#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
231db9a6
DS
1944#if defined RUSAGE_THREAD
1945#define FRR_RUSAGE RUSAGE_THREAD
1946#else
1947#define FRR_RUSAGE RUSAGE_SELF
1948#endif
6418e2d3
DL
1949 getrusage(FRR_RUSAGE, &(r->cpu));
1950#endif
db9c0df9
PJ
1951}
1952
fbcac826
QY
1953/*
1954 * Call a thread.
1955 *
1956 * This function will atomically update the thread's usage history. At present
1957 * this is the only spot where usage history is written. Nevertheless the code
1958 * has been written such that the introduction of writers in the future should
1959 * not need to update it provided the writers atomically perform only the
1960 * operations done here, i.e. updating the total and maximum times. In
1961 * particular, the maximum real and cpu times must be monotonically increasing
1962 * or this code is not correct.
1963 */
de2754be 1964void event_call(struct event *thread)
718e3744 1965{
d62a17ae 1966 RUSAGE_T before, after;
cc8b13a0 1967
45f01188
DL
1968 /* if the thread being called is the CLI, it may change cputime_enabled
1969 * ("service cputime-stats" command), which can result in nonsensical
1970 * and very confusing warnings
1971 */
1972 bool cputime_enabled_here = cputime_enabled;
1973
5e822957
DS
1974 if (thread->master->ready_run_loop)
1975 before = thread->master->last_getrusage;
1976 else
1977 GETRUSAGE(&before);
1978
d62a17ae 1979 thread->real = before.real;
718e3744 1980
de2754be 1981 frrtrace(9, frr_libfrr, event_call, thread->master,
6c3aa850 1982 thread->xref->funcname, thread->xref->xref.file,
de2754be
DS
1983 thread->xref->xref.line, NULL, thread->u.fd, thread->u.val,
1984 thread->arg, thread->u.sands.tv_sec);
abf96a87 1985
d62a17ae 1986 pthread_setspecific(thread_current, thread);
1987 (*thread->func)(thread);
1988 pthread_setspecific(thread_current, NULL);
718e3744 1989
d62a17ae 1990 GETRUSAGE(&after);
5e822957 1991 thread->master->last_getrusage = after;
718e3744 1992
45f01188
DL
1993 unsigned long walltime, cputime;
1994 unsigned long exp;
fbcac826 1995
5f6eaa9b 1996 walltime = event_consumed_time(&after, &before, &cputime);
45f01188
DL
1997
1998 /* update walltime */
1999 atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
fbcac826
QY
2000 memory_order_seq_cst);
2001 exp = atomic_load_explicit(&thread->hist->real.max,
2002 memory_order_seq_cst);
45f01188 2003 while (exp < walltime
fbcac826 2004 && !atomic_compare_exchange_weak_explicit(
45f01188
DL
2005 &thread->hist->real.max, &exp, walltime,
2006 memory_order_seq_cst, memory_order_seq_cst))
fbcac826
QY
2007 ;
2008
45f01188
DL
2009 if (cputime_enabled_here && cputime_enabled) {
2010 /* update cputime */
2011 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2012 memory_order_seq_cst);
2013 exp = atomic_load_explicit(&thread->hist->cpu.max,
2014 memory_order_seq_cst);
2015 while (exp < cputime
2016 && !atomic_compare_exchange_weak_explicit(
2017 &thread->hist->cpu.max, &exp, cputime,
2018 memory_order_seq_cst, memory_order_seq_cst))
2019 ;
2020 }
fbcac826
QY
2021
2022 atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2023 memory_order_seq_cst);
2024 atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2025 memory_order_seq_cst);
718e3744 2026
45f01188
DL
2027 if (cputime_enabled_here && cputime_enabled && cputime_threshold
2028 && cputime > cputime_threshold) {
d62a17ae 2029 /*
45f01188
DL
2030 * We have a CPU Hog on our hands. The time FRR has spent
2031 * doing actual work (not sleeping) is greater than 5 seconds.
d62a17ae 2032 * Whinge about it now, so we're aware this is yet another task
2033 * to fix.
2034 */
9b8e01ca
DS
2035 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2036 1, memory_order_seq_cst);
9ef9495e 2037 flog_warn(
039d547f
DS
2038 EC_LIB_SLOW_THREAD_CPU,
2039 "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2040 thread->xref->funcname, (unsigned long)thread->func,
45f01188
DL
2041 walltime / 1000, cputime / 1000);
2042
2043 } else if (walltime_threshold && walltime > walltime_threshold) {
039d547f 2044 /*
45f01188
DL
2045 * The runtime for a task is greater than 5 seconds, but the
2046 * cpu time is under 5 seconds. Let's whine about this because
2047 * this could imply some sort of scheduling issue.
039d547f 2048 */
9b8e01ca
DS
2049 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2050 1, memory_order_seq_cst);
039d547f
DS
2051 flog_warn(
2052 EC_LIB_SLOW_THREAD_WALL,
2053 "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
60a3efec 2054 thread->xref->funcname, (unsigned long)thread->func,
45f01188 2055 walltime / 1000, cputime / 1000);
d62a17ae 2056 }
718e3744 2057}
2058
2059/* Execute thread */
8c1186d3
DS
2060void _event_execute(const struct xref_threadsched *xref,
2061 struct thread_master *m, void (*func)(struct event *),
2062 void *arg, int val)
718e3744 2063{
e6685141 2064 struct event *thread;
718e3744 2065
c4345fbf 2066 /* Get or allocate new thread to execute. */
cb1991af 2067 frr_with_mutex (&m->mtx) {
2ccccdf5 2068 thread = thread_get(m, EVENT_EVENT, func, arg, xref);
9c7753e4 2069
c4345fbf 2070 /* Set its event value. */
cb1991af 2071 frr_with_mutex (&thread->mtx) {
2ccccdf5 2072 thread->add_type = EVENT_EXECUTE;
c4345fbf
RZ
2073 thread->u.val = val;
2074 thread->ref = &thread;
2075 }
c4345fbf 2076 }
f7c62e11 2077
c4345fbf 2078 /* Execute thread doing all accounting. */
de2754be 2079 event_call(thread);
9c7753e4 2080
c4345fbf
RZ
2081 /* Give back or free thread. */
2082 thread_add_unuse(m, thread);
718e3744 2083}
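/*
 * Editor's sketch: the event_execute() wrapper macro (assumed from
 * event.h) supplies the xref and runs the callback synchronously,
 * with full accounting via event_call() above.
 */
static void example_oneshot(struct event *thread)
{
	zlog_debug("ran synchronously: %pTHD", thread);
}

static void example_run_now(struct thread_master *m)
{
	event_execute(m, example_oneshot, NULL, 0);
}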
1543c387
MS
2084
2085/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2086void debug_signals(const sigset_t *sigs)
2087{
2088 int i, found;
2089 sigset_t tmpsigs;
2090 char buf[300];
2091
2092 /*
2093 * We're only looking at the non-realtime signals here, so we need
2094 * some limit value. Platform differences mean at some point we just
2095 * need to pick a reasonable value.
2096 */
2097#if defined SIGRTMIN
2098# define LAST_SIGNAL SIGRTMIN
2099#else
2100# define LAST_SIGNAL 32
2101#endif
2102
2103
2104 if (sigs == NULL) {
2105 sigemptyset(&tmpsigs);
2106 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2107 sigs = &tmpsigs;
2108 }
2109
2110 found = 0;
2111 buf[0] = '\0';
2112
2113 for (i = 0; i < LAST_SIGNAL; i++) {
2114 char tmp[20];
2115
2116 if (sigismember(sigs, i) > 0) {
2117 if (found > 0)
2118 strlcat(buf, ",", sizeof(buf));
2119 snprintf(tmp, sizeof(tmp), "%d", i);
2120 strlcat(buf, tmp, sizeof(buf));
2121 found++;
2122 }
2123 }
2124
2125 if (found == 0)
2126 snprintf(buf, sizeof(buf), "<none>");
2127
2128 zlog_debug("%s: %s", __func__, buf);
2129}
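/*
 * Editor's usage example: passing NULL logs the calling thread's
 * currently blocked signal mask, e.g. while debugging a daemon's
 * signal setup.
 */
static void example_dump_sigmask(void)
{
	debug_signals(NULL);
}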
a505383d 2130
f59e6882 2131static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
e6685141 2132 const struct event *thread)
f59e6882 2133{
2ccccdf5
DS
2134 static const char *const types[] = {
2135 [EVENT_READ] = "read", [EVENT_WRITE] = "write",
2136 [EVENT_TIMER] = "timer", [EVENT_EVENT] = "event",
2137 [EVENT_READY] = "ready", [EVENT_UNUSED] = "unused",
2138 [EVENT_EXECUTE] = "exec",
f59e6882
DL
2139 };
2140 ssize_t rv = 0;
2141 char info[16] = "";
2142
2143 if (!thread)
2144 return bputs(buf, "{(thread *)NULL}");
2145
2146 rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2147
2148 if (thread->type < array_size(types) && types[thread->type])
2149 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2150 else
2151 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2152
2153 switch (thread->type) {
2ccccdf5
DS
2154 case EVENT_READ:
2155 case EVENT_WRITE:
f59e6882
DL
2156 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2157 break;
2158
2ccccdf5 2159 case EVENT_TIMER:
f59e6882
DL
2160 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2161 break;
2ccccdf5
DS
2162 case EVENT_READY:
2163 case EVENT_EVENT:
2164 case EVENT_UNUSED:
2165 case EVENT_EXECUTE:
2166 break;
f59e6882
DL
2167 }
2168
2169 rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2170 thread->xref->funcname, thread->xref->dest,
2171 thread->xref->xref.file, thread->xref->xref.line);
2172 return rv;
2173}
2174
54929fd3 2175printfrr_ext_autoreg_p("TH", printfrr_thread);
f59e6882
DL
2176static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2177 const void *ptr)
2178{
e6685141 2179 const struct event *thread = ptr;
f59e6882
DL
2180 struct timespec remain = {};
2181
2182 if (ea->fmt[0] == 'D') {
2183 ea->fmt++;
2184 return printfrr_thread_dbg(buf, ea, thread);
2185 }
2186
2187 if (!thread) {
2188 /* need to jump over time formatting flag characters in the
2189 * input format string, i.e. adjust ea->fmt!
2190 */
2191 printfrr_time(buf, ea, &remain,
2192 TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2193 return bputch(buf, '-');
2194 }
2195
2196 TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2197 return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2198}
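/*
 * Editor's sketch: with the "TH" extension registered above, tasks can
 * be logged directly, as the starvation warning earlier in this file
 * already does with "%pTHD".
 */
static void example_log_task(struct event *thread)
{
	zlog_debug("scheduled task: %pTHD", thread);	/* debug form */
	zlog_debug("%pTH until expiry", thread);	/* timer deadline countdown */
}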