]> git.proxmox.com Git - mirror_frr.git/blame - lib/event.c
Merge pull request #13649 from donaldsharp/unlock_the_node_or_else
[mirror_frr.git] / lib / event.c
CommitLineData
acddc0ed 1// SPDX-License-Identifier: GPL-2.0-or-later
718e3744 2/* Thread management routine
3 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
718e3744 4 */
5
6/* #define DEBUG */
7
8#include <zebra.h>
308d14ae 9#include <sys/resource.h>
718e3744 10
24a58196 11#include "frrevent.h"
718e3744 12#include "memory.h"
3e41733f 13#include "frrcu.h"
718e3744 14#include "log.h"
e04ab74d 15#include "hash.h"
16#include "command.h"
05c447dd 17#include "sigevent.h"
3bf2673b 18#include "network.h"
bd74dc61 19#include "jhash.h"
fbcac826 20#include "frratomic.h"
00dffa8c 21#include "frr_pthread.h"
9ef9495e 22#include "lib_errors.h"
912d45a1 23#include "libfrr_trace.h"
1a9f340b 24#include "libfrr.h"
d6be5fb9 25
bf8d3d6a 26DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
2ccccdf5
DS
27DEFINE_MTYPE_STATIC(LIB, EVENT_MASTER, "Thread master");
28DEFINE_MTYPE_STATIC(LIB, EVENT_POLL, "Thread Poll Info");
29DEFINE_MTYPE_STATIC(LIB, EVENT_STATS, "Thread stats");
4a1ab8e4 30
3905fb73 31DECLARE_LIST(event_list, struct event, eventitem);
c284542b 32
aea25d1e
MS
33struct cancel_req {
34 int flags;
e6685141 35 struct event *thread;
aea25d1e 36 void *eventobj;
e6685141 37 struct event **threadref;
aea25d1e
MS
38};
39
40/* Flags for task cancellation */
332beb64 41#define EVENT_CANCEL_FLAG_READY 0x01
aea25d1e 42
3905fb73 43static int event_timer_cmp(const struct event *a, const struct event *b)
27d29ced
DL
44{
45 if (a->u.sands.tv_sec < b->u.sands.tv_sec)
46 return -1;
47 if (a->u.sands.tv_sec > b->u.sands.tv_sec)
48 return 1;
49 if (a->u.sands.tv_usec < b->u.sands.tv_usec)
50 return -1;
51 if (a->u.sands.tv_usec > b->u.sands.tv_usec)
52 return 1;
53 return 0;
54}
55
3905fb73 56DECLARE_HEAP(event_timer_list, struct event, timeritem, event_timer_cmp);
27d29ced 57
3b96b781
HT
58#if defined(__APPLE__)
59#include <mach/mach.h>
60#include <mach/mach_time.h>
61#endif
62
d62a17ae 63#define AWAKEN(m) \
64 do { \
2b64873d 65 const unsigned char wakebyte = 0x01; \
d62a17ae 66 write(m->io_pipe[1], &wakebyte, 1); \
02e701e4 67 } while (0)
3bf2673b 68
62f44022 69/* control variable for initializer */
c17faa4b 70static pthread_once_t init_once = PTHREAD_ONCE_INIT;
e0bebc7c 71pthread_key_t thread_current;
6b0655a2 72
c17faa4b 73static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
62f44022
QY
74static struct list *masters;
75
cd9d0537 76static void thread_free(struct event_loop *master, struct event *thread);
6b0655a2 77
45f01188
DL
78#ifndef EXCLUDE_CPU_TIME
79#define EXCLUDE_CPU_TIME 0
80#endif
81#ifndef CONSUMED_TIME_CHECK
82#define CONSUMED_TIME_CHECK 0
83#endif
84
85bool cputime_enabled = !EXCLUDE_CPU_TIME;
86unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
87unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
88
62f44022 89/* CLI start ---------------------------------------------------------------- */
cb37cb33 90#include "lib/event_clippy.c"
45f01188 91
04ec6679 92static unsigned int cpu_record_hash_key(const struct cpu_event_history *a)
e04ab74d 93{
883cc51d 94 int size = sizeof(a->func);
bd74dc61
DS
95
96 return jhash(&a->func, size, 0);
e04ab74d 97}
98
04ec6679
DS
99static bool cpu_record_hash_cmp(const struct cpu_event_history *a,
100 const struct cpu_event_history *b)
e04ab74d 101{
d62a17ae 102 return a->func == b->func;
e04ab74d 103}
104
04ec6679 105static void *cpu_record_hash_alloc(struct cpu_event_history *a)
e04ab74d 106{
04ec6679 107 struct cpu_event_history *new;
02e701e4 108
04ec6679 109 new = XCALLOC(MTYPE_EVENT_STATS, sizeof(struct cpu_event_history));
d62a17ae 110 new->func = a->func;
111 new->funcname = a->funcname;
112 return new;
e04ab74d 113}
114
d62a17ae 115static void cpu_record_hash_free(void *a)
228da428 116{
04ec6679 117 struct cpu_event_history *hist = a;
d62a17ae 118
2ccccdf5 119 XFREE(MTYPE_EVENT_STATS, hist);
228da428
CC
120}
121
04ec6679
DS
122static void vty_out_cpu_event_history(struct vty *vty,
123 struct cpu_event_history *a)
e04ab74d 124{
1dd08c22
DS
125 vty_out(vty,
126 "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
72327cf3 127 a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
9b8e01ca
DS
128 a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
129 (a->real.total / a->total_calls), a->real.max,
1dd08c22 130 a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
9b8e01ca 131 vty_out(vty, " %c%c%c%c%c %s\n",
2ccccdf5
DS
132 a->types & (1 << EVENT_READ) ? 'R' : ' ',
133 a->types & (1 << EVENT_WRITE) ? 'W' : ' ',
134 a->types & (1 << EVENT_TIMER) ? 'T' : ' ',
135 a->types & (1 << EVENT_EVENT) ? 'E' : ' ',
136 a->types & (1 << EVENT_EXECUTE) ? 'X' : ' ', a->funcname);
e04ab74d 137}
138
e3b78da8 139static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
e04ab74d 140{
04ec6679
DS
141 struct cpu_event_history *totals = args[0];
142 struct cpu_event_history copy;
d62a17ae 143 struct vty *vty = args[1];
fbcac826 144 uint8_t *filter = args[2];
d62a17ae 145
04ec6679 146 struct cpu_event_history *a = bucket->data;
d62a17ae 147
fbcac826
QY
148 copy.total_active =
149 atomic_load_explicit(&a->total_active, memory_order_seq_cst);
150 copy.total_calls =
151 atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
9b8e01ca
DS
152 copy.total_cpu_warn =
153 atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
154 copy.total_wall_warn =
155 atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
1dd08c22
DS
156 copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
157 memory_order_seq_cst);
fbcac826
QY
158 copy.cpu.total =
159 atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
160 copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
161 copy.real.total =
162 atomic_load_explicit(&a->real.total, memory_order_seq_cst);
163 copy.real.max =
164 atomic_load_explicit(&a->real.max, memory_order_seq_cst);
165 copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
166 copy.funcname = a->funcname;
167
168 if (!(copy.types & *filter))
d62a17ae 169 return;
fbcac826 170
04ec6679 171 vty_out_cpu_event_history(vty, &copy);
fbcac826
QY
172 totals->total_active += copy.total_active;
173 totals->total_calls += copy.total_calls;
9b8e01ca
DS
174 totals->total_cpu_warn += copy.total_cpu_warn;
175 totals->total_wall_warn += copy.total_wall_warn;
1dd08c22 176 totals->total_starv_warn += copy.total_starv_warn;
fbcac826
QY
177 totals->real.total += copy.real.total;
178 if (totals->real.max < copy.real.max)
179 totals->real.max = copy.real.max;
180 totals->cpu.total += copy.cpu.total;
181 if (totals->cpu.max < copy.cpu.max)
182 totals->cpu.max = copy.cpu.max;
e04ab74d 183}
184
fbcac826 185static void cpu_record_print(struct vty *vty, uint8_t filter)
e04ab74d 186{
04ec6679 187 struct cpu_event_history tmp;
d62a17ae 188 void *args[3] = {&tmp, vty, &filter};
cd9d0537 189 struct event_loop *m;
d62a17ae 190 struct listnode *ln;
191
45f01188
DL
192 if (!cputime_enabled)
193 vty_out(vty,
194 "\n"
195 "Collecting CPU time statistics is currently disabled. Following statistics\n"
196 "will be zero or may display data from when collection was enabled. Use the\n"
197 " \"service cputime-stats\" command to start collecting data.\n"
198 "\nCounters and wallclock times are always maintained and should be accurate.\n");
199
0d6f7fd6 200 memset(&tmp, 0, sizeof(tmp));
d62a17ae 201 tmp.funcname = "TOTAL";
202 tmp.types = filter;
203
cb1991af 204 frr_with_mutex (&masters_mtx) {
d62a17ae 205 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
206 const char *name = m->name ? m->name : "main";
d62a17ae 207 char underline[strlen(name) + 1];
02e701e4 208
d62a17ae 209 memset(underline, '-', sizeof(underline));
4f113d60 210 underline[sizeof(underline) - 1] = '\0';
d62a17ae 211
212 vty_out(vty, "\n");
213 vty_out(vty, "Showing statistics for pthread %s\n",
214 name);
215 vty_out(vty, "-------------------------------%s\n",
216 underline);
84d951d0 217 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 218 "CPU (user+system):", "Real (wall-clock):");
219 vty_out(vty,
220 "Active Runtime(ms) Invoked Avg uSec Max uSecs");
221 vty_out(vty, " Avg uSec Max uSecs");
1dd08c22
DS
222 vty_out(vty,
223 " CPU_Warn Wall_Warn Starv_Warn Type Thread\n");
d62a17ae 224
225 if (m->cpu_record->count)
226 hash_iterate(
227 m->cpu_record,
e3b78da8 228 (void (*)(struct hash_bucket *,
d62a17ae 229 void *))cpu_record_hash_print,
230 args);
231 else
232 vty_out(vty, "No data to display yet.\n");
233
234 vty_out(vty, "\n");
235 }
236 }
d62a17ae 237
238 vty_out(vty, "\n");
239 vty_out(vty, "Total thread statistics\n");
240 vty_out(vty, "-------------------------\n");
84d951d0 241 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 242 "CPU (user+system):", "Real (wall-clock):");
243 vty_out(vty, "Active Runtime(ms) Invoked Avg uSec Max uSecs");
9b8e01ca 244 vty_out(vty, " Avg uSec Max uSecs CPU_Warn Wall_Warn");
d62a17ae 245 vty_out(vty, " Type Thread\n");
246
247 if (tmp.total_calls > 0)
04ec6679 248 vty_out_cpu_event_history(vty, &tmp);
e04ab74d 249}
250
e3b78da8 251static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
e276eb82 252{
fbcac826 253 uint8_t *filter = args[0];
d62a17ae 254 struct hash *cpu_record = args[1];
255
04ec6679 256 struct cpu_event_history *a = bucket->data;
62f44022 257
d62a17ae 258 if (!(a->types & *filter))
259 return;
f48f65d2 260
d62a17ae 261 hash_release(cpu_record, bucket->data);
e276eb82
PJ
262}
263
fbcac826 264static void cpu_record_clear(uint8_t filter)
e276eb82 265{
fbcac826 266 uint8_t *tmp = &filter;
cd9d0537 267 struct event_loop *m;
d62a17ae 268 struct listnode *ln;
269
cb1991af 270 frr_with_mutex (&masters_mtx) {
d62a17ae 271 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
cb1991af 272 frr_with_mutex (&m->mtx) {
d62a17ae 273 void *args[2] = {tmp, m->cpu_record};
02e701e4 274
d62a17ae 275 hash_iterate(
276 m->cpu_record,
e3b78da8 277 (void (*)(struct hash_bucket *,
d62a17ae 278 void *))cpu_record_hash_clear,
279 args);
280 }
d62a17ae 281 }
282 }
62f44022
QY
283}
284
fbcac826 285static uint8_t parse_filter(const char *filterstr)
62f44022 286{
d62a17ae 287 int i = 0;
288 int filter = 0;
289
290 while (filterstr[i] != '\0') {
291 switch (filterstr[i]) {
292 case 'r':
293 case 'R':
2ccccdf5 294 filter |= (1 << EVENT_READ);
d62a17ae 295 break;
296 case 'w':
297 case 'W':
2ccccdf5 298 filter |= (1 << EVENT_WRITE);
d62a17ae 299 break;
300 case 't':
301 case 'T':
2ccccdf5 302 filter |= (1 << EVENT_TIMER);
d62a17ae 303 break;
304 case 'e':
305 case 'E':
2ccccdf5 306 filter |= (1 << EVENT_EVENT);
d62a17ae 307 break;
308 case 'x':
309 case 'X':
2ccccdf5 310 filter |= (1 << EVENT_EXECUTE);
d62a17ae 311 break;
312 default:
313 break;
314 }
315 ++i;
316 }
317 return filter;
62f44022
QY
318}
319
ee4dcee8
DL
320DEFUN_NOSH (show_thread_cpu,
321 show_thread_cpu_cmd,
322 "show thread cpu [FILTER]",
323 SHOW_STR
324 "Thread information\n"
325 "Thread CPU usage\n"
326 "Display filter (rwtex)\n")
62f44022 327{
fbcac826 328 uint8_t filter = (uint8_t)-1U;
d62a17ae 329 int idx = 0;
330
331 if (argv_find(argv, argc, "FILTER", &idx)) {
332 filter = parse_filter(argv[idx]->arg);
333 if (!filter) {
334 vty_out(vty,
3efd0893 335 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
d62a17ae 336 argv[idx]->arg);
337 return CMD_WARNING;
338 }
339 }
340
341 cpu_record_print(vty, filter);
342 return CMD_SUCCESS;
e276eb82 343}
45f01188
DL
344
345DEFPY (service_cputime_stats,
346 service_cputime_stats_cmd,
347 "[no] service cputime-stats",
348 NO_STR
349 "Set up miscellaneous service\n"
350 "Collect CPU usage statistics\n")
351{
352 cputime_enabled = !no;
353 return CMD_SUCCESS;
354}
355
356DEFPY (service_cputime_warning,
357 service_cputime_warning_cmd,
358 "[no] service cputime-warning (1-4294967295)",
359 NO_STR
360 "Set up miscellaneous service\n"
361 "Warn for tasks exceeding CPU usage threshold\n"
362 "Warning threshold in milliseconds\n")
363{
364 if (no)
365 cputime_threshold = 0;
366 else
367 cputime_threshold = cputime_warning * 1000;
368 return CMD_SUCCESS;
369}
370
371ALIAS (service_cputime_warning,
372 no_service_cputime_warning_cmd,
373 "no service cputime-warning",
374 NO_STR
375 "Set up miscellaneous service\n"
376 "Warn for tasks exceeding CPU usage threshold\n")
377
378DEFPY (service_walltime_warning,
379 service_walltime_warning_cmd,
380 "[no] service walltime-warning (1-4294967295)",
381 NO_STR
382 "Set up miscellaneous service\n"
383 "Warn for tasks exceeding total wallclock threshold\n"
384 "Warning threshold in milliseconds\n")
385{
386 if (no)
387 walltime_threshold = 0;
388 else
389 walltime_threshold = walltime_warning * 1000;
390 return CMD_SUCCESS;
391}
392
393ALIAS (service_walltime_warning,
394 no_service_walltime_warning_cmd,
395 "no service walltime-warning",
396 NO_STR
397 "Set up miscellaneous service\n"
398 "Warn for tasks exceeding total wallclock threshold\n")
e276eb82 399
cd9d0537 400static void show_thread_poll_helper(struct vty *vty, struct event_loop *m)
8872626b
DS
401{
402 const char *name = m->name ? m->name : "main";
403 char underline[strlen(name) + 1];
e6685141 404 struct event *thread;
8872626b
DS
405 uint32_t i;
406
407 memset(underline, '-', sizeof(underline));
408 underline[sizeof(underline) - 1] = '\0';
409
410 vty_out(vty, "\nShowing poll FD's for %s\n", name);
411 vty_out(vty, "----------------------%s\n", underline);
6c19478a
DS
412 vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
413 m->fd_limit);
a0b36ae6
DS
414 for (i = 0; i < m->handler.pfdcount; i++) {
415 vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
416 m->handler.pfds[i].fd, m->handler.pfds[i].events,
8872626b 417 m->handler.pfds[i].revents);
a0b36ae6
DS
418
419 if (m->handler.pfds[i].events & POLLIN) {
420 thread = m->read[m->handler.pfds[i].fd];
421
422 if (!thread)
423 vty_out(vty, "ERROR ");
424 else
60a3efec 425 vty_out(vty, "%s ", thread->xref->funcname);
a0b36ae6
DS
426 } else
427 vty_out(vty, " ");
428
429 if (m->handler.pfds[i].events & POLLOUT) {
430 thread = m->write[m->handler.pfds[i].fd];
431
432 if (!thread)
433 vty_out(vty, "ERROR\n");
434 else
60a3efec 435 vty_out(vty, "%s\n", thread->xref->funcname);
a0b36ae6
DS
436 } else
437 vty_out(vty, "\n");
438 }
8872626b
DS
439}
440
ee4dcee8
DL
441DEFUN_NOSH (show_thread_poll,
442 show_thread_poll_cmd,
443 "show thread poll",
444 SHOW_STR
445 "Thread information\n"
446 "Show poll FD's and information\n")
8872626b
DS
447{
448 struct listnode *node;
cd9d0537 449 struct event_loop *m;
8872626b 450
cb1991af 451 frr_with_mutex (&masters_mtx) {
02e701e4 452 for (ALL_LIST_ELEMENTS_RO(masters, node, m))
8872626b 453 show_thread_poll_helper(vty, m);
8872626b 454 }
8872626b
DS
455
456 return CMD_SUCCESS;
457}
458
459
49d41a26
DS
460DEFUN (clear_thread_cpu,
461 clear_thread_cpu_cmd,
462 "clear thread cpu [FILTER]",
62f44022 463 "Clear stored data in all pthreads\n"
49d41a26
DS
464 "Thread information\n"
465 "Thread CPU usage\n"
466 "Display filter (rwtexb)\n")
e276eb82 467{
fbcac826 468 uint8_t filter = (uint8_t)-1U;
d62a17ae 469 int idx = 0;
470
471 if (argv_find(argv, argc, "FILTER", &idx)) {
472 filter = parse_filter(argv[idx]->arg);
473 if (!filter) {
474 vty_out(vty,
3efd0893 475 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
d62a17ae 476 argv[idx]->arg);
477 return CMD_WARNING;
478 }
479 }
480
481 cpu_record_clear(filter);
482 return CMD_SUCCESS;
e276eb82 483}
6b0655a2 484
cd9d0537 485static void show_thread_timers_helper(struct vty *vty, struct event_loop *m)
22f31b8c
DS
486{
487 const char *name = m->name ? m->name : "main";
488 char underline[strlen(name) + 1];
e6685141 489 struct event *thread;
22f31b8c
DS
490
491 memset(underline, '-', sizeof(underline));
492 underline[sizeof(underline) - 1] = '\0';
493
494 vty_out(vty, "\nShowing timers for %s\n", name);
495 vty_out(vty, "-------------------%s\n", underline);
496
3905fb73 497 frr_each (event_timer_list, &m->timer, thread) {
22f31b8c
DS
498 vty_out(vty, " %-50s%pTH\n", thread->hist->funcname, thread);
499 }
500}
501
502DEFPY_NOSH (show_thread_timers,
503 show_thread_timers_cmd,
504 "show thread timers",
505 SHOW_STR
506 "Thread information\n"
507 "Show all timers and how long they have in the system\n")
508{
509 struct listnode *node;
cd9d0537 510 struct event_loop *m;
22f31b8c
DS
511
512 frr_with_mutex (&masters_mtx) {
513 for (ALL_LIST_ELEMENTS_RO(masters, node, m))
514 show_thread_timers_helper(vty, m);
515 }
516
517 return CMD_SUCCESS;
518}
519
5f6eaa9b 520void event_cmd_init(void)
0b84f294 521{
d62a17ae 522 install_element(VIEW_NODE, &show_thread_cpu_cmd);
8872626b 523 install_element(VIEW_NODE, &show_thread_poll_cmd);
d62a17ae 524 install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
45f01188
DL
525
526 install_element(CONFIG_NODE, &service_cputime_stats_cmd);
527 install_element(CONFIG_NODE, &service_cputime_warning_cmd);
528 install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
529 install_element(CONFIG_NODE, &service_walltime_warning_cmd);
530 install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
22f31b8c
DS
531
532 install_element(VIEW_NODE, &show_thread_timers_cmd);
0b84f294 533}
62f44022
QY
534/* CLI end ------------------------------------------------------------------ */
535
0b84f294 536
d62a17ae 537static void cancelreq_del(void *cr)
63ccb9cb 538{
d62a17ae 539 XFREE(MTYPE_TMP, cr);
63ccb9cb
QY
540}
541
e0bebc7c 542/* initializer, only ever called once */
4d762f26 543static void initializer(void)
e0bebc7c 544{
d62a17ae 545 pthread_key_create(&thread_current, NULL);
e0bebc7c
QY
546}
547
cd9d0537 548struct event_loop *event_master_create(const char *name)
718e3744 549{
cd9d0537 550 struct event_loop *rv;
d62a17ae 551 struct rlimit limit;
552
553 pthread_once(&init_once, &initializer);
554
cd9d0537 555 rv = XCALLOC(MTYPE_EVENT_MASTER, sizeof(struct event_loop));
d62a17ae 556
557 /* Initialize master mutex */
558 pthread_mutex_init(&rv->mtx, NULL);
559 pthread_cond_init(&rv->cancel_cond, NULL);
560
561 /* Set name */
7ffcd8bd 562 name = name ? name : "default";
2ccccdf5 563 rv->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
d62a17ae 564
565 /* Initialize I/O task data structures */
1a9f340b
MS
566
567 /* Use configured limit if present, ulimit otherwise. */
568 rv->fd_limit = frr_get_fd_limit();
569 if (rv->fd_limit == 0) {
570 getrlimit(RLIMIT_NOFILE, &limit);
571 rv->fd_limit = (int)limit.rlim_cur;
572 }
573
2ccccdf5 574 rv->read = XCALLOC(MTYPE_EVENT_POLL,
e6685141 575 sizeof(struct event *) * rv->fd_limit);
a6f235f3 576
2ccccdf5 577 rv->write = XCALLOC(MTYPE_EVENT_POLL,
e6685141 578 sizeof(struct event *) * rv->fd_limit);
d62a17ae 579
7ffcd8bd 580 char tmhashname[strlen(name) + 32];
02e701e4 581
7ffcd8bd
QY
582 snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
583 name);
bd74dc61 584 rv->cpu_record = hash_create_size(
d8b87afe 585 8, (unsigned int (*)(const void *))cpu_record_hash_key,
74df8d6d 586 (bool (*)(const void *, const void *))cpu_record_hash_cmp,
7ffcd8bd 587 tmhashname);
d62a17ae 588
3905fb73
DS
589 event_list_init(&rv->event);
590 event_list_init(&rv->ready);
591 event_list_init(&rv->unuse);
592 event_timer_list_init(&rv->timer);
d62a17ae 593
de2754be 594 /* Initialize event_fetch() settings */
d62a17ae 595 rv->spin = true;
596 rv->handle_signals = true;
597
598 /* Set pthread owner, should be updated by actual owner */
599 rv->owner = pthread_self();
600 rv->cancel_req = list_new();
601 rv->cancel_req->del = cancelreq_del;
602 rv->canceled = true;
603
604 /* Initialize pipe poker */
605 pipe(rv->io_pipe);
606 set_nonblocking(rv->io_pipe[0]);
607 set_nonblocking(rv->io_pipe[1]);
608
609 /* Initialize data structures for poll() */
610 rv->handler.pfdsize = rv->fd_limit;
611 rv->handler.pfdcount = 0;
2ccccdf5 612 rv->handler.pfds = XCALLOC(MTYPE_EVENT_MASTER,
d62a17ae 613 sizeof(struct pollfd) * rv->handler.pfdsize);
2ccccdf5 614 rv->handler.copy = XCALLOC(MTYPE_EVENT_MASTER,
d62a17ae 615 sizeof(struct pollfd) * rv->handler.pfdsize);
616
eff09c66 617 /* add to list of threadmasters */
cb1991af 618 frr_with_mutex (&masters_mtx) {
eff09c66
QY
619 if (!masters)
620 masters = list_new();
621
d62a17ae 622 listnode_add(masters, rv);
623 }
d62a17ae 624
625 return rv;
718e3744 626}
627
cd9d0537 628void event_master_set_name(struct event_loop *master, const char *name)
d8a8a8de 629{
cb1991af 630 frr_with_mutex (&master->mtx) {
2ccccdf5
DS
631 XFREE(MTYPE_EVENT_MASTER, master->name);
632 master->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
d8a8a8de 633 }
d8a8a8de
QY
634}
635
2ccccdf5 636#define EVENT_UNUSED_DEPTH 10
6ed04aa2 637
718e3744 638/* Move thread to unuse list. */
cd9d0537 639static void thread_add_unuse(struct event_loop *m, struct event *thread)
718e3744 640{
6655966d
RZ
641 pthread_mutex_t mtxc = thread->mtx;
642
d62a17ae 643 assert(m != NULL && thread != NULL);
d62a17ae 644
d62a17ae 645 thread->hist->total_active--;
e6685141 646 memset(thread, 0, sizeof(struct event));
2ccccdf5 647 thread->type = EVENT_UNUSED;
6ed04aa2 648
6655966d
RZ
649 /* Restore the thread mutex context. */
650 thread->mtx = mtxc;
651
3905fb73
DS
652 if (event_list_count(&m->unuse) < EVENT_UNUSED_DEPTH) {
653 event_list_add_tail(&m->unuse, thread);
6655966d
RZ
654 return;
655 }
656
657 thread_free(m, thread);
718e3744 658}
659
660/* Free all unused thread. */
cd9d0537 661static void thread_list_free(struct event_loop *m, struct event_list_head *list)
718e3744 662{
e6685141 663 struct event *t;
d62a17ae 664
3905fb73 665 while ((t = event_list_pop(list)))
6655966d 666 thread_free(m, t);
718e3744 667}
668
cd9d0537 669static void thread_array_free(struct event_loop *m, struct event **thread_array)
308d14ae 670{
e6685141 671 struct event *t;
d62a17ae 672 int index;
673
674 for (index = 0; index < m->fd_limit; ++index) {
675 t = thread_array[index];
676 if (t) {
677 thread_array[index] = NULL;
6655966d 678 thread_free(m, t);
d62a17ae 679 }
680 }
2ccccdf5 681 XFREE(MTYPE_EVENT_POLL, thread_array);
308d14ae
DV
682}
683
495f0b13 684/*
ce50d11c 685 * event_master_free_unused
495f0b13
DS
686 *
687 * As threads are finished with they are put on the
688 * unuse list for later reuse.
689 * If we are shutting down, Free up unused threads
690 * So we can see if we forget to shut anything off
691 */
cd9d0537 692void event_master_free_unused(struct event_loop *m)
495f0b13 693{
cb1991af 694 frr_with_mutex (&m->mtx) {
e6685141 695 struct event *t;
02e701e4 696
3905fb73 697 while ((t = event_list_pop(&m->unuse)))
6655966d 698 thread_free(m, t);
d62a17ae 699 }
495f0b13
DS
700}
701
718e3744 702/* Stop thread scheduler. */
cd9d0537 703void event_master_free(struct event_loop *m)
718e3744 704{
e6685141 705 struct event *t;
27d29ced 706
cb1991af 707 frr_with_mutex (&masters_mtx) {
d62a17ae 708 listnode_delete(masters, m);
02e701e4 709 if (masters->count == 0)
6a154c88 710 list_delete(&masters);
d62a17ae 711 }
d62a17ae 712
713 thread_array_free(m, m->read);
714 thread_array_free(m, m->write);
3905fb73 715 while ((t = event_timer_list_pop(&m->timer)))
27d29ced 716 thread_free(m, t);
d62a17ae 717 thread_list_free(m, &m->event);
718 thread_list_free(m, &m->ready);
719 thread_list_free(m, &m->unuse);
720 pthread_mutex_destroy(&m->mtx);
33844bbe 721 pthread_cond_destroy(&m->cancel_cond);
d62a17ae 722 close(m->io_pipe[0]);
723 close(m->io_pipe[1]);
6a154c88 724 list_delete(&m->cancel_req);
1a0a92ea 725 m->cancel_req = NULL;
d62a17ae 726
d8bc11a5 727 hash_clean_and_free(&m->cpu_record, cpu_record_hash_free);
d62a17ae 728
2ccccdf5
DS
729 XFREE(MTYPE_EVENT_MASTER, m->name);
730 XFREE(MTYPE_EVENT_MASTER, m->handler.pfds);
731 XFREE(MTYPE_EVENT_MASTER, m->handler.copy);
732 XFREE(MTYPE_EVENT_MASTER, m);
718e3744 733}
734
94202742 735/* Return remain time in milliseconds. */
4f830a07 736unsigned long event_timer_remain_msec(struct event *thread)
718e3744 737{
d62a17ae 738 int64_t remain;
1189d95f 739
5f6eaa9b 740 if (!event_is_scheduled(thread))
13bcc010
DA
741 return 0;
742
cb1991af 743 frr_with_mutex (&thread->mtx) {
78ca0342 744 remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
d62a17ae 745 }
1189d95f 746
d62a17ae 747 return remain < 0 ? 0 : remain;
718e3744 748}
749
78ca0342 750/* Return remain time in seconds. */
4f830a07 751unsigned long event_timer_remain_second(struct event *thread)
78ca0342 752{
4f830a07 753 return event_timer_remain_msec(thread) / 1000LL;
78ca0342
CF
754}
755
4f830a07 756struct timeval event_timer_remain(struct event *thread)
6ac44687 757{
d62a17ae 758 struct timeval remain;
02e701e4 759
cb1991af 760 frr_with_mutex (&thread->mtx) {
d62a17ae 761 monotime_until(&thread->u.sands, &remain);
762 }
d62a17ae 763 return remain;
6ac44687
CF
764}
765
0447957e
AK
766static int time_hhmmss(char *buf, int buf_size, long sec)
767{
768 long hh;
769 long mm;
770 int wr;
771
642ac49d 772 assert(buf_size >= 8);
0447957e
AK
773
774 hh = sec / 3600;
775 sec %= 3600;
776 mm = sec / 60;
777 sec %= 60;
778
779 wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
780
781 return wr != 8;
782}
783
5f6eaa9b 784char *event_timer_to_hhmmss(char *buf, int buf_size, struct event *t_timer)
0447957e 785{
02e701e4 786 if (t_timer)
4f830a07 787 time_hhmmss(buf, buf_size, event_timer_remain_second(t_timer));
02e701e4 788 else
0447957e 789 snprintf(buf, buf_size, "--:--:--");
02e701e4 790
0447957e
AK
791 return buf;
792}
793
718e3744 794/* Get new thread. */
cd9d0537 795static struct event *thread_get(struct event_loop *m, uint8_t type,
e6685141 796 void (*func)(struct event *), void *arg,
5163a1c5 797 const struct xref_eventsched *xref)
718e3744 798{
3905fb73 799 struct event *thread = event_list_pop(&m->unuse);
04ec6679 800 struct cpu_event_history tmp;
d62a17ae 801
802 if (!thread) {
e6685141 803 thread = XCALLOC(MTYPE_THREAD, sizeof(struct event));
d62a17ae 804 /* mutex only needs to be initialized at struct creation. */
805 pthread_mutex_init(&thread->mtx, NULL);
806 m->alloc++;
807 }
808
809 thread->type = type;
810 thread->add_type = type;
811 thread->master = m;
812 thread->arg = arg;
ba7d2705 813 thread->yield = EVENT_YIELD_TIME_SLOT; /* default */
d62a17ae 814 thread->ref = NULL;
e8b3a2f7 815 thread->ignore_timer_late = false;
d62a17ae 816
817 /*
818 * So if the passed in funcname is not what we have
819 * stored that means the thread->hist needs to be
820 * updated. We keep the last one around in unused
821 * under the assumption that we are probably
822 * going to immediately allocate the same
823 * type of thread.
824 * This hopefully saves us some serious
825 * hash_get lookups.
826 */
60a3efec
DL
827 if ((thread->xref && thread->xref->funcname != xref->funcname)
828 || thread->func != func) {
d62a17ae 829 tmp.func = func;
60a3efec 830 tmp.funcname = xref->funcname;
d62a17ae 831 thread->hist =
832 hash_get(m->cpu_record, &tmp,
833 (void *(*)(void *))cpu_record_hash_alloc);
834 }
835 thread->hist->total_active++;
836 thread->func = func;
60a3efec 837 thread->xref = xref;
d62a17ae 838
839 return thread;
718e3744 840}
841
cd9d0537 842static void thread_free(struct event_loop *master, struct event *thread)
6655966d
RZ
843{
844 /* Update statistics. */
845 assert(master->alloc > 0);
846 master->alloc--;
847
848 /* Free allocated resources. */
849 pthread_mutex_destroy(&thread->mtx);
850 XFREE(MTYPE_THREAD, thread);
851}
852
cd9d0537 853static int fd_poll(struct event_loop *m, const struct timeval *timer_wait,
d81ca9a3 854 bool *eintr_p)
209a72a6 855{
d81ca9a3
MS
856 sigset_t origsigs;
857 unsigned char trash[64];
858 nfds_t count = m->handler.copycount;
859
d279ef57
DS
860 /*
861 * If timer_wait is null here, that means poll() should block
ce50d11c 862 * indefinitely, unless the event_master has overridden it by setting
d62a17ae 863 * ->selectpoll_timeout.
d279ef57 864 *
d62a17ae 865 * If the value is positive, it specifies the maximum number of
d279ef57
DS
866 * milliseconds to wait. If the timeout is -1, it specifies that
867 * we should never wait and always return immediately even if no
868 * event is detected. If the value is zero, the behavior is default.
869 */
d62a17ae 870 int timeout = -1;
871
872 /* number of file descriptors with events */
873 int num;
874
02e701e4
DS
875 if (timer_wait != NULL && m->selectpoll_timeout == 0) {
876 /* use the default value */
d62a17ae 877 timeout = (timer_wait->tv_sec * 1000)
878 + (timer_wait->tv_usec / 1000);
02e701e4
DS
879 } else if (m->selectpoll_timeout > 0) {
880 /* use the user's timeout */
d62a17ae 881 timeout = m->selectpoll_timeout;
02e701e4
DS
882 } else if (m->selectpoll_timeout < 0) {
883 /* effect a poll (return immediately) */
d62a17ae 884 timeout = 0;
02e701e4 885 }
d62a17ae 886
0bdeb5e5 887 zlog_tls_buffer_flush();
3e41733f
DL
888 rcu_read_unlock();
889 rcu_assert_read_unlocked();
890
d62a17ae 891 /* add poll pipe poker */
d81ca9a3
MS
892 assert(count + 1 < m->handler.pfdsize);
893 m->handler.copy[count].fd = m->io_pipe[0];
894 m->handler.copy[count].events = POLLIN;
895 m->handler.copy[count].revents = 0x00;
896
897 /* We need to deal with a signal-handling race here: we
898 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
899 * that may arrive just before we enter poll(). We will block the
900 * key signals, then check whether any have arrived - if so, we return
901 * before calling poll(). If not, we'll re-enable the signals
902 * in the ppoll() call.
903 */
904
905 sigemptyset(&origsigs);
906 if (m->handle_signals) {
907 /* Main pthread that handles the app signals */
908 if (frr_sigevent_check(&origsigs)) {
909 /* Signal to process - restore signal mask and return */
910 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
911 num = -1;
912 *eintr_p = true;
913 goto done;
914 }
915 } else {
916 /* Don't make any changes for the non-main pthreads */
917 pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
918 }
d62a17ae 919
d81ca9a3
MS
920#if defined(HAVE_PPOLL)
921 struct timespec ts, *tsp;
922
923 if (timeout >= 0) {
924 ts.tv_sec = timeout / 1000;
925 ts.tv_nsec = (timeout % 1000) * 1000000;
926 tsp = &ts;
927 } else
928 tsp = NULL;
929
930 num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
931 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
932#else
933 /* Not ideal - there is a race after we restore the signal mask */
934 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
935 num = poll(m->handler.copy, count + 1, timeout);
936#endif
d62a17ae 937
d81ca9a3
MS
938done:
939
940 if (num < 0 && errno == EINTR)
941 *eintr_p = true;
942
943 if (num > 0 && m->handler.copy[count].revents != 0 && num--)
d62a17ae 944 while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
945 ;
946
3e41733f
DL
947 rcu_read_lock();
948
d62a17ae 949 return num;
209a72a6
DS
950}
951
718e3744 952/* Add new read thread. */
5163a1c5 953void _event_add_read_write(const struct xref_eventsched *xref,
cd9d0537 954 struct event_loop *m, void (*func)(struct event *),
2453d15d 955 void *arg, int fd, struct event **t_ptr)
718e3744 956{
2ccccdf5 957 int dir = xref->event_type;
e6685141
DS
958 struct event *thread = NULL;
959 struct event **thread_array;
d62a17ae 960
2ccccdf5 961 if (dir == EVENT_READ)
6c3aa850
DL
962 frrtrace(9, frr_libfrr, schedule_read, m,
963 xref->funcname, xref->xref.file, xref->xref.line,
964 t_ptr, fd, 0, arg, 0);
abf96a87 965 else
6c3aa850
DL
966 frrtrace(9, frr_libfrr, schedule_write, m,
967 xref->funcname, xref->xref.file, xref->xref.line,
968 t_ptr, fd, 0, arg, 0);
abf96a87 969
188acbb9
DS
970 assert(fd >= 0);
971 if (fd >= m->fd_limit)
972 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
973
cb1991af 974 frr_with_mutex (&m->mtx) {
02e701e4 975 /* Thread is already scheduled; don't reschedule */
00dffa8c 976 if (t_ptr && *t_ptr)
00dffa8c 977 break;
d62a17ae 978
979 /* default to a new pollfd */
980 nfds_t queuepos = m->handler.pfdcount;
981
2ccccdf5 982 if (dir == EVENT_READ)
1ef14bee
DS
983 thread_array = m->read;
984 else
985 thread_array = m->write;
986
02e701e4
DS
987 /*
988 * if we already have a pollfd for our file descriptor, find and
989 * use it
990 */
d62a17ae 991 for (nfds_t i = 0; i < m->handler.pfdcount; i++)
992 if (m->handler.pfds[i].fd == fd) {
993 queuepos = i;
1ef14bee
DS
994
995#ifdef DEV_BUILD
996 /*
997 * What happens if we have a thread already
998 * created for this event?
999 */
1000 if (thread_array[fd])
1001 assert(!"Thread already scheduled for file descriptor");
1002#endif
d62a17ae 1003 break;
1004 }
1005
1006 /* make sure we have room for this fd + pipe poker fd */
1007 assert(queuepos + 1 < m->handler.pfdsize);
1008
60a3efec 1009 thread = thread_get(m, dir, func, arg, xref);
d62a17ae 1010
1011 m->handler.pfds[queuepos].fd = fd;
1012 m->handler.pfds[queuepos].events |=
2ccccdf5 1013 (dir == EVENT_READ ? POLLIN : POLLOUT);
d62a17ae 1014
1015 if (queuepos == m->handler.pfdcount)
1016 m->handler.pfdcount++;
1017
1018 if (thread) {
cb1991af 1019 frr_with_mutex (&thread->mtx) {
d62a17ae 1020 thread->u.fd = fd;
1ef14bee 1021 thread_array[thread->u.fd] = thread;
d62a17ae 1022 }
d62a17ae 1023
1024 if (t_ptr) {
1025 *t_ptr = thread;
1026 thread->ref = t_ptr;
1027 }
1028 }
1029
1030 AWAKEN(m);
1031 }
718e3744 1032}
1033
5163a1c5 1034static void _event_add_timer_timeval(const struct xref_eventsched *xref,
cd9d0537 1035 struct event_loop *m,
907a2395
DS
1036 void (*func)(struct event *), void *arg,
1037 struct timeval *time_relative,
1038 struct event **t_ptr)
718e3744 1039{
e6685141 1040 struct event *thread;
96fe578a 1041 struct timeval t;
d62a17ae 1042
1043 assert(m != NULL);
1044
d62a17ae 1045 assert(time_relative);
1046
6c3aa850
DL
1047 frrtrace(9, frr_libfrr, schedule_timer, m,
1048 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1049 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
abf96a87 1050
96fe578a
MS
1051 /* Compute expiration/deadline time. */
1052 monotime(&t);
1053 timeradd(&t, time_relative, &t);
1054
cb1991af 1055 frr_with_mutex (&m->mtx) {
00dffa8c 1056 if (t_ptr && *t_ptr)
d279ef57 1057 /* thread is already scheduled; don't reschedule */
ee1455dd 1058 return;
d62a17ae 1059
2ccccdf5 1060 thread = thread_get(m, EVENT_TIMER, func, arg, xref);
d62a17ae 1061
cb1991af 1062 frr_with_mutex (&thread->mtx) {
96fe578a 1063 thread->u.sands = t;
3905fb73 1064 event_timer_list_add(&m->timer, thread);
d62a17ae 1065 if (t_ptr) {
1066 *t_ptr = thread;
1067 thread->ref = t_ptr;
1068 }
1069 }
d62a17ae 1070
96fe578a
MS
1071 /* The timer list is sorted - if this new timer
1072 * might change the time we'll wait for, give the pthread
1073 * a chance to re-compute.
1074 */
3905fb73 1075 if (event_timer_list_first(&m->timer) == thread)
96fe578a 1076 AWAKEN(m);
d62a17ae 1077 }
e2eff5c3
DS
1078#define ONEYEAR2SEC (60 * 60 * 24 * 365)
1079 if (time_relative->tv_sec > ONEYEAR2SEC)
1080 flog_err(
1081 EC_LIB_TIMER_TOO_LONG,
1082 "Timer: %pTHD is created with an expiration that is greater than 1 year",
1083 thread);
9e867fe6 1084}
1085
98c91ac6 1086
1087/* Add timer event thread. */
cd9d0537
DS
1088void _event_add_timer(const struct xref_eventsched *xref, struct event_loop *m,
1089 void (*func)(struct event *), void *arg, long timer,
1090 struct event **t_ptr)
9e867fe6 1091{
d62a17ae 1092 struct timeval trel;
9e867fe6 1093
d62a17ae 1094 assert(m != NULL);
9e867fe6 1095
d62a17ae 1096 trel.tv_sec = timer;
1097 trel.tv_usec = 0;
9e867fe6 1098
907a2395 1099 _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
98c91ac6 1100}
9e867fe6 1101
98c91ac6 1102/* Add timer event thread with "millisecond" resolution */
5163a1c5 1103void _event_add_timer_msec(const struct xref_eventsched *xref,
cd9d0537 1104 struct event_loop *m, void (*func)(struct event *),
2453d15d 1105 void *arg, long timer, struct event **t_ptr)
98c91ac6 1106{
d62a17ae 1107 struct timeval trel;
9e867fe6 1108
d62a17ae 1109 assert(m != NULL);
718e3744 1110
d62a17ae 1111 trel.tv_sec = timer / 1000;
1112 trel.tv_usec = 1000 * (timer % 1000);
98c91ac6 1113
907a2395 1114 _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
a48b4e6d 1115}
1116
4322dea7 1117/* Add timer event thread with "timeval" resolution */
5163a1c5 1118void _event_add_timer_tv(const struct xref_eventsched *xref,
cd9d0537 1119 struct event_loop *m, void (*func)(struct event *),
907a2395 1120 void *arg, struct timeval *tv, struct event **t_ptr)
d03c4cbd 1121{
907a2395 1122 _event_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
d03c4cbd
DL
1123}
1124
718e3744 1125/* Add simple event thread. */
cd9d0537
DS
1126void _event_add_event(const struct xref_eventsched *xref, struct event_loop *m,
1127 void (*func)(struct event *), void *arg, int val,
1128 struct event **t_ptr)
718e3744 1129{
e6685141 1130 struct event *thread = NULL;
d62a17ae 1131
6c3aa850
DL
1132 frrtrace(9, frr_libfrr, schedule_event, m,
1133 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1134 t_ptr, 0, val, arg, 0);
abf96a87 1135
d62a17ae 1136 assert(m != NULL);
1137
cb1991af 1138 frr_with_mutex (&m->mtx) {
00dffa8c 1139 if (t_ptr && *t_ptr)
d279ef57 1140 /* thread is already scheduled; don't reschedule */
00dffa8c 1141 break;
d62a17ae 1142
2ccccdf5 1143 thread = thread_get(m, EVENT_EVENT, func, arg, xref);
cb1991af 1144 frr_with_mutex (&thread->mtx) {
d62a17ae 1145 thread->u.val = val;
3905fb73 1146 event_list_add_tail(&m->event, thread);
d62a17ae 1147 }
d62a17ae 1148
1149 if (t_ptr) {
1150 *t_ptr = thread;
1151 thread->ref = t_ptr;
1152 }
1153
1154 AWAKEN(m);
1155 }
718e3744 1156}
1157
63ccb9cb
QY
1158/* Thread cancellation ------------------------------------------------------ */
1159
8797240e
QY
1160/**
1161 * NOT's out the .events field of pollfd corresponding to the given file
1162 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1163 *
de2754be 1164 * This needs to happen for both copies of pollfd's. See 'event_fetch'
8797240e
QY
1165 * implementation for details.
1166 *
1167 * @param master
1168 * @param fd
1169 * @param state the event to cancel. One or more (OR'd together) of the
1170 * following:
1171 * - POLLIN
1172 * - POLLOUT
1173 */
cd9d0537 1174static void event_cancel_rw(struct event_loop *master, int fd, short state,
332beb64 1175 int idx_hint)
0a95a0d0 1176{
42d74538
QY
1177 bool found = false;
1178
d62a17ae 1179 /* find the index of corresponding pollfd */
1180 nfds_t i;
1181
a9318a32
MS
1182 /* Cancel POLLHUP too just in case some bozo set it */
1183 state |= POLLHUP;
1184
1185 /* Some callers know the index of the pfd already */
1186 if (idx_hint >= 0) {
1187 i = idx_hint;
1188 found = true;
1189 } else {
1190 /* Have to look for the fd in the pfd array */
1191 for (i = 0; i < master->handler.pfdcount; i++)
1192 if (master->handler.pfds[i].fd == fd) {
1193 found = true;
1194 break;
1195 }
1196 }
42d74538
QY
1197
1198 if (!found) {
1199 zlog_debug(
1200 "[!] Received cancellation request for nonexistent rw job");
1201 zlog_debug("[!] threadmaster: %s | fd: %d",
996c9314 1202 master->name ? master->name : "", fd);
42d74538
QY
1203 return;
1204 }
d62a17ae 1205
1206 /* NOT out event. */
1207 master->handler.pfds[i].events &= ~(state);
1208
1209 /* If all events are canceled, delete / resize the pollfd array. */
1210 if (master->handler.pfds[i].events == 0) {
1211 memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1212 (master->handler.pfdcount - i - 1)
1213 * sizeof(struct pollfd));
1214 master->handler.pfdcount--;
e985cda0
S
1215 master->handler.pfds[master->handler.pfdcount].fd = 0;
1216 master->handler.pfds[master->handler.pfdcount].events = 0;
d62a17ae 1217 }
1218
02e701e4
DS
1219 /*
1220 * If we have the same pollfd in the copy, perform the same operations,
1221 * otherwise return.
1222 */
d62a17ae 1223 if (i >= master->handler.copycount)
1224 return;
1225
1226 master->handler.copy[i].events &= ~(state);
1227
1228 if (master->handler.copy[i].events == 0) {
1229 memmove(master->handler.copy + i, master->handler.copy + i + 1,
1230 (master->handler.copycount - i - 1)
1231 * sizeof(struct pollfd));
1232 master->handler.copycount--;
e985cda0 1233 master->handler.copy[master->handler.copycount].fd = 0;
02e701e4 1234 master->handler.copy[master->handler.copycount].events = 0;
d62a17ae 1235 }
0a95a0d0
DS
1236}
1237
a9318a32
MS
1238/*
1239 * Process task cancellation given a task argument: iterate through the
1240 * various lists of tasks, looking for any that match the argument.
1241 */
cd9d0537 1242static void cancel_arg_helper(struct event_loop *master,
a9318a32
MS
1243 const struct cancel_req *cr)
1244{
e6685141 1245 struct event *t;
a9318a32
MS
1246 nfds_t i;
1247 int fd;
1248 struct pollfd *pfd;
1249
1250 /* We're only processing arg-based cancellations here. */
1251 if (cr->eventobj == NULL)
1252 return;
1253
1254 /* First process the ready lists. */
3905fb73 1255 frr_each_safe (event_list, &master->event, t) {
a9318a32
MS
1256 if (t->arg != cr->eventobj)
1257 continue;
3905fb73 1258 event_list_del(&master->event, t);
a9318a32
MS
1259 if (t->ref)
1260 *t->ref = NULL;
1261 thread_add_unuse(master, t);
1262 }
1263
3905fb73 1264 frr_each_safe (event_list, &master->ready, t) {
a9318a32
MS
1265 if (t->arg != cr->eventobj)
1266 continue;
3905fb73 1267 event_list_del(&master->ready, t);
a9318a32
MS
1268 if (t->ref)
1269 *t->ref = NULL;
1270 thread_add_unuse(master, t);
1271 }
1272
1273 /* If requested, stop here and ignore io and timers */
332beb64 1274 if (CHECK_FLAG(cr->flags, EVENT_CANCEL_FLAG_READY))
a9318a32
MS
1275 return;
1276
1277 /* Check the io tasks */
1278 for (i = 0; i < master->handler.pfdcount;) {
1279 pfd = master->handler.pfds + i;
1280
1281 if (pfd->events & POLLIN)
1282 t = master->read[pfd->fd];
1283 else
1284 t = master->write[pfd->fd];
1285
1286 if (t && t->arg == cr->eventobj) {
1287 fd = pfd->fd;
1288
1289 /* Found a match to cancel: clean up fd arrays */
332beb64 1290 event_cancel_rw(master, pfd->fd, pfd->events, i);
a9318a32
MS
1291
1292 /* Clean up thread arrays */
1293 master->read[fd] = NULL;
1294 master->write[fd] = NULL;
1295
1296 /* Clear caller's ref */
1297 if (t->ref)
1298 *t->ref = NULL;
1299
1300 thread_add_unuse(master, t);
1301
1302 /* Don't increment 'i' since the cancellation will have
1303 * removed the entry from the pfd array
1304 */
1305 } else
1306 i++;
1307 }
1308
1309 /* Check the timer tasks */
3905fb73 1310 t = event_timer_list_first(&master->timer);
a9318a32 1311 while (t) {
e6685141 1312 struct event *t_next;
a9318a32 1313
3905fb73 1314 t_next = event_timer_list_next(&master->timer, t);
a9318a32
MS
1315
1316 if (t->arg == cr->eventobj) {
3905fb73 1317 event_timer_list_del(&master->timer, t);
a9318a32
MS
1318 if (t->ref)
1319 *t->ref = NULL;
1320 thread_add_unuse(master, t);
1321 }
1322
1323 t = t_next;
1324 }
1325}
1326
1189d95f 1327/**
63ccb9cb 1328 * Process cancellation requests.
1189d95f 1329 *
ce50d11c 1330 * This may only be run from the pthread which owns the event_master.
63ccb9cb
QY
1331 *
1332 * @param master the thread master to process
1333 * @REQUIRE master->mtx
1189d95f 1334 */
cd9d0537 1335static void do_event_cancel(struct event_loop *master)
718e3744 1336{
3905fb73 1337 struct event_list_head *list = NULL;
e6685141
DS
1338 struct event **thread_array = NULL;
1339 struct event *thread;
d62a17ae 1340 struct cancel_req *cr;
1341 struct listnode *ln;
7e93a54c 1342
d62a17ae 1343 for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
d279ef57 1344 /*
a9318a32
MS
1345 * If this is an event object cancellation, search
1346 * through task lists deleting any tasks which have the
1347 * specified argument - use this handy helper function.
d279ef57 1348 */
d62a17ae 1349 if (cr->eventobj) {
a9318a32 1350 cancel_arg_helper(master, cr);
d62a17ae 1351 continue;
1352 }
1353
d279ef57
DS
1354 /*
1355 * The pointer varies depending on whether the cancellation
1356 * request was made asynchronously or not. If it was, we
1357 * need to check whether the thread even exists anymore
1358 * before cancelling it.
1359 */
d62a17ae 1360 thread = (cr->thread) ? cr->thread : *cr->threadref;
1361
1362 if (!thread)
1363 continue;
1364
7e93a54c
MS
1365 list = NULL;
1366 thread_array = NULL;
1367
d62a17ae 1368 /* Determine the appropriate queue to cancel the thread from */
1369 switch (thread->type) {
2ccccdf5 1370 case EVENT_READ:
332beb64 1371 event_cancel_rw(master, thread->u.fd, POLLIN, -1);
d62a17ae 1372 thread_array = master->read;
1373 break;
2ccccdf5 1374 case EVENT_WRITE:
332beb64 1375 event_cancel_rw(master, thread->u.fd, POLLOUT, -1);
d62a17ae 1376 thread_array = master->write;
1377 break;
2ccccdf5 1378 case EVENT_TIMER:
3905fb73 1379 event_timer_list_del(&master->timer, thread);
d62a17ae 1380 break;
2ccccdf5 1381 case EVENT_EVENT:
d62a17ae 1382 list = &master->event;
1383 break;
2ccccdf5 1384 case EVENT_READY:
d62a17ae 1385 list = &master->ready;
1386 break;
2ccccdf5
DS
1387 case EVENT_UNUSED:
1388 case EVENT_EXECUTE:
d62a17ae 1389 continue;
1390 break;
1391 }
1392
02e701e4 1393 if (list)
3905fb73 1394 event_list_del(list, thread);
02e701e4 1395 else if (thread_array)
d62a17ae 1396 thread_array[thread->u.fd] = NULL;
d62a17ae 1397
1398 if (thread->ref)
1399 *thread->ref = NULL;
1400
1401 thread_add_unuse(thread->master, thread);
1402 }
1403
1404 /* Delete and free all cancellation requests */
41b21bfa
MS
1405 if (master->cancel_req)
1406 list_delete_all_node(master->cancel_req);
d62a17ae 1407
332beb64 1408 /* Wake up any threads which may be blocked in event_cancel_async() */
d62a17ae 1409 master->canceled = true;
1410 pthread_cond_broadcast(&master->cancel_cond);
718e3744 1411}
1412
a9318a32
MS
1413/*
1414 * Helper function used for multiple flavors of arg-based cancellation.
1415 */
cd9d0537 1416static void cancel_event_helper(struct event_loop *m, void *arg, int flags)
a9318a32
MS
1417{
1418 struct cancel_req *cr;
1419
1420 assert(m->owner == pthread_self());
1421
1422 /* Only worth anything if caller supplies an arg. */
1423 if (arg == NULL)
1424 return;
1425
1426 cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1427
1428 cr->flags = flags;
1429
cb1991af 1430 frr_with_mutex (&m->mtx) {
a9318a32
MS
1431 cr->eventobj = arg;
1432 listnode_add(m->cancel_req, cr);
332beb64 1433 do_event_cancel(m);
a9318a32
MS
1434 }
1435}
1436
63ccb9cb
QY
1437/**
1438 * Cancel any events which have the specified argument.
1439 *
1440 * MT-Unsafe
1441 *
ce50d11c 1442 * @param m the event_master to cancel from
63ccb9cb
QY
1443 * @param arg the argument passed when creating the event
1444 */
cd9d0537 1445void event_cancel_event(struct event_loop *master, void *arg)
718e3744 1446{
a9318a32
MS
1447 cancel_event_helper(master, arg, 0);
1448}
d62a17ae 1449
a9318a32
MS
1450/*
1451 * Cancel ready tasks with an arg matching 'arg'
1452 *
1453 * MT-Unsafe
1454 *
ce50d11c 1455 * @param m the event_master to cancel from
a9318a32
MS
1456 * @param arg the argument passed when creating the event
1457 */
cd9d0537 1458void event_cancel_event_ready(struct event_loop *m, void *arg)
a9318a32
MS
1459{
1460
1461 /* Only cancel ready/event tasks */
332beb64 1462 cancel_event_helper(m, arg, EVENT_CANCEL_FLAG_READY);
63ccb9cb 1463}
1189d95f 1464
63ccb9cb
QY
1465/**
1466 * Cancel a specific task.
1467 *
1468 * MT-Unsafe
1469 *
1470 * @param thread task to cancel
1471 */
332beb64 1472void event_cancel(struct event **thread)
63ccb9cb 1473{
cd9d0537 1474 struct event_loop *master;
b3d6bc6e
MS
1475
1476 if (thread == NULL || *thread == NULL)
1477 return;
1478
1479 master = (*thread)->master;
d62a17ae 1480
332beb64
DS
1481 frrtrace(9, frr_libfrr, event_cancel, master, (*thread)->xref->funcname,
1482 (*thread)->xref->xref.file, (*thread)->xref->xref.line, NULL,
1483 (*thread)->u.fd, (*thread)->u.val, (*thread)->arg,
1484 (*thread)->u.sands.tv_sec);
abf96a87 1485
6ed04aa2
DS
1486 assert(master->owner == pthread_self());
1487
cb1991af 1488 frr_with_mutex (&master->mtx) {
d62a17ae 1489 struct cancel_req *cr =
1490 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
b3d6bc6e 1491 cr->thread = *thread;
6ed04aa2 1492 listnode_add(master->cancel_req, cr);
332beb64 1493 do_event_cancel(master);
d62a17ae 1494 }
b3d6bc6e
MS
1495
1496 *thread = NULL;
63ccb9cb 1497}
1189d95f 1498
63ccb9cb
QY
1499/**
1500 * Asynchronous cancellation.
1501 *
e6685141 1502 * Called with either a struct event ** or void * to an event argument,
8797240e
QY
1503 * this function posts the correct cancellation request and blocks until it is
1504 * serviced.
63ccb9cb
QY
1505 *
1506 * If the thread is currently running, execution blocks until it completes.
1507 *
8797240e
QY
1508 * The last two parameters are mutually exclusive, i.e. if you pass one the
1509 * other must be NULL.
1510 *
ce50d11c 1511 * When the cancellation procedure executes on the target event_master, the
8797240e
QY
1512 * thread * provided is checked for nullity. If it is null, the thread is
1513 * assumed to no longer exist and the cancellation request is a no-op. Thus
1514 * users of this API must pass a back-reference when scheduling the original
1515 * task.
1516 *
63ccb9cb
QY
1517 * MT-Safe
1518 *
8797240e
QY
1519 * @param master the thread master with the relevant event / task
1520 * @param thread pointer to thread to cancel
1521 * @param eventobj the event
63ccb9cb 1522 */
cd9d0537 1523void event_cancel_async(struct event_loop *master, struct event **thread,
332beb64 1524 void *eventobj)
63ccb9cb 1525{
d62a17ae 1526 assert(!(thread && eventobj) && (thread || eventobj));
abf96a87
QY
1527
1528 if (thread && *thread)
332beb64 1529 frrtrace(9, frr_libfrr, event_cancel_async, master,
6c3aa850
DL
1530 (*thread)->xref->funcname, (*thread)->xref->xref.file,
1531 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
c7bb4f00
QY
1532 (*thread)->u.val, (*thread)->arg,
1533 (*thread)->u.sands.tv_sec);
abf96a87 1534 else
332beb64 1535 frrtrace(9, frr_libfrr, event_cancel_async, master, NULL, NULL,
c7bb4f00 1536 0, NULL, 0, 0, eventobj, 0);
abf96a87 1537
d62a17ae 1538 assert(master->owner != pthread_self());
1539
cb1991af 1540 frr_with_mutex (&master->mtx) {
d62a17ae 1541 master->canceled = false;
1542
1543 if (thread) {
1544 struct cancel_req *cr =
1545 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1546 cr->threadref = thread;
1547 listnode_add(master->cancel_req, cr);
1548 } else if (eventobj) {
1549 struct cancel_req *cr =
1550 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1551 cr->eventobj = eventobj;
1552 listnode_add(master->cancel_req, cr);
1553 }
1554 AWAKEN(master);
1555
1556 while (!master->canceled)
1557 pthread_cond_wait(&master->cancel_cond, &master->mtx);
1558 }
50478845
MS
1559
1560 if (thread)
1561 *thread = NULL;
718e3744 1562}
63ccb9cb 1563/* ------------------------------------------------------------------------- */
718e3744 1564
3905fb73 1565static struct timeval *thread_timer_wait(struct event_timer_list_head *timers,
d62a17ae 1566 struct timeval *timer_val)
718e3744 1567{
3905fb73 1568 if (!event_timer_list_count(timers))
27d29ced
DL
1569 return NULL;
1570
3905fb73 1571 struct event *next_timer = event_timer_list_first(timers);
02e701e4 1572
27d29ced
DL
1573 monotime_until(&next_timer->u.sands, timer_val);
1574 return timer_val;
718e3744 1575}
718e3744 1576
cd9d0537 1577static struct event *thread_run(struct event_loop *m, struct event *thread,
e6685141 1578 struct event *fetch)
718e3744 1579{
d62a17ae 1580 *fetch = *thread;
1581 thread_add_unuse(m, thread);
1582 return fetch;
718e3744 1583}
1584
cd9d0537
DS
1585static int thread_process_io_helper(struct event_loop *m, struct event *thread,
1586 short state, short actual_state, int pos)
5d4ccd4e 1587{
e6685141 1588 struct event **thread_array;
d62a17ae 1589
45f3d590
DS
1590 /*
1591 * poll() clears the .events field, but the pollfd array we
1592 * pass to poll() is a copy of the one used to schedule threads.
1593 * We need to synchronize state between the two here by applying
1594 * the same changes poll() made on the copy of the "real" pollfd
1595 * array.
1596 *
1597 * This cleans up a possible infinite loop where we refuse
1598 * to respond to a poll event but poll is insistent that
1599 * we should.
1600 */
1601 m->handler.pfds[pos].events &= ~(state);
1602
1603 if (!thread) {
1604 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1605 flog_err(EC_LIB_NO_THREAD,
1d5453d6 1606 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
45f3d590 1607 m->handler.pfds[pos].fd, actual_state);
d62a17ae 1608 return 0;
45f3d590 1609 }
d62a17ae 1610
2ccccdf5 1611 if (thread->type == EVENT_READ)
d62a17ae 1612 thread_array = m->read;
1613 else
1614 thread_array = m->write;
1615
1616 thread_array[thread->u.fd] = NULL;
3905fb73 1617 event_list_add_tail(&m->ready, thread);
2ccccdf5 1618 thread->type = EVENT_READY;
45f3d590 1619
d62a17ae 1620 return 1;
5d4ccd4e
DS
1621}
1622
8797240e
QY
1623/**
1624 * Process I/O events.
1625 *
1626 * Walks through file descriptor array looking for those pollfds whose .revents
1627 * field has something interesting. Deletes any invalid file descriptors.
1628 *
1629 * @param m the thread master
1630 * @param num the number of active file descriptors (return value of poll())
1631 */
cd9d0537 1632static void thread_process_io(struct event_loop *m, unsigned int num)
0a95a0d0 1633{
d62a17ae 1634 unsigned int ready = 0;
1635 struct pollfd *pfds = m->handler.copy;
1636
1637 for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1638 /* no event for current fd? immediately continue */
1639 if (pfds[i].revents == 0)
1640 continue;
1641
1642 ready++;
1643
d279ef57 1644 /*
332beb64 1645 * Unless someone has called event_cancel from another
d279ef57
DS
1646 * pthread, the only thing that could have changed in
1647 * m->handler.pfds while we were asleep is the .events
332beb64 1648 * field in a given pollfd. Barring event_cancel() that
d279ef57
DS
1649 * value should be a superset of the values we have in our
1650 * copy, so there's no need to update it. Similarily,
1651 * barring deletion, the fd should still be a valid index
1652 * into the master's pfds.
d142453d
DS
1653 *
1654 * We are including POLLERR here to do a READ event
1655 * this is because the read should fail and the
1656 * read function should handle it appropriately
d279ef57 1657 */
d142453d 1658 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
d62a17ae 1659 thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
45f3d590
DS
1660 pfds[i].revents, i);
1661 }
d62a17ae 1662 if (pfds[i].revents & POLLOUT)
1663 thread_process_io_helper(m, m->write[pfds[i].fd],
45f3d590 1664 POLLOUT, pfds[i].revents, i);
d62a17ae 1665
02e701e4
DS
1666 /*
1667 * if one of our file descriptors is garbage, remove the same
1668 * from both pfds + update sizes and index
1669 */
d62a17ae 1670 if (pfds[i].revents & POLLNVAL) {
1671 memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1672 (m->handler.pfdcount - i - 1)
1673 * sizeof(struct pollfd));
1674 m->handler.pfdcount--;
e985cda0
S
1675 m->handler.pfds[m->handler.pfdcount].fd = 0;
1676 m->handler.pfds[m->handler.pfdcount].events = 0;
d62a17ae 1677
1678 memmove(pfds + i, pfds + i + 1,
1679 (m->handler.copycount - i - 1)
1680 * sizeof(struct pollfd));
1681 m->handler.copycount--;
e985cda0
S
1682 m->handler.copy[m->handler.copycount].fd = 0;
1683 m->handler.copy[m->handler.copycount].events = 0;
d62a17ae 1684
1685 i--;
1686 }
1687 }
718e3744 1688}
1689
8b70d0b0 1690/* Add all timers that have popped to the ready list. */
cd9d0537 1691static unsigned int thread_process_timers(struct event_loop *m,
d62a17ae 1692 struct timeval *timenow)
a48b4e6d 1693{
ab01a001
DS
1694 struct timeval prev = *timenow;
1695 bool displayed = false;
e6685141 1696 struct event *thread;
d62a17ae 1697 unsigned int ready = 0;
1698
3905fb73 1699 while ((thread = event_timer_list_first(&m->timer))) {
d62a17ae 1700 if (timercmp(timenow, &thread->u.sands, <))
e7d9e44b 1701 break;
ab01a001
DS
1702 prev = thread->u.sands;
1703 prev.tv_sec += 4;
1704 /*
1705 * If the timer would have popped 4 seconds in the
1706 * past then we are in a situation where we are
1707 * really getting behind on handling of events.
1708 * Let's log it and do the right thing with it.
1709 */
1dd08c22
DS
1710 if (timercmp(timenow, &prev, >)) {
1711 atomic_fetch_add_explicit(
1712 &thread->hist->total_starv_warn, 1,
1713 memory_order_seq_cst);
1714 if (!displayed && !thread->ignore_timer_late) {
1715 flog_warn(
1716 EC_LIB_STARVE_THREAD,
1717 "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1718 thread);
1719 displayed = true;
1720 }
ab01a001
DS
1721 }
1722
3905fb73 1723 event_timer_list_pop(&m->timer);
2ccccdf5 1724 thread->type = EVENT_READY;
3905fb73 1725 event_list_add_tail(&m->ready, thread);
d62a17ae 1726 ready++;
1727 }
e7d9e44b 1728
d62a17ae 1729 return ready;
a48b4e6d 1730}
1731
2613abe6 1732/* process a list en masse, e.g. for event thread lists */
3905fb73 1733static unsigned int thread_process(struct event_list_head *list)
2613abe6 1734{
e6685141 1735 struct event *thread;
d62a17ae 1736 unsigned int ready = 0;
1737
3905fb73 1738 while ((thread = event_list_pop(list))) {
2ccccdf5 1739 thread->type = EVENT_READY;
3905fb73 1740 event_list_add_tail(&thread->master->ready, thread);
d62a17ae 1741 ready++;
1742 }
1743 return ready;
2613abe6
PJ
1744}
1745
1746
718e3744 1747/* Fetch next ready thread. */
cd9d0537 1748struct event *event_fetch(struct event_loop *m, struct event *fetch)
718e3744 1749{
e6685141 1750 struct event *thread = NULL;
d62a17ae 1751 struct timeval now;
1752 struct timeval zerotime = {0, 0};
1753 struct timeval tv;
1754 struct timeval *tw = NULL;
d81ca9a3 1755 bool eintr_p = false;
d62a17ae 1756 int num = 0;
1757
1758 do {
1759 /* Handle signals if any */
1760 if (m->handle_signals)
7cc91e67 1761 frr_sigevent_process();
d62a17ae 1762
1763 pthread_mutex_lock(&m->mtx);
1764
1765 /* Process any pending cancellation requests */
332beb64 1766 do_event_cancel(m);
d62a17ae 1767
e3c9529e
QY
1768 /*
1769 * Attempt to flush ready queue before going into poll().
1770 * This is performance-critical. Think twice before modifying.
1771 */
3905fb73 1772 if ((thread = event_list_pop(&m->ready))) {
e3c9529e
QY
1773 fetch = thread_run(m, thread, fetch);
1774 if (fetch->ref)
1775 *fetch->ref = NULL;
1776 pthread_mutex_unlock(&m->mtx);
5e822957
DS
1777 if (!m->ready_run_loop)
1778 GETRUSAGE(&m->last_getrusage);
1779 m->ready_run_loop = true;
e3c9529e
QY
1780 break;
1781 }
1782
5e822957 1783 m->ready_run_loop = false;
e3c9529e
QY
1784 /* otherwise, tick through scheduling sequence */
1785
bca37d17
QY
1786 /*
1787 * Post events to ready queue. This must come before the
1788 * following block since events should occur immediately
1789 */
d62a17ae 1790 thread_process(&m->event);
1791
bca37d17
QY
1792 /*
1793 * If there are no tasks on the ready queue, we will poll()
1794 * until a timer expires or we receive I/O, whichever comes
1795 * first. The strategy for doing this is:
d62a17ae 1796 *
1797 * - If there are events pending, set the poll() timeout to zero
1798 * - If there are no events pending, but there are timers
d279ef57
DS
1799 * pending, set the timeout to the smallest remaining time on
1800 * any timer.
d62a17ae 1801 * - If there are neither timers nor events pending, but there
d279ef57 1802 * are file descriptors pending, block indefinitely in poll()
d62a17ae 1803 * - If nothing is pending, it's time for the application to die
1804 *
1805 * In every case except the last, we need to hit poll() at least
bca37d17
QY
1806 * once per loop to avoid starvation by events
1807 */
3905fb73 1808 if (!event_list_count(&m->ready))
27d29ced 1809 tw = thread_timer_wait(&m->timer, &tv);
d62a17ae 1810
3905fb73
DS
1811 if (event_list_count(&m->ready) ||
1812 (tw && !timercmp(tw, &zerotime, >)))
d62a17ae 1813 tw = &zerotime;
1814
1815 if (!tw && m->handler.pfdcount == 0) { /* die */
1816 pthread_mutex_unlock(&m->mtx);
1817 fetch = NULL;
1818 break;
1819 }
1820
bca37d17
QY
1821 /*
1822 * Copy pollfd array + # active pollfds in it. Not necessary to
1823 * copy the array size as this is fixed.
1824 */
d62a17ae 1825 m->handler.copycount = m->handler.pfdcount;
1826 memcpy(m->handler.copy, m->handler.pfds,
1827 m->handler.copycount * sizeof(struct pollfd));
1828
e3c9529e
QY
1829 pthread_mutex_unlock(&m->mtx);
1830 {
d81ca9a3
MS
1831 eintr_p = false;
1832 num = fd_poll(m, tw, &eintr_p);
e3c9529e
QY
1833 }
1834 pthread_mutex_lock(&m->mtx);
d764d2cc 1835
e3c9529e
QY
1836 /* Handle any errors received in poll() */
1837 if (num < 0) {
d81ca9a3 1838 if (eintr_p) {
d62a17ae 1839 pthread_mutex_unlock(&m->mtx);
e3c9529e
QY
1840 /* loop around to signal handler */
1841 continue;
d62a17ae 1842 }
1843
e3c9529e 1844 /* else die */
450971aa 1845 flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
9ef9495e 1846 safe_strerror(errno));
e3c9529e
QY
1847 pthread_mutex_unlock(&m->mtx);
1848 fetch = NULL;
1849 break;
bca37d17 1850 }
d62a17ae 1851
1852 /* Post timers to ready queue. */
1853 monotime(&now);
e7d9e44b 1854 thread_process_timers(m, &now);
d62a17ae 1855
1856 /* Post I/O to ready queue. */
1857 if (num > 0)
1858 thread_process_io(m, num);
1859
d62a17ae 1860 pthread_mutex_unlock(&m->mtx);
1861
1862 } while (!thread && m->spin);
1863
1864 return fetch;
718e3744 1865}
1866
d62a17ae 1867static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
62f44022 1868{
d62a17ae 1869 return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1870 + (a.tv_usec - b.tv_usec));
62f44022
QY
1871}
1872
5f6eaa9b
DS
1873unsigned long event_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1874 unsigned long *cputime)
718e3744 1875{
6418e2d3 1876#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
4e2839de
DS
1877
1878#ifdef __FreeBSD__
1879 /*
1880 * FreeBSD appears to have an issue when calling clock_gettime
1881 * with CLOCK_THREAD_CPUTIME_ID really close to each other
1882 * occassionally the now time will be before the start time.
1883 * This is not good and FRR is ending up with CPU HOG's
1884 * when the subtraction wraps to very large numbers
1885 *
1886 * What we are going to do here is cheat a little bit
1887 * and notice that this is a problem and just correct
1888 * it so that it is impossible to happen
1889 */
1890 if (start->cpu.tv_sec == now->cpu.tv_sec &&
1891 start->cpu.tv_nsec > now->cpu.tv_nsec)
1892 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1893 else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1894 now->cpu.tv_sec = start->cpu.tv_sec;
1895 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1896 }
1897#endif
6418e2d3
DL
1898 *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1899 + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1900#else
d62a17ae 1901 /* This is 'user + sys' time. */
1902 *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1903 + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
6418e2d3 1904#endif
d62a17ae 1905 return timeval_elapsed(now->real, start->real);
8b70d0b0 1906}
1907
02e701e4
DS
1908/*
1909 * We should aim to yield after yield milliseconds, which defaults
1910 * to EVENT_YIELD_TIME_SLOT .
1911 * Note: we are using real (wall clock) time for this calculation.
1912 * It could be argued that CPU time may make more sense in certain
1913 * contexts. The things to consider are whether the thread may have
1914 * blocked (in which case wall time increases, but CPU time does not),
1915 * or whether the system is heavily loaded with other processes competing
1916 * for CPU time. On balance, wall clock time seems to make sense.
1917 * Plus it has the added benefit that gettimeofday should be faster
1918 * than calling getrusage.
1919 */
70c35c11 1920int event_should_yield(struct event *thread)
718e3744 1921{
d62a17ae 1922 int result;
02e701e4 1923
cb1991af 1924 frr_with_mutex (&thread->mtx) {
d62a17ae 1925 result = monotime_since(&thread->real, NULL)
1926 > (int64_t)thread->yield;
1927 }
d62a17ae 1928 return result;
50596be0
DS
1929}
1930
70c35c11 1931void event_set_yield_time(struct event *thread, unsigned long yield_time)
50596be0 1932{
cb1991af 1933 frr_with_mutex (&thread->mtx) {
d62a17ae 1934 thread->yield = yield_time;
1935 }
718e3744 1936}
1937
5f6eaa9b 1938void event_getrusage(RUSAGE_T *r)
db9c0df9 1939{
6418e2d3
DL
1940 monotime(&r->real);
1941 if (!cputime_enabled) {
1942 memset(&r->cpu, 0, sizeof(r->cpu));
1943 return;
1944 }
1945
1946#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1947 /* not currently implemented in Linux's vDSO, but maybe at some point
1948 * in the future?
1949 */
1950 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1951#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
231db9a6
DS
1952#if defined RUSAGE_THREAD
1953#define FRR_RUSAGE RUSAGE_THREAD
1954#else
1955#define FRR_RUSAGE RUSAGE_SELF
1956#endif
6418e2d3
DL
1957 getrusage(FRR_RUSAGE, &(r->cpu));
1958#endif
db9c0df9
PJ
1959}
1960
fbcac826
QY
1961/*
1962 * Call a thread.
1963 *
1964 * This function will atomically update the thread's usage history. At present
1965 * this is the only spot where usage history is written. Nevertheless the code
1966 * has been written such that the introduction of writers in the future should
1967 * not need to update it provided the writers atomically perform only the
1968 * operations done here, i.e. updating the total and maximum times. In
1969 * particular, the maximum real and cpu times must be monotonically increasing
1970 * or this code is not correct.
1971 */
de2754be 1972void event_call(struct event *thread)
718e3744 1973{
d62a17ae 1974 RUSAGE_T before, after;
cc8b13a0 1975
45f01188
DL
1976 /* if the thread being called is the CLI, it may change cputime_enabled
1977 * ("service cputime-stats" command), which can result in nonsensical
1978 * and very confusing warnings
1979 */
1980 bool cputime_enabled_here = cputime_enabled;
1981
5e822957
DS
1982 if (thread->master->ready_run_loop)
1983 before = thread->master->last_getrusage;
1984 else
1985 GETRUSAGE(&before);
1986
d62a17ae 1987 thread->real = before.real;
718e3744 1988
de2754be 1989 frrtrace(9, frr_libfrr, event_call, thread->master,
6c3aa850 1990 thread->xref->funcname, thread->xref->xref.file,
de2754be
DS
1991 thread->xref->xref.line, NULL, thread->u.fd, thread->u.val,
1992 thread->arg, thread->u.sands.tv_sec);
abf96a87 1993
d62a17ae 1994 pthread_setspecific(thread_current, thread);
1995 (*thread->func)(thread);
1996 pthread_setspecific(thread_current, NULL);
718e3744 1997
d62a17ae 1998 GETRUSAGE(&after);
5e822957 1999 thread->master->last_getrusage = after;
718e3744 2000
45f01188
DL
2001 unsigned long walltime, cputime;
2002 unsigned long exp;
fbcac826 2003
5f6eaa9b 2004 walltime = event_consumed_time(&after, &before, &cputime);
45f01188
DL
2005
2006 /* update walltime */
2007 atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
fbcac826
QY
2008 memory_order_seq_cst);
2009 exp = atomic_load_explicit(&thread->hist->real.max,
2010 memory_order_seq_cst);
45f01188 2011 while (exp < walltime
fbcac826 2012 && !atomic_compare_exchange_weak_explicit(
45f01188
DL
2013 &thread->hist->real.max, &exp, walltime,
2014 memory_order_seq_cst, memory_order_seq_cst))
fbcac826
QY
2015 ;
2016
45f01188
DL
2017 if (cputime_enabled_here && cputime_enabled) {
2018 /* update cputime */
2019 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2020 memory_order_seq_cst);
2021 exp = atomic_load_explicit(&thread->hist->cpu.max,
2022 memory_order_seq_cst);
2023 while (exp < cputime
2024 && !atomic_compare_exchange_weak_explicit(
2025 &thread->hist->cpu.max, &exp, cputime,
2026 memory_order_seq_cst, memory_order_seq_cst))
2027 ;
2028 }
fbcac826
QY
2029
2030 atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2031 memory_order_seq_cst);
2032 atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2033 memory_order_seq_cst);
718e3744 2034
45f01188
DL
2035 if (cputime_enabled_here && cputime_enabled && cputime_threshold
2036 && cputime > cputime_threshold) {
d62a17ae 2037 /*
45f01188
DL
2038 * We have a CPU Hog on our hands. The time FRR has spent
2039 * doing actual work (not sleeping) is greater than 5 seconds.
d62a17ae 2040 * Whinge about it now, so we're aware this is yet another task
2041 * to fix.
2042 */
9b8e01ca
DS
2043 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2044 1, memory_order_seq_cst);
9ef9495e 2045 flog_warn(
039d547f
DS
2046 EC_LIB_SLOW_THREAD_CPU,
2047 "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2048 thread->xref->funcname, (unsigned long)thread->func,
45f01188
DL
2049 walltime / 1000, cputime / 1000);
2050
2051 } else if (walltime_threshold && walltime > walltime_threshold) {
039d547f 2052 /*
45f01188
DL
2053 * The runtime for a task is greater than 5 seconds, but the
2054 * cpu time is under 5 seconds. Let's whine about this because
2055 * this could imply some sort of scheduling issue.
039d547f 2056 */
9b8e01ca
DS
2057 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2058 1, memory_order_seq_cst);
039d547f
DS
2059 flog_warn(
2060 EC_LIB_SLOW_THREAD_WALL,
2061 "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
60a3efec 2062 thread->xref->funcname, (unsigned long)thread->func,
45f01188 2063 walltime / 1000, cputime / 1000);
d62a17ae 2064 }
718e3744 2065}
2066
2067/* Execute thread */
cd9d0537 2068void _event_execute(const struct xref_eventsched *xref, struct event_loop *m,
2453d15d 2069 void (*func)(struct event *), void *arg, int val)
718e3744 2070{
e6685141 2071 struct event *thread;
718e3744 2072
c4345fbf 2073 /* Get or allocate new thread to execute. */
cb1991af 2074 frr_with_mutex (&m->mtx) {
2ccccdf5 2075 thread = thread_get(m, EVENT_EVENT, func, arg, xref);
9c7753e4 2076
c4345fbf 2077 /* Set its event value. */
cb1991af 2078 frr_with_mutex (&thread->mtx) {
2ccccdf5 2079 thread->add_type = EVENT_EXECUTE;
c4345fbf
RZ
2080 thread->u.val = val;
2081 thread->ref = &thread;
2082 }
c4345fbf 2083 }
f7c62e11 2084
c4345fbf 2085 /* Execute thread doing all accounting. */
de2754be 2086 event_call(thread);
9c7753e4 2087
c4345fbf
RZ
2088 /* Give back or free thread. */
2089 thread_add_unuse(m, thread);
718e3744 2090}
1543c387
MS
2091
2092/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2093void debug_signals(const sigset_t *sigs)
2094{
2095 int i, found;
2096 sigset_t tmpsigs;
2097 char buf[300];
2098
2099 /*
2100 * We're only looking at the non-realtime signals here, so we need
2101 * some limit value. Platform differences mean at some point we just
2102 * need to pick a reasonable value.
2103 */
2104#if defined SIGRTMIN
2105# define LAST_SIGNAL SIGRTMIN
2106#else
2107# define LAST_SIGNAL 32
2108#endif
2109
2110
2111 if (sigs == NULL) {
2112 sigemptyset(&tmpsigs);
2113 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2114 sigs = &tmpsigs;
2115 }
2116
2117 found = 0;
2118 buf[0] = '\0';
2119
2120 for (i = 0; i < LAST_SIGNAL; i++) {
2121 char tmp[20];
2122
2123 if (sigismember(sigs, i) > 0) {
2124 if (found > 0)
2125 strlcat(buf, ",", sizeof(buf));
2126 snprintf(tmp, sizeof(tmp), "%d", i);
2127 strlcat(buf, tmp, sizeof(buf));
2128 found++;
2129 }
2130 }
2131
2132 if (found == 0)
2133 snprintf(buf, sizeof(buf), "<none>");
2134
2135 zlog_debug("%s: %s", __func__, buf);
2136}
a505383d 2137
f59e6882 2138static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
e6685141 2139 const struct event *thread)
f59e6882 2140{
2ccccdf5
DS
2141 static const char *const types[] = {
2142 [EVENT_READ] = "read", [EVENT_WRITE] = "write",
2143 [EVENT_TIMER] = "timer", [EVENT_EVENT] = "event",
2144 [EVENT_READY] = "ready", [EVENT_UNUSED] = "unused",
2145 [EVENT_EXECUTE] = "exec",
f59e6882
DL
2146 };
2147 ssize_t rv = 0;
2148 char info[16] = "";
2149
2150 if (!thread)
2151 return bputs(buf, "{(thread *)NULL}");
2152
2153 rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2154
2155 if (thread->type < array_size(types) && types[thread->type])
2156 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2157 else
2158 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2159
2160 switch (thread->type) {
2ccccdf5
DS
2161 case EVENT_READ:
2162 case EVENT_WRITE:
f59e6882
DL
2163 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2164 break;
2165
2ccccdf5 2166 case EVENT_TIMER:
f59e6882
DL
2167 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2168 break;
2ccccdf5
DS
2169 case EVENT_READY:
2170 case EVENT_EVENT:
2171 case EVENT_UNUSED:
2172 case EVENT_EXECUTE:
2173 break;
f59e6882
DL
2174 }
2175
2176 rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2177 thread->xref->funcname, thread->xref->dest,
2178 thread->xref->xref.file, thread->xref->xref.line);
2179 return rv;
2180}
2181
54929fd3 2182printfrr_ext_autoreg_p("TH", printfrr_thread);
f59e6882
DL
2183static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2184 const void *ptr)
2185{
e6685141 2186 const struct event *thread = ptr;
f59e6882
DL
2187 struct timespec remain = {};
2188
2189 if (ea->fmt[0] == 'D') {
2190 ea->fmt++;
2191 return printfrr_thread_dbg(buf, ea, thread);
2192 }
2193
2194 if (!thread) {
2195 /* need to jump over time formatting flag characters in the
2196 * input format string, i.e. adjust ea->fmt!
2197 */
2198 printfrr_time(buf, ea, &remain,
2199 TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2200 return bputch(buf, '-');
2201 }
2202
2203 TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2204 return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2205}