lib/event.c - blame at "*: Convert thread_cancelXXX to event_cancelXXX"
acddc0ed 1// SPDX-License-Identifier: GPL-2.0-or-later
718e3744 2/* Thread management routine
3 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
718e3744 4 */
5
6/* #define DEBUG */
7
8#include <zebra.h>
308d14ae 9#include <sys/resource.h>
718e3744 10
cb37cb33 11#include "event.h"
718e3744 12#include "memory.h"
3e41733f 13#include "frrcu.h"
718e3744 14#include "log.h"
e04ab74d 15#include "hash.h"
16#include "command.h"
05c447dd 17#include "sigevent.h"
3bf2673b 18#include "network.h"
bd74dc61 19#include "jhash.h"
fbcac826 20#include "frratomic.h"
00dffa8c 21#include "frr_pthread.h"
9ef9495e 22#include "lib_errors.h"
912d45a1 23#include "libfrr_trace.h"
1a9f340b 24#include "libfrr.h"
d6be5fb9 25
bf8d3d6a
DL
26DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
27DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
28DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
29DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");
4a1ab8e4 30
e6685141 31DECLARE_LIST(thread_list, struct event, threaditem);
c284542b 32
aea25d1e
MS
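/*
 * A queued cancellation request, stored on master->cancel_req and
 * serviced by do_event_cancel(). Exactly one of 'thread' (synchronous
 * cancellation), 'threadref' (asynchronous cancellation) or 'eventobj'
 * (cancel-by-argument) identifies the task(s) to cancel.
 */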
33struct cancel_req {
34 int flags;
e6685141 35 struct event *thread;
aea25d1e 36 void *eventobj;
e6685141 37 struct event **threadref;
aea25d1e
MS
38};
39
40/* Flags for task cancellation */
332beb64 41#define EVENT_CANCEL_FLAG_READY 0x01
aea25d1e 42
e6685141 43static int thread_timer_cmp(const struct event *a, const struct event *b)
27d29ced
DL
44{
45 if (a->u.sands.tv_sec < b->u.sands.tv_sec)
46 return -1;
47 if (a->u.sands.tv_sec > b->u.sands.tv_sec)
48 return 1;
49 if (a->u.sands.tv_usec < b->u.sands.tv_usec)
50 return -1;
51 if (a->u.sands.tv_usec > b->u.sands.tv_usec)
52 return 1;
53 return 0;
54}
55
e6685141 56DECLARE_HEAP(thread_timer_list, struct event, timeritem, thread_timer_cmp);
27d29ced 57
3b96b781
HT
58#if defined(__APPLE__)
59#include <mach/mach.h>
60#include <mach/mach_time.h>
61#endif
62
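/*
 * Poke the owning pthread out of poll(): writing one byte to io_pipe[1]
 * makes io_pipe[0] - which fd_poll() always adds to the poll set -
 * readable, so a sleeping event loop wakes up immediately. fd_poll()
 * drains the pipe again afterwards.
 */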
d62a17ae 63#define AWAKEN(m) \
64 do { \
2b64873d 65 const unsigned char wakebyte = 0x01; \
d62a17ae 66 write(m->io_pipe[1], &wakebyte, 1); \
 67 } while (0)
3bf2673b 68
62f44022 69/* control variable for initializer */
c17faa4b 70static pthread_once_t init_once = PTHREAD_ONCE_INIT;
e0bebc7c 71pthread_key_t thread_current;
6b0655a2 72
c17faa4b 73static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
62f44022
QY
74static struct list *masters;
75
e6685141 76static void thread_free(struct thread_master *master, struct event *thread);
6b0655a2 77
45f01188
DL
78#ifndef EXCLUDE_CPU_TIME
79#define EXCLUDE_CPU_TIME 0
80#endif
81#ifndef CONSUMED_TIME_CHECK
82#define CONSUMED_TIME_CHECK 0
83#endif
84
85bool cputime_enabled = !EXCLUDE_CPU_TIME;
86unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
87unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
88
62f44022 89/* CLI start ---------------------------------------------------------------- */
cb37cb33 90#include "lib/event_clippy.c"
45f01188 91
d8b87afe 92static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
e04ab74d 93{
883cc51d 94 int size = sizeof(a->func);
bd74dc61
DS
95
96 return jhash(&a->func, size, 0);
e04ab74d 97}
98
74df8d6d 99static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
d62a17ae 100 const struct cpu_thread_history *b)
e04ab74d 101{
d62a17ae 102 return a->func == b->func;
e04ab74d 103}
104
d62a17ae 105static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
e04ab74d 106{
d62a17ae 107 struct cpu_thread_history *new;
108 new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
109 new->func = a->func;
110 new->funcname = a->funcname;
111 return new;
e04ab74d 112}
113
d62a17ae 114static void cpu_record_hash_free(void *a)
228da428 115{
d62a17ae 116 struct cpu_thread_history *hist = a;
117
118 XFREE(MTYPE_THREAD_STATS, hist);
228da428
CC
119}
120
d62a17ae 121static void vty_out_cpu_thread_history(struct vty *vty,
122 struct cpu_thread_history *a)
e04ab74d 123{
1dd08c22
DS
124 vty_out(vty,
125 "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
72327cf3 126 a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
9b8e01ca
DS
127 a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
128 (a->real.total / a->total_calls), a->real.max,
1dd08c22 129 a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
9b8e01ca 130 vty_out(vty, " %c%c%c%c%c %s\n",
d62a17ae 131 a->types & (1 << THREAD_READ) ? 'R' : ' ',
132 a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
133 a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
134 a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
135 a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
e04ab74d 136}
137
e3b78da8 138static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
e04ab74d 139{
d62a17ae 140 struct cpu_thread_history *totals = args[0];
fbcac826 141 struct cpu_thread_history copy;
d62a17ae 142 struct vty *vty = args[1];
fbcac826 143 uint8_t *filter = args[2];
d62a17ae 144
145 struct cpu_thread_history *a = bucket->data;
146
fbcac826
QY
147 copy.total_active =
148 atomic_load_explicit(&a->total_active, memory_order_seq_cst);
149 copy.total_calls =
150 atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
9b8e01ca
DS
151 copy.total_cpu_warn =
152 atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
153 copy.total_wall_warn =
154 atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
1dd08c22
DS
155 copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
156 memory_order_seq_cst);
fbcac826
QY
157 copy.cpu.total =
158 atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
159 copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
160 copy.real.total =
161 atomic_load_explicit(&a->real.total, memory_order_seq_cst);
162 copy.real.max =
163 atomic_load_explicit(&a->real.max, memory_order_seq_cst);
164 copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
165 copy.funcname = a->funcname;
166
167 if (!(copy.types & *filter))
d62a17ae 168 return;
fbcac826
QY
169
170 vty_out_cpu_thread_history(vty, &copy);
171 totals->total_active += copy.total_active;
172 totals->total_calls += copy.total_calls;
9b8e01ca
DS
173 totals->total_cpu_warn += copy.total_cpu_warn;
174 totals->total_wall_warn += copy.total_wall_warn;
1dd08c22 175 totals->total_starv_warn += copy.total_starv_warn;
fbcac826
QY
176 totals->real.total += copy.real.total;
177 if (totals->real.max < copy.real.max)
178 totals->real.max = copy.real.max;
179 totals->cpu.total += copy.cpu.total;
180 if (totals->cpu.max < copy.cpu.max)
181 totals->cpu.max = copy.cpu.max;
e04ab74d 182}
183
fbcac826 184static void cpu_record_print(struct vty *vty, uint8_t filter)
e04ab74d 185{
d62a17ae 186 struct cpu_thread_history tmp;
187 void *args[3] = {&tmp, vty, &filter};
188 struct thread_master *m;
189 struct listnode *ln;
190
45f01188
DL
191 if (!cputime_enabled)
192 vty_out(vty,
193 "\n"
 194 "Collecting CPU time statistics is currently disabled. The following statistics\n"
195 "will be zero or may display data from when collection was enabled. Use the\n"
196 " \"service cputime-stats\" command to start collecting data.\n"
197 "\nCounters and wallclock times are always maintained and should be accurate.\n");
198
0d6f7fd6 199 memset(&tmp, 0, sizeof(tmp));
d62a17ae 200 tmp.funcname = "TOTAL";
201 tmp.types = filter;
202
cb1991af 203 frr_with_mutex (&masters_mtx) {
d62a17ae 204 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
205 const char *name = m->name ? m->name : "main";
206
207 char underline[strlen(name) + 1];
208 memset(underline, '-', sizeof(underline));
4f113d60 209 underline[sizeof(underline) - 1] = '\0';
d62a17ae 210
211 vty_out(vty, "\n");
212 vty_out(vty, "Showing statistics for pthread %s\n",
213 name);
214 vty_out(vty, "-------------------------------%s\n",
215 underline);
84d951d0 216 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 217 "CPU (user+system):", "Real (wall-clock):");
218 vty_out(vty,
219 "Active Runtime(ms) Invoked Avg uSec Max uSecs");
220 vty_out(vty, " Avg uSec Max uSecs");
1dd08c22
DS
221 vty_out(vty,
222 " CPU_Warn Wall_Warn Starv_Warn Type Thread\n");
d62a17ae 223
224 if (m->cpu_record->count)
225 hash_iterate(
226 m->cpu_record,
e3b78da8 227 (void (*)(struct hash_bucket *,
d62a17ae 228 void *))cpu_record_hash_print,
229 args);
230 else
231 vty_out(vty, "No data to display yet.\n");
232
233 vty_out(vty, "\n");
234 }
235 }
d62a17ae 236
237 vty_out(vty, "\n");
238 vty_out(vty, "Total thread statistics\n");
239 vty_out(vty, "-------------------------\n");
84d951d0 240 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 241 "CPU (user+system):", "Real (wall-clock):");
242 vty_out(vty, "Active Runtime(ms) Invoked Avg uSec Max uSecs");
9b8e01ca 243 vty_out(vty, " Avg uSec Max uSecs CPU_Warn Wall_Warn");
d62a17ae 244 vty_out(vty, " Type Thread\n");
245
246 if (tmp.total_calls > 0)
247 vty_out_cpu_thread_history(vty, &tmp);
e04ab74d 248}
249
e3b78da8 250static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
e276eb82 251{
fbcac826 252 uint8_t *filter = args[0];
d62a17ae 253 struct hash *cpu_record = args[1];
254
255 struct cpu_thread_history *a = bucket->data;
62f44022 256
d62a17ae 257 if (!(a->types & *filter))
258 return;
f48f65d2 259
d62a17ae 260 hash_release(cpu_record, bucket->data);
e276eb82
PJ
261}
262
fbcac826 263static void cpu_record_clear(uint8_t filter)
e276eb82 264{
fbcac826 265 uint8_t *tmp = &filter;
d62a17ae 266 struct thread_master *m;
267 struct listnode *ln;
268
cb1991af 269 frr_with_mutex (&masters_mtx) {
d62a17ae 270 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
cb1991af 271 frr_with_mutex (&m->mtx) {
d62a17ae 272 void *args[2] = {tmp, m->cpu_record};
273 hash_iterate(
274 m->cpu_record,
e3b78da8 275 (void (*)(struct hash_bucket *,
d62a17ae 276 void *))cpu_record_hash_clear,
277 args);
278 }
d62a17ae 279 }
280 }
62f44022
QY
281}
282
fbcac826 283static uint8_t parse_filter(const char *filterstr)
62f44022 284{
d62a17ae 285 int i = 0;
286 int filter = 0;
287
288 while (filterstr[i] != '\0') {
289 switch (filterstr[i]) {
290 case 'r':
291 case 'R':
292 filter |= (1 << THREAD_READ);
293 break;
294 case 'w':
295 case 'W':
296 filter |= (1 << THREAD_WRITE);
297 break;
298 case 't':
299 case 'T':
300 filter |= (1 << THREAD_TIMER);
301 break;
302 case 'e':
303 case 'E':
304 filter |= (1 << THREAD_EVENT);
305 break;
306 case 'x':
307 case 'X':
308 filter |= (1 << THREAD_EXECUTE);
309 break;
310 default:
311 break;
312 }
313 ++i;
314 }
315 return filter;
62f44022
QY
316}
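/*
 * For example, parse_filter("rw") returns
 * (1 << THREAD_READ) | (1 << THREAD_WRITE). Unrecognized characters are
 * silently ignored, and callers treat an all-zero result as an invalid
 * filter.
 */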
317
ee4dcee8
DL
318DEFUN_NOSH (show_thread_cpu,
319 show_thread_cpu_cmd,
320 "show thread cpu [FILTER]",
321 SHOW_STR
322 "Thread information\n"
323 "Thread CPU usage\n"
324 "Display filter (rwtex)\n")
62f44022 325{
fbcac826 326 uint8_t filter = (uint8_t)-1U;
d62a17ae 327 int idx = 0;
328
329 if (argv_find(argv, argc, "FILTER", &idx)) {
330 filter = parse_filter(argv[idx]->arg);
331 if (!filter) {
332 vty_out(vty,
3efd0893 333 "Invalid filter \"%s\" specified; must contain at least one of 'RWTEX'\n",
d62a17ae 334 argv[idx]->arg);
335 return CMD_WARNING;
336 }
337 }
338
339 cpu_record_print(vty, filter);
340 return CMD_SUCCESS;
e276eb82 341}
45f01188
DL
342
343DEFPY (service_cputime_stats,
344 service_cputime_stats_cmd,
345 "[no] service cputime-stats",
346 NO_STR
347 "Set up miscellaneous service\n"
348 "Collect CPU usage statistics\n")
349{
350 cputime_enabled = !no;
351 return CMD_SUCCESS;
352}
353
354DEFPY (service_cputime_warning,
355 service_cputime_warning_cmd,
356 "[no] service cputime-warning (1-4294967295)",
357 NO_STR
358 "Set up miscellaneous service\n"
359 "Warn for tasks exceeding CPU usage threshold\n"
360 "Warning threshold in milliseconds\n")
361{
362 if (no)
363 cputime_threshold = 0;
364 else
365 cputime_threshold = cputime_warning * 1000;
366 return CMD_SUCCESS;
367}
368
369ALIAS (service_cputime_warning,
370 no_service_cputime_warning_cmd,
371 "no service cputime-warning",
372 NO_STR
373 "Set up miscellaneous service\n"
374 "Warn for tasks exceeding CPU usage threshold\n")
375
376DEFPY (service_walltime_warning,
377 service_walltime_warning_cmd,
378 "[no] service walltime-warning (1-4294967295)",
379 NO_STR
380 "Set up miscellaneous service\n"
381 "Warn for tasks exceeding total wallclock threshold\n"
382 "Warning threshold in milliseconds\n")
383{
384 if (no)
385 walltime_threshold = 0;
386 else
387 walltime_threshold = walltime_warning * 1000;
388 return CMD_SUCCESS;
389}
390
391ALIAS (service_walltime_warning,
392 no_service_walltime_warning_cmd,
393 "no service walltime-warning",
394 NO_STR
395 "Set up miscellaneous service\n"
396 "Warn for tasks exceeding total wallclock threshold\n")
e276eb82 397
8872626b
DS
398static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
399{
400 const char *name = m->name ? m->name : "main";
401 char underline[strlen(name) + 1];
e6685141 402 struct event *thread;
8872626b
DS
403 uint32_t i;
404
405 memset(underline, '-', sizeof(underline));
406 underline[sizeof(underline) - 1] = '\0';
407
408 vty_out(vty, "\nShowing poll FD's for %s\n", name);
409 vty_out(vty, "----------------------%s\n", underline);
6c19478a
DS
410 vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
411 m->fd_limit);
a0b36ae6
DS
412 for (i = 0; i < m->handler.pfdcount; i++) {
413 vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
414 m->handler.pfds[i].fd, m->handler.pfds[i].events,
8872626b 415 m->handler.pfds[i].revents);
a0b36ae6
DS
416
417 if (m->handler.pfds[i].events & POLLIN) {
418 thread = m->read[m->handler.pfds[i].fd];
419
420 if (!thread)
421 vty_out(vty, "ERROR ");
422 else
60a3efec 423 vty_out(vty, "%s ", thread->xref->funcname);
a0b36ae6
DS
424 } else
425 vty_out(vty, " ");
426
427 if (m->handler.pfds[i].events & POLLOUT) {
428 thread = m->write[m->handler.pfds[i].fd];
429
430 if (!thread)
431 vty_out(vty, "ERROR\n");
432 else
60a3efec 433 vty_out(vty, "%s\n", thread->xref->funcname);
a0b36ae6
DS
434 } else
435 vty_out(vty, "\n");
436 }
8872626b
DS
437}
438
ee4dcee8
DL
439DEFUN_NOSH (show_thread_poll,
440 show_thread_poll_cmd,
441 "show thread poll",
442 SHOW_STR
443 "Thread information\n"
444 "Show poll FD's and information\n")
8872626b
DS
445{
446 struct listnode *node;
447 struct thread_master *m;
448
cb1991af 449 frr_with_mutex (&masters_mtx) {
8872626b
DS
450 for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
451 show_thread_poll_helper(vty, m);
452 }
453 }
8872626b
DS
454
455 return CMD_SUCCESS;
456}
457
458
49d41a26
DS
459DEFUN (clear_thread_cpu,
460 clear_thread_cpu_cmd,
461 "clear thread cpu [FILTER]",
62f44022 462 "Clear stored data in all pthreads\n"
49d41a26
DS
463 "Thread information\n"
464 "Thread CPU usage\n"
 465 "Display filter (rwtex)\n")
e276eb82 466{
fbcac826 467 uint8_t filter = (uint8_t)-1U;
d62a17ae 468 int idx = 0;
469
470 if (argv_find(argv, argc, "FILTER", &idx)) {
471 filter = parse_filter(argv[idx]->arg);
472 if (!filter) {
473 vty_out(vty,
3efd0893 474 "Invalid filter \"%s\" specified; must contain at least one of 'RWTEX'\n",
d62a17ae 475 argv[idx]->arg);
476 return CMD_WARNING;
477 }
478 }
479
480 cpu_record_clear(filter);
481 return CMD_SUCCESS;
e276eb82 482}
6b0655a2 483
22f31b8c
DS
484static void show_thread_timers_helper(struct vty *vty, struct thread_master *m)
485{
486 const char *name = m->name ? m->name : "main";
487 char underline[strlen(name) + 1];
e6685141 488 struct event *thread;
22f31b8c
DS
489
490 memset(underline, '-', sizeof(underline));
491 underline[sizeof(underline) - 1] = '\0';
492
493 vty_out(vty, "\nShowing timers for %s\n", name);
494 vty_out(vty, "-------------------%s\n", underline);
495
496 frr_each (thread_timer_list, &m->timer, thread) {
497 vty_out(vty, " %-50s%pTH\n", thread->hist->funcname, thread);
498 }
499}
500
501DEFPY_NOSH (show_thread_timers,
502 show_thread_timers_cmd,
503 "show thread timers",
504 SHOW_STR
505 "Thread information\n"
 506 "Show all timers and how long until they expire\n")
507{
508 struct listnode *node;
509 struct thread_master *m;
510
511 frr_with_mutex (&masters_mtx) {
512 for (ALL_LIST_ELEMENTS_RO(masters, node, m))
513 show_thread_timers_helper(vty, m);
514 }
515
516 return CMD_SUCCESS;
517}
518
d62a17ae 519void thread_cmd_init(void)
0b84f294 520{
d62a17ae 521 install_element(VIEW_NODE, &show_thread_cpu_cmd);
8872626b 522 install_element(VIEW_NODE, &show_thread_poll_cmd);
d62a17ae 523 install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
45f01188
DL
524
525 install_element(CONFIG_NODE, &service_cputime_stats_cmd);
526 install_element(CONFIG_NODE, &service_cputime_warning_cmd);
527 install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
528 install_element(CONFIG_NODE, &service_walltime_warning_cmd);
529 install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
22f31b8c
DS
530
531 install_element(VIEW_NODE, &show_thread_timers_cmd);
0b84f294 532}
62f44022
QY
533/* CLI end ------------------------------------------------------------------ */
534
0b84f294 535
d62a17ae 536static void cancelreq_del(void *cr)
63ccb9cb 537{
d62a17ae 538 XFREE(MTYPE_TMP, cr);
63ccb9cb
QY
539}
540
e0bebc7c 541/* initializer, only ever called once */
4d762f26 542static void initializer(void)
e0bebc7c 543{
d62a17ae 544 pthread_key_create(&thread_current, NULL);
e0bebc7c
QY
545}
546
d62a17ae 547struct thread_master *thread_master_create(const char *name)
718e3744 548{
d62a17ae 549 struct thread_master *rv;
550 struct rlimit limit;
551
552 pthread_once(&init_once, &initializer);
553
554 rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));
d62a17ae 555
556 /* Initialize master mutex */
557 pthread_mutex_init(&rv->mtx, NULL);
558 pthread_cond_init(&rv->cancel_cond, NULL);
559
560 /* Set name */
7ffcd8bd
QY
561 name = name ? name : "default";
562 rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
d62a17ae 563
564 /* Initialize I/O task data structures */
1a9f340b
MS
565
566 /* Use configured limit if present, ulimit otherwise. */
567 rv->fd_limit = frr_get_fd_limit();
568 if (rv->fd_limit == 0) {
569 getrlimit(RLIMIT_NOFILE, &limit);
570 rv->fd_limit = (int)limit.rlim_cur;
571 }
572
a6f235f3 573 rv->read = XCALLOC(MTYPE_THREAD_POLL,
e6685141 574 sizeof(struct event *) * rv->fd_limit);
a6f235f3
DS
575
576 rv->write = XCALLOC(MTYPE_THREAD_POLL,
e6685141 577 sizeof(struct event *) * rv->fd_limit);
d62a17ae 578
7ffcd8bd
QY
579 char tmhashname[strlen(name) + 32];
580 snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
581 name);
bd74dc61 582 rv->cpu_record = hash_create_size(
d8b87afe 583 8, (unsigned int (*)(const void *))cpu_record_hash_key,
74df8d6d 584 (bool (*)(const void *, const void *))cpu_record_hash_cmp,
7ffcd8bd 585 tmhashname);
d62a17ae 586
c284542b
DL
587 thread_list_init(&rv->event);
588 thread_list_init(&rv->ready);
589 thread_list_init(&rv->unuse);
27d29ced 590 thread_timer_list_init(&rv->timer);
d62a17ae 591
592 /* Initialize thread_fetch() settings */
593 rv->spin = true;
594 rv->handle_signals = true;
595
596 /* Set pthread owner, should be updated by actual owner */
597 rv->owner = pthread_self();
598 rv->cancel_req = list_new();
599 rv->cancel_req->del = cancelreq_del;
600 rv->canceled = true;
601
602 /* Initialize pipe poker */
603 pipe(rv->io_pipe);
604 set_nonblocking(rv->io_pipe[0]);
605 set_nonblocking(rv->io_pipe[1]);
606
607 /* Initialize data structures for poll() */
608 rv->handler.pfdsize = rv->fd_limit;
609 rv->handler.pfdcount = 0;
610 rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
611 sizeof(struct pollfd) * rv->handler.pfdsize);
612 rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
613 sizeof(struct pollfd) * rv->handler.pfdsize);
614
eff09c66 615 /* add to list of threadmasters */
cb1991af 616 frr_with_mutex (&masters_mtx) {
eff09c66
QY
617 if (!masters)
618 masters = list_new();
619
d62a17ae 620 listnode_add(masters, rv);
621 }
d62a17ae 622
623 return rv;
718e3744 624}
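/*
 * Illustrative sketch (the loop below lives in the daemon, not in this
 * file): each pthread creates one thread_master and drives it from a
 * fetch/dispatch loop, roughly:
 *
 *   struct thread_master *master = thread_master_create("main");
 *   struct event ev;
 *
 *   while (thread_fetch(master, &ev))
 *           thread_call(&ev);
 *
 * thread_call() is the dispatcher assumed to be provided elsewhere in
 * this library; it is not shown in this excerpt.
 */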
625
d8a8a8de
QY
626void thread_master_set_name(struct thread_master *master, const char *name)
627{
cb1991af 628 frr_with_mutex (&master->mtx) {
0a22ddfb 629 XFREE(MTYPE_THREAD_MASTER, master->name);
d8a8a8de
QY
630 master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
631 }
d8a8a8de
QY
632}
633
6ed04aa2
DS
634#define THREAD_UNUSED_DEPTH 10
635
718e3744 636/* Move thread to unuse list. */
e6685141 637static void thread_add_unuse(struct thread_master *m, struct event *thread)
718e3744 638{
6655966d
RZ
639 pthread_mutex_t mtxc = thread->mtx;
640
d62a17ae 641 assert(m != NULL && thread != NULL);
d62a17ae 642
d62a17ae 643 thread->hist->total_active--;
e6685141 644 memset(thread, 0, sizeof(struct event));
6ed04aa2
DS
645 thread->type = THREAD_UNUSED;
646
6655966d
RZ
647 /* Restore the thread mutex context. */
648 thread->mtx = mtxc;
649
c284542b
DL
650 if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
651 thread_list_add_tail(&m->unuse, thread);
6655966d
RZ
652 return;
653 }
654
655 thread_free(m, thread);
718e3744 656}
657
658/* Free all unused thread. */
c284542b
DL
659static void thread_list_free(struct thread_master *m,
660 struct thread_list_head *list)
718e3744 661{
e6685141 662 struct event *t;
d62a17ae 663
c284542b 664 while ((t = thread_list_pop(list)))
6655966d 665 thread_free(m, t);
718e3744 666}
667
d62a17ae 668static void thread_array_free(struct thread_master *m,
e6685141 669 struct event **thread_array)
308d14ae 670{
e6685141 671 struct event *t;
d62a17ae 672 int index;
673
674 for (index = 0; index < m->fd_limit; ++index) {
675 t = thread_array[index];
676 if (t) {
677 thread_array[index] = NULL;
6655966d 678 thread_free(m, t);
d62a17ae 679 }
680 }
a6f235f3 681 XFREE(MTYPE_THREAD_POLL, thread_array);
308d14ae
DV
682}
683
495f0b13
DS
684/*
685 * thread_master_free_unused
686 *
 687 * As threads finish, they are put on the unuse list
 688 * for later reuse.
 689 * When shutting down, free the unused threads so we
 690 * can see whether anything was forgotten.
691 */
d62a17ae 692void thread_master_free_unused(struct thread_master *m)
495f0b13 693{
cb1991af 694 frr_with_mutex (&m->mtx) {
e6685141 695 struct event *t;
c284542b 696 while ((t = thread_list_pop(&m->unuse)))
6655966d 697 thread_free(m, t);
d62a17ae 698 }
495f0b13
DS
699}
700
718e3744 701/* Stop thread scheduler. */
d62a17ae 702void thread_master_free(struct thread_master *m)
718e3744 703{
e6685141 704 struct event *t;
27d29ced 705
cb1991af 706 frr_with_mutex (&masters_mtx) {
d62a17ae 707 listnode_delete(masters, m);
eff09c66 708 if (masters->count == 0) {
6a154c88 709 list_delete(&masters);
eff09c66 710 }
d62a17ae 711 }
d62a17ae 712
713 thread_array_free(m, m->read);
714 thread_array_free(m, m->write);
27d29ced
DL
715 while ((t = thread_timer_list_pop(&m->timer)))
716 thread_free(m, t);
d62a17ae 717 thread_list_free(m, &m->event);
718 thread_list_free(m, &m->ready);
719 thread_list_free(m, &m->unuse);
720 pthread_mutex_destroy(&m->mtx);
33844bbe 721 pthread_cond_destroy(&m->cancel_cond);
d62a17ae 722 close(m->io_pipe[0]);
723 close(m->io_pipe[1]);
6a154c88 724 list_delete(&m->cancel_req);
1a0a92ea 725 m->cancel_req = NULL;
d62a17ae 726
d8bc11a5 727 hash_clean_and_free(&m->cpu_record, cpu_record_hash_free);
d62a17ae 728
0a22ddfb 729 XFREE(MTYPE_THREAD_MASTER, m->name);
d62a17ae 730 XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
731 XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
732 XFREE(MTYPE_THREAD_MASTER, m);
718e3744 733}
734
94202742 735/* Return remaining time in milliseconds. */
e6685141 736unsigned long thread_timer_remain_msec(struct event *thread)
718e3744 737{
d62a17ae 738 int64_t remain;
1189d95f 739
13bcc010
DA
740 if (!thread_is_scheduled(thread))
741 return 0;
742
cb1991af 743 frr_with_mutex (&thread->mtx) {
78ca0342 744 remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
d62a17ae 745 }
1189d95f 746
d62a17ae 747 return remain < 0 ? 0 : remain;
718e3744 748}
749
78ca0342 750/* Return remaining time in seconds. */
e6685141 751unsigned long thread_timer_remain_second(struct event *thread)
78ca0342
CF
752{
753 return thread_timer_remain_msec(thread) / 1000LL;
754}
755
e6685141 756struct timeval thread_timer_remain(struct event *thread)
6ac44687 757{
d62a17ae 758 struct timeval remain;
cb1991af 759 frr_with_mutex (&thread->mtx) {
d62a17ae 760 monotime_until(&thread->u.sands, &remain);
761 }
d62a17ae 762 return remain;
6ac44687
CF
763}
764
0447957e
AK
765static int time_hhmmss(char *buf, int buf_size, long sec)
766{
767 long hh;
768 long mm;
769 int wr;
770
642ac49d 771 assert(buf_size >= 8);
0447957e
AK
772
773 hh = sec / 3600;
774 sec %= 3600;
775 mm = sec / 60;
776 sec %= 60;
777
778 wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
779
780 return wr != 8;
781}
782
e6685141 783char *thread_timer_to_hhmmss(char *buf, int buf_size, struct event *t_timer)
0447957e
AK
784{
785 if (t_timer) {
786 time_hhmmss(buf, buf_size,
787 thread_timer_remain_second(t_timer));
788 } else {
789 snprintf(buf, buf_size, "--:--:--");
790 }
791 return buf;
792}
793
718e3744 794/* Get new thread. */
e6685141
DS
795static struct event *thread_get(struct thread_master *m, uint8_t type,
796 void (*func)(struct event *), void *arg,
797 const struct xref_threadsched *xref)
718e3744 798{
e6685141 799 struct event *thread = thread_list_pop(&m->unuse);
d62a17ae 800 struct cpu_thread_history tmp;
801
802 if (!thread) {
e6685141 803 thread = XCALLOC(MTYPE_THREAD, sizeof(struct event));
d62a17ae 804 /* mutex only needs to be initialized at struct creation. */
805 pthread_mutex_init(&thread->mtx, NULL);
806 m->alloc++;
807 }
808
809 thread->type = type;
810 thread->add_type = type;
811 thread->master = m;
812 thread->arg = arg;
d62a17ae 813 thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
814 thread->ref = NULL;
e8b3a2f7 815 thread->ignore_timer_late = false;
d62a17ae 816
817 /*
818 * So if the passed in funcname is not what we have
819 * stored that means the thread->hist needs to be
820 * updated. We keep the last one around in unused
821 * under the assumption that we are probably
822 * going to immediately allocate the same
823 * type of thread.
824 * This hopefully saves us some serious
825 * hash_get lookups.
826 */
60a3efec
DL
827 if ((thread->xref && thread->xref->funcname != xref->funcname)
828 || thread->func != func) {
d62a17ae 829 tmp.func = func;
60a3efec 830 tmp.funcname = xref->funcname;
d62a17ae 831 thread->hist =
832 hash_get(m->cpu_record, &tmp,
833 (void *(*)(void *))cpu_record_hash_alloc);
834 }
835 thread->hist->total_active++;
836 thread->func = func;
60a3efec 837 thread->xref = xref;
d62a17ae 838
839 return thread;
718e3744 840}
841
e6685141 842static void thread_free(struct thread_master *master, struct event *thread)
6655966d
RZ
843{
844 /* Update statistics. */
845 assert(master->alloc > 0);
846 master->alloc--;
847
848 /* Free allocated resources. */
849 pthread_mutex_destroy(&thread->mtx);
850 XFREE(MTYPE_THREAD, thread);
851}
852
d81ca9a3
MS
853static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
854 bool *eintr_p)
209a72a6 855{
d81ca9a3
MS
856 sigset_t origsigs;
857 unsigned char trash[64];
858 nfds_t count = m->handler.copycount;
859
d279ef57
DS
860 /*
861 * If timer_wait is null here, that means poll() should block
862 * indefinitely, unless the thread_master has overridden it by setting
d62a17ae 863 * ->selectpoll_timeout.
d279ef57 864 *
d62a17ae 865 * If the value is positive, it specifies the maximum number of
d279ef57
DS
866 * milliseconds to wait. If the timeout is -1, it specifies that
867 * we should never wait and always return immediately even if no
868 * event is detected. If the value is zero, the behavior is default.
869 */
d62a17ae 870 int timeout = -1;
871
872 /* number of file descriptors with events */
873 int num;
874
875 if (timer_wait != NULL
876 && m->selectpoll_timeout == 0) // use the default value
877 timeout = (timer_wait->tv_sec * 1000)
878 + (timer_wait->tv_usec / 1000);
879 else if (m->selectpoll_timeout > 0) // use the user's timeout
880 timeout = m->selectpoll_timeout;
881 else if (m->selectpoll_timeout
882 < 0) // effect a poll (return immediately)
883 timeout = 0;
884
0bdeb5e5 885 zlog_tls_buffer_flush();
3e41733f
DL
886 rcu_read_unlock();
887 rcu_assert_read_unlocked();
888
d62a17ae 889 /* add poll pipe poker */
d81ca9a3
MS
890 assert(count + 1 < m->handler.pfdsize);
891 m->handler.copy[count].fd = m->io_pipe[0];
892 m->handler.copy[count].events = POLLIN;
893 m->handler.copy[count].revents = 0x00;
894
895 /* We need to deal with a signal-handling race here: we
896 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
897 * that may arrive just before we enter poll(). We will block the
898 * key signals, then check whether any have arrived - if so, we return
899 * before calling poll(). If not, we'll re-enable the signals
900 * in the ppoll() call.
901 */
902
903 sigemptyset(&origsigs);
904 if (m->handle_signals) {
905 /* Main pthread that handles the app signals */
906 if (frr_sigevent_check(&origsigs)) {
907 /* Signal to process - restore signal mask and return */
908 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
909 num = -1;
910 *eintr_p = true;
911 goto done;
912 }
913 } else {
914 /* Don't make any changes for the non-main pthreads */
915 pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
916 }
d62a17ae 917
d81ca9a3
MS
918#if defined(HAVE_PPOLL)
919 struct timespec ts, *tsp;
920
921 if (timeout >= 0) {
922 ts.tv_sec = timeout / 1000;
923 ts.tv_nsec = (timeout % 1000) * 1000000;
924 tsp = &ts;
925 } else
926 tsp = NULL;
927
928 num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
929 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
930#else
931 /* Not ideal - there is a race after we restore the signal mask */
932 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
933 num = poll(m->handler.copy, count + 1, timeout);
934#endif
d62a17ae 935
d81ca9a3
MS
936done:
937
938 if (num < 0 && errno == EINTR)
939 *eintr_p = true;
940
941 if (num > 0 && m->handler.copy[count].revents != 0 && num--)
d62a17ae 942 while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
943 ;
944
3e41733f
DL
945 rcu_read_lock();
946
d62a17ae 947 return num;
209a72a6
DS
948}
949
718e3744 950/* Add new read or write thread. */
907a2395
DS
951void _event_add_read_write(const struct xref_threadsched *xref,
952 struct thread_master *m,
953 void (*func)(struct event *), void *arg, int fd,
954 struct event **t_ptr)
718e3744 955{
60a3efec 956 int dir = xref->thread_type;
e6685141
DS
957 struct event *thread = NULL;
958 struct event **thread_array;
d62a17ae 959
abf96a87 960 if (dir == THREAD_READ)
6c3aa850
DL
961 frrtrace(9, frr_libfrr, schedule_read, m,
962 xref->funcname, xref->xref.file, xref->xref.line,
963 t_ptr, fd, 0, arg, 0);
abf96a87 964 else
6c3aa850
DL
965 frrtrace(9, frr_libfrr, schedule_write, m,
966 xref->funcname, xref->xref.file, xref->xref.line,
967 t_ptr, fd, 0, arg, 0);
abf96a87 968
188acbb9
DS
969 assert(fd >= 0);
970 if (fd >= m->fd_limit)
971 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
972
cb1991af 973 frr_with_mutex (&m->mtx) {
00dffa8c
DL
974 if (t_ptr && *t_ptr)
975 // thread is already scheduled; don't reschedule
976 break;
d62a17ae 977
978 /* default to a new pollfd */
979 nfds_t queuepos = m->handler.pfdcount;
980
1ef14bee
DS
981 if (dir == THREAD_READ)
982 thread_array = m->read;
983 else
984 thread_array = m->write;
985
d62a17ae 986 /* if we already have a pollfd for our file descriptor, find and
987 * use it */
988 for (nfds_t i = 0; i < m->handler.pfdcount; i++)
989 if (m->handler.pfds[i].fd == fd) {
990 queuepos = i;
1ef14bee
DS
991
992#ifdef DEV_BUILD
993 /*
994 * What happens if we have a thread already
995 * created for this event?
996 */
997 if (thread_array[fd])
998 assert(!"Thread already scheduled for file descriptor");
999#endif
d62a17ae 1000 break;
1001 }
1002
1003 /* make sure we have room for this fd + pipe poker fd */
1004 assert(queuepos + 1 < m->handler.pfdsize);
1005
60a3efec 1006 thread = thread_get(m, dir, func, arg, xref);
d62a17ae 1007
1008 m->handler.pfds[queuepos].fd = fd;
1009 m->handler.pfds[queuepos].events |=
1010 (dir == THREAD_READ ? POLLIN : POLLOUT);
1011
1012 if (queuepos == m->handler.pfdcount)
1013 m->handler.pfdcount++;
1014
1015 if (thread) {
cb1991af 1016 frr_with_mutex (&thread->mtx) {
d62a17ae 1017 thread->u.fd = fd;
1ef14bee 1018 thread_array[thread->u.fd] = thread;
d62a17ae 1019 }
d62a17ae 1020
1021 if (t_ptr) {
1022 *t_ptr = thread;
1023 thread->ref = t_ptr;
1024 }
1025 }
1026
1027 AWAKEN(m);
1028 }
718e3744 1029}
1030
907a2395
DS
1031static void _event_add_timer_timeval(const struct xref_threadsched *xref,
1032 struct thread_master *m,
1033 void (*func)(struct event *), void *arg,
1034 struct timeval *time_relative,
1035 struct event **t_ptr)
718e3744 1036{
e6685141 1037 struct event *thread;
96fe578a 1038 struct timeval t;
d62a17ae 1039
1040 assert(m != NULL);
1041
d62a17ae 1042 assert(time_relative);
1043
6c3aa850
DL
1044 frrtrace(9, frr_libfrr, schedule_timer, m,
1045 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1046 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
abf96a87 1047
96fe578a
MS
1048 /* Compute expiration/deadline time. */
1049 monotime(&t);
1050 timeradd(&t, time_relative, &t);
1051
cb1991af 1052 frr_with_mutex (&m->mtx) {
00dffa8c 1053 if (t_ptr && *t_ptr)
d279ef57 1054 /* thread is already scheduled; don't reschedule */
ee1455dd 1055 return;
d62a17ae 1056
4322dea7 1057 thread = thread_get(m, THREAD_TIMER, func, arg, xref);
d62a17ae 1058
cb1991af 1059 frr_with_mutex (&thread->mtx) {
96fe578a 1060 thread->u.sands = t;
27d29ced 1061 thread_timer_list_add(&m->timer, thread);
d62a17ae 1062 if (t_ptr) {
1063 *t_ptr = thread;
1064 thread->ref = t_ptr;
1065 }
1066 }
d62a17ae 1067
96fe578a
MS
1068 /* The timer list is sorted - if this new timer
1069 * might change the time we'll wait for, give the pthread
1070 * a chance to re-compute.
1071 */
1072 if (thread_timer_list_first(&m->timer) == thread)
1073 AWAKEN(m);
d62a17ae 1074 }
e2eff5c3
DS
1075#define ONEYEAR2SEC (60 * 60 * 24 * 365)
1076 if (time_relative->tv_sec > ONEYEAR2SEC)
1077 flog_err(
1078 EC_LIB_TIMER_TOO_LONG,
1079 "Timer: %pTHD is created with an expiration that is greater than 1 year",
1080 thread);
9e867fe6 1081}
1082
98c91ac6 1083
1084/* Add timer event thread. */
907a2395
DS
1085void _event_add_timer(const struct xref_threadsched *xref,
1086 struct thread_master *m, void (*func)(struct event *),
1087 void *arg, long timer, struct event **t_ptr)
9e867fe6 1088{
d62a17ae 1089 struct timeval trel;
9e867fe6 1090
d62a17ae 1091 assert(m != NULL);
9e867fe6 1092
d62a17ae 1093 trel.tv_sec = timer;
1094 trel.tv_usec = 0;
9e867fe6 1095
907a2395 1096 _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
98c91ac6 1097}
9e867fe6 1098
98c91ac6 1099/* Add timer event thread with "millisecond" resolution */
907a2395
DS
1100void _event_add_timer_msec(const struct xref_threadsched *xref,
1101 struct thread_master *m,
1102 void (*func)(struct event *), void *arg, long timer,
1103 struct event **t_ptr)
98c91ac6 1104{
d62a17ae 1105 struct timeval trel;
9e867fe6 1106
d62a17ae 1107 assert(m != NULL);
718e3744 1108
d62a17ae 1109 trel.tv_sec = timer / 1000;
1110 trel.tv_usec = 1000 * (timer % 1000);
98c91ac6 1111
907a2395 1112 _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
a48b4e6d 1113}
1114
4322dea7 1115/* Add timer event thread with "timeval" resolution */
907a2395
DS
1116void _event_add_timer_tv(const struct xref_threadsched *xref,
1117 struct thread_master *m, void (*func)(struct event *),
1118 void *arg, struct timeval *tv, struct event **t_ptr)
d03c4cbd 1119{
907a2395 1120 _event_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
d03c4cbd
DL
1121}
1122
718e3744 1123/* Add simple event thread. */
907a2395
DS
1124void _event_add_event(const struct xref_threadsched *xref,
1125 struct thread_master *m, void (*func)(struct event *),
1126 void *arg, int val, struct event **t_ptr)
718e3744 1127{
e6685141 1128 struct event *thread = NULL;
d62a17ae 1129
6c3aa850
DL
1130 frrtrace(9, frr_libfrr, schedule_event, m,
1131 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1132 t_ptr, 0, val, arg, 0);
abf96a87 1133
d62a17ae 1134 assert(m != NULL);
1135
cb1991af 1136 frr_with_mutex (&m->mtx) {
00dffa8c 1137 if (t_ptr && *t_ptr)
d279ef57 1138 /* thread is already scheduled; don't reschedule */
00dffa8c 1139 break;
d62a17ae 1140
60a3efec 1141 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
cb1991af 1142 frr_with_mutex (&thread->mtx) {
d62a17ae 1143 thread->u.val = val;
c284542b 1144 thread_list_add_tail(&m->event, thread);
d62a17ae 1145 }
d62a17ae 1146
1147 if (t_ptr) {
1148 *t_ptr = thread;
1149 thread->ref = t_ptr;
1150 }
1151
1152 AWAKEN(m);
1153 }
718e3744 1154}
1155
63ccb9cb
QY
1156/* Thread cancellation ------------------------------------------------------ */
1157
8797240e
QY
1158/**
1159 * NOT's out the .events field of pollfd corresponding to the given file
1160 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1161 *
1162 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
1163 * implementation for details.
1164 *
1165 * @param master
1166 * @param fd
1167 * @param state the event to cancel. One or more (OR'd together) of the
1168 * following:
1169 * - POLLIN
1170 * - POLLOUT
1171 */
332beb64
DS
1172static void event_cancel_rw(struct thread_master *master, int fd, short state,
1173 int idx_hint)
0a95a0d0 1174{
42d74538
QY
1175 bool found = false;
1176
d62a17ae 1177 /* find the index of corresponding pollfd */
1178 nfds_t i;
1179
a9318a32
MS
1180 /* Cancel POLLHUP too just in case some bozo set it */
1181 state |= POLLHUP;
1182
1183 /* Some callers know the index of the pfd already */
1184 if (idx_hint >= 0) {
1185 i = idx_hint;
1186 found = true;
1187 } else {
1188 /* Have to look for the fd in the pfd array */
1189 for (i = 0; i < master->handler.pfdcount; i++)
1190 if (master->handler.pfds[i].fd == fd) {
1191 found = true;
1192 break;
1193 }
1194 }
42d74538
QY
1195
1196 if (!found) {
1197 zlog_debug(
1198 "[!] Received cancellation request for nonexistent rw job");
1199 zlog_debug("[!] threadmaster: %s | fd: %d",
996c9314 1200 master->name ? master->name : "", fd);
42d74538
QY
1201 return;
1202 }
d62a17ae 1203
1204 /* NOT out event. */
1205 master->handler.pfds[i].events &= ~(state);
1206
1207 /* If all events are canceled, delete / resize the pollfd array. */
1208 if (master->handler.pfds[i].events == 0) {
1209 memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1210 (master->handler.pfdcount - i - 1)
1211 * sizeof(struct pollfd));
1212 master->handler.pfdcount--;
e985cda0
S
1213 master->handler.pfds[master->handler.pfdcount].fd = 0;
1214 master->handler.pfds[master->handler.pfdcount].events = 0;
d62a17ae 1215 }
1216
1217 /* If we have the same pollfd in the copy, perform the same operations,
1218 * otherwise return. */
1219 if (i >= master->handler.copycount)
1220 return;
1221
1222 master->handler.copy[i].events &= ~(state);
1223
1224 if (master->handler.copy[i].events == 0) {
1225 memmove(master->handler.copy + i, master->handler.copy + i + 1,
1226 (master->handler.copycount - i - 1)
1227 * sizeof(struct pollfd));
1228 master->handler.copycount--;
e985cda0
S
1229 master->handler.copy[master->handler.copycount].fd = 0;
1230 master->handler.copy[master->handler.copycount].events = 0;
d62a17ae 1231 }
0a95a0d0
DS
1232}
1233
a9318a32
MS
1234/*
1235 * Process task cancellation given a task argument: iterate through the
1236 * various lists of tasks, looking for any that match the argument.
1237 */
1238static void cancel_arg_helper(struct thread_master *master,
1239 const struct cancel_req *cr)
1240{
e6685141 1241 struct event *t;
a9318a32
MS
1242 nfds_t i;
1243 int fd;
1244 struct pollfd *pfd;
1245
1246 /* We're only processing arg-based cancellations here. */
1247 if (cr->eventobj == NULL)
1248 return;
1249
1250 /* First process the ready lists. */
1251 frr_each_safe(thread_list, &master->event, t) {
1252 if (t->arg != cr->eventobj)
1253 continue;
1254 thread_list_del(&master->event, t);
1255 if (t->ref)
1256 *t->ref = NULL;
1257 thread_add_unuse(master, t);
1258 }
1259
1260 frr_each_safe(thread_list, &master->ready, t) {
1261 if (t->arg != cr->eventobj)
1262 continue;
1263 thread_list_del(&master->ready, t);
1264 if (t->ref)
1265 *t->ref = NULL;
1266 thread_add_unuse(master, t);
1267 }
1268
1269 /* If requested, stop here and ignore io and timers */
332beb64 1270 if (CHECK_FLAG(cr->flags, EVENT_CANCEL_FLAG_READY))
a9318a32
MS
1271 return;
1272
1273 /* Check the io tasks */
1274 for (i = 0; i < master->handler.pfdcount;) {
1275 pfd = master->handler.pfds + i;
1276
1277 if (pfd->events & POLLIN)
1278 t = master->read[pfd->fd];
1279 else
1280 t = master->write[pfd->fd];
1281
1282 if (t && t->arg == cr->eventobj) {
1283 fd = pfd->fd;
1284
1285 /* Found a match to cancel: clean up fd arrays */
332beb64 1286 event_cancel_rw(master, pfd->fd, pfd->events, i);
a9318a32
MS
1287
1288 /* Clean up thread arrays */
1289 master->read[fd] = NULL;
1290 master->write[fd] = NULL;
1291
1292 /* Clear caller's ref */
1293 if (t->ref)
1294 *t->ref = NULL;
1295
1296 thread_add_unuse(master, t);
1297
1298 /* Don't increment 'i' since the cancellation will have
1299 * removed the entry from the pfd array
1300 */
1301 } else
1302 i++;
1303 }
1304
1305 /* Check the timer tasks */
1306 t = thread_timer_list_first(&master->timer);
1307 while (t) {
e6685141 1308 struct event *t_next;
a9318a32
MS
1309
1310 t_next = thread_timer_list_next(&master->timer, t);
1311
1312 if (t->arg == cr->eventobj) {
1313 thread_timer_list_del(&master->timer, t);
1314 if (t->ref)
1315 *t->ref = NULL;
1316 thread_add_unuse(master, t);
1317 }
1318
1319 t = t_next;
1320 }
1321}
1322
1189d95f 1323/**
63ccb9cb 1324 * Process cancellation requests.
1189d95f 1325 *
63ccb9cb
QY
1326 * This may only be run from the pthread which owns the thread_master.
1327 *
1328 * @param master the thread master to process
1329 * @REQUIRE master->mtx
1189d95f 1330 */
332beb64 1331static void do_event_cancel(struct thread_master *master)
718e3744 1332{
c284542b 1333 struct thread_list_head *list = NULL;
e6685141
DS
1334 struct event **thread_array = NULL;
1335 struct event *thread;
d62a17ae 1336 struct cancel_req *cr;
1337 struct listnode *ln;
7e93a54c 1338
d62a17ae 1339 for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
d279ef57 1340 /*
a9318a32
MS
1341 * If this is an event object cancellation, search
1342 * through task lists deleting any tasks which have the
1343 * specified argument - use this handy helper function.
d279ef57 1344 */
d62a17ae 1345 if (cr->eventobj) {
a9318a32 1346 cancel_arg_helper(master, cr);
d62a17ae 1347 continue;
1348 }
1349
d279ef57
DS
1350 /*
1351 * The pointer varies depending on whether the cancellation
1352 * request was made asynchronously or not. If it was, we
1353 * need to check whether the thread even exists anymore
1354 * before cancelling it.
1355 */
d62a17ae 1356 thread = (cr->thread) ? cr->thread : *cr->threadref;
1357
1358 if (!thread)
1359 continue;
1360
7e93a54c
MS
1361 list = NULL;
1362 thread_array = NULL;
1363
d62a17ae 1364 /* Determine the appropriate queue to cancel the thread from */
1365 switch (thread->type) {
1366 case THREAD_READ:
332beb64 1367 event_cancel_rw(master, thread->u.fd, POLLIN, -1);
d62a17ae 1368 thread_array = master->read;
1369 break;
1370 case THREAD_WRITE:
332beb64 1371 event_cancel_rw(master, thread->u.fd, POLLOUT, -1);
d62a17ae 1372 thread_array = master->write;
1373 break;
1374 case THREAD_TIMER:
27d29ced 1375 thread_timer_list_del(&master->timer, thread);
d62a17ae 1376 break;
1377 case THREAD_EVENT:
1378 list = &master->event;
1379 break;
1380 case THREAD_READY:
1381 list = &master->ready;
1382 break;
1383 default:
1384 continue;
1385 break;
1386 }
1387
27d29ced 1388 if (list) {
c284542b 1389 thread_list_del(list, thread);
d62a17ae 1390 } else if (thread_array) {
1391 thread_array[thread->u.fd] = NULL;
d62a17ae 1392 }
1393
1394 if (thread->ref)
1395 *thread->ref = NULL;
1396
1397 thread_add_unuse(thread->master, thread);
1398 }
1399
1400 /* Delete and free all cancellation requests */
41b21bfa
MS
1401 if (master->cancel_req)
1402 list_delete_all_node(master->cancel_req);
d62a17ae 1403
332beb64 1404 /* Wake up any threads which may be blocked in event_cancel_async() */
d62a17ae 1405 master->canceled = true;
1406 pthread_cond_broadcast(&master->cancel_cond);
718e3744 1407}
1408
a9318a32
MS
1409/*
1410 * Helper function used for multiple flavors of arg-based cancellation.
1411 */
1412static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
1413{
1414 struct cancel_req *cr;
1415
1416 assert(m->owner == pthread_self());
1417
1418 /* Only worth anything if caller supplies an arg. */
1419 if (arg == NULL)
1420 return;
1421
1422 cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1423
1424 cr->flags = flags;
1425
cb1991af 1426 frr_with_mutex (&m->mtx) {
a9318a32
MS
1427 cr->eventobj = arg;
1428 listnode_add(m->cancel_req, cr);
332beb64 1429 do_event_cancel(m);
a9318a32
MS
1430 }
1431}
1432
63ccb9cb
QY
1433/**
1434 * Cancel any events which have the specified argument.
1435 *
1436 * MT-Unsafe
1437 *
 1438 * @param master the thread_master to cancel from
1439 * @param arg the argument passed when creating the event
1440 */
332beb64 1441void event_cancel_event(struct thread_master *master, void *arg)
718e3744 1442{
a9318a32
MS
1443 cancel_event_helper(master, arg, 0);
1444}
d62a17ae 1445
a9318a32
MS
1446/*
1447 * Cancel ready tasks with an arg matching 'arg'
1448 *
1449 * MT-Unsafe
1450 *
1451 * @param m the thread_master to cancel from
1452 * @param arg the argument passed when creating the event
1453 */
332beb64 1454void event_cancel_event_ready(struct thread_master *m, void *arg)
a9318a32
MS
1455{
1456
1457 /* Only cancel ready/event tasks */
332beb64 1458 cancel_event_helper(m, arg, EVENT_CANCEL_FLAG_READY);
63ccb9cb 1459}
1189d95f 1460
63ccb9cb
QY
1461/**
1462 * Cancel a specific task.
1463 *
1464 * MT-Unsafe
1465 *
1466 * @param thread task to cancel
1467 */
332beb64 1468void event_cancel(struct event **thread)
63ccb9cb 1469{
b3d6bc6e
MS
1470 struct thread_master *master;
1471
1472 if (thread == NULL || *thread == NULL)
1473 return;
1474
1475 master = (*thread)->master;
d62a17ae 1476
332beb64
DS
1477 frrtrace(9, frr_libfrr, event_cancel, master, (*thread)->xref->funcname,
1478 (*thread)->xref->xref.file, (*thread)->xref->xref.line, NULL,
1479 (*thread)->u.fd, (*thread)->u.val, (*thread)->arg,
1480 (*thread)->u.sands.tv_sec);
abf96a87 1481
6ed04aa2
DS
1482 assert(master->owner == pthread_self());
1483
cb1991af 1484 frr_with_mutex (&master->mtx) {
d62a17ae 1485 struct cancel_req *cr =
1486 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
b3d6bc6e 1487 cr->thread = *thread;
6ed04aa2 1488 listnode_add(master->cancel_req, cr);
332beb64 1489 do_event_cancel(master);
d62a17ae 1490 }
b3d6bc6e
MS
1491
1492 *thread = NULL;
63ccb9cb 1493}
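/*
 * Illustrative sketch (not part of this file): the usual pattern is to
 * schedule with a back-reference and cancel through it, letting the
 * library clear the pointer. event_add_timer() is the scheduling
 * wrapper assumed to be declared in event.h:
 *
 *   struct event *t_timeout = NULL;
 *
 *   event_add_timer(master, timeout_handler, ctx, 30, &t_timeout);
 *   ...
 *   event_cancel(&t_timeout);   // task removed; t_timeout is now NULL
 */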
1189d95f 1494
63ccb9cb
QY
1495/**
1496 * Asynchronous cancellation.
1497 *
e6685141 1498 * Called with either a struct event ** or void * to an event argument,
8797240e
QY
1499 * this function posts the correct cancellation request and blocks until it is
1500 * serviced.
63ccb9cb
QY
1501 *
1502 * If the thread is currently running, execution blocks until it completes.
1503 *
8797240e
QY
1504 * The last two parameters are mutually exclusive, i.e. if you pass one the
1505 * other must be NULL.
1506 *
1507 * When the cancellation procedure executes on the target thread_master, the
1508 * thread * provided is checked for nullity. If it is null, the thread is
1509 * assumed to no longer exist and the cancellation request is a no-op. Thus
1510 * users of this API must pass a back-reference when scheduling the original
1511 * task.
1512 *
63ccb9cb
QY
1513 * MT-Safe
1514 *
8797240e
QY
1515 * @param master the thread master with the relevant event / task
1516 * @param thread pointer to thread to cancel
1517 * @param eventobj the event
63ccb9cb 1518 */
332beb64
DS
1519void event_cancel_async(struct thread_master *master, struct event **thread,
1520 void *eventobj)
63ccb9cb 1521{
d62a17ae 1522 assert(!(thread && eventobj) && (thread || eventobj));
abf96a87
QY
1523
1524 if (thread && *thread)
332beb64 1525 frrtrace(9, frr_libfrr, event_cancel_async, master,
6c3aa850
DL
1526 (*thread)->xref->funcname, (*thread)->xref->xref.file,
1527 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
c7bb4f00
QY
1528 (*thread)->u.val, (*thread)->arg,
1529 (*thread)->u.sands.tv_sec);
abf96a87 1530 else
332beb64 1531 frrtrace(9, frr_libfrr, event_cancel_async, master, NULL, NULL,
c7bb4f00 1532 0, NULL, 0, 0, eventobj, 0);
abf96a87 1533
d62a17ae 1534 assert(master->owner != pthread_self());
1535
cb1991af 1536 frr_with_mutex (&master->mtx) {
d62a17ae 1537 master->canceled = false;
1538
1539 if (thread) {
1540 struct cancel_req *cr =
1541 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1542 cr->threadref = thread;
1543 listnode_add(master->cancel_req, cr);
1544 } else if (eventobj) {
1545 struct cancel_req *cr =
1546 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1547 cr->eventobj = eventobj;
1548 listnode_add(master->cancel_req, cr);
1549 }
1550 AWAKEN(master);
1551
1552 while (!master->canceled)
1553 pthread_cond_wait(&master->cancel_cond, &master->mtx);
1554 }
50478845
MS
1555
1556 if (thread)
1557 *thread = NULL;
718e3744 1558}
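/*
 * Illustrative sketch (not part of this file): event_cancel_async() must
 * be called from a pthread other than the master's owner (asserted
 * above) and blocks until the owner has serviced the request:
 *
 *   event_cancel_async(master, &t_io, NULL);  // cancel one task by ref
 *   event_cancel_async(master, NULL, ctx);    // cancel all tasks with arg == ctx
 */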
63ccb9cb 1559/* ------------------------------------------------------------------------- */
718e3744 1560
27d29ced 1561static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
d62a17ae 1562 struct timeval *timer_val)
718e3744 1563{
27d29ced
DL
1564 if (!thread_timer_list_count(timers))
1565 return NULL;
1566
e6685141 1567 struct event *next_timer = thread_timer_list_first(timers);
27d29ced
DL
1568 monotime_until(&next_timer->u.sands, timer_val);
1569 return timer_val;
718e3744 1570}
718e3744 1571
e6685141
DS
1572static struct event *thread_run(struct thread_master *m, struct event *thread,
1573 struct event *fetch)
718e3744 1574{
d62a17ae 1575 *fetch = *thread;
1576 thread_add_unuse(m, thread);
1577 return fetch;
718e3744 1578}
1579
d62a17ae 1580static int thread_process_io_helper(struct thread_master *m,
e6685141 1581 struct event *thread, short state,
45f3d590 1582 short actual_state, int pos)
5d4ccd4e 1583{
e6685141 1584 struct event **thread_array;
d62a17ae 1585
45f3d590
DS
1586 /*
1587 * poll() clears the .events field, but the pollfd array we
1588 * pass to poll() is a copy of the one used to schedule threads.
1589 * We need to synchronize state between the two here by applying
1590 * the same changes poll() made on the copy of the "real" pollfd
1591 * array.
1592 *
1593 * This cleans up a possible infinite loop where we refuse
1594 * to respond to a poll event but poll is insistent that
1595 * we should.
1596 */
1597 m->handler.pfds[pos].events &= ~(state);
1598
1599 if (!thread) {
1600 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1601 flog_err(EC_LIB_NO_THREAD,
1d5453d6 1602 "Attempting to process an I/O event, but no thread is scheduled to handle fd %d (state %d)!",
45f3d590 1603 m->handler.pfds[pos].fd, actual_state);
d62a17ae 1604 return 0;
45f3d590 1605 }
d62a17ae 1606
1607 if (thread->type == THREAD_READ)
1608 thread_array = m->read;
1609 else
1610 thread_array = m->write;
1611
1612 thread_array[thread->u.fd] = NULL;
c284542b 1613 thread_list_add_tail(&m->ready, thread);
d62a17ae 1614 thread->type = THREAD_READY;
45f3d590 1615
d62a17ae 1616 return 1;
5d4ccd4e
DS
1617}
1618
8797240e
QY
1619/**
1620 * Process I/O events.
1621 *
1622 * Walks through file descriptor array looking for those pollfds whose .revents
1623 * field has something interesting. Deletes any invalid file descriptors.
1624 *
1625 * @param m the thread master
1626 * @param num the number of active file descriptors (return value of poll())
1627 */
d62a17ae 1628static void thread_process_io(struct thread_master *m, unsigned int num)
0a95a0d0 1629{
d62a17ae 1630 unsigned int ready = 0;
1631 struct pollfd *pfds = m->handler.copy;
1632
1633 for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1634 /* no event for current fd? immediately continue */
1635 if (pfds[i].revents == 0)
1636 continue;
1637
1638 ready++;
1639
d279ef57 1640 /*
332beb64 1641 * Unless someone has called event_cancel from another
d279ef57
DS
1642 * pthread, the only thing that could have changed in
1643 * m->handler.pfds while we were asleep is the .events
332beb64 1644 * field in a given pollfd. Barring event_cancel() that
d279ef57
DS
1645 * value should be a superset of the values we have in our
 1646 * copy, so there's no need to update it. Similarly,
1647 * barring deletion, the fd should still be a valid index
1648 * into the master's pfds.
d142453d
DS
1649 *
 1650 * We include POLLERR here so that it triggers a READ
 1651 * event; the subsequent read should fail and the read
 1652 * handler is expected to deal with it appropriately
d279ef57 1653 */
d142453d 1654 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
d62a17ae 1655 thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
45f3d590
DS
1656 pfds[i].revents, i);
1657 }
d62a17ae 1658 if (pfds[i].revents & POLLOUT)
1659 thread_process_io_helper(m, m->write[pfds[i].fd],
45f3d590 1660 POLLOUT, pfds[i].revents, i);
d62a17ae 1661
 1662 /* if one of our file descriptors is garbage, remove the
 1663 * same entry from both pfd arrays and update the sizes
 1664 * and index */
1665 if (pfds[i].revents & POLLNVAL) {
1666 memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1667 (m->handler.pfdcount - i - 1)
1668 * sizeof(struct pollfd));
1669 m->handler.pfdcount--;
e985cda0
S
1670 m->handler.pfds[m->handler.pfdcount].fd = 0;
1671 m->handler.pfds[m->handler.pfdcount].events = 0;
d62a17ae 1672
1673 memmove(pfds + i, pfds + i + 1,
1674 (m->handler.copycount - i - 1)
1675 * sizeof(struct pollfd));
1676 m->handler.copycount--;
e985cda0
S
1677 m->handler.copy[m->handler.copycount].fd = 0;
1678 m->handler.copy[m->handler.copycount].events = 0;
d62a17ae 1679
1680 i--;
1681 }
1682 }
718e3744 1683}
1684
8b70d0b0 1685/* Add all timers that have popped to the ready list. */
e7d9e44b 1686static unsigned int thread_process_timers(struct thread_master *m,
d62a17ae 1687 struct timeval *timenow)
a48b4e6d 1688{
ab01a001
DS
1689 struct timeval prev = *timenow;
1690 bool displayed = false;
e6685141 1691 struct event *thread;
d62a17ae 1692 unsigned int ready = 0;
1693
e7d9e44b 1694 while ((thread = thread_timer_list_first(&m->timer))) {
d62a17ae 1695 if (timercmp(timenow, &thread->u.sands, <))
e7d9e44b 1696 break;
ab01a001
DS
1697 prev = thread->u.sands;
1698 prev.tv_sec += 4;
1699 /*
1700 * If the timer would have popped 4 seconds in the
1701 * past then we are in a situation where we are
1702 * really getting behind on handling of events.
1703 * Let's log it and do the right thing with it.
1704 */
1dd08c22
DS
1705 if (timercmp(timenow, &prev, >)) {
1706 atomic_fetch_add_explicit(
1707 &thread->hist->total_starv_warn, 1,
1708 memory_order_seq_cst);
1709 if (!displayed && !thread->ignore_timer_late) {
1710 flog_warn(
1711 EC_LIB_STARVE_THREAD,
1712 "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1713 thread);
1714 displayed = true;
1715 }
ab01a001
DS
1716 }
1717
e7d9e44b 1718 thread_timer_list_pop(&m->timer);
d62a17ae 1719 thread->type = THREAD_READY;
e7d9e44b 1720 thread_list_add_tail(&m->ready, thread);
d62a17ae 1721 ready++;
1722 }
e7d9e44b 1723
d62a17ae 1724 return ready;
a48b4e6d 1725}
1726
2613abe6 1727/* process a list en masse, e.g. for event thread lists */
c284542b 1728static unsigned int thread_process(struct thread_list_head *list)
2613abe6 1729{
e6685141 1730 struct event *thread;
d62a17ae 1731 unsigned int ready = 0;
1732
c284542b 1733 while ((thread = thread_list_pop(list))) {
d62a17ae 1734 thread->type = THREAD_READY;
c284542b 1735 thread_list_add_tail(&thread->master->ready, thread);
d62a17ae 1736 ready++;
1737 }
1738 return ready;
2613abe6
PJ
1739}
1740
1741
718e3744 1742/* Fetch next ready thread. */
e6685141 1743struct event *thread_fetch(struct thread_master *m, struct event *fetch)
718e3744 1744{
e6685141 1745 struct event *thread = NULL;
d62a17ae 1746 struct timeval now;
1747 struct timeval zerotime = {0, 0};
1748 struct timeval tv;
1749 struct timeval *tw = NULL;
d81ca9a3 1750 bool eintr_p = false;
d62a17ae 1751 int num = 0;
1752
1753 do {
1754 /* Handle signals if any */
1755 if (m->handle_signals)
7cc91e67 1756 frr_sigevent_process();
d62a17ae 1757
1758 pthread_mutex_lock(&m->mtx);
1759
1760 /* Process any pending cancellation requests */
332beb64 1761 do_event_cancel(m);
d62a17ae 1762
e3c9529e
QY
1763 /*
1764 * Attempt to flush ready queue before going into poll().
1765 * This is performance-critical. Think twice before modifying.
1766 */
c284542b 1767 if ((thread = thread_list_pop(&m->ready))) {
e3c9529e
QY
1768 fetch = thread_run(m, thread, fetch);
1769 if (fetch->ref)
1770 *fetch->ref = NULL;
1771 pthread_mutex_unlock(&m->mtx);
5e822957
DS
1772 if (!m->ready_run_loop)
1773 GETRUSAGE(&m->last_getrusage);
1774 m->ready_run_loop = true;
e3c9529e
QY
1775 break;
1776 }
1777
5e822957 1778 m->ready_run_loop = false;
e3c9529e
QY
1779 /* otherwise, tick through scheduling sequence */
1780
bca37d17
QY
1781 /*
1782 * Post events to ready queue. This must come before the
1783 * following block since events should occur immediately
1784 */
d62a17ae 1785 thread_process(&m->event);
1786
bca37d17
QY
1787 /*
1788 * If there are no tasks on the ready queue, we will poll()
1789 * until a timer expires or we receive I/O, whichever comes
1790 * first. The strategy for doing this is:
d62a17ae 1791 *
1792 * - If there are events pending, set the poll() timeout to zero
1793 * - If there are no events pending, but there are timers
d279ef57
DS
1794 * pending, set the timeout to the smallest remaining time on
1795 * any timer.
d62a17ae 1796 * - If there are neither timers nor events pending, but there
d279ef57 1797 * are file descriptors pending, block indefinitely in poll()
d62a17ae 1798 * - If nothing is pending, it's time for the application to die
1799 *
1800 * In every case except the last, we need to hit poll() at least
bca37d17
QY
1801 * once per loop to avoid starvation by events
1802 */
c284542b 1803 if (!thread_list_count(&m->ready))
27d29ced 1804 tw = thread_timer_wait(&m->timer, &tv);
d62a17ae 1805
c284542b
DL
1806 if (thread_list_count(&m->ready) ||
1807 (tw && !timercmp(tw, &zerotime, >)))
d62a17ae 1808 tw = &zerotime;
1809
1810 if (!tw && m->handler.pfdcount == 0) { /* die */
1811 pthread_mutex_unlock(&m->mtx);
1812 fetch = NULL;
1813 break;
1814 }
1815
bca37d17
QY
1816 /*
1817 * Copy pollfd array + # active pollfds in it. Not necessary to
1818 * copy the array size as this is fixed.
1819 */
d62a17ae 1820 m->handler.copycount = m->handler.pfdcount;
1821 memcpy(m->handler.copy, m->handler.pfds,
1822 m->handler.copycount * sizeof(struct pollfd));
1823
e3c9529e
QY
1824 pthread_mutex_unlock(&m->mtx);
1825 {
d81ca9a3
MS
1826 eintr_p = false;
1827 num = fd_poll(m, tw, &eintr_p);
e3c9529e
QY
1828 }
1829 pthread_mutex_lock(&m->mtx);
d764d2cc 1830
e3c9529e
QY
1831 /* Handle any errors received in poll() */
1832 if (num < 0) {
d81ca9a3 1833 if (eintr_p) {
d62a17ae 1834 pthread_mutex_unlock(&m->mtx);
e3c9529e
QY
1835 /* loop around to signal handler */
1836 continue;
d62a17ae 1837 }
1838
e3c9529e 1839 /* else die */
450971aa 1840 flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
9ef9495e 1841 safe_strerror(errno));
e3c9529e
QY
1842 pthread_mutex_unlock(&m->mtx);
1843 fetch = NULL;
1844 break;
bca37d17 1845 }
d62a17ae 1846
1847 /* Post timers to ready queue. */
1848 monotime(&now);
e7d9e44b 1849 thread_process_timers(m, &now);
d62a17ae 1850
1851 /* Post I/O to ready queue. */
1852 if (num > 0)
1853 thread_process_io(m, num);
1854
d62a17ae 1855 pthread_mutex_unlock(&m->mtx);
1856
1857 } while (!thread && m->spin);
1858
1859 return fetch;
718e3744 1860}
1861
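
As a usage sketch (modeled on how frr_run() and the daemons drive the master; not code from this file), the fetch/call pattern is:

	struct event thread;

	while (thread_fetch(master, &thread))
		thread_call(&thread);

thread_fetch() blocks in poll() until something is runnable, copies the task into the caller-provided storage, and returns NULL when there is nothing left to schedule or poll() fails fatally, which ends the daemon's main loop.
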
d62a17ae 1862static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
62f44022 1863{
d62a17ae 1864 return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1865 + (a.tv_usec - b.tv_usec));
62f44022
QY
1866}
1867
d62a17ae 1868unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1869 unsigned long *cputime)
718e3744 1870{
6418e2d3 1871#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
4e2839de
DS
1872
1873#ifdef __FreeBSD__
1874 /*
1875 * FreeBSD appears to have an issue when calling clock_gettime
1876 * with CLOCK_THREAD_CPUTIME_ID really close to each other
 1877 * occasionally the now time will be before the start time.
 1878 * This is not good, and FRR ends up flagging CPU HOGs
 1879 * when the subtraction wraps to very large numbers.
1880 *
 1881 * What we are going to do here is cheat a little bit:
 1882 * notice when this has happened and correct the values
 1883 * so that it is impossible for it to happen.
1884 */
1885 if (start->cpu.tv_sec == now->cpu.tv_sec &&
1886 start->cpu.tv_nsec > now->cpu.tv_nsec)
1887 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1888 else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1889 now->cpu.tv_sec = start->cpu.tv_sec;
1890 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1891 }
1892#endif
6418e2d3
DL
1893 *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1894 + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1895#else
d62a17ae 1896 /* This is 'user + sys' time. */
1897 *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1898 + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
6418e2d3 1899#endif
d62a17ae 1900 return timeval_elapsed(now->real, start->real);
8b70d0b0 1901}
1902
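
A hedged sketch of using this helper directly, mirroring the accounting thread_call() does further below; the GETRUSAGE wrapper and RUSAGE_T type are the ones used elsewhere in this file, and do_expensive_work() is hypothetical:

	RUSAGE_T before, after;
	unsigned long walltime, cputime;

	GETRUSAGE(&before);
	do_expensive_work();
	GETRUSAGE(&after);

	walltime = thread_consumed_time(&after, &before, &cputime);
	zlog_debug("section: %lu us wall, %lu us cpu", walltime, cputime);
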
50596be0
DS
 1903 /* We should aim to yield after 'yield' microseconds, which defaults
 1904 to THREAD_YIELD_TIME_SLOT.
8b70d0b0 1905 Note: we are using real (wall clock) time for this calculation.
1906 It could be argued that CPU time may make more sense in certain
1907 contexts. The things to consider are whether the thread may have
1908 blocked (in which case wall time increases, but CPU time does not),
1909 or whether the system is heavily loaded with other processes competing
d62a17ae 1910 for CPU time. On balance, wall clock time seems to make sense.
8b70d0b0 1911 Plus it has the added benefit that gettimeofday should be faster
1912 than calling getrusage. */
e6685141 1913int thread_should_yield(struct event *thread)
718e3744 1914{
d62a17ae 1915 int result;
cb1991af 1916 frr_with_mutex (&thread->mtx) {
d62a17ae 1917 result = monotime_since(&thread->real, NULL)
1918 > (int64_t)thread->yield;
1919 }
d62a17ae 1920 return result;
50596be0
DS
1921}
1922
e6685141 1923void thread_set_yield_time(struct event *thread, unsigned long yield_time)
50596be0 1924{
cb1991af 1925 frr_with_mutex (&thread->mtx) {
d62a17ae 1926 thread->yield = yield_time;
1927 }
718e3744 1928}
1929
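
A hedged sketch of the intended cooperative-yield pattern (the work-queue context, helper, and t_work reference are hypothetical; thread_add_event() is the usual scheduling macro):

static void work_queue_run(struct event *thread)
{
	struct work_ctx *ctx = THREAD_ARG(thread);

	while (process_one_item(ctx)) {
		/* once we have used up thread->yield of wall-clock time,
		 * hand control back and reschedule ourselves */
		if (thread_should_yield(thread)) {
			thread_add_event(ctx->master, work_queue_run, ctx, 0,
					 &ctx->t_work);
			return;
		}
	}
}

A task that expects to run longer can raise its budget first with thread_set_yield_time(thread, ...).
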
d62a17ae 1930void thread_getrusage(RUSAGE_T *r)
db9c0df9 1931{
6418e2d3
DL
1932 monotime(&r->real);
1933 if (!cputime_enabled) {
1934 memset(&r->cpu, 0, sizeof(r->cpu));
1935 return;
1936 }
1937
1938#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1939 /* not currently implemented in Linux's vDSO, but maybe at some point
1940 * in the future?
1941 */
1942 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1943#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
231db9a6
DS
1944#if defined RUSAGE_THREAD
1945#define FRR_RUSAGE RUSAGE_THREAD
1946#else
1947#define FRR_RUSAGE RUSAGE_SELF
1948#endif
6418e2d3
DL
1949 getrusage(FRR_RUSAGE, &(r->cpu));
1950#endif
db9c0df9
PJ
1951}
1952
fbcac826
QY
1953/*
1954 * Call a thread.
1955 *
1956 * This function will atomically update the thread's usage history. At present
1957 * this is the only spot where usage history is written. Nevertheless the code
1958 * has been written such that the introduction of writers in the future should
1959 * not need to update it provided the writers atomically perform only the
1960 * operations done here, i.e. updating the total and maximum times. In
1961 * particular, the maximum real and cpu times must be monotonically increasing
1962 * or this code is not correct.
1963 */
e6685141 1964void thread_call(struct event *thread)
718e3744 1965{
d62a17ae 1966 RUSAGE_T before, after;
cc8b13a0 1967
45f01188
DL
1968 /* if the thread being called is the CLI, it may change cputime_enabled
1969 * ("service cputime-stats" command), which can result in nonsensical
1970 * and very confusing warnings
1971 */
1972 bool cputime_enabled_here = cputime_enabled;
1973
5e822957
DS
1974 if (thread->master->ready_run_loop)
1975 before = thread->master->last_getrusage;
1976 else
1977 GETRUSAGE(&before);
1978
d62a17ae 1979 thread->real = before.real;
718e3744 1980
6c3aa850
DL
1981 frrtrace(9, frr_libfrr, thread_call, thread->master,
1982 thread->xref->funcname, thread->xref->xref.file,
1983 thread->xref->xref.line, NULL, thread->u.fd,
c7bb4f00 1984 thread->u.val, thread->arg, thread->u.sands.tv_sec);
abf96a87 1985
d62a17ae 1986 pthread_setspecific(thread_current, thread);
1987 (*thread->func)(thread);
1988 pthread_setspecific(thread_current, NULL);
718e3744 1989
d62a17ae 1990 GETRUSAGE(&after);
5e822957 1991 thread->master->last_getrusage = after;
718e3744 1992
45f01188
DL
1993 unsigned long walltime, cputime;
1994 unsigned long exp;
fbcac826 1995
45f01188
DL
1996 walltime = thread_consumed_time(&after, &before, &cputime);
1997
1998 /* update walltime */
1999 atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
fbcac826
QY
2000 memory_order_seq_cst);
2001 exp = atomic_load_explicit(&thread->hist->real.max,
2002 memory_order_seq_cst);
45f01188 2003 while (exp < walltime
fbcac826 2004 && !atomic_compare_exchange_weak_explicit(
45f01188
DL
2005 &thread->hist->real.max, &exp, walltime,
2006 memory_order_seq_cst, memory_order_seq_cst))
fbcac826
QY
2007 ;
2008
45f01188
DL
2009 if (cputime_enabled_here && cputime_enabled) {
2010 /* update cputime */
2011 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2012 memory_order_seq_cst);
2013 exp = atomic_load_explicit(&thread->hist->cpu.max,
2014 memory_order_seq_cst);
2015 while (exp < cputime
2016 && !atomic_compare_exchange_weak_explicit(
2017 &thread->hist->cpu.max, &exp, cputime,
2018 memory_order_seq_cst, memory_order_seq_cst))
2019 ;
2020 }
fbcac826
QY
2021
2022 atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2023 memory_order_seq_cst);
2024 atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2025 memory_order_seq_cst);
718e3744 2026
45f01188
DL
2027 if (cputime_enabled_here && cputime_enabled && cputime_threshold
2028 && cputime > cputime_threshold) {
d62a17ae 2029 /*
45f01188
DL
2030 * We have a CPU Hog on our hands. The time FRR has spent
2031 * doing actual work (not sleeping) is greater than 5 seconds.
d62a17ae 2032 * Whinge about it now, so we're aware this is yet another task
2033 * to fix.
2034 */
9b8e01ca
DS
2035 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2036 1, memory_order_seq_cst);
9ef9495e 2037 flog_warn(
039d547f
DS
2038 EC_LIB_SLOW_THREAD_CPU,
2039 "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2040 thread->xref->funcname, (unsigned long)thread->func,
45f01188
DL
2041 walltime / 1000, cputime / 1000);
2042
2043 } else if (walltime_threshold && walltime > walltime_threshold) {
039d547f 2044 /*
45f01188
DL
2045 * The runtime for a task is greater than 5 seconds, but the
2046 * cpu time is under 5 seconds. Let's whine about this because
2047 * this could imply some sort of scheduling issue.
039d547f 2048 */
9b8e01ca
DS
2049 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2050 1, memory_order_seq_cst);
039d547f
DS
2051 flog_warn(
2052 EC_LIB_SLOW_THREAD_WALL,
2053 "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
60a3efec 2054 thread->xref->funcname, (unsigned long)thread->func,
45f01188 2055 walltime / 1000, cputime / 1000);
d62a17ae 2056 }
718e3744 2057}
2058
2059/* Execute thread */
60a3efec 2060void _thread_execute(const struct xref_threadsched *xref,
e6685141 2061 struct thread_master *m, void (*func)(struct event *),
60a3efec 2062 void *arg, int val)
718e3744 2063{
e6685141 2064 struct event *thread;
718e3744 2065
c4345fbf 2066 /* Get or allocate new thread to execute. */
cb1991af 2067 frr_with_mutex (&m->mtx) {
60a3efec 2068 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
9c7753e4 2069
c4345fbf 2070 /* Set its event value. */
cb1991af 2071 frr_with_mutex (&thread->mtx) {
c4345fbf
RZ
2072 thread->add_type = THREAD_EXECUTE;
2073 thread->u.val = val;
2074 thread->ref = &thread;
2075 }
c4345fbf 2076 }
f7c62e11 2077
c4345fbf
RZ
2078 /* Execute thread doing all accounting. */
2079 thread_call(thread);
9c7753e4 2080
c4345fbf
RZ
2081 /* Give back or free thread. */
2082 thread_add_unuse(m, thread);
718e3744 2083}
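
A hedged usage sketch, assuming the thread_execute() convenience macro from the header that supplies the xref argument; the callback and argument are hypothetical:

	thread_execute(master, handle_reload, reload_ctx, 0);

Unlike thread_add_event(), the callback runs synchronously, but it still goes through thread_call() above and therefore shows up in the CPU statistics.
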
1543c387
MS
2084
2085/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2086void debug_signals(const sigset_t *sigs)
2087{
2088 int i, found;
2089 sigset_t tmpsigs;
2090 char buf[300];
2091
2092 /*
2093 * We're only looking at the non-realtime signals here, so we need
2094 * some limit value. Platform differences mean at some point we just
2095 * need to pick a reasonable value.
2096 */
2097#if defined SIGRTMIN
2098# define LAST_SIGNAL SIGRTMIN
2099#else
2100# define LAST_SIGNAL 32
2101#endif
2102
2103
2104 if (sigs == NULL) {
2105 sigemptyset(&tmpsigs);
2106 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2107 sigs = &tmpsigs;
2108 }
2109
2110 found = 0;
2111 buf[0] = '\0';
2112
2113 for (i = 0; i < LAST_SIGNAL; i++) {
2114 char tmp[20];
2115
2116 if (sigismember(sigs, i) > 0) {
2117 if (found > 0)
2118 strlcat(buf, ",", sizeof(buf));
2119 snprintf(tmp, sizeof(tmp), "%d", i);
2120 strlcat(buf, tmp, sizeof(buf));
2121 found++;
2122 }
2123 }
2124
2125 if (found == 0)
2126 snprintf(buf, sizeof(buf), "<none>");
2127
2128 zlog_debug("%s: %s", __func__, buf);
2129}
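
Illustrative calls (no new API assumed): passing NULL logs the calling pthread's currently blocked signals; an explicit set can also be handed in.

	debug_signals(NULL);

	sigset_t set;

	sigemptyset(&set);
	sigaddset(&set, SIGTERM);
	debug_signals(&set);
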
a505383d 2130
f59e6882 2131static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
e6685141 2132 const struct event *thread)
f59e6882
DL
2133{
2134 static const char * const types[] = {
2135 [THREAD_READ] = "read",
2136 [THREAD_WRITE] = "write",
2137 [THREAD_TIMER] = "timer",
2138 [THREAD_EVENT] = "event",
2139 [THREAD_READY] = "ready",
2140 [THREAD_UNUSED] = "unused",
2141 [THREAD_EXECUTE] = "exec",
2142 };
2143 ssize_t rv = 0;
2144 char info[16] = "";
2145
2146 if (!thread)
2147 return bputs(buf, "{(thread *)NULL}");
2148
2149 rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2150
2151 if (thread->type < array_size(types) && types[thread->type])
2152 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2153 else
2154 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2155
2156 switch (thread->type) {
2157 case THREAD_READ:
2158 case THREAD_WRITE:
2159 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2160 break;
2161
2162 case THREAD_TIMER:
2163 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2164 break;
2165 }
2166
2167 rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2168 thread->xref->funcname, thread->xref->dest,
2169 thread->xref->xref.file, thread->xref->xref.line);
2170 return rv;
2171}
2172
54929fd3 2173printfrr_ext_autoreg_p("TH", printfrr_thread);
f59e6882
DL
2174static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2175 const void *ptr)
2176{
e6685141 2177 const struct event *thread = ptr;
f59e6882
DL
2178 struct timespec remain = {};
2179
2180 if (ea->fmt[0] == 'D') {
2181 ea->fmt++;
2182 return printfrr_thread_dbg(buf, ea, thread);
2183 }
2184
2185 if (!thread) {
2186 /* need to jump over time formatting flag characters in the
2187 * input format string, i.e. adjust ea->fmt!
2188 */
2189 printfrr_time(buf, ea, &remain,
2190 TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2191 return bputch(buf, '-');
2192 }
2193
2194 TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2195 return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2196}
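
Usage sketch for the extension registered above (the zlog calls and the t_update pointer are illustrative): plain "%pTH" renders the time remaining until the referenced timer pops, while "%pTHD" prints the full debug description produced by printfrr_thread_dbg().

	zlog_debug("t_update fires in %pTH", ctx->t_update);
	zlog_debug("current task: %pTHD", thread);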