]> git.proxmox.com Git - mirror_frr.git/blame - lib/thread.c
Merge pull request #10537 from mjstapp/fix_dplane_strdup
[mirror_frr.git] / lib / thread.c
CommitLineData
718e3744 1/* Thread management routine
2 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
896014f4
DL
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
718e3744 19 */
20
21/* #define DEBUG */
22
23#include <zebra.h>
308d14ae 24#include <sys/resource.h>
718e3744 25
26#include "thread.h"
27#include "memory.h"
3e41733f 28#include "frrcu.h"
718e3744 29#include "log.h"
e04ab74d 30#include "hash.h"
31#include "command.h"
05c447dd 32#include "sigevent.h"
3bf2673b 33#include "network.h"
bd74dc61 34#include "jhash.h"
fbcac826 35#include "frratomic.h"
00dffa8c 36#include "frr_pthread.h"
9ef9495e 37#include "lib_errors.h"
912d45a1 38#include "libfrr_trace.h"
1a9f340b 39#include "libfrr.h"
d6be5fb9 40
bf8d3d6a
DL
41DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
42DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
43DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
44DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");
4a1ab8e4 45
960b9a53 46DECLARE_LIST(thread_list, struct thread, threaditem);
c284542b 47
aea25d1e
MS
/*
 * One pending task-cancellation request.
 * NOTE(review): the consumers of this struct are outside this part of the
 * file; the per-field notes below are assumptions to confirm there.
 */
struct cancel_req {
	/* presumably a mask of THREAD_CANCEL_FLAG_* values -- confirm */
	int flags;
	/* presumably the specific task to cancel, if any -- confirm */
	struct thread *thread;
	/* presumably an event argument used to select tasks -- confirm */
	void *eventobj;
	/* presumably the caller's back-reference to the task -- confirm */
	struct thread **threadref;
};
54
55/* Flags for task cancellation */
56#define THREAD_CANCEL_FLAG_READY 0x01
57
27d29ced
DL
58static int thread_timer_cmp(const struct thread *a, const struct thread *b)
59{
60 if (a->u.sands.tv_sec < b->u.sands.tv_sec)
61 return -1;
62 if (a->u.sands.tv_sec > b->u.sands.tv_sec)
63 return 1;
64 if (a->u.sands.tv_usec < b->u.sands.tv_usec)
65 return -1;
66 if (a->u.sands.tv_usec > b->u.sands.tv_usec)
67 return 1;
68 return 0;
69}
70
960b9a53 71DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);
27d29ced 72
3b96b781
HT
73#if defined(__APPLE__)
74#include <mach/mach.h>
75#include <mach/mach_time.h>
76#endif
77
/*
 * Wake up the pthread that owns thread_master 'm' by writing a single byte
 * to the write end of its io_pipe; fd_poll() polls the read end and drains
 * it.  The result of write() is deliberately not checked.
 *
 * The expansion has no trailing semicolon so that "AWAKEN(m);" is exactly
 * one statement and is safe in un-braced if/else bodies.
 */
#define AWAKEN(m)                                                              \
	do {                                                                   \
		const unsigned char wakebyte = 0x01;                           \
		write((m)->io_pipe[1], &wakebyte, 1);                          \
	} while (0)
3bf2673b 83
62f44022 84/* control variable for initializer */
c17faa4b 85static pthread_once_t init_once = PTHREAD_ONCE_INIT;
e0bebc7c 86pthread_key_t thread_current;
6b0655a2 87
c17faa4b 88static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
62f44022
QY
89static struct list *masters;
90
6655966d 91static void thread_free(struct thread_master *master, struct thread *thread);
6b0655a2 92
45f01188
DL
93#ifndef EXCLUDE_CPU_TIME
94#define EXCLUDE_CPU_TIME 0
95#endif
96#ifndef CONSUMED_TIME_CHECK
97#define CONSUMED_TIME_CHECK 0
98#endif
99
100bool cputime_enabled = !EXCLUDE_CPU_TIME;
101unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
102unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
103
62f44022 104/* CLI start ---------------------------------------------------------------- */
45f01188
DL
105#ifndef VTYSH_EXTRACT_PL
106#include "lib/thread_clippy.c"
107#endif
108
d8b87afe 109static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
e04ab74d 110{
883cc51d 111 int size = sizeof(a->func);
bd74dc61
DS
112
113 return jhash(&a->func, size, 0);
e04ab74d 114}
115
74df8d6d 116static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
d62a17ae 117 const struct cpu_thread_history *b)
e04ab74d 118{
d62a17ae 119 return a->func == b->func;
e04ab74d 120}
121
d62a17ae 122static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
e04ab74d 123{
d62a17ae 124 struct cpu_thread_history *new;
125 new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
126 new->func = a->func;
127 new->funcname = a->funcname;
128 return new;
e04ab74d 129}
130
d62a17ae 131static void cpu_record_hash_free(void *a)
228da428 132{
d62a17ae 133 struct cpu_thread_history *hist = a;
134
135 XFREE(MTYPE_THREAD_STATS, hist);
228da428
CC
136}
137
d62a17ae 138static void vty_out_cpu_thread_history(struct vty *vty,
139 struct cpu_thread_history *a)
e04ab74d 140{
9b8e01ca 141 vty_out(vty, "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu",
72327cf3 142 a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
9b8e01ca
DS
143 a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
144 (a->real.total / a->total_calls), a->real.max,
145 a->total_cpu_warn, a->total_wall_warn);
146 vty_out(vty, " %c%c%c%c%c %s\n",
d62a17ae 147 a->types & (1 << THREAD_READ) ? 'R' : ' ',
148 a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
149 a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
150 a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
151 a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
e04ab74d 152}
153
e3b78da8 154static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
e04ab74d 155{
d62a17ae 156 struct cpu_thread_history *totals = args[0];
fbcac826 157 struct cpu_thread_history copy;
d62a17ae 158 struct vty *vty = args[1];
fbcac826 159 uint8_t *filter = args[2];
d62a17ae 160
161 struct cpu_thread_history *a = bucket->data;
162
fbcac826
QY
163 copy.total_active =
164 atomic_load_explicit(&a->total_active, memory_order_seq_cst);
165 copy.total_calls =
166 atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
9b8e01ca
DS
167 copy.total_cpu_warn =
168 atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
169 copy.total_wall_warn =
170 atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
fbcac826
QY
171 copy.cpu.total =
172 atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
173 copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
174 copy.real.total =
175 atomic_load_explicit(&a->real.total, memory_order_seq_cst);
176 copy.real.max =
177 atomic_load_explicit(&a->real.max, memory_order_seq_cst);
178 copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
179 copy.funcname = a->funcname;
180
181 if (!(copy.types & *filter))
d62a17ae 182 return;
fbcac826
QY
183
184 vty_out_cpu_thread_history(vty, &copy);
185 totals->total_active += copy.total_active;
186 totals->total_calls += copy.total_calls;
9b8e01ca
DS
187 totals->total_cpu_warn += copy.total_cpu_warn;
188 totals->total_wall_warn += copy.total_wall_warn;
fbcac826
QY
189 totals->real.total += copy.real.total;
190 if (totals->real.max < copy.real.max)
191 totals->real.max = copy.real.max;
192 totals->cpu.total += copy.cpu.total;
193 if (totals->cpu.max < copy.cpu.max)
194 totals->cpu.max = copy.cpu.max;
e04ab74d 195}
196
fbcac826 197static void cpu_record_print(struct vty *vty, uint8_t filter)
e04ab74d 198{
d62a17ae 199 struct cpu_thread_history tmp;
200 void *args[3] = {&tmp, vty, &filter};
201 struct thread_master *m;
202 struct listnode *ln;
203
45f01188
DL
204 if (!cputime_enabled)
205 vty_out(vty,
206 "\n"
207 "Collecting CPU time statistics is currently disabled. Following statistics\n"
208 "will be zero or may display data from when collection was enabled. Use the\n"
209 " \"service cputime-stats\" command to start collecting data.\n"
210 "\nCounters and wallclock times are always maintained and should be accurate.\n");
211
0d6f7fd6 212 memset(&tmp, 0, sizeof(tmp));
d62a17ae 213 tmp.funcname = "TOTAL";
214 tmp.types = filter;
215
00dffa8c 216 frr_with_mutex(&masters_mtx) {
d62a17ae 217 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
218 const char *name = m->name ? m->name : "main";
219
220 char underline[strlen(name) + 1];
221 memset(underline, '-', sizeof(underline));
4f113d60 222 underline[sizeof(underline) - 1] = '\0';
d62a17ae 223
224 vty_out(vty, "\n");
225 vty_out(vty, "Showing statistics for pthread %s\n",
226 name);
227 vty_out(vty, "-------------------------------%s\n",
228 underline);
84d951d0 229 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 230 "CPU (user+system):", "Real (wall-clock):");
231 vty_out(vty,
232 "Active Runtime(ms) Invoked Avg uSec Max uSecs");
233 vty_out(vty, " Avg uSec Max uSecs");
9b8e01ca 234 vty_out(vty, " CPU_Warn Wall_Warn Type Thread\n");
d62a17ae 235
236 if (m->cpu_record->count)
237 hash_iterate(
238 m->cpu_record,
e3b78da8 239 (void (*)(struct hash_bucket *,
d62a17ae 240 void *))cpu_record_hash_print,
241 args);
242 else
243 vty_out(vty, "No data to display yet.\n");
244
245 vty_out(vty, "\n");
246 }
247 }
d62a17ae 248
249 vty_out(vty, "\n");
250 vty_out(vty, "Total thread statistics\n");
251 vty_out(vty, "-------------------------\n");
84d951d0 252 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 253 "CPU (user+system):", "Real (wall-clock):");
254 vty_out(vty, "Active Runtime(ms) Invoked Avg uSec Max uSecs");
9b8e01ca 255 vty_out(vty, " Avg uSec Max uSecs CPU_Warn Wall_Warn");
d62a17ae 256 vty_out(vty, " Type Thread\n");
257
258 if (tmp.total_calls > 0)
259 vty_out_cpu_thread_history(vty, &tmp);
e04ab74d 260}
261
e3b78da8 262static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
e276eb82 263{
fbcac826 264 uint8_t *filter = args[0];
d62a17ae 265 struct hash *cpu_record = args[1];
266
267 struct cpu_thread_history *a = bucket->data;
62f44022 268
d62a17ae 269 if (!(a->types & *filter))
270 return;
f48f65d2 271
d62a17ae 272 hash_release(cpu_record, bucket->data);
e276eb82
PJ
273}
274
fbcac826 275static void cpu_record_clear(uint8_t filter)
e276eb82 276{
fbcac826 277 uint8_t *tmp = &filter;
d62a17ae 278 struct thread_master *m;
279 struct listnode *ln;
280
00dffa8c 281 frr_with_mutex(&masters_mtx) {
d62a17ae 282 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
00dffa8c 283 frr_with_mutex(&m->mtx) {
d62a17ae 284 void *args[2] = {tmp, m->cpu_record};
285 hash_iterate(
286 m->cpu_record,
e3b78da8 287 (void (*)(struct hash_bucket *,
d62a17ae 288 void *))cpu_record_hash_clear,
289 args);
290 }
d62a17ae 291 }
292 }
62f44022
QY
293}
294
fbcac826 295static uint8_t parse_filter(const char *filterstr)
62f44022 296{
d62a17ae 297 int i = 0;
298 int filter = 0;
299
300 while (filterstr[i] != '\0') {
301 switch (filterstr[i]) {
302 case 'r':
303 case 'R':
304 filter |= (1 << THREAD_READ);
305 break;
306 case 'w':
307 case 'W':
308 filter |= (1 << THREAD_WRITE);
309 break;
310 case 't':
311 case 'T':
312 filter |= (1 << THREAD_TIMER);
313 break;
314 case 'e':
315 case 'E':
316 filter |= (1 << THREAD_EVENT);
317 break;
318 case 'x':
319 case 'X':
320 filter |= (1 << THREAD_EXECUTE);
321 break;
322 default:
323 break;
324 }
325 ++i;
326 }
327 return filter;
62f44022
QY
328}
329
ee4dcee8
DL
330DEFUN_NOSH (show_thread_cpu,
331 show_thread_cpu_cmd,
332 "show thread cpu [FILTER]",
333 SHOW_STR
334 "Thread information\n"
335 "Thread CPU usage\n"
336 "Display filter (rwtex)\n")
62f44022 337{
fbcac826 338 uint8_t filter = (uint8_t)-1U;
d62a17ae 339 int idx = 0;
340
341 if (argv_find(argv, argc, "FILTER", &idx)) {
342 filter = parse_filter(argv[idx]->arg);
343 if (!filter) {
344 vty_out(vty,
3efd0893 345 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
d62a17ae 346 argv[idx]->arg);
347 return CMD_WARNING;
348 }
349 }
350
351 cpu_record_print(vty, filter);
352 return CMD_SUCCESS;
e276eb82 353}
45f01188
DL
354
355DEFPY (service_cputime_stats,
356 service_cputime_stats_cmd,
357 "[no] service cputime-stats",
358 NO_STR
359 "Set up miscellaneous service\n"
360 "Collect CPU usage statistics\n")
361{
362 cputime_enabled = !no;
363 return CMD_SUCCESS;
364}
365
366DEFPY (service_cputime_warning,
367 service_cputime_warning_cmd,
368 "[no] service cputime-warning (1-4294967295)",
369 NO_STR
370 "Set up miscellaneous service\n"
371 "Warn for tasks exceeding CPU usage threshold\n"
372 "Warning threshold in milliseconds\n")
373{
374 if (no)
375 cputime_threshold = 0;
376 else
377 cputime_threshold = cputime_warning * 1000;
378 return CMD_SUCCESS;
379}
380
381ALIAS (service_cputime_warning,
382 no_service_cputime_warning_cmd,
383 "no service cputime-warning",
384 NO_STR
385 "Set up miscellaneous service\n"
386 "Warn for tasks exceeding CPU usage threshold\n")
387
388DEFPY (service_walltime_warning,
389 service_walltime_warning_cmd,
390 "[no] service walltime-warning (1-4294967295)",
391 NO_STR
392 "Set up miscellaneous service\n"
393 "Warn for tasks exceeding total wallclock threshold\n"
394 "Warning threshold in milliseconds\n")
395{
396 if (no)
397 walltime_threshold = 0;
398 else
399 walltime_threshold = walltime_warning * 1000;
400 return CMD_SUCCESS;
401}
402
403ALIAS (service_walltime_warning,
404 no_service_walltime_warning_cmd,
405 "no service walltime-warning",
406 NO_STR
407 "Set up miscellaneous service\n"
408 "Warn for tasks exceeding total wallclock threshold\n")
e276eb82 409
8872626b
DS
410static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
411{
412 const char *name = m->name ? m->name : "main";
413 char underline[strlen(name) + 1];
a0b36ae6 414 struct thread *thread;
8872626b
DS
415 uint32_t i;
416
417 memset(underline, '-', sizeof(underline));
418 underline[sizeof(underline) - 1] = '\0';
419
420 vty_out(vty, "\nShowing poll FD's for %s\n", name);
421 vty_out(vty, "----------------------%s\n", underline);
6c19478a
DS
422 vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
423 m->fd_limit);
a0b36ae6
DS
424 for (i = 0; i < m->handler.pfdcount; i++) {
425 vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
426 m->handler.pfds[i].fd, m->handler.pfds[i].events,
8872626b 427 m->handler.pfds[i].revents);
a0b36ae6
DS
428
429 if (m->handler.pfds[i].events & POLLIN) {
430 thread = m->read[m->handler.pfds[i].fd];
431
432 if (!thread)
433 vty_out(vty, "ERROR ");
434 else
60a3efec 435 vty_out(vty, "%s ", thread->xref->funcname);
a0b36ae6
DS
436 } else
437 vty_out(vty, " ");
438
439 if (m->handler.pfds[i].events & POLLOUT) {
440 thread = m->write[m->handler.pfds[i].fd];
441
442 if (!thread)
443 vty_out(vty, "ERROR\n");
444 else
60a3efec 445 vty_out(vty, "%s\n", thread->xref->funcname);
a0b36ae6
DS
446 } else
447 vty_out(vty, "\n");
448 }
8872626b
DS
449}
450
ee4dcee8
DL
451DEFUN_NOSH (show_thread_poll,
452 show_thread_poll_cmd,
453 "show thread poll",
454 SHOW_STR
455 "Thread information\n"
456 "Show poll FD's and information\n")
8872626b
DS
457{
458 struct listnode *node;
459 struct thread_master *m;
460
00dffa8c 461 frr_with_mutex(&masters_mtx) {
8872626b
DS
462 for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
463 show_thread_poll_helper(vty, m);
464 }
465 }
8872626b
DS
466
467 return CMD_SUCCESS;
468}
469
470
49d41a26
DS
471DEFUN (clear_thread_cpu,
472 clear_thread_cpu_cmd,
473 "clear thread cpu [FILTER]",
62f44022 474 "Clear stored data in all pthreads\n"
49d41a26
DS
475 "Thread information\n"
476 "Thread CPU usage\n"
477 "Display filter (rwtexb)\n")
e276eb82 478{
fbcac826 479 uint8_t filter = (uint8_t)-1U;
d62a17ae 480 int idx = 0;
481
482 if (argv_find(argv, argc, "FILTER", &idx)) {
483 filter = parse_filter(argv[idx]->arg);
484 if (!filter) {
485 vty_out(vty,
3efd0893 486 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
d62a17ae 487 argv[idx]->arg);
488 return CMD_WARNING;
489 }
490 }
491
492 cpu_record_clear(filter);
493 return CMD_SUCCESS;
e276eb82 494}
6b0655a2 495
d62a17ae 496void thread_cmd_init(void)
0b84f294 497{
d62a17ae 498 install_element(VIEW_NODE, &show_thread_cpu_cmd);
8872626b 499 install_element(VIEW_NODE, &show_thread_poll_cmd);
d62a17ae 500 install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
45f01188
DL
501
502 install_element(CONFIG_NODE, &service_cputime_stats_cmd);
503 install_element(CONFIG_NODE, &service_cputime_warning_cmd);
504 install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
505 install_element(CONFIG_NODE, &service_walltime_warning_cmd);
506 install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
0b84f294 507}
62f44022
QY
508/* CLI end ------------------------------------------------------------------ */
509
0b84f294 510
d62a17ae 511static void cancelreq_del(void *cr)
63ccb9cb 512{
d62a17ae 513 XFREE(MTYPE_TMP, cr);
63ccb9cb
QY
514}
515
e0bebc7c 516/* initializer, only ever called once */
4d762f26 517static void initializer(void)
e0bebc7c 518{
d62a17ae 519 pthread_key_create(&thread_current, NULL);
e0bebc7c
QY
520}
521
d62a17ae 522struct thread_master *thread_master_create(const char *name)
718e3744 523{
d62a17ae 524 struct thread_master *rv;
525 struct rlimit limit;
526
527 pthread_once(&init_once, &initializer);
528
529 rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));
d62a17ae 530
531 /* Initialize master mutex */
532 pthread_mutex_init(&rv->mtx, NULL);
533 pthread_cond_init(&rv->cancel_cond, NULL);
534
535 /* Set name */
7ffcd8bd
QY
536 name = name ? name : "default";
537 rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
d62a17ae 538
539 /* Initialize I/O task data structures */
1a9f340b
MS
540
541 /* Use configured limit if present, ulimit otherwise. */
542 rv->fd_limit = frr_get_fd_limit();
543 if (rv->fd_limit == 0) {
544 getrlimit(RLIMIT_NOFILE, &limit);
545 rv->fd_limit = (int)limit.rlim_cur;
546 }
547
a6f235f3
DS
548 rv->read = XCALLOC(MTYPE_THREAD_POLL,
549 sizeof(struct thread *) * rv->fd_limit);
550
551 rv->write = XCALLOC(MTYPE_THREAD_POLL,
552 sizeof(struct thread *) * rv->fd_limit);
d62a17ae 553
7ffcd8bd
QY
554 char tmhashname[strlen(name) + 32];
555 snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
556 name);
bd74dc61 557 rv->cpu_record = hash_create_size(
d8b87afe 558 8, (unsigned int (*)(const void *))cpu_record_hash_key,
74df8d6d 559 (bool (*)(const void *, const void *))cpu_record_hash_cmp,
7ffcd8bd 560 tmhashname);
d62a17ae 561
c284542b
DL
562 thread_list_init(&rv->event);
563 thread_list_init(&rv->ready);
564 thread_list_init(&rv->unuse);
27d29ced 565 thread_timer_list_init(&rv->timer);
d62a17ae 566
567 /* Initialize thread_fetch() settings */
568 rv->spin = true;
569 rv->handle_signals = true;
570
571 /* Set pthread owner, should be updated by actual owner */
572 rv->owner = pthread_self();
573 rv->cancel_req = list_new();
574 rv->cancel_req->del = cancelreq_del;
575 rv->canceled = true;
576
577 /* Initialize pipe poker */
578 pipe(rv->io_pipe);
579 set_nonblocking(rv->io_pipe[0]);
580 set_nonblocking(rv->io_pipe[1]);
581
582 /* Initialize data structures for poll() */
583 rv->handler.pfdsize = rv->fd_limit;
584 rv->handler.pfdcount = 0;
585 rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
586 sizeof(struct pollfd) * rv->handler.pfdsize);
587 rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
588 sizeof(struct pollfd) * rv->handler.pfdsize);
589
eff09c66 590 /* add to list of threadmasters */
00dffa8c 591 frr_with_mutex(&masters_mtx) {
eff09c66
QY
592 if (!masters)
593 masters = list_new();
594
d62a17ae 595 listnode_add(masters, rv);
596 }
d62a17ae 597
598 return rv;
718e3744 599}
600
d8a8a8de
QY
601void thread_master_set_name(struct thread_master *master, const char *name)
602{
00dffa8c 603 frr_with_mutex(&master->mtx) {
0a22ddfb 604 XFREE(MTYPE_THREAD_MASTER, master->name);
d8a8a8de
QY
605 master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
606 }
d8a8a8de
QY
607}
608
6ed04aa2
DS
609#define THREAD_UNUSED_DEPTH 10
610
718e3744 611/* Move thread to unuse list. */
d62a17ae 612static void thread_add_unuse(struct thread_master *m, struct thread *thread)
718e3744 613{
6655966d
RZ
614 pthread_mutex_t mtxc = thread->mtx;
615
d62a17ae 616 assert(m != NULL && thread != NULL);
d62a17ae 617
d62a17ae 618 thread->hist->total_active--;
6ed04aa2
DS
619 memset(thread, 0, sizeof(struct thread));
620 thread->type = THREAD_UNUSED;
621
6655966d
RZ
622 /* Restore the thread mutex context. */
623 thread->mtx = mtxc;
624
c284542b
DL
625 if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
626 thread_list_add_tail(&m->unuse, thread);
6655966d
RZ
627 return;
628 }
629
630 thread_free(m, thread);
718e3744 631}
632
/* Pop and free every task on 'list'. */
static void thread_list_free(struct thread_master *m,
			     struct thread_list_head *list)
{
	struct thread *t;

	for (t = thread_list_pop(list); t; t = thread_list_pop(list))
		thread_free(m, t);
}
642
d62a17ae 643static void thread_array_free(struct thread_master *m,
644 struct thread **thread_array)
308d14ae 645{
d62a17ae 646 struct thread *t;
647 int index;
648
649 for (index = 0; index < m->fd_limit; ++index) {
650 t = thread_array[index];
651 if (t) {
652 thread_array[index] = NULL;
6655966d 653 thread_free(m, t);
d62a17ae 654 }
655 }
a6f235f3 656 XFREE(MTYPE_THREAD_POLL, thread_array);
308d14ae
DV
657}
658
495f0b13
DS
659/*
660 * thread_master_free_unused
661 *
662 * As threads are finished with they are put on the
663 * unuse list for later reuse.
664 * If we are shutting down, Free up unused threads
665 * So we can see if we forget to shut anything off
666 */
d62a17ae 667void thread_master_free_unused(struct thread_master *m)
495f0b13 668{
00dffa8c 669 frr_with_mutex(&m->mtx) {
d62a17ae 670 struct thread *t;
c284542b 671 while ((t = thread_list_pop(&m->unuse)))
6655966d 672 thread_free(m, t);
d62a17ae 673 }
495f0b13
DS
674}
675
718e3744 676/* Stop thread scheduler. */
d62a17ae 677void thread_master_free(struct thread_master *m)
718e3744 678{
27d29ced
DL
679 struct thread *t;
680
00dffa8c 681 frr_with_mutex(&masters_mtx) {
d62a17ae 682 listnode_delete(masters, m);
eff09c66 683 if (masters->count == 0) {
6a154c88 684 list_delete(&masters);
eff09c66 685 }
d62a17ae 686 }
d62a17ae 687
688 thread_array_free(m, m->read);
689 thread_array_free(m, m->write);
27d29ced
DL
690 while ((t = thread_timer_list_pop(&m->timer)))
691 thread_free(m, t);
d62a17ae 692 thread_list_free(m, &m->event);
693 thread_list_free(m, &m->ready);
694 thread_list_free(m, &m->unuse);
695 pthread_mutex_destroy(&m->mtx);
33844bbe 696 pthread_cond_destroy(&m->cancel_cond);
d62a17ae 697 close(m->io_pipe[0]);
698 close(m->io_pipe[1]);
6a154c88 699 list_delete(&m->cancel_req);
1a0a92ea 700 m->cancel_req = NULL;
d62a17ae 701
702 hash_clean(m->cpu_record, cpu_record_hash_free);
703 hash_free(m->cpu_record);
704 m->cpu_record = NULL;
705
0a22ddfb 706 XFREE(MTYPE_THREAD_MASTER, m->name);
d62a17ae 707 XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
708 XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
709 XFREE(MTYPE_THREAD_MASTER, m);
718e3744 710}
711
78ca0342
CF
712/* Return remain time in miliseconds. */
713unsigned long thread_timer_remain_msec(struct thread *thread)
718e3744 714{
d62a17ae 715 int64_t remain;
1189d95f 716
00dffa8c 717 frr_with_mutex(&thread->mtx) {
78ca0342 718 remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
d62a17ae 719 }
1189d95f 720
d62a17ae 721 return remain < 0 ? 0 : remain;
718e3744 722}
723
78ca0342
CF
/*
 * Return the time remaining until the timer task expires, in whole seconds
 * (the millisecond value divided by 1000).
 */
unsigned long thread_timer_remain_second(struct thread *thread)
{
	unsigned long msec = thread_timer_remain_msec(thread);

	return msec / 1000LL;
}
729
d62a17ae 730struct timeval thread_timer_remain(struct thread *thread)
6ac44687 731{
d62a17ae 732 struct timeval remain;
00dffa8c 733 frr_with_mutex(&thread->mtx) {
d62a17ae 734 monotime_until(&thread->u.sands, &remain);
735 }
d62a17ae 736 return remain;
6ac44687
CF
737}
738
0447957e
AK
/*
 * Format 'sec' seconds as "hh:mm:ss" into 'buf'.
 *
 * buf:      destination buffer
 * buf_size: size of 'buf' in bytes; must be at least 8 (asserted).  Nine
 *           bytes are needed for the full string plus its terminator.
 * sec:      number of seconds to format
 *
 * Returns 0 when exactly the 8-character form was written completely.
 * Returns non-zero when the formatted text is not 8 characters long (for
 * example 100 hours or more) or when it did not fit and was truncated.
 */
static int time_hhmmss(char *buf, int buf_size, long sec)
{
	long hh;
	long mm;
	int wr;

	assert(buf_size >= 8);

	hh = sec / 3600;
	sec %= 3600;
	mm = sec / 60;
	sec %= 60;

	wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);

	/*
	 * snprintf() returns the length it wanted to write; a value of
	 * buf_size or more means the output was cut short.
	 */
	return wr != 8 || wr >= buf_size;
}
756
/*
 * Write the time left on 't_timer' into 'buf' as "hh:mm:ss", or the
 * placeholder "--:--:--" when 't_timer' is NULL.  Returns 'buf'.
 */
char *thread_timer_to_hhmmss(char *buf, int buf_size,
			     struct thread *t_timer)
{
	if (!t_timer) {
		snprintf(buf, buf_size, "--:--:--");
		return buf;
	}

	time_hhmmss(buf, buf_size, thread_timer_remain_second(t_timer));
	return buf;
}
768
718e3744 769/* Get new thread. */
d7c0a89a 770static struct thread *thread_get(struct thread_master *m, uint8_t type,
d62a17ae 771 int (*func)(struct thread *), void *arg,
60a3efec 772 const struct xref_threadsched *xref)
718e3744 773{
c284542b 774 struct thread *thread = thread_list_pop(&m->unuse);
d62a17ae 775 struct cpu_thread_history tmp;
776
777 if (!thread) {
778 thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
779 /* mutex only needs to be initialized at struct creation. */
780 pthread_mutex_init(&thread->mtx, NULL);
781 m->alloc++;
782 }
783
784 thread->type = type;
785 thread->add_type = type;
786 thread->master = m;
787 thread->arg = arg;
d62a17ae 788 thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
789 thread->ref = NULL;
e8b3a2f7 790 thread->ignore_timer_late = false;
d62a17ae 791
792 /*
793 * So if the passed in funcname is not what we have
794 * stored that means the thread->hist needs to be
795 * updated. We keep the last one around in unused
796 * under the assumption that we are probably
797 * going to immediately allocate the same
798 * type of thread.
799 * This hopefully saves us some serious
800 * hash_get lookups.
801 */
60a3efec
DL
802 if ((thread->xref && thread->xref->funcname != xref->funcname)
803 || thread->func != func) {
d62a17ae 804 tmp.func = func;
60a3efec 805 tmp.funcname = xref->funcname;
d62a17ae 806 thread->hist =
807 hash_get(m->cpu_record, &tmp,
808 (void *(*)(void *))cpu_record_hash_alloc);
809 }
810 thread->hist->total_active++;
811 thread->func = func;
60a3efec 812 thread->xref = xref;
d62a17ae 813
814 return thread;
718e3744 815}
816
6655966d
RZ
817static void thread_free(struct thread_master *master, struct thread *thread)
818{
819 /* Update statistics. */
820 assert(master->alloc > 0);
821 master->alloc--;
822
823 /* Free allocated resources. */
824 pthread_mutex_destroy(&thread->mtx);
825 XFREE(MTYPE_THREAD, thread);
826}
827
d81ca9a3
MS
828static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
829 bool *eintr_p)
209a72a6 830{
d81ca9a3
MS
831 sigset_t origsigs;
832 unsigned char trash[64];
833 nfds_t count = m->handler.copycount;
834
d279ef57
DS
835 /*
836 * If timer_wait is null here, that means poll() should block
837 * indefinitely, unless the thread_master has overridden it by setting
d62a17ae 838 * ->selectpoll_timeout.
d279ef57 839 *
d62a17ae 840 * If the value is positive, it specifies the maximum number of
d279ef57
DS
841 * milliseconds to wait. If the timeout is -1, it specifies that
842 * we should never wait and always return immediately even if no
843 * event is detected. If the value is zero, the behavior is default.
844 */
d62a17ae 845 int timeout = -1;
846
847 /* number of file descriptors with events */
848 int num;
849
850 if (timer_wait != NULL
851 && m->selectpoll_timeout == 0) // use the default value
852 timeout = (timer_wait->tv_sec * 1000)
853 + (timer_wait->tv_usec / 1000);
854 else if (m->selectpoll_timeout > 0) // use the user's timeout
855 timeout = m->selectpoll_timeout;
856 else if (m->selectpoll_timeout
857 < 0) // effect a poll (return immediately)
858 timeout = 0;
859
0bdeb5e5 860 zlog_tls_buffer_flush();
3e41733f
DL
861 rcu_read_unlock();
862 rcu_assert_read_unlocked();
863
d62a17ae 864 /* add poll pipe poker */
d81ca9a3
MS
865 assert(count + 1 < m->handler.pfdsize);
866 m->handler.copy[count].fd = m->io_pipe[0];
867 m->handler.copy[count].events = POLLIN;
868 m->handler.copy[count].revents = 0x00;
869
870 /* We need to deal with a signal-handling race here: we
871 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
872 * that may arrive just before we enter poll(). We will block the
873 * key signals, then check whether any have arrived - if so, we return
874 * before calling poll(). If not, we'll re-enable the signals
875 * in the ppoll() call.
876 */
877
878 sigemptyset(&origsigs);
879 if (m->handle_signals) {
880 /* Main pthread that handles the app signals */
881 if (frr_sigevent_check(&origsigs)) {
882 /* Signal to process - restore signal mask and return */
883 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
884 num = -1;
885 *eintr_p = true;
886 goto done;
887 }
888 } else {
889 /* Don't make any changes for the non-main pthreads */
890 pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
891 }
d62a17ae 892
d81ca9a3
MS
893#if defined(HAVE_PPOLL)
894 struct timespec ts, *tsp;
895
896 if (timeout >= 0) {
897 ts.tv_sec = timeout / 1000;
898 ts.tv_nsec = (timeout % 1000) * 1000000;
899 tsp = &ts;
900 } else
901 tsp = NULL;
902
903 num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
904 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
905#else
906 /* Not ideal - there is a race after we restore the signal mask */
907 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
908 num = poll(m->handler.copy, count + 1, timeout);
909#endif
d62a17ae 910
d81ca9a3
MS
911done:
912
913 if (num < 0 && errno == EINTR)
914 *eintr_p = true;
915
916 if (num > 0 && m->handler.copy[count].revents != 0 && num--)
d62a17ae 917 while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
918 ;
919
3e41733f
DL
920 rcu_read_lock();
921
d62a17ae 922 return num;
209a72a6
DS
923}
924
718e3744 925/* Add new read thread. */
ee1455dd
IR
926void _thread_add_read_write(const struct xref_threadsched *xref,
927 struct thread_master *m,
928 int (*func)(struct thread *), void *arg, int fd,
929 struct thread **t_ptr)
718e3744 930{
60a3efec 931 int dir = xref->thread_type;
d62a17ae 932 struct thread *thread = NULL;
1ef14bee 933 struct thread **thread_array;
d62a17ae 934
abf96a87 935 if (dir == THREAD_READ)
6c3aa850
DL
936 frrtrace(9, frr_libfrr, schedule_read, m,
937 xref->funcname, xref->xref.file, xref->xref.line,
938 t_ptr, fd, 0, arg, 0);
abf96a87 939 else
6c3aa850
DL
940 frrtrace(9, frr_libfrr, schedule_write, m,
941 xref->funcname, xref->xref.file, xref->xref.line,
942 t_ptr, fd, 0, arg, 0);
abf96a87 943
188acbb9
DS
944 assert(fd >= 0);
945 if (fd >= m->fd_limit)
946 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
947
00dffa8c
DL
948 frr_with_mutex(&m->mtx) {
949 if (t_ptr && *t_ptr)
950 // thread is already scheduled; don't reschedule
951 break;
d62a17ae 952
953 /* default to a new pollfd */
954 nfds_t queuepos = m->handler.pfdcount;
955
1ef14bee
DS
956 if (dir == THREAD_READ)
957 thread_array = m->read;
958 else
959 thread_array = m->write;
960
d62a17ae 961 /* if we already have a pollfd for our file descriptor, find and
962 * use it */
963 for (nfds_t i = 0; i < m->handler.pfdcount; i++)
964 if (m->handler.pfds[i].fd == fd) {
965 queuepos = i;
1ef14bee
DS
966
967#ifdef DEV_BUILD
968 /*
969 * What happens if we have a thread already
970 * created for this event?
971 */
972 if (thread_array[fd])
973 assert(!"Thread already scheduled for file descriptor");
974#endif
d62a17ae 975 break;
976 }
977
978 /* make sure we have room for this fd + pipe poker fd */
979 assert(queuepos + 1 < m->handler.pfdsize);
980
60a3efec 981 thread = thread_get(m, dir, func, arg, xref);
d62a17ae 982
983 m->handler.pfds[queuepos].fd = fd;
984 m->handler.pfds[queuepos].events |=
985 (dir == THREAD_READ ? POLLIN : POLLOUT);
986
987 if (queuepos == m->handler.pfdcount)
988 m->handler.pfdcount++;
989
990 if (thread) {
00dffa8c 991 frr_with_mutex(&thread->mtx) {
d62a17ae 992 thread->u.fd = fd;
1ef14bee 993 thread_array[thread->u.fd] = thread;
d62a17ae 994 }
d62a17ae 995
996 if (t_ptr) {
997 *t_ptr = thread;
998 thread->ref = t_ptr;
999 }
1000 }
1001
1002 AWAKEN(m);
1003 }
718e3744 1004}
1005
ee1455dd
IR
1006static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
1007 struct thread_master *m,
1008 int (*func)(struct thread *), void *arg,
1009 struct timeval *time_relative,
1010 struct thread **t_ptr)
718e3744 1011{
d62a17ae 1012 struct thread *thread;
96fe578a 1013 struct timeval t;
d62a17ae 1014
1015 assert(m != NULL);
1016
d62a17ae 1017 assert(time_relative);
1018
6c3aa850
DL
1019 frrtrace(9, frr_libfrr, schedule_timer, m,
1020 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1021 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
abf96a87 1022
96fe578a
MS
1023 /* Compute expiration/deadline time. */
1024 monotime(&t);
1025 timeradd(&t, time_relative, &t);
1026
00dffa8c
DL
1027 frr_with_mutex(&m->mtx) {
1028 if (t_ptr && *t_ptr)
d279ef57 1029 /* thread is already scheduled; don't reschedule */
ee1455dd 1030 return;
d62a17ae 1031
4322dea7 1032 thread = thread_get(m, THREAD_TIMER, func, arg, xref);
d62a17ae 1033
00dffa8c 1034 frr_with_mutex(&thread->mtx) {
96fe578a 1035 thread->u.sands = t;
27d29ced 1036 thread_timer_list_add(&m->timer, thread);
d62a17ae 1037 if (t_ptr) {
1038 *t_ptr = thread;
1039 thread->ref = t_ptr;
1040 }
1041 }
d62a17ae 1042
96fe578a
MS
1043 /* The timer list is sorted - if this new timer
1044 * might change the time we'll wait for, give the pthread
1045 * a chance to re-compute.
1046 */
1047 if (thread_timer_list_first(&m->timer) == thread)
1048 AWAKEN(m);
d62a17ae 1049 }
9e867fe6 1050}
1051
98c91ac6 1052
1053/* Add timer event thread. */
ee1455dd
IR
1054void _thread_add_timer(const struct xref_threadsched *xref,
1055 struct thread_master *m, int (*func)(struct thread *),
1056 void *arg, long timer, struct thread **t_ptr)
9e867fe6 1057{
d62a17ae 1058 struct timeval trel;
9e867fe6 1059
d62a17ae 1060 assert(m != NULL);
9e867fe6 1061
d62a17ae 1062 trel.tv_sec = timer;
1063 trel.tv_usec = 0;
9e867fe6 1064
ee1455dd 1065 _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
98c91ac6 1066}
9e867fe6 1067
98c91ac6 1068/* Add timer event thread with "millisecond" resolution */
ee1455dd
IR
1069void _thread_add_timer_msec(const struct xref_threadsched *xref,
1070 struct thread_master *m,
1071 int (*func)(struct thread *), void *arg, long timer,
1072 struct thread **t_ptr)
98c91ac6 1073{
d62a17ae 1074 struct timeval trel;
9e867fe6 1075
d62a17ae 1076 assert(m != NULL);
718e3744 1077
d62a17ae 1078 trel.tv_sec = timer / 1000;
1079 trel.tv_usec = 1000 * (timer % 1000);
98c91ac6 1080
ee1455dd 1081 _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
a48b4e6d 1082}
1083
/*
 * Schedule a timer task whose delay is supplied directly as a relative
 * struct timeval. Thin public wrapper: the timeval is forwarded unchanged
 * to _thread_add_timer_timeval().
 */
void _thread_add_timer_tv(const struct xref_threadsched *xref,
			  struct thread_master *m, int (*func)(struct thread *),
			  void *arg, struct timeval *tv, struct thread **t_ptr)
{
	struct timeval *delay = tv;

	_thread_add_timer_timeval(xref, m, func, arg, delay, t_ptr);
}
1091
718e3744 1092/* Add simple event thread. */
ee1455dd
IR
1093void _thread_add_event(const struct xref_threadsched *xref,
1094 struct thread_master *m, int (*func)(struct thread *),
1095 void *arg, int val, struct thread **t_ptr)
718e3744 1096{
00dffa8c 1097 struct thread *thread = NULL;
d62a17ae 1098
6c3aa850
DL
1099 frrtrace(9, frr_libfrr, schedule_event, m,
1100 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1101 t_ptr, 0, val, arg, 0);
abf96a87 1102
d62a17ae 1103 assert(m != NULL);
1104
00dffa8c
DL
1105 frr_with_mutex(&m->mtx) {
1106 if (t_ptr && *t_ptr)
d279ef57 1107 /* thread is already scheduled; don't reschedule */
00dffa8c 1108 break;
d62a17ae 1109
60a3efec 1110 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
00dffa8c 1111 frr_with_mutex(&thread->mtx) {
d62a17ae 1112 thread->u.val = val;
c284542b 1113 thread_list_add_tail(&m->event, thread);
d62a17ae 1114 }
d62a17ae 1115
1116 if (t_ptr) {
1117 *t_ptr = thread;
1118 thread->ref = t_ptr;
1119 }
1120
1121 AWAKEN(m);
1122 }
718e3744 1123}
1124
63ccb9cb
QY
1125/* Thread cancellation ------------------------------------------------------ */
1126
8797240e
QY
1127/**
1128 * NOT's out the .events field of pollfd corresponding to the given file
1129 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1130 *
1131 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
1132 * implementation for details.
1133 *
1134 * @param master
1135 * @param fd
1136 * @param state the event to cancel. One or more (OR'd together) of the
1137 * following:
1138 * - POLLIN
1139 * - POLLOUT
1140 */
a9318a32
MS
1141static void thread_cancel_rw(struct thread_master *master, int fd, short state,
1142 int idx_hint)
0a95a0d0 1143{
42d74538
QY
1144 bool found = false;
1145
d62a17ae 1146 /* find the index of corresponding pollfd */
1147 nfds_t i;
1148
a9318a32
MS
1149 /* Cancel POLLHUP too just in case some bozo set it */
1150 state |= POLLHUP;
1151
1152 /* Some callers know the index of the pfd already */
1153 if (idx_hint >= 0) {
1154 i = idx_hint;
1155 found = true;
1156 } else {
1157 /* Have to look for the fd in the pfd array */
1158 for (i = 0; i < master->handler.pfdcount; i++)
1159 if (master->handler.pfds[i].fd == fd) {
1160 found = true;
1161 break;
1162 }
1163 }
42d74538
QY
1164
1165 if (!found) {
1166 zlog_debug(
1167 "[!] Received cancellation request for nonexistent rw job");
1168 zlog_debug("[!] threadmaster: %s | fd: %d",
996c9314 1169 master->name ? master->name : "", fd);
42d74538
QY
1170 return;
1171 }
d62a17ae 1172
1173 /* NOT out event. */
1174 master->handler.pfds[i].events &= ~(state);
1175
1176 /* If all events are canceled, delete / resize the pollfd array. */
1177 if (master->handler.pfds[i].events == 0) {
1178 memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1179 (master->handler.pfdcount - i - 1)
1180 * sizeof(struct pollfd));
1181 master->handler.pfdcount--;
e985cda0
S
1182 master->handler.pfds[master->handler.pfdcount].fd = 0;
1183 master->handler.pfds[master->handler.pfdcount].events = 0;
d62a17ae 1184 }
1185
1186 /* If we have the same pollfd in the copy, perform the same operations,
1187 * otherwise return. */
1188 if (i >= master->handler.copycount)
1189 return;
1190
1191 master->handler.copy[i].events &= ~(state);
1192
1193 if (master->handler.copy[i].events == 0) {
1194 memmove(master->handler.copy + i, master->handler.copy + i + 1,
1195 (master->handler.copycount - i - 1)
1196 * sizeof(struct pollfd));
1197 master->handler.copycount--;
e985cda0
S
1198 master->handler.copy[master->handler.copycount].fd = 0;
1199 master->handler.copy[master->handler.copycount].events = 0;
d62a17ae 1200 }
0a95a0d0
DS
1201}
1202
a9318a32
MS
1203/*
1204 * Process task cancellation given a task argument: iterate through the
1205 * various lists of tasks, looking for any that match the argument.
1206 */
1207static void cancel_arg_helper(struct thread_master *master,
1208 const struct cancel_req *cr)
1209{
1210 struct thread *t;
1211 nfds_t i;
1212 int fd;
1213 struct pollfd *pfd;
1214
1215 /* We're only processing arg-based cancellations here. */
1216 if (cr->eventobj == NULL)
1217 return;
1218
1219 /* First process the ready lists. */
1220 frr_each_safe(thread_list, &master->event, t) {
1221 if (t->arg != cr->eventobj)
1222 continue;
1223 thread_list_del(&master->event, t);
1224 if (t->ref)
1225 *t->ref = NULL;
1226 thread_add_unuse(master, t);
1227 }
1228
1229 frr_each_safe(thread_list, &master->ready, t) {
1230 if (t->arg != cr->eventobj)
1231 continue;
1232 thread_list_del(&master->ready, t);
1233 if (t->ref)
1234 *t->ref = NULL;
1235 thread_add_unuse(master, t);
1236 }
1237
1238 /* If requested, stop here and ignore io and timers */
1239 if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
1240 return;
1241
1242 /* Check the io tasks */
1243 for (i = 0; i < master->handler.pfdcount;) {
1244 pfd = master->handler.pfds + i;
1245
1246 if (pfd->events & POLLIN)
1247 t = master->read[pfd->fd];
1248 else
1249 t = master->write[pfd->fd];
1250
1251 if (t && t->arg == cr->eventobj) {
1252 fd = pfd->fd;
1253
1254 /* Found a match to cancel: clean up fd arrays */
1255 thread_cancel_rw(master, pfd->fd, pfd->events, i);
1256
1257 /* Clean up thread arrays */
1258 master->read[fd] = NULL;
1259 master->write[fd] = NULL;
1260
1261 /* Clear caller's ref */
1262 if (t->ref)
1263 *t->ref = NULL;
1264
1265 thread_add_unuse(master, t);
1266
1267 /* Don't increment 'i' since the cancellation will have
1268 * removed the entry from the pfd array
1269 */
1270 } else
1271 i++;
1272 }
1273
1274 /* Check the timer tasks */
1275 t = thread_timer_list_first(&master->timer);
1276 while (t) {
1277 struct thread *t_next;
1278
1279 t_next = thread_timer_list_next(&master->timer, t);
1280
1281 if (t->arg == cr->eventobj) {
1282 thread_timer_list_del(&master->timer, t);
1283 if (t->ref)
1284 *t->ref = NULL;
1285 thread_add_unuse(master, t);
1286 }
1287
1288 t = t_next;
1289 }
1290}
1291
1189d95f 1292/**
63ccb9cb 1293 * Process cancellation requests.
1189d95f 1294 *
63ccb9cb
QY
1295 * This may only be run from the pthread which owns the thread_master.
1296 *
1297 * @param master the thread master to process
1298 * @REQUIRE master->mtx
1189d95f 1299 */
d62a17ae 1300static void do_thread_cancel(struct thread_master *master)
718e3744 1301{
c284542b 1302 struct thread_list_head *list = NULL;
d62a17ae 1303 struct thread **thread_array = NULL;
1304 struct thread *thread;
1305
1306 struct cancel_req *cr;
1307 struct listnode *ln;
1308 for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
d279ef57 1309 /*
a9318a32
MS
1310 * If this is an event object cancellation, search
1311 * through task lists deleting any tasks which have the
1312 * specified argument - use this handy helper function.
d279ef57 1313 */
d62a17ae 1314 if (cr->eventobj) {
a9318a32 1315 cancel_arg_helper(master, cr);
d62a17ae 1316 continue;
1317 }
1318
d279ef57
DS
1319 /*
1320 * The pointer varies depending on whether the cancellation
1321 * request was made asynchronously or not. If it was, we
1322 * need to check whether the thread even exists anymore
1323 * before cancelling it.
1324 */
d62a17ae 1325 thread = (cr->thread) ? cr->thread : *cr->threadref;
1326
1327 if (!thread)
1328 continue;
1329
1330 /* Determine the appropriate queue to cancel the thread from */
1331 switch (thread->type) {
1332 case THREAD_READ:
a9318a32 1333 thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
d62a17ae 1334 thread_array = master->read;
1335 break;
1336 case THREAD_WRITE:
a9318a32 1337 thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
d62a17ae 1338 thread_array = master->write;
1339 break;
1340 case THREAD_TIMER:
27d29ced 1341 thread_timer_list_del(&master->timer, thread);
d62a17ae 1342 break;
1343 case THREAD_EVENT:
1344 list = &master->event;
1345 break;
1346 case THREAD_READY:
1347 list = &master->ready;
1348 break;
1349 default:
1350 continue;
1351 break;
1352 }
1353
27d29ced 1354 if (list) {
c284542b 1355 thread_list_del(list, thread);
d62a17ae 1356 } else if (thread_array) {
1357 thread_array[thread->u.fd] = NULL;
d62a17ae 1358 }
1359
1360 if (thread->ref)
1361 *thread->ref = NULL;
1362
1363 thread_add_unuse(thread->master, thread);
1364 }
1365
1366 /* Delete and free all cancellation requests */
41b21bfa
MS
1367 if (master->cancel_req)
1368 list_delete_all_node(master->cancel_req);
d62a17ae 1369
1370 /* Wake up any threads which may be blocked in thread_cancel_async() */
1371 master->canceled = true;
1372 pthread_cond_broadcast(&master->cancel_cond);
718e3744 1373}
1374
a9318a32
MS
1375/*
1376 * Helper function used for multiple flavors of arg-based cancellation.
1377 */
1378static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
1379{
1380 struct cancel_req *cr;
1381
1382 assert(m->owner == pthread_self());
1383
1384 /* Only worth anything if caller supplies an arg. */
1385 if (arg == NULL)
1386 return;
1387
1388 cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1389
1390 cr->flags = flags;
1391
1392 frr_with_mutex(&m->mtx) {
1393 cr->eventobj = arg;
1394 listnode_add(m->cancel_req, cr);
1395 do_thread_cancel(m);
1396 }
1397}
1398
63ccb9cb
QY
/**
 * Cancel every task scheduled with the given argument.
 *
 * No THREAD_CANCEL_FLAG_* restriction is applied, so the helper examines
 * all of the master's task containers.
 *
 * MT-Unsafe
 *
 * @param master the thread_master to cancel from
 * @param arg the argument passed when creating the task
 */
void thread_cancel_event(struct thread_master *master, void *arg)
{
	const int no_flags = 0;

	cancel_event_helper(master, arg, no_flags);
}
d62a17ae 1411
a9318a32
MS
1412/*
1413 * Cancel ready tasks with an arg matching 'arg'
1414 *
1415 * MT-Unsafe
1416 *
1417 * @param m the thread_master to cancel from
1418 * @param arg the argument passed when creating the event
1419 */
1420void thread_cancel_event_ready(struct thread_master *m, void *arg)
1421{
1422
1423 /* Only cancel ready/event tasks */
1424 cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
63ccb9cb 1425}
1189d95f 1426
63ccb9cb
QY
1427/**
1428 * Cancel a specific task.
1429 *
1430 * MT-Unsafe
1431 *
1432 * @param thread task to cancel
1433 */
b3d6bc6e 1434void thread_cancel(struct thread **thread)
63ccb9cb 1435{
b3d6bc6e
MS
1436 struct thread_master *master;
1437
1438 if (thread == NULL || *thread == NULL)
1439 return;
1440
1441 master = (*thread)->master;
d62a17ae 1442
6c3aa850
DL
1443 frrtrace(9, frr_libfrr, thread_cancel, master,
1444 (*thread)->xref->funcname, (*thread)->xref->xref.file,
1445 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
b4d6e855 1446 (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);
abf96a87 1447
6ed04aa2
DS
1448 assert(master->owner == pthread_self());
1449
00dffa8c 1450 frr_with_mutex(&master->mtx) {
d62a17ae 1451 struct cancel_req *cr =
1452 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
b3d6bc6e 1453 cr->thread = *thread;
6ed04aa2
DS
1454 listnode_add(master->cancel_req, cr);
1455 do_thread_cancel(master);
d62a17ae 1456 }
b3d6bc6e
MS
1457
1458 *thread = NULL;
63ccb9cb 1459}
1189d95f 1460
63ccb9cb
QY
1461/**
1462 * Asynchronous cancellation.
1463 *
8797240e
QY
1464 * Called with either a struct thread ** or void * to an event argument,
1465 * this function posts the correct cancellation request and blocks until it is
1466 * serviced.
63ccb9cb
QY
1467 *
1468 * If the thread is currently running, execution blocks until it completes.
1469 *
8797240e
QY
1470 * The last two parameters are mutually exclusive, i.e. if you pass one the
1471 * other must be NULL.
1472 *
1473 * When the cancellation procedure executes on the target thread_master, the
1474 * thread * provided is checked for nullity. If it is null, the thread is
1475 * assumed to no longer exist and the cancellation request is a no-op. Thus
1476 * users of this API must pass a back-reference when scheduling the original
1477 * task.
1478 *
63ccb9cb
QY
1479 * MT-Safe
1480 *
8797240e
QY
1481 * @param master the thread master with the relevant event / task
1482 * @param thread pointer to thread to cancel
1483 * @param eventobj the event
63ccb9cb 1484 */
d62a17ae 1485void thread_cancel_async(struct thread_master *master, struct thread **thread,
1486 void *eventobj)
63ccb9cb 1487{
d62a17ae 1488 assert(!(thread && eventobj) && (thread || eventobj));
abf96a87
QY
1489
1490 if (thread && *thread)
c7bb4f00 1491 frrtrace(9, frr_libfrr, thread_cancel_async, master,
6c3aa850
DL
1492 (*thread)->xref->funcname, (*thread)->xref->xref.file,
1493 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
c7bb4f00
QY
1494 (*thread)->u.val, (*thread)->arg,
1495 (*thread)->u.sands.tv_sec);
abf96a87 1496 else
c7bb4f00
QY
1497 frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
1498 0, NULL, 0, 0, eventobj, 0);
abf96a87 1499
d62a17ae 1500 assert(master->owner != pthread_self());
1501
00dffa8c 1502 frr_with_mutex(&master->mtx) {
d62a17ae 1503 master->canceled = false;
1504
1505 if (thread) {
1506 struct cancel_req *cr =
1507 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1508 cr->threadref = thread;
1509 listnode_add(master->cancel_req, cr);
1510 } else if (eventobj) {
1511 struct cancel_req *cr =
1512 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1513 cr->eventobj = eventobj;
1514 listnode_add(master->cancel_req, cr);
1515 }
1516 AWAKEN(master);
1517
1518 while (!master->canceled)
1519 pthread_cond_wait(&master->cancel_cond, &master->mtx);
1520 }
50478845
MS
1521
1522 if (thread)
1523 *thread = NULL;
718e3744 1524}
63ccb9cb 1525/* ------------------------------------------------------------------------- */
718e3744 1526
27d29ced 1527static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
d62a17ae 1528 struct timeval *timer_val)
718e3744 1529{
27d29ced
DL
1530 if (!thread_timer_list_count(timers))
1531 return NULL;
1532
1533 struct thread *next_timer = thread_timer_list_first(timers);
1534 monotime_until(&next_timer->u.sands, timer_val);
1535 return timer_val;
718e3744 1536}
718e3744 1537
d62a17ae 1538static struct thread *thread_run(struct thread_master *m, struct thread *thread,
1539 struct thread *fetch)
718e3744 1540{
d62a17ae 1541 *fetch = *thread;
1542 thread_add_unuse(m, thread);
1543 return fetch;
718e3744 1544}
1545
d62a17ae 1546static int thread_process_io_helper(struct thread_master *m,
45f3d590
DS
1547 struct thread *thread, short state,
1548 short actual_state, int pos)
5d4ccd4e 1549{
d62a17ae 1550 struct thread **thread_array;
1551
45f3d590
DS
1552 /*
1553 * poll() clears the .events field, but the pollfd array we
1554 * pass to poll() is a copy of the one used to schedule threads.
1555 * We need to synchronize state between the two here by applying
1556 * the same changes poll() made on the copy of the "real" pollfd
1557 * array.
1558 *
1559 * This cleans up a possible infinite loop where we refuse
1560 * to respond to a poll event but poll is insistent that
1561 * we should.
1562 */
1563 m->handler.pfds[pos].events &= ~(state);
1564
1565 if (!thread) {
1566 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1567 flog_err(EC_LIB_NO_THREAD,
1d5453d6 1568 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
45f3d590 1569 m->handler.pfds[pos].fd, actual_state);
d62a17ae 1570 return 0;
45f3d590 1571 }
d62a17ae 1572
1573 if (thread->type == THREAD_READ)
1574 thread_array = m->read;
1575 else
1576 thread_array = m->write;
1577
1578 thread_array[thread->u.fd] = NULL;
c284542b 1579 thread_list_add_tail(&m->ready, thread);
d62a17ae 1580 thread->type = THREAD_READY;
45f3d590 1581
d62a17ae 1582 return 1;
5d4ccd4e
DS
1583}
1584
8797240e
QY
1585/**
1586 * Process I/O events.
1587 *
1588 * Walks through file descriptor array looking for those pollfds whose .revents
1589 * field has something interesting. Deletes any invalid file descriptors.
1590 *
1591 * @param m the thread master
1592 * @param num the number of active file descriptors (return value of poll())
1593 */
d62a17ae 1594static void thread_process_io(struct thread_master *m, unsigned int num)
0a95a0d0 1595{
d62a17ae 1596 unsigned int ready = 0;
1597 struct pollfd *pfds = m->handler.copy;
1598
1599 for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1600 /* no event for current fd? immediately continue */
1601 if (pfds[i].revents == 0)
1602 continue;
1603
1604 ready++;
1605
d279ef57
DS
1606 /*
1607 * Unless someone has called thread_cancel from another
1608 * pthread, the only thing that could have changed in
1609 * m->handler.pfds while we were asleep is the .events
1610 * field in a given pollfd. Barring thread_cancel() that
1611 * value should be a superset of the values we have in our
1612 * copy, so there's no need to update it. Similarily,
1613 * barring deletion, the fd should still be a valid index
1614 * into the master's pfds.
d142453d
DS
1615 *
1616 * We are including POLLERR here to do a READ event
1617 * this is because the read should fail and the
1618 * read function should handle it appropriately
d279ef57 1619 */
d142453d 1620 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
d62a17ae 1621 thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
45f3d590
DS
1622 pfds[i].revents, i);
1623 }
d62a17ae 1624 if (pfds[i].revents & POLLOUT)
1625 thread_process_io_helper(m, m->write[pfds[i].fd],
45f3d590 1626 POLLOUT, pfds[i].revents, i);
d62a17ae 1627
1628 /* if one of our file descriptors is garbage, remove the same
1629 * from
1630 * both pfds + update sizes and index */
1631 if (pfds[i].revents & POLLNVAL) {
1632 memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1633 (m->handler.pfdcount - i - 1)
1634 * sizeof(struct pollfd));
1635 m->handler.pfdcount--;
e985cda0
S
1636 m->handler.pfds[m->handler.pfdcount].fd = 0;
1637 m->handler.pfds[m->handler.pfdcount].events = 0;
d62a17ae 1638
1639 memmove(pfds + i, pfds + i + 1,
1640 (m->handler.copycount - i - 1)
1641 * sizeof(struct pollfd));
1642 m->handler.copycount--;
e985cda0
S
1643 m->handler.copy[m->handler.copycount].fd = 0;
1644 m->handler.copy[m->handler.copycount].events = 0;
d62a17ae 1645
1646 i--;
1647 }
1648 }
718e3744 1649}
1650
8b70d0b0 1651/* Add all timers that have popped to the ready list. */
e7d9e44b 1652static unsigned int thread_process_timers(struct thread_master *m,
d62a17ae 1653 struct timeval *timenow)
a48b4e6d 1654{
ab01a001
DS
1655 struct timeval prev = *timenow;
1656 bool displayed = false;
d62a17ae 1657 struct thread *thread;
1658 unsigned int ready = 0;
1659
e7d9e44b 1660 while ((thread = thread_timer_list_first(&m->timer))) {
d62a17ae 1661 if (timercmp(timenow, &thread->u.sands, <))
e7d9e44b 1662 break;
ab01a001
DS
1663 prev = thread->u.sands;
1664 prev.tv_sec += 4;
1665 /*
1666 * If the timer would have popped 4 seconds in the
1667 * past then we are in a situation where we are
1668 * really getting behind on handling of events.
1669 * Let's log it and do the right thing with it.
1670 */
e8b3a2f7
DS
1671 if (!displayed && !thread->ignore_timer_late &&
1672 timercmp(timenow, &prev, >)) {
1673 flog_warn(
1674 EC_LIB_STARVE_THREAD,
1675 "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1676 thread);
ab01a001
DS
1677 displayed = true;
1678 }
1679
e7d9e44b 1680 thread_timer_list_pop(&m->timer);
d62a17ae 1681 thread->type = THREAD_READY;
e7d9e44b 1682 thread_list_add_tail(&m->ready, thread);
d62a17ae 1683 ready++;
1684 }
e7d9e44b 1685
d62a17ae 1686 return ready;
a48b4e6d 1687}
1688
2613abe6 1689/* process a list en masse, e.g. for event thread lists */
c284542b 1690static unsigned int thread_process(struct thread_list_head *list)
2613abe6 1691{
d62a17ae 1692 struct thread *thread;
d62a17ae 1693 unsigned int ready = 0;
1694
c284542b 1695 while ((thread = thread_list_pop(list))) {
d62a17ae 1696 thread->type = THREAD_READY;
c284542b 1697 thread_list_add_tail(&thread->master->ready, thread);
d62a17ae 1698 ready++;
1699 }
1700 return ready;
2613abe6
PJ
1701}
1702
1703
718e3744 1704/* Fetch next ready thread. */
d62a17ae 1705struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
718e3744 1706{
d62a17ae 1707 struct thread *thread = NULL;
1708 struct timeval now;
1709 struct timeval zerotime = {0, 0};
1710 struct timeval tv;
1711 struct timeval *tw = NULL;
d81ca9a3 1712 bool eintr_p = false;
d62a17ae 1713 int num = 0;
1714
1715 do {
1716 /* Handle signals if any */
1717 if (m->handle_signals)
7cc91e67 1718 frr_sigevent_process();
d62a17ae 1719
1720 pthread_mutex_lock(&m->mtx);
1721
1722 /* Process any pending cancellation requests */
1723 do_thread_cancel(m);
1724
e3c9529e
QY
1725 /*
1726 * Attempt to flush ready queue before going into poll().
1727 * This is performance-critical. Think twice before modifying.
1728 */
c284542b 1729 if ((thread = thread_list_pop(&m->ready))) {
e3c9529e
QY
1730 fetch = thread_run(m, thread, fetch);
1731 if (fetch->ref)
1732 *fetch->ref = NULL;
1733 pthread_mutex_unlock(&m->mtx);
5e822957
DS
1734 if (!m->ready_run_loop)
1735 GETRUSAGE(&m->last_getrusage);
1736 m->ready_run_loop = true;
e3c9529e
QY
1737 break;
1738 }
1739
5e822957 1740 m->ready_run_loop = false;
e3c9529e
QY
1741 /* otherwise, tick through scheduling sequence */
1742
bca37d17
QY
1743 /*
1744 * Post events to ready queue. This must come before the
1745 * following block since events should occur immediately
1746 */
d62a17ae 1747 thread_process(&m->event);
1748
bca37d17
QY
1749 /*
1750 * If there are no tasks on the ready queue, we will poll()
1751 * until a timer expires or we receive I/O, whichever comes
1752 * first. The strategy for doing this is:
d62a17ae 1753 *
1754 * - If there are events pending, set the poll() timeout to zero
1755 * - If there are no events pending, but there are timers
d279ef57
DS
1756 * pending, set the timeout to the smallest remaining time on
1757 * any timer.
d62a17ae 1758 * - If there are neither timers nor events pending, but there
d279ef57 1759 * are file descriptors pending, block indefinitely in poll()
d62a17ae 1760 * - If nothing is pending, it's time for the application to die
1761 *
1762 * In every case except the last, we need to hit poll() at least
bca37d17
QY
1763 * once per loop to avoid starvation by events
1764 */
c284542b 1765 if (!thread_list_count(&m->ready))
27d29ced 1766 tw = thread_timer_wait(&m->timer, &tv);
d62a17ae 1767
c284542b
DL
1768 if (thread_list_count(&m->ready) ||
1769 (tw && !timercmp(tw, &zerotime, >)))
d62a17ae 1770 tw = &zerotime;
1771
1772 if (!tw && m->handler.pfdcount == 0) { /* die */
1773 pthread_mutex_unlock(&m->mtx);
1774 fetch = NULL;
1775 break;
1776 }
1777
bca37d17
QY
1778 /*
1779 * Copy pollfd array + # active pollfds in it. Not necessary to
1780 * copy the array size as this is fixed.
1781 */
d62a17ae 1782 m->handler.copycount = m->handler.pfdcount;
1783 memcpy(m->handler.copy, m->handler.pfds,
1784 m->handler.copycount * sizeof(struct pollfd));
1785
e3c9529e
QY
1786 pthread_mutex_unlock(&m->mtx);
1787 {
d81ca9a3
MS
1788 eintr_p = false;
1789 num = fd_poll(m, tw, &eintr_p);
e3c9529e
QY
1790 }
1791 pthread_mutex_lock(&m->mtx);
d764d2cc 1792
e3c9529e
QY
1793 /* Handle any errors received in poll() */
1794 if (num < 0) {
d81ca9a3 1795 if (eintr_p) {
d62a17ae 1796 pthread_mutex_unlock(&m->mtx);
e3c9529e
QY
1797 /* loop around to signal handler */
1798 continue;
d62a17ae 1799 }
1800
e3c9529e 1801 /* else die */
450971aa 1802 flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
9ef9495e 1803 safe_strerror(errno));
e3c9529e
QY
1804 pthread_mutex_unlock(&m->mtx);
1805 fetch = NULL;
1806 break;
bca37d17 1807 }
d62a17ae 1808
1809 /* Post timers to ready queue. */
1810 monotime(&now);
e7d9e44b 1811 thread_process_timers(m, &now);
d62a17ae 1812
1813 /* Post I/O to ready queue. */
1814 if (num > 0)
1815 thread_process_io(m, num);
1816
d62a17ae 1817 pthread_mutex_unlock(&m->mtx);
1818
1819 } while (!thread && m->spin);
1820
1821 return fetch;
718e3744 1822}
1823
d62a17ae 1824static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
62f44022 1825{
d62a17ae 1826 return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1827 + (a.tv_usec - b.tv_usec));
62f44022
QY
1828}
1829
d62a17ae 1830unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1831 unsigned long *cputime)
718e3744 1832{
6418e2d3
DL
1833#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1834 *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1835 + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1836#else
d62a17ae 1837 /* This is 'user + sys' time. */
1838 *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1839 + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
6418e2d3 1840#endif
d62a17ae 1841 return timeval_elapsed(now->real, start->real);
8b70d0b0 1842}
1843
50596be0
DS
1844/* We should aim to yield after yield milliseconds, which defaults
1845 to THREAD_YIELD_TIME_SLOT .
8b70d0b0 1846 Note: we are using real (wall clock) time for this calculation.
1847 It could be argued that CPU time may make more sense in certain
1848 contexts. The things to consider are whether the thread may have
1849 blocked (in which case wall time increases, but CPU time does not),
1850 or whether the system is heavily loaded with other processes competing
d62a17ae 1851 for CPU time. On balance, wall clock time seems to make sense.
8b70d0b0 1852 Plus it has the added benefit that gettimeofday should be faster
1853 than calling getrusage. */
d62a17ae 1854int thread_should_yield(struct thread *thread)
718e3744 1855{
d62a17ae 1856 int result;
00dffa8c 1857 frr_with_mutex(&thread->mtx) {
d62a17ae 1858 result = monotime_since(&thread->real, NULL)
1859 > (int64_t)thread->yield;
1860 }
d62a17ae 1861 return result;
50596be0
DS
1862}
1863
d62a17ae 1864void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
50596be0 1865{
00dffa8c 1866 frr_with_mutex(&thread->mtx) {
d62a17ae 1867 thread->yield = yield_time;
1868 }
718e3744 1869}
1870
d62a17ae 1871void thread_getrusage(RUSAGE_T *r)
db9c0df9 1872{
6418e2d3
DL
1873 monotime(&r->real);
1874 if (!cputime_enabled) {
1875 memset(&r->cpu, 0, sizeof(r->cpu));
1876 return;
1877 }
1878
1879#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1880 /* not currently implemented in Linux's vDSO, but maybe at some point
1881 * in the future?
1882 */
1883 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1884#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
231db9a6
DS
1885#if defined RUSAGE_THREAD
1886#define FRR_RUSAGE RUSAGE_THREAD
1887#else
1888#define FRR_RUSAGE RUSAGE_SELF
1889#endif
6418e2d3
DL
1890 getrusage(FRR_RUSAGE, &(r->cpu));
1891#endif
db9c0df9
PJ
1892}
1893
fbcac826
QY
1894/*
1895 * Call a thread.
1896 *
1897 * This function will atomically update the thread's usage history. At present
1898 * this is the only spot where usage history is written. Nevertheless the code
1899 * has been written such that the introduction of writers in the future should
1900 * not need to update it provided the writers atomically perform only the
1901 * operations done here, i.e. updating the total and maximum times. In
1902 * particular, the maximum real and cpu times must be monotonically increasing
1903 * or this code is not correct.
1904 */
d62a17ae 1905void thread_call(struct thread *thread)
718e3744 1906{
d62a17ae 1907 RUSAGE_T before, after;
cc8b13a0 1908
45f01188
DL
1909 /* if the thread being called is the CLI, it may change cputime_enabled
1910 * ("service cputime-stats" command), which can result in nonsensical
1911 * and very confusing warnings
1912 */
1913 bool cputime_enabled_here = cputime_enabled;
1914
5e822957
DS
1915 if (thread->master->ready_run_loop)
1916 before = thread->master->last_getrusage;
1917 else
1918 GETRUSAGE(&before);
1919
d62a17ae 1920 thread->real = before.real;
718e3744 1921
6c3aa850
DL
1922 frrtrace(9, frr_libfrr, thread_call, thread->master,
1923 thread->xref->funcname, thread->xref->xref.file,
1924 thread->xref->xref.line, NULL, thread->u.fd,
c7bb4f00 1925 thread->u.val, thread->arg, thread->u.sands.tv_sec);
abf96a87 1926
d62a17ae 1927 pthread_setspecific(thread_current, thread);
1928 (*thread->func)(thread);
1929 pthread_setspecific(thread_current, NULL);
718e3744 1930
d62a17ae 1931 GETRUSAGE(&after);
5e822957 1932 thread->master->last_getrusage = after;
718e3744 1933
45f01188
DL
1934 unsigned long walltime, cputime;
1935 unsigned long exp;
fbcac826 1936
45f01188
DL
1937 walltime = thread_consumed_time(&after, &before, &cputime);
1938
1939 /* update walltime */
1940 atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
fbcac826
QY
1941 memory_order_seq_cst);
1942 exp = atomic_load_explicit(&thread->hist->real.max,
1943 memory_order_seq_cst);
45f01188 1944 while (exp < walltime
fbcac826 1945 && !atomic_compare_exchange_weak_explicit(
45f01188
DL
1946 &thread->hist->real.max, &exp, walltime,
1947 memory_order_seq_cst, memory_order_seq_cst))
fbcac826
QY
1948 ;
1949
45f01188
DL
1950 if (cputime_enabled_here && cputime_enabled) {
1951 /* update cputime */
1952 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
1953 memory_order_seq_cst);
1954 exp = atomic_load_explicit(&thread->hist->cpu.max,
1955 memory_order_seq_cst);
1956 while (exp < cputime
1957 && !atomic_compare_exchange_weak_explicit(
1958 &thread->hist->cpu.max, &exp, cputime,
1959 memory_order_seq_cst, memory_order_seq_cst))
1960 ;
1961 }
fbcac826
QY
1962
1963 atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
1964 memory_order_seq_cst);
1965 atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
1966 memory_order_seq_cst);
718e3744 1967
45f01188
DL
1968 if (cputime_enabled_here && cputime_enabled && cputime_threshold
1969 && cputime > cputime_threshold) {
d62a17ae 1970 /*
45f01188
DL
1971 * We have a CPU Hog on our hands. The time FRR has spent
1972 * doing actual work (not sleeping) is greater than 5 seconds.
d62a17ae 1973 * Whinge about it now, so we're aware this is yet another task
1974 * to fix.
1975 */
9b8e01ca
DS
1976 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
1977 1, memory_order_seq_cst);
9ef9495e 1978 flog_warn(
039d547f
DS
1979 EC_LIB_SLOW_THREAD_CPU,
1980 "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
1981 thread->xref->funcname, (unsigned long)thread->func,
45f01188
DL
1982 walltime / 1000, cputime / 1000);
1983
1984 } else if (walltime_threshold && walltime > walltime_threshold) {
039d547f 1985 /*
45f01188
DL
1986 * The runtime for a task is greater than 5 seconds, but the
1987 * cpu time is under 5 seconds. Let's whine about this because
1988 * this could imply some sort of scheduling issue.
039d547f 1989 */
9b8e01ca
DS
1990 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
1991 1, memory_order_seq_cst);
039d547f
DS
1992 flog_warn(
1993 EC_LIB_SLOW_THREAD_WALL,
1994 "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
60a3efec 1995 thread->xref->funcname, (unsigned long)thread->func,
45f01188 1996 walltime / 1000, cputime / 1000);
d62a17ae 1997 }
718e3744 1998}
1999
2000/* Execute thread */
60a3efec
DL
2001void _thread_execute(const struct xref_threadsched *xref,
2002 struct thread_master *m, int (*func)(struct thread *),
2003 void *arg, int val)
718e3744 2004{
c4345fbf 2005 struct thread *thread;
718e3744 2006
c4345fbf 2007 /* Get or allocate new thread to execute. */
00dffa8c 2008 frr_with_mutex(&m->mtx) {
60a3efec 2009 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
9c7753e4 2010
c4345fbf 2011 /* Set its event value. */
00dffa8c 2012 frr_with_mutex(&thread->mtx) {
c4345fbf
RZ
2013 thread->add_type = THREAD_EXECUTE;
2014 thread->u.val = val;
2015 thread->ref = &thread;
2016 }
c4345fbf 2017 }
f7c62e11 2018
c4345fbf
RZ
2019 /* Execute thread doing all accounting. */
2020 thread_call(thread);
9c7753e4 2021
c4345fbf
RZ
2022 /* Give back or free thread. */
2023 thread_add_unuse(m, thread);
718e3744 2024}
1543c387
MS
2025
2026/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2027void debug_signals(const sigset_t *sigs)
2028{
2029 int i, found;
2030 sigset_t tmpsigs;
2031 char buf[300];
2032
2033 /*
2034 * We're only looking at the non-realtime signals here, so we need
2035 * some limit value. Platform differences mean at some point we just
2036 * need to pick a reasonable value.
2037 */
2038#if defined SIGRTMIN
2039# define LAST_SIGNAL SIGRTMIN
2040#else
2041# define LAST_SIGNAL 32
2042#endif
2043
2044
2045 if (sigs == NULL) {
2046 sigemptyset(&tmpsigs);
2047 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2048 sigs = &tmpsigs;
2049 }
2050
2051 found = 0;
2052 buf[0] = '\0';
2053
2054 for (i = 0; i < LAST_SIGNAL; i++) {
2055 char tmp[20];
2056
2057 if (sigismember(sigs, i) > 0) {
2058 if (found > 0)
2059 strlcat(buf, ",", sizeof(buf));
2060 snprintf(tmp, sizeof(tmp), "%d", i);
2061 strlcat(buf, tmp, sizeof(buf));
2062 found++;
2063 }
2064 }
2065
2066 if (found == 0)
2067 snprintf(buf, sizeof(buf), "<none>");
2068
2069 zlog_debug("%s: %s", __func__, buf);
2070}
a505383d
DS
2071
/*
 * Report whether 'thread' refers to a scheduled task.
 *
 * Only the pointer itself is examined: NULL yields false, anything else
 * yields true. NOTE(review): this presumably relies on callers' thread
 * pointers being reset to NULL when a task is not scheduled - confirm.
 */
bool thread_is_scheduled(struct thread *thread)
{
	return thread != NULL;
}
f59e6882
DL
2079
2080static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2081 const struct thread *thread)
2082{
2083 static const char * const types[] = {
2084 [THREAD_READ] = "read",
2085 [THREAD_WRITE] = "write",
2086 [THREAD_TIMER] = "timer",
2087 [THREAD_EVENT] = "event",
2088 [THREAD_READY] = "ready",
2089 [THREAD_UNUSED] = "unused",
2090 [THREAD_EXECUTE] = "exec",
2091 };
2092 ssize_t rv = 0;
2093 char info[16] = "";
2094
2095 if (!thread)
2096 return bputs(buf, "{(thread *)NULL}");
2097
2098 rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2099
2100 if (thread->type < array_size(types) && types[thread->type])
2101 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2102 else
2103 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2104
2105 switch (thread->type) {
2106 case THREAD_READ:
2107 case THREAD_WRITE:
2108 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2109 break;
2110
2111 case THREAD_TIMER:
2112 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2113 break;
2114 }
2115
2116 rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2117 thread->xref->funcname, thread->xref->dest,
2118 thread->xref->xref.file, thread->xref->xref.line);
2119 return rv;
2120}
2121
54929fd3 2122printfrr_ext_autoreg_p("TH", printfrr_thread);
f59e6882
DL
2123static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2124 const void *ptr)
2125{
2126 const struct thread *thread = ptr;
2127 struct timespec remain = {};
2128
2129 if (ea->fmt[0] == 'D') {
2130 ea->fmt++;
2131 return printfrr_thread_dbg(buf, ea, thread);
2132 }
2133
2134 if (!thread) {
2135 /* need to jump over time formatting flag characters in the
2136 * input format string, i.e. adjust ea->fmt!
2137 */
2138 printfrr_time(buf, ea, &remain,
2139 TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2140 return bputch(buf, '-');
2141 }
2142
2143 TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2144 return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2145}