718e3744 1/* Thread management routine
2 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
896014f4
DL
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
718e3744 19 */
20
21/* #define DEBUG */
22
23#include <zebra.h>
308d14ae 24#include <sys/resource.h>
718e3744 25
26#include "thread.h"
27#include "memory.h"
3e41733f 28#include "frrcu.h"
718e3744 29#include "log.h"
e04ab74d 30#include "hash.h"
31#include "command.h"
05c447dd 32#include "sigevent.h"
3bf2673b 33#include "network.h"
bd74dc61 34#include "jhash.h"
fbcac826 35#include "frratomic.h"
00dffa8c 36#include "frr_pthread.h"
9ef9495e 37#include "lib_errors.h"
912d45a1 38#include "libfrr_trace.h"
1a9f340b 39#include "libfrr.h"
d6be5fb9 40
bf8d3d6a
DL
41DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
42DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
43DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
44DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");
4a1ab8e4 45
960b9a53 46DECLARE_LIST(thread_list, struct thread, threaditem);
c284542b 47
aea25d1e
MS
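/*
 * A cancellation request. Exactly one of 'thread', 'eventobj' or
 * 'threadref' is used, depending on whether the caller cancelled a
 * specific task, all tasks matching an event argument, or passed an
 * asynchronous back-reference (see thread_cancel_async()).
 */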
48struct cancel_req {
49 int flags;
50 struct thread *thread;
51 void *eventobj;
52 struct thread **threadref;
53};
54
55/* Flags for task cancellation */
56#define THREAD_CANCEL_FLAG_READY 0x01
57
27d29ced
DL
58static int thread_timer_cmp(const struct thread *a, const struct thread *b)
59{
60 if (a->u.sands.tv_sec < b->u.sands.tv_sec)
61 return -1;
62 if (a->u.sands.tv_sec > b->u.sands.tv_sec)
63 return 1;
64 if (a->u.sands.tv_usec < b->u.sands.tv_usec)
65 return -1;
66 if (a->u.sands.tv_usec > b->u.sands.tv_usec)
67 return 1;
68 return 0;
69}
70
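/*
 * Timers are kept in a min-heap ordered by expiry time, so
 * thread_timer_list_first() is always the next deadline to wait for.
 */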
960b9a53 71DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);
27d29ced 72
3b96b781
HT
73#if defined(__APPLE__)
74#include <mach/mach.h>
75#include <mach/mach_time.h>
76#endif
77
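/*
 * Wake up the poll() loop: writing a byte to the internal io_pipe makes
 * the pipe's read end (always present in the pollfd set) readable, so a
 * sleeping fd_poll() returns and re-evaluates its timers and queues.
 */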
d62a17ae 78#define AWAKEN(m) \
79 do { \
2b64873d 80 const unsigned char wakebyte = 0x01; \
d62a17ae 81 write(m->io_pipe[1], &wakebyte, 1); \
 82 } while (0)
3bf2673b 83
62f44022 84/* control variable for initializer */
c17faa4b 85static pthread_once_t init_once = PTHREAD_ONCE_INIT;
e0bebc7c 86pthread_key_t thread_current;
6b0655a2 87
c17faa4b 88static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
62f44022
QY
89static struct list *masters;
90
6655966d 91static void thread_free(struct thread_master *master, struct thread *thread);
6b0655a2 92
45f01188
DL
93#ifndef EXCLUDE_CPU_TIME
94#define EXCLUDE_CPU_TIME 0
95#endif
96#ifndef CONSUMED_TIME_CHECK
97#define CONSUMED_TIME_CHECK 0
98#endif
99
100bool cputime_enabled = !EXCLUDE_CPU_TIME;
101unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
102unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
103
62f44022 104/* CLI start ---------------------------------------------------------------- */
45f01188
DL
105#ifndef VTYSH_EXTRACT_PL
106#include "lib/thread_clippy.c"
107#endif
108
d8b87afe 109static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
e04ab74d 110{
883cc51d 111 int size = sizeof(a->func);
bd74dc61
DS
112
113 return jhash(&a->func, size, 0);
e04ab74d 114}
115
74df8d6d 116static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
d62a17ae 117 const struct cpu_thread_history *b)
e04ab74d 118{
d62a17ae 119 return a->func == b->func;
e04ab74d 120}
121
d62a17ae 122static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
e04ab74d 123{
d62a17ae 124 struct cpu_thread_history *new;
125 new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
126 new->func = a->func;
127 new->funcname = a->funcname;
128 return new;
e04ab74d 129}
130
d62a17ae 131static void cpu_record_hash_free(void *a)
228da428 132{
d62a17ae 133 struct cpu_thread_history *hist = a;
134
135 XFREE(MTYPE_THREAD_STATS, hist);
228da428
CC
136}
137
d62a17ae 138static void vty_out_cpu_thread_history(struct vty *vty,
139 struct cpu_thread_history *a)
e04ab74d 140{
1dd08c22
DS
141 vty_out(vty,
142 "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
72327cf3 143 a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
9b8e01ca
DS
144 a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
145 (a->real.total / a->total_calls), a->real.max,
1dd08c22 146 a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
9b8e01ca 147 vty_out(vty, " %c%c%c%c%c %s\n",
d62a17ae 148 a->types & (1 << THREAD_READ) ? 'R' : ' ',
149 a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
150 a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
151 a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
152 a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
e04ab74d 153}
154
e3b78da8 155static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
e04ab74d 156{
d62a17ae 157 struct cpu_thread_history *totals = args[0];
fbcac826 158 struct cpu_thread_history copy;
d62a17ae 159 struct vty *vty = args[1];
fbcac826 160 uint8_t *filter = args[2];
d62a17ae 161
162 struct cpu_thread_history *a = bucket->data;
163
fbcac826
QY
164 copy.total_active =
165 atomic_load_explicit(&a->total_active, memory_order_seq_cst);
166 copy.total_calls =
167 atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
9b8e01ca
DS
168 copy.total_cpu_warn =
169 atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
170 copy.total_wall_warn =
171 atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
1dd08c22
DS
172 copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
173 memory_order_seq_cst);
fbcac826
QY
174 copy.cpu.total =
175 atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
176 copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
177 copy.real.total =
178 atomic_load_explicit(&a->real.total, memory_order_seq_cst);
179 copy.real.max =
180 atomic_load_explicit(&a->real.max, memory_order_seq_cst);
181 copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
182 copy.funcname = a->funcname;
183
184 if (!(copy.types & *filter))
d62a17ae 185 return;
fbcac826
QY
186
187 vty_out_cpu_thread_history(vty, &copy);
188 totals->total_active += copy.total_active;
189 totals->total_calls += copy.total_calls;
9b8e01ca
DS
190 totals->total_cpu_warn += copy.total_cpu_warn;
191 totals->total_wall_warn += copy.total_wall_warn;
1dd08c22 192 totals->total_starv_warn += copy.total_starv_warn;
fbcac826
QY
193 totals->real.total += copy.real.total;
194 if (totals->real.max < copy.real.max)
195 totals->real.max = copy.real.max;
196 totals->cpu.total += copy.cpu.total;
197 if (totals->cpu.max < copy.cpu.max)
198 totals->cpu.max = copy.cpu.max;
e04ab74d 199}
200
fbcac826 201static void cpu_record_print(struct vty *vty, uint8_t filter)
e04ab74d 202{
d62a17ae 203 struct cpu_thread_history tmp;
204 void *args[3] = {&tmp, vty, &filter};
205 struct thread_master *m;
206 struct listnode *ln;
207
45f01188
DL
208 if (!cputime_enabled)
209 vty_out(vty,
210 "\n"
211 "Collecting CPU time statistics is currently disabled. Following statistics\n"
212 "will be zero or may display data from when collection was enabled. Use the\n"
213 " \"service cputime-stats\" command to start collecting data.\n"
214 "\nCounters and wallclock times are always maintained and should be accurate.\n");
215
0d6f7fd6 216 memset(&tmp, 0, sizeof(tmp));
d62a17ae 217 tmp.funcname = "TOTAL";
218 tmp.types = filter;
219
cb1991af 220 frr_with_mutex (&masters_mtx) {
d62a17ae 221 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
222 const char *name = m->name ? m->name : "main";
223
224 char underline[strlen(name) + 1];
225 memset(underline, '-', sizeof(underline));
4f113d60 226 underline[sizeof(underline) - 1] = '\0';
d62a17ae 227
228 vty_out(vty, "\n");
229 vty_out(vty, "Showing statistics for pthread %s\n",
230 name);
231 vty_out(vty, "-------------------------------%s\n",
232 underline);
84d951d0 233 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 234 "CPU (user+system):", "Real (wall-clock):");
235 vty_out(vty,
236 "Active Runtime(ms) Invoked Avg uSec Max uSecs");
237 vty_out(vty, " Avg uSec Max uSecs");
1dd08c22
DS
238 vty_out(vty,
239 " CPU_Warn Wall_Warn Starv_Warn Type Thread\n");
d62a17ae 240
241 if (m->cpu_record->count)
242 hash_iterate(
243 m->cpu_record,
e3b78da8 244 (void (*)(struct hash_bucket *,
d62a17ae 245 void *))cpu_record_hash_print,
246 args);
247 else
248 vty_out(vty, "No data to display yet.\n");
249
250 vty_out(vty, "\n");
251 }
252 }
d62a17ae 253
254 vty_out(vty, "\n");
255 vty_out(vty, "Total thread statistics\n");
256 vty_out(vty, "-------------------------\n");
84d951d0 257 vty_out(vty, "%30s %18s %18s\n", "",
d62a17ae 258 "CPU (user+system):", "Real (wall-clock):");
259 vty_out(vty, "Active Runtime(ms) Invoked Avg uSec Max uSecs");
9b8e01ca 260 vty_out(vty, " Avg uSec Max uSecs CPU_Warn Wall_Warn");
d62a17ae 261 vty_out(vty, " Type Thread\n");
262
263 if (tmp.total_calls > 0)
264 vty_out_cpu_thread_history(vty, &tmp);
e04ab74d 265}
266
e3b78da8 267static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
e276eb82 268{
fbcac826 269 uint8_t *filter = args[0];
d62a17ae 270 struct hash *cpu_record = args[1];
271
272 struct cpu_thread_history *a = bucket->data;
62f44022 273
d62a17ae 274 if (!(a->types & *filter))
275 return;
f48f65d2 276
d62a17ae 277 hash_release(cpu_record, bucket->data);
e276eb82
PJ
278}
279
fbcac826 280static void cpu_record_clear(uint8_t filter)
e276eb82 281{
fbcac826 282 uint8_t *tmp = &filter;
d62a17ae 283 struct thread_master *m;
284 struct listnode *ln;
285
cb1991af 286 frr_with_mutex (&masters_mtx) {
d62a17ae 287 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
cb1991af 288 frr_with_mutex (&m->mtx) {
d62a17ae 289 void *args[2] = {tmp, m->cpu_record};
290 hash_iterate(
291 m->cpu_record,
e3b78da8 292 (void (*)(struct hash_bucket *,
d62a17ae 293 void *))cpu_record_hash_clear,
294 args);
295 }
d62a17ae 296 }
297 }
62f44022
QY
298}
299
fbcac826 300static uint8_t parse_filter(const char *filterstr)
62f44022 301{
d62a17ae 302 int i = 0;
303 int filter = 0;
304
305 while (filterstr[i] != '\0') {
306 switch (filterstr[i]) {
307 case 'r':
308 case 'R':
309 filter |= (1 << THREAD_READ);
310 break;
311 case 'w':
312 case 'W':
313 filter |= (1 << THREAD_WRITE);
314 break;
315 case 't':
316 case 'T':
317 filter |= (1 << THREAD_TIMER);
318 break;
319 case 'e':
320 case 'E':
321 filter |= (1 << THREAD_EVENT);
322 break;
323 case 'x':
324 case 'X':
325 filter |= (1 << THREAD_EXECUTE);
326 break;
327 default:
328 break;
329 }
330 ++i;
331 }
332 return filter;
62f44022
QY
333}
334
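/*
 * Example: "show thread cpu rt" restricts the display to read and timer
 * tasks. parse_filter() returns 0 when no valid letters are present,
 * which the callers below report as an error.
 */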
ee4dcee8
DL
335DEFUN_NOSH (show_thread_cpu,
336 show_thread_cpu_cmd,
337 "show thread cpu [FILTER]",
338 SHOW_STR
339 "Thread information\n"
340 "Thread CPU usage\n"
341 "Display filter (rwtex)\n")
62f44022 342{
fbcac826 343 uint8_t filter = (uint8_t)-1U;
d62a17ae 344 int idx = 0;
345
346 if (argv_find(argv, argc, "FILTER", &idx)) {
347 filter = parse_filter(argv[idx]->arg);
348 if (!filter) {
349 vty_out(vty,
3efd0893 350 "Invalid filter \"%s\" specified; must contain at least one of 'RWTEX'\n",
d62a17ae 351 argv[idx]->arg);
352 return CMD_WARNING;
353 }
354 }
355
356 cpu_record_print(vty, filter);
357 return CMD_SUCCESS;
e276eb82 358}
45f01188
DL
359
360DEFPY (service_cputime_stats,
361 service_cputime_stats_cmd,
362 "[no] service cputime-stats",
363 NO_STR
364 "Set up miscellaneous service\n"
365 "Collect CPU usage statistics\n")
366{
367 cputime_enabled = !no;
368 return CMD_SUCCESS;
369}
370
371DEFPY (service_cputime_warning,
372 service_cputime_warning_cmd,
373 "[no] service cputime-warning (1-4294967295)",
374 NO_STR
375 "Set up miscellaneous service\n"
376 "Warn for tasks exceeding CPU usage threshold\n"
377 "Warning threshold in milliseconds\n")
378{
379 if (no)
380 cputime_threshold = 0;
381 else
382 cputime_threshold = cputime_warning * 1000;
383 return CMD_SUCCESS;
384}
385
386ALIAS (service_cputime_warning,
387 no_service_cputime_warning_cmd,
388 "no service cputime-warning",
389 NO_STR
390 "Set up miscellaneous service\n"
391 "Warn for tasks exceeding CPU usage threshold\n")
392
393DEFPY (service_walltime_warning,
394 service_walltime_warning_cmd,
395 "[no] service walltime-warning (1-4294967295)",
396 NO_STR
397 "Set up miscellaneous service\n"
398 "Warn for tasks exceeding total wallclock threshold\n"
399 "Warning threshold in milliseconds\n")
400{
401 if (no)
402 walltime_threshold = 0;
403 else
404 walltime_threshold = walltime_warning * 1000;
405 return CMD_SUCCESS;
406}
407
408ALIAS (service_walltime_warning,
409 no_service_walltime_warning_cmd,
410 "no service walltime-warning",
411 NO_STR
412 "Set up miscellaneous service\n"
413 "Warn for tasks exceeding total wallclock threshold\n")
e276eb82 414
8872626b
DS
415static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
416{
417 const char *name = m->name ? m->name : "main";
418 char underline[strlen(name) + 1];
a0b36ae6 419 struct thread *thread;
8872626b
DS
420 uint32_t i;
421
422 memset(underline, '-', sizeof(underline));
423 underline[sizeof(underline) - 1] = '\0';
424
425 vty_out(vty, "\nShowing poll FD's for %s\n", name);
426 vty_out(vty, "----------------------%s\n", underline);
6c19478a
DS
427 vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
428 m->fd_limit);
a0b36ae6
DS
429 for (i = 0; i < m->handler.pfdcount; i++) {
430 vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
431 m->handler.pfds[i].fd, m->handler.pfds[i].events,
8872626b 432 m->handler.pfds[i].revents);
a0b36ae6
DS
433
434 if (m->handler.pfds[i].events & POLLIN) {
435 thread = m->read[m->handler.pfds[i].fd];
436
437 if (!thread)
438 vty_out(vty, "ERROR ");
439 else
60a3efec 440 vty_out(vty, "%s ", thread->xref->funcname);
a0b36ae6
DS
441 } else
442 vty_out(vty, " ");
443
444 if (m->handler.pfds[i].events & POLLOUT) {
445 thread = m->write[m->handler.pfds[i].fd];
446
447 if (!thread)
448 vty_out(vty, "ERROR\n");
449 else
60a3efec 450 vty_out(vty, "%s\n", thread->xref->funcname);
a0b36ae6
DS
451 } else
452 vty_out(vty, "\n");
453 }
8872626b
DS
454}
455
ee4dcee8
DL
456DEFUN_NOSH (show_thread_poll,
457 show_thread_poll_cmd,
458 "show thread poll",
459 SHOW_STR
460 "Thread information\n"
461 "Show poll FD's and information\n")
8872626b
DS
462{
463 struct listnode *node;
464 struct thread_master *m;
465
cb1991af 466 frr_with_mutex (&masters_mtx) {
8872626b
DS
467 for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
468 show_thread_poll_helper(vty, m);
469 }
470 }
8872626b
DS
471
472 return CMD_SUCCESS;
473}
474
475
49d41a26
DS
476DEFUN (clear_thread_cpu,
477 clear_thread_cpu_cmd,
478 "clear thread cpu [FILTER]",
62f44022 479 "Clear stored data in all pthreads\n"
49d41a26
DS
480 "Thread information\n"
481 "Thread CPU usage\n"
482 "Display filter (rwtexb)\n")
e276eb82 483{
fbcac826 484 uint8_t filter = (uint8_t)-1U;
d62a17ae 485 int idx = 0;
486
487 if (argv_find(argv, argc, "FILTER", &idx)) {
488 filter = parse_filter(argv[idx]->arg);
489 if (!filter) {
490 vty_out(vty,
3efd0893 491 "Invalid filter \"%s\" specified; must contain at least one of 'RWTEX'\n",
d62a17ae 492 argv[idx]->arg);
493 return CMD_WARNING;
494 }
495 }
496
497 cpu_record_clear(filter);
498 return CMD_SUCCESS;
e276eb82 499}
6b0655a2 500
22f31b8c
DS
501static void show_thread_timers_helper(struct vty *vty, struct thread_master *m)
502{
503 const char *name = m->name ? m->name : "main";
504 char underline[strlen(name) + 1];
505 struct thread *thread;
506
507 memset(underline, '-', sizeof(underline));
508 underline[sizeof(underline) - 1] = '\0';
509
510 vty_out(vty, "\nShowing timers for %s\n", name);
511 vty_out(vty, "-------------------%s\n", underline);
512
513 frr_each (thread_timer_list, &m->timer, thread) {
514 vty_out(vty, " %-50s%pTH\n", thread->hist->funcname, thread);
515 }
516}
517
518DEFPY_NOSH (show_thread_timers,
519 show_thread_timers_cmd,
520 "show thread timers",
521 SHOW_STR
522 "Thread information\n"
523 "Show all timers and how long they have in the system\n")
524{
525 struct listnode *node;
526 struct thread_master *m;
527
528 frr_with_mutex (&masters_mtx) {
529 for (ALL_LIST_ELEMENTS_RO(masters, node, m))
530 show_thread_timers_helper(vty, m);
531 }
532
533 return CMD_SUCCESS;
534}
535
d62a17ae 536void thread_cmd_init(void)
0b84f294 537{
d62a17ae 538 install_element(VIEW_NODE, &show_thread_cpu_cmd);
8872626b 539 install_element(VIEW_NODE, &show_thread_poll_cmd);
d62a17ae 540 install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
45f01188
DL
541
542 install_element(CONFIG_NODE, &service_cputime_stats_cmd);
543 install_element(CONFIG_NODE, &service_cputime_warning_cmd);
544 install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
545 install_element(CONFIG_NODE, &service_walltime_warning_cmd);
546 install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
22f31b8c
DS
547
548 install_element(VIEW_NODE, &show_thread_timers_cmd);
0b84f294 549}
62f44022
QY
550/* CLI end ------------------------------------------------------------------ */
551
0b84f294 552
d62a17ae 553static void cancelreq_del(void *cr)
63ccb9cb 554{
d62a17ae 555 XFREE(MTYPE_TMP, cr);
63ccb9cb
QY
556}
557
e0bebc7c 558/* initializer, only ever called once */
4d762f26 559static void initializer(void)
e0bebc7c 560{
d62a17ae 561 pthread_key_create(&thread_current, NULL);
e0bebc7c
QY
562}
563
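/*
 * Sketch of typical use by a daemon's main loop (thread_add_read() and
 * thread_call() come from thread.h; 'accept_cb' and 'listen_sock' are
 * illustrative names, not part of this file):
 *
 *   struct thread_master *master = thread_master_create("main");
 *   struct thread fetch;
 *
 *   thread_add_read(master, accept_cb, NULL, listen_sock, NULL);
 *   while (thread_fetch(master, &fetch))
 *           thread_call(&fetch);
 *   thread_master_free(master);
 */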
d62a17ae 564struct thread_master *thread_master_create(const char *name)
718e3744 565{
d62a17ae 566 struct thread_master *rv;
567 struct rlimit limit;
568
569 pthread_once(&init_once, &initializer);
570
571 rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));
d62a17ae 572
573 /* Initialize master mutex */
574 pthread_mutex_init(&rv->mtx, NULL);
575 pthread_cond_init(&rv->cancel_cond, NULL);
576
577 /* Set name */
7ffcd8bd
QY
578 name = name ? name : "default";
579 rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
d62a17ae 580
581 /* Initialize I/O task data structures */
1a9f340b
MS
582
583 /* Use configured limit if present, ulimit otherwise. */
584 rv->fd_limit = frr_get_fd_limit();
585 if (rv->fd_limit == 0) {
586 getrlimit(RLIMIT_NOFILE, &limit);
587 rv->fd_limit = (int)limit.rlim_cur;
588 }
589
a6f235f3
DS
590 rv->read = XCALLOC(MTYPE_THREAD_POLL,
591 sizeof(struct thread *) * rv->fd_limit);
592
593 rv->write = XCALLOC(MTYPE_THREAD_POLL,
594 sizeof(struct thread *) * rv->fd_limit);
d62a17ae 595
7ffcd8bd
QY
596 char tmhashname[strlen(name) + 32];
597 snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
598 name);
bd74dc61 599 rv->cpu_record = hash_create_size(
d8b87afe 600 8, (unsigned int (*)(const void *))cpu_record_hash_key,
74df8d6d 601 (bool (*)(const void *, const void *))cpu_record_hash_cmp,
7ffcd8bd 602 tmhashname);
d62a17ae 603
c284542b
DL
604 thread_list_init(&rv->event);
605 thread_list_init(&rv->ready);
606 thread_list_init(&rv->unuse);
27d29ced 607 thread_timer_list_init(&rv->timer);
d62a17ae 608
609 /* Initialize thread_fetch() settings */
610 rv->spin = true;
611 rv->handle_signals = true;
612
613 /* Set pthread owner, should be updated by actual owner */
614 rv->owner = pthread_self();
615 rv->cancel_req = list_new();
616 rv->cancel_req->del = cancelreq_del;
617 rv->canceled = true;
618
619 /* Initialize pipe poker */
620 pipe(rv->io_pipe);
621 set_nonblocking(rv->io_pipe[0]);
622 set_nonblocking(rv->io_pipe[1]);
623
624 /* Initialize data structures for poll() */
625 rv->handler.pfdsize = rv->fd_limit;
626 rv->handler.pfdcount = 0;
627 rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
628 sizeof(struct pollfd) * rv->handler.pfdsize);
629 rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
630 sizeof(struct pollfd) * rv->handler.pfdsize);
631
eff09c66 632 /* add to list of threadmasters */
cb1991af 633 frr_with_mutex (&masters_mtx) {
eff09c66
QY
634 if (!masters)
635 masters = list_new();
636
d62a17ae 637 listnode_add(masters, rv);
638 }
d62a17ae 639
640 return rv;
718e3744 641}
642
d8a8a8de
QY
643void thread_master_set_name(struct thread_master *master, const char *name)
644{
cb1991af 645 frr_with_mutex (&master->mtx) {
0a22ddfb 646 XFREE(MTYPE_THREAD_MASTER, master->name);
d8a8a8de
QY
647 master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
648 }
d8a8a8de
QY
649}
650
6ed04aa2
DS
651#define THREAD_UNUSED_DEPTH 10
652
718e3744 653/* Move thread to unuse list. */
d62a17ae 654static void thread_add_unuse(struct thread_master *m, struct thread *thread)
718e3744 655{
6655966d
RZ
656 pthread_mutex_t mtxc = thread->mtx;
657
d62a17ae 658 assert(m != NULL && thread != NULL);
d62a17ae 659
d62a17ae 660 thread->hist->total_active--;
6ed04aa2
DS
661 memset(thread, 0, sizeof(struct thread));
662 thread->type = THREAD_UNUSED;
663
6655966d
RZ
664 /* Restore the thread mutex context. */
665 thread->mtx = mtxc;
666
c284542b
DL
667 if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
668 thread_list_add_tail(&m->unuse, thread);
6655966d
RZ
669 return;
670 }
671
672 thread_free(m, thread);
718e3744 673}
674
 675/* Free all unused threads. */
c284542b
DL
676static void thread_list_free(struct thread_master *m,
677 struct thread_list_head *list)
718e3744 678{
d62a17ae 679 struct thread *t;
d62a17ae 680
c284542b 681 while ((t = thread_list_pop(list)))
6655966d 682 thread_free(m, t);
718e3744 683}
684
d62a17ae 685static void thread_array_free(struct thread_master *m,
686 struct thread **thread_array)
308d14ae 687{
d62a17ae 688 struct thread *t;
689 int index;
690
691 for (index = 0; index < m->fd_limit; ++index) {
692 t = thread_array[index];
693 if (t) {
694 thread_array[index] = NULL;
6655966d 695 thread_free(m, t);
d62a17ae 696 }
697 }
a6f235f3 698 XFREE(MTYPE_THREAD_POLL, thread_array);
308d14ae
DV
699}
700
495f0b13
DS
701/*
702 * thread_master_free_unused
703 *
 704 * As threads finish, they are put on the
 705 * unuse list for later reuse.
 706 * If we are shutting down, free up the unused threads
 707 * so we can see whether anything was left running.
708 */
d62a17ae 709void thread_master_free_unused(struct thread_master *m)
495f0b13 710{
cb1991af 711 frr_with_mutex (&m->mtx) {
d62a17ae 712 struct thread *t;
c284542b 713 while ((t = thread_list_pop(&m->unuse)))
6655966d 714 thread_free(m, t);
d62a17ae 715 }
495f0b13
DS
716}
717
718e3744 718/* Stop thread scheduler. */
d62a17ae 719void thread_master_free(struct thread_master *m)
718e3744 720{
27d29ced
DL
721 struct thread *t;
722
cb1991af 723 frr_with_mutex (&masters_mtx) {
d62a17ae 724 listnode_delete(masters, m);
eff09c66 725 if (masters->count == 0) {
6a154c88 726 list_delete(&masters);
eff09c66 727 }
d62a17ae 728 }
d62a17ae 729
730 thread_array_free(m, m->read);
731 thread_array_free(m, m->write);
27d29ced
DL
732 while ((t = thread_timer_list_pop(&m->timer)))
733 thread_free(m, t);
d62a17ae 734 thread_list_free(m, &m->event);
735 thread_list_free(m, &m->ready);
736 thread_list_free(m, &m->unuse);
737 pthread_mutex_destroy(&m->mtx);
33844bbe 738 pthread_cond_destroy(&m->cancel_cond);
d62a17ae 739 close(m->io_pipe[0]);
740 close(m->io_pipe[1]);
6a154c88 741 list_delete(&m->cancel_req);
1a0a92ea 742 m->cancel_req = NULL;
d62a17ae 743
744 hash_clean(m->cpu_record, cpu_record_hash_free);
745 hash_free(m->cpu_record);
746 m->cpu_record = NULL;
747
0a22ddfb 748 XFREE(MTYPE_THREAD_MASTER, m->name);
d62a17ae 749 XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
750 XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
751 XFREE(MTYPE_THREAD_MASTER, m);
718e3744 752}
753
94202742 754/* Return remaining time in milliseconds. */
78ca0342 755unsigned long thread_timer_remain_msec(struct thread *thread)
718e3744 756{
d62a17ae 757 int64_t remain;
1189d95f 758
13bcc010
DA
759 if (!thread_is_scheduled(thread))
760 return 0;
761
cb1991af 762 frr_with_mutex (&thread->mtx) {
78ca0342 763 remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
d62a17ae 764 }
1189d95f 765
d62a17ae 766 return remain < 0 ? 0 : remain;
718e3744 767}
768
78ca0342
CF
 769/* Return remaining time in seconds. */
770unsigned long thread_timer_remain_second(struct thread *thread)
771{
772 return thread_timer_remain_msec(thread) / 1000LL;
773}
774
d62a17ae 775struct timeval thread_timer_remain(struct thread *thread)
6ac44687 776{
d62a17ae 777 struct timeval remain;
cb1991af 778 frr_with_mutex (&thread->mtx) {
d62a17ae 779 monotime_until(&thread->u.sands, &remain);
780 }
d62a17ae 781 return remain;
6ac44687
CF
782}
783
0447957e
AK
784static int time_hhmmss(char *buf, int buf_size, long sec)
785{
786 long hh;
787 long mm;
788 int wr;
789
642ac49d 790 assert(buf_size >= 8);
0447957e
AK
791
792 hh = sec / 3600;
793 sec %= 3600;
794 mm = sec / 60;
795 sec %= 60;
796
797 wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
798
799 return wr != 8;
800}
801
802char *thread_timer_to_hhmmss(char *buf, int buf_size,
803 struct thread *t_timer)
804{
805 if (t_timer) {
806 time_hhmmss(buf, buf_size,
807 thread_timer_remain_second(t_timer));
808 } else {
809 snprintf(buf, buf_size, "--:--:--");
810 }
811 return buf;
812}
813
718e3744 814/* Get new thread. */
d7c0a89a 815static struct thread *thread_get(struct thread_master *m, uint8_t type,
cc9f21da 816 void (*func)(struct thread *), void *arg,
60a3efec 817 const struct xref_threadsched *xref)
718e3744 818{
c284542b 819 struct thread *thread = thread_list_pop(&m->unuse);
d62a17ae 820 struct cpu_thread_history tmp;
821
822 if (!thread) {
823 thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
824 /* mutex only needs to be initialized at struct creation. */
825 pthread_mutex_init(&thread->mtx, NULL);
826 m->alloc++;
827 }
828
829 thread->type = type;
830 thread->add_type = type;
831 thread->master = m;
832 thread->arg = arg;
d62a17ae 833 thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
834 thread->ref = NULL;
e8b3a2f7 835 thread->ignore_timer_late = false;
d62a17ae 836
837 /*
838 * So if the passed in funcname is not what we have
839 * stored that means the thread->hist needs to be
840 * updated. We keep the last one around in unused
841 * under the assumption that we are probably
842 * going to immediately allocate the same
843 * type of thread.
844 * This hopefully saves us some serious
845 * hash_get lookups.
846 */
60a3efec
DL
847 if ((thread->xref && thread->xref->funcname != xref->funcname)
848 || thread->func != func) {
d62a17ae 849 tmp.func = func;
60a3efec 850 tmp.funcname = xref->funcname;
d62a17ae 851 thread->hist =
852 hash_get(m->cpu_record, &tmp,
853 (void *(*)(void *))cpu_record_hash_alloc);
854 }
855 thread->hist->total_active++;
856 thread->func = func;
60a3efec 857 thread->xref = xref;
d62a17ae 858
859 return thread;
718e3744 860}
861
6655966d
RZ
862static void thread_free(struct thread_master *master, struct thread *thread)
863{
864 /* Update statistics. */
865 assert(master->alloc > 0);
866 master->alloc--;
867
868 /* Free allocated resources. */
869 pthread_mutex_destroy(&thread->mtx);
870 XFREE(MTYPE_THREAD, thread);
871}
872
d81ca9a3
MS
873static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
874 bool *eintr_p)
209a72a6 875{
d81ca9a3
MS
876 sigset_t origsigs;
877 unsigned char trash[64];
878 nfds_t count = m->handler.copycount;
879
d279ef57
DS
880 /*
881 * If timer_wait is null here, that means poll() should block
882 * indefinitely, unless the thread_master has overridden it by setting
d62a17ae 883 * ->selectpoll_timeout.
d279ef57 884 *
d62a17ae 885 * If the value is positive, it specifies the maximum number of
d279ef57
DS
 886 * milliseconds to wait. If the value is negative, it specifies that
 887 * we should never wait and always return immediately even if no
 888 * event is detected. If the value is zero, the default behavior applies.
889 */
d62a17ae 890 int timeout = -1;
891
892 /* number of file descriptors with events */
893 int num;
894
895 if (timer_wait != NULL
896 && m->selectpoll_timeout == 0) // use the default value
897 timeout = (timer_wait->tv_sec * 1000)
898 + (timer_wait->tv_usec / 1000);
899 else if (m->selectpoll_timeout > 0) // use the user's timeout
900 timeout = m->selectpoll_timeout;
901 else if (m->selectpoll_timeout
902 < 0) // effect a poll (return immediately)
903 timeout = 0;
904
0bdeb5e5 905 zlog_tls_buffer_flush();
3e41733f
DL
906 rcu_read_unlock();
907 rcu_assert_read_unlocked();
908
d62a17ae 909 /* add poll pipe poker */
d81ca9a3
MS
910 assert(count + 1 < m->handler.pfdsize);
911 m->handler.copy[count].fd = m->io_pipe[0];
912 m->handler.copy[count].events = POLLIN;
913 m->handler.copy[count].revents = 0x00;
914
915 /* We need to deal with a signal-handling race here: we
916 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
917 * that may arrive just before we enter poll(). We will block the
918 * key signals, then check whether any have arrived - if so, we return
919 * before calling poll(). If not, we'll re-enable the signals
920 * in the ppoll() call.
921 */
922
923 sigemptyset(&origsigs);
924 if (m->handle_signals) {
925 /* Main pthread that handles the app signals */
926 if (frr_sigevent_check(&origsigs)) {
927 /* Signal to process - restore signal mask and return */
928 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
929 num = -1;
930 *eintr_p = true;
931 goto done;
932 }
933 } else {
934 /* Don't make any changes for the non-main pthreads */
935 pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
936 }
d62a17ae 937
d81ca9a3
MS
938#if defined(HAVE_PPOLL)
939 struct timespec ts, *tsp;
940
941 if (timeout >= 0) {
942 ts.tv_sec = timeout / 1000;
943 ts.tv_nsec = (timeout % 1000) * 1000000;
944 tsp = &ts;
945 } else
946 tsp = NULL;
947
948 num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
949 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
950#else
951 /* Not ideal - there is a race after we restore the signal mask */
952 pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
953 num = poll(m->handler.copy, count + 1, timeout);
954#endif
d62a17ae 955
d81ca9a3
MS
956done:
957
958 if (num < 0 && errno == EINTR)
959 *eintr_p = true;
960
961 if (num > 0 && m->handler.copy[count].revents != 0 && num--)
d62a17ae 962 while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
963 ;
964
3e41733f
DL
965 rcu_read_lock();
966
d62a17ae 967 return num;
209a72a6
DS
968}
969
718e3744 970/* Add new read thread. */
ee1455dd
IR
971void _thread_add_read_write(const struct xref_threadsched *xref,
972 struct thread_master *m,
cc9f21da 973 void (*func)(struct thread *), void *arg, int fd,
ee1455dd 974 struct thread **t_ptr)
718e3744 975{
60a3efec 976 int dir = xref->thread_type;
d62a17ae 977 struct thread *thread = NULL;
1ef14bee 978 struct thread **thread_array;
d62a17ae 979
abf96a87 980 if (dir == THREAD_READ)
6c3aa850
DL
981 frrtrace(9, frr_libfrr, schedule_read, m,
982 xref->funcname, xref->xref.file, xref->xref.line,
983 t_ptr, fd, 0, arg, 0);
abf96a87 984 else
6c3aa850
DL
985 frrtrace(9, frr_libfrr, schedule_write, m,
986 xref->funcname, xref->xref.file, xref->xref.line,
987 t_ptr, fd, 0, arg, 0);
abf96a87 988
188acbb9
DS
989 assert(fd >= 0);
990 if (fd >= m->fd_limit)
991 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
992
cb1991af 993 frr_with_mutex (&m->mtx) {
00dffa8c
DL
994 if (t_ptr && *t_ptr)
995 // thread is already scheduled; don't reschedule
996 break;
d62a17ae 997
998 /* default to a new pollfd */
999 nfds_t queuepos = m->handler.pfdcount;
1000
1ef14bee
DS
1001 if (dir == THREAD_READ)
1002 thread_array = m->read;
1003 else
1004 thread_array = m->write;
1005
d62a17ae 1006 /* if we already have a pollfd for our file descriptor, find and
1007 * use it */
1008 for (nfds_t i = 0; i < m->handler.pfdcount; i++)
1009 if (m->handler.pfds[i].fd == fd) {
1010 queuepos = i;
1ef14bee
DS
1011
1012#ifdef DEV_BUILD
1013 /*
1014 * What happens if we have a thread already
1015 * created for this event?
1016 */
1017 if (thread_array[fd])
1018 assert(!"Thread already scheduled for file descriptor");
1019#endif
d62a17ae 1020 break;
1021 }
1022
1023 /* make sure we have room for this fd + pipe poker fd */
1024 assert(queuepos + 1 < m->handler.pfdsize);
1025
60a3efec 1026 thread = thread_get(m, dir, func, arg, xref);
d62a17ae 1027
1028 m->handler.pfds[queuepos].fd = fd;
1029 m->handler.pfds[queuepos].events |=
1030 (dir == THREAD_READ ? POLLIN : POLLOUT);
1031
1032 if (queuepos == m->handler.pfdcount)
1033 m->handler.pfdcount++;
1034
1035 if (thread) {
cb1991af 1036 frr_with_mutex (&thread->mtx) {
d62a17ae 1037 thread->u.fd = fd;
1ef14bee 1038 thread_array[thread->u.fd] = thread;
d62a17ae 1039 }
d62a17ae 1040
1041 if (t_ptr) {
1042 *t_ptr = thread;
1043 thread->ref = t_ptr;
1044 }
1045 }
1046
1047 AWAKEN(m);
1048 }
718e3744 1049}
1050
ee1455dd
IR
1051static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
1052 struct thread_master *m,
cc9f21da 1053 void (*func)(struct thread *), void *arg,
ee1455dd
IR
1054 struct timeval *time_relative,
1055 struct thread **t_ptr)
718e3744 1056{
d62a17ae 1057 struct thread *thread;
96fe578a 1058 struct timeval t;
d62a17ae 1059
1060 assert(m != NULL);
1061
d62a17ae 1062 assert(time_relative);
1063
6c3aa850
DL
1064 frrtrace(9, frr_libfrr, schedule_timer, m,
1065 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1066 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
abf96a87 1067
96fe578a
MS
1068 /* Compute expiration/deadline time. */
1069 monotime(&t);
1070 timeradd(&t, time_relative, &t);
1071
cb1991af 1072 frr_with_mutex (&m->mtx) {
00dffa8c 1073 if (t_ptr && *t_ptr)
d279ef57 1074 /* thread is already scheduled; don't reschedule */
ee1455dd 1075 return;
d62a17ae 1076
4322dea7 1077 thread = thread_get(m, THREAD_TIMER, func, arg, xref);
d62a17ae 1078
cb1991af 1079 frr_with_mutex (&thread->mtx) {
96fe578a 1080 thread->u.sands = t;
27d29ced 1081 thread_timer_list_add(&m->timer, thread);
d62a17ae 1082 if (t_ptr) {
1083 *t_ptr = thread;
1084 thread->ref = t_ptr;
1085 }
1086 }
d62a17ae 1087
96fe578a
MS
1088 /* The timer list is sorted - if this new timer
1089 * might change the time we'll wait for, give the pthread
1090 * a chance to re-compute.
1091 */
1092 if (thread_timer_list_first(&m->timer) == thread)
1093 AWAKEN(m);
d62a17ae 1094 }
e2eff5c3
DS
1095#define ONEYEAR2SEC (60 * 60 * 24 * 365)
1096 if (time_relative->tv_sec > ONEYEAR2SEC)
1097 flog_err(
1098 EC_LIB_TIMER_TOO_LONG,
1099 "Timer: %pTHD is created with an expiration that is greater than 1 year",
1100 thread);
9e867fe6 1101}
1102
98c91ac6 1103
1104/* Add timer event thread. */
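/*
 * Sketch (callers use the thread_add_timer() macro from thread.h, which
 * fills in the xref; 'expire_cb' and 'ctx' are illustrative names):
 *
 *   struct thread *t_expire = NULL;
 *   thread_add_timer(master, expire_cb, ctx, 30, &t_expire);
 *
 * Timers are one-shot: once popped onto the ready queue, the callback
 * must reschedule itself if periodic behavior is wanted.
 */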
ee1455dd 1105void _thread_add_timer(const struct xref_threadsched *xref,
cc9f21da 1106 struct thread_master *m, void (*func)(struct thread *),
ee1455dd 1107 void *arg, long timer, struct thread **t_ptr)
9e867fe6 1108{
d62a17ae 1109 struct timeval trel;
9e867fe6 1110
d62a17ae 1111 assert(m != NULL);
9e867fe6 1112
d62a17ae 1113 trel.tv_sec = timer;
1114 trel.tv_usec = 0;
9e867fe6 1115
ee1455dd 1116 _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
98c91ac6 1117}
9e867fe6 1118
98c91ac6 1119/* Add timer event thread with "millisecond" resolution */
ee1455dd
IR
1120void _thread_add_timer_msec(const struct xref_threadsched *xref,
1121 struct thread_master *m,
cc9f21da
DS
1122 void (*func)(struct thread *), void *arg,
1123 long timer, struct thread **t_ptr)
98c91ac6 1124{
d62a17ae 1125 struct timeval trel;
9e867fe6 1126
d62a17ae 1127 assert(m != NULL);
718e3744 1128
d62a17ae 1129 trel.tv_sec = timer / 1000;
1130 trel.tv_usec = 1000 * (timer % 1000);
98c91ac6 1131
ee1455dd 1132 _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
a48b4e6d 1133}
1134
4322dea7 1135/* Add timer event thread with "timeval" resolution */
ee1455dd 1136void _thread_add_timer_tv(const struct xref_threadsched *xref,
cc9f21da
DS
1137 struct thread_master *m,
1138 void (*func)(struct thread *), void *arg,
1139 struct timeval *tv, struct thread **t_ptr)
d03c4cbd 1140{
ee1455dd 1141 _thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
d03c4cbd
DL
1142}
1143
718e3744 1144/* Add simple event thread. */
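/*
 * Sketch (thread_add_event() macro from thread.h; names illustrative):
 *
 *   thread_add_event(master, process_queue, ctx, 0, NULL);
 *
 * Events go straight onto the event list and run on the next pass
 * through thread_fetch(), ahead of any timer or I/O processing.
 */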
ee1455dd 1145void _thread_add_event(const struct xref_threadsched *xref,
cc9f21da 1146 struct thread_master *m, void (*func)(struct thread *),
ee1455dd 1147 void *arg, int val, struct thread **t_ptr)
718e3744 1148{
00dffa8c 1149 struct thread *thread = NULL;
d62a17ae 1150
6c3aa850
DL
1151 frrtrace(9, frr_libfrr, schedule_event, m,
1152 xref->funcname, xref->xref.file, xref->xref.line,
c7bb4f00 1153 t_ptr, 0, val, arg, 0);
abf96a87 1154
d62a17ae 1155 assert(m != NULL);
1156
cb1991af 1157 frr_with_mutex (&m->mtx) {
00dffa8c 1158 if (t_ptr && *t_ptr)
d279ef57 1159 /* thread is already scheduled; don't reschedule */
00dffa8c 1160 break;
d62a17ae 1161
60a3efec 1162 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
cb1991af 1163 frr_with_mutex (&thread->mtx) {
d62a17ae 1164 thread->u.val = val;
c284542b 1165 thread_list_add_tail(&m->event, thread);
d62a17ae 1166 }
d62a17ae 1167
1168 if (t_ptr) {
1169 *t_ptr = thread;
1170 thread->ref = t_ptr;
1171 }
1172
1173 AWAKEN(m);
1174 }
718e3744 1175}
1176
63ccb9cb
QY
1177/* Thread cancellation ------------------------------------------------------ */
1178
8797240e
QY
1179/**
 1180 * Clears the specified event bits in the .events field of the pollfd
 1181 * corresponding to the given file descriptor ('state' holds the bits).
1182 *
1183 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
1184 * implementation for details.
1185 *
1186 * @param master
1187 * @param fd
1188 * @param state the event to cancel. One or more (OR'd together) of the
1189 * following:
1190 * - POLLIN
1191 * - POLLOUT
1192 */
a9318a32
MS
1193static void thread_cancel_rw(struct thread_master *master, int fd, short state,
1194 int idx_hint)
0a95a0d0 1195{
42d74538
QY
1196 bool found = false;
1197
d62a17ae 1198 /* find the index of corresponding pollfd */
1199 nfds_t i;
1200
a9318a32
MS
1201 /* Cancel POLLHUP too just in case some bozo set it */
1202 state |= POLLHUP;
1203
1204 /* Some callers know the index of the pfd already */
1205 if (idx_hint >= 0) {
1206 i = idx_hint;
1207 found = true;
1208 } else {
1209 /* Have to look for the fd in the pfd array */
1210 for (i = 0; i < master->handler.pfdcount; i++)
1211 if (master->handler.pfds[i].fd == fd) {
1212 found = true;
1213 break;
1214 }
1215 }
42d74538
QY
1216
1217 if (!found) {
1218 zlog_debug(
1219 "[!] Received cancellation request for nonexistent rw job");
1220 zlog_debug("[!] threadmaster: %s | fd: %d",
996c9314 1221 master->name ? master->name : "", fd);
42d74538
QY
1222 return;
1223 }
d62a17ae 1224
1225 /* NOT out event. */
1226 master->handler.pfds[i].events &= ~(state);
1227
1228 /* If all events are canceled, delete / resize the pollfd array. */
1229 if (master->handler.pfds[i].events == 0) {
1230 memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1231 (master->handler.pfdcount - i - 1)
1232 * sizeof(struct pollfd));
1233 master->handler.pfdcount--;
e985cda0
S
1234 master->handler.pfds[master->handler.pfdcount].fd = 0;
1235 master->handler.pfds[master->handler.pfdcount].events = 0;
d62a17ae 1236 }
1237
1238 /* If we have the same pollfd in the copy, perform the same operations,
1239 * otherwise return. */
1240 if (i >= master->handler.copycount)
1241 return;
1242
1243 master->handler.copy[i].events &= ~(state);
1244
1245 if (master->handler.copy[i].events == 0) {
1246 memmove(master->handler.copy + i, master->handler.copy + i + 1,
1247 (master->handler.copycount - i - 1)
1248 * sizeof(struct pollfd));
1249 master->handler.copycount--;
e985cda0
S
1250 master->handler.copy[master->handler.copycount].fd = 0;
1251 master->handler.copy[master->handler.copycount].events = 0;
d62a17ae 1252 }
0a95a0d0
DS
1253}
1254
a9318a32
MS
1255/*
1256 * Process task cancellation given a task argument: iterate through the
1257 * various lists of tasks, looking for any that match the argument.
1258 */
1259static void cancel_arg_helper(struct thread_master *master,
1260 const struct cancel_req *cr)
1261{
1262 struct thread *t;
1263 nfds_t i;
1264 int fd;
1265 struct pollfd *pfd;
1266
1267 /* We're only processing arg-based cancellations here. */
1268 if (cr->eventobj == NULL)
1269 return;
1270
1271 /* First process the ready lists. */
1272 frr_each_safe(thread_list, &master->event, t) {
1273 if (t->arg != cr->eventobj)
1274 continue;
1275 thread_list_del(&master->event, t);
1276 if (t->ref)
1277 *t->ref = NULL;
1278 thread_add_unuse(master, t);
1279 }
1280
1281 frr_each_safe(thread_list, &master->ready, t) {
1282 if (t->arg != cr->eventobj)
1283 continue;
1284 thread_list_del(&master->ready, t);
1285 if (t->ref)
1286 *t->ref = NULL;
1287 thread_add_unuse(master, t);
1288 }
1289
1290 /* If requested, stop here and ignore io and timers */
1291 if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
1292 return;
1293
1294 /* Check the io tasks */
1295 for (i = 0; i < master->handler.pfdcount;) {
1296 pfd = master->handler.pfds + i;
1297
1298 if (pfd->events & POLLIN)
1299 t = master->read[pfd->fd];
1300 else
1301 t = master->write[pfd->fd];
1302
1303 if (t && t->arg == cr->eventobj) {
1304 fd = pfd->fd;
1305
1306 /* Found a match to cancel: clean up fd arrays */
1307 thread_cancel_rw(master, pfd->fd, pfd->events, i);
1308
1309 /* Clean up thread arrays */
1310 master->read[fd] = NULL;
1311 master->write[fd] = NULL;
1312
1313 /* Clear caller's ref */
1314 if (t->ref)
1315 *t->ref = NULL;
1316
1317 thread_add_unuse(master, t);
1318
1319 /* Don't increment 'i' since the cancellation will have
1320 * removed the entry from the pfd array
1321 */
1322 } else
1323 i++;
1324 }
1325
1326 /* Check the timer tasks */
1327 t = thread_timer_list_first(&master->timer);
1328 while (t) {
1329 struct thread *t_next;
1330
1331 t_next = thread_timer_list_next(&master->timer, t);
1332
1333 if (t->arg == cr->eventobj) {
1334 thread_timer_list_del(&master->timer, t);
1335 if (t->ref)
1336 *t->ref = NULL;
1337 thread_add_unuse(master, t);
1338 }
1339
1340 t = t_next;
1341 }
1342}
1343
1189d95f 1344/**
63ccb9cb 1345 * Process cancellation requests.
1189d95f 1346 *
63ccb9cb
QY
1347 * This may only be run from the pthread which owns the thread_master.
1348 *
1349 * @param master the thread master to process
1350 * @REQUIRE master->mtx
1189d95f 1351 */
d62a17ae 1352static void do_thread_cancel(struct thread_master *master)
718e3744 1353{
c284542b 1354 struct thread_list_head *list = NULL;
d62a17ae 1355 struct thread **thread_array = NULL;
1356 struct thread *thread;
1357
1358 struct cancel_req *cr;
1359 struct listnode *ln;
1360 for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
d279ef57 1361 /*
a9318a32
MS
1362 * If this is an event object cancellation, search
1363 * through task lists deleting any tasks which have the
1364 * specified argument - use this handy helper function.
d279ef57 1365 */
d62a17ae 1366 if (cr->eventobj) {
a9318a32 1367 cancel_arg_helper(master, cr);
d62a17ae 1368 continue;
1369 }
1370
d279ef57
DS
1371 /*
1372 * The pointer varies depending on whether the cancellation
1373 * request was made asynchronously or not. If it was, we
1374 * need to check whether the thread even exists anymore
1375 * before cancelling it.
1376 */
d62a17ae 1377 thread = (cr->thread) ? cr->thread : *cr->threadref;
1378
1379 if (!thread)
1380 continue;
1381
1382 /* Determine the appropriate queue to cancel the thread from */
1383 switch (thread->type) {
1384 case THREAD_READ:
a9318a32 1385 thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
d62a17ae 1386 thread_array = master->read;
1387 break;
1388 case THREAD_WRITE:
a9318a32 1389 thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
d62a17ae 1390 thread_array = master->write;
1391 break;
1392 case THREAD_TIMER:
27d29ced 1393 thread_timer_list_del(&master->timer, thread);
d62a17ae 1394 break;
1395 case THREAD_EVENT:
1396 list = &master->event;
1397 break;
1398 case THREAD_READY:
1399 list = &master->ready;
1400 break;
1401 default:
1402 continue;
1403 break;
1404 }
1405
27d29ced 1406 if (list) {
c284542b 1407 thread_list_del(list, thread);
d62a17ae 1408 } else if (thread_array) {
1409 thread_array[thread->u.fd] = NULL;
d62a17ae 1410 }
1411
1412 if (thread->ref)
1413 *thread->ref = NULL;
1414
1415 thread_add_unuse(thread->master, thread);
1416 }
1417
1418 /* Delete and free all cancellation requests */
41b21bfa
MS
1419 if (master->cancel_req)
1420 list_delete_all_node(master->cancel_req);
d62a17ae 1421
1422 /* Wake up any threads which may be blocked in thread_cancel_async() */
1423 master->canceled = true;
1424 pthread_cond_broadcast(&master->cancel_cond);
718e3744 1425}
1426
a9318a32
MS
1427/*
1428 * Helper function used for multiple flavors of arg-based cancellation.
1429 */
1430static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
1431{
1432 struct cancel_req *cr;
1433
1434 assert(m->owner == pthread_self());
1435
1436 /* Only worth anything if caller supplies an arg. */
1437 if (arg == NULL)
1438 return;
1439
1440 cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1441
1442 cr->flags = flags;
1443
cb1991af 1444 frr_with_mutex (&m->mtx) {
a9318a32
MS
1445 cr->eventobj = arg;
1446 listnode_add(m->cancel_req, cr);
1447 do_thread_cancel(m);
1448 }
1449}
1450
63ccb9cb
QY
1451/**
1452 * Cancel any events which have the specified argument.
1453 *
1454 * MT-Unsafe
1455 *
 1456 * @param master the thread_master to cancel from
1457 * @param arg the argument passed when creating the event
1458 */
d62a17ae 1459void thread_cancel_event(struct thread_master *master, void *arg)
718e3744 1460{
a9318a32
MS
1461 cancel_event_helper(master, arg, 0);
1462}
d62a17ae 1463
a9318a32
MS
1464/*
1465 * Cancel ready tasks with an arg matching 'arg'
1466 *
1467 * MT-Unsafe
1468 *
1469 * @param m the thread_master to cancel from
1470 * @param arg the argument passed when creating the event
1471 */
1472void thread_cancel_event_ready(struct thread_master *m, void *arg)
1473{
1474
1475 /* Only cancel ready/event tasks */
1476 cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
63ccb9cb 1477}
1189d95f 1478
63ccb9cb
QY
1479/**
1480 * Cancel a specific task.
1481 *
1482 * MT-Unsafe
1483 *
1484 * @param thread task to cancel
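 *
 * Example (sketch; 't_timer' is an illustrative caller-owned
 * back-reference that was supplied when the task was scheduled):
 *
 *   thread_cancel(&t_timer);
 *
 * On return the task is descheduled and t_timer is NULL.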
1485 */
b3d6bc6e 1486void thread_cancel(struct thread **thread)
63ccb9cb 1487{
b3d6bc6e
MS
1488 struct thread_master *master;
1489
1490 if (thread == NULL || *thread == NULL)
1491 return;
1492
1493 master = (*thread)->master;
d62a17ae 1494
6c3aa850
DL
1495 frrtrace(9, frr_libfrr, thread_cancel, master,
1496 (*thread)->xref->funcname, (*thread)->xref->xref.file,
1497 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
b4d6e855 1498 (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);
abf96a87 1499
6ed04aa2
DS
1500 assert(master->owner == pthread_self());
1501
cb1991af 1502 frr_with_mutex (&master->mtx) {
d62a17ae 1503 struct cancel_req *cr =
1504 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
b3d6bc6e 1505 cr->thread = *thread;
6ed04aa2
DS
1506 listnode_add(master->cancel_req, cr);
1507 do_thread_cancel(master);
d62a17ae 1508 }
b3d6bc6e
MS
1509
1510 *thread = NULL;
63ccb9cb 1511}
1189d95f 1512
63ccb9cb
QY
1513/**
1514 * Asynchronous cancellation.
1515 *
8797240e
QY
1516 * Called with either a struct thread ** or void * to an event argument,
1517 * this function posts the correct cancellation request and blocks until it is
1518 * serviced.
63ccb9cb
QY
1519 *
1520 * If the thread is currently running, execution blocks until it completes.
1521 *
8797240e
QY
1522 * The last two parameters are mutually exclusive, i.e. if you pass one the
1523 * other must be NULL.
1524 *
1525 * When the cancellation procedure executes on the target thread_master, the
1526 * thread * provided is checked for nullity. If it is null, the thread is
1527 * assumed to no longer exist and the cancellation request is a no-op. Thus
1528 * users of this API must pass a back-reference when scheduling the original
1529 * task.
1530 *
63ccb9cb
QY
1531 * MT-Safe
1532 *
8797240e
QY
1533 * @param master the thread master with the relevant event / task
1534 * @param thread pointer to thread to cancel
1535 * @param eventobj the event
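 *
 * Example (sketch; 'peer->t_read' is an illustrative back-reference that
 * was supplied when the task was scheduled on the other pthread):
 *
 *   thread_cancel_async(peer_master, &peer->t_read, NULL);
 *
 * This blocks until the owning pthread has serviced the request; the
 * back-reference is NULL afterwards.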
63ccb9cb 1536 */
d62a17ae 1537void thread_cancel_async(struct thread_master *master, struct thread **thread,
1538 void *eventobj)
63ccb9cb 1539{
d62a17ae 1540 assert(!(thread && eventobj) && (thread || eventobj));
abf96a87
QY
1541
1542 if (thread && *thread)
c7bb4f00 1543 frrtrace(9, frr_libfrr, thread_cancel_async, master,
6c3aa850
DL
1544 (*thread)->xref->funcname, (*thread)->xref->xref.file,
1545 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
c7bb4f00
QY
1546 (*thread)->u.val, (*thread)->arg,
1547 (*thread)->u.sands.tv_sec);
abf96a87 1548 else
c7bb4f00
QY
1549 frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
1550 0, NULL, 0, 0, eventobj, 0);
abf96a87 1551
d62a17ae 1552 assert(master->owner != pthread_self());
1553
cb1991af 1554 frr_with_mutex (&master->mtx) {
d62a17ae 1555 master->canceled = false;
1556
1557 if (thread) {
1558 struct cancel_req *cr =
1559 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1560 cr->threadref = thread;
1561 listnode_add(master->cancel_req, cr);
1562 } else if (eventobj) {
1563 struct cancel_req *cr =
1564 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1565 cr->eventobj = eventobj;
1566 listnode_add(master->cancel_req, cr);
1567 }
1568 AWAKEN(master);
1569
1570 while (!master->canceled)
1571 pthread_cond_wait(&master->cancel_cond, &master->mtx);
1572 }
50478845
MS
1573
1574 if (thread)
1575 *thread = NULL;
718e3744 1576}
63ccb9cb 1577/* ------------------------------------------------------------------------- */
718e3744 1578
27d29ced 1579static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
d62a17ae 1580 struct timeval *timer_val)
718e3744 1581{
27d29ced
DL
1582 if (!thread_timer_list_count(timers))
1583 return NULL;
1584
1585 struct thread *next_timer = thread_timer_list_first(timers);
1586 monotime_until(&next_timer->u.sands, timer_val);
1587 return timer_val;
718e3744 1588}
718e3744 1589
d62a17ae 1590static struct thread *thread_run(struct thread_master *m, struct thread *thread,
1591 struct thread *fetch)
718e3744 1592{
d62a17ae 1593 *fetch = *thread;
1594 thread_add_unuse(m, thread);
1595 return fetch;
718e3744 1596}
1597
d62a17ae 1598static int thread_process_io_helper(struct thread_master *m,
45f3d590
DS
1599 struct thread *thread, short state,
1600 short actual_state, int pos)
5d4ccd4e 1601{
d62a17ae 1602 struct thread **thread_array;
1603
45f3d590
DS
1604 /*
1605 * poll() clears the .events field, but the pollfd array we
1606 * pass to poll() is a copy of the one used to schedule threads.
1607 * We need to synchronize state between the two here by applying
1608 * the same changes poll() made on the copy of the "real" pollfd
1609 * array.
1610 *
1611 * This cleans up a possible infinite loop where we refuse
1612 * to respond to a poll event but poll is insistent that
1613 * we should.
1614 */
1615 m->handler.pfds[pos].events &= ~(state);
1616
1617 if (!thread) {
1618 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1619 flog_err(EC_LIB_NO_THREAD,
1d5453d6 1620 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
45f3d590 1621 m->handler.pfds[pos].fd, actual_state);
d62a17ae 1622 return 0;
45f3d590 1623 }
d62a17ae 1624
1625 if (thread->type == THREAD_READ)
1626 thread_array = m->read;
1627 else
1628 thread_array = m->write;
1629
1630 thread_array[thread->u.fd] = NULL;
c284542b 1631 thread_list_add_tail(&m->ready, thread);
d62a17ae 1632 thread->type = THREAD_READY;
45f3d590 1633
d62a17ae 1634 return 1;
5d4ccd4e
DS
1635}
1636
8797240e
QY
1637/**
1638 * Process I/O events.
1639 *
1640 * Walks through file descriptor array looking for those pollfds whose .revents
1641 * field has something interesting. Deletes any invalid file descriptors.
1642 *
1643 * @param m the thread master
1644 * @param num the number of active file descriptors (return value of poll())
1645 */
d62a17ae 1646static void thread_process_io(struct thread_master *m, unsigned int num)
0a95a0d0 1647{
d62a17ae 1648 unsigned int ready = 0;
1649 struct pollfd *pfds = m->handler.copy;
1650
1651 for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1652 /* no event for current fd? immediately continue */
1653 if (pfds[i].revents == 0)
1654 continue;
1655
1656 ready++;
1657
d279ef57
DS
1658 /*
1659 * Unless someone has called thread_cancel from another
1660 * pthread, the only thing that could have changed in
1661 * m->handler.pfds while we were asleep is the .events
1662 * field in a given pollfd. Barring thread_cancel() that
1663 * value should be a superset of the values we have in our
 1664 * copy, so there's no need to update it. Similarly,
1665 * barring deletion, the fd should still be a valid index
1666 * into the master's pfds.
d142453d
DS
1667 *
 1668 * We also include POLLERR in the READ event mask:
 1669 * the subsequent read should fail, and the read handler
 1670 * is expected to deal with that appropriately
d279ef57 1671 */
d142453d 1672 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
d62a17ae 1673 thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
45f3d590
DS
1674 pfds[i].revents, i);
1675 }
d62a17ae 1676 if (pfds[i].revents & POLLOUT)
1677 thread_process_io_helper(m, m->write[pfds[i].fd],
45f3d590 1678 POLLOUT, pfds[i].revents, i);
d62a17ae 1679
 1680 /* if one of our file descriptors is garbage, remove it
 1681 * from both pfds + update sizes and index
 1682 */
1683 if (pfds[i].revents & POLLNVAL) {
1684 memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1685 (m->handler.pfdcount - i - 1)
1686 * sizeof(struct pollfd));
1687 m->handler.pfdcount--;
e985cda0
S
1688 m->handler.pfds[m->handler.pfdcount].fd = 0;
1689 m->handler.pfds[m->handler.pfdcount].events = 0;
d62a17ae 1690
1691 memmove(pfds + i, pfds + i + 1,
1692 (m->handler.copycount - i - 1)
1693 * sizeof(struct pollfd));
1694 m->handler.copycount--;
e985cda0
S
1695 m->handler.copy[m->handler.copycount].fd = 0;
1696 m->handler.copy[m->handler.copycount].events = 0;
d62a17ae 1697
1698 i--;
1699 }
1700 }
718e3744 1701}
1702
8b70d0b0 1703/* Add all timers that have popped to the ready list. */
e7d9e44b 1704static unsigned int thread_process_timers(struct thread_master *m,
d62a17ae 1705 struct timeval *timenow)
a48b4e6d 1706{
ab01a001
DS
1707 struct timeval prev = *timenow;
1708 bool displayed = false;
d62a17ae 1709 struct thread *thread;
1710 unsigned int ready = 0;
1711
e7d9e44b 1712 while ((thread = thread_timer_list_first(&m->timer))) {
d62a17ae 1713 if (timercmp(timenow, &thread->u.sands, <))
e7d9e44b 1714 break;
ab01a001
DS
1715 prev = thread->u.sands;
1716 prev.tv_sec += 4;
1717 /*
1718 * If the timer would have popped 4 seconds in the
1719 * past then we are in a situation where we are
1720 * really getting behind on handling of events.
1721 * Let's log it and do the right thing with it.
1722 */
1dd08c22
DS
1723 if (timercmp(timenow, &prev, >)) {
1724 atomic_fetch_add_explicit(
1725 &thread->hist->total_starv_warn, 1,
1726 memory_order_seq_cst);
1727 if (!displayed && !thread->ignore_timer_late) {
1728 flog_warn(
1729 EC_LIB_STARVE_THREAD,
1730 "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1731 thread);
1732 displayed = true;
1733 }
ab01a001
DS
1734 }
1735
e7d9e44b 1736 thread_timer_list_pop(&m->timer);
d62a17ae 1737 thread->type = THREAD_READY;
e7d9e44b 1738 thread_list_add_tail(&m->ready, thread);
d62a17ae 1739 ready++;
1740 }
e7d9e44b 1741
d62a17ae 1742 return ready;
a48b4e6d 1743}
1744
2613abe6 1745/* process a list en masse, e.g. for event thread lists */
c284542b 1746static unsigned int thread_process(struct thread_list_head *list)
2613abe6 1747{
d62a17ae 1748 struct thread *thread;
d62a17ae 1749 unsigned int ready = 0;
1750
c284542b 1751 while ((thread = thread_list_pop(list))) {
d62a17ae 1752 thread->type = THREAD_READY;
c284542b 1753 thread_list_add_tail(&thread->master->ready, thread);
d62a17ae 1754 ready++;
1755 }
1756 return ready;
2613abe6
PJ
1757}
1758
1759
718e3744 1760/* Fetch next ready thread. */
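/*
 * Returns NULL when there is nothing left to do at all (no ready tasks,
 * events, timers or file descriptors - the "die" case below) or on a
 * fatal poll() error; the caller's main loop is expected to exit then.
 */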
d62a17ae 1761struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
718e3744 1762{
d62a17ae 1763 struct thread *thread = NULL;
1764 struct timeval now;
1765 struct timeval zerotime = {0, 0};
1766 struct timeval tv;
1767 struct timeval *tw = NULL;
d81ca9a3 1768 bool eintr_p = false;
d62a17ae 1769 int num = 0;
1770
1771 do {
1772 /* Handle signals if any */
1773 if (m->handle_signals)
7cc91e67 1774 frr_sigevent_process();
d62a17ae 1775
1776 pthread_mutex_lock(&m->mtx);
1777
1778 /* Process any pending cancellation requests */
1779 do_thread_cancel(m);
1780
e3c9529e
QY
1781 /*
1782 * Attempt to flush ready queue before going into poll().
1783 * This is performance-critical. Think twice before modifying.
1784 */
c284542b 1785 if ((thread = thread_list_pop(&m->ready))) {
e3c9529e
QY
1786 fetch = thread_run(m, thread, fetch);
1787 if (fetch->ref)
1788 *fetch->ref = NULL;
1789 pthread_mutex_unlock(&m->mtx);
5e822957
DS
1790 if (!m->ready_run_loop)
1791 GETRUSAGE(&m->last_getrusage);
1792 m->ready_run_loop = true;
e3c9529e
QY
1793 break;
1794 }
1795
5e822957 1796 m->ready_run_loop = false;
e3c9529e
QY
1797 /* otherwise, tick through scheduling sequence */
1798
bca37d17
QY
1799 /*
1800 * Post events to ready queue. This must come before the
1801 * following block since events should occur immediately
1802 */
d62a17ae 1803 thread_process(&m->event);
1804
bca37d17
QY
1805 /*
1806 * If there are no tasks on the ready queue, we will poll()
1807 * until a timer expires or we receive I/O, whichever comes
1808 * first. The strategy for doing this is:
d62a17ae 1809 *
1810 * - If there are events pending, set the poll() timeout to zero
1811 * - If there are no events pending, but there are timers
d279ef57
DS
1812 * pending, set the timeout to the smallest remaining time on
1813 * any timer.
d62a17ae 1814 * - If there are neither timers nor events pending, but there
d279ef57 1815 * are file descriptors pending, block indefinitely in poll()
d62a17ae 1816 * - If nothing is pending, it's time for the application to die
1817 *
1818 * In every case except the last, we need to hit poll() at least
bca37d17
QY
1819 * once per loop to avoid starvation by events
1820 */
c284542b 1821 if (!thread_list_count(&m->ready))
27d29ced 1822 tw = thread_timer_wait(&m->timer, &tv);
d62a17ae 1823
c284542b
DL
1824 if (thread_list_count(&m->ready) ||
1825 (tw && !timercmp(tw, &zerotime, >)))
d62a17ae 1826 tw = &zerotime;
1827
1828 if (!tw && m->handler.pfdcount == 0) { /* die */
1829 pthread_mutex_unlock(&m->mtx);
1830 fetch = NULL;
1831 break;
1832 }
1833
bca37d17
QY
1834 /*
1835 * Copy pollfd array + # active pollfds in it. Not necessary to
1836 * copy the array size as this is fixed.
1837 */
d62a17ae 1838 m->handler.copycount = m->handler.pfdcount;
1839 memcpy(m->handler.copy, m->handler.pfds,
1840 m->handler.copycount * sizeof(struct pollfd));
1841
e3c9529e
QY
1842 pthread_mutex_unlock(&m->mtx);
1843 {
d81ca9a3
MS
1844 eintr_p = false;
1845 num = fd_poll(m, tw, &eintr_p);
e3c9529e
QY
1846 }
1847 pthread_mutex_lock(&m->mtx);
d764d2cc 1848
e3c9529e
QY
1849 /* Handle any errors received in poll() */
1850 if (num < 0) {
d81ca9a3 1851 if (eintr_p) {
d62a17ae 1852 pthread_mutex_unlock(&m->mtx);
e3c9529e
QY
1853 /* loop around to signal handler */
1854 continue;
d62a17ae 1855 }
1856
e3c9529e 1857 /* else die */
450971aa 1858 flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
9ef9495e 1859 safe_strerror(errno));
e3c9529e
QY
1860 pthread_mutex_unlock(&m->mtx);
1861 fetch = NULL;
1862 break;
bca37d17 1863 }
d62a17ae 1864
1865 /* Post timers to ready queue. */
1866 monotime(&now);
e7d9e44b 1867 thread_process_timers(m, &now);
d62a17ae 1868
1869 /* Post I/O to ready queue. */
1870 if (num > 0)
1871 thread_process_io(m, num);
1872
d62a17ae 1873 pthread_mutex_unlock(&m->mtx);
1874
1875 } while (!thread && m->spin);
1876
1877 return fetch;
718e3744 1878}
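/* Example: thread_fetch() is the heart of a daemon's main loop. The
 * canonical driver, essentially what frr_run() in lib/libfrr.c does, is:
 *
 *   struct thread thread;
 *
 *   while (thread_fetch(master, &thread))
 *       thread_call(&thread);
 *
 * thread_fetch() returns NULL, ending the loop, only when no timers,
 * events, or file descriptors remain to wait on.
 */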
1879
d62a17ae 1880static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
62f44022 1881{
d62a17ae 1882 return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1883 + (a.tv_usec - b.tv_usec));
62f44022
QY
1884}
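/* Worked example: timeval_elapsed({5, 200000}, {3, 900000})
 * = (5 - 3) * 1000000 + (200000 - 900000) = 1300000 usec (1.3s);
 * a negative usec delta is fine since the two terms are summed.
 */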
1885
d62a17ae 1886unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1887 unsigned long *cputime)
718e3744 1888{
6418e2d3 1889#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
4e2839de
DS
1890
1891#ifdef __FreeBSD__
1892 /*
1893 * FreeBSD appears to have an issue when clock_gettime is
1894 * called with CLOCK_THREAD_CPUTIME_ID twice in quick
1895 * succession: occasionally the "now" time will be before
1896 * the "start" time. This is not good, as FRR then emits
1897 * bogus CPU HOG warnings when the subtraction wraps to a
1898 * very large number.
1899 *
1900 * What we do here is cheat a little: detect that this has
1901 * happened and correct the timestamps so it cannot occur.
1902 */
1903 if (start->cpu.tv_sec == now->cpu.tv_sec &&
1904 start->cpu.tv_nsec > now->cpu.tv_nsec)
1905 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1906 else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1907 now->cpu.tv_sec = start->cpu.tv_sec;
1908 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1909 }
1910#endif
6418e2d3
DL
1911 *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1912 + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1913#else
d62a17ae 1914 /* This is 'user + sys' time. */
1915 *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1916 + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
6418e2d3 1917#endif
d62a17ae 1918 return timeval_elapsed(now->real, start->real);
8b70d0b0 1919}
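/* Example (sketch): timing an arbitrary code section the same way
 * thread_call() does. do_work() is hypothetical; GETRUSAGE captures both
 * wall-clock and per-thread cpu usage (the cpu part reads as zero when
 * cputime stats are disabled).
 *
 *   RUSAGE_T before, after;
 *   unsigned long walltime, cputime;
 *
 *   GETRUSAGE(&before);
 *   do_work();
 *   GETRUSAGE(&after);
 *   walltime = thread_consumed_time(&after, &before, &cputime);
 */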
1920
50596be0
DS
1921/* We should aim to yield after the task's 'yield' time (in
1922 microseconds) has elapsed; it defaults to THREAD_YIELD_TIME_SLOT.
8b70d0b0 1923 Note: we are using real (wall clock) time for this calculation.
1924 It could be argued that CPU time may make more sense in certain
1925 contexts. The things to consider are whether the thread may have
1926 blocked (in which case wall time increases, but CPU time does not),
1927 or whether the system is heavily loaded with other processes competing
d62a17ae 1928 for CPU time. On balance, wall clock time seems to make sense.
8b70d0b0 1929 Plus it has the added benefit that gettimeofday should be faster
1930 than calling getrusage. */
d62a17ae 1931int thread_should_yield(struct thread *thread)
718e3744 1932{
d62a17ae 1933 int result;
cb1991af 1934 frr_with_mutex (&thread->mtx) {
d62a17ae 1935 result = monotime_since(&thread->real, NULL)
1936 > (int64_t)thread->yield;
1937 }
d62a17ae 1938 return result;
50596be0
DS
1939}
1940
d62a17ae 1941void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
50596be0 1942{
cb1991af 1943 frr_with_mutex (&thread->mtx) {
d62a17ae 1944 thread->yield = yield_time;
1945 }
718e3744 1946}
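/* Example (sketch): a long-running task cooperating with the scheduler by
 * checking thread_should_yield() and rescheduling itself as an event.
 * more_work_left(), do_unit_of_work(), and `struct my_ctx` are
 * hypothetical.
 *
 *   static void my_worker(struct thread *t)
 *   {
 *       struct my_ctx *ctx = THREAD_ARG(t);
 *
 *       while (more_work_left(ctx)) {
 *           do_unit_of_work(ctx);
 *           if (thread_should_yield(t)) {
 *               thread_add_event(t->master, my_worker, ctx, 0, NULL);
 *               return;
 *           }
 *       }
 *   }
 */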
1947
d62a17ae 1948void thread_getrusage(RUSAGE_T *r)
db9c0df9 1949{
6418e2d3
DL
1950 monotime(&r->real);
1951 if (!cputime_enabled) {
1952 memset(&r->cpu, 0, sizeof(r->cpu));
1953 return;
1954 }
1955
1956#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1957 /* not currently implemented in Linux's vDSO, but maybe at some point
1958 * in the future?
1959 */
1960 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1961#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
231db9a6
DS
1962#if defined RUSAGE_THREAD
1963#define FRR_RUSAGE RUSAGE_THREAD
1964#else
1965#define FRR_RUSAGE RUSAGE_SELF
1966#endif
6418e2d3
DL
1967 getrusage(FRR_RUSAGE, &(r->cpu));
1968#endif
db9c0df9
PJ
1969}
1970
fbcac826
QY
1971/*
1972 * Call a thread.
1973 *
1974 * This function will atomically update the thread's usage history. At present
1975 * this is the only spot where usage history is written. Nevertheless the code
1976 * has been written such that the introduction of writers in the future should
1977 * not need to update it provided the writers atomically perform only the
1978 * operations done here, i.e. updating the total and maximum times. In
1979 * particular, the maximum real and cpu times must be monotonically increasing
1980 * or this code is not correct.
1981 */
d62a17ae 1982void thread_call(struct thread *thread)
718e3744 1983{
d62a17ae 1984 RUSAGE_T before, after;
cc8b13a0 1985
45f01188
DL
1986 /* if the thread being called is the CLI, it may change cputime_enabled
1987 * ("service cputime-stats" command), which can result in nonsensical
1988 * and very confusing warnings
1989 */
1990 bool cputime_enabled_here = cputime_enabled;
1991
5e822957
DS
1992 if (thread->master->ready_run_loop)
1993 before = thread->master->last_getrusage;
1994 else
1995 GETRUSAGE(&before);
1996
d62a17ae 1997 thread->real = before.real;
718e3744 1998
6c3aa850
DL
1999 frrtrace(9, frr_libfrr, thread_call, thread->master,
2000 thread->xref->funcname, thread->xref->xref.file,
2001 thread->xref->xref.line, NULL, thread->u.fd,
c7bb4f00 2002 thread->u.val, thread->arg, thread->u.sands.tv_sec);
abf96a87 2003
d62a17ae 2004 pthread_setspecific(thread_current, thread);
2005 (*thread->func)(thread);
2006 pthread_setspecific(thread_current, NULL);
718e3744 2007
d62a17ae 2008 GETRUSAGE(&after);
5e822957 2009 thread->master->last_getrusage = after;
718e3744 2010
45f01188
DL
2011 unsigned long walltime, cputime;
2012 unsigned long exp;
fbcac826 2013
45f01188
DL
2014 walltime = thread_consumed_time(&after, &before, &cputime);
2015
2016 /* update walltime */
2017 atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
fbcac826
QY
2018 memory_order_seq_cst);
2019 exp = atomic_load_explicit(&thread->hist->real.max,
2020 memory_order_seq_cst);
45f01188 2021 while (exp < walltime
fbcac826 2022 && !atomic_compare_exchange_weak_explicit(
45f01188
DL
2023 &thread->hist->real.max, &exp, walltime,
2024 memory_order_seq_cst, memory_order_seq_cst))
fbcac826
QY
2025 ;
2026
45f01188
DL
2027 if (cputime_enabled_here && cputime_enabled) {
2028 /* update cputime */
2029 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2030 memory_order_seq_cst);
2031 exp = atomic_load_explicit(&thread->hist->cpu.max,
2032 memory_order_seq_cst);
2033 while (exp < cputime
2034 && !atomic_compare_exchange_weak_explicit(
2035 &thread->hist->cpu.max, &exp, cputime,
2036 memory_order_seq_cst, memory_order_seq_cst))
2037 ;
2038 }
fbcac826
QY
2039
2040 atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2041 memory_order_seq_cst);
2042 atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2043 memory_order_seq_cst);
718e3744 2044
45f01188
DL
2045 if (cputime_enabled_here && cputime_enabled && cputime_threshold
2046 && cputime > cputime_threshold) {
d62a17ae 2047 /*
45f01188
DL
2048 * We have a CPU Hog on our hands. The time FRR spent doing
2049 * actual work (not sleeping) exceeded the cputime threshold
d62a17ae 2050 * (5 seconds by default). Whinge about it now, so we're aware
2051 * this is yet another task to fix.
2052 */
9b8e01ca
DS
2053 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2054 1, memory_order_seq_cst);
9ef9495e 2055 flog_warn(
039d547f
DS
2056 EC_LIB_SLOW_THREAD_CPU,
2057 "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2058 thread->xref->funcname, (unsigned long)thread->func,
45f01188
DL
2059 walltime / 1000, cputime / 1000);
2060
2061 } else if (walltime_threshold && walltime > walltime_threshold) {
039d547f 2062 /*
45f01188
DL
2063 * The task's wall-clock runtime exceeded the walltime threshold
2064 * (5 seconds by default) while its cpu time stayed below the
2065 * cputime threshold: whine, as this could imply a scheduling issue.
039d547f 2066 */
9b8e01ca
DS
2067 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2068 1, memory_order_seq_cst);
039d547f
DS
2069 flog_warn(
2070 EC_LIB_SLOW_THREAD_WALL,
2071 "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
60a3efec 2072 thread->xref->funcname, (unsigned long)thread->func,
45f01188 2073 walltime / 1000, cputime / 1000);
d62a17ae 2074 }
718e3744 2075}
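/* Sketch of the monotonic-max update pattern used above, in isolation and
 * with C11 stdatomic naming. The weak compare-exchange loop retries until
 * either our value is published or another writer has stored something at
 * least as large (exp is reloaded on each failure).
 *
 *   static void update_max(_Atomic unsigned long *max, unsigned long val)
 *   {
 *       unsigned long exp =
 *           atomic_load_explicit(max, memory_order_seq_cst);
 *
 *       while (exp < val
 *              && !atomic_compare_exchange_weak_explicit(
 *                      max, &exp, val, memory_order_seq_cst,
 *                      memory_order_seq_cst))
 *           ;
 *   }
 */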
2076
2077/* Execute thread */
60a3efec 2078void _thread_execute(const struct xref_threadsched *xref,
cc9f21da 2079 struct thread_master *m, void (*func)(struct thread *),
60a3efec 2080 void *arg, int val)
718e3744 2081{
c4345fbf 2082 struct thread *thread;
718e3744 2083
c4345fbf 2084 /* Get or allocate new thread to execute. */
cb1991af 2085 frr_with_mutex (&m->mtx) {
60a3efec 2086 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
9c7753e4 2087
c4345fbf 2088 /* Set its event value. */
cb1991af 2089 frr_with_mutex (&thread->mtx) {
c4345fbf
RZ
2090 thread->add_type = THREAD_EXECUTE;
2091 thread->u.val = val;
2092 thread->ref = &thread;
2093 }
c4345fbf 2094 }
f7c62e11 2095
c4345fbf
RZ
2096 /* Execute thread doing all accounting. */
2097 thread_call(thread);
9c7753e4 2098
c4345fbf
RZ
2099 /* Give back or free thread. */
2100 thread_add_unuse(m, thread);
718e3744 2101}
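/* Example (sketch): callers normally reach this through the
 * thread_execute() convenience macro, which supplies the xref. The
 * callback runs synchronously, with full accounting, before the macro
 * returns; my_handler and my_arg are hypothetical.
 *
 *   thread_execute(master, my_handler, my_arg, 0);
 */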
1543c387
MS
2102
2103/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2104void debug_signals(const sigset_t *sigs)
2105{
2106 int i, found;
2107 sigset_t tmpsigs;
2108 char buf[300];
2109
2110 /*
2111 * We're only looking at the non-realtime signals here, so we need
2112 * some limit value. Platform differences mean at some point we just
2113 * need to pick a reasonable value.
2114 */
2115#if defined SIGRTMIN
2116# define LAST_SIGNAL SIGRTMIN
2117#else
2118# define LAST_SIGNAL 32
2119#endif
2120
2121
2122 if (sigs == NULL) {
2123 sigemptyset(&tmpsigs);
2124 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2125 sigs = &tmpsigs;
2126 }
2127
2128 found = 0;
2129 buf[0] = '\0';
2130
2131 for (i = 0; i < LAST_SIGNAL; i++) {
2132 char tmp[20];
2133
2134 if (sigismember(sigs, i) > 0) {
2135 if (found > 0)
2136 strlcat(buf, ",", sizeof(buf));
2137 snprintf(tmp, sizeof(tmp), "%d", i);
2138 strlcat(buf, tmp, sizeof(buf));
2139 found++;
2140 }
2141 }
2142
2143 if (found == 0)
2144 snprintf(buf, sizeof(buf), "<none>");
2145
2146 zlog_debug("%s: %s", __func__, buf);
2147}
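/* Example: dump the calling thread's currently blocked signals to the
 * debug log:
 *
 *   debug_signals(NULL);
 */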
a505383d 2148
f59e6882
DL
2149static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2150 const struct thread *thread)
2151{
2152 static const char * const types[] = {
2153 [THREAD_READ] = "read",
2154 [THREAD_WRITE] = "write",
2155 [THREAD_TIMER] = "timer",
2156 [THREAD_EVENT] = "event",
2157 [THREAD_READY] = "ready",
2158 [THREAD_UNUSED] = "unused",
2159 [THREAD_EXECUTE] = "exec",
2160 };
2161 ssize_t rv = 0;
2162 char info[16] = "";
2163
2164 if (!thread)
2165 return bputs(buf, "{(thread *)NULL}");
2166
2167 rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2168
2169 if (thread->type < array_size(types) && types[thread->type])
2170 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2171 else
2172 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2173
2174 switch (thread->type) {
2175 case THREAD_READ:
2176 case THREAD_WRITE:
2177 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2178 break;
2179
2180 case THREAD_TIMER:
2181 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2182 break;
2183 }
2184
2185 rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2186 thread->xref->funcname, thread->xref->dest,
2187 thread->xref->xref.file, thread->xref->xref.line);
2188 return rv;
2189}
2190
54929fd3 2191printfrr_ext_autoreg_p("TH", printfrr_thread);
f59e6882
DL
2192static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2193 const void *ptr)
2194{
2195 const struct thread *thread = ptr;
2196 struct timespec remain = {};
2197
2198 if (ea->fmt[0] == 'D') {
2199 ea->fmt++;
2200 return printfrr_thread_dbg(buf, ea, thread);
2201 }
2202
2203 if (!thread) {
2204 /* need to jump over time formatting flag characters in the
2205 * input format string, i.e. adjust ea->fmt!
2206 */
2207 printfrr_time(buf, ea, &remain,
2208 TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2209 return bputch(buf, '-');
2210 }
2211
2212 TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2213 return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2214}
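/* Example (sketch): with the "TH" extension registered above, tasks can be
 * printed directly. Plain "%pTH" renders the remaining time until the
 * task's deadline (or "-" for NULL); the 'D' flag selects the debug dump.
 *
 *   zlog_debug("next timer: %pTH", thread);
 *   zlog_debug("task state: %pTHD", thread);
 */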