/* Thread management routine
 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* #define DEBUG */

#include <zebra.h>
#include <sys/resource.h>

#include "thread.h"
#include "memory.h"
#include "frrcu.h"
#include "log.h"
#include "hash.h"
#include "command.h"
#include "sigevent.h"
#include "network.h"
#include "jhash.h"
#include "frratomic.h"
#include "frr_pthread.h"
#include "lib_errors.h"
#include "libfrr_trace.h"
#include "libfrr.h"

DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");

DECLARE_LIST(thread_list, struct thread, threaditem);

struct cancel_req {
	int flags;
	struct thread *thread;
	void *eventobj;
	struct thread **threadref;
};

/* Flags for task cancellation */
#define THREAD_CANCEL_FLAG_READY 0x01

static int thread_timer_cmp(const struct thread *a, const struct thread *b)
{
	if (a->u.sands.tv_sec < b->u.sands.tv_sec)
		return -1;
	if (a->u.sands.tv_sec > b->u.sands.tv_sec)
		return 1;
	if (a->u.sands.tv_usec < b->u.sands.tv_usec)
		return -1;
	if (a->u.sands.tv_usec > b->u.sands.tv_usec)
		return 1;
	return 0;
}

DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);

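/*
 * thread_timer_cmp() orders timers by absolute deadline (u.sands), seconds
 * first, then microseconds, so the heap declared above always yields the
 * next timer to expire via thread_timer_list_first().  thread_timer_wait()
 * below relies on this ordering when computing the poll() timeout.
 */
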
#if defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif

#define AWAKEN(m)                                                              \
	do {                                                                   \
		const unsigned char wakebyte = 0x01;                           \
		write(m->io_pipe[1], &wakebyte, 1);                            \
	} while (0);

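/*
 * AWAKEN() is the self-pipe trick: writing a single byte to io_pipe[1]
 * makes a poll() blocked in fd_poll() (which always watches io_pipe[0])
 * return early, e.g. when another pthread queues a task or a cancellation
 * request.  fd_poll() drains the pipe again after poll() returns.
 */
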
/* control variable for initializer */
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
pthread_key_t thread_current;

static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *masters;

static void thread_free(struct thread_master *master, struct thread *thread);

#ifndef EXCLUDE_CPU_TIME
#define EXCLUDE_CPU_TIME 0
#endif
#ifndef CONSUMED_TIME_CHECK
#define CONSUMED_TIME_CHECK 0
#endif

bool cputime_enabled = !EXCLUDE_CPU_TIME;
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;

/* CLI start ---------------------------------------------------------------- */
#ifndef VTYSH_EXTRACT_PL
#include "lib/thread_clippy.c"
#endif

static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
{
	int size = sizeof(a->func);

	return jhash(&a->func, size, 0);
}

static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
				const struct cpu_thread_history *b)
{
	return a->func == b->func;
}

static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
{
	struct cpu_thread_history *new;
	new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
	new->func = a->func;
	new->funcname = a->funcname;
	return new;
}

static void cpu_record_hash_free(void *a)
{
	struct cpu_thread_history *hist = a;

	XFREE(MTYPE_THREAD_STATS, hist);
}

static void vty_out_cpu_thread_history(struct vty *vty,
				       struct cpu_thread_history *a)
{
	vty_out(vty,
		"%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
		a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
		a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
		(a->real.total / a->total_calls), a->real.max,
		a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
	vty_out(vty, " %c%c%c%c%c %s\n",
		a->types & (1 << THREAD_READ) ? 'R' : ' ',
		a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
		a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
		a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
		a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
}

static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
{
	struct cpu_thread_history *totals = args[0];
	struct cpu_thread_history copy;
	struct vty *vty = args[1];
	uint8_t *filter = args[2];

	struct cpu_thread_history *a = bucket->data;

	copy.total_active =
		atomic_load_explicit(&a->total_active, memory_order_seq_cst);
	copy.total_calls =
		atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
	copy.total_cpu_warn =
		atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
	copy.total_wall_warn =
		atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
	copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
						     memory_order_seq_cst);
	copy.cpu.total =
		atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
	copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
	copy.real.total =
		atomic_load_explicit(&a->real.total, memory_order_seq_cst);
	copy.real.max =
		atomic_load_explicit(&a->real.max, memory_order_seq_cst);
	copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
	copy.funcname = a->funcname;

	if (!(copy.types & *filter))
		return;

	vty_out_cpu_thread_history(vty, &copy);
	totals->total_active += copy.total_active;
	totals->total_calls += copy.total_calls;
	totals->total_cpu_warn += copy.total_cpu_warn;
	totals->total_wall_warn += copy.total_wall_warn;
	totals->total_starv_warn += copy.total_starv_warn;
	totals->real.total += copy.real.total;
	if (totals->real.max < copy.real.max)
		totals->real.max = copy.real.max;
	totals->cpu.total += copy.cpu.total;
	if (totals->cpu.max < copy.cpu.max)
		totals->cpu.max = copy.cpu.max;
}

static void cpu_record_print(struct vty *vty, uint8_t filter)
{
	struct cpu_thread_history tmp;
	void *args[3] = {&tmp, vty, &filter};
	struct thread_master *m;
	struct listnode *ln;

	if (!cputime_enabled)
		vty_out(vty,
			"\n"
			"Collecting CPU time statistics is currently disabled. Following statistics\n"
			"will be zero or may display data from when collection was enabled. Use the\n"
			" \"service cputime-stats\" command to start collecting data.\n"
			"\nCounters and wallclock times are always maintained and should be accurate.\n");

	memset(&tmp, 0, sizeof(tmp));
	tmp.funcname = "TOTAL";
	tmp.types = filter;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			const char *name = m->name ? m->name : "main";

			char underline[strlen(name) + 1];
			memset(underline, '-', sizeof(underline));
			underline[sizeof(underline) - 1] = '\0';

			vty_out(vty, "\n");
			vty_out(vty, "Showing statistics for pthread %s\n",
				name);
			vty_out(vty, "-------------------------------%s\n",
				underline);
			vty_out(vty, "%30s %18s %18s\n", "",
				"CPU (user+system):", "Real (wall-clock):");
			vty_out(vty,
				"Active Runtime(ms) Invoked Avg uSec Max uSecs");
			vty_out(vty, " Avg uSec Max uSecs");
			vty_out(vty,
				" CPU_Warn Wall_Warn Starv_Warn Type Thread\n");

			if (m->cpu_record->count)
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_print,
					args);
			else
				vty_out(vty, "No data to display yet.\n");

			vty_out(vty, "\n");
		}
	}

	vty_out(vty, "\n");
	vty_out(vty, "Total thread statistics\n");
	vty_out(vty, "-------------------------\n");
	vty_out(vty, "%30s %18s %18s\n", "",
		"CPU (user+system):", "Real (wall-clock):");
	vty_out(vty, "Active Runtime(ms) Invoked Avg uSec Max uSecs");
	vty_out(vty, " Avg uSec Max uSecs CPU_Warn Wall_Warn");
	vty_out(vty, " Type Thread\n");

	if (tmp.total_calls > 0)
		vty_out_cpu_thread_history(vty, &tmp);
}

static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
{
	uint8_t *filter = args[0];
	struct hash *cpu_record = args[1];

	struct cpu_thread_history *a = bucket->data;

	if (!(a->types & *filter))
		return;

	hash_release(cpu_record, bucket->data);
}

static void cpu_record_clear(uint8_t filter)
{
	uint8_t *tmp = &filter;
	struct thread_master *m;
	struct listnode *ln;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			frr_with_mutex(&m->mtx) {
				void *args[2] = {tmp, m->cpu_record};
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_clear,
					args);
			}
		}
	}
}

static uint8_t parse_filter(const char *filterstr)
{
	int i = 0;
	int filter = 0;

	while (filterstr[i] != '\0') {
		switch (filterstr[i]) {
		case 'r':
		case 'R':
			filter |= (1 << THREAD_READ);
			break;
		case 'w':
		case 'W':
			filter |= (1 << THREAD_WRITE);
			break;
		case 't':
		case 'T':
			filter |= (1 << THREAD_TIMER);
			break;
		case 'e':
		case 'E':
			filter |= (1 << THREAD_EVENT);
			break;
		case 'x':
		case 'X':
			filter |= (1 << THREAD_EXECUTE);
			break;
		default:
			break;
		}
		++i;
	}
	return filter;
}

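/*
 * Illustrative example (not part of the original file): parse_filter("rt")
 * returns (1 << THREAD_READ) | (1 << THREAD_TIMER); characters outside
 * "rwtexRWTEX" are silently ignored, and an all-ignored string yields 0,
 * which the CLI handlers below reject as an invalid filter.
 */
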
DEFUN_NOSH (show_thread_cpu,
	    show_thread_cpu_cmd,
	    "show thread cpu [FILTER]",
	    SHOW_STR
	    "Thread information\n"
	    "Thread CPU usage\n"
	    "Display filter (rwtex)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_print(vty, filter);
	return CMD_SUCCESS;
}

DEFPY (service_cputime_stats,
       service_cputime_stats_cmd,
       "[no] service cputime-stats",
       NO_STR
       "Set up miscellaneous service\n"
       "Collect CPU usage statistics\n")
{
	cputime_enabled = !no;
	return CMD_SUCCESS;
}

DEFPY (service_cputime_warning,
       service_cputime_warning_cmd,
       "[no] service cputime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		cputime_threshold = 0;
	else
		cputime_threshold = cputime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_cputime_warning,
       no_service_cputime_warning_cmd,
       "no service cputime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n")

DEFPY (service_walltime_warning,
       service_walltime_warning_cmd,
       "[no] service walltime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		walltime_threshold = 0;
	else
		walltime_threshold = walltime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_walltime_warning,
       no_service_walltime_warning_cmd,
       "no service walltime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n")

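/*
 * Example vtysh configuration exercising the commands above (illustrative
 * sketch; thresholds are entered in milliseconds and stored internally
 * multiplied by 1000):
 *
 *   configure terminal
 *    service cputime-stats
 *    service cputime-warning 100
 *    service walltime-warning 500
 */
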
static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct thread *thread;
	uint32_t i;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing poll FD's for %s\n", name);
	vty_out(vty, "----------------------%s\n", underline);
	vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
		m->fd_limit);
	for (i = 0; i < m->handler.pfdcount; i++) {
		vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
			m->handler.pfds[i].fd, m->handler.pfds[i].events,
			m->handler.pfds[i].revents);

		if (m->handler.pfds[i].events & POLLIN) {
			thread = m->read[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR ");
			else
				vty_out(vty, "%s ", thread->xref->funcname);
		} else
			vty_out(vty, " ");

		if (m->handler.pfds[i].events & POLLOUT) {
			thread = m->write[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR\n");
			else
				vty_out(vty, "%s\n", thread->xref->funcname);
		} else
			vty_out(vty, "\n");
	}
}

DEFUN_NOSH (show_thread_poll,
	    show_thread_poll_cmd,
	    "show thread poll",
	    SHOW_STR
	    "Thread information\n"
	    "Show poll FD's and information\n")
{
	struct listnode *node;
	struct thread_master *m;

	frr_with_mutex(&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
			show_thread_poll_helper(vty, m);
		}
	}

	return CMD_SUCCESS;
}


DEFUN (clear_thread_cpu,
       clear_thread_cpu_cmd,
       "clear thread cpu [FILTER]",
       "Clear stored data in all pthreads\n"
       "Thread information\n"
       "Thread CPU usage\n"
       "Display filter (rwtexb)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_clear(filter);
	return CMD_SUCCESS;
}

static void show_thread_timers_helper(struct vty *vty, struct thread_master *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct thread *thread;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing timers for %s\n", name);
	vty_out(vty, "-------------------%s\n", underline);

	frr_each (thread_timer_list, &m->timer, thread) {
		vty_out(vty, " %-50s%pTH\n", thread->hist->funcname, thread);
	}
}

DEFPY_NOSH (show_thread_timers,
	    show_thread_timers_cmd,
	    "show thread timers",
	    SHOW_STR
	    "Thread information\n"
	    "Show all timers and how long until they expire\n")
{
	struct listnode *node;
	struct thread_master *m;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m))
			show_thread_timers_helper(vty, m);
	}

	return CMD_SUCCESS;
}

void thread_cmd_init(void)
{
	install_element(VIEW_NODE, &show_thread_cpu_cmd);
	install_element(VIEW_NODE, &show_thread_poll_cmd);
	install_element(ENABLE_NODE, &clear_thread_cpu_cmd);

	install_element(CONFIG_NODE, &service_cputime_stats_cmd);
	install_element(CONFIG_NODE, &service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &service_walltime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);

	install_element(VIEW_NODE, &show_thread_timers_cmd);
}
/* CLI end ------------------------------------------------------------------ */


static void cancelreq_del(void *cr)
{
	XFREE(MTYPE_TMP, cr);
}

/* initializer, only ever called once */
static void initializer(void)
{
	pthread_key_create(&thread_current, NULL);
}

struct thread_master *thread_master_create(const char *name)
{
	struct thread_master *rv;
	struct rlimit limit;

	pthread_once(&init_once, &initializer);

	rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));

	/* Initialize master mutex */
	pthread_mutex_init(&rv->mtx, NULL);
	pthread_cond_init(&rv->cancel_cond, NULL);

	/* Set name */
	name = name ? name : "default";
	rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);

	/* Initialize I/O task data structures */

	/* Use configured limit if present, ulimit otherwise. */
	rv->fd_limit = frr_get_fd_limit();
	if (rv->fd_limit == 0) {
		getrlimit(RLIMIT_NOFILE, &limit);
		rv->fd_limit = (int)limit.rlim_cur;
	}

	rv->read = XCALLOC(MTYPE_THREAD_POLL,
			   sizeof(struct thread *) * rv->fd_limit);

	rv->write = XCALLOC(MTYPE_THREAD_POLL,
			    sizeof(struct thread *) * rv->fd_limit);

	char tmhashname[strlen(name) + 32];
	snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
		 name);
	rv->cpu_record = hash_create_size(
		8, (unsigned int (*)(const void *))cpu_record_hash_key,
		(bool (*)(const void *, const void *))cpu_record_hash_cmp,
		tmhashname);

	thread_list_init(&rv->event);
	thread_list_init(&rv->ready);
	thread_list_init(&rv->unuse);
	thread_timer_list_init(&rv->timer);

	/* Initialize thread_fetch() settings */
	rv->spin = true;
	rv->handle_signals = true;

	/* Set pthread owner, should be updated by actual owner */
	rv->owner = pthread_self();
	rv->cancel_req = list_new();
	rv->cancel_req->del = cancelreq_del;
	rv->canceled = true;

	/* Initialize pipe poker */
	pipe(rv->io_pipe);
	set_nonblocking(rv->io_pipe[0]);
	set_nonblocking(rv->io_pipe[1]);

	/* Initialize data structures for poll() */
	rv->handler.pfdsize = rv->fd_limit;
	rv->handler.pfdcount = 0;
	rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);
	rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);

	/* add to list of threadmasters */
	frr_with_mutex(&masters_mtx) {
		if (!masters)
			masters = list_new();

		listnode_add(masters, rv);
	}

	return rv;
}

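/*
 * Minimal creation sketch (illustrative, not part of the original file);
 * real FRR daemons normally get their master from frr_init() rather than
 * calling this directly:
 *
 *   struct thread_master *master = thread_master_create("example");
 *
 *   // ... schedule tasks, run the fetch loop (see thread_fetch below) ...
 *
 *   thread_master_free_unused(master);
 *   thread_master_free(master);
 */
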
void thread_master_set_name(struct thread_master *master, const char *name)
{
	frr_with_mutex(&master->mtx) {
		XFREE(MTYPE_THREAD_MASTER, master->name);
		master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
	}
}

#define THREAD_UNUSED_DEPTH 10

/* Move thread to unuse list. */
static void thread_add_unuse(struct thread_master *m, struct thread *thread)
{
	pthread_mutex_t mtxc = thread->mtx;

	assert(m != NULL && thread != NULL);

	thread->hist->total_active--;
	memset(thread, 0, sizeof(struct thread));
	thread->type = THREAD_UNUSED;

	/* Restore the thread mutex context. */
	thread->mtx = mtxc;

	if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
		thread_list_add_tail(&m->unuse, thread);
		return;
	}

	thread_free(m, thread);
}

/* Free all unused threads. */
static void thread_list_free(struct thread_master *m,
			     struct thread_list_head *list)
{
	struct thread *t;

	while ((t = thread_list_pop(list)))
		thread_free(m, t);
}

static void thread_array_free(struct thread_master *m,
			      struct thread **thread_array)
{
	struct thread *t;
	int index;

	for (index = 0; index < m->fd_limit; ++index) {
		t = thread_array[index];
		if (t) {
			thread_array[index] = NULL;
			thread_free(m, t);
		}
	}
	XFREE(MTYPE_THREAD_POLL, thread_array);
}

/*
 * thread_master_free_unused
 *
 * As threads are finished with, they are put on the unuse list for later
 * reuse.  If we are shutting down, free up the unused threads so we can
 * see whether we forgot to shut anything off.
 */
void thread_master_free_unused(struct thread_master *m)
{
	frr_with_mutex(&m->mtx) {
		struct thread *t;
		while ((t = thread_list_pop(&m->unuse)))
			thread_free(m, t);
	}
}

/* Stop thread scheduler. */
void thread_master_free(struct thread_master *m)
{
	struct thread *t;

	frr_with_mutex(&masters_mtx) {
		listnode_delete(masters, m);
		if (masters->count == 0) {
			list_delete(&masters);
		}
	}

	thread_array_free(m, m->read);
	thread_array_free(m, m->write);
	while ((t = thread_timer_list_pop(&m->timer)))
		thread_free(m, t);
	thread_list_free(m, &m->event);
	thread_list_free(m, &m->ready);
	thread_list_free(m, &m->unuse);
	pthread_mutex_destroy(&m->mtx);
	pthread_cond_destroy(&m->cancel_cond);
	close(m->io_pipe[0]);
	close(m->io_pipe[1]);
	list_delete(&m->cancel_req);
	m->cancel_req = NULL;

	hash_clean(m->cpu_record, cpu_record_hash_free);
	hash_free(m->cpu_record);
	m->cpu_record = NULL;

	XFREE(MTYPE_THREAD_MASTER, m->name);
	XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
	XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
	XFREE(MTYPE_THREAD_MASTER, m);
}

/* Return remaining time in milliseconds. */
unsigned long thread_timer_remain_msec(struct thread *thread)
{
	int64_t remain;

	frr_with_mutex(&thread->mtx) {
		remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
	}

	return remain < 0 ? 0 : remain;
}

/* Return remaining time in seconds. */
unsigned long thread_timer_remain_second(struct thread *thread)
{
	return thread_timer_remain_msec(thread) / 1000LL;
}

struct timeval thread_timer_remain(struct thread *thread)
{
	struct timeval remain;
	frr_with_mutex(&thread->mtx) {
		monotime_until(&thread->u.sands, &remain);
	}
	return remain;
}

static int time_hhmmss(char *buf, int buf_size, long sec)
{
	long hh;
	long mm;
	int wr;

	assert(buf_size >= 8);

	hh = sec / 3600;
	sec %= 3600;
	mm = sec / 60;
	sec %= 60;

	wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);

	return wr != 8;
}

char *thread_timer_to_hhmmss(char *buf, int buf_size,
			     struct thread *t_timer)
{
	if (t_timer) {
		time_hhmmss(buf, buf_size,
			    thread_timer_remain_second(t_timer));
	} else {
		snprintf(buf, buf_size, "--:--:--");
	}
	return buf;
}

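/*
 * Usage example (illustrative): formatting the remaining time of a timer
 * task for display; the buffer must hold "HH:MM:SS" plus the NUL:
 *
 *   char buf[9];
 *
 *   vty_out(vty, "expires in %s\n",
 *           thread_timer_to_hhmmss(buf, sizeof(buf), t_timer));
 */
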
/* Get new thread. */
static struct thread *thread_get(struct thread_master *m, uint8_t type,
				 void (*func)(struct thread *), void *arg,
				 const struct xref_threadsched *xref)
{
	struct thread *thread = thread_list_pop(&m->unuse);
	struct cpu_thread_history tmp;

	if (!thread) {
		thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
		/* mutex only needs to be initialized at struct creation. */
		pthread_mutex_init(&thread->mtx, NULL);
		m->alloc++;
	}

	thread->type = type;
	thread->add_type = type;
	thread->master = m;
	thread->arg = arg;
	thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
	thread->ref = NULL;
	thread->ignore_timer_late = false;

	/*
	 * If the passed-in funcname is not what we have stored, the
	 * thread->hist needs to be updated.  We keep the last one around
	 * on the unused list under the assumption that we are probably
	 * going to immediately allocate the same type of thread again;
	 * this hopefully saves us some serious hash_get() lookups.
	 */
	if ((thread->xref && thread->xref->funcname != xref->funcname)
	    || thread->func != func) {
		tmp.func = func;
		tmp.funcname = xref->funcname;
		thread->hist =
			hash_get(m->cpu_record, &tmp,
				 (void *(*)(void *))cpu_record_hash_alloc);
	}
	thread->hist->total_active++;
	thread->func = func;
	thread->xref = xref;

	return thread;
}

static void thread_free(struct thread_master *master, struct thread *thread)
{
	/* Update statistics. */
	assert(master->alloc > 0);
	master->alloc--;

	/* Free allocated resources. */
	pthread_mutex_destroy(&thread->mtx);
	XFREE(MTYPE_THREAD, thread);
}

static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
		   bool *eintr_p)
{
	sigset_t origsigs;
	unsigned char trash[64];
	nfds_t count = m->handler.copycount;

	/*
	 * If timer_wait is null here, that means poll() should block
	 * indefinitely, unless the thread_master has overridden it by setting
	 * ->selectpoll_timeout.
	 *
	 * If the value is positive, it specifies the maximum number of
	 * milliseconds to wait. If the timeout is -1, it specifies that
	 * we should never wait and always return immediately even if no
	 * event is detected. If the value is zero, the behavior is default.
	 */
	int timeout = -1;

	/* number of file descriptors with events */
	int num;

	if (timer_wait != NULL
	    && m->selectpoll_timeout == 0) // use the default value
		timeout = (timer_wait->tv_sec * 1000)
			  + (timer_wait->tv_usec / 1000);
	else if (m->selectpoll_timeout > 0) // use the user's timeout
		timeout = m->selectpoll_timeout;
	else if (m->selectpoll_timeout
		 < 0) // effect a poll (return immediately)
		timeout = 0;

	zlog_tls_buffer_flush();
	rcu_read_unlock();
	rcu_assert_read_unlocked();

	/* add poll pipe poker */
	assert(count + 1 < m->handler.pfdsize);
	m->handler.copy[count].fd = m->io_pipe[0];
	m->handler.copy[count].events = POLLIN;
	m->handler.copy[count].revents = 0x00;

	/* We need to deal with a signal-handling race here: we
	 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
	 * that may arrive just before we enter poll(). We will block the
	 * key signals, then check whether any have arrived - if so, we return
	 * before calling poll(). If not, we'll re-enable the signals
	 * in the ppoll() call.
	 */

	sigemptyset(&origsigs);
	if (m->handle_signals) {
		/* Main pthread that handles the app signals */
		if (frr_sigevent_check(&origsigs)) {
			/* Signal to process - restore signal mask and return */
			pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
			num = -1;
			*eintr_p = true;
			goto done;
		}
	} else {
		/* Don't make any changes for the non-main pthreads */
		pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
	}

#if defined(HAVE_PPOLL)
	struct timespec ts, *tsp;

	if (timeout >= 0) {
		ts.tv_sec = timeout / 1000;
		ts.tv_nsec = (timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
#else
	/* Not ideal - there is a race after we restore the signal mask */
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
	num = poll(m->handler.copy, count + 1, timeout);
#endif

done:

	if (num < 0 && errno == EINTR)
		*eintr_p = true;

	if (num > 0 && m->handler.copy[count].revents != 0 && num--)
		while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
			;

	rcu_read_lock();

	return num;
}

/* Add new read/write thread. */
void _thread_add_read_write(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg, int fd,
			    struct thread **t_ptr)
{
	int dir = xref->thread_type;
	struct thread *thread = NULL;
	struct thread **thread_array;

	if (dir == THREAD_READ)
		frrtrace(9, frr_libfrr, schedule_read, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);
	else
		frrtrace(9, frr_libfrr, schedule_write, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);

	assert(fd >= 0);
	if (fd >= m->fd_limit)
		assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			// thread is already scheduled; don't reschedule
			break;

		/* default to a new pollfd */
		nfds_t queuepos = m->handler.pfdcount;

		if (dir == THREAD_READ)
			thread_array = m->read;
		else
			thread_array = m->write;

		/* if we already have a pollfd for our file descriptor, find and
		 * use it */
		for (nfds_t i = 0; i < m->handler.pfdcount; i++)
			if (m->handler.pfds[i].fd == fd) {
				queuepos = i;

#ifdef DEV_BUILD
				/*
				 * What happens if we have a thread already
				 * created for this event?
				 */
				if (thread_array[fd])
					assert(!"Thread already scheduled for file descriptor");
#endif
				break;
			}

		/* make sure we have room for this fd + pipe poker fd */
		assert(queuepos + 1 < m->handler.pfdsize);

		thread = thread_get(m, dir, func, arg, xref);

		m->handler.pfds[queuepos].fd = fd;
		m->handler.pfds[queuepos].events |=
			(dir == THREAD_READ ? POLLIN : POLLOUT);

		if (queuepos == m->handler.pfdcount)
			m->handler.pfdcount++;

		if (thread) {
			frr_with_mutex(&thread->mtx) {
				thread->u.fd = fd;
				thread_array[thread->u.fd] = thread;
			}

			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		AWAKEN(m);
	}
}

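/*
 * Illustrative sketch (not part of the original file): callers do not use
 * _thread_add_read_write() directly but go through the thread_add_read()/
 * thread_add_write() wrapper macros from thread.h, which supply the xref.
 * A typical one-shot read handler re-arms itself:
 *
 *   static struct thread *t_read;
 *
 *   static void my_read_cb(struct thread *t)
 *   {
 *           int fd = THREAD_FD(t);
 *
 *           // ... read from fd ...
 *           thread_add_read(t->master, my_read_cb, THREAD_ARG(t), fd,
 *                           &t_read);   // tasks fire only once
 *   }
 *
 *   thread_add_read(master, my_read_cb, ctx, sock, &t_read);
 */
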
static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
				      struct thread_master *m,
				      void (*func)(struct thread *), void *arg,
				      struct timeval *time_relative,
				      struct thread **t_ptr)
{
	struct thread *thread;
	struct timeval t;

	assert(m != NULL);

	assert(time_relative);

	frrtrace(9, frr_libfrr, schedule_timer, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);

	/* Compute expiration/deadline time. */
	monotime(&t);
	timeradd(&t, time_relative, &t);

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			return;

		thread = thread_get(m, THREAD_TIMER, func, arg, xref);

		frr_with_mutex(&thread->mtx) {
			thread->u.sands = t;
			thread_timer_list_add(&m->timer, thread);
			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		/* The timer list is sorted - if this new timer
		 * might change the time we'll wait for, give the pthread
		 * a chance to re-compute.
		 */
		if (thread_timer_list_first(&m->timer) == thread)
			AWAKEN(m);
	}
#define ONEYEAR2SEC (60 * 60 * 24 * 365)
	if (time_relative->tv_sec > ONEYEAR2SEC)
		flog_err(
			EC_LIB_TIMER_TOO_LONG,
			"Timer: %pTHD is created with an expiration that is greater than 1 year",
			thread);
}


/* Add timer event thread. */
void _thread_add_timer(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer;
	trel.tv_usec = 0;

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "millisecond" resolution */
void _thread_add_timer_msec(const struct xref_threadsched *xref,
			    struct thread_master *m,
			    void (*func)(struct thread *), void *arg,
			    long timer, struct thread **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer / 1000;
	trel.tv_usec = 1000 * (timer % 1000);

	_thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "timeval" resolution */
void _thread_add_timer_tv(const struct xref_threadsched *xref,
			  struct thread_master *m,
			  void (*func)(struct thread *), void *arg,
			  struct timeval *tv, struct thread **t_ptr)
{
	_thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}

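/*
 * Usage sketch (illustrative): the thread_add_timer*() wrapper macros from
 * thread.h are the public entry points for the functions above:
 *
 *   static struct thread *t_hello;
 *
 *   thread_add_timer(master, send_hello, ctx, 10, &t_hello);        // 10 s
 *   thread_add_timer_msec(master, send_hello, ctx, 250, &t_hello);  // 250 ms
 *
 * Because t_hello is passed as the back-reference, a second call while the
 * timer is still pending is a no-op ("don't reschedule" above), and the
 * pointer is NULLed when the timer pops or is cancelled.
 */
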
/* Add simple event thread. */
void _thread_add_event(const struct xref_threadsched *xref,
		       struct thread_master *m, void (*func)(struct thread *),
		       void *arg, int val, struct thread **t_ptr)
{
	struct thread *thread = NULL;

	frrtrace(9, frr_libfrr, schedule_event, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, val, arg, 0);

	assert(m != NULL);

	frr_with_mutex(&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			break;

		thread = thread_get(m, THREAD_EVENT, func, arg, xref);
		frr_with_mutex(&thread->mtx) {
			thread->u.val = val;
			thread_list_add_tail(&m->event, thread);
		}

		if (t_ptr) {
			*t_ptr = thread;
			thread->ref = t_ptr;
		}

		AWAKEN(m);
	}
}

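/*
 * Usage sketch (illustrative): thread_add_event() from thread.h schedules
 * a task to run on the next pass through the event loop; 'val' is stored
 * in thread->u.val and 'arg' in thread->arg:
 *
 *   thread_add_event(master, process_queue, wq, 0, &t_process);
 */
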
/* Thread cancellation ------------------------------------------------------ */

/**
 * Clears (NOTs out) the .events field of the pollfd corresponding to the
 * given file descriptor.  The event to be cleared is passed in the 'state'
 * parameter.
 *
 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
 * implementation for details.
 *
 * @param master
 * @param fd
 * @param state the event to cancel. One or more (OR'd together) of the
 * following:
 *   - POLLIN
 *   - POLLOUT
 */
static void thread_cancel_rw(struct thread_master *master, int fd, short state,
			     int idx_hint)
{
	bool found = false;

	/* find the index of corresponding pollfd */
	nfds_t i;

	/* Cancel POLLHUP too just in case some bozo set it */
	state |= POLLHUP;

	/* Some callers know the index of the pfd already */
	if (idx_hint >= 0) {
		i = idx_hint;
		found = true;
	} else {
		/* Have to look for the fd in the pfd array */
		for (i = 0; i < master->handler.pfdcount; i++)
			if (master->handler.pfds[i].fd == fd) {
				found = true;
				break;
			}
	}

	if (!found) {
		zlog_debug(
			"[!] Received cancellation request for nonexistent rw job");
		zlog_debug("[!] threadmaster: %s | fd: %d",
			   master->name ? master->name : "", fd);
		return;
	}

	/* NOT out event. */
	master->handler.pfds[i].events &= ~(state);

	/* If all events are canceled, delete / resize the pollfd array. */
	if (master->handler.pfds[i].events == 0) {
		memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
			(master->handler.pfdcount - i - 1)
				* sizeof(struct pollfd));
		master->handler.pfdcount--;
		master->handler.pfds[master->handler.pfdcount].fd = 0;
		master->handler.pfds[master->handler.pfdcount].events = 0;
	}

	/* If we have the same pollfd in the copy, perform the same operations,
	 * otherwise return. */
	if (i >= master->handler.copycount)
		return;

	master->handler.copy[i].events &= ~(state);

	if (master->handler.copy[i].events == 0) {
		memmove(master->handler.copy + i, master->handler.copy + i + 1,
			(master->handler.copycount - i - 1)
				* sizeof(struct pollfd));
		master->handler.copycount--;
		master->handler.copy[master->handler.copycount].fd = 0;
		master->handler.copy[master->handler.copycount].events = 0;
	}
}

/*
 * Process task cancellation given a task argument: iterate through the
 * various lists of tasks, looking for any that match the argument.
 */
static void cancel_arg_helper(struct thread_master *master,
			      const struct cancel_req *cr)
{
	struct thread *t;
	nfds_t i;
	int fd;
	struct pollfd *pfd;

	/* We're only processing arg-based cancellations here. */
	if (cr->eventobj == NULL)
		return;

	/* First process the ready lists. */
	frr_each_safe(thread_list, &master->event, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->event, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	frr_each_safe(thread_list, &master->ready, t) {
		if (t->arg != cr->eventobj)
			continue;
		thread_list_del(&master->ready, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	/* If requested, stop here and ignore io and timers */
	if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
		return;

	/* Check the io tasks */
	for (i = 0; i < master->handler.pfdcount;) {
		pfd = master->handler.pfds + i;

		if (pfd->events & POLLIN)
			t = master->read[pfd->fd];
		else
			t = master->write[pfd->fd];

		if (t && t->arg == cr->eventobj) {
			fd = pfd->fd;

			/* Found a match to cancel: clean up fd arrays */
			thread_cancel_rw(master, pfd->fd, pfd->events, i);

			/* Clean up thread arrays */
			master->read[fd] = NULL;
			master->write[fd] = NULL;

			/* Clear caller's ref */
			if (t->ref)
				*t->ref = NULL;

			thread_add_unuse(master, t);

			/* Don't increment 'i' since the cancellation will have
			 * removed the entry from the pfd array
			 */
		} else
			i++;
	}

	/* Check the timer tasks */
	t = thread_timer_list_first(&master->timer);
	while (t) {
		struct thread *t_next;

		t_next = thread_timer_list_next(&master->timer, t);

		if (t->arg == cr->eventobj) {
			thread_timer_list_del(&master->timer, t);
			if (t->ref)
				*t->ref = NULL;
			thread_add_unuse(master, t);
		}

		t = t_next;
	}
}

/**
 * Process cancellation requests.
 *
 * This may only be run from the pthread which owns the thread_master.
 *
 * @param master the thread master to process
 * @REQUIRE master->mtx
 */
static void do_thread_cancel(struct thread_master *master)
{
	struct thread_list_head *list = NULL;
	struct thread **thread_array = NULL;
	struct thread *thread;

	struct cancel_req *cr;
	struct listnode *ln;
	for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
		/*
		 * If this is an event object cancellation, search
		 * through task lists deleting any tasks which have the
		 * specified argument - use this handy helper function.
		 */
		if (cr->eventobj) {
			cancel_arg_helper(master, cr);
			continue;
		}

		/*
		 * The pointer varies depending on whether the cancellation
		 * request was made asynchronously or not. If it was, we
		 * need to check whether the thread even exists anymore
		 * before cancelling it.
		 */
		thread = (cr->thread) ? cr->thread : *cr->threadref;

		if (!thread)
			continue;

		/* Determine the appropriate queue to cancel the thread from */
		switch (thread->type) {
		case THREAD_READ:
			thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
			thread_array = master->read;
			break;
		case THREAD_WRITE:
			thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
			thread_array = master->write;
			break;
		case THREAD_TIMER:
			thread_timer_list_del(&master->timer, thread);
			break;
		case THREAD_EVENT:
			list = &master->event;
			break;
		case THREAD_READY:
			list = &master->ready;
			break;
		default:
			continue;
			break;
		}

		if (list) {
			thread_list_del(list, thread);
		} else if (thread_array) {
			thread_array[thread->u.fd] = NULL;
		}

		if (thread->ref)
			*thread->ref = NULL;

		thread_add_unuse(thread->master, thread);
	}

	/* Delete and free all cancellation requests */
	if (master->cancel_req)
		list_delete_all_node(master->cancel_req);

	/* Wake up any threads which may be blocked in thread_cancel_async() */
	master->canceled = true;
	pthread_cond_broadcast(&master->cancel_cond);
}

/*
 * Helper function used for multiple flavors of arg-based cancellation.
 */
static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
{
	struct cancel_req *cr;

	assert(m->owner == pthread_self());

	/* Only worth anything if caller supplies an arg. */
	if (arg == NULL)
		return;

	cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));

	cr->flags = flags;

	frr_with_mutex(&m->mtx) {
		cr->eventobj = arg;
		listnode_add(m->cancel_req, cr);
		do_thread_cancel(m);
	}
}

/**
 * Cancel any events which have the specified argument.
 *
 * MT-Unsafe
 *
 * @param master the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event(struct thread_master *master, void *arg)
{
	cancel_event_helper(master, arg, 0);
}

/*
 * Cancel ready tasks with an arg matching 'arg'
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event_ready(struct thread_master *m, void *arg)
{
	/* Only cancel ready/event tasks */
	cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
}

/**
 * Cancel a specific task.
 *
 * MT-Unsafe
 *
 * @param thread task to cancel
 */
void thread_cancel(struct thread **thread)
{
	struct thread_master *master;

	if (thread == NULL || *thread == NULL)
		return;

	master = (*thread)->master;

	frrtrace(9, frr_libfrr, thread_cancel, master,
		 (*thread)->xref->funcname, (*thread)->xref->xref.file,
		 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
		 (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);

	assert(master->owner == pthread_self());

	frr_with_mutex(&master->mtx) {
		struct cancel_req *cr =
			XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
		cr->thread = *thread;
		listnode_add(master->cancel_req, cr);
		do_thread_cancel(master);
	}

	*thread = NULL;
}

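/*
 * Usage sketch (illustrative): cancelling through the back-reference that
 * was supplied at scheduling time, from the owning pthread only:
 *
 *   thread_add_timer(master, timeout_cb, ctx, 30, &t_timeout);
 *   ...
 *   thread_cancel(&t_timeout);   // no-op if t_timeout is already NULL
 */
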
/**
 * Asynchronous cancellation.
 *
 * Called with either a struct thread ** or void * to an event argument,
 * this function posts the correct cancellation request and blocks until it is
 * serviced.
 *
 * If the thread is currently running, execution blocks until it completes.
 *
 * The last two parameters are mutually exclusive, i.e. if you pass one the
 * other must be NULL.
 *
 * When the cancellation procedure executes on the target thread_master, the
 * thread * provided is checked for nullity. If it is null, the thread is
 * assumed to no longer exist and the cancellation request is a no-op. Thus
 * users of this API must pass a back-reference when scheduling the original
 * task.
 *
 * MT-Safe
 *
 * @param master the thread master with the relevant event / task
 * @param thread pointer to thread to cancel
 * @param eventobj the event
 */
void thread_cancel_async(struct thread_master *master, struct thread **thread,
			 void *eventobj)
{
	assert(!(thread && eventobj) && (thread || eventobj));

	if (thread && *thread)
		frrtrace(9, frr_libfrr, thread_cancel_async, master,
			 (*thread)->xref->funcname, (*thread)->xref->xref.file,
			 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
			 (*thread)->u.val, (*thread)->arg,
			 (*thread)->u.sands.tv_sec);
	else
		frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
			 0, NULL, 0, 0, eventobj, 0);

	assert(master->owner != pthread_self());

	frr_with_mutex(&master->mtx) {
		master->canceled = false;

		if (thread) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->threadref = thread;
			listnode_add(master->cancel_req, cr);
		} else if (eventobj) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->eventobj = eventobj;
			listnode_add(master->cancel_req, cr);
		}
		AWAKEN(master);

		while (!master->canceled)
			pthread_cond_wait(&master->cancel_cond, &master->mtx);
	}

	if (thread)
		*thread = NULL;
}
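
/*
 * Usage sketch (illustrative): a different pthread tearing down all tasks
 * scheduled with 'ctx' as their argument; the call blocks until the owning
 * pthread has serviced the request:
 *
 *   thread_cancel_async(peer_master, NULL, ctx);
 *
 * or, with a task back-reference instead of an event argument:
 *
 *   thread_cancel_async(peer_master, &t_job, NULL);
 */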
/* ------------------------------------------------------------------------- */

static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
					 struct timeval *timer_val)
{
	if (!thread_timer_list_count(timers))
		return NULL;

	struct thread *next_timer = thread_timer_list_first(timers);
	monotime_until(&next_timer->u.sands, timer_val);
	return timer_val;
}

static struct thread *thread_run(struct thread_master *m, struct thread *thread,
				 struct thread *fetch)
{
	*fetch = *thread;
	thread_add_unuse(m, thread);
	return fetch;
}

static int thread_process_io_helper(struct thread_master *m,
				    struct thread *thread, short state,
				    short actual_state, int pos)
{
	struct thread **thread_array;

	/*
	 * poll() clears the .events field, but the pollfd array we
	 * pass to poll() is a copy of the one used to schedule threads.
	 * We need to synchronize state between the two here by applying
	 * the same changes poll() made on the copy of the "real" pollfd
	 * array.
	 *
	 * This cleans up a possible infinite loop where we refuse
	 * to respond to a poll event but poll is insistent that
	 * we should.
	 */
	m->handler.pfds[pos].events &= ~(state);

	if (!thread) {
		if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
			flog_err(EC_LIB_NO_THREAD,
				 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
				 m->handler.pfds[pos].fd, actual_state);
		return 0;
	}

	if (thread->type == THREAD_READ)
		thread_array = m->read;
	else
		thread_array = m->write;

	thread_array[thread->u.fd] = NULL;
	thread_list_add_tail(&m->ready, thread);
	thread->type = THREAD_READY;

	return 1;
}

/**
 * Process I/O events.
 *
 * Walks through file descriptor array looking for those pollfds whose .revents
 * field has something interesting. Deletes any invalid file descriptors.
 *
 * @param m the thread master
 * @param num the number of active file descriptors (return value of poll())
 */
static void thread_process_io(struct thread_master *m, unsigned int num)
{
	unsigned int ready = 0;
	struct pollfd *pfds = m->handler.copy;

	for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
		/* no event for current fd? immediately continue */
		if (pfds[i].revents == 0)
			continue;

		ready++;

		/*
		 * Unless someone has called thread_cancel from another
		 * pthread, the only thing that could have changed in
		 * m->handler.pfds while we were asleep is the .events
		 * field in a given pollfd. Barring thread_cancel() that
		 * value should be a superset of the values we have in our
		 * copy, so there's no need to update it. Similarly,
		 * barring deletion, the fd should still be a valid index
		 * into the master's pfds.
		 *
		 * We are including POLLERR here to do a READ event;
		 * this is because the read should fail and the
		 * read function should handle it appropriately.
		 */
		if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
			thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
						 pfds[i].revents, i);
		}
		if (pfds[i].revents & POLLOUT)
			thread_process_io_helper(m, m->write[pfds[i].fd],
						 POLLOUT, pfds[i].revents, i);

		/* if one of our file descriptors is garbage, remove the same
		 * from both pfds + update sizes and index */
		if (pfds[i].revents & POLLNVAL) {
			memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
				(m->handler.pfdcount - i - 1)
					* sizeof(struct pollfd));
			m->handler.pfdcount--;
			m->handler.pfds[m->handler.pfdcount].fd = 0;
			m->handler.pfds[m->handler.pfdcount].events = 0;

			memmove(pfds + i, pfds + i + 1,
				(m->handler.copycount - i - 1)
					* sizeof(struct pollfd));
			m->handler.copycount--;
			m->handler.copy[m->handler.copycount].fd = 0;
			m->handler.copy[m->handler.copycount].events = 0;

			i--;
		}
	}
}

/* Add all timers that have popped to the ready list. */
static unsigned int thread_process_timers(struct thread_master *m,
					  struct timeval *timenow)
{
	struct timeval prev = *timenow;
	bool displayed = false;
	struct thread *thread;
	unsigned int ready = 0;

	while ((thread = thread_timer_list_first(&m->timer))) {
		if (timercmp(timenow, &thread->u.sands, <))
			break;
		prev = thread->u.sands;
		prev.tv_sec += 4;
		/*
		 * If the timer would have popped 4 seconds in the
		 * past then we are in a situation where we are
		 * really getting behind on handling of events.
		 * Let's log it and do the right thing with it.
		 */
		if (timercmp(timenow, &prev, >)) {
			atomic_fetch_add_explicit(
				&thread->hist->total_starv_warn, 1,
				memory_order_seq_cst);
			if (!displayed && !thread->ignore_timer_late) {
				flog_warn(
					EC_LIB_STARVE_THREAD,
					"Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
					thread);
				displayed = true;
			}
		}

		thread_timer_list_pop(&m->timer);
		thread->type = THREAD_READY;
		thread_list_add_tail(&m->ready, thread);
		ready++;
	}

	return ready;
}

/* process a list en masse, e.g. for event thread lists */
static unsigned int thread_process(struct thread_list_head *list)
{
	struct thread *thread;
	unsigned int ready = 0;

	while ((thread = thread_list_pop(list))) {
		thread->type = THREAD_READY;
		thread_list_add_tail(&thread->master->ready, thread);
		ready++;
	}
	return ready;
}


718e3744 1757/* Fetch next ready thread. */
d62a17ae 1758struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
718e3744 1759{
d62a17ae 1760 struct thread *thread = NULL;
1761 struct timeval now;
1762 struct timeval zerotime = {0, 0};
1763 struct timeval tv;
1764 struct timeval *tw = NULL;
d81ca9a3 1765 bool eintr_p = false;
d62a17ae 1766 int num = 0;
1767
1768 do {
1769 /* Handle signals if any */
1770 if (m->handle_signals)
7cc91e67 1771 frr_sigevent_process();
d62a17ae 1772
1773 pthread_mutex_lock(&m->mtx);
1774
1775 /* Process any pending cancellation requests */
1776 do_thread_cancel(m);
1777
e3c9529e 1778 /*
1779 * Attempt to flush ready queue before going into poll().
1780 * This is performance-critical. Think twice before modifying.
1781 */
c284542b 1782 if ((thread = thread_list_pop(&m->ready))) {
e3c9529e 1783 fetch = thread_run(m, thread, fetch);
1784 if (fetch->ref)
1785 *fetch->ref = NULL;
1786 pthread_mutex_unlock(&m->mtx);
5e822957 1787 if (!m->ready_run_loop)
1788 GETRUSAGE(&m->last_getrusage);
1789 m->ready_run_loop = true;
e3c9529e 1790 break;
1791 }
1792
5e822957 1793 m->ready_run_loop = false;
e3c9529e 1794 /* otherwise, tick through scheduling sequence */
1795
bca37d17 1796 /*
1797 * Post events to ready queue. This must come before the
1798 * following block since events should occur immediately
1799 */
d62a17ae 1800 thread_process(&m->event);
1801
bca37d17 1802 /*
1803 * If there are no tasks on the ready queue, we will poll()
1804 * until a timer expires or we receive I/O, whichever comes
1805 * first. The strategy for doing this is:
d62a17ae 1806 *
1807 * - If there are events pending, set the poll() timeout to zero
1808 * - If there are no events pending, but there are timers
d279ef57 1809 * pending, set the timeout to the smallest remaining time on
1810 * any timer.
d62a17ae 1811 * - If there are neither timers nor events pending, but there
d279ef57 1812 * are file descriptors pending, block indefinitely in poll()
d62a17ae 1813 * - If nothing is pending, it's time for the application to die
1814 *
1815 * In every case except the last, we need to hit poll() at least
bca37d17 1816 * once per loop to avoid starvation by events
1817 */
c284542b 1818 if (!thread_list_count(&m->ready))
27d29ced 1819 tw = thread_timer_wait(&m->timer, &tv);
d62a17ae 1820
c284542b 1821 if (thread_list_count(&m->ready) ||
1822 (tw && !timercmp(tw, &zerotime, >)))
d62a17ae 1823 tw = &zerotime;
1824
1825 if (!tw && m->handler.pfdcount == 0) { /* die */
1826 pthread_mutex_unlock(&m->mtx);
1827 fetch = NULL;
1828 break;
1829 }
1830
bca37d17 1831 /*
1832 * Copy pollfd array + # active pollfds in it. Not necessary to
1833 * copy the array size as this is fixed.
1834 */
d62a17ae 1835 m->handler.copycount = m->handler.pfdcount;
1836 memcpy(m->handler.copy, m->handler.pfds,
1837 m->handler.copycount * sizeof(struct pollfd));
1838
e3c9529e 1839 pthread_mutex_unlock(&m->mtx);
1840 {
d81ca9a3 1841 eintr_p = false;
1842 num = fd_poll(m, tw, &eintr_p);
e3c9529e 1843 }
1844 pthread_mutex_lock(&m->mtx);
d764d2cc 1845
e3c9529e 1846 /* Handle any errors received in poll() */
1847 if (num < 0) {
d81ca9a3 1848 if (eintr_p) {
d62a17ae 1849 pthread_mutex_unlock(&m->mtx);
e3c9529e 1850 /* loop around to signal handler */
1851 continue;
d62a17ae 1852 }
1853
e3c9529e 1854 /* else die */
450971aa 1855 flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
9ef9495e 1856 safe_strerror(errno));
e3c9529e 1857 pthread_mutex_unlock(&m->mtx);
1858 fetch = NULL;
1859 break;
bca37d17 1860 }
d62a17ae 1861
1862 /* Post timers to ready queue. */
1863 monotime(&now);
e7d9e44b 1864 thread_process_timers(m, &now);
d62a17ae 1865
1866 /* Post I/O to ready queue. */
1867 if (num > 0)
1868 thread_process_io(m, num);
1869
d62a17ae 1870 pthread_mutex_unlock(&m->mtx);
1871
1872 } while (!thread && m->spin);
1873
1874 return fetch;
718e3744 1875}
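
/* Typical caller (illustrative): this is essentially the main loop that
 * frr_run() drives for every daemon.  thread_fetch() blocks until a task
 * is ready, copies it into `thread', and returns NULL only when the
 * master is shutting down.
 */
static void example_main_loop(struct thread_master *master)
{
	struct thread thread;

	while (thread_fetch(master, &thread))
		thread_call(&thread);
}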
1876
d62a17ae 1877static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
62f44022 1878{
d62a17ae 1879 return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1880 + (a.tv_usec - b.tv_usec));
62f44022 1881}
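
/* Worked example (illustrative): for a = {5, 200000} and b = {3, 700000},
 * this yields (5 - 3) * 1000000 + (200000 - 700000) = 1500000 usec;
 * a negative microsecond delta is absorbed by the seconds term.
 */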
1882
d62a17ae 1883unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1884 unsigned long *cputime)
718e3744 1885{
6418e2d3 1886#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
4e2839de 1887
1888#ifdef __FreeBSD__
1889 /*
1890 * FreeBSD appears to have an issue when clock_gettime is called
1891 * with CLOCK_THREAD_CPUTIME_ID twice in quick succession:
1892 * occasionally the now time will be before the start time.
1893 * This is not good, and FRR ends up reporting CPU HOGs
1894 * when the subtraction wraps to very large numbers.
1895 *
1896 * What we are going to do here is cheat a little bit
1897 * and notice that this is a problem and just correct
1898 * it so that it is impossible to happen
1899 */
1900 if (start->cpu.tv_sec == now->cpu.tv_sec &&
1901 start->cpu.tv_nsec > now->cpu.tv_nsec)
1902 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1903 else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1904 now->cpu.tv_sec = start->cpu.tv_sec;
1905 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1906 }
1907#endif
6418e2d3 1908 *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1909 + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1910#else
d62a17ae 1911 /* This is 'user + sys' time. */
1912 *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1913 + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
6418e2d3 1914#endif
d62a17ae 1915 return timeval_elapsed(now->real, start->real);
8b70d0b0 1916}
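
/* Illustrative sketch: the snapshot pairing that thread_call() uses
 * below.  RUSAGE_T and the GETRUSAGE() macro come from thread.h; the
 * return value is wall-clock microseconds, with CPU microseconds
 * stored through *cputime.
 */
static void example_measure_work(void)
{
	RUSAGE_T before, after;
	unsigned long walltime, cputime;

	GETRUSAGE(&before);
	/* ... do some work here ... */
	GETRUSAGE(&after);
	walltime = thread_consumed_time(&after, &before, &cputime);
	zlog_debug("work took %luus wall, %luus cpu", walltime, cputime);
}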
1917
50596be0 1918 /* We should aim to yield after `yield' microseconds, which defaults
 1919 to THREAD_YIELD_TIME_SLOT.
8b70d0b0 1920 Note: we are using real (wall clock) time for this calculation.
1921 It could be argued that CPU time may make more sense in certain
1922 contexts. The things to consider are whether the thread may have
1923 blocked (in which case wall time increases, but CPU time does not),
1924 or whether the system is heavily loaded with other processes competing
d62a17ae 1925 for CPU time. On balance, wall clock time seems to make sense.
8b70d0b0 1926 Plus it has the added benefit that gettimeofday should be faster
1927 than calling getrusage. */
d62a17ae 1928int thread_should_yield(struct thread *thread)
718e3744 1929{
d62a17ae 1930 int result;
00dffa8c 1931 frr_with_mutex(&thread->mtx) {
d62a17ae 1932 result = monotime_since(&thread->real, NULL)
1933 > (int64_t)thread->yield;
1934 }
d62a17ae 1935 return result;
50596be0 1936}
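
/* Illustrative sketch (hypothetical task): a long-running job that polls
 * thread_should_yield() and reschedules itself instead of monopolizing
 * the event loop.  struct example_state, example_more_work() and
 * example_do_one_item() are assumptions, not names from this file.
 */
static void example_bulk_job(struct thread *thread)
{
	struct example_state *state = THREAD_ARG(thread);

	while (example_more_work(state)) {
		example_do_one_item(state);
		if (thread_should_yield(thread)) {
			/* resume later as a fresh event task */
			thread_add_event(thread->master, example_bulk_job,
					 state, 0, NULL);
			return;
		}
	}
}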
1937
d62a17ae 1938void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
50596be0 1939{
00dffa8c 1940 frr_with_mutex(&thread->mtx) {
d62a17ae 1941 thread->yield = yield_time;
1942 }
718e3744 1943}
1944
d62a17ae 1945void thread_getrusage(RUSAGE_T *r)
db9c0df9 1946{
6418e2d3 1947 monotime(&r->real);
1948 if (!cputime_enabled) {
1949 memset(&r->cpu, 0, sizeof(r->cpu));
1950 return;
1951 }
1952
1953#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1954 /* not currently implemented in Linux's vDSO, but maybe at some point
1955 * in the future?
1956 */
1957 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1958#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
231db9a6 1959#if defined RUSAGE_THREAD
1960#define FRR_RUSAGE RUSAGE_THREAD
1961#else
1962#define FRR_RUSAGE RUSAGE_SELF
1963#endif
6418e2d3 1964 getrusage(FRR_RUSAGE, &(r->cpu));
1965#endif
db9c0df9 1966}
1967
fbcac826 1968 /*
1969 * Call a thread.
1970 *
1971 * This function will atomically update the thread's usage history. At present
1972 * this is the only spot where usage history is written. Nevertheless the code
1973 * has been written such that the introduction of writers in the future should
1974 * not need to update it provided the writers atomically perform only the
1975 * operations done here, i.e. updating the total and maximum times. In
1976 * particular, the maximum real and cpu times must be monotonically increasing
1977 * or this code is not correct.
1978 */
d62a17ae 1979void thread_call(struct thread *thread)
718e3744 1980{
d62a17ae 1981 RUSAGE_T before, after;
cc8b13a0 1982
45f01188 1983 /* if the thread being called is the CLI, it may change cputime_enabled
1984 * ("service cputime-stats" command), which can result in nonsensical
1985 * and very confusing warnings
1986 */
1987 bool cputime_enabled_here = cputime_enabled;
1988
5e822957 1989 if (thread->master->ready_run_loop)
1990 before = thread->master->last_getrusage;
1991 else
1992 GETRUSAGE(&before);
1993
d62a17ae 1994 thread->real = before.real;
718e3744 1995
6c3aa850 1996 frrtrace(9, frr_libfrr, thread_call, thread->master,
1997 thread->xref->funcname, thread->xref->xref.file,
1998 thread->xref->xref.line, NULL, thread->u.fd,
c7bb4f00 1999 thread->u.val, thread->arg, thread->u.sands.tv_sec);
abf96a87 2000
d62a17ae 2001 pthread_setspecific(thread_current, thread);
2002 (*thread->func)(thread);
2003 pthread_setspecific(thread_current, NULL);
718e3744 2004
d62a17ae 2005 GETRUSAGE(&after);
5e822957 2006 thread->master->last_getrusage = after;
718e3744 2007
45f01188 2008 unsigned long walltime, cputime;
2009 unsigned long exp;
fbcac826 2010
45f01188 2011 walltime = thread_consumed_time(&after, &before, &cputime);
2012
2013 /* update walltime */
2014 atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
fbcac826 2015 memory_order_seq_cst);
2016 exp = atomic_load_explicit(&thread->hist->real.max,
2017 memory_order_seq_cst);
45f01188 2018 while (exp < walltime
fbcac826 2019 && !atomic_compare_exchange_weak_explicit(
45f01188 2020 &thread->hist->real.max, &exp, walltime,
2021 memory_order_seq_cst, memory_order_seq_cst))
fbcac826 2022 ;
2023
45f01188 2024 if (cputime_enabled_here && cputime_enabled) {
2025 /* update cputime */
2026 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2027 memory_order_seq_cst);
2028 exp = atomic_load_explicit(&thread->hist->cpu.max,
2029 memory_order_seq_cst);
2030 while (exp < cputime
2031 && !atomic_compare_exchange_weak_explicit(
2032 &thread->hist->cpu.max, &exp, cputime,
2033 memory_order_seq_cst, memory_order_seq_cst))
2034 ;
2035 }
fbcac826 2036
2037 atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2038 memory_order_seq_cst);
2039 atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2040 memory_order_seq_cst);
718e3744 2041
45f01188 2042 if (cputime_enabled_here && cputime_enabled && cputime_threshold
2043 && cputime > cputime_threshold) {
d62a17ae 2044 /*
45f01188 2045 * We have a CPU Hog on our hands. The time FRR has spent
2046 * doing actual work (not sleeping) is greater than 5 seconds.
d62a17ae 2047 * Whinge about it now, so we're aware this is yet another task
2048 * to fix.
2049 */
9b8e01ca 2050 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2051 1, memory_order_seq_cst);
9ef9495e 2052 flog_warn(
039d547f 2053 EC_LIB_SLOW_THREAD_CPU,
2054 "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2055 thread->xref->funcname, (unsigned long)thread->func,
45f01188 2056 walltime / 1000, cputime / 1000);
2057
2058 } else if (walltime_threshold && walltime > walltime_threshold) {
039d547f 2059 /*
45f01188 2060 * The runtime for a task is greater than 5 seconds, but the
2061 * cpu time is under 5 seconds. Let's whine about this because
2062 * this could imply some sort of scheduling issue.
039d547f 2063 */
9b8e01ca 2064 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2065 1, memory_order_seq_cst);
039d547f 2066 flog_warn(
2067 EC_LIB_SLOW_THREAD_WALL,
2068 "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
60a3efec 2069 thread->xref->funcname, (unsigned long)thread->func,
45f01188 2070 walltime / 1000, cputime / 1000);
d62a17ae 2071 }
718e3744 2072}
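
/* Illustrative sketch: the lock-free "monotonic maximum" idiom used
 * above, in isolation.  The weak compare-exchange can fail spuriously
 * or because another thread raised the maximum first; on failure `exp'
 * is refreshed with the current value, so the loop only retries while
 * our sample still exceeds it.
 */
static void example_update_max(atomic_size_t *max, size_t sample)
{
	size_t exp = atomic_load_explicit(max, memory_order_seq_cst);

	while (exp < sample
	       && !atomic_compare_exchange_weak_explicit(
		       max, &exp, sample, memory_order_seq_cst,
		       memory_order_seq_cst))
		;
}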
2073
2074/* Execute thread */
60a3efec 2075void _thread_execute(const struct xref_threadsched *xref,
cc9f21da 2076 struct thread_master *m, void (*func)(struct thread *),
60a3efec 2077 void *arg, int val)
718e3744 2078{
c4345fbf 2079 struct thread *thread;
718e3744 2080
c4345fbf 2081 /* Get or allocate new thread to execute. */
00dffa8c 2082 frr_with_mutex(&m->mtx) {
60a3efec 2083 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
9c7753e4 2084
c4345fbf 2085 /* Set its event value. */
00dffa8c 2086 frr_with_mutex(&thread->mtx) {
c4345fbf 2087 thread->add_type = THREAD_EXECUTE;
2088 thread->u.val = val;
2089 thread->ref = &thread;
2090 }
c4345fbf 2091 }
f7c62e11 2092
c4345fbf 2093 /* Execute thread doing all accounting. */
2094 thread_call(thread);
9c7753e4 2095
c4345fbf 2096 /* Give back or free thread. */
2097 thread_add_unuse(m, thread);
718e3744 2098}
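
/* Typical caller (illustrative): the thread_execute() convenience macro
 * in thread.h supplies the xref argument, so daemon code simply writes:
 *
 *	thread_execute(master, some_handler, some_arg, 0);
 *
 * some_handler/some_arg are placeholders, not names from this file.
 */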
1543c387 2099
2100/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2101void debug_signals(const sigset_t *sigs)
2102{
2103 int i, found;
2104 sigset_t tmpsigs;
2105 char buf[300];
2106
2107 /*
2108 * We're only looking at the non-realtime signals here, so we need
2109 * some limit value. Platform differences mean at some point we just
2110 * need to pick a reasonable value.
2111 */
2112#if defined SIGRTMIN
2113# define LAST_SIGNAL SIGRTMIN
2114#else
2115# define LAST_SIGNAL 32
2116#endif
2117
2118
2119 if (sigs == NULL) {
2120 sigemptyset(&tmpsigs);
2121 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2122 sigs = &tmpsigs;
2123 }
2124
2125 found = 0;
2126 buf[0] = '\0';
2127
2128 for (i = 0; i < LAST_SIGNAL; i++) {
2129 char tmp[20];
2130
2131 if (sigismember(sigs, i) > 0) {
2132 if (found > 0)
2133 strlcat(buf, ",", sizeof(buf));
2134 snprintf(tmp, sizeof(tmp), "%d", i);
2135 strlcat(buf, tmp, sizeof(buf));
2136 found++;
2137 }
2138 }
2139
2140 if (found == 0)
2141 snprintf(buf, sizeof(buf), "<none>");
2142
2143 zlog_debug("%s: %s", __func__, buf);
2144}
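
/* Usage (illustrative): pass NULL to log the calling thread's currently
 * blocked signal mask, e.g. while debugging signal delivery:
 *
 *	debug_signals(NULL);   logs e.g. "debug_signals: 10,12,18"
 *
 * The exact numbers shown depend on the platform's signal values.
 */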
a505383d 2145
2146bool thread_is_scheduled(struct thread *thread)
2147{
2148 if (thread == NULL)
2149 return false;
2150
2151 return true;
2152}
f59e6882 2153
2154static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2155 const struct thread *thread)
2156{
2157 static const char * const types[] = {
2158 [THREAD_READ] = "read",
2159 [THREAD_WRITE] = "write",
2160 [THREAD_TIMER] = "timer",
2161 [THREAD_EVENT] = "event",
2162 [THREAD_READY] = "ready",
2163 [THREAD_UNUSED] = "unused",
2164 [THREAD_EXECUTE] = "exec",
2165 };
2166 ssize_t rv = 0;
2167 char info[16] = "";
2168
2169 if (!thread)
2170 return bputs(buf, "{(thread *)NULL}");
2171
2172 rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2173
2174 if (thread->type < array_size(types) && types[thread->type])
2175 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2176 else
2177 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2178
2179 switch (thread->type) {
2180 case THREAD_READ:
2181 case THREAD_WRITE:
2182 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2183 break;
2184
2185 case THREAD_TIMER:
2186 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2187 break;
2188 }
2189
2190 rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2191 thread->xref->funcname, thread->xref->dest,
2192 thread->xref->xref.file, thread->xref->xref.line);
2193 return rv;
2194}
2195
54929fd3 2196printfrr_ext_autoreg_p("TH", printfrr_thread);
f59e6882 2197static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2198 const void *ptr)
2199{
2200 const struct thread *thread = ptr;
2201 struct timespec remain = {};
2202
2203 if (ea->fmt[0] == 'D') {
2204 ea->fmt++;
2205 return printfrr_thread_dbg(buf, ea, thread);
2206 }
2207
2208 if (!thread) {
2209 /* need to jump over time formatting flag characters in the
2210 * input format string, i.e. adjust ea->fmt!
2211 */
2212 printfrr_time(buf, ea, &remain,
2213 TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2214 return bputch(buf, '-');
2215 }
2216
2217 TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2218 return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2219}
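
/* Usage (illustrative): with the "TH" extension registered above, any
 * frr-format printf consumer can render a thread directly:
 *
 *	zlog_debug("timer fires in %pTH", thread);    remaining time
 *	zlog_debug("task detail: %pTHD", thread);     full debug dump
 *
 * The "%pTHD" in the starvation warning earlier in this file resolves
 * through this same code path.
 */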