lib/event.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /* Thread management routine
   3  * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
   4  */
   5
   6 /* #define DEBUG */
   7
   8 #include <zebra.h>
   9 #include <sys/resource.h>
  10
  11 #include "event.h"
  12 #include "memory.h"
  13 #include "frrcu.h"
  14 #include "log.h"
  15 #include "hash.h"
  16 #include "command.h"
  17 #include "sigevent.h"
  18 #include "network.h"
  19 #include "jhash.h"
  20 #include "frratomic.h"
  21 #include "frr_pthread.h"
  22 #include "lib_errors.h"
  23 #include "libfrr_trace.h"
  24 #include "libfrr.h"
  25
/* Memory types used for the allocations made in this file. */
DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");

/* thread_list_* container of struct event, declared over its threaditem member. */
DECLARE_LIST(thread_list, struct event, threaditem);
  32
/*
 * Task cancellation request record.
 * NOTE(review): presumably queued on thread_master->cancel_req -- confirm
 * against the cancellation code, which is not visible in this chunk.
 */
struct cancel_req {
        /* presumably THREAD_CANCEL_FLAG_* bits -- TODO confirm */
        int flags;
        /* task pointer carried by the request */
        struct event *thread;
        /* opaque object pointer; NOTE(review): semantics not visible here */
        void *eventobj;
        /* pointer to a task pointer; NOTE(review): confirm how it is used */
        struct event **threadref;
};

/* Flags for task cancellation */
#define THREAD_CANCEL_FLAG_READY     0x01
  42
  43 static int thread_timer_cmp(const struct event *a, const struct event *b)
  44 {
  45         if (a->u.sands.tv_sec < b->u.sands.tv_sec)
  46                 return -1;
  47         if (a->u.sands.tv_sec > b->u.sands.tv_sec)
  48                 return 1;
  49         if (a->u.sands.tv_usec < b->u.sands.tv_usec)
  50                 return -1;
  51         if (a->u.sands.tv_usec > b->u.sands.tv_usec)
  52                 return 1;
  53         return 0;
  54 }
  55
  56 DECLARE_HEAP(thread_timer_list, struct event, timeritem, thread_timer_cmp);
  57
  58 #if defined(__APPLE__)
  59 #include <mach/mach.h>
  60 #include <mach/mach_time.h>
  61 #endif
  62
  63 #define AWAKEN(m)                                                              \
  64         do {                                                                   \
  65                 const unsigned char wakebyte = 0x01;                           \
  66                 write(m->io_pipe[1], &wakebyte, 1);                            \
  67         } while (0);
  68
/* control variable for initializer */
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
/* pthread key created (once) by initializer() */
pthread_key_t thread_current;

/* masters_mtx is held around every access to the masters list in this file */
static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
/* list of all thread_masters; created lazily by thread_master_create() */
static struct list *masters;

/* forward declaration: used by the free helpers before its definition */
static void thread_free(struct thread_master *master, struct event *thread);

/* Build-time defaults: both evaluate to 0 unless the build defines them. */
#ifndef EXCLUDE_CPU_TIME
#define EXCLUDE_CPU_TIME 0
#endif
#ifndef CONSUMED_TIME_CHECK
#define CONSUMED_TIME_CHECK 0
#endif

/* Toggled by the "service cputime-stats" command. */
bool cputime_enabled = !EXCLUDE_CPU_TIME;
/* Set by "service cputime-warning" (CLI value * 1000; 0 when negated). */
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
/* Set by "service walltime-warning" (CLI value * 1000; 0 when negated). */
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
  88
  89 /* CLI start ---------------------------------------------------------------- */
  90 #include "lib/event_clippy.c"
  91
  92 static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
  93 {
  94         int size = sizeof(a->func);
  95
  96         return jhash(&a->func, size, 0);
  97 }
  98
  99 static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
 100                                const struct cpu_thread_history *b)
 101 {
 102         return a->func == b->func;
 103 }
 104
 105 static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
 106 {
 107         struct cpu_thread_history *new;
 108         new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
 109         new->func = a->func;
 110         new->funcname = a->funcname;
 111         return new;
 112 }
 113
 114 static void cpu_record_hash_free(void *a)
 115 {
 116         struct cpu_thread_history *hist = a;
 117
 118         XFREE(MTYPE_THREAD_STATS, hist);
 119 }
 120
 121 static void vty_out_cpu_thread_history(struct vty *vty,
 122                                        struct cpu_thread_history *a)
 123 {
 124         vty_out(vty,
 125                 "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
 126                 a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
 127                 a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
 128                 (a->real.total / a->total_calls), a->real.max,
 129                 a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
 130         vty_out(vty, "  %c%c%c%c%c  %s\n",
 131                 a->types & (1 << THREAD_READ) ? 'R' : ' ',
 132                 a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
 133                 a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
 134                 a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
 135                 a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
 136 }
 137
 138 static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
 139 {
 140         struct cpu_thread_history *totals = args[0];
 141         struct cpu_thread_history copy;
 142         struct vty *vty = args[1];
 143         uint8_t *filter = args[2];
 144
 145         struct cpu_thread_history *a = bucket->data;
 146
 147         copy.total_active =
 148                 atomic_load_explicit(&a->total_active, memory_order_seq_cst);
 149         copy.total_calls =
 150                 atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
 151         copy.total_cpu_warn =
 152                 atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
 153         copy.total_wall_warn =
 154                 atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
 155         copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
 156                                                      memory_order_seq_cst);
 157         copy.cpu.total =
 158                 atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
 159         copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
 160         copy.real.total =
 161                 atomic_load_explicit(&a->real.total, memory_order_seq_cst);
 162         copy.real.max =
 163                 atomic_load_explicit(&a->real.max, memory_order_seq_cst);
 164         copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
 165         copy.funcname = a->funcname;
 166
 167         if (!(copy.types & *filter))
 168                 return;
 169
 170         vty_out_cpu_thread_history(vty, &copy);
 171         totals->total_active += copy.total_active;
 172         totals->total_calls += copy.total_calls;
 173         totals->total_cpu_warn += copy.total_cpu_warn;
 174         totals->total_wall_warn += copy.total_wall_warn;
 175         totals->total_starv_warn += copy.total_starv_warn;
 176         totals->real.total += copy.real.total;
 177         if (totals->real.max < copy.real.max)
 178                 totals->real.max = copy.real.max;
 179         totals->cpu.total += copy.cpu.total;
 180         if (totals->cpu.max < copy.cpu.max)
 181                 totals->cpu.max = copy.cpu.max;
 182 }
 183
 184 static void cpu_record_print(struct vty *vty, uint8_t filter)
 185 {
 186         struct cpu_thread_history tmp;
 187         void *args[3] = {&tmp, vty, &filter};
 188         struct thread_master *m;
 189         struct listnode *ln;
 190
 191         if (!cputime_enabled)
 192                 vty_out(vty,
 193                         "\n"
 194                         "Collecting CPU time statistics is currently disabled.  Following statistics\n"
 195                         "will be zero or may display data from when collection was enabled.  Use the\n"
 196                         "  \"service cputime-stats\"  command to start collecting data.\n"
 197                         "\nCounters and wallclock times are always maintained and should be accurate.\n");
 198
 199         memset(&tmp, 0, sizeof(tmp));
 200         tmp.funcname = "TOTAL";
 201         tmp.types = filter;
 202
 203         frr_with_mutex (&masters_mtx) {
 204                 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
 205                         const char *name = m->name ? m->name : "main";
 206
 207                         char underline[strlen(name) + 1];
 208                         memset(underline, '-', sizeof(underline));
 209                         underline[sizeof(underline) - 1] = '\0';
 210
 211                         vty_out(vty, "\n");
 212                         vty_out(vty, "Showing statistics for pthread %s\n",
 213                                 name);
 214                         vty_out(vty, "-------------------------------%s\n",
 215                                 underline);
 216                         vty_out(vty, "%30s %18s %18s\n", "",
 217                                 "CPU (user+system):", "Real (wall-clock):");
 218                         vty_out(vty,
 219                                 "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
 220                         vty_out(vty, " Avg uSec Max uSecs");
 221                         vty_out(vty,
 222                                 "  CPU_Warn Wall_Warn Starv_Warn Type   Thread\n");
 223
 224                         if (m->cpu_record->count)
 225                                 hash_iterate(
 226                                         m->cpu_record,
 227                                         (void (*)(struct hash_bucket *,
 228                                                   void *))cpu_record_hash_print,
 229                                         args);
 230                         else
 231                                 vty_out(vty, "No data to display yet.\n");
 232
 233                         vty_out(vty, "\n");
 234                 }
 235         }
 236
 237         vty_out(vty, "\n");
 238         vty_out(vty, "Total thread statistics\n");
 239         vty_out(vty, "-------------------------\n");
 240         vty_out(vty, "%30s %18s %18s\n", "",
 241                 "CPU (user+system):", "Real (wall-clock):");
 242         vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
 243         vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
 244         vty_out(vty, "  Type  Thread\n");
 245
 246         if (tmp.total_calls > 0)
 247                 vty_out_cpu_thread_history(vty, &tmp);
 248 }
 249
 250 static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
 251 {
 252         uint8_t *filter = args[0];
 253         struct hash *cpu_record = args[1];
 254
 255         struct cpu_thread_history *a = bucket->data;
 256
 257         if (!(a->types & *filter))
 258                 return;
 259
 260         hash_release(cpu_record, bucket->data);
 261 }
 262
 263 static void cpu_record_clear(uint8_t filter)
 264 {
 265         uint8_t *tmp = &filter;
 266         struct thread_master *m;
 267         struct listnode *ln;
 268
 269         frr_with_mutex (&masters_mtx) {
 270                 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
 271                         frr_with_mutex (&m->mtx) {
 272                                 void *args[2] = {tmp, m->cpu_record};
 273                                 hash_iterate(
 274                                         m->cpu_record,
 275                                         (void (*)(struct hash_bucket *,
 276                                                   void *))cpu_record_hash_clear,
 277                                         args);
 278                         }
 279                 }
 280         }
 281 }
 282
 283 static uint8_t parse_filter(const char *filterstr)
 284 {
 285         int i = 0;
 286         int filter = 0;
 287
 288         while (filterstr[i] != '\0') {
 289                 switch (filterstr[i]) {
 290                 case 'r':
 291                 case 'R':
 292                         filter |= (1 << THREAD_READ);
 293                         break;
 294                 case 'w':
 295                 case 'W':
 296                         filter |= (1 << THREAD_WRITE);
 297                         break;
 298                 case 't':
 299                 case 'T':
 300                         filter |= (1 << THREAD_TIMER);
 301                         break;
 302                 case 'e':
 303                 case 'E':
 304                         filter |= (1 << THREAD_EVENT);
 305                         break;
 306                 case 'x':
 307                 case 'X':
 308                         filter |= (1 << THREAD_EXECUTE);
 309                         break;
 310                 default:
 311                         break;
 312                 }
 313                 ++i;
 314         }
 315         return filter;
 316 }
 317
 318 DEFUN_NOSH (show_thread_cpu,
 319             show_thread_cpu_cmd,
 320             "show thread cpu [FILTER]",
 321             SHOW_STR
 322             "Thread information\n"
 323             "Thread CPU usage\n"
 324             "Display filter (rwtex)\n")
 325 {
 326         uint8_t filter = (uint8_t)-1U;
 327         int idx = 0;
 328
 329         if (argv_find(argv, argc, "FILTER", &idx)) {
 330                 filter = parse_filter(argv[idx]->arg);
 331                 if (!filter) {
 332                         vty_out(vty,
 333                                 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
 334                                 argv[idx]->arg);
 335                         return CMD_WARNING;
 336                 }
 337         }
 338
 339         cpu_record_print(vty, filter);
 340         return CMD_SUCCESS;
 341 }
 342
 343 DEFPY (service_cputime_stats,
 344        service_cputime_stats_cmd,
 345        "[no] service cputime-stats",
 346        NO_STR
 347        "Set up miscellaneous service\n"
 348        "Collect CPU usage statistics\n")
 349 {
 350         cputime_enabled = !no;
 351         return CMD_SUCCESS;
 352 }
 353
 354 DEFPY (service_cputime_warning,
 355        service_cputime_warning_cmd,
 356        "[no] service cputime-warning (1-4294967295)",
 357        NO_STR
 358        "Set up miscellaneous service\n"
 359        "Warn for tasks exceeding CPU usage threshold\n"
 360        "Warning threshold in milliseconds\n")
 361 {
 362         if (no)
 363                 cputime_threshold = 0;
 364         else
 365                 cputime_threshold = cputime_warning * 1000;
 366         return CMD_SUCCESS;
 367 }
 368
 369 ALIAS (service_cputime_warning,
 370        no_service_cputime_warning_cmd,
 371        "no service cputime-warning",
 372        NO_STR
 373        "Set up miscellaneous service\n"
 374        "Warn for tasks exceeding CPU usage threshold\n")
 375
 376 DEFPY (service_walltime_warning,
 377        service_walltime_warning_cmd,
 378        "[no] service walltime-warning (1-4294967295)",
 379        NO_STR
 380        "Set up miscellaneous service\n"
 381        "Warn for tasks exceeding total wallclock threshold\n"
 382        "Warning threshold in milliseconds\n")
 383 {
 384         if (no)
 385                 walltime_threshold = 0;
 386         else
 387                 walltime_threshold = walltime_warning * 1000;
 388         return CMD_SUCCESS;
 389 }
 390
 391 ALIAS (service_walltime_warning,
 392        no_service_walltime_warning_cmd,
 393        "no service walltime-warning",
 394        NO_STR
 395        "Set up miscellaneous service\n"
 396        "Warn for tasks exceeding total wallclock threshold\n")
 397
 398 static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
 399 {
 400         const char *name = m->name ? m->name : "main";
 401         char underline[strlen(name) + 1];
 402         struct event *thread;
 403         uint32_t i;
 404
 405         memset(underline, '-', sizeof(underline));
 406         underline[sizeof(underline) - 1] = '\0';
 407
 408         vty_out(vty, "\nShowing poll FD's for %s\n", name);
 409         vty_out(vty, "----------------------%s\n", underline);
 410         vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
 411                 m->fd_limit);
 412         for (i = 0; i < m->handler.pfdcount; i++) {
 413                 vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
 414                         m->handler.pfds[i].fd, m->handler.pfds[i].events,
 415                         m->handler.pfds[i].revents);
 416
 417                 if (m->handler.pfds[i].events & POLLIN) {
 418                         thread = m->read[m->handler.pfds[i].fd];
 419
 420                         if (!thread)
 421                                 vty_out(vty, "ERROR ");
 422                         else
 423                                 vty_out(vty, "%s ", thread->xref->funcname);
 424                 } else
 425                         vty_out(vty, " ");
 426
 427                 if (m->handler.pfds[i].events & POLLOUT) {
 428                         thread = m->write[m->handler.pfds[i].fd];
 429
 430                         if (!thread)
 431                                 vty_out(vty, "ERROR\n");
 432                         else
 433                                 vty_out(vty, "%s\n", thread->xref->funcname);
 434                 } else
 435                         vty_out(vty, "\n");
 436         }
 437 }
 438
 439 DEFUN_NOSH (show_thread_poll,
 440             show_thread_poll_cmd,
 441             "show thread poll",
 442             SHOW_STR
 443             "Thread information\n"
 444             "Show poll FD's and information\n")
 445 {
 446         struct listnode *node;
 447         struct thread_master *m;
 448
 449         frr_with_mutex (&masters_mtx) {
 450                 for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
 451                         show_thread_poll_helper(vty, m);
 452                 }
 453         }
 454
 455         return CMD_SUCCESS;
 456 }
 457
 458
 459 DEFUN (clear_thread_cpu,
 460        clear_thread_cpu_cmd,
 461        "clear thread cpu [FILTER]",
 462        "Clear stored data in all pthreads\n"
 463        "Thread information\n"
 464        "Thread CPU usage\n"
 465        "Display filter (rwtexb)\n")
 466 {
 467         uint8_t filter = (uint8_t)-1U;
 468         int idx = 0;
 469
 470         if (argv_find(argv, argc, "FILTER", &idx)) {
 471                 filter = parse_filter(argv[idx]->arg);
 472                 if (!filter) {
 473                         vty_out(vty,
 474                                 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
 475                                 argv[idx]->arg);
 476                         return CMD_WARNING;
 477                 }
 478         }
 479
 480         cpu_record_clear(filter);
 481         return CMD_SUCCESS;
 482 }
 483
 484 static void show_thread_timers_helper(struct vty *vty, struct thread_master *m)
 485 {
 486         const char *name = m->name ? m->name : "main";
 487         char underline[strlen(name) + 1];
 488         struct event *thread;
 489
 490         memset(underline, '-', sizeof(underline));
 491         underline[sizeof(underline) - 1] = '\0';
 492
 493         vty_out(vty, "\nShowing timers for %s\n", name);
 494         vty_out(vty, "-------------------%s\n", underline);
 495
 496         frr_each (thread_timer_list, &m->timer, thread) {
 497                 vty_out(vty, "  %-50s%pTH\n", thread->hist->funcname, thread);
 498         }
 499 }
 500
 501 DEFPY_NOSH (show_thread_timers,
 502             show_thread_timers_cmd,
 503             "show thread timers",
 504             SHOW_STR
 505             "Thread information\n"
 506             "Show all timers and how long they have in the system\n")
 507 {
 508         struct listnode *node;
 509         struct thread_master *m;
 510
 511         frr_with_mutex (&masters_mtx) {
 512                 for (ALL_LIST_ELEMENTS_RO(masters, node, m))
 513                         show_thread_timers_helper(vty, m);
 514         }
 515
 516         return CMD_SUCCESS;
 517 }
 518
/* Register every CLI command defined in this file with its command node. */
void thread_cmd_init(void)
{
        /* show commands live in the view node, clear needs enable */
        install_element(VIEW_NODE, &show_thread_cpu_cmd);
        install_element(VIEW_NODE, &show_thread_poll_cmd);
        install_element(ENABLE_NODE, &clear_thread_cpu_cmd);

        /* configuration knobs for statistics collection and warnings */
        install_element(CONFIG_NODE, &service_cputime_stats_cmd);
        install_element(CONFIG_NODE, &service_cputime_warning_cmd);
        install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
        install_element(CONFIG_NODE, &service_walltime_warning_cmd);
        install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);

        install_element(VIEW_NODE, &show_thread_timers_cmd);
}
 533 /* CLI end ------------------------------------------------------------------ */
 534
 535
 536 static void cancelreq_del(void *cr)
 537 {
 538         XFREE(MTYPE_TMP, cr);
 539 }
 540
 541 /* initializer, only ever called once */
 542 static void initializer(void)
 543 {
 544         pthread_key_create(&thread_current, NULL);
 545 }
 546
 547 struct thread_master *thread_master_create(const char *name)
 548 {
 549         struct thread_master *rv;
 550         struct rlimit limit;
 551
 552         pthread_once(&init_once, &initializer);
 553
 554         rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));
 555
 556         /* Initialize master mutex */
 557         pthread_mutex_init(&rv->mtx, NULL);
 558         pthread_cond_init(&rv->cancel_cond, NULL);
 559
 560         /* Set name */
 561         name = name ? name : "default";
 562         rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
 563
 564         /* Initialize I/O task data structures */
 565
 566         /* Use configured limit if present, ulimit otherwise. */
 567         rv->fd_limit = frr_get_fd_limit();
 568         if (rv->fd_limit == 0) {
 569                 getrlimit(RLIMIT_NOFILE, &limit);
 570                 rv->fd_limit = (int)limit.rlim_cur;
 571         }
 572
 573         rv->read = XCALLOC(MTYPE_THREAD_POLL,
 574                            sizeof(struct event *) * rv->fd_limit);
 575
 576         rv->write = XCALLOC(MTYPE_THREAD_POLL,
 577                             sizeof(struct event *) * rv->fd_limit);
 578
 579         char tmhashname[strlen(name) + 32];
 580         snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
 581                  name);
 582         rv->cpu_record = hash_create_size(
 583                 8, (unsigned int (*)(const void *))cpu_record_hash_key,
 584                 (bool (*)(const void *, const void *))cpu_record_hash_cmp,
 585                 tmhashname);
 586
 587         thread_list_init(&rv->event);
 588         thread_list_init(&rv->ready);
 589         thread_list_init(&rv->unuse);
 590         thread_timer_list_init(&rv->timer);
 591
 592         /* Initialize thread_fetch() settings */
 593         rv->spin = true;
 594         rv->handle_signals = true;
 595
 596         /* Set pthread owner, should be updated by actual owner */
 597         rv->owner = pthread_self();
 598         rv->cancel_req = list_new();
 599         rv->cancel_req->del = cancelreq_del;
 600         rv->canceled = true;
 601
 602         /* Initialize pipe poker */
 603         pipe(rv->io_pipe);
 604         set_nonblocking(rv->io_pipe[0]);
 605         set_nonblocking(rv->io_pipe[1]);
 606
 607         /* Initialize data structures for poll() */
 608         rv->handler.pfdsize = rv->fd_limit;
 609         rv->handler.pfdcount = 0;
 610         rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
 611                                    sizeof(struct pollfd) * rv->handler.pfdsize);
 612         rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
 613                                    sizeof(struct pollfd) * rv->handler.pfdsize);
 614
 615         /* add to list of threadmasters */
 616         frr_with_mutex (&masters_mtx) {
 617                 if (!masters)
 618                         masters = list_new();
 619
 620                 listnode_add(masters, rv);
 621         }
 622
 623         return rv;
 624 }
 625
 626 void thread_master_set_name(struct thread_master *master, const char *name)
 627 {
 628         frr_with_mutex (&master->mtx) {
 629                 XFREE(MTYPE_THREAD_MASTER, master->name);
 630                 master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
 631         }
 632 }
 633
 634 #define THREAD_UNUSED_DEPTH 10
 635
 636 /* Move thread to unuse list. */
 637 static void thread_add_unuse(struct thread_master *m, struct event *thread)
 638 {
 639         pthread_mutex_t mtxc = thread->mtx;
 640
 641         assert(m != NULL && thread != NULL);
 642
 643         thread->hist->total_active--;
 644         memset(thread, 0, sizeof(struct event));
 645         thread->type = THREAD_UNUSED;
 646
 647         /* Restore the thread mutex context. */
 648         thread->mtx = mtxc;
 649
 650         if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
 651                 thread_list_add_tail(&m->unuse, thread);
 652                 return;
 653         }
 654
 655         thread_free(m, thread);
 656 }
 657
 658 /* Free all unused thread. */
 659 static void thread_list_free(struct thread_master *m,
 660                 struct thread_list_head *list)
 661 {
 662         struct event *t;
 663
 664         while ((t = thread_list_pop(list)))
 665                 thread_free(m, t);
 666 }
 667
 668 static void thread_array_free(struct thread_master *m,
 669                               struct event **thread_array)
 670 {
 671         struct event *t;
 672         int index;
 673
 674         for (index = 0; index < m->fd_limit; ++index) {
 675                 t = thread_array[index];
 676                 if (t) {
 677                         thread_array[index] = NULL;
 678                         thread_free(m, t);
 679                 }
 680         }
 681         XFREE(MTYPE_THREAD_POLL, thread_array);
 682 }
 683
 684 /*
 685  * thread_master_free_unused
 686  *
 687  * As threads are finished with they are put on the
 688  * unuse list for later reuse.
 689  * If we are shutting down, Free up unused threads
 690  * So we can see if we forget to shut anything off
 691  */
 692 void thread_master_free_unused(struct thread_master *m)
 693 {
 694         frr_with_mutex (&m->mtx) {
 695                 struct event *t;
 696                 while ((t = thread_list_pop(&m->unuse)))
 697                         thread_free(m, t);
 698         }
 699 }
 700
 701 /* Stop thread scheduler. */
 702 void thread_master_free(struct thread_master *m)
 703 {
 704         struct event *t;
 705
 706         frr_with_mutex (&masters_mtx) {
 707                 listnode_delete(masters, m);
 708                 if (masters->count == 0) {
 709                         list_delete(&masters);
 710                 }
 711         }
 712
 713         thread_array_free(m, m->read);
 714         thread_array_free(m, m->write);
 715         while ((t = thread_timer_list_pop(&m->timer)))
 716                 thread_free(m, t);
 717         thread_list_free(m, &m->event);
 718         thread_list_free(m, &m->ready);
 719         thread_list_free(m, &m->unuse);
 720         pthread_mutex_destroy(&m->mtx);
 721         pthread_cond_destroy(&m->cancel_cond);
 722         close(m->io_pipe[0]);
 723         close(m->io_pipe[1]);
 724         list_delete(&m->cancel_req);
 725         m->cancel_req = NULL;
 726
 727         hash_clean_and_free(&m->cpu_record, cpu_record_hash_free);
 728
 729         XFREE(MTYPE_THREAD_MASTER, m->name);
 730         XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
 731         XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
 732         XFREE(MTYPE_THREAD_MASTER, m);
 733 }
 734
 735 /* Return remain time in milliseconds. */
 736 unsigned long thread_timer_remain_msec(struct event *thread)
 737 {
 738         int64_t remain;
 739
 740         if (!thread_is_scheduled(thread))
 741                 return 0;
 742
 743         frr_with_mutex (&thread->mtx) {
 744                 remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
 745         }
 746
 747         return remain < 0 ? 0 : remain;
 748 }
 749
 750 /* Return remain time in seconds. */
 751 unsigned long thread_timer_remain_second(struct event *thread)
 752 {
 753         return thread_timer_remain_msec(thread) / 1000LL;
 754 }
 755
 756 struct timeval thread_timer_remain(struct event *thread)
 757 {
 758         struct timeval remain;
 759         frr_with_mutex (&thread->mtx) {
 760                 monotime_until(&thread->u.sands, &remain);
 761         }
 762         return remain;
 763 }
 764
 765 static int time_hhmmss(char *buf, int buf_size, long sec)
 766 {
 767         long hh;
 768         long mm;
 769         int wr;
 770
 771         assert(buf_size >= 8);
 772
 773         hh = sec / 3600;
 774         sec %= 3600;
 775         mm = sec / 60;
 776         sec %= 60;
 777
 778         wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
 779
 780         return wr != 8;
 781 }
 782
 783 char *thread_timer_to_hhmmss(char *buf, int buf_size, struct event *t_timer)
 784 {
 785         if (t_timer) {
 786                 time_hhmmss(buf, buf_size,
 787                                 thread_timer_remain_second(t_timer));
 788         } else {
 789                 snprintf(buf, buf_size, "--:--:--");
 790         }
 791         return buf;
 792 }
 793
 794 /* Get new thread.  */
 795 static struct event *thread_get(struct thread_master *m, uint8_t type,
 796                                 void (*func)(struct event *), void *arg,
 797                                 const struct xref_threadsched *xref)
 798 {
 799         struct event *thread = thread_list_pop(&m->unuse);
 800         struct cpu_thread_history tmp;
 801
 802         if (!thread) {
 803                 thread = XCALLOC(MTYPE_THREAD, sizeof(struct event));
 804                 /* mutex only needs to be initialized at struct creation. */
 805                 pthread_mutex_init(&thread->mtx, NULL);
 806                 m->alloc++;
 807         }
 808
 809         thread->type = type;
 810         thread->add_type = type;
 811         thread->master = m;
 812         thread->arg = arg;
 813         thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
 814         thread->ref = NULL;
 815         thread->ignore_timer_late = false;
 816
 817         /*
 818          * So if the passed in funcname is not what we have
 819          * stored that means the thread->hist needs to be
 820          * updated.  We keep the last one around in unused
 821          * under the assumption that we are probably
 822          * going to immediately allocate the same
 823          * type of thread.
 824          * This hopefully saves us some serious
 825          * hash_get lookups.
 826          */
 827         if ((thread->xref && thread->xref->funcname != xref->funcname)
 828             || thread->func != func) {
 829                 tmp.func = func;
 830                 tmp.funcname = xref->funcname;
 831                 thread->hist =
 832                         hash_get(m->cpu_record, &tmp,
 833                                  (void *(*)(void *))cpu_record_hash_alloc);
 834         }
 835         thread->hist->total_active++;
 836         thread->func = func;
 837         thread->xref = xref;
 838
 839         return thread;
 840 }
 841
 842 static void thread_free(struct thread_master *master, struct event *thread)
 843 {
 844         /* Update statistics. */
 845         assert(master->alloc > 0);
 846         master->alloc--;
 847
 848         /* Free allocated resources. */
 849         pthread_mutex_destroy(&thread->mtx);
 850         XFREE(MTYPE_THREAD, thread);
 851 }
 852
/*
 * Poll the master's copied pollfd set (m->handler.copy) together with the
 * read end of the wakeup pipe (m->io_pipe[0]).
 *
 * @param m          thread master whose descriptors are polled
 * @param timer_wait relative timeout to use when m->selectpoll_timeout is
 *                   zero; NULL means no timer-derived bound
 * @param eintr_p    set to true when a pending signal is detected before
 *                   polling, or when the poll call fails with EINTR
 * @return the value returned by ppoll()/poll(), decremented by one when the
 *         wakeup pipe was among the ready descriptors; -1 when a pending
 *         signal caused an early return
 */
static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
                   bool *eintr_p)
{
        sigset_t origsigs;
        unsigned char trash[64];
        nfds_t count = m->handler.copycount;

        /*
         * Timeout handed to ppoll()/poll(), in milliseconds. The initial
         * value of -1 means "block indefinitely".
         *
         * m->selectpoll_timeout selects how it is derived:
         *   == 0: use timer_wait if one was supplied, otherwise keep -1
         *    > 0: use selectpoll_timeout itself
         *    < 0: do not wait at all (timeout of zero)
         */
        int timeout = -1;

        /* number of file descriptors with events */
        int num;

        /*
         * NOTE(review): the millisecond value below is narrowed to int; a
         * very large timer_wait->tv_sec may not fit - confirm that callers
         * bound the wait or that wrap-around is harmless here.
         */
        if (timer_wait != NULL
            && m->selectpoll_timeout == 0) // use the default value
                timeout = (timer_wait->tv_sec * 1000)
                          + (timer_wait->tv_usec / 1000);
        else if (m->selectpoll_timeout > 0) // use the user's timeout
                timeout = m->selectpoll_timeout;
        else if (m->selectpoll_timeout
                 < 0) // effect a poll (return immediately)
                timeout = 0;

        /*
         * Flush the thread-local log buffer and leave the RCU read-side
         * section before (possibly) going to sleep; it is re-entered at the
         * bottom of this function.
         */
        zlog_tls_buffer_flush();
        rcu_read_unlock();
        rcu_assert_read_unlocked();

        /* Append the wakeup pipe as one extra entry after the copied fds */
        assert(count + 1 < m->handler.pfdsize);
        m->handler.copy[count].fd = m->io_pipe[0];
        m->handler.copy[count].events = POLLIN;
        m->handler.copy[count].revents = 0x00;

        /* We need to deal with a signal-handling race here: we
         * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
         * that may arrive just before we enter poll(). We will block the
         * key signals, then check whether any have arrived - if so, we return
         * before calling poll(). If not, we'll re-enable the signals
         * in the ppoll() call.
         */

        sigemptyset(&origsigs);
        if (m->handle_signals) {
                /* Main pthread that handles the app signals */
                if (frr_sigevent_check(&origsigs)) {
                        /* Signal to process - restore signal mask and return */
                        pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
                        num = -1;
                        *eintr_p = true;
                        goto done;
                }
        } else {
                /* Don't make any changes for the non-main pthreads: just
                 * fetch the current mask so it can be passed along below.
                 */
                pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
        }

#if defined(HAVE_PPOLL)
        struct timespec ts, *tsp;

        /* Convert the millisecond timeout; a NULL timespec means no limit */
        if (timeout >= 0) {
                ts.tv_sec = timeout / 1000;
                ts.tv_nsec = (timeout % 1000) * 1000000;
                tsp = &ts;
        } else
                tsp = NULL;

        num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
        pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
#else
        /* Not ideal - there is a race after we restore the signal mask */
        pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
        num = poll(m->handler.copy, count + 1, timeout);
#endif

done:

        if (num < 0 && errno == EINTR)
                *eintr_p = true;

        /*
         * If the wakeup pipe fired, drop it from the ready count (the
         * post-decrement is evaluated for its side effect; num > 0 was
         * already established) and drain whatever bytes are queued on it.
         */
        if (num > 0 && m->handler.copy[count].revents != 0 && num--)
                while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
                        ;

        rcu_read_lock();

        return num;
}
 949
 950 /* Add new read thread. */
 951 void _event_add_read_write(const struct xref_threadsched *xref,
 952                            struct thread_master *m,
 953                            void (*func)(struct event *), void *arg, int fd,
 954                            struct event **t_ptr)
 955 {
 956         int dir = xref->thread_type;
 957         struct event *thread = NULL;
 958         struct event **thread_array;
 959
 960         if (dir == THREAD_READ)
 961                 frrtrace(9, frr_libfrr, schedule_read, m,
 962                          xref->funcname, xref->xref.file, xref->xref.line,
 963                          t_ptr, fd, 0, arg, 0);
 964         else
 965                 frrtrace(9, frr_libfrr, schedule_write, m,
 966                          xref->funcname, xref->xref.file, xref->xref.line,
 967                          t_ptr, fd, 0, arg, 0);
 968
 969         assert(fd >= 0);
 970         if (fd >= m->fd_limit)
 971                 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
 972
 973         frr_with_mutex (&m->mtx) {
 974                 if (t_ptr && *t_ptr)
 975                         // thread is already scheduled; don't reschedule
 976                         break;
 977
 978                 /* default to a new pollfd */
 979                 nfds_t queuepos = m->handler.pfdcount;
 980
 981                 if (dir == THREAD_READ)
 982                         thread_array = m->read;
 983                 else
 984                         thread_array = m->write;
 985
 986                 /* if we already have a pollfd for our file descriptor, find and
 987                  * use it */
 988                 for (nfds_t i = 0; i < m->handler.pfdcount; i++)
 989                         if (m->handler.pfds[i].fd == fd) {
 990                                 queuepos = i;
 991
 992 #ifdef DEV_BUILD
 993                                 /*
 994                                  * What happens if we have a thread already
 995                                  * created for this event?
 996                                  */
 997                                 if (thread_array[fd])
 998                                         assert(!"Thread already scheduled for file descriptor");
 999 #endif
1000                                 break;
1001                         }
1002
1003                 /* make sure we have room for this fd + pipe poker fd */
1004                 assert(queuepos + 1 < m->handler.pfdsize);
1005
1006                 thread = thread_get(m, dir, func, arg, xref);
1007
1008                 m->handler.pfds[queuepos].fd = fd;
1009                 m->handler.pfds[queuepos].events |=
1010                         (dir == THREAD_READ ? POLLIN : POLLOUT);
1011
1012                 if (queuepos == m->handler.pfdcount)
1013                         m->handler.pfdcount++;
1014
1015                 if (thread) {
1016                         frr_with_mutex (&thread->mtx) {
1017                                 thread->u.fd = fd;
1018                                 thread_array[thread->u.fd] = thread;
1019                         }
1020
1021                         if (t_ptr) {
1022                                 *t_ptr = thread;
1023                                 thread->ref = t_ptr;
1024                         }
1025                 }
1026
1027                 AWAKEN(m);
1028         }
1029 }
1030
1031 static void _event_add_timer_timeval(const struct xref_threadsched *xref,
1032                                      struct thread_master *m,
1033                                      void (*func)(struct event *), void *arg,
1034                                      struct timeval *time_relative,
1035                                      struct event **t_ptr)
1036 {
1037         struct event *thread;
1038         struct timeval t;
1039
1040         assert(m != NULL);
1041
1042         assert(time_relative);
1043
1044         frrtrace(9, frr_libfrr, schedule_timer, m,
1045                  xref->funcname, xref->xref.file, xref->xref.line,
1046                  t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
1047
1048         /* Compute expiration/deadline time. */
1049         monotime(&t);
1050         timeradd(&t, time_relative, &t);
1051
1052         frr_with_mutex (&m->mtx) {
1053                 if (t_ptr && *t_ptr)
1054                         /* thread is already scheduled; don't reschedule */
1055                         return;
1056
1057                 thread = thread_get(m, THREAD_TIMER, func, arg, xref);
1058
1059                 frr_with_mutex (&thread->mtx) {
1060                         thread->u.sands = t;
1061                         thread_timer_list_add(&m->timer, thread);
1062                         if (t_ptr) {
1063                                 *t_ptr = thread;
1064                                 thread->ref = t_ptr;
1065                         }
1066                 }
1067
1068                 /* The timer list is sorted - if this new timer
1069                  * might change the time we'll wait for, give the pthread
1070                  * a chance to re-compute.
1071                  */
1072                 if (thread_timer_list_first(&m->timer) == thread)
1073                         AWAKEN(m);
1074         }
1075 #define ONEYEAR2SEC (60 * 60 * 24 * 365)
1076         if (time_relative->tv_sec > ONEYEAR2SEC)
1077                 flog_err(
1078                         EC_LIB_TIMER_TOO_LONG,
1079                         "Timer: %pTHD is created with an expiration that is greater than 1 year",
1080                         thread);
1081 }
1082
1083
1084 /* Add timer event thread. */
1085 void _event_add_timer(const struct xref_threadsched *xref,
1086                       struct thread_master *m, void (*func)(struct event *),
1087                       void *arg, long timer, struct event **t_ptr)
1088 {
1089         struct timeval trel;
1090
1091         assert(m != NULL);
1092
1093         trel.tv_sec = timer;
1094         trel.tv_usec = 0;
1095
1096         _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1097 }
1098
1099 /* Add timer event thread with "millisecond" resolution */
1100 void _event_add_timer_msec(const struct xref_threadsched *xref,
1101                            struct thread_master *m,
1102                            void (*func)(struct event *), void *arg, long timer,
1103                            struct event **t_ptr)
1104 {
1105         struct timeval trel;
1106
1107         assert(m != NULL);
1108
1109         trel.tv_sec = timer / 1000;
1110         trel.tv_usec = 1000 * (timer % 1000);
1111
1112         _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1113 }
1114
/*
 * Add timer event thread with "timeval" resolution.
 *
 * The relative delay 'tv' is passed through unchanged to
 * _event_add_timer_timeval(), which asserts that it is not NULL.
 */
void _event_add_timer_tv(const struct xref_threadsched *xref,
                         struct thread_master *m, void (*func)(struct event *),
                         void *arg, struct timeval *tv, struct event **t_ptr)
{
        _event_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}
1122
1123 /* Add simple event thread. */
1124 void _event_add_event(const struct xref_threadsched *xref,
1125                       struct thread_master *m, void (*func)(struct event *),
1126                       void *arg, int val, struct event **t_ptr)
1127 {
1128         struct event *thread = NULL;
1129
1130         frrtrace(9, frr_libfrr, schedule_event, m,
1131                  xref->funcname, xref->xref.file, xref->xref.line,
1132                  t_ptr, 0, val, arg, 0);
1133
1134         assert(m != NULL);
1135
1136         frr_with_mutex (&m->mtx) {
1137                 if (t_ptr && *t_ptr)
1138                         /* thread is already scheduled; don't reschedule */
1139                         break;
1140
1141                 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
1142                 frr_with_mutex (&thread->mtx) {
1143                         thread->u.val = val;
1144                         thread_list_add_tail(&m->event, thread);
1145                 }
1146
1147                 if (t_ptr) {
1148                         *t_ptr = thread;
1149                         thread->ref = t_ptr;
1150                 }
1151
1152                 AWAKEN(m);
1153         }
1154 }
1155
1156 /* Thread cancellation ------------------------------------------------------ */
1157
1158 /**
1159  * NOT's out the .events field of pollfd corresponding to the given file
1160  * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1161  *
1162  * This needs to happen for both copies of pollfd's. See 'thread_fetch'
1163  * implementation for details.
1164  *
1165  * @param master
1166  * @param fd
1167  * @param state the event to cancel. One or more (OR'd together) of the
1168  * following:
1169  *   - POLLIN
1170  *   - POLLOUT
1171  */
1172 static void thread_cancel_rw(struct thread_master *master, int fd, short state,
1173                              int idx_hint)
1174 {
1175         bool found = false;
1176
1177         /* find the index of corresponding pollfd */
1178         nfds_t i;
1179
1180         /* Cancel POLLHUP too just in case some bozo set it */
1181         state |= POLLHUP;
1182
1183         /* Some callers know the index of the pfd already */
1184         if (idx_hint >= 0) {
1185                 i = idx_hint;
1186                 found = true;
1187         } else {
1188                 /* Have to look for the fd in the pfd array */
1189                 for (i = 0; i < master->handler.pfdcount; i++)
1190                         if (master->handler.pfds[i].fd == fd) {
1191                                 found = true;
1192                                 break;
1193                         }
1194         }
1195
1196         if (!found) {
1197                 zlog_debug(
1198                         "[!] Received cancellation request for nonexistent rw job");
1199                 zlog_debug("[!] threadmaster: %s | fd: %d",
1200                            master->name ? master->name : "", fd);
1201                 return;
1202         }
1203
1204         /* NOT out event. */
1205         master->handler.pfds[i].events &= ~(state);
1206
1207         /* If all events are canceled, delete / resize the pollfd array. */
1208         if (master->handler.pfds[i].events == 0) {
1209                 memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1210                         (master->handler.pfdcount - i - 1)
1211                                 * sizeof(struct pollfd));
1212                 master->handler.pfdcount--;
1213                 master->handler.pfds[master->handler.pfdcount].fd = 0;
1214                 master->handler.pfds[master->handler.pfdcount].events = 0;
1215         }
1216
1217         /* If we have the same pollfd in the copy, perform the same operations,
1218          * otherwise return. */
1219         if (i >= master->handler.copycount)
1220                 return;
1221
1222         master->handler.copy[i].events &= ~(state);
1223
1224         if (master->handler.copy[i].events == 0) {
1225                 memmove(master->handler.copy + i, master->handler.copy + i + 1,
1226                         (master->handler.copycount - i - 1)
1227                                 * sizeof(struct pollfd));
1228                 master->handler.copycount--;
1229                 master->handler.copy[master->handler.copycount].fd = 0;
1230                 master->handler.copy[master->handler.copycount].events = 0;
1231         }
1232 }
1233
1234 /*
1235  * Process task cancellation given a task argument: iterate through the
1236  * various lists of tasks, looking for any that match the argument.
1237  */
1238 static void cancel_arg_helper(struct thread_master *master,
1239                               const struct cancel_req *cr)
1240 {
1241         struct event *t;
1242         nfds_t i;
1243         int fd;
1244         struct pollfd *pfd;
1245
1246         /* We're only processing arg-based cancellations here. */
1247         if (cr->eventobj == NULL)
1248                 return;
1249
1250         /* First process the ready lists. */
1251         frr_each_safe(thread_list, &master->event, t) {
1252                 if (t->arg != cr->eventobj)
1253                         continue;
1254                 thread_list_del(&master->event, t);
1255                 if (t->ref)
1256                         *t->ref = NULL;
1257                 thread_add_unuse(master, t);
1258         }
1259
1260         frr_each_safe(thread_list, &master->ready, t) {
1261                 if (t->arg != cr->eventobj)
1262                         continue;
1263                 thread_list_del(&master->ready, t);
1264                 if (t->ref)
1265                         *t->ref = NULL;
1266                 thread_add_unuse(master, t);
1267         }
1268
1269         /* If requested, stop here and ignore io and timers */
1270         if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
1271                 return;
1272
1273         /* Check the io tasks */
1274         for (i = 0; i < master->handler.pfdcount;) {
1275                 pfd = master->handler.pfds + i;
1276
1277                 if (pfd->events & POLLIN)
1278                         t = master->read[pfd->fd];
1279                 else
1280                         t = master->write[pfd->fd];
1281
1282                 if (t && t->arg == cr->eventobj) {
1283                         fd = pfd->fd;
1284
1285                         /* Found a match to cancel: clean up fd arrays */
1286                         thread_cancel_rw(master, pfd->fd, pfd->events, i);
1287
1288                         /* Clean up thread arrays */
1289                         master->read[fd] = NULL;
1290                         master->write[fd] = NULL;
1291
1292                         /* Clear caller's ref */
1293                         if (t->ref)
1294                                 *t->ref = NULL;
1295
1296                         thread_add_unuse(master, t);
1297
1298                         /* Don't increment 'i' since the cancellation will have
1299                          * removed the entry from the pfd array
1300                          */
1301                 } else
1302                         i++;
1303         }
1304
1305         /* Check the timer tasks */
1306         t = thread_timer_list_first(&master->timer);
1307         while (t) {
1308                 struct event *t_next;
1309
1310                 t_next = thread_timer_list_next(&master->timer, t);
1311
1312                 if (t->arg == cr->eventobj) {
1313                         thread_timer_list_del(&master->timer, t);
1314                         if (t->ref)
1315                                 *t->ref = NULL;
1316                         thread_add_unuse(master, t);
1317                 }
1318
1319                 t = t_next;
1320         }
1321 }
1322
1323 /**
1324  * Process cancellation requests.
1325  *
1326  * This may only be run from the pthread which owns the thread_master.
1327  *
1328  * @param master the thread master to process
1329  * @REQUIRE master->mtx
1330  */
1331 static void do_thread_cancel(struct thread_master *master)
1332 {
1333         struct thread_list_head *list = NULL;
1334         struct event **thread_array = NULL;
1335         struct event *thread;
1336         struct cancel_req *cr;
1337         struct listnode *ln;
1338
1339         for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
1340                 /*
1341                  * If this is an event object cancellation, search
1342                  * through task lists deleting any tasks which have the
1343                  * specified argument - use this handy helper function.
1344                  */
1345                 if (cr->eventobj) {
1346                         cancel_arg_helper(master, cr);
1347                         continue;
1348                 }
1349
1350                 /*
1351                  * The pointer varies depending on whether the cancellation
1352                  * request was made asynchronously or not. If it was, we
1353                  * need to check whether the thread even exists anymore
1354                  * before cancelling it.
1355                  */
1356                 thread = (cr->thread) ? cr->thread : *cr->threadref;
1357
1358                 if (!thread)
1359                         continue;
1360
1361                 list = NULL;
1362                 thread_array = NULL;
1363
1364                 /* Determine the appropriate queue to cancel the thread from */
1365                 switch (thread->type) {
1366                 case THREAD_READ:
1367                         thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
1368                         thread_array = master->read;
1369                         break;
1370                 case THREAD_WRITE:
1371                         thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
1372                         thread_array = master->write;
1373                         break;
1374                 case THREAD_TIMER:
1375                         thread_timer_list_del(&master->timer, thread);
1376                         break;
1377                 case THREAD_EVENT:
1378                         list = &master->event;
1379                         break;
1380                 case THREAD_READY:
1381                         list = &master->ready;
1382                         break;
1383                 default:
1384                         continue;
1385                         break;
1386                 }
1387
1388                 if (list) {
1389                         thread_list_del(list, thread);
1390                 } else if (thread_array) {
1391                         thread_array[thread->u.fd] = NULL;
1392                 }
1393
1394                 if (thread->ref)
1395                         *thread->ref = NULL;
1396
1397                 thread_add_unuse(thread->master, thread);
1398         }
1399
1400         /* Delete and free all cancellation requests */
1401         if (master->cancel_req)
1402                 list_delete_all_node(master->cancel_req);
1403
1404         /* Wake up any threads which may be blocked in thread_cancel_async() */
1405         master->canceled = true;
1406         pthread_cond_broadcast(&master->cancel_cond);
1407 }
1408
1409 /*
1410  * Helper function used for multiple flavors of arg-based cancellation.
1411  */
1412 static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
1413 {
1414         struct cancel_req *cr;
1415
1416         assert(m->owner == pthread_self());
1417
1418         /* Only worth anything if caller supplies an arg. */
1419         if (arg == NULL)
1420                 return;
1421
1422         cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1423
1424         cr->flags = flags;
1425
1426         frr_with_mutex (&m->mtx) {
1427                 cr->eventobj = arg;
1428                 listnode_add(m->cancel_req, cr);
1429                 do_thread_cancel(m);
1430         }
1431 }
1432
/**
 * Cancel any events which have the specified argument.
 *
 * Passes no flags to cancel_event_helper(), so the cancellation is not
 * restricted to a subset of the task queues.
 *
 * MT-Unsafe
 *
 * @param master the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event(struct thread_master *master, void *arg)
{
        cancel_event_helper(master, arg, 0);
}
1445
/*
 * Cancel ready tasks with an arg matching 'arg'
 *
 * Passes THREAD_CANCEL_FLAG_READY, which limits the cancellation to tasks on
 * the event and ready lists.
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event_ready(struct thread_master *m, void *arg)
{

        /* Only cancel ready/event tasks */
        cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
}
1460
1461 /**
1462  * Cancel a specific task.
1463  *
1464  * MT-Unsafe
1465  *
1466  * @param thread task to cancel
1467  */
1468 void thread_cancel(struct event **thread)
1469 {
1470         struct thread_master *master;
1471
1472         if (thread == NULL || *thread == NULL)
1473                 return;
1474
1475         master = (*thread)->master;
1476
1477         frrtrace(9, frr_libfrr, thread_cancel, master,
1478                  (*thread)->xref->funcname, (*thread)->xref->xref.file,
1479                  (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
1480                  (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);
1481
1482         assert(master->owner == pthread_self());
1483
1484         frr_with_mutex (&master->mtx) {
1485                 struct cancel_req *cr =
1486                         XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1487                 cr->thread = *thread;
1488                 listnode_add(master->cancel_req, cr);
1489                 do_thread_cancel(master);
1490         }
1491
1492         *thread = NULL;
1493 }
1494
1495 /**
1496  * Asynchronous cancellation.
1497  *
1498  * Called with either a struct event ** or void * to an event argument,
1499  * this function posts the correct cancellation request and blocks until it is
1500  * serviced.
1501  *
1502  * If the thread is currently running, execution blocks until it completes.
1503  *
1504  * The last two parameters are mutually exclusive, i.e. if you pass one the
1505  * other must be NULL.
1506  *
1507  * When the cancellation procedure executes on the target thread_master, the
1508  * thread * provided is checked for nullity. If it is null, the thread is
1509  * assumed to no longer exist and the cancellation request is a no-op. Thus
1510  * users of this API must pass a back-reference when scheduling the original
1511  * task.
1512  *
1513  * MT-Safe
1514  *
1515  * @param master the thread master with the relevant event / task
1516  * @param thread pointer to thread to cancel
1517  * @param eventobj the event
1518  */
1519 void thread_cancel_async(struct thread_master *master, struct event **thread,
1520                          void *eventobj)
1521 {
1522         assert(!(thread && eventobj) && (thread || eventobj));
1523
1524         if (thread && *thread)
1525                 frrtrace(9, frr_libfrr, thread_cancel_async, master,
1526                          (*thread)->xref->funcname, (*thread)->xref->xref.file,
1527                          (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
1528                          (*thread)->u.val, (*thread)->arg,
1529                          (*thread)->u.sands.tv_sec);
1530         else
1531                 frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
1532                          0, NULL, 0, 0, eventobj, 0);
1533
1534         assert(master->owner != pthread_self());
1535
1536         frr_with_mutex (&master->mtx) {
1537                 master->canceled = false;
1538
1539                 if (thread) {
1540                         struct cancel_req *cr =
1541                                 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1542                         cr->threadref = thread;
1543                         listnode_add(master->cancel_req, cr);
1544                 } else if (eventobj) {
1545                         struct cancel_req *cr =
1546                                 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1547                         cr->eventobj = eventobj;
1548                         listnode_add(master->cancel_req, cr);
1549                 }
1550                 AWAKEN(master);
1551
1552                 while (!master->canceled)
1553                         pthread_cond_wait(&master->cancel_cond, &master->mtx);
1554         }
1555
1556         if (thread)
1557                 *thread = NULL;
1558 }
1559 /* ------------------------------------------------------------------------- */
1560
1561 static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
1562                                          struct timeval *timer_val)
1563 {
1564         if (!thread_timer_list_count(timers))
1565                 return NULL;
1566
1567         struct event *next_timer = thread_timer_list_first(timers);
1568         monotime_until(&next_timer->u.sands, timer_val);
1569         return timer_val;
1570 }
1571
/*
 * Hand a task over to the caller: copy the whole event structure into the
 * caller-provided 'fetch' storage, pass the original to thread_add_unuse(),
 * and return 'fetch'.
 */
static struct event *thread_run(struct thread_master *m, struct event *thread,
                                struct event *fetch)
{
        /* Copy first - 'thread' is given away on the next line */
        *fetch = *thread;
        thread_add_unuse(m, thread);
        return fetch;
}
1579
1580 static int thread_process_io_helper(struct thread_master *m,
1581                                     struct event *thread, short state,
1582                                     short actual_state, int pos)
1583 {
1584         struct event **thread_array;
1585
1586         /*
1587          * poll() clears the .events field, but the pollfd array we
1588          * pass to poll() is a copy of the one used to schedule threads.
1589          * We need to synchronize state between the two here by applying
1590          * the same changes poll() made on the copy of the "real" pollfd
1591          * array.
1592          *
1593          * This cleans up a possible infinite loop where we refuse
1594          * to respond to a poll event but poll is insistent that
1595          * we should.
1596          */
1597         m->handler.pfds[pos].events &= ~(state);
1598
1599         if (!thread) {
1600                 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1601                         flog_err(EC_LIB_NO_THREAD,
1602                                  "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
1603                                  m->handler.pfds[pos].fd, actual_state);
1604                 return 0;
1605         }
1606
1607         if (thread->type == THREAD_READ)
1608                 thread_array = m->read;
1609         else
1610                 thread_array = m->write;
1611
1612         thread_array[thread->u.fd] = NULL;
1613         thread_list_add_tail(&m->ready, thread);
1614         thread->type = THREAD_READY;
1615
1616         return 1;
1617 }
1618
1619 /**
1620  * Process I/O events.
1621  *
1622  * Walks through file descriptor array looking for those pollfds whose .revents
1623  * field has something interesting. Deletes any invalid file descriptors.
1624  *
1625  * @param m the thread master
1626  * @param num the number of active file descriptors (return value of poll())
1627  */
1628 static void thread_process_io(struct thread_master *m, unsigned int num)
1629 {
1630         unsigned int ready = 0;
1631         struct pollfd *pfds = m->handler.copy;
1632
1633         for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1634                 /* no event for current fd? immediately continue */
1635                 if (pfds[i].revents == 0)
1636                         continue;
1637
1638                 ready++;
1639
1640                 /*
1641                  * Unless someone has called thread_cancel from another
1642                  * pthread, the only thing that could have changed in
1643                  * m->handler.pfds while we were asleep is the .events
1644                  * field in a given pollfd. Barring thread_cancel() that
1645                  * value should be a superset of the values we have in our
1646                  * copy, so there's no need to update it. Similarily,
1647                  * barring deletion, the fd should still be a valid index
1648                  * into the master's pfds.
1649                  *
1650                  * We are including POLLERR here to do a READ event
1651                  * this is because the read should fail and the
1652                  * read function should handle it appropriately
1653                  */
1654                 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
1655                         thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
1656                                                  pfds[i].revents, i);
1657                 }
1658                 if (pfds[i].revents & POLLOUT)
1659                         thread_process_io_helper(m, m->write[pfds[i].fd],
1660                                                  POLLOUT, pfds[i].revents, i);
1661
1662                 /* if one of our file descriptors is garbage, remove the same
1663                  * from
1664                  * both pfds + update sizes and index */
1665                 if (pfds[i].revents & POLLNVAL) {
1666                         memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1667                                 (m->handler.pfdcount - i - 1)
1668                                         * sizeof(struct pollfd));
1669                         m->handler.pfdcount--;
1670                         m->handler.pfds[m->handler.pfdcount].fd = 0;
1671                         m->handler.pfds[m->handler.pfdcount].events = 0;
1672
1673                         memmove(pfds + i, pfds + i + 1,
1674                                 (m->handler.copycount - i - 1)
1675                                         * sizeof(struct pollfd));
1676                         m->handler.copycount--;
1677                         m->handler.copy[m->handler.copycount].fd = 0;
1678                         m->handler.copy[m->handler.copycount].events = 0;
1679
1680                         i--;
1681                 }
1682         }
1683 }
1684
1685 /* Add all timers that have popped to the ready list. */
1686 static unsigned int thread_process_timers(struct thread_master *m,
1687                                           struct timeval *timenow)
1688 {
1689         struct timeval prev = *timenow;
1690         bool displayed = false;
1691         struct event *thread;
1692         unsigned int ready = 0;
1693
1694         while ((thread = thread_timer_list_first(&m->timer))) {
1695                 if (timercmp(timenow, &thread->u.sands, <))
1696                         break;
1697                 prev = thread->u.sands;
1698                 prev.tv_sec += 4;
1699                 /*
1700                  * If the timer would have popped 4 seconds in the
1701                  * past then we are in a situation where we are
1702                  * really getting behind on handling of events.
1703                  * Let's log it and do the right thing with it.
1704                  */
1705                 if (timercmp(timenow, &prev, >)) {
1706                         atomic_fetch_add_explicit(
1707                                 &thread->hist->total_starv_warn, 1,
1708                                 memory_order_seq_cst);
1709                         if (!displayed && !thread->ignore_timer_late) {
1710                                 flog_warn(
1711                                         EC_LIB_STARVE_THREAD,
1712                                         "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1713                                         thread);
1714                                 displayed = true;
1715                         }
1716                 }
1717
1718                 thread_timer_list_pop(&m->timer);
1719                 thread->type = THREAD_READY;
1720                 thread_list_add_tail(&m->ready, thread);
1721                 ready++;
1722         }
1723
1724         return ready;
1725 }
1726
1727 /* process a list en masse, e.g. for event thread lists */
1728 static unsigned int thread_process(struct thread_list_head *list)
1729 {
1730         struct event *thread;
1731         unsigned int ready = 0;
1732
1733         while ((thread = thread_list_pop(list))) {
1734                 thread->type = THREAD_READY;
1735                 thread_list_add_tail(&thread->master->ready, thread);
1736                 ready++;
1737         }
1738         return ready;
1739 }
1740
1741
1742 /* Fetch next ready thread. */
1743 struct event *thread_fetch(struct thread_master *m, struct event *fetch)
1744 {
1745         struct event *thread = NULL;
1746         struct timeval now;
1747         struct timeval zerotime = {0, 0};
1748         struct timeval tv;
1749         struct timeval *tw = NULL;
1750         bool eintr_p = false;
1751         int num = 0;
1752
1753         do {
1754                 /* Handle signals if any */
1755                 if (m->handle_signals)
1756                         frr_sigevent_process();
1757
1758                 pthread_mutex_lock(&m->mtx);
1759
1760                 /* Process any pending cancellation requests */
1761                 do_thread_cancel(m);
1762
1763                 /*
1764                  * Attempt to flush ready queue before going into poll().
1765                  * This is performance-critical. Think twice before modifying.
1766                  */
1767                 if ((thread = thread_list_pop(&m->ready))) {
1768                         fetch = thread_run(m, thread, fetch);
1769                         if (fetch->ref)
1770                                 *fetch->ref = NULL;
1771                         pthread_mutex_unlock(&m->mtx);
1772                         if (!m->ready_run_loop)
1773                                 GETRUSAGE(&m->last_getrusage);
1774                         m->ready_run_loop = true;
1775                         break;
1776                 }
1777
1778                 m->ready_run_loop = false;
1779                 /* otherwise, tick through scheduling sequence */
1780
1781                 /*
1782                  * Post events to ready queue. This must come before the
1783                  * following block since events should occur immediately
1784                  */
1785                 thread_process(&m->event);
1786
1787                 /*
1788                  * If there are no tasks on the ready queue, we will poll()
1789                  * until a timer expires or we receive I/O, whichever comes
1790                  * first. The strategy for doing this is:
1791                  *
1792                  * - If there are events pending, set the poll() timeout to zero
1793                  * - If there are no events pending, but there are timers
1794                  * pending, set the timeout to the smallest remaining time on
1795                  * any timer.
1796                  * - If there are neither timers nor events pending, but there
1797                  * are file descriptors pending, block indefinitely in poll()
1798                  * - If nothing is pending, it's time for the application to die
1799                  *
1800                  * In every case except the last, we need to hit poll() at least
1801                  * once per loop to avoid starvation by events
1802                  */
1803                 if (!thread_list_count(&m->ready))
1804                         tw = thread_timer_wait(&m->timer, &tv);
1805
1806                 if (thread_list_count(&m->ready) ||
1807                                 (tw && !timercmp(tw, &zerotime, >)))
1808                         tw = &zerotime;
1809
1810                 if (!tw && m->handler.pfdcount == 0) { /* die */
1811                         pthread_mutex_unlock(&m->mtx);
1812                         fetch = NULL;
1813                         break;
1814                 }
1815
1816                 /*
1817                  * Copy pollfd array + # active pollfds in it. Not necessary to
1818                  * copy the array size as this is fixed.
1819                  */
1820                 m->handler.copycount = m->handler.pfdcount;
1821                 memcpy(m->handler.copy, m->handler.pfds,
1822                        m->handler.copycount * sizeof(struct pollfd));
1823
1824                 pthread_mutex_unlock(&m->mtx);
1825                 {
1826                         eintr_p = false;
1827                         num = fd_poll(m, tw, &eintr_p);
1828                 }
1829                 pthread_mutex_lock(&m->mtx);
1830
1831                 /* Handle any errors received in poll() */
1832                 if (num < 0) {
1833                         if (eintr_p) {
1834                                 pthread_mutex_unlock(&m->mtx);
1835                                 /* loop around to signal handler */
1836                                 continue;
1837                         }
1838
1839                         /* else die */
1840                         flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
1841                                  safe_strerror(errno));
1842                         pthread_mutex_unlock(&m->mtx);
1843                         fetch = NULL;
1844                         break;
1845                 }
1846
1847                 /* Post timers to ready queue. */
1848                 monotime(&now);
1849                 thread_process_timers(m, &now);
1850
1851                 /* Post I/O to ready queue. */
1852                 if (num > 0)
1853                         thread_process_io(m, num);
1854
1855                 pthread_mutex_unlock(&m->mtx);
1856
1857         } while (!thread && m->spin);
1858
1859         return fetch;
1860 }
1861
1862 static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
1863 {
1864         return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1865                 + (a.tv_usec - b.tv_usec));
1866 }
1867
1868 unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1869                                    unsigned long *cputime)
1870 {
1871 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1872
1873 #ifdef __FreeBSD__
1874         /*
1875          * FreeBSD appears to have an issue when calling clock_gettime
1876          * with CLOCK_THREAD_CPUTIME_ID really close to each other
1877          * occassionally the now time will be before the start time.
1878          * This is not good and FRR is ending up with CPU HOG's
1879          * when the subtraction wraps to very large numbers
1880          *
1881          * What we are going to do here is cheat a little bit
1882          * and notice that this is a problem and just correct
1883          * it so that it is impossible to happen
1884          */
1885         if (start->cpu.tv_sec == now->cpu.tv_sec &&
1886             start->cpu.tv_nsec > now->cpu.tv_nsec)
1887                 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1888         else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1889                 now->cpu.tv_sec = start->cpu.tv_sec;
1890                 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1891         }
1892 #endif
1893         *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1894                    + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1895 #else
1896         /* This is 'user + sys' time.  */
1897         *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1898                    + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
1899 #endif
1900         return timeval_elapsed(now->real, start->real);
1901 }
1902
1903 /* We should aim to yield after yield milliseconds, which defaults
1904    to THREAD_YIELD_TIME_SLOT .
1905    Note: we are using real (wall clock) time for this calculation.
1906    It could be argued that CPU time may make more sense in certain
1907    contexts.  The things to consider are whether the thread may have
1908    blocked (in which case wall time increases, but CPU time does not),
1909    or whether the system is heavily loaded with other processes competing
1910    for CPU time.  On balance, wall clock time seems to make sense.
1911    Plus it has the added benefit that gettimeofday should be faster
1912    than calling getrusage. */
1913 int thread_should_yield(struct event *thread)
1914 {
1915         int result;
1916         frr_with_mutex (&thread->mtx) {
1917                 result = monotime_since(&thread->real, NULL)
1918                          > (int64_t)thread->yield;
1919         }
1920         return result;
1921 }
1922
1923 void thread_set_yield_time(struct event *thread, unsigned long yield_time)
1924 {
1925         frr_with_mutex (&thread->mtx) {
1926                 thread->yield = yield_time;
1927         }
1928 }
1929
1930 void thread_getrusage(RUSAGE_T *r)
1931 {
1932         monotime(&r->real);
1933         if (!cputime_enabled) {
1934                 memset(&r->cpu, 0, sizeof(r->cpu));
1935                 return;
1936         }
1937
1938 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1939         /* not currently implemented in Linux's vDSO, but maybe at some point
1940          * in the future?
1941          */
1942         clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1943 #else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
1944 #if defined RUSAGE_THREAD
1945 #define FRR_RUSAGE RUSAGE_THREAD
1946 #else
1947 #define FRR_RUSAGE RUSAGE_SELF
1948 #endif
1949         getrusage(FRR_RUSAGE, &(r->cpu));
1950 #endif
1951 }
1952
1953 /*
1954  * Call a thread.
1955  *
1956  * This function will atomically update the thread's usage history. At present
1957  * this is the only spot where usage history is written. Nevertheless the code
1958  * has been written such that the introduction of writers in the future should
1959  * not need to update it provided the writers atomically perform only the
1960  * operations done here, i.e. updating the total and maximum times. In
1961  * particular, the maximum real and cpu times must be monotonically increasing
1962  * or this code is not correct.
1963  */
1964 void thread_call(struct event *thread)
1965 {
1966         RUSAGE_T before, after;
1967
1968         /* if the thread being called is the CLI, it may change cputime_enabled
1969          * ("service cputime-stats" command), which can result in nonsensical
1970          * and very confusing warnings
1971          */
1972         bool cputime_enabled_here = cputime_enabled;
1973
1974         if (thread->master->ready_run_loop)
1975                 before = thread->master->last_getrusage;
1976         else
1977                 GETRUSAGE(&before);
1978
1979         thread->real = before.real;
1980
1981         frrtrace(9, frr_libfrr, thread_call, thread->master,
1982                  thread->xref->funcname, thread->xref->xref.file,
1983                  thread->xref->xref.line, NULL, thread->u.fd,
1984                  thread->u.val, thread->arg, thread->u.sands.tv_sec);
1985
1986         pthread_setspecific(thread_current, thread);
1987         (*thread->func)(thread);
1988         pthread_setspecific(thread_current, NULL);
1989
1990         GETRUSAGE(&after);
1991         thread->master->last_getrusage = after;
1992
1993         unsigned long walltime, cputime;
1994         unsigned long exp;
1995
1996         walltime = thread_consumed_time(&after, &before, &cputime);
1997
1998         /* update walltime */
1999         atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
2000                                   memory_order_seq_cst);
2001         exp = atomic_load_explicit(&thread->hist->real.max,
2002                                    memory_order_seq_cst);
2003         while (exp < walltime
2004                && !atomic_compare_exchange_weak_explicit(
2005                        &thread->hist->real.max, &exp, walltime,
2006                        memory_order_seq_cst, memory_order_seq_cst))
2007                 ;
2008
2009         if (cputime_enabled_here && cputime_enabled) {
2010                 /* update cputime */
2011                 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2012                                           memory_order_seq_cst);
2013                 exp = atomic_load_explicit(&thread->hist->cpu.max,
2014                                            memory_order_seq_cst);
2015                 while (exp < cputime
2016                        && !atomic_compare_exchange_weak_explicit(
2017                                &thread->hist->cpu.max, &exp, cputime,
2018                                memory_order_seq_cst, memory_order_seq_cst))
2019                         ;
2020         }
2021
2022         atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2023                                   memory_order_seq_cst);
2024         atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2025                                  memory_order_seq_cst);
2026
2027         if (cputime_enabled_here && cputime_enabled && cputime_threshold
2028             && cputime > cputime_threshold) {
2029                 /*
2030                  * We have a CPU Hog on our hands.  The time FRR has spent
2031                  * doing actual work (not sleeping) is greater than 5 seconds.
2032                  * Whinge about it now, so we're aware this is yet another task
2033                  * to fix.
2034                  */
2035                 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2036                                           1, memory_order_seq_cst);
2037                 flog_warn(
2038                         EC_LIB_SLOW_THREAD_CPU,
2039                         "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2040                         thread->xref->funcname, (unsigned long)thread->func,
2041                         walltime / 1000, cputime / 1000);
2042
2043         } else if (walltime_threshold && walltime > walltime_threshold) {
2044                 /*
2045                  * The runtime for a task is greater than 5 seconds, but the
2046                  * cpu time is under 5 seconds.  Let's whine about this because
2047                  * this could imply some sort of scheduling issue.
2048                  */
2049                 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2050                                           1, memory_order_seq_cst);
2051                 flog_warn(
2052                         EC_LIB_SLOW_THREAD_WALL,
2053                         "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
2054                         thread->xref->funcname, (unsigned long)thread->func,
2055                         walltime / 1000, cputime / 1000);
2056         }
2057 }
2058
2059 /* Execute thread */
2060 void _thread_execute(const struct xref_threadsched *xref,
2061                      struct thread_master *m, void (*func)(struct event *),
2062                      void *arg, int val)
2063 {
2064         struct event *thread;
2065
2066         /* Get or allocate new thread to execute. */
2067         frr_with_mutex (&m->mtx) {
2068                 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
2069
2070                 /* Set its event value. */
2071                 frr_with_mutex (&thread->mtx) {
2072                         thread->add_type = THREAD_EXECUTE;
2073                         thread->u.val = val;
2074                         thread->ref = &thread;
2075                 }
2076         }
2077
2078         /* Execute thread doing all accounting. */
2079         thread_call(thread);
2080
2081         /* Give back or free thread. */
2082         thread_add_unuse(m, thread);
2083 }
2084
2085 /* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2086 void debug_signals(const sigset_t *sigs)
2087 {
2088         int i, found;
2089         sigset_t tmpsigs;
2090         char buf[300];
2091
2092         /*
2093          * We're only looking at the non-realtime signals here, so we need
2094          * some limit value. Platform differences mean at some point we just
2095          * need to pick a reasonable value.
2096          */
2097 #if defined SIGRTMIN
2098 #  define LAST_SIGNAL SIGRTMIN
2099 #else
2100 #  define LAST_SIGNAL 32
2101 #endif
2102
2103
2104         if (sigs == NULL) {
2105                 sigemptyset(&tmpsigs);
2106                 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2107                 sigs = &tmpsigs;
2108         }
2109
2110         found = 0;
2111         buf[0] = '\0';
2112
2113         for (i = 0; i < LAST_SIGNAL; i++) {
2114                 char tmp[20];
2115
2116                 if (sigismember(sigs, i) > 0) {
2117                         if (found > 0)
2118                                 strlcat(buf, ",", sizeof(buf));
2119                         snprintf(tmp, sizeof(tmp), "%d", i);
2120                         strlcat(buf, tmp, sizeof(buf));
2121                         found++;
2122                 }
2123         }
2124
2125         if (found == 0)
2126                 snprintf(buf, sizeof(buf), "<none>");
2127
2128         zlog_debug("%s: %s", __func__, buf);
2129 }
2130
2131 static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2132                                    const struct event *thread)
2133 {
2134         static const char * const types[] = {
2135                 [THREAD_READ] = "read",
2136                 [THREAD_WRITE] = "write",
2137                 [THREAD_TIMER] = "timer",
2138                 [THREAD_EVENT] = "event",
2139                 [THREAD_READY] = "ready",
2140                 [THREAD_UNUSED] = "unused",
2141                 [THREAD_EXECUTE] = "exec",
2142         };
2143         ssize_t rv = 0;
2144         char info[16] = "";
2145
2146         if (!thread)
2147                 return bputs(buf, "{(thread *)NULL}");
2148
2149         rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2150
2151         if (thread->type < array_size(types) && types[thread->type])
2152                 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2153         else
2154                 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2155
2156         switch (thread->type) {
2157         case THREAD_READ:
2158         case THREAD_WRITE:
2159                 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2160                 break;
2161
2162         case THREAD_TIMER:
2163                 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2164                 break;
2165         }
2166
2167         rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2168                         thread->xref->funcname, thread->xref->dest,
2169                         thread->xref->xref.file, thread->xref->xref.line);
2170         return rv;
2171 }
2172
2173 printfrr_ext_autoreg_p("TH", printfrr_thread);
2174 static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2175                                const void *ptr)
2176 {
2177         const struct event *thread = ptr;
2178         struct timespec remain = {};
2179
2180         if (ea->fmt[0] == 'D') {
2181                 ea->fmt++;
2182                 return printfrr_thread_dbg(buf, ea, thread);
2183         }
2184
2185         if (!thread) {
2186                 /* need to jump over time formatting flag characters in the
2187                  * input format string, i.e. adjust ea->fmt!
2188                  */
2189                 printfrr_time(buf, ea, &remain,
2190                               TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2191                 return bputch(buf, '-');
2192         }
2193
2194         TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2195         return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2196 }