lib/thread.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /* Thread management routine
   3  * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
   4  */
   5
   6 /* #define DEBUG */
   7
   8 #include <zebra.h>
   9 #include <sys/resource.h>
  10
  11 #include "thread.h"
  12 #include "memory.h"
  13 #include "frrcu.h"
  14 #include "log.h"
  15 #include "hash.h"
  16 #include "command.h"
  17 #include "sigevent.h"
  18 #include "network.h"
  19 #include "jhash.h"
  20 #include "frratomic.h"
  21 #include "frr_pthread.h"
  22 #include "lib_errors.h"
  23 #include "libfrr_trace.h"
  24 #include "libfrr.h"
  25
  26 DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
  27 DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
  28 DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
  29 DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");
  30
  31 DECLARE_LIST(thread_list, struct thread, threaditem);
  32
  33 struct cancel_req {
  34         int flags;
  35         struct thread *thread;
  36         void *eventobj;
  37         struct thread **threadref;
  38 };
  39
  40 /* Flags for task cancellation */
  41 #define THREAD_CANCEL_FLAG_READY     0x01
  42
  43 static int thread_timer_cmp(const struct thread *a, const struct thread *b)
  44 {
  45         if (a->u.sands.tv_sec < b->u.sands.tv_sec)
  46                 return -1;
  47         if (a->u.sands.tv_sec > b->u.sands.tv_sec)
  48                 return 1;
  49         if (a->u.sands.tv_usec < b->u.sands.tv_usec)
  50                 return -1;
  51         if (a->u.sands.tv_usec > b->u.sands.tv_usec)
  52                 return 1;
  53         return 0;
  54 }
  55
  56 DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);
  57
  58 #if defined(__APPLE__)
  59 #include <mach/mach.h>
  60 #include <mach/mach_time.h>
  61 #endif
  62
  63 #define AWAKEN(m)                                                              \
  64         do {                                                                   \
  65                 const unsigned char wakebyte = 0x01;                           \
  66                 write(m->io_pipe[1], &wakebyte, 1);                            \
  67         } while (0);
  68
  69 /* control variable for initializer */
  70 static pthread_once_t init_once = PTHREAD_ONCE_INIT;
  71 pthread_key_t thread_current;
  72
  73 static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
  74 static struct list *masters;
  75
  76 static void thread_free(struct thread_master *master, struct thread *thread);
  77
  78 #ifndef EXCLUDE_CPU_TIME
  79 #define EXCLUDE_CPU_TIME 0
  80 #endif
  81 #ifndef CONSUMED_TIME_CHECK
  82 #define CONSUMED_TIME_CHECK 0
  83 #endif
  84
  85 bool cputime_enabled = !EXCLUDE_CPU_TIME;
  86 unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
  87 unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
  88
  89 /* CLI start ---------------------------------------------------------------- */
  90 #include "lib/thread_clippy.c"
  91
  92 static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
  93 {
  94         int size = sizeof(a->func);
  95
  96         return jhash(&a->func, size, 0);
  97 }
  98
  99 static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
 100                                const struct cpu_thread_history *b)
 101 {
 102         return a->func == b->func;
 103 }
 104
 105 static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
 106 {
 107         struct cpu_thread_history *new;
 108         new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
 109         new->func = a->func;
 110         new->funcname = a->funcname;
 111         return new;
 112 }
 113
 114 static void cpu_record_hash_free(void *a)
 115 {
 116         struct cpu_thread_history *hist = a;
 117
 118         XFREE(MTYPE_THREAD_STATS, hist);
 119 }
 120
 121 static void vty_out_cpu_thread_history(struct vty *vty,
 122                                        struct cpu_thread_history *a)
 123 {
 124         vty_out(vty,
 125                 "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
 126                 a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
 127                 a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
 128                 (a->real.total / a->total_calls), a->real.max,
 129                 a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
 130         vty_out(vty, "  %c%c%c%c%c  %s\n",
 131                 a->types & (1 << THREAD_READ) ? 'R' : ' ',
 132                 a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
 133                 a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
 134                 a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
 135                 a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
 136 }
 137
 138 static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
 139 {
 140         struct cpu_thread_history *totals = args[0];
 141         struct cpu_thread_history copy;
 142         struct vty *vty = args[1];
 143         uint8_t *filter = args[2];
 144
 145         struct cpu_thread_history *a = bucket->data;
 146
 147         copy.total_active =
 148                 atomic_load_explicit(&a->total_active, memory_order_seq_cst);
 149         copy.total_calls =
 150                 atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
 151         copy.total_cpu_warn =
 152                 atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
 153         copy.total_wall_warn =
 154                 atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
 155         copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
 156                                                      memory_order_seq_cst);
 157         copy.cpu.total =
 158                 atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
 159         copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
 160         copy.real.total =
 161                 atomic_load_explicit(&a->real.total, memory_order_seq_cst);
 162         copy.real.max =
 163                 atomic_load_explicit(&a->real.max, memory_order_seq_cst);
 164         copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
 165         copy.funcname = a->funcname;
 166
 167         if (!(copy.types & *filter))
 168                 return;
 169
 170         vty_out_cpu_thread_history(vty, &copy);
 171         totals->total_active += copy.total_active;
 172         totals->total_calls += copy.total_calls;
 173         totals->total_cpu_warn += copy.total_cpu_warn;
 174         totals->total_wall_warn += copy.total_wall_warn;
 175         totals->total_starv_warn += copy.total_starv_warn;
 176         totals->real.total += copy.real.total;
 177         if (totals->real.max < copy.real.max)
 178                 totals->real.max = copy.real.max;
 179         totals->cpu.total += copy.cpu.total;
 180         if (totals->cpu.max < copy.cpu.max)
 181                 totals->cpu.max = copy.cpu.max;
 182 }
 183
 184 static void cpu_record_print(struct vty *vty, uint8_t filter)
 185 {
 186         struct cpu_thread_history tmp;
 187         void *args[3] = {&tmp, vty, &filter};
 188         struct thread_master *m;
 189         struct listnode *ln;
 190
 191         if (!cputime_enabled)
 192                 vty_out(vty,
 193                         "\n"
 194                         "Collecting CPU time statistics is currently disabled.  Following statistics\n"
 195                         "will be zero or may display data from when collection was enabled.  Use the\n"
 196                         "  \"service cputime-stats\"  command to start collecting data.\n"
 197                         "\nCounters and wallclock times are always maintained and should be accurate.\n");
 198
 199         memset(&tmp, 0, sizeof(tmp));
 200         tmp.funcname = "TOTAL";
 201         tmp.types = filter;
 202
 203         frr_with_mutex (&masters_mtx) {
 204                 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
 205                         const char *name = m->name ? m->name : "main";
 206
 207                         char underline[strlen(name) + 1];
 208                         memset(underline, '-', sizeof(underline));
 209                         underline[sizeof(underline) - 1] = '\0';
 210
 211                         vty_out(vty, "\n");
 212                         vty_out(vty, "Showing statistics for pthread %s\n",
 213                                 name);
 214                         vty_out(vty, "-------------------------------%s\n",
 215                                 underline);
 216                         vty_out(vty, "%30s %18s %18s\n", "",
 217                                 "CPU (user+system):", "Real (wall-clock):");
 218                         vty_out(vty,
 219                                 "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
 220                         vty_out(vty, " Avg uSec Max uSecs");
 221                         vty_out(vty,
 222                                 "  CPU_Warn Wall_Warn Starv_Warn Type   Thread\n");
 223
 224                         if (m->cpu_record->count)
 225                                 hash_iterate(
 226                                         m->cpu_record,
 227                                         (void (*)(struct hash_bucket *,
 228                                                   void *))cpu_record_hash_print,
 229                                         args);
 230                         else
 231                                 vty_out(vty, "No data to display yet.\n");
 232
 233                         vty_out(vty, "\n");
 234                 }
 235         }
 236
 237         vty_out(vty, "\n");
 238         vty_out(vty, "Total thread statistics\n");
 239         vty_out(vty, "-------------------------\n");
 240         vty_out(vty, "%30s %18s %18s\n", "",
 241                 "CPU (user+system):", "Real (wall-clock):");
 242         vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
 243         vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
 244         vty_out(vty, "  Type  Thread\n");
 245
 246         if (tmp.total_calls > 0)
 247                 vty_out_cpu_thread_history(vty, &tmp);
 248 }
 249
 250 static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
 251 {
 252         uint8_t *filter = args[0];
 253         struct hash *cpu_record = args[1];
 254
 255         struct cpu_thread_history *a = bucket->data;
 256
 257         if (!(a->types & *filter))
 258                 return;
 259
 260         hash_release(cpu_record, bucket->data);
 261 }
 262
 263 static void cpu_record_clear(uint8_t filter)
 264 {
 265         uint8_t *tmp = &filter;
 266         struct thread_master *m;
 267         struct listnode *ln;
 268
 269         frr_with_mutex (&masters_mtx) {
 270                 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
 271                         frr_with_mutex (&m->mtx) {
 272                                 void *args[2] = {tmp, m->cpu_record};
 273                                 hash_iterate(
 274                                         m->cpu_record,
 275                                         (void (*)(struct hash_bucket *,
 276                                                   void *))cpu_record_hash_clear,
 277                                         args);
 278                         }
 279                 }
 280         }
 281 }
 282
 283 static uint8_t parse_filter(const char *filterstr)
 284 {
 285         int i = 0;
 286         int filter = 0;
 287
 288         while (filterstr[i] != '\0') {
 289                 switch (filterstr[i]) {
 290                 case 'r':
 291                 case 'R':
 292                         filter |= (1 << THREAD_READ);
 293                         break;
 294                 case 'w':
 295                 case 'W':
 296                         filter |= (1 << THREAD_WRITE);
 297                         break;
 298                 case 't':
 299                 case 'T':
 300                         filter |= (1 << THREAD_TIMER);
 301                         break;
 302                 case 'e':
 303                 case 'E':
 304                         filter |= (1 << THREAD_EVENT);
 305                         break;
 306                 case 'x':
 307                 case 'X':
 308                         filter |= (1 << THREAD_EXECUTE);
 309                         break;
 310                 default:
 311                         break;
 312                 }
 313                 ++i;
 314         }
 315         return filter;
 316 }
 317
 318 DEFUN_NOSH (show_thread_cpu,
 319             show_thread_cpu_cmd,
 320             "show thread cpu [FILTER]",
 321             SHOW_STR
 322             "Thread information\n"
 323             "Thread CPU usage\n"
 324             "Display filter (rwtex)\n")
 325 {
 326         uint8_t filter = (uint8_t)-1U;
 327         int idx = 0;
 328
 329         if (argv_find(argv, argc, "FILTER", &idx)) {
 330                 filter = parse_filter(argv[idx]->arg);
 331                 if (!filter) {
 332                         vty_out(vty,
 333                                 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
 334                                 argv[idx]->arg);
 335                         return CMD_WARNING;
 336                 }
 337         }
 338
 339         cpu_record_print(vty, filter);
 340         return CMD_SUCCESS;
 341 }
 342
 343 DEFPY (service_cputime_stats,
 344        service_cputime_stats_cmd,
 345        "[no] service cputime-stats",
 346        NO_STR
 347        "Set up miscellaneous service\n"
 348        "Collect CPU usage statistics\n")
 349 {
 350         cputime_enabled = !no;
 351         return CMD_SUCCESS;
 352 }
 353
 354 DEFPY (service_cputime_warning,
 355        service_cputime_warning_cmd,
 356        "[no] service cputime-warning (1-4294967295)",
 357        NO_STR
 358        "Set up miscellaneous service\n"
 359        "Warn for tasks exceeding CPU usage threshold\n"
 360        "Warning threshold in milliseconds\n")
 361 {
 362         if (no)
 363                 cputime_threshold = 0;
 364         else
 365                 cputime_threshold = cputime_warning * 1000;
 366         return CMD_SUCCESS;
 367 }
 368
 369 ALIAS (service_cputime_warning,
 370        no_service_cputime_warning_cmd,
 371        "no service cputime-warning",
 372        NO_STR
 373        "Set up miscellaneous service\n"
 374        "Warn for tasks exceeding CPU usage threshold\n")
 375
 376 DEFPY (service_walltime_warning,
 377        service_walltime_warning_cmd,
 378        "[no] service walltime-warning (1-4294967295)",
 379        NO_STR
 380        "Set up miscellaneous service\n"
 381        "Warn for tasks exceeding total wallclock threshold\n"
 382        "Warning threshold in milliseconds\n")
 383 {
 384         if (no)
 385                 walltime_threshold = 0;
 386         else
 387                 walltime_threshold = walltime_warning * 1000;
 388         return CMD_SUCCESS;
 389 }
 390
 391 ALIAS (service_walltime_warning,
 392        no_service_walltime_warning_cmd,
 393        "no service walltime-warning",
 394        NO_STR
 395        "Set up miscellaneous service\n"
 396        "Warn for tasks exceeding total wallclock threshold\n")
 397
 398 static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
 399 {
 400         const char *name = m->name ? m->name : "main";
 401         char underline[strlen(name) + 1];
 402         struct thread *thread;
 403         uint32_t i;
 404
 405         memset(underline, '-', sizeof(underline));
 406         underline[sizeof(underline) - 1] = '\0';
 407
 408         vty_out(vty, "\nShowing poll FD's for %s\n", name);
 409         vty_out(vty, "----------------------%s\n", underline);
 410         vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
 411                 m->fd_limit);
 412         for (i = 0; i < m->handler.pfdcount; i++) {
 413                 vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
 414                         m->handler.pfds[i].fd, m->handler.pfds[i].events,
 415                         m->handler.pfds[i].revents);
 416
 417                 if (m->handler.pfds[i].events & POLLIN) {
 418                         thread = m->read[m->handler.pfds[i].fd];
 419
 420                         if (!thread)
 421                                 vty_out(vty, "ERROR ");
 422                         else
 423                                 vty_out(vty, "%s ", thread->xref->funcname);
 424                 } else
 425                         vty_out(vty, " ");
 426
 427                 if (m->handler.pfds[i].events & POLLOUT) {
 428                         thread = m->write[m->handler.pfds[i].fd];
 429
 430                         if (!thread)
 431                                 vty_out(vty, "ERROR\n");
 432                         else
 433                                 vty_out(vty, "%s\n", thread->xref->funcname);
 434                 } else
 435                         vty_out(vty, "\n");
 436         }
 437 }
 438
 439 DEFUN_NOSH (show_thread_poll,
 440             show_thread_poll_cmd,
 441             "show thread poll",
 442             SHOW_STR
 443             "Thread information\n"
 444             "Show poll FD's and information\n")
 445 {
 446         struct listnode *node;
 447         struct thread_master *m;
 448
 449         frr_with_mutex (&masters_mtx) {
 450                 for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
 451                         show_thread_poll_helper(vty, m);
 452                 }
 453         }
 454
 455         return CMD_SUCCESS;
 456 }
 457
 458
 459 DEFUN (clear_thread_cpu,
 460        clear_thread_cpu_cmd,
 461        "clear thread cpu [FILTER]",
 462        "Clear stored data in all pthreads\n"
 463        "Thread information\n"
 464        "Thread CPU usage\n"
 465        "Display filter (rwtexb)\n")
 466 {
 467         uint8_t filter = (uint8_t)-1U;
 468         int idx = 0;
 469
 470         if (argv_find(argv, argc, "FILTER", &idx)) {
 471                 filter = parse_filter(argv[idx]->arg);
 472                 if (!filter) {
 473                         vty_out(vty,
 474                                 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
 475                                 argv[idx]->arg);
 476                         return CMD_WARNING;
 477                 }
 478         }
 479
 480         cpu_record_clear(filter);
 481         return CMD_SUCCESS;
 482 }
 483
 484 static void show_thread_timers_helper(struct vty *vty, struct thread_master *m)
 485 {
 486         const char *name = m->name ? m->name : "main";
 487         char underline[strlen(name) + 1];
 488         struct thread *thread;
 489
 490         memset(underline, '-', sizeof(underline));
 491         underline[sizeof(underline) - 1] = '\0';
 492
 493         vty_out(vty, "\nShowing timers for %s\n", name);
 494         vty_out(vty, "-------------------%s\n", underline);
 495
 496         frr_each (thread_timer_list, &m->timer, thread) {
 497                 vty_out(vty, "  %-50s%pTH\n", thread->hist->funcname, thread);
 498         }
 499 }
 500
 501 DEFPY_NOSH (show_thread_timers,
 502             show_thread_timers_cmd,
 503             "show thread timers",
 504             SHOW_STR
 505             "Thread information\n"
 506             "Show all timers and how long they have in the system\n")
 507 {
 508         struct listnode *node;
 509         struct thread_master *m;
 510
 511         frr_with_mutex (&masters_mtx) {
 512                 for (ALL_LIST_ELEMENTS_RO(masters, node, m))
 513                         show_thread_timers_helper(vty, m);
 514         }
 515
 516         return CMD_SUCCESS;
 517 }
 518
 519 void thread_cmd_init(void)
 520 {
 521         install_element(VIEW_NODE, &show_thread_cpu_cmd);
 522         install_element(VIEW_NODE, &show_thread_poll_cmd);
 523         install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
 524
 525         install_element(CONFIG_NODE, &service_cputime_stats_cmd);
 526         install_element(CONFIG_NODE, &service_cputime_warning_cmd);
 527         install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
 528         install_element(CONFIG_NODE, &service_walltime_warning_cmd);
 529         install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
 530
 531         install_element(VIEW_NODE, &show_thread_timers_cmd);
 532 }
 533 /* CLI end ------------------------------------------------------------------ */
 534
 535
 536 static void cancelreq_del(void *cr)
 537 {
 538         XFREE(MTYPE_TMP, cr);
 539 }
 540
 541 /* initializer, only ever called once */
 542 static void initializer(void)
 543 {
 544         pthread_key_create(&thread_current, NULL);
 545 }
 546
 547 struct thread_master *thread_master_create(const char *name)
 548 {
 549         struct thread_master *rv;
 550         struct rlimit limit;
 551
 552         pthread_once(&init_once, &initializer);
 553
 554         rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));
 555
 556         /* Initialize master mutex */
 557         pthread_mutex_init(&rv->mtx, NULL);
 558         pthread_cond_init(&rv->cancel_cond, NULL);
 559
 560         /* Set name */
 561         name = name ? name : "default";
 562         rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
 563
 564         /* Initialize I/O task data structures */
 565
 566         /* Use configured limit if present, ulimit otherwise. */
 567         rv->fd_limit = frr_get_fd_limit();
 568         if (rv->fd_limit == 0) {
 569                 getrlimit(RLIMIT_NOFILE, &limit);
 570                 rv->fd_limit = (int)limit.rlim_cur;
 571         }
 572
 573         rv->read = XCALLOC(MTYPE_THREAD_POLL,
 574                            sizeof(struct thread *) * rv->fd_limit);
 575
 576         rv->write = XCALLOC(MTYPE_THREAD_POLL,
 577                             sizeof(struct thread *) * rv->fd_limit);
 578
 579         char tmhashname[strlen(name) + 32];
 580         snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
 581                  name);
 582         rv->cpu_record = hash_create_size(
 583                 8, (unsigned int (*)(const void *))cpu_record_hash_key,
 584                 (bool (*)(const void *, const void *))cpu_record_hash_cmp,
 585                 tmhashname);
 586
 587         thread_list_init(&rv->event);
 588         thread_list_init(&rv->ready);
 589         thread_list_init(&rv->unuse);
 590         thread_timer_list_init(&rv->timer);
 591
 592         /* Initialize thread_fetch() settings */
 593         rv->spin = true;
 594         rv->handle_signals = true;
 595
 596         /* Set pthread owner, should be updated by actual owner */
 597         rv->owner = pthread_self();
 598         rv->cancel_req = list_new();
 599         rv->cancel_req->del = cancelreq_del;
 600         rv->canceled = true;
 601
 602         /* Initialize pipe poker */
 603         pipe(rv->io_pipe);
 604         set_nonblocking(rv->io_pipe[0]);
 605         set_nonblocking(rv->io_pipe[1]);
 606
 607         /* Initialize data structures for poll() */
 608         rv->handler.pfdsize = rv->fd_limit;
 609         rv->handler.pfdcount = 0;
 610         rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
 611                                    sizeof(struct pollfd) * rv->handler.pfdsize);
 612         rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
 613                                    sizeof(struct pollfd) * rv->handler.pfdsize);
 614
 615         /* add to list of threadmasters */
 616         frr_with_mutex (&masters_mtx) {
 617                 if (!masters)
 618                         masters = list_new();
 619
 620                 listnode_add(masters, rv);
 621         }
 622
 623         return rv;
 624 }
 625
 626 void thread_master_set_name(struct thread_master *master, const char *name)
 627 {
 628         frr_with_mutex (&master->mtx) {
 629                 XFREE(MTYPE_THREAD_MASTER, master->name);
 630                 master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
 631         }
 632 }
 633
 634 #define THREAD_UNUSED_DEPTH 10
 635
 636 /* Move thread to unuse list. */
 637 static void thread_add_unuse(struct thread_master *m, struct thread *thread)
 638 {
 639         pthread_mutex_t mtxc = thread->mtx;
 640
 641         assert(m != NULL && thread != NULL);
 642
 643         thread->hist->total_active--;
 644         memset(thread, 0, sizeof(struct thread));
 645         thread->type = THREAD_UNUSED;
 646
 647         /* Restore the thread mutex context. */
 648         thread->mtx = mtxc;
 649
 650         if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
 651                 thread_list_add_tail(&m->unuse, thread);
 652                 return;
 653         }
 654
 655         thread_free(m, thread);
 656 }
 657
 658 /* Free all unused thread. */
 659 static void thread_list_free(struct thread_master *m,
 660                 struct thread_list_head *list)
 661 {
 662         struct thread *t;
 663
 664         while ((t = thread_list_pop(list)))
 665                 thread_free(m, t);
 666 }
 667
 668 static void thread_array_free(struct thread_master *m,
 669                               struct thread **thread_array)
 670 {
 671         struct thread *t;
 672         int index;
 673
 674         for (index = 0; index < m->fd_limit; ++index) {
 675                 t = thread_array[index];
 676                 if (t) {
 677                         thread_array[index] = NULL;
 678                         thread_free(m, t);
 679                 }
 680         }
 681         XFREE(MTYPE_THREAD_POLL, thread_array);
 682 }
 683
 684 /*
 685  * thread_master_free_unused
 686  *
 687  * As threads are finished with they are put on the
 688  * unuse list for later reuse.
 689  * If we are shutting down, Free up unused threads
 690  * So we can see if we forget to shut anything off
 691  */
 692 void thread_master_free_unused(struct thread_master *m)
 693 {
 694         frr_with_mutex (&m->mtx) {
 695                 struct thread *t;
 696                 while ((t = thread_list_pop(&m->unuse)))
 697                         thread_free(m, t);
 698         }
 699 }
 700
 701 /* Stop thread scheduler. */
 702 void thread_master_free(struct thread_master *m)
 703 {
 704         struct thread *t;
 705
 706         frr_with_mutex (&masters_mtx) {
 707                 listnode_delete(masters, m);
 708                 if (masters->count == 0) {
 709                         list_delete(&masters);
 710                 }
 711         }
 712
 713         thread_array_free(m, m->read);
 714         thread_array_free(m, m->write);
 715         while ((t = thread_timer_list_pop(&m->timer)))
 716                 thread_free(m, t);
 717         thread_list_free(m, &m->event);
 718         thread_list_free(m, &m->ready);
 719         thread_list_free(m, &m->unuse);
 720         pthread_mutex_destroy(&m->mtx);
 721         pthread_cond_destroy(&m->cancel_cond);
 722         close(m->io_pipe[0]);
 723         close(m->io_pipe[1]);
 724         list_delete(&m->cancel_req);
 725         m->cancel_req = NULL;
 726
 727         hash_clean(m->cpu_record, cpu_record_hash_free);
 728         hash_free(m->cpu_record);
 729         m->cpu_record = NULL;
 730
 731         XFREE(MTYPE_THREAD_MASTER, m->name);
 732         XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
 733         XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
 734         XFREE(MTYPE_THREAD_MASTER, m);
 735 }
 736
 737 /* Return remain time in milliseconds. */
 738 unsigned long thread_timer_remain_msec(struct thread *thread)
 739 {
 740         int64_t remain;
 741
 742         if (!thread_is_scheduled(thread))
 743                 return 0;
 744
 745         frr_with_mutex (&thread->mtx) {
 746                 remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
 747         }
 748
 749         return remain < 0 ? 0 : remain;
 750 }
 751
 752 /* Return remain time in seconds. */
 753 unsigned long thread_timer_remain_second(struct thread *thread)
 754 {
 755         return thread_timer_remain_msec(thread) / 1000LL;
 756 }
 757
 758 struct timeval thread_timer_remain(struct thread *thread)
 759 {
 760         struct timeval remain;
 761         frr_with_mutex (&thread->mtx) {
 762                 monotime_until(&thread->u.sands, &remain);
 763         }
 764         return remain;
 765 }
 766
 767 static int time_hhmmss(char *buf, int buf_size, long sec)
 768 {
 769         long hh;
 770         long mm;
 771         int wr;
 772
 773         assert(buf_size >= 8);
 774
 775         hh = sec / 3600;
 776         sec %= 3600;
 777         mm = sec / 60;
 778         sec %= 60;
 779
 780         wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
 781
 782         return wr != 8;
 783 }
 784
 785 char *thread_timer_to_hhmmss(char *buf, int buf_size,
 786                 struct thread *t_timer)
 787 {
 788         if (t_timer) {
 789                 time_hhmmss(buf, buf_size,
 790                                 thread_timer_remain_second(t_timer));
 791         } else {
 792                 snprintf(buf, buf_size, "--:--:--");
 793         }
 794         return buf;
 795 }
 796
 797 /* Get new thread.  */
 798 static struct thread *thread_get(struct thread_master *m, uint8_t type,
 799                                  void (*func)(struct thread *), void *arg,
 800                                  const struct xref_threadsched *xref)
 801 {
 802         struct thread *thread = thread_list_pop(&m->unuse);
 803         struct cpu_thread_history tmp;
 804
 805         if (!thread) {
 806                 thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
 807                 /* mutex only needs to be initialized at struct creation. */
 808                 pthread_mutex_init(&thread->mtx, NULL);
 809                 m->alloc++;
 810         }
 811
 812         thread->type = type;
 813         thread->add_type = type;
 814         thread->master = m;
 815         thread->arg = arg;
 816         thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
 817         thread->ref = NULL;
 818         thread->ignore_timer_late = false;
 819
 820         /*
 821          * So if the passed in funcname is not what we have
 822          * stored that means the thread->hist needs to be
 823          * updated.  We keep the last one around in unused
 824          * under the assumption that we are probably
 825          * going to immediately allocate the same
 826          * type of thread.
 827          * This hopefully saves us some serious
 828          * hash_get lookups.
 829          */
 830         if ((thread->xref && thread->xref->funcname != xref->funcname)
 831             || thread->func != func) {
 832                 tmp.func = func;
 833                 tmp.funcname = xref->funcname;
 834                 thread->hist =
 835                         hash_get(m->cpu_record, &tmp,
 836                                  (void *(*)(void *))cpu_record_hash_alloc);
 837         }
 838         thread->hist->total_active++;
 839         thread->func = func;
 840         thread->xref = xref;
 841
 842         return thread;
 843 }
 844
 845 static void thread_free(struct thread_master *master, struct thread *thread)
 846 {
 847         /* Update statistics. */
 848         assert(master->alloc > 0);
 849         master->alloc--;
 850
 851         /* Free allocated resources. */
 852         pthread_mutex_destroy(&thread->mtx);
 853         XFREE(MTYPE_THREAD, thread);
 854 }
 855
 856 static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
 857                    bool *eintr_p)
 858 {
 859         sigset_t origsigs;
 860         unsigned char trash[64];
 861         nfds_t count = m->handler.copycount;
 862
 863         /*
 864          * If timer_wait is null here, that means poll() should block
 865          * indefinitely, unless the thread_master has overridden it by setting
 866          * ->selectpoll_timeout.
 867          *
 868          * If the value is positive, it specifies the maximum number of
 869          * milliseconds to wait. If the timeout is -1, it specifies that
 870          * we should never wait and always return immediately even if no
 871          * event is detected. If the value is zero, the behavior is default.
 872          */
 873         int timeout = -1;
 874
 875         /* number of file descriptors with events */
 876         int num;
 877
 878         if (timer_wait != NULL
 879             && m->selectpoll_timeout == 0) // use the default value
 880                 timeout = (timer_wait->tv_sec * 1000)
 881                           + (timer_wait->tv_usec / 1000);
 882         else if (m->selectpoll_timeout > 0) // use the user's timeout
 883                 timeout = m->selectpoll_timeout;
 884         else if (m->selectpoll_timeout
 885                  < 0) // effect a poll (return immediately)
 886                 timeout = 0;
 887
 888         zlog_tls_buffer_flush();
 889         rcu_read_unlock();
 890         rcu_assert_read_unlocked();
 891
 892         /* add poll pipe poker */
 893         assert(count + 1 < m->handler.pfdsize);
 894         m->handler.copy[count].fd = m->io_pipe[0];
 895         m->handler.copy[count].events = POLLIN;
 896         m->handler.copy[count].revents = 0x00;
 897
 898         /* We need to deal with a signal-handling race here: we
 899          * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
 900          * that may arrive just before we enter poll(). We will block the
 901          * key signals, then check whether any have arrived - if so, we return
 902          * before calling poll(). If not, we'll re-enable the signals
 903          * in the ppoll() call.
 904          */
 905
 906         sigemptyset(&origsigs);
 907         if (m->handle_signals) {
 908                 /* Main pthread that handles the app signals */
 909                 if (frr_sigevent_check(&origsigs)) {
 910                         /* Signal to process - restore signal mask and return */
 911                         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 912                         num = -1;
 913                         *eintr_p = true;
 914                         goto done;
 915                 }
 916         } else {
 917                 /* Don't make any changes for the non-main pthreads */
 918                 pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
 919         }
 920
 921 #if defined(HAVE_PPOLL)
 922         struct timespec ts, *tsp;
 923
 924         if (timeout >= 0) {
 925                 ts.tv_sec = timeout / 1000;
 926                 ts.tv_nsec = (timeout % 1000) * 1000000;
 927                 tsp = &ts;
 928         } else
 929                 tsp = NULL;
 930
 931         num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
 932         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 933 #else
 934         /* Not ideal - there is a race after we restore the signal mask */
 935         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 936         num = poll(m->handler.copy, count + 1, timeout);
 937 #endif
 938
 939 done:
 940
 941         if (num < 0 && errno == EINTR)
 942                 *eintr_p = true;
 943
 944         if (num > 0 && m->handler.copy[count].revents != 0 && num--)
 945                 while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
 946                         ;
 947
 948         rcu_read_lock();
 949
 950         return num;
 951 }
 952
 953 /* Add new read thread. */
 954 void _thread_add_read_write(const struct xref_threadsched *xref,
 955                             struct thread_master *m,
 956                             void (*func)(struct thread *), void *arg, int fd,
 957                             struct thread **t_ptr)
 958 {
 959         int dir = xref->thread_type;
 960         struct thread *thread = NULL;
 961         struct thread **thread_array;
 962
 963         if (dir == THREAD_READ)
 964                 frrtrace(9, frr_libfrr, schedule_read, m,
 965                          xref->funcname, xref->xref.file, xref->xref.line,
 966                          t_ptr, fd, 0, arg, 0);
 967         else
 968                 frrtrace(9, frr_libfrr, schedule_write, m,
 969                          xref->funcname, xref->xref.file, xref->xref.line,
 970                          t_ptr, fd, 0, arg, 0);
 971
 972         assert(fd >= 0);
 973         if (fd >= m->fd_limit)
 974                 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
 975
 976         frr_with_mutex (&m->mtx) {
 977                 if (t_ptr && *t_ptr)
 978                         // thread is already scheduled; don't reschedule
 979                         break;
 980
 981                 /* default to a new pollfd */
 982                 nfds_t queuepos = m->handler.pfdcount;
 983
 984                 if (dir == THREAD_READ)
 985                         thread_array = m->read;
 986                 else
 987                         thread_array = m->write;
 988
 989                 /* if we already have a pollfd for our file descriptor, find and
 990                  * use it */
 991                 for (nfds_t i = 0; i < m->handler.pfdcount; i++)
 992                         if (m->handler.pfds[i].fd == fd) {
 993                                 queuepos = i;
 994
 995 #ifdef DEV_BUILD
 996                                 /*
 997                                  * What happens if we have a thread already
 998                                  * created for this event?
 999                                  */
1000                                 if (thread_array[fd])
1001                                         assert(!"Thread already scheduled for file descriptor");
1002 #endif
1003                                 break;
1004                         }
1005
1006                 /* make sure we have room for this fd + pipe poker fd */
1007                 assert(queuepos + 1 < m->handler.pfdsize);
1008
1009                 thread = thread_get(m, dir, func, arg, xref);
1010
1011                 m->handler.pfds[queuepos].fd = fd;
1012                 m->handler.pfds[queuepos].events |=
1013                         (dir == THREAD_READ ? POLLIN : POLLOUT);
1014
1015                 if (queuepos == m->handler.pfdcount)
1016                         m->handler.pfdcount++;
1017
1018                 if (thread) {
1019                         frr_with_mutex (&thread->mtx) {
1020                                 thread->u.fd = fd;
1021                                 thread_array[thread->u.fd] = thread;
1022                         }
1023
1024                         if (t_ptr) {
1025                                 *t_ptr = thread;
1026                                 thread->ref = t_ptr;
1027                         }
1028                 }
1029
1030                 AWAKEN(m);
1031         }
1032 }
1033
1034 static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
1035                                       struct thread_master *m,
1036                                       void (*func)(struct thread *), void *arg,
1037                                       struct timeval *time_relative,
1038                                       struct thread **t_ptr)
1039 {
1040         struct thread *thread;
1041         struct timeval t;
1042
1043         assert(m != NULL);
1044
1045         assert(time_relative);
1046
1047         frrtrace(9, frr_libfrr, schedule_timer, m,
1048                  xref->funcname, xref->xref.file, xref->xref.line,
1049                  t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
1050
1051         /* Compute expiration/deadline time. */
1052         monotime(&t);
1053         timeradd(&t, time_relative, &t);
1054
1055         frr_with_mutex (&m->mtx) {
1056                 if (t_ptr && *t_ptr)
1057                         /* thread is already scheduled; don't reschedule */
1058                         return;
1059
1060                 thread = thread_get(m, THREAD_TIMER, func, arg, xref);
1061
1062                 frr_with_mutex (&thread->mtx) {
1063                         thread->u.sands = t;
1064                         thread_timer_list_add(&m->timer, thread);
1065                         if (t_ptr) {
1066                                 *t_ptr = thread;
1067                                 thread->ref = t_ptr;
1068                         }
1069                 }
1070
1071                 /* The timer list is sorted - if this new timer
1072                  * might change the time we'll wait for, give the pthread
1073                  * a chance to re-compute.
1074                  */
1075                 if (thread_timer_list_first(&m->timer) == thread)
1076                         AWAKEN(m);
1077         }
1078 #define ONEYEAR2SEC (60 * 60 * 24 * 365)
1079         if (time_relative->tv_sec > ONEYEAR2SEC)
1080                 flog_err(
1081                         EC_LIB_TIMER_TOO_LONG,
1082                         "Timer: %pTHD is created with an expiration that is greater than 1 year",
1083                         thread);
1084 }
1085
1086
1087 /* Add timer event thread. */
1088 void _thread_add_timer(const struct xref_threadsched *xref,
1089                        struct thread_master *m, void (*func)(struct thread *),
1090                        void *arg, long timer, struct thread **t_ptr)
1091 {
1092         struct timeval trel;
1093
1094         assert(m != NULL);
1095
1096         trel.tv_sec = timer;
1097         trel.tv_usec = 0;
1098
1099         _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1100 }
1101
1102 /* Add timer event thread with "millisecond" resolution */
1103 void _thread_add_timer_msec(const struct xref_threadsched *xref,
1104                             struct thread_master *m,
1105                             void (*func)(struct thread *), void *arg,
1106                             long timer, struct thread **t_ptr)
1107 {
1108         struct timeval trel;
1109
1110         assert(m != NULL);
1111
1112         trel.tv_sec = timer / 1000;
1113         trel.tv_usec = 1000 * (timer % 1000);
1114
1115         _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1116 }
1117
1118 /* Add timer event thread with "timeval" resolution */
1119 void _thread_add_timer_tv(const struct xref_threadsched *xref,
1120                           struct thread_master *m,
1121                           void (*func)(struct thread *), void *arg,
1122                           struct timeval *tv, struct thread **t_ptr)
1123 {
1124         _thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
1125 }
1126
1127 /* Add simple event thread. */
1128 void _thread_add_event(const struct xref_threadsched *xref,
1129                        struct thread_master *m, void (*func)(struct thread *),
1130                        void *arg, int val, struct thread **t_ptr)
1131 {
1132         struct thread *thread = NULL;
1133
1134         frrtrace(9, frr_libfrr, schedule_event, m,
1135                  xref->funcname, xref->xref.file, xref->xref.line,
1136                  t_ptr, 0, val, arg, 0);
1137
1138         assert(m != NULL);
1139
1140         frr_with_mutex (&m->mtx) {
1141                 if (t_ptr && *t_ptr)
1142                         /* thread is already scheduled; don't reschedule */
1143                         break;
1144
1145                 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
1146                 frr_with_mutex (&thread->mtx) {
1147                         thread->u.val = val;
1148                         thread_list_add_tail(&m->event, thread);
1149                 }
1150
1151                 if (t_ptr) {
1152                         *t_ptr = thread;
1153                         thread->ref = t_ptr;
1154                 }
1155
1156                 AWAKEN(m);
1157         }
1158 }
1159
1160 /* Thread cancellation ------------------------------------------------------ */
1161
1162 /**
1163  * NOT's out the .events field of pollfd corresponding to the given file
1164  * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1165  *
1166  * This needs to happen for both copies of pollfd's. See 'thread_fetch'
1167  * implementation for details.
1168  *
1169  * @param master
1170  * @param fd
1171  * @param state the event to cancel. One or more (OR'd together) of the
1172  * following:
1173  *   - POLLIN
1174  *   - POLLOUT
1175  */
1176 static void thread_cancel_rw(struct thread_master *master, int fd, short state,
1177                              int idx_hint)
1178 {
1179         bool found = false;
1180
1181         /* find the index of corresponding pollfd */
1182         nfds_t i;
1183
1184         /* Cancel POLLHUP too just in case some bozo set it */
1185         state |= POLLHUP;
1186
1187         /* Some callers know the index of the pfd already */
1188         if (idx_hint >= 0) {
1189                 i = idx_hint;
1190                 found = true;
1191         } else {
1192                 /* Have to look for the fd in the pfd array */
1193                 for (i = 0; i < master->handler.pfdcount; i++)
1194                         if (master->handler.pfds[i].fd == fd) {
1195                                 found = true;
1196                                 break;
1197                         }
1198         }
1199
1200         if (!found) {
1201                 zlog_debug(
1202                         "[!] Received cancellation request for nonexistent rw job");
1203                 zlog_debug("[!] threadmaster: %s | fd: %d",
1204                            master->name ? master->name : "", fd);
1205                 return;
1206         }
1207
1208         /* NOT out event. */
1209         master->handler.pfds[i].events &= ~(state);
1210
1211         /* If all events are canceled, delete / resize the pollfd array. */
1212         if (master->handler.pfds[i].events == 0) {
1213                 memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1214                         (master->handler.pfdcount - i - 1)
1215                                 * sizeof(struct pollfd));
1216                 master->handler.pfdcount--;
1217                 master->handler.pfds[master->handler.pfdcount].fd = 0;
1218                 master->handler.pfds[master->handler.pfdcount].events = 0;
1219         }
1220
1221         /* If we have the same pollfd in the copy, perform the same operations,
1222          * otherwise return. */
1223         if (i >= master->handler.copycount)
1224                 return;
1225
1226         master->handler.copy[i].events &= ~(state);
1227
1228         if (master->handler.copy[i].events == 0) {
1229                 memmove(master->handler.copy + i, master->handler.copy + i + 1,
1230                         (master->handler.copycount - i - 1)
1231                                 * sizeof(struct pollfd));
1232                 master->handler.copycount--;
1233                 master->handler.copy[master->handler.copycount].fd = 0;
1234                 master->handler.copy[master->handler.copycount].events = 0;
1235         }
1236 }
1237
1238 /*
1239  * Process task cancellation given a task argument: iterate through the
1240  * various lists of tasks, looking for any that match the argument.
1241  */
1242 static void cancel_arg_helper(struct thread_master *master,
1243                               const struct cancel_req *cr)
1244 {
1245         struct thread *t;
1246         nfds_t i;
1247         int fd;
1248         struct pollfd *pfd;
1249
1250         /* We're only processing arg-based cancellations here. */
1251         if (cr->eventobj == NULL)
1252                 return;
1253
1254         /* First process the ready lists. */
1255         frr_each_safe(thread_list, &master->event, t) {
1256                 if (t->arg != cr->eventobj)
1257                         continue;
1258                 thread_list_del(&master->event, t);
1259                 if (t->ref)
1260                         *t->ref = NULL;
1261                 thread_add_unuse(master, t);
1262         }
1263
1264         frr_each_safe(thread_list, &master->ready, t) {
1265                 if (t->arg != cr->eventobj)
1266                         continue;
1267                 thread_list_del(&master->ready, t);
1268                 if (t->ref)
1269                         *t->ref = NULL;
1270                 thread_add_unuse(master, t);
1271         }
1272
1273         /* If requested, stop here and ignore io and timers */
1274         if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
1275                 return;
1276
1277         /* Check the io tasks */
1278         for (i = 0; i < master->handler.pfdcount;) {
1279                 pfd = master->handler.pfds + i;
1280
1281                 if (pfd->events & POLLIN)
1282                         t = master->read[pfd->fd];
1283                 else
1284                         t = master->write[pfd->fd];
1285
1286                 if (t && t->arg == cr->eventobj) {
1287                         fd = pfd->fd;
1288
1289                         /* Found a match to cancel: clean up fd arrays */
1290                         thread_cancel_rw(master, pfd->fd, pfd->events, i);
1291
1292                         /* Clean up thread arrays */
1293                         master->read[fd] = NULL;
1294                         master->write[fd] = NULL;
1295
1296                         /* Clear caller's ref */
1297                         if (t->ref)
1298                                 *t->ref = NULL;
1299
1300                         thread_add_unuse(master, t);
1301
1302                         /* Don't increment 'i' since the cancellation will have
1303                          * removed the entry from the pfd array
1304                          */
1305                 } else
1306                         i++;
1307         }
1308
1309         /* Check the timer tasks */
1310         t = thread_timer_list_first(&master->timer);
1311         while (t) {
1312                 struct thread *t_next;
1313
1314                 t_next = thread_timer_list_next(&master->timer, t);
1315
1316                 if (t->arg == cr->eventobj) {
1317                         thread_timer_list_del(&master->timer, t);
1318                         if (t->ref)
1319                                 *t->ref = NULL;
1320                         thread_add_unuse(master, t);
1321                 }
1322
1323                 t = t_next;
1324         }
1325 }
1326
1327 /**
1328  * Process cancellation requests.
1329  *
1330  * This may only be run from the pthread which owns the thread_master.
1331  *
1332  * @param master the thread master to process
1333  * @REQUIRE master->mtx
1334  */
1335 static void do_thread_cancel(struct thread_master *master)
1336 {
1337         struct thread_list_head *list = NULL;
1338         struct thread **thread_array = NULL;
1339         struct thread *thread;
1340         struct cancel_req *cr;
1341         struct listnode *ln;
1342
1343         for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
1344                 /*
1345                  * If this is an event object cancellation, search
1346                  * through task lists deleting any tasks which have the
1347                  * specified argument - use this handy helper function.
1348                  */
1349                 if (cr->eventobj) {
1350                         cancel_arg_helper(master, cr);
1351                         continue;
1352                 }
1353
1354                 /*
1355                  * The pointer varies depending on whether the cancellation
1356                  * request was made asynchronously or not. If it was, we
1357                  * need to check whether the thread even exists anymore
1358                  * before cancelling it.
1359                  */
1360                 thread = (cr->thread) ? cr->thread : *cr->threadref;
1361
1362                 if (!thread)
1363                         continue;
1364
1365                 list = NULL;
1366                 thread_array = NULL;
1367
1368                 /* Determine the appropriate queue to cancel the thread from */
1369                 switch (thread->type) {
1370                 case THREAD_READ:
1371                         thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
1372                         thread_array = master->read;
1373                         break;
1374                 case THREAD_WRITE:
1375                         thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
1376                         thread_array = master->write;
1377                         break;
1378                 case THREAD_TIMER:
1379                         thread_timer_list_del(&master->timer, thread);
1380                         break;
1381                 case THREAD_EVENT:
1382                         list = &master->event;
1383                         break;
1384                 case THREAD_READY:
1385                         list = &master->ready;
1386                         break;
1387                 default:
1388                         continue;
1389                         break;
1390                 }
1391
1392                 if (list) {
1393                         thread_list_del(list, thread);
1394                 } else if (thread_array) {
1395                         thread_array[thread->u.fd] = NULL;
1396                 }
1397
1398                 if (thread->ref)
1399                         *thread->ref = NULL;
1400
1401                 thread_add_unuse(thread->master, thread);
1402         }
1403
1404         /* Delete and free all cancellation requests */
1405         if (master->cancel_req)
1406                 list_delete_all_node(master->cancel_req);
1407
1408         /* Wake up any threads which may be blocked in thread_cancel_async() */
1409         master->canceled = true;
1410         pthread_cond_broadcast(&master->cancel_cond);
1411 }
1412
1413 /*
1414  * Helper function used for multiple flavors of arg-based cancellation.
1415  */
1416 static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
1417 {
1418         struct cancel_req *cr;
1419
1420         assert(m->owner == pthread_self());
1421
1422         /* Only worth anything if caller supplies an arg. */
1423         if (arg == NULL)
1424                 return;
1425
1426         cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1427
1428         cr->flags = flags;
1429
1430         frr_with_mutex (&m->mtx) {
1431                 cr->eventobj = arg;
1432                 listnode_add(m->cancel_req, cr);
1433                 do_thread_cancel(m);
1434         }
1435 }
1436
1437 /**
1438  * Cancel any events which have the specified argument.
1439  *
1440  * MT-Unsafe
1441  *
1442  * @param m the thread_master to cancel from
1443  * @param arg the argument passed when creating the event
1444  */
1445 void thread_cancel_event(struct thread_master *master, void *arg)
1446 {
1447         cancel_event_helper(master, arg, 0);
1448 }
1449
1450 /*
1451  * Cancel ready tasks with an arg matching 'arg'
1452  *
1453  * MT-Unsafe
1454  *
1455  * @param m the thread_master to cancel from
1456  * @param arg the argument passed when creating the event
1457  */
1458 void thread_cancel_event_ready(struct thread_master *m, void *arg)
1459 {
1460
1461         /* Only cancel ready/event tasks */
1462         cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
1463 }
1464
1465 /**
1466  * Cancel a specific task.
1467  *
1468  * MT-Unsafe
1469  *
1470  * @param thread task to cancel
1471  */
1472 void thread_cancel(struct thread **thread)
1473 {
1474         struct thread_master *master;
1475
1476         if (thread == NULL || *thread == NULL)
1477                 return;
1478
1479         master = (*thread)->master;
1480
1481         frrtrace(9, frr_libfrr, thread_cancel, master,
1482                  (*thread)->xref->funcname, (*thread)->xref->xref.file,
1483                  (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
1484                  (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);
1485
1486         assert(master->owner == pthread_self());
1487
1488         frr_with_mutex (&master->mtx) {
1489                 struct cancel_req *cr =
1490                         XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1491                 cr->thread = *thread;
1492                 listnode_add(master->cancel_req, cr);
1493                 do_thread_cancel(master);
1494         }
1495
1496         *thread = NULL;
1497 }
1498
1499 /**
1500  * Asynchronous cancellation.
1501  *
1502  * Called with either a struct thread ** or void * to an event argument,
1503  * this function posts the correct cancellation request and blocks until it is
1504  * serviced.
1505  *
1506  * If the thread is currently running, execution blocks until it completes.
1507  *
1508  * The last two parameters are mutually exclusive, i.e. if you pass one the
1509  * other must be NULL.
1510  *
1511  * When the cancellation procedure executes on the target thread_master, the
1512  * thread * provided is checked for nullity. If it is null, the thread is
1513  * assumed to no longer exist and the cancellation request is a no-op. Thus
1514  * users of this API must pass a back-reference when scheduling the original
1515  * task.
1516  *
1517  * MT-Safe
1518  *
1519  * @param master the thread master with the relevant event / task
1520  * @param thread pointer to thread to cancel
1521  * @param eventobj the event
1522  */
1523 void thread_cancel_async(struct thread_master *master, struct thread **thread,
1524                          void *eventobj)
1525 {
1526         assert(!(thread && eventobj) && (thread || eventobj));
1527
1528         if (thread && *thread)
1529                 frrtrace(9, frr_libfrr, thread_cancel_async, master,
1530                          (*thread)->xref->funcname, (*thread)->xref->xref.file,
1531                          (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
1532                          (*thread)->u.val, (*thread)->arg,
1533                          (*thread)->u.sands.tv_sec);
1534         else
1535                 frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
1536                          0, NULL, 0, 0, eventobj, 0);
1537
1538         assert(master->owner != pthread_self());
1539
1540         frr_with_mutex (&master->mtx) {
1541                 master->canceled = false;
1542
1543                 if (thread) {
1544                         struct cancel_req *cr =
1545                                 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1546                         cr->threadref = thread;
1547                         listnode_add(master->cancel_req, cr);
1548                 } else if (eventobj) {
1549                         struct cancel_req *cr =
1550                                 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1551                         cr->eventobj = eventobj;
1552                         listnode_add(master->cancel_req, cr);
1553                 }
1554                 AWAKEN(master);
1555
1556                 while (!master->canceled)
1557                         pthread_cond_wait(&master->cancel_cond, &master->mtx);
1558         }
1559
1560         if (thread)
1561                 *thread = NULL;
1562 }
1563 /* ------------------------------------------------------------------------- */
1564
1565 static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
1566                                          struct timeval *timer_val)
1567 {
1568         if (!thread_timer_list_count(timers))
1569                 return NULL;
1570
1571         struct thread *next_timer = thread_timer_list_first(timers);
1572         monotime_until(&next_timer->u.sands, timer_val);
1573         return timer_val;
1574 }
1575
1576 static struct thread *thread_run(struct thread_master *m, struct thread *thread,
1577                                  struct thread *fetch)
1578 {
1579         *fetch = *thread;
1580         thread_add_unuse(m, thread);
1581         return fetch;
1582 }
1583
1584 static int thread_process_io_helper(struct thread_master *m,
1585                                     struct thread *thread, short state,
1586                                     short actual_state, int pos)
1587 {
1588         struct thread **thread_array;
1589
1590         /*
1591          * poll() clears the .events field, but the pollfd array we
1592          * pass to poll() is a copy of the one used to schedule threads.
1593          * We need to synchronize state between the two here by applying
1594          * the same changes poll() made on the copy of the "real" pollfd
1595          * array.
1596          *
1597          * This cleans up a possible infinite loop where we refuse
1598          * to respond to a poll event but poll is insistent that
1599          * we should.
1600          */
1601         m->handler.pfds[pos].events &= ~(state);
1602
1603         if (!thread) {
1604                 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1605                         flog_err(EC_LIB_NO_THREAD,
1606                                  "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
1607                                  m->handler.pfds[pos].fd, actual_state);
1608                 return 0;
1609         }
1610
1611         if (thread->type == THREAD_READ)
1612                 thread_array = m->read;
1613         else
1614                 thread_array = m->write;
1615
1616         thread_array[thread->u.fd] = NULL;
1617         thread_list_add_tail(&m->ready, thread);
1618         thread->type = THREAD_READY;
1619
1620         return 1;
1621 }
1622
1623 /**
1624  * Process I/O events.
1625  *
1626  * Walks through file descriptor array looking for those pollfds whose .revents
1627  * field has something interesting. Deletes any invalid file descriptors.
1628  *
1629  * @param m the thread master
1630  * @param num the number of active file descriptors (return value of poll())
1631  */
1632 static void thread_process_io(struct thread_master *m, unsigned int num)
1633 {
1634         unsigned int ready = 0;
1635         struct pollfd *pfds = m->handler.copy;
1636
1637         for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1638                 /* no event for current fd? immediately continue */
1639                 if (pfds[i].revents == 0)
1640                         continue;
1641
1642                 ready++;
1643
1644                 /*
1645                  * Unless someone has called thread_cancel from another
1646                  * pthread, the only thing that could have changed in
1647                  * m->handler.pfds while we were asleep is the .events
1648                  * field in a given pollfd. Barring thread_cancel() that
1649                  * value should be a superset of the values we have in our
1650                  * copy, so there's no need to update it. Similarily,
1651                  * barring deletion, the fd should still be a valid index
1652                  * into the master's pfds.
1653                  *
1654                  * We are including POLLERR here to do a READ event
1655                  * this is because the read should fail and the
1656                  * read function should handle it appropriately
1657                  */
1658                 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
1659                         thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
1660                                                  pfds[i].revents, i);
1661                 }
1662                 if (pfds[i].revents & POLLOUT)
1663                         thread_process_io_helper(m, m->write[pfds[i].fd],
1664                                                  POLLOUT, pfds[i].revents, i);
1665
1666                 /* if one of our file descriptors is garbage, remove the same
1667                  * from
1668                  * both pfds + update sizes and index */
1669                 if (pfds[i].revents & POLLNVAL) {
1670                         memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1671                                 (m->handler.pfdcount - i - 1)
1672                                         * sizeof(struct pollfd));
1673                         m->handler.pfdcount--;
1674                         m->handler.pfds[m->handler.pfdcount].fd = 0;
1675                         m->handler.pfds[m->handler.pfdcount].events = 0;
1676
1677                         memmove(pfds + i, pfds + i + 1,
1678                                 (m->handler.copycount - i - 1)
1679                                         * sizeof(struct pollfd));
1680                         m->handler.copycount--;
1681                         m->handler.copy[m->handler.copycount].fd = 0;
1682                         m->handler.copy[m->handler.copycount].events = 0;
1683
1684                         i--;
1685                 }
1686         }
1687 }
1688
1689 /* Add all timers that have popped to the ready list. */
1690 static unsigned int thread_process_timers(struct thread_master *m,
1691                                           struct timeval *timenow)
1692 {
1693         struct timeval prev = *timenow;
1694         bool displayed = false;
1695         struct thread *thread;
1696         unsigned int ready = 0;
1697
1698         while ((thread = thread_timer_list_first(&m->timer))) {
1699                 if (timercmp(timenow, &thread->u.sands, <))
1700                         break;
1701                 prev = thread->u.sands;
1702                 prev.tv_sec += 4;
1703                 /*
1704                  * If the timer would have popped 4 seconds in the
1705                  * past then we are in a situation where we are
1706                  * really getting behind on handling of events.
1707                  * Let's log it and do the right thing with it.
1708                  */
1709                 if (timercmp(timenow, &prev, >)) {
1710                         atomic_fetch_add_explicit(
1711                                 &thread->hist->total_starv_warn, 1,
1712                                 memory_order_seq_cst);
1713                         if (!displayed && !thread->ignore_timer_late) {
1714                                 flog_warn(
1715                                         EC_LIB_STARVE_THREAD,
1716                                         "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1717                                         thread);
1718                                 displayed = true;
1719                         }
1720                 }
1721
1722                 thread_timer_list_pop(&m->timer);
1723                 thread->type = THREAD_READY;
1724                 thread_list_add_tail(&m->ready, thread);
1725                 ready++;
1726         }
1727
1728         return ready;
1729 }
1730
1731 /* process a list en masse, e.g. for event thread lists */
1732 static unsigned int thread_process(struct thread_list_head *list)
1733 {
1734         struct thread *thread;
1735         unsigned int ready = 0;
1736
1737         while ((thread = thread_list_pop(list))) {
1738                 thread->type = THREAD_READY;
1739                 thread_list_add_tail(&thread->master->ready, thread);
1740                 ready++;
1741         }
1742         return ready;
1743 }
1744
1745
1746 /* Fetch next ready thread. */
1747 struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
1748 {
1749         struct thread *thread = NULL;
1750         struct timeval now;
1751         struct timeval zerotime = {0, 0};
1752         struct timeval tv;
1753         struct timeval *tw = NULL;
1754         bool eintr_p = false;
1755         int num = 0;
1756
1757         do {
1758                 /* Handle signals if any */
1759                 if (m->handle_signals)
1760                         frr_sigevent_process();
1761
1762                 pthread_mutex_lock(&m->mtx);
1763
1764                 /* Process any pending cancellation requests */
1765                 do_thread_cancel(m);
1766
1767                 /*
1768                  * Attempt to flush ready queue before going into poll().
1769                  * This is performance-critical. Think twice before modifying.
1770                  */
1771                 if ((thread = thread_list_pop(&m->ready))) {
1772                         fetch = thread_run(m, thread, fetch);
1773                         if (fetch->ref)
1774                                 *fetch->ref = NULL;
1775                         pthread_mutex_unlock(&m->mtx);
1776                         if (!m->ready_run_loop)
1777                                 GETRUSAGE(&m->last_getrusage);
1778                         m->ready_run_loop = true;
1779                         break;
1780                 }
1781
1782                 m->ready_run_loop = false;
1783                 /* otherwise, tick through scheduling sequence */
1784
1785                 /*
1786                  * Post events to ready queue. This must come before the
1787                  * following block since events should occur immediately
1788                  */
1789                 thread_process(&m->event);
1790
1791                 /*
1792                  * If there are no tasks on the ready queue, we will poll()
1793                  * until a timer expires or we receive I/O, whichever comes
1794                  * first. The strategy for doing this is:
1795                  *
1796                  * - If there are events pending, set the poll() timeout to zero
1797                  * - If there are no events pending, but there are timers
1798                  * pending, set the timeout to the smallest remaining time on
1799                  * any timer.
1800                  * - If there are neither timers nor events pending, but there
1801                  * are file descriptors pending, block indefinitely in poll()
1802                  * - If nothing is pending, it's time for the application to die
1803                  *
1804                  * In every case except the last, we need to hit poll() at least
1805                  * once per loop to avoid starvation by events
1806                  */
1807                 if (!thread_list_count(&m->ready))
1808                         tw = thread_timer_wait(&m->timer, &tv);
1809
1810                 if (thread_list_count(&m->ready) ||
1811                                 (tw && !timercmp(tw, &zerotime, >)))
1812                         tw = &zerotime;
1813
1814                 if (!tw && m->handler.pfdcount == 0) { /* die */
1815                         pthread_mutex_unlock(&m->mtx);
1816                         fetch = NULL;
1817                         break;
1818                 }
1819
1820                 /*
1821                  * Copy pollfd array + # active pollfds in it. Not necessary to
1822                  * copy the array size as this is fixed.
1823                  */
1824                 m->handler.copycount = m->handler.pfdcount;
1825                 memcpy(m->handler.copy, m->handler.pfds,
1826                        m->handler.copycount * sizeof(struct pollfd));
1827
1828                 pthread_mutex_unlock(&m->mtx);
1829                 {
1830                         eintr_p = false;
1831                         num = fd_poll(m, tw, &eintr_p);
1832                 }
1833                 pthread_mutex_lock(&m->mtx);
1834
1835                 /* Handle any errors received in poll() */
1836                 if (num < 0) {
1837                         if (eintr_p) {
1838                                 pthread_mutex_unlock(&m->mtx);
1839                                 /* loop around to signal handler */
1840                                 continue;
1841                         }
1842
1843                         /* else die */
1844                         flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
1845                                  safe_strerror(errno));
1846                         pthread_mutex_unlock(&m->mtx);
1847                         fetch = NULL;
1848                         break;
1849                 }
1850
1851                 /* Post timers to ready queue. */
1852                 monotime(&now);
1853                 thread_process_timers(m, &now);
1854
1855                 /* Post I/O to ready queue. */
1856                 if (num > 0)
1857                         thread_process_io(m, num);
1858
1859                 pthread_mutex_unlock(&m->mtx);
1860
1861         } while (!thread && m->spin);
1862
1863         return fetch;
1864 }
1865
1866 static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
1867 {
1868         return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1869                 + (a.tv_usec - b.tv_usec));
1870 }
1871
1872 unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1873                                    unsigned long *cputime)
1874 {
1875 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1876
1877 #ifdef __FreeBSD__
1878         /*
1879          * FreeBSD appears to have an issue when calling clock_gettime
1880          * with CLOCK_THREAD_CPUTIME_ID really close to each other
1881          * occassionally the now time will be before the start time.
1882          * This is not good and FRR is ending up with CPU HOG's
1883          * when the subtraction wraps to very large numbers
1884          *
1885          * What we are going to do here is cheat a little bit
1886          * and notice that this is a problem and just correct
1887          * it so that it is impossible to happen
1888          */
1889         if (start->cpu.tv_sec == now->cpu.tv_sec &&
1890             start->cpu.tv_nsec > now->cpu.tv_nsec)
1891                 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1892         else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1893                 now->cpu.tv_sec = start->cpu.tv_sec;
1894                 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1895         }
1896 #endif
1897         *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1898                    + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1899 #else
1900         /* This is 'user + sys' time.  */
1901         *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1902                    + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
1903 #endif
1904         return timeval_elapsed(now->real, start->real);
1905 }
1906
1907 /* We should aim to yield after yield milliseconds, which defaults
1908    to THREAD_YIELD_TIME_SLOT .
1909    Note: we are using real (wall clock) time for this calculation.
1910    It could be argued that CPU time may make more sense in certain
1911    contexts.  The things to consider are whether the thread may have
1912    blocked (in which case wall time increases, but CPU time does not),
1913    or whether the system is heavily loaded with other processes competing
1914    for CPU time.  On balance, wall clock time seems to make sense.
1915    Plus it has the added benefit that gettimeofday should be faster
1916    than calling getrusage. */
1917 int thread_should_yield(struct thread *thread)
1918 {
1919         int result;
1920         frr_with_mutex (&thread->mtx) {
1921                 result = monotime_since(&thread->real, NULL)
1922                          > (int64_t)thread->yield;
1923         }
1924         return result;
1925 }
1926
1927 void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
1928 {
1929         frr_with_mutex (&thread->mtx) {
1930                 thread->yield = yield_time;
1931         }
1932 }
1933
1934 void thread_getrusage(RUSAGE_T *r)
1935 {
1936         monotime(&r->real);
1937         if (!cputime_enabled) {
1938                 memset(&r->cpu, 0, sizeof(r->cpu));
1939                 return;
1940         }
1941
1942 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1943         /* not currently implemented in Linux's vDSO, but maybe at some point
1944          * in the future?
1945          */
1946         clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1947 #else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
1948 #if defined RUSAGE_THREAD
1949 #define FRR_RUSAGE RUSAGE_THREAD
1950 #else
1951 #define FRR_RUSAGE RUSAGE_SELF
1952 #endif
1953         getrusage(FRR_RUSAGE, &(r->cpu));
1954 #endif
1955 }
1956
1957 /*
1958  * Call a thread.
1959  *
1960  * This function will atomically update the thread's usage history. At present
1961  * this is the only spot where usage history is written. Nevertheless the code
1962  * has been written such that the introduction of writers in the future should
1963  * not need to update it provided the writers atomically perform only the
1964  * operations done here, i.e. updating the total and maximum times. In
1965  * particular, the maximum real and cpu times must be monotonically increasing
1966  * or this code is not correct.
1967  */
1968 void thread_call(struct thread *thread)
1969 {
1970         RUSAGE_T before, after;
1971
1972         /* if the thread being called is the CLI, it may change cputime_enabled
1973          * ("service cputime-stats" command), which can result in nonsensical
1974          * and very confusing warnings
1975          */
1976         bool cputime_enabled_here = cputime_enabled;
1977
1978         if (thread->master->ready_run_loop)
1979                 before = thread->master->last_getrusage;
1980         else
1981                 GETRUSAGE(&before);
1982
1983         thread->real = before.real;
1984
1985         frrtrace(9, frr_libfrr, thread_call, thread->master,
1986                  thread->xref->funcname, thread->xref->xref.file,
1987                  thread->xref->xref.line, NULL, thread->u.fd,
1988                  thread->u.val, thread->arg, thread->u.sands.tv_sec);
1989
1990         pthread_setspecific(thread_current, thread);
1991         (*thread->func)(thread);
1992         pthread_setspecific(thread_current, NULL);
1993
1994         GETRUSAGE(&after);
1995         thread->master->last_getrusage = after;
1996
1997         unsigned long walltime, cputime;
1998         unsigned long exp;
1999
2000         walltime = thread_consumed_time(&after, &before, &cputime);
2001
2002         /* update walltime */
2003         atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
2004                                   memory_order_seq_cst);
2005         exp = atomic_load_explicit(&thread->hist->real.max,
2006                                    memory_order_seq_cst);
2007         while (exp < walltime
2008                && !atomic_compare_exchange_weak_explicit(
2009                        &thread->hist->real.max, &exp, walltime,
2010                        memory_order_seq_cst, memory_order_seq_cst))
2011                 ;
2012
2013         if (cputime_enabled_here && cputime_enabled) {
2014                 /* update cputime */
2015                 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2016                                           memory_order_seq_cst);
2017                 exp = atomic_load_explicit(&thread->hist->cpu.max,
2018                                            memory_order_seq_cst);
2019                 while (exp < cputime
2020                        && !atomic_compare_exchange_weak_explicit(
2021                                &thread->hist->cpu.max, &exp, cputime,
2022                                memory_order_seq_cst, memory_order_seq_cst))
2023                         ;
2024         }
2025
2026         atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2027                                   memory_order_seq_cst);
2028         atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2029                                  memory_order_seq_cst);
2030
2031         if (cputime_enabled_here && cputime_enabled && cputime_threshold
2032             && cputime > cputime_threshold) {
2033                 /*
2034                  * We have a CPU Hog on our hands.  The time FRR has spent
2035                  * doing actual work (not sleeping) is greater than 5 seconds.
2036                  * Whinge about it now, so we're aware this is yet another task
2037                  * to fix.
2038                  */
2039                 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2040                                           1, memory_order_seq_cst);
2041                 flog_warn(
2042                         EC_LIB_SLOW_THREAD_CPU,
2043                         "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2044                         thread->xref->funcname, (unsigned long)thread->func,
2045                         walltime / 1000, cputime / 1000);
2046
2047         } else if (walltime_threshold && walltime > walltime_threshold) {
2048                 /*
2049                  * The runtime for a task is greater than 5 seconds, but the
2050                  * cpu time is under 5 seconds.  Let's whine about this because
2051                  * this could imply some sort of scheduling issue.
2052                  */
2053                 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2054                                           1, memory_order_seq_cst);
2055                 flog_warn(
2056                         EC_LIB_SLOW_THREAD_WALL,
2057                         "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
2058                         thread->xref->funcname, (unsigned long)thread->func,
2059                         walltime / 1000, cputime / 1000);
2060         }
2061 }
2062
2063 /* Execute thread */
2064 void _thread_execute(const struct xref_threadsched *xref,
2065                      struct thread_master *m, void (*func)(struct thread *),
2066                      void *arg, int val)
2067 {
2068         struct thread *thread;
2069
2070         /* Get or allocate new thread to execute. */
2071         frr_with_mutex (&m->mtx) {
2072                 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
2073
2074                 /* Set its event value. */
2075                 frr_with_mutex (&thread->mtx) {
2076                         thread->add_type = THREAD_EXECUTE;
2077                         thread->u.val = val;
2078                         thread->ref = &thread;
2079                 }
2080         }
2081
2082         /* Execute thread doing all accounting. */
2083         thread_call(thread);
2084
2085         /* Give back or free thread. */
2086         thread_add_unuse(m, thread);
2087 }
2088
2089 /* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2090 void debug_signals(const sigset_t *sigs)
2091 {
2092         int i, found;
2093         sigset_t tmpsigs;
2094         char buf[300];
2095
2096         /*
2097          * We're only looking at the non-realtime signals here, so we need
2098          * some limit value. Platform differences mean at some point we just
2099          * need to pick a reasonable value.
2100          */
2101 #if defined SIGRTMIN
2102 #  define LAST_SIGNAL SIGRTMIN
2103 #else
2104 #  define LAST_SIGNAL 32
2105 #endif
2106
2107
2108         if (sigs == NULL) {
2109                 sigemptyset(&tmpsigs);
2110                 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2111                 sigs = &tmpsigs;
2112         }
2113
2114         found = 0;
2115         buf[0] = '\0';
2116
2117         for (i = 0; i < LAST_SIGNAL; i++) {
2118                 char tmp[20];
2119
2120                 if (sigismember(sigs, i) > 0) {
2121                         if (found > 0)
2122                                 strlcat(buf, ",", sizeof(buf));
2123                         snprintf(tmp, sizeof(tmp), "%d", i);
2124                         strlcat(buf, tmp, sizeof(buf));
2125                         found++;
2126                 }
2127         }
2128
2129         if (found == 0)
2130                 snprintf(buf, sizeof(buf), "<none>");
2131
2132         zlog_debug("%s: %s", __func__, buf);
2133 }
2134
2135 static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2136                                    const struct thread *thread)
2137 {
2138         static const char * const types[] = {
2139                 [THREAD_READ] = "read",
2140                 [THREAD_WRITE] = "write",
2141                 [THREAD_TIMER] = "timer",
2142                 [THREAD_EVENT] = "event",
2143                 [THREAD_READY] = "ready",
2144                 [THREAD_UNUSED] = "unused",
2145                 [THREAD_EXECUTE] = "exec",
2146         };
2147         ssize_t rv = 0;
2148         char info[16] = "";
2149
2150         if (!thread)
2151                 return bputs(buf, "{(thread *)NULL}");
2152
2153         rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2154
2155         if (thread->type < array_size(types) && types[thread->type])
2156                 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2157         else
2158                 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2159
2160         switch (thread->type) {
2161         case THREAD_READ:
2162         case THREAD_WRITE:
2163                 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2164                 break;
2165
2166         case THREAD_TIMER:
2167                 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2168                 break;
2169         }
2170
2171         rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2172                         thread->xref->funcname, thread->xref->dest,
2173                         thread->xref->xref.file, thread->xref->xref.line);
2174         return rv;
2175 }
2176
2177 printfrr_ext_autoreg_p("TH", printfrr_thread);
2178 static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2179                                const void *ptr)
2180 {
2181         const struct thread *thread = ptr;
2182         struct timespec remain = {};
2183
2184         if (ea->fmt[0] == 'D') {
2185                 ea->fmt++;
2186                 return printfrr_thread_dbg(buf, ea, thread);
2187         }
2188
2189         if (!thread) {
2190                 /* need to jump over time formatting flag characters in the
2191                  * input format string, i.e. adjust ea->fmt!
2192                  */
2193                 printfrr_time(buf, ea, &remain,
2194                               TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2195                 return bputch(buf, '-');
2196         }
2197
2198         TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2199         return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2200 }