lib/event.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /* Thread management routine
   3  * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
   4  */
   5
   6 /* #define DEBUG */
   7
   8 #include <zebra.h>
   9 #include <sys/resource.h>
  10
  11 #include "event.h"
  12 #include "memory.h"
  13 #include "frrcu.h"
  14 #include "log.h"
  15 #include "hash.h"
  16 #include "command.h"
  17 #include "sigevent.h"
  18 #include "network.h"
  19 #include "jhash.h"
  20 #include "frratomic.h"
  21 #include "frr_pthread.h"
  22 #include "lib_errors.h"
  23 #include "libfrr_trace.h"
  24 #include "libfrr.h"
  25
/*
 * File-local memory-type tags.  Every XCALLOC/XSTRDUP/XFREE call in this
 * file names one of these (or MTYPE_TMP) as its allocation category.
 */
DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
DEFINE_MTYPE_STATIC(LIB, EVENT_MASTER, "Thread master");
DEFINE_MTYPE_STATIC(LIB, EVENT_POLL, "Thread Poll Info");
DEFINE_MTYPE_STATIC(LIB, EVENT_STATS, "Thread stats");

/* List container of struct event, linked through its "eventitem" member. */
DECLARE_LIST(event_list, struct event, eventitem);
  32
/*
 * Description of one cancellation request.
 * NOTE(review): presumably these are the elements kept on
 * event_master->cancel_req and released by cancelreq_del() -- confirm
 * against the cancellation code, which is not visible in this section.
 */
struct cancel_req {
        int flags;
        struct event *thread;
        void *eventobj;
        struct event **threadref;
};

/* Flags for task cancellation */
#define EVENT_CANCEL_FLAG_READY 0x01
  42
  43 static int event_timer_cmp(const struct event *a, const struct event *b)
  44 {
  45         if (a->u.sands.tv_sec < b->u.sands.tv_sec)
  46                 return -1;
  47         if (a->u.sands.tv_sec > b->u.sands.tv_sec)
  48                 return 1;
  49         if (a->u.sands.tv_usec < b->u.sands.tv_usec)
  50                 return -1;
  51         if (a->u.sands.tv_usec > b->u.sands.tv_usec)
  52                 return 1;
  53         return 0;
  54 }
  55
  56 DECLARE_HEAP(event_timer_list, struct event, timeritem, event_timer_cmp);
  57
  58 #if defined(__APPLE__)
  59 #include <mach/mach.h>
  60 #include <mach/mach_time.h>
  61 #endif
  62
  63 #define AWAKEN(m)                                                              \
  64         do {                                                                   \
  65                 const unsigned char wakebyte = 0x01;                           \
  66                 write(m->io_pipe[1], &wakebyte, 1);                            \
  67         } while (0);
  68
/* control variable for initializer */
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
/* pthread key created (once) by initializer() */
pthread_key_t thread_current;

/* masters_mtx is taken around every access to the "masters" list below. */
static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
/* All existing event masters; allocated lazily in thread_master_create(). */
static struct list *masters;

static void thread_free(struct event_master *master, struct event *thread);

/* Build-time defaults; each can be overridden with a compiler define. */
#ifndef EXCLUDE_CPU_TIME
#define EXCLUDE_CPU_TIME 0
#endif
#ifndef CONSUMED_TIME_CHECK
#define CONSUMED_TIME_CHECK 0
#endif

/*
 * Runtime settings, changed by the "service cputime-stats",
 * "service cputime-warning" and "service walltime-warning" commands.
 * The two thresholds hold the CLI value (documented there as milliseconds)
 * multiplied by 1000; 0 is what the "no" forms store.
 */
bool cputime_enabled = !EXCLUDE_CPU_TIME;
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
  88
  89 /* CLI start ---------------------------------------------------------------- */
  90 #include "lib/event_clippy.c"
  91
  92 static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
  93 {
  94         int size = sizeof(a->func);
  95
  96         return jhash(&a->func, size, 0);
  97 }
  98
/*
 * Equality callback for the cpu_record hash: two records are the same entry
 * exactly when they refer to the same callback function.
 */
static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
                               const struct cpu_thread_history *b)
{
        return a->func == b->func;
}
 104
 105 static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
 106 {
 107         struct cpu_thread_history *new;
 108         new = XCALLOC(MTYPE_EVENT_STATS, sizeof(struct cpu_thread_history));
 109         new->func = a->func;
 110         new->funcname = a->funcname;
 111         return new;
 112 }
 113
 114 static void cpu_record_hash_free(void *a)
 115 {
 116         struct cpu_thread_history *hist = a;
 117
 118         XFREE(MTYPE_EVENT_STATS, hist);
 119 }
 120
 121 static void vty_out_cpu_thread_history(struct vty *vty,
 122                                        struct cpu_thread_history *a)
 123 {
 124         vty_out(vty,
 125                 "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
 126                 a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
 127                 a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
 128                 (a->real.total / a->total_calls), a->real.max,
 129                 a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
 130         vty_out(vty, "  %c%c%c%c%c  %s\n",
 131                 a->types & (1 << EVENT_READ) ? 'R' : ' ',
 132                 a->types & (1 << EVENT_WRITE) ? 'W' : ' ',
 133                 a->types & (1 << EVENT_TIMER) ? 'T' : ' ',
 134                 a->types & (1 << EVENT_EVENT) ? 'E' : ' ',
 135                 a->types & (1 << EVENT_EXECUTE) ? 'X' : ' ', a->funcname);
 136 }
 137
 138 static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
 139 {
 140         struct cpu_thread_history *totals = args[0];
 141         struct cpu_thread_history copy;
 142         struct vty *vty = args[1];
 143         uint8_t *filter = args[2];
 144
 145         struct cpu_thread_history *a = bucket->data;
 146
 147         copy.total_active =
 148                 atomic_load_explicit(&a->total_active, memory_order_seq_cst);
 149         copy.total_calls =
 150                 atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
 151         copy.total_cpu_warn =
 152                 atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
 153         copy.total_wall_warn =
 154                 atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
 155         copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
 156                                                      memory_order_seq_cst);
 157         copy.cpu.total =
 158                 atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
 159         copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
 160         copy.real.total =
 161                 atomic_load_explicit(&a->real.total, memory_order_seq_cst);
 162         copy.real.max =
 163                 atomic_load_explicit(&a->real.max, memory_order_seq_cst);
 164         copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
 165         copy.funcname = a->funcname;
 166
 167         if (!(copy.types & *filter))
 168                 return;
 169
 170         vty_out_cpu_thread_history(vty, &copy);
 171         totals->total_active += copy.total_active;
 172         totals->total_calls += copy.total_calls;
 173         totals->total_cpu_warn += copy.total_cpu_warn;
 174         totals->total_wall_warn += copy.total_wall_warn;
 175         totals->total_starv_warn += copy.total_starv_warn;
 176         totals->real.total += copy.real.total;
 177         if (totals->real.max < copy.real.max)
 178                 totals->real.max = copy.real.max;
 179         totals->cpu.total += copy.cpu.total;
 180         if (totals->cpu.max < copy.cpu.max)
 181                 totals->cpu.max = copy.cpu.max;
 182 }
 183
 184 static void cpu_record_print(struct vty *vty, uint8_t filter)
 185 {
 186         struct cpu_thread_history tmp;
 187         void *args[3] = {&tmp, vty, &filter};
 188         struct event_master *m;
 189         struct listnode *ln;
 190
 191         if (!cputime_enabled)
 192                 vty_out(vty,
 193                         "\n"
 194                         "Collecting CPU time statistics is currently disabled.  Following statistics\n"
 195                         "will be zero or may display data from when collection was enabled.  Use the\n"
 196                         "  \"service cputime-stats\"  command to start collecting data.\n"
 197                         "\nCounters and wallclock times are always maintained and should be accurate.\n");
 198
 199         memset(&tmp, 0, sizeof(tmp));
 200         tmp.funcname = "TOTAL";
 201         tmp.types = filter;
 202
 203         frr_with_mutex (&masters_mtx) {
 204                 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
 205                         const char *name = m->name ? m->name : "main";
 206
 207                         char underline[strlen(name) + 1];
 208                         memset(underline, '-', sizeof(underline));
 209                         underline[sizeof(underline) - 1] = '\0';
 210
 211                         vty_out(vty, "\n");
 212                         vty_out(vty, "Showing statistics for pthread %s\n",
 213                                 name);
 214                         vty_out(vty, "-------------------------------%s\n",
 215                                 underline);
 216                         vty_out(vty, "%30s %18s %18s\n", "",
 217                                 "CPU (user+system):", "Real (wall-clock):");
 218                         vty_out(vty,
 219                                 "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
 220                         vty_out(vty, " Avg uSec Max uSecs");
 221                         vty_out(vty,
 222                                 "  CPU_Warn Wall_Warn Starv_Warn Type   Thread\n");
 223
 224                         if (m->cpu_record->count)
 225                                 hash_iterate(
 226                                         m->cpu_record,
 227                                         (void (*)(struct hash_bucket *,
 228                                                   void *))cpu_record_hash_print,
 229                                         args);
 230                         else
 231                                 vty_out(vty, "No data to display yet.\n");
 232
 233                         vty_out(vty, "\n");
 234                 }
 235         }
 236
 237         vty_out(vty, "\n");
 238         vty_out(vty, "Total thread statistics\n");
 239         vty_out(vty, "-------------------------\n");
 240         vty_out(vty, "%30s %18s %18s\n", "",
 241                 "CPU (user+system):", "Real (wall-clock):");
 242         vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
 243         vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
 244         vty_out(vty, "  Type  Thread\n");
 245
 246         if (tmp.total_calls > 0)
 247                 vty_out_cpu_thread_history(vty, &tmp);
 248 }
 249
 250 static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
 251 {
 252         uint8_t *filter = args[0];
 253         struct hash *cpu_record = args[1];
 254
 255         struct cpu_thread_history *a = bucket->data;
 256
 257         if (!(a->types & *filter))
 258                 return;
 259
 260         hash_release(cpu_record, bucket->data);
 261 }
 262
 263 static void cpu_record_clear(uint8_t filter)
 264 {
 265         uint8_t *tmp = &filter;
 266         struct event_master *m;
 267         struct listnode *ln;
 268
 269         frr_with_mutex (&masters_mtx) {
 270                 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
 271                         frr_with_mutex (&m->mtx) {
 272                                 void *args[2] = {tmp, m->cpu_record};
 273                                 hash_iterate(
 274                                         m->cpu_record,
 275                                         (void (*)(struct hash_bucket *,
 276                                                   void *))cpu_record_hash_clear,
 277                                         args);
 278                         }
 279                 }
 280         }
 281 }
 282
 283 static uint8_t parse_filter(const char *filterstr)
 284 {
 285         int i = 0;
 286         int filter = 0;
 287
 288         while (filterstr[i] != '\0') {
 289                 switch (filterstr[i]) {
 290                 case 'r':
 291                 case 'R':
 292                         filter |= (1 << EVENT_READ);
 293                         break;
 294                 case 'w':
 295                 case 'W':
 296                         filter |= (1 << EVENT_WRITE);
 297                         break;
 298                 case 't':
 299                 case 'T':
 300                         filter |= (1 << EVENT_TIMER);
 301                         break;
 302                 case 'e':
 303                 case 'E':
 304                         filter |= (1 << EVENT_EVENT);
 305                         break;
 306                 case 'x':
 307                 case 'X':
 308                         filter |= (1 << EVENT_EXECUTE);
 309                         break;
 310                 default:
 311                         break;
 312                 }
 313                 ++i;
 314         }
 315         return filter;
 316 }
 317
 318 DEFUN_NOSH (show_thread_cpu,
 319             show_thread_cpu_cmd,
 320             "show thread cpu [FILTER]",
 321             SHOW_STR
 322             "Thread information\n"
 323             "Thread CPU usage\n"
 324             "Display filter (rwtex)\n")
 325 {
 326         uint8_t filter = (uint8_t)-1U;
 327         int idx = 0;
 328
 329         if (argv_find(argv, argc, "FILTER", &idx)) {
 330                 filter = parse_filter(argv[idx]->arg);
 331                 if (!filter) {
 332                         vty_out(vty,
 333                                 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
 334                                 argv[idx]->arg);
 335                         return CMD_WARNING;
 336                 }
 337         }
 338
 339         cpu_record_print(vty, filter);
 340         return CMD_SUCCESS;
 341 }
 342
 343 DEFPY (service_cputime_stats,
 344        service_cputime_stats_cmd,
 345        "[no] service cputime-stats",
 346        NO_STR
 347        "Set up miscellaneous service\n"
 348        "Collect CPU usage statistics\n")
 349 {
 350         cputime_enabled = !no;
 351         return CMD_SUCCESS;
 352 }
 353
 354 DEFPY (service_cputime_warning,
 355        service_cputime_warning_cmd,
 356        "[no] service cputime-warning (1-4294967295)",
 357        NO_STR
 358        "Set up miscellaneous service\n"
 359        "Warn for tasks exceeding CPU usage threshold\n"
 360        "Warning threshold in milliseconds\n")
 361 {
 362         if (no)
 363                 cputime_threshold = 0;
 364         else
 365                 cputime_threshold = cputime_warning * 1000;
 366         return CMD_SUCCESS;
 367 }
 368
 369 ALIAS (service_cputime_warning,
 370        no_service_cputime_warning_cmd,
 371        "no service cputime-warning",
 372        NO_STR
 373        "Set up miscellaneous service\n"
 374        "Warn for tasks exceeding CPU usage threshold\n")
 375
 376 DEFPY (service_walltime_warning,
 377        service_walltime_warning_cmd,
 378        "[no] service walltime-warning (1-4294967295)",
 379        NO_STR
 380        "Set up miscellaneous service\n"
 381        "Warn for tasks exceeding total wallclock threshold\n"
 382        "Warning threshold in milliseconds\n")
 383 {
 384         if (no)
 385                 walltime_threshold = 0;
 386         else
 387                 walltime_threshold = walltime_warning * 1000;
 388         return CMD_SUCCESS;
 389 }
 390
 391 ALIAS (service_walltime_warning,
 392        no_service_walltime_warning_cmd,
 393        "no service walltime-warning",
 394        NO_STR
 395        "Set up miscellaneous service\n"
 396        "Warn for tasks exceeding total wallclock threshold\n")
 397
 398 static void show_thread_poll_helper(struct vty *vty, struct event_master *m)
 399 {
 400         const char *name = m->name ? m->name : "main";
 401         char underline[strlen(name) + 1];
 402         struct event *thread;
 403         uint32_t i;
 404
 405         memset(underline, '-', sizeof(underline));
 406         underline[sizeof(underline) - 1] = '\0';
 407
 408         vty_out(vty, "\nShowing poll FD's for %s\n", name);
 409         vty_out(vty, "----------------------%s\n", underline);
 410         vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
 411                 m->fd_limit);
 412         for (i = 0; i < m->handler.pfdcount; i++) {
 413                 vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
 414                         m->handler.pfds[i].fd, m->handler.pfds[i].events,
 415                         m->handler.pfds[i].revents);
 416
 417                 if (m->handler.pfds[i].events & POLLIN) {
 418                         thread = m->read[m->handler.pfds[i].fd];
 419
 420                         if (!thread)
 421                                 vty_out(vty, "ERROR ");
 422                         else
 423                                 vty_out(vty, "%s ", thread->xref->funcname);
 424                 } else
 425                         vty_out(vty, " ");
 426
 427                 if (m->handler.pfds[i].events & POLLOUT) {
 428                         thread = m->write[m->handler.pfds[i].fd];
 429
 430                         if (!thread)
 431                                 vty_out(vty, "ERROR\n");
 432                         else
 433                                 vty_out(vty, "%s\n", thread->xref->funcname);
 434                 } else
 435                         vty_out(vty, "\n");
 436         }
 437 }
 438
 439 DEFUN_NOSH (show_thread_poll,
 440             show_thread_poll_cmd,
 441             "show thread poll",
 442             SHOW_STR
 443             "Thread information\n"
 444             "Show poll FD's and information\n")
 445 {
 446         struct listnode *node;
 447         struct event_master *m;
 448
 449         frr_with_mutex (&masters_mtx) {
 450                 for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
 451                         show_thread_poll_helper(vty, m);
 452                 }
 453         }
 454
 455         return CMD_SUCCESS;
 456 }
 457
 458
 459 DEFUN (clear_thread_cpu,
 460        clear_thread_cpu_cmd,
 461        "clear thread cpu [FILTER]",
 462        "Clear stored data in all pthreads\n"
 463        "Thread information\n"
 464        "Thread CPU usage\n"
 465        "Display filter (rwtexb)\n")
 466 {
 467         uint8_t filter = (uint8_t)-1U;
 468         int idx = 0;
 469
 470         if (argv_find(argv, argc, "FILTER", &idx)) {
 471                 filter = parse_filter(argv[idx]->arg);
 472                 if (!filter) {
 473                         vty_out(vty,
 474                                 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
 475                                 argv[idx]->arg);
 476                         return CMD_WARNING;
 477                 }
 478         }
 479
 480         cpu_record_clear(filter);
 481         return CMD_SUCCESS;
 482 }
 483
 484 static void show_thread_timers_helper(struct vty *vty, struct event_master *m)
 485 {
 486         const char *name = m->name ? m->name : "main";
 487         char underline[strlen(name) + 1];
 488         struct event *thread;
 489
 490         memset(underline, '-', sizeof(underline));
 491         underline[sizeof(underline) - 1] = '\0';
 492
 493         vty_out(vty, "\nShowing timers for %s\n", name);
 494         vty_out(vty, "-------------------%s\n", underline);
 495
 496         frr_each (event_timer_list, &m->timer, thread) {
 497                 vty_out(vty, "  %-50s%pTH\n", thread->hist->funcname, thread);
 498         }
 499 }
 500
 501 DEFPY_NOSH (show_thread_timers,
 502             show_thread_timers_cmd,
 503             "show thread timers",
 504             SHOW_STR
 505             "Thread information\n"
 506             "Show all timers and how long they have in the system\n")
 507 {
 508         struct listnode *node;
 509         struct event_master *m;
 510
 511         frr_with_mutex (&masters_mtx) {
 512                 for (ALL_LIST_ELEMENTS_RO(masters, node, m))
 513                         show_thread_timers_helper(vty, m);
 514         }
 515
 516         return CMD_SUCCESS;
 517 }
 518
/*
 * Register the CLI commands defined in this file: the "show thread ..."
 * commands in VIEW_NODE, "clear thread cpu" in ENABLE_NODE and the
 * "service cputime-*/walltime-*" settings in CONFIG_NODE.
 */
void event_cmd_init(void)
{
        /* Inspection and maintenance commands. */
        install_element(VIEW_NODE, &show_thread_cpu_cmd);
        install_element(VIEW_NODE, &show_thread_poll_cmd);
        install_element(ENABLE_NODE, &clear_thread_cpu_cmd);

        /* Configuration of statistics collection and warning thresholds. */
        install_element(CONFIG_NODE, &service_cputime_stats_cmd);
        install_element(CONFIG_NODE, &service_cputime_warning_cmd);
        install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
        install_element(CONFIG_NODE, &service_walltime_warning_cmd);
        install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);

        install_element(VIEW_NODE, &show_thread_timers_cmd);
}
 533 /* CLI end ------------------------------------------------------------------ */
 534
 535
/*
 * Element destructor installed on each master's cancel_req list (see
 * thread_master_create()); frees one list element as MTYPE_TMP memory.
 */
static void cancelreq_del(void *cr)
{
        XFREE(MTYPE_TMP, cr);
}
 540
/*
 * initializer, only ever called once (through pthread_once() in
 * thread_master_create()).  Creates the thread_current pthread key without
 * a destructor.
 * NOTE(review): the result of pthread_key_create() is not checked.
 */
static void initializer(void)
{
        pthread_key_create(&thread_current, NULL);
}
 546
/*
 * Allocate and initialize a new event master and add it to the global list
 * of masters.
 *
 * name: label for the master; NULL selects "default".  The string is
 *       duplicated, the caller keeps ownership of its own copy.
 *
 * Returns the new master; it is released with thread_master_free().
 */
struct event_master *thread_master_create(const char *name)
{
        struct event_master *rv;
        struct rlimit limit;

        /* One-time, process-wide setup (creates the thread_current key). */
        pthread_once(&init_once, &initializer);

        rv = XCALLOC(MTYPE_EVENT_MASTER, sizeof(struct event_master));

        /* Initialize master mutex */
        pthread_mutex_init(&rv->mtx, NULL);
        pthread_cond_init(&rv->cancel_cond, NULL);

        /* Set name */
        name = name ? name : "default";
        rv->name = XSTRDUP(MTYPE_EVENT_MASTER, name);

        /* Initialize I/O task data structures */

        /* Use configured limit if present, ulimit otherwise. */
        rv->fd_limit = frr_get_fd_limit();
        if (rv->fd_limit == 0) {
                /* NOTE(review): getrlimit() result is not checked, and
                 * rlim_cur is narrowed to int -- confirm this is safe for
                 * very large / unlimited settings.
                 */
                getrlimit(RLIMIT_NOFILE, &limit);
                rv->fd_limit = (int)limit.rlim_cur;
        }

        /* Per-fd lookup tables: one slot per possible descriptor. */
        rv->read = XCALLOC(MTYPE_EVENT_POLL,
                           sizeof(struct event *) * rv->fd_limit);

        rv->write = XCALLOC(MTYPE_EVENT_POLL,
                            sizeof(struct event *) * rv->fd_limit);

        /* Statistics table keyed by callback function; the 32 extra bytes
         * leave room for the fixed suffix appended to the name below.
         */
        char tmhashname[strlen(name) + 32];
        snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
                 name);
        rv->cpu_record = hash_create_size(
                8, (unsigned int (*)(const void *))cpu_record_hash_key,
                (bool (*)(const void *, const void *))cpu_record_hash_cmp,
                tmhashname);

        event_list_init(&rv->event);
        event_list_init(&rv->ready);
        event_list_init(&rv->unuse);
        event_timer_list_init(&rv->timer);

        /* Initialize event_fetch() settings */
        rv->spin = true;
        rv->handle_signals = true;

        /* Set pthread owner, should be updated by actual owner */
        rv->owner = pthread_self();
        rv->cancel_req = list_new();
        rv->cancel_req->del = cancelreq_del;
        rv->canceled = true;

        /* Initialize pipe poker */
        /* NOTE(review): the result of pipe() is not checked; on failure
         * io_pipe keeps the zeroes left by XCALLOC.
         */
        pipe(rv->io_pipe);
        set_nonblocking(rv->io_pipe[0]);
        set_nonblocking(rv->io_pipe[1]);

        /* Initialize data structures for poll() */
        rv->handler.pfdsize = rv->fd_limit;
        rv->handler.pfdcount = 0;
        rv->handler.pfds = XCALLOC(MTYPE_EVENT_MASTER,
                                   sizeof(struct pollfd) * rv->handler.pfdsize);
        rv->handler.copy = XCALLOC(MTYPE_EVENT_MASTER,
                                   sizeof(struct pollfd) * rv->handler.pfdsize);

        /* add to list of threadmasters */
        frr_with_mutex (&masters_mtx) {
                /* The global list is created by the first master. */
                if (!masters)
                        masters = list_new();

                listnode_add(masters, rv);
        }

        return rv;
}
 625
/*
 * Replace the name of "master" with a private copy of "name".  The old name
 * is freed and the new one installed while holding the master's mutex.
 * NOTE(review): "name" is passed straight to XSTRDUP(); callers presumably
 * must not pass NULL -- confirm.
 */
void thread_master_set_name(struct event_master *master, const char *name)
{
        frr_with_mutex (&master->mtx) {
                XFREE(MTYPE_EVENT_MASTER, master->name);
                master->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
        }
}
 633
 634 #define EVENT_UNUSED_DEPTH 10
 635
 636 /* Move thread to unuse list. */
 637 static void thread_add_unuse(struct event_master *m, struct event *thread)
 638 {
 639         pthread_mutex_t mtxc = thread->mtx;
 640
 641         assert(m != NULL && thread != NULL);
 642
 643         thread->hist->total_active--;
 644         memset(thread, 0, sizeof(struct event));
 645         thread->type = EVENT_UNUSED;
 646
 647         /* Restore the thread mutex context. */
 648         thread->mtx = mtxc;
 649
 650         if (event_list_count(&m->unuse) < EVENT_UNUSED_DEPTH) {
 651                 event_list_add_tail(&m->unuse, thread);
 652                 return;
 653         }
 654
 655         thread_free(m, thread);
 656 }
 657
 658 /* Free all unused thread. */
 659 static void thread_list_free(struct event_master *m,
 660                              struct event_list_head *list)
 661 {
 662         struct event *t;
 663
 664         while ((t = event_list_pop(list)))
 665                 thread_free(m, t);
 666 }
 667
 668 static void thread_array_free(struct event_master *m,
 669                               struct event **thread_array)
 670 {
 671         struct event *t;
 672         int index;
 673
 674         for (index = 0; index < m->fd_limit; ++index) {
 675                 t = thread_array[index];
 676                 if (t) {
 677                         thread_array[index] = NULL;
 678                         thread_free(m, t);
 679                 }
 680         }
 681         XFREE(MTYPE_EVENT_POLL, thread_array);
 682 }
 683
 684 /*
 685  * thread_master_free_unused
 686  *
 687  * As threads are finished with they are put on the
 688  * unuse list for later reuse.
 689  * If we are shutting down, Free up unused threads
 690  * So we can see if we forget to shut anything off
 691  */
 692 void thread_master_free_unused(struct event_master *m)
 693 {
 694         frr_with_mutex (&m->mtx) {
 695                 struct event *t;
 696                 while ((t = event_list_pop(&m->unuse)))
 697                         thread_free(m, t);
 698         }
 699 }
 700
 701 /* Stop thread scheduler. */
 702 void thread_master_free(struct event_master *m)
 703 {
 704         struct event *t;
 705
 706         frr_with_mutex (&masters_mtx) {
 707                 listnode_delete(masters, m);
 708                 if (masters->count == 0) {
 709                         list_delete(&masters);
 710                 }
 711         }
 712
 713         thread_array_free(m, m->read);
 714         thread_array_free(m, m->write);
 715         while ((t = event_timer_list_pop(&m->timer)))
 716                 thread_free(m, t);
 717         thread_list_free(m, &m->event);
 718         thread_list_free(m, &m->ready);
 719         thread_list_free(m, &m->unuse);
 720         pthread_mutex_destroy(&m->mtx);
 721         pthread_cond_destroy(&m->cancel_cond);
 722         close(m->io_pipe[0]);
 723         close(m->io_pipe[1]);
 724         list_delete(&m->cancel_req);
 725         m->cancel_req = NULL;
 726
 727         hash_clean_and_free(&m->cpu_record, cpu_record_hash_free);
 728
 729         XFREE(MTYPE_EVENT_MASTER, m->name);
 730         XFREE(MTYPE_EVENT_MASTER, m->handler.pfds);
 731         XFREE(MTYPE_EVENT_MASTER, m->handler.copy);
 732         XFREE(MTYPE_EVENT_MASTER, m);
 733 }
 734
 735 /* Return remain time in milliseconds. */
 736 unsigned long event_timer_remain_msec(struct event *thread)
 737 {
 738         int64_t remain;
 739
 740         if (!event_is_scheduled(thread))
 741                 return 0;
 742
 743         frr_with_mutex (&thread->mtx) {
 744                 remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
 745         }
 746
 747         return remain < 0 ? 0 : remain;
 748 }
 749
 750 /* Return remain time in seconds. */
 751 unsigned long event_timer_remain_second(struct event *thread)
 752 {
 753         return event_timer_remain_msec(thread) / 1000LL;
 754 }
 755
 756 struct timeval event_timer_remain(struct event *thread)
 757 {
 758         struct timeval remain;
 759         frr_with_mutex (&thread->mtx) {
 760                 monotime_until(&thread->u.sands, &remain);
 761         }
 762         return remain;
 763 }
 764
 765 static int time_hhmmss(char *buf, int buf_size, long sec)
 766 {
 767         long hh;
 768         long mm;
 769         int wr;
 770
 771         assert(buf_size >= 8);
 772
 773         hh = sec / 3600;
 774         sec %= 3600;
 775         mm = sec / 60;
 776         sec %= 60;
 777
 778         wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
 779
 780         return wr != 8;
 781 }
 782
 783 char *event_timer_to_hhmmss(char *buf, int buf_size, struct event *t_timer)
 784 {
 785         if (t_timer) {
 786                 time_hhmmss(buf, buf_size, event_timer_remain_second(t_timer));
 787         } else {
 788                 snprintf(buf, buf_size, "--:--:--");
 789         }
 790         return buf;
 791 }
 792
 793 /* Get new thread.  */
 794 static struct event *thread_get(struct event_master *m, uint8_t type,
 795                                 void (*func)(struct event *), void *arg,
 796                                 const struct xref_eventsched *xref)
 797 {
 798         struct event *thread = event_list_pop(&m->unuse);
 799         struct cpu_thread_history tmp;
 800
 801         if (!thread) {
 802                 thread = XCALLOC(MTYPE_THREAD, sizeof(struct event));
 803                 /* mutex only needs to be initialized at struct creation. */
 804                 pthread_mutex_init(&thread->mtx, NULL);
 805                 m->alloc++;
 806         }
 807
 808         thread->type = type;
 809         thread->add_type = type;
 810         thread->master = m;
 811         thread->arg = arg;
 812         thread->yield = EVENT_YIELD_TIME_SLOT; /* default */
 813         thread->ref = NULL;
 814         thread->ignore_timer_late = false;
 815
 816         /*
 817          * So if the passed in funcname is not what we have
 818          * stored that means the thread->hist needs to be
 819          * updated.  We keep the last one around in unused
 820          * under the assumption that we are probably
 821          * going to immediately allocate the same
 822          * type of thread.
 823          * This hopefully saves us some serious
 824          * hash_get lookups.
 825          */
 826         if ((thread->xref && thread->xref->funcname != xref->funcname)
 827             || thread->func != func) {
 828                 tmp.func = func;
 829                 tmp.funcname = xref->funcname;
 830                 thread->hist =
 831                         hash_get(m->cpu_record, &tmp,
 832                                  (void *(*)(void *))cpu_record_hash_alloc);
 833         }
 834         thread->hist->total_active++;
 835         thread->func = func;
 836         thread->xref = xref;
 837
 838         return thread;
 839 }
 840
/*
 * Destroy one event: decrement the master's allocation counter (asserting
 * that it is positive), destroy the event's mutex and free its memory.
 */
static void thread_free(struct event_master *master, struct event *thread)
{
        /* Update statistics. */
        assert(master->alloc > 0);
        master->alloc--;

        /* Free allocated resources. */
        pthread_mutex_destroy(&thread->mtx);
        XFREE(MTYPE_THREAD, thread);
}
 851
 852 static int fd_poll(struct event_master *m, const struct timeval *timer_wait,
 853                    bool *eintr_p)
 854 {
 855         sigset_t origsigs;
 856         unsigned char trash[64];
 857         nfds_t count = m->handler.copycount;
 858
 859         /*
 860          * If timer_wait is null here, that means poll() should block
 861          * indefinitely, unless the thread_master has overridden it by setting
 862          * ->selectpoll_timeout.
 863          *
 864          * If the value is positive, it specifies the maximum number of
 865          * milliseconds to wait. If the timeout is -1, it specifies that
 866          * we should never wait and always return immediately even if no
 867          * event is detected. If the value is zero, the behavior is default.
 868          */
 869         int timeout = -1;
 870
 871         /* number of file descriptors with events */
 872         int num;
 873
 874         if (timer_wait != NULL
 875             && m->selectpoll_timeout == 0) // use the default value
 876                 timeout = (timer_wait->tv_sec * 1000)
 877                           + (timer_wait->tv_usec / 1000);
 878         else if (m->selectpoll_timeout > 0) // use the user's timeout
 879                 timeout = m->selectpoll_timeout;
 880         else if (m->selectpoll_timeout
 881                  < 0) // effect a poll (return immediately)
 882                 timeout = 0;
 883
 884         zlog_tls_buffer_flush();
 885         rcu_read_unlock();
 886         rcu_assert_read_unlocked();
 887
 888         /* add poll pipe poker */
 889         assert(count + 1 < m->handler.pfdsize);
 890         m->handler.copy[count].fd = m->io_pipe[0];
 891         m->handler.copy[count].events = POLLIN;
 892         m->handler.copy[count].revents = 0x00;
 893
 894         /* We need to deal with a signal-handling race here: we
 895          * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
 896          * that may arrive just before we enter poll(). We will block the
 897          * key signals, then check whether any have arrived - if so, we return
 898          * before calling poll(). If not, we'll re-enable the signals
 899          * in the ppoll() call.
 900          */
 901
 902         sigemptyset(&origsigs);
 903         if (m->handle_signals) {
 904                 /* Main pthread that handles the app signals */
 905                 if (frr_sigevent_check(&origsigs)) {
 906                         /* Signal to process - restore signal mask and return */
 907                         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 908                         num = -1;
 909                         *eintr_p = true;
 910                         goto done;
 911                 }
 912         } else {
 913                 /* Don't make any changes for the non-main pthreads */
 914                 pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
 915         }
 916
 917 #if defined(HAVE_PPOLL)
 918         struct timespec ts, *tsp;
 919
 920         if (timeout >= 0) {
 921                 ts.tv_sec = timeout / 1000;
 922                 ts.tv_nsec = (timeout % 1000) * 1000000;
 923                 tsp = &ts;
 924         } else
 925                 tsp = NULL;
 926
 927         num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
 928         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 929 #else
 930         /* Not ideal - there is a race after we restore the signal mask */
 931         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 932         num = poll(m->handler.copy, count + 1, timeout);
 933 #endif
 934
 935 done:
 936
 937         if (num < 0 && errno == EINTR)
 938                 *eintr_p = true;
 939
 940         if (num > 0 && m->handler.copy[count].revents != 0 && num--)
 941                 while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
 942                         ;
 943
 944         rcu_read_lock();
 945
 946         return num;
 947 }
 948
 949 /* Add new read thread. */
 950 void _event_add_read_write(const struct xref_eventsched *xref,
 951                            struct event_master *m, void (*func)(struct event *),
 952                            void *arg, int fd, struct event **t_ptr)
 953 {
 954         int dir = xref->event_type;
 955         struct event *thread = NULL;
 956         struct event **thread_array;
 957
 958         if (dir == EVENT_READ)
 959                 frrtrace(9, frr_libfrr, schedule_read, m,
 960                          xref->funcname, xref->xref.file, xref->xref.line,
 961                          t_ptr, fd, 0, arg, 0);
 962         else
 963                 frrtrace(9, frr_libfrr, schedule_write, m,
 964                          xref->funcname, xref->xref.file, xref->xref.line,
 965                          t_ptr, fd, 0, arg, 0);
 966
 967         assert(fd >= 0);
 968         if (fd >= m->fd_limit)
 969                 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
 970
 971         frr_with_mutex (&m->mtx) {
 972                 if (t_ptr && *t_ptr)
 973                         // thread is already scheduled; don't reschedule
 974                         break;
 975
 976                 /* default to a new pollfd */
 977                 nfds_t queuepos = m->handler.pfdcount;
 978
 979                 if (dir == EVENT_READ)
 980                         thread_array = m->read;
 981                 else
 982                         thread_array = m->write;
 983
 984                 /* if we already have a pollfd for our file descriptor, find and
 985                  * use it */
 986                 for (nfds_t i = 0; i < m->handler.pfdcount; i++)
 987                         if (m->handler.pfds[i].fd == fd) {
 988                                 queuepos = i;
 989
 990 #ifdef DEV_BUILD
 991                                 /*
 992                                  * What happens if we have a thread already
 993                                  * created for this event?
 994                                  */
 995                                 if (thread_array[fd])
 996                                         assert(!"Thread already scheduled for file descriptor");
 997 #endif
 998                                 break;
 999                         }
1000
1001                 /* make sure we have room for this fd + pipe poker fd */
1002                 assert(queuepos + 1 < m->handler.pfdsize);
1003
1004                 thread = thread_get(m, dir, func, arg, xref);
1005
1006                 m->handler.pfds[queuepos].fd = fd;
1007                 m->handler.pfds[queuepos].events |=
1008                         (dir == EVENT_READ ? POLLIN : POLLOUT);
1009
1010                 if (queuepos == m->handler.pfdcount)
1011                         m->handler.pfdcount++;
1012
1013                 if (thread) {
1014                         frr_with_mutex (&thread->mtx) {
1015                                 thread->u.fd = fd;
1016                                 thread_array[thread->u.fd] = thread;
1017                         }
1018
1019                         if (t_ptr) {
1020                                 *t_ptr = thread;
1021                                 thread->ref = t_ptr;
1022                         }
1023                 }
1024
1025                 AWAKEN(m);
1026         }
1027 }
1028
1029 static void _event_add_timer_timeval(const struct xref_eventsched *xref,
1030                                      struct event_master *m,
1031                                      void (*func)(struct event *), void *arg,
1032                                      struct timeval *time_relative,
1033                                      struct event **t_ptr)
1034 {
1035         struct event *thread;
1036         struct timeval t;
1037
1038         assert(m != NULL);
1039
1040         assert(time_relative);
1041
1042         frrtrace(9, frr_libfrr, schedule_timer, m,
1043                  xref->funcname, xref->xref.file, xref->xref.line,
1044                  t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
1045
1046         /* Compute expiration/deadline time. */
1047         monotime(&t);
1048         timeradd(&t, time_relative, &t);
1049
1050         frr_with_mutex (&m->mtx) {
1051                 if (t_ptr && *t_ptr)
1052                         /* thread is already scheduled; don't reschedule */
1053                         return;
1054
1055                 thread = thread_get(m, EVENT_TIMER, func, arg, xref);
1056
1057                 frr_with_mutex (&thread->mtx) {
1058                         thread->u.sands = t;
1059                         event_timer_list_add(&m->timer, thread);
1060                         if (t_ptr) {
1061                                 *t_ptr = thread;
1062                                 thread->ref = t_ptr;
1063                         }
1064                 }
1065
1066                 /* The timer list is sorted - if this new timer
1067                  * might change the time we'll wait for, give the pthread
1068                  * a chance to re-compute.
1069                  */
1070                 if (event_timer_list_first(&m->timer) == thread)
1071                         AWAKEN(m);
1072         }
1073 #define ONEYEAR2SEC (60 * 60 * 24 * 365)
1074         if (time_relative->tv_sec > ONEYEAR2SEC)
1075                 flog_err(
1076                         EC_LIB_TIMER_TOO_LONG,
1077                         "Timer: %pTHD is created with an expiration that is greater than 1 year",
1078                         thread);
1079 }
1080
1081
1082 /* Add timer event thread. */
1083 void _event_add_timer(const struct xref_eventsched *xref,
1084                       struct event_master *m, void (*func)(struct event *),
1085                       void *arg, long timer, struct event **t_ptr)
1086 {
1087         struct timeval trel;
1088
1089         assert(m != NULL);
1090
1091         trel.tv_sec = timer;
1092         trel.tv_usec = 0;
1093
1094         _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1095 }
1096
1097 /* Add timer event thread with "millisecond" resolution */
1098 void _event_add_timer_msec(const struct xref_eventsched *xref,
1099                            struct event_master *m, void (*func)(struct event *),
1100                            void *arg, long timer, struct event **t_ptr)
1101 {
1102         struct timeval trel;
1103
1104         assert(m != NULL);
1105
1106         trel.tv_sec = timer / 1000;
1107         trel.tv_usec = 1000 * (timer % 1000);
1108
1109         _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1110 }
1111
/*
 * Add timer event thread with "timeval" resolution.
 *
 * 'tv' is the delay relative to now; it is passed straight through to
 * _event_add_timer_timeval(), which does all the work.
 */
void _event_add_timer_tv(const struct xref_eventsched *xref,
                         struct event_master *m, void (*func)(struct event *),
                         void *arg, struct timeval *tv, struct event **t_ptr)
{
        _event_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}
1119
1120 /* Add simple event thread. */
1121 void _event_add_event(const struct xref_eventsched *xref,
1122                       struct event_master *m, void (*func)(struct event *),
1123                       void *arg, int val, struct event **t_ptr)
1124 {
1125         struct event *thread = NULL;
1126
1127         frrtrace(9, frr_libfrr, schedule_event, m,
1128                  xref->funcname, xref->xref.file, xref->xref.line,
1129                  t_ptr, 0, val, arg, 0);
1130
1131         assert(m != NULL);
1132
1133         frr_with_mutex (&m->mtx) {
1134                 if (t_ptr && *t_ptr)
1135                         /* thread is already scheduled; don't reschedule */
1136                         break;
1137
1138                 thread = thread_get(m, EVENT_EVENT, func, arg, xref);
1139                 frr_with_mutex (&thread->mtx) {
1140                         thread->u.val = val;
1141                         event_list_add_tail(&m->event, thread);
1142                 }
1143
1144                 if (t_ptr) {
1145                         *t_ptr = thread;
1146                         thread->ref = t_ptr;
1147                 }
1148
1149                 AWAKEN(m);
1150         }
1151 }
1152
1153 /* Thread cancellation ------------------------------------------------------ */
1154
1155 /**
1156  * NOT's out the .events field of pollfd corresponding to the given file
1157  * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1158  *
1159  * This needs to happen for both copies of pollfd's. See 'event_fetch'
1160  * implementation for details.
1161  *
1162  * @param master
1163  * @param fd
1164  * @param state the event to cancel. One or more (OR'd together) of the
1165  * following:
1166  *   - POLLIN
1167  *   - POLLOUT
1168  */
1169 static void event_cancel_rw(struct event_master *master, int fd, short state,
1170                             int idx_hint)
1171 {
1172         bool found = false;
1173
1174         /* find the index of corresponding pollfd */
1175         nfds_t i;
1176
1177         /* Cancel POLLHUP too just in case some bozo set it */
1178         state |= POLLHUP;
1179
1180         /* Some callers know the index of the pfd already */
1181         if (idx_hint >= 0) {
1182                 i = idx_hint;
1183                 found = true;
1184         } else {
1185                 /* Have to look for the fd in the pfd array */
1186                 for (i = 0; i < master->handler.pfdcount; i++)
1187                         if (master->handler.pfds[i].fd == fd) {
1188                                 found = true;
1189                                 break;
1190                         }
1191         }
1192
1193         if (!found) {
1194                 zlog_debug(
1195                         "[!] Received cancellation request for nonexistent rw job");
1196                 zlog_debug("[!] threadmaster: %s | fd: %d",
1197                            master->name ? master->name : "", fd);
1198                 return;
1199         }
1200
1201         /* NOT out event. */
1202         master->handler.pfds[i].events &= ~(state);
1203
1204         /* If all events are canceled, delete / resize the pollfd array. */
1205         if (master->handler.pfds[i].events == 0) {
1206                 memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1207                         (master->handler.pfdcount - i - 1)
1208                                 * sizeof(struct pollfd));
1209                 master->handler.pfdcount--;
1210                 master->handler.pfds[master->handler.pfdcount].fd = 0;
1211                 master->handler.pfds[master->handler.pfdcount].events = 0;
1212         }
1213
1214         /* If we have the same pollfd in the copy, perform the same operations,
1215          * otherwise return. */
1216         if (i >= master->handler.copycount)
1217                 return;
1218
1219         master->handler.copy[i].events &= ~(state);
1220
1221         if (master->handler.copy[i].events == 0) {
1222                 memmove(master->handler.copy + i, master->handler.copy + i + 1,
1223                         (master->handler.copycount - i - 1)
1224                                 * sizeof(struct pollfd));
1225                 master->handler.copycount--;
1226                 master->handler.copy[master->handler.copycount].fd = 0;
1227                 master->handler.copy[master->handler.copycount].events = 0;
1228         }
1229 }
1230
1231 /*
1232  * Process task cancellation given a task argument: iterate through the
1233  * various lists of tasks, looking for any that match the argument.
1234  */
1235 static void cancel_arg_helper(struct event_master *master,
1236                               const struct cancel_req *cr)
1237 {
1238         struct event *t;
1239         nfds_t i;
1240         int fd;
1241         struct pollfd *pfd;
1242
1243         /* We're only processing arg-based cancellations here. */
1244         if (cr->eventobj == NULL)
1245                 return;
1246
1247         /* First process the ready lists. */
1248         frr_each_safe (event_list, &master->event, t) {
1249                 if (t->arg != cr->eventobj)
1250                         continue;
1251                 event_list_del(&master->event, t);
1252                 if (t->ref)
1253                         *t->ref = NULL;
1254                 thread_add_unuse(master, t);
1255         }
1256
1257         frr_each_safe (event_list, &master->ready, t) {
1258                 if (t->arg != cr->eventobj)
1259                         continue;
1260                 event_list_del(&master->ready, t);
1261                 if (t->ref)
1262                         *t->ref = NULL;
1263                 thread_add_unuse(master, t);
1264         }
1265
1266         /* If requested, stop here and ignore io and timers */
1267         if (CHECK_FLAG(cr->flags, EVENT_CANCEL_FLAG_READY))
1268                 return;
1269
1270         /* Check the io tasks */
1271         for (i = 0; i < master->handler.pfdcount;) {
1272                 pfd = master->handler.pfds + i;
1273
1274                 if (pfd->events & POLLIN)
1275                         t = master->read[pfd->fd];
1276                 else
1277                         t = master->write[pfd->fd];
1278
1279                 if (t && t->arg == cr->eventobj) {
1280                         fd = pfd->fd;
1281
1282                         /* Found a match to cancel: clean up fd arrays */
1283                         event_cancel_rw(master, pfd->fd, pfd->events, i);
1284
1285                         /* Clean up thread arrays */
1286                         master->read[fd] = NULL;
1287                         master->write[fd] = NULL;
1288
1289                         /* Clear caller's ref */
1290                         if (t->ref)
1291                                 *t->ref = NULL;
1292
1293                         thread_add_unuse(master, t);
1294
1295                         /* Don't increment 'i' since the cancellation will have
1296                          * removed the entry from the pfd array
1297                          */
1298                 } else
1299                         i++;
1300         }
1301
1302         /* Check the timer tasks */
1303         t = event_timer_list_first(&master->timer);
1304         while (t) {
1305                 struct event *t_next;
1306
1307                 t_next = event_timer_list_next(&master->timer, t);
1308
1309                 if (t->arg == cr->eventobj) {
1310                         event_timer_list_del(&master->timer, t);
1311                         if (t->ref)
1312                                 *t->ref = NULL;
1313                         thread_add_unuse(master, t);
1314                 }
1315
1316                 t = t_next;
1317         }
1318 }
1319
1320 /**
1321  * Process cancellation requests.
1322  *
1323  * This may only be run from the pthread which owns the thread_master.
1324  *
1325  * @param master the thread master to process
1326  * @REQUIRE master->mtx
1327  */
1328 static void do_event_cancel(struct event_master *master)
1329 {
1330         struct event_list_head *list = NULL;
1331         struct event **thread_array = NULL;
1332         struct event *thread;
1333         struct cancel_req *cr;
1334         struct listnode *ln;
1335
1336         for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
1337                 /*
1338                  * If this is an event object cancellation, search
1339                  * through task lists deleting any tasks which have the
1340                  * specified argument - use this handy helper function.
1341                  */
1342                 if (cr->eventobj) {
1343                         cancel_arg_helper(master, cr);
1344                         continue;
1345                 }
1346
1347                 /*
1348                  * The pointer varies depending on whether the cancellation
1349                  * request was made asynchronously or not. If it was, we
1350                  * need to check whether the thread even exists anymore
1351                  * before cancelling it.
1352                  */
1353                 thread = (cr->thread) ? cr->thread : *cr->threadref;
1354
1355                 if (!thread)
1356                         continue;
1357
1358                 list = NULL;
1359                 thread_array = NULL;
1360
1361                 /* Determine the appropriate queue to cancel the thread from */
1362                 switch (thread->type) {
1363                 case EVENT_READ:
1364                         event_cancel_rw(master, thread->u.fd, POLLIN, -1);
1365                         thread_array = master->read;
1366                         break;
1367                 case EVENT_WRITE:
1368                         event_cancel_rw(master, thread->u.fd, POLLOUT, -1);
1369                         thread_array = master->write;
1370                         break;
1371                 case EVENT_TIMER:
1372                         event_timer_list_del(&master->timer, thread);
1373                         break;
1374                 case EVENT_EVENT:
1375                         list = &master->event;
1376                         break;
1377                 case EVENT_READY:
1378                         list = &master->ready;
1379                         break;
1380                 case EVENT_UNUSED:
1381                 case EVENT_EXECUTE:
1382                         continue;
1383                         break;
1384                 }
1385
1386                 if (list) {
1387                         event_list_del(list, thread);
1388                 } else if (thread_array) {
1389                         thread_array[thread->u.fd] = NULL;
1390                 }
1391
1392                 if (thread->ref)
1393                         *thread->ref = NULL;
1394
1395                 thread_add_unuse(thread->master, thread);
1396         }
1397
1398         /* Delete and free all cancellation requests */
1399         if (master->cancel_req)
1400                 list_delete_all_node(master->cancel_req);
1401
1402         /* Wake up any threads which may be blocked in event_cancel_async() */
1403         master->canceled = true;
1404         pthread_cond_broadcast(&master->cancel_cond);
1405 }
1406
1407 /*
1408  * Helper function used for multiple flavors of arg-based cancellation.
1409  */
1410 static void cancel_event_helper(struct event_master *m, void *arg, int flags)
1411 {
1412         struct cancel_req *cr;
1413
1414         assert(m->owner == pthread_self());
1415
1416         /* Only worth anything if caller supplies an arg. */
1417         if (arg == NULL)
1418                 return;
1419
1420         cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1421
1422         cr->flags = flags;
1423
1424         frr_with_mutex (&m->mtx) {
1425                 cr->eventobj = arg;
1426                 listnode_add(m->cancel_req, cr);
1427                 do_event_cancel(m);
1428         }
1429 }
1430
/**
 * Cancel any events which have the specified argument.
 *
 * Thin wrapper around cancel_event_helper() with no flags set.
 *
 * MT-Unsafe
 *
 * @param master the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void event_cancel_event(struct event_master *master, void *arg)
{
        cancel_event_helper(master, arg, 0);
}
1443
/*
 * Cancel ready tasks with an arg matching 'arg'
 *
 * Thin wrapper around cancel_event_helper() that passes
 * EVENT_CANCEL_FLAG_READY.
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void event_cancel_event_ready(struct event_master *m, void *arg)
{

        /* Only cancel ready/event tasks */
        cancel_event_helper(m, arg, EVENT_CANCEL_FLAG_READY);
}
1458
1459 /**
1460  * Cancel a specific task.
1461  *
1462  * MT-Unsafe
1463  *
1464  * @param thread task to cancel
1465  */
1466 void event_cancel(struct event **thread)
1467 {
1468         struct event_master *master;
1469
1470         if (thread == NULL || *thread == NULL)
1471                 return;
1472
1473         master = (*thread)->master;
1474
1475         frrtrace(9, frr_libfrr, event_cancel, master, (*thread)->xref->funcname,
1476                  (*thread)->xref->xref.file, (*thread)->xref->xref.line, NULL,
1477                  (*thread)->u.fd, (*thread)->u.val, (*thread)->arg,
1478                  (*thread)->u.sands.tv_sec);
1479
1480         assert(master->owner == pthread_self());
1481
1482         frr_with_mutex (&master->mtx) {
1483                 struct cancel_req *cr =
1484                         XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1485                 cr->thread = *thread;
1486                 listnode_add(master->cancel_req, cr);
1487                 do_event_cancel(master);
1488         }
1489
1490         *thread = NULL;
1491 }
1492
1493 /**
1494  * Asynchronous cancellation.
1495  *
1496  * Called with either a struct event ** or void * to an event argument,
1497  * this function posts the correct cancellation request and blocks until it is
1498  * serviced.
1499  *
1500  * If the thread is currently running, execution blocks until it completes.
1501  *
1502  * The last two parameters are mutually exclusive, i.e. if you pass one the
1503  * other must be NULL.
1504  *
1505  * When the cancellation procedure executes on the target thread_master, the
1506  * thread * provided is checked for nullity. If it is null, the thread is
1507  * assumed to no longer exist and the cancellation request is a no-op. Thus
1508  * users of this API must pass a back-reference when scheduling the original
1509  * task.
1510  *
1511  * MT-Safe
1512  *
1513  * @param master the thread master with the relevant event / task
1514  * @param thread pointer to thread to cancel
1515  * @param eventobj the event
1516  */
1517 void event_cancel_async(struct event_master *master, struct event **thread,
1518                         void *eventobj)
1519 {
1520         assert(!(thread && eventobj) && (thread || eventobj));
1521
1522         if (thread && *thread)
1523                 frrtrace(9, frr_libfrr, event_cancel_async, master,
1524                          (*thread)->xref->funcname, (*thread)->xref->xref.file,
1525                          (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
1526                          (*thread)->u.val, (*thread)->arg,
1527                          (*thread)->u.sands.tv_sec);
1528         else
1529                 frrtrace(9, frr_libfrr, event_cancel_async, master, NULL, NULL,
1530                          0, NULL, 0, 0, eventobj, 0);
1531
1532         assert(master->owner != pthread_self());
1533
1534         frr_with_mutex (&master->mtx) {
1535                 master->canceled = false;
1536
1537                 if (thread) {
1538                         struct cancel_req *cr =
1539                                 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1540                         cr->threadref = thread;
1541                         listnode_add(master->cancel_req, cr);
1542                 } else if (eventobj) {
1543                         struct cancel_req *cr =
1544                                 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1545                         cr->eventobj = eventobj;
1546                         listnode_add(master->cancel_req, cr);
1547                 }
1548                 AWAKEN(master);
1549
1550                 while (!master->canceled)
1551                         pthread_cond_wait(&master->cancel_cond, &master->mtx);
1552         }
1553
1554         if (thread)
1555                 *thread = NULL;
1556 }
1557 /* ------------------------------------------------------------------------- */
1558
1559 static struct timeval *thread_timer_wait(struct event_timer_list_head *timers,
1560                                          struct timeval *timer_val)
1561 {
1562         if (!event_timer_list_count(timers))
1563                 return NULL;
1564
1565         struct event *next_timer = event_timer_list_first(timers);
1566         monotime_until(&next_timer->u.sands, timer_val);
1567         return timer_val;
1568 }
1569
/*
 * Hand a task over for execution.
 *
 * The task is copied by value into the caller-provided 'fetch' storage and
 * the original structure is passed to thread_add_unuse().
 *
 * Returns 'fetch'.
 */
static struct event *thread_run(struct event_master *m, struct event *thread,
                                struct event *fetch)
{
        /* Copy first: 'thread' is given away by the next call */
        *fetch = *thread;
        thread_add_unuse(m, thread);
        return fetch;
}
1577
1578 static int thread_process_io_helper(struct event_master *m,
1579                                     struct event *thread, short state,
1580                                     short actual_state, int pos)
1581 {
1582         struct event **thread_array;
1583
1584         /*
1585          * poll() clears the .events field, but the pollfd array we
1586          * pass to poll() is a copy of the one used to schedule threads.
1587          * We need to synchronize state between the two here by applying
1588          * the same changes poll() made on the copy of the "real" pollfd
1589          * array.
1590          *
1591          * This cleans up a possible infinite loop where we refuse
1592          * to respond to a poll event but poll is insistent that
1593          * we should.
1594          */
1595         m->handler.pfds[pos].events &= ~(state);
1596
1597         if (!thread) {
1598                 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1599                         flog_err(EC_LIB_NO_THREAD,
1600                                  "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
1601                                  m->handler.pfds[pos].fd, actual_state);
1602                 return 0;
1603         }
1604
1605         if (thread->type == EVENT_READ)
1606                 thread_array = m->read;
1607         else
1608                 thread_array = m->write;
1609
1610         thread_array[thread->u.fd] = NULL;
1611         event_list_add_tail(&m->ready, thread);
1612         thread->type = EVENT_READY;
1613
1614         return 1;
1615 }
1616
1617 /**
1618  * Process I/O events.
1619  *
1620  * Walks through file descriptor array looking for those pollfds whose .revents
1621  * field has something interesting. Deletes any invalid file descriptors.
1622  *
1623  * @param m the thread master
1624  * @param num the number of active file descriptors (return value of poll())
1625  */
1626 static void thread_process_io(struct event_master *m, unsigned int num)
1627 {
1628         unsigned int ready = 0;
1629         struct pollfd *pfds = m->handler.copy;
1630
1631         for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1632                 /* no event for current fd? immediately continue */
1633                 if (pfds[i].revents == 0)
1634                         continue;
1635
1636                 ready++;
1637
1638                 /*
1639                  * Unless someone has called event_cancel from another
1640                  * pthread, the only thing that could have changed in
1641                  * m->handler.pfds while we were asleep is the .events
1642                  * field in a given pollfd. Barring event_cancel() that
1643                  * value should be a superset of the values we have in our
1644                  * copy, so there's no need to update it. Similarily,
1645                  * barring deletion, the fd should still be a valid index
1646                  * into the master's pfds.
1647                  *
1648                  * We are including POLLERR here to do a READ event
1649                  * this is because the read should fail and the
1650                  * read function should handle it appropriately
1651                  */
1652                 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
1653                         thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
1654                                                  pfds[i].revents, i);
1655                 }
1656                 if (pfds[i].revents & POLLOUT)
1657                         thread_process_io_helper(m, m->write[pfds[i].fd],
1658                                                  POLLOUT, pfds[i].revents, i);
1659
1660                 /* if one of our file descriptors is garbage, remove the same
1661                  * from
1662                  * both pfds + update sizes and index */
1663                 if (pfds[i].revents & POLLNVAL) {
1664                         memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1665                                 (m->handler.pfdcount - i - 1)
1666                                         * sizeof(struct pollfd));
1667                         m->handler.pfdcount--;
1668                         m->handler.pfds[m->handler.pfdcount].fd = 0;
1669                         m->handler.pfds[m->handler.pfdcount].events = 0;
1670
1671                         memmove(pfds + i, pfds + i + 1,
1672                                 (m->handler.copycount - i - 1)
1673                                         * sizeof(struct pollfd));
1674                         m->handler.copycount--;
1675                         m->handler.copy[m->handler.copycount].fd = 0;
1676                         m->handler.copy[m->handler.copycount].events = 0;
1677
1678                         i--;
1679                 }
1680         }
1681 }
1682
1683 /* Add all timers that have popped to the ready list. */
1684 static unsigned int thread_process_timers(struct event_master *m,
1685                                           struct timeval *timenow)
1686 {
1687         struct timeval prev = *timenow;
1688         bool displayed = false;
1689         struct event *thread;
1690         unsigned int ready = 0;
1691
1692         while ((thread = event_timer_list_first(&m->timer))) {
1693                 if (timercmp(timenow, &thread->u.sands, <))
1694                         break;
1695                 prev = thread->u.sands;
1696                 prev.tv_sec += 4;
1697                 /*
1698                  * If the timer would have popped 4 seconds in the
1699                  * past then we are in a situation where we are
1700                  * really getting behind on handling of events.
1701                  * Let's log it and do the right thing with it.
1702                  */
1703                 if (timercmp(timenow, &prev, >)) {
1704                         atomic_fetch_add_explicit(
1705                                 &thread->hist->total_starv_warn, 1,
1706                                 memory_order_seq_cst);
1707                         if (!displayed && !thread->ignore_timer_late) {
1708                                 flog_warn(
1709                                         EC_LIB_STARVE_THREAD,
1710                                         "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1711                                         thread);
1712                                 displayed = true;
1713                         }
1714                 }
1715
1716                 event_timer_list_pop(&m->timer);
1717                 thread->type = EVENT_READY;
1718                 event_list_add_tail(&m->ready, thread);
1719                 ready++;
1720         }
1721
1722         return ready;
1723 }
1724
1725 /* process a list en masse, e.g. for event thread lists */
1726 static unsigned int thread_process(struct event_list_head *list)
1727 {
1728         struct event *thread;
1729         unsigned int ready = 0;
1730
1731         while ((thread = event_list_pop(list))) {
1732                 thread->type = EVENT_READY;
1733                 event_list_add_tail(&thread->master->ready, thread);
1734                 ready++;
1735         }
1736         return ready;
1737 }
1738
1739
/*
 * Fetch the next ready task from master `m`.
 *
 * Every pass of the loop first services signals (when m->handle_signals is
 * set) and pending cancellation requests, then tries to pop a task off
 * m->ready. If one is available it is handed to thread_run() together with
 * the caller's `fetch` storage and the result is returned right away.
 *
 * If the ready queue is empty, pending events are moved onto it, poll() is
 * run with m->mtx released, and expired timers plus signalled I/O are moved
 * onto the ready queue as well. The loop then repeats for as long as m->spin
 * is set, so that the next pass can pick up whatever became ready.
 *
 * @param m     the thread master to fetch from
 * @param fetch caller-provided event that is passed on to thread_run()
 * @return the pointer returned by thread_run() for the task to run; NULL
 *         when there are neither timers nor file descriptors left to wait
 *         on, or when poll() failed and fd_poll() did not flag the failure
 *         as an interruption. If m->spin is not set and nothing was ready
 *         on the single pass, `fetch` is returned exactly as passed in.
 */
struct event *event_fetch(struct event_master *m, struct event *fetch)
{
        struct event *thread = NULL;
        struct timeval now;
        struct timeval zerotime = {0, 0};
        struct timeval tv;
        struct timeval *tw = NULL;
        bool eintr_p = false;
        int num = 0;

        do {
                /* Handle signals if any */
                if (m->handle_signals)
                        frr_sigevent_process();

                pthread_mutex_lock(&m->mtx);

                /* Process any pending cancellation requests */
                do_event_cancel(m);

                /*
                 * Attempt to flush ready queue before going into poll().
                 * This is performance-critical. Think twice before modifying.
                 */
                if ((thread = event_list_pop(&m->ready))) {
                        fetch = thread_run(m, thread, fetch);
                        /* clear the back-reference, if one was registered */
                        if (fetch->ref)
                                *fetch->ref = NULL;
                        pthread_mutex_unlock(&m->mtx);
                        /*
                         * ready_run_loop tells event_call() that it may use
                         * m->last_getrusage as the start timestamp instead
                         * of sampling again; take a fresh sample only when
                         * entering such a run of back-to-back ready tasks.
                         */
                        if (!m->ready_run_loop)
                                GETRUSAGE(&m->last_getrusage);
                        m->ready_run_loop = true;
                        break;
                }

                m->ready_run_loop = false;
                /* otherwise, tick through scheduling sequence */

                /*
                 * Post events to ready queue. This must come before the
                 * following block since events should occur immediately
                 */
                thread_process(&m->event);

                /*
                 * If there are no tasks on the ready queue, we will poll()
                 * until a timer expires or we receive I/O, whichever comes
                 * first. The strategy for doing this is:
                 *
                 * - If there are events pending, set the poll() timeout to zero
                 * - If there are no events pending, but there are timers
                 * pending, set the timeout to the smallest remaining time on
                 * any timer.
                 * - If there are neither timers nor events pending, but there
                 * are file descriptors pending, block indefinitely in poll()
                 * - If nothing is pending, it's time for the application to die
                 *
                 * In every case except the last, we need to hit poll() at least
                 * once per loop to avoid starvation by events
                 */
                if (!event_list_count(&m->ready))
                        tw = thread_timer_wait(&m->timer, &tv);

                /*
                 * Do not block at all if something is already ready or the
                 * computed timer wait is not strictly positive.
                 */
                if (event_list_count(&m->ready) ||
                    (tw && !timercmp(tw, &zerotime, >)))
                        tw = &zerotime;

                if (!tw && m->handler.pfdcount == 0) { /* die */
                        pthread_mutex_unlock(&m->mtx);
                        fetch = NULL;
                        break;
                }

                /*
                 * Copy pollfd array + # active pollfds in it. Not necessary to
                 * copy the array size as this is fixed.
                 */
                m->handler.copycount = m->handler.pfdcount;
                memcpy(m->handler.copy, m->handler.pfds,
                       m->handler.copycount * sizeof(struct pollfd));

                /* the mutex is not held while waiting in fd_poll() */
                pthread_mutex_unlock(&m->mtx);
                {
                        eintr_p = false;
                        num = fd_poll(m, tw, &eintr_p);
                }
                pthread_mutex_lock(&m->mtx);

                /* Handle any errors received in poll() */
                if (num < 0) {
                        if (eintr_p) {
                                pthread_mutex_unlock(&m->mtx);
                                /*
                                 * loop around to signal handler; `continue`
                                 * jumps to the loop condition, where thread
                                 * is NULL, so we only go around again when
                                 * m->spin is set
                                 */
                                continue;
                        }

                        /* else die */
                        flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
                                 safe_strerror(errno));
                        pthread_mutex_unlock(&m->mtx);
                        fetch = NULL;
                        break;
                }

                /* Post timers to ready queue. */
                monotime(&now);
                thread_process_timers(m, &now);

                /* Post I/O to ready queue. */
                if (num > 0)
                        thread_process_io(m, num);

                pthread_mutex_unlock(&m->mtx);

        } while (!thread && m->spin);

        return fetch;
}
1859
1860 static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
1861 {
1862         return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1863                 + (a.tv_usec - b.tv_usec));
1864 }
1865
1866 unsigned long event_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1867                                   unsigned long *cputime)
1868 {
1869 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1870
1871 #ifdef __FreeBSD__
1872         /*
1873          * FreeBSD appears to have an issue when calling clock_gettime
1874          * with CLOCK_THREAD_CPUTIME_ID really close to each other
1875          * occassionally the now time will be before the start time.
1876          * This is not good and FRR is ending up with CPU HOG's
1877          * when the subtraction wraps to very large numbers
1878          *
1879          * What we are going to do here is cheat a little bit
1880          * and notice that this is a problem and just correct
1881          * it so that it is impossible to happen
1882          */
1883         if (start->cpu.tv_sec == now->cpu.tv_sec &&
1884             start->cpu.tv_nsec > now->cpu.tv_nsec)
1885                 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1886         else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1887                 now->cpu.tv_sec = start->cpu.tv_sec;
1888                 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1889         }
1890 #endif
1891         *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1892                    + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1893 #else
1894         /* This is 'user + sys' time.  */
1895         *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1896                    + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
1897 #endif
1898         return timeval_elapsed(now->real, start->real);
1899 }
1900
1901 /* We should aim to yield after yield milliseconds, which defaults
1902    to EVENT_YIELD_TIME_SLOT .
1903    Note: we are using real (wall clock) time for this calculation.
1904    It could be argued that CPU time may make more sense in certain
1905    contexts.  The things to consider are whether the thread may have
1906    blocked (in which case wall time increases, but CPU time does not),
1907    or whether the system is heavily loaded with other processes competing
1908    for CPU time.  On balance, wall clock time seems to make sense.
1909    Plus it has the added benefit that gettimeofday should be faster
1910    than calling getrusage. */
1911 int event_should_yield(struct event *thread)
1912 {
1913         int result;
1914         frr_with_mutex (&thread->mtx) {
1915                 result = monotime_since(&thread->real, NULL)
1916                          > (int64_t)thread->yield;
1917         }
1918         return result;
1919 }
1920
/*
 * Set the time slice that event_should_yield() compares against for this
 * task. The value is stored in thread->yield under the task's own mutex.
 */
void event_set_yield_time(struct event *thread, unsigned long yield_time)
{
        frr_with_mutex (&thread->mtx) {
                thread->yield = yield_time;
        }
}
1927
1928 void event_getrusage(RUSAGE_T *r)
1929 {
1930         monotime(&r->real);
1931         if (!cputime_enabled) {
1932                 memset(&r->cpu, 0, sizeof(r->cpu));
1933                 return;
1934         }
1935
1936 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1937         /* not currently implemented in Linux's vDSO, but maybe at some point
1938          * in the future?
1939          */
1940         clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1941 #else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
1942 #if defined RUSAGE_THREAD
1943 #define FRR_RUSAGE RUSAGE_THREAD
1944 #else
1945 #define FRR_RUSAGE RUSAGE_SELF
1946 #endif
1947         getrusage(FRR_RUSAGE, &(r->cpu));
1948 #endif
1949 }
1950
1951 /*
1952  * Call a thread.
1953  *
1954  * This function will atomically update the thread's usage history. At present
1955  * this is the only spot where usage history is written. Nevertheless the code
1956  * has been written such that the introduction of writers in the future should
1957  * not need to update it provided the writers atomically perform only the
1958  * operations done here, i.e. updating the total and maximum times. In
1959  * particular, the maximum real and cpu times must be monotonically increasing
1960  * or this code is not correct.
1961  */
1962 void event_call(struct event *thread)
1963 {
1964         RUSAGE_T before, after;
1965
1966         /* if the thread being called is the CLI, it may change cputime_enabled
1967          * ("service cputime-stats" command), which can result in nonsensical
1968          * and very confusing warnings
1969          */
1970         bool cputime_enabled_here = cputime_enabled;
1971
1972         if (thread->master->ready_run_loop)
1973                 before = thread->master->last_getrusage;
1974         else
1975                 GETRUSAGE(&before);
1976
1977         thread->real = before.real;
1978
1979         frrtrace(9, frr_libfrr, event_call, thread->master,
1980                  thread->xref->funcname, thread->xref->xref.file,
1981                  thread->xref->xref.line, NULL, thread->u.fd, thread->u.val,
1982                  thread->arg, thread->u.sands.tv_sec);
1983
1984         pthread_setspecific(thread_current, thread);
1985         (*thread->func)(thread);
1986         pthread_setspecific(thread_current, NULL);
1987
1988         GETRUSAGE(&after);
1989         thread->master->last_getrusage = after;
1990
1991         unsigned long walltime, cputime;
1992         unsigned long exp;
1993
1994         walltime = event_consumed_time(&after, &before, &cputime);
1995
1996         /* update walltime */
1997         atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
1998                                   memory_order_seq_cst);
1999         exp = atomic_load_explicit(&thread->hist->real.max,
2000                                    memory_order_seq_cst);
2001         while (exp < walltime
2002                && !atomic_compare_exchange_weak_explicit(
2003                        &thread->hist->real.max, &exp, walltime,
2004                        memory_order_seq_cst, memory_order_seq_cst))
2005                 ;
2006
2007         if (cputime_enabled_here && cputime_enabled) {
2008                 /* update cputime */
2009                 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2010                                           memory_order_seq_cst);
2011                 exp = atomic_load_explicit(&thread->hist->cpu.max,
2012                                            memory_order_seq_cst);
2013                 while (exp < cputime
2014                        && !atomic_compare_exchange_weak_explicit(
2015                                &thread->hist->cpu.max, &exp, cputime,
2016                                memory_order_seq_cst, memory_order_seq_cst))
2017                         ;
2018         }
2019
2020         atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2021                                   memory_order_seq_cst);
2022         atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2023                                  memory_order_seq_cst);
2024
2025         if (cputime_enabled_here && cputime_enabled && cputime_threshold
2026             && cputime > cputime_threshold) {
2027                 /*
2028                  * We have a CPU Hog on our hands.  The time FRR has spent
2029                  * doing actual work (not sleeping) is greater than 5 seconds.
2030                  * Whinge about it now, so we're aware this is yet another task
2031                  * to fix.
2032                  */
2033                 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2034                                           1, memory_order_seq_cst);
2035                 flog_warn(
2036                         EC_LIB_SLOW_THREAD_CPU,
2037                         "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2038                         thread->xref->funcname, (unsigned long)thread->func,
2039                         walltime / 1000, cputime / 1000);
2040
2041         } else if (walltime_threshold && walltime > walltime_threshold) {
2042                 /*
2043                  * The runtime for a task is greater than 5 seconds, but the
2044                  * cpu time is under 5 seconds.  Let's whine about this because
2045                  * this could imply some sort of scheduling issue.
2046                  */
2047                 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2048                                           1, memory_order_seq_cst);
2049                 flog_warn(
2050                         EC_LIB_SLOW_THREAD_WALL,
2051                         "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
2052                         thread->xref->funcname, (unsigned long)thread->func,
2053                         walltime / 1000, cputime / 1000);
2054         }
2055 }
2056
2057 /* Execute thread */
2058 void _event_execute(const struct xref_eventsched *xref, struct event_master *m,
2059                     void (*func)(struct event *), void *arg, int val)
2060 {
2061         struct event *thread;
2062
2063         /* Get or allocate new thread to execute. */
2064         frr_with_mutex (&m->mtx) {
2065                 thread = thread_get(m, EVENT_EVENT, func, arg, xref);
2066
2067                 /* Set its event value. */
2068                 frr_with_mutex (&thread->mtx) {
2069                         thread->add_type = EVENT_EXECUTE;
2070                         thread->u.val = val;
2071                         thread->ref = &thread;
2072                 }
2073         }
2074
2075         /* Execute thread doing all accounting. */
2076         event_call(thread);
2077
2078         /* Give back or free thread. */
2079         thread_add_unuse(m, thread);
2080 }
2081
2082 /* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2083 void debug_signals(const sigset_t *sigs)
2084 {
2085         int i, found;
2086         sigset_t tmpsigs;
2087         char buf[300];
2088
2089         /*
2090          * We're only looking at the non-realtime signals here, so we need
2091          * some limit value. Platform differences mean at some point we just
2092          * need to pick a reasonable value.
2093          */
2094 #if defined SIGRTMIN
2095 #  define LAST_SIGNAL SIGRTMIN
2096 #else
2097 #  define LAST_SIGNAL 32
2098 #endif
2099
2100
2101         if (sigs == NULL) {
2102                 sigemptyset(&tmpsigs);
2103                 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2104                 sigs = &tmpsigs;
2105         }
2106
2107         found = 0;
2108         buf[0] = '\0';
2109
2110         for (i = 0; i < LAST_SIGNAL; i++) {
2111                 char tmp[20];
2112
2113                 if (sigismember(sigs, i) > 0) {
2114                         if (found > 0)
2115                                 strlcat(buf, ",", sizeof(buf));
2116                         snprintf(tmp, sizeof(tmp), "%d", i);
2117                         strlcat(buf, tmp, sizeof(buf));
2118                         found++;
2119                 }
2120         }
2121
2122         if (found == 0)
2123                 snprintf(buf, sizeof(buf), "<none>");
2124
2125         zlog_debug("%s: %s", __func__, buf);
2126 }
2127
2128 static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2129                                    const struct event *thread)
2130 {
2131         static const char *const types[] = {
2132                 [EVENT_READ] = "read",    [EVENT_WRITE] = "write",
2133                 [EVENT_TIMER] = "timer",  [EVENT_EVENT] = "event",
2134                 [EVENT_READY] = "ready",  [EVENT_UNUSED] = "unused",
2135                 [EVENT_EXECUTE] = "exec",
2136         };
2137         ssize_t rv = 0;
2138         char info[16] = "";
2139
2140         if (!thread)
2141                 return bputs(buf, "{(thread *)NULL}");
2142
2143         rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2144
2145         if (thread->type < array_size(types) && types[thread->type])
2146                 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2147         else
2148                 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2149
2150         switch (thread->type) {
2151         case EVENT_READ:
2152         case EVENT_WRITE:
2153                 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2154                 break;
2155
2156         case EVENT_TIMER:
2157                 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2158                 break;
2159         case EVENT_READY:
2160         case EVENT_EVENT:
2161         case EVENT_UNUSED:
2162         case EVENT_EXECUTE:
2163                 break;
2164         }
2165
2166         rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2167                         thread->xref->funcname, thread->xref->dest,
2168                         thread->xref->xref.file, thread->xref->xref.line);
2169         return rv;
2170 }
2171
2172 printfrr_ext_autoreg_p("TH", printfrr_thread);
2173 static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2174                                const void *ptr)
2175 {
2176         const struct event *thread = ptr;
2177         struct timespec remain = {};
2178
2179         if (ea->fmt[0] == 'D') {
2180                 ea->fmt++;
2181                 return printfrr_thread_dbg(buf, ea, thread);
2182         }
2183
2184         if (!thread) {
2185                 /* need to jump over time formatting flag characters in the
2186                  * input format string, i.e. adjust ea->fmt!
2187                  */
2188                 printfrr_time(buf, ea, &remain,
2189                               TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2190                 return bputch(buf, '-');
2191         }
2192
2193         TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2194         return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2195 }