// SPDX-License-Identifier: GPL-2.0-or-later
/* Thread management routine
 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
 */

#include <zebra.h>
#include <sys/resource.h>

#include "frrevent.h"
#include "memory.h"
#include "frrcu.h"
#include "log.h"
#include "hash.h"
#include "command.h"
#include "sigevent.h"
#include "network.h"
#include "jhash.h"
#include "frratomic.h"
#include "frr_pthread.h"
#include "lib_errors.h"
#include "libfrr_trace.h"
#include "libfrr.h"

DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
DEFINE_MTYPE_STATIC(LIB, EVENT_MASTER, "Thread master");
DEFINE_MTYPE_STATIC(LIB, EVENT_POLL, "Thread Poll Info");
DEFINE_MTYPE_STATIC(LIB, EVENT_STATS, "Thread stats");

DECLARE_LIST(event_list, struct event, eventitem);

/* Cancellation request - what to cancel is identified by exactly one of
 * the three members below.
 */
struct cancel_req {
	int flags;
	struct event *thread;
	void *eventobj;
	struct event **threadref;
};

/* Flags for task cancellation */
#define EVENT_CANCEL_FLAG_READY 0x01

static int event_timer_cmp(const struct event *a, const struct event *b)
{
	if (a->u.sands.tv_sec < b->u.sands.tv_sec)
		return -1;
	if (a->u.sands.tv_sec > b->u.sands.tv_sec)
		return 1;
	if (a->u.sands.tv_usec < b->u.sands.tv_usec)
		return -1;
	if (a->u.sands.tv_usec > b->u.sands.tv_usec)
		return 1;
	return 0;
}

DECLARE_HEAP(event_timer_list, struct event, timeritem, event_timer_cmp);

#if defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif

#define AWAKEN(m)                                                              \
	do {                                                                   \
		const unsigned char wakebyte = 0x01;                           \
		write(m->io_pipe[1], &wakebyte, 1);                            \
	} while (0)

/* control variable for initializer */
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
pthread_key_t thread_current;

static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *masters;

static void thread_free(struct event_loop *master, struct event *thread);

#ifndef EXCLUDE_CPU_TIME
#define EXCLUDE_CPU_TIME 0
#endif
#ifndef CONSUMED_TIME_CHECK
#define CONSUMED_TIME_CHECK 0
#endif

bool cputime_enabled = !EXCLUDE_CPU_TIME;
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;

/* CLI start ---------------------------------------------------------------- */
#include "lib/event_clippy.c"

static unsigned int cpu_record_hash_key(const struct cpu_event_history *a)
{
	int size = sizeof(a->func);

	return jhash(&a->func, size, 0);
}

static bool cpu_record_hash_cmp(const struct cpu_event_history *a,
				const struct cpu_event_history *b)
{
	return a->func == b->func;
}

static void *cpu_record_hash_alloc(struct cpu_event_history *a)
{
	struct cpu_event_history *new;

	new = XCALLOC(MTYPE_EVENT_STATS, sizeof(struct cpu_event_history));
	new->func = a->func;
	new->funcname = a->funcname;
	return new;
}

static void cpu_record_hash_free(void *a)
{
	struct cpu_event_history *hist = a;

	XFREE(MTYPE_EVENT_STATS, hist);
}

static void vty_out_cpu_event_history(struct vty *vty,
				      struct cpu_event_history *a)
{
	vty_out(vty,
		"%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
		a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
		a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
		(a->real.total / a->total_calls), a->real.max,
		a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
	vty_out(vty, " %c%c%c%c%c %s\n",
		a->types & (1 << EVENT_READ) ? 'R' : ' ',
		a->types & (1 << EVENT_WRITE) ? 'W' : ' ',
		a->types & (1 << EVENT_TIMER) ? 'T' : ' ',
		a->types & (1 << EVENT_EVENT) ? 'E' : ' ',
		a->types & (1 << EVENT_EXECUTE) ? 'X' : ' ', a->funcname);
}

static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
{
	struct cpu_event_history *totals = args[0];
	struct cpu_event_history copy;
	struct vty *vty = args[1];
	uint8_t *filter = args[2];

	struct cpu_event_history *a = bucket->data;

	copy.total_active =
		atomic_load_explicit(&a->total_active, memory_order_seq_cst);
	copy.total_calls =
		atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
	copy.total_cpu_warn =
		atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
	copy.total_wall_warn =
		atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
	copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
						     memory_order_seq_cst);
	copy.cpu.total =
		atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
	copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
	copy.real.total =
		atomic_load_explicit(&a->real.total, memory_order_seq_cst);
	copy.real.max =
		atomic_load_explicit(&a->real.max, memory_order_seq_cst);
	copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
	copy.funcname = a->funcname;

	if (!(copy.types & *filter))
		return;

	vty_out_cpu_event_history(vty, &copy);
	totals->total_active += copy.total_active;
	totals->total_calls += copy.total_calls;
	totals->total_cpu_warn += copy.total_cpu_warn;
	totals->total_wall_warn += copy.total_wall_warn;
	totals->total_starv_warn += copy.total_starv_warn;
	totals->real.total += copy.real.total;
	if (totals->real.max < copy.real.max)
		totals->real.max = copy.real.max;
	totals->cpu.total += copy.cpu.total;
	if (totals->cpu.max < copy.cpu.max)
		totals->cpu.max = copy.cpu.max;
}

static void cpu_record_print(struct vty *vty, uint8_t filter)
{
	struct cpu_event_history tmp;
	void *args[3] = {&tmp, vty, &filter};
	struct event_loop *m;
	struct listnode *ln;

	if (!cputime_enabled)
		vty_out(vty,
			"Collecting CPU time statistics is currently disabled. Following statistics\n"
			"will be zero or may display data from when collection was enabled. Use the\n"
			"  \"service cputime-stats\" command to start collecting data.\n"
			"\nCounters and wallclock times are always maintained and should be accurate.\n");

	memset(&tmp, 0, sizeof(tmp));
	tmp.funcname = "TOTAL";
	tmp.types = filter;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			const char *name = m->name ? m->name : "main";
			char underline[strlen(name) + 1];

			memset(underline, '-', sizeof(underline));
			underline[sizeof(underline) - 1] = '\0';

			vty_out(vty, "\n");
			vty_out(vty, "Showing statistics for pthread %s\n",
				name);
			vty_out(vty, "-------------------------------%s\n",
				underline);
			vty_out(vty, "%30s %18s %18s\n", "",
				"CPU (user+system):", "Real (wall-clock):");
			vty_out(vty,
				"Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
			vty_out(vty, " Avg uSec Max uSecs");
			vty_out(vty,
				"  CPU_Warn Wall_Warn Starv_Warn   Type  Thread\n");

			if (m->cpu_record->count)
				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_print,
					args);
			else
				vty_out(vty, "No data to display yet.\n");

			vty_out(vty, "\n");
		}
	}

	vty_out(vty, "\n");
	vty_out(vty, "Total thread statistics\n");
	vty_out(vty, "-------------------------\n");
	vty_out(vty, "%30s %18s %18s\n", "",
		"CPU (user+system):", "Real (wall-clock):");
	vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
	vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
	vty_out(vty, "   Type  Thread\n");

	if (tmp.total_calls > 0)
		vty_out_cpu_event_history(vty, &tmp);
}

static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
{
	uint8_t *filter = args[0];
	struct hash *cpu_record = args[1];

	struct cpu_event_history *a = bucket->data;

	if (!(a->types & *filter))
		return;

	hash_release(cpu_record, bucket->data);
}

static void cpu_record_clear(uint8_t filter)
{
	uint8_t *tmp = &filter;
	struct event_loop *m;
	struct listnode *ln;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
			frr_with_mutex (&m->mtx) {
				void *args[2] = {tmp, m->cpu_record};

				hash_iterate(
					m->cpu_record,
					(void (*)(struct hash_bucket *,
						  void *))cpu_record_hash_clear,
					args);
			}
		}
	}
}

static uint8_t parse_filter(const char *filterstr)
{
	int i = 0;
	int filter = 0;

	while (filterstr[i] != '\0') {
		switch (filterstr[i]) {
		case 'r':
		case 'R':
			filter |= (1 << EVENT_READ);
			break;
		case 'w':
		case 'W':
			filter |= (1 << EVENT_WRITE);
			break;
		case 't':
		case 'T':
			filter |= (1 << EVENT_TIMER);
			break;
		case 'e':
		case 'E':
			filter |= (1 << EVENT_EVENT);
			break;
		case 'x':
		case 'X':
			filter |= (1 << EVENT_EXECUTE);
			break;
		default:
			break;
		}
		++i;
	}

	return filter;
}

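/* Usage sketch (illustrative): parse_filter() turns a CLI filter string
 * into the task-type bitmask consumed by cpu_record_print() and
 * cpu_record_clear(), e.g.
 *
 *	uint8_t f = parse_filter("rt");
 *	// f == (1 << EVENT_READ) | (1 << EVENT_TIMER)
 */
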
DEFUN_NOSH (show_thread_cpu,
	    show_thread_cpu_cmd,
	    "show thread cpu [FILTER]",
	    SHOW_STR
	    "Thread information\n"
	    "Thread CPU usage\n"
	    "Display filter (rwtex)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_print(vty, filter);
	return CMD_SUCCESS;
}

DEFPY (service_cputime_stats,
       service_cputime_stats_cmd,
       "[no] service cputime-stats",
       NO_STR
       "Set up miscellaneous service\n"
       "Collect CPU usage statistics\n")
{
	cputime_enabled = !no;
	return CMD_SUCCESS;
}

DEFPY (service_cputime_warning,
       service_cputime_warning_cmd,
       "[no] service cputime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		cputime_threshold = 0;
	else
		cputime_threshold = cputime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_cputime_warning,
       no_service_cputime_warning_cmd,
       "no service cputime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n")

DEFPY (service_walltime_warning,
       service_walltime_warning_cmd,
       "[no] service walltime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n"
       "Warning threshold in milliseconds\n")
{
	if (no)
		walltime_threshold = 0;
	else
		walltime_threshold = walltime_warning * 1000;
	return CMD_SUCCESS;
}

ALIAS (service_walltime_warning,
       no_service_walltime_warning_cmd,
       "no service walltime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n")

static void show_thread_poll_helper(struct vty *vty, struct event_loop *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct event *thread;
	uint32_t i;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing poll FD's for %s\n", name);
	vty_out(vty, "----------------------%s\n", underline);
	vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
		m->fd_limit);
	for (i = 0; i < m->handler.pfdcount; i++) {
		vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
			m->handler.pfds[i].fd, m->handler.pfds[i].events,
			m->handler.pfds[i].revents);

		if (m->handler.pfds[i].events & POLLIN) {
			thread = m->read[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR ");
			else
				vty_out(vty, "%s ", thread->xref->funcname);
		} else
			vty_out(vty, " ");

		if (m->handler.pfds[i].events & POLLOUT) {
			thread = m->write[m->handler.pfds[i].fd];

			if (!thread)
				vty_out(vty, "ERROR\n");
			else
				vty_out(vty, "%s\n", thread->xref->funcname);
		} else
			vty_out(vty, "\n");
	}
}

DEFUN_NOSH (show_thread_poll,
	    show_thread_poll_cmd,
	    "show thread poll",
	    SHOW_STR
	    "Thread information\n"
	    "Show poll FD's and information\n")
{
	struct listnode *node;
	struct event_loop *m;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
			show_thread_poll_helper(vty, m);
		}
	}

	return CMD_SUCCESS;
}

DEFUN (clear_thread_cpu,
       clear_thread_cpu_cmd,
       "clear thread cpu [FILTER]",
       "Clear stored data in all pthreads\n"
       "Thread information\n"
       "Thread CPU usage\n"
       "Display filter (rwtexb)\n")
{
	uint8_t filter = (uint8_t)-1U;
	int idx = 0;

	if (argv_find(argv, argc, "FILTER", &idx)) {
		filter = parse_filter(argv[idx]->arg);
		if (!filter) {
			vty_out(vty,
				"Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
				argv[idx]->arg);
			return CMD_WARNING;
		}
	}

	cpu_record_clear(filter);
	return CMD_SUCCESS;
}

static void show_thread_timers_helper(struct vty *vty, struct event_loop *m)
{
	const char *name = m->name ? m->name : "main";
	char underline[strlen(name) + 1];
	struct event *thread;

	memset(underline, '-', sizeof(underline));
	underline[sizeof(underline) - 1] = '\0';

	vty_out(vty, "\nShowing timers for %s\n", name);
	vty_out(vty, "-------------------%s\n", underline);

	frr_each (event_timer_list, &m->timer, thread) {
		vty_out(vty, "  %-50s%pTH\n", thread->hist->funcname, thread);
	}
}

DEFPY_NOSH (show_thread_timers,
	    show_thread_timers_cmd,
	    "show thread timers",
	    SHOW_STR
	    "Thread information\n"
	    "Show all timers and how long they have in the system\n")
{
	struct listnode *node;
	struct event_loop *m;

	frr_with_mutex (&masters_mtx) {
		for (ALL_LIST_ELEMENTS_RO(masters, node, m))
			show_thread_timers_helper(vty, m);
	}

	return CMD_SUCCESS;
}

void event_cmd_init(void)
{
	install_element(VIEW_NODE, &show_thread_cpu_cmd);
	install_element(VIEW_NODE, &show_thread_poll_cmd);
	install_element(ENABLE_NODE, &clear_thread_cpu_cmd);

	install_element(CONFIG_NODE, &service_cputime_stats_cmd);
	install_element(CONFIG_NODE, &service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
	install_element(CONFIG_NODE, &service_walltime_warning_cmd);
	install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);

	install_element(VIEW_NODE, &show_thread_timers_cmd);
}
/* CLI end ------------------------------------------------------------------ */

static void cancelreq_del(void *cr)
{
	XFREE(MTYPE_TMP, cr);
}

/* initializer, only ever called once */
static void initializer(void)
{
	pthread_key_create(&thread_current, NULL);
}

struct event_loop *event_master_create(const char *name)
{
	struct event_loop *rv;
	struct rlimit limit;

	pthread_once(&init_once, &initializer);

	rv = XCALLOC(MTYPE_EVENT_MASTER, sizeof(struct event_loop));

	/* Initialize master mutex */
	pthread_mutex_init(&rv->mtx, NULL);
	pthread_cond_init(&rv->cancel_cond, NULL);

	/* Set name */
	name = name ? name : "default";
	rv->name = XSTRDUP(MTYPE_EVENT_MASTER, name);

	/* Initialize I/O task data structures */

	/* Use configured limit if present, ulimit otherwise. */
	rv->fd_limit = frr_get_fd_limit();
	if (rv->fd_limit == 0) {
		getrlimit(RLIMIT_NOFILE, &limit);
		rv->fd_limit = (int)limit.rlim_cur;
	}

	rv->read = XCALLOC(MTYPE_EVENT_POLL,
			   sizeof(struct event *) * rv->fd_limit);

	rv->write = XCALLOC(MTYPE_EVENT_POLL,
			    sizeof(struct event *) * rv->fd_limit);

	char tmhashname[strlen(name) + 32];

	snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
		 name);
	rv->cpu_record = hash_create_size(
		8, (unsigned int (*)(const void *))cpu_record_hash_key,
		(bool (*)(const void *, const void *))cpu_record_hash_cmp,
		tmhashname);

	event_list_init(&rv->event);
	event_list_init(&rv->ready);
	event_list_init(&rv->unuse);
	event_timer_list_init(&rv->timer);

	/* Initialize event_fetch() settings */
	rv->spin = true;
	rv->handle_signals = true;

	/* Set pthread owner, should be updated by actual owner */
	rv->owner = pthread_self();
	rv->cancel_req = list_new();
	rv->cancel_req->del = cancelreq_del;
	rv->canceled = true;

	/* Initialize pipe poker */
	pipe(rv->io_pipe);
	set_nonblocking(rv->io_pipe[0]);
	set_nonblocking(rv->io_pipe[1]);

	/* Initialize data structures for poll() */
	rv->handler.pfdsize = rv->fd_limit;
	rv->handler.pfdcount = 0;
	rv->handler.pfds = XCALLOC(MTYPE_EVENT_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);
	rv->handler.copy = XCALLOC(MTYPE_EVENT_MASTER,
				   sizeof(struct pollfd) * rv->handler.pfdsize);

	/* add to list of threadmasters */
	frr_with_mutex (&masters_mtx) {
		if (!masters)
			masters = list_new();

		listnode_add(masters, rv);
	}

	return rv;
}

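/* Usage sketch (illustrative; real daemons set this up through libfrr's
 * frr_init()/frr_run() rather than by hand):
 *
 *	struct event_loop *loop = event_master_create("example");
 *	struct event ev;
 *
 *	// schedule tasks via the event.h wrappers, e.g. event_add_timer()
 *	while (event_fetch(loop, &ev))
 *		event_call(&ev);
 *	event_master_free(loop);
 */
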
void event_master_set_name(struct event_loop *master, const char *name)
{
	frr_with_mutex (&master->mtx) {
		XFREE(MTYPE_EVENT_MASTER, master->name);
		master->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
	}
}

#define EVENT_UNUSED_DEPTH 10

/* Move thread to unuse list. */
static void thread_add_unuse(struct event_loop *m, struct event *thread)
{
	pthread_mutex_t mtxc = thread->mtx;

	assert(m != NULL && thread != NULL);

	thread->hist->total_active--;
	memset(thread, 0, sizeof(struct event));
	thread->type = EVENT_UNUSED;

	/* Restore the thread mutex context. */
	thread->mtx = mtxc;

	if (event_list_count(&m->unuse) < EVENT_UNUSED_DEPTH) {
		event_list_add_tail(&m->unuse, thread);
		return;
	}

	thread_free(m, thread);
}

/* Free all unused thread. */
static void thread_list_free(struct event_loop *m, struct event_list_head *list)
{
	struct event *t;

	while ((t = event_list_pop(list)))
		thread_free(m, t);
}

static void thread_array_free(struct event_loop *m, struct event **thread_array)
{
	struct event *t;
	int index;

	for (index = 0; index < m->fd_limit; ++index) {
		t = thread_array[index];
		if (t) {
			thread_array[index] = NULL;
			thread_free(m, t);
		}
	}
	XFREE(MTYPE_EVENT_POLL, thread_array);
}

/*
 * event_master_free_unused
 *
 * As threads are finished with, they are put on the unuse list for later
 * reuse. If we are shutting down, free up the unused threads so we can see
 * whether we forgot to shut anything off.
 */
void event_master_free_unused(struct event_loop *m)
{
	frr_with_mutex (&m->mtx) {
		struct event *t;

		while ((t = event_list_pop(&m->unuse)))
			thread_free(m, t);
	}
}

/* Stop thread scheduler. */
void event_master_free(struct event_loop *m)
{
	struct event *t;

	frr_with_mutex (&masters_mtx) {
		listnode_delete(masters, m);
		if (masters->count == 0) {
			list_delete(&masters);
		}
	}

	thread_array_free(m, m->read);
	thread_array_free(m, m->write);
	while ((t = event_timer_list_pop(&m->timer)))
		thread_free(m, t);
	thread_list_free(m, &m->event);
	thread_list_free(m, &m->ready);
	thread_list_free(m, &m->unuse);
	pthread_mutex_destroy(&m->mtx);
	pthread_cond_destroy(&m->cancel_cond);
	close(m->io_pipe[0]);
	close(m->io_pipe[1]);
	list_delete(&m->cancel_req);
	m->cancel_req = NULL;

	hash_clean_and_free(&m->cpu_record, cpu_record_hash_free);

	XFREE(MTYPE_EVENT_MASTER, m->name);
	XFREE(MTYPE_EVENT_MASTER, m->handler.pfds);
	XFREE(MTYPE_EVENT_MASTER, m->handler.copy);
	XFREE(MTYPE_EVENT_MASTER, m);
}

/* Return remaining time in milliseconds. */
unsigned long event_timer_remain_msec(struct event *thread)
{
	int64_t remain;

	if (!event_is_scheduled(thread))
		return 0;

	frr_with_mutex (&thread->mtx) {
		remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
	}

	return remain < 0 ? 0 : remain;
}

/* Return remaining time in seconds. */
unsigned long event_timer_remain_second(struct event *thread)
{
	return event_timer_remain_msec(thread) / 1000LL;
}

struct timeval event_timer_remain(struct event *thread)
{
	struct timeval remain;

	frr_with_mutex (&thread->mtx) {
		monotime_until(&thread->u.sands, &remain);
	}

	return remain;
}

static int time_hhmmss(char *buf, int buf_size, long sec)
{
	long hh;
	long mm;
	int wr;

	assert(buf_size >= 8);

	hh = sec / 3600;
	sec %= 3600;
	mm = sec / 60;
	sec %= 60;

	wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);

	return wr != 8;
}

char *event_timer_to_hhmmss(char *buf, int buf_size, struct event *t_timer)
{
	if (t_timer) {
		time_hhmmss(buf, buf_size, event_timer_remain_second(t_timer));
	} else {
		snprintf(buf, buf_size, "--:--:--");
	}
	return buf;
}

/* Get new thread. */
static struct event *thread_get(struct event_loop *m, uint8_t type,
				void (*func)(struct event *), void *arg,
				const struct xref_eventsched *xref)
{
	struct event *thread = event_list_pop(&m->unuse);
	struct cpu_event_history tmp;

	if (!thread) {
		thread = XCALLOC(MTYPE_THREAD, sizeof(struct event));
		/* mutex only needs to be initialized at struct creation. */
		pthread_mutex_init(&thread->mtx, NULL);
		m->alloc++;
	}

	thread->type = type;
	thread->add_type = type;
	thread->master = m;
	thread->arg = arg;
	thread->yield = EVENT_YIELD_TIME_SLOT; /* default */
	thread->ref = NULL;
	thread->ignore_timer_late = false;

	/*
	 * If the passed-in funcname is not what we have stored, then
	 * thread->hist needs to be updated. We keep the last one around
	 * in unused under the assumption that we are probably going to
	 * immediately allocate the same one again. This hopefully saves
	 * us a serious amount of lookups.
	 */
	if ((thread->xref && thread->xref->funcname != xref->funcname)
	    || thread->func != func) {
		tmp.func = func;
		tmp.funcname = xref->funcname;
		thread->hist =
			hash_get(m->cpu_record, &tmp,
				 (void *(*)(void *))cpu_record_hash_alloc);
	}
	thread->hist->total_active++;
	thread->func = func;
	thread->xref = xref;

	return thread;
}

static void thread_free(struct event_loop *master, struct event *thread)
{
	/* Update statistics. */
	assert(master->alloc > 0);
	master->alloc--;

	/* Free allocated resources. */
	pthread_mutex_destroy(&thread->mtx);
	XFREE(MTYPE_THREAD, thread);
}

static int fd_poll(struct event_loop *m, const struct timeval *timer_wait,
		   bool *eintr_p)
{
	sigset_t origsigs;
	unsigned char trash[64];
	nfds_t count = m->handler.copycount;

	/*
	 * If timer_wait is null here, that means poll() should block
	 * indefinitely, unless the event_master has overridden it by setting
	 * ->selectpoll_timeout.
	 *
	 * If the value is positive, it specifies the maximum number of
	 * milliseconds to wait. If the timeout is -1, it specifies that
	 * we should never wait and always return immediately even if no
	 * event is detected. If the value is zero, the behavior is default.
	 */
	int timeout = -1;

	/* number of file descriptors with events */
	int num;

	if (timer_wait != NULL
	    && m->selectpoll_timeout == 0) // use the default value
		timeout = (timer_wait->tv_sec * 1000)
			  + (timer_wait->tv_usec / 1000);
	else if (m->selectpoll_timeout > 0) // use the user's timeout
		timeout = m->selectpoll_timeout;
	else if (m->selectpoll_timeout
		 < 0) // effect a poll (return immediately)
		timeout = 0;

	zlog_tls_buffer_flush();
	rcu_read_unlock();
	rcu_assert_read_unlocked();

	/* add poll pipe poker */
	assert(count + 1 < m->handler.pfdsize);
	m->handler.copy[count].fd = m->io_pipe[0];
	m->handler.copy[count].events = POLLIN;
	m->handler.copy[count].revents = 0x00;

	/* We need to deal with a signal-handling race here: we
	 * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
	 * that may arrive just before we enter poll(). We will block the
	 * key signals, then check whether any have arrived - if so, we return
	 * before calling poll(). If not, we'll re-enable the signals
	 * in the ppoll() call.
	 */

	sigemptyset(&origsigs);
	if (m->handle_signals) {
		/* Main pthread that handles the app signals */
		if (frr_sigevent_check(&origsigs)) {
			/* Signal to process - restore signal mask and return */
			pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
			num = -1;
			*eintr_p = true;
			goto done;
		}
	} else {
		/* Don't make any changes for the non-main pthreads */
		pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
	}

#if defined(HAVE_PPOLL)
	struct timespec ts, *tsp;

	if (timeout >= 0) {
		ts.tv_sec = timeout / 1000;
		ts.tv_nsec = (timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
#else
	/* Not ideal - there is a race after we restore the signal mask */
	pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
	num = poll(m->handler.copy, count + 1, timeout);
#endif

done:

	if (num < 0 && errno == EINTR)
		*eintr_p = true;

	if (num > 0 && m->handler.copy[count].revents != 0 && num--)
		while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
			;

	rcu_read_lock();

	return num;
}

/* Add new read thread. */
void _event_add_read_write(const struct xref_eventsched *xref,
			   struct event_loop *m, void (*func)(struct event *),
			   void *arg, int fd, struct event **t_ptr)
{
	int dir = xref->event_type;
	struct event *thread = NULL;
	struct event **thread_array;

	if (dir == EVENT_READ)
		frrtrace(9, frr_libfrr, schedule_read, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);
	else
		frrtrace(9, frr_libfrr, schedule_write, m,
			 xref->funcname, xref->xref.file, xref->xref.line,
			 t_ptr, fd, 0, arg, 0);

	if (fd >= m->fd_limit)
		assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");

	frr_with_mutex (&m->mtx) {
		if (t_ptr && *t_ptr)
			// thread is already scheduled; don't reschedule
			break;

		/* default to a new pollfd */
		nfds_t queuepos = m->handler.pfdcount;

		if (dir == EVENT_READ)
			thread_array = m->read;
		else
			thread_array = m->write;

		/* if we already have a pollfd for our file descriptor, find
		 * and use it */
		for (nfds_t i = 0; i < m->handler.pfdcount; i++)
			if (m->handler.pfds[i].fd == fd) {
				queuepos = i;

				/*
				 * What happens if we have a thread already
				 * created for this event?
				 */
				if (thread_array[fd])
					assert(!"Thread already scheduled for file descriptor");

				break;
			}

		/* make sure we have room for this fd + pipe poker fd */
		assert(queuepos + 1 < m->handler.pfdsize);

		thread = thread_get(m, dir, func, arg, xref);

		m->handler.pfds[queuepos].fd = fd;
		m->handler.pfds[queuepos].events |=
			(dir == EVENT_READ ? POLLIN : POLLOUT);

		if (queuepos == m->handler.pfdcount)
			m->handler.pfdcount++;

		if (thread) {
			frr_with_mutex (&thread->mtx) {
				thread->u.fd = fd;
				thread_array[thread->u.fd] = thread;
			}

			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		AWAKEN(m);
	}
}

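/* Usage sketch (illustrative): callers go through the event_add_read() /
 * event_add_write() wrappers from event.h rather than calling
 * _event_add_read_write() directly. 'sock_read' and 'ctx' are placeholders.
 *
 *	static void sock_read(struct event *ev)
 *	{
 *		struct ctx *ctx = EVENT_ARG(ev);
 *		int fd = EVENT_FD(ev);
 *
 *		// ... read from fd, then re-arm for the next event ...
 *		event_add_read(ev->master, sock_read, ctx, fd, &ctx->t_read);
 *	}
 */
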
static void _event_add_timer_timeval(const struct xref_eventsched *xref,
				     struct event_loop *m,
				     void (*func)(struct event *), void *arg,
				     struct timeval *time_relative,
				     struct event **t_ptr)
{
	struct event *thread;
	struct timeval t;

	assert(m != NULL);
	assert(time_relative);

	frrtrace(9, frr_libfrr, schedule_timer, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);

	/* Compute expiration/deadline time. */
	monotime(&t);
	timeradd(&t, time_relative, &t);

	frr_with_mutex (&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			return;

		thread = thread_get(m, EVENT_TIMER, func, arg, xref);

		frr_with_mutex (&thread->mtx) {
			thread->u.sands = t;
			event_timer_list_add(&m->timer, thread);
			if (t_ptr) {
				*t_ptr = thread;
				thread->ref = t_ptr;
			}
		}

		/* The timer list is sorted - if this new timer
		 * might change the time we'll wait for, give the pthread
		 * a chance to re-compute.
		 */
		if (event_timer_list_first(&m->timer) == thread)
			AWAKEN(m);
	}
#define ONEYEAR2SEC (60 * 60 * 24 * 365)
	if (time_relative->tv_sec > ONEYEAR2SEC)
		flog_err(
			EC_LIB_TIMER_TOO_LONG,
			"Timer: %pTHD is created with an expiration that is greater than 1 year",
			thread);
}

/* Add timer event thread. */
void _event_add_timer(const struct xref_eventsched *xref, struct event_loop *m,
		      void (*func)(struct event *), void *arg, long timer,
		      struct event **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer;
	trel.tv_usec = 0;

	_event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "millisecond" resolution */
void _event_add_timer_msec(const struct xref_eventsched *xref,
			   struct event_loop *m, void (*func)(struct event *),
			   void *arg, long timer, struct event **t_ptr)
{
	struct timeval trel;

	assert(m != NULL);

	trel.tv_sec = timer / 1000;
	trel.tv_usec = 1000 * (timer % 1000);

	_event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}

/* Add timer event thread with "timeval" resolution */
void _event_add_timer_tv(const struct xref_eventsched *xref,
			 struct event_loop *m, void (*func)(struct event *),
			 void *arg, struct timeval *tv, struct event **t_ptr)
{
	_event_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}

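/* Usage sketch (illustrative): a periodic timer that re-arms itself from
 * its own callback; 'send_hello', 'peer' and 'hello_interval' are
 * placeholders.
 *
 *	static void send_hello(struct event *ev)
 *	{
 *		struct peer *peer = EVENT_ARG(ev);
 *
 *		// ... transmit hello ...
 *		event_add_timer(ev->master, send_hello, peer,
 *				peer->hello_interval, &peer->t_hello);
 *	}
 */
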
/* Add simple event thread. */
void _event_add_event(const struct xref_eventsched *xref, struct event_loop *m,
		      void (*func)(struct event *), void *arg, int val,
		      struct event **t_ptr)
{
	struct event *thread = NULL;

	frrtrace(9, frr_libfrr, schedule_event, m,
		 xref->funcname, xref->xref.file, xref->xref.line,
		 t_ptr, 0, val, arg, 0);

	assert(m != NULL);

	frr_with_mutex (&m->mtx) {
		if (t_ptr && *t_ptr)
			/* thread is already scheduled; don't reschedule */
			break;

		thread = thread_get(m, EVENT_EVENT, func, arg, xref);
		frr_with_mutex (&thread->mtx) {
			thread->u.val = val;
			event_list_add_tail(&m->event, thread);
		}

		if (t_ptr) {
			*t_ptr = thread;
			thread->ref = t_ptr;
		}

		AWAKEN(m);
	}
}

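/* Usage sketch (illustrative): an "event" task runs on the next pass of the
 * loop - handy for deferring work out of a deep call chain. 'reconfigure'
 * and 'vrf' are placeholders.
 *
 *	event_add_event(loop, reconfigure, vrf, 0, &vrf->t_reconfigure);
 */
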
/* Thread cancellation ------------------------------------------------------ */

/*
 * NOT's out the .events field of pollfd corresponding to the given file
 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
 *
 * This needs to happen for both copies of pollfd's. See 'event_fetch'
 * implementation for details.
 *
 * @param master
 * @param fd
 * @param state the event to cancel. One or more (OR'd together) of the
 * following: POLLIN, POLLOUT
 */
static void event_cancel_rw(struct event_loop *master, int fd, short state,
			    int idx_hint)
{
	bool found = false;

	/* find the index of corresponding pollfd */
	nfds_t i;

	/* Cancel POLLHUP too just in case some bozo set it */
	state |= POLLHUP;

	/* Some callers know the index of the pfd already */
	if (idx_hint >= 0) {
		i = idx_hint;
		found = true;
	} else {
		/* Have to look for the fd in the pfd array */
		for (i = 0; i < master->handler.pfdcount; i++)
			if (master->handler.pfds[i].fd == fd) {
				found = true;
				break;
			}
	}

	if (!found) {
		zlog_debug(
			"[!] Received cancellation request for nonexistent rw job");
		zlog_debug("[!] threadmaster: %s | fd: %d",
			   master->name ? master->name : "", fd);
		return;
	}

	/* NOT out event. */
	master->handler.pfds[i].events &= ~(state);

	/* If all events are canceled, delete / resize the pollfd array. */
	if (master->handler.pfds[i].events == 0) {
		memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
			(master->handler.pfdcount - i - 1)
				* sizeof(struct pollfd));
		master->handler.pfdcount--;
		master->handler.pfds[master->handler.pfdcount].fd = 0;
		master->handler.pfds[master->handler.pfdcount].events = 0;
	}

	/* If we have the same pollfd in the copy, perform the same operations,
	 * otherwise return.
	 */
	if (i >= master->handler.copycount)
		return;

	master->handler.copy[i].events &= ~(state);

	if (master->handler.copy[i].events == 0) {
		memmove(master->handler.copy + i, master->handler.copy + i + 1,
			(master->handler.copycount - i - 1)
				* sizeof(struct pollfd));
		master->handler.copycount--;
		master->handler.copy[master->handler.copycount].fd = 0;
		master->handler.copy[master->handler.copycount].events = 0;
	}
}

/*
 * Process task cancellation given a task argument: iterate through the
 * various lists of tasks, looking for any that match the argument.
 */
static void cancel_arg_helper(struct event_loop *master,
			      const struct cancel_req *cr)
{
	struct event *t;
	nfds_t i;
	int fd;
	struct pollfd *pfd;

	/* We're only processing arg-based cancellations here. */
	if (cr->eventobj == NULL)
		return;

	/* First process the ready lists. */
	frr_each_safe (event_list, &master->event, t) {
		if (t->arg != cr->eventobj)
			continue;
		event_list_del(&master->event, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	frr_each_safe (event_list, &master->ready, t) {
		if (t->arg != cr->eventobj)
			continue;
		event_list_del(&master->ready, t);
		if (t->ref)
			*t->ref = NULL;
		thread_add_unuse(master, t);
	}

	/* If requested, stop here and ignore io and timers */
	if (CHECK_FLAG(cr->flags, EVENT_CANCEL_FLAG_READY))
		return;

	/* Check the io tasks */
	for (i = 0; i < master->handler.pfdcount;) {
		pfd = master->handler.pfds + i;

		if (pfd->events & POLLIN)
			t = master->read[pfd->fd];
		else
			t = master->write[pfd->fd];

		if (t && t->arg == cr->eventobj) {
			fd = pfd->fd;

			/* Found a match to cancel: clean up fd arrays */
			event_cancel_rw(master, pfd->fd, pfd->events, i);

			/* Clean up thread arrays */
			master->read[fd] = NULL;
			master->write[fd] = NULL;

			/* Clear caller's ref */
			if (t->ref)
				*t->ref = NULL;

			thread_add_unuse(master, t);

			/* Don't increment 'i' since the cancellation will have
			 * removed the entry from the pfd array
			 */
		} else
			i++;
	}

	/* Check the timer tasks */
	t = event_timer_list_first(&master->timer);
	while (t) {
		struct event *t_next;

		t_next = event_timer_list_next(&master->timer, t);

		if (t->arg == cr->eventobj) {
			event_timer_list_del(&master->timer, t);
			if (t->ref)
				*t->ref = NULL;
			thread_add_unuse(master, t);
		}

		t = t_next;
	}
}

/*
 * Process cancellation requests.
 *
 * This may only be run from the pthread which owns the event_master.
 *
 * @param master the thread master to process
 * @REQUIRE master->mtx
 */
static void do_event_cancel(struct event_loop *master)
{
	struct event_list_head *list = NULL;
	struct event **thread_array = NULL;
	struct event *thread;
	struct cancel_req *cr;
	struct listnode *ln;

	for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
		/*
		 * If this is an event object cancellation, search
		 * through task lists deleting any tasks which have the
		 * specified argument - use this handy helper function.
		 */
		if (cr->eventobj) {
			cancel_arg_helper(master, cr);
			continue;
		}

		/*
		 * The pointer varies depending on whether the cancellation
		 * request was made asynchronously or not. If it was, we
		 * need to check whether the thread even exists anymore
		 * before cancelling it.
		 */
		thread = (cr->thread) ? cr->thread : *cr->threadref;

		if (!thread)
			continue;

		list = NULL;
		thread_array = NULL;

		/* Determine the appropriate queue to cancel the thread from */
		switch (thread->type) {
		case EVENT_READ:
			event_cancel_rw(master, thread->u.fd, POLLIN, -1);
			thread_array = master->read;
			break;
		case EVENT_WRITE:
			event_cancel_rw(master, thread->u.fd, POLLOUT, -1);
			thread_array = master->write;
			break;
		case EVENT_TIMER:
			event_timer_list_del(&master->timer, thread);
			break;
		case EVENT_EVENT:
			list = &master->event;
			break;
		case EVENT_READY:
			list = &master->ready;
			break;
		default:
			continue;
		}

		if (list) {
			event_list_del(list, thread);
		} else if (thread_array) {
			thread_array[thread->u.fd] = NULL;
		}

		if (thread->ref)
			*thread->ref = NULL;

		thread_add_unuse(thread->master, thread);
	}

	/* Delete and free all cancellation requests */
	if (master->cancel_req)
		list_delete_all_node(master->cancel_req);

	/* Wake up any threads which may be blocked in event_cancel_async() */
	master->canceled = true;
	pthread_cond_broadcast(&master->cancel_cond);
}

/*
 * Helper function used for multiple flavors of arg-based cancellation.
 */
static void cancel_event_helper(struct event_loop *m, void *arg, int flags)
{
	struct cancel_req *cr;

	assert(m->owner == pthread_self());

	/* Only worth anything if caller supplies an arg. */
	if (arg == NULL)
		return;

	cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));

	cr->flags = flags;

	frr_with_mutex (&m->mtx) {
		cr->eventobj = arg;
		listnode_add(m->cancel_req, cr);
		do_event_cancel(m);
	}
}

/*
 * Cancel any events which have the specified argument.
 *
 * MT-Unsafe
 *
 * @param master the event_master to cancel from
 * @param arg the argument passed when creating the event
 */
void event_cancel_event(struct event_loop *master, void *arg)
{
	cancel_event_helper(master, arg, 0);
}

/*
 * Cancel ready tasks with an arg matching 'arg'
 *
 * MT-Unsafe
 *
 * @param m the event_master to cancel from
 * @param arg the argument passed when creating the event
 */
void event_cancel_event_ready(struct event_loop *m, void *arg)
{
	/* Only cancel ready/event tasks */
	cancel_event_helper(m, arg, EVENT_CANCEL_FLAG_READY);
}

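/* Usage sketch (illustrative): before freeing an object, cancel every task
 * scheduled with that object as argument so no callback can fire against
 * freed memory. 'peer' and 'peer_free' are placeholders.
 *
 *	event_cancel_event(loop, peer);
 *	peer_free(peer);
 */
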
/*
 * Cancel a specific task.
 *
 * MT-Unsafe
 *
 * @param thread task to cancel
 */
void event_cancel(struct event **thread)
{
	struct event_loop *master;

	if (thread == NULL || *thread == NULL)
		return;

	master = (*thread)->master;

	frrtrace(9, frr_libfrr, event_cancel, master, (*thread)->xref->funcname,
		 (*thread)->xref->xref.file, (*thread)->xref->xref.line, NULL,
		 (*thread)->u.fd, (*thread)->u.val, (*thread)->arg,
		 (*thread)->u.sands.tv_sec);

	assert(master->owner == pthread_self());

	frr_with_mutex (&master->mtx) {
		struct cancel_req *cr =
			XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
		cr->thread = *thread;
		listnode_add(master->cancel_req, cr);
		do_event_cancel(master);
	}

	*thread = NULL;
}

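/* Usage sketch (illustrative): the back-reference pattern. Scheduling
 * stores the task pointer through 't_read'; cancelling through the same
 * pointer NULLs it, so a stale handle can never be reused.
 *
 *	event_add_read(loop, sock_read, ctx, fd, &ctx->t_read);
 *	...
 *	event_cancel(&ctx->t_read);	// ctx->t_read == NULL afterwards
 */
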
/*
 * Asynchronous cancellation.
 *
 * Called with either a struct event ** or void * to an event argument,
 * this function posts the correct cancellation request and blocks until it is
 * serviced.
 *
 * If the thread is currently running, execution blocks until it completes.
 *
 * The last two parameters are mutually exclusive, i.e. if you pass one the
 * other must be NULL.
 *
 * When the cancellation procedure executes on the target event_master, the
 * thread * provided is checked for nullity. If it is null, the thread is
 * assumed to no longer exist and the cancellation request is a no-op. Thus
 * users of this API must pass a back-reference when scheduling the original
 * task.
 *
 * MT-Safe
 *
 * @param master the thread master with the relevant event / task
 * @param thread pointer to thread to cancel
 * @param eventobj the event
 */
void event_cancel_async(struct event_loop *master, struct event **thread,
			void *eventobj)
{
	assert(!(thread && eventobj) && (thread || eventobj));

	if (thread && *thread)
		frrtrace(9, frr_libfrr, event_cancel_async, master,
			 (*thread)->xref->funcname, (*thread)->xref->xref.file,
			 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
			 (*thread)->u.val, (*thread)->arg,
			 (*thread)->u.sands.tv_sec);
	else
		frrtrace(9, frr_libfrr, event_cancel_async, master, NULL, NULL,
			 0, NULL, 0, 0, eventobj, 0);

	assert(master->owner != pthread_self());

	frr_with_mutex (&master->mtx) {
		master->canceled = false;

		if (thread) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->threadref = thread;
			listnode_add(master->cancel_req, cr);
		} else if (eventobj) {
			struct cancel_req *cr =
				XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
			cr->eventobj = eventobj;
			listnode_add(master->cancel_req, cr);
		}
		AWAKEN(master);

		while (!master->canceled)
			pthread_cond_wait(&master->cancel_cond, &master->mtx);
	}

	if (thread)
		*thread = NULL;
}

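/* Usage sketch (illustrative): a helper pthread that does not own
 * 'main_loop' must use the asynchronous variant, and may only free the
 * object once the call returns. 'session' names are placeholders.
 *
 *	// on a pthread where master->owner != pthread_self()
 *	event_cancel_async(main_loop, &session->t_timeout, NULL);
 *	session_free(session);	// safe: the task can no longer fire
 */
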
/* ------------------------------------------------------------------------- */

static struct timeval *thread_timer_wait(struct event_timer_list_head *timers,
					 struct timeval *timer_val)
{
	if (!event_timer_list_count(timers))
		return NULL;

	struct event *next_timer = event_timer_list_first(timers);

	monotime_until(&next_timer->u.sands, timer_val);
	return timer_val;
}

static struct event *thread_run(struct event_loop *m, struct event *thread,
				struct event *fetch)
{
	*fetch = *thread;
	thread_add_unuse(m, thread);
	return fetch;
}

static int thread_process_io_helper(struct event_loop *m, struct event *thread,
				    short state, short actual_state, int pos)
{
	struct event **thread_array;

	/*
	 * poll() clears the .events field, but the pollfd array we
	 * pass to poll() is a copy of the one used to schedule threads.
	 * We need to synchronize state between the two here by applying
	 * the same changes poll() made on the copy of the "real" pollfd
	 * array.
	 *
	 * This cleans up a possible infinite loop where we refuse
	 * to respond to a poll event but poll is insistent that
	 * we should.
	 */
	m->handler.pfds[pos].events &= ~(state);

	if (!thread) {
		if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
			flog_err(EC_LIB_NO_THREAD,
				 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
				 m->handler.pfds[pos].fd, actual_state);
		return 0;
	}

	if (thread->type == EVENT_READ)
		thread_array = m->read;
	else
		thread_array = m->write;

	thread_array[thread->u.fd] = NULL;
	event_list_add_tail(&m->ready, thread);
	thread->type = EVENT_READY;

	return 1;
}

/*
 * Process I/O events.
 *
 * Walks through file descriptor array looking for those pollfds whose .revents
 * field has something interesting. Deletes any invalid file descriptors.
 *
 * @param m the thread master
 * @param num the number of active file descriptors (return value of poll())
 */
static void thread_process_io(struct event_loop *m, unsigned int num)
{
	unsigned int ready = 0;
	struct pollfd *pfds = m->handler.copy;

	for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
		/* no event for current fd? immediately continue */
		if (pfds[i].revents == 0)
			continue;

		ready++;

		/*
		 * Unless someone has called event_cancel from another
		 * pthread, the only thing that could have changed in
		 * m->handler.pfds while we were asleep is the .events
		 * field in a given pollfd. Barring event_cancel() that
		 * value should be a superset of the values we have in our
		 * copy, so there's no need to update it. Similarly,
		 * barring deletion, the fd should still be a valid index
		 * into the master's pfds.
		 *
		 * We are including POLLERR here to do a READ event;
		 * this is because the read should fail and the
		 * read function should handle it appropriately.
		 */
		if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
			thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
						 pfds[i].revents, i);
		}
		if (pfds[i].revents & POLLOUT)
			thread_process_io_helper(m, m->write[pfds[i].fd],
						 POLLOUT, pfds[i].revents, i);

		/* if one of our file descriptors is garbage, remove the same
		 * from both pfds + update sizes and index
		 */
		if (pfds[i].revents & POLLNVAL) {
			memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
				(m->handler.pfdcount - i - 1)
					* sizeof(struct pollfd));
			m->handler.pfdcount--;
			m->handler.pfds[m->handler.pfdcount].fd = 0;
			m->handler.pfds[m->handler.pfdcount].events = 0;

			memmove(pfds + i, pfds + i + 1,
				(m->handler.copycount - i - 1)
					* sizeof(struct pollfd));
			m->handler.copycount--;
			m->handler.copy[m->handler.copycount].fd = 0;
			m->handler.copy[m->handler.copycount].events = 0;

			i--;
		}
	}
}

/* Add all timers that have popped to the ready list. */
static unsigned int thread_process_timers(struct event_loop *m,
					  struct timeval *timenow)
{
	struct timeval prev = *timenow;
	bool displayed = false;
	struct event *thread;
	unsigned int ready = 0;

	while ((thread = event_timer_list_first(&m->timer))) {
		if (timercmp(timenow, &thread->u.sands, <))
			break;
		prev = thread->u.sands;
		prev.tv_sec += 4;
		/*
		 * If the timer would have popped 4 seconds in the
		 * past then we are in a situation where we are
		 * really getting behind on handling of events.
		 * Let's log it and do the right thing with it.
		 */
		if (timercmp(timenow, &prev, >)) {
			atomic_fetch_add_explicit(
				&thread->hist->total_starv_warn, 1,
				memory_order_seq_cst);
			if (!displayed && !thread->ignore_timer_late) {
				flog_warn(
					EC_LIB_STARVE_THREAD,
					"Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
					thread);
				displayed = true;
			}
		}

		event_timer_list_pop(&m->timer);
		thread->type = EVENT_READY;
		event_list_add_tail(&m->ready, thread);
		ready++;
	}

	return ready;
}

/* process a list en masse, e.g. for event thread lists */
static unsigned int thread_process(struct event_list_head *list)
{
	struct event *thread;
	unsigned int ready = 0;

	while ((thread = event_list_pop(list))) {
		thread->type = EVENT_READY;
		event_list_add_tail(&thread->master->ready, thread);
		ready++;
	}
	return ready;
}

/* Fetch next ready thread. */
struct event *event_fetch(struct event_loop *m, struct event *fetch)
{
	struct event *thread = NULL;
	struct timeval now;
	struct timeval zerotime = {0, 0};
	struct timeval tv;
	struct timeval *tw = NULL;
	bool eintr_p = false;
	int num = 0;

	do {
		/* Handle signals if any */
		if (m->handle_signals)
			frr_sigevent_process();

		pthread_mutex_lock(&m->mtx);

		/* Process any pending cancellation requests */
		do_event_cancel(m);

		/*
		 * Attempt to flush ready queue before going into poll().
		 * This is performance-critical. Think twice before modifying.
		 */
		if ((thread = event_list_pop(&m->ready))) {
			fetch = thread_run(m, thread, fetch);
			if (fetch->ref)
				*fetch->ref = NULL;
			pthread_mutex_unlock(&m->mtx);
			if (!m->ready_run_loop)
				GETRUSAGE(&m->last_getrusage);
			m->ready_run_loop = true;
			break;
		}

		m->ready_run_loop = false;
		/* otherwise, tick through scheduling sequence */

		/*
		 * Post events to ready queue. This must come before the
		 * following block since events should occur immediately
		 */
		thread_process(&m->event);

		/*
		 * If there are no tasks on the ready queue, we will poll()
		 * until a timer expires or we receive I/O, whichever comes
		 * first. The strategy for doing this is:
		 *
		 * - If there are events pending, set the poll() timeout to zero
		 * - If there are no events pending, but there are timers
		 *   pending, set the timeout to the smallest remaining time on
		 *   any timer
		 * - If there are neither timers nor events pending, but there
		 *   are file descriptors pending, block indefinitely in poll()
		 * - If nothing is pending, it's time for the application to die
		 *
		 * In every case except the last, we need to hit poll() at least
		 * once per loop to avoid starvation by events
		 */
		if (!event_list_count(&m->ready))
			tw = thread_timer_wait(&m->timer, &tv);

		if (event_list_count(&m->ready) ||
		    (tw && !timercmp(tw, &zerotime, >)))
			tw = &zerotime;

		if (!tw && m->handler.pfdcount == 0) { /* die */
			pthread_mutex_unlock(&m->mtx);
			fetch = NULL;
			break;
		}

		/*
		 * Copy pollfd array + # active pollfds in it. Not necessary to
		 * copy the array size as this is fixed.
		 */
		m->handler.copycount = m->handler.pfdcount;
		memcpy(m->handler.copy, m->handler.pfds,
		       m->handler.copycount * sizeof(struct pollfd));

		pthread_mutex_unlock(&m->mtx);

		eintr_p = false;
		num = fd_poll(m, tw, &eintr_p);

		pthread_mutex_lock(&m->mtx);

		/* Handle any errors received in poll() */
		if (num < 0) {
			if (eintr_p) {
				pthread_mutex_unlock(&m->mtx);
				/* loop around to signal handler */
				continue;
			}

			/* else die */
			flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
				 safe_strerror(errno));
			pthread_mutex_unlock(&m->mtx);
			fetch = NULL;
			break;
		}

		/* Post timers to ready queue. */
		monotime(&now);
		thread_process_timers(m, &now);

		/* Post I/O to ready queue. */
		if (num > 0)
			thread_process_io(m, num);

		pthread_mutex_unlock(&m->mtx);

	} while (!thread && m->spin);

	return fetch;
}

static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
{
	return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
		+ (a.tv_usec - b.tv_usec));
}

unsigned long event_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
				  unsigned long *cputime)
{
#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID

#ifdef __FreeBSD__
	/*
	 * FreeBSD appears to have an issue when calling clock_gettime
	 * with CLOCK_THREAD_CPUTIME_ID really close to each other:
	 * occasionally the now time will be before the start time.
	 * This is not good, and FRR ends up reporting CPU HOGs
	 * when the subtraction wraps to very large numbers.
	 *
	 * What we are going to do here is cheat a little bit
	 * and notice that this is a problem and just correct
	 * it so that it is impossible to happen.
	 */
	if (start->cpu.tv_sec == now->cpu.tv_sec &&
	    start->cpu.tv_nsec > now->cpu.tv_nsec)
		now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
	else if (start->cpu.tv_sec > now->cpu.tv_sec) {
		now->cpu.tv_sec = start->cpu.tv_sec;
		now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
	}
#endif

	*cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
		   + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
	/* This is 'user + sys' time. */
	*cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
		   + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
#endif
	return timeval_elapsed(now->real, start->real);
}

/* We should aim to yield after yield milliseconds, which defaults
 * to EVENT_YIELD_TIME_SLOT.
 * Note: we are using real (wall clock) time for this calculation.
 * It could be argued that CPU time may make more sense in certain
 * contexts. The things to consider are whether the thread may have
 * blocked (in which case wall time increases, but CPU time does not),
 * or whether the system is heavily loaded with other processes competing
 * for CPU time. On balance, wall clock time seems to make sense.
 * Plus it has the added benefit that gettimeofday should be faster
 * than calling getrusage.
 */
int event_should_yield(struct event *thread)
{
	int result;

	frr_with_mutex (&thread->mtx) {
		result = monotime_since(&thread->real, NULL)
			 > (int64_t)thread->yield;
	}

	return result;
}

void event_set_yield_time(struct event *thread, unsigned long yield_time)
{
	frr_with_mutex (&thread->mtx) {
		thread->yield = yield_time;
	}
}

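/* Usage sketch (illustrative): a task draining a large queue checks
 * event_should_yield() and reschedules itself instead of hogging the loop.
 * 'drain_queue', 'queue_pop' and 'process_item' are placeholders.
 *
 *	static void drain_queue(struct event *ev)
 *	{
 *		struct queue *q = EVENT_ARG(ev);
 *		struct item *it;
 *
 *		while ((it = queue_pop(q))) {
 *			process_item(it);
 *			if (event_should_yield(ev)) {
 *				event_add_event(ev->master, drain_queue, q, 0,
 *						&q->t_drain);
 *				return;
 *			}
 *		}
 *	}
 */
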
void event_getrusage(RUSAGE_T *r)
{
	monotime(&r->real);
	if (!cputime_enabled) {
		memset(&r->cpu, 0, sizeof(r->cpu));
		return;
	}

#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
	/* not currently implemented in Linux's vDSO, but maybe at some point
	 * in the future?
	 */
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
#if defined RUSAGE_THREAD
#define FRR_RUSAGE RUSAGE_THREAD
#else
#define FRR_RUSAGE RUSAGE_SELF
#endif
	getrusage(FRR_RUSAGE, &(r->cpu));
#endif
}

/*
 * Call a thread.
 *
 * This function will atomically update the thread's usage history. At present
 * this is the only spot where usage history is written. Nevertheless the code
 * has been written such that the introduction of writers in the future should
 * not need to update it provided the writers atomically perform only the
 * operations done here, i.e. updating the total and maximum times. In
 * particular, the maximum real and cpu times must be monotonically increasing
 * or this code is not correct.
 */
void event_call(struct event *thread)
{
	RUSAGE_T before, after;

	/* if the thread being called is the CLI, it may change cputime_enabled
	 * ("service cputime-stats" command), which can result in nonsensical
	 * and very confusing warnings
	 */
	bool cputime_enabled_here = cputime_enabled;

	if (thread->master->ready_run_loop)
		before = thread->master->last_getrusage;
	else
		GETRUSAGE(&before);

	thread->real = before.real;

	frrtrace(9, frr_libfrr, event_call, thread->master,
		 thread->xref->funcname, thread->xref->xref.file,
		 thread->xref->xref.line, NULL, thread->u.fd, thread->u.val,
		 thread->arg, thread->u.sands.tv_sec);

	pthread_setspecific(thread_current, thread);
	(*thread->func)(thread);
	pthread_setspecific(thread_current, NULL);

	GETRUSAGE(&after);
	thread->master->last_getrusage = after;

	unsigned long walltime, cputime;
	unsigned long exp;

	walltime = event_consumed_time(&after, &before, &cputime);

	/* update walltime */
	atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
				  memory_order_seq_cst);
	exp = atomic_load_explicit(&thread->hist->real.max,
				   memory_order_seq_cst);
	while (exp < walltime
	       && !atomic_compare_exchange_weak_explicit(
		       &thread->hist->real.max, &exp, walltime,
		       memory_order_seq_cst, memory_order_seq_cst))
		;

	if (cputime_enabled_here && cputime_enabled) {
		/* update cputime */
		atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
					  memory_order_seq_cst);
		exp = atomic_load_explicit(&thread->hist->cpu.max,
					   memory_order_seq_cst);
		while (exp < cputime
		       && !atomic_compare_exchange_weak_explicit(
			       &thread->hist->cpu.max, &exp, cputime,
			       memory_order_seq_cst, memory_order_seq_cst))
			;
	}

	atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
				  memory_order_seq_cst);
	atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
				 memory_order_seq_cst);

	if (cputime_enabled_here && cputime_enabled && cputime_threshold
	    && cputime > cputime_threshold) {
		/*
		 * We have a CPU hog on our hands: the time FRR has spent
		 * doing actual work (not sleeping) exceeds the configured
		 * threshold. Whinge about it now, so we're aware this is
		 * yet another task to fix.
		 */
		atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
					  1, memory_order_seq_cst);
		flog_warn(
			EC_LIB_SLOW_THREAD_CPU,
			"CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
			thread->xref->funcname, (unsigned long)thread->func,
			walltime / 1000, cputime / 1000);

	} else if (walltime_threshold && walltime > walltime_threshold) {
		/*
		 * The runtime for a task exceeds the walltime threshold, but
		 * the cpu time is under it. Let's whine about this because
		 * this could imply some sort of scheduling issue.
		 */
		atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
					  1, memory_order_seq_cst);
		flog_warn(
			EC_LIB_SLOW_THREAD_WALL,
			"STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
			thread->xref->funcname, (unsigned long)thread->func,
			walltime / 1000, cputime / 1000);
	}
}

/* Execute thread */
void _event_execute(const struct xref_eventsched *xref, struct event_loop *m,
		    void (*func)(struct event *), void *arg, int val)
{
	struct event *thread;

	/* Get or allocate new thread to execute. */
	frr_with_mutex (&m->mtx) {
		thread = thread_get(m, EVENT_EVENT, func, arg, xref);

		/* Set its event value. */
		frr_with_mutex (&thread->mtx) {
			thread->add_type = EVENT_EXECUTE;
			thread->u.val = val;
			thread->ref = &thread;
		}
	}

	/* Execute thread doing all accounting. */
	event_call(thread);

	/* Give back or free thread. */
	thread_add_unuse(m, thread);
}

/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
void debug_signals(const sigset_t *sigs)
{
	int i, found;
	sigset_t tmpsigs;
	char buf[300];

	/*
	 * We're only looking at the non-realtime signals here, so we need
	 * some limit value. Platform differences mean at some point we just
	 * need to pick a reasonable value.
	 */
#if defined SIGRTMIN
#  define LAST_SIGNAL SIGRTMIN
#else
#  define LAST_SIGNAL 32
#endif

	if (sigs == NULL) {
		sigemptyset(&tmpsigs);
		pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
		sigs = &tmpsigs;
	}

	found = 0;
	buf[0] = '\0';

	for (i = 0; i < LAST_SIGNAL; i++) {
		char tmp[20];

		if (sigismember(sigs, i) > 0) {
			if (found > 0)
				strlcat(buf, ",", sizeof(buf));
			snprintf(tmp, sizeof(tmp), "%d", i);
			strlcat(buf, tmp, sizeof(buf));
			found++;
		}
	}

	if (found == 0)
		snprintf(buf, sizeof(buf), "<none>");

	zlog_debug("%s: %s", __func__, buf);
}

static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
				   const struct event *thread)
{
	static const char *const types[] = {
		[EVENT_READ] = "read", [EVENT_WRITE] = "write",
		[EVENT_TIMER] = "timer", [EVENT_EVENT] = "event",
		[EVENT_READY] = "ready", [EVENT_UNUSED] = "unused",
		[EVENT_EXECUTE] = "exec",
	};
	ssize_t rv = 0;
	char info[16] = "";

	if (!thread)
		return bputs(buf, "{(thread *)NULL}");

	rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);

	if (thread->type < array_size(types) && types[thread->type])
		rv += bprintfrr(buf, " %-6s", types[thread->type]);
	else
		rv += bprintfrr(buf, " INVALID(%u)", thread->type);

	switch (thread->type) {
	case EVENT_READ:
	case EVENT_WRITE:
		snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
		break;

	case EVENT_TIMER:
		snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
		break;
	default:
		break;
	}

	rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
			thread->xref->funcname, thread->xref->dest,
			thread->xref->xref.file, thread->xref->xref.line);
	return rv;
}

2169 printfrr_ext_autoreg_p("TH", printfrr_thread
);
2170 static ssize_t
printfrr_thread(struct fbuf
*buf
, struct printfrr_eargs
*ea
,
2173 const struct event
*thread
= ptr
;
2174 struct timespec remain
= {};
2176 if (ea
->fmt
[0] == 'D') {
2178 return printfrr_thread_dbg(buf
, ea
, thread
);
2182 /* need to jump over time formatting flag characters in the
2183 * input format string, i.e. adjust ea->fmt!
2185 printfrr_time(buf
, ea
, &remain
,
2186 TIMEFMT_TIMER_DEADLINE
| TIMEFMT_SKIP
);
2187 return bputch(buf
, '-');
2190 TIMEVAL_TO_TIMESPEC(&thread
->u
.sands
, &remain
);
2191 return printfrr_time(buf
, ea
, &remain
, TIMEFMT_TIMER_DEADLINE
);