lib/event.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /* Thread management routine
   3  * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
   4  */
   5
   6 /* #define DEBUG */
   7
   8 #include <zebra.h>
   9 #include <sys/resource.h>
  10
  11 #include "frrevent.h"
  12 #include "memory.h"
  13 #include "frrcu.h"
  14 #include "log.h"
  15 #include "hash.h"
  16 #include "command.h"
  17 #include "sigevent.h"
  18 #include "network.h"
  19 #include "jhash.h"
  20 #include "frratomic.h"
  21 #include "frr_pthread.h"
  22 #include "lib_errors.h"
  23 #include "libfrr_trace.h"
  24 #include "libfrr.h"
  25
  26 DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
  27 DEFINE_MTYPE_STATIC(LIB, EVENT_MASTER, "Thread master");
  28 DEFINE_MTYPE_STATIC(LIB, EVENT_POLL, "Thread Poll Info");
  29 DEFINE_MTYPE_STATIC(LIB, EVENT_STATS, "Thread stats");
  30
  31 DECLARE_LIST(event_list, struct event, eventitem);
  32
  33 struct cancel_req {
  34         int flags;
  35         struct event *thread;
  36         void *eventobj;
  37         struct event **threadref;
  38 };
  39
  40 /* Flags for task cancellation */
  41 #define EVENT_CANCEL_FLAG_READY 0x01
  42
  43 static int event_timer_cmp(const struct event *a, const struct event *b)
  44 {
  45         if (a->u.sands.tv_sec < b->u.sands.tv_sec)
  46                 return -1;
  47         if (a->u.sands.tv_sec > b->u.sands.tv_sec)
  48                 return 1;
  49         if (a->u.sands.tv_usec < b->u.sands.tv_usec)
  50                 return -1;
  51         if (a->u.sands.tv_usec > b->u.sands.tv_usec)
  52                 return 1;
  53         return 0;
  54 }
  55
  56 DECLARE_HEAP(event_timer_list, struct event, timeritem, event_timer_cmp);
  57
  58 #if defined(__APPLE__)
  59 #include <mach/mach.h>
  60 #include <mach/mach_time.h>
  61 #endif
  62
  63 #define AWAKEN(m)                                                              \
  64         do {                                                                   \
  65                 const unsigned char wakebyte = 0x01;                           \
  66                 write(m->io_pipe[1], &wakebyte, 1);                            \
  67         } while (0)
  68
  69 /* control variable for initializer */
  70 static pthread_once_t init_once = PTHREAD_ONCE_INIT;
  71 pthread_key_t thread_current;
  72
  73 static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
  74 static struct list *masters;
  75
  76 static void thread_free(struct event_loop *master, struct event *thread);
  77
  78 #ifndef EXCLUDE_CPU_TIME
  79 #define EXCLUDE_CPU_TIME 0
  80 #endif
  81 #ifndef CONSUMED_TIME_CHECK
  82 #define CONSUMED_TIME_CHECK 0
  83 #endif
  84
  85 bool cputime_enabled = !EXCLUDE_CPU_TIME;
  86 unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
  87 unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
  88
  89 /* CLI start ---------------------------------------------------------------- */
  90 #include "lib/event_clippy.c"
  91
  92 static unsigned int cpu_record_hash_key(const struct cpu_event_history *a)
  93 {
  94         int size = sizeof(a->func);
  95
  96         return jhash(&a->func, size, 0);
  97 }
  98
  99 static bool cpu_record_hash_cmp(const struct cpu_event_history *a,
 100                                 const struct cpu_event_history *b)
 101 {
 102         return a->func == b->func;
 103 }
 104
 105 static void *cpu_record_hash_alloc(struct cpu_event_history *a)
 106 {
 107         struct cpu_event_history *new;
 108
 109         new = XCALLOC(MTYPE_EVENT_STATS, sizeof(struct cpu_event_history));
 110         new->func = a->func;
 111         new->funcname = a->funcname;
 112         return new;
 113 }
 114
 115 static void cpu_record_hash_free(void *a)
 116 {
 117         struct cpu_event_history *hist = a;
 118
 119         XFREE(MTYPE_EVENT_STATS, hist);
 120 }
 121
 122 static void vty_out_cpu_event_history(struct vty *vty,
 123                                       struct cpu_event_history *a)
 124 {
 125         vty_out(vty,
 126                 "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
 127                 a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
 128                 a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
 129                 (a->real.total / a->total_calls), a->real.max,
 130                 a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
 131         vty_out(vty, "  %c%c%c%c%c  %s\n",
 132                 a->types & (1 << EVENT_READ) ? 'R' : ' ',
 133                 a->types & (1 << EVENT_WRITE) ? 'W' : ' ',
 134                 a->types & (1 << EVENT_TIMER) ? 'T' : ' ',
 135                 a->types & (1 << EVENT_EVENT) ? 'E' : ' ',
 136                 a->types & (1 << EVENT_EXECUTE) ? 'X' : ' ', a->funcname);
 137 }
 138
 139 static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
 140 {
 141         struct cpu_event_history *totals = args[0];
 142         struct cpu_event_history copy;
 143         struct vty *vty = args[1];
 144         uint8_t *filter = args[2];
 145
 146         struct cpu_event_history *a = bucket->data;
 147
 148         copy.total_active =
 149                 atomic_load_explicit(&a->total_active, memory_order_seq_cst);
 150         copy.total_calls =
 151                 atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
 152         copy.total_cpu_warn =
 153                 atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
 154         copy.total_wall_warn =
 155                 atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
 156         copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
 157                                                      memory_order_seq_cst);
 158         copy.cpu.total =
 159                 atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
 160         copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
 161         copy.real.total =
 162                 atomic_load_explicit(&a->real.total, memory_order_seq_cst);
 163         copy.real.max =
 164                 atomic_load_explicit(&a->real.max, memory_order_seq_cst);
 165         copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
 166         copy.funcname = a->funcname;
 167
 168         if (!(copy.types & *filter))
 169                 return;
 170
 171         vty_out_cpu_event_history(vty, &copy);
 172         totals->total_active += copy.total_active;
 173         totals->total_calls += copy.total_calls;
 174         totals->total_cpu_warn += copy.total_cpu_warn;
 175         totals->total_wall_warn += copy.total_wall_warn;
 176         totals->total_starv_warn += copy.total_starv_warn;
 177         totals->real.total += copy.real.total;
 178         if (totals->real.max < copy.real.max)
 179                 totals->real.max = copy.real.max;
 180         totals->cpu.total += copy.cpu.total;
 181         if (totals->cpu.max < copy.cpu.max)
 182                 totals->cpu.max = copy.cpu.max;
 183 }
 184
 185 static void cpu_record_print(struct vty *vty, uint8_t filter)
 186 {
 187         struct cpu_event_history tmp;
 188         void *args[3] = {&tmp, vty, &filter};
 189         struct event_loop *m;
 190         struct listnode *ln;
 191
 192         if (!cputime_enabled)
 193                 vty_out(vty,
 194                         "\n"
 195                         "Collecting CPU time statistics is currently disabled.  Following statistics\n"
 196                         "will be zero or may display data from when collection was enabled.  Use the\n"
 197                         "  \"service cputime-stats\"  command to start collecting data.\n"
 198                         "\nCounters and wallclock times are always maintained and should be accurate.\n");
 199
 200         memset(&tmp, 0, sizeof(tmp));
 201         tmp.funcname = "TOTAL";
 202         tmp.types = filter;
 203
 204         frr_with_mutex (&masters_mtx) {
 205                 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
 206                         const char *name = m->name ? m->name : "main";
 207                         char underline[strlen(name) + 1];
 208
 209                         memset(underline, '-', sizeof(underline));
 210                         underline[sizeof(underline) - 1] = '\0';
 211
 212                         vty_out(vty, "\n");
 213                         vty_out(vty, "Showing statistics for pthread %s\n",
 214                                 name);
 215                         vty_out(vty, "-------------------------------%s\n",
 216                                 underline);
 217                         vty_out(vty, "%30s %18s %18s\n", "",
 218                                 "CPU (user+system):", "Real (wall-clock):");
 219                         vty_out(vty,
 220                                 "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
 221                         vty_out(vty, " Avg uSec Max uSecs");
 222                         vty_out(vty,
 223                                 "  CPU_Warn Wall_Warn Starv_Warn Type   Thread\n");
 224
 225                         if (m->cpu_record->count)
 226                                 hash_iterate(
 227                                         m->cpu_record,
 228                                         (void (*)(struct hash_bucket *,
 229                                                   void *))cpu_record_hash_print,
 230                                         args);
 231                         else
 232                                 vty_out(vty, "No data to display yet.\n");
 233
 234                         vty_out(vty, "\n");
 235                 }
 236         }
 237
 238         vty_out(vty, "\n");
 239         vty_out(vty, "Total thread statistics\n");
 240         vty_out(vty, "-------------------------\n");
 241         vty_out(vty, "%30s %18s %18s\n", "",
 242                 "CPU (user+system):", "Real (wall-clock):");
 243         vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
 244         vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
 245         vty_out(vty, "  Type  Thread\n");
 246
 247         if (tmp.total_calls > 0)
 248                 vty_out_cpu_event_history(vty, &tmp);
 249 }
 250
 251 static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
 252 {
 253         uint8_t *filter = args[0];
 254         struct hash *cpu_record = args[1];
 255
 256         struct cpu_event_history *a = bucket->data;
 257
 258         if (!(a->types & *filter))
 259                 return;
 260
 261         hash_release(cpu_record, bucket->data);
 262 }
 263
 264 static void cpu_record_clear(uint8_t filter)
 265 {
 266         uint8_t *tmp = &filter;
 267         struct event_loop *m;
 268         struct listnode *ln;
 269
 270         frr_with_mutex (&masters_mtx) {
 271                 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
 272                         frr_with_mutex (&m->mtx) {
 273                                 void *args[2] = {tmp, m->cpu_record};
 274
 275                                 hash_iterate(
 276                                         m->cpu_record,
 277                                         (void (*)(struct hash_bucket *,
 278                                                   void *))cpu_record_hash_clear,
 279                                         args);
 280                         }
 281                 }
 282         }
 283 }
 284
 285 static uint8_t parse_filter(const char *filterstr)
 286 {
 287         int i = 0;
 288         int filter = 0;
 289
 290         while (filterstr[i] != '\0') {
 291                 switch (filterstr[i]) {
 292                 case 'r':
 293                 case 'R':
 294                         filter |= (1 << EVENT_READ);
 295                         break;
 296                 case 'w':
 297                 case 'W':
 298                         filter |= (1 << EVENT_WRITE);
 299                         break;
 300                 case 't':
 301                 case 'T':
 302                         filter |= (1 << EVENT_TIMER);
 303                         break;
 304                 case 'e':
 305                 case 'E':
 306                         filter |= (1 << EVENT_EVENT);
 307                         break;
 308                 case 'x':
 309                 case 'X':
 310                         filter |= (1 << EVENT_EXECUTE);
 311                         break;
 312                 default:
 313                         break;
 314                 }
 315                 ++i;
 316         }
 317         return filter;
 318 }
 319
 320 DEFUN_NOSH (show_thread_cpu,
 321             show_thread_cpu_cmd,
 322             "show thread cpu [FILTER]",
 323             SHOW_STR
 324             "Thread information\n"
 325             "Thread CPU usage\n"
 326             "Display filter (rwtex)\n")
 327 {
 328         uint8_t filter = (uint8_t)-1U;
 329         int idx = 0;
 330
 331         if (argv_find(argv, argc, "FILTER", &idx)) {
 332                 filter = parse_filter(argv[idx]->arg);
 333                 if (!filter) {
 334                         vty_out(vty,
 335                                 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
 336                                 argv[idx]->arg);
 337                         return CMD_WARNING;
 338                 }
 339         }
 340
 341         cpu_record_print(vty, filter);
 342         return CMD_SUCCESS;
 343 }
 344
 345 DEFPY (service_cputime_stats,
 346        service_cputime_stats_cmd,
 347        "[no] service cputime-stats",
 348        NO_STR
 349        "Set up miscellaneous service\n"
 350        "Collect CPU usage statistics\n")
 351 {
 352         cputime_enabled = !no;
 353         return CMD_SUCCESS;
 354 }
 355
 356 DEFPY (service_cputime_warning,
 357        service_cputime_warning_cmd,
 358        "[no] service cputime-warning (1-4294967295)",
 359        NO_STR
 360        "Set up miscellaneous service\n"
 361        "Warn for tasks exceeding CPU usage threshold\n"
 362        "Warning threshold in milliseconds\n")
 363 {
 364         if (no)
 365                 cputime_threshold = 0;
 366         else
 367                 cputime_threshold = cputime_warning * 1000;
 368         return CMD_SUCCESS;
 369 }
 370
 371 ALIAS (service_cputime_warning,
 372        no_service_cputime_warning_cmd,
 373        "no service cputime-warning",
 374        NO_STR
 375        "Set up miscellaneous service\n"
 376        "Warn for tasks exceeding CPU usage threshold\n")
 377
 378 DEFPY (service_walltime_warning,
 379        service_walltime_warning_cmd,
 380        "[no] service walltime-warning (1-4294967295)",
 381        NO_STR
 382        "Set up miscellaneous service\n"
 383        "Warn for tasks exceeding total wallclock threshold\n"
 384        "Warning threshold in milliseconds\n")
 385 {
 386         if (no)
 387                 walltime_threshold = 0;
 388         else
 389                 walltime_threshold = walltime_warning * 1000;
 390         return CMD_SUCCESS;
 391 }
 392
 393 ALIAS (service_walltime_warning,
 394        no_service_walltime_warning_cmd,
 395        "no service walltime-warning",
 396        NO_STR
 397        "Set up miscellaneous service\n"
 398        "Warn for tasks exceeding total wallclock threshold\n")
 399
 400 static void show_thread_poll_helper(struct vty *vty, struct event_loop *m)
 401 {
 402         const char *name = m->name ? m->name : "main";
 403         char underline[strlen(name) + 1];
 404         struct event *thread;
 405         uint32_t i;
 406
 407         memset(underline, '-', sizeof(underline));
 408         underline[sizeof(underline) - 1] = '\0';
 409
 410         vty_out(vty, "\nShowing poll FD's for %s\n", name);
 411         vty_out(vty, "----------------------%s\n", underline);
 412         vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
 413                 m->fd_limit);
 414         for (i = 0; i < m->handler.pfdcount; i++) {
 415                 vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
 416                         m->handler.pfds[i].fd, m->handler.pfds[i].events,
 417                         m->handler.pfds[i].revents);
 418
 419                 if (m->handler.pfds[i].events & POLLIN) {
 420                         thread = m->read[m->handler.pfds[i].fd];
 421
 422                         if (!thread)
 423                                 vty_out(vty, "ERROR ");
 424                         else
 425                                 vty_out(vty, "%s ", thread->xref->funcname);
 426                 } else
 427                         vty_out(vty, " ");
 428
 429                 if (m->handler.pfds[i].events & POLLOUT) {
 430                         thread = m->write[m->handler.pfds[i].fd];
 431
 432                         if (!thread)
 433                                 vty_out(vty, "ERROR\n");
 434                         else
 435                                 vty_out(vty, "%s\n", thread->xref->funcname);
 436                 } else
 437                         vty_out(vty, "\n");
 438         }
 439 }
 440
 441 DEFUN_NOSH (show_thread_poll,
 442             show_thread_poll_cmd,
 443             "show thread poll",
 444             SHOW_STR
 445             "Thread information\n"
 446             "Show poll FD's and information\n")
 447 {
 448         struct listnode *node;
 449         struct event_loop *m;
 450
 451         frr_with_mutex (&masters_mtx) {
 452                 for (ALL_LIST_ELEMENTS_RO(masters, node, m))
 453                         show_thread_poll_helper(vty, m);
 454         }
 455
 456         return CMD_SUCCESS;
 457 }
 458
 459
 460 DEFUN (clear_thread_cpu,
 461        clear_thread_cpu_cmd,
 462        "clear thread cpu [FILTER]",
 463        "Clear stored data in all pthreads\n"
 464        "Thread information\n"
 465        "Thread CPU usage\n"
 466        "Display filter (rwtexb)\n")
 467 {
 468         uint8_t filter = (uint8_t)-1U;
 469         int idx = 0;
 470
 471         if (argv_find(argv, argc, "FILTER", &idx)) {
 472                 filter = parse_filter(argv[idx]->arg);
 473                 if (!filter) {
 474                         vty_out(vty,
 475                                 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
 476                                 argv[idx]->arg);
 477                         return CMD_WARNING;
 478                 }
 479         }
 480
 481         cpu_record_clear(filter);
 482         return CMD_SUCCESS;
 483 }
 484
 485 static void show_thread_timers_helper(struct vty *vty, struct event_loop *m)
 486 {
 487         const char *name = m->name ? m->name : "main";
 488         char underline[strlen(name) + 1];
 489         struct event *thread;
 490
 491         memset(underline, '-', sizeof(underline));
 492         underline[sizeof(underline) - 1] = '\0';
 493
 494         vty_out(vty, "\nShowing timers for %s\n", name);
 495         vty_out(vty, "-------------------%s\n", underline);
 496
 497         frr_each (event_timer_list, &m->timer, thread) {
 498                 vty_out(vty, "  %-50s%pTH\n", thread->hist->funcname, thread);
 499         }
 500 }
 501
 502 DEFPY_NOSH (show_thread_timers,
 503             show_thread_timers_cmd,
 504             "show thread timers",
 505             SHOW_STR
 506             "Thread information\n"
 507             "Show all timers and how long they have in the system\n")
 508 {
 509         struct listnode *node;
 510         struct event_loop *m;
 511
 512         frr_with_mutex (&masters_mtx) {
 513                 for (ALL_LIST_ELEMENTS_RO(masters, node, m))
 514                         show_thread_timers_helper(vty, m);
 515         }
 516
 517         return CMD_SUCCESS;
 518 }
 519
 520 void event_cmd_init(void)
 521 {
 522         install_element(VIEW_NODE, &show_thread_cpu_cmd);
 523         install_element(VIEW_NODE, &show_thread_poll_cmd);
 524         install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
 525
 526         install_element(CONFIG_NODE, &service_cputime_stats_cmd);
 527         install_element(CONFIG_NODE, &service_cputime_warning_cmd);
 528         install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
 529         install_element(CONFIG_NODE, &service_walltime_warning_cmd);
 530         install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
 531
 532         install_element(VIEW_NODE, &show_thread_timers_cmd);
 533 }
 534 /* CLI end ------------------------------------------------------------------ */
 535
 536
 537 static void cancelreq_del(void *cr)
 538 {
 539         XFREE(MTYPE_TMP, cr);
 540 }
 541
 542 /* initializer, only ever called once */
 543 static void initializer(void)
 544 {
 545         pthread_key_create(&thread_current, NULL);
 546 }
 547
 548 struct event_loop *event_master_create(const char *name)
 549 {
 550         struct event_loop *rv;
 551         struct rlimit limit;
 552
 553         pthread_once(&init_once, &initializer);
 554
 555         rv = XCALLOC(MTYPE_EVENT_MASTER, sizeof(struct event_loop));
 556
 557         /* Initialize master mutex */
 558         pthread_mutex_init(&rv->mtx, NULL);
 559         pthread_cond_init(&rv->cancel_cond, NULL);
 560
 561         /* Set name */
 562         name = name ? name : "default";
 563         rv->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
 564
 565         /* Initialize I/O task data structures */
 566
 567         /* Use configured limit if present, ulimit otherwise. */
 568         rv->fd_limit = frr_get_fd_limit();
 569         if (rv->fd_limit == 0) {
 570                 getrlimit(RLIMIT_NOFILE, &limit);
 571                 rv->fd_limit = (int)limit.rlim_cur;
 572         }
 573
 574         rv->read = XCALLOC(MTYPE_EVENT_POLL,
 575                            sizeof(struct event *) * rv->fd_limit);
 576
 577         rv->write = XCALLOC(MTYPE_EVENT_POLL,
 578                             sizeof(struct event *) * rv->fd_limit);
 579
 580         char tmhashname[strlen(name) + 32];
 581
 582         snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
 583                  name);
 584         rv->cpu_record = hash_create_size(
 585                 8, (unsigned int (*)(const void *))cpu_record_hash_key,
 586                 (bool (*)(const void *, const void *))cpu_record_hash_cmp,
 587                 tmhashname);
 588
 589         event_list_init(&rv->event);
 590         event_list_init(&rv->ready);
 591         event_list_init(&rv->unuse);
 592         event_timer_list_init(&rv->timer);
 593
 594         /* Initialize event_fetch() settings */
 595         rv->spin = true;
 596         rv->handle_signals = true;
 597
 598         /* Set pthread owner, should be updated by actual owner */
 599         rv->owner = pthread_self();
 600         rv->cancel_req = list_new();
 601         rv->cancel_req->del = cancelreq_del;
 602         rv->canceled = true;
 603
 604         /* Initialize pipe poker */
 605         pipe(rv->io_pipe);
 606         set_nonblocking(rv->io_pipe[0]);
 607         set_nonblocking(rv->io_pipe[1]);
 608
 609         /* Initialize data structures for poll() */
 610         rv->handler.pfdsize = rv->fd_limit;
 611         rv->handler.pfdcount = 0;
 612         rv->handler.pfds = XCALLOC(MTYPE_EVENT_MASTER,
 613                                    sizeof(struct pollfd) * rv->handler.pfdsize);
 614         rv->handler.copy = XCALLOC(MTYPE_EVENT_MASTER,
 615                                    sizeof(struct pollfd) * rv->handler.pfdsize);
 616
 617         /* add to list of threadmasters */
 618         frr_with_mutex (&masters_mtx) {
 619                 if (!masters)
 620                         masters = list_new();
 621
 622                 listnode_add(masters, rv);
 623         }
 624
 625         return rv;
 626 }
 627
 628 void event_master_set_name(struct event_loop *master, const char *name)
 629 {
 630         frr_with_mutex (&master->mtx) {
 631                 XFREE(MTYPE_EVENT_MASTER, master->name);
 632                 master->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
 633         }
 634 }
 635
 636 #define EVENT_UNUSED_DEPTH 10
 637
 638 /* Move thread to unuse list. */
 639 static void thread_add_unuse(struct event_loop *m, struct event *thread)
 640 {
 641         pthread_mutex_t mtxc = thread->mtx;
 642
 643         assert(m != NULL && thread != NULL);
 644
 645         thread->hist->total_active--;
 646         memset(thread, 0, sizeof(struct event));
 647         thread->type = EVENT_UNUSED;
 648
 649         /* Restore the thread mutex context. */
 650         thread->mtx = mtxc;
 651
 652         if (event_list_count(&m->unuse) < EVENT_UNUSED_DEPTH) {
 653                 event_list_add_tail(&m->unuse, thread);
 654                 return;
 655         }
 656
 657         thread_free(m, thread);
 658 }
 659
 660 /* Free all unused thread. */
 661 static void thread_list_free(struct event_loop *m, struct event_list_head *list)
 662 {
 663         struct event *t;
 664
 665         while ((t = event_list_pop(list)))
 666                 thread_free(m, t);
 667 }
 668
 669 static void thread_array_free(struct event_loop *m, struct event **thread_array)
 670 {
 671         struct event *t;
 672         int index;
 673
 674         for (index = 0; index < m->fd_limit; ++index) {
 675                 t = thread_array[index];
 676                 if (t) {
 677                         thread_array[index] = NULL;
 678                         thread_free(m, t);
 679                 }
 680         }
 681         XFREE(MTYPE_EVENT_POLL, thread_array);
 682 }
 683
 684 /*
 685  * event_master_free_unused
 686  *
 687  * As threads are finished with they are put on the
 688  * unuse list for later reuse.
 689  * If we are shutting down, Free up unused threads
 690  * So we can see if we forget to shut anything off
 691  */
 692 void event_master_free_unused(struct event_loop *m)
 693 {
 694         frr_with_mutex (&m->mtx) {
 695                 struct event *t;
 696
 697                 while ((t = event_list_pop(&m->unuse)))
 698                         thread_free(m, t);
 699         }
 700 }
 701
 702 /* Stop thread scheduler. */
 703 void event_master_free(struct event_loop *m)
 704 {
 705         struct event *t;
 706
 707         frr_with_mutex (&masters_mtx) {
 708                 listnode_delete(masters, m);
 709                 if (masters->count == 0)
 710                         list_delete(&masters);
 711         }
 712
 713         thread_array_free(m, m->read);
 714         thread_array_free(m, m->write);
 715         while ((t = event_timer_list_pop(&m->timer)))
 716                 thread_free(m, t);
 717         thread_list_free(m, &m->event);
 718         thread_list_free(m, &m->ready);
 719         thread_list_free(m, &m->unuse);
 720         pthread_mutex_destroy(&m->mtx);
 721         pthread_cond_destroy(&m->cancel_cond);
 722         close(m->io_pipe[0]);
 723         close(m->io_pipe[1]);
 724         list_delete(&m->cancel_req);
 725         m->cancel_req = NULL;
 726
 727         hash_clean_and_free(&m->cpu_record, cpu_record_hash_free);
 728
 729         XFREE(MTYPE_EVENT_MASTER, m->name);
 730         XFREE(MTYPE_EVENT_MASTER, m->handler.pfds);
 731         XFREE(MTYPE_EVENT_MASTER, m->handler.copy);
 732         XFREE(MTYPE_EVENT_MASTER, m);
 733 }
 734
 735 /* Return remain time in milliseconds. */
 736 unsigned long event_timer_remain_msec(struct event *thread)
 737 {
 738         int64_t remain;
 739
 740         if (!event_is_scheduled(thread))
 741                 return 0;
 742
 743         frr_with_mutex (&thread->mtx) {
 744                 remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
 745         }
 746
 747         return remain < 0 ? 0 : remain;
 748 }
 749
 750 /* Return remain time in seconds. */
 751 unsigned long event_timer_remain_second(struct event *thread)
 752 {
 753         return event_timer_remain_msec(thread) / 1000LL;
 754 }
 755
 756 struct timeval event_timer_remain(struct event *thread)
 757 {
 758         struct timeval remain;
 759
 760         frr_with_mutex (&thread->mtx) {
 761                 monotime_until(&thread->u.sands, &remain);
 762         }
 763         return remain;
 764 }
 765
 766 static int time_hhmmss(char *buf, int buf_size, long sec)
 767 {
 768         long hh;
 769         long mm;
 770         int wr;
 771
 772         assert(buf_size >= 8);
 773
 774         hh = sec / 3600;
 775         sec %= 3600;
 776         mm = sec / 60;
 777         sec %= 60;
 778
 779         wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
 780
 781         return wr != 8;
 782 }
 783
 784 char *event_timer_to_hhmmss(char *buf, int buf_size, struct event *t_timer)
 785 {
 786         if (t_timer)
 787                 time_hhmmss(buf, buf_size, event_timer_remain_second(t_timer));
 788         else
 789                 snprintf(buf, buf_size, "--:--:--");
 790
 791         return buf;
 792 }
 793
 794 /* Get new thread.  */
 795 static struct event *thread_get(struct event_loop *m, uint8_t type,
 796                                 void (*func)(struct event *), void *arg,
 797                                 const struct xref_eventsched *xref)
 798 {
 799         struct event *thread = event_list_pop(&m->unuse);
 800         struct cpu_event_history tmp;
 801
 802         if (!thread) {
 803                 thread = XCALLOC(MTYPE_THREAD, sizeof(struct event));
 804                 /* mutex only needs to be initialized at struct creation. */
 805                 pthread_mutex_init(&thread->mtx, NULL);
 806                 m->alloc++;
 807         }
 808
 809         thread->type = type;
 810         thread->add_type = type;
 811         thread->master = m;
 812         thread->arg = arg;
 813         thread->yield = EVENT_YIELD_TIME_SLOT; /* default */
 814         thread->ref = NULL;
 815         thread->ignore_timer_late = false;
 816
 817         /*
 818          * So if the passed in funcname is not what we have
 819          * stored that means the thread->hist needs to be
 820          * updated.  We keep the last one around in unused
 821          * under the assumption that we are probably
 822          * going to immediately allocate the same
 823          * type of thread.
 824          * This hopefully saves us some serious
 825          * hash_get lookups.
 826          */
 827         if ((thread->xref && thread->xref->funcname != xref->funcname)
 828             || thread->func != func) {
 829                 tmp.func = func;
 830                 tmp.funcname = xref->funcname;
 831                 thread->hist =
 832                         hash_get(m->cpu_record, &tmp,
 833                                  (void *(*)(void *))cpu_record_hash_alloc);
 834         }
 835         thread->hist->total_active++;
 836         thread->func = func;
 837         thread->xref = xref;
 838
 839         return thread;
 840 }
 841
 842 static void thread_free(struct event_loop *master, struct event *thread)
 843 {
 844         /* Update statistics. */
 845         assert(master->alloc > 0);
 846         master->alloc--;
 847
 848         /* Free allocated resources. */
 849         pthread_mutex_destroy(&thread->mtx);
 850         XFREE(MTYPE_THREAD, thread);
 851 }
 852
 853 static int fd_poll(struct event_loop *m, const struct timeval *timer_wait,
 854                    bool *eintr_p)
 855 {
 856         sigset_t origsigs;
 857         unsigned char trash[64];
 858         nfds_t count = m->handler.copycount;
 859
 860         /*
 861          * If timer_wait is null here, that means poll() should block
 862          * indefinitely, unless the event_master has overridden it by setting
 863          * ->selectpoll_timeout.
 864          *
 865          * If the value is positive, it specifies the maximum number of
 866          * milliseconds to wait. If the timeout is -1, it specifies that
 867          * we should never wait and always return immediately even if no
 868          * event is detected. If the value is zero, the behavior is default.
 869          */
 870         int timeout = -1;
 871
 872         /* number of file descriptors with events */
 873         int num;
 874
 875         if (timer_wait != NULL && m->selectpoll_timeout == 0) {
 876                 /* use the default value */
 877                 timeout = (timer_wait->tv_sec * 1000)
 878                           + (timer_wait->tv_usec / 1000);
 879         } else if (m->selectpoll_timeout > 0) {
 880                 /* use the user's timeout */
 881                 timeout = m->selectpoll_timeout;
 882         } else if (m->selectpoll_timeout < 0) {
 883                 /* effect a poll (return immediately) */
 884                 timeout = 0;
 885         }
 886
 887         zlog_tls_buffer_flush();
 888         rcu_read_unlock();
 889         rcu_assert_read_unlocked();
 890
 891         /* add poll pipe poker */
 892         assert(count + 1 < m->handler.pfdsize);
 893         m->handler.copy[count].fd = m->io_pipe[0];
 894         m->handler.copy[count].events = POLLIN;
 895         m->handler.copy[count].revents = 0x00;
 896
 897         /* We need to deal with a signal-handling race here: we
 898          * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
 899          * that may arrive just before we enter poll(). We will block the
 900          * key signals, then check whether any have arrived - if so, we return
 901          * before calling poll(). If not, we'll re-enable the signals
 902          * in the ppoll() call.
 903          */
 904
 905         sigemptyset(&origsigs);
 906         if (m->handle_signals) {
 907                 /* Main pthread that handles the app signals */
 908                 if (frr_sigevent_check(&origsigs)) {
 909                         /* Signal to process - restore signal mask and return */
 910                         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 911                         num = -1;
 912                         *eintr_p = true;
 913                         goto done;
 914                 }
 915         } else {
 916                 /* Don't make any changes for the non-main pthreads */
 917                 pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
 918         }
 919
 920 #if defined(HAVE_PPOLL)
 921         struct timespec ts, *tsp;
 922
 923         if (timeout >= 0) {
 924                 ts.tv_sec = timeout / 1000;
 925                 ts.tv_nsec = (timeout % 1000) * 1000000;
 926                 tsp = &ts;
 927         } else
 928                 tsp = NULL;
 929
 930         num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
 931         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 932 #else
 933         /* Not ideal - there is a race after we restore the signal mask */
 934         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 935         num = poll(m->handler.copy, count + 1, timeout);
 936 #endif
 937
 938 done:
 939
 940         if (num < 0 && errno == EINTR)
 941                 *eintr_p = true;
 942
 943         if (num > 0 && m->handler.copy[count].revents != 0 && num--)
 944                 while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
 945                         ;
 946
 947         rcu_read_lock();
 948
 949         return num;
 950 }
 951
 952 /* Add new read thread. */
 953 void _event_add_read_write(const struct xref_eventsched *xref,
 954                            struct event_loop *m, void (*func)(struct event *),
 955                            void *arg, int fd, struct event **t_ptr)
 956 {
 957         int dir = xref->event_type;
 958         struct event *thread = NULL;
 959         struct event **thread_array;
 960
 961         if (dir == EVENT_READ)
 962                 frrtrace(9, frr_libfrr, schedule_read, m,
 963                          xref->funcname, xref->xref.file, xref->xref.line,
 964                          t_ptr, fd, 0, arg, 0);
 965         else
 966                 frrtrace(9, frr_libfrr, schedule_write, m,
 967                          xref->funcname, xref->xref.file, xref->xref.line,
 968                          t_ptr, fd, 0, arg, 0);
 969
 970         assert(fd >= 0);
 971         if (fd >= m->fd_limit)
 972                 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
 973
 974         frr_with_mutex (&m->mtx) {
 975                 /* Thread is already scheduled; don't reschedule */
 976                 if (t_ptr && *t_ptr)
 977                         break;
 978
 979                 /* default to a new pollfd */
 980                 nfds_t queuepos = m->handler.pfdcount;
 981
 982                 if (dir == EVENT_READ)
 983                         thread_array = m->read;
 984                 else
 985                         thread_array = m->write;
 986
 987                 /*
 988                  * if we already have a pollfd for our file descriptor, find and
 989                  * use it
 990                  */
 991                 for (nfds_t i = 0; i < m->handler.pfdcount; i++)
 992                         if (m->handler.pfds[i].fd == fd) {
 993                                 queuepos = i;
 994
 995 #ifdef DEV_BUILD
 996                                 /*
 997                                  * What happens if we have a thread already
 998                                  * created for this event?
 999                                  */
1000                                 if (thread_array[fd])
1001                                         assert(!"Thread already scheduled for file descriptor");
1002 #endif
1003                                 break;
1004                         }
1005
1006                 /* make sure we have room for this fd + pipe poker fd */
1007                 assert(queuepos + 1 < m->handler.pfdsize);
1008
1009                 thread = thread_get(m, dir, func, arg, xref);
1010
1011                 m->handler.pfds[queuepos].fd = fd;
1012                 m->handler.pfds[queuepos].events |=
1013                         (dir == EVENT_READ ? POLLIN : POLLOUT);
1014
1015                 if (queuepos == m->handler.pfdcount)
1016                         m->handler.pfdcount++;
1017
1018                 if (thread) {
1019                         frr_with_mutex (&thread->mtx) {
1020                                 thread->u.fd = fd;
1021                                 thread_array[thread->u.fd] = thread;
1022                         }
1023
1024                         if (t_ptr) {
1025                                 *t_ptr = thread;
1026                                 thread->ref = t_ptr;
1027                         }
1028                 }
1029
1030                 AWAKEN(m);
1031         }
1032 }
1033
1034 static void _event_add_timer_timeval(const struct xref_eventsched *xref,
1035                                      struct event_loop *m,
1036                                      void (*func)(struct event *), void *arg,
1037                                      struct timeval *time_relative,
1038                                      struct event **t_ptr)
1039 {
1040         struct event *thread;
1041         struct timeval t;
1042
1043         assert(m != NULL);
1044
1045         assert(time_relative);
1046
1047         frrtrace(9, frr_libfrr, schedule_timer, m,
1048                  xref->funcname, xref->xref.file, xref->xref.line,
1049                  t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
1050
1051         /* Compute expiration/deadline time. */
1052         monotime(&t);
1053         timeradd(&t, time_relative, &t);
1054
1055         frr_with_mutex (&m->mtx) {
1056                 if (t_ptr && *t_ptr)
1057                         /* thread is already scheduled; don't reschedule */
1058                         return;
1059
1060                 thread = thread_get(m, EVENT_TIMER, func, arg, xref);
1061
1062                 frr_with_mutex (&thread->mtx) {
1063                         thread->u.sands = t;
1064                         event_timer_list_add(&m->timer, thread);
1065                         if (t_ptr) {
1066                                 *t_ptr = thread;
1067                                 thread->ref = t_ptr;
1068                         }
1069                 }
1070
1071                 /* The timer list is sorted - if this new timer
1072                  * might change the time we'll wait for, give the pthread
1073                  * a chance to re-compute.
1074                  */
1075                 if (event_timer_list_first(&m->timer) == thread)
1076                         AWAKEN(m);
1077         }
1078 #define ONEYEAR2SEC (60 * 60 * 24 * 365)
1079         if (time_relative->tv_sec > ONEYEAR2SEC)
1080                 flog_err(
1081                         EC_LIB_TIMER_TOO_LONG,
1082                         "Timer: %pTHD is created with an expiration that is greater than 1 year",
1083                         thread);
1084 }
1085
1086
1087 /* Add timer event thread. */
1088 void _event_add_timer(const struct xref_eventsched *xref, struct event_loop *m,
1089                       void (*func)(struct event *), void *arg, long timer,
1090                       struct event **t_ptr)
1091 {
1092         struct timeval trel;
1093
1094         assert(m != NULL);
1095
1096         trel.tv_sec = timer;
1097         trel.tv_usec = 0;
1098
1099         _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1100 }
1101
1102 /* Add timer event thread with "millisecond" resolution */
1103 void _event_add_timer_msec(const struct xref_eventsched *xref,
1104                            struct event_loop *m, void (*func)(struct event *),
1105                            void *arg, long timer, struct event **t_ptr)
1106 {
1107         struct timeval trel;
1108
1109         assert(m != NULL);
1110
1111         trel.tv_sec = timer / 1000;
1112         trel.tv_usec = 1000 * (timer % 1000);
1113
1114         _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1115 }
1116
1117 /* Add timer event thread with "timeval" resolution */
1118 void _event_add_timer_tv(const struct xref_eventsched *xref,
1119                          struct event_loop *m, void (*func)(struct event *),
1120                          void *arg, struct timeval *tv, struct event **t_ptr)
1121 {
1122         _event_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
1123 }
1124
1125 /* Add simple event thread. */
1126 void _event_add_event(const struct xref_eventsched *xref, struct event_loop *m,
1127                       void (*func)(struct event *), void *arg, int val,
1128                       struct event **t_ptr)
1129 {
1130         struct event *thread = NULL;
1131
1132         frrtrace(9, frr_libfrr, schedule_event, m,
1133                  xref->funcname, xref->xref.file, xref->xref.line,
1134                  t_ptr, 0, val, arg, 0);
1135
1136         assert(m != NULL);
1137
1138         frr_with_mutex (&m->mtx) {
1139                 if (t_ptr && *t_ptr)
1140                         /* thread is already scheduled; don't reschedule */
1141                         break;
1142
1143                 thread = thread_get(m, EVENT_EVENT, func, arg, xref);
1144                 frr_with_mutex (&thread->mtx) {
1145                         thread->u.val = val;
1146                         event_list_add_tail(&m->event, thread);
1147                 }
1148
1149                 if (t_ptr) {
1150                         *t_ptr = thread;
1151                         thread->ref = t_ptr;
1152                 }
1153
1154                 AWAKEN(m);
1155         }
1156 }
1157
1158 /* Thread cancellation ------------------------------------------------------ */
1159
1160 /**
1161  * NOT's out the .events field of pollfd corresponding to the given file
1162  * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1163  *
1164  * This needs to happen for both copies of pollfd's. See 'event_fetch'
1165  * implementation for details.
1166  *
1167  * @param master
1168  * @param fd
1169  * @param state the event to cancel. One or more (OR'd together) of the
1170  * following:
1171  *   - POLLIN
1172  *   - POLLOUT
1173  */
1174 static void event_cancel_rw(struct event_loop *master, int fd, short state,
1175                             int idx_hint)
1176 {
1177         bool found = false;
1178
1179         /* find the index of corresponding pollfd */
1180         nfds_t i;
1181
1182         /* Cancel POLLHUP too just in case some bozo set it */
1183         state |= POLLHUP;
1184
1185         /* Some callers know the index of the pfd already */
1186         if (idx_hint >= 0) {
1187                 i = idx_hint;
1188                 found = true;
1189         } else {
1190                 /* Have to look for the fd in the pfd array */
1191                 for (i = 0; i < master->handler.pfdcount; i++)
1192                         if (master->handler.pfds[i].fd == fd) {
1193                                 found = true;
1194                                 break;
1195                         }
1196         }
1197
1198         if (!found) {
1199                 zlog_debug(
1200                         "[!] Received cancellation request for nonexistent rw job");
1201                 zlog_debug("[!] threadmaster: %s | fd: %d",
1202                            master->name ? master->name : "", fd);
1203                 return;
1204         }
1205
1206         /* NOT out event. */
1207         master->handler.pfds[i].events &= ~(state);
1208
1209         /* If all events are canceled, delete / resize the pollfd array. */
1210         if (master->handler.pfds[i].events == 0) {
1211                 memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1212                         (master->handler.pfdcount - i - 1)
1213                                 * sizeof(struct pollfd));
1214                 master->handler.pfdcount--;
1215                 master->handler.pfds[master->handler.pfdcount].fd = 0;
1216                 master->handler.pfds[master->handler.pfdcount].events = 0;
1217         }
1218
1219         /*
1220          * If we have the same pollfd in the copy, perform the same operations,
1221          * otherwise return.
1222          */
1223         if (i >= master->handler.copycount)
1224                 return;
1225
1226         master->handler.copy[i].events &= ~(state);
1227
1228         if (master->handler.copy[i].events == 0) {
1229                 memmove(master->handler.copy + i, master->handler.copy + i + 1,
1230                         (master->handler.copycount - i - 1)
1231                                 * sizeof(struct pollfd));
1232                 master->handler.copycount--;
1233                 master->handler.copy[master->handler.copycount].fd = 0;
1234                 master->handler.copy[master->handler.copycount].events = 0;
1235         }
1236 }
1237
1238 /*
1239  * Process task cancellation given a task argument: iterate through the
1240  * various lists of tasks, looking for any that match the argument.
1241  */
1242 static void cancel_arg_helper(struct event_loop *master,
1243                               const struct cancel_req *cr)
1244 {
1245         struct event *t;
1246         nfds_t i;
1247         int fd;
1248         struct pollfd *pfd;
1249
1250         /* We're only processing arg-based cancellations here. */
1251         if (cr->eventobj == NULL)
1252                 return;
1253
1254         /* First process the ready lists. */
1255         frr_each_safe (event_list, &master->event, t) {
1256                 if (t->arg != cr->eventobj)
1257                         continue;
1258                 event_list_del(&master->event, t);
1259                 if (t->ref)
1260                         *t->ref = NULL;
1261                 thread_add_unuse(master, t);
1262         }
1263
1264         frr_each_safe (event_list, &master->ready, t) {
1265                 if (t->arg != cr->eventobj)
1266                         continue;
1267                 event_list_del(&master->ready, t);
1268                 if (t->ref)
1269                         *t->ref = NULL;
1270                 thread_add_unuse(master, t);
1271         }
1272
1273         /* If requested, stop here and ignore io and timers */
1274         if (CHECK_FLAG(cr->flags, EVENT_CANCEL_FLAG_READY))
1275                 return;
1276
1277         /* Check the io tasks */
1278         for (i = 0; i < master->handler.pfdcount;) {
1279                 pfd = master->handler.pfds + i;
1280
1281                 if (pfd->events & POLLIN)
1282                         t = master->read[pfd->fd];
1283                 else
1284                         t = master->write[pfd->fd];
1285
1286                 if (t && t->arg == cr->eventobj) {
1287                         fd = pfd->fd;
1288
1289                         /* Found a match to cancel: clean up fd arrays */
1290                         event_cancel_rw(master, pfd->fd, pfd->events, i);
1291
1292                         /* Clean up thread arrays */
1293                         master->read[fd] = NULL;
1294                         master->write[fd] = NULL;
1295
1296                         /* Clear caller's ref */
1297                         if (t->ref)
1298                                 *t->ref = NULL;
1299
1300                         thread_add_unuse(master, t);
1301
1302                         /* Don't increment 'i' since the cancellation will have
1303                          * removed the entry from the pfd array
1304                          */
1305                 } else
1306                         i++;
1307         }
1308
1309         /* Check the timer tasks */
1310         t = event_timer_list_first(&master->timer);
1311         while (t) {
1312                 struct event *t_next;
1313
1314                 t_next = event_timer_list_next(&master->timer, t);
1315
1316                 if (t->arg == cr->eventobj) {
1317                         event_timer_list_del(&master->timer, t);
1318                         if (t->ref)
1319                                 *t->ref = NULL;
1320                         thread_add_unuse(master, t);
1321                 }
1322
1323                 t = t_next;
1324         }
1325 }
1326
1327 /**
1328  * Process cancellation requests.
1329  *
1330  * This may only be run from the pthread which owns the event_master.
1331  *
1332  * @param master the thread master to process
1333  * @REQUIRE master->mtx
1334  */
1335 static void do_event_cancel(struct event_loop *master)
1336 {
1337         struct event_list_head *list = NULL;
1338         struct event **thread_array = NULL;
1339         struct event *thread;
1340         struct cancel_req *cr;
1341         struct listnode *ln;
1342
1343         for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
1344                 /*
1345                  * If this is an event object cancellation, search
1346                  * through task lists deleting any tasks which have the
1347                  * specified argument - use this handy helper function.
1348                  */
1349                 if (cr->eventobj) {
1350                         cancel_arg_helper(master, cr);
1351                         continue;
1352                 }
1353
1354                 /*
1355                  * The pointer varies depending on whether the cancellation
1356                  * request was made asynchronously or not. If it was, we
1357                  * need to check whether the thread even exists anymore
1358                  * before cancelling it.
1359                  */
1360                 thread = (cr->thread) ? cr->thread : *cr->threadref;
1361
1362                 if (!thread)
1363                         continue;
1364
1365                 list = NULL;
1366                 thread_array = NULL;
1367
1368                 /* Determine the appropriate queue to cancel the thread from */
1369                 switch (thread->type) {
1370                 case EVENT_READ:
1371                         event_cancel_rw(master, thread->u.fd, POLLIN, -1);
1372                         thread_array = master->read;
1373                         break;
1374                 case EVENT_WRITE:
1375                         event_cancel_rw(master, thread->u.fd, POLLOUT, -1);
1376                         thread_array = master->write;
1377                         break;
1378                 case EVENT_TIMER:
1379                         event_timer_list_del(&master->timer, thread);
1380                         break;
1381                 case EVENT_EVENT:
1382                         list = &master->event;
1383                         break;
1384                 case EVENT_READY:
1385                         list = &master->ready;
1386                         break;
1387                 case EVENT_UNUSED:
1388                 case EVENT_EXECUTE:
1389                         continue;
1390                         break;
1391                 }
1392
1393                 if (list)
1394                         event_list_del(list, thread);
1395                 else if (thread_array)
1396                         thread_array[thread->u.fd] = NULL;
1397
1398                 if (thread->ref)
1399                         *thread->ref = NULL;
1400
1401                 thread_add_unuse(thread->master, thread);
1402         }
1403
1404         /* Delete and free all cancellation requests */
1405         if (master->cancel_req)
1406                 list_delete_all_node(master->cancel_req);
1407
1408         /* Wake up any threads which may be blocked in event_cancel_async() */
1409         master->canceled = true;
1410         pthread_cond_broadcast(&master->cancel_cond);
1411 }
1412
1413 /*
1414  * Helper function used for multiple flavors of arg-based cancellation.
1415  */
1416 static void cancel_event_helper(struct event_loop *m, void *arg, int flags)
1417 {
1418         struct cancel_req *cr;
1419
1420         assert(m->owner == pthread_self());
1421
1422         /* Only worth anything if caller supplies an arg. */
1423         if (arg == NULL)
1424                 return;
1425
1426         cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1427
1428         cr->flags = flags;
1429
1430         frr_with_mutex (&m->mtx) {
1431                 cr->eventobj = arg;
1432                 listnode_add(m->cancel_req, cr);
1433                 do_event_cancel(m);
1434         }
1435 }
1436
1437 /**
1438  * Cancel any events which have the specified argument.
1439  *
1440  * MT-Unsafe
1441  *
1442  * @param m the event_master to cancel from
1443  * @param arg the argument passed when creating the event
1444  */
1445 void event_cancel_event(struct event_loop *master, void *arg)
1446 {
1447         cancel_event_helper(master, arg, 0);
1448 }
1449
1450 /*
1451  * Cancel ready tasks with an arg matching 'arg'
1452  *
1453  * MT-Unsafe
1454  *
1455  * @param m the event_master to cancel from
1456  * @param arg the argument passed when creating the event
1457  */
1458 void event_cancel_event_ready(struct event_loop *m, void *arg)
1459 {
1460
1461         /* Only cancel ready/event tasks */
1462         cancel_event_helper(m, arg, EVENT_CANCEL_FLAG_READY);
1463 }
1464
1465 /**
1466  * Cancel a specific task.
1467  *
1468  * MT-Unsafe
1469  *
1470  * @param thread task to cancel
1471  */
1472 void event_cancel(struct event **thread)
1473 {
1474         struct event_loop *master;
1475
1476         if (thread == NULL || *thread == NULL)
1477                 return;
1478
1479         master = (*thread)->master;
1480
1481         frrtrace(9, frr_libfrr, event_cancel, master, (*thread)->xref->funcname,
1482                  (*thread)->xref->xref.file, (*thread)->xref->xref.line, NULL,
1483                  (*thread)->u.fd, (*thread)->u.val, (*thread)->arg,
1484                  (*thread)->u.sands.tv_sec);
1485
1486         assert(master->owner == pthread_self());
1487
1488         frr_with_mutex (&master->mtx) {
1489                 struct cancel_req *cr =
1490                         XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1491                 cr->thread = *thread;
1492                 listnode_add(master->cancel_req, cr);
1493                 do_event_cancel(master);
1494         }
1495
1496         *thread = NULL;
1497 }
1498
1499 /**
1500  * Asynchronous cancellation.
1501  *
1502  * Called with either a struct event ** or void * to an event argument,
1503  * this function posts the correct cancellation request and blocks until it is
1504  * serviced.
1505  *
1506  * If the thread is currently running, execution blocks until it completes.
1507  *
1508  * The last two parameters are mutually exclusive, i.e. if you pass one the
1509  * other must be NULL.
1510  *
1511  * When the cancellation procedure executes on the target event_master, the
1512  * thread * provided is checked for nullity. If it is null, the thread is
1513  * assumed to no longer exist and the cancellation request is a no-op. Thus
1514  * users of this API must pass a back-reference when scheduling the original
1515  * task.
1516  *
1517  * MT-Safe
1518  *
1519  * @param master the thread master with the relevant event / task
1520  * @param thread pointer to thread to cancel
1521  * @param eventobj the event
1522  */
1523 void event_cancel_async(struct event_loop *master, struct event **thread,
1524                         void *eventobj)
1525 {
1526         assert(!(thread && eventobj) && (thread || eventobj));
1527
1528         if (thread && *thread)
1529                 frrtrace(9, frr_libfrr, event_cancel_async, master,
1530                          (*thread)->xref->funcname, (*thread)->xref->xref.file,
1531                          (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
1532                          (*thread)->u.val, (*thread)->arg,
1533                          (*thread)->u.sands.tv_sec);
1534         else
1535                 frrtrace(9, frr_libfrr, event_cancel_async, master, NULL, NULL,
1536                          0, NULL, 0, 0, eventobj, 0);
1537
1538         assert(master->owner != pthread_self());
1539
1540         frr_with_mutex (&master->mtx) {
1541                 master->canceled = false;
1542
1543                 if (thread) {
1544                         struct cancel_req *cr =
1545                                 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1546                         cr->threadref = thread;
1547                         listnode_add(master->cancel_req, cr);
1548                 } else if (eventobj) {
1549                         struct cancel_req *cr =
1550                                 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1551                         cr->eventobj = eventobj;
1552                         listnode_add(master->cancel_req, cr);
1553                 }
1554                 AWAKEN(master);
1555
1556                 while (!master->canceled)
1557                         pthread_cond_wait(&master->cancel_cond, &master->mtx);
1558         }
1559
1560         if (thread)
1561                 *thread = NULL;
1562 }
1563 /* ------------------------------------------------------------------------- */
1564
1565 static struct timeval *thread_timer_wait(struct event_timer_list_head *timers,
1566                                          struct timeval *timer_val)
1567 {
1568         if (!event_timer_list_count(timers))
1569                 return NULL;
1570
1571         struct event *next_timer = event_timer_list_first(timers);
1572
1573         monotime_until(&next_timer->u.sands, timer_val);
1574         return timer_val;
1575 }
1576
1577 static struct event *thread_run(struct event_loop *m, struct event *thread,
1578                                 struct event *fetch)
1579 {
1580         *fetch = *thread;
1581         thread_add_unuse(m, thread);
1582         return fetch;
1583 }
1584
1585 static int thread_process_io_helper(struct event_loop *m, struct event *thread,
1586                                     short state, short actual_state, int pos)
1587 {
1588         struct event **thread_array;
1589
1590         /*
1591          * poll() clears the .events field, but the pollfd array we
1592          * pass to poll() is a copy of the one used to schedule threads.
1593          * We need to synchronize state between the two here by applying
1594          * the same changes poll() made on the copy of the "real" pollfd
1595          * array.
1596          *
1597          * This cleans up a possible infinite loop where we refuse
1598          * to respond to a poll event but poll is insistent that
1599          * we should.
1600          */
1601         m->handler.pfds[pos].events &= ~(state);
1602
1603         if (!thread) {
1604                 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1605                         flog_err(EC_LIB_NO_THREAD,
1606                                  "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
1607                                  m->handler.pfds[pos].fd, actual_state);
1608                 return 0;
1609         }
1610
1611         if (thread->type == EVENT_READ)
1612                 thread_array = m->read;
1613         else
1614                 thread_array = m->write;
1615
1616         thread_array[thread->u.fd] = NULL;
1617         event_list_add_tail(&m->ready, thread);
1618         thread->type = EVENT_READY;
1619
1620         return 1;
1621 }
1622
1623 /**
1624  * Process I/O events.
1625  *
1626  * Walks through file descriptor array looking for those pollfds whose .revents
1627  * field has something interesting. Deletes any invalid file descriptors.
1628  *
1629  * @param m the thread master
1630  * @param num the number of active file descriptors (return value of poll())
1631  */
1632 static void thread_process_io(struct event_loop *m, unsigned int num)
1633 {
1634         unsigned int ready = 0;
1635         struct pollfd *pfds = m->handler.copy;
1636
1637         for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1638                 /* no event for current fd? immediately continue */
1639                 if (pfds[i].revents == 0)
1640                         continue;
1641
1642                 ready++;
1643
1644                 /*
1645                  * Unless someone has called event_cancel from another
1646                  * pthread, the only thing that could have changed in
1647                  * m->handler.pfds while we were asleep is the .events
1648                  * field in a given pollfd. Barring event_cancel() that
1649                  * value should be a superset of the values we have in our
1650                  * copy, so there's no need to update it. Similarily,
1651                  * barring deletion, the fd should still be a valid index
1652                  * into the master's pfds.
1653                  *
1654                  * We are including POLLERR here to do a READ event
1655                  * this is because the read should fail and the
1656                  * read function should handle it appropriately
1657                  */
1658                 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
1659                         thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
1660                                                  pfds[i].revents, i);
1661                 }
1662                 if (pfds[i].revents & POLLOUT)
1663                         thread_process_io_helper(m, m->write[pfds[i].fd],
1664                                                  POLLOUT, pfds[i].revents, i);
1665
1666                 /*
1667                  * if one of our file descriptors is garbage, remove the same
1668                  * from both pfds + update sizes and index
1669                  */
1670                 if (pfds[i].revents & POLLNVAL) {
1671                         memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1672                                 (m->handler.pfdcount - i - 1)
1673                                         * sizeof(struct pollfd));
1674                         m->handler.pfdcount--;
1675                         m->handler.pfds[m->handler.pfdcount].fd = 0;
1676                         m->handler.pfds[m->handler.pfdcount].events = 0;
1677
1678                         memmove(pfds + i, pfds + i + 1,
1679                                 (m->handler.copycount - i - 1)
1680                                         * sizeof(struct pollfd));
1681                         m->handler.copycount--;
1682                         m->handler.copy[m->handler.copycount].fd = 0;
1683                         m->handler.copy[m->handler.copycount].events = 0;
1684
1685                         i--;
1686                 }
1687         }
1688 }
1689
1690 /* Add all timers that have popped to the ready list. */
1691 static unsigned int thread_process_timers(struct event_loop *m,
1692                                           struct timeval *timenow)
1693 {
1694         struct timeval prev = *timenow;
1695         bool displayed = false;
1696         struct event *thread;
1697         unsigned int ready = 0;
1698
1699         while ((thread = event_timer_list_first(&m->timer))) {
1700                 if (timercmp(timenow, &thread->u.sands, <))
1701                         break;
1702                 prev = thread->u.sands;
1703                 prev.tv_sec += 4;
1704                 /*
1705                  * If the timer would have popped 4 seconds in the
1706                  * past then we are in a situation where we are
1707                  * really getting behind on handling of events.
1708                  * Let's log it and do the right thing with it.
1709                  */
1710                 if (timercmp(timenow, &prev, >)) {
1711                         atomic_fetch_add_explicit(
1712                                 &thread->hist->total_starv_warn, 1,
1713                                 memory_order_seq_cst);
1714                         if (!displayed && !thread->ignore_timer_late) {
1715                                 flog_warn(
1716                                         EC_LIB_STARVE_THREAD,
1717                                         "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1718                                         thread);
1719                                 displayed = true;
1720                         }
1721                 }
1722
1723                 event_timer_list_pop(&m->timer);
1724                 thread->type = EVENT_READY;
1725                 event_list_add_tail(&m->ready, thread);
1726                 ready++;
1727         }
1728
1729         return ready;
1730 }
1731
1732 /* process a list en masse, e.g. for event thread lists */
1733 static unsigned int thread_process(struct event_list_head *list)
1734 {
1735         struct event *thread;
1736         unsigned int ready = 0;
1737
1738         while ((thread = event_list_pop(list))) {
1739                 thread->type = EVENT_READY;
1740                 event_list_add_tail(&thread->master->ready, thread);
1741                 ready++;
1742         }
1743         return ready;
1744 }
1745
1746
1747 /* Fetch next ready thread. */
1748 struct event *event_fetch(struct event_loop *m, struct event *fetch)
1749 {
1750         struct event *thread = NULL;
1751         struct timeval now;
1752         struct timeval zerotime = {0, 0};
1753         struct timeval tv;
1754         struct timeval *tw = NULL;
1755         bool eintr_p = false;
1756         int num = 0;
1757
1758         do {
1759                 /* Handle signals if any */
1760                 if (m->handle_signals)
1761                         frr_sigevent_process();
1762
1763                 pthread_mutex_lock(&m->mtx);
1764
1765                 /* Process any pending cancellation requests */
1766                 do_event_cancel(m);
1767
1768                 /*
1769                  * Attempt to flush ready queue before going into poll().
1770                  * This is performance-critical. Think twice before modifying.
1771                  */
1772                 if ((thread = event_list_pop(&m->ready))) {
1773                         fetch = thread_run(m, thread, fetch);
1774                         if (fetch->ref)
1775                                 *fetch->ref = NULL;
1776                         pthread_mutex_unlock(&m->mtx);
1777                         if (!m->ready_run_loop)
1778                                 GETRUSAGE(&m->last_getrusage);
1779                         m->ready_run_loop = true;
1780                         break;
1781                 }
1782
1783                 m->ready_run_loop = false;
1784                 /* otherwise, tick through scheduling sequence */
1785
1786                 /*
1787                  * Post events to ready queue. This must come before the
1788                  * following block since events should occur immediately
1789                  */
1790                 thread_process(&m->event);
1791
1792                 /*
1793                  * If there are no tasks on the ready queue, we will poll()
1794                  * until a timer expires or we receive I/O, whichever comes
1795                  * first. The strategy for doing this is:
1796                  *
1797                  * - If there are events pending, set the poll() timeout to zero
1798                  * - If there are no events pending, but there are timers
1799                  * pending, set the timeout to the smallest remaining time on
1800                  * any timer.
1801                  * - If there are neither timers nor events pending, but there
1802                  * are file descriptors pending, block indefinitely in poll()
1803                  * - If nothing is pending, it's time for the application to die
1804                  *
1805                  * In every case except the last, we need to hit poll() at least
1806                  * once per loop to avoid starvation by events
1807                  */
1808                 if (!event_list_count(&m->ready))
1809                         tw = thread_timer_wait(&m->timer, &tv);
1810
1811                 if (event_list_count(&m->ready) ||
1812                     (tw && !timercmp(tw, &zerotime, >)))
1813                         tw = &zerotime;
1814
1815                 if (!tw && m->handler.pfdcount == 0) { /* die */
1816                         pthread_mutex_unlock(&m->mtx);
1817                         fetch = NULL;
1818                         break;
1819                 }
1820
1821                 /*
1822                  * Copy pollfd array + # active pollfds in it. Not necessary to
1823                  * copy the array size as this is fixed.
1824                  */
1825                 m->handler.copycount = m->handler.pfdcount;
1826                 memcpy(m->handler.copy, m->handler.pfds,
1827                        m->handler.copycount * sizeof(struct pollfd));
1828
1829                 pthread_mutex_unlock(&m->mtx);
1830                 {
1831                         eintr_p = false;
1832                         num = fd_poll(m, tw, &eintr_p);
1833                 }
1834                 pthread_mutex_lock(&m->mtx);
1835
1836                 /* Handle any errors received in poll() */
1837                 if (num < 0) {
1838                         if (eintr_p) {
1839                                 pthread_mutex_unlock(&m->mtx);
1840                                 /* loop around to signal handler */
1841                                 continue;
1842                         }
1843
1844                         /* else die */
1845                         flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
1846                                  safe_strerror(errno));
1847                         pthread_mutex_unlock(&m->mtx);
1848                         fetch = NULL;
1849                         break;
1850                 }
1851
1852                 /* Post timers to ready queue. */
1853                 monotime(&now);
1854                 thread_process_timers(m, &now);
1855
1856                 /* Post I/O to ready queue. */
1857                 if (num > 0)
1858                         thread_process_io(m, num);
1859
1860                 pthread_mutex_unlock(&m->mtx);
1861
1862         } while (!thread && m->spin);
1863
1864         return fetch;
1865 }
1866
1867 static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
1868 {
1869         return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1870                 + (a.tv_usec - b.tv_usec));
1871 }
1872
1873 unsigned long event_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1874                                   unsigned long *cputime)
1875 {
1876 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1877
1878 #ifdef __FreeBSD__
1879         /*
1880          * FreeBSD appears to have an issue when calling clock_gettime
1881          * with CLOCK_THREAD_CPUTIME_ID really close to each other
1882          * occassionally the now time will be before the start time.
1883          * This is not good and FRR is ending up with CPU HOG's
1884          * when the subtraction wraps to very large numbers
1885          *
1886          * What we are going to do here is cheat a little bit
1887          * and notice that this is a problem and just correct
1888          * it so that it is impossible to happen
1889          */
1890         if (start->cpu.tv_sec == now->cpu.tv_sec &&
1891             start->cpu.tv_nsec > now->cpu.tv_nsec)
1892                 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1893         else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1894                 now->cpu.tv_sec = start->cpu.tv_sec;
1895                 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1896         }
1897 #endif
1898         *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1899                    + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1900 #else
1901         /* This is 'user + sys' time.  */
1902         *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1903                    + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
1904 #endif
1905         return timeval_elapsed(now->real, start->real);
1906 }
1907
1908 /*
1909  * We should aim to yield after yield milliseconds, which defaults
1910  * to EVENT_YIELD_TIME_SLOT .
1911  * Note: we are using real (wall clock) time for this calculation.
1912  * It could be argued that CPU time may make more sense in certain
1913  * contexts.  The things to consider are whether the thread may have
1914  * blocked (in which case wall time increases, but CPU time does not),
1915  * or whether the system is heavily loaded with other processes competing
1916  * for CPU time.  On balance, wall clock time seems to make sense.
1917  * Plus it has the added benefit that gettimeofday should be faster
1918  * than calling getrusage.
1919  */
1920 int event_should_yield(struct event *thread)
1921 {
1922         int result;
1923
1924         frr_with_mutex (&thread->mtx) {
1925                 result = monotime_since(&thread->real, NULL)
1926                          > (int64_t)thread->yield;
1927         }
1928         return result;
1929 }
1930
1931 void event_set_yield_time(struct event *thread, unsigned long yield_time)
1932 {
1933         frr_with_mutex (&thread->mtx) {
1934                 thread->yield = yield_time;
1935         }
1936 }
1937
1938 void event_getrusage(RUSAGE_T *r)
1939 {
1940         monotime(&r->real);
1941         if (!cputime_enabled) {
1942                 memset(&r->cpu, 0, sizeof(r->cpu));
1943                 return;
1944         }
1945
1946 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1947         /* not currently implemented in Linux's vDSO, but maybe at some point
1948          * in the future?
1949          */
1950         clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1951 #else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
1952 #if defined RUSAGE_THREAD
1953 #define FRR_RUSAGE RUSAGE_THREAD
1954 #else
1955 #define FRR_RUSAGE RUSAGE_SELF
1956 #endif
1957         getrusage(FRR_RUSAGE, &(r->cpu));
1958 #endif
1959 }
1960
1961 /*
1962  * Call a thread.
1963  *
1964  * This function will atomically update the thread's usage history. At present
1965  * this is the only spot where usage history is written. Nevertheless the code
1966  * has been written such that the introduction of writers in the future should
1967  * not need to update it provided the writers atomically perform only the
1968  * operations done here, i.e. updating the total and maximum times. In
1969  * particular, the maximum real and cpu times must be monotonically increasing
1970  * or this code is not correct.
1971  */
1972 void event_call(struct event *thread)
1973 {
1974         RUSAGE_T before, after;
1975
1976         /* if the thread being called is the CLI, it may change cputime_enabled
1977          * ("service cputime-stats" command), which can result in nonsensical
1978          * and very confusing warnings
1979          */
1980         bool cputime_enabled_here = cputime_enabled;
1981
1982         if (thread->master->ready_run_loop)
1983                 before = thread->master->last_getrusage;
1984         else
1985                 GETRUSAGE(&before);
1986
1987         thread->real = before.real;
1988
1989         frrtrace(9, frr_libfrr, event_call, thread->master,
1990                  thread->xref->funcname, thread->xref->xref.file,
1991                  thread->xref->xref.line, NULL, thread->u.fd, thread->u.val,
1992                  thread->arg, thread->u.sands.tv_sec);
1993
1994         pthread_setspecific(thread_current, thread);
1995         (*thread->func)(thread);
1996         pthread_setspecific(thread_current, NULL);
1997
1998         GETRUSAGE(&after);
1999         thread->master->last_getrusage = after;
2000
2001         unsigned long walltime, cputime;
2002         unsigned long exp;
2003
2004         walltime = event_consumed_time(&after, &before, &cputime);
2005
2006         /* update walltime */
2007         atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
2008                                   memory_order_seq_cst);
2009         exp = atomic_load_explicit(&thread->hist->real.max,
2010                                    memory_order_seq_cst);
2011         while (exp < walltime
2012                && !atomic_compare_exchange_weak_explicit(
2013                        &thread->hist->real.max, &exp, walltime,
2014                        memory_order_seq_cst, memory_order_seq_cst))
2015                 ;
2016
2017         if (cputime_enabled_here && cputime_enabled) {
2018                 /* update cputime */
2019                 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2020                                           memory_order_seq_cst);
2021                 exp = atomic_load_explicit(&thread->hist->cpu.max,
2022                                            memory_order_seq_cst);
2023                 while (exp < cputime
2024                        && !atomic_compare_exchange_weak_explicit(
2025                                &thread->hist->cpu.max, &exp, cputime,
2026                                memory_order_seq_cst, memory_order_seq_cst))
2027                         ;
2028         }
2029
2030         atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2031                                   memory_order_seq_cst);
2032         atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2033                                  memory_order_seq_cst);
2034
2035         if (cputime_enabled_here && cputime_enabled && cputime_threshold
2036             && cputime > cputime_threshold) {
2037                 /*
2038                  * We have a CPU Hog on our hands.  The time FRR has spent
2039                  * doing actual work (not sleeping) is greater than 5 seconds.
2040                  * Whinge about it now, so we're aware this is yet another task
2041                  * to fix.
2042                  */
2043                 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2044                                           1, memory_order_seq_cst);
2045                 flog_warn(
2046                         EC_LIB_SLOW_THREAD_CPU,
2047                         "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2048                         thread->xref->funcname, (unsigned long)thread->func,
2049                         walltime / 1000, cputime / 1000);
2050
2051         } else if (walltime_threshold && walltime > walltime_threshold) {
2052                 /*
2053                  * The runtime for a task is greater than 5 seconds, but the
2054                  * cpu time is under 5 seconds.  Let's whine about this because
2055                  * this could imply some sort of scheduling issue.
2056                  */
2057                 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2058                                           1, memory_order_seq_cst);
2059                 flog_warn(
2060                         EC_LIB_SLOW_THREAD_WALL,
2061                         "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
2062                         thread->xref->funcname, (unsigned long)thread->func,
2063                         walltime / 1000, cputime / 1000);
2064         }
2065 }
2066
2067 /* Execute thread */
2068 void _event_execute(const struct xref_eventsched *xref, struct event_loop *m,
2069                     void (*func)(struct event *), void *arg, int val)
2070 {
2071         struct event *thread;
2072
2073         /* Get or allocate new thread to execute. */
2074         frr_with_mutex (&m->mtx) {
2075                 thread = thread_get(m, EVENT_EVENT, func, arg, xref);
2076
2077                 /* Set its event value. */
2078                 frr_with_mutex (&thread->mtx) {
2079                         thread->add_type = EVENT_EXECUTE;
2080                         thread->u.val = val;
2081                         thread->ref = &thread;
2082                 }
2083         }
2084
2085         /* Execute thread doing all accounting. */
2086         event_call(thread);
2087
2088         /* Give back or free thread. */
2089         thread_add_unuse(m, thread);
2090 }
2091
2092 /* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2093 void debug_signals(const sigset_t *sigs)
2094 {
2095         int i, found;
2096         sigset_t tmpsigs;
2097         char buf[300];
2098
2099         /*
2100          * We're only looking at the non-realtime signals here, so we need
2101          * some limit value. Platform differences mean at some point we just
2102          * need to pick a reasonable value.
2103          */
2104 #if defined SIGRTMIN
2105 #  define LAST_SIGNAL SIGRTMIN
2106 #else
2107 #  define LAST_SIGNAL 32
2108 #endif
2109
2110
2111         if (sigs == NULL) {
2112                 sigemptyset(&tmpsigs);
2113                 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2114                 sigs = &tmpsigs;
2115         }
2116
2117         found = 0;
2118         buf[0] = '\0';
2119
2120         for (i = 0; i < LAST_SIGNAL; i++) {
2121                 char tmp[20];
2122
2123                 if (sigismember(sigs, i) > 0) {
2124                         if (found > 0)
2125                                 strlcat(buf, ",", sizeof(buf));
2126                         snprintf(tmp, sizeof(tmp), "%d", i);
2127                         strlcat(buf, tmp, sizeof(buf));
2128                         found++;
2129                 }
2130         }
2131
2132         if (found == 0)
2133                 snprintf(buf, sizeof(buf), "<none>");
2134
2135         zlog_debug("%s: %s", __func__, buf);
2136 }
2137
2138 static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2139                                    const struct event *thread)
2140 {
2141         static const char *const types[] = {
2142                 [EVENT_READ] = "read",    [EVENT_WRITE] = "write",
2143                 [EVENT_TIMER] = "timer",  [EVENT_EVENT] = "event",
2144                 [EVENT_READY] = "ready",  [EVENT_UNUSED] = "unused",
2145                 [EVENT_EXECUTE] = "exec",
2146         };
2147         ssize_t rv = 0;
2148         char info[16] = "";
2149
2150         if (!thread)
2151                 return bputs(buf, "{(thread *)NULL}");
2152
2153         rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2154
2155         if (thread->type < array_size(types) && types[thread->type])
2156                 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2157         else
2158                 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2159
2160         switch (thread->type) {
2161         case EVENT_READ:
2162         case EVENT_WRITE:
2163                 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2164                 break;
2165
2166         case EVENT_TIMER:
2167                 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2168                 break;
2169         case EVENT_READY:
2170         case EVENT_EVENT:
2171         case EVENT_UNUSED:
2172         case EVENT_EXECUTE:
2173                 break;
2174         }
2175
2176         rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2177                         thread->xref->funcname, thread->xref->dest,
2178                         thread->xref->xref.file, thread->xref->xref.line);
2179         return rv;
2180 }
2181
2182 printfrr_ext_autoreg_p("TH", printfrr_thread);
2183 static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2184                                const void *ptr)
2185 {
2186         const struct event *thread = ptr;
2187         struct timespec remain = {};
2188
2189         if (ea->fmt[0] == 'D') {
2190                 ea->fmt++;
2191                 return printfrr_thread_dbg(buf, ea, thread);
2192         }
2193
2194         if (!thread) {
2195                 /* need to jump over time formatting flag characters in the
2196                  * input format string, i.e. adjust ea->fmt!
2197                  */
2198                 printfrr_time(buf, ea, &remain,
2199                               TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2200                 return bputch(buf, '-');
2201         }
2202
2203         TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2204         return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2205 }