lib/thread.c

   1 /* Thread management routine
   2  * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
   3  *
   4  * This file is part of GNU Zebra.
   5  *
   6  * GNU Zebra is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License as published by the
   8  * Free Software Foundation; either version 2, or (at your option) any
   9  * later version.
  10  *
  11  * GNU Zebra is distributed in the hope that it will be useful, but
  12  * WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License along
  17  * with this program; see the file COPYING; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 /* #define DEBUG */
  22
  23 #include <zebra.h>
  24 #include <sys/resource.h>
  25
  26 #include "thread.h"
  27 #include "memory.h"
  28 #include "frrcu.h"
  29 #include "log.h"
  30 #include "hash.h"
  31 #include "command.h"
  32 #include "sigevent.h"
  33 #include "network.h"
  34 #include "jhash.h"
  35 #include "frratomic.h"
  36 #include "frr_pthread.h"
  37 #include "lib_errors.h"
  38 #include "libfrr_trace.h"
  39 #include "libfrr.h"
  40
  41 DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
  42 DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
  43 DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
  44 DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");
  45
  46 DECLARE_LIST(thread_list, struct thread, threaditem);
  47
  48 struct cancel_req {
  49         int flags;
  50         struct thread *thread;
  51         void *eventobj;
  52         struct thread **threadref;
  53 };
  54
  55 /* Flags for task cancellation */
  56 #define THREAD_CANCEL_FLAG_READY     0x01
  57
  58 static int thread_timer_cmp(const struct thread *a, const struct thread *b)
  59 {
  60         if (a->u.sands.tv_sec < b->u.sands.tv_sec)
  61                 return -1;
  62         if (a->u.sands.tv_sec > b->u.sands.tv_sec)
  63                 return 1;
  64         if (a->u.sands.tv_usec < b->u.sands.tv_usec)
  65                 return -1;
  66         if (a->u.sands.tv_usec > b->u.sands.tv_usec)
  67                 return 1;
  68         return 0;
  69 }
  70
  71 DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);
  72
  73 #if defined(__APPLE__)
  74 #include <mach/mach.h>
  75 #include <mach/mach_time.h>
  76 #endif
  77
  78 #define AWAKEN(m)                                                              \
  79         do {                                                                   \
  80                 const unsigned char wakebyte = 0x01;                           \
  81                 write(m->io_pipe[1], &wakebyte, 1);                            \
  82         } while (0);
  83
  84 /* control variable for initializer */
  85 static pthread_once_t init_once = PTHREAD_ONCE_INIT;
  86 pthread_key_t thread_current;
  87
  88 static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
  89 static struct list *masters;
  90
  91 static void thread_free(struct thread_master *master, struct thread *thread);
  92
  93 #ifndef EXCLUDE_CPU_TIME
  94 #define EXCLUDE_CPU_TIME 0
  95 #endif
  96 #ifndef CONSUMED_TIME_CHECK
  97 #define CONSUMED_TIME_CHECK 0
  98 #endif
  99
 100 bool cputime_enabled = !EXCLUDE_CPU_TIME;
 101 unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
 102 unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
 103
 104 /* CLI start ---------------------------------------------------------------- */
 105 #ifndef VTYSH_EXTRACT_PL
 106 #include "lib/thread_clippy.c"
 107 #endif
 108
 109 static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
 110 {
 111         int size = sizeof(a->func);
 112
 113         return jhash(&a->func, size, 0);
 114 }
 115
 116 static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
 117                                const struct cpu_thread_history *b)
 118 {
 119         return a->func == b->func;
 120 }
 121
 122 static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
 123 {
 124         struct cpu_thread_history *new;
 125         new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
 126         new->func = a->func;
 127         new->funcname = a->funcname;
 128         return new;
 129 }
 130
 131 static void cpu_record_hash_free(void *a)
 132 {
 133         struct cpu_thread_history *hist = a;
 134
 135         XFREE(MTYPE_THREAD_STATS, hist);
 136 }
 137
 138 static void vty_out_cpu_thread_history(struct vty *vty,
 139                                        struct cpu_thread_history *a)
 140 {
 141         vty_out(vty,
 142                 "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
 143                 a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
 144                 a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
 145                 (a->real.total / a->total_calls), a->real.max,
 146                 a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
 147         vty_out(vty, "  %c%c%c%c%c  %s\n",
 148                 a->types & (1 << THREAD_READ) ? 'R' : ' ',
 149                 a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
 150                 a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
 151                 a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
 152                 a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
 153 }
 154
 155 static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
 156 {
 157         struct cpu_thread_history *totals = args[0];
 158         struct cpu_thread_history copy;
 159         struct vty *vty = args[1];
 160         uint8_t *filter = args[2];
 161
 162         struct cpu_thread_history *a = bucket->data;
 163
 164         copy.total_active =
 165                 atomic_load_explicit(&a->total_active, memory_order_seq_cst);
 166         copy.total_calls =
 167                 atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
 168         copy.total_cpu_warn =
 169                 atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
 170         copy.total_wall_warn =
 171                 atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
 172         copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
 173                                                      memory_order_seq_cst);
 174         copy.cpu.total =
 175                 atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
 176         copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
 177         copy.real.total =
 178                 atomic_load_explicit(&a->real.total, memory_order_seq_cst);
 179         copy.real.max =
 180                 atomic_load_explicit(&a->real.max, memory_order_seq_cst);
 181         copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
 182         copy.funcname = a->funcname;
 183
 184         if (!(copy.types & *filter))
 185                 return;
 186
 187         vty_out_cpu_thread_history(vty, &copy);
 188         totals->total_active += copy.total_active;
 189         totals->total_calls += copy.total_calls;
 190         totals->total_cpu_warn += copy.total_cpu_warn;
 191         totals->total_wall_warn += copy.total_wall_warn;
 192         totals->total_starv_warn += copy.total_starv_warn;
 193         totals->real.total += copy.real.total;
 194         if (totals->real.max < copy.real.max)
 195                 totals->real.max = copy.real.max;
 196         totals->cpu.total += copy.cpu.total;
 197         if (totals->cpu.max < copy.cpu.max)
 198                 totals->cpu.max = copy.cpu.max;
 199 }
 200
 201 static void cpu_record_print(struct vty *vty, uint8_t filter)
 202 {
 203         struct cpu_thread_history tmp;
 204         void *args[3] = {&tmp, vty, &filter};
 205         struct thread_master *m;
 206         struct listnode *ln;
 207
 208         if (!cputime_enabled)
 209                 vty_out(vty,
 210                         "\n"
 211                         "Collecting CPU time statistics is currently disabled.  Following statistics\n"
 212                         "will be zero or may display data from when collection was enabled.  Use the\n"
 213                         "  \"service cputime-stats\"  command to start collecting data.\n"
 214                         "\nCounters and wallclock times are always maintained and should be accurate.\n");
 215
 216         memset(&tmp, 0, sizeof(tmp));
 217         tmp.funcname = "TOTAL";
 218         tmp.types = filter;
 219
 220         frr_with_mutex(&masters_mtx) {
 221                 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
 222                         const char *name = m->name ? m->name : "main";
 223
 224                         char underline[strlen(name) + 1];
 225                         memset(underline, '-', sizeof(underline));
 226                         underline[sizeof(underline) - 1] = '\0';
 227
 228                         vty_out(vty, "\n");
 229                         vty_out(vty, "Showing statistics for pthread %s\n",
 230                                 name);
 231                         vty_out(vty, "-------------------------------%s\n",
 232                                 underline);
 233                         vty_out(vty, "%30s %18s %18s\n", "",
 234                                 "CPU (user+system):", "Real (wall-clock):");
 235                         vty_out(vty,
 236                                 "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
 237                         vty_out(vty, " Avg uSec Max uSecs");
 238                         vty_out(vty,
 239                                 "  CPU_Warn Wall_Warn Starv_Warn Type   Thread\n");
 240
 241                         if (m->cpu_record->count)
 242                                 hash_iterate(
 243                                         m->cpu_record,
 244                                         (void (*)(struct hash_bucket *,
 245                                                   void *))cpu_record_hash_print,
 246                                         args);
 247                         else
 248                                 vty_out(vty, "No data to display yet.\n");
 249
 250                         vty_out(vty, "\n");
 251                 }
 252         }
 253
 254         vty_out(vty, "\n");
 255         vty_out(vty, "Total thread statistics\n");
 256         vty_out(vty, "-------------------------\n");
 257         vty_out(vty, "%30s %18s %18s\n", "",
 258                 "CPU (user+system):", "Real (wall-clock):");
 259         vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
 260         vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
 261         vty_out(vty, "  Type  Thread\n");
 262
 263         if (tmp.total_calls > 0)
 264                 vty_out_cpu_thread_history(vty, &tmp);
 265 }
 266
 267 static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
 268 {
 269         uint8_t *filter = args[0];
 270         struct hash *cpu_record = args[1];
 271
 272         struct cpu_thread_history *a = bucket->data;
 273
 274         if (!(a->types & *filter))
 275                 return;
 276
 277         hash_release(cpu_record, bucket->data);
 278 }
 279
 280 static void cpu_record_clear(uint8_t filter)
 281 {
 282         uint8_t *tmp = &filter;
 283         struct thread_master *m;
 284         struct listnode *ln;
 285
 286         frr_with_mutex(&masters_mtx) {
 287                 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
 288                         frr_with_mutex(&m->mtx) {
 289                                 void *args[2] = {tmp, m->cpu_record};
 290                                 hash_iterate(
 291                                         m->cpu_record,
 292                                         (void (*)(struct hash_bucket *,
 293                                                   void *))cpu_record_hash_clear,
 294                                         args);
 295                         }
 296                 }
 297         }
 298 }
 299
 300 static uint8_t parse_filter(const char *filterstr)
 301 {
 302         int i = 0;
 303         int filter = 0;
 304
 305         while (filterstr[i] != '\0') {
 306                 switch (filterstr[i]) {
 307                 case 'r':
 308                 case 'R':
 309                         filter |= (1 << THREAD_READ);
 310                         break;
 311                 case 'w':
 312                 case 'W':
 313                         filter |= (1 << THREAD_WRITE);
 314                         break;
 315                 case 't':
 316                 case 'T':
 317                         filter |= (1 << THREAD_TIMER);
 318                         break;
 319                 case 'e':
 320                 case 'E':
 321                         filter |= (1 << THREAD_EVENT);
 322                         break;
 323                 case 'x':
 324                 case 'X':
 325                         filter |= (1 << THREAD_EXECUTE);
 326                         break;
 327                 default:
 328                         break;
 329                 }
 330                 ++i;
 331         }
 332         return filter;
 333 }
 334
 335 DEFUN_NOSH (show_thread_cpu,
 336             show_thread_cpu_cmd,
 337             "show thread cpu [FILTER]",
 338             SHOW_STR
 339             "Thread information\n"
 340             "Thread CPU usage\n"
 341             "Display filter (rwtex)\n")
 342 {
 343         uint8_t filter = (uint8_t)-1U;
 344         int idx = 0;
 345
 346         if (argv_find(argv, argc, "FILTER", &idx)) {
 347                 filter = parse_filter(argv[idx]->arg);
 348                 if (!filter) {
 349                         vty_out(vty,
 350                                 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
 351                                 argv[idx]->arg);
 352                         return CMD_WARNING;
 353                 }
 354         }
 355
 356         cpu_record_print(vty, filter);
 357         return CMD_SUCCESS;
 358 }
 359
 360 DEFPY (service_cputime_stats,
 361        service_cputime_stats_cmd,
 362        "[no] service cputime-stats",
 363        NO_STR
 364        "Set up miscellaneous service\n"
 365        "Collect CPU usage statistics\n")
 366 {
 367         cputime_enabled = !no;
 368         return CMD_SUCCESS;
 369 }
 370
 371 DEFPY (service_cputime_warning,
 372        service_cputime_warning_cmd,
 373        "[no] service cputime-warning (1-4294967295)",
 374        NO_STR
 375        "Set up miscellaneous service\n"
 376        "Warn for tasks exceeding CPU usage threshold\n"
 377        "Warning threshold in milliseconds\n")
 378 {
 379         if (no)
 380                 cputime_threshold = 0;
 381         else
 382                 cputime_threshold = cputime_warning * 1000;
 383         return CMD_SUCCESS;
 384 }
 385
 386 ALIAS (service_cputime_warning,
 387        no_service_cputime_warning_cmd,
 388        "no service cputime-warning",
 389        NO_STR
 390        "Set up miscellaneous service\n"
 391        "Warn for tasks exceeding CPU usage threshold\n")
 392
 393 DEFPY (service_walltime_warning,
 394        service_walltime_warning_cmd,
 395        "[no] service walltime-warning (1-4294967295)",
 396        NO_STR
 397        "Set up miscellaneous service\n"
 398        "Warn for tasks exceeding total wallclock threshold\n"
 399        "Warning threshold in milliseconds\n")
 400 {
 401         if (no)
 402                 walltime_threshold = 0;
 403         else
 404                 walltime_threshold = walltime_warning * 1000;
 405         return CMD_SUCCESS;
 406 }
 407
 408 ALIAS (service_walltime_warning,
 409        no_service_walltime_warning_cmd,
 410        "no service walltime-warning",
 411        NO_STR
 412        "Set up miscellaneous service\n"
 413        "Warn for tasks exceeding total wallclock threshold\n")
 414
 415 static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
 416 {
 417         const char *name = m->name ? m->name : "main";
 418         char underline[strlen(name) + 1];
 419         struct thread *thread;
 420         uint32_t i;
 421
 422         memset(underline, '-', sizeof(underline));
 423         underline[sizeof(underline) - 1] = '\0';
 424
 425         vty_out(vty, "\nShowing poll FD's for %s\n", name);
 426         vty_out(vty, "----------------------%s\n", underline);
 427         vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
 428                 m->fd_limit);
 429         for (i = 0; i < m->handler.pfdcount; i++) {
 430                 vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
 431                         m->handler.pfds[i].fd, m->handler.pfds[i].events,
 432                         m->handler.pfds[i].revents);
 433
 434                 if (m->handler.pfds[i].events & POLLIN) {
 435                         thread = m->read[m->handler.pfds[i].fd];
 436
 437                         if (!thread)
 438                                 vty_out(vty, "ERROR ");
 439                         else
 440                                 vty_out(vty, "%s ", thread->xref->funcname);
 441                 } else
 442                         vty_out(vty, " ");
 443
 444                 if (m->handler.pfds[i].events & POLLOUT) {
 445                         thread = m->write[m->handler.pfds[i].fd];
 446
 447                         if (!thread)
 448                                 vty_out(vty, "ERROR\n");
 449                         else
 450                                 vty_out(vty, "%s\n", thread->xref->funcname);
 451                 } else
 452                         vty_out(vty, "\n");
 453         }
 454 }
 455
 456 DEFUN_NOSH (show_thread_poll,
 457             show_thread_poll_cmd,
 458             "show thread poll",
 459             SHOW_STR
 460             "Thread information\n"
 461             "Show poll FD's and information\n")
 462 {
 463         struct listnode *node;
 464         struct thread_master *m;
 465
 466         frr_with_mutex(&masters_mtx) {
 467                 for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
 468                         show_thread_poll_helper(vty, m);
 469                 }
 470         }
 471
 472         return CMD_SUCCESS;
 473 }
 474
 475
 476 DEFUN (clear_thread_cpu,
 477        clear_thread_cpu_cmd,
 478        "clear thread cpu [FILTER]",
 479        "Clear stored data in all pthreads\n"
 480        "Thread information\n"
 481        "Thread CPU usage\n"
 482        "Display filter (rwtexb)\n")
 483 {
 484         uint8_t filter = (uint8_t)-1U;
 485         int idx = 0;
 486
 487         if (argv_find(argv, argc, "FILTER", &idx)) {
 488                 filter = parse_filter(argv[idx]->arg);
 489                 if (!filter) {
 490                         vty_out(vty,
 491                                 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
 492                                 argv[idx]->arg);
 493                         return CMD_WARNING;
 494                 }
 495         }
 496
 497         cpu_record_clear(filter);
 498         return CMD_SUCCESS;
 499 }
 500
 501 static void show_thread_timers_helper(struct vty *vty, struct thread_master *m)
 502 {
 503         const char *name = m->name ? m->name : "main";
 504         char underline[strlen(name) + 1];
 505         struct thread *thread;
 506
 507         memset(underline, '-', sizeof(underline));
 508         underline[sizeof(underline) - 1] = '\0';
 509
 510         vty_out(vty, "\nShowing timers for %s\n", name);
 511         vty_out(vty, "-------------------%s\n", underline);
 512
 513         frr_each (thread_timer_list, &m->timer, thread) {
 514                 vty_out(vty, "  %-50s%pTH\n", thread->hist->funcname, thread);
 515         }
 516 }
 517
 518 DEFPY_NOSH (show_thread_timers,
 519             show_thread_timers_cmd,
 520             "show thread timers",
 521             SHOW_STR
 522             "Thread information\n"
 523             "Show all timers and how long they have in the system\n")
 524 {
 525         struct listnode *node;
 526         struct thread_master *m;
 527
 528         frr_with_mutex (&masters_mtx) {
 529                 for (ALL_LIST_ELEMENTS_RO(masters, node, m))
 530                         show_thread_timers_helper(vty, m);
 531         }
 532
 533         return CMD_SUCCESS;
 534 }
 535
 536 void thread_cmd_init(void)
 537 {
 538         install_element(VIEW_NODE, &show_thread_cpu_cmd);
 539         install_element(VIEW_NODE, &show_thread_poll_cmd);
 540         install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
 541
 542         install_element(CONFIG_NODE, &service_cputime_stats_cmd);
 543         install_element(CONFIG_NODE, &service_cputime_warning_cmd);
 544         install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
 545         install_element(CONFIG_NODE, &service_walltime_warning_cmd);
 546         install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
 547
 548         install_element(VIEW_NODE, &show_thread_timers_cmd);
 549 }
 550 /* CLI end ------------------------------------------------------------------ */
 551
 552
 553 static void cancelreq_del(void *cr)
 554 {
 555         XFREE(MTYPE_TMP, cr);
 556 }
 557
 558 /* initializer, only ever called once */
 559 static void initializer(void)
 560 {
 561         pthread_key_create(&thread_current, NULL);
 562 }
 563
 564 struct thread_master *thread_master_create(const char *name)
 565 {
 566         struct thread_master *rv;
 567         struct rlimit limit;
 568
 569         pthread_once(&init_once, &initializer);
 570
 571         rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));
 572
 573         /* Initialize master mutex */
 574         pthread_mutex_init(&rv->mtx, NULL);
 575         pthread_cond_init(&rv->cancel_cond, NULL);
 576
 577         /* Set name */
 578         name = name ? name : "default";
 579         rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
 580
 581         /* Initialize I/O task data structures */
 582
 583         /* Use configured limit if present, ulimit otherwise. */
 584         rv->fd_limit = frr_get_fd_limit();
 585         if (rv->fd_limit == 0) {
 586                 getrlimit(RLIMIT_NOFILE, &limit);
 587                 rv->fd_limit = (int)limit.rlim_cur;
 588         }
 589
 590         rv->read = XCALLOC(MTYPE_THREAD_POLL,
 591                            sizeof(struct thread *) * rv->fd_limit);
 592
 593         rv->write = XCALLOC(MTYPE_THREAD_POLL,
 594                             sizeof(struct thread *) * rv->fd_limit);
 595
 596         char tmhashname[strlen(name) + 32];
 597         snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
 598                  name);
 599         rv->cpu_record = hash_create_size(
 600                 8, (unsigned int (*)(const void *))cpu_record_hash_key,
 601                 (bool (*)(const void *, const void *))cpu_record_hash_cmp,
 602                 tmhashname);
 603
 604         thread_list_init(&rv->event);
 605         thread_list_init(&rv->ready);
 606         thread_list_init(&rv->unuse);
 607         thread_timer_list_init(&rv->timer);
 608
 609         /* Initialize thread_fetch() settings */
 610         rv->spin = true;
 611         rv->handle_signals = true;
 612
 613         /* Set pthread owner, should be updated by actual owner */
 614         rv->owner = pthread_self();
 615         rv->cancel_req = list_new();
 616         rv->cancel_req->del = cancelreq_del;
 617         rv->canceled = true;
 618
 619         /* Initialize pipe poker */
 620         pipe(rv->io_pipe);
 621         set_nonblocking(rv->io_pipe[0]);
 622         set_nonblocking(rv->io_pipe[1]);
 623
 624         /* Initialize data structures for poll() */
 625         rv->handler.pfdsize = rv->fd_limit;
 626         rv->handler.pfdcount = 0;
 627         rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
 628                                    sizeof(struct pollfd) * rv->handler.pfdsize);
 629         rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
 630                                    sizeof(struct pollfd) * rv->handler.pfdsize);
 631
 632         /* add to list of threadmasters */
 633         frr_with_mutex(&masters_mtx) {
 634                 if (!masters)
 635                         masters = list_new();
 636
 637                 listnode_add(masters, rv);
 638         }
 639
 640         return rv;
 641 }
 642
 643 void thread_master_set_name(struct thread_master *master, const char *name)
 644 {
 645         frr_with_mutex(&master->mtx) {
 646                 XFREE(MTYPE_THREAD_MASTER, master->name);
 647                 master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
 648         }
 649 }
 650
 651 #define THREAD_UNUSED_DEPTH 10
 652
 653 /* Move thread to unuse list. */
 654 static void thread_add_unuse(struct thread_master *m, struct thread *thread)
 655 {
 656         pthread_mutex_t mtxc = thread->mtx;
 657
 658         assert(m != NULL && thread != NULL);
 659
 660         thread->hist->total_active--;
 661         memset(thread, 0, sizeof(struct thread));
 662         thread->type = THREAD_UNUSED;
 663
 664         /* Restore the thread mutex context. */
 665         thread->mtx = mtxc;
 666
 667         if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
 668                 thread_list_add_tail(&m->unuse, thread);
 669                 return;
 670         }
 671
 672         thread_free(m, thread);
 673 }
 674
 675 /* Free all unused thread. */
 676 static void thread_list_free(struct thread_master *m,
 677                 struct thread_list_head *list)
 678 {
 679         struct thread *t;
 680
 681         while ((t = thread_list_pop(list)))
 682                 thread_free(m, t);
 683 }
 684
 685 static void thread_array_free(struct thread_master *m,
 686                               struct thread **thread_array)
 687 {
 688         struct thread *t;
 689         int index;
 690
 691         for (index = 0; index < m->fd_limit; ++index) {
 692                 t = thread_array[index];
 693                 if (t) {
 694                         thread_array[index] = NULL;
 695                         thread_free(m, t);
 696                 }
 697         }
 698         XFREE(MTYPE_THREAD_POLL, thread_array);
 699 }
 700
 701 /*
 702  * thread_master_free_unused
 703  *
 704  * As threads are finished with they are put on the
 705  * unuse list for later reuse.
 706  * If we are shutting down, Free up unused threads
 707  * So we can see if we forget to shut anything off
 708  */
 709 void thread_master_free_unused(struct thread_master *m)
 710 {
 711         frr_with_mutex(&m->mtx) {
 712                 struct thread *t;
 713                 while ((t = thread_list_pop(&m->unuse)))
 714                         thread_free(m, t);
 715         }
 716 }
 717
 718 /* Stop thread scheduler. */
 719 void thread_master_free(struct thread_master *m)
 720 {
 721         struct thread *t;
 722
 723         frr_with_mutex(&masters_mtx) {
 724                 listnode_delete(masters, m);
 725                 if (masters->count == 0) {
 726                         list_delete(&masters);
 727                 }
 728         }
 729
 730         thread_array_free(m, m->read);
 731         thread_array_free(m, m->write);
 732         while ((t = thread_timer_list_pop(&m->timer)))
 733                 thread_free(m, t);
 734         thread_list_free(m, &m->event);
 735         thread_list_free(m, &m->ready);
 736         thread_list_free(m, &m->unuse);
 737         pthread_mutex_destroy(&m->mtx);
 738         pthread_cond_destroy(&m->cancel_cond);
 739         close(m->io_pipe[0]);
 740         close(m->io_pipe[1]);
 741         list_delete(&m->cancel_req);
 742         m->cancel_req = NULL;
 743
 744         hash_clean(m->cpu_record, cpu_record_hash_free);
 745         hash_free(m->cpu_record);
 746         m->cpu_record = NULL;
 747
 748         XFREE(MTYPE_THREAD_MASTER, m->name);
 749         XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
 750         XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
 751         XFREE(MTYPE_THREAD_MASTER, m);
 752 }
 753
 754 /* Return remain time in milliseconds. */
 755 unsigned long thread_timer_remain_msec(struct thread *thread)
 756 {
 757         int64_t remain;
 758
 759         if (!thread_is_scheduled(thread))
 760                 return 0;
 761
 762         frr_with_mutex(&thread->mtx) {
 763                 remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
 764         }
 765
 766         return remain < 0 ? 0 : remain;
 767 }
 768
 769 /* Return remain time in seconds. */
 770 unsigned long thread_timer_remain_second(struct thread *thread)
 771 {
 772         return thread_timer_remain_msec(thread) / 1000LL;
 773 }
 774
 775 struct timeval thread_timer_remain(struct thread *thread)
 776 {
 777         struct timeval remain;
 778         frr_with_mutex(&thread->mtx) {
 779                 monotime_until(&thread->u.sands, &remain);
 780         }
 781         return remain;
 782 }
 783
 784 static int time_hhmmss(char *buf, int buf_size, long sec)
 785 {
 786         long hh;
 787         long mm;
 788         int wr;
 789
 790         assert(buf_size >= 8);
 791
 792         hh = sec / 3600;
 793         sec %= 3600;
 794         mm = sec / 60;
 795         sec %= 60;
 796
 797         wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
 798
 799         return wr != 8;
 800 }
 801
 802 char *thread_timer_to_hhmmss(char *buf, int buf_size,
 803                 struct thread *t_timer)
 804 {
 805         if (t_timer) {
 806                 time_hhmmss(buf, buf_size,
 807                                 thread_timer_remain_second(t_timer));
 808         } else {
 809                 snprintf(buf, buf_size, "--:--:--");
 810         }
 811         return buf;
 812 }
 813
 814 /* Get new thread.  */
 815 static struct thread *thread_get(struct thread_master *m, uint8_t type,
 816                                  void (*func)(struct thread *), void *arg,
 817                                  const struct xref_threadsched *xref)
 818 {
 819         struct thread *thread = thread_list_pop(&m->unuse);
 820         struct cpu_thread_history tmp;
 821
 822         if (!thread) {
 823                 thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
 824                 /* mutex only needs to be initialized at struct creation. */
 825                 pthread_mutex_init(&thread->mtx, NULL);
 826                 m->alloc++;
 827         }
 828
 829         thread->type = type;
 830         thread->add_type = type;
 831         thread->master = m;
 832         thread->arg = arg;
 833         thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
 834         thread->ref = NULL;
 835         thread->ignore_timer_late = false;
 836
 837         /*
 838          * So if the passed in funcname is not what we have
 839          * stored that means the thread->hist needs to be
 840          * updated.  We keep the last one around in unused
 841          * under the assumption that we are probably
 842          * going to immediately allocate the same
 843          * type of thread.
 844          * This hopefully saves us some serious
 845          * hash_get lookups.
 846          */
 847         if ((thread->xref && thread->xref->funcname != xref->funcname)
 848             || thread->func != func) {
 849                 tmp.func = func;
 850                 tmp.funcname = xref->funcname;
 851                 thread->hist =
 852                         hash_get(m->cpu_record, &tmp,
 853                                  (void *(*)(void *))cpu_record_hash_alloc);
 854         }
 855         thread->hist->total_active++;
 856         thread->func = func;
 857         thread->xref = xref;
 858
 859         return thread;
 860 }
 861
 862 static void thread_free(struct thread_master *master, struct thread *thread)
 863 {
 864         /* Update statistics. */
 865         assert(master->alloc > 0);
 866         master->alloc--;
 867
 868         /* Free allocated resources. */
 869         pthread_mutex_destroy(&thread->mtx);
 870         XFREE(MTYPE_THREAD, thread);
 871 }
 872
 873 static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
 874                    bool *eintr_p)
 875 {
 876         sigset_t origsigs;
 877         unsigned char trash[64];
 878         nfds_t count = m->handler.copycount;
 879
 880         /*
 881          * If timer_wait is null here, that means poll() should block
 882          * indefinitely, unless the thread_master has overridden it by setting
 883          * ->selectpoll_timeout.
 884          *
 885          * If the value is positive, it specifies the maximum number of
 886          * milliseconds to wait. If the timeout is -1, it specifies that
 887          * we should never wait and always return immediately even if no
 888          * event is detected. If the value is zero, the behavior is default.
 889          */
 890         int timeout = -1;
 891
 892         /* number of file descriptors with events */
 893         int num;
 894
 895         if (timer_wait != NULL
 896             && m->selectpoll_timeout == 0) // use the default value
 897                 timeout = (timer_wait->tv_sec * 1000)
 898                           + (timer_wait->tv_usec / 1000);
 899         else if (m->selectpoll_timeout > 0) // use the user's timeout
 900                 timeout = m->selectpoll_timeout;
 901         else if (m->selectpoll_timeout
 902                  < 0) // effect a poll (return immediately)
 903                 timeout = 0;
 904
 905         zlog_tls_buffer_flush();
 906         rcu_read_unlock();
 907         rcu_assert_read_unlocked();
 908
 909         /* add poll pipe poker */
 910         assert(count + 1 < m->handler.pfdsize);
 911         m->handler.copy[count].fd = m->io_pipe[0];
 912         m->handler.copy[count].events = POLLIN;
 913         m->handler.copy[count].revents = 0x00;
 914
 915         /* We need to deal with a signal-handling race here: we
 916          * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
 917          * that may arrive just before we enter poll(). We will block the
 918          * key signals, then check whether any have arrived - if so, we return
 919          * before calling poll(). If not, we'll re-enable the signals
 920          * in the ppoll() call.
 921          */
 922
 923         sigemptyset(&origsigs);
 924         if (m->handle_signals) {
 925                 /* Main pthread that handles the app signals */
 926                 if (frr_sigevent_check(&origsigs)) {
 927                         /* Signal to process - restore signal mask and return */
 928                         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 929                         num = -1;
 930                         *eintr_p = true;
 931                         goto done;
 932                 }
 933         } else {
 934                 /* Don't make any changes for the non-main pthreads */
 935                 pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
 936         }
 937
 938 #if defined(HAVE_PPOLL)
 939         struct timespec ts, *tsp;
 940
 941         if (timeout >= 0) {
 942                 ts.tv_sec = timeout / 1000;
 943                 ts.tv_nsec = (timeout % 1000) * 1000000;
 944                 tsp = &ts;
 945         } else
 946                 tsp = NULL;
 947
 948         num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
 949         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 950 #else
 951         /* Not ideal - there is a race after we restore the signal mask */
 952         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 953         num = poll(m->handler.copy, count + 1, timeout);
 954 #endif
 955
 956 done:
 957
 958         if (num < 0 && errno == EINTR)
 959                 *eintr_p = true;
 960
 961         if (num > 0 && m->handler.copy[count].revents != 0 && num--)
 962                 while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
 963                         ;
 964
 965         rcu_read_lock();
 966
 967         return num;
 968 }
 969
 970 /* Add new read thread. */
 971 void _thread_add_read_write(const struct xref_threadsched *xref,
 972                             struct thread_master *m,
 973                             void (*func)(struct thread *), void *arg, int fd,
 974                             struct thread **t_ptr)
 975 {
 976         int dir = xref->thread_type;
 977         struct thread *thread = NULL;
 978         struct thread **thread_array;
 979
 980         if (dir == THREAD_READ)
 981                 frrtrace(9, frr_libfrr, schedule_read, m,
 982                          xref->funcname, xref->xref.file, xref->xref.line,
 983                          t_ptr, fd, 0, arg, 0);
 984         else
 985                 frrtrace(9, frr_libfrr, schedule_write, m,
 986                          xref->funcname, xref->xref.file, xref->xref.line,
 987                          t_ptr, fd, 0, arg, 0);
 988
 989         assert(fd >= 0);
 990         if (fd >= m->fd_limit)
 991                 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
 992
 993         frr_with_mutex(&m->mtx) {
 994                 if (t_ptr && *t_ptr)
 995                         // thread is already scheduled; don't reschedule
 996                         break;
 997
 998                 /* default to a new pollfd */
 999                 nfds_t queuepos = m->handler.pfdcount;
1000
1001                 if (dir == THREAD_READ)
1002                         thread_array = m->read;
1003                 else
1004                         thread_array = m->write;
1005
1006                 /* if we already have a pollfd for our file descriptor, find and
1007                  * use it */
1008                 for (nfds_t i = 0; i < m->handler.pfdcount; i++)
1009                         if (m->handler.pfds[i].fd == fd) {
1010                                 queuepos = i;
1011
1012 #ifdef DEV_BUILD
1013                                 /*
1014                                  * What happens if we have a thread already
1015                                  * created for this event?
1016                                  */
1017                                 if (thread_array[fd])
1018                                         assert(!"Thread already scheduled for file descriptor");
1019 #endif
1020                                 break;
1021                         }
1022
1023                 /* make sure we have room for this fd + pipe poker fd */
1024                 assert(queuepos + 1 < m->handler.pfdsize);
1025
1026                 thread = thread_get(m, dir, func, arg, xref);
1027
1028                 m->handler.pfds[queuepos].fd = fd;
1029                 m->handler.pfds[queuepos].events |=
1030                         (dir == THREAD_READ ? POLLIN : POLLOUT);
1031
1032                 if (queuepos == m->handler.pfdcount)
1033                         m->handler.pfdcount++;
1034
1035                 if (thread) {
1036                         frr_with_mutex(&thread->mtx) {
1037                                 thread->u.fd = fd;
1038                                 thread_array[thread->u.fd] = thread;
1039                         }
1040
1041                         if (t_ptr) {
1042                                 *t_ptr = thread;
1043                                 thread->ref = t_ptr;
1044                         }
1045                 }
1046
1047                 AWAKEN(m);
1048         }
1049 }
1050
1051 static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
1052                                       struct thread_master *m,
1053                                       void (*func)(struct thread *), void *arg,
1054                                       struct timeval *time_relative,
1055                                       struct thread **t_ptr)
1056 {
1057         struct thread *thread;
1058         struct timeval t;
1059
1060         assert(m != NULL);
1061
1062         assert(time_relative);
1063
1064         frrtrace(9, frr_libfrr, schedule_timer, m,
1065                  xref->funcname, xref->xref.file, xref->xref.line,
1066                  t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
1067
1068         /* Compute expiration/deadline time. */
1069         monotime(&t);
1070         timeradd(&t, time_relative, &t);
1071
1072         frr_with_mutex(&m->mtx) {
1073                 if (t_ptr && *t_ptr)
1074                         /* thread is already scheduled; don't reschedule */
1075                         return;
1076
1077                 thread = thread_get(m, THREAD_TIMER, func, arg, xref);
1078
1079                 frr_with_mutex(&thread->mtx) {
1080                         thread->u.sands = t;
1081                         thread_timer_list_add(&m->timer, thread);
1082                         if (t_ptr) {
1083                                 *t_ptr = thread;
1084                                 thread->ref = t_ptr;
1085                         }
1086                 }
1087
1088                 /* The timer list is sorted - if this new timer
1089                  * might change the time we'll wait for, give the pthread
1090                  * a chance to re-compute.
1091                  */
1092                 if (thread_timer_list_first(&m->timer) == thread)
1093                         AWAKEN(m);
1094         }
1095 #define ONEYEAR2SEC (60 * 60 * 24 * 365)
1096         if (time_relative->tv_sec > ONEYEAR2SEC)
1097                 flog_err(
1098                         EC_LIB_TIMER_TOO_LONG,
1099                         "Timer: %pTHD is created with an expiration that is greater than 1 year",
1100                         thread);
1101 }
1102
1103
1104 /* Add timer event thread. */
1105 void _thread_add_timer(const struct xref_threadsched *xref,
1106                        struct thread_master *m, void (*func)(struct thread *),
1107                        void *arg, long timer, struct thread **t_ptr)
1108 {
1109         struct timeval trel;
1110
1111         assert(m != NULL);
1112
1113         trel.tv_sec = timer;
1114         trel.tv_usec = 0;
1115
1116         _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1117 }
1118
1119 /* Add timer event thread with "millisecond" resolution */
1120 void _thread_add_timer_msec(const struct xref_threadsched *xref,
1121                             struct thread_master *m,
1122                             void (*func)(struct thread *), void *arg,
1123                             long timer, struct thread **t_ptr)
1124 {
1125         struct timeval trel;
1126
1127         assert(m != NULL);
1128
1129         trel.tv_sec = timer / 1000;
1130         trel.tv_usec = 1000 * (timer % 1000);
1131
1132         _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1133 }
1134
1135 /* Add timer event thread with "timeval" resolution */
1136 void _thread_add_timer_tv(const struct xref_threadsched *xref,
1137                           struct thread_master *m,
1138                           void (*func)(struct thread *), void *arg,
1139                           struct timeval *tv, struct thread **t_ptr)
1140 {
1141         _thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
1142 }
1143
1144 /* Add simple event thread. */
1145 void _thread_add_event(const struct xref_threadsched *xref,
1146                        struct thread_master *m, void (*func)(struct thread *),
1147                        void *arg, int val, struct thread **t_ptr)
1148 {
1149         struct thread *thread = NULL;
1150
1151         frrtrace(9, frr_libfrr, schedule_event, m,
1152                  xref->funcname, xref->xref.file, xref->xref.line,
1153                  t_ptr, 0, val, arg, 0);
1154
1155         assert(m != NULL);
1156
1157         frr_with_mutex(&m->mtx) {
1158                 if (t_ptr && *t_ptr)
1159                         /* thread is already scheduled; don't reschedule */
1160                         break;
1161
1162                 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
1163                 frr_with_mutex(&thread->mtx) {
1164                         thread->u.val = val;
1165                         thread_list_add_tail(&m->event, thread);
1166                 }
1167
1168                 if (t_ptr) {
1169                         *t_ptr = thread;
1170                         thread->ref = t_ptr;
1171                 }
1172
1173                 AWAKEN(m);
1174         }
1175 }
1176
1177 /* Thread cancellation ------------------------------------------------------ */
1178
1179 /**
1180  * NOT's out the .events field of pollfd corresponding to the given file
1181  * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1182  *
1183  * This needs to happen for both copies of pollfd's. See 'thread_fetch'
1184  * implementation for details.
1185  *
1186  * @param master
1187  * @param fd
1188  * @param state the event to cancel. One or more (OR'd together) of the
1189  * following:
1190  *   - POLLIN
1191  *   - POLLOUT
1192  */
1193 static void thread_cancel_rw(struct thread_master *master, int fd, short state,
1194                              int idx_hint)
1195 {
1196         bool found = false;
1197
1198         /* find the index of corresponding pollfd */
1199         nfds_t i;
1200
1201         /* Cancel POLLHUP too just in case some bozo set it */
1202         state |= POLLHUP;
1203
1204         /* Some callers know the index of the pfd already */
1205         if (idx_hint >= 0) {
1206                 i = idx_hint;
1207                 found = true;
1208         } else {
1209                 /* Have to look for the fd in the pfd array */
1210                 for (i = 0; i < master->handler.pfdcount; i++)
1211                         if (master->handler.pfds[i].fd == fd) {
1212                                 found = true;
1213                                 break;
1214                         }
1215         }
1216
1217         if (!found) {
1218                 zlog_debug(
1219                         "[!] Received cancellation request for nonexistent rw job");
1220                 zlog_debug("[!] threadmaster: %s | fd: %d",
1221                            master->name ? master->name : "", fd);
1222                 return;
1223         }
1224
1225         /* NOT out event. */
1226         master->handler.pfds[i].events &= ~(state);
1227
1228         /* If all events are canceled, delete / resize the pollfd array. */
1229         if (master->handler.pfds[i].events == 0) {
1230                 memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1231                         (master->handler.pfdcount - i - 1)
1232                                 * sizeof(struct pollfd));
1233                 master->handler.pfdcount--;
1234                 master->handler.pfds[master->handler.pfdcount].fd = 0;
1235                 master->handler.pfds[master->handler.pfdcount].events = 0;
1236         }
1237
1238         /* If we have the same pollfd in the copy, perform the same operations,
1239          * otherwise return. */
1240         if (i >= master->handler.copycount)
1241                 return;
1242
1243         master->handler.copy[i].events &= ~(state);
1244
1245         if (master->handler.copy[i].events == 0) {
1246                 memmove(master->handler.copy + i, master->handler.copy + i + 1,
1247                         (master->handler.copycount - i - 1)
1248                                 * sizeof(struct pollfd));
1249                 master->handler.copycount--;
1250                 master->handler.copy[master->handler.copycount].fd = 0;
1251                 master->handler.copy[master->handler.copycount].events = 0;
1252         }
1253 }
1254
1255 /*
1256  * Process task cancellation given a task argument: iterate through the
1257  * various lists of tasks, looking for any that match the argument.
1258  */
1259 static void cancel_arg_helper(struct thread_master *master,
1260                               const struct cancel_req *cr)
1261 {
1262         struct thread *t;
1263         nfds_t i;
1264         int fd;
1265         struct pollfd *pfd;
1266
1267         /* We're only processing arg-based cancellations here. */
1268         if (cr->eventobj == NULL)
1269                 return;
1270
1271         /* First process the ready lists. */
1272         frr_each_safe(thread_list, &master->event, t) {
1273                 if (t->arg != cr->eventobj)
1274                         continue;
1275                 thread_list_del(&master->event, t);
1276                 if (t->ref)
1277                         *t->ref = NULL;
1278                 thread_add_unuse(master, t);
1279         }
1280
1281         frr_each_safe(thread_list, &master->ready, t) {
1282                 if (t->arg != cr->eventobj)
1283                         continue;
1284                 thread_list_del(&master->ready, t);
1285                 if (t->ref)
1286                         *t->ref = NULL;
1287                 thread_add_unuse(master, t);
1288         }
1289
1290         /* If requested, stop here and ignore io and timers */
1291         if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
1292                 return;
1293
1294         /* Check the io tasks */
1295         for (i = 0; i < master->handler.pfdcount;) {
1296                 pfd = master->handler.pfds + i;
1297
1298                 if (pfd->events & POLLIN)
1299                         t = master->read[pfd->fd];
1300                 else
1301                         t = master->write[pfd->fd];
1302
1303                 if (t && t->arg == cr->eventobj) {
1304                         fd = pfd->fd;
1305
1306                         /* Found a match to cancel: clean up fd arrays */
1307                         thread_cancel_rw(master, pfd->fd, pfd->events, i);
1308
1309                         /* Clean up thread arrays */
1310                         master->read[fd] = NULL;
1311                         master->write[fd] = NULL;
1312
1313                         /* Clear caller's ref */
1314                         if (t->ref)
1315                                 *t->ref = NULL;
1316
1317                         thread_add_unuse(master, t);
1318
1319                         /* Don't increment 'i' since the cancellation will have
1320                          * removed the entry from the pfd array
1321                          */
1322                 } else
1323                         i++;
1324         }
1325
1326         /* Check the timer tasks */
1327         t = thread_timer_list_first(&master->timer);
1328         while (t) {
1329                 struct thread *t_next;
1330
1331                 t_next = thread_timer_list_next(&master->timer, t);
1332
1333                 if (t->arg == cr->eventobj) {
1334                         thread_timer_list_del(&master->timer, t);
1335                         if (t->ref)
1336                                 *t->ref = NULL;
1337                         thread_add_unuse(master, t);
1338                 }
1339
1340                 t = t_next;
1341         }
1342 }
1343
1344 /**
1345  * Process cancellation requests.
1346  *
1347  * This may only be run from the pthread which owns the thread_master.
1348  *
1349  * @param master the thread master to process
1350  * @REQUIRE master->mtx
1351  */
1352 static void do_thread_cancel(struct thread_master *master)
1353 {
1354         struct thread_list_head *list = NULL;
1355         struct thread **thread_array = NULL;
1356         struct thread *thread;
1357
1358         struct cancel_req *cr;
1359         struct listnode *ln;
1360         for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
1361                 /*
1362                  * If this is an event object cancellation, search
1363                  * through task lists deleting any tasks which have the
1364                  * specified argument - use this handy helper function.
1365                  */
1366                 if (cr->eventobj) {
1367                         cancel_arg_helper(master, cr);
1368                         continue;
1369                 }
1370
1371                 /*
1372                  * The pointer varies depending on whether the cancellation
1373                  * request was made asynchronously or not. If it was, we
1374                  * need to check whether the thread even exists anymore
1375                  * before cancelling it.
1376                  */
1377                 thread = (cr->thread) ? cr->thread : *cr->threadref;
1378
1379                 if (!thread)
1380                         continue;
1381
1382                 /* Determine the appropriate queue to cancel the thread from */
1383                 switch (thread->type) {
1384                 case THREAD_READ:
1385                         thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
1386                         thread_array = master->read;
1387                         break;
1388                 case THREAD_WRITE:
1389                         thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
1390                         thread_array = master->write;
1391                         break;
1392                 case THREAD_TIMER:
1393                         thread_timer_list_del(&master->timer, thread);
1394                         break;
1395                 case THREAD_EVENT:
1396                         list = &master->event;
1397                         break;
1398                 case THREAD_READY:
1399                         list = &master->ready;
1400                         break;
1401                 default:
1402                         continue;
1403                         break;
1404                 }
1405
1406                 if (list) {
1407                         thread_list_del(list, thread);
1408                 } else if (thread_array) {
1409                         thread_array[thread->u.fd] = NULL;
1410                 }
1411
1412                 if (thread->ref)
1413                         *thread->ref = NULL;
1414
1415                 thread_add_unuse(thread->master, thread);
1416         }
1417
1418         /* Delete and free all cancellation requests */
1419         if (master->cancel_req)
1420                 list_delete_all_node(master->cancel_req);
1421
1422         /* Wake up any threads which may be blocked in thread_cancel_async() */
1423         master->canceled = true;
1424         pthread_cond_broadcast(&master->cancel_cond);
1425 }
1426
1427 /*
1428  * Helper function used for multiple flavors of arg-based cancellation.
1429  */
1430 static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
1431 {
1432         struct cancel_req *cr;
1433
1434         assert(m->owner == pthread_self());
1435
1436         /* Only worth anything if caller supplies an arg. */
1437         if (arg == NULL)
1438                 return;
1439
1440         cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1441
1442         cr->flags = flags;
1443
1444         frr_with_mutex(&m->mtx) {
1445                 cr->eventobj = arg;
1446                 listnode_add(m->cancel_req, cr);
1447                 do_thread_cancel(m);
1448         }
1449 }
1450
1451 /**
1452  * Cancel any events which have the specified argument.
1453  *
1454  * MT-Unsafe
1455  *
1456  * @param m the thread_master to cancel from
1457  * @param arg the argument passed when creating the event
1458  */
1459 void thread_cancel_event(struct thread_master *master, void *arg)
1460 {
1461         cancel_event_helper(master, arg, 0);
1462 }
1463
1464 /*
1465  * Cancel ready tasks with an arg matching 'arg'
1466  *
1467  * MT-Unsafe
1468  *
1469  * @param m the thread_master to cancel from
1470  * @param arg the argument passed when creating the event
1471  */
1472 void thread_cancel_event_ready(struct thread_master *m, void *arg)
1473 {
1474
1475         /* Only cancel ready/event tasks */
1476         cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
1477 }
1478
1479 /**
1480  * Cancel a specific task.
1481  *
1482  * MT-Unsafe
1483  *
1484  * @param thread task to cancel
1485  */
1486 void thread_cancel(struct thread **thread)
1487 {
1488         struct thread_master *master;
1489
1490         if (thread == NULL || *thread == NULL)
1491                 return;
1492
1493         master = (*thread)->master;
1494
1495         frrtrace(9, frr_libfrr, thread_cancel, master,
1496                  (*thread)->xref->funcname, (*thread)->xref->xref.file,
1497                  (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
1498                  (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);
1499
1500         assert(master->owner == pthread_self());
1501
1502         frr_with_mutex(&master->mtx) {
1503                 struct cancel_req *cr =
1504                         XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1505                 cr->thread = *thread;
1506                 listnode_add(master->cancel_req, cr);
1507                 do_thread_cancel(master);
1508         }
1509
1510         *thread = NULL;
1511 }
1512
1513 /**
1514  * Asynchronous cancellation.
1515  *
1516  * Called with either a struct thread ** or void * to an event argument,
1517  * this function posts the correct cancellation request and blocks until it is
1518  * serviced.
1519  *
1520  * If the thread is currently running, execution blocks until it completes.
1521  *
1522  * The last two parameters are mutually exclusive, i.e. if you pass one the
1523  * other must be NULL.
1524  *
1525  * When the cancellation procedure executes on the target thread_master, the
1526  * thread * provided is checked for nullity. If it is null, the thread is
1527  * assumed to no longer exist and the cancellation request is a no-op. Thus
1528  * users of this API must pass a back-reference when scheduling the original
1529  * task.
1530  *
1531  * MT-Safe
1532  *
1533  * @param master the thread master with the relevant event / task
1534  * @param thread pointer to thread to cancel
1535  * @param eventobj the event
1536  */
1537 void thread_cancel_async(struct thread_master *master, struct thread **thread,
1538                          void *eventobj)
1539 {
1540         assert(!(thread && eventobj) && (thread || eventobj));
1541
1542         if (thread && *thread)
1543                 frrtrace(9, frr_libfrr, thread_cancel_async, master,
1544                          (*thread)->xref->funcname, (*thread)->xref->xref.file,
1545                          (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
1546                          (*thread)->u.val, (*thread)->arg,
1547                          (*thread)->u.sands.tv_sec);
1548         else
1549                 frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
1550                          0, NULL, 0, 0, eventobj, 0);
1551
1552         assert(master->owner != pthread_self());
1553
1554         frr_with_mutex(&master->mtx) {
1555                 master->canceled = false;
1556
1557                 if (thread) {
1558                         struct cancel_req *cr =
1559                                 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1560                         cr->threadref = thread;
1561                         listnode_add(master->cancel_req, cr);
1562                 } else if (eventobj) {
1563                         struct cancel_req *cr =
1564                                 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1565                         cr->eventobj = eventobj;
1566                         listnode_add(master->cancel_req, cr);
1567                 }
1568                 AWAKEN(master);
1569
1570                 while (!master->canceled)
1571                         pthread_cond_wait(&master->cancel_cond, &master->mtx);
1572         }
1573
1574         if (thread)
1575                 *thread = NULL;
1576 }
1577 /* ------------------------------------------------------------------------- */
1578
1579 static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
1580                                          struct timeval *timer_val)
1581 {
1582         if (!thread_timer_list_count(timers))
1583                 return NULL;
1584
1585         struct thread *next_timer = thread_timer_list_first(timers);
1586         monotime_until(&next_timer->u.sands, timer_val);
1587         return timer_val;
1588 }
1589
1590 static struct thread *thread_run(struct thread_master *m, struct thread *thread,
1591                                  struct thread *fetch)
1592 {
1593         *fetch = *thread;
1594         thread_add_unuse(m, thread);
1595         return fetch;
1596 }
1597
1598 static int thread_process_io_helper(struct thread_master *m,
1599                                     struct thread *thread, short state,
1600                                     short actual_state, int pos)
1601 {
1602         struct thread **thread_array;
1603
1604         /*
1605          * poll() clears the .events field, but the pollfd array we
1606          * pass to poll() is a copy of the one used to schedule threads.
1607          * We need to synchronize state between the two here by applying
1608          * the same changes poll() made on the copy of the "real" pollfd
1609          * array.
1610          *
1611          * This cleans up a possible infinite loop where we refuse
1612          * to respond to a poll event but poll is insistent that
1613          * we should.
1614          */
1615         m->handler.pfds[pos].events &= ~(state);
1616
1617         if (!thread) {
1618                 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1619                         flog_err(EC_LIB_NO_THREAD,
1620                                  "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
1621                                  m->handler.pfds[pos].fd, actual_state);
1622                 return 0;
1623         }
1624
1625         if (thread->type == THREAD_READ)
1626                 thread_array = m->read;
1627         else
1628                 thread_array = m->write;
1629
1630         thread_array[thread->u.fd] = NULL;
1631         thread_list_add_tail(&m->ready, thread);
1632         thread->type = THREAD_READY;
1633
1634         return 1;
1635 }
1636
1637 /**
1638  * Process I/O events.
1639  *
1640  * Walks through file descriptor array looking for those pollfds whose .revents
1641  * field has something interesting. Deletes any invalid file descriptors.
1642  *
1643  * @param m the thread master
1644  * @param num the number of active file descriptors (return value of poll())
1645  */
1646 static void thread_process_io(struct thread_master *m, unsigned int num)
1647 {
1648         unsigned int ready = 0;
1649         struct pollfd *pfds = m->handler.copy;
1650
1651         for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1652                 /* no event for current fd? immediately continue */
1653                 if (pfds[i].revents == 0)
1654                         continue;
1655
1656                 ready++;
1657
1658                 /*
1659                  * Unless someone has called thread_cancel from another
1660                  * pthread, the only thing that could have changed in
1661                  * m->handler.pfds while we were asleep is the .events
1662                  * field in a given pollfd. Barring thread_cancel() that
1663                  * value should be a superset of the values we have in our
1664                  * copy, so there's no need to update it. Similarily,
1665                  * barring deletion, the fd should still be a valid index
1666                  * into the master's pfds.
1667                  *
1668                  * We are including POLLERR here to do a READ event
1669                  * this is because the read should fail and the
1670                  * read function should handle it appropriately
1671                  */
1672                 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
1673                         thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
1674                                                  pfds[i].revents, i);
1675                 }
1676                 if (pfds[i].revents & POLLOUT)
1677                         thread_process_io_helper(m, m->write[pfds[i].fd],
1678                                                  POLLOUT, pfds[i].revents, i);
1679
1680                 /* if one of our file descriptors is garbage, remove the same
1681                  * from
1682                  * both pfds + update sizes and index */
1683                 if (pfds[i].revents & POLLNVAL) {
1684                         memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1685                                 (m->handler.pfdcount - i - 1)
1686                                         * sizeof(struct pollfd));
1687                         m->handler.pfdcount--;
1688                         m->handler.pfds[m->handler.pfdcount].fd = 0;
1689                         m->handler.pfds[m->handler.pfdcount].events = 0;
1690
1691                         memmove(pfds + i, pfds + i + 1,
1692                                 (m->handler.copycount - i - 1)
1693                                         * sizeof(struct pollfd));
1694                         m->handler.copycount--;
1695                         m->handler.copy[m->handler.copycount].fd = 0;
1696                         m->handler.copy[m->handler.copycount].events = 0;
1697
1698                         i--;
1699                 }
1700         }
1701 }
1702
1703 /* Add all timers that have popped to the ready list. */
1704 static unsigned int thread_process_timers(struct thread_master *m,
1705                                           struct timeval *timenow)
1706 {
1707         struct timeval prev = *timenow;
1708         bool displayed = false;
1709         struct thread *thread;
1710         unsigned int ready = 0;
1711
1712         while ((thread = thread_timer_list_first(&m->timer))) {
1713                 if (timercmp(timenow, &thread->u.sands, <))
1714                         break;
1715                 prev = thread->u.sands;
1716                 prev.tv_sec += 4;
1717                 /*
1718                  * If the timer would have popped 4 seconds in the
1719                  * past then we are in a situation where we are
1720                  * really getting behind on handling of events.
1721                  * Let's log it and do the right thing with it.
1722                  */
1723                 if (timercmp(timenow, &prev, >)) {
1724                         atomic_fetch_add_explicit(
1725                                 &thread->hist->total_starv_warn, 1,
1726                                 memory_order_seq_cst);
1727                         if (!displayed && !thread->ignore_timer_late) {
1728                                 flog_warn(
1729                                         EC_LIB_STARVE_THREAD,
1730                                         "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1731                                         thread);
1732                                 displayed = true;
1733                         }
1734                 }
1735
1736                 thread_timer_list_pop(&m->timer);
1737                 thread->type = THREAD_READY;
1738                 thread_list_add_tail(&m->ready, thread);
1739                 ready++;
1740         }
1741
1742         return ready;
1743 }
1744
1745 /* process a list en masse, e.g. for event thread lists */
1746 static unsigned int thread_process(struct thread_list_head *list)
1747 {
1748         struct thread *thread;
1749         unsigned int ready = 0;
1750
1751         while ((thread = thread_list_pop(list))) {
1752                 thread->type = THREAD_READY;
1753                 thread_list_add_tail(&thread->master->ready, thread);
1754                 ready++;
1755         }
1756         return ready;
1757 }
1758
1759
1760 /* Fetch next ready thread. */
1761 struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
1762 {
1763         struct thread *thread = NULL;
1764         struct timeval now;
1765         struct timeval zerotime = {0, 0};
1766         struct timeval tv;
1767         struct timeval *tw = NULL;
1768         bool eintr_p = false;
1769         int num = 0;
1770
1771         do {
1772                 /* Handle signals if any */
1773                 if (m->handle_signals)
1774                         frr_sigevent_process();
1775
1776                 pthread_mutex_lock(&m->mtx);
1777
1778                 /* Process any pending cancellation requests */
1779                 do_thread_cancel(m);
1780
1781                 /*
1782                  * Attempt to flush ready queue before going into poll().
1783                  * This is performance-critical. Think twice before modifying.
1784                  */
1785                 if ((thread = thread_list_pop(&m->ready))) {
1786                         fetch = thread_run(m, thread, fetch);
1787                         if (fetch->ref)
1788                                 *fetch->ref = NULL;
1789                         pthread_mutex_unlock(&m->mtx);
1790                         if (!m->ready_run_loop)
1791                                 GETRUSAGE(&m->last_getrusage);
1792                         m->ready_run_loop = true;
1793                         break;
1794                 }
1795
1796                 m->ready_run_loop = false;
1797                 /* otherwise, tick through scheduling sequence */
1798
1799                 /*
1800                  * Post events to ready queue. This must come before the
1801                  * following block since events should occur immediately
1802                  */
1803                 thread_process(&m->event);
1804
1805                 /*
1806                  * If there are no tasks on the ready queue, we will poll()
1807                  * until a timer expires or we receive I/O, whichever comes
1808                  * first. The strategy for doing this is:
1809                  *
1810                  * - If there are events pending, set the poll() timeout to zero
1811                  * - If there are no events pending, but there are timers
1812                  * pending, set the timeout to the smallest remaining time on
1813                  * any timer.
1814                  * - If there are neither timers nor events pending, but there
1815                  * are file descriptors pending, block indefinitely in poll()
1816                  * - If nothing is pending, it's time for the application to die
1817                  *
1818                  * In every case except the last, we need to hit poll() at least
1819                  * once per loop to avoid starvation by events
1820                  */
1821                 if (!thread_list_count(&m->ready))
1822                         tw = thread_timer_wait(&m->timer, &tv);
1823
1824                 if (thread_list_count(&m->ready) ||
1825                                 (tw && !timercmp(tw, &zerotime, >)))
1826                         tw = &zerotime;
1827
1828                 if (!tw && m->handler.pfdcount == 0) { /* die */
1829                         pthread_mutex_unlock(&m->mtx);
1830                         fetch = NULL;
1831                         break;
1832                 }
1833
1834                 /*
1835                  * Copy pollfd array + # active pollfds in it. Not necessary to
1836                  * copy the array size as this is fixed.
1837                  */
1838                 m->handler.copycount = m->handler.pfdcount;
1839                 memcpy(m->handler.copy, m->handler.pfds,
1840                        m->handler.copycount * sizeof(struct pollfd));
1841
1842                 pthread_mutex_unlock(&m->mtx);
1843                 {
1844                         eintr_p = false;
1845                         num = fd_poll(m, tw, &eintr_p);
1846                 }
1847                 pthread_mutex_lock(&m->mtx);
1848
1849                 /* Handle any errors received in poll() */
1850                 if (num < 0) {
1851                         if (eintr_p) {
1852                                 pthread_mutex_unlock(&m->mtx);
1853                                 /* loop around to signal handler */
1854                                 continue;
1855                         }
1856
1857                         /* else die */
1858                         flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
1859                                  safe_strerror(errno));
1860                         pthread_mutex_unlock(&m->mtx);
1861                         fetch = NULL;
1862                         break;
1863                 }
1864
1865                 /* Post timers to ready queue. */
1866                 monotime(&now);
1867                 thread_process_timers(m, &now);
1868
1869                 /* Post I/O to ready queue. */
1870                 if (num > 0)
1871                         thread_process_io(m, num);
1872
1873                 pthread_mutex_unlock(&m->mtx);
1874
1875         } while (!thread && m->spin);
1876
1877         return fetch;
1878 }
1879
1880 static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
1881 {
1882         return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1883                 + (a.tv_usec - b.tv_usec));
1884 }
1885
1886 unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1887                                    unsigned long *cputime)
1888 {
1889 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1890
1891 #ifdef __FreeBSD__
1892         /*
1893          * FreeBSD appears to have an issue when calling clock_gettime
1894          * with CLOCK_THREAD_CPUTIME_ID really close to each other
1895          * occassionally the now time will be before the start time.
1896          * This is not good and FRR is ending up with CPU HOG's
1897          * when the subtraction wraps to very large numbers
1898          *
1899          * What we are going to do here is cheat a little bit
1900          * and notice that this is a problem and just correct
1901          * it so that it is impossible to happen
1902          */
1903         if (start->cpu.tv_sec == now->cpu.tv_sec &&
1904             start->cpu.tv_nsec > now->cpu.tv_nsec)
1905                 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1906         else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1907                 now->cpu.tv_sec = start->cpu.tv_sec;
1908                 now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1909         }
1910 #endif
1911         *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1912                    + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1913 #else
1914         /* This is 'user + sys' time.  */
1915         *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1916                    + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
1917 #endif
1918         return timeval_elapsed(now->real, start->real);
1919 }
1920
1921 /* We should aim to yield after yield milliseconds, which defaults
1922    to THREAD_YIELD_TIME_SLOT .
1923    Note: we are using real (wall clock) time for this calculation.
1924    It could be argued that CPU time may make more sense in certain
1925    contexts.  The things to consider are whether the thread may have
1926    blocked (in which case wall time increases, but CPU time does not),
1927    or whether the system is heavily loaded with other processes competing
1928    for CPU time.  On balance, wall clock time seems to make sense.
1929    Plus it has the added benefit that gettimeofday should be faster
1930    than calling getrusage. */
1931 int thread_should_yield(struct thread *thread)
1932 {
1933         int result;
1934         frr_with_mutex(&thread->mtx) {
1935                 result = monotime_since(&thread->real, NULL)
1936                          > (int64_t)thread->yield;
1937         }
1938         return result;
1939 }
1940
1941 void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
1942 {
1943         frr_with_mutex(&thread->mtx) {
1944                 thread->yield = yield_time;
1945         }
1946 }
1947
1948 void thread_getrusage(RUSAGE_T *r)
1949 {
1950         monotime(&r->real);
1951         if (!cputime_enabled) {
1952                 memset(&r->cpu, 0, sizeof(r->cpu));
1953                 return;
1954         }
1955
1956 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1957         /* not currently implemented in Linux's vDSO, but maybe at some point
1958          * in the future?
1959          */
1960         clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1961 #else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
1962 #if defined RUSAGE_THREAD
1963 #define FRR_RUSAGE RUSAGE_THREAD
1964 #else
1965 #define FRR_RUSAGE RUSAGE_SELF
1966 #endif
1967         getrusage(FRR_RUSAGE, &(r->cpu));
1968 #endif
1969 }
1970
1971 /*
1972  * Call a thread.
1973  *
1974  * This function will atomically update the thread's usage history. At present
1975  * this is the only spot where usage history is written. Nevertheless the code
1976  * has been written such that the introduction of writers in the future should
1977  * not need to update it provided the writers atomically perform only the
1978  * operations done here, i.e. updating the total and maximum times. In
1979  * particular, the maximum real and cpu times must be monotonically increasing
1980  * or this code is not correct.
1981  */
1982 void thread_call(struct thread *thread)
1983 {
1984         RUSAGE_T before, after;
1985
1986         /* if the thread being called is the CLI, it may change cputime_enabled
1987          * ("service cputime-stats" command), which can result in nonsensical
1988          * and very confusing warnings
1989          */
1990         bool cputime_enabled_here = cputime_enabled;
1991
1992         if (thread->master->ready_run_loop)
1993                 before = thread->master->last_getrusage;
1994         else
1995                 GETRUSAGE(&before);
1996
1997         thread->real = before.real;
1998
1999         frrtrace(9, frr_libfrr, thread_call, thread->master,
2000                  thread->xref->funcname, thread->xref->xref.file,
2001                  thread->xref->xref.line, NULL, thread->u.fd,
2002                  thread->u.val, thread->arg, thread->u.sands.tv_sec);
2003
2004         pthread_setspecific(thread_current, thread);
2005         (*thread->func)(thread);
2006         pthread_setspecific(thread_current, NULL);
2007
2008         GETRUSAGE(&after);
2009         thread->master->last_getrusage = after;
2010
2011         unsigned long walltime, cputime;
2012         unsigned long exp;
2013
2014         walltime = thread_consumed_time(&after, &before, &cputime);
2015
2016         /* update walltime */
2017         atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
2018                                   memory_order_seq_cst);
2019         exp = atomic_load_explicit(&thread->hist->real.max,
2020                                    memory_order_seq_cst);
2021         while (exp < walltime
2022                && !atomic_compare_exchange_weak_explicit(
2023                        &thread->hist->real.max, &exp, walltime,
2024                        memory_order_seq_cst, memory_order_seq_cst))
2025                 ;
2026
2027         if (cputime_enabled_here && cputime_enabled) {
2028                 /* update cputime */
2029                 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2030                                           memory_order_seq_cst);
2031                 exp = atomic_load_explicit(&thread->hist->cpu.max,
2032                                            memory_order_seq_cst);
2033                 while (exp < cputime
2034                        && !atomic_compare_exchange_weak_explicit(
2035                                &thread->hist->cpu.max, &exp, cputime,
2036                                memory_order_seq_cst, memory_order_seq_cst))
2037                         ;
2038         }
2039
2040         atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2041                                   memory_order_seq_cst);
2042         atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2043                                  memory_order_seq_cst);
2044
2045         if (cputime_enabled_here && cputime_enabled && cputime_threshold
2046             && cputime > cputime_threshold) {
2047                 /*
2048                  * We have a CPU Hog on our hands.  The time FRR has spent
2049                  * doing actual work (not sleeping) is greater than 5 seconds.
2050                  * Whinge about it now, so we're aware this is yet another task
2051                  * to fix.
2052                  */
2053                 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2054                                           1, memory_order_seq_cst);
2055                 flog_warn(
2056                         EC_LIB_SLOW_THREAD_CPU,
2057                         "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2058                         thread->xref->funcname, (unsigned long)thread->func,
2059                         walltime / 1000, cputime / 1000);
2060
2061         } else if (walltime_threshold && walltime > walltime_threshold) {
2062                 /*
2063                  * The runtime for a task is greater than 5 seconds, but the
2064                  * cpu time is under 5 seconds.  Let's whine about this because
2065                  * this could imply some sort of scheduling issue.
2066                  */
2067                 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2068                                           1, memory_order_seq_cst);
2069                 flog_warn(
2070                         EC_LIB_SLOW_THREAD_WALL,
2071                         "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
2072                         thread->xref->funcname, (unsigned long)thread->func,
2073                         walltime / 1000, cputime / 1000);
2074         }
2075 }
2076
2077 /* Execute thread */
2078 void _thread_execute(const struct xref_threadsched *xref,
2079                      struct thread_master *m, void (*func)(struct thread *),
2080                      void *arg, int val)
2081 {
2082         struct thread *thread;
2083
2084         /* Get or allocate new thread to execute. */
2085         frr_with_mutex(&m->mtx) {
2086                 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
2087
2088                 /* Set its event value. */
2089                 frr_with_mutex(&thread->mtx) {
2090                         thread->add_type = THREAD_EXECUTE;
2091                         thread->u.val = val;
2092                         thread->ref = &thread;
2093                 }
2094         }
2095
2096         /* Execute thread doing all accounting. */
2097         thread_call(thread);
2098
2099         /* Give back or free thread. */
2100         thread_add_unuse(m, thread);
2101 }
2102
2103 /* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2104 void debug_signals(const sigset_t *sigs)
2105 {
2106         int i, found;
2107         sigset_t tmpsigs;
2108         char buf[300];
2109
2110         /*
2111          * We're only looking at the non-realtime signals here, so we need
2112          * some limit value. Platform differences mean at some point we just
2113          * need to pick a reasonable value.
2114          */
2115 #if defined SIGRTMIN
2116 #  define LAST_SIGNAL SIGRTMIN
2117 #else
2118 #  define LAST_SIGNAL 32
2119 #endif
2120
2121
2122         if (sigs == NULL) {
2123                 sigemptyset(&tmpsigs);
2124                 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2125                 sigs = &tmpsigs;
2126         }
2127
2128         found = 0;
2129         buf[0] = '\0';
2130
2131         for (i = 0; i < LAST_SIGNAL; i++) {
2132                 char tmp[20];
2133
2134                 if (sigismember(sigs, i) > 0) {
2135                         if (found > 0)
2136                                 strlcat(buf, ",", sizeof(buf));
2137                         snprintf(tmp, sizeof(tmp), "%d", i);
2138                         strlcat(buf, tmp, sizeof(buf));
2139                         found++;
2140                 }
2141         }
2142
2143         if (found == 0)
2144                 snprintf(buf, sizeof(buf), "<none>");
2145
2146         zlog_debug("%s: %s", __func__, buf);
2147 }
2148
2149 static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2150                                    const struct thread *thread)
2151 {
2152         static const char * const types[] = {
2153                 [THREAD_READ] = "read",
2154                 [THREAD_WRITE] = "write",
2155                 [THREAD_TIMER] = "timer",
2156                 [THREAD_EVENT] = "event",
2157                 [THREAD_READY] = "ready",
2158                 [THREAD_UNUSED] = "unused",
2159                 [THREAD_EXECUTE] = "exec",
2160         };
2161         ssize_t rv = 0;
2162         char info[16] = "";
2163
2164         if (!thread)
2165                 return bputs(buf, "{(thread *)NULL}");
2166
2167         rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2168
2169         if (thread->type < array_size(types) && types[thread->type])
2170                 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2171         else
2172                 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2173
2174         switch (thread->type) {
2175         case THREAD_READ:
2176         case THREAD_WRITE:
2177                 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2178                 break;
2179
2180         case THREAD_TIMER:
2181                 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2182                 break;
2183         }
2184
2185         rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2186                         thread->xref->funcname, thread->xref->dest,
2187                         thread->xref->xref.file, thread->xref->xref.line);
2188         return rv;
2189 }
2190
2191 printfrr_ext_autoreg_p("TH", printfrr_thread);
2192 static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2193                                const void *ptr)
2194 {
2195         const struct thread *thread = ptr;
2196         struct timespec remain = {};
2197
2198         if (ea->fmt[0] == 'D') {
2199                 ea->fmt++;
2200                 return printfrr_thread_dbg(buf, ea, thread);
2201         }
2202
2203         if (!thread) {
2204                 /* need to jump over time formatting flag characters in the
2205                  * input format string, i.e. adjust ea->fmt!
2206                  */
2207                 printfrr_time(buf, ea, &remain,
2208                               TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2209                 return bputch(buf, '-');
2210         }
2211
2212         TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2213         return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2214 }