lib/thread.c

   1 /* Thread management routine
   2  * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
   3  *
   4  * This file is part of GNU Zebra.
   5  *
   6  * GNU Zebra is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License as published by the
   8  * Free Software Foundation; either version 2, or (at your option) any
   9  * later version.
  10  *
  11  * GNU Zebra is distributed in the hope that it will be useful, but
  12  * WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License along
  17  * with this program; see the file COPYING; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 /* #define DEBUG */
  22
  23 #include <zebra.h>
  24 #include <sys/resource.h>
  25
  26 #include "thread.h"
  27 #include "memory.h"
  28 #include "frrcu.h"
  29 #include "log.h"
  30 #include "hash.h"
  31 #include "command.h"
  32 #include "sigevent.h"
  33 #include "network.h"
  34 #include "jhash.h"
  35 #include "frratomic.h"
  36 #include "frr_pthread.h"
  37 #include "lib_errors.h"
  38 #include "libfrr_trace.h"
  39 #include "libfrr.h"
  40
  41 DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
  42 DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
  43 DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
  44 DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");
  45
  46 DECLARE_LIST(thread_list, struct thread, threaditem);
  47
  48 struct cancel_req {
  49         int flags;
  50         struct thread *thread;
  51         void *eventobj;
  52         struct thread **threadref;
  53 };
  54
  55 /* Flags for task cancellation */
  56 #define THREAD_CANCEL_FLAG_READY     0x01
  57
  58 static int thread_timer_cmp(const struct thread *a, const struct thread *b)
  59 {
  60         if (a->u.sands.tv_sec < b->u.sands.tv_sec)
  61                 return -1;
  62         if (a->u.sands.tv_sec > b->u.sands.tv_sec)
  63                 return 1;
  64         if (a->u.sands.tv_usec < b->u.sands.tv_usec)
  65                 return -1;
  66         if (a->u.sands.tv_usec > b->u.sands.tv_usec)
  67                 return 1;
  68         return 0;
  69 }
  70
  71 DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);
  72
  73 #if defined(__APPLE__)
  74 #include <mach/mach.h>
  75 #include <mach/mach_time.h>
  76 #endif
  77
  78 #define AWAKEN(m)                                                              \
  79         do {                                                                   \
  80                 const unsigned char wakebyte = 0x01;                           \
  81                 write(m->io_pipe[1], &wakebyte, 1);                            \
  82         } while (0);
  83
  84 /* control variable for initializer */
  85 static pthread_once_t init_once = PTHREAD_ONCE_INIT;
  86 pthread_key_t thread_current;
  87
  88 static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
  89 static struct list *masters;
  90
  91 static void thread_free(struct thread_master *master, struct thread *thread);
  92
  93 #ifndef EXCLUDE_CPU_TIME
  94 #define EXCLUDE_CPU_TIME 0
  95 #endif
  96 #ifndef CONSUMED_TIME_CHECK
  97 #define CONSUMED_TIME_CHECK 0
  98 #endif
  99
 100 bool cputime_enabled = !EXCLUDE_CPU_TIME;
 101 unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
 102 unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
 103
 104 /* CLI start ---------------------------------------------------------------- */
 105 #ifndef VTYSH_EXTRACT_PL
 106 #include "lib/thread_clippy.c"
 107 #endif
 108
 109 static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
 110 {
 111         int size = sizeof(a->func);
 112
 113         return jhash(&a->func, size, 0);
 114 }
 115
 116 static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
 117                                const struct cpu_thread_history *b)
 118 {
 119         return a->func == b->func;
 120 }
 121
 122 static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
 123 {
 124         struct cpu_thread_history *new;
 125         new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
 126         new->func = a->func;
 127         new->funcname = a->funcname;
 128         return new;
 129 }
 130
 131 static void cpu_record_hash_free(void *a)
 132 {
 133         struct cpu_thread_history *hist = a;
 134
 135         XFREE(MTYPE_THREAD_STATS, hist);
 136 }
 137
 138 static void vty_out_cpu_thread_history(struct vty *vty,
 139                                        struct cpu_thread_history *a)
 140 {
 141         vty_out(vty,
 142                 "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
 143                 a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
 144                 a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
 145                 (a->real.total / a->total_calls), a->real.max,
 146                 a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
 147         vty_out(vty, "  %c%c%c%c%c  %s\n",
 148                 a->types & (1 << THREAD_READ) ? 'R' : ' ',
 149                 a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
 150                 a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
 151                 a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
 152                 a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
 153 }
 154
 155 static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
 156 {
 157         struct cpu_thread_history *totals = args[0];
 158         struct cpu_thread_history copy;
 159         struct vty *vty = args[1];
 160         uint8_t *filter = args[2];
 161
 162         struct cpu_thread_history *a = bucket->data;
 163
 164         copy.total_active =
 165                 atomic_load_explicit(&a->total_active, memory_order_seq_cst);
 166         copy.total_calls =
 167                 atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
 168         copy.total_cpu_warn =
 169                 atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
 170         copy.total_wall_warn =
 171                 atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
 172         copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
 173                                                      memory_order_seq_cst);
 174         copy.cpu.total =
 175                 atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
 176         copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
 177         copy.real.total =
 178                 atomic_load_explicit(&a->real.total, memory_order_seq_cst);
 179         copy.real.max =
 180                 atomic_load_explicit(&a->real.max, memory_order_seq_cst);
 181         copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
 182         copy.funcname = a->funcname;
 183
 184         if (!(copy.types & *filter))
 185                 return;
 186
 187         vty_out_cpu_thread_history(vty, &copy);
 188         totals->total_active += copy.total_active;
 189         totals->total_calls += copy.total_calls;
 190         totals->total_cpu_warn += copy.total_cpu_warn;
 191         totals->total_wall_warn += copy.total_wall_warn;
 192         totals->total_starv_warn += copy.total_starv_warn;
 193         totals->real.total += copy.real.total;
 194         if (totals->real.max < copy.real.max)
 195                 totals->real.max = copy.real.max;
 196         totals->cpu.total += copy.cpu.total;
 197         if (totals->cpu.max < copy.cpu.max)
 198                 totals->cpu.max = copy.cpu.max;
 199 }
 200
 201 static void cpu_record_print(struct vty *vty, uint8_t filter)
 202 {
 203         struct cpu_thread_history tmp;
 204         void *args[3] = {&tmp, vty, &filter};
 205         struct thread_master *m;
 206         struct listnode *ln;
 207
 208         if (!cputime_enabled)
 209                 vty_out(vty,
 210                         "\n"
 211                         "Collecting CPU time statistics is currently disabled.  Following statistics\n"
 212                         "will be zero or may display data from when collection was enabled.  Use the\n"
 213                         "  \"service cputime-stats\"  command to start collecting data.\n"
 214                         "\nCounters and wallclock times are always maintained and should be accurate.\n");
 215
 216         memset(&tmp, 0, sizeof(tmp));
 217         tmp.funcname = "TOTAL";
 218         tmp.types = filter;
 219
 220         frr_with_mutex(&masters_mtx) {
 221                 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
 222                         const char *name = m->name ? m->name : "main";
 223
 224                         char underline[strlen(name) + 1];
 225                         memset(underline, '-', sizeof(underline));
 226                         underline[sizeof(underline) - 1] = '\0';
 227
 228                         vty_out(vty, "\n");
 229                         vty_out(vty, "Showing statistics for pthread %s\n",
 230                                 name);
 231                         vty_out(vty, "-------------------------------%s\n",
 232                                 underline);
 233                         vty_out(vty, "%30s %18s %18s\n", "",
 234                                 "CPU (user+system):", "Real (wall-clock):");
 235                         vty_out(vty,
 236                                 "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
 237                         vty_out(vty, " Avg uSec Max uSecs");
 238                         vty_out(vty,
 239                                 "  CPU_Warn Wall_Warn Starv_Warn Type   Thread\n");
 240
 241                         if (m->cpu_record->count)
 242                                 hash_iterate(
 243                                         m->cpu_record,
 244                                         (void (*)(struct hash_bucket *,
 245                                                   void *))cpu_record_hash_print,
 246                                         args);
 247                         else
 248                                 vty_out(vty, "No data to display yet.\n");
 249
 250                         vty_out(vty, "\n");
 251                 }
 252         }
 253
 254         vty_out(vty, "\n");
 255         vty_out(vty, "Total thread statistics\n");
 256         vty_out(vty, "-------------------------\n");
 257         vty_out(vty, "%30s %18s %18s\n", "",
 258                 "CPU (user+system):", "Real (wall-clock):");
 259         vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
 260         vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
 261         vty_out(vty, "  Type  Thread\n");
 262
 263         if (tmp.total_calls > 0)
 264                 vty_out_cpu_thread_history(vty, &tmp);
 265 }
 266
 267 static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
 268 {
 269         uint8_t *filter = args[0];
 270         struct hash *cpu_record = args[1];
 271
 272         struct cpu_thread_history *a = bucket->data;
 273
 274         if (!(a->types & *filter))
 275                 return;
 276
 277         hash_release(cpu_record, bucket->data);
 278 }
 279
 280 static void cpu_record_clear(uint8_t filter)
 281 {
 282         uint8_t *tmp = &filter;
 283         struct thread_master *m;
 284         struct listnode *ln;
 285
 286         frr_with_mutex(&masters_mtx) {
 287                 for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
 288                         frr_with_mutex(&m->mtx) {
 289                                 void *args[2] = {tmp, m->cpu_record};
 290                                 hash_iterate(
 291                                         m->cpu_record,
 292                                         (void (*)(struct hash_bucket *,
 293                                                   void *))cpu_record_hash_clear,
 294                                         args);
 295                         }
 296                 }
 297         }
 298 }
 299
 300 static uint8_t parse_filter(const char *filterstr)
 301 {
 302         int i = 0;
 303         int filter = 0;
 304
 305         while (filterstr[i] != '\0') {
 306                 switch (filterstr[i]) {
 307                 case 'r':
 308                 case 'R':
 309                         filter |= (1 << THREAD_READ);
 310                         break;
 311                 case 'w':
 312                 case 'W':
 313                         filter |= (1 << THREAD_WRITE);
 314                         break;
 315                 case 't':
 316                 case 'T':
 317                         filter |= (1 << THREAD_TIMER);
 318                         break;
 319                 case 'e':
 320                 case 'E':
 321                         filter |= (1 << THREAD_EVENT);
 322                         break;
 323                 case 'x':
 324                 case 'X':
 325                         filter |= (1 << THREAD_EXECUTE);
 326                         break;
 327                 default:
 328                         break;
 329                 }
 330                 ++i;
 331         }
 332         return filter;
 333 }
 334
 335 DEFUN_NOSH (show_thread_cpu,
 336             show_thread_cpu_cmd,
 337             "show thread cpu [FILTER]",
 338             SHOW_STR
 339             "Thread information\n"
 340             "Thread CPU usage\n"
 341             "Display filter (rwtex)\n")
 342 {
 343         uint8_t filter = (uint8_t)-1U;
 344         int idx = 0;
 345
 346         if (argv_find(argv, argc, "FILTER", &idx)) {
 347                 filter = parse_filter(argv[idx]->arg);
 348                 if (!filter) {
 349                         vty_out(vty,
 350                                 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
 351                                 argv[idx]->arg);
 352                         return CMD_WARNING;
 353                 }
 354         }
 355
 356         cpu_record_print(vty, filter);
 357         return CMD_SUCCESS;
 358 }
 359
 360 DEFPY (service_cputime_stats,
 361        service_cputime_stats_cmd,
 362        "[no] service cputime-stats",
 363        NO_STR
 364        "Set up miscellaneous service\n"
 365        "Collect CPU usage statistics\n")
 366 {
 367         cputime_enabled = !no;
 368         return CMD_SUCCESS;
 369 }
 370
 371 DEFPY (service_cputime_warning,
 372        service_cputime_warning_cmd,
 373        "[no] service cputime-warning (1-4294967295)",
 374        NO_STR
 375        "Set up miscellaneous service\n"
 376        "Warn for tasks exceeding CPU usage threshold\n"
 377        "Warning threshold in milliseconds\n")
 378 {
 379         if (no)
 380                 cputime_threshold = 0;
 381         else
 382                 cputime_threshold = cputime_warning * 1000;
 383         return CMD_SUCCESS;
 384 }
 385
 386 ALIAS (service_cputime_warning,
 387        no_service_cputime_warning_cmd,
 388        "no service cputime-warning",
 389        NO_STR
 390        "Set up miscellaneous service\n"
 391        "Warn for tasks exceeding CPU usage threshold\n")
 392
 393 DEFPY (service_walltime_warning,
 394        service_walltime_warning_cmd,
 395        "[no] service walltime-warning (1-4294967295)",
 396        NO_STR
 397        "Set up miscellaneous service\n"
 398        "Warn for tasks exceeding total wallclock threshold\n"
 399        "Warning threshold in milliseconds\n")
 400 {
 401         if (no)
 402                 walltime_threshold = 0;
 403         else
 404                 walltime_threshold = walltime_warning * 1000;
 405         return CMD_SUCCESS;
 406 }
 407
 408 ALIAS (service_walltime_warning,
 409        no_service_walltime_warning_cmd,
 410        "no service walltime-warning",
 411        NO_STR
 412        "Set up miscellaneous service\n"
 413        "Warn for tasks exceeding total wallclock threshold\n")
 414
 415 static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
 416 {
 417         const char *name = m->name ? m->name : "main";
 418         char underline[strlen(name) + 1];
 419         struct thread *thread;
 420         uint32_t i;
 421
 422         memset(underline, '-', sizeof(underline));
 423         underline[sizeof(underline) - 1] = '\0';
 424
 425         vty_out(vty, "\nShowing poll FD's for %s\n", name);
 426         vty_out(vty, "----------------------%s\n", underline);
 427         vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
 428                 m->fd_limit);
 429         for (i = 0; i < m->handler.pfdcount; i++) {
 430                 vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
 431                         m->handler.pfds[i].fd, m->handler.pfds[i].events,
 432                         m->handler.pfds[i].revents);
 433
 434                 if (m->handler.pfds[i].events & POLLIN) {
 435                         thread = m->read[m->handler.pfds[i].fd];
 436
 437                         if (!thread)
 438                                 vty_out(vty, "ERROR ");
 439                         else
 440                                 vty_out(vty, "%s ", thread->xref->funcname);
 441                 } else
 442                         vty_out(vty, " ");
 443
 444                 if (m->handler.pfds[i].events & POLLOUT) {
 445                         thread = m->write[m->handler.pfds[i].fd];
 446
 447                         if (!thread)
 448                                 vty_out(vty, "ERROR\n");
 449                         else
 450                                 vty_out(vty, "%s\n", thread->xref->funcname);
 451                 } else
 452                         vty_out(vty, "\n");
 453         }
 454 }
 455
 456 DEFUN_NOSH (show_thread_poll,
 457             show_thread_poll_cmd,
 458             "show thread poll",
 459             SHOW_STR
 460             "Thread information\n"
 461             "Show poll FD's and information\n")
 462 {
 463         struct listnode *node;
 464         struct thread_master *m;
 465
 466         frr_with_mutex(&masters_mtx) {
 467                 for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
 468                         show_thread_poll_helper(vty, m);
 469                 }
 470         }
 471
 472         return CMD_SUCCESS;
 473 }
 474
 475
 476 DEFUN (clear_thread_cpu,
 477        clear_thread_cpu_cmd,
 478        "clear thread cpu [FILTER]",
 479        "Clear stored data in all pthreads\n"
 480        "Thread information\n"
 481        "Thread CPU usage\n"
 482        "Display filter (rwtexb)\n")
 483 {
 484         uint8_t filter = (uint8_t)-1U;
 485         int idx = 0;
 486
 487         if (argv_find(argv, argc, "FILTER", &idx)) {
 488                 filter = parse_filter(argv[idx]->arg);
 489                 if (!filter) {
 490                         vty_out(vty,
 491                                 "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
 492                                 argv[idx]->arg);
 493                         return CMD_WARNING;
 494                 }
 495         }
 496
 497         cpu_record_clear(filter);
 498         return CMD_SUCCESS;
 499 }
 500
 501 void thread_cmd_init(void)
 502 {
 503         install_element(VIEW_NODE, &show_thread_cpu_cmd);
 504         install_element(VIEW_NODE, &show_thread_poll_cmd);
 505         install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
 506
 507         install_element(CONFIG_NODE, &service_cputime_stats_cmd);
 508         install_element(CONFIG_NODE, &service_cputime_warning_cmd);
 509         install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
 510         install_element(CONFIG_NODE, &service_walltime_warning_cmd);
 511         install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
 512 }
 513 /* CLI end ------------------------------------------------------------------ */
 514
 515
 516 static void cancelreq_del(void *cr)
 517 {
 518         XFREE(MTYPE_TMP, cr);
 519 }
 520
 521 /* initializer, only ever called once */
 522 static void initializer(void)
 523 {
 524         pthread_key_create(&thread_current, NULL);
 525 }
 526
 527 struct thread_master *thread_master_create(const char *name)
 528 {
 529         struct thread_master *rv;
 530         struct rlimit limit;
 531
 532         pthread_once(&init_once, &initializer);
 533
 534         rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));
 535
 536         /* Initialize master mutex */
 537         pthread_mutex_init(&rv->mtx, NULL);
 538         pthread_cond_init(&rv->cancel_cond, NULL);
 539
 540         /* Set name */
 541         name = name ? name : "default";
 542         rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
 543
 544         /* Initialize I/O task data structures */
 545
 546         /* Use configured limit if present, ulimit otherwise. */
 547         rv->fd_limit = frr_get_fd_limit();
 548         if (rv->fd_limit == 0) {
 549                 getrlimit(RLIMIT_NOFILE, &limit);
 550                 rv->fd_limit = (int)limit.rlim_cur;
 551         }
 552
 553         rv->read = XCALLOC(MTYPE_THREAD_POLL,
 554                            sizeof(struct thread *) * rv->fd_limit);
 555
 556         rv->write = XCALLOC(MTYPE_THREAD_POLL,
 557                             sizeof(struct thread *) * rv->fd_limit);
 558
 559         char tmhashname[strlen(name) + 32];
 560         snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
 561                  name);
 562         rv->cpu_record = hash_create_size(
 563                 8, (unsigned int (*)(const void *))cpu_record_hash_key,
 564                 (bool (*)(const void *, const void *))cpu_record_hash_cmp,
 565                 tmhashname);
 566
 567         thread_list_init(&rv->event);
 568         thread_list_init(&rv->ready);
 569         thread_list_init(&rv->unuse);
 570         thread_timer_list_init(&rv->timer);
 571
 572         /* Initialize thread_fetch() settings */
 573         rv->spin = true;
 574         rv->handle_signals = true;
 575
 576         /* Set pthread owner, should be updated by actual owner */
 577         rv->owner = pthread_self();
 578         rv->cancel_req = list_new();
 579         rv->cancel_req->del = cancelreq_del;
 580         rv->canceled = true;
 581
 582         /* Initialize pipe poker */
 583         pipe(rv->io_pipe);
 584         set_nonblocking(rv->io_pipe[0]);
 585         set_nonblocking(rv->io_pipe[1]);
 586
 587         /* Initialize data structures for poll() */
 588         rv->handler.pfdsize = rv->fd_limit;
 589         rv->handler.pfdcount = 0;
 590         rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
 591                                    sizeof(struct pollfd) * rv->handler.pfdsize);
 592         rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
 593                                    sizeof(struct pollfd) * rv->handler.pfdsize);
 594
 595         /* add to list of threadmasters */
 596         frr_with_mutex(&masters_mtx) {
 597                 if (!masters)
 598                         masters = list_new();
 599
 600                 listnode_add(masters, rv);
 601         }
 602
 603         return rv;
 604 }
 605
 606 void thread_master_set_name(struct thread_master *master, const char *name)
 607 {
 608         frr_with_mutex(&master->mtx) {
 609                 XFREE(MTYPE_THREAD_MASTER, master->name);
 610                 master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
 611         }
 612 }
 613
 614 #define THREAD_UNUSED_DEPTH 10
 615
 616 /* Move thread to unuse list. */
 617 static void thread_add_unuse(struct thread_master *m, struct thread *thread)
 618 {
 619         pthread_mutex_t mtxc = thread->mtx;
 620
 621         assert(m != NULL && thread != NULL);
 622
 623         thread->hist->total_active--;
 624         memset(thread, 0, sizeof(struct thread));
 625         thread->type = THREAD_UNUSED;
 626
 627         /* Restore the thread mutex context. */
 628         thread->mtx = mtxc;
 629
 630         if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
 631                 thread_list_add_tail(&m->unuse, thread);
 632                 return;
 633         }
 634
 635         thread_free(m, thread);
 636 }
 637
 638 /* Free all unused thread. */
 639 static void thread_list_free(struct thread_master *m,
 640                 struct thread_list_head *list)
 641 {
 642         struct thread *t;
 643
 644         while ((t = thread_list_pop(list)))
 645                 thread_free(m, t);
 646 }
 647
 648 static void thread_array_free(struct thread_master *m,
 649                               struct thread **thread_array)
 650 {
 651         struct thread *t;
 652         int index;
 653
 654         for (index = 0; index < m->fd_limit; ++index) {
 655                 t = thread_array[index];
 656                 if (t) {
 657                         thread_array[index] = NULL;
 658                         thread_free(m, t);
 659                 }
 660         }
 661         XFREE(MTYPE_THREAD_POLL, thread_array);
 662 }
 663
 664 /*
 665  * thread_master_free_unused
 666  *
 667  * As threads are finished with they are put on the
 668  * unuse list for later reuse.
 669  * If we are shutting down, Free up unused threads
 670  * So we can see if we forget to shut anything off
 671  */
 672 void thread_master_free_unused(struct thread_master *m)
 673 {
 674         frr_with_mutex(&m->mtx) {
 675                 struct thread *t;
 676                 while ((t = thread_list_pop(&m->unuse)))
 677                         thread_free(m, t);
 678         }
 679 }
 680
 681 /* Stop thread scheduler. */
 682 void thread_master_free(struct thread_master *m)
 683 {
 684         struct thread *t;
 685
 686         frr_with_mutex(&masters_mtx) {
 687                 listnode_delete(masters, m);
 688                 if (masters->count == 0) {
 689                         list_delete(&masters);
 690                 }
 691         }
 692
 693         thread_array_free(m, m->read);
 694         thread_array_free(m, m->write);
 695         while ((t = thread_timer_list_pop(&m->timer)))
 696                 thread_free(m, t);
 697         thread_list_free(m, &m->event);
 698         thread_list_free(m, &m->ready);
 699         thread_list_free(m, &m->unuse);
 700         pthread_mutex_destroy(&m->mtx);
 701         pthread_cond_destroy(&m->cancel_cond);
 702         close(m->io_pipe[0]);
 703         close(m->io_pipe[1]);
 704         list_delete(&m->cancel_req);
 705         m->cancel_req = NULL;
 706
 707         hash_clean(m->cpu_record, cpu_record_hash_free);
 708         hash_free(m->cpu_record);
 709         m->cpu_record = NULL;
 710
 711         XFREE(MTYPE_THREAD_MASTER, m->name);
 712         XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
 713         XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
 714         XFREE(MTYPE_THREAD_MASTER, m);
 715 }
 716
 717 /* Return remain time in miliseconds. */
 718 unsigned long thread_timer_remain_msec(struct thread *thread)
 719 {
 720         int64_t remain;
 721
 722         frr_with_mutex(&thread->mtx) {
 723                 remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
 724         }
 725
 726         return remain < 0 ? 0 : remain;
 727 }
 728
 729 /* Return remain time in seconds. */
 730 unsigned long thread_timer_remain_second(struct thread *thread)
 731 {
 732         return thread_timer_remain_msec(thread) / 1000LL;
 733 }
 734
 735 struct timeval thread_timer_remain(struct thread *thread)
 736 {
 737         struct timeval remain;
 738         frr_with_mutex(&thread->mtx) {
 739                 monotime_until(&thread->u.sands, &remain);
 740         }
 741         return remain;
 742 }
 743
 744 static int time_hhmmss(char *buf, int buf_size, long sec)
 745 {
 746         long hh;
 747         long mm;
 748         int wr;
 749
 750         assert(buf_size >= 8);
 751
 752         hh = sec / 3600;
 753         sec %= 3600;
 754         mm = sec / 60;
 755         sec %= 60;
 756
 757         wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
 758
 759         return wr != 8;
 760 }
 761
 762 char *thread_timer_to_hhmmss(char *buf, int buf_size,
 763                 struct thread *t_timer)
 764 {
 765         if (t_timer) {
 766                 time_hhmmss(buf, buf_size,
 767                                 thread_timer_remain_second(t_timer));
 768         } else {
 769                 snprintf(buf, buf_size, "--:--:--");
 770         }
 771         return buf;
 772 }
 773
 774 /* Get new thread.  */
 775 static struct thread *thread_get(struct thread_master *m, uint8_t type,
 776                                  void (*func)(struct thread *), void *arg,
 777                                  const struct xref_threadsched *xref)
 778 {
 779         struct thread *thread = thread_list_pop(&m->unuse);
 780         struct cpu_thread_history tmp;
 781
 782         if (!thread) {
 783                 thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
 784                 /* mutex only needs to be initialized at struct creation. */
 785                 pthread_mutex_init(&thread->mtx, NULL);
 786                 m->alloc++;
 787         }
 788
 789         thread->type = type;
 790         thread->add_type = type;
 791         thread->master = m;
 792         thread->arg = arg;
 793         thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
 794         thread->ref = NULL;
 795         thread->ignore_timer_late = false;
 796
 797         /*
 798          * So if the passed in funcname is not what we have
 799          * stored that means the thread->hist needs to be
 800          * updated.  We keep the last one around in unused
 801          * under the assumption that we are probably
 802          * going to immediately allocate the same
 803          * type of thread.
 804          * This hopefully saves us some serious
 805          * hash_get lookups.
 806          */
 807         if ((thread->xref && thread->xref->funcname != xref->funcname)
 808             || thread->func != func) {
 809                 tmp.func = func;
 810                 tmp.funcname = xref->funcname;
 811                 thread->hist =
 812                         hash_get(m->cpu_record, &tmp,
 813                                  (void *(*)(void *))cpu_record_hash_alloc);
 814         }
 815         thread->hist->total_active++;
 816         thread->func = func;
 817         thread->xref = xref;
 818
 819         return thread;
 820 }
 821
 822 static void thread_free(struct thread_master *master, struct thread *thread)
 823 {
 824         /* Update statistics. */
 825         assert(master->alloc > 0);
 826         master->alloc--;
 827
 828         /* Free allocated resources. */
 829         pthread_mutex_destroy(&thread->mtx);
 830         XFREE(MTYPE_THREAD, thread);
 831 }
 832
 833 static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
 834                    bool *eintr_p)
 835 {
 836         sigset_t origsigs;
 837         unsigned char trash[64];
 838         nfds_t count = m->handler.copycount;
 839
 840         /*
 841          * If timer_wait is null here, that means poll() should block
 842          * indefinitely, unless the thread_master has overridden it by setting
 843          * ->selectpoll_timeout.
 844          *
 845          * If the value is positive, it specifies the maximum number of
 846          * milliseconds to wait. If the timeout is -1, it specifies that
 847          * we should never wait and always return immediately even if no
 848          * event is detected. If the value is zero, the behavior is default.
 849          */
 850         int timeout = -1;
 851
 852         /* number of file descriptors with events */
 853         int num;
 854
 855         if (timer_wait != NULL
 856             && m->selectpoll_timeout == 0) // use the default value
 857                 timeout = (timer_wait->tv_sec * 1000)
 858                           + (timer_wait->tv_usec / 1000);
 859         else if (m->selectpoll_timeout > 0) // use the user's timeout
 860                 timeout = m->selectpoll_timeout;
 861         else if (m->selectpoll_timeout
 862                  < 0) // effect a poll (return immediately)
 863                 timeout = 0;
 864
 865         zlog_tls_buffer_flush();
 866         rcu_read_unlock();
 867         rcu_assert_read_unlocked();
 868
 869         /* add poll pipe poker */
 870         assert(count + 1 < m->handler.pfdsize);
 871         m->handler.copy[count].fd = m->io_pipe[0];
 872         m->handler.copy[count].events = POLLIN;
 873         m->handler.copy[count].revents = 0x00;
 874
 875         /* We need to deal with a signal-handling race here: we
 876          * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
 877          * that may arrive just before we enter poll(). We will block the
 878          * key signals, then check whether any have arrived - if so, we return
 879          * before calling poll(). If not, we'll re-enable the signals
 880          * in the ppoll() call.
 881          */
 882
 883         sigemptyset(&origsigs);
 884         if (m->handle_signals) {
 885                 /* Main pthread that handles the app signals */
 886                 if (frr_sigevent_check(&origsigs)) {
 887                         /* Signal to process - restore signal mask and return */
 888                         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 889                         num = -1;
 890                         *eintr_p = true;
 891                         goto done;
 892                 }
 893         } else {
 894                 /* Don't make any changes for the non-main pthreads */
 895                 pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
 896         }
 897
 898 #if defined(HAVE_PPOLL)
 899         struct timespec ts, *tsp;
 900
 901         if (timeout >= 0) {
 902                 ts.tv_sec = timeout / 1000;
 903                 ts.tv_nsec = (timeout % 1000) * 1000000;
 904                 tsp = &ts;
 905         } else
 906                 tsp = NULL;
 907
 908         num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
 909         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 910 #else
 911         /* Not ideal - there is a race after we restore the signal mask */
 912         pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
 913         num = poll(m->handler.copy, count + 1, timeout);
 914 #endif
 915
 916 done:
 917
 918         if (num < 0 && errno == EINTR)
 919                 *eintr_p = true;
 920
 921         if (num > 0 && m->handler.copy[count].revents != 0 && num--)
 922                 while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
 923                         ;
 924
 925         rcu_read_lock();
 926
 927         return num;
 928 }
 929
 930 /* Add new read thread. */
 931 void _thread_add_read_write(const struct xref_threadsched *xref,
 932                             struct thread_master *m,
 933                             void (*func)(struct thread *), void *arg, int fd,
 934                             struct thread **t_ptr)
 935 {
 936         int dir = xref->thread_type;
 937         struct thread *thread = NULL;
 938         struct thread **thread_array;
 939
 940         if (dir == THREAD_READ)
 941                 frrtrace(9, frr_libfrr, schedule_read, m,
 942                          xref->funcname, xref->xref.file, xref->xref.line,
 943                          t_ptr, fd, 0, arg, 0);
 944         else
 945                 frrtrace(9, frr_libfrr, schedule_write, m,
 946                          xref->funcname, xref->xref.file, xref->xref.line,
 947                          t_ptr, fd, 0, arg, 0);
 948
 949         assert(fd >= 0);
 950         if (fd >= m->fd_limit)
 951                 assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
 952
 953         frr_with_mutex(&m->mtx) {
 954                 if (t_ptr && *t_ptr)
 955                         // thread is already scheduled; don't reschedule
 956                         break;
 957
 958                 /* default to a new pollfd */
 959                 nfds_t queuepos = m->handler.pfdcount;
 960
 961                 if (dir == THREAD_READ)
 962                         thread_array = m->read;
 963                 else
 964                         thread_array = m->write;
 965
 966                 /* if we already have a pollfd for our file descriptor, find and
 967                  * use it */
 968                 for (nfds_t i = 0; i < m->handler.pfdcount; i++)
 969                         if (m->handler.pfds[i].fd == fd) {
 970                                 queuepos = i;
 971
 972 #ifdef DEV_BUILD
 973                                 /*
 974                                  * What happens if we have a thread already
 975                                  * created for this event?
 976                                  */
 977                                 if (thread_array[fd])
 978                                         assert(!"Thread already scheduled for file descriptor");
 979 #endif
 980                                 break;
 981                         }
 982
 983                 /* make sure we have room for this fd + pipe poker fd */
 984                 assert(queuepos + 1 < m->handler.pfdsize);
 985
 986                 thread = thread_get(m, dir, func, arg, xref);
 987
 988                 m->handler.pfds[queuepos].fd = fd;
 989                 m->handler.pfds[queuepos].events |=
 990                         (dir == THREAD_READ ? POLLIN : POLLOUT);
 991
 992                 if (queuepos == m->handler.pfdcount)
 993                         m->handler.pfdcount++;
 994
 995                 if (thread) {
 996                         frr_with_mutex(&thread->mtx) {
 997                                 thread->u.fd = fd;
 998                                 thread_array[thread->u.fd] = thread;
 999                         }
1000
1001                         if (t_ptr) {
1002                                 *t_ptr = thread;
1003                                 thread->ref = t_ptr;
1004                         }
1005                 }
1006
1007                 AWAKEN(m);
1008         }
1009 }
1010
1011 static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
1012                                       struct thread_master *m,
1013                                       void (*func)(struct thread *), void *arg,
1014                                       struct timeval *time_relative,
1015                                       struct thread **t_ptr)
1016 {
1017         struct thread *thread;
1018         struct timeval t;
1019
1020         assert(m != NULL);
1021
1022         assert(time_relative);
1023
1024         frrtrace(9, frr_libfrr, schedule_timer, m,
1025                  xref->funcname, xref->xref.file, xref->xref.line,
1026                  t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
1027
1028         /* Compute expiration/deadline time. */
1029         monotime(&t);
1030         timeradd(&t, time_relative, &t);
1031
1032         frr_with_mutex(&m->mtx) {
1033                 if (t_ptr && *t_ptr)
1034                         /* thread is already scheduled; don't reschedule */
1035                         return;
1036
1037                 thread = thread_get(m, THREAD_TIMER, func, arg, xref);
1038
1039                 frr_with_mutex(&thread->mtx) {
1040                         thread->u.sands = t;
1041                         thread_timer_list_add(&m->timer, thread);
1042                         if (t_ptr) {
1043                                 *t_ptr = thread;
1044                                 thread->ref = t_ptr;
1045                         }
1046                 }
1047
1048                 /* The timer list is sorted - if this new timer
1049                  * might change the time we'll wait for, give the pthread
1050                  * a chance to re-compute.
1051                  */
1052                 if (thread_timer_list_first(&m->timer) == thread)
1053                         AWAKEN(m);
1054         }
1055 #define ONEYEAR2SEC (60 * 60 * 24 * 365)
1056         if (time_relative->tv_sec > ONEYEAR2SEC)
1057                 flog_err(
1058                         EC_LIB_TIMER_TOO_LONG,
1059                         "Timer: %pTHD is created with an expiration that is greater than 1 year",
1060                         thread);
1061 }
1062
1063
1064 /* Add timer event thread. */
1065 void _thread_add_timer(const struct xref_threadsched *xref,
1066                        struct thread_master *m, void (*func)(struct thread *),
1067                        void *arg, long timer, struct thread **t_ptr)
1068 {
1069         struct timeval trel;
1070
1071         assert(m != NULL);
1072
1073         trel.tv_sec = timer;
1074         trel.tv_usec = 0;
1075
1076         _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1077 }
1078
1079 /* Add timer event thread with "millisecond" resolution */
1080 void _thread_add_timer_msec(const struct xref_threadsched *xref,
1081                             struct thread_master *m,
1082                             void (*func)(struct thread *), void *arg,
1083                             long timer, struct thread **t_ptr)
1084 {
1085         struct timeval trel;
1086
1087         assert(m != NULL);
1088
1089         trel.tv_sec = timer / 1000;
1090         trel.tv_usec = 1000 * (timer % 1000);
1091
1092         _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1093 }
1094
1095 /* Add timer event thread with "timeval" resolution */
1096 void _thread_add_timer_tv(const struct xref_threadsched *xref,
1097                           struct thread_master *m,
1098                           void (*func)(struct thread *), void *arg,
1099                           struct timeval *tv, struct thread **t_ptr)
1100 {
1101         _thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
1102 }
1103
1104 /* Add simple event thread. */
1105 void _thread_add_event(const struct xref_threadsched *xref,
1106                        struct thread_master *m, void (*func)(struct thread *),
1107                        void *arg, int val, struct thread **t_ptr)
1108 {
1109         struct thread *thread = NULL;
1110
1111         frrtrace(9, frr_libfrr, schedule_event, m,
1112                  xref->funcname, xref->xref.file, xref->xref.line,
1113                  t_ptr, 0, val, arg, 0);
1114
1115         assert(m != NULL);
1116
1117         frr_with_mutex(&m->mtx) {
1118                 if (t_ptr && *t_ptr)
1119                         /* thread is already scheduled; don't reschedule */
1120                         break;
1121
1122                 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
1123                 frr_with_mutex(&thread->mtx) {
1124                         thread->u.val = val;
1125                         thread_list_add_tail(&m->event, thread);
1126                 }
1127
1128                 if (t_ptr) {
1129                         *t_ptr = thread;
1130                         thread->ref = t_ptr;
1131                 }
1132
1133                 AWAKEN(m);
1134         }
1135 }
1136
1137 /* Thread cancellation ------------------------------------------------------ */
1138
1139 /**
1140  * NOT's out the .events field of pollfd corresponding to the given file
1141  * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1142  *
1143  * This needs to happen for both copies of pollfd's. See 'thread_fetch'
1144  * implementation for details.
1145  *
1146  * @param master
1147  * @param fd
1148  * @param state the event to cancel. One or more (OR'd together) of the
1149  * following:
1150  *   - POLLIN
1151  *   - POLLOUT
1152  */
1153 static void thread_cancel_rw(struct thread_master *master, int fd, short state,
1154                              int idx_hint)
1155 {
1156         bool found = false;
1157
1158         /* find the index of corresponding pollfd */
1159         nfds_t i;
1160
1161         /* Cancel POLLHUP too just in case some bozo set it */
1162         state |= POLLHUP;
1163
1164         /* Some callers know the index of the pfd already */
1165         if (idx_hint >= 0) {
1166                 i = idx_hint;
1167                 found = true;
1168         } else {
1169                 /* Have to look for the fd in the pfd array */
1170                 for (i = 0; i < master->handler.pfdcount; i++)
1171                         if (master->handler.pfds[i].fd == fd) {
1172                                 found = true;
1173                                 break;
1174                         }
1175         }
1176
1177         if (!found) {
1178                 zlog_debug(
1179                         "[!] Received cancellation request for nonexistent rw job");
1180                 zlog_debug("[!] threadmaster: %s | fd: %d",
1181                            master->name ? master->name : "", fd);
1182                 return;
1183         }
1184
1185         /* NOT out event. */
1186         master->handler.pfds[i].events &= ~(state);
1187
1188         /* If all events are canceled, delete / resize the pollfd array. */
1189         if (master->handler.pfds[i].events == 0) {
1190                 memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1191                         (master->handler.pfdcount - i - 1)
1192                                 * sizeof(struct pollfd));
1193                 master->handler.pfdcount--;
1194                 master->handler.pfds[master->handler.pfdcount].fd = 0;
1195                 master->handler.pfds[master->handler.pfdcount].events = 0;
1196         }
1197
1198         /* If we have the same pollfd in the copy, perform the same operations,
1199          * otherwise return. */
1200         if (i >= master->handler.copycount)
1201                 return;
1202
1203         master->handler.copy[i].events &= ~(state);
1204
1205         if (master->handler.copy[i].events == 0) {
1206                 memmove(master->handler.copy + i, master->handler.copy + i + 1,
1207                         (master->handler.copycount - i - 1)
1208                                 * sizeof(struct pollfd));
1209                 master->handler.copycount--;
1210                 master->handler.copy[master->handler.copycount].fd = 0;
1211                 master->handler.copy[master->handler.copycount].events = 0;
1212         }
1213 }
1214
1215 /*
1216  * Process task cancellation given a task argument: iterate through the
1217  * various lists of tasks, looking for any that match the argument.
1218  */
1219 static void cancel_arg_helper(struct thread_master *master,
1220                               const struct cancel_req *cr)
1221 {
1222         struct thread *t;
1223         nfds_t i;
1224         int fd;
1225         struct pollfd *pfd;
1226
1227         /* We're only processing arg-based cancellations here. */
1228         if (cr->eventobj == NULL)
1229                 return;
1230
1231         /* First process the ready lists. */
1232         frr_each_safe(thread_list, &master->event, t) {
1233                 if (t->arg != cr->eventobj)
1234                         continue;
1235                 thread_list_del(&master->event, t);
1236                 if (t->ref)
1237                         *t->ref = NULL;
1238                 thread_add_unuse(master, t);
1239         }
1240
1241         frr_each_safe(thread_list, &master->ready, t) {
1242                 if (t->arg != cr->eventobj)
1243                         continue;
1244                 thread_list_del(&master->ready, t);
1245                 if (t->ref)
1246                         *t->ref = NULL;
1247                 thread_add_unuse(master, t);
1248         }
1249
1250         /* If requested, stop here and ignore io and timers */
1251         if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
1252                 return;
1253
1254         /* Check the io tasks */
1255         for (i = 0; i < master->handler.pfdcount;) {
1256                 pfd = master->handler.pfds + i;
1257
1258                 if (pfd->events & POLLIN)
1259                         t = master->read[pfd->fd];
1260                 else
1261                         t = master->write[pfd->fd];
1262
1263                 if (t && t->arg == cr->eventobj) {
1264                         fd = pfd->fd;
1265
1266                         /* Found a match to cancel: clean up fd arrays */
1267                         thread_cancel_rw(master, pfd->fd, pfd->events, i);
1268
1269                         /* Clean up thread arrays */
1270                         master->read[fd] = NULL;
1271                         master->write[fd] = NULL;
1272
1273                         /* Clear caller's ref */
1274                         if (t->ref)
1275                                 *t->ref = NULL;
1276
1277                         thread_add_unuse(master, t);
1278
1279                         /* Don't increment 'i' since the cancellation will have
1280                          * removed the entry from the pfd array
1281                          */
1282                 } else
1283                         i++;
1284         }
1285
1286         /* Check the timer tasks */
1287         t = thread_timer_list_first(&master->timer);
1288         while (t) {
1289                 struct thread *t_next;
1290
1291                 t_next = thread_timer_list_next(&master->timer, t);
1292
1293                 if (t->arg == cr->eventobj) {
1294                         thread_timer_list_del(&master->timer, t);
1295                         if (t->ref)
1296                                 *t->ref = NULL;
1297                         thread_add_unuse(master, t);
1298                 }
1299
1300                 t = t_next;
1301         }
1302 }
1303
1304 /**
1305  * Process cancellation requests.
1306  *
1307  * This may only be run from the pthread which owns the thread_master.
1308  *
1309  * @param master the thread master to process
1310  * @REQUIRE master->mtx
1311  */
1312 static void do_thread_cancel(struct thread_master *master)
1313 {
1314         struct thread_list_head *list = NULL;
1315         struct thread **thread_array = NULL;
1316         struct thread *thread;
1317
1318         struct cancel_req *cr;
1319         struct listnode *ln;
1320         for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
1321                 /*
1322                  * If this is an event object cancellation, search
1323                  * through task lists deleting any tasks which have the
1324                  * specified argument - use this handy helper function.
1325                  */
1326                 if (cr->eventobj) {
1327                         cancel_arg_helper(master, cr);
1328                         continue;
1329                 }
1330
1331                 /*
1332                  * The pointer varies depending on whether the cancellation
1333                  * request was made asynchronously or not. If it was, we
1334                  * need to check whether the thread even exists anymore
1335                  * before cancelling it.
1336                  */
1337                 thread = (cr->thread) ? cr->thread : *cr->threadref;
1338
1339                 if (!thread)
1340                         continue;
1341
1342                 /* Determine the appropriate queue to cancel the thread from */
1343                 switch (thread->type) {
1344                 case THREAD_READ:
1345                         thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
1346                         thread_array = master->read;
1347                         break;
1348                 case THREAD_WRITE:
1349                         thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
1350                         thread_array = master->write;
1351                         break;
1352                 case THREAD_TIMER:
1353                         thread_timer_list_del(&master->timer, thread);
1354                         break;
1355                 case THREAD_EVENT:
1356                         list = &master->event;
1357                         break;
1358                 case THREAD_READY:
1359                         list = &master->ready;
1360                         break;
1361                 default:
1362                         continue;
1363                         break;
1364                 }
1365
1366                 if (list) {
1367                         thread_list_del(list, thread);
1368                 } else if (thread_array) {
1369                         thread_array[thread->u.fd] = NULL;
1370                 }
1371
1372                 if (thread->ref)
1373                         *thread->ref = NULL;
1374
1375                 thread_add_unuse(thread->master, thread);
1376         }
1377
1378         /* Delete and free all cancellation requests */
1379         if (master->cancel_req)
1380                 list_delete_all_node(master->cancel_req);
1381
1382         /* Wake up any threads which may be blocked in thread_cancel_async() */
1383         master->canceled = true;
1384         pthread_cond_broadcast(&master->cancel_cond);
1385 }
1386
1387 /*
1388  * Helper function used for multiple flavors of arg-based cancellation.
1389  */
1390 static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
1391 {
1392         struct cancel_req *cr;
1393
1394         assert(m->owner == pthread_self());
1395
1396         /* Only worth anything if caller supplies an arg. */
1397         if (arg == NULL)
1398                 return;
1399
1400         cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1401
1402         cr->flags = flags;
1403
1404         frr_with_mutex(&m->mtx) {
1405                 cr->eventobj = arg;
1406                 listnode_add(m->cancel_req, cr);
1407                 do_thread_cancel(m);
1408         }
1409 }
1410
1411 /**
1412  * Cancel any events which have the specified argument.
1413  *
1414  * MT-Unsafe
1415  *
1416  * @param m the thread_master to cancel from
1417  * @param arg the argument passed when creating the event
1418  */
1419 void thread_cancel_event(struct thread_master *master, void *arg)
1420 {
1421         cancel_event_helper(master, arg, 0);
1422 }
1423
1424 /*
1425  * Cancel ready tasks with an arg matching 'arg'
1426  *
1427  * MT-Unsafe
1428  *
1429  * @param m the thread_master to cancel from
1430  * @param arg the argument passed when creating the event
1431  */
1432 void thread_cancel_event_ready(struct thread_master *m, void *arg)
1433 {
1434
1435         /* Only cancel ready/event tasks */
1436         cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
1437 }
1438
1439 /**
1440  * Cancel a specific task.
1441  *
1442  * MT-Unsafe
1443  *
1444  * @param thread task to cancel
1445  */
1446 void thread_cancel(struct thread **thread)
1447 {
1448         struct thread_master *master;
1449
1450         if (thread == NULL || *thread == NULL)
1451                 return;
1452
1453         master = (*thread)->master;
1454
1455         frrtrace(9, frr_libfrr, thread_cancel, master,
1456                  (*thread)->xref->funcname, (*thread)->xref->xref.file,
1457                  (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
1458                  (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);
1459
1460         assert(master->owner == pthread_self());
1461
1462         frr_with_mutex(&master->mtx) {
1463                 struct cancel_req *cr =
1464                         XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1465                 cr->thread = *thread;
1466                 listnode_add(master->cancel_req, cr);
1467                 do_thread_cancel(master);
1468         }
1469
1470         *thread = NULL;
1471 }
1472
1473 /**
1474  * Asynchronous cancellation.
1475  *
1476  * Called with either a struct thread ** or void * to an event argument,
1477  * this function posts the correct cancellation request and blocks until it is
1478  * serviced.
1479  *
1480  * If the thread is currently running, execution blocks until it completes.
1481  *
1482  * The last two parameters are mutually exclusive, i.e. if you pass one the
1483  * other must be NULL.
1484  *
1485  * When the cancellation procedure executes on the target thread_master, the
1486  * thread * provided is checked for nullity. If it is null, the thread is
1487  * assumed to no longer exist and the cancellation request is a no-op. Thus
1488  * users of this API must pass a back-reference when scheduling the original
1489  * task.
1490  *
1491  * MT-Safe
1492  *
1493  * @param master the thread master with the relevant event / task
1494  * @param thread pointer to thread to cancel
1495  * @param eventobj the event
1496  */
1497 void thread_cancel_async(struct thread_master *master, struct thread **thread,
1498                          void *eventobj)
1499 {
1500         assert(!(thread && eventobj) && (thread || eventobj));
1501
1502         if (thread && *thread)
1503                 frrtrace(9, frr_libfrr, thread_cancel_async, master,
1504                          (*thread)->xref->funcname, (*thread)->xref->xref.file,
1505                          (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
1506                          (*thread)->u.val, (*thread)->arg,
1507                          (*thread)->u.sands.tv_sec);
1508         else
1509                 frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
1510                          0, NULL, 0, 0, eventobj, 0);
1511
1512         assert(master->owner != pthread_self());
1513
1514         frr_with_mutex(&master->mtx) {
1515                 master->canceled = false;
1516
1517                 if (thread) {
1518                         struct cancel_req *cr =
1519                                 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1520                         cr->threadref = thread;
1521                         listnode_add(master->cancel_req, cr);
1522                 } else if (eventobj) {
1523                         struct cancel_req *cr =
1524                                 XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1525                         cr->eventobj = eventobj;
1526                         listnode_add(master->cancel_req, cr);
1527                 }
1528                 AWAKEN(master);
1529
1530                 while (!master->canceled)
1531                         pthread_cond_wait(&master->cancel_cond, &master->mtx);
1532         }
1533
1534         if (thread)
1535                 *thread = NULL;
1536 }
1537 /* ------------------------------------------------------------------------- */
1538
1539 static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
1540                                          struct timeval *timer_val)
1541 {
1542         if (!thread_timer_list_count(timers))
1543                 return NULL;
1544
1545         struct thread *next_timer = thread_timer_list_first(timers);
1546         monotime_until(&next_timer->u.sands, timer_val);
1547         return timer_val;
1548 }
1549
1550 static struct thread *thread_run(struct thread_master *m, struct thread *thread,
1551                                  struct thread *fetch)
1552 {
1553         *fetch = *thread;
1554         thread_add_unuse(m, thread);
1555         return fetch;
1556 }
1557
1558 static int thread_process_io_helper(struct thread_master *m,
1559                                     struct thread *thread, short state,
1560                                     short actual_state, int pos)
1561 {
1562         struct thread **thread_array;
1563
1564         /*
1565          * poll() clears the .events field, but the pollfd array we
1566          * pass to poll() is a copy of the one used to schedule threads.
1567          * We need to synchronize state between the two here by applying
1568          * the same changes poll() made on the copy of the "real" pollfd
1569          * array.
1570          *
1571          * This cleans up a possible infinite loop where we refuse
1572          * to respond to a poll event but poll is insistent that
1573          * we should.
1574          */
1575         m->handler.pfds[pos].events &= ~(state);
1576
1577         if (!thread) {
1578                 if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1579                         flog_err(EC_LIB_NO_THREAD,
1580                                  "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
1581                                  m->handler.pfds[pos].fd, actual_state);
1582                 return 0;
1583         }
1584
1585         if (thread->type == THREAD_READ)
1586                 thread_array = m->read;
1587         else
1588                 thread_array = m->write;
1589
1590         thread_array[thread->u.fd] = NULL;
1591         thread_list_add_tail(&m->ready, thread);
1592         thread->type = THREAD_READY;
1593
1594         return 1;
1595 }
1596
1597 /**
1598  * Process I/O events.
1599  *
1600  * Walks through file descriptor array looking for those pollfds whose .revents
1601  * field has something interesting. Deletes any invalid file descriptors.
1602  *
1603  * @param m the thread master
1604  * @param num the number of active file descriptors (return value of poll())
1605  */
1606 static void thread_process_io(struct thread_master *m, unsigned int num)
1607 {
1608         unsigned int ready = 0;
1609         struct pollfd *pfds = m->handler.copy;
1610
1611         for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1612                 /* no event for current fd? immediately continue */
1613                 if (pfds[i].revents == 0)
1614                         continue;
1615
1616                 ready++;
1617
1618                 /*
1619                  * Unless someone has called thread_cancel from another
1620                  * pthread, the only thing that could have changed in
1621                  * m->handler.pfds while we were asleep is the .events
1622                  * field in a given pollfd. Barring thread_cancel() that
1623                  * value should be a superset of the values we have in our
1624                  * copy, so there's no need to update it. Similarily,
1625                  * barring deletion, the fd should still be a valid index
1626                  * into the master's pfds.
1627                  *
1628                  * We are including POLLERR here to do a READ event
1629                  * this is because the read should fail and the
1630                  * read function should handle it appropriately
1631                  */
1632                 if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
1633                         thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
1634                                                  pfds[i].revents, i);
1635                 }
1636                 if (pfds[i].revents & POLLOUT)
1637                         thread_process_io_helper(m, m->write[pfds[i].fd],
1638                                                  POLLOUT, pfds[i].revents, i);
1639
1640                 /* if one of our file descriptors is garbage, remove the same
1641                  * from
1642                  * both pfds + update sizes and index */
1643                 if (pfds[i].revents & POLLNVAL) {
1644                         memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1645                                 (m->handler.pfdcount - i - 1)
1646                                         * sizeof(struct pollfd));
1647                         m->handler.pfdcount--;
1648                         m->handler.pfds[m->handler.pfdcount].fd = 0;
1649                         m->handler.pfds[m->handler.pfdcount].events = 0;
1650
1651                         memmove(pfds + i, pfds + i + 1,
1652                                 (m->handler.copycount - i - 1)
1653                                         * sizeof(struct pollfd));
1654                         m->handler.copycount--;
1655                         m->handler.copy[m->handler.copycount].fd = 0;
1656                         m->handler.copy[m->handler.copycount].events = 0;
1657
1658                         i--;
1659                 }
1660         }
1661 }
1662
1663 /* Add all timers that have popped to the ready list. */
1664 static unsigned int thread_process_timers(struct thread_master *m,
1665                                           struct timeval *timenow)
1666 {
1667         struct timeval prev = *timenow;
1668         bool displayed = false;
1669         struct thread *thread;
1670         unsigned int ready = 0;
1671
1672         while ((thread = thread_timer_list_first(&m->timer))) {
1673                 if (timercmp(timenow, &thread->u.sands, <))
1674                         break;
1675                 prev = thread->u.sands;
1676                 prev.tv_sec += 4;
1677                 /*
1678                  * If the timer would have popped 4 seconds in the
1679                  * past then we are in a situation where we are
1680                  * really getting behind on handling of events.
1681                  * Let's log it and do the right thing with it.
1682                  */
1683                 if (timercmp(timenow, &prev, >)) {
1684                         atomic_fetch_add_explicit(
1685                                 &thread->hist->total_starv_warn, 1,
1686                                 memory_order_seq_cst);
1687                         if (!displayed && !thread->ignore_timer_late) {
1688                                 flog_warn(
1689                                         EC_LIB_STARVE_THREAD,
1690                                         "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1691                                         thread);
1692                                 displayed = true;
1693                         }
1694                 }
1695
1696                 thread_timer_list_pop(&m->timer);
1697                 thread->type = THREAD_READY;
1698                 thread_list_add_tail(&m->ready, thread);
1699                 ready++;
1700         }
1701
1702         return ready;
1703 }
1704
1705 /* process a list en masse, e.g. for event thread lists */
1706 static unsigned int thread_process(struct thread_list_head *list)
1707 {
1708         struct thread *thread;
1709         unsigned int ready = 0;
1710
1711         while ((thread = thread_list_pop(list))) {
1712                 thread->type = THREAD_READY;
1713                 thread_list_add_tail(&thread->master->ready, thread);
1714                 ready++;
1715         }
1716         return ready;
1717 }
1718
1719
1720 /* Fetch next ready thread. */
1721 struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
1722 {
1723         struct thread *thread = NULL;
1724         struct timeval now;
1725         struct timeval zerotime = {0, 0};
1726         struct timeval tv;
1727         struct timeval *tw = NULL;
1728         bool eintr_p = false;
1729         int num = 0;
1730
1731         do {
1732                 /* Handle signals if any */
1733                 if (m->handle_signals)
1734                         frr_sigevent_process();
1735
1736                 pthread_mutex_lock(&m->mtx);
1737
1738                 /* Process any pending cancellation requests */
1739                 do_thread_cancel(m);
1740
1741                 /*
1742                  * Attempt to flush ready queue before going into poll().
1743                  * This is performance-critical. Think twice before modifying.
1744                  */
1745                 if ((thread = thread_list_pop(&m->ready))) {
1746                         fetch = thread_run(m, thread, fetch);
1747                         if (fetch->ref)
1748                                 *fetch->ref = NULL;
1749                         pthread_mutex_unlock(&m->mtx);
1750                         if (!m->ready_run_loop)
1751                                 GETRUSAGE(&m->last_getrusage);
1752                         m->ready_run_loop = true;
1753                         break;
1754                 }
1755
1756                 m->ready_run_loop = false;
1757                 /* otherwise, tick through scheduling sequence */
1758
1759                 /*
1760                  * Post events to ready queue. This must come before the
1761                  * following block since events should occur immediately
1762                  */
1763                 thread_process(&m->event);
1764
1765                 /*
1766                  * If there are no tasks on the ready queue, we will poll()
1767                  * until a timer expires or we receive I/O, whichever comes
1768                  * first. The strategy for doing this is:
1769                  *
1770                  * - If there are events pending, set the poll() timeout to zero
1771                  * - If there are no events pending, but there are timers
1772                  * pending, set the timeout to the smallest remaining time on
1773                  * any timer.
1774                  * - If there are neither timers nor events pending, but there
1775                  * are file descriptors pending, block indefinitely in poll()
1776                  * - If nothing is pending, it's time for the application to die
1777                  *
1778                  * In every case except the last, we need to hit poll() at least
1779                  * once per loop to avoid starvation by events
1780                  */
1781                 if (!thread_list_count(&m->ready))
1782                         tw = thread_timer_wait(&m->timer, &tv);
1783
1784                 if (thread_list_count(&m->ready) ||
1785                                 (tw && !timercmp(tw, &zerotime, >)))
1786                         tw = &zerotime;
1787
1788                 if (!tw && m->handler.pfdcount == 0) { /* die */
1789                         pthread_mutex_unlock(&m->mtx);
1790                         fetch = NULL;
1791                         break;
1792                 }
1793
1794                 /*
1795                  * Copy pollfd array + # active pollfds in it. Not necessary to
1796                  * copy the array size as this is fixed.
1797                  */
1798                 m->handler.copycount = m->handler.pfdcount;
1799                 memcpy(m->handler.copy, m->handler.pfds,
1800                        m->handler.copycount * sizeof(struct pollfd));
1801
1802                 pthread_mutex_unlock(&m->mtx);
1803                 {
1804                         eintr_p = false;
1805                         num = fd_poll(m, tw, &eintr_p);
1806                 }
1807                 pthread_mutex_lock(&m->mtx);
1808
1809                 /* Handle any errors received in poll() */
1810                 if (num < 0) {
1811                         if (eintr_p) {
1812                                 pthread_mutex_unlock(&m->mtx);
1813                                 /* loop around to signal handler */
1814                                 continue;
1815                         }
1816
1817                         /* else die */
1818                         flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
1819                                  safe_strerror(errno));
1820                         pthread_mutex_unlock(&m->mtx);
1821                         fetch = NULL;
1822                         break;
1823                 }
1824
1825                 /* Post timers to ready queue. */
1826                 monotime(&now);
1827                 thread_process_timers(m, &now);
1828
1829                 /* Post I/O to ready queue. */
1830                 if (num > 0)
1831                         thread_process_io(m, num);
1832
1833                 pthread_mutex_unlock(&m->mtx);
1834
1835         } while (!thread && m->spin);
1836
1837         return fetch;
1838 }
1839
1840 static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
1841 {
1842         return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1843                 + (a.tv_usec - b.tv_usec));
1844 }
1845
1846 unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1847                                    unsigned long *cputime)
1848 {
1849 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1850         *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1851                    + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1852 #else
1853         /* This is 'user + sys' time.  */
1854         *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1855                    + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
1856 #endif
1857         return timeval_elapsed(now->real, start->real);
1858 }
1859
1860 /* We should aim to yield after yield milliseconds, which defaults
1861    to THREAD_YIELD_TIME_SLOT .
1862    Note: we are using real (wall clock) time for this calculation.
1863    It could be argued that CPU time may make more sense in certain
1864    contexts.  The things to consider are whether the thread may have
1865    blocked (in which case wall time increases, but CPU time does not),
1866    or whether the system is heavily loaded with other processes competing
1867    for CPU time.  On balance, wall clock time seems to make sense.
1868    Plus it has the added benefit that gettimeofday should be faster
1869    than calling getrusage. */
1870 int thread_should_yield(struct thread *thread)
1871 {
1872         int result;
1873         frr_with_mutex(&thread->mtx) {
1874                 result = monotime_since(&thread->real, NULL)
1875                          > (int64_t)thread->yield;
1876         }
1877         return result;
1878 }
1879
1880 void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
1881 {
1882         frr_with_mutex(&thread->mtx) {
1883                 thread->yield = yield_time;
1884         }
1885 }
1886
1887 void thread_getrusage(RUSAGE_T *r)
1888 {
1889         monotime(&r->real);
1890         if (!cputime_enabled) {
1891                 memset(&r->cpu, 0, sizeof(r->cpu));
1892                 return;
1893         }
1894
1895 #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1896         /* not currently implemented in Linux's vDSO, but maybe at some point
1897          * in the future?
1898          */
1899         clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1900 #else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
1901 #if defined RUSAGE_THREAD
1902 #define FRR_RUSAGE RUSAGE_THREAD
1903 #else
1904 #define FRR_RUSAGE RUSAGE_SELF
1905 #endif
1906         getrusage(FRR_RUSAGE, &(r->cpu));
1907 #endif
1908 }
1909
1910 /*
1911  * Call a thread.
1912  *
1913  * This function will atomically update the thread's usage history. At present
1914  * this is the only spot where usage history is written. Nevertheless the code
1915  * has been written such that the introduction of writers in the future should
1916  * not need to update it provided the writers atomically perform only the
1917  * operations done here, i.e. updating the total and maximum times. In
1918  * particular, the maximum real and cpu times must be monotonically increasing
1919  * or this code is not correct.
1920  */
1921 void thread_call(struct thread *thread)
1922 {
1923         RUSAGE_T before, after;
1924
1925         /* if the thread being called is the CLI, it may change cputime_enabled
1926          * ("service cputime-stats" command), which can result in nonsensical
1927          * and very confusing warnings
1928          */
1929         bool cputime_enabled_here = cputime_enabled;
1930
1931         if (thread->master->ready_run_loop)
1932                 before = thread->master->last_getrusage;
1933         else
1934                 GETRUSAGE(&before);
1935
1936         thread->real = before.real;
1937
1938         frrtrace(9, frr_libfrr, thread_call, thread->master,
1939                  thread->xref->funcname, thread->xref->xref.file,
1940                  thread->xref->xref.line, NULL, thread->u.fd,
1941                  thread->u.val, thread->arg, thread->u.sands.tv_sec);
1942
1943         pthread_setspecific(thread_current, thread);
1944         (*thread->func)(thread);
1945         pthread_setspecific(thread_current, NULL);
1946
1947         GETRUSAGE(&after);
1948         thread->master->last_getrusage = after;
1949
1950         unsigned long walltime, cputime;
1951         unsigned long exp;
1952
1953         walltime = thread_consumed_time(&after, &before, &cputime);
1954
1955         /* update walltime */
1956         atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
1957                                   memory_order_seq_cst);
1958         exp = atomic_load_explicit(&thread->hist->real.max,
1959                                    memory_order_seq_cst);
1960         while (exp < walltime
1961                && !atomic_compare_exchange_weak_explicit(
1962                        &thread->hist->real.max, &exp, walltime,
1963                        memory_order_seq_cst, memory_order_seq_cst))
1964                 ;
1965
1966         if (cputime_enabled_here && cputime_enabled) {
1967                 /* update cputime */
1968                 atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
1969                                           memory_order_seq_cst);
1970                 exp = atomic_load_explicit(&thread->hist->cpu.max,
1971                                            memory_order_seq_cst);
1972                 while (exp < cputime
1973                        && !atomic_compare_exchange_weak_explicit(
1974                                &thread->hist->cpu.max, &exp, cputime,
1975                                memory_order_seq_cst, memory_order_seq_cst))
1976                         ;
1977         }
1978
1979         atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
1980                                   memory_order_seq_cst);
1981         atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
1982                                  memory_order_seq_cst);
1983
1984         if (cputime_enabled_here && cputime_enabled && cputime_threshold
1985             && cputime > cputime_threshold) {
1986                 /*
1987                  * We have a CPU Hog on our hands.  The time FRR has spent
1988                  * doing actual work (not sleeping) is greater than 5 seconds.
1989                  * Whinge about it now, so we're aware this is yet another task
1990                  * to fix.
1991                  */
1992                 atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
1993                                           1, memory_order_seq_cst);
1994                 flog_warn(
1995                         EC_LIB_SLOW_THREAD_CPU,
1996                         "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
1997                         thread->xref->funcname, (unsigned long)thread->func,
1998                         walltime / 1000, cputime / 1000);
1999
2000         } else if (walltime_threshold && walltime > walltime_threshold) {
2001                 /*
2002                  * The runtime for a task is greater than 5 seconds, but the
2003                  * cpu time is under 5 seconds.  Let's whine about this because
2004                  * this could imply some sort of scheduling issue.
2005                  */
2006                 atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2007                                           1, memory_order_seq_cst);
2008                 flog_warn(
2009                         EC_LIB_SLOW_THREAD_WALL,
2010                         "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
2011                         thread->xref->funcname, (unsigned long)thread->func,
2012                         walltime / 1000, cputime / 1000);
2013         }
2014 }
2015
2016 /* Execute thread */
2017 void _thread_execute(const struct xref_threadsched *xref,
2018                      struct thread_master *m, void (*func)(struct thread *),
2019                      void *arg, int val)
2020 {
2021         struct thread *thread;
2022
2023         /* Get or allocate new thread to execute. */
2024         frr_with_mutex(&m->mtx) {
2025                 thread = thread_get(m, THREAD_EVENT, func, arg, xref);
2026
2027                 /* Set its event value. */
2028                 frr_with_mutex(&thread->mtx) {
2029                         thread->add_type = THREAD_EXECUTE;
2030                         thread->u.val = val;
2031                         thread->ref = &thread;
2032                 }
2033         }
2034
2035         /* Execute thread doing all accounting. */
2036         thread_call(thread);
2037
2038         /* Give back or free thread. */
2039         thread_add_unuse(m, thread);
2040 }
2041
2042 /* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2043 void debug_signals(const sigset_t *sigs)
2044 {
2045         int i, found;
2046         sigset_t tmpsigs;
2047         char buf[300];
2048
2049         /*
2050          * We're only looking at the non-realtime signals here, so we need
2051          * some limit value. Platform differences mean at some point we just
2052          * need to pick a reasonable value.
2053          */
2054 #if defined SIGRTMIN
2055 #  define LAST_SIGNAL SIGRTMIN
2056 #else
2057 #  define LAST_SIGNAL 32
2058 #endif
2059
2060
2061         if (sigs == NULL) {
2062                 sigemptyset(&tmpsigs);
2063                 pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2064                 sigs = &tmpsigs;
2065         }
2066
2067         found = 0;
2068         buf[0] = '\0';
2069
2070         for (i = 0; i < LAST_SIGNAL; i++) {
2071                 char tmp[20];
2072
2073                 if (sigismember(sigs, i) > 0) {
2074                         if (found > 0)
2075                                 strlcat(buf, ",", sizeof(buf));
2076                         snprintf(tmp, sizeof(tmp), "%d", i);
2077                         strlcat(buf, tmp, sizeof(buf));
2078                         found++;
2079                 }
2080         }
2081
2082         if (found == 0)
2083                 snprintf(buf, sizeof(buf), "<none>");
2084
2085         zlog_debug("%s: %s", __func__, buf);
2086 }
2087
2088 bool thread_is_scheduled(struct thread *thread)
2089 {
2090         if (thread == NULL)
2091                 return false;
2092
2093         return true;
2094 }
2095
2096 static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2097                                    const struct thread *thread)
2098 {
2099         static const char * const types[] = {
2100                 [THREAD_READ] = "read",
2101                 [THREAD_WRITE] = "write",
2102                 [THREAD_TIMER] = "timer",
2103                 [THREAD_EVENT] = "event",
2104                 [THREAD_READY] = "ready",
2105                 [THREAD_UNUSED] = "unused",
2106                 [THREAD_EXECUTE] = "exec",
2107         };
2108         ssize_t rv = 0;
2109         char info[16] = "";
2110
2111         if (!thread)
2112                 return bputs(buf, "{(thread *)NULL}");
2113
2114         rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2115
2116         if (thread->type < array_size(types) && types[thread->type])
2117                 rv += bprintfrr(buf, " %-6s", types[thread->type]);
2118         else
2119                 rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2120
2121         switch (thread->type) {
2122         case THREAD_READ:
2123         case THREAD_WRITE:
2124                 snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2125                 break;
2126
2127         case THREAD_TIMER:
2128                 snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2129                 break;
2130         }
2131
2132         rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2133                         thread->xref->funcname, thread->xref->dest,
2134                         thread->xref->xref.file, thread->xref->xref.line);
2135         return rv;
2136 }
2137
2138 printfrr_ext_autoreg_p("TH", printfrr_thread);
2139 static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2140                                const void *ptr)
2141 {
2142         const struct thread *thread = ptr;
2143         struct timespec remain = {};
2144
2145         if (ea->fmt[0] == 'D') {
2146                 ea->fmt++;
2147                 return printfrr_thread_dbg(buf, ea, thread);
2148         }
2149
2150         if (!thread) {
2151                 /* need to jump over time formatting flag characters in the
2152                  * input format string, i.e. adjust ea->fmt!
2153                  */
2154                 printfrr_time(buf, ea, &remain,
2155                               TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2156                 return bputch(buf, '-');
2157         }
2158
2159         TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2160         return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2161 }