/* Thread management routine
 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <zebra.h>
#include <sys/resource.h>

#include "thread.h"
#include "memory.h"
#include "log.h"
#include "hash.h"
#include "command.h"
#include "sigevent.h"
#include "network.h"
#include "jhash.h"
#include "frratomic.h"
#include "frr_pthread.h"
#include "lib_errors.h"
#include "libfrr_trace.h"
#include "libfrr.h"
DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master");
DEFINE_MTYPE_STATIC(LIB, THREAD_POLL, "Thread Poll Info");
DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats");

DECLARE_LIST(thread_list, struct thread, threaditem);
struct cancel_req {
        int flags;
        struct thread *thread;
        void *eventobj;
        struct thread **threadref;
};

/* Flags for task cancellation */
#define THREAD_CANCEL_FLAG_READY 0x01
static int thread_timer_cmp(const struct thread *a, const struct thread *b)
{
        if (a->u.sands.tv_sec < b->u.sands.tv_sec)
                return -1;
        if (a->u.sands.tv_sec > b->u.sands.tv_sec)
                return 1;
        if (a->u.sands.tv_usec < b->u.sands.tv_usec)
                return -1;
        if (a->u.sands.tv_usec > b->u.sands.tv_usec)
                return 1;
        return 0;
}

DECLARE_HEAP(thread_timer_list, struct thread, timeritem, thread_timer_cmp);
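
/* The timer heap above keeps the task with the earliest deadline at the top,
 * so thread_timer_wait() further down only ever needs to inspect the first
 * element to compute the next poll() timeout.
 */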
#if defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif

#define AWAKEN(m)                                                              \
        do {                                                                   \
                const unsigned char wakebyte = 0x01;                           \
                write(m->io_pipe[1], &wakebyte, 1);                            \
        } while (0)
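
/* AWAKEN() implements the classic self-pipe trick: writing one byte to
 * m->io_pipe[1] makes the io_pipe[0] end readable, which wakes a pthread
 * sleeping in poll()/ppoll() so it can pick up newly scheduled tasks or
 * pending cancellation requests.
 */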
/* control variable for initializer */
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
pthread_key_t thread_current;

static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *masters;

static void thread_free(struct thread_master *master, struct thread *thread);
#ifndef EXCLUDE_CPU_TIME
#define EXCLUDE_CPU_TIME 0
#endif
#ifndef CONSUMED_TIME_CHECK
#define CONSUMED_TIME_CHECK 0
#endif

bool cputime_enabled = !EXCLUDE_CPU_TIME;
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;

/* CLI start ---------------------------------------------------------------- */
#ifndef VTYSH_EXTRACT_PL
#include "lib/thread_clippy.c"
#endif
static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
{
        int size = sizeof(a->func);

        return jhash(&a->func, size, 0);
}
static bool cpu_record_hash_cmp(const struct cpu_thread_history *a,
                                const struct cpu_thread_history *b)
{
        return a->func == b->func;
}
static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
{
        struct cpu_thread_history *new;

        new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
        new->func = a->func;
        new->funcname = a->funcname;
        return new;
}
static void cpu_record_hash_free(void *a)
{
        struct cpu_thread_history *hist = a;

        XFREE(MTYPE_THREAD_STATS, hist);
}
static void vty_out_cpu_thread_history(struct vty *vty,
                                       struct cpu_thread_history *a)
{
        vty_out(vty, "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu",
                a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
                a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
                (a->real.total / a->total_calls), a->real.max,
                a->total_cpu_warn, a->total_wall_warn);
        vty_out(vty, " %c%c%c%c%c %s\n",
                a->types & (1 << THREAD_READ) ? 'R' : ' ',
                a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
                a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
                a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
                a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
}
static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
{
        struct cpu_thread_history *totals = args[0];
        struct cpu_thread_history copy;
        struct vty *vty = args[1];
        uint8_t *filter = args[2];

        struct cpu_thread_history *a = bucket->data;

        copy.total_active =
                atomic_load_explicit(&a->total_active, memory_order_seq_cst);
        copy.total_calls =
                atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
        copy.total_cpu_warn =
                atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
        copy.total_wall_warn =
                atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
        copy.cpu.total =
                atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
        copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
        copy.real.total =
                atomic_load_explicit(&a->real.total, memory_order_seq_cst);
        copy.real.max =
                atomic_load_explicit(&a->real.max, memory_order_seq_cst);
        copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
        copy.funcname = a->funcname;

        if (!(copy.types & *filter))
                return;

        vty_out_cpu_thread_history(vty, &copy);
        totals->total_active += copy.total_active;
        totals->total_calls += copy.total_calls;
        totals->total_cpu_warn += copy.total_cpu_warn;
        totals->total_wall_warn += copy.total_wall_warn;
        totals->real.total += copy.real.total;
        if (totals->real.max < copy.real.max)
                totals->real.max = copy.real.max;
        totals->cpu.total += copy.cpu.total;
        if (totals->cpu.max < copy.cpu.max)
                totals->cpu.max = copy.cpu.max;
}
static void cpu_record_print(struct vty *vty, uint8_t filter)
{
        struct cpu_thread_history tmp;
        void *args[3] = {&tmp, vty, &filter};
        struct thread_master *m;
        struct listnode *ln;

        if (!cputime_enabled)
                vty_out(vty,
                        "Collecting CPU time statistics is currently disabled. Following statistics\n"
                        "will be zero or may display data from when collection was enabled. Use the\n"
                        "  \"service cputime-stats\" command to start collecting data.\n"
                        "\nCounters and wallclock times are always maintained and should be accurate.\n");

        memset(&tmp, 0, sizeof(tmp));
        tmp.funcname = "TOTAL";
        tmp.types = filter;

        frr_with_mutex(&masters_mtx) {
                for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
                        const char *name = m->name ? m->name : "main";

                        char underline[strlen(name) + 1];
                        memset(underline, '-', sizeof(underline));
                        underline[sizeof(underline) - 1] = '\0';

                        vty_out(vty, "\n");
                        vty_out(vty, "Showing statistics for pthread %s\n",
                                name);
                        vty_out(vty, "-------------------------------%s\n",
                                underline);
                        vty_out(vty, "%30s %18s %18s\n", "",
                                "CPU (user+system):", "Real (wall-clock):");
                        vty_out(vty,
                                "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
                        vty_out(vty, " Avg uSec Max uSecs");
                        vty_out(vty, "  CPU_Warn Wall_Warn  Type  Thread\n");

                        if (m->cpu_record->count)
                                hash_iterate(
                                        m->cpu_record,
                                        (void (*)(struct hash_bucket *,
                                                  void *))cpu_record_hash_print,
                                        args);
                        else
                                vty_out(vty, "No data to display yet.\n");

                        vty_out(vty, "\n");
                }
        }

        vty_out(vty, "\n");
        vty_out(vty, "Total thread statistics\n");
        vty_out(vty, "-------------------------\n");
        vty_out(vty, "%30s %18s %18s\n", "",
                "CPU (user+system):", "Real (wall-clock):");
        vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
        vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
        vty_out(vty, "  Type  Thread\n");

        if (tmp.total_calls > 0)
                vty_out_cpu_thread_history(vty, &tmp);
}
static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
{
        uint8_t *filter = args[0];
        struct hash *cpu_record = args[1];

        struct cpu_thread_history *a = bucket->data;

        if (!(a->types & *filter))
                return;

        hash_release(cpu_record, bucket->data);
}
static void cpu_record_clear(uint8_t filter)
{
        uint8_t *tmp = &filter;
        struct thread_master *m;
        struct listnode *ln;

        frr_with_mutex(&masters_mtx) {
                for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
                        frr_with_mutex(&m->mtx) {
                                void *args[2] = {tmp, m->cpu_record};
                                hash_iterate(
                                        m->cpu_record,
                                        (void (*)(struct hash_bucket *,
                                                  void *))cpu_record_hash_clear,
                                        args);
                        }
                }
        }
}
static uint8_t parse_filter(const char *filterstr)
{
        int i = 0;
        int filter = 0;

        while (filterstr[i] != '\0') {
                switch (filterstr[i]) {
                case 'r':
                case 'R':
                        filter |= (1 << THREAD_READ);
                        break;
                case 'w':
                case 'W':
                        filter |= (1 << THREAD_WRITE);
                        break;
                case 't':
                case 'T':
                        filter |= (1 << THREAD_TIMER);
                        break;
                case 'e':
                case 'E':
                        filter |= (1 << THREAD_EVENT);
                        break;
                case 'x':
                case 'X':
                        filter |= (1 << THREAD_EXECUTE);
                        break;
                default:
                        break;
                }
                ++i;
        }
        return filter;
}
DEFUN_NOSH (show_thread_cpu,
            show_thread_cpu_cmd,
            "show thread cpu [FILTER]",
            SHOW_STR
            "Thread information\n"
            "Thread CPU usage\n"
            "Display filter (rwtex)\n")
{
        uint8_t filter = (uint8_t)-1U;
        int idx = 0;

        if (argv_find(argv, argc, "FILTER", &idx)) {
                filter = parse_filter(argv[idx]->arg);
                if (!filter) {
                        vty_out(vty,
                                "Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
                                argv[idx]->arg);
                        return CMD_WARNING;
                }
        }

        cpu_record_print(vty, filter);
        return CMD_SUCCESS;
}
DEFPY (service_cputime_stats,
       service_cputime_stats_cmd,
       "[no] service cputime-stats",
       NO_STR
       "Set up miscellaneous service\n"
       "Collect CPU usage statistics\n")
{
        cputime_enabled = !no;
        return CMD_SUCCESS;
}
DEFPY (service_cputime_warning,
       service_cputime_warning_cmd,
       "[no] service cputime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n"
       "Warning threshold in milliseconds\n")
{
        if (no)
                cputime_threshold = 0;
        else
                cputime_threshold = cputime_warning * 1000;
        return CMD_SUCCESS;
}
ALIAS (service_cputime_warning,
       no_service_cputime_warning_cmd,
       "no service cputime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding CPU usage threshold\n")
DEFPY (service_walltime_warning,
       service_walltime_warning_cmd,
       "[no] service walltime-warning (1-4294967295)",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n"
       "Warning threshold in milliseconds\n")
{
        if (no)
                walltime_threshold = 0;
        else
                walltime_threshold = walltime_warning * 1000;
        return CMD_SUCCESS;
}
ALIAS (service_walltime_warning,
       no_service_walltime_warning_cmd,
       "no service walltime-warning",
       NO_STR
       "Set up miscellaneous service\n"
       "Warn for tasks exceeding total wallclock threshold\n")
static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
{
        const char *name = m->name ? m->name : "main";
        char underline[strlen(name) + 1];
        struct thread *thread;
        uint32_t i;

        memset(underline, '-', sizeof(underline));
        underline[sizeof(underline) - 1] = '\0';

        vty_out(vty, "\nShowing poll FD's for %s\n", name);
        vty_out(vty, "----------------------%s\n", underline);
        vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
                m->fd_limit);
        for (i = 0; i < m->handler.pfdcount; i++) {
                vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
                        m->handler.pfds[i].fd, m->handler.pfds[i].events,
                        m->handler.pfds[i].revents);

                if (m->handler.pfds[i].events & POLLIN) {
                        thread = m->read[m->handler.pfds[i].fd];

                        if (!thread)
                                vty_out(vty, "ERROR ");
                        else
                                vty_out(vty, "%s ", thread->xref->funcname);
                } else
                        vty_out(vty, " ");

                if (m->handler.pfds[i].events & POLLOUT) {
                        thread = m->write[m->handler.pfds[i].fd];

                        if (!thread)
                                vty_out(vty, "ERROR\n");
                        else
                                vty_out(vty, "%s\n", thread->xref->funcname);
                } else
                        vty_out(vty, "\n");
        }
}
DEFUN_NOSH (show_thread_poll,
            show_thread_poll_cmd,
            "show thread poll",
            SHOW_STR
            "Thread information\n"
            "Show poll FD's and information\n")
{
        struct listnode *node;
        struct thread_master *m;

        frr_with_mutex(&masters_mtx) {
                for (ALL_LIST_ELEMENTS_RO(masters, node, m)) {
                        show_thread_poll_helper(vty, m);
                }
        }

        return CMD_SUCCESS;
}
DEFUN (clear_thread_cpu,
       clear_thread_cpu_cmd,
       "clear thread cpu [FILTER]",
       "Clear stored data in all pthreads\n"
       "Thread information\n"
       "Thread CPU usage\n"
       "Display filter (rwtexb)\n")
{
        uint8_t filter = (uint8_t)-1U;
        int idx = 0;

        if (argv_find(argv, argc, "FILTER", &idx)) {
                filter = parse_filter(argv[idx]->arg);
                if (!filter) {
                        vty_out(vty,
                                "Invalid filter \"%s\" specified; must contain at least one of 'RWTEXB'\n",
                                argv[idx]->arg);
                        return CMD_WARNING;
                }
        }

        cpu_record_clear(filter);
        return CMD_SUCCESS;
}
void thread_cmd_init(void)
{
        install_element(VIEW_NODE, &show_thread_cpu_cmd);
        install_element(VIEW_NODE, &show_thread_poll_cmd);
        install_element(ENABLE_NODE, &clear_thread_cpu_cmd);

        install_element(CONFIG_NODE, &service_cputime_stats_cmd);
        install_element(CONFIG_NODE, &service_cputime_warning_cmd);
        install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
        install_element(CONFIG_NODE, &service_walltime_warning_cmd);
        install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
}
/* CLI end ------------------------------------------------------------------ */

static void cancelreq_del(void *cr)
{
        XFREE(MTYPE_TMP, cr);
}
/* initializer, only ever called once */
static void initializer(void)
{
        pthread_key_create(&thread_current, NULL);
}
struct thread_master *thread_master_create(const char *name)
{
        struct thread_master *rv;
        struct rlimit limit;

        pthread_once(&init_once, &initializer);

        rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));

        /* Initialize master mutex */
        pthread_mutex_init(&rv->mtx, NULL);
        pthread_cond_init(&rv->cancel_cond, NULL);

        /* Set name */
        name = name ? name : "default";
        rv->name = XSTRDUP(MTYPE_THREAD_MASTER, name);

        /* Initialize I/O task data structures */

        /* Use configured limit if present, ulimit otherwise. */
        rv->fd_limit = frr_get_fd_limit();
        if (rv->fd_limit == 0) {
                getrlimit(RLIMIT_NOFILE, &limit);
                rv->fd_limit = (int)limit.rlim_cur;
        }

        rv->read = XCALLOC(MTYPE_THREAD_POLL,
                           sizeof(struct thread *) * rv->fd_limit);

        rv->write = XCALLOC(MTYPE_THREAD_POLL,
                            sizeof(struct thread *) * rv->fd_limit);

        char tmhashname[strlen(name) + 32];
        snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
                 name);
        rv->cpu_record = hash_create_size(
                8, (unsigned int (*)(const void *))cpu_record_hash_key,
                (bool (*)(const void *, const void *))cpu_record_hash_cmp,
                tmhashname);

        thread_list_init(&rv->event);
        thread_list_init(&rv->ready);
        thread_list_init(&rv->unuse);
        thread_timer_list_init(&rv->timer);

        /* Initialize thread_fetch() settings */
        rv->spin = true;
        rv->handle_signals = true;

        /* Set pthread owner, should be updated by actual owner */
        rv->owner = pthread_self();
        rv->cancel_req = list_new();
        rv->cancel_req->del = cancelreq_del;
        rv->canceled = true;

        /* Initialize pipe poker */
        pipe(rv->io_pipe);
        set_nonblocking(rv->io_pipe[0]);
        set_nonblocking(rv->io_pipe[1]);

        /* Initialize data structures for poll() */
        rv->handler.pfdsize = rv->fd_limit;
        rv->handler.pfdcount = 0;
        rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
                                   sizeof(struct pollfd) * rv->handler.pfdsize);
        rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
                                   sizeof(struct pollfd) * rv->handler.pfdsize);

        /* add to list of threadmasters */
        frr_with_mutex(&masters_mtx) {
                if (!masters)
                        masters = list_new();

                listnode_add(masters, rv);
        }

        return rv;
}
void thread_master_set_name(struct thread_master *master, const char *name)
{
        frr_with_mutex(&master->mtx) {
                XFREE(MTYPE_THREAD_MASTER, master->name);
                master->name = XSTRDUP(MTYPE_THREAD_MASTER, name);
        }
}
#define THREAD_UNUSED_DEPTH 10

/* Move thread to unuse list. */
static void thread_add_unuse(struct thread_master *m, struct thread *thread)
{
        pthread_mutex_t mtxc = thread->mtx;

        assert(m != NULL && thread != NULL);

        thread->hist->total_active--;
        memset(thread, 0, sizeof(struct thread));
        thread->type = THREAD_UNUSED;

        /* Restore the thread mutex context. */
        thread->mtx = mtxc;

        if (thread_list_count(&m->unuse) < THREAD_UNUSED_DEPTH) {
                thread_list_add_tail(&m->unuse, thread);
                return;
        }

        thread_free(m, thread);
}
/* Free all unused thread. */
static void thread_list_free(struct thread_master *m,
                             struct thread_list_head *list)
{
        struct thread *t;

        while ((t = thread_list_pop(list)))
                thread_free(m, t);
}
static void thread_array_free(struct thread_master *m,
                              struct thread **thread_array)
{
        struct thread *t;
        int index;

        for (index = 0; index < m->fd_limit; ++index) {
                t = thread_array[index];
                if (t) {
                        thread_array[index] = NULL;
                        thread_free(m, t);
                }
        }
        XFREE(MTYPE_THREAD_POLL, thread_array);
}
/*
 * thread_master_free_unused
 *
 * As threads are finished with they are put on the
 * unuse list for later reuse.
 * If we are shutting down, Free up unused threads
 * So we can see if we forget to shut anything off
 */
void thread_master_free_unused(struct thread_master *m)
{
        frr_with_mutex(&m->mtx) {
                struct thread *t;

                while ((t = thread_list_pop(&m->unuse)))
                        thread_free(m, t);
        }
}
/* Stop thread scheduler. */
void thread_master_free(struct thread_master *m)
{
        struct thread *t;

        frr_with_mutex(&masters_mtx) {
                listnode_delete(masters, m);
                if (masters->count == 0) {
                        list_delete(&masters);
                }
        }

        thread_array_free(m, m->read);
        thread_array_free(m, m->write);
        while ((t = thread_timer_list_pop(&m->timer)))
                thread_free(m, t);
        thread_list_free(m, &m->event);
        thread_list_free(m, &m->ready);
        thread_list_free(m, &m->unuse);
        pthread_mutex_destroy(&m->mtx);
        pthread_cond_destroy(&m->cancel_cond);
        close(m->io_pipe[0]);
        close(m->io_pipe[1]);
        list_delete(&m->cancel_req);
        m->cancel_req = NULL;

        hash_clean(m->cpu_record, cpu_record_hash_free);
        hash_free(m->cpu_record);
        m->cpu_record = NULL;

        XFREE(MTYPE_THREAD_MASTER, m->name);
        XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
        XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
        XFREE(MTYPE_THREAD_MASTER, m);
}
/* Return remain time in milliseconds. */
unsigned long thread_timer_remain_msec(struct thread *thread)
{
        int64_t remain;

        frr_with_mutex(&thread->mtx) {
                remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
        }

        return remain < 0 ? 0 : remain;
}

/* Return remain time in seconds. */
unsigned long thread_timer_remain_second(struct thread *thread)
{
        return thread_timer_remain_msec(thread) / 1000LL;
}
struct timeval thread_timer_remain(struct thread *thread)
{
        struct timeval remain;

        frr_with_mutex(&thread->mtx) {
                monotime_until(&thread->u.sands, &remain);
        }
        return remain;
}
static int time_hhmmss(char *buf, int buf_size, long sec)
{
        long hh;
        long mm;
        int wr;

        assert(buf_size >= 8);

        hh = sec / 3600;
        sec %= 3600;
        mm = sec / 60;
        sec %= 60;

        wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);

        return wr != 8;
}
char *thread_timer_to_hhmmss(char *buf, int buf_size,
                             struct thread *t_timer)
{
        if (t_timer) {
                time_hhmmss(buf, buf_size,
                            thread_timer_remain_second(t_timer));
        } else {
                snprintf(buf, buf_size, "--:--:--");
        }
        return buf;
}
/* Get new thread.  */
static struct thread *thread_get(struct thread_master *m, uint8_t type,
                                 int (*func)(struct thread *), void *arg,
                                 const struct xref_threadsched *xref)
{
        struct thread *thread = thread_list_pop(&m->unuse);
        struct cpu_thread_history tmp;

        if (!thread) {
                thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
                /* mutex only needs to be initialized at struct creation. */
                pthread_mutex_init(&thread->mtx, NULL);
                m->alloc++;
        }

        thread->type = type;
        thread->add_type = type;
        thread->master = m;
        thread->arg = arg;
        thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
        thread->ref = NULL;
        thread->ignore_timer_late = false;

        /*
         * So if the passed in funcname is not what we have
         * stored that means the thread->hist needs to be
         * updated.  We keep the last one around in unused
         * under the assumption that we are probably
         * going to immediately allocate the same
         * one up again, so we can save ourselves the lookup.
         *
         * This hopefully saves us some serious
         * hash_get lookups.
         */
        if ((thread->xref && thread->xref->funcname != xref->funcname)
            || thread->func != func) {
                tmp.func = func;
                tmp.funcname = xref->funcname;
                thread->hist =
                        hash_get(m->cpu_record, &tmp,
                                 (void *(*)(void *))cpu_record_hash_alloc);
        }
        thread->hist->total_active++;
        thread->func = func;
        thread->xref = xref;

        return thread;
}
static void thread_free(struct thread_master *master, struct thread *thread)
{
        /* Update statistics. */
        assert(master->alloc > 0);
        master->alloc--;

        /* Free allocated resources. */
        pthread_mutex_destroy(&thread->mtx);
        XFREE(MTYPE_THREAD, thread);
}
static int fd_poll(struct thread_master *m, const struct timeval *timer_wait,
                   bool *eintr_p)
{
        sigset_t origsigs;
        unsigned char trash[64];
        nfds_t count = m->handler.copycount;

        /*
         * If timer_wait is null here, that means poll() should block
         * indefinitely, unless the thread_master has overridden it by setting
         * ->selectpoll_timeout.
         *
         * If the value is positive, it specifies the maximum number of
         * milliseconds to wait. If the timeout is -1, it specifies that
         * we should never wait and always return immediately even if no
         * event is detected. If the value is zero, the behavior is default.
         */
        int timeout = -1;

        /* number of file descriptors with events */
        int num;

        if (timer_wait != NULL
            && m->selectpoll_timeout == 0) // use the default value
                timeout = (timer_wait->tv_sec * 1000)
                          + (timer_wait->tv_usec / 1000);
        else if (m->selectpoll_timeout > 0) // use the user's timeout
                timeout = m->selectpoll_timeout;
        else if (m->selectpoll_timeout
                 < 0) // effect a poll (return immediately)
                timeout = 0;

        zlog_tls_buffer_flush();
        rcu_read_unlock();
        rcu_assert_read_unlocked();

        /* add poll pipe poker */
        assert(count + 1 < m->handler.pfdsize);
        m->handler.copy[count].fd = m->io_pipe[0];
        m->handler.copy[count].events = POLLIN;
        m->handler.copy[count].revents = 0x00;

        /* We need to deal with a signal-handling race here: we
         * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
         * that may arrive just before we enter poll(). We will block the
         * key signals, then check whether any have arrived - if so, we return
         * before calling poll(). If not, we'll re-enable the signals
         * in the ppoll() call.
         */

        sigemptyset(&origsigs);
        if (m->handle_signals) {
                /* Main pthread that handles the app signals */
                if (frr_sigevent_check(&origsigs)) {
                        /* Signal to process - restore signal mask and return */
                        pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
                        num = -1;
                        *eintr_p = true;
                        goto done;
                }
        } else {
                /* Don't make any changes for the non-main pthreads */
                pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
        }

#if defined(HAVE_PPOLL)
        struct timespec ts, *tsp;

        if (timeout >= 0) {
                ts.tv_sec = timeout / 1000;
                ts.tv_nsec = (timeout % 1000) * 1000000;
                tsp = &ts;
        } else
                tsp = NULL;

        num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
        pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
#else
        /* Not ideal - there is a race after we restore the signal mask */
        pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
        num = poll(m->handler.copy, count + 1, timeout);
#endif

done:

        if (num < 0 && errno == EINTR)
                *eintr_p = true;

        if (num > 0 && m->handler.copy[count].revents != 0 && num--)
                while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
                        ;

        rcu_read_lock();

        return num;
}
/* Add new read thread. */
void _thread_add_read_write(const struct xref_threadsched *xref,
                            struct thread_master *m,
                            int (*func)(struct thread *), void *arg, int fd,
                            struct thread **t_ptr)
{
        int dir = xref->thread_type;
        struct thread *thread = NULL;
        struct thread **thread_array;

        if (dir == THREAD_READ)
                frrtrace(9, frr_libfrr, schedule_read, m,
                         xref->funcname, xref->xref.file, xref->xref.line,
                         t_ptr, fd, 0, arg, 0);
        else
                frrtrace(9, frr_libfrr, schedule_write, m,
                         xref->funcname, xref->xref.file, xref->xref.line,
                         t_ptr, fd, 0, arg, 0);

        if (fd >= m->fd_limit)
                assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");

        frr_with_mutex(&m->mtx) {
                if (t_ptr && *t_ptr)
                        // thread is already scheduled; don't reschedule
                        break;

                /* default to a new pollfd */
                nfds_t queuepos = m->handler.pfdcount;

                if (dir == THREAD_READ)
                        thread_array = m->read;
                else
                        thread_array = m->write;

                /* if we already have a pollfd for our file descriptor, find and
                 * use it */
                for (nfds_t i = 0; i < m->handler.pfdcount; i++)
                        if (m->handler.pfds[i].fd == fd) {
                                queuepos = i;

#ifdef DEV_BUILD
                                /*
                                 * What happens if we have a thread already
                                 * created for this event?
                                 */
                                if (thread_array[fd])
                                        assert(!"Thread already scheduled for file descriptor");
#endif
                                break;
                        }

                /* make sure we have room for this fd + pipe poker fd */
                assert(queuepos + 1 < m->handler.pfdsize);

                thread = thread_get(m, dir, func, arg, xref);

                m->handler.pfds[queuepos].fd = fd;
                m->handler.pfds[queuepos].events |=
                        (dir == THREAD_READ ? POLLIN : POLLOUT);

                if (queuepos == m->handler.pfdcount)
                        m->handler.pfdcount++;

                if (thread) {
                        frr_with_mutex(&thread->mtx) {
                                thread->u.fd = fd;
                                thread_array[thread->u.fd] = thread;
                        }

                        if (t_ptr) {
                                *t_ptr = thread;
                                thread->ref = t_ptr;
                        }
                }

                AWAKEN(m);
        }
}
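
/* Illustrative usage sketch (not compiled here): scheduling a read task via
 * the thread_add_read() convenience macro from thread.h, which routes to
 * _thread_add_read_write() above.  The back-reference t_read lets the task be
 * cancelled safely later.
 */
#if 0
static struct thread *t_read;

static int my_read_handler(struct thread *thread)
{
        int fd = THREAD_FD(thread);

        /* ... consume data from fd, then reschedule for the next event ... */
        thread_add_read(thread->master, my_read_handler, THREAD_ARG(thread),
                        fd, &t_read);
        return 0;
}
#endif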
static void _thread_add_timer_timeval(const struct xref_threadsched *xref,
                                      struct thread_master *m,
                                      int (*func)(struct thread *), void *arg,
                                      struct timeval *time_relative,
                                      struct thread **t_ptr)
{
        struct thread *thread;
        struct timeval t;

        assert(m != NULL);
        assert(time_relative);

        frrtrace(9, frr_libfrr, schedule_timer, m,
                 xref->funcname, xref->xref.file, xref->xref.line,
                 t_ptr, 0, 0, arg, (long)time_relative->tv_sec);

        /* Compute expiration/deadline time. */
        monotime(&t);
        timeradd(&t, time_relative, &t);

        frr_with_mutex(&m->mtx) {
                if (t_ptr && *t_ptr)
                        /* thread is already scheduled; don't reschedule */
                        return;

                thread = thread_get(m, THREAD_TIMER, func, arg, xref);

                frr_with_mutex(&thread->mtx) {
                        thread->u.sands = t;
                        thread_timer_list_add(&m->timer, thread);
                        if (t_ptr) {
                                *t_ptr = thread;
                                thread->ref = t_ptr;
                        }
                }

                /* The timer list is sorted - if this new timer
                 * might change the time we'll wait for, give the pthread
                 * a chance to re-compute.
                 */
                if (thread_timer_list_first(&m->timer) == thread)
                        AWAKEN(m);
        }
}
/* Add timer event thread. */
void _thread_add_timer(const struct xref_threadsched *xref,
                       struct thread_master *m, int (*func)(struct thread *),
                       void *arg, long timer, struct thread **t_ptr)
{
        struct timeval trel;

        assert(m != NULL);

        trel.tv_sec = timer;
        trel.tv_usec = 0;

        _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}
/* Add timer event thread with "millisecond" resolution */
void _thread_add_timer_msec(const struct xref_threadsched *xref,
                            struct thread_master *m,
                            int (*func)(struct thread *), void *arg, long timer,
                            struct thread **t_ptr)
{
        struct timeval trel;

        assert(m != NULL);

        trel.tv_sec = timer / 1000;
        trel.tv_usec = 1000 * (timer % 1000);

        _thread_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
}
/* Add timer event thread with "timeval" resolution */
void _thread_add_timer_tv(const struct xref_threadsched *xref,
                          struct thread_master *m, int (*func)(struct thread *),
                          void *arg, struct timeval *tv, struct thread **t_ptr)
{
        _thread_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
}
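
/* Example: thread_add_timer_msec(m, cb, arg, 2500, &ref) converts 2500 ms to
 * { .tv_sec = 2, .tv_usec = 500000 } and lands in
 * _thread_add_timer_timeval(), which adds the relative value to the current
 * monotime to obtain the absolute deadline stored in thread->u.sands.
 */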
/* Add simple event thread. */
void _thread_add_event(const struct xref_threadsched *xref,
                       struct thread_master *m, int (*func)(struct thread *),
                       void *arg, int val, struct thread **t_ptr)
{
        struct thread *thread = NULL;

        frrtrace(9, frr_libfrr, schedule_event, m,
                 xref->funcname, xref->xref.file, xref->xref.line,
                 t_ptr, 0, val, arg, 0);

        assert(m != NULL);

        frr_with_mutex(&m->mtx) {
                if (t_ptr && *t_ptr)
                        /* thread is already scheduled; don't reschedule */
                        break;

                thread = thread_get(m, THREAD_EVENT, func, arg, xref);
                frr_with_mutex(&thread->mtx) {
                        thread->u.val = val;
                        thread_list_add_tail(&m->event, thread);
                }

                if (t_ptr) {
                        *t_ptr = thread;
                        thread->ref = t_ptr;
                }

                AWAKEN(m);
        }
}
/* Thread cancellation ------------------------------------------------------ */

/**
 * NOT's out the .events field of pollfd corresponding to the given file
 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
 *
 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
 * implementation for details.
 *
 * @param master
 * @param fd
 * @param state the event to cancel. One or more (OR'd together) of the
 * following: POLLIN, POLLOUT
 */
static void thread_cancel_rw(struct thread_master *master, int fd, short state,
                             int idx_hint)
{
        bool found = false;

        /* find the index of corresponding pollfd */
        nfds_t i;

        /* Cancel POLLHUP too just in case some bozo set it */
        state |= POLLHUP;

        /* Some callers know the index of the pfd already */
        if (idx_hint >= 0) {
                i = idx_hint;
                found = true;
        } else {
                /* Have to look for the fd in the pfd array */
                for (i = 0; i < master->handler.pfdcount; i++)
                        if (master->handler.pfds[i].fd == fd) {
                                found = true;
                                break;
                        }
        }

        if (!found) {
                zlog_debug(
                        "[!] Received cancellation request for nonexistent rw job");
                zlog_debug("[!] threadmaster: %s | fd: %d",
                           master->name ? master->name : "", fd);
                return;
        }

        /* NOT out event. */
        master->handler.pfds[i].events &= ~(state);

        /* If all events are canceled, delete / resize the pollfd array. */
        if (master->handler.pfds[i].events == 0) {
                memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
                        (master->handler.pfdcount - i - 1)
                                * sizeof(struct pollfd));
                master->handler.pfdcount--;
                master->handler.pfds[master->handler.pfdcount].fd = 0;
                master->handler.pfds[master->handler.pfdcount].events = 0;
        }

        /* If we have the same pollfd in the copy, perform the same operations,
         * otherwise return. */
        if (i >= master->handler.copycount)
                return;

        master->handler.copy[i].events &= ~(state);

        if (master->handler.copy[i].events == 0) {
                memmove(master->handler.copy + i, master->handler.copy + i + 1,
                        (master->handler.copycount - i - 1)
                                * sizeof(struct pollfd));
                master->handler.copycount--;
                master->handler.copy[master->handler.copycount].fd = 0;
                master->handler.copy[master->handler.copycount].events = 0;
        }
}
/*
 * Process task cancellation given a task argument: iterate through the
 * various lists of tasks, looking for any that match the argument.
 */
static void cancel_arg_helper(struct thread_master *master,
                              const struct cancel_req *cr)
{
        struct thread *t;
        nfds_t i;
        int fd;
        struct pollfd *pfd;

        /* We're only processing arg-based cancellations here. */
        if (cr->eventobj == NULL)
                return;

        /* First process the ready lists. */
        frr_each_safe(thread_list, &master->event, t) {
                if (t->arg != cr->eventobj)
                        continue;
                thread_list_del(&master->event, t);
                if (t->ref)
                        *t->ref = NULL;
                thread_add_unuse(master, t);
        }

        frr_each_safe(thread_list, &master->ready, t) {
                if (t->arg != cr->eventobj)
                        continue;
                thread_list_del(&master->ready, t);
                if (t->ref)
                        *t->ref = NULL;
                thread_add_unuse(master, t);
        }

        /* If requested, stop here and ignore io and timers */
        if (CHECK_FLAG(cr->flags, THREAD_CANCEL_FLAG_READY))
                return;

        /* Check the io tasks */
        for (i = 0; i < master->handler.pfdcount;) {
                pfd = master->handler.pfds + i;

                if (pfd->events & POLLIN)
                        t = master->read[pfd->fd];
                else
                        t = master->write[pfd->fd];

                if (t && t->arg == cr->eventobj) {
                        fd = pfd->fd;

                        /* Found a match to cancel: clean up fd arrays */
                        thread_cancel_rw(master, pfd->fd, pfd->events, i);

                        /* Clean up thread arrays */
                        master->read[fd] = NULL;
                        master->write[fd] = NULL;

                        /* Clear caller's ref */
                        if (t->ref)
                                *t->ref = NULL;

                        thread_add_unuse(master, t);

                        /* Don't increment 'i' since the cancellation will have
                         * removed the entry from the pfd array
                         */
                } else
                        i++;
        }

        /* Check the timer tasks */
        t = thread_timer_list_first(&master->timer);
        while (t) {
                struct thread *t_next;

                t_next = thread_timer_list_next(&master->timer, t);

                if (t->arg == cr->eventobj) {
                        thread_timer_list_del(&master->timer, t);
                        if (t->ref)
                                *t->ref = NULL;
                        thread_add_unuse(master, t);
                }

                t = t_next;
        }
}
/**
 * Process cancellation requests.
 *
 * This may only be run from the pthread which owns the thread_master.
 *
 * @param master the thread master to process
 * @REQUIRE master->mtx
 */
static void do_thread_cancel(struct thread_master *master)
{
        struct thread_list_head *list = NULL;
        struct thread **thread_array = NULL;
        struct thread *thread;

        struct cancel_req *cr;
        struct listnode *ln;

        for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
                /*
                 * If this is an event object cancellation, search
                 * through task lists deleting any tasks which have the
                 * specified argument - use this handy helper function.
                 */
                if (cr->eventobj) {
                        cancel_arg_helper(master, cr);
                        continue;
                }

                /*
                 * The pointer varies depending on whether the cancellation
                 * request was made asynchronously or not. If it was, we
                 * need to check whether the thread even exists anymore
                 * before cancelling it.
                 */
                thread = (cr->thread) ? cr->thread : *cr->threadref;

                if (!thread)
                        continue;

                /* Determine the appropriate queue to cancel the thread from */
                switch (thread->type) {
                case THREAD_READ:
                        thread_cancel_rw(master, thread->u.fd, POLLIN, -1);
                        thread_array = master->read;
                        break;
                case THREAD_WRITE:
                        thread_cancel_rw(master, thread->u.fd, POLLOUT, -1);
                        thread_array = master->write;
                        break;
                case THREAD_TIMER:
                        thread_timer_list_del(&master->timer, thread);
                        break;
                case THREAD_EVENT:
                        list = &master->event;
                        break;
                case THREAD_READY:
                        list = &master->ready;
                        break;
                default:
                        continue;
                }

                if (list) {
                        thread_list_del(list, thread);
                } else if (thread_array) {
                        thread_array[thread->u.fd] = NULL;
                }

                if (thread->ref)
                        *thread->ref = NULL;

                thread_add_unuse(thread->master, thread);
        }

        /* Delete and free all cancellation requests */
        if (master->cancel_req)
                list_delete_all_node(master->cancel_req);

        /* Wake up any threads which may be blocked in thread_cancel_async() */
        master->canceled = true;
        pthread_cond_broadcast(&master->cancel_cond);
}
/*
 * Helper function used for multiple flavors of arg-based cancellation.
 */
static void cancel_event_helper(struct thread_master *m, void *arg, int flags)
{
        struct cancel_req *cr;

        assert(m->owner == pthread_self());

        /* Only worth anything if caller supplies an arg. */
        if (arg == NULL)
                return;

        cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));

        cr->flags = flags;

        frr_with_mutex(&m->mtx) {
                cr->eventobj = arg;
                listnode_add(m->cancel_req, cr);
                do_thread_cancel(m);
        }
}
/*
 * Cancel any events which have the specified argument.
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event(struct thread_master *master, void *arg)
{
        cancel_event_helper(master, arg, 0);
}

/*
 * Cancel ready tasks with an arg matching 'arg'
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event_ready(struct thread_master *m, void *arg)
{
        /* Only cancel ready/event tasks */
        cancel_event_helper(m, arg, THREAD_CANCEL_FLAG_READY);
}
/*
 * Cancel a specific task.
 *
 * MT-Unsafe
 *
 * @param thread task to cancel
 */
void thread_cancel(struct thread **thread)
{
        struct thread_master *master;

        if (thread == NULL || *thread == NULL)
                return;

        master = (*thread)->master;

        frrtrace(9, frr_libfrr, thread_cancel, master,
                 (*thread)->xref->funcname, (*thread)->xref->xref.file,
                 (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
                 (*thread)->u.val, (*thread)->arg, (*thread)->u.sands.tv_sec);

        assert(master->owner == pthread_self());

        frr_with_mutex(&master->mtx) {
                struct cancel_req *cr =
                        XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
                cr->thread = *thread;
                listnode_add(master->cancel_req, cr);
                do_thread_cancel(master);
        }

        *thread = NULL;
}
/*
 * Asynchronous cancellation.
 *
 * Called with either a struct thread ** or void * to an event argument,
 * this function posts the correct cancellation request and blocks until it is
 * serviced.
 *
 * If the thread is currently running, execution blocks until it completes.
 *
 * The last two parameters are mutually exclusive, i.e. if you pass one the
 * other must be NULL.
 *
 * When the cancellation procedure executes on the target thread_master, the
 * thread * provided is checked for nullity. If it is null, the thread is
 * assumed to no longer exist and the cancellation request is a no-op. Thus
 * users of this API must pass a back-reference when scheduling the original
 * task.
 *
 * MT-Safe
 *
 * @param master the thread master with the relevant event / task
 * @param thread pointer to thread to cancel
 * @param eventobj the event
 */
void thread_cancel_async(struct thread_master *master, struct thread **thread,
                         void *eventobj)
{
        assert(!(thread && eventobj) && (thread || eventobj));

        if (thread && *thread)
                frrtrace(9, frr_libfrr, thread_cancel_async, master,
                         (*thread)->xref->funcname, (*thread)->xref->xref.file,
                         (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
                         (*thread)->u.val, (*thread)->arg,
                         (*thread)->u.sands.tv_sec);
        else
                frrtrace(9, frr_libfrr, thread_cancel_async, master, NULL, NULL,
                         0, NULL, 0, 0, eventobj, 0);

        assert(master->owner != pthread_self());

        frr_with_mutex(&master->mtx) {
                master->canceled = false;

                if (thread) {
                        struct cancel_req *cr =
                                XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
                        cr->threadref = thread;
                        listnode_add(master->cancel_req, cr);
                } else if (eventobj) {
                        struct cancel_req *cr =
                                XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
                        cr->eventobj = eventobj;
                        listnode_add(master->cancel_req, cr);
                }
                AWAKEN(master);

                while (!master->canceled)
                        pthread_cond_wait(&master->cancel_cond, &master->mtx);
        }

        if (thread)
                *thread = NULL;
}
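
/* Note the ownership asserts above: thread_cancel() may only run on the
 * pthread that owns the thread_master, while thread_cancel_async() may only
 * be called from a *different* pthread and blocks on cancel_cond until the
 * owner services the request in do_thread_cancel().
 */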
/* ------------------------------------------------------------------------- */

static struct timeval *thread_timer_wait(struct thread_timer_list_head *timers,
                                         struct timeval *timer_val)
{
        if (!thread_timer_list_count(timers))
                return NULL;

        struct thread *next_timer = thread_timer_list_first(timers);

        monotime_until(&next_timer->u.sands, timer_val);
        return timer_val;
}
1538 static struct thread
*thread_run(struct thread_master
*m
, struct thread
*thread
,
1539 struct thread
*fetch
)
1542 thread_add_unuse(m
, thread
);
static int thread_process_io_helper(struct thread_master *m,
                                    struct thread *thread, short state,
                                    short actual_state, int pos)
{
        struct thread **thread_array;

        /*
         * poll() clears the .events field, but the pollfd array we
         * pass to poll() is a copy of the one used to schedule threads.
         * We need to synchronize state between the two here by applying
         * the same changes poll() made on the copy of the "real" pollfd
         * array.
         *
         * This cleans up a possible infinite loop where we refuse
         * to respond to a poll event but poll is insistent that
         * we should.
         */
        m->handler.pfds[pos].events &= ~(state);

        if (!thread) {
                if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
                        flog_err(EC_LIB_NO_THREAD,
                                 "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
                                 m->handler.pfds[pos].fd, actual_state);
                return 0;
        }

        if (thread->type == THREAD_READ)
                thread_array = m->read;
        else
                thread_array = m->write;

        thread_array[thread->u.fd] = NULL;
        thread_list_add_tail(&m->ready, thread);
        thread->type = THREAD_READY;

        return 1;
}
/**
 * Process I/O events.
 *
 * Walks through file descriptor array looking for those pollfds whose .revents
 * field has something interesting. Deletes any invalid file descriptors.
 *
 * @param m the thread master
 * @param num the number of active file descriptors (return value of poll())
 */
static void thread_process_io(struct thread_master *m, unsigned int num)
{
        unsigned int ready = 0;
        struct pollfd *pfds = m->handler.copy;

        for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
                /* no event for current fd? immediately continue */
                if (pfds[i].revents == 0)
                        continue;

                ready++;

                /*
                 * Unless someone has called thread_cancel from another
                 * pthread, the only thing that could have changed in
                 * m->handler.pfds while we were asleep is the .events
                 * field in a given pollfd. Barring thread_cancel() that
                 * value should be a superset of the values we have in our
                 * copy, so there's no need to update it. Similarly,
                 * barring deletion, the fd should still be a valid index
                 * into the master's pfds.
                 *
                 * We are including POLLERR here to do a READ event
                 * this is because the read should fail and the
                 * read function should handle it appropriately
                 */
                if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
                        thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
                                                 pfds[i].revents, i);
                }
                if (pfds[i].revents & POLLOUT)
                        thread_process_io_helper(m, m->write[pfds[i].fd],
                                                 POLLOUT, pfds[i].revents, i);

                /* if one of our file descriptors is garbage, remove the same
                 * from both pfds + update sizes and index */
                if (pfds[i].revents & POLLNVAL) {
                        memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
                                (m->handler.pfdcount - i - 1)
                                        * sizeof(struct pollfd));
                        m->handler.pfdcount--;
                        m->handler.pfds[m->handler.pfdcount].fd = 0;
                        m->handler.pfds[m->handler.pfdcount].events = 0;

                        memmove(pfds + i, pfds + i + 1,
                                (m->handler.copycount - i - 1)
                                        * sizeof(struct pollfd));
                        m->handler.copycount--;
                        m->handler.copy[m->handler.copycount].fd = 0;
                        m->handler.copy[m->handler.copycount].events = 0;

                        i--;
                }
        }
}
/* Add all timers that have popped to the ready list. */
static unsigned int thread_process_timers(struct thread_master *m,
                                          struct timeval *timenow)
{
        struct timeval prev = *timenow;
        bool displayed = false;
        struct thread *thread;
        unsigned int ready = 0;

        while ((thread = thread_timer_list_first(&m->timer))) {
                if (timercmp(timenow, &thread->u.sands, <))
                        break;
                prev = thread->u.sands;
                prev.tv_sec += 4;
                /*
                 * If the timer would have popped 4 seconds in the
                 * past then we are in a situation where we are
                 * really getting behind on handling of events.
                 * Let's log it and do the right thing with it.
                 */
                if (!displayed && !thread->ignore_timer_late &&
                    timercmp(timenow, &prev, >)) {
                        flog_warn(
                                EC_LIB_STARVE_THREAD,
                                "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
                                thread);
                        displayed = true;
                }

                thread_timer_list_pop(&m->timer);
                thread->type = THREAD_READY;
                thread_list_add_tail(&m->ready, thread);
                ready++;
        }

        return ready;
}
/* process a list en masse, e.g. for event thread lists */
static unsigned int thread_process(struct thread_list_head *list)
{
        struct thread *thread;
        unsigned int ready = 0;

        while ((thread = thread_list_pop(list))) {
                thread->type = THREAD_READY;
                thread_list_add_tail(&thread->master->ready, thread);
                ready++;
        }
        return ready;
}
/* Fetch next ready thread. */
struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
{
        struct thread *thread = NULL;
        struct timeval now;
        struct timeval zerotime = {0, 0};
        struct timeval tv;
        struct timeval *tw = NULL;
        bool eintr_p = false;
        int num = 0;

        do {
                /* Handle signals if any */
                if (m->handle_signals)
                        frr_sigevent_process();

                pthread_mutex_lock(&m->mtx);

                /* Process any pending cancellation requests */
                do_thread_cancel(m);

                /*
                 * Attempt to flush ready queue before going into poll().
                 * This is performance-critical. Think twice before modifying.
                 */
                if ((thread = thread_list_pop(&m->ready))) {
                        fetch = thread_run(m, thread, fetch);
                        if (fetch->ref)
                                *fetch->ref = NULL;
                        pthread_mutex_unlock(&m->mtx);
                        if (!m->ready_run_loop)
                                GETRUSAGE(&m->last_getrusage);
                        m->ready_run_loop = true;
                        break;
                }

                m->ready_run_loop = false;
                /* otherwise, tick through scheduling sequence */

                /*
                 * Post events to ready queue. This must come before the
                 * following block since events should occur immediately
                 */
                thread_process(&m->event);

                /*
                 * If there are no tasks on the ready queue, we will poll()
                 * until a timer expires or we receive I/O, whichever comes
                 * first. The strategy for doing this is:
                 *
                 * - If there are events pending, set the poll() timeout to zero
                 * - If there are no events pending, but there are timers
                 *   pending, set the timeout to the smallest remaining time on
                 *   any timer.
                 * - If there are neither timers nor events pending, but there
                 *   are file descriptors pending, block indefinitely in poll()
                 * - If nothing is pending, it's time for the application to die
                 *
                 * In every case except the last, we need to hit poll() at least
                 * once per loop to avoid starvation by events
                 */
                if (!thread_list_count(&m->ready))
                        tw = thread_timer_wait(&m->timer, &tv);

                if (thread_list_count(&m->ready) ||
                    (tw && !timercmp(tw, &zerotime, >)))
                        tw = &zerotime;

                if (!tw && m->handler.pfdcount == 0) { /* die */
                        pthread_mutex_unlock(&m->mtx);
                        fetch = NULL;
                        break;
                }

                /*
                 * Copy pollfd array + # active pollfds in it. Not necessary to
                 * copy the array size as this is fixed.
                 */
                m->handler.copycount = m->handler.pfdcount;
                memcpy(m->handler.copy, m->handler.pfds,
                       m->handler.copycount * sizeof(struct pollfd));

                pthread_mutex_unlock(&m->mtx);
                {
                        eintr_p = false;
                        num = fd_poll(m, tw, &eintr_p);
                }
                pthread_mutex_lock(&m->mtx);

                /* Handle any errors received in poll() */
                if (num < 0) {
                        if (eintr_p) {
                                pthread_mutex_unlock(&m->mtx);
                                /* loop around to signal handler */
                                continue;
                        }

                        /* else die */
                        flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
                                 safe_strerror(errno));
                        pthread_mutex_unlock(&m->mtx);
                        fetch = NULL;
                        break;
                }

                /* Post timers to ready queue. */
                monotime(&now);
                thread_process_timers(m, &now);

                /* Post I/O to ready queue. */
                if (num > 0)
                        thread_process_io(m, num);

                pthread_mutex_unlock(&m->mtx);

        } while (!thread && m->spin);

        return fetch;
}
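
/* Illustrative sketch (not compiled here) of the canonical FRR main loop
 * built on thread_fetch()/thread_call(): fetch blocks until a task is ready
 * and returns NULL only once nothing at all remains scheduled.
 */
#if 0
void event_loop(struct thread_master *master)
{
        struct thread thread;

        while (thread_fetch(master, &thread))
                thread_call(&thread);
}
#endif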
static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
{
        return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
                + (a.tv_usec - b.tv_usec));
}
unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
                                   unsigned long *cputime)
{
#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
        *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
                   + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
#else
        /* This is 'user + sys' time. */
        *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
                   + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
#endif
        return timeval_elapsed(now->real, start->real);
}
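
/* Worked example: a handler that ran for 2.5 s of wall-clock time yields
 * timeval_elapsed() == 2 * TIMER_SECOND_MICRO + 500000 = 2500000 us;
 * thread_call() below compares such values against cputime_threshold and
 * walltime_threshold, which are likewise in microseconds.
 */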
/* We should aim to yield after yield milliseconds, which defaults
   to THREAD_YIELD_TIME_SLOT .
   Note: we are using real (wall clock) time for this calculation.
   It could be argued that CPU time may make more sense in certain
   contexts.  The things to consider are whether the thread may have
   blocked (in which case wall time increases, but CPU time does not),
   or whether the system is heavily loaded with other processes competing
   for CPU time.  On balance, wall clock time seems to make sense.
   Plus it has the added benefit that gettimeofday should be faster
   than calling getrusage. */
int thread_should_yield(struct thread *thread)
{
        int result;

        frr_with_mutex(&thread->mtx) {
                result = monotime_since(&thread->real, NULL)
                         > (int64_t)thread->yield;
        }
        return result;
}
void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
{
        frr_with_mutex(&thread->mtx) {
                thread->yield = yield_time;
        }
}
void thread_getrusage(RUSAGE_T *r)
{
        monotime(&r->real);
        if (!cputime_enabled) {
                memset(&r->cpu, 0, sizeof(r->cpu));
                return;
        }

#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
        /* not currently implemented in Linux's vDSO, but maybe at some point
         * in the future?
         */
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
#if defined RUSAGE_THREAD
#define FRR_RUSAGE RUSAGE_THREAD
#else
#define FRR_RUSAGE RUSAGE_SELF
#endif
        getrusage(FRR_RUSAGE, &(r->cpu));
#endif
}
/*
 * This function will atomically update the thread's usage history. At present
 * this is the only spot where usage history is written. Nevertheless the code
 * has been written such that the introduction of writers in the future should
 * not need to update it provided the writers atomically perform only the
 * operations done here, i.e. updating the total and maximum times. In
 * particular, the maximum real and cpu times must be monotonically increasing
 * or this code is not correct.
 */
void thread_call(struct thread *thread)
{
        RUSAGE_T before, after;

        /* if the thread being called is the CLI, it may change cputime_enabled
         * ("service cputime-stats" command), which can result in nonsensical
         * and very confusing warnings
         */
        bool cputime_enabled_here = cputime_enabled;

        if (thread->master->ready_run_loop)
                before = thread->master->last_getrusage;
        else
                GETRUSAGE(&before);

        thread->real = before.real;

        frrtrace(9, frr_libfrr, thread_call, thread->master,
                 thread->xref->funcname, thread->xref->xref.file,
                 thread->xref->xref.line, NULL, thread->u.fd,
                 thread->u.val, thread->arg, thread->u.sands.tv_sec);

        pthread_setspecific(thread_current, thread);
        (*thread->func)(thread);
        pthread_setspecific(thread_current, NULL);

        GETRUSAGE(&after);
        thread->master->last_getrusage = after;

        unsigned long walltime, cputime;
        unsigned long exp;

        walltime = thread_consumed_time(&after, &before, &cputime);

        /* update walltime */
        atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
                                  memory_order_seq_cst);
        exp = atomic_load_explicit(&thread->hist->real.max,
                                   memory_order_seq_cst);
        while (exp < walltime
               && !atomic_compare_exchange_weak_explicit(
                       &thread->hist->real.max, &exp, walltime,
                       memory_order_seq_cst, memory_order_seq_cst))
                ;

        if (cputime_enabled_here && cputime_enabled) {
                /* update cputime */
                atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
                                          memory_order_seq_cst);
                exp = atomic_load_explicit(&thread->hist->cpu.max,
                                           memory_order_seq_cst);
                while (exp < cputime
                       && !atomic_compare_exchange_weak_explicit(
                               &thread->hist->cpu.max, &exp, cputime,
                               memory_order_seq_cst, memory_order_seq_cst))
                        ;
        }

        atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
                                  memory_order_seq_cst);
        atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
                                 memory_order_seq_cst);

        if (cputime_enabled_here && cputime_enabled && cputime_threshold
            && cputime > cputime_threshold) {
                /*
                 * We have a CPU Hog on our hands.  The time FRR has spent
                 * doing actual work (not sleeping) is greater than 5 seconds.
                 * Whinge about it now, so we're aware this is yet another task
                 * to fix.
                 */
                atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
                                          1, memory_order_seq_cst);
                flog_warn(
                        EC_LIB_SLOW_THREAD_CPU,
                        "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
                        thread->xref->funcname, (unsigned long)thread->func,
                        walltime / 1000, cputime / 1000);
        } else if (walltime_threshold && walltime > walltime_threshold) {
                /*
                 * The runtime for a task is greater than 5 seconds, but the
                 * cpu time is under 5 seconds.  Let's whine about this because
                 * this could imply some sort of scheduling issue.
                 */
                atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
                                          1, memory_order_seq_cst);
                flog_warn(
                        EC_LIB_SLOW_THREAD_WALL,
                        "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
                        thread->xref->funcname, (unsigned long)thread->func,
                        walltime / 1000, cputime / 1000);
        }
}
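
/* The compare-exchange loops above implement a lock-free "atomic max":
 * reload the current maximum and retry until either our sample is no longer
 * larger or the CAS succeeds, so the recorded maximum never decreases even
 * with concurrent readers.
 */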
/* Execute thread */
void _thread_execute(const struct xref_threadsched *xref,
                     struct thread_master *m, int (*func)(struct thread *),
                     void *arg, int val)
{
        struct thread *thread;

        /* Get or allocate new thread to execute. */
        frr_with_mutex(&m->mtx) {
                thread = thread_get(m, THREAD_EVENT, func, arg, xref);

                /* Set its event value. */
                frr_with_mutex(&thread->mtx) {
                        thread->add_type = THREAD_EXECUTE;
                        thread->u.val = val;
                        thread->ref = &thread;
                }
        }

        /* Execute thread doing all accounting. */
        thread_call(thread);

        /* Give back or free thread. */
        thread_add_unuse(m, thread);
}
/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
void debug_signals(const sigset_t *sigs)
{
        int i, found;
        sigset_t tmpsigs;
        char buf[300];

        /*
         * We're only looking at the non-realtime signals here, so we need
         * some limit value. Platform differences mean at some point we just
         * need to pick a reasonable value.
         */
#if defined SIGRTMIN
#  define LAST_SIGNAL SIGRTMIN
#else
#  define LAST_SIGNAL 32
#endif

        if (sigs == NULL) {
                sigemptyset(&tmpsigs);
                pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
                sigs = &tmpsigs;
        }

        found = 0;
        buf[0] = '\0';

        for (i = 0; i < LAST_SIGNAL; i++) {
                char tmp[20];

                if (sigismember(sigs, i) > 0) {
                        if (found > 0)
                                strlcat(buf, ",", sizeof(buf));
                        snprintf(tmp, sizeof(tmp), "%d", i);
                        strlcat(buf, tmp, sizeof(buf));
                        found++;
                }
        }

        if (found == 0)
                snprintf(buf, sizeof(buf), "<none>");

        zlog_debug("%s: %s", __func__, buf);
}
bool thread_is_scheduled(struct thread *thread)
{
        if (thread == NULL)
                return false;

        return true;
}
static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
                                   const struct thread *thread)
{
        static const char * const types[] = {
                [THREAD_READ] = "read",
                [THREAD_WRITE] = "write",
                [THREAD_TIMER] = "timer",
                [THREAD_EVENT] = "event",
                [THREAD_READY] = "ready",
                [THREAD_UNUSED] = "unused",
                [THREAD_EXECUTE] = "exec",
        };
        ssize_t rv = 0;
        char info[16] = "";

        if (!thread)
                return bputs(buf, "{(thread *)NULL}");

        rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);

        if (thread->type < array_size(types) && types[thread->type])
                rv += bprintfrr(buf, " %-6s", types[thread->type]);
        else
                rv += bprintfrr(buf, " INVALID(%u)", thread->type);

        switch (thread->type) {
        case THREAD_READ:
        case THREAD_WRITE:
                snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
                break;

        case THREAD_TIMER:
                snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
                break;
        }

        rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
                        thread->xref->funcname, thread->xref->dest,
                        thread->xref->xref.file, thread->xref->xref.line);
        return rv;
}
2122 printfrr_ext_autoreg_p("TH", printfrr_thread
);
2123 static ssize_t
printfrr_thread(struct fbuf
*buf
, struct printfrr_eargs
*ea
,
2126 const struct thread
*thread
= ptr
;
2127 struct timespec remain
= {};
2129 if (ea
->fmt
[0] == 'D') {
2131 return printfrr_thread_dbg(buf
, ea
, thread
);
2135 /* need to jump over time formatting flag characters in the
2136 * input format string, i.e. adjust ea->fmt!
2138 printfrr_time(buf
, ea
, &remain
,
2139 TIMEFMT_TIMER_DEADLINE
| TIMEFMT_SKIP
);
2140 return bputch(buf
, '-');
2143 TIMEVAL_TO_TIMESPEC(&thread
->u
.sands
, &remain
);
2144 return printfrr_time(buf
, ea
, &remain
, TIMEFMT_TIMER_DEADLINE
);