/* Thread management routine
 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <zebra.h>
#include <sys/resource.h>

#include "thread.h"
#include "memory.h"
#include "log.h"
#include "hash.h"
#include "pqueue.h"
#include "command.h"
#include "sigevent.h"
#include "network.h"
#include "jhash.h"
DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread")
DEFINE_MTYPE_STATIC(LIB, THREAD_MASTER, "Thread master")
DEFINE_MTYPE_STATIC(LIB, THREAD_STATS, "Thread stats")
#if defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif
#define AWAKEN(m)                                                       \
        do {                                                            \
                static unsigned char wakebyte = 0x01;                   \
                write(m->io_pipe[1], &wakebyte, 1);                     \
        } while (0);
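/* How the wakeup works: fd_poll() below always polls io_pipe[0] alongside
 * the task file descriptors, so writing one byte to io_pipe[1] (as AWAKEN()
 * does) is enough to make a blocked poll() return and let the owning
 * pthread notice newly scheduled or cancelled tasks.  Illustrative sketch
 * of the sequence (not code from this file):
 *
 *   // owner pthread:                // another pthread:
 *   poll(pfds, n + 1, -1);           write(m->io_pipe[1], &wakebyte, 1);
 *   // poll() returns; fd_poll()
 *   // drains the poker byte
 */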
/* control variable for initializer */
pthread_once_t init_once = PTHREAD_ONCE_INIT;
pthread_key_t thread_current;

pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *masters;
/* CLI start ---------------------------------------------------------------- */
static unsigned int cpu_record_hash_key(struct cpu_thread_history *a)
{
        int size = sizeof(&a->func);

        return jhash(&a->func, size, 0);
}
static int cpu_record_hash_cmp(const struct cpu_thread_history *a,
                               const struct cpu_thread_history *b)
{
        return a->func == b->func;
}
static void *cpu_record_hash_alloc(struct cpu_thread_history *a)
{
        struct cpu_thread_history *new;
        new = XCALLOC(MTYPE_THREAD_STATS, sizeof(struct cpu_thread_history));
        new->func = a->func;
        new->funcname = a->funcname;
        return new;
}
static void cpu_record_hash_free(void *a)
{
        struct cpu_thread_history *hist = a;

        XFREE(MTYPE_THREAD_STATS, hist);
}
static void vty_out_cpu_thread_history(struct vty *vty,
                                       struct cpu_thread_history *a)
{
        vty_out(vty, "%5d %10ld.%03ld %9d %8ld %9ld %8ld %9ld", a->total_active,
                a->cpu.total / 1000, a->cpu.total % 1000, a->total_calls,
                a->cpu.total / a->total_calls, a->cpu.max,
                a->real.total / a->total_calls, a->real.max);
        vty_out(vty, " %c%c%c%c%c %s\n",
                a->types & (1 << THREAD_READ) ? 'R' : ' ',
                a->types & (1 << THREAD_WRITE) ? 'W' : ' ',
                a->types & (1 << THREAD_TIMER) ? 'T' : ' ',
                a->types & (1 << THREAD_EVENT) ? 'E' : ' ',
                a->types & (1 << THREAD_EXECUTE) ? 'X' : ' ', a->funcname);
}
static void cpu_record_hash_print(struct hash_backet *bucket, void *args[])
{
        struct cpu_thread_history *totals = args[0];
        struct vty *vty = args[1];
        thread_type *filter = args[2];

        struct cpu_thread_history *a = bucket->data;

        if (!(a->types & *filter))
                return;
        vty_out_cpu_thread_history(vty, a);
        totals->total_active += a->total_active;
        totals->total_calls += a->total_calls;
        totals->real.total += a->real.total;
        if (totals->real.max < a->real.max)
                totals->real.max = a->real.max;
        totals->cpu.total += a->cpu.total;
        if (totals->cpu.max < a->cpu.max)
                totals->cpu.max = a->cpu.max;
}
static void cpu_record_print(struct vty *vty, thread_type filter)
{
        struct cpu_thread_history tmp;
        void *args[3] = {&tmp, vty, &filter};
        struct thread_master *m;
        struct listnode *ln;

        memset(&tmp, 0, sizeof tmp);
        tmp.funcname = "TOTAL";
        tmp.types = filter;

        pthread_mutex_lock(&masters_mtx);
        {
                for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
                        const char *name = m->name ? m->name : "main";

                        char underline[strlen(name) + 1];
                        memset(underline, '-', sizeof(underline));
                        /* NUL-terminate inside the buffer; indexing at
                         * sizeof(underline) would write one past the end */
                        underline[sizeof(underline) - 1] = '\0';

                        vty_out(vty, "\n");
                        vty_out(vty, "Showing statistics for pthread %s\n",
                                name);
                        vty_out(vty, "-------------------------------%s\n",
                                underline);
                        vty_out(vty, "%21s %18s %18s\n", "",
                                "CPU (user+system):", "Real (wall-clock):");
                        vty_out(vty,
                                "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
                        vty_out(vty, " Avg uSec Max uSecs");
                        vty_out(vty, "  Type  Thread\n");

                        if (m->cpu_record->count)
                                hash_iterate(
                                        m->cpu_record,
                                        (void (*)(struct hash_backet *,
                                                  void *))cpu_record_hash_print,
                                        args);
                        else
                                vty_out(vty, "No data to display yet.\n");

                        vty_out(vty, "\n");
                }
        }
        pthread_mutex_unlock(&masters_mtx);

        vty_out(vty, "\n");
        vty_out(vty, "Total thread statistics\n");
        vty_out(vty, "-------------------------\n");
        vty_out(vty, "%21s %18s %18s\n", "",
                "CPU (user+system):", "Real (wall-clock):");
        vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
        vty_out(vty, " Avg uSec Max uSecs");
        vty_out(vty, "  Type  Thread\n");

        if (tmp.total_calls > 0)
                vty_out_cpu_thread_history(vty, &tmp);
}
static void cpu_record_hash_clear(struct hash_backet *bucket, void *args[])
{
        thread_type *filter = args[0];
        struct hash *cpu_record = args[1];

        struct cpu_thread_history *a = bucket->data;

        if (!(a->types & *filter))
                return;

        hash_release(cpu_record, bucket->data);
}
static void cpu_record_clear(thread_type filter)
{
        thread_type *tmp = &filter;
        struct thread_master *m;
        struct listnode *ln;

        pthread_mutex_lock(&masters_mtx);
        {
                for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
                        pthread_mutex_lock(&m->mtx);
                        {
                                void *args[2] = {tmp, m->cpu_record};
                                hash_iterate(
                                        m->cpu_record,
                                        (void (*)(struct hash_backet *,
                                                  void *))cpu_record_hash_clear,
                                        args);
                        }
                        pthread_mutex_unlock(&m->mtx);
                }
        }
        pthread_mutex_unlock(&masters_mtx);
}
static thread_type parse_filter(const char *filterstr)
{
        int i = 0;
        int filter = 0;

        while (filterstr[i] != '\0') {
                switch (filterstr[i]) {
                case 'r':
                case 'R':
                        filter |= (1 << THREAD_READ);
                        break;
                case 'w':
                case 'W':
                        filter |= (1 << THREAD_WRITE);
                        break;
                case 't':
                case 'T':
                        filter |= (1 << THREAD_TIMER);
                        break;
                case 'e':
                case 'E':
                        filter |= (1 << THREAD_EVENT);
                        break;
                case 'x':
                case 'X':
                        filter |= (1 << THREAD_EXECUTE);
                        break;
                default:
                        break;
                }
                ++i;
        }

        return filter;
}
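/* For example (values follow directly from the cases above):
 * parse_filter("rt") returns (1 << THREAD_READ) | (1 << THREAD_TIMER),
 * and characters outside of [rwtexRWTEX] are silently ignored. */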
DEFUN (show_thread_cpu,
       show_thread_cpu_cmd,
       "show thread cpu [FILTER]",
       SHOW_STR
       "Thread information\n"
       "Thread CPU usage\n"
       "Display filter (rwtexb)\n")
{
        thread_type filter = (thread_type)-1U;
        int idx = 0;

        if (argv_find(argv, argc, "FILTER", &idx)) {
                filter = parse_filter(argv[idx]->arg);
                if (!filter) {
                        vty_out(vty,
                                "Invalid filter \"%s\" specified; must contain at least"
                                " one of 'RWTEXB'\n",
                                argv[idx]->arg);
                        return CMD_WARNING;
                }
        }

        cpu_record_print(vty, filter);
        return CMD_SUCCESS;
}
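/* Example invocation from vtysh (illustrative only): "show thread cpu rt"
 * limits the listing to read and timer tasks, while omitting FILTER keeps
 * the default of (thread_type)-1U, i.e. all task types. */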
DEFUN (clear_thread_cpu,
       clear_thread_cpu_cmd,
       "clear thread cpu [FILTER]",
       "Clear stored data in all pthreads\n"
       "Thread information\n"
       "Thread CPU usage\n"
       "Display filter (rwtexb)\n")
{
        thread_type filter = (thread_type)-1U;
        int idx = 0;

        if (argv_find(argv, argc, "FILTER", &idx)) {
                filter = parse_filter(argv[idx]->arg);
                if (!filter) {
                        vty_out(vty,
                                "Invalid filter \"%s\" specified; must contain at least"
                                " one of 'RWTEXB'\n",
                                argv[idx]->arg);
                        return CMD_WARNING;
                }
        }

        cpu_record_clear(filter);
        return CMD_SUCCESS;
}
void thread_cmd_init(void)
{
        install_element(VIEW_NODE, &show_thread_cpu_cmd);
        install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
}
/* CLI end ------------------------------------------------------------------ */
static int thread_timer_cmp(void *a, void *b)
{
        struct thread *thread_a = a;
        struct thread *thread_b = b;

        if (timercmp(&thread_a->u.sands, &thread_b->u.sands, <))
                return -1;
        if (timercmp(&thread_a->u.sands, &thread_b->u.sands, >))
                return 1;
        return 0;
}
static void thread_timer_update(void *node, int actual_position)
{
        struct thread *thread = node;

        thread->index = actual_position;
}
static void cancelreq_del(void *cr)
{
        XFREE(MTYPE_TMP, cr);
}
/* initializer, only ever called once */
static void initializer()
{
        pthread_key_create(&thread_current, NULL);
}
/* Allocate new thread master.  */
struct thread_master *thread_master_create(const char *name)
{
        struct thread_master *rv;
        struct rlimit limit;

        pthread_once(&init_once, &initializer);

        rv = XCALLOC(MTYPE_THREAD_MASTER, sizeof(struct thread_master));
        if (rv == NULL)
                return NULL;

        /* Initialize master mutex */
        pthread_mutex_init(&rv->mtx, NULL);
        pthread_cond_init(&rv->cancel_cond, NULL);

        /* Set name */
        rv->name = name ? XSTRDUP(MTYPE_THREAD_MASTER, name) : NULL;

        /* Initialize I/O task data structures */
        getrlimit(RLIMIT_NOFILE, &limit);
        rv->fd_limit = (int)limit.rlim_cur;
        rv->read =
                XCALLOC(MTYPE_THREAD, sizeof(struct thread *) * rv->fd_limit);
        if (rv->read == NULL) {
                XFREE(MTYPE_THREAD_MASTER, rv);
                return NULL;
        }
        rv->write =
                XCALLOC(MTYPE_THREAD, sizeof(struct thread *) * rv->fd_limit);
        if (rv->write == NULL) {
                XFREE(MTYPE_THREAD, rv->read);
                XFREE(MTYPE_THREAD_MASTER, rv);
                return NULL;
        }

        rv->cpu_record = hash_create_size(
                8, (unsigned int (*)(void *))cpu_record_hash_key,
                (int (*)(const void *, const void *))cpu_record_hash_cmp,
                "Thread Hash");

        /* Initialize the timer queues */
        rv->timer = pqueue_create();
        rv->timer->cmp = thread_timer_cmp;
        rv->timer->update = thread_timer_update;

        /* Initialize thread_fetch() settings */
        rv->spin = true;
        rv->handle_signals = true;

        /* Set pthread owner, should be updated by actual owner */
        rv->owner = pthread_self();
        rv->cancel_req = list_new();
        rv->cancel_req->del = cancelreq_del;
        rv->canceled = true;

        /* Initialize pipe poker */
        pipe(rv->io_pipe);
        set_nonblocking(rv->io_pipe[0]);
        set_nonblocking(rv->io_pipe[1]);

        /* Initialize data structures for poll() */
        rv->handler.pfdsize = rv->fd_limit;
        rv->handler.pfdcount = 0;
        rv->handler.pfds = XCALLOC(MTYPE_THREAD_MASTER,
                                   sizeof(struct pollfd) * rv->handler.pfdsize);
        rv->handler.copy = XCALLOC(MTYPE_THREAD_MASTER,
                                   sizeof(struct pollfd) * rv->handler.pfdsize);

        /* add to list of threadmasters */
        pthread_mutex_lock(&masters_mtx);
        {
                if (!masters)
                        masters = list_new();

                listnode_add(masters, rv);
        }
        pthread_mutex_unlock(&masters_mtx);

        return rv;
}
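/* Usage sketch (illustrative, not part of this file): every pthread that
 * runs an event loop allocates its own master, optionally named so the
 * 'show thread cpu' output can label it:
 *
 *   struct thread_master *master = thread_master_create("main");
 *
 * An unnamed master (NULL) is labeled "main" by cpu_record_print() above. */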
/* Add a new thread to the list.  */
static void thread_list_add(struct thread_list *list, struct thread *thread)
{
        thread->next = NULL;
        thread->prev = list->tail;
        if (list->tail)
                list->tail->next = thread;
        else
                list->head = thread;
        list->tail = thread;
        list->count++;
}
/* Delete a thread from the list. */
static struct thread *thread_list_delete(struct thread_list *list,
                                         struct thread *thread)
{
        if (thread->next)
                thread->next->prev = thread->prev;
        else
                list->tail = thread->prev;
        if (thread->prev)
                thread->prev->next = thread->next;
        else
                list->head = thread->next;
        thread->next = thread->prev = NULL;
        list->count--;
        return thread;
}
/* Thread list is empty or not.  */
static int thread_empty(struct thread_list *list)
{
        return list->head ? 0 : 1;
}
/* Delete top of the list and return it. */
static struct thread *thread_trim_head(struct thread_list *list)
{
        if (!thread_empty(list))
                return thread_list_delete(list, list->head);
        return NULL;
}
/* Move thread to unuse list. */
static void thread_add_unuse(struct thread_master *m, struct thread *thread)
{
        assert(m != NULL && thread != NULL);
        assert(thread->next == NULL);
        assert(thread->prev == NULL);
        thread->ref = NULL;

        thread->type = THREAD_UNUSED;
        thread->hist->total_active--;
        thread_list_add(&m->unuse, thread);
}
/* Free all unused thread. */
static void thread_list_free(struct thread_master *m, struct thread_list *list)
{
        struct thread *t;
        struct thread *next;

        for (t = list->head; t; t = next) {
                next = t->next;
                XFREE(MTYPE_THREAD, t);
                list->count--;
                m->alloc--;
        }
}
static void thread_array_free(struct thread_master *m,
                              struct thread **thread_array)
{
        struct thread *t;
        int index;

        for (index = 0; index < m->fd_limit; ++index) {
                t = thread_array[index];
                if (t) {
                        thread_array[index] = NULL;
                        XFREE(MTYPE_THREAD, t);
                        m->alloc--;
                }
        }
        XFREE(MTYPE_THREAD, thread_array);
}
static void thread_queue_free(struct thread_master *m, struct pqueue *queue)
{
        int i;

        for (i = 0; i < queue->size; i++)
                XFREE(MTYPE_THREAD, queue->array[i]);

        m->alloc -= queue->size;
        pqueue_delete(queue);
}
/*
 * thread_master_free_unused
 *
 * As threads are finished with, they are put on the
 * unuse list for later reuse.
 * When shutting down, free up the unused threads
 * so we can see if anything was forgotten to be shut off.
 */
void thread_master_free_unused(struct thread_master *m)
{
        pthread_mutex_lock(&m->mtx);
        {
                struct thread *t;
                while ((t = thread_trim_head(&m->unuse)) != NULL) {
                        pthread_mutex_destroy(&t->mtx);
                        XFREE(MTYPE_THREAD, t);
                }
        }
        pthread_mutex_unlock(&m->mtx);
}
/* Stop thread scheduler. */
void thread_master_free(struct thread_master *m)
{
        pthread_mutex_lock(&masters_mtx);
        {
                listnode_delete(masters, m);
                if (masters->count == 0) {
                        list_delete_and_null(&masters);
                }
        }
        pthread_mutex_unlock(&masters_mtx);

        thread_array_free(m, m->read);
        thread_array_free(m, m->write);
        thread_queue_free(m, m->timer);
        thread_list_free(m, &m->event);
        thread_list_free(m, &m->ready);
        thread_list_free(m, &m->unuse);
        pthread_mutex_destroy(&m->mtx);
        pthread_cond_destroy(&m->cancel_cond);
        close(m->io_pipe[0]);
        close(m->io_pipe[1]);
        list_delete_and_null(&m->cancel_req);
        m->cancel_req = NULL;

        hash_clean(m->cpu_record, cpu_record_hash_free);
        hash_free(m->cpu_record);
        m->cpu_record = NULL;

        if (m->name)
                XFREE(MTYPE_THREAD_MASTER, m->name);
        XFREE(MTYPE_THREAD_MASTER, m->handler.pfds);
        XFREE(MTYPE_THREAD_MASTER, m->handler.copy);
        XFREE(MTYPE_THREAD_MASTER, m);
}
/* Return remaining time in seconds. */
unsigned long thread_timer_remain_second(struct thread *thread)
{
        int64_t remain;

        pthread_mutex_lock(&thread->mtx);
        {
                remain = monotime_until(&thread->u.sands, NULL) / 1000000LL;
        }
        pthread_mutex_unlock(&thread->mtx);

        return remain < 0 ? 0 : remain;
}
#define debugargdef const char *funcname, const char *schedfrom, int fromln
#define debugargpass funcname, schedfrom, fromln
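/* These two macros thread scheduling-source information through every
 * funcname_thread_add_* call below.  The public thread_add_* wrappers live
 * in thread.h, not here; presumably along the lines of this sketch (macro
 * name and argument order are from memory, not confirmed by this file):
 *
 *   #define thread_add_read(m, f, a, v, t)                              \
 *           funcname_thread_add_read_write(THREAD_READ, m, f, a, v, t,  \
 *                                          #f, __FILE__, __LINE__)
 */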
struct timeval thread_timer_remain(struct thread *thread)
{
        struct timeval remain;
        pthread_mutex_lock(&thread->mtx);
        {
                monotime_until(&thread->u.sands, &remain);
        }
        pthread_mutex_unlock(&thread->mtx);
        return remain;
}
/* Get new thread.  */
static struct thread *thread_get(struct thread_master *m, u_char type,
                                 int (*func)(struct thread *), void *arg,
                                 debugargdef)
{
        struct thread *thread = thread_trim_head(&m->unuse);
        struct cpu_thread_history tmp;

        if (!thread) {
                thread = XCALLOC(MTYPE_THREAD, sizeof(struct thread));
                /* mutex only needs to be initialized at struct creation. */
                pthread_mutex_init(&thread->mtx, NULL);
                m->alloc++;
        }

        thread->type = type;
        thread->add_type = type;
        thread->master = m;
        thread->arg = arg;
        thread->index = -1;
        thread->yield = THREAD_YIELD_TIME_SLOT; /* default */
        thread->ref = NULL;

        /*
         * So if the passed in funcname is not what we have
         * stored that means the thread->hist needs to be
         * updated.  We keep the last one around in unused
         * under the assumption that we are probably
         * going to immediately allocate the same
         * one up again.
         *
         * This hopefully saves us some serious
         * hash_get lookups.
         */
        if (thread->funcname != funcname || thread->func != func) {
                tmp.func = func;
                tmp.funcname = funcname;
                thread->hist =
                        hash_get(m->cpu_record, &tmp,
                                 (void *(*)(void *))cpu_record_hash_alloc);
        }
        thread->hist->total_active++;
        thread->func = func;
        thread->funcname = funcname;
        thread->schedfrom = schedfrom;
        thread->schedfrom_line = fromln;

        return thread;
}
static int fd_poll(struct thread_master *m, struct pollfd *pfds, nfds_t pfdsize,
                   nfds_t count, const struct timeval *timer_wait)
{
        /* If timer_wait is null here, that means poll() should block
         * indefinitely, unless the thread_master has overridden it by setting
         * ->selectpoll_timeout.
         * If the value is positive, it specifies the maximum number of
         * milliseconds to wait. If the timeout is -1, it specifies that we
         * should never wait and always return immediately even if no event
         * is detected. If the value is zero, the behavior is default. */
        int timeout = -1;

        /* number of file descriptors with events */
        int num;

        if (timer_wait != NULL
            && m->selectpoll_timeout == 0) // use the default value
                timeout = (timer_wait->tv_sec * 1000)
                          + (timer_wait->tv_usec / 1000);
        else if (m->selectpoll_timeout > 0) // use the user's timeout
                timeout = m->selectpoll_timeout;
        else if (m->selectpoll_timeout
                 < 0) // effect a poll (return immediately)
                timeout = 0;

        /* add poll pipe poker */
        assert(count + 1 < pfdsize);
        pfds[count].fd = m->io_pipe[0];
        pfds[count].events = POLLIN;
        pfds[count].revents = 0x00;

        num = poll(pfds, count + 1, timeout);

        unsigned char trash[64];
        if (num > 0 && pfds[count].revents != 0 && num--)
                while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
                        ;

        return num;
}
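/* Worked example of the timeout conversion above (values illustrative):
 * with selectpoll_timeout == 0 and timer_wait == { .tv_sec = 1,
 * .tv_usec = 500000 }, poll() is given 1 * 1000 + 500000 / 1000 = 1500 ms. */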
/* Add new read/write thread. */
struct thread *funcname_thread_add_read_write(int dir, struct thread_master *m,
                                              int (*func)(struct thread *),
                                              void *arg, int fd,
                                              struct thread **t_ptr,
                                              debugargdef)
{
        struct thread *thread = NULL;

        pthread_mutex_lock(&m->mtx);
        {
                if (t_ptr
                    && *t_ptr) // thread is already scheduled; don't reschedule
                {
                        pthread_mutex_unlock(&m->mtx);
                        return NULL;
                }

                /* default to a new pollfd */
                nfds_t queuepos = m->handler.pfdcount;

                /* if we already have a pollfd for our file descriptor, find and
                 * use it */
                for (nfds_t i = 0; i < m->handler.pfdcount; i++)
                        if (m->handler.pfds[i].fd == fd) {
                                queuepos = i;
                                break;
                        }

                /* make sure we have room for this fd + pipe poker fd */
                assert(queuepos + 1 < m->handler.pfdsize);

                thread = thread_get(m, dir, func, arg, debugargpass);

                m->handler.pfds[queuepos].fd = fd;
                m->handler.pfds[queuepos].events |=
                        (dir == THREAD_READ ? POLLIN : POLLOUT);

                if (queuepos == m->handler.pfdcount)
                        m->handler.pfdcount++;

                if (thread) {
                        pthread_mutex_lock(&thread->mtx);
                        {
                                thread->u.fd = fd;
                                if (dir == THREAD_READ)
                                        m->read[thread->u.fd] = thread;
                                else
                                        m->write[thread->u.fd] = thread;
                        }
                        pthread_mutex_unlock(&thread->mtx);

                        if (t_ptr) {
                                *t_ptr = thread;
                                thread->ref = t_ptr;
                        }
                }

                AWAKEN(m);
        }
        pthread_mutex_unlock(&m->mtx);

        return thread;
}
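/* Illustrative caller (sketch; thread_add_read is the thread.h wrapper, and
 * peer_read / peer / peer->t_read are hypothetical names):
 *
 *   thread_add_read(master, peer_read, peer, peer->fd, &peer->t_read);
 *
 * Passing &peer->t_read gives the library a back-reference it can NULL out
 * when the task runs or is cancelled. */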
static struct thread *
funcname_thread_add_timer_timeval(struct thread_master *m,
                                  int (*func)(struct thread *), int type,
                                  void *arg, struct timeval *time_relative,
                                  struct thread **t_ptr, debugargdef)
{
        struct thread *thread;
        struct pqueue *queue;

        assert(m != NULL);

        assert(type == THREAD_TIMER);
        assert(time_relative);

        pthread_mutex_lock(&m->mtx);
        {
                if (t_ptr
                    && *t_ptr) // thread is already scheduled; don't reschedule
                {
                        pthread_mutex_unlock(&m->mtx);
                        return NULL;
                }

                queue = m->timer;
                thread = thread_get(m, type, func, arg, debugargpass);

                pthread_mutex_lock(&thread->mtx);
                {
                        monotime(&thread->u.sands);
                        timeradd(&thread->u.sands, time_relative,
                                 &thread->u.sands);
                        pqueue_enqueue(thread, queue);
                        if (t_ptr) {
                                *t_ptr = thread;
                                thread->ref = t_ptr;
                        }
                }
                pthread_mutex_unlock(&thread->mtx);

                AWAKEN(m);
        }
        pthread_mutex_unlock(&m->mtx);

        return thread;
}
/* Add timer event thread. */
struct thread *funcname_thread_add_timer(struct thread_master *m,
                                         int (*func)(struct thread *),
                                         void *arg, long timer,
                                         struct thread **t_ptr, debugargdef)
{
        struct timeval trel;

        assert(m != NULL);

        trel.tv_sec = timer;
        trel.tv_usec = 0;

        return funcname_thread_add_timer_timeval(m, func, THREAD_TIMER, arg,
                                                 &trel, t_ptr, debugargpass);
}
/* Add timer event thread with "millisecond" resolution */
struct thread *funcname_thread_add_timer_msec(struct thread_master *m,
                                              int (*func)(struct thread *),
                                              void *arg, long timer,
                                              struct thread **t_ptr,
                                              debugargdef)
{
        struct timeval trel;

        assert(m != NULL);

        trel.tv_sec = timer / 1000;
        trel.tv_usec = 1000 * (timer % 1000);

        return funcname_thread_add_timer_timeval(m, func, THREAD_TIMER, arg,
                                                 &trel, t_ptr, debugargpass);
}
/* Add timer event thread with "timeval" resolution */
struct thread *funcname_thread_add_timer_tv(struct thread_master *m,
                                            int (*func)(struct thread *),
                                            void *arg, struct timeval *tv,
                                            struct thread **t_ptr, debugargdef)
{
        return funcname_thread_add_timer_timeval(m, func, THREAD_TIMER, arg,
                                                 tv, t_ptr, debugargpass);
}
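/* Timer usage sketch (thread_add_timer / thread_add_timer_msec are the
 * thread.h wrappers; hello_expire and ctx are hypothetical):
 *
 *   thread_add_timer(master, hello_expire, ctx, 5, &ctx->t_hello);
 *   thread_add_timer_msec(master, hello_expire, ctx, 250, &ctx->t_hello);
 *
 * Both funnel into funcname_thread_add_timer_timeval() above, which turns
 * the relative interval into an absolute monotonic deadline. */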
/* Add simple event thread. */
struct thread *funcname_thread_add_event(struct thread_master *m,
                                         int (*func)(struct thread *),
                                         void *arg, int val,
                                         struct thread **t_ptr, debugargdef)
{
        struct thread *thread;

        assert(m != NULL);

        pthread_mutex_lock(&m->mtx);
        {
                if (t_ptr
                    && *t_ptr) // thread is already scheduled; don't reschedule
                {
                        pthread_mutex_unlock(&m->mtx);
                        return NULL;
                }

                thread = thread_get(m, THREAD_EVENT, func, arg, debugargpass);
                pthread_mutex_lock(&thread->mtx);
                {
                        thread->u.val = val;
                        thread_list_add(&m->event, thread);
                }
                pthread_mutex_unlock(&thread->mtx);

                if (t_ptr) {
                        *t_ptr = thread;
                        thread->ref = t_ptr;
                }

                AWAKEN(m);
        }
        pthread_mutex_unlock(&m->mtx);

        return thread;
}
/* Thread cancellation ------------------------------------------------------ */
/**
 * NOT's out the .events field of pollfd corresponding to the given file
 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
 *
 * This needs to happen for both copies of pollfd's. See 'thread_fetch'
 * implementation for details.
 *
 * @param master
 * @param fd
 * @param state the event to cancel. One or more (OR'd together) of the
 * following:
 *   - POLLIN
 *   - POLLOUT
 */
static void thread_cancel_rw(struct thread_master *master, int fd, short state)
{
        /* Cancel POLLHUP too just in case some bozo set it */
        state |= POLLHUP;

        /* find the index of corresponding pollfd */
        nfds_t i;

        for (i = 0; i < master->handler.pfdcount; i++)
                if (master->handler.pfds[i].fd == fd)
                        break;

        /* NOT out event. */
        master->handler.pfds[i].events &= ~(state);

        /* If all events are canceled, delete / resize the pollfd array. */
        if (master->handler.pfds[i].events == 0) {
                memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
                        (master->handler.pfdcount - i - 1)
                                * sizeof(struct pollfd));
                master->handler.pfdcount--;
        }

        /* If we have the same pollfd in the copy, perform the same operations,
         * otherwise return. */
        if (i >= master->handler.copycount)
                return;

        master->handler.copy[i].events &= ~(state);

        if (master->handler.copy[i].events == 0) {
                memmove(master->handler.copy + i, master->handler.copy + i + 1,
                        (master->handler.copycount - i - 1)
                                * sizeof(struct pollfd));
                master->handler.copycount--;
        }
}
/**
 * Process cancellation requests.
 *
 * This may only be run from the pthread which owns the thread_master.
 *
 * @param master the thread master to process
 * @REQUIRE master->mtx
 */
static void do_thread_cancel(struct thread_master *master)
{
        struct thread_list *list = NULL;
        struct pqueue *queue = NULL;
        struct thread **thread_array = NULL;
        struct thread *thread;

        struct cancel_req *cr;
        struct listnode *ln;
        for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
                /* If this is an event object cancellation, linear search
                 * through the event list deleting any events which have the
                 * specified argument. We also need to check every thread in
                 * the ready queue. */
                if (cr->eventobj) {
                        struct thread *t;

                        thread = master->event.head;
                        while (thread) {
                                t = thread;
                                thread = t->next;

                                if (t->arg == cr->eventobj) {
                                        thread_list_delete(&master->event, t);
                                        if (t->ref)
                                                *t->ref = NULL;
                                        thread_add_unuse(master, t);
                                }
                        }

                        thread = master->ready.head;
                        while (thread) {
                                t = thread;
                                thread = t->next;

                                if (t->arg == cr->eventobj) {
                                        thread_list_delete(&master->ready, t);
                                        if (t->ref)
                                                *t->ref = NULL;
                                        thread_add_unuse(master, t);
                                }
                        }
                        continue;
                }

                /* The pointer varies depending on whether the cancellation
                 * request was made asynchronously or not. If it was, we need
                 * to check whether the thread even exists anymore before
                 * cancelling it. */
                thread = (cr->thread) ? cr->thread : *cr->threadref;

                if (!thread)
                        continue;

                /* Determine the appropriate queue to cancel the thread from */
                switch (thread->type) {
                case THREAD_READ:
                        thread_cancel_rw(master, thread->u.fd, POLLIN);
                        thread_array = master->read;
                        break;
                case THREAD_WRITE:
                        thread_cancel_rw(master, thread->u.fd, POLLOUT);
                        thread_array = master->write;
                        break;
                case THREAD_TIMER:
                        queue = master->timer;
                        break;
                case THREAD_EVENT:
                        list = &master->event;
                        break;
                case THREAD_READY:
                        list = &master->ready;
                        break;
                default:
                        continue;
                }

                if (queue) {
                        assert(thread->index >= 0);
                        pqueue_remove(thread, queue);
                } else if (list) {
                        thread_list_delete(list, thread);
                } else if (thread_array) {
                        thread_array[thread->u.fd] = NULL;
                } else {
                        assert(!"Thread should be either in queue or list or array!");
                }

                if (thread->ref)
                        *thread->ref = NULL;

                thread_add_unuse(thread->master, thread);
        }

        /* Delete and free all cancellation requests */
        list_delete_all_node(master->cancel_req);

        /* Wake up any threads which may be blocked in thread_cancel_async() */
        master->canceled = true;
        pthread_cond_broadcast(&master->cancel_cond);
}
/**
 * Cancel any events which have the specified argument.
 *
 * MT-Unsafe
 *
 * @param m the thread_master to cancel from
 * @param arg the argument passed when creating the event
 */
void thread_cancel_event(struct thread_master *master, void *arg)
{
        assert(master->owner == pthread_self());

        pthread_mutex_lock(&master->mtx);
        {
                struct cancel_req *cr =
                        XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
                cr->eventobj = arg;
                listnode_add(master->cancel_req, cr);
                do_thread_cancel(master);
        }
        pthread_mutex_unlock(&master->mtx);
}
/**
 * Cancel a specific task.
 *
 * MT-Unsafe
 *
 * @param thread task to cancel
 */
void thread_cancel(struct thread *thread)
{
        assert(thread->master->owner == pthread_self());

        pthread_mutex_lock(&thread->master->mtx);
        {
                struct cancel_req *cr =
                        XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
                cr->thread = thread;
                listnode_add(thread->master->cancel_req, cr);
                do_thread_cancel(thread->master);
        }
        pthread_mutex_unlock(&thread->master->mtx);
}
/**
 * Asynchronous cancellation.
 *
 * Called with either a struct thread ** or void * to an event argument,
 * this function posts the correct cancellation request and blocks until it is
 * serviced.
 *
 * If the thread is currently running, execution blocks until it completes.
 *
 * The last two parameters are mutually exclusive, i.e. if you pass one the
 * other must be NULL.
 *
 * When the cancellation procedure executes on the target thread_master, the
 * thread * provided is checked for nullity. If it is null, the thread is
 * assumed to no longer exist and the cancellation request is a no-op. Thus
 * users of this API must pass a back-reference when scheduling the original
 * task.
 *
 * MT-Safe
 *
 * @param master the thread master with the relevant event / task
 * @param thread pointer to thread to cancel
 * @param eventobj the event
 */
void thread_cancel_async(struct thread_master *master, struct thread **thread,
                         void *eventobj)
{
        assert(!(thread && eventobj) && (thread || eventobj));
        assert(master->owner != pthread_self());

        pthread_mutex_lock(&master->mtx);
        {
                master->canceled = false;

                if (thread) {
                        struct cancel_req *cr =
                                XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
                        cr->threadref = thread;
                        listnode_add(master->cancel_req, cr);
                } else if (eventobj) {
                        struct cancel_req *cr =
                                XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
                        cr->eventobj = eventobj;
                        listnode_add(master->cancel_req, cr);
                }
                AWAKEN(master);

                while (!master->canceled)
                        pthread_cond_wait(&master->cancel_cond, &master->mtx);
        }
        pthread_mutex_unlock(&master->mtx);
}
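/* Back-reference pattern required by thread_cancel_async() (sketch;
 * peer / t_read names are hypothetical): the owning pthread schedules
 * through a stable slot, and another pthread cancels through the same slot:
 *
 *   // owner pthread:
 *   thread_add_read(master, peer_read, peer, peer->fd, &peer->t_read);
 *
 *   // some other pthread:
 *   thread_cancel_async(master, &peer->t_read, NULL);
 */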
/* ------------------------------------------------------------------------- */
static struct timeval *thread_timer_wait(struct pqueue *queue,
                                         struct timeval *timer_val)
{
        if (queue->size) {
                struct thread *next_timer = queue->array[0];
                monotime_until(&next_timer->u.sands, timer_val);
                return timer_val;
        }
        return NULL;
}
static struct thread *thread_run(struct thread_master *m, struct thread *thread,
                                 struct thread *fetch)
{
        *fetch = *thread;
        thread_add_unuse(m, thread);
        return fetch;
}
static int thread_process_io_helper(struct thread_master *m,
                                    struct thread *thread, short state, int pos)
{
        struct thread **thread_array;

        if (!thread)
                return 0;

        if (thread->type == THREAD_READ)
                thread_array = m->read;
        else
                thread_array = m->write;

        thread_array[thread->u.fd] = NULL;
        thread_list_add(&m->ready, thread);
        thread->type = THREAD_READY;
        /* if another pthread scheduled this file descriptor for the event we're
         * responding to, no problem; we're getting to it now */
        thread->master->handler.pfds[pos].events &= ~(state);
        return 1;
}
/**
 * Process I/O events.
 *
 * Walks through file descriptor array looking for those pollfds whose .revents
 * field has something interesting. Deletes any invalid file descriptors.
 *
 * @param m the thread master
 * @param num the number of active file descriptors (return value of poll())
 */
static void thread_process_io(struct thread_master *m, unsigned int num)
{
        unsigned int ready = 0;
        struct pollfd *pfds = m->handler.copy;

        for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
                /* no event for current fd? immediately continue */
                if (pfds[i].revents == 0)
                        continue;

                ready++;

                /* Unless someone has called thread_cancel from another
                 * pthread, the only thing that could have changed in
                 * m->handler.pfds while we were asleep is the .events field
                 * in a given pollfd. Barring thread_cancel(), that value
                 * should be a superset of the values we have in our copy, so
                 * there's no need to update it. Similarly, barring deletion,
                 * the fd should still be a valid index into the master's
                 * pfds. */
                if (pfds[i].revents & (POLLIN | POLLHUP))
                        thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
                                                 i);
                if (pfds[i].revents & POLLOUT)
                        thread_process_io_helper(m, m->write[pfds[i].fd],
                                                 POLLOUT, i);

                /* if one of our file descriptors is garbage, remove the same
                 * from both pfds + update sizes and index */
                if (pfds[i].revents & POLLNVAL) {
                        memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
                                (m->handler.pfdcount - i - 1)
                                        * sizeof(struct pollfd));
                        m->handler.pfdcount--;

                        memmove(pfds + i, pfds + i + 1,
                                (m->handler.copycount - i - 1)
                                        * sizeof(struct pollfd));
                        m->handler.copycount--;

                        i--;
                }
        }
}
/* Add all timers that have popped to the ready list. */
static unsigned int thread_process_timers(struct pqueue *queue,
                                          struct timeval *timenow)
{
        struct thread *thread;
        unsigned int ready = 0;

        while (queue->size) {
                thread = queue->array[0];
                if (timercmp(timenow, &thread->u.sands, <))
                        return ready;
                pqueue_dequeue(queue);
                thread->type = THREAD_READY;
                thread_list_add(&thread->master->ready, thread);
                ready++;
        }

        return ready;
}
/* process a list en masse, e.g. for event thread lists */
static unsigned int thread_process(struct thread_list *list)
{
        struct thread *thread;
        struct thread *next;
        unsigned int ready = 0;

        for (thread = list->head; thread; thread = next) {
                next = thread->next;
                thread_list_delete(list, thread);
                thread->type = THREAD_READY;
                thread_list_add(&thread->master->ready, thread);
                ready++;
        }

        return ready;
}
/* Fetch next ready thread. */
struct thread *thread_fetch(struct thread_master *m, struct thread *fetch)
{
        struct thread *thread = NULL;
        struct timeval now;
        struct timeval zerotime = {0, 0};
        struct timeval tv;
        struct timeval *tw = NULL;

        int num = 0;

        do {
                /* Handle signals if any */
                if (m->handle_signals)
                        quagga_sigevent_process();

                pthread_mutex_lock(&m->mtx);

                /* Process any pending cancellation requests */
                do_thread_cancel(m);

                /*
                 * Attempt to flush ready queue before going into poll().
                 * This is performance-critical. Think twice before modifying.
                 */
                if ((thread = thread_trim_head(&m->ready))) {
                        fetch = thread_run(m, thread, fetch);
                        if (fetch->ref)
                                *fetch->ref = NULL;
                        pthread_mutex_unlock(&m->mtx);
                        break;
                }

                /* otherwise, tick through scheduling sequence */

                /*
                 * Post events to ready queue. This must come before the
                 * following block since events should occur immediately
                 */
                thread_process(&m->event);

                /*
                 * If there are no tasks on the ready queue, we will poll()
                 * until a timer expires or we receive I/O, whichever comes
                 * first. The strategy for doing this is:
                 *
                 * - If there are events pending, set the poll() timeout to zero
                 * - If there are no events pending, but there are timers
                 *   pending, set the timeout to the smallest remaining time on
                 *   any timer
                 * - If there are neither timers nor events pending, but there
                 *   are file descriptors pending, block indefinitely in poll()
                 * - If nothing is pending, it's time for the application to die
                 *
                 * In every case except the last, we need to hit poll() at least
                 * once per loop to avoid starvation by events
                 */
                if (m->ready.count == 0)
                        tw = thread_timer_wait(m->timer, &tv);

                if (m->ready.count != 0 || (tw && !timercmp(tw, &zerotime, >)))
                        tw = &zerotime;

                if (!tw && m->handler.pfdcount == 0) { /* die */
                        pthread_mutex_unlock(&m->mtx);
                        fetch = NULL;
                        break;
                }

                /*
                 * Copy pollfd array + # active pollfds in it. Not necessary to
                 * copy the array size as this is fixed.
                 */
                m->handler.copycount = m->handler.pfdcount;
                memcpy(m->handler.copy, m->handler.pfds,
                       m->handler.copycount * sizeof(struct pollfd));

                pthread_mutex_unlock(&m->mtx);
                {
                        num = fd_poll(m, m->handler.copy, m->handler.pfdsize,
                                      m->handler.copycount, tw);
                }
                pthread_mutex_lock(&m->mtx);

                /* Handle any errors received in poll() */
                if (num < 0) {
                        if (errno == EINTR) {
                                pthread_mutex_unlock(&m->mtx);
                                /* loop around to signal handler */
                                continue;
                        }

                        /* else die */
                        zlog_warn("poll() error: %s", safe_strerror(errno));
                        pthread_mutex_unlock(&m->mtx);
                        fetch = NULL;
                        break;
                }

                /* Post timers to ready queue. */
                monotime(&now);
                thread_process_timers(m->timer, &now);

                /* Post I/O to ready queue. */
                if (num > 0)
                        thread_process_io(m, num);

                pthread_mutex_unlock(&m->mtx);

        } while (!thread && m->spin);

        return fetch;
}
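/* The canonical consumer loop (sketch; this is how daemons typically drive
 * a master, though the surrounding code is not part of this file):
 *
 *   struct thread thread;
 *
 *   while (thread_fetch(master, &thread))
 *           thread_call(&thread);
 *
 * thread_fetch() returns NULL once nothing is pending, ending the loop. */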
static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
{
        return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
                + (a.tv_usec - b.tv_usec));
}
unsigned long thread_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
                                   unsigned long *cputime)
{
        /* This is 'user + sys' time. */
        *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
                   + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
        return timeval_elapsed(now->real, start->real);
}
/* We should aim to yield after yield milliseconds, which defaults
   to THREAD_YIELD_TIME_SLOT .
   Note: we are using real (wall clock) time for this calculation.
   It could be argued that CPU time may make more sense in certain
   contexts.  The things to consider are whether the thread may have
   blocked (in which case wall time increases, but CPU time does not),
   or whether the system is heavily loaded with other processes competing
   for CPU time.  On balance, wall clock time seems to make sense.
   Plus it has the added benefit that gettimeofday should be faster
   than calling getrusage. */
int thread_should_yield(struct thread *thread)
{
        int result;
        pthread_mutex_lock(&thread->mtx);
        {
                result = monotime_since(&thread->real, NULL)
                         > (int64_t)thread->yield;
        }
        pthread_mutex_unlock(&thread->mtx);
        return result;
}
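/* Cooperative-yield pattern this enables (sketch; work_chunk, do_some_work,
 * more_work and ctx->t_work are hypothetical): a long-running task
 * periodically checks whether its time slot is used up and reschedules
 * itself instead of starving the rest of the event loop:
 *
 *   static int work_chunk(struct thread *t)
 *   {
 *           struct ctx *ctx = THREAD_ARG(t);
 *
 *           while (more_work(ctx)) {
 *                   do_some_work(ctx);
 *                   if (thread_should_yield(t)) {
 *                           thread_add_event(t->master, work_chunk, ctx,
 *                                            0, &ctx->t_work);
 *                           break;
 *                   }
 *           }
 *           return 0;
 *   }
 */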
void thread_set_yield_time(struct thread *thread, unsigned long yield_time)
{
        pthread_mutex_lock(&thread->mtx);
        thread->yield = yield_time;
        pthread_mutex_unlock(&thread->mtx);
}
void thread_getrusage(RUSAGE_T *r)
{
        monotime(&r->real);
        getrusage(RUSAGE_SELF, &(r->cpu));
}
/* We check thread consumed time. If the system has getrusage, we'll
   use that to get in-depth stats on the performance of the thread in addition
   to wall clock time stats from gettimeofday. */
void thread_call(struct thread *thread)
{
        unsigned long realtime, cputime;
        RUSAGE_T before, after;

        GETRUSAGE(&before);
        thread->real = before.real;

        pthread_setspecific(thread_current, thread);
        (*thread->func)(thread);
        pthread_setspecific(thread_current, NULL);

        GETRUSAGE(&after);

        realtime = thread_consumed_time(&after, &before, &cputime);
        thread->hist->real.total += realtime;
        if (thread->hist->real.max < realtime)
                thread->hist->real.max = realtime;
        thread->hist->cpu.total += cputime;
        if (thread->hist->cpu.max < cputime)
                thread->hist->cpu.max = cputime;

        ++(thread->hist->total_calls);
        thread->hist->types |= (1 << thread->add_type);

#ifdef CONSUMED_TIME_CHECK
        if (realtime > CONSUMED_TIME_CHECK) {
                /*
                 * We have a CPU Hog on our hands.
                 * Whinge about it now, so we're aware this is yet another task
                 * to fix.
                 */
                zlog_warn(
                        "SLOW THREAD: task %s (%lx) ran for %lums (cpu time %lums)",
                        thread->funcname, (unsigned long)thread->func,
                        realtime / 1000, cputime / 1000);
        }
#endif /* CONSUMED_TIME_CHECK */
}
/* Execute thread */
void funcname_thread_execute(struct thread_master *m,
                             int (*func)(struct thread *), void *arg, int val,
                             debugargdef)
{
        struct cpu_thread_history tmp;
        struct thread dummy;

        memset(&dummy, 0, sizeof(struct thread));

        pthread_mutex_init(&dummy.mtx, NULL);
        dummy.type = THREAD_EVENT;
        dummy.add_type = THREAD_EXECUTE;
        dummy.master = NULL;
        dummy.arg = arg;
        dummy.u.val = val;

        tmp.func = dummy.func = func;
        tmp.funcname = dummy.funcname = funcname;
        dummy.hist = hash_get(m->cpu_record, &tmp,
                              (void *(*)(void *))cpu_record_hash_alloc);

        dummy.schedfrom = schedfrom;
        dummy.schedfrom_line = fromln;

        thread_call(&dummy);
}
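/* Usage sketch (thread_execute is presumably the thread.h wrapper for this
 * function; one_shot and ctx are hypothetical): run a handler synchronously,
 * on the caller's stack, while still recording its runtime in this master's
 * statistics:
 *
 *   thread_execute(master, one_shot, ctx, 0);
 */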