kernel/locking/rtmutex.c

   1 /*
   2  * RT-Mutexes: simple blocking mutual exclusion locks with PI support
   3  *
   4  * started by Ingo Molnar and Thomas Gleixner.
   5  *
   6  *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
   7  *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
   8  *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
   9  *  Copyright (C) 2006 Esben Nielsen
  10  *
  11  *  See Documentation/locking/rt-mutex-design.txt for details.
  12  */
  13 #include <linux/spinlock.h>
  14 #include <linux/export.h>
  15 #include <linux/sched/signal.h>
  16 #include <linux/sched/rt.h>
  17 #include <linux/sched/deadline.h>
  18 #include <linux/sched/wake_q.h>
  19 #include <linux/sched/debug.h>
  20 #include <linux/timer.h>
  21
  22 #include "rtmutex_common.h"
  23
  24 /*
  25  * lock->owner state tracking:
  26  *
  27  * lock->owner holds the task_struct pointer of the owner. Bit 0
  28  * is used to keep track of the "lock has waiters" state.
  29  *
  30  * owner        bit0
  31  * NULL         0       lock is free (fast acquire possible)
  32  * NULL         1       lock is free and has waiters and the top waiter
  33  *                              is going to take the lock*
  34  * taskpointer  0       lock is held (fast release possible)
  35  * taskpointer  1       lock is held and has waiters**
  36  *
  37  * The fast atomic compare exchange based acquire and release is only
  38  * possible when bit 0 of lock->owner is 0.
  39  *
  40  * (*) It can also be a transitional state when grabbing the lock
  41  * while ->wait_lock is held. To prevent any fast path cmpxchg on the lock,
  42  * we need to set bit 0 before looking at the lock, and the owner may be
  43  * NULL during this short window, hence this can be a transitional state.
  44  *
  45  * (**) There is a small time when bit 0 is set but there are no
  46  * waiters. This can happen when grabbing the lock in the slow path.
  47  * To prevent a cmpxchg of the owner releasing the lock, we need to
  48  * set this bit before looking at the lock.
  49  */
  50
  51 static void
  52 rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
  53 {
  54         unsigned long val = (unsigned long)owner;
  55
  56         if (rt_mutex_has_waiters(lock))
  57                 val |= RT_MUTEX_HAS_WAITERS;
  58
  59         lock->owner = (struct task_struct *)val;
  60 }
  61
  62 static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
  63 {
  64         lock->owner = (struct task_struct *)
  65                         ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
  66 }
  67
  68 static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
  69 {
  70         unsigned long owner, *p = (unsigned long *) &lock->owner;
  71
  72         if (rt_mutex_has_waiters(lock))
  73                 return;
  74
  75         /*
  76          * The rbtree has no waiters enqueued, now make sure that the
  77          * lock->owner still has the waiters bit set, otherwise the
  78          * following can happen:
  79          *
  80          * CPU 0        CPU 1           CPU2
  81          * l->owner=T1
  82          *              rt_mutex_lock(l)
  83          *              lock(l->lock)
  84          *              l->owner = T1 | HAS_WAITERS;
  85          *              enqueue(T2)
  86          *              boost()
  87          *                unlock(l->lock)
  88          *              block()
  89          *
  90          *                              rt_mutex_lock(l)
  91          *                              lock(l->lock)
  92          *                              l->owner = T1 | HAS_WAITERS;
  93          *                              enqueue(T3)
  94          *                              boost()
  95          *                                unlock(l->lock)
  96          *                              block()
  97          *              signal(->T2)    signal(->T3)
  98          *              lock(l->lock)
  99          *              dequeue(T2)
 100          *              deboost()
 101          *                unlock(l->lock)
 102          *                              lock(l->lock)
 103          *                              dequeue(T3)
 104          *                               ==> wait list is empty
 105          *                              deboost()
 106          *                               unlock(l->lock)
 107          *              lock(l->lock)
 108          *              fixup_rt_mutex_waiters()
 109          *                if (wait_list_empty(l) {
 110          *                  l->owner = owner
 111          *                  owner = l->owner & ~HAS_WAITERS;
 112          *                    ==> l->owner = T1
 113          *                }
 114          *                              lock(l->lock)
 115          * rt_mutex_unlock(l)           fixup_rt_mutex_waiters()
 116          *                                if (wait_list_empty(l) {
 117          *                                  owner = l->owner & ~HAS_WAITERS;
 118          * cmpxchg(l->owner, T1, NULL)
 119          *  ===> Success (l->owner = NULL)
 120          *
 121          *                                  l->owner = owner
 122          *                                    ==> l->owner = T1
 123          *                                }
 124          *
 125          * With the check for the waiter bit in place T3 on CPU2 will not
 126          * overwrite. All tasks fiddling with the waiters bit are
 127          * serialized by l->lock, so nothing else can modify the waiters
 128          * bit. If the bit is set then nothing can change l->owner either
 129          * so the simple RMW is safe. The cmpxchg() will simply fail if it
 130          * happens in the middle of the RMW because the waiters bit is
 131          * still set.
 132          */
 133         owner = READ_ONCE(*p);
 134         if (owner & RT_MUTEX_HAS_WAITERS)
 135                 WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
 136 }
 137
 138 /*
 139  * We can speed up the acquire/release, if there's no debugging state to be
 140  * set up.
 141  */
 142 #ifndef CONFIG_DEBUG_RT_MUTEXES
 143 # define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c)
 144 # define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c)
 145 # define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c)
 146
 147 /*
 148  * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 149  * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 150  * relaxed semantics suffice.
 151  */
 152 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 153 {
 154         unsigned long owner, *p = (unsigned long *) &lock->owner;
 155
 156         do {
 157                 owner = *p;
 158         } while (cmpxchg_relaxed(p, owner,
 159                                  owner | RT_MUTEX_HAS_WAITERS) != owner);
 160 }
 161
 162 /*
 163  * Safe fastpath aware unlock:
 164  * 1) Clear the waiters bit
 165  * 2) Drop lock->wait_lock
 166  * 3) Try to unlock the lock with cmpxchg
 167  */
 168 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
 169                                         unsigned long flags)
 170         __releases(lock->wait_lock)
 171 {
 172         struct task_struct *owner = rt_mutex_owner(lock);
 173
 174         clear_rt_mutex_waiters(lock);
 175         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 176         /*
 177          * If a new waiter comes in between the unlock and the cmpxchg
 178          * we have two situations:
 179          *
 180          * unlock(wait_lock);
 181          *                                      lock(wait_lock);
 182          * cmpxchg(p, owner, 0) == owner
 183          *                                      mark_rt_mutex_waiters(lock);
 184          *                                      acquire(lock);
 185          * or:
 186          *
 187          * unlock(wait_lock);
 188          *                                      lock(wait_lock);
 189          *                                      mark_rt_mutex_waiters(lock);
 190          *
 191          * cmpxchg(p, owner, 0) != owner
 192          *                                      enqueue_waiter();
 193          *                                      unlock(wait_lock);
 194          * lock(wait_lock);
 195          * wake waiter();
 196          * unlock(wait_lock);
 197          *                                      lock(wait_lock);
 198          *                                      acquire(lock);
 199          */
 200         return rt_mutex_cmpxchg_release(lock, owner, NULL);
 201 }
 202
 203 #else
 204 # define rt_mutex_cmpxchg_relaxed(l,c,n)        (0)
 205 # define rt_mutex_cmpxchg_acquire(l,c,n)        (0)
 206 # define rt_mutex_cmpxchg_release(l,c,n)        (0)
 207
 208 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 209 {
 210         lock->owner = (struct task_struct *)
 211                         ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
 212 }
 213
 214 /*
 215  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 216  */
 217 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
 218                                         unsigned long flags)
 219         __releases(lock->wait_lock)
 220 {
 221         lock->owner = NULL;
 222         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 223         return true;
 224 }
 225 #endif
 226
 227 static inline int
 228 rt_mutex_waiter_less(struct rt_mutex_waiter *left,
 229                      struct rt_mutex_waiter *right)
 230 {
 231         if (left->prio < right->prio)
 232                 return 1;
 233
 234         /*
 235          * If both waiters have dl_prio(), we check the deadlines of the
 236          * associated tasks.
 237          * If left waiter has a dl_prio(), and we didn't return 1 above,
 238          * then right waiter has a dl_prio() too.
 239          */
 240         if (dl_prio(left->prio))
 241                 return dl_time_before(left->task->dl.deadline,
 242                                       right->task->dl.deadline);
 243
 244         return 0;
 245 }
 246
 247 static void
 248 rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
 249 {
 250         struct rb_node **link = &lock->waiters.rb_node;
 251         struct rb_node *parent = NULL;
 252         struct rt_mutex_waiter *entry;
 253         int leftmost = 1;
 254
 255         while (*link) {
 256                 parent = *link;
 257                 entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
 258                 if (rt_mutex_waiter_less(waiter, entry)) {
 259                         link = &parent->rb_left;
 260                 } else {
 261                         link = &parent->rb_right;
 262                         leftmost = 0;
 263                 }
 264         }
 265
 266         if (leftmost)
 267                 lock->waiters_leftmost = &waiter->tree_entry;
 268
 269         rb_link_node(&waiter->tree_entry, parent, link);
 270         rb_insert_color(&waiter->tree_entry, &lock->waiters);
 271 }
 272
 273 static void
 274 rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
 275 {
 276         if (RB_EMPTY_NODE(&waiter->tree_entry))
 277                 return;
 278
 279         if (lock->waiters_leftmost == &waiter->tree_entry)
 280                 lock->waiters_leftmost = rb_next(&waiter->tree_entry);
 281
 282         rb_erase(&waiter->tree_entry, &lock->waiters);
 283         RB_CLEAR_NODE(&waiter->tree_entry);
 284 }
 285
 286 static void
 287 rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 288 {
 289         struct rb_node **link = &task->pi_waiters.rb_node;
 290         struct rb_node *parent = NULL;
 291         struct rt_mutex_waiter *entry;
 292         int leftmost = 1;
 293
 294         while (*link) {
 295                 parent = *link;
 296                 entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
 297                 if (rt_mutex_waiter_less(waiter, entry)) {
 298                         link = &parent->rb_left;
 299                 } else {
 300                         link = &parent->rb_right;
 301                         leftmost = 0;
 302                 }
 303         }
 304
 305         if (leftmost)
 306                 task->pi_waiters_leftmost = &waiter->pi_tree_entry;
 307
 308         rb_link_node(&waiter->pi_tree_entry, parent, link);
 309         rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
 310 }
 311
 312 static void
 313 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 314 {
 315         if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
 316                 return;
 317
 318         if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
 319                 task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);
 320
 321         rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
 322         RB_CLEAR_NODE(&waiter->pi_tree_entry);
 323 }
 324
 325 /*
 326  * Must hold both p->pi_lock and task_rq(p)->lock.
 327  */
 328 void rt_mutex_update_top_task(struct task_struct *p)
 329 {
 330         if (!task_has_pi_waiters(p)) {
 331                 p->pi_top_task = NULL;
 332                 return;
 333         }
 334
 335         p->pi_top_task = task_top_pi_waiter(p)->task;
 336 }
 337
 338 /*
 339  * Calculate task priority from the waiter tree priority
 340  *
 341  * Return task->normal_prio when the waiter tree is empty or when
 342  * the waiter is not allowed to do priority boosting
 343  */
 344 int rt_mutex_getprio(struct task_struct *task)
 345 {
 346         if (likely(!task_has_pi_waiters(task)))
 347                 return task->normal_prio;
 348
 349         return min(task_top_pi_waiter(task)->prio,
 350                    task->normal_prio);
 351 }
 352
 353 /*
 354  * Must hold either p->pi_lock or task_rq(p)->lock.
 355  */
 356 struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
 357 {
 358         return task->pi_top_task;
 359 }
 360
 361 /*
 362  * Called by sched_setscheduler() to get the priority which will be
 363  * effective after the change.
 364  */
 365 int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
 366 {
 367         struct task_struct *top_task = rt_mutex_get_top_task(task);
 368
 369         if (!top_task)
 370                 return newprio;
 371
 372         return min(top_task->prio, newprio);
 373 }
 374
 375 /*
 376  * Adjust the priority of a task, after its pi_waiters got modified.
 377  *
 378  * This can be both boosting and unboosting. task->pi_lock must be held.
 379  */
 380 static void __rt_mutex_adjust_prio(struct task_struct *task)
 381 {
 382         int prio = rt_mutex_getprio(task);
 383
 384         if (task->prio != prio || dl_prio(prio))
 385                 rt_mutex_setprio(task, prio);
 386 }
 387
 388 /*
 389  * Deadlock detection is conditional:
 390  *
 391  * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
 392  * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
 393  *
 394  * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
 395  * conducted independent of the detect argument.
 396  *
 397  * If the waiter argument is NULL this indicates the deboost path and
 398  * deadlock detection is disabled independent of the detect argument
 399  * and the config settings.
 400  */
 401 static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
 402                                           enum rtmutex_chainwalk chwalk)
 403 {
 404         /*
 405          * This is just a wrapper function for the following call,
 406          * because debug_rt_mutex_detect_deadlock() smells like a magic
 407          * debug feature and I wanted to keep the cond function in the
 408          * main source file along with the comments instead of having
 409          * two of the same in the headers.
 410          */
 411         return debug_rt_mutex_detect_deadlock(waiter, chwalk);
 412 }
 413
 414 /*
 415  * Max chain walk depth per invocation; exceeding it returns -EDEADLK:
 416  */
 417 int max_lock_depth = 1024;
 418
 419 static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
 420 {
 421         return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
 422 }
 423
 424 /*
 425  * Adjust the priority chain. Also used for deadlock detection.
 426  * Decreases task's usage by one - may thus free the task.
 427  *
 428  * @task:       the task owning the mutex (owner) for which a chain walk is
 429  *              probably needed
 430  * @chwalk:     do we have to carry out deadlock detection?
 431  * @orig_lock:  the mutex (can be NULL if we are walking the chain to recheck
 432  *              things for a task that has just got its priority adjusted, and
 433  *              is waiting on a mutex)
 434  * @next_lock:  the mutex on which the owner of @orig_lock was blocked before
 435  *              we dropped its pi_lock. Is never dereferenced, only used for
 436  *              comparison to detect lock chain changes.
 437  * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 438  *              its priority to the mutex owner (can be NULL in the case
 439  *              depicted above or if the top waiter is gone away and we are
 440  *              actually deboosting the owner)
 441  * @top_task:   the current top waiter
 442  *
 443  * Returns 0 or -EDEADLK.
 444  *
 445  * Chain walk basics and protection scope
 446  *
 447  * [R] refcount on task
 448  * [P] task->pi_lock held
 449  * [L] rtmutex->wait_lock held
 450  *
 451  * Step Description                             Protected by
 452  *      function arguments:
 453  *      @task                                   [R]
 454  *      @orig_lock if != NULL                   @top_task is blocked on it
 455  *      @next_lock                              Unprotected. Cannot be
 456  *                                              dereferenced. Only used for
 457  *                                              comparison.
 458  *      @orig_waiter if != NULL                 @top_task is blocked on it
 459  *      @top_task                               current, or in case of proxy
 460  *                                              locking protected by calling
 461  *                                              code
 462  *      again:
 463  *        loop_sanity_check();
 464  *      retry:
 465  * [1]    lock(task->pi_lock);                  [R] acquire [P]
 466  * [2]    waiter = task->pi_blocked_on;         [P]
 467  * [3]    check_exit_conditions_1();            [P]
 468  * [4]    lock = waiter->lock;                  [P]
 469  * [5]    if (!try_lock(lock->wait_lock)) {     [P] try to acquire [L]
 470  *          unlock(task->pi_lock);              release [P]
 471  *          goto retry;
 472  *        }
 473  * [6]    check_exit_conditions_2();            [P] + [L]
 474  * [7]    requeue_lock_waiter(lock, waiter);    [P] + [L]
 475  * [8]    unlock(task->pi_lock);                release [P]
 476  *        put_task_struct(task);                release [R]
 477  * [9]    check_exit_conditions_3();            [L]
 478  * [10]   task = owner(lock);                   [L]
 479  *        get_task_struct(task);                [L] acquire [R]
 480  *        lock(task->pi_lock);                  [L] acquire [P]
 481  * [11]   requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
 482  * [12]   check_exit_conditions_4();            [P] + [L]
 483  * [13]   unlock(task->pi_lock);                release [P]
 484  *        unlock(lock->wait_lock);              release [L]
 485  *        goto again;
 486  */
 487 static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 488                                       enum rtmutex_chainwalk chwalk,
 489                                       struct rt_mutex *orig_lock,
 490                                       struct rt_mutex *next_lock,
 491                                       struct rt_mutex_waiter *orig_waiter,
 492                                       struct task_struct *top_task)
 493 {
 494         struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
 495         struct rt_mutex_waiter *prerequeue_top_waiter;
 496         int ret = 0, depth = 0;
 497         struct rt_mutex *lock;
 498         bool detect_deadlock;
 499         bool requeue = true;    /* false: walk on for deadlock detection only */
 500
 501         detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
 502
 503         /*
 504          * The (de)boosting is a step by step approach with a lot of
 505          * pitfalls. We want this to be preemptible and we want to hold a
 506          * maximum of two locks per step. So we have to check
 507          * carefully whether things change under us.
 508          */
 509  again:
 510         /*
 511          * We limit the lock chain length for each invocation.
 512          */
 513         if (++depth > max_lock_depth) {
 514                 static int prev_max;
 515
 516                 /*
 517                  * Print this only once. If the admin changes the limit,
 518                  * print a new message when reaching the limit again.
 519                  */
 520                 if (prev_max != max_lock_depth) {
 521                         prev_max = max_lock_depth;
 522                         printk(KERN_WARNING "Maximum lock depth %d reached "
 523                                "task: %s (%d)\n", max_lock_depth,
 524                                top_task->comm, task_pid_nr(top_task));
 525                 }
 526                 put_task_struct(task);
 527
 528                 return -EDEADLK;
 529         }
 530
 531         /*
 532          * We are fully preemptible here and only hold the refcount on
 533          * @task. So everything can have changed under us since the
 534          * caller or our own code below (goto retry/again) dropped all
 535          * locks.
 536          */
 537  retry:
 538         /*
 539          * [1] Task cannot go away: we hold a reference ([R]) on it.
 540          */
 541         raw_spin_lock_irq(&task->pi_lock);
 542
 543         /*
 544          * [2] Get the waiter on which @task is blocked.
 545          */
 546         waiter = task->pi_blocked_on;
 547
 548         /*
 549          * [3] check_exit_conditions_1() protected by task->pi_lock.
 550          */
 551
 552         /*
 553          * Check whether the end of the boosting chain has been
 554          * reached or the state of the chain has changed while we
 555          * dropped the locks.
 556          */
 557         if (!waiter)
 558                 goto out_unlock_pi;
 559
 560         /*
 561          * Check the orig_waiter state. After we dropped the locks,
 562          * the previous owner of the lock might have released the lock.
 563          */
 564         if (orig_waiter && !rt_mutex_owner(orig_lock))
 565                 goto out_unlock_pi;
 566
 567         /*
 568          * We dropped all locks after taking a refcount on @task, so
 569          * the task might have moved on in the lock chain or even left
 570          * the chain completely and blocks now on an unrelated lock or
 571          * on @orig_lock.
 572          *
 573          * We stored the lock on which @task was blocked in @next_lock,
 574          * so we can detect the chain change.
 575          */
 576         if (next_lock != waiter->lock)
 577                 goto out_unlock_pi;
 578
 579         /*
 580          * Drop out, when the task has no waiters. Note,
 581          * top_waiter can be NULL, when we are in the deboosting
 582          * mode!
 583          */
 584         if (top_waiter) {
 585                 if (!task_has_pi_waiters(task))
 586                         goto out_unlock_pi;
 587                 /*
 588                  * If deadlock detection is off, we stop here if we
 589                  * are not the top pi waiter of the task. If deadlock
 590                  * detection is enabled we continue, but stop the
 591                  * requeueing in the chain walk.
 592                  */
 593                 if (top_waiter != task_top_pi_waiter(task)) {
 594                         if (!detect_deadlock)
 595                                 goto out_unlock_pi;
 596                         else
 597                                 requeue = false;
 598                 }
 599         }
 600
 601         /*
 602          * If the waiter priority is the same as the task priority
 603          * then there is no further priority adjustment necessary.  If
 604          * deadlock detection is off, we stop the chain walk. If it's
 605          * enabled we continue, but stop the requeueing in the chain
 606          * walk.
 607          */
 608         if (waiter->prio == task->prio && !dl_task(task)) {
 609                 if (!detect_deadlock)
 610                         goto out_unlock_pi;
 611                 else
 612                         requeue = false;
 613         }
 614
 615         /*
 616          * [4] Get the next lock
 617          */
 618         lock = waiter->lock;
 619         /*
 620          * [5] We need to trylock here as we are holding task->pi_lock,
 621          * which is the reverse lock order versus the other rtmutex
 622          * operations.
 623          */
 624         if (!raw_spin_trylock(&lock->wait_lock)) {
 625                 raw_spin_unlock_irq(&task->pi_lock);
 626                 cpu_relax();
 627                 goto retry;
 628         }
 629
 630         /*
 631          * [6] check_exit_conditions_2() protected by task->pi_lock and
 632          * lock->wait_lock.
 633          *
 634          * Deadlock detection. If the lock is the same as the original
 635          * lock which caused us to walk the lock chain or if the
 636          * current lock is owned by the task which initiated the chain
 637          * walk, we detected a deadlock.
 638          */
 639         if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
 640                 debug_rt_mutex_deadlock(chwalk, orig_waiter, lock);
 641                 raw_spin_unlock(&lock->wait_lock);
 642                 ret = -EDEADLK;
 643                 goto out_unlock_pi;
 644         }
 645
 646         /*
 647          * If we just follow the lock chain for deadlock detection, no
 648          * need to do all the requeue operations. To avoid a truckload
 649          * of conditionals around the various places below, just do the
 650          * minimum chain walk checks.
 651          */
 652         if (!requeue) {
 653                 /*
 654                  * No requeue [7] here. Just release @task [8], irqs stay off.
 655                  */
 656                 raw_spin_unlock(&task->pi_lock);
 657                 put_task_struct(task);
 658
 659                 /*
 660                  * [9] check_exit_conditions_3 protected by lock->wait_lock.
 661                  * If there is no owner of the lock, end of chain.
 662                  */
 663                 if (!rt_mutex_owner(lock)) {
 664                         raw_spin_unlock_irq(&lock->wait_lock);
 665                         return 0;
 666                 }
 667
 668                 /* [10] Grab the next task, i.e. owner of @lock */
 669                 task = rt_mutex_owner(lock);
 670                 get_task_struct(task);
 671                 raw_spin_lock(&task->pi_lock);
 672
 673                 /*
 674                  * No requeue [11] here. We just do deadlock detection.
 675                  *
 676                  * [12] Store whether owner is blocked
 677                  * itself. Decision is made after dropping the locks
 678                  */
 679                 next_lock = task_blocked_on_lock(task);
 680                 /*
 681                  * Get the top waiter for the next iteration
 682                  */
 683                 top_waiter = rt_mutex_top_waiter(lock);
 684
 685                 /* [13] Drop locks */
 686                 raw_spin_unlock(&task->pi_lock);
 687                 raw_spin_unlock_irq(&lock->wait_lock);
 688
 689                 /* If owner is not blocked, end of chain. */
 690                 if (!next_lock)
 691                         goto out_put_task;
 692                 goto again;
 693         }
 694
 695         /*
 696          * Store the current top waiter before doing the requeue
 697          * operation on @lock. We need it for the boost/deboost
 698          * decision below.
 699          */
 700         prerequeue_top_waiter = rt_mutex_top_waiter(lock);
 701
 702         /* [7] Requeue the waiter in the lock waiter tree. */
 703         rt_mutex_dequeue(lock, waiter);
 704         waiter->prio = task->prio;
 705         rt_mutex_enqueue(lock, waiter);
 706
 707         /* [8] Release the task */
 708         raw_spin_unlock(&task->pi_lock);
 709         put_task_struct(task);
 710
 711         /*
 712          * [9] check_exit_conditions_3 protected by lock->wait_lock.
 713          *
 714          * We must abort the chain walk if there is no lock owner even
 715          * in the deadlock detection case, as we have nothing to
 716          * follow here. This is the end of the chain we are walking.
 717          */
 718         if (!rt_mutex_owner(lock)) {
 719                 /*
 720                  * If the requeue [7] above changed the top waiter,
 721                  * then we need to wake the new top waiter up to try
 722                  * to get the lock.
 723                  */
 724                 if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
 725                         wake_up_process(rt_mutex_top_waiter(lock)->task);
 726                 raw_spin_unlock_irq(&lock->wait_lock);
 727                 return 0;
 728         }
 729
 730         /* [10] Grab the next task, i.e. the owner of @lock */
 731         task = rt_mutex_owner(lock);
 732         get_task_struct(task);
 733         raw_spin_lock(&task->pi_lock);
 734
 735         /* [11] requeue the pi waiters if necessary */
 736         if (waiter == rt_mutex_top_waiter(lock)) {
 737                 /*
 738                  * The waiter became the new top (highest priority)
 739                  * waiter on the lock. Replace the previous top waiter
 740                  * in the owner tasks pi waiters tree with this waiter
 741                  * and adjust the priority of the owner.
 742                  */
 743                 rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
 744                 rt_mutex_enqueue_pi(task, waiter);
 745                 __rt_mutex_adjust_prio(task);
 746
 747         } else if (prerequeue_top_waiter == waiter) {
 748                 /*
 749                  * The waiter was the top waiter on the lock, but is
 750                  * no longer the top priority waiter. Replace waiter in
 751                  * the owner tasks pi waiters tree with the new top
 752                  * (highest priority) waiter and adjust the priority
 753                  * of the owner.
 754                  * The new top waiter is stored in @waiter so that
 755                  * @waiter == @top_waiter evaluates to true below and
 756                  * we continue to deboost the rest of the chain.
 757                  */
 758                 rt_mutex_dequeue_pi(task, waiter);
 759                 waiter = rt_mutex_top_waiter(lock);
 760                 rt_mutex_enqueue_pi(task, waiter);
 761                 __rt_mutex_adjust_prio(task);
 762         } else {
 763                 /*
 764                  * Nothing changed. No need to do any priority
 765                  * adjustment.
 766                  */
 767         }
 768
 769         /*
 770          * [12] check_exit_conditions_4() protected by task->pi_lock
 771          * and lock->wait_lock. The actual decisions are made after we
 772          * dropped the locks.
 773          *
 774          * Check whether the task which owns the current lock is pi
 775          * blocked itself. If yes we store a pointer to the lock for
 776          * the lock chain change detection above. After we dropped
 777          * task->pi_lock next_lock cannot be dereferenced anymore.
 778          */
 779         next_lock = task_blocked_on_lock(task);
 780         /*
 781          * Store the top waiter of @lock for the end of chain walk
 782          * decision below.
 783          */
 784         top_waiter = rt_mutex_top_waiter(lock);
 785
 786         /* [13] Drop the locks */
 787         raw_spin_unlock(&task->pi_lock);
 788         raw_spin_unlock_irq(&lock->wait_lock);
 789
 790         /*
 791          * Make the actual exit decisions [12], based on the stored
 792          * values.
 793          *
 794          * We reached the end of the lock chain. Stop right here. No
 795          * point to go back just to figure that out.
 796          */
 797         if (!next_lock)
 798                 goto out_put_task;
 799
 800         /*
 801          * If the current waiter is not the top waiter on the lock,
 802          * then we can stop the chain walk here if we are not in full
 803          * deadlock detection mode.
 804          */
 805         if (!detect_deadlock && waiter != top_waiter)
 806                 goto out_put_task;
 807
 808         goto again;
 809
 810  out_unlock_pi:
 811         raw_spin_unlock_irq(&task->pi_lock);
 812  out_put_task:
 813         put_task_struct(task);
 814
 815         return ret;
 816 }
 817
 818 /*
 819  * Try to take an rt-mutex
 820  *
 821  * Must be called with lock->wait_lock held and interrupts disabled
 822  *
 823  * @lock:   The lock to be acquired.
 824  * @task:   The task which wants to acquire the lock
 825  * @waiter: The waiter that is queued to the lock's wait tree if the
 826  *          callsite called task_blocks_on_rt_mutex(), otherwise NULL
 827  */
 828 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
 829                                 struct rt_mutex_waiter *waiter)
 830 {
 831         /*
 832          * Before testing whether we can acquire @lock, we set the
 833          * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
 834          * other tasks which try to modify @lock into the slow path
 835          * and they serialize on @lock->wait_lock.
 836          *
 837          * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
 838          * as explained at the top of this file if and only if:
 839          *
 840          * - There is a lock owner. The caller must fixup the
 841          *   transient state if it does a trylock or leaves the lock
 842          *   function due to a signal or timeout.
 843          *
 844          * - @task acquires the lock and there are no other
 845          *   waiters. This is undone in rt_mutex_set_owner(@task) at
 846          *   the end of this function.
 847          */
 848         mark_rt_mutex_waiters(lock);
 849
 850         /*
 851          * If @lock has an owner, give up.
 852          */
 853         if (rt_mutex_owner(lock))
 854                 return 0;
 855
 856         /*
 857          * If @waiter != NULL, @task has already enqueued the waiter
 858          * into @lock waiter tree. If @waiter == NULL then this is a
 859          * trylock attempt.
 860          */
 861         if (waiter) {
 862                 /*
 863                  * If waiter is not the highest priority waiter of
 864                  * @lock, give up.
 865                  */
 866                 if (waiter != rt_mutex_top_waiter(lock))
 867                         return 0;
 868
 869                 /*
 870                  * We can acquire the lock. Remove the waiter from the
 871                  * lock waiters tree.
 872                  */
 873                 rt_mutex_dequeue(lock, waiter);
 874
 875         } else {
 876                 /*
 877                  * If the lock has waiters already we check whether @task is
 878                  * eligible to take over the lock.
 879                  *
 880                  * If there are no other waiters, @task can acquire
 881                  * the lock.  @task->pi_blocked_on is NULL, so it does
 882                  * not need to be dequeued.
 883                  */
 884                 if (rt_mutex_has_waiters(lock)) {
 885                         /*
 886                          * If @task->prio is greater than or equal to
 887                          * the top waiter priority (kernel view),
 888                          * @task lost.
 889                          */
 890                         if (task->prio >= rt_mutex_top_waiter(lock)->prio)
 891                                 return 0;
 892
 893                         /*
 894                          * The current top waiter stays enqueued. We
 895                          * don't have to change anything in the lock
 896                          * waiters order.
 897                          */
 898                 } else {
 899                         /*
 900                          * No waiters. Take the lock without the
 901                          * pi_lock dance.@task->pi_blocked_on is NULL
 902                          * and we have no waiters to enqueue in @task
 903                          * pi waiters tree.
 904                          */
 905                         goto takeit;
 906                 }
 907         }
 908
 909         /*
 910          * Clear @task->pi_blocked_on. Requires protection by
 911          * @task->pi_lock. Redundant operation for the @waiter == NULL
 912          * case, but conditionals are more expensive than a redundant
 913          * store.
 914          */
 915         raw_spin_lock(&task->pi_lock);
 916         task->pi_blocked_on = NULL;
 917         /*
 918          * Finish the lock acquisition. @task is the new owner. If
 919          * other waiters exist we have to insert the highest priority
 920          * waiter into @task->pi_waiters tree.
 921          */
 922         if (rt_mutex_has_waiters(lock))
 923                 rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
 924         raw_spin_unlock(&task->pi_lock);
 925
 926 takeit:
 927         /* We got the lock. */
 928         debug_rt_mutex_lock(lock);
 929
 930         /*
 931          * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
 932          * are still waiters or clears it.
 933          */
 934         rt_mutex_set_owner(lock, task);
 935
 936         return 1;
 937 }
 938
 939 /*
 940  * Task blocks on lock.
 941  *
 942  * Prepare waiter and propagate pi chain
 943  *
 944  * This must be called with lock->wait_lock held and interrupts disabled
 945  */
 946 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 947                                    struct rt_mutex_waiter *waiter,
 948                                    struct task_struct *task,
 949                                    enum rtmutex_chainwalk chwalk)
 950 {
 951         struct task_struct *owner = rt_mutex_owner(lock);
 952         struct rt_mutex_waiter *top_waiter = waiter;
 953         struct rt_mutex *next_lock;
 954         int chain_walk = 0, res;
 955
 956         /*
 957          * Early deadlock detection. We really don't want the task to
 958          * enqueue on itself just to untangle the mess later. It's not
 959          * only an optimization. We drop the locks, so another waiter
 960          * can come in before the chain walk detects the deadlock. So
 961          * the other will detect the deadlock and return -EDEADLOCK,
 962          * which is wrong, as the other waiter is not in a deadlock
 963          * situation.
 964          */
 965         if (owner == task)
 966                 return -EDEADLK;
 967
 968         raw_spin_lock(&task->pi_lock);
 969         __rt_mutex_adjust_prio(task);
 970         waiter->task = task;
 971         waiter->lock = lock;
 972         waiter->prio = task->prio;
 973
 974         /* Get the top priority waiter on the lock */
 975         if (rt_mutex_has_waiters(lock))
 976                 top_waiter = rt_mutex_top_waiter(lock);
 977         rt_mutex_enqueue(lock, waiter);
 978
 979         task->pi_blocked_on = waiter;
 980
 981         raw_spin_unlock(&task->pi_lock);
 982
 983         if (!owner)
 984                 return 0;
 985
 986         raw_spin_lock(&owner->pi_lock);
 987         if (waiter == rt_mutex_top_waiter(lock)) {
 988                 rt_mutex_dequeue_pi(owner, top_waiter);
 989                 rt_mutex_enqueue_pi(owner, waiter);
 990
 991                 __rt_mutex_adjust_prio(owner);
 992                 if (owner->pi_blocked_on)
 993                         chain_walk = 1;
 994         } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
 995                 chain_walk = 1;
 996         }
 997
 998         /* Store the lock on which owner is blocked or NULL */
 999         next_lock = task_blocked_on_lock(owner);
1000
1001         raw_spin_unlock(&owner->pi_lock);
1002         /*
1003          * Even if full deadlock detection is on, if the owner is not
1004          * blocked itself, we can avoid finding this out in the chain
1005          * walk.
1006          */
1007         if (!chain_walk || !next_lock)
1008                 return 0;
1009
1010         /*
1011          * The owner can't disappear while holding a lock,
1012          * so the owner struct is protected by wait_lock.
1013          * Gets dropped in rt_mutex_adjust_prio_chain()!
1014          */
1015         get_task_struct(owner);
1016
1017         raw_spin_unlock_irq(&lock->wait_lock);
1018
1019         res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
1020                                          next_lock, waiter, task);
1021
1022         raw_spin_lock_irq(&lock->wait_lock);
1023
1024         return res;
1025 }
1026
1027 /*
1028  * Remove the top waiter from the current tasks pi waiter tree and
1029  * queue it up.
1030  *
1031  * Called with lock->wait_lock held and interrupts disabled.
1032  */
1033 static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
1034                                     struct rt_mutex *lock)
1035 {
1036         struct rt_mutex_waiter *waiter;
1037
1038         raw_spin_lock(&current->pi_lock);
1039
1040         waiter = rt_mutex_top_waiter(lock);
1041
1042         /*
1043          * Remove it from current->pi_waiters. We do not adjust a
1044          * possible priority boost right now. We execute wakeup in the
1045          * boosted mode and go back to normal after releasing
1046          * lock->wait_lock.
1047          */
1048         rt_mutex_dequeue_pi(current, waiter);
1049         __rt_mutex_adjust_prio(current);
1050
1051         /*
1052          * As we are waking up the top waiter, and the waiter stays
1053          * queued on the lock until it gets the lock, this lock
1054          * obviously has waiters. Just set the bit here and this has
1055          * the added benefit of forcing all new tasks into the
1056          * slow path making sure no task of lower priority than
1057          * the top waiter can steal this lock.
1058          */
1059         lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
1060
1061         raw_spin_unlock(&current->pi_lock);
1062
1063         wake_q_add(wake_q, waiter->task);
1064 }
1065
1066 /*
1067  * Remove a waiter from a lock and give up
1068  *
1069  * Must be called with lock->wait_lock held and interrupts disabled. I must
1070  * have just failed to try_to_take_rt_mutex().
1071  */
1072 static void remove_waiter(struct rt_mutex *lock,
1073                           struct rt_mutex_waiter *waiter)
1074 {
1075         bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
1076         struct task_struct *owner = rt_mutex_owner(lock);
1077         struct rt_mutex *next_lock;
1078
1079         raw_spin_lock(&current->pi_lock);
1080         rt_mutex_dequeue(lock, waiter);
1081         current->pi_blocked_on = NULL;
1082         raw_spin_unlock(&current->pi_lock);
1083
1084         /*
1085          * Only update priority if the waiter was the highest priority
1086          * waiter of the lock and there is an owner to update.
1087          */
1088         if (!owner || !is_top_waiter)
1089                 return;
1090
1091         raw_spin_lock(&owner->pi_lock);
1092
1093         rt_mutex_dequeue_pi(owner, waiter);
1094
1095         if (rt_mutex_has_waiters(lock))
1096                 rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
1097
1098         __rt_mutex_adjust_prio(owner);
1099
1100         /* Store the lock on which owner is blocked or NULL */
1101         next_lock = task_blocked_on_lock(owner);
1102
1103         raw_spin_unlock(&owner->pi_lock);
1104
1105         /*
1106          * Don't walk the chain, if the owner task is not blocked
1107          * itself.
1108          */
1109         if (!next_lock)
1110                 return;
1111
1112         /* gets dropped in rt_mutex_adjust_prio_chain()! */
1113         get_task_struct(owner);
1114
1115         raw_spin_unlock_irq(&lock->wait_lock);
1116
1117         rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
1118                                    next_lock, NULL, current);
1119
1120         raw_spin_lock_irq(&lock->wait_lock);
1121 }
1122
1123 /*
1124  * Recheck the pi chain, in case we got a priority setting
1125  *
1126  * Called from sched_setscheduler
1127  */
1128 void rt_mutex_adjust_pi(struct task_struct *task)
1129 {
1130         struct rt_mutex_waiter *waiter;
1131         struct rt_mutex *next_lock;
1132         unsigned long flags;
1133
1134         raw_spin_lock_irqsave(&task->pi_lock, flags);
1135
1136         waiter = task->pi_blocked_on;
1137         if (!waiter || (waiter->prio == task->prio &&
1138                         !dl_prio(task->prio))) {
1139                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
1140                 return;
1141         }
1142         next_lock = waiter->lock;
1143         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
1144
1145         /* gets dropped in rt_mutex_adjust_prio_chain()! */
1146         get_task_struct(task);
1147
1148         rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
1149                                    next_lock, NULL, task);
1150 }
1151
1152 void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
1153 {
1154         debug_rt_mutex_init_waiter(waiter);
1155         RB_CLEAR_NODE(&waiter->pi_tree_entry);
1156         RB_CLEAR_NODE(&waiter->tree_entry);
1157         waiter->task = NULL;
1158 }
1159
1160 /**
1161  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
1162  * @lock:                the rt_mutex to take
1163  * @state:               the state the task should block in (TASK_INTERRUPTIBLE
1164  *                       or TASK_UNINTERRUPTIBLE)
1165  * @timeout:             the pre-initialized and started timer, or NULL for none
1166  * @waiter:              the pre-initialized rt_mutex_waiter
1167  *
1168  * Must be called with lock->wait_lock held and interrupts disabled
1169  */
1170 static int __sched
1171 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
1172                     struct hrtimer_sleeper *timeout,
1173                     struct rt_mutex_waiter *waiter)
1174 {
1175         int ret = 0;
1176
1177         for (;;) {
1178                 /* Try to acquire the lock: */
1179                 if (try_to_take_rt_mutex(lock, current, waiter))
1180                         break;
1181
1182                 /*
1183                  * TASK_INTERRUPTIBLE checks for signals and
1184                  * timeout. Ignored otherwise.
1185                  */
1186                 if (likely(state == TASK_INTERRUPTIBLE)) {
1187                         /* Signal pending? */
1188                         if (signal_pending(current))
1189                                 ret = -EINTR;
1190                         if (timeout && !timeout->task)
1191                                 ret = -ETIMEDOUT;
1192                         if (ret)
1193                                 break;
1194                 }
1195
1196                 raw_spin_unlock_irq(&lock->wait_lock);
1197
1198                 debug_rt_mutex_print_deadlock(waiter);
1199
1200                 schedule();
1201
1202                 raw_spin_lock_irq(&lock->wait_lock);
1203                 set_current_state(state);
1204         }
1205
1206         __set_current_state(TASK_RUNNING);
1207         return ret;
1208 }
1209
1210 static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
1211                                      struct rt_mutex_waiter *w)
1212 {
1213         /*
1214          * If the result is not -EDEADLOCK or the caller requested
1215          * deadlock detection, nothing to do here.
1216          */
1217         if (res != -EDEADLOCK || detect_deadlock)
1218                 return;
1219
1220         /*
1221          * Yell lowdly and stop the task right here.
1222          */
1223         rt_mutex_print_deadlock(w);
1224         while (1) {
1225                 set_current_state(TASK_INTERRUPTIBLE);
1226                 schedule();
1227         }
1228 }
1229
1230 /*
1231  * Slow path lock function:
1232  */
1233 static int __sched
1234 rt_mutex_slowlock(struct rt_mutex *lock, int state,
1235                   struct hrtimer_sleeper *timeout,
1236                   enum rtmutex_chainwalk chwalk)
1237 {
1238         struct rt_mutex_waiter waiter;
1239         unsigned long flags;
1240         int ret = 0;
1241
1242         rt_mutex_init_waiter(&waiter);
1243
1244         /*
1245          * Technically we could use raw_spin_[un]lock_irq() here, but this can
1246          * be called in early boot if the cmpxchg() fast path is disabled
1247          * (debug, no architecture support). In this case we will acquire the
1248          * rtmutex with lock->wait_lock held. But we cannot unconditionally
1249          * enable interrupts in that early boot case. So we need to use the
1250          * irqsave/restore variants.
1251          */
1252         raw_spin_lock_irqsave(&lock->wait_lock, flags);
1253
1254         /* Try to acquire the lock again: */
1255         if (try_to_take_rt_mutex(lock, current, NULL)) {
1256                 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1257                 return 0;
1258         }
1259
1260         set_current_state(state);
1261
1262         /* Setup the timer, when timeout != NULL */
1263         if (unlikely(timeout))
1264                 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
1265
1266         ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
1267
1268         if (likely(!ret))
1269                 /* sleep on the mutex */
1270                 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
1271
1272         if (unlikely(ret)) {
1273                 __set_current_state(TASK_RUNNING);
1274                 if (rt_mutex_has_waiters(lock))
1275                         remove_waiter(lock, &waiter);
1276                 rt_mutex_handle_deadlock(ret, chwalk, &waiter);
1277         }
1278
1279         /*
1280          * try_to_take_rt_mutex() sets the waiter bit
1281          * unconditionally. We might have to fix that up.
1282          */
1283         fixup_rt_mutex_waiters(lock);
1284
1285         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1286
1287         /* Remove pending timer: */
1288         if (unlikely(timeout))
1289                 hrtimer_cancel(&timeout->timer);
1290
1291         debug_rt_mutex_free_waiter(&waiter);
1292
1293         return ret;
1294 }
1295
1296 /*
1297  * Slow path try-lock function:
1298  */
1299 static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
1300 {
1301         unsigned long flags;
1302         int ret;
1303
1304         /*
1305          * If the lock already has an owner we fail to get the lock.
1306          * This can be done without taking the @lock->wait_lock as
1307          * it is only being read, and this is a trylock anyway.
1308          */
1309         if (rt_mutex_owner(lock))
1310                 return 0;
1311
1312         /*
1313          * The mutex has currently no owner. Lock the wait lock and try to
1314          * acquire the lock. We use irqsave here to support early boot calls.
1315          */
1316         raw_spin_lock_irqsave(&lock->wait_lock, flags);
1317
1318         ret = try_to_take_rt_mutex(lock, current, NULL);
1319
1320         /*
1321          * try_to_take_rt_mutex() sets the lock waiters bit
1322          * unconditionally. Clean this up.
1323          */
1324         fixup_rt_mutex_waiters(lock);
1325
1326         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1327
1328         return ret;
1329 }
1330
1331 /*
1332  * Slow path to release a rt-mutex.
1333  *
1334  * Return whether the current task needs to call rt_mutex_postunlock().
1335  */
1336 static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
1337                                         struct wake_q_head *wake_q)
1338 {
1339         unsigned long flags;
1340
1341         /* irqsave required to support early boot calls */
1342         raw_spin_lock_irqsave(&lock->wait_lock, flags);
1343
1344         debug_rt_mutex_unlock(lock);
1345
1346         /*
1347          * We must be careful here if the fast path is enabled. If we
1348          * have no waiters queued we cannot set owner to NULL here
1349          * because of:
1350          *
1351          * foo->lock->owner = NULL;
1352          *                      rtmutex_lock(foo->lock);   <- fast path
1353          *                      free = atomic_dec_and_test(foo->refcnt);
1354          *                      rtmutex_unlock(foo->lock); <- fast path
1355          *                      if (free)
1356          *                              kfree(foo);
1357          * raw_spin_unlock(foo->lock->wait_lock);
1358          *
1359          * So for the fastpath enabled kernel:
1360          *
1361          * Nothing can set the waiters bit as long as we hold
1362          * lock->wait_lock. So we do the following sequence:
1363          *
1364          *      owner = rt_mutex_owner(lock);
1365          *      clear_rt_mutex_waiters(lock);
1366          *      raw_spin_unlock(&lock->wait_lock);
1367          *      if (cmpxchg(&lock->owner, owner, 0) == owner)
1368          *              return;
1369          *      goto retry;
1370          *
1371          * The fastpath disabled variant is simple as all access to
1372          * lock->owner is serialized by lock->wait_lock:
1373          *
1374          *      lock->owner = NULL;
1375          *      raw_spin_unlock(&lock->wait_lock);
1376          */
1377         while (!rt_mutex_has_waiters(lock)) {
1378                 /* Drops lock->wait_lock ! */
1379                 if (unlock_rt_mutex_safe(lock, flags) == true)
1380                         return false;
1381                 /* Relock the rtmutex and try again */
1382                 raw_spin_lock_irqsave(&lock->wait_lock, flags);
1383         }
1384
1385         /*
1386          * The wakeup next waiter path does not suffer from the above
1387          * race. See the comments there.
1388          *
1389          * Queue the next waiter for wakeup once we release the wait_lock.
1390          */
1391         mark_wakeup_next_waiter(wake_q, lock);
1392
1393         /*
1394          * We should deboost before waking the top waiter task such that
1395          * we don't run two tasks with the 'same' priority. This however
1396          * can lead to prio-inversion if we would get preempted after
1397          * the deboost but before waking our high-prio task, hence the
1398          * preempt_disable before unlock. Pairs with preempt_enable() in
1399          * rt_mutex_postunlock();
1400          */
1401         preempt_disable();
1402
1403         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1404
1405         return true; /* call rt_mutex_postunlock() */
1406 }
1407
1408 /*
1409  * debug aware fast / slowpath lock,trylock,unlock
1410  *
1411  * The atomic acquire/release ops are compiled away, when either the
1412  * architecture does not support cmpxchg or when debugging is enabled.
1413  */
1414 static inline int
1415 rt_mutex_fastlock(struct rt_mutex *lock, int state,
1416                   int (*slowfn)(struct rt_mutex *lock, int state,
1417                                 struct hrtimer_sleeper *timeout,
1418                                 enum rtmutex_chainwalk chwalk))
1419 {
1420         if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1421                 return 0;
1422
1423         return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
1424 }
1425
1426 static inline int
1427 rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
1428                         struct hrtimer_sleeper *timeout,
1429                         enum rtmutex_chainwalk chwalk,
1430                         int (*slowfn)(struct rt_mutex *lock, int state,
1431                                       struct hrtimer_sleeper *timeout,
1432                                       enum rtmutex_chainwalk chwalk))
1433 {
1434         if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
1435             likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1436                 return 0;
1437
1438         return slowfn(lock, state, timeout, chwalk);
1439 }
1440
1441 static inline int
1442 rt_mutex_fasttrylock(struct rt_mutex *lock,
1443                      int (*slowfn)(struct rt_mutex *lock))
1444 {
1445         if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1446                 return 1;
1447
1448         return slowfn(lock);
1449 }
1450
/*
 * Second half of an unlock which queued a waiter: wake the task(s) on
 * @wake_q and re-enable preemption.
 */
void rt_mutex_postunlock(struct wake_q_head *wake_q)
{
	wake_up_q(wake_q);

	/* Counterpart of the preempt_disable() in rt_mutex_slowunlock() */
	preempt_enable();
}
1461
1462 static inline void
1463 rt_mutex_fastunlock(struct rt_mutex *lock,
1464                     bool (*slowfn)(struct rt_mutex *lock,
1465                                    struct wake_q_head *wqh))
1466 {
1467         DEFINE_WAKE_Q(wake_q);
1468
1469         if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
1470                 return;
1471
1472         if (slowfn(lock, &wake_q))
1473                 rt_mutex_postunlock(&wake_q);
1474 }
1475
1476 /**
1477  * rt_mutex_lock - lock a rt_mutex
1478  *
1479  * @lock: the rt_mutex to be locked
1480  */
1481 void __sched rt_mutex_lock(struct rt_mutex *lock)
1482 {
1483         might_sleep();
1484
1485         rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
1486 }
1487 EXPORT_SYMBOL_GPL(rt_mutex_lock);
1488
1489 /**
1490  * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
1491  *
1492  * @lock:               the rt_mutex to be locked
1493  *
1494  * Returns:
1495  *  0           on success
1496  * -EINTR       when interrupted by a signal
1497  */
1498 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
1499 {
1500         might_sleep();
1501
1502         return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
1503 }
1504 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
1505
1506 /*
1507  * Futex variant, must not use fastpath.
1508  */
1509 int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
1510 {
1511         return rt_mutex_slowtrylock(lock);
1512 }
1513
1514 /**
1515  * rt_mutex_timed_lock - lock a rt_mutex interruptible
1516  *                      the timeout structure is provided
1517  *                      by the caller
1518  *
1519  * @lock:               the rt_mutex to be locked
1520  * @timeout:            timeout structure or NULL (no timeout)
1521  *
1522  * Returns:
1523  *  0           on success
1524  * -EINTR       when interrupted by a signal
1525  * -ETIMEDOUT   when the timeout expired
1526  */
1527 int
1528 rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
1529 {
1530         might_sleep();
1531
1532         return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
1533                                        RT_MUTEX_MIN_CHAINWALK,
1534                                        rt_mutex_slowlock);
1535 }
1536 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
1537
1538 /**
1539  * rt_mutex_trylock - try to lock a rt_mutex
1540  *
1541  * @lock:       the rt_mutex to be locked
1542  *
1543  * This function can only be called in thread context. It's safe to
1544  * call it from atomic regions, but not from hard interrupt or soft
1545  * interrupt context.
1546  *
1547  * Returns 1 on success and 0 on contention
1548  */
1549 int __sched rt_mutex_trylock(struct rt_mutex *lock)
1550 {
1551         if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
1552                 return 0;
1553
1554         return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
1555 }
1556 EXPORT_SYMBOL_GPL(rt_mutex_trylock);
1557
1558 /**
1559  * rt_mutex_unlock - unlock a rt_mutex
1560  *
1561  * @lock: the rt_mutex to be unlocked
1562  */
1563 void __sched rt_mutex_unlock(struct rt_mutex *lock)
1564 {
1565         rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
1566 }
1567 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
1568
1569 /**
1570  * Futex variant, that since futex variants do not use the fast-path, can be
1571  * simple and will not need to retry.
1572  */
1573 bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
1574                                     struct wake_q_head *wake_q)
1575 {
1576         lockdep_assert_held(&lock->wait_lock);
1577
1578         debug_rt_mutex_unlock(lock);
1579
1580         if (!rt_mutex_has_waiters(lock)) {
1581                 lock->owner = NULL;
1582                 return false; /* done */
1583         }
1584
1585         mark_wakeup_next_waiter(wake_q, lock);
1586         /*
1587          * We've already deboosted, retain preempt_disabled when dropping
1588          * the wait_lock to avoid inversion until the wakeup. Matched
1589          * by rt_mutex_postunlock();
1590          */
1591         preempt_disable();
1592
1593         return true; /* call postunlock() */
1594 }
1595
1596 void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
1597 {
1598         DEFINE_WAKE_Q(wake_q);
1599         bool postunlock;
1600
1601         raw_spin_lock_irq(&lock->wait_lock);
1602         postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
1603         raw_spin_unlock_irq(&lock->wait_lock);
1604
1605         if (postunlock)
1606                 rt_mutex_postunlock(&wake_q);
1607 }
1608
/**
 * rt_mutex_destroy - mark a mutex unusable
 * @lock: the mutex to be destroyed
 *
 * This function marks the mutex uninitialized, and any subsequent
 * use of the mutex is forbidden. The mutex must not be locked when
 * this function is called; a locked mutex triggers a warning.
 */
void rt_mutex_destroy(struct rt_mutex *lock)
{
	WARN_ON(rt_mutex_is_locked(lock));
#ifdef CONFIG_DEBUG_RT_MUTEXES
	/* Only debug builds carry the magic field. */
	lock->magic = NULL;
#endif
}

EXPORT_SYMBOL_GPL(rt_mutex_destroy);
1626
1627 /**
1628  * __rt_mutex_init - initialize the rt lock
1629  *
1630  * @lock: the rt lock to be initialized
1631  *
1632  * Initialize the rt lock to unlocked state.
1633  *
1634  * Initializing of a locked rt lock is not allowed
1635  */
1636 void __rt_mutex_init(struct rt_mutex *lock, const char *name)
1637 {
1638         lock->owner = NULL;
1639         raw_spin_lock_init(&lock->wait_lock);
1640         lock->waiters = RB_ROOT;
1641         lock->waiters_leftmost = NULL;
1642
1643         debug_rt_mutex_init(lock, name);
1644 }
1645 EXPORT_SYMBOL_GPL(__rt_mutex_init);
1646
1647 /**
1648  * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
1649  *                              proxy owner
1650  *
1651  * @lock:       the rt_mutex to be locked
1652  * @proxy_owner:the task to set as owner
1653  *
1654  * No locking. Caller has to do serializing itself
1655  *
1656  * Special API call for PI-futex support. This initializes the rtmutex and
1657  * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not
1658  * possible at this point because the pi_state which contains the rtmutex
1659  * is not yet visible to other tasks.
1660  */
1661 void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
1662                                 struct task_struct *proxy_owner)
1663 {
1664         __rt_mutex_init(lock, NULL);
1665         debug_rt_mutex_proxy_lock(lock, proxy_owner);
1666         rt_mutex_set_owner(lock, proxy_owner);
1667 }
1668
1669 /**
1670  * rt_mutex_proxy_unlock - release a lock on behalf of owner
1671  *
1672  * @lock:       the rt_mutex to be locked
1673  *
1674  * No locking. Caller has to do serializing itself
1675  *
1676  * Special API call for PI-futex support. This merrily cleans up the rtmutex
1677  * (debugging) state. Concurrent operations on this rt_mutex are not
1678  * possible because it belongs to the pi_state which is about to be freed
1679  * and it is not longer visible to other tasks.
1680  */
1681 void rt_mutex_proxy_unlock(struct rt_mutex *lock,
1682                            struct task_struct *proxy_owner)
1683 {
1684         debug_rt_mutex_proxy_unlock(lock);
1685         rt_mutex_set_owner(lock, NULL);
1686 }
1687
1688 int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1689                               struct rt_mutex_waiter *waiter,
1690                               struct task_struct *task)
1691 {
1692         int ret;
1693
1694         if (try_to_take_rt_mutex(lock, task, NULL))
1695                 return 1;
1696
1697         /* We enforce deadlock detection for futexes */
1698         ret = task_blocks_on_rt_mutex(lock, waiter, task,
1699                                       RT_MUTEX_FULL_CHAINWALK);
1700
1701         if (ret && !rt_mutex_owner(lock)) {
1702                 /*
1703                  * Reset the return value. We might have
1704                  * returned with -EDEADLK and the owner
1705                  * released the lock while we were walking the
1706                  * pi chain.  Let the waiter sort it out.
1707                  */
1708                 ret = 0;
1709         }
1710
1711         if (unlikely(ret))
1712                 remove_waiter(lock, waiter);
1713
1714         debug_rt_mutex_print_deadlock(waiter);
1715
1716         return ret;
1717 }
1718
1719 /**
1720  * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
1721  * @lock:               the rt_mutex to take
1722  * @waiter:             the pre-initialized rt_mutex_waiter
1723  * @task:               the task to prepare
1724  *
1725  * Returns:
1726  *  0 - task blocked on lock
1727  *  1 - acquired the lock for task, caller should wake it up
1728  * <0 - error
1729  *
1730  * Special API call for FUTEX_REQUEUE_PI support.
1731  */
1732 int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1733                               struct rt_mutex_waiter *waiter,
1734                               struct task_struct *task)
1735 {
1736         int ret;
1737
1738         raw_spin_lock_irq(&lock->wait_lock);
1739         ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
1740         raw_spin_unlock_irq(&lock->wait_lock);
1741
1742         return ret;
1743 }
1744
1745 /**
1746  * rt_mutex_next_owner - return the next owner of the lock
1747  *
1748  * @lock: the rt lock query
1749  *
1750  * Returns the next owner of the lock or NULL
1751  *
1752  * Caller has to serialize against other accessors to the lock
1753  * itself.
1754  *
1755  * Special API call for PI-futex support
1756  */
1757 struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
1758 {
1759         if (!rt_mutex_has_waiters(lock))
1760                 return NULL;
1761
1762         return rt_mutex_top_waiter(lock)->task;
1763 }
1764
1765 /**
1766  * rt_mutex_wait_proxy_lock() - Wait for lock acquisition
1767  * @lock:               the rt_mutex we were woken on
1768  * @to:                 the timeout, null if none. hrtimer should already have
1769  *                      been started.
1770  * @waiter:             the pre-initialized rt_mutex_waiter
1771  *
1772  * Wait for the the lock acquisition started on our behalf by
1773  * rt_mutex_start_proxy_lock(). Upon failure, the caller must call
1774  * rt_mutex_cleanup_proxy_lock().
1775  *
1776  * Returns:
1777  *  0 - success
1778  * <0 - error, one of -EINTR, -ETIMEDOUT
1779  *
1780  * Special API call for PI-futex support
1781  */
1782 int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
1783                                struct hrtimer_sleeper *to,
1784                                struct rt_mutex_waiter *waiter)
1785 {
1786         int ret;
1787
1788         raw_spin_lock_irq(&lock->wait_lock);
1789
1790         set_current_state(TASK_INTERRUPTIBLE);
1791
1792         /* sleep on the mutex */
1793         ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
1794
1795         raw_spin_unlock_irq(&lock->wait_lock);
1796
1797         return ret;
1798 }
1799
1800 /**
1801  * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition
1802  * @lock:               the rt_mutex we were woken on
1803  * @waiter:             the pre-initialized rt_mutex_waiter
1804  *
1805  * Attempt to clean up after a failed rt_mutex_wait_proxy_lock().
1806  *
1807  * Unless we acquired the lock; we're still enqueued on the wait-list and can
1808  * in fact still be granted ownership until we're removed. Therefore we can
1809  * find we are in fact the owner and must disregard the
1810  * rt_mutex_wait_proxy_lock() failure.
1811  *
1812  * Returns:
1813  *  true  - did the cleanup, we done.
1814  *  false - we acquired the lock after rt_mutex_wait_proxy_lock() returned,
1815  *          caller should disregards its return value.
1816  *
1817  * Special API call for PI-futex support
1818  */
1819 bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
1820                                  struct rt_mutex_waiter *waiter)
1821 {
1822         bool cleanup = false;
1823
1824         raw_spin_lock_irq(&lock->wait_lock);
1825         /*
1826          * Unless we're the owner; we're still enqueued on the wait_list.
1827          * So check if we became owner, if not, take us off the wait_list.
1828          */
1829         if (rt_mutex_owner(lock) != current) {
1830                 remove_waiter(lock, waiter);
1831                 fixup_rt_mutex_waiters(lock);
1832                 cleanup = true;
1833         }
1834
1835         /*
1836          * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
1837          * have to fix that up.
1838          */
1839         fixup_rt_mutex_waiters(lock);
1840
1841         raw_spin_unlock_irq(&lock->wait_lock);
1842
1843         return cleanup;
1844 }