]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - kernel/futex.c
locking/spinlock/debug: Fix various data races
[mirror_ubuntu-bionic-kernel.git] / kernel / futex.c
index 959af4463c408121f00a4c05a01bfb2dc6a02370..f984a818fcb595536cf9f965650afd2721216b53 100644 (file)
@@ -1176,17 +1176,47 @@ out_error:
        return ret;
 }
 
+/**
+ * wait_for_owner_exiting - Block until the owner has exited
+ * @exiting:   Pointer to the exiting task
+ *
+ * Caller must hold a refcount on @exiting.
+ */
+static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
+{
+       if (ret != -EBUSY) {
+               WARN_ON_ONCE(exiting);
+               return;
+       }
+
+       if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
+               return;
+
+       mutex_lock(&exiting->futex_exit_mutex);
+       /*
+        * No point in doing state checking here. If the waiter got here
+        * while the task was in exec()->exec_futex_release() then it can
+        * have any FUTEX_STATE_* value when the waiter has acquired the
+        * mutex. OK, if running, EXITING or DEAD if it reached exit()
+        * already. Highly unlikely and not a problem. Just one more round
+        * through the futex maze.
+        */
+       mutex_unlock(&exiting->futex_exit_mutex);
+
+       put_task_struct(exiting);
+}
+
 static int handle_exit_race(u32 __user *uaddr, u32 uval,
                            struct task_struct *tsk)
 {
        u32 uval2;
 
        /*
-        * If the futex exit state is not yet FUTEX_STATE_DEAD, wait
-        * for it to finish.
+        * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
+        * caller that the alleged owner is busy.
         */
        if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
-               return -EAGAIN;
+               return -EBUSY;
 
        /*
         * Reread the user space value to handle the following situation:
@@ -1237,7 +1267,8 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
  * it after doing proper sanity checks.
  */
 static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
-                             struct futex_pi_state **ps)
+                             struct futex_pi_state **ps,
+                             struct task_struct **exiting)
 {
        pid_t pid = uval & FUTEX_TID_MASK;
        struct futex_pi_state *pi_state;
@@ -1276,7 +1307,19 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
                int ret = handle_exit_race(uaddr, uval, p);
 
                raw_spin_unlock_irq(&p->pi_lock);
-               put_task_struct(p);
+               /*
+                * If the owner task is between FUTEX_STATE_EXITING and
+                * FUTEX_STATE_DEAD then store the task pointer and keep
+                * the reference on the task struct. The calling code will
+                * drop all locks, wait for the task to reach
+                * FUTEX_STATE_DEAD and then drop the refcount. This is
+                * required to prevent a live lock when the current task
+                * preempted the exiting task between the two states.
+                */
+               if (ret == -EBUSY)
+                       *exiting = p;
+               else
+                       put_task_struct(p);
                return ret;
        }
 
@@ -1315,7 +1358,8 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
 
 static int lookup_pi_state(u32 __user *uaddr, u32 uval,
                           struct futex_hash_bucket *hb,
-                          union futex_key *key, struct futex_pi_state **ps)
+                          union futex_key *key, struct futex_pi_state **ps,
+                          struct task_struct **exiting)
 {
        struct futex_q *top_waiter = futex_top_waiter(hb, key);
 
@@ -1330,7 +1374,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
         * We are the first waiter - try to look up the owner based on
         * @uval and attach to it.
         */
-       return attach_to_pi_owner(uaddr, uval, key, ps);
+       return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
 }
 
 static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
@@ -1358,6 +1402,8 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
  *                     lookup
  * @task:              the task to perform the atomic lock work for.  This will
  *                     be "current" except in the case of requeue pi.
+ * @exiting:           Pointer to store the task pointer of the owner task
+ *                     which is in the middle of exiting
  * @set_waiters:       force setting the FUTEX_WAITERS bit (1) or not (0)
  *
  * Return:
@@ -1366,11 +1412,17 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
  *  - <0 - error
  *
  * The hb->lock and futex_key refs shall be held by the caller.
+ *
+ * @exiting is only set when the return value is -EBUSY. If so, this holds
+ * a refcount on the exiting task on return and the caller needs to drop it
+ * after waiting for the exit to complete.
  */
 static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
                                union futex_key *key,
                                struct futex_pi_state **ps,
-                               struct task_struct *task, int set_waiters)
+                               struct task_struct *task,
+                               struct task_struct **exiting,
+                               int set_waiters)
 {
        u32 uval, newval, vpid = task_pid_vnr(task);
        struct futex_q *top_waiter;
@@ -1440,7 +1492,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
         * attach to the owner. If that fails, no harm done, we only
         * set the FUTEX_WAITERS bit in the user space variable.
         */
-       return attach_to_pi_owner(uaddr, newval, key, ps);
+       return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
 }
 
 /**
@@ -1859,6 +1911,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
  * @key1:              the from futex key
  * @key2:              the to futex key
  * @ps:                        address to store the pi_state pointer
+ * @exiting:           Pointer to store the task pointer of the owner task
+ *                     which is in the middle of exiting
  * @set_waiters:       force setting the FUTEX_WAITERS bit (1) or not (0)
  *
  * Try and get the lock on behalf of the top waiter if we can do it atomically.
@@ -1866,16 +1920,20 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
  * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
  * hb1 and hb2 must be held by the caller.
  *
+ * @exiting is only set when the return value is -EBUSY. If so, this holds
+ * a refcount on the exiting task on return and the caller needs to drop it
+ * after waiting for the exit to complete.
+ *
  * Return:
  *  -  0 - failed to acquire the lock atomically;
  *  - >0 - acquired the lock, return value is vpid of the top_waiter
  *  - <0 - error
  */
-static int futex_proxy_trylock_atomic(u32 __user *pifutex,
-                                struct futex_hash_bucket *hb1,
-                                struct futex_hash_bucket *hb2,
-                                union futex_key *key1, union futex_key *key2,
-                                struct futex_pi_state **ps, int set_waiters)
+static int
+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
+                          struct futex_hash_bucket *hb2, union futex_key *key1,
+                          union futex_key *key2, struct futex_pi_state **ps,
+                          struct task_struct **exiting, int set_waiters)
 {
        struct futex_q *top_waiter = NULL;
        u32 curval;
@@ -1912,7 +1970,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
         */
        vpid = task_pid_vnr(top_waiter->task);
        ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
-                                  set_waiters);
+                                  exiting, set_waiters);
        if (ret == 1) {
                requeue_pi_wake_futex(top_waiter, key2, hb2);
                return vpid;
@@ -2041,6 +2099,8 @@ retry_private:
        }
 
        if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
+               struct task_struct *exiting = NULL;
+
                /*
                 * Attempt to acquire uaddr2 and wake the top waiter. If we
                 * intend to requeue waiters, force setting the FUTEX_WAITERS
@@ -2048,7 +2108,8 @@ retry_private:
                 * faults rather in the requeue loop below.
                 */
                ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
-                                                &key2, &pi_state, nr_requeue);
+                                                &key2, &pi_state,
+                                                &exiting, nr_requeue);
 
                /*
                 * At this point the top_waiter has either taken uaddr2 or is
@@ -2075,7 +2136,8 @@ retry_private:
                         * If that call succeeds then we have pi_state and an
                         * initial refcount on it.
                         */
-                       ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state);
+                       ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
+                                             &pi_state, &exiting);
                }
 
                switch (ret) {
@@ -2093,17 +2155,24 @@ retry_private:
                        if (!ret)
                                goto retry;
                        goto out;
+               case -EBUSY:
                case -EAGAIN:
                        /*
                         * Two reasons for this:
-                        * - Owner is exiting and we just wait for the
+                        * - EBUSY: Owner is exiting and we just wait for the
                         *   exit to complete.
-                        * - The user space value changed.
+                        * - EAGAIN: The user space value changed.
                         */
                        double_unlock_hb(hb1, hb2);
                        hb_waiters_dec(hb2);
                        put_futex_key(&key2);
                        put_futex_key(&key1);
+                       /*
+                        * Handle the case where the owner is in the middle of
+                        * exiting. Wait for the exit to complete otherwise
+                        * this task might loop forever, aka. live lock.
+                        */
+                       wait_for_owner_exiting(ret, exiting);
                        cond_resched();
                        goto retry;
                default:
@@ -2819,6 +2888,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
 {
        struct hrtimer_sleeper timeout, *to = NULL;
        struct futex_pi_state *pi_state = NULL;
+       struct task_struct *exiting = NULL;
        struct rt_mutex_waiter rt_waiter;
        struct futex_hash_bucket *hb;
        struct futex_q q = futex_q_init;
@@ -2846,7 +2916,8 @@ retry:
 retry_private:
        hb = queue_lock(&q);
 
-       ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
+       ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
+                                  &exiting, 0);
        if (unlikely(ret)) {
                /*
                 * Atomic work succeeded and we got the lock,
@@ -2859,15 +2930,22 @@ retry_private:
                        goto out_unlock_put_key;
                case -EFAULT:
                        goto uaddr_faulted;
+               case -EBUSY:
                case -EAGAIN:
                        /*
                         * Two reasons for this:
-                        * - Task is exiting and we just wait for the
+                        * - EBUSY: Task is exiting and we just wait for the
                         *   exit to complete.
-                        * - The user space value changed.
+                        * - EAGAIN: The user space value changed.
                         */
                        queue_unlock(hb);
                        put_futex_key(&q.key);
+                       /*
+                        * Handle the case where the owner is in the middle of
+                        * exiting. Wait for the exit to complete otherwise
+                        * this task might loop forever, aka. live lock.
+                        */
+                       wait_for_owner_exiting(ret, exiting);
                        cond_resched();
                        goto retry;
                default: