mm/hugetlb: make userfaultfd_huge_must_wait() safe to pmd unshare
author    Peter Xu <peterx@redhat.com>
          Fri, 16 Dec 2022 15:52:17 +0000 (10:52 -0500)
committer Andrew Morton <akpm@linux-foundation.org>
          Thu, 19 Jan 2023 01:12:38 +0000 (17:12 -0800)
We can take the hugetlb walker lock here by taking the vma lock directly.
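
For context, the ordering matters because hugetlb_vma_lock_read() can sleep,
and sleeping may clobber the task state that set_current_state() installs, so
the lock has to be taken while the task is still TASK_RUNNING. A condensed
sketch of the resulting sequence in handle_userfault() (abridged from the
hunks below; comments and error handling omitted, not the verbatim source):

	/* Take the sleepable vma lock before changing the task state. */
	if (is_vm_hugetlb_page(vma))
		hugetlb_vma_lock_read(vma);

	spin_lock_irq(&ctx->fault_pending_wqh.lock);
	__add_wait_queue(&ctx->fault_pending_wqh, &uwq.wq);
	set_current_state(blocking_state);  /* no sleeping locks past this point */
	spin_unlock_irq(&ctx->fault_pending_wqh.lock);

	if (!is_vm_hugetlb_page(vma))
		must_wait = userfaultfd_must_wait(ctx, vmf->address,
						  vmf->flags, reason);
	else
		/* vma lock held: the walk cannot race with pmd unshare */
		must_wait = userfaultfd_huge_must_wait(ctx, vma, vmf->address,
						       vmf->flags, reason);

	if (is_vm_hugetlb_page(vma))
		hugetlb_vma_unlock_read(vma);
	mmap_read_unlock(mm);

Scoping the vma lock to just the userfaultfd_huge_must_wait() walk, and
dropping it before the task actually goes to sleep, keeps the critical
section limited to the page table walk alone.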

Link: https://lkml.kernel.org/r/20221216155217.2043700-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index cc694846617a52bc874b86a85377b7a97811cc49..3b1797e0448a5d1b85f7608343a6ba8b0f8c4eae 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -391,7 +391,8 @@ static inline unsigned int userfaultfd_get_blocking_state(unsigned int flags)
  */
 vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 {
-       struct mm_struct *mm = vmf->vma->vm_mm;
+       struct vm_area_struct *vma = vmf->vma;
+       struct mm_struct *mm = vma->vm_mm;
        struct userfaultfd_ctx *ctx;
        struct userfaultfd_wait_queue uwq;
        vm_fault_t ret = VM_FAULT_SIGBUS;
@@ -418,7 +419,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
         */
        mmap_assert_locked(mm);
 
-       ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
+       ctx = vma->vm_userfaultfd_ctx.ctx;
        if (!ctx)
                goto out;
 
@@ -508,6 +509,15 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 
        blocking_state = userfaultfd_get_blocking_state(vmf->flags);
 
+       /*
+        * Take the vma lock now, in order to safely call
+        * userfaultfd_huge_must_wait() later. Since acquiring the
+        * (sleepable) vma lock can modify the current task state, that
+        * must be before explicitly calling set_current_state().
+        */
+       if (is_vm_hugetlb_page(vma))
+               hugetlb_vma_lock_read(vma);
+
        spin_lock_irq(&ctx->fault_pending_wqh.lock);
        /*
         * After the __add_wait_queue the uwq is visible to userland
@@ -522,13 +532,15 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
        set_current_state(blocking_state);
        spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 
-       if (!is_vm_hugetlb_page(vmf->vma))
+       if (!is_vm_hugetlb_page(vma))
                must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
                                                  reason);
        else
-               must_wait = userfaultfd_huge_must_wait(ctx, vmf->vma,
+               must_wait = userfaultfd_huge_must_wait(ctx, vma,
                                                       vmf->address,
                                                       vmf->flags, reason);
+       if (is_vm_hugetlb_page(vma))
+               hugetlb_vma_unlock_read(vma);
        mmap_read_unlock(mm);
 
        if (likely(must_wait && !READ_ONCE(ctx->released))) {