]> git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
arm64: mm: don't acquire mutex when rewriting swapper
authorMark Rutland <mark.rutland@arm.com>
Tue, 20 Sep 2022 13:47:31 +0000 (14:47 +0100)
committerWill Deacon <will@kernel.org>
Thu, 22 Sep 2022 11:55:39 +0000 (12:55 +0100)
Since commit:

  47546a1912fc4a03 ("arm64: mm: install KPTI nG mappings with MMU enabled)"

... when building with CONFIG_DEBUG_ATOMIC_SLEEP=y and booting under
QEMU TCG with '-cpu max', there's a boot-time splat:

| BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 15, name: migration/0
| preempt_count: 1, expected: 0
| RCU nest depth: 0, expected: 0
| no locks held by migration/0/15.
| irq event stamp: 28
| hardirqs last  enabled at (27): [<ffff8000091ed180>] _raw_spin_unlock_irq+0x3c/0x7c
| hardirqs last disabled at (28): [<ffff8000081b8d74>] multi_cpu_stop+0x150/0x18c
| softirqs last  enabled at (0): [<ffff80000809a314>] copy_process+0x594/0x1964
| softirqs last disabled at (0): [<0000000000000000>] 0x0
| CPU: 0 PID: 15 Comm: migration/0 Not tainted 6.0.0-rc3-00002-g419b42ff7eef #3
| Hardware name: linux,dummy-virt (DT)
| Stopper: multi_cpu_stop+0x0/0x18c <- stop_cpus.constprop.0+0xa0/0xfc
| Call trace:
|  dump_backtrace.part.0+0xd0/0xe0
|  show_stack+0x1c/0x5c
|  dump_stack_lvl+0x88/0xb4
|  dump_stack+0x1c/0x38
|  __might_resched+0x180/0x230
|  __might_sleep+0x4c/0xa0
|  __mutex_lock+0x5c/0x450
|  mutex_lock_nested+0x30/0x40
|  create_kpti_ng_temp_pgd+0x4fc/0x6d0
|  kpti_install_ng_mappings+0x2b8/0x3b0
|  cpu_enable_non_boot_scope_capabilities+0x7c/0xd0
|  multi_cpu_stop+0xa0/0x18c
|  cpu_stopper_thread+0x88/0x11c
|  smpboot_thread_fn+0x1ec/0x290
|  kthread+0x118/0x120
|  ret_from_fork+0x10/0x20

Since commit:

  ee017ee353506fce ("arm64/mm: avoid fixmap race condition when create pud mapping")

... once the kernel leave the SYSTEM_BOOTING state, the fixmap pagetable
entries are protected by the fixmap_lock mutex.

The new KPTI rewrite code uses __create_pgd_mapping() to create a
temporary pagetable. This happens in atomic context, after secondary
CPUs are brought up and the kernel has left the SYSTEM_BOOTING state.
Hence we try to acquire a mutex in atomic context, which is generally
unsound (though benign in this case as the mutex should be free and all
other CPUs are quiescent).

This patch avoids the issue by pulling the mutex out of alloc_init_pud()
and calling it at a higher level in the pagetable manipulation code.
This allows it to be used without locking where one CPU is known to be
in exclusive control of the machine, even after having left the
SYSTEM_BOOTING state.

Fixes: 47546a1912fc ("arm64: mm: install KPTI nG mappings with MMU enabled")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20220920134731.1625740-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
arch/arm64/mm/mmu.c

index e7ad44585f40abab6293dc4ffbfd08775f1e53ac..eb489302c28a41db00ca0e42d8ed1082753942f2 100644 (file)
@@ -331,12 +331,6 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
        }
        BUG_ON(p4d_bad(p4d));
 
-       /*
-        * No need for locking during early boot. And it doesn't work as
-        * expected with KASLR enabled.
-        */
-       if (system_state != SYSTEM_BOOTING)
-               mutex_lock(&fixmap_lock);
        pudp = pud_set_fixmap_offset(p4dp, addr);
        do {
                pud_t old_pud = READ_ONCE(*pudp);
@@ -368,15 +362,13 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
        } while (pudp++, addr = next, addr != end);
 
        pud_clear_fixmap();
-       if (system_state != SYSTEM_BOOTING)
-               mutex_unlock(&fixmap_lock);
 }
 
-static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
-                                unsigned long virt, phys_addr_t size,
-                                pgprot_t prot,
-                                phys_addr_t (*pgtable_alloc)(int),
-                                int flags)
+static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
+                                       unsigned long virt, phys_addr_t size,
+                                       pgprot_t prot,
+                                       phys_addr_t (*pgtable_alloc)(int),
+                                       int flags)
 {
        unsigned long addr, end, next;
        pgd_t *pgdp = pgd_offset_pgd(pgdir, virt);
@@ -400,8 +392,20 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
        } while (pgdp++, addr = next, addr != end);
 }
 
+static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
+                                unsigned long virt, phys_addr_t size,
+                                pgprot_t prot,
+                                phys_addr_t (*pgtable_alloc)(int),
+                                int flags)
+{
+       mutex_lock(&fixmap_lock);
+       __create_pgd_mapping_locked(pgdir, phys, virt, size, prot,
+                                   pgtable_alloc, flags);
+       mutex_unlock(&fixmap_lock);
+}
+
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-extern __alias(__create_pgd_mapping)
+extern __alias(__create_pgd_mapping_locked)
 void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
                             phys_addr_t size, pgprot_t prot,
                             phys_addr_t (*pgtable_alloc)(int), int flags);