]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/commitdiff
MIPS: Close races in TLB modify handlers.
authorDavid Daney <david.daney@cavium.com>
Tue, 5 Jul 2011 23:34:46 +0000 (16:34 -0700)
committerRalf Baechle <ralf@linux-mips.org>
Tue, 26 Jul 2011 05:47:47 +0000 (06:47 +0100)
Page table entries are made invalid by writing a zero into the the PTE
slot in a page table.  This creates a race condition with the TLB
modify handlers when they are updating the PTE.

CPU0                              CPU1

Test for _PAGE_PRESENT
.                                 set to not _PAGE_PRESENT (zero)
Set to _PAGE_VALID

So now the page not present value (zero) is suddenly valid and user
space programs have access to physical page zero.

We close the race by putting the test for _PAGE_PRESENT and setting of
_PAGE_VALID into an atomic LL/SC section.  This requires more registers
than just K0 and K1 in the handlers, so we need to save some registers
to a save area and then restore them when we are done.

The save area is an array of cacheline aligned structures that should
not suffer cache line bouncing as they are CPU private.

[ralf@linux-mips.org: Fix !defined(CONFIG_MIPS_PGD_C0_CONTEXT) build error.]

Signed-off-by: David Daney <david.daney@cavium.com>
To: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2577/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
arch/mips/mm/tlbex.c

index 424ed4b92e6d9435107a96f260329b3a40ac2992..b6e1cff5066704de206ed92febcb6e8bb7da49b3 100644 (file)
 extern void tlb_do_page_fault_0(void);
 extern void tlb_do_page_fault_1(void);
 
+struct work_registers {
+       int r1;
+       int r2;
+       int r3;
+};
+
+struct tlb_reg_save {
+       unsigned long a;
+       unsigned long b;
+} ____cacheline_aligned_in_smp;
+
+static struct tlb_reg_save handler_reg_save[NR_CPUS];
 
 static inline int r45k_bvahwbug(void)
 {
@@ -248,6 +260,73 @@ static int scratch_reg __cpuinitdata;
 static int pgd_reg __cpuinitdata;
 enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch};
 
+static struct work_registers __cpuinit build_get_work_registers(u32 **p)
+{
+       struct work_registers r;
+
+       int smp_processor_id_reg;
+       int smp_processor_id_sel;
+       int smp_processor_id_shift;
+
+       if (scratch_reg > 0) {
+               /* Save in CPU local C0_KScratch? */
+               UASM_i_MTC0(p, 1, 31, scratch_reg);
+               r.r1 = K0;
+               r.r2 = K1;
+               r.r3 = 1;
+               return r;
+       }
+
+       if (num_possible_cpus() > 1) {
+#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
+               smp_processor_id_shift = 51;
+               smp_processor_id_reg = 20; /* XContext */
+               smp_processor_id_sel = 0;
+#else
+# ifdef CONFIG_32BIT
+               smp_processor_id_shift = 25;
+               smp_processor_id_reg = 4; /* Context */
+               smp_processor_id_sel = 0;
+# endif
+# ifdef CONFIG_64BIT
+               smp_processor_id_shift = 26;
+               smp_processor_id_reg = 4; /* Context */
+               smp_processor_id_sel = 0;
+# endif
+#endif
+               /* Get smp_processor_id */
+               UASM_i_MFC0(p, K0, smp_processor_id_reg, smp_processor_id_sel);
+               UASM_i_SRL_SAFE(p, K0, K0, smp_processor_id_shift);
+
+               /* handler_reg_save index in K0 */
+               UASM_i_SLL(p, K0, K0, ilog2(sizeof(struct tlb_reg_save)));
+
+               UASM_i_LA(p, K1, (long)&handler_reg_save);
+               UASM_i_ADDU(p, K0, K0, K1);
+       } else {
+               UASM_i_LA(p, K0, (long)&handler_reg_save);
+       }
+       /* K0 now points to save area, save $1 and $2  */
+       UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), K0);
+       UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), K0);
+
+       r.r1 = K1;
+       r.r2 = 1;
+       r.r3 = 2;
+       return r;
+}
+
+static void __cpuinit build_restore_work_registers(u32 **p)
+{
+       if (scratch_reg > 0) {
+               UASM_i_MFC0(p, 1, 31, scratch_reg);
+               return;
+       }
+       /* K0 already points to save area, restore $1 and $2  */
+       UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), K0);
+       UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), K0);
+}
+
 #ifndef CONFIG_MIPS_PGD_C0_CONTEXT
 
 /*
@@ -1160,9 +1239,6 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)
        memset(relocs, 0, sizeof(relocs));
        memset(final_handler, 0, sizeof(final_handler));
 
-       if (scratch_reg == 0)
-               scratch_reg = allocate_kscratch();
-
        if ((scratch_reg > 0 || scratchpad_available()) && use_bbit_insns()) {
                htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1,
                                                          scratch_reg);
@@ -1462,22 +1538,28 @@ iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr,
  */
 static void __cpuinit
 build_pte_present(u32 **p, struct uasm_reloc **r,
-                 unsigned int pte, unsigned int ptr, enum label_id lid)
+                 int pte, int ptr, int scratch, enum label_id lid)
 {
+       int t = scratch >= 0 ? scratch : pte;
+
        if (kernel_uses_smartmips_rixi) {
                if (use_bbit_insns()) {
                        uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid);
                        uasm_i_nop(p);
                } else {
-                       uasm_i_andi(p, pte, pte, _PAGE_PRESENT);
-                       uasm_il_beqz(p, r, pte, lid);
-                       iPTE_LW(p, pte, ptr);
+                       uasm_i_andi(p, t, pte, _PAGE_PRESENT);
+                       uasm_il_beqz(p, r, t, lid);
+                       if (pte == t)
+                               /* You lose the SMP race :-(*/
+                               iPTE_LW(p, pte, ptr);
                }
        } else {
-               uasm_i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_READ);
-               uasm_i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_READ);
-               uasm_il_bnez(p, r, pte, lid);
-               iPTE_LW(p, pte, ptr);
+               uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_READ);
+               uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_READ);
+               uasm_il_bnez(p, r, t, lid);
+               if (pte == t)
+                       /* You lose the SMP race :-(*/
+                       iPTE_LW(p, pte, ptr);
        }
 }
 
@@ -1497,19 +1579,19 @@ build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte,
  */
 static void __cpuinit
 build_pte_writable(u32 **p, struct uasm_reloc **r,
-                  unsigned int pte, unsigned int ptr, enum label_id lid)
+                  unsigned int pte, unsigned int ptr, int scratch,
+                  enum label_id lid)
 {
-       if (use_bbit_insns()) {
-               uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid);
-               uasm_i_nop(p);
-               uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid);
-               uasm_i_nop(p);
-       } else {
-               uasm_i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE);
-               uasm_i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE);
-               uasm_il_bnez(p, r, pte, lid);
+       int t = scratch >= 0 ? scratch : pte;
+
+       uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_WRITE);
+       uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_WRITE);
+       uasm_il_bnez(p, r, t, lid);
+       if (pte == t)
+               /* You lose the SMP race :-(*/
                iPTE_LW(p, pte, ptr);
-       }
+       else
+               uasm_i_nop(p);
 }
 
 /* Make PTE writable, update software status bits as well, then store
@@ -1531,15 +1613,19 @@ build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte,
  */
 static void __cpuinit
 build_pte_modifiable(u32 **p, struct uasm_reloc **r,
-                    unsigned int pte, unsigned int ptr, enum label_id lid)
+                    unsigned int pte, unsigned int ptr, int scratch,
+                    enum label_id lid)
 {
        if (use_bbit_insns()) {
                uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid);
                uasm_i_nop(p);
        } else {
-               uasm_i_andi(p, pte, pte, _PAGE_WRITE);
-               uasm_il_beqz(p, r, pte, lid);
-               iPTE_LW(p, pte, ptr);
+               int t = scratch >= 0 ? scratch : pte;
+               uasm_i_andi(p, t, pte, _PAGE_WRITE);
+               uasm_il_beqz(p, r, t, lid);
+               if (pte == t)
+                       /* You lose the SMP race :-(*/
+                       iPTE_LW(p, pte, ptr);
        }
 }
 
@@ -1619,7 +1705,7 @@ static void __cpuinit build_r3000_tlb_load_handler(void)
        memset(relocs, 0, sizeof(relocs));
 
        build_r3000_tlbchange_handler_head(&p, K0, K1);
-       build_pte_present(&p, &r, K0, K1, label_nopage_tlbl);
+       build_pte_present(&p, &r, K0, K1, -1, label_nopage_tlbl);
        uasm_i_nop(&p); /* load delay */
        build_make_valid(&p, &r, K0, K1);
        build_r3000_tlb_reload_write(&p, &l, &r, K0, K1);
@@ -1649,7 +1735,7 @@ static void __cpuinit build_r3000_tlb_store_handler(void)
        memset(relocs, 0, sizeof(relocs));
 
        build_r3000_tlbchange_handler_head(&p, K0, K1);
-       build_pte_writable(&p, &r, K0, K1, label_nopage_tlbs);
+       build_pte_writable(&p, &r, K0, K1, -1, label_nopage_tlbs);
        uasm_i_nop(&p); /* load delay */
        build_make_write(&p, &r, K0, K1);
        build_r3000_tlb_reload_write(&p, &l, &r, K0, K1);
@@ -1673,13 +1759,14 @@ static void __cpuinit build_r3000_tlb_modify_handler(void)
        u32 *p = handle_tlbm;
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
+       struct work_registers wr;
 
        memset(handle_tlbm, 0, sizeof(handle_tlbm));
        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));
 
        build_r3000_tlbchange_handler_head(&p, K0, K1);
-       build_pte_modifiable(&p, &r, K0, K1, label_nopage_tlbm);
+       build_pte_modifiable(&p, &r, wr.r1, wr.r2,  wr.r3, label_nopage_tlbm);
        uasm_i_nop(&p); /* load delay */
        build_make_write(&p, &r, K0, K1);
        build_r3000_pte_reload_tlbwi(&p, K0, K1);
@@ -1702,15 +1789,16 @@ static void __cpuinit build_r3000_tlb_modify_handler(void)
 /*
  * R4000 style TLB load/store/modify handlers.
  */
-static void __cpuinit
+static struct work_registers __cpuinit
 build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l,
-                                  struct uasm_reloc **r, unsigned int pte,
-                                  unsigned int ptr)
+                                  struct uasm_reloc **r)
 {
+       struct work_registers wr = build_get_work_registers(p);
+
 #ifdef CONFIG_64BIT
-       build_get_pmde64(p, l, r, pte, ptr); /* get pmd in ptr */
+       build_get_pmde64(p, l, r, wr.r1, wr.r2); /* get pmd in ptr */
 #else
-       build_get_pgde32(p, pte, ptr); /* get pgd in ptr */
+       build_get_pgde32(p, wr.r1, wr.r2); /* get pgd in ptr */
 #endif
 
 #ifdef CONFIG_HUGETLB_PAGE
@@ -1719,21 +1807,22 @@ build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l,
         * instead contains the tlb pte. Check the PAGE_HUGE bit and
         * see if we need to jump to huge tlb processing.
         */
-       build_is_huge_pte(p, r, pte, ptr, label_tlb_huge_update);
+       build_is_huge_pte(p, r, wr.r1, wr.r2, label_tlb_huge_update);
 #endif
 
-       UASM_i_MFC0(p, pte, C0_BADVADDR);
-       UASM_i_LW(p, ptr, 0, ptr);
-       UASM_i_SRL(p, pte, pte, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2);
-       uasm_i_andi(p, pte, pte, (PTRS_PER_PTE - 1) << PTE_T_LOG2);
-       UASM_i_ADDU(p, ptr, ptr, pte);
+       UASM_i_MFC0(p, wr.r1, C0_BADVADDR);
+       UASM_i_LW(p, wr.r2, 0, wr.r2);
+       UASM_i_SRL(p, wr.r1, wr.r1, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2);
+       uasm_i_andi(p, wr.r1, wr.r1, (PTRS_PER_PTE - 1) << PTE_T_LOG2);
+       UASM_i_ADDU(p, wr.r2, wr.r2, wr.r1);
 
 #ifdef CONFIG_SMP
        uasm_l_smp_pgtable_change(l, *p);
 #endif
-       iPTE_LW(p, pte, ptr); /* get even pte */
+       iPTE_LW(p, wr.r1, wr.r2); /* get even pte */
        if (!m4kc_tlbp_war())
                build_tlb_probe_entry(p);
+       return wr;
 }
 
 static void __cpuinit
@@ -1746,6 +1835,7 @@ build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l,
        build_update_entries(p, tmp, ptr);
        build_tlb_write_entry(p, l, r, tlb_indexed);
        uasm_l_leave(l, *p);
+       build_restore_work_registers(p);
        uasm_i_eret(p); /* return from trap */
 
 #ifdef CONFIG_64BIT
@@ -1758,6 +1848,7 @@ static void __cpuinit build_r4000_tlb_load_handler(void)
        u32 *p = handle_tlbl;
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
+       struct work_registers wr;
 
        memset(handle_tlbl, 0, sizeof(handle_tlbl));
        memset(labels, 0, sizeof(labels));
@@ -1777,8 +1868,8 @@ static void __cpuinit build_r4000_tlb_load_handler(void)
                /* No need for uasm_i_nop */
        }
 
-       build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1);
-       build_pte_present(&p, &r, K0, K1, label_nopage_tlbl);
+       wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
+       build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl);
        if (m4kc_tlbp_war())
                build_tlb_probe_entry(&p);
 
@@ -1788,44 +1879,43 @@ static void __cpuinit build_r4000_tlb_load_handler(void)
                 * have triggered it.  Skip the expensive test..
                 */
                if (use_bbit_insns()) {
-                       uasm_il_bbit0(&p, &r, K0, ilog2(_PAGE_VALID),
+                       uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID),
                                      label_tlbl_goaround1);
                } else {
-                       uasm_i_andi(&p, K0, K0, _PAGE_VALID);
-                       uasm_il_beqz(&p, &r, K0, label_tlbl_goaround1);
+                       uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID);
+                       uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround1);
                }
                uasm_i_nop(&p);
 
                uasm_i_tlbr(&p);
                /* Examine  entrylo 0 or 1 based on ptr. */
                if (use_bbit_insns()) {
-                       uasm_i_bbit0(&p, K1, ilog2(sizeof(pte_t)), 8);
+                       uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8);
                } else {
-                       uasm_i_andi(&p, K0, K1, sizeof(pte_t));
-                       uasm_i_beqz(&p, K0, 8);
+                       uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t));
+                       uasm_i_beqz(&p, wr.r3, 8);
                }
-
-               UASM_i_MFC0(&p, K0, C0_ENTRYLO0); /* load it in the delay slot*/
-               UASM_i_MFC0(&p, K0, C0_ENTRYLO1); /* load it if ptr is odd */
+               /* load it in the delay slot*/
+               UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0);
+               /* load it if ptr is odd */
+               UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1);
                /*
-                * If the entryLo (now in K0) is valid (bit 1), RI or
+                * If the entryLo (now in wr.r3) is valid (bit 1), RI or
                 * XI must have triggered it.
                 */
                if (use_bbit_insns()) {
-                       uasm_il_bbit1(&p, &r, K0, 1, label_nopage_tlbl);
-                       /* Reload the PTE value */
-                       iPTE_LW(&p, K0, K1);
+                       uasm_il_bbit1(&p, &r, wr.r3, 1, label_nopage_tlbl);
+                       uasm_i_nop(&p);
                        uasm_l_tlbl_goaround1(&l, p);
                } else {
-                       uasm_i_andi(&p, K0, K0, 2);
-                       uasm_il_bnez(&p, &r, K0, label_nopage_tlbl);
-                       uasm_l_tlbl_goaround1(&l, p);
-                       /* Reload the PTE value */
-                       iPTE_LW(&p, K0, K1);
+                       uasm_i_andi(&p, wr.r3, wr.r3, 2);
+                       uasm_il_bnez(&p, &r, wr.r3, label_nopage_tlbl);
+                       uasm_i_nop(&p);
                }
+               uasm_l_tlbl_goaround1(&l, p);
        }
-       build_make_valid(&p, &r, K0, K1);
-       build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1);
+       build_make_valid(&p, &r, wr.r1, wr.r2);
+       build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
 
 #ifdef CONFIG_HUGETLB_PAGE
        /*
@@ -1833,8 +1923,8 @@ static void __cpuinit build_r4000_tlb_load_handler(void)
         * spots a huge page.
         */
        uasm_l_tlb_huge_update(&l, p);
-       iPTE_LW(&p, K0, K1);
-       build_pte_present(&p, &r, K0, K1, label_nopage_tlbl);
+       iPTE_LW(&p, wr.r1, wr.r2);
+       build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl);
        build_tlb_probe_entry(&p);
 
        if (kernel_uses_smartmips_rixi) {
@@ -1843,50 +1933,51 @@ static void __cpuinit build_r4000_tlb_load_handler(void)
                 * have triggered it.  Skip the expensive test..
                 */
                if (use_bbit_insns()) {
-                       uasm_il_bbit0(&p, &r, K0, ilog2(_PAGE_VALID),
+                       uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID),
                                      label_tlbl_goaround2);
                } else {
-                       uasm_i_andi(&p, K0, K0, _PAGE_VALID);
-                       uasm_il_beqz(&p, &r, K0, label_tlbl_goaround2);
+                       uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID);
+                       uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2);
                }
                uasm_i_nop(&p);
 
                uasm_i_tlbr(&p);
                /* Examine  entrylo 0 or 1 based on ptr. */
                if (use_bbit_insns()) {
-                       uasm_i_bbit0(&p, K1, ilog2(sizeof(pte_t)), 8);
+                       uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8);
                } else {
-                       uasm_i_andi(&p, K0, K1, sizeof(pte_t));
-                       uasm_i_beqz(&p, K0, 8);
+                       uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t));
+                       uasm_i_beqz(&p, wr.r3, 8);
                }
-               UASM_i_MFC0(&p, K0, C0_ENTRYLO0); /* load it in the delay slot*/
-               UASM_i_MFC0(&p, K0, C0_ENTRYLO1); /* load it if ptr is odd */
+               /* load it in the delay slot*/
+               UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0);
+               /* load it if ptr is odd */
+               UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1);
                /*
-                * If the entryLo (now in K0) is valid (bit 1), RI or
+                * If the entryLo (now in wr.r3) is valid (bit 1), RI or
                 * XI must have triggered it.
                 */
                if (use_bbit_insns()) {
-                       uasm_il_bbit0(&p, &r, K0, 1, label_tlbl_goaround2);
+                       uasm_il_bbit0(&p, &r, wr.r3, 1, label_tlbl_goaround2);
                } else {
-                       uasm_i_andi(&p, K0, K0, 2);
-                       uasm_il_beqz(&p, &r, K0, label_tlbl_goaround2);
+                       uasm_i_andi(&p, wr.r3, wr.r3, 2);
+                       uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2);
                }
-               /* Reload the PTE value */
-               iPTE_LW(&p, K0, K1);
 
                /*
                 * We clobbered C0_PAGEMASK, restore it.  On the other branch
                 * it is restored in build_huge_tlb_write_entry.
                 */
-               build_restore_pagemask(&p, &r, K0, label_nopage_tlbl, 0);
+               build_restore_pagemask(&p, &r, wr.r3, label_nopage_tlbl, 0);
 
                uasm_l_tlbl_goaround2(&l, p);
        }
-       uasm_i_ori(&p, K0, K0, (_PAGE_ACCESSED | _PAGE_VALID));
-       build_huge_handler_tail(&p, &r, &l, K0, K1);
+       uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID));
+       build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
 #endif
 
        uasm_l_nopage_tlbl(&l, p);
+       build_restore_work_registers(&p);
        uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
        uasm_i_nop(&p);
 
@@ -1905,17 +1996,18 @@ static void __cpuinit build_r4000_tlb_store_handler(void)
        u32 *p = handle_tlbs;
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
+       struct work_registers wr;
 
        memset(handle_tlbs, 0, sizeof(handle_tlbs));
        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));
 
-       build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1);
-       build_pte_writable(&p, &r, K0, K1, label_nopage_tlbs);
+       wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
+       build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
        if (m4kc_tlbp_war())
                build_tlb_probe_entry(&p);
-       build_make_write(&p, &r, K0, K1);
-       build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1);
+       build_make_write(&p, &r, wr.r1, wr.r2);
+       build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
 
 #ifdef CONFIG_HUGETLB_PAGE
        /*
@@ -1923,15 +2015,16 @@ static void __cpuinit build_r4000_tlb_store_handler(void)
         * build_r4000_tlbchange_handler_head spots a huge page.
         */
        uasm_l_tlb_huge_update(&l, p);
-       iPTE_LW(&p, K0, K1);
-       build_pte_writable(&p, &r, K0, K1, label_nopage_tlbs);
+       iPTE_LW(&p, wr.r1, wr.r2);
+       build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
        build_tlb_probe_entry(&p);
-       uasm_i_ori(&p, K0, K0,
+       uasm_i_ori(&p, wr.r1, wr.r1,
                   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
-       build_huge_handler_tail(&p, &r, &l, K0, K1);
+       build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
 #endif
 
        uasm_l_nopage_tlbs(&l, p);
+       build_restore_work_registers(&p);
        uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
        uasm_i_nop(&p);
 
@@ -1950,18 +2043,19 @@ static void __cpuinit build_r4000_tlb_modify_handler(void)
        u32 *p = handle_tlbm;
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
+       struct work_registers wr;
 
        memset(handle_tlbm, 0, sizeof(handle_tlbm));
        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));
 
-       build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1);
-       build_pte_modifiable(&p, &r, K0, K1, label_nopage_tlbm);
+       wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
+       build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm);
        if (m4kc_tlbp_war())
                build_tlb_probe_entry(&p);
        /* Present and writable bits set, set accessed and dirty bits. */
-       build_make_write(&p, &r, K0, K1);
-       build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1);
+       build_make_write(&p, &r, wr.r1, wr.r2);
+       build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
 
 #ifdef CONFIG_HUGETLB_PAGE
        /*
@@ -1969,15 +2063,16 @@ static void __cpuinit build_r4000_tlb_modify_handler(void)
         * build_r4000_tlbchange_handler_head spots a huge page.
         */
        uasm_l_tlb_huge_update(&l, p);
-       iPTE_LW(&p, K0, K1);
-       build_pte_modifiable(&p, &r, K0, K1, label_nopage_tlbm);
+       iPTE_LW(&p, wr.r1, wr.r2);
+       build_pte_modifiable(&p, &r, wr.r1, wr.r2,  wr.r3, label_nopage_tlbm);
        build_tlb_probe_entry(&p);
-       uasm_i_ori(&p, K0, K0,
+       uasm_i_ori(&p, wr.r1, wr.r1,
                   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
-       build_huge_handler_tail(&p, &r, &l, K0, K1);
+       build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
 #endif
 
        uasm_l_nopage_tlbm(&l, p);
+       build_restore_work_registers(&p);
        uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
        uasm_i_nop(&p);
 
@@ -2036,6 +2131,7 @@ void __cpuinit build_tlb_refill_handler(void)
 
        default:
                if (!run_once) {
+                       scratch_reg = allocate_kscratch();
 #ifdef CONFIG_MIPS_PGD_C0_CONTEXT
                        build_r4000_setup_pgd();
 #endif