/*
 * Page fault handler for SH with an MMU.
 *
 * Copyright (C) 1999  Niibe Yutaka
 * Copyright (C) 2003 - 2012  Paul Mundt
 *
 * Based on linux/arch/i386/mm/fault.c:
 *  Copyright (C) 1995  Linus Torvalds
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/kdebug.h>
#include <linux/uaccess.h>
#include <asm/io_trapped.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/traps.h>

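/*
 * Give kprobes a chance to claim a kernel-mode fault first. Returns 1 if
 * a registered kprobe fault handler consumed the fault, 0 otherwise.
 * Preemption is disabled around the check so the per-CPU kprobe state
 * seen by kprobe_running() cannot change underneath us.
 */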
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
	int ret = 0;

	if (kprobes_built_in() && !user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, trap))
			ret = 1;
		preempt_enable();
	}

	return ret;
}

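/*
 * Build a fully-initialized siginfo and deliver the fault signal to the
 * given task. clear_siginfo() zeroes the whole structure, including
 * padding, so no stale kernel stack contents leak to userspace through
 * the unused fields.
 */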
static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address,
		     struct task_struct *tsk)
{
	siginfo_t info;

	clear_siginfo(&info);
	info.si_signo	= si_signo;
	info.si_errno	= 0;
	info.si_code	= si_code;
	info.si_addr	= (void __user *)address;

	force_sig_info(si_signo, &info, tsk);
}

/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
static void show_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;

	if (mm) {
		pgd = mm->pgd;
	} else {
		pgd = get_TTB();

		if (unlikely(!pgd))
			pgd = swapper_pg_dir;
	}

	printk(KERN_ALERT "pgd = %p\n", pgd);
	pgd += pgd_index(addr);
	printk(KERN_ALERT "[%08lx] *pgd=%0*Lx", addr,
	       (u32)(sizeof(*pgd) * 2), (u64)pgd_val(*pgd));

	do {
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte;

		if (pgd_none(*pgd))
			break;

		if (pgd_bad(*pgd)) {
			printk("(bad)");
			break;
		}

		pud = pud_offset(pgd, addr);
		if (PTRS_PER_PUD != 1)
			printk(", *pud=%0*Lx", (u32)(sizeof(*pud) * 2),
			       (u64)pud_val(*pud));

		if (pud_none(*pud))
			break;

		if (pud_bad(*pud)) {
			printk("(bad)");
			break;
		}

		pmd = pmd_offset(pud, addr);
		if (PTRS_PER_PMD != 1)
			printk(", *pmd=%0*Lx", (u32)(sizeof(*pmd) * 2),
			       (u64)pmd_val(*pmd));

		if (pmd_none(*pmd))
			break;

		if (pmd_bad(*pmd)) {
			printk("(bad)");
			break;
		}

		/* We must not map this if we have highmem enabled */
		if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
			break;

		pte = pte_offset_kernel(pmd, addr);
		printk(", *pte=%0*Lx", (u32)(sizeof(*pte) * 2),
		       (u64)pte_val(*pte));
	} while (0);

	printk("\n");
}

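/*
 * Copy the pgd/pud/pmd entries covering 'address' from the reference
 * page table (init_mm.pgd) into the given pgd. Returns the reference
 * pmd once the entry has been synchronised, or NULL if there is nothing
 * to copy (no kernel mapping, or the tables already agree), in which
 * case the fault must be handled elsewhere.
 */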
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
	unsigned index = pgd_index(address);
	pgd_t *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd += index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return NULL;

	pud = pud_offset(pgd, address);
	pud_k = pud_offset(pgd_k, address);
	if (!pud_present(*pud_k))
		return NULL;

	if (!pud_present(*pud))
		set_pud(pud, *pud_k);

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (!pmd_present(*pmd_k))
		return NULL;

	if (!pmd_present(*pmd))
		set_pmd(pmd, *pmd_k);
	else {
		/*
		 * The page tables are fully synchronised so there must
		 * be another reason for the fault. Return NULL here to
		 * signal that we have not taken care of the fault.
		 */
		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
		return NULL;
	}

	return pmd_k;
}

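/*
 * With store queues enabled, kernel mappings above VMALLOC_END can also
 * fault through this path, so the address window checked below is
 * widened to P3_ADDR_MAX.
 */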
#ifdef CONFIG_SH_STORE_QUEUES
#define __FAULT_ADDR_LIMIT	P3_ADDR_MAX
#else
#define __FAULT_ADDR_LIMIT	VMALLOC_END
#endif

/*
 * Handle a fault on the vmalloc or module mapping area
 */
static noinline int vmalloc_fault(unsigned long address)
{
	pgd_t *pgd_k;
	pmd_t *pmd_k;
	pte_t *pte_k;

	/* Make sure we are in vmalloc/module/P3 area: */
	if (!(address >= VMALLOC_START && address < __FAULT_ADDR_LIMIT))
		return -1;

	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_k = get_TTB();
	pmd_k = vmalloc_sync_one(pgd_k, address);
	if (!pmd_k)
		return -1;

	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;

	return 0;
}

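/*
 * Print the oops banner for an unhandled kernel fault, including the
 * faulting PC and a dump of the page table entries for the address.
 */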
static void
show_fault_oops(struct pt_regs *regs, unsigned long address)
{
	if (!oops_may_print())
		return;

	printk(KERN_ALERT "BUG: unable to handle kernel ");
	if (address < PAGE_SIZE)
		printk(KERN_CONT "NULL pointer dereference");
	else
		printk(KERN_CONT "paging request");

	printk(KERN_CONT " at %08lx\n", address);
	printk(KERN_ALERT "PC:");
	printk_address(regs->pc, 1);

	show_pte(NULL, address);
}

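/*
 * Handle a fault taken in kernel mode with no user context to return an
 * error to: try the exception fixup tables and trapped I/O emulation
 * first, and oops if neither can recover.
 */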
static noinline void
no_context(struct pt_regs *regs, unsigned long error_code,
	   unsigned long address)
{
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	if (handle_trapped_io(regs, address))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	show_fault_oops(regs, address);

	die("Oops", regs, error_code);
	bust_spinlocks(0);
	do_exit(SIGKILL);
}

static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
		       unsigned long address, int si_code)
{
	struct task_struct *tsk = current;

	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		/*
		 * It's possible to have interrupts off here:
		 */
		local_irq_enable();

		force_sig_info_fault(SIGSEGV, si_code, address, tsk);

		return;
	}

	no_context(regs, error_code, address);
}

static noinline void
bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
		     unsigned long address)
{
	__bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
}

static void
__bad_area(struct pt_regs *regs, unsigned long error_code,
	   unsigned long address, int si_code)
{
	struct mm_struct *mm = current->mm;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
	up_read(&mm->mmap_sem);

	__bad_area_nosemaphore(regs, error_code, address, si_code);
}

static noinline void
bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
	__bad_area(regs, error_code, address, SEGV_MAPERR);
}

static noinline void
bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
		      unsigned long address)
{
	__bad_area(regs, error_code, address, SEGV_ACCERR);
}

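/*
 * A fault on a valid mapping could not be serviced (e.g. an access
 * beyond the end of a mapped file): drop mmap_sem and send SIGBUS, or
 * fall back to no_context() for kernel-mode faults.
 */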
static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;

	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die: */
	if (!user_mode(regs))
		no_context(regs, error_code, address);

	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}

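/*
 * Deal with the error cases from handle_mm_fault(): fatal signals,
 * out-of-memory, SIGBUS and SIGSEGV. Returns 1 if the fault was fully
 * handled here (releasing mmap_sem as needed), 0 if the caller should
 * continue with the normal accounting and retry path.
 */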
static noinline int
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
	       unsigned long address, unsigned int fault)
{
	/*
	 * Pagefault was interrupted by SIGKILL. We have no reason to
	 * continue pagefault.
	 */
	if (fatal_signal_pending(current)) {
		if (!(fault & VM_FAULT_RETRY))
			up_read(&current->mm->mmap_sem);
		if (!user_mode(regs))
			no_context(regs, error_code, address);
		return 1;
	}

	if (!(fault & VM_FAULT_ERROR))
		return 0;

	if (fault & VM_FAULT_OOM) {
		/* Kernel mode? Handle exceptions or die: */
		if (!user_mode(regs)) {
			up_read(&current->mm->mmap_sem);
			no_context(regs, error_code, address);
			return 1;
		}
		up_read(&current->mm->mmap_sem);

		/*
		 * We ran out of memory, call the OOM killer, and return to
		 * userspace (which will retry the fault, or kill us if we
		 * got oom-killed):
		 */
		pagefault_out_of_memory();
	} else {
		if (fault & VM_FAULT_SIGBUS)
			do_sigbus(regs, error_code, address);
		else if (fault & VM_FAULT_SIGSEGV)
			bad_area(regs, error_code, address);
		else
			BUG();
	}

	return 1;
}

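/*
 * Check the fault type against the vma protections. Returns 1 when the
 * access is not permitted: a write to a non-writable vma, an ITLB miss
 * (instruction fetch) on a non-executable page, or a read from a vma
 * with no access rights at all.
 */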
static inline int access_error(int error_code, struct vm_area_struct *vma)
{
	if (error_code & FAULT_CODE_WRITE) {
		/* write, present and write, not present: */
		if (unlikely(!(vma->vm_flags & VM_WRITE)))
			return 1;
		return 0;
	}

	/* ITLB miss on NX page */
	if (unlikely((error_code & FAULT_CODE_ITLB) &&
		     !(vma->vm_flags & VM_EXEC)))
		return 1;

	/* read, not present: */
	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
		return 1;

	return 0;
}

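/* User space ends at TASK_SIZE; anything at or above it is a kernel fault. */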
static int fault_in_kernel_space(unsigned long address)
{
	return address >= TASK_SIZE;
}

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
					unsigned long error_code,
					unsigned long address)
{
	unsigned long vec;
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int fault;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	tsk = current;
	mm = tsk->mm;
	vec = lookup_exception_vector();

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely(fault_in_kernel_space(address))) {
		if (vmalloc_fault(address) >= 0)
			return;
		if (notify_page_fault(regs, vec))
			return;

		bad_area_nosemaphore(regs, error_code, address);
		return;
	}

	if (unlikely(notify_page_fault(regs, vec)))
		return;

	/* Only enable interrupts if they were on before the fault */
	if ((regs->sr & SR_IMASK) != SR_IMASK)
		local_irq_enable();

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

	/*
	 * If we're in an interrupt, have no user context or are running
	 * with pagefaults disabled then we must not take the fault:
	 */
	if (unlikely(faulthandler_disabled() || !mm)) {
		bad_area_nosemaphore(regs, error_code, address);
		return;
	}

retry:
	down_read(&mm->mmap_sem);

	vma = find_vma(mm, address);
	if (unlikely(!vma)) {
		bad_area(regs, error_code, address);
		return;
	}
	if (likely(vma->vm_start <= address))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
		bad_area(regs, error_code, address);
		return;
	}
	if (unlikely(expand_stack(vma, address))) {
		bad_area(regs, error_code, address);
		return;
	}

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */
good_area:
	if (unlikely(access_error(error_code, vma))) {
		bad_area_access_error(regs, error_code, address);
		return;
	}

	set_thread_fault_code(error_code);

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;
	if (error_code & FAULT_CODE_WRITE)
		flags |= FAULT_FLAG_WRITE;

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, address, flags);

	if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR)))
		if (mm_fault_error(regs, error_code, address, fault))
			return;

	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
				      regs, address);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
				      regs, address);
		}
		if (fault & VM_FAULT_RETRY) {
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;

			/*
			 * No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
}