/*
 * Copyright (C) 1995 Linus Torvalds
 * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/tty.h>
#include <linux/vt_kern.h>		/* For unblank_screen() */
#include <linux/compiler.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>

#include <asm/system.h>
#include <asm/pgalloc.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm-generic/sections.h>

/*
 * Page fault error code bits
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *	bit 2 == 0 means kernel, 1 means user-mode
 *	bit 3 == 1 means use of reserved bit detected
 *	bit 4 == 1 means fault was an instruction fetch
 */
#define PF_PROT		(1<<0)
#define PF_WRITE	(1<<1)
#define PF_USER		(1<<2)
#define PF_RSVD		(1<<3)
#define PF_INSTR	(1<<4)

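/*
 * Example decodings: a user-mode write to a not-present page arrives as
 * error_code == (PF_USER|PF_WRITE) == 0x6, while a user-mode read of a
 * present but protection-restricted page arrives as (PF_USER|PF_PROT) == 0x5.
 */
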
static inline int notify_page_fault(struct pt_regs *regs)
{
#ifdef CONFIG_KPROBES
	int ret = 0;

	/* kprobe_running() needs smp_processor_id() */
	if (!user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, 14))
			ret = 1;
		preempt_enable();
	}

	return ret;
#else
	return 0;
#endif
}

/*
 * X86_32
 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
 * Check that here and ignore it.
 *
 * X86_64
 * Sometimes the CPU reports invalid exceptions on prefetch.
 * Check that here and ignore it.
 *
 * Opcode checker based on code by Richard Brunner
 */
static int is_prefetch(struct pt_regs *regs, unsigned long addr,
		       unsigned long error_code)
{
	unsigned char *instr;
	int scan_more = 1;
	int prefetch = 0;
	unsigned char *max_instr;

#ifdef CONFIG_X86_32
	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		     boot_cpu_data.x86 >= 6)) {
		/* Catch an obscure case of prefetch inside an NX page. */
		if (nx_enabled && (error_code & PF_INSTR))
			return 0;
	} else {
		return 0;
	}
#else
	/* If it was an exec fault, ignore it. */
	if (error_code & PF_INSTR)
		return 0;
#endif

	instr = (unsigned char *)convert_ip_to_linear(current, regs);
	max_instr = instr + 15;

	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
		return 0;

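	/*
	 * Scan at most 15 bytes (the maximum length of an x86 instruction),
	 * continuing only while we keep seeing valid prefix bytes, and flag a
	 * prefetch when the two-byte opcode 0x0F 0x0D or 0x0F 0x18 is found.
	 */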
	while (scan_more && instr < max_instr) {
		unsigned char opcode;
		unsigned char instr_hi;
		unsigned char instr_lo;

		if (probe_kernel_address(instr, opcode))
			break;

		instr_hi = opcode & 0xf0;
		instr_lo = opcode & 0x0f;
		instr++;

		switch (instr_hi) {
		case 0x20:
		case 0x30:
			/*
			 * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
			 * In X86_64 long mode, the CPU will signal invalid
			 * opcode if some of these prefixes are present so
			 * X86_64 will never get here anyway
			 */
			scan_more = ((instr_lo & 7) == 0x6);
			break;
#ifdef CONFIG_X86_64
		case 0x40:
			/*
			 * In AMD64 long mode 0x40..0x4F are valid REX prefixes
			 * Need to figure out under what instruction mode the
			 * instruction was issued. Could check the LDT for lm,
			 * but for now it's good enough to assume that long
			 * mode only uses well known segments or kernel.
			 */
			scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
			break;
#endif
		case 0x60:
			/* 0x64 thru 0x67 are valid prefixes in all modes. */
			scan_more = (instr_lo & 0xC) == 0x4;
			break;
		case 0xF0:
			/* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
			scan_more = !instr_lo || (instr_lo>>1) == 1;
			break;
		case 0x00:
			/* Prefetch instruction is 0x0F0D or 0x0F18 */
			scan_more = 0;

			if (probe_kernel_address(instr, opcode))
				break;
			prefetch = (instr_lo == 0xF) &&
				(opcode == 0x0D || opcode == 0x18);
			break;
		default:
			scan_more = 0;
			break;
		}
	}
	return prefetch;
}

static void force_sig_info_fault(int si_signo, int si_code,
				 unsigned long address, struct task_struct *tsk)
{
	siginfo_t info;

	info.si_signo = si_signo;
	info.si_errno = 0;
	info.si_code = si_code;
	info.si_addr = (void __user *)address;
	force_sig_info(si_signo, &info, tsk);
}

static int bad_address(void *p)
{
	unsigned long dummy;
	return probe_kernel_address((unsigned long *)p, dummy);
}

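/*
 * Walk the page tables for 'address' starting from the current CR3 value and
 * print each level (PGD/PUD/PMD/PTE) that can be safely read.
 */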
void dump_pagetable(unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = (pgd_t *)read_cr3();

	pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
	pgd += pgd_index(address);
	if (bad_address(pgd)) goto bad;
	printk("PGD %lx ", pgd_val(*pgd));
	if (!pgd_present(*pgd)) goto ret;

	pud = pud_offset(pgd, address);
	if (bad_address(pud)) goto bad;
	printk("PUD %lx ", pud_val(*pud));
	if (!pud_present(*pud)) goto ret;

	pmd = pmd_offset(pud, address);
	if (bad_address(pmd)) goto bad;
	printk("PMD %lx ", pmd_val(*pmd));
	if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;

	pte = pte_offset_kernel(pmd, address);
	if (bad_address(pte)) goto bad;
	printk("PTE %lx", pte_val(*pte));
ret:
	printk("\n");
	return;
bad:
	printk("BAD\n");
}

#ifdef CONFIG_X86_64
static const char errata93_warning[] =
KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
KERN_ERR "******* Please consider a BIOS update.\n"
KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
#endif

/* Workaround for K8 erratum #93 & buggy BIOS.
   BIOS SMM functions are required to use a specific workaround
   to avoid corruption of the 64bit RIP register on C stepping K8.
   A lot of BIOSes that didn't get tested properly miss this.
   The OS sees this as a page fault with the upper 32 bits of RIP cleared.
   Try to work around it here.
   Note we only handle faults in the kernel here.
   Does nothing for X86_32. */
static int is_errata93(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_64
	static int warned;
	if (address != regs->ip)
		return 0;
	if ((address >> 32) != 0)
		return 0;
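	/*
	 * The erratum clears the upper 32 bits of RIP, so re-create the full
	 * kernel address by setting them and check whether the result lands
	 * in the kernel text or the module mapping.
	 */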
	address |= 0xffffffffUL << 32;
	if ((address >= (u64)_stext && address <= (u64)_etext) ||
	    (address >= MODULES_VADDR && address <= MODULES_END)) {
		if (!warned) {
			printk(errata93_warning);
			warned = 1;
		}
		regs->ip = address;
		return 1;
	}
#endif
	return 0;
}

static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
				 unsigned long error_code)
{
	unsigned long flags = oops_begin();
	struct task_struct *tsk;

	printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
	       current->comm, address);
	dump_pagetable(address);
	tsk = current;
	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	if (__die("Bad pagetable", regs, error_code))
		regs = NULL;
	oops_end(flags, regs, SIGKILL);
}

/*
 * Handle a fault on the vmalloc area
 *
 * This assumes no large pages in there.
 */
static int vmalloc_fault(unsigned long address)
{
#ifdef CONFIG_X86_32
	unsigned long pgd_paddr;
	pmd_t *pmd_k;
	pte_t *pte_k;
	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_paddr = read_cr3();
	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
	if (!pmd_k)
		return -1;
	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;
	return 0;
#else
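	/*
	 * On 64-bit, only the top-level (PGD) entry for the vmalloc range may
	 * be missing from a process's page tables; everything below it is
	 * shared with init_mm, so copying the reference PGD entry is enough.
	 */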
	pgd_t *pgd, *pgd_ref;
	pud_t *pud, *pud_ref;
	pmd_t *pmd, *pmd_ref;
	pte_t *pte, *pte_ref;

	/* Copy kernel mappings over when needed. This can also
	   happen within a race in page table update. In the latter
	   case just flush. */

	pgd = pgd_offset(current->mm ?: &init_mm, address);
	pgd_ref = pgd_offset_k(address);
	if (pgd_none(*pgd_ref))
		return -1;
	if (pgd_none(*pgd))
		set_pgd(pgd, *pgd_ref);
	else
		BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));

	/* Below here mismatches are bugs because these lower tables
	   are shared */

	pud = pud_offset(pgd, address);
	pud_ref = pud_offset(pgd_ref, address);
	if (pud_none(*pud_ref))
		return -1;
	if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
		BUG();
	pmd = pmd_offset(pud, address);
	pmd_ref = pmd_offset(pud_ref, address);
	if (pmd_none(*pmd_ref))
		return -1;
	if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
		BUG();
	pte_ref = pte_offset_kernel(pmd_ref, address);
	if (!pte_present(*pte_ref))
		return -1;
	pte = pte_offset_kernel(pmd, address);
	/* Don't use pte_page here, because the mappings can point
	   outside mem_map, and the NUMA hash lookup cannot handle
	   that. */
	if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
		BUG();
	return 0;
#endif
}

int show_unhandled_signals = 1;

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
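/*
 * regs holds the saved register state at the time of the fault and
 * error_code the PF_* bits supplied by the hardware; the faulting
 * address itself is read from CR2 below.
 */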
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
					unsigned long error_code)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long address;
	int write, fault;
	unsigned long flags;
	int si_code;

	/*
	 * We can fault from pretty much anywhere, with unknown IRQ state.
	 */
	trace_hardirqs_fixup();

	tsk = current;
	mm = tsk->mm;
	prefetchw(&mm->mmap_sem);

	/* get the address */
	address = read_cr2();

	si_code = SEGV_MAPERR;

	if (notify_page_fault(regs))
		return;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * (error_code & 4) == 0, and that the fault was not a
	 * protection error (error_code & 9) == 0.
	 */
	if (unlikely(address >= TASK_SIZE64)) {
		/*
		 * Don't check for the module range here: its PML4
		 * is always initialized because it's shared with the main
		 * kernel text. Only vmalloc may need PML4 syncups.
		 */
		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
		    ((address >= VMALLOC_START && address < VMALLOC_END))) {
			if (vmalloc_fault(address) >= 0)
				return;
		}
		/*
		 * Don't take the mm semaphore here. If we fixup a prefetch
		 * fault we could otherwise deadlock.
		 */
		goto bad_area_nosemaphore;
	}

	if (likely(regs->flags & X86_EFLAGS_IF))
		local_irq_enable();

	if (unlikely(error_code & PF_RSVD))
		pgtable_bad(address, regs, error_code);

	/*
	 * If we're in an interrupt, have no user context or are running in an
	 * atomic region then we must not take the fault.
	 */
	if (unlikely(in_atomic() || !mm))
		goto bad_area_nosemaphore;

	/*
	 * User-mode registers count as a user access even for any
	 * potential system fault or CPU buglet.
	 */
	if (user_mode_vm(regs))
		error_code |= PF_USER;

again:
	/* When running in the kernel we expect faults to occur only to
	 * addresses in user space. All other faults represent errors in the
	 * kernel and should generate an OOPS. Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space. Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space, if we cannot we then validate the
	 * source. If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if ((error_code & PF_USER) == 0 &&
		    !search_exception_tables(regs->ip))
			goto bad_area_nosemaphore;
		down_read(&mm->mmap_sem);
	}

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (likely(vma->vm_start <= address))
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (error_code & PF_USER) {
		/*
		 * Accessing the stack below %sp is always a bug.
		 * The large cushion allows instructions like enter
		 * and pusha to work. ("enter $65535,$31" pushes
		 * 32 pointers and then decrements %sp by 65535.)
		 */
		if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;
	write = 0;
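	/*
	 * Check the access against the VMA permissions: the PROT and WRITE
	 * error-code bits distinguish the four combinations of read/write on
	 * a present/not-present page handled by the cases below.
	 */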
	switch (error_code & (PF_PROT|PF_WRITE)) {
	default:	/* 3: write, present */
		/* fall through */
	case PF_WRITE:	/* write, not present */
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		write++;
		break;
	case PF_PROT:	/* read, present */
		goto bad_area;
	case 0:		/* read, not present */
		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, write);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;
	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (error_code & PF_USER) {

		/*
		 * It's possible to have interrupts off here.
		 */
		local_irq_enable();

		if (is_prefetch(regs, address, error_code))
			return;

		/* Work around K8 erratum #100: K8 in compat mode
		   occasionally jumps to illegal addresses >4GB. We
		   catch this here in the page fault handler because
		   these addresses are not reachable. Just detect this
		   case and return. Any code segment in LDT is
		   compatibility mode. */
		if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
		    (address >> 32))
			return;

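		/*
		 * Report unhandled SIGSEGVs (rate-limited): print_vma_addr()
		 * below names the executable or shared library that the
		 * faulting instruction pointer falls in.
		 */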
		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
		    printk_ratelimit()) {
			printk(
#ifdef CONFIG_X86_32
			"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
#else
			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
#endif
			task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
			tsk->comm, task_pid_nr(tsk), address, regs->ip,
			regs->sp, error_code);
			print_vma_addr(" in ", regs->ip);
			printk("\n");
		}

		tsk->thread.cr2 = address;
		/* Kernel addresses are always protection faults */
		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
		tsk->thread.trap_no = 14;

		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Hall of shame of CPU/BIOS bugs.
	 */

	if (is_prefetch(regs, address, error_code))
		return;

	if (is_errata93(regs, address))
		return;

/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 */

	flags = oops_begin();

	if (address < PAGE_SIZE)
		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
	else
		printk(KERN_ALERT "Unable to handle kernel paging request");
	printk(" at %016lx RIP: \n" KERN_ALERT, address);
	printk_address(regs->ip, regs->bp);
	dump_pagetable(address);
	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	if (__die("Oops", regs, error_code))
		regs = NULL;
	/* Executive summary in case the body of the oops scrolled away */
	printk(KERN_EMERG "CR2: %016lx\n", address);
	oops_end(flags, regs, SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_global_init(current)) {
		yield();
		goto again;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (error_code & PF_USER)
		do_group_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!(error_code & PF_USER))
		goto no_context;

	tsk->thread.cr2 = address;
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 14;
	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
	return;
}

DEFINE_SPINLOCK(pgd_lock);
LIST_HEAD(pgd_list);

void vmalloc_sync_all(void)
{
	/*
	 * Note that races in the updates of insync and start aren't
	 * problematic: insync can only get set bits added, and updates to
	 * start are only improving performance (without affecting correctness
	 * if undone).
	 */
	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
	static unsigned long start = VMALLOC_START & PGDIR_MASK;
	unsigned long address;

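	/*
	 * For every top-level (PGD) slot covering the vmalloc range that is
	 * not yet marked in-sync, copy the reference entry from init_mm into
	 * each process page directory on pgd_list, then mark the slot done.
	 */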
	for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
		if (!test_bit(pgd_index(address), insync)) {
			const pgd_t *pgd_ref = pgd_offset_k(address);
			struct page *page;

			if (pgd_none(*pgd_ref))
				continue;
			spin_lock(&pgd_lock);
			list_for_each_entry(page, &pgd_list, lru) {
				pgd_t *pgd;
				pgd = (pgd_t *)page_address(page) + pgd_index(address);
				if (pgd_none(*pgd))
					set_pgd(pgd, *pgd_ref);
				else
					BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
			}
			spin_unlock(&pgd_lock);
			set_bit(pgd_index(address), insync);
		}
		if (address == start)
			start = address + PGDIR_SIZE;
	}
	/* Check that there is no need to do the same for the modules area. */
	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
		       (__START_KERNEL & PGDIR_MASK)));
}