]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blob - fs/binfmt_elf.c
Add __GFP_MOVABLE for callers to flag allocations from high memory that may be migrated
[mirror_ubuntu-zesty-kernel.git] / fs / binfmt_elf.c
1 /*
2 * linux/fs/binfmt_elf.c
3 *
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
8 *
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10 */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/a.out.h>
20 #include <linux/errno.h>
21 #include <linux/signal.h>
22 #include <linux/binfmts.h>
23 #include <linux/string.h>
24 #include <linux/file.h>
25 #include <linux/fcntl.h>
26 #include <linux/ptrace.h>
27 #include <linux/slab.h>
28 #include <linux/shm.h>
29 #include <linux/personality.h>
30 #include <linux/elfcore.h>
31 #include <linux/init.h>
32 #include <linux/highuid.h>
33 #include <linux/smp.h>
34 #include <linux/compiler.h>
35 #include <linux/highmem.h>
36 #include <linux/pagemap.h>
37 #include <linux/security.h>
38 #include <linux/syscalls.h>
39 #include <linux/random.h>
40 #include <linux/elf.h>
41 #include <linux/utsname.h>
42 #include <asm/uaccess.h>
43 #include <asm/param.h>
44 #include <asm/page.h>
45
/* Handlers wired into elf_format; their definitions appear later on. */
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *file);

/* Maps a single program header; used before its definition is seen. */
static unsigned long elf_map(struct file *filep, unsigned long addr,
			     struct elf_phdr *eppnt, int prot, int type,
			     unsigned long total_size);
49
/*
 * Core dumping is optional.  Unless both USE_ELF_CORE_DUMP and
 * CONFIG_ELF_CORE are defined, elf_core_dump collapses to NULL so that
 * nobody even tries to call it.
 */
#if !defined(USE_ELF_CORE_DUMP) || !defined(CONFIG_ELF_CORE)
#define elf_core_dump	NULL
#else
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
#endif
59
/* Alignment granule for ELF mappings: the larger of the two page sizes. */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
# define ELF_MIN_ALIGN		ELF_EXEC_PAGESIZE
#else
# define ELF_MIN_ALIGN		PAGE_SIZE
#endif

/* Fall back to 0 when ELF_CORE_EFLAGS has not been defined already. */
#ifndef ELF_CORE_EFLAGS
# define ELF_CORE_EFLAGS	0
#endif

/*
 * Helpers working on ELF_MIN_ALIGN boundaries:
 *   ELF_PAGESTART  - round an address down to the boundary
 *   ELF_PAGEOFFSET - offset of an address past the boundary
 *   ELF_PAGEALIGN  - round an address up to the next boundary
 */
#define ELF_PAGESTART(_addr)	((_addr) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_addr)	((_addr) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_addr)	(((_addr) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
73
74 static struct linux_binfmt elf_format = {
75 .module = THIS_MODULE,
76 .load_binary = load_elf_binary,
77 .load_shlib = load_elf_library,
78 .core_dump = elf_core_dump,
79 .min_coredump = ELF_EXEC_PAGESIZE,
80 .hasvdso = 1
81 };
82
/* An address is "bad" when IS_ERR_VALUE() classifies it as an error code. */
#define BAD_ADDR(addr)	IS_ERR_VALUE(addr)
84
85 static int set_brk(unsigned long start, unsigned long end)
86 {
87 start = ELF_PAGEALIGN(start);
88 end = ELF_PAGEALIGN(end);
89 if (end > start) {
90 unsigned long addr;
91 down_write(&current->mm->mmap_sem);
92 addr = do_brk(start, end - start);
93 up_write(&current->mm->mmap_sem);
94 if (BAD_ADDR(addr))
95 return addr;
96 }
97 current->mm->start_brk = current->mm->brk = end;
98 return 0;
99 }
100
101 /* We need to explicitly zero any fractional pages
102 after the data section (i.e. bss). This would
103 contain the junk from the file that should not
104 be in memory
105 */
106 static int padzero(unsigned long elf_bss)
107 {
108 unsigned long nbyte;
109
110 nbyte = ELF_PAGEOFFSET(elf_bss);
111 if (nbyte) {
112 nbyte = ELF_MIN_ALIGN - nbyte;
113 if (clear_user((void __user *) elf_bss, nbyte))
114 return -EFAULT;
115 }
116 return 0;
117 }
118
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 *   STACK_ADD(sp, items)   - sp moved by 'items' elf_addr_t slots in the
 *                            direction the stack grows
 *   STACK_ROUND(sp, items) - address 'items' slots beyond sp, rounded to a
 *                            16-byte boundary in the growth direction
 *   STACK_ALLOC(sp, len)   - reserve 'len' units on the stack, updating sp,
 *                            and yield the start of the reserved area
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. "a + b") group as the caller intended.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
133
134 static int
135 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
136 int interp_aout, unsigned long load_addr,
137 unsigned long interp_load_addr)
138 {
139 unsigned long p = bprm->p;
140 int argc = bprm->argc;
141 int envc = bprm->envc;
142 elf_addr_t __user *argv;
143 elf_addr_t __user *envp;
144 elf_addr_t __user *sp;
145 elf_addr_t __user *u_platform;
146 const char *k_platform = ELF_PLATFORM;
147 int items;
148 elf_addr_t *elf_info;
149 int ei_index = 0;
150 struct task_struct *tsk = current;
151
152 /*
153 * If this architecture has a platform capability string, copy it
154 * to userspace. In some cases (Sparc), this info is impossible
155 * for userspace to get any other way, in others (i386) it is
156 * merely difficult.
157 */
158 u_platform = NULL;
159 if (k_platform) {
160 size_t len = strlen(k_platform) + 1;
161
162 /*
163 * In some cases (e.g. Hyper-Threading), we want to avoid L1
164 * evictions by the processes running on the same package. One
165 * thing we can do is to shuffle the initial stack for them.
166 */
167
168 p = arch_align_stack(p);
169
170 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
171 if (__copy_to_user(u_platform, k_platform, len))
172 return -EFAULT;
173 }
174
175 /* Create the ELF interpreter info */
176 elf_info = (elf_addr_t *)current->mm->saved_auxv;
177 #define NEW_AUX_ENT(id, val) \
178 do { \
179 elf_info[ei_index++] = id; \
180 elf_info[ei_index++] = val; \
181 } while (0)
182
183 #ifdef ARCH_DLINFO
184 /*
185 * ARCH_DLINFO must come first so PPC can do its special alignment of
186 * AUXV.
187 */
188 ARCH_DLINFO;
189 #endif
190 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
191 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
192 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
193 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
194 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
195 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
196 NEW_AUX_ENT(AT_BASE, interp_load_addr);
197 NEW_AUX_ENT(AT_FLAGS, 0);
198 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
199 NEW_AUX_ENT(AT_UID, tsk->uid);
200 NEW_AUX_ENT(AT_EUID, tsk->euid);
201 NEW_AUX_ENT(AT_GID, tsk->gid);
202 NEW_AUX_ENT(AT_EGID, tsk->egid);
203 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
204 if (k_platform) {
205 NEW_AUX_ENT(AT_PLATFORM,
206 (elf_addr_t)(unsigned long)u_platform);
207 }
208 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
209 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
210 }
211 #undef NEW_AUX_ENT
212 /* AT_NULL is zero; clear the rest too */
213 memset(&elf_info[ei_index], 0,
214 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
215
216 /* And advance past the AT_NULL entry. */
217 ei_index += 2;
218
219 sp = STACK_ADD(p, ei_index);
220
221 items = (argc + 1) + (envc + 1);
222 if (interp_aout) {
223 items += 3; /* a.out interpreters require argv & envp too */
224 } else {
225 items += 1; /* ELF interpreters only put argc on the stack */
226 }
227 bprm->p = STACK_ROUND(sp, items);
228
229 /* Point sp at the lowest address on the stack */
230 #ifdef CONFIG_STACK_GROWSUP
231 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
232 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
233 #else
234 sp = (elf_addr_t __user *)bprm->p;
235 #endif
236
237 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
238 if (__put_user(argc, sp++))
239 return -EFAULT;
240 if (interp_aout) {
241 argv = sp + 2;
242 envp = argv + argc + 1;
243 if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
244 __put_user((elf_addr_t)(unsigned long)envp, sp++))
245 return -EFAULT;
246 } else {
247 argv = sp;
248 envp = argv + argc + 1;
249 }
250
251 /* Populate argv and envp */
252 p = current->mm->arg_end = current->mm->arg_start;
253 while (argc-- > 0) {
254 size_t len;
255 if (__put_user((elf_addr_t)p, argv++))
256 return -EFAULT;
257 len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
258 if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
259 return 0;
260 p += len;
261 }
262 if (__put_user(0, argv))
263 return -EFAULT;
264 current->mm->arg_end = current->mm->env_start = p;
265 while (envc-- > 0) {
266 size_t len;
267 if (__put_user((elf_addr_t)p, envp++))
268 return -EFAULT;
269 len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
270 if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
271 return 0;
272 p += len;
273 }
274 if (__put_user(0, envp))
275 return -EFAULT;
276 current->mm->env_end = p;
277
278 /* Put the elf_info on the stack in the right place. */
279 sp = (elf_addr_t __user *)envp + 1;
280 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
281 return -EFAULT;
282 return 0;
283 }
284
285 #ifndef elf_map
286
287 static unsigned long elf_map(struct file *filep, unsigned long addr,
288 struct elf_phdr *eppnt, int prot, int type,
289 unsigned long total_size)
290 {
291 unsigned long map_addr;
292 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
293 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
294 addr = ELF_PAGESTART(addr);
295 size = ELF_PAGEALIGN(size);
296
297 /* mmap() will return -EINVAL if given a zero size, but a
298 * segment with zero filesize is perfectly valid */
299 if (!size)
300 return addr;
301
302 down_write(&current->mm->mmap_sem);
303 /*
304 * total_size is the size of the ELF (interpreter) image.
305 * The _first_ mmap needs to know the full size, otherwise
306 * randomization might put this image into an overlapping
307 * position with the ELF binary image. (since size < total_size)
308 * So we first map the 'big' image - and unmap the remainder at
309 * the end. (which unmap is needed for ELF images with holes.)
310 */
311 if (total_size) {
312 total_size = ELF_PAGEALIGN(total_size);
313 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
314 if (!BAD_ADDR(map_addr))
315 do_munmap(current->mm, map_addr+size, total_size-size);
316 } else
317 map_addr = do_mmap(filep, addr, size, prot, type, off);
318
319 up_write(&current->mm->mmap_sem);
320 return(map_addr);
321 }
322
323 #endif /* !elf_map */
324
325 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
326 {
327 int i, first_idx = -1, last_idx = -1;
328
329 for (i = 0; i < nr; i++) {
330 if (cmds[i].p_type == PT_LOAD) {
331 last_idx = i;
332 if (first_idx == -1)
333 first_idx = i;
334 }
335 }
336 if (first_idx == -1)
337 return 0;
338
339 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
340 ELF_PAGESTART(cmds[first_idx].p_vaddr);
341 }
342
343
344 /* This is much more generalized than the library routine read function,
345 so we keep this separate. Technically the library read function
346 is only provided so that we can read a.out libraries that have
347 an ELF header */
348
349 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
350 struct file *interpreter, unsigned long *interp_map_addr,
351 unsigned long no_base)
352 {
353 struct elf_phdr *elf_phdata;
354 struct elf_phdr *eppnt;
355 unsigned long load_addr = 0;
356 int load_addr_set = 0;
357 unsigned long last_bss = 0, elf_bss = 0;
358 unsigned long error = ~0UL;
359 unsigned long total_size;
360 int retval, i, size;
361
362 /* First of all, some simple consistency checks */
363 if (interp_elf_ex->e_type != ET_EXEC &&
364 interp_elf_ex->e_type != ET_DYN)
365 goto out;
366 if (!elf_check_arch(interp_elf_ex))
367 goto out;
368 if (!interpreter->f_op || !interpreter->f_op->mmap)
369 goto out;
370
371 /*
372 * If the size of this structure has changed, then punt, since
373 * we will be doing the wrong thing.
374 */
375 if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
376 goto out;
377 if (interp_elf_ex->e_phnum < 1 ||
378 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
379 goto out;
380
381 /* Now read in all of the header information */
382 size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
383 if (size > ELF_MIN_ALIGN)
384 goto out;
385 elf_phdata = kmalloc(size, GFP_KERNEL);
386 if (!elf_phdata)
387 goto out;
388
389 retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
390 (char *)elf_phdata,size);
391 error = -EIO;
392 if (retval != size) {
393 if (retval < 0)
394 error = retval;
395 goto out_close;
396 }
397
398 total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
399 if (!total_size) {
400 error = -EINVAL;
401 goto out_close;
402 }
403
404 eppnt = elf_phdata;
405 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
406 if (eppnt->p_type == PT_LOAD) {
407 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
408 int elf_prot = 0;
409 unsigned long vaddr = 0;
410 unsigned long k, map_addr;
411
412 if (eppnt->p_flags & PF_R)
413 elf_prot = PROT_READ;
414 if (eppnt->p_flags & PF_W)
415 elf_prot |= PROT_WRITE;
416 if (eppnt->p_flags & PF_X)
417 elf_prot |= PROT_EXEC;
418 vaddr = eppnt->p_vaddr;
419 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
420 elf_type |= MAP_FIXED;
421 else if (no_base && interp_elf_ex->e_type == ET_DYN)
422 load_addr = -vaddr;
423
424 map_addr = elf_map(interpreter, load_addr + vaddr,
425 eppnt, elf_prot, elf_type, total_size);
426 total_size = 0;
427 if (!*interp_map_addr)
428 *interp_map_addr = map_addr;
429 error = map_addr;
430 if (BAD_ADDR(map_addr))
431 goto out_close;
432
433 if (!load_addr_set &&
434 interp_elf_ex->e_type == ET_DYN) {
435 load_addr = map_addr - ELF_PAGESTART(vaddr);
436 load_addr_set = 1;
437 }
438
439 /*
440 * Check to see if the section's size will overflow the
441 * allowed task size. Note that p_filesz must always be
442 * <= p_memsize so it's only necessary to check p_memsz.
443 */
444 k = load_addr + eppnt->p_vaddr;
445 if (BAD_ADDR(k) ||
446 eppnt->p_filesz > eppnt->p_memsz ||
447 eppnt->p_memsz > TASK_SIZE ||
448 TASK_SIZE - eppnt->p_memsz < k) {
449 error = -ENOMEM;
450 goto out_close;
451 }
452
453 /*
454 * Find the end of the file mapping for this phdr, and
455 * keep track of the largest address we see for this.
456 */
457 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
458 if (k > elf_bss)
459 elf_bss = k;
460
461 /*
462 * Do the same thing for the memory mapping - between
463 * elf_bss and last_bss is the bss section.
464 */
465 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
466 if (k > last_bss)
467 last_bss = k;
468 }
469 }
470
471 /*
472 * Now fill out the bss section. First pad the last page up
473 * to the page boundary, and then perform a mmap to make sure
474 * that there are zero-mapped pages up to and including the
475 * last bss page.
476 */
477 if (padzero(elf_bss)) {
478 error = -EFAULT;
479 goto out_close;
480 }
481
482 /* What we have mapped so far */
483 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
484
485 /* Map the last of the bss segment */
486 if (last_bss > elf_bss) {
487 down_write(&current->mm->mmap_sem);
488 error = do_brk(elf_bss, last_bss - elf_bss);
489 up_write(&current->mm->mmap_sem);
490 if (BAD_ADDR(error))
491 goto out_close;
492 }
493
494 error = load_addr;
495
496 out_close:
497 kfree(elf_phdata);
498 out:
499 return error;
500 }
501
502 static unsigned long load_aout_interp(struct exec *interp_ex,
503 struct file *interpreter)
504 {
505 unsigned long text_data, elf_entry = ~0UL;
506 char __user * addr;
507 loff_t offset;
508
509 current->mm->end_code = interp_ex->a_text;
510 text_data = interp_ex->a_text + interp_ex->a_data;
511 current->mm->end_data = text_data;
512 current->mm->brk = interp_ex->a_bss + text_data;
513
514 switch (N_MAGIC(*interp_ex)) {
515 case OMAGIC:
516 offset = 32;
517 addr = (char __user *)0;
518 break;
519 case ZMAGIC:
520 case QMAGIC:
521 offset = N_TXTOFF(*interp_ex);
522 addr = (char __user *)N_TXTADDR(*interp_ex);
523 break;
524 default:
525 goto out;
526 }
527
528 down_write(&current->mm->mmap_sem);
529 do_brk(0, text_data);
530 up_write(&current->mm->mmap_sem);
531 if (!interpreter->f_op || !interpreter->f_op->read)
532 goto out;
533 if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
534 goto out;
535 flush_icache_range((unsigned long)addr,
536 (unsigned long)addr + text_data);
537
538 down_write(&current->mm->mmap_sem);
539 do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
540 interp_ex->a_bss);
541 up_write(&current->mm->mmap_sem);
542 elf_entry = interp_ex->a_entry;
543
544 out:
545 return elf_entry;
546 }
547
548 /*
549 * These are the functions used to load ELF style executables and shared
550 * libraries. There is no binary dependent code anywhere else.
551 */
552
/*
 * Kinds of program interpreter.  The a.out and ELF values are distinct
 * bits so that both can be or-ed together while the type is still being
 * worked out.
 */
#define INTERPRETER_NONE	0
#define INTERPRETER_AOUT	1
#define INTERPRETER_ELF		2

/* Mask applied to the random page count used to shift the stack top. */
#ifndef STACK_RND_MASK
# define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif
560
561 static unsigned long randomize_stack_top(unsigned long stack_top)
562 {
563 unsigned int random_variable = 0;
564
565 if ((current->flags & PF_RANDOMIZE) &&
566 !(current->personality & ADDR_NO_RANDOMIZE)) {
567 random_variable = get_random_int() & STACK_RND_MASK;
568 random_variable <<= PAGE_SHIFT;
569 }
570 #ifdef CONFIG_STACK_GROWSUP
571 return PAGE_ALIGN(stack_top) + random_variable;
572 #else
573 return PAGE_ALIGN(stack_top) - random_variable;
574 #endif
575 }
576
577 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
578 {
579 struct file *interpreter = NULL; /* to shut gcc up */
580 unsigned long load_addr = 0, load_bias = 0;
581 int load_addr_set = 0;
582 char * elf_interpreter = NULL;
583 unsigned int interpreter_type = INTERPRETER_NONE;
584 unsigned char ibcs2_interpreter = 0;
585 unsigned long error;
586 struct elf_phdr *elf_ppnt, *elf_phdata;
587 unsigned long elf_bss, elf_brk;
588 int elf_exec_fileno;
589 int retval, i;
590 unsigned int size;
591 unsigned long elf_entry;
592 unsigned long interp_load_addr = 0;
593 unsigned long start_code, end_code, start_data, end_data;
594 unsigned long reloc_func_desc = 0;
595 char passed_fileno[6];
596 struct files_struct *files;
597 int executable_stack = EXSTACK_DEFAULT;
598 unsigned long def_flags = 0;
599 struct {
600 struct elfhdr elf_ex;
601 struct elfhdr interp_elf_ex;
602 struct exec interp_ex;
603 } *loc;
604
605 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
606 if (!loc) {
607 retval = -ENOMEM;
608 goto out_ret;
609 }
610
611 /* Get the exec-header */
612 loc->elf_ex = *((struct elfhdr *)bprm->buf);
613
614 retval = -ENOEXEC;
615 /* First of all, some simple consistency checks */
616 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
617 goto out;
618
619 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
620 goto out;
621 if (!elf_check_arch(&loc->elf_ex))
622 goto out;
623 if (!bprm->file->f_op||!bprm->file->f_op->mmap)
624 goto out;
625
626 /* Now read in all of the header information */
627 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
628 goto out;
629 if (loc->elf_ex.e_phnum < 1 ||
630 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
631 goto out;
632 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
633 retval = -ENOMEM;
634 elf_phdata = kmalloc(size, GFP_KERNEL);
635 if (!elf_phdata)
636 goto out;
637
638 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
639 (char *)elf_phdata, size);
640 if (retval != size) {
641 if (retval >= 0)
642 retval = -EIO;
643 goto out_free_ph;
644 }
645
646 files = current->files; /* Refcounted so ok */
647 retval = unshare_files();
648 if (retval < 0)
649 goto out_free_ph;
650 if (files == current->files) {
651 put_files_struct(files);
652 files = NULL;
653 }
654
655 /* exec will make our files private anyway, but for the a.out
656 loader stuff we need to do it earlier */
657 retval = get_unused_fd();
658 if (retval < 0)
659 goto out_free_fh;
660 get_file(bprm->file);
661 fd_install(elf_exec_fileno = retval, bprm->file);
662
663 elf_ppnt = elf_phdata;
664 elf_bss = 0;
665 elf_brk = 0;
666
667 start_code = ~0UL;
668 end_code = 0;
669 start_data = 0;
670 end_data = 0;
671
672 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
673 if (elf_ppnt->p_type == PT_INTERP) {
674 /* This is the program interpreter used for
675 * shared libraries - for now assume that this
676 * is an a.out format binary
677 */
678 retval = -ENOEXEC;
679 if (elf_ppnt->p_filesz > PATH_MAX ||
680 elf_ppnt->p_filesz < 2)
681 goto out_free_file;
682
683 retval = -ENOMEM;
684 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
685 GFP_KERNEL);
686 if (!elf_interpreter)
687 goto out_free_file;
688
689 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
690 elf_interpreter,
691 elf_ppnt->p_filesz);
692 if (retval != elf_ppnt->p_filesz) {
693 if (retval >= 0)
694 retval = -EIO;
695 goto out_free_interp;
696 }
697 /* make sure path is NULL terminated */
698 retval = -ENOEXEC;
699 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
700 goto out_free_interp;
701
702 /* If the program interpreter is one of these two,
703 * then assume an iBCS2 image. Otherwise assume
704 * a native linux image.
705 */
706 if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
707 strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
708 ibcs2_interpreter = 1;
709
710 /*
711 * The early SET_PERSONALITY here is so that the lookup
712 * for the interpreter happens in the namespace of the
713 * to-be-execed image. SET_PERSONALITY can select an
714 * alternate root.
715 *
716 * However, SET_PERSONALITY is NOT allowed to switch
717 * this task into the new images's memory mapping
718 * policy - that is, TASK_SIZE must still evaluate to
719 * that which is appropriate to the execing application.
720 * This is because exit_mmap() needs to have TASK_SIZE
721 * evaluate to the size of the old image.
722 *
723 * So if (say) a 64-bit application is execing a 32-bit
724 * application it is the architecture's responsibility
725 * to defer changing the value of TASK_SIZE until the
726 * switch really is going to happen - do this in
727 * flush_thread(). - akpm
728 */
729 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
730
731 interpreter = open_exec(elf_interpreter);
732 retval = PTR_ERR(interpreter);
733 if (IS_ERR(interpreter))
734 goto out_free_interp;
735
736 /*
737 * If the binary is not readable then enforce
738 * mm->dumpable = 0 regardless of the interpreter's
739 * permissions.
740 */
741 if (file_permission(interpreter, MAY_READ) < 0)
742 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
743
744 retval = kernel_read(interpreter, 0, bprm->buf,
745 BINPRM_BUF_SIZE);
746 if (retval != BINPRM_BUF_SIZE) {
747 if (retval >= 0)
748 retval = -EIO;
749 goto out_free_dentry;
750 }
751
752 /* Get the exec headers */
753 loc->interp_ex = *((struct exec *)bprm->buf);
754 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
755 break;
756 }
757 elf_ppnt++;
758 }
759
760 elf_ppnt = elf_phdata;
761 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
762 if (elf_ppnt->p_type == PT_GNU_STACK) {
763 if (elf_ppnt->p_flags & PF_X)
764 executable_stack = EXSTACK_ENABLE_X;
765 else
766 executable_stack = EXSTACK_DISABLE_X;
767 break;
768 }
769
770 /* Some simple consistency checks for the interpreter */
771 if (elf_interpreter) {
772 interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
773
774 /* Now figure out which format our binary is */
775 if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
776 (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
777 (N_MAGIC(loc->interp_ex) != QMAGIC))
778 interpreter_type = INTERPRETER_ELF;
779
780 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
781 interpreter_type &= ~INTERPRETER_ELF;
782
783 retval = -ELIBBAD;
784 if (!interpreter_type)
785 goto out_free_dentry;
786
787 /* Make sure only one type was selected */
788 if ((interpreter_type & INTERPRETER_ELF) &&
789 interpreter_type != INTERPRETER_ELF) {
790 // FIXME - ratelimit this before re-enabling
791 // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
792 interpreter_type = INTERPRETER_ELF;
793 }
794 /* Verify the interpreter has a valid arch */
795 if ((interpreter_type == INTERPRETER_ELF) &&
796 !elf_check_arch(&loc->interp_elf_ex))
797 goto out_free_dentry;
798 } else {
799 /* Executables without an interpreter also need a personality */
800 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
801 }
802
803 /* OK, we are done with that, now set up the arg stuff,
804 and then start this sucker up */
805 if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
806 char *passed_p = passed_fileno;
807 sprintf(passed_fileno, "%d", elf_exec_fileno);
808
809 if (elf_interpreter) {
810 retval = copy_strings_kernel(1, &passed_p, bprm);
811 if (retval)
812 goto out_free_dentry;
813 bprm->argc++;
814 }
815 }
816
817 /* Flush all traces of the currently running executable */
818 retval = flush_old_exec(bprm);
819 if (retval)
820 goto out_free_dentry;
821
822 /* Discard our unneeded old files struct */
823 if (files) {
824 put_files_struct(files);
825 files = NULL;
826 }
827
828 /* OK, This is the point of no return */
829 current->mm->start_data = 0;
830 current->mm->end_data = 0;
831 current->mm->end_code = 0;
832 current->mm->mmap = NULL;
833 current->flags &= ~PF_FORKNOEXEC;
834 current->mm->def_flags = def_flags;
835
836 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
837 may depend on the personality. */
838 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
839 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
840 current->personality |= READ_IMPLIES_EXEC;
841
842 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
843 current->flags |= PF_RANDOMIZE;
844 arch_pick_mmap_layout(current->mm);
845
846 /* Do this so that we can load the interpreter, if need be. We will
847 change some of these later */
848 current->mm->free_area_cache = current->mm->mmap_base;
849 current->mm->cached_hole_size = 0;
850 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
851 executable_stack);
852 if (retval < 0) {
853 send_sig(SIGKILL, current, 0);
854 goto out_free_dentry;
855 }
856
857 current->mm->start_stack = bprm->p;
858
859 /* Now we do a little grungy work by mmaping the ELF image into
860 the correct location in memory. */
861 for(i = 0, elf_ppnt = elf_phdata;
862 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
863 int elf_prot = 0, elf_flags;
864 unsigned long k, vaddr;
865
866 if (elf_ppnt->p_type != PT_LOAD)
867 continue;
868
869 if (unlikely (elf_brk > elf_bss)) {
870 unsigned long nbyte;
871
872 /* There was a PT_LOAD segment with p_memsz > p_filesz
873 before this one. Map anonymous pages, if needed,
874 and clear the area. */
875 retval = set_brk (elf_bss + load_bias,
876 elf_brk + load_bias);
877 if (retval) {
878 send_sig(SIGKILL, current, 0);
879 goto out_free_dentry;
880 }
881 nbyte = ELF_PAGEOFFSET(elf_bss);
882 if (nbyte) {
883 nbyte = ELF_MIN_ALIGN - nbyte;
884 if (nbyte > elf_brk - elf_bss)
885 nbyte = elf_brk - elf_bss;
886 if (clear_user((void __user *)elf_bss +
887 load_bias, nbyte)) {
888 /*
889 * This bss-zeroing can fail if the ELF
890 * file specifies odd protections. So
891 * we don't check the return value
892 */
893 }
894 }
895 }
896
897 if (elf_ppnt->p_flags & PF_R)
898 elf_prot |= PROT_READ;
899 if (elf_ppnt->p_flags & PF_W)
900 elf_prot |= PROT_WRITE;
901 if (elf_ppnt->p_flags & PF_X)
902 elf_prot |= PROT_EXEC;
903
904 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
905
906 vaddr = elf_ppnt->p_vaddr;
907 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
908 elf_flags |= MAP_FIXED;
909 } else if (loc->elf_ex.e_type == ET_DYN) {
910 /* Try and get dynamic programs out of the way of the
911 * default mmap base, as well as whatever program they
912 * might try to exec. This is because the brk will
913 * follow the loader, and is not movable. */
914 #ifdef CONFIG_X86
915 load_bias = 0;
916 #else
917 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
918 #endif
919 }
920
921 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
922 elf_prot, elf_flags,0);
923 if (BAD_ADDR(error)) {
924 send_sig(SIGKILL, current, 0);
925 retval = IS_ERR((void *)error) ?
926 PTR_ERR((void*)error) : -EINVAL;
927 goto out_free_dentry;
928 }
929
930 if (!load_addr_set) {
931 load_addr_set = 1;
932 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
933 if (loc->elf_ex.e_type == ET_DYN) {
934 load_bias += error -
935 ELF_PAGESTART(load_bias + vaddr);
936 load_addr += load_bias;
937 reloc_func_desc = load_bias;
938 }
939 }
940 k = elf_ppnt->p_vaddr;
941 if (k < start_code)
942 start_code = k;
943 if (start_data < k)
944 start_data = k;
945
946 /*
947 * Check to see if the section's size will overflow the
948 * allowed task size. Note that p_filesz must always be
949 * <= p_memsz so it is only necessary to check p_memsz.
950 */
951 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
952 elf_ppnt->p_memsz > TASK_SIZE ||
953 TASK_SIZE - elf_ppnt->p_memsz < k) {
954 /* set_brk can never work. Avoid overflows. */
955 send_sig(SIGKILL, current, 0);
956 retval = -EINVAL;
957 goto out_free_dentry;
958 }
959
960 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
961
962 if (k > elf_bss)
963 elf_bss = k;
964 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
965 end_code = k;
966 if (end_data < k)
967 end_data = k;
968 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
969 if (k > elf_brk)
970 elf_brk = k;
971 }
972
973 loc->elf_ex.e_entry += load_bias;
974 elf_bss += load_bias;
975 elf_brk += load_bias;
976 start_code += load_bias;
977 end_code += load_bias;
978 start_data += load_bias;
979 end_data += load_bias;
980
981 /* Calling set_brk effectively mmaps the pages that we need
982 * for the bss and break sections. We must do this before
983 * mapping in the interpreter, to make sure it doesn't wind
984 * up getting placed where the bss needs to go.
985 */
986 retval = set_brk(elf_bss, elf_brk);
987 if (retval) {
988 send_sig(SIGKILL, current, 0);
989 goto out_free_dentry;
990 }
991 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
992 send_sig(SIGSEGV, current, 0);
993 retval = -EFAULT; /* Nobody gets to see this, but.. */
994 goto out_free_dentry;
995 }
996
997 if (elf_interpreter) {
998 if (interpreter_type == INTERPRETER_AOUT) {
999 elf_entry = load_aout_interp(&loc->interp_ex,
1000 interpreter);
1001 } else {
1002 unsigned long uninitialized_var(interp_map_addr);
1003
1004 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1005 interpreter,
1006 &interp_map_addr,
1007 load_bias);
1008 if (!BAD_ADDR(elf_entry)) {
1009 /*
1010 * load_elf_interp() returns relocation
1011 * adjustment
1012 */
1013 interp_load_addr = elf_entry;
1014 elf_entry += loc->interp_elf_ex.e_entry;
1015 }
1016 }
1017 if (BAD_ADDR(elf_entry)) {
1018 force_sig(SIGSEGV, current);
1019 retval = IS_ERR((void *)elf_entry) ?
1020 (int)elf_entry : -EINVAL;
1021 goto out_free_dentry;
1022 }
1023 reloc_func_desc = interp_load_addr;
1024
1025 allow_write_access(interpreter);
1026 fput(interpreter);
1027 kfree(elf_interpreter);
1028 } else {
1029 elf_entry = loc->elf_ex.e_entry;
1030 if (BAD_ADDR(elf_entry)) {
1031 force_sig(SIGSEGV, current);
1032 retval = -EINVAL;
1033 goto out_free_dentry;
1034 }
1035 }
1036
1037 kfree(elf_phdata);
1038
1039 if (interpreter_type != INTERPRETER_AOUT)
1040 sys_close(elf_exec_fileno);
1041
1042 set_binfmt(&elf_format);
1043
1044 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1045 retval = arch_setup_additional_pages(bprm, executable_stack);
1046 if (retval < 0) {
1047 send_sig(SIGKILL, current, 0);
1048 goto out;
1049 }
1050 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1051
1052 compute_creds(bprm);
1053 current->flags &= ~PF_FORKNOEXEC;
1054 create_elf_tables(bprm, &loc->elf_ex,
1055 (interpreter_type == INTERPRETER_AOUT),
1056 load_addr, interp_load_addr);
1057 /* N.B. passed_fileno might not be initialized? */
1058 if (interpreter_type == INTERPRETER_AOUT)
1059 current->mm->arg_start += strlen(passed_fileno) + 1;
1060 current->mm->end_code = end_code;
1061 current->mm->start_code = start_code;
1062 current->mm->start_data = start_data;
1063 current->mm->end_data = end_data;
1064 current->mm->start_stack = bprm->p;
1065
1066 if (current->personality & MMAP_PAGE_ZERO) {
1067 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1068 and some applications "depend" upon this behavior.
1069 Since we do not have the power to recompile these, we
1070 emulate the SVr4 behavior. Sigh. */
1071 down_write(&current->mm->mmap_sem);
1072 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1073 MAP_FIXED | MAP_PRIVATE, 0);
1074 up_write(&current->mm->mmap_sem);
1075 }
1076
1077 #ifdef ELF_PLAT_INIT
1078 /*
1079 * The ABI may specify that certain registers be set up in special
1080 * ways (on i386 %edx is the address of a DT_FINI function, for
1081 * example). In addition, it may also specify (eg, PowerPC64 ELF)
1082 * that the e_entry field is the address of the function descriptor
1083 * for the startup routine, rather than the address of the startup
1084 * routine itself. This macro performs whatever initialization to
1085 * the regs structure is required as well as any relocations to the
1086 * function descriptor entries when executing dynamically linked apps.
1087 */
1088 ELF_PLAT_INIT(regs, reloc_func_desc);
1089 #endif
1090
1091 start_thread(regs, elf_entry, bprm->p);
1092 if (unlikely(current->ptrace & PT_PTRACED)) {
1093 if (current->ptrace & PT_TRACE_EXEC)
1094 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1095 else
1096 send_sig(SIGTRAP, current, 0);
1097 }
1098 retval = 0;
1099 out:
1100 kfree(loc);
1101 out_ret:
1102 return retval;
1103
1104 /* error cleanup */
1105 out_free_dentry:
1106 allow_write_access(interpreter);
1107 if (interpreter)
1108 fput(interpreter);
1109 out_free_interp:
1110 kfree(elf_interpreter);
1111 out_free_file:
1112 sys_close(elf_exec_fileno);
1113 out_free_fh:
1114 if (files)
1115 reset_files_struct(current, files);
1116 out_free_ph:
1117 kfree(elf_phdata);
1118 goto out;
1119 }
1120
1121 /* This is really simpleminded and specialized - we are loading an
1122 a.out library that is given an ELF header. */
1123 static int load_elf_library(struct file *file)
1124 {
1125 struct elf_phdr *elf_phdata;
1126 struct elf_phdr *eppnt;
1127 unsigned long elf_bss, bss, len;
1128 int retval, error, i, j;
1129 struct elfhdr elf_ex;
1130
1131 error = -ENOEXEC;
1132 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1133 if (retval != sizeof(elf_ex))
1134 goto out;
1135
1136 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1137 goto out;
1138
1139 /* First of all, some simple consistency checks */
1140 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1141 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1142 goto out;
1143
1144 /* Now read in all of the header information */
1145
1146 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1147 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1148
1149 error = -ENOMEM;
1150 elf_phdata = kmalloc(j, GFP_KERNEL);
1151 if (!elf_phdata)
1152 goto out;
1153
1154 eppnt = elf_phdata;
1155 error = -ENOEXEC;
1156 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1157 if (retval != j)
1158 goto out_free_ph;
1159
1160 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1161 if ((eppnt + i)->p_type == PT_LOAD)
1162 j++;
1163 if (j != 1)
1164 goto out_free_ph;
1165
1166 while (eppnt->p_type != PT_LOAD)
1167 eppnt++;
1168
1169 /* Now use mmap to map the library into memory. */
1170 down_write(&current->mm->mmap_sem);
1171 error = do_mmap(file,
1172 ELF_PAGESTART(eppnt->p_vaddr),
1173 (eppnt->p_filesz +
1174 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1175 PROT_READ | PROT_WRITE | PROT_EXEC,
1176 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1177 (eppnt->p_offset -
1178 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1179 up_write(&current->mm->mmap_sem);
1180 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1181 goto out_free_ph;
1182
1183 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1184 if (padzero(elf_bss)) {
1185 error = -EFAULT;
1186 goto out_free_ph;
1187 }
1188
1189 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1190 ELF_MIN_ALIGN - 1);
1191 bss = eppnt->p_memsz + eppnt->p_vaddr;
1192 if (bss > len) {
1193 down_write(&current->mm->mmap_sem);
1194 do_brk(len, bss - len);
1195 up_write(&current->mm->mmap_sem);
1196 }
1197 error = 0;
1198
1199 out_free_ph:
1200 kfree(elf_phdata);
1201 out:
1202 return error;
1203 }
1204
1205 /*
1206 * Note that some platforms still use traditional core dumps and not
1207 * the ELF core dump. Each platform can select it as appropriate.
1208 */
1209 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1210
1211 /*
1212 * ELF core dumper
1213 *
1214 * Modelled on fs/exec.c:aout_core_dump()
1215 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1216 */
1217 /*
1218 * These are the only things you should do on a core-file: use only these
1219 * functions to write out all the necessary info.
1220 */
1221 static int dump_write(struct file *file, const void *addr, int nr)
1222 {
1223 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1224 }
1225
1226 static int dump_seek(struct file *file, loff_t off)
1227 {
1228 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1229 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1230 return 0;
1231 } else {
1232 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1233 if (!buf)
1234 return 0;
1235 while (off > 0) {
1236 unsigned long n = off;
1237 if (n > PAGE_SIZE)
1238 n = PAGE_SIZE;
1239 if (!dump_write(file, buf, n))
1240 return 0;
1241 off -= n;
1242 }
1243 free_page((unsigned long)buf);
1244 }
1245 return 1;
1246 }
1247
1248 /*
1249 * Decide whether a segment is worth dumping; default is yes to be
1250 * sure (missing info is worse than too much; etc).
1251 * Personally I'd include everything, and use the coredump limit...
1252 *
1253 * I think we should skip something. But I am not sure how. H.J.
1254 */
1255 static int maydump(struct vm_area_struct *vma)
1256 {
1257 /* The vma can be set up to tell us the answer directly. */
1258 if (vma->vm_flags & VM_ALWAYSDUMP)
1259 return 1;
1260
1261 /* Do not dump I/O mapped devices or special mappings */
1262 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1263 return 0;
1264
1265 /* Dump shared memory only if mapped from an anonymous file. */
1266 if (vma->vm_flags & VM_SHARED)
1267 return vma->vm_file->f_path.dentry->d_inode->i_nlink == 0;
1268
1269 /* If it hasn't been written to, don't write it out */
1270 if (!vma->anon_vma)
1271 return 0;
1272
1273 return 1;
1274 }
1275
/*
 * An ELF note held in memory until it is written to the core file.
 */
struct memelfnote {
	const char *name;	/* NUL-terminated note name */
	int type;		/* note type, emitted as n_type */
	unsigned int datasz;	/* payload size in bytes, emitted as n_descsz */
	void *data;		/* payload written after the name */
};
1284
1285 static int notesize(struct memelfnote *en)
1286 {
1287 int sz;
1288
1289 sz = sizeof(struct elf_note);
1290 sz += roundup(strlen(en->name) + 1, 4);
1291 sz += roundup(en->datasz, 4);
1292
1293 return sz;
1294 }
1295
/*
 * Write (nr) bytes from (addr) with dump_write() to the variable named
 * `file' at the point of use.  If the write fails, the ENCLOSING function
 * returns 0; otherwise the offset foffset points at is advanced by (nr).
 * Note that (nr) is evaluated twice.
 */
1296 #define DUMP_WRITE(addr, nr, foffset) \
1297 do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1298
1299 static int alignfile(struct file *file, loff_t *foffset)
1300 {
1301 static const char buf[4] = { 0, };
1302 DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1303 return 1;
1304 }
1305
1306 static int writenote(struct memelfnote *men, struct file *file,
1307 loff_t *foffset)
1308 {
1309 struct elf_note en;
1310 en.n_namesz = strlen(men->name) + 1;
1311 en.n_descsz = men->datasz;
1312 en.n_type = men->type;
1313
1314 DUMP_WRITE(&en, sizeof(en), foffset);
1315 DUMP_WRITE(men->name, en.n_namesz, foffset);
1316 if (!alignfile(file, foffset))
1317 return 0;
1318 DUMP_WRITE(men->data, men->datasz, foffset);
1319 if (!alignfile(file, foffset))
1320 return 0;
1321
1322 return 1;
1323 }
1324 #undef DUMP_WRITE
1325
/*
 * Output helpers that bail out with a goto instead of a return.  Both
 * rely on names that must exist where they are expanded: a `file'
 * variable and an `end_coredump' label, plus `size' and `limit' for
 * DUMP_WRITE.
 *
 * DUMP_WRITE adds (nr) to `size' first and jumps to end_coredump when the
 * running total exceeds `limit' or when dump_write() fails.
 * DUMP_SEEK jumps to end_coredump when dump_seek() fails.
 *
 * Each expands to a bare `if' statement that already ends in a semicolon,
 * so do not use one as the unbraced body of an if/else.
 */
1326 #define DUMP_WRITE(addr, nr) \
1327 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1328 goto end_coredump;
1329 #define DUMP_SEEK(off) \
1330 if (!dump_seek(file, (off))) \
1331 goto end_coredump;
1332
1333 static void fill_elf_header(struct elfhdr *elf, int segs)
1334 {
1335 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1336 elf->e_ident[EI_CLASS] = ELF_CLASS;
1337 elf->e_ident[EI_DATA] = ELF_DATA;
1338 elf->e_ident[EI_VERSION] = EV_CURRENT;
1339 elf->e_ident[EI_OSABI] = ELF_OSABI;
1340 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1341
1342 elf->e_type = ET_CORE;
1343 elf->e_machine = ELF_ARCH;
1344 elf->e_version = EV_CURRENT;
1345 elf->e_entry = 0;
1346 elf->e_phoff = sizeof(struct elfhdr);
1347 elf->e_shoff = 0;
1348 elf->e_flags = ELF_CORE_EFLAGS;
1349 elf->e_ehsize = sizeof(struct elfhdr);
1350 elf->e_phentsize = sizeof(struct elf_phdr);
1351 elf->e_phnum = segs;
1352 elf->e_shentsize = 0;
1353 elf->e_shnum = 0;
1354 elf->e_shstrndx = 0;
1355 return;
1356 }
1357
1358 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1359 {
1360 phdr->p_type = PT_NOTE;
1361 phdr->p_offset = offset;
1362 phdr->p_vaddr = 0;
1363 phdr->p_paddr = 0;
1364 phdr->p_filesz = sz;
1365 phdr->p_memsz = 0;
1366 phdr->p_flags = 0;
1367 phdr->p_align = 0;
1368 return;
1369 }
1370
1371 static void fill_note(struct memelfnote *note, const char *name, int type,
1372 unsigned int sz, void *data)
1373 {
1374 note->name = name;
1375 note->type = type;
1376 note->datasz = sz;
1377 note->data = data;
1378 return;
1379 }
1380
1381 /*
1382 * fill up all the fields in prstatus from the given task struct, except
1383 * registers which need to be filled up separately.
1384 */
1385 static void fill_prstatus(struct elf_prstatus *prstatus,
1386 struct task_struct *p, long signr)
1387 {
1388 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1389 prstatus->pr_sigpend = p->pending.signal.sig[0];
1390 prstatus->pr_sighold = p->blocked.sig[0];
1391 prstatus->pr_pid = p->pid;
1392 prstatus->pr_ppid = p->parent->pid;
1393 prstatus->pr_pgrp = process_group(p);
1394 prstatus->pr_sid = process_session(p);
1395 if (thread_group_leader(p)) {
1396 /*
1397 * This is the record for the group leader. Add in the
1398 * cumulative times of previous dead threads. This total
1399 * won't include the time of each live thread whose state
1400 * is included in the core dump. The final total reported
1401 * to our parent process when it calls wait4 will include
1402 * those sums as well as the little bit more time it takes
1403 * this and each other thread to finish dying after the
1404 * core dump synchronization phase.
1405 */
1406 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1407 &prstatus->pr_utime);
1408 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1409 &prstatus->pr_stime);
1410 } else {
1411 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1412 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1413 }
1414 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1415 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1416 }
1417
1418 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1419 struct mm_struct *mm)
1420 {
1421 unsigned int i, len;
1422
1423 /* first copy the parameters from user space */
1424 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1425
1426 len = mm->arg_end - mm->arg_start;
1427 if (len >= ELF_PRARGSZ)
1428 len = ELF_PRARGSZ-1;
1429 if (copy_from_user(&psinfo->pr_psargs,
1430 (const char __user *)mm->arg_start, len))
1431 return -EFAULT;
1432 for(i = 0; i < len; i++)
1433 if (psinfo->pr_psargs[i] == 0)
1434 psinfo->pr_psargs[i] = ' ';
1435 psinfo->pr_psargs[len] = 0;
1436
1437 psinfo->pr_pid = p->pid;
1438 psinfo->pr_ppid = p->parent->pid;
1439 psinfo->pr_pgrp = process_group(p);
1440 psinfo->pr_sid = process_session(p);
1441
1442 i = p->state ? ffz(~p->state) + 1 : 0;
1443 psinfo->pr_state = i;
1444 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1445 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1446 psinfo->pr_nice = task_nice(p);
1447 psinfo->pr_flag = p->flags;
1448 SET_UID(psinfo->pr_uid, p->uid);
1449 SET_GID(psinfo->pr_gid, p->gid);
1450 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1451
1452 return 0;
1453 }
1454
1455 /* Here is the structure in which status of each thread is captured. */
1456 struct elf_thread_status
1457 {
1458 struct list_head list;
1459 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1460 elf_fpregset_t fpu; /* NT_PRFPREG */
1461 struct task_struct *thread;
1462 #ifdef ELF_CORE_COPY_XFPREGS
1463 elf_fpxregset_t xfpu; /* NT_PRXFPREG */
1464 #endif
1465 struct memelfnote notes[3];
1466 int num_notes;
1467 };
1468
1469 /*
1470 * In order to add the specific thread information for the elf file format,
1471 * we need to keep a linked list of every threads pr_status and then create
1472 * a single section for them in the final core file.
1473 */
1474 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1475 {
1476 int sz = 0;
1477 struct task_struct *p = t->thread;
1478 t->num_notes = 0;
1479
1480 fill_prstatus(&t->prstatus, p, signr);
1481 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1482
1483 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1484 &(t->prstatus));
1485 t->num_notes++;
1486 sz += notesize(&t->notes[0]);
1487
1488 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1489 &t->fpu))) {
1490 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1491 &(t->fpu));
1492 t->num_notes++;
1493 sz += notesize(&t->notes[1]);
1494 }
1495
1496 #ifdef ELF_CORE_COPY_XFPREGS
1497 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1498 fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
1499 &t->xfpu);
1500 t->num_notes++;
1501 sz += notesize(&t->notes[2]);
1502 }
1503 #endif
1504 return sz;
1505 }
1506
1507 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1508 struct vm_area_struct *gate_vma)
1509 {
1510 struct vm_area_struct *ret = tsk->mm->mmap;
1511
1512 if (ret)
1513 return ret;
1514 return gate_vma;
1515 }
1516 /*
1517 * Helper function for iterating across a vma list. It ensures that the caller
1518 * will visit `gate_vma' prior to terminating the search.
1519 */
1520 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1521 struct vm_area_struct *gate_vma)
1522 {
1523 struct vm_area_struct *ret;
1524
1525 ret = this_vma->vm_next;
1526 if (ret)
1527 return ret;
1528 if (this_vma == gate_vma)
1529 return NULL;
1530 return gate_vma;
1531 }
1532
1533 /*
1534 * Actual dumper
1535 *
1536 * This is a two-pass process; first we find the offsets of the bits,
1537 * and then they are actually written out. If we run out of core limit
1538 * we just truncate.
1539 */
/*
 * signr: signal number recorded in the prstatus notes; the other threads
 *        sharing this mm are only collected when it is non-zero.
 * regs:  register state of the current task to put in its prstatus note.
 * file:  core file to write to.
 *
 * Returns has_dumped: 0 when one of the up-front allocations fails, 1 once
 * the header has been prepared -- also when a later write fails or the
 * size limit cuts the dump short.
 */
1540 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1541 {
/* Capacity of notes[]; this function fills at most five entries. */
1542 #define NUM_NOTES 6
1543 int has_dumped = 0;
1544 mm_segment_t fs;
1545 int segs;
1546 size_t size = 0;
1547 int i;
1548 struct vm_area_struct *vma, *gate_vma;
1549 struct elfhdr *elf = NULL;
/*
 * offset tracks the planned file layout while headers are computed;
 * foffset tracks the position actually reached while notes are written.
 */
1550 loff_t offset = 0, dataoff, foffset;
1551 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1552 int numnote;
1553 struct memelfnote *notes = NULL;
1554 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1555 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1556 struct task_struct *g, *p;
1557 LIST_HEAD(thread_list);
1558 struct list_head *t;
1559 elf_fpregset_t *fpu = NULL;
1560 #ifdef ELF_CORE_COPY_XFPREGS
1561 elf_fpxregset_t *xfpu = NULL;
1562 #endif
1563 int thread_status_size = 0;
1564 elf_addr_t *auxv;
1565 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1566 int extra_notes_size;
1567 #endif
1568
1569 /*
1570 * We no longer stop all VM operations.
1571 *
1572 * This is because those processes that could possibly change map_count
1573 * or the mmap / vma pages are now blocked in do_exit on current
1574 * finishing this core dump.
1575 *
1576 * Only ptrace can touch these memory addresses, but it doesn't change
1577 * the map_count or the pages allocated. So no possibility of crashing
1578 * exists while dumping the mm->vm_next areas to the core file.
1579 */
1580
1581 /* alloc memory for large data structures: too large to be on stack */
1582 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1583 if (!elf)
1584 goto cleanup;
1585 prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1586 if (!prstatus)
1587 goto cleanup;
1588 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1589 if (!psinfo)
1590 goto cleanup;
1591 notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1592 if (!notes)
1593 goto cleanup;
1594 fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1595 if (!fpu)
1596 goto cleanup;
1597 #ifdef ELF_CORE_COPY_XFPREGS
1598 xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1599 if (!xfpu)
1600 goto cleanup;
1601 #endif
1602
/*
 * Build a list entry for every other task that shares current->mm, then
 * capture each one's state and add up the size of its notes.
 * NOTE(review): GFP_ATOMIC is presumably used because the allocation
 * happens inside the rcu_read_lock() section -- confirm.
 */
1603 if (signr) {
1604 struct elf_thread_status *tmp;
1605 rcu_read_lock();
1606 do_each_thread(g,p)
1607 if (current->mm == p->mm && current != p) {
1608 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1609 if (!tmp) {
1610 rcu_read_unlock();
1611 goto cleanup;
1612 }
1613 tmp->thread = p;
1614 list_add(&tmp->list, &thread_list);
1615 }
1616 while_each_thread(g,p);
1617 rcu_read_unlock();
1618 list_for_each(t, &thread_list) {
1619 struct elf_thread_status *tmp;
1620 int sz;
1621
1622 tmp = list_entry(t, struct elf_thread_status, list);
1623 sz = elf_dump_thread_status(signr, tmp);
1624 thread_status_size += sz;
1625 }
1626 }
1627 /* now collect the dump for the current */
1628 memset(prstatus, 0, sizeof(*prstatus));
1629 fill_prstatus(prstatus, current, signr);
1630 elf_core_copy_regs(&prstatus->pr_reg, regs);
1631
/*
 * One program header per vma, plus any arch extras, plus one for the gate
 * vma when there is one.  The notes header is added as the "+ 1" below.
 */
1632 segs = current->mm->map_count;
1633 #ifdef ELF_CORE_EXTRA_PHDRS
1634 segs += ELF_CORE_EXTRA_PHDRS;
1635 #endif
1636
1637 gate_vma = get_gate_vma(current);
1638 if (gate_vma != NULL)
1639 segs++;
1640
1641 /* Set up header */
1642 fill_elf_header(elf, segs + 1); /* including notes section */
1643
/* From here on the function returns 1, whatever happens to the writes. */
1644 has_dumped = 1;
1645 current->flags |= PF_DUMPCORE;
1646
1647 /*
1648 * Set up the notes in similar form to SVR4 core dumps made
1649 * with info from their /proc.
1650 */
1651
1652 fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
/* The return value of fill_psinfo() is not checked here. */
1653 fill_psinfo(psinfo, current->group_leader, current->mm);
1654 fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1655
1656 numnote = 2;
1657
1658 auxv = (elf_addr_t *)current->mm->saved_auxv;
1659
/*
 * Count the auxv entries two words at a time, up to and including the
 * pair whose first word is AT_NULL.
 */
1660 i = 0;
1661 do
1662 i += 2;
1663 while (auxv[i - 2] != AT_NULL);
1664 fill_note(&notes[numnote++], "CORE", NT_AUXV,
1665 i * sizeof(elf_addr_t), auxv);
1666
1667 /* Try to dump the FPU. */
1668 if ((prstatus->pr_fpvalid =
1669 elf_core_copy_task_fpregs(current, regs, fpu)))
1670 fill_note(notes + numnote++,
1671 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1672 #ifdef ELF_CORE_COPY_XFPREGS
1673 if (elf_core_copy_task_xfpregs(current, xfpu))
1674 fill_note(notes + numnote++,
1675 "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1676 #endif
1677
/*
 * NOTE(review): switching to KERNEL_DS presumably lets the file's write
 * method accept the kernel buffers passed below -- confirm.  The saved
 * value is restored at end_coredump.
 */
1678 fs = get_fs();
1679 set_fs(KERNEL_DS);
1680
1681 DUMP_WRITE(elf, sizeof(*elf));
1682 offset += sizeof(*elf); /* Elf header */
1683 offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
/* The notes are written right after the program header table. */
1684 foffset = offset;
1685
1686 /* Write notes phdr entry */
1687 {
1688 struct elf_phdr phdr;
1689 int sz = 0;
1690
1691 for (i = 0; i < numnote; i++)
1692 sz += notesize(notes + i);
1693
1694 sz += thread_status_size;
1695
1696 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1697 extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
1698 sz += extra_notes_size;
1699 #endif
1700
1701 fill_elf_note_phdr(&phdr, sz, offset);
1702 offset += sz;
1703 DUMP_WRITE(&phdr, sizeof(phdr));
1704 }
1705
/* Segment data starts at the next ELF_EXEC_PAGESIZE boundary. */
1706 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1707
1708 /* Write program headers for segments dump */
1709 for (vma = first_vma(current, gate_vma); vma != NULL;
1710 vma = next_vma(vma, gate_vma)) {
1711 struct elf_phdr phdr;
1712 size_t sz;
1713
1714 sz = vma->vm_end - vma->vm_start;
1715
1716 phdr.p_type = PT_LOAD;
1717 phdr.p_offset = offset;
1718 phdr.p_vaddr = vma->vm_start;
1719 phdr.p_paddr = 0;
/* A vma that is not dumped still gets a header, with a file size of 0. */
1720 phdr.p_filesz = maydump(vma) ? sz : 0;
1721 phdr.p_memsz = sz;
1722 offset += phdr.p_filesz;
1723 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1724 if (vma->vm_flags & VM_WRITE)
1725 phdr.p_flags |= PF_W;
1726 if (vma->vm_flags & VM_EXEC)
1727 phdr.p_flags |= PF_X;
1728 phdr.p_align = ELF_EXEC_PAGESIZE;
1729
1730 DUMP_WRITE(&phdr, sizeof(phdr));
1731 }
1732
1733 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1734 ELF_CORE_WRITE_EXTRA_PHDRS;
1735 #endif
1736
1737 /* write out the notes section */
1738 for (i = 0; i < numnote; i++)
1739 if (!writenote(notes + i, file, &foffset))
1740 goto end_coredump;
1741
1742 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1743 ELF_CORE_WRITE_EXTRA_NOTES;
1744 foffset += extra_notes_size;
1745 #endif
1746
1747 /* write out the thread status notes section */
1748 list_for_each(t, &thread_list) {
1749 struct elf_thread_status *tmp =
1750 list_entry(t, struct elf_thread_status, list);
1751
1752 for (i = 0; i < tmp->num_notes; i++)
1753 if (!writenote(&tmp->notes[i], file, &foffset))
1754 goto end_coredump;
1755 }
1756
1757 /* Align to page */
1758 DUMP_SEEK(dataoff - foffset);
1759
/*
 * Second pass: write the contents of every vma that maydump() accepts,
 * one page at a time, in the same order as the program headers above.
 */
1760 for (vma = first_vma(current, gate_vma); vma != NULL;
1761 vma = next_vma(vma, gate_vma)) {
1762 unsigned long addr;
1763
1764 if (!maydump(vma))
1765 continue;
1766
1767 for (addr = vma->vm_start;
1768 addr < vma->vm_end;
1769 addr += PAGE_SIZE) {
1770 struct page *page;
/* This declaration shadows the outer vma; get_user_pages() fills it in. */
1771 struct vm_area_struct *vma;
1772
/*
 * A page that cannot be obtained, or that is the zero page, is skipped
 * with a seek instead of being written and is not counted in size.
 */
1773 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1774 &page, &vma) <= 0) {
1775 DUMP_SEEK(PAGE_SIZE);
1776 } else {
1777 if (page == ZERO_PAGE(addr)) {
1778 if (!dump_seek(file, PAGE_SIZE)) {
1779 page_cache_release(page);
1780 goto end_coredump;
1781 }
1782 } else {
1783 void *kaddr;
1784 flush_cache_page(vma, addr,
1785 page_to_pfn(page));
1786 kaddr = kmap(page);
/* Open-coded DUMP_WRITE so the page can be unmapped and released first. */
1787 if ((size += PAGE_SIZE) > limit ||
1788 !dump_write(file, kaddr,
1789 PAGE_SIZE)) {
1790 kunmap(page);
1791 page_cache_release(page);
1792 goto end_coredump;
1793 }
1794 kunmap(page);
1795 }
1796 page_cache_release(page);
1797 }
1798 }
1799 }
1800
1801 #ifdef ELF_CORE_WRITE_EXTRA_DATA
1802 ELF_CORE_WRITE_EXTRA_DATA;
1803 #endif
1804
/* Reached on success and on any failure after set_fs(KERNEL_DS). */
1805 end_coredump:
1806 set_fs(fs);
1807
/* Reached directly when an allocation fails before set_fs() was called. */
1808 cleanup:
1809 while (!list_empty(&thread_list)) {
1810 struct list_head *tmp = thread_list.next;
1811 list_del(tmp);
1812 kfree(list_entry(tmp, struct elf_thread_status, list));
1813 }
1814
1815 kfree(elf);
1816 kfree(prstatus);
1817 kfree(psinfo);
1818 kfree(notes);
1819 kfree(fpu);
1820 #ifdef ELF_CORE_COPY_XFPREGS
1821 kfree(xfpu);
1822 #endif
1823 return has_dumped;
1824 #undef NUM_NOTES
1825 }
1826
1827 #endif /* USE_ELF_CORE_DUMP */
1828
1829 static int __init init_elf_binfmt(void)
1830 {
1831 return register_binfmt(&elf_format);
1832 }
1833
1834 static void __exit exit_elf_binfmt(void)
1835 {
1836 /* Remove the COFF and ELF loaders. */
1837 unregister_binfmt(&elf_format);
1838 }
1839
1840 core_initcall(init_elf_binfmt);
1841 module_exit(exit_elf_binfmt);
1842 MODULE_LICENSE("GPL");