1 /*
2 * linux/fs/binfmt_elf.c
3 *
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
8 *
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10 */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/dax.h>
39 #include <linux/uaccess.h>
40 #include <asm/param.h>
41 #include <asm/page.h>
42
43 #ifndef user_long_t
44 #define user_long_t long
45 #endif
46 #ifndef user_siginfo_t
47 #define user_siginfo_t siginfo_t
48 #endif
49
50 static int load_elf_binary(struct linux_binprm *bprm);
51 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
52 int, int, unsigned long);
53
54 #ifdef CONFIG_USELIB
55 static int load_elf_library(struct file *);
56 #else
57 #define load_elf_library NULL
58 #endif
59
60 /*
61 * If we don't support core dumping, then supply a NULL so we
62 * don't even try.
63 */
64 #ifdef CONFIG_ELF_CORE
65 static int elf_core_dump(struct coredump_params *cprm);
66 #else
67 #define elf_core_dump NULL
68 #endif
69
70 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
71 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
72 #else
73 #define ELF_MIN_ALIGN PAGE_SIZE
74 #endif
75
76 #ifndef ELF_CORE_EFLAGS
77 #define ELF_CORE_EFLAGS 0
78 #endif
79
80 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
81 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
82 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
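/*
 * Worked example of the page-rounding helpers above, assuming the common
 * case of ELF_MIN_ALIGN == 0x1000 (4 KiB); the addresses are purely
 * illustrative:
 *
 *   ELF_PAGESTART(0x2345)  == 0x2000   (round down to the page start)
 *   ELF_PAGEOFFSET(0x2345) == 0x0345   (offset within the page)
 *   ELF_PAGEALIGN(0x2345)  == 0x3000   (round up to the next page boundary)
 */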
83
84 static struct linux_binfmt elf_format = {
85 .module = THIS_MODULE,
86 .load_binary = load_elf_binary,
87 .load_shlib = load_elf_library,
88 .core_dump = elf_core_dump,
89 .min_coredump = ELF_EXEC_PAGESIZE,
90 };
91
92 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
93
94 static int set_brk(unsigned long start, unsigned long end, int prot)
95 {
96 start = ELF_PAGEALIGN(start);
97 end = ELF_PAGEALIGN(end);
98 if (end > start) {
99 /*
100 * Map the last of the bss segment.
101 * If the header is requesting these pages to be
102 * executable, honour that (ppc32 needs this).
103 */
104 int error = vm_brk_flags(start, end - start,
105 prot & PROT_EXEC ? VM_EXEC : 0);
106 if (error)
107 return error;
108 }
109 current->mm->start_brk = current->mm->brk = end;
110 return 0;
111 }
112
113 /* We need to explicitly zero any fractional pages
114 after the data section (i.e. bss). These would
115 otherwise contain junk from the file that should
116 not be in memory.
117 */
118 static int padzero(unsigned long elf_bss)
119 {
120 unsigned long nbyte;
121
122 nbyte = ELF_PAGEOFFSET(elf_bss);
123 if (nbyte) {
124 nbyte = ELF_MIN_ALIGN - nbyte;
125 if (clear_user((void __user *) elf_bss, nbyte))
126 return -EFAULT;
127 }
128 return 0;
129 }
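/*
 * Illustrative numbers for padzero(), again assuming ELF_MIN_ALIGN == 0x1000:
 * with elf_bss == 0x601234, ELF_PAGEOFFSET(elf_bss) == 0x234, so
 * nbyte == 0x1000 - 0x234 == 0xdcc and clear_user() zeroes the 0xdcc bytes
 * from 0x601234 up to the page boundary at 0x602000.
 */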
130
131 /* Let's use some macros to make this stack manipulation a little clearer */
132 #ifdef CONFIG_STACK_GROWSUP
133 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
134 #define STACK_ROUND(sp, items) \
135 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
136 #define STACK_ALLOC(sp, len) ({ \
137 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
138 old_sp; })
139 #else
140 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
141 #define STACK_ROUND(sp, items) \
142 (((unsigned long) (sp - items)) &~ 15UL)
143 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
144 #endif
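/*
 * Example of the downward-growing (#else) variants above, with hypothetical
 * values: if p == 0x7ffffffe0000, then
 *
 *   STACK_ALLOC(p, 16)     leaves p == 0x7ffffffdfff0 and returns it;
 *   STACK_ADD(p, 4)        is p minus 4 * sizeof(elf_addr_t);
 *   STACK_ROUND(sp, items) rounds the resulting address down to a 16-byte
 *                          boundary, as most ABIs require for the stack.
 */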
145
146 #ifndef ELF_BASE_PLATFORM
147 /*
148 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
149 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
150 * will be copied to the user stack in the same manner as AT_PLATFORM.
151 */
152 #define ELF_BASE_PLATFORM NULL
153 #endif
154
155 static int
156 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
157 unsigned long load_addr, unsigned long interp_load_addr)
158 {
159 unsigned long p = bprm->p;
160 int argc = bprm->argc;
161 int envc = bprm->envc;
162 elf_addr_t __user *argv;
163 elf_addr_t __user *envp;
164 elf_addr_t __user *sp;
165 elf_addr_t __user *u_platform;
166 elf_addr_t __user *u_base_platform;
167 elf_addr_t __user *u_rand_bytes;
168 const char *k_platform = ELF_PLATFORM;
169 const char *k_base_platform = ELF_BASE_PLATFORM;
170 unsigned char k_rand_bytes[16];
171 int items;
172 elf_addr_t *elf_info;
173 int ei_index = 0;
174 const struct cred *cred = current_cred();
175 struct vm_area_struct *vma;
176
177 /*
178 * In some cases (e.g. Hyper-Threading), we want to avoid L1
179 * evictions by the processes running on the same package. One
180 * thing we can do is to shuffle the initial stack for them.
181 */
182
183 p = arch_align_stack(p);
184
185 /*
186 * If this architecture has a platform capability string, copy it
187 * to userspace. In some cases (Sparc), this info is impossible
188 * for userspace to get any other way, in others (i386) it is
189 * merely difficult.
190 */
191 u_platform = NULL;
192 if (k_platform) {
193 size_t len = strlen(k_platform) + 1;
194
195 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
196 if (__copy_to_user(u_platform, k_platform, len))
197 return -EFAULT;
198 }
199
200 /*
201 * If this architecture has a "base" platform capability
202 * string, copy it to userspace.
203 */
204 u_base_platform = NULL;
205 if (k_base_platform) {
206 size_t len = strlen(k_base_platform) + 1;
207
208 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
209 if (__copy_to_user(u_base_platform, k_base_platform, len))
210 return -EFAULT;
211 }
212
213 /*
214 * Generate 16 random bytes for userspace PRNG seeding.
215 */
216 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
217 u_rand_bytes = (elf_addr_t __user *)
218 STACK_ALLOC(p, sizeof(k_rand_bytes));
219 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
220 return -EFAULT;
221
222 /* Create the ELF interpreter info */
223 elf_info = (elf_addr_t *)current->mm->saved_auxv;
224 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
225 #define NEW_AUX_ENT(id, val) \
226 do { \
227 elf_info[ei_index++] = id; \
228 elf_info[ei_index++] = val; \
229 } while (0)
230
231 #ifdef ARCH_DLINFO
232 /*
233 * ARCH_DLINFO must come first so PPC can do its special alignment of
234 * AUXV.
235 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
236 * ARCH_DLINFO changes
237 */
238 ARCH_DLINFO;
239 #endif
240 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
241 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
242 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
243 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
244 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
245 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
246 NEW_AUX_ENT(AT_BASE, interp_load_addr);
247 NEW_AUX_ENT(AT_FLAGS, 0);
248 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
249 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
250 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
251 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
252 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
253 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
254 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
255 #ifdef ELF_HWCAP2
256 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
257 #endif
258 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
259 if (k_platform) {
260 NEW_AUX_ENT(AT_PLATFORM,
261 (elf_addr_t)(unsigned long)u_platform);
262 }
263 if (k_base_platform) {
264 NEW_AUX_ENT(AT_BASE_PLATFORM,
265 (elf_addr_t)(unsigned long)u_base_platform);
266 }
267 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
268 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
269 }
270 #undef NEW_AUX_ENT
271 /* AT_NULL is zero; clear the rest too */
272 memset(&elf_info[ei_index], 0,
273 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
274
275 /* And advance past the AT_NULL entry. */
276 ei_index += 2;
277
278 sp = STACK_ADD(p, ei_index);
279
280 items = (argc + 1) + (envc + 1) + 1;
281 bprm->p = STACK_ROUND(sp, items);
282
283 /* Point sp at the lowest address on the stack */
284 #ifdef CONFIG_STACK_GROWSUP
285 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
286 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
287 #else
288 sp = (elf_addr_t __user *)bprm->p;
289 #endif
290
291
292 /*
293 * Grow the stack manually; some architectures have a limit on how
294 * far ahead a user-space access may be in order to grow the stack.
295 */
296 vma = find_extend_vma(current->mm, bprm->p);
297 if (!vma)
298 return -EFAULT;
299
300 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
301 if (__put_user(argc, sp++))
302 return -EFAULT;
303 argv = sp;
304 envp = argv + argc + 1;
305
306 /* Populate argv and envp */
307 p = current->mm->arg_end = current->mm->arg_start;
308 while (argc-- > 0) {
309 size_t len;
310 if (__put_user((elf_addr_t)p, argv++))
311 return -EFAULT;
312 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
313 if (!len || len > MAX_ARG_STRLEN)
314 return -EINVAL;
315 p += len;
316 }
317 if (__put_user(0, argv))
318 return -EFAULT;
319 current->mm->arg_end = current->mm->env_start = p;
320 while (envc-- > 0) {
321 size_t len;
322 if (__put_user((elf_addr_t)p, envp++))
323 return -EFAULT;
324 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
325 if (!len || len > MAX_ARG_STRLEN)
326 return -EINVAL;
327 p += len;
328 }
329 if (__put_user(0, envp))
330 return -EFAULT;
331 current->mm->env_end = p;
332
333 /* Put the elf_info on the stack in the right place. */
334 sp = (elf_addr_t __user *)envp + 1;
335 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
336 return -EFAULT;
337 return 0;
338 }
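/*
 * Sketch of the initial stack that create_elf_tables() leaves behind on a
 * downward-growing stack (lowest address first); the exact contents depend
 * on the architecture and on ARCH_DLINFO:
 *
 *   sp -> argc
 *         argv[0] ... argv[argc-1], NULL
 *         envp[0] ... envp[envc-1], NULL
 *         auxv pairs (AT_PHDR, AT_ENTRY, ..., AT_RANDOM, ...), AT_NULL
 *         ... argument/environment strings, random bytes, platform strings
 *         (these were STACK_ALLOC()ed from p earlier, so they sit higher up)
 */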
339
340 #ifndef elf_map
341
342 static unsigned long elf_map(struct file *filep, unsigned long addr,
343 struct elf_phdr *eppnt, int prot, int type,
344 unsigned long total_size)
345 {
346 unsigned long map_addr;
347 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
348 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
349 addr = ELF_PAGESTART(addr);
350 size = ELF_PAGEALIGN(size);
351
352 /* mmap() will return -EINVAL if given a zero size, but a
353 * segment with zero filesize is perfectly valid */
354 if (!size)
355 return addr;
356
357 /*
358 * total_size is the size of the ELF (interpreter) image.
359 * The _first_ mmap needs to know the full size; otherwise
360 * randomization might place this image so that it overlaps
361 * the ELF binary image (since size < total_size). So we
362 * first map the 'big' image and then unmap the remainder at
363 * the end (the unmap is needed for ELF images with holes).
364 */
365 if (total_size) {
366 total_size = ELF_PAGEALIGN(total_size);
367 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
368 if (!BAD_ADDR(map_addr))
369 vm_munmap(map_addr+size, total_size-size);
370 } else
371 map_addr = vm_mmap(filep, addr, size, prot, type, off);
372
373 return(map_addr);
374 }
375
376 #endif /* !elf_map */
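/*
 * Illustration of the total_size handling in the generic elf_map() above,
 * with hypothetical sizes: for the first PT_LOAD of an interpreter whose
 * whole image spans 0x205000 bytes but whose own file-backed size rounds to
 * 0x1000, the first call maps 0x205000 bytes (reserving the full span
 * against later randomized mappings) and then vm_munmap()s the trailing
 * 0x204000 bytes; subsequent segments are mapped with total_size == 0 and
 * only cover themselves.
 */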
377
378 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
379 {
380 int i, first_idx = -1, last_idx = -1;
381
382 for (i = 0; i < nr; i++) {
383 if (cmds[i].p_type == PT_LOAD) {
384 last_idx = i;
385 if (first_idx == -1)
386 first_idx = i;
387 }
388 }
389 if (first_idx == -1)
390 return 0;
391
392 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
393 ELF_PAGESTART(cmds[first_idx].p_vaddr);
394 }
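/*
 * Worked example for total_mapping_size(), using made-up program headers:
 * first PT_LOAD at p_vaddr 0x400000, last PT_LOAD at p_vaddr 0x600000 with
 * p_memsz 0x5000 gives 0x600000 + 0x5000 - ELF_PAGESTART(0x400000)
 * == 0x205000 bytes to reserve for the whole image.
 */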
395
396 /**
397 * load_elf_phdrs() - load ELF program headers
398 * @elf_ex: ELF header of the binary whose program headers should be loaded
399 * @elf_file: the opened ELF binary file
400 *
401 * Loads ELF program headers from the binary file elf_file, which has the ELF
402 * header pointed to by elf_ex, into a newly allocated array. The caller is
403 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
404 */
405 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
406 struct file *elf_file)
407 {
408 struct elf_phdr *elf_phdata = NULL;
409 int retval, size, err = -1;
410
411 /*
412 * If the size of this structure has changed, then punt, since
413 * we will be doing the wrong thing.
414 */
415 if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
416 goto out;
417
418 /* Sanity check the number of program headers... */
419 if (elf_ex->e_phnum < 1 ||
420 elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
421 goto out;
422
423 /* ...and their total size. */
424 size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
425 if (size > ELF_MIN_ALIGN)
426 goto out;
427
428 elf_phdata = kmalloc(size, GFP_KERNEL);
429 if (!elf_phdata)
430 goto out;
431
432 /* Read in the program headers */
433 retval = kernel_read(elf_file, elf_ex->e_phoff,
434 (char *)elf_phdata, size);
435 if (retval != size) {
436 err = (retval < 0) ? retval : -EIO;
437 goto out;
438 }
439
440 /* Success! */
441 err = 0;
442 out:
443 if (err) {
444 kfree(elf_phdata);
445 elf_phdata = NULL;
446 }
447 return elf_phdata;
448 }
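/*
 * The sanity checks above bound the header table tightly: assuming a 64-bit
 * ELF where sizeof(struct elf_phdr) == 56, e_phnum may not exceed
 * 65536 / 56 == 1170, and the stricter size check (size <= ELF_MIN_ALIGN,
 * typically 4096) caps it at 4096 / 56 == 73 program headers in practice.
 */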
449
450 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
451
452 /**
453 * struct arch_elf_state - arch-specific ELF loading state
454 *
455 * This structure is used to preserve architecture specific data during
456 * the loading of an ELF file, throughout the checking of architecture
457 * specific ELF headers & through to the point where the ELF load is
458 * known to be proceeding (ie. SET_PERSONALITY).
459 *
460 * This implementation is a dummy for architectures which require no
461 * specific state.
462 */
463 struct arch_elf_state {
464 };
465
466 #define INIT_ARCH_ELF_STATE {}
467
468 /**
469 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
470 * @ehdr: The main ELF header
471 * @phdr: The program header to check
472 * @elf: The open ELF file
473 * @is_interp: True if the phdr is from the interpreter of the ELF being
474 * loaded, else false.
475 * @state: Architecture-specific state preserved throughout the process
476 * of loading the ELF.
477 *
478 * Inspects the program header phdr to validate its correctness and/or
479 * suitability for the system. Called once per ELF program header in the
480 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
481 * interpreter.
482 *
483 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
484 * with that return code.
485 */
486 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
487 struct elf_phdr *phdr,
488 struct file *elf, bool is_interp,
489 struct arch_elf_state *state)
490 {
491 /* Dummy implementation, always proceed */
492 return 0;
493 }
494
495 /**
496 * arch_check_elf() - check an ELF executable
497 * @ehdr: The main ELF header
498 * @has_interp: True if the ELF has an interpreter, else false.
499 * @interp_ehdr: The interpreter's ELF header
500 * @state: Architecture-specific state preserved throughout the process
501 * of loading the ELF.
502 *
503 * Provides a final opportunity for architecture code to reject the loading
504 * of the ELF & cause an exec syscall to return an error. This is called after
505 * all program headers to be checked by arch_elf_pt_proc have been.
506 *
507 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
508 * with that return code.
509 */
510 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
511 struct elfhdr *interp_ehdr,
512 struct arch_elf_state *state)
513 {
514 /* Dummy implementation, always proceed */
515 return 0;
516 }
517
518 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
519
520 /* This is much more generalized than the library routine read function,
521 so we keep this separate. Technically the library read function
522 is only provided so that we can read a.out libraries that have
523 an ELF header */
524
525 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
526 struct file *interpreter, unsigned long *interp_map_addr,
527 unsigned long no_base, struct elf_phdr *interp_elf_phdata)
528 {
529 struct elf_phdr *eppnt;
530 unsigned long load_addr = 0;
531 int load_addr_set = 0;
532 unsigned long last_bss = 0, elf_bss = 0;
533 int bss_prot = 0;
534 unsigned long error = ~0UL;
535 unsigned long total_size;
536 int i;
537
538 /* First of all, some simple consistency checks */
539 if (interp_elf_ex->e_type != ET_EXEC &&
540 interp_elf_ex->e_type != ET_DYN)
541 goto out;
542 if (!elf_check_arch(interp_elf_ex))
543 goto out;
544 if (!interpreter->f_op->mmap)
545 goto out;
546
547 total_size = total_mapping_size(interp_elf_phdata,
548 interp_elf_ex->e_phnum);
549 if (!total_size) {
550 error = -EINVAL;
551 goto out;
552 }
553
554 eppnt = interp_elf_phdata;
555 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
556 if (eppnt->p_type == PT_LOAD) {
557 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
558 int elf_prot = 0;
559 unsigned long vaddr = 0;
560 unsigned long k, map_addr;
561
562 if (eppnt->p_flags & PF_R)
563 elf_prot = PROT_READ;
564 if (eppnt->p_flags & PF_W)
565 elf_prot |= PROT_WRITE;
566 if (eppnt->p_flags & PF_X)
567 elf_prot |= PROT_EXEC;
568 vaddr = eppnt->p_vaddr;
569 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
570 elf_type |= MAP_FIXED;
571 else if (no_base && interp_elf_ex->e_type == ET_DYN)
572 load_addr = -vaddr;
573
574 map_addr = elf_map(interpreter, load_addr + vaddr,
575 eppnt, elf_prot, elf_type, total_size);
576 total_size = 0;
577 if (!*interp_map_addr)
578 *interp_map_addr = map_addr;
579 error = map_addr;
580 if (BAD_ADDR(map_addr))
581 goto out;
582
583 if (!load_addr_set &&
584 interp_elf_ex->e_type == ET_DYN) {
585 load_addr = map_addr - ELF_PAGESTART(vaddr);
586 load_addr_set = 1;
587 }
588
589 /*
590 * Check to see if the section's size will overflow the
591 * allowed task size. Note that p_filesz must always be
592 * <= p_memsz so it's only necessary to check p_memsz.
593 */
594 k = load_addr + eppnt->p_vaddr;
595 if (BAD_ADDR(k) ||
596 eppnt->p_filesz > eppnt->p_memsz ||
597 eppnt->p_memsz > TASK_SIZE ||
598 TASK_SIZE - eppnt->p_memsz < k) {
599 error = -ENOMEM;
600 goto out;
601 }
602
603 /*
604 * Find the end of the file mapping for this phdr, and
605 * keep track of the largest address we see for this.
606 */
607 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
608 if (k > elf_bss)
609 elf_bss = k;
610
611 /*
612 * Do the same thing for the memory mapping - between
613 * elf_bss and last_bss is the bss section.
614 */
615 k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
616 if (k > last_bss) {
617 last_bss = k;
618 bss_prot = elf_prot;
619 }
620 }
621 }
622
623 /*
624 * Now fill out the bss section: first pad the last page from
625 * the file up to the page boundary, and zero it from elf_bss
626 * up to the end of the page.
627 */
628 if (padzero(elf_bss)) {
629 error = -EFAULT;
630 goto out;
631 }
632 /*
633 * Next, align both the file and mem bss up to the page size,
634 * since this is where elf_bss was just zeroed up to, and where
635 * last_bss will end after the vm_brk_flags() below.
636 */
637 elf_bss = ELF_PAGEALIGN(elf_bss);
638 last_bss = ELF_PAGEALIGN(last_bss);
639 /* Finally, if there is still more bss to allocate, do it. */
640 if (last_bss > elf_bss) {
641 error = vm_brk_flags(elf_bss, last_bss - elf_bss,
642 bss_prot & PROT_EXEC ? VM_EXEC : 0);
643 if (error)
644 goto out;
645 }
646
647 error = load_addr;
648 out:
649 return error;
650 }
651
652 /*
653 * These are the functions used to load ELF style executables and shared
654 * libraries. There is no binary dependent code anywhere else.
655 */
656
657 #ifndef STACK_RND_MASK
658 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
659 #endif
660
661 static unsigned long randomize_stack_top(unsigned long stack_top)
662 {
663 unsigned long random_variable = 0;
664
665 if ((current->flags & PF_RANDOMIZE) &&
666 !(current->personality & ADDR_NO_RANDOMIZE)) {
667 random_variable = get_random_long();
668 random_variable &= STACK_RND_MASK;
669 random_variable <<= PAGE_SHIFT;
670 }
671 #ifdef CONFIG_STACK_GROWSUP
672 return PAGE_ALIGN(stack_top) + random_variable;
673 #else
674 return PAGE_ALIGN(stack_top) - random_variable;
675 #endif
676 }
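/*
 * With the default STACK_RND_MASK above and PAGE_SHIFT == 12, the random
 * offset is (get_random_long() & 0x7ff) << 12, i.e. up to 0x7ff000 bytes
 * (just under 8 MiB) of stack-top randomization, matching the "8MB of VA"
 * note on the definition.
 */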
677
678 static int load_elf_binary(struct linux_binprm *bprm)
679 {
680 struct file *interpreter = NULL; /* to shut gcc up */
681 unsigned long load_addr = 0, load_bias = 0;
682 int load_addr_set = 0;
683 char * elf_interpreter = NULL;
684 unsigned long error;
685 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
686 unsigned long elf_bss, elf_brk;
687 int bss_prot = 0;
688 int retval, i;
689 unsigned long elf_entry;
690 unsigned long interp_load_addr = 0;
691 unsigned long start_code, end_code, start_data, end_data;
692 unsigned long reloc_func_desc __maybe_unused = 0;
693 int executable_stack = EXSTACK_DEFAULT;
694 struct pt_regs *regs = current_pt_regs();
695 struct {
696 struct elfhdr elf_ex;
697 struct elfhdr interp_elf_ex;
698 } *loc;
699 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
700
701 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
702 if (!loc) {
703 retval = -ENOMEM;
704 goto out_ret;
705 }
706
707 /* Get the exec-header */
708 loc->elf_ex = *((struct elfhdr *)bprm->buf);
709
710 retval = -ENOEXEC;
711 /* First of all, some simple consistency checks */
712 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
713 goto out;
714
715 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
716 goto out;
717 if (!elf_check_arch(&loc->elf_ex))
718 goto out;
719 if (!bprm->file->f_op->mmap)
720 goto out;
721
722 elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
723 if (!elf_phdata)
724 goto out;
725
726 elf_ppnt = elf_phdata;
727 elf_bss = 0;
728 elf_brk = 0;
729
730 start_code = ~0UL;
731 end_code = 0;
732 start_data = 0;
733 end_data = 0;
734
735 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
736 if (elf_ppnt->p_type == PT_INTERP) {
737 /* This is the program interpreter used for
738 * shared libraries - for now assume that this
739 * is an a.out format binary
740 */
741 retval = -ENOEXEC;
742 if (elf_ppnt->p_filesz > PATH_MAX ||
743 elf_ppnt->p_filesz < 2)
744 goto out_free_ph;
745
746 retval = -ENOMEM;
747 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
748 GFP_KERNEL);
749 if (!elf_interpreter)
750 goto out_free_ph;
751
752 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
753 elf_interpreter,
754 elf_ppnt->p_filesz);
755 if (retval != elf_ppnt->p_filesz) {
756 if (retval >= 0)
757 retval = -EIO;
758 goto out_free_interp;
759 }
760 /* make sure path is NULL terminated */
761 retval = -ENOEXEC;
762 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
763 goto out_free_interp;
764
765 interpreter = open_exec(elf_interpreter);
766 retval = PTR_ERR(interpreter);
767 if (IS_ERR(interpreter))
768 goto out_free_interp;
769
770 /*
771 * If the binary is not readable then enforce
772 * mm->dumpable = 0 regardless of the interpreter's
773 * permissions.
774 */
775 would_dump(bprm, interpreter);
776
777 /* Get the exec headers */
778 retval = kernel_read(interpreter, 0,
779 (void *)&loc->interp_elf_ex,
780 sizeof(loc->interp_elf_ex));
781 if (retval != sizeof(loc->interp_elf_ex)) {
782 if (retval >= 0)
783 retval = -EIO;
784 goto out_free_dentry;
785 }
786
787 break;
788 }
789 elf_ppnt++;
790 }
791
792 elf_ppnt = elf_phdata;
793 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
794 switch (elf_ppnt->p_type) {
795 case PT_GNU_STACK:
796 if (elf_ppnt->p_flags & PF_X)
797 executable_stack = EXSTACK_ENABLE_X;
798 else
799 executable_stack = EXSTACK_DISABLE_X;
800 break;
801
802 case PT_LOPROC ... PT_HIPROC:
803 retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
804 bprm->file, false,
805 &arch_state);
806 if (retval)
807 goto out_free_dentry;
808 break;
809 }
810
811 /* Some simple consistency checks for the interpreter */
812 if (elf_interpreter) {
813 retval = -ELIBBAD;
814 /* Not an ELF interpreter */
815 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
816 goto out_free_dentry;
817 /* Verify the interpreter has a valid arch */
818 if (!elf_check_arch(&loc->interp_elf_ex))
819 goto out_free_dentry;
820
821 /* Load the interpreter program headers */
822 interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
823 interpreter);
824 if (!interp_elf_phdata)
825 goto out_free_dentry;
826
827 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
828 elf_ppnt = interp_elf_phdata;
829 for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
830 switch (elf_ppnt->p_type) {
831 case PT_LOPROC ... PT_HIPROC:
832 retval = arch_elf_pt_proc(&loc->interp_elf_ex,
833 elf_ppnt, interpreter,
834 true, &arch_state);
835 if (retval)
836 goto out_free_dentry;
837 break;
838 }
839 }
840
841 /*
842 * Allow arch code to reject the ELF at this point, whilst it's
843 * still possible to return an error to the code that invoked
844 * the exec syscall.
845 */
846 retval = arch_check_elf(&loc->elf_ex,
847 !!interpreter, &loc->interp_elf_ex,
848 &arch_state);
849 if (retval)
850 goto out_free_dentry;
851
852 /* Flush all traces of the currently running executable */
853 retval = flush_old_exec(bprm);
854 if (retval)
855 goto out_free_dentry;
856
857 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
858 may depend on the personality. */
859 SET_PERSONALITY2(loc->elf_ex, &arch_state);
860 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
861 current->personality |= READ_IMPLIES_EXEC;
862
863 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
864 current->flags |= PF_RANDOMIZE;
865
866 setup_new_exec(bprm);
867 install_exec_creds(bprm);
868
869 /* Do this so that we can load the interpreter, if need be. We will
870 change some of these later */
871 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
872 executable_stack);
873 if (retval < 0)
874 goto out_free_dentry;
875
876 current->mm->start_stack = bprm->p;
877
878 /* Now we do a little grungy work by mmapping the ELF image into
879 the correct location in memory. */
880 for(i = 0, elf_ppnt = elf_phdata;
881 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
882 int elf_prot = 0, elf_flags;
883 unsigned long k, vaddr;
884 unsigned long total_size = 0;
885
886 if (elf_ppnt->p_type != PT_LOAD)
887 continue;
888
889 if (unlikely (elf_brk > elf_bss)) {
890 unsigned long nbyte;
891
892 /* There was a PT_LOAD segment with p_memsz > p_filesz
893 before this one. Map anonymous pages, if needed,
894 and clear the area. */
895 retval = set_brk(elf_bss + load_bias,
896 elf_brk + load_bias,
897 bss_prot);
898 if (retval)
899 goto out_free_dentry;
900 nbyte = ELF_PAGEOFFSET(elf_bss);
901 if (nbyte) {
902 nbyte = ELF_MIN_ALIGN - nbyte;
903 if (nbyte > elf_brk - elf_bss)
904 nbyte = elf_brk - elf_bss;
905 if (clear_user((void __user *)elf_bss +
906 load_bias, nbyte)) {
907 /*
908 * This bss-zeroing can fail if the ELF
909 * file specifies odd protections. So
910 * we don't check the return value
911 */
912 }
913 }
914 }
915
916 if (elf_ppnt->p_flags & PF_R)
917 elf_prot |= PROT_READ;
918 if (elf_ppnt->p_flags & PF_W)
919 elf_prot |= PROT_WRITE;
920 if (elf_ppnt->p_flags & PF_X)
921 elf_prot |= PROT_EXEC;
922
923 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
924
925 vaddr = elf_ppnt->p_vaddr;
926 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
927 elf_flags |= MAP_FIXED;
928 } else if (loc->elf_ex.e_type == ET_DYN) {
929 /* Try and get dynamic programs out of the way of the
930 * default mmap base, as well as whatever program they
931 * might try to exec. This is because the brk will
932 * follow the loader, and is not movable. */
933 load_bias = ELF_ET_DYN_BASE - vaddr;
934 if (current->flags & PF_RANDOMIZE)
935 load_bias += arch_mmap_rnd();
936 load_bias = ELF_PAGESTART(load_bias);
937 total_size = total_mapping_size(elf_phdata,
938 loc->elf_ex.e_phnum);
939 if (!total_size) {
940 retval = -EINVAL;
941 goto out_free_dentry;
942 }
943 }
944
945 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
946 elf_prot, elf_flags, total_size);
947 if (BAD_ADDR(error)) {
948 retval = IS_ERR((void *)error) ?
949 PTR_ERR((void*)error) : -EINVAL;
950 goto out_free_dentry;
951 }
952
953 if (!load_addr_set) {
954 load_addr_set = 1;
955 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
956 if (loc->elf_ex.e_type == ET_DYN) {
957 load_bias += error -
958 ELF_PAGESTART(load_bias + vaddr);
959 load_addr += load_bias;
960 reloc_func_desc = load_bias;
961 }
962 }
963 k = elf_ppnt->p_vaddr;
964 if (k < start_code)
965 start_code = k;
966 if (start_data < k)
967 start_data = k;
968
969 /*
970 * Check to see if the section's size will overflow the
971 * allowed task size. Note that p_filesz must always be
972 * <= p_memsz so it is only necessary to check p_memsz.
973 */
974 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
975 elf_ppnt->p_memsz > TASK_SIZE ||
976 TASK_SIZE - elf_ppnt->p_memsz < k) {
977 /* set_brk can never work. Avoid overflows. */
978 retval = -EINVAL;
979 goto out_free_dentry;
980 }
981
982 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
983
984 if (k > elf_bss)
985 elf_bss = k;
986 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
987 end_code = k;
988 if (end_data < k)
989 end_data = k;
990 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
991 if (k > elf_brk) {
992 bss_prot = elf_prot;
993 elf_brk = k;
994 }
995 }
996
997 loc->elf_ex.e_entry += load_bias;
998 elf_bss += load_bias;
999 elf_brk += load_bias;
1000 start_code += load_bias;
1001 end_code += load_bias;
1002 start_data += load_bias;
1003 end_data += load_bias;
1004
1005 /* Calling set_brk effectively mmaps the pages that we need
1006 * for the bss and break sections. We must do this before
1007 * mapping in the interpreter, to make sure it doesn't wind
1008 * up getting placed where the bss needs to go.
1009 */
1010 retval = set_brk(elf_bss, elf_brk, bss_prot);
1011 if (retval)
1012 goto out_free_dentry;
1013 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1014 retval = -EFAULT; /* Nobody gets to see this, but.. */
1015 goto out_free_dentry;
1016 }
1017
1018 if (elf_interpreter) {
1019 unsigned long interp_map_addr = 0;
1020
1021 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1022 interpreter,
1023 &interp_map_addr,
1024 load_bias, interp_elf_phdata);
1025 if (!IS_ERR((void *)elf_entry)) {
1026 /*
1027 * load_elf_interp() returns relocation
1028 * adjustment
1029 */
1030 interp_load_addr = elf_entry;
1031 elf_entry += loc->interp_elf_ex.e_entry;
1032 }
1033 if (BAD_ADDR(elf_entry)) {
1034 retval = IS_ERR((void *)elf_entry) ?
1035 (int)elf_entry : -EINVAL;
1036 goto out_free_dentry;
1037 }
1038 reloc_func_desc = interp_load_addr;
1039
1040 allow_write_access(interpreter);
1041 fput(interpreter);
1042 kfree(elf_interpreter);
1043 } else {
1044 elf_entry = loc->elf_ex.e_entry;
1045 if (BAD_ADDR(elf_entry)) {
1046 retval = -EINVAL;
1047 goto out_free_dentry;
1048 }
1049 }
1050
1051 kfree(interp_elf_phdata);
1052 kfree(elf_phdata);
1053
1054 set_binfmt(&elf_format);
1055
1056 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1057 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1058 if (retval < 0)
1059 goto out;
1060 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1061
1062 retval = create_elf_tables(bprm, &loc->elf_ex,
1063 load_addr, interp_load_addr);
1064 if (retval < 0)
1065 goto out;
1066 /* N.B. passed_fileno might not be initialized? */
1067 current->mm->end_code = end_code;
1068 current->mm->start_code = start_code;
1069 current->mm->start_data = start_data;
1070 current->mm->end_data = end_data;
1071 current->mm->start_stack = bprm->p;
1072
1073 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1074 current->mm->brk = current->mm->start_brk =
1075 arch_randomize_brk(current->mm);
1076 #ifdef compat_brk_randomized
1077 current->brk_randomized = 1;
1078 #endif
1079 }
1080
1081 if (current->personality & MMAP_PAGE_ZERO) {
1082 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1083 and some applications "depend" upon this behavior.
1084 Since we do not have the power to recompile these, we
1085 emulate the SVr4 behavior. Sigh. */
1086 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1087 MAP_FIXED | MAP_PRIVATE, 0);
1088 }
1089
1090 #ifdef ELF_PLAT_INIT
1091 /*
1092 * The ABI may specify that certain registers be set up in special
1093 * ways (on i386 %edx is the address of a DT_FINI function, for
1094 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1095 * that the e_entry field is the address of the function descriptor
1096 * for the startup routine, rather than the address of the startup
1097 * routine itself. This macro performs whatever initialization to
1098 * the regs structure is required as well as any relocations to the
1099 * function descriptor entries when executing dynamically linked apps.
1100 */
1101 ELF_PLAT_INIT(regs, reloc_func_desc);
1102 #endif
1103
1104 start_thread(regs, elf_entry, bprm->p);
1105 retval = 0;
1106 out:
1107 kfree(loc);
1108 out_ret:
1109 return retval;
1110
1111 /* error cleanup */
1112 out_free_dentry:
1113 kfree(interp_elf_phdata);
1114 allow_write_access(interpreter);
1115 if (interpreter)
1116 fput(interpreter);
1117 out_free_interp:
1118 kfree(elf_interpreter);
1119 out_free_ph:
1120 kfree(elf_phdata);
1121 goto out;
1122 }
1123
1124 #ifdef CONFIG_USELIB
1125 /* This is really simpleminded and specialized - we are loading an
1126 a.out library that is given an ELF header. */
1127 static int load_elf_library(struct file *file)
1128 {
1129 struct elf_phdr *elf_phdata;
1130 struct elf_phdr *eppnt;
1131 unsigned long elf_bss, bss, len;
1132 int retval, error, i, j;
1133 struct elfhdr elf_ex;
1134
1135 error = -ENOEXEC;
1136 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1137 if (retval != sizeof(elf_ex))
1138 goto out;
1139
1140 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1141 goto out;
1142
1143 /* First of all, some simple consistency checks */
1144 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1145 !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1146 goto out;
1147
1148 /* Now read in all of the header information */
1149
1150 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1151 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1152
1153 error = -ENOMEM;
1154 elf_phdata = kmalloc(j, GFP_KERNEL);
1155 if (!elf_phdata)
1156 goto out;
1157
1158 eppnt = elf_phdata;
1159 error = -ENOEXEC;
1160 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1161 if (retval != j)
1162 goto out_free_ph;
1163
1164 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1165 if ((eppnt + i)->p_type == PT_LOAD)
1166 j++;
1167 if (j != 1)
1168 goto out_free_ph;
1169
1170 while (eppnt->p_type != PT_LOAD)
1171 eppnt++;
1172
1173 /* Now use mmap to map the library into memory. */
1174 error = vm_mmap(file,
1175 ELF_PAGESTART(eppnt->p_vaddr),
1176 (eppnt->p_filesz +
1177 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1178 PROT_READ | PROT_WRITE | PROT_EXEC,
1179 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1180 (eppnt->p_offset -
1181 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1182 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1183 goto out_free_ph;
1184
1185 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1186 if (padzero(elf_bss)) {
1187 error = -EFAULT;
1188 goto out_free_ph;
1189 }
1190
1191 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1192 ELF_MIN_ALIGN - 1);
1193 bss = eppnt->p_memsz + eppnt->p_vaddr;
1194 if (bss > len) {
1195 error = vm_brk(len, bss - len);
1196 if (error)
1197 goto out_free_ph;
1198 }
1199 error = 0;
1200
1201 out_free_ph:
1202 kfree(elf_phdata);
1203 out:
1204 return error;
1205 }
1206 #endif /* #ifdef CONFIG_USELIB */
1207
1208 #ifdef CONFIG_ELF_CORE
1209 /*
1210 * ELF core dumper
1211 *
1212 * Modelled on fs/exec.c:aout_core_dump()
1213 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1214 */
1215
1216 /*
1217 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1218 * that are useful for post-mortem analysis are included in every core dump.
1219 * In that way we ensure that the core dump is fully interpretable later
1220 * without matching up the same kernel and hardware config to see what PC values
1221 * meant. These special mappings include the vDSO, vsyscall, and other
1222 * architecture-specific mappings.
1223 */
1224 static bool always_dump_vma(struct vm_area_struct *vma)
1225 {
1226 /* Any vsyscall mappings? */
1227 if (vma == get_gate_vma(vma->vm_mm))
1228 return true;
1229
1230 /*
1231 * Assume that all vmas with a .name op should always be dumped.
1232 * If this changes, a new vm_ops field can easily be added.
1233 */
1234 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1235 return true;
1236
1237 /*
1238 * arch_vma_name() returns non-NULL for special architecture mappings,
1239 * such as vDSO sections.
1240 */
1241 if (arch_vma_name(vma))
1242 return true;
1243
1244 return false;
1245 }
1246
1247 /*
1248 * Decide what to dump of a segment, part, all or none.
1249 */
1250 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1251 unsigned long mm_flags)
1252 {
1253 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1254
1255 /* always dump the vdso and vsyscall sections */
1256 if (always_dump_vma(vma))
1257 goto whole;
1258
1259 if (vma->vm_flags & VM_DONTDUMP)
1260 return 0;
1261
1262 /* support for DAX */
1263 if (vma_is_dax(vma)) {
1264 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1265 goto whole;
1266 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1267 goto whole;
1268 return 0;
1269 }
1270
1271 /* Hugetlb memory check */
1272 if (vma->vm_flags & VM_HUGETLB) {
1273 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1274 goto whole;
1275 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1276 goto whole;
1277 return 0;
1278 }
1279
1280 /* Do not dump I/O mapped devices or special mappings */
1281 if (vma->vm_flags & VM_IO)
1282 return 0;
1283
1284 /* By default, dump shared memory if mapped from an anonymous file. */
1285 if (vma->vm_flags & VM_SHARED) {
1286 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1287 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1288 goto whole;
1289 return 0;
1290 }
1291
1292 /* Dump segments that have been written to. */
1293 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1294 goto whole;
1295 if (vma->vm_file == NULL)
1296 return 0;
1297
1298 if (FILTER(MAPPED_PRIVATE))
1299 goto whole;
1300
1301 /*
1302 * If this looks like the beginning of a DSO or executable mapping,
1303 * check for an ELF header. If we find one, dump the first page to
1304 * aid in determining what was mapped here.
1305 */
1306 if (FILTER(ELF_HEADERS) &&
1307 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1308 u32 __user *header = (u32 __user *) vma->vm_start;
1309 u32 word;
1310 mm_segment_t fs = get_fs();
1311 /*
1312 * Doing it this way gets the constant folded by GCC.
1313 */
1314 union {
1315 u32 cmp;
1316 char elfmag[SELFMAG];
1317 } magic;
1318 BUILD_BUG_ON(SELFMAG != sizeof word);
1319 magic.elfmag[EI_MAG0] = ELFMAG0;
1320 magic.elfmag[EI_MAG1] = ELFMAG1;
1321 magic.elfmag[EI_MAG2] = ELFMAG2;
1322 magic.elfmag[EI_MAG3] = ELFMAG3;
1323 /*
1324 * Switch to the user "segment" for get_user(),
1325 * then put back what elf_core_dump() had in place.
1326 */
1327 set_fs(USER_DS);
1328 if (unlikely(get_user(word, header)))
1329 word = 0;
1330 set_fs(fs);
1331 if (word == magic.cmp)
1332 return PAGE_SIZE;
1333 }
1334
1335 #undef FILTER
1336
1337 return 0;
1338
1339 whole:
1340 return vma->vm_end - vma->vm_start;
1341 }
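/*
 * On a little-endian machine the union trick above folds to a single 32-bit
 * compare: magic.cmp becomes 0x464c457f ("\177ELF"), so a mapping whose
 * first word matches the ELF magic gets its first PAGE_SIZE bytes dumped to
 * help identify what was mapped there.
 */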
1342
1343 /* An ELF note in memory */
1344 struct memelfnote
1345 {
1346 const char *name;
1347 int type;
1348 unsigned int datasz;
1349 void *data;
1350 };
1351
1352 static int notesize(struct memelfnote *en)
1353 {
1354 int sz;
1355
1356 sz = sizeof(struct elf_note);
1357 sz += roundup(strlen(en->name) + 1, 4);
1358 sz += roundup(en->datasz, 4);
1359
1360 return sz;
1361 }
1362
1363 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1364 {
1365 struct elf_note en;
1366 en.n_namesz = strlen(men->name) + 1;
1367 en.n_descsz = men->datasz;
1368 en.n_type = men->type;
1369
1370 return dump_emit(cprm, &en, sizeof(en)) &&
1371 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1372 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1373 }
1374
1375 static void fill_elf_header(struct elfhdr *elf, int segs,
1376 u16 machine, u32 flags)
1377 {
1378 memset(elf, 0, sizeof(*elf));
1379
1380 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1381 elf->e_ident[EI_CLASS] = ELF_CLASS;
1382 elf->e_ident[EI_DATA] = ELF_DATA;
1383 elf->e_ident[EI_VERSION] = EV_CURRENT;
1384 elf->e_ident[EI_OSABI] = ELF_OSABI;
1385
1386 elf->e_type = ET_CORE;
1387 elf->e_machine = machine;
1388 elf->e_version = EV_CURRENT;
1389 elf->e_phoff = sizeof(struct elfhdr);
1390 elf->e_flags = flags;
1391 elf->e_ehsize = sizeof(struct elfhdr);
1392 elf->e_phentsize = sizeof(struct elf_phdr);
1393 elf->e_phnum = segs;
1394
1395 return;
1396 }
1397
1398 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1399 {
1400 phdr->p_type = PT_NOTE;
1401 phdr->p_offset = offset;
1402 phdr->p_vaddr = 0;
1403 phdr->p_paddr = 0;
1404 phdr->p_filesz = sz;
1405 phdr->p_memsz = 0;
1406 phdr->p_flags = 0;
1407 phdr->p_align = 0;
1408 return;
1409 }
1410
1411 static void fill_note(struct memelfnote *note, const char *name, int type,
1412 unsigned int sz, void *data)
1413 {
1414 note->name = name;
1415 note->type = type;
1416 note->datasz = sz;
1417 note->data = data;
1418 return;
1419 }
1420
1421 /*
1422 * fill up all the fields in prstatus from the given task struct, except
1423 * registers which need to be filled up separately.
1424 */
1425 static void fill_prstatus(struct elf_prstatus *prstatus,
1426 struct task_struct *p, long signr)
1427 {
1428 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1429 prstatus->pr_sigpend = p->pending.signal.sig[0];
1430 prstatus->pr_sighold = p->blocked.sig[0];
1431 rcu_read_lock();
1432 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1433 rcu_read_unlock();
1434 prstatus->pr_pid = task_pid_vnr(p);
1435 prstatus->pr_pgrp = task_pgrp_vnr(p);
1436 prstatus->pr_sid = task_session_vnr(p);
1437 if (thread_group_leader(p)) {
1438 struct task_cputime cputime;
1439
1440 /*
1441 * This is the record for the group leader. It shows the
1442 * group-wide total, not its individual thread total.
1443 */
1444 thread_group_cputime(p, &cputime);
1445 prstatus->pr_utime = ns_to_timeval(cputime.utime);
1446 prstatus->pr_stime = ns_to_timeval(cputime.stime);
1447 } else {
1448 u64 utime, stime;
1449
1450 task_cputime(p, &utime, &stime);
1451 prstatus->pr_utime = ns_to_timeval(utime);
1452 prstatus->pr_stime = ns_to_timeval(stime);
1453 }
1454
1455 prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
1456 prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
1457 }
1458
1459 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1460 struct mm_struct *mm)
1461 {
1462 const struct cred *cred;
1463 unsigned int i, len;
1464
1465 /* first copy the parameters from user space */
1466 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1467
1468 len = mm->arg_end - mm->arg_start;
1469 if (len >= ELF_PRARGSZ)
1470 len = ELF_PRARGSZ-1;
1471 if (copy_from_user(&psinfo->pr_psargs,
1472 (const char __user *)mm->arg_start, len))
1473 return -EFAULT;
1474 for(i = 0; i < len; i++)
1475 if (psinfo->pr_psargs[i] == 0)
1476 psinfo->pr_psargs[i] = ' ';
1477 psinfo->pr_psargs[len] = 0;
1478
1479 rcu_read_lock();
1480 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1481 rcu_read_unlock();
1482 psinfo->pr_pid = task_pid_vnr(p);
1483 psinfo->pr_pgrp = task_pgrp_vnr(p);
1484 psinfo->pr_sid = task_session_vnr(p);
1485
1486 i = p->state ? ffz(~p->state) + 1 : 0;
1487 psinfo->pr_state = i;
1488 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1489 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1490 psinfo->pr_nice = task_nice(p);
1491 psinfo->pr_flag = p->flags;
1492 rcu_read_lock();
1493 cred = __task_cred(p);
1494 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1495 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1496 rcu_read_unlock();
1497 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1498
1499 return 0;
1500 }
1501
1502 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1503 {
1504 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1505 int i = 0;
1506 do
1507 i += 2;
1508 while (auxv[i - 2] != AT_NULL);
1509 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1510 }
1511
1512 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1513 const siginfo_t *siginfo)
1514 {
1515 mm_segment_t old_fs = get_fs();
1516 set_fs(KERNEL_DS);
1517 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1518 set_fs(old_fs);
1519 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1520 }
1521
1522 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1523 /*
1524 * Format of NT_FILE note:
1525 *
1526 * long count -- how many files are mapped
1527 * long page_size -- units for file_ofs
1528 * array of [COUNT] elements of
1529 * long start
1530 * long end
1531 * long file_ofs
1532 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1533 */
1534 static int fill_files_note(struct memelfnote *note)
1535 {
1536 struct vm_area_struct *vma;
1537 unsigned count, size, names_ofs, remaining, n;
1538 user_long_t *data;
1539 user_long_t *start_end_ofs;
1540 char *name_base, *name_curpos;
1541
1542 /* *Estimated* file count and total data size needed */
1543 count = current->mm->map_count;
1544 size = count * 64;
1545
1546 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1547 alloc:
1548 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1549 return -EINVAL;
1550 size = round_up(size, PAGE_SIZE);
1551 data = vmalloc(size);
1552 if (!data)
1553 return -ENOMEM;
1554
1555 start_end_ofs = data + 2;
1556 name_base = name_curpos = ((char *)data) + names_ofs;
1557 remaining = size - names_ofs;
1558 count = 0;
1559 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1560 struct file *file;
1561 const char *filename;
1562
1563 file = vma->vm_file;
1564 if (!file)
1565 continue;
1566 filename = file_path(file, name_curpos, remaining);
1567 if (IS_ERR(filename)) {
1568 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1569 vfree(data);
1570 size = size * 5 / 4;
1571 goto alloc;
1572 }
1573 continue;
1574 }
1575
1576 /* file_path() fills at the end, move name down */
1577 /* n = strlen(filename) + 1: */
1578 n = (name_curpos + remaining) - filename;
1579 remaining = filename - name_curpos;
1580 memmove(name_curpos, filename, n);
1581 name_curpos += n;
1582
1583 *start_end_ofs++ = vma->vm_start;
1584 *start_end_ofs++ = vma->vm_end;
1585 *start_end_ofs++ = vma->vm_pgoff;
1586 count++;
1587 }
1588
1589 /* Now we know exact count of files, can store it */
1590 data[0] = count;
1591 data[1] = PAGE_SIZE;
1592 /*
1593 * The count is usually less than current->mm->map_count,
1594 * so we need to move the filenames down.
1595 */
1596 n = current->mm->map_count - count;
1597 if (n != 0) {
1598 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1599 memmove(name_base - shift_bytes, name_base,
1600 name_curpos - name_base);
1601 name_curpos -= shift_bytes;
1602 }
1603
1604 size = name_curpos - (char *)data;
1605 fill_note(note, "CORE", NT_FILE, size, data);
1606 return 0;
1607 }
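/*
 * Resulting NT_FILE payload, following the format documented above, for a
 * hypothetical process with two file-backed mappings:
 *
 *   data[0] = 2            (count)
 *   data[1] = PAGE_SIZE    (units for file_ofs)
 *   data[2..4] = start, end, pgoff of the first vma
 *   data[5..7] = start, end, pgoff of the second vma
 *   followed by "/usr/lib/libfoo.so\0/usr/bin/foo\0" (names are made up).
 */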
1608
1609 #ifdef CORE_DUMP_USE_REGSET
1610 #include <linux/regset.h>
1611
1612 struct elf_thread_core_info {
1613 struct elf_thread_core_info *next;
1614 struct task_struct *task;
1615 struct elf_prstatus prstatus;
1616 struct memelfnote notes[0];
1617 };
1618
1619 struct elf_note_info {
1620 struct elf_thread_core_info *thread;
1621 struct memelfnote psinfo;
1622 struct memelfnote signote;
1623 struct memelfnote auxv;
1624 struct memelfnote files;
1625 user_siginfo_t csigdata;
1626 size_t size;
1627 int thread_notes;
1628 };
1629
1630 /*
1631 * When a regset has a writeback hook, we call it on each thread before
1632 * dumping user memory. On register window machines, this makes sure the
1633 * user memory backing the register data is up to date before we read it.
1634 */
1635 static void do_thread_regset_writeback(struct task_struct *task,
1636 const struct user_regset *regset)
1637 {
1638 if (regset->writeback)
1639 regset->writeback(task, regset, 1);
1640 }
1641
1642 #ifndef PRSTATUS_SIZE
1643 #define PRSTATUS_SIZE(S, R) sizeof(S)
1644 #endif
1645
1646 #ifndef SET_PR_FPVALID
1647 #define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1648 #endif
1649
1650 static int fill_thread_core_info(struct elf_thread_core_info *t,
1651 const struct user_regset_view *view,
1652 long signr, size_t *total)
1653 {
1654 unsigned int i;
1655 unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
1656
1657 /*
1658 * NT_PRSTATUS is the one special case, because the regset data
1659 * goes into the pr_reg field inside the note contents, rather
1660 * than being the whole note contents. We fill the rest in here.
1661 * We assume that regset 0 is NT_PRSTATUS.
1662 */
1663 fill_prstatus(&t->prstatus, t->task, signr);
1664 (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
1665 &t->prstatus.pr_reg, NULL);
1666
1667 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1668 PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
1669 *total += notesize(&t->notes[0]);
1670
1671 do_thread_regset_writeback(t->task, &view->regsets[0]);
1672
1673 /*
1674 * Each other regset might generate a note too. For each regset
1675 * that has no core_note_type or is inactive, we leave t->notes[i]
1676 * all zero and we'll know to skip writing it later.
1677 */
1678 for (i = 1; i < view->n; ++i) {
1679 const struct user_regset *regset = &view->regsets[i];
1680 do_thread_regset_writeback(t->task, regset);
1681 if (regset->core_note_type && regset->get &&
1682 (!regset->active || regset->active(t->task, regset))) {
1683 int ret;
1684 size_t size = regset->n * regset->size;
1685 void *data = kmalloc(size, GFP_KERNEL);
1686 if (unlikely(!data))
1687 return 0;
1688 ret = regset->get(t->task, regset,
1689 0, size, data, NULL);
1690 if (unlikely(ret))
1691 kfree(data);
1692 else {
1693 if (regset->core_note_type != NT_PRFPREG)
1694 fill_note(&t->notes[i], "LINUX",
1695 regset->core_note_type,
1696 size, data);
1697 else {
1698 SET_PR_FPVALID(&t->prstatus,
1699 1, regset_size);
1700 fill_note(&t->notes[i], "CORE",
1701 NT_PRFPREG, size, data);
1702 }
1703 *total += notesize(&t->notes[i]);
1704 }
1705 }
1706 }
1707
1708 return 1;
1709 }
1710
1711 static int fill_note_info(struct elfhdr *elf, int phdrs,
1712 struct elf_note_info *info,
1713 const siginfo_t *siginfo, struct pt_regs *regs)
1714 {
1715 struct task_struct *dump_task = current;
1716 const struct user_regset_view *view = task_user_regset_view(dump_task);
1717 struct elf_thread_core_info *t;
1718 struct elf_prpsinfo *psinfo;
1719 struct core_thread *ct;
1720 unsigned int i;
1721
1722 info->size = 0;
1723 info->thread = NULL;
1724
1725 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1726 if (psinfo == NULL) {
1727 info->psinfo.data = NULL; /* So we don't free this wrongly */
1728 return 0;
1729 }
1730
1731 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1732
1733 /*
1734 * Figure out how many notes we're going to need for each thread.
1735 */
1736 info->thread_notes = 0;
1737 for (i = 0; i < view->n; ++i)
1738 if (view->regsets[i].core_note_type != 0)
1739 ++info->thread_notes;
1740
1741 /*
1742 * Sanity check. We rely on regset 0 being in NT_PRSTATUS,
1743 * since it is our one special case.
1744 */
1745 if (unlikely(info->thread_notes == 0) ||
1746 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1747 WARN_ON(1);
1748 return 0;
1749 }
1750
1751 /*
1752 * Initialize the ELF file header.
1753 */
1754 fill_elf_header(elf, phdrs,
1755 view->e_machine, view->e_flags);
1756
1757 /*
1758 * Allocate a structure for each thread.
1759 */
1760 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1761 t = kzalloc(offsetof(struct elf_thread_core_info,
1762 notes[info->thread_notes]),
1763 GFP_KERNEL);
1764 if (unlikely(!t))
1765 return 0;
1766
1767 t->task = ct->task;
1768 if (ct->task == dump_task || !info->thread) {
1769 t->next = info->thread;
1770 info->thread = t;
1771 } else {
1772 /*
1773 * Make sure to keep the original task at
1774 * the head of the list.
1775 */
1776 t->next = info->thread->next;
1777 info->thread->next = t;
1778 }
1779 }
1780
1781 /*
1782 * Now fill in each thread's information.
1783 */
1784 for (t = info->thread; t != NULL; t = t->next)
1785 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1786 return 0;
1787
1788 /*
1789 * Fill in the two process-wide notes.
1790 */
1791 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1792 info->size += notesize(&info->psinfo);
1793
1794 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1795 info->size += notesize(&info->signote);
1796
1797 fill_auxv_note(&info->auxv, current->mm);
1798 info->size += notesize(&info->auxv);
1799
1800 if (fill_files_note(&info->files) == 0)
1801 info->size += notesize(&info->files);
1802
1803 return 1;
1804 }
1805
1806 static size_t get_note_info_size(struct elf_note_info *info)
1807 {
1808 return info->size;
1809 }
1810
1811 /*
1812 * Write all the notes for each thread. When writing the first thread, the
1813 * process-wide notes are interleaved after the first thread-specific note.
1814 */
1815 static int write_note_info(struct elf_note_info *info,
1816 struct coredump_params *cprm)
1817 {
1818 bool first = true;
1819 struct elf_thread_core_info *t = info->thread;
1820
1821 do {
1822 int i;
1823
1824 if (!writenote(&t->notes[0], cprm))
1825 return 0;
1826
1827 if (first && !writenote(&info->psinfo, cprm))
1828 return 0;
1829 if (first && !writenote(&info->signote, cprm))
1830 return 0;
1831 if (first && !writenote(&info->auxv, cprm))
1832 return 0;
1833 if (first && info->files.data &&
1834 !writenote(&info->files, cprm))
1835 return 0;
1836
1837 for (i = 1; i < info->thread_notes; ++i)
1838 if (t->notes[i].data &&
1839 !writenote(&t->notes[i], cprm))
1840 return 0;
1841
1842 first = false;
1843 t = t->next;
1844 } while (t);
1845
1846 return 1;
1847 }
1848
1849 static void free_note_info(struct elf_note_info *info)
1850 {
1851 struct elf_thread_core_info *threads = info->thread;
1852 while (threads) {
1853 unsigned int i;
1854 struct elf_thread_core_info *t = threads;
1855 threads = t->next;
1856 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1857 for (i = 1; i < info->thread_notes; ++i)
1858 kfree(t->notes[i].data);
1859 kfree(t);
1860 }
1861 kfree(info->psinfo.data);
1862 vfree(info->files.data);
1863 }
1864
1865 #else
1866
1867 /* Here is the structure in which the status of each thread is captured. */
1868 struct elf_thread_status
1869 {
1870 struct list_head list;
1871 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1872 elf_fpregset_t fpu; /* NT_PRFPREG */
1873 struct task_struct *thread;
1874 #ifdef ELF_CORE_COPY_XFPREGS
1875 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
1876 #endif
1877 struct memelfnote notes[3];
1878 int num_notes;
1879 };
1880
1881 /*
1882 * In order to add the specific thread information for the ELF file format,
1883 * we need to keep a linked list of every thread's pr_status and then create
1884 * a single section for them in the final core file.
1885 */
1886 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1887 {
1888 int sz = 0;
1889 struct task_struct *p = t->thread;
1890 t->num_notes = 0;
1891
1892 fill_prstatus(&t->prstatus, p, signr);
1893 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1894
1895 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1896 &(t->prstatus));
1897 t->num_notes++;
1898 sz += notesize(&t->notes[0]);
1899
1900 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1901 &t->fpu))) {
1902 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1903 &(t->fpu));
1904 t->num_notes++;
1905 sz += notesize(&t->notes[1]);
1906 }
1907
1908 #ifdef ELF_CORE_COPY_XFPREGS
1909 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1910 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1911 sizeof(t->xfpu), &t->xfpu);
1912 t->num_notes++;
1913 sz += notesize(&t->notes[2]);
1914 }
1915 #endif
1916 return sz;
1917 }
1918
1919 struct elf_note_info {
1920 struct memelfnote *notes;
1921 struct memelfnote *notes_files;
1922 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1923 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
1924 struct list_head thread_list;
1925 elf_fpregset_t *fpu;
1926 #ifdef ELF_CORE_COPY_XFPREGS
1927 elf_fpxregset_t *xfpu;
1928 #endif
1929 user_siginfo_t csigdata;
1930 int thread_status_size;
1931 int numnote;
1932 };
1933
1934 static int elf_note_info_init(struct elf_note_info *info)
1935 {
1936 memset(info, 0, sizeof(*info));
1937 INIT_LIST_HEAD(&info->thread_list);
1938
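/*
 * If any allocation below fails, the caller releases whatever was
 * set up so far via free_note_info() on its cleanup path.
 */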
1939 /* Allocate space for ELF notes */
1940 info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1941 if (!info->notes)
1942 return 0;
1943 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1944 if (!info->psinfo)
1945 return 0;
1946 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1947 if (!info->prstatus)
1948 return 0;
1949 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1950 if (!info->fpu)
1951 return 0;
1952 #ifdef ELF_CORE_COPY_XFPREGS
1953 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1954 if (!info->xfpu)
1955 return 0;
1956 #endif
1957 return 1;
1958 }
1959
1960 static int fill_note_info(struct elfhdr *elf, int phdrs,
1961 struct elf_note_info *info,
1962 const siginfo_t *siginfo, struct pt_regs *regs)
1963 {
1964 struct list_head *t;
1965 struct core_thread *ct;
1966 struct elf_thread_status *ets;
1967
1968 if (!elf_note_info_init(info))
1969 return 0;
1970
1971 for (ct = current->mm->core_state->dumper.next;
1972 ct; ct = ct->next) {
1973 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1974 if (!ets)
1975 return 0;
1976
1977 ets->thread = ct->task;
1978 list_add(&ets->list, &info->thread_list);
1979 }
1980
1981 list_for_each(t, &info->thread_list) {
1982 int sz;
1983
1984 ets = list_entry(t, struct elf_thread_status, list);
1985 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1986 info->thread_status_size += sz;
1987 }
1988 /* now collect the dump for the current task */
1989 memset(info->prstatus, 0, sizeof(*info->prstatus));
1990 fill_prstatus(info->prstatus, current, siginfo->si_signo);
1991 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1992
1993 /* Set up header */
1994 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1995
1996 /*
1997 * Set up the notes in similar form to SVR4 core dumps made
1998 * with info from their /proc.
1999 */
2000
2001 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2002 sizeof(*info->prstatus), info->prstatus);
2003 fill_psinfo(info->psinfo, current->group_leader, current->mm);
2004 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2005 sizeof(*info->psinfo), info->psinfo);
2006
2007 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2008 fill_auxv_note(info->notes + 3, current->mm);
2009 info->numnote = 4;
2010
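/*
 * The NT_FILE note payload is vmalloc()ed by fill_files_note() and
 * released through info->notes_files in free_note_info().
 */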
2011 if (fill_files_note(info->notes + info->numnote) == 0) {
2012 info->notes_files = info->notes + info->numnote;
2013 info->numnote++;
2014 }
2015
2016 /* Try to dump the FPU. */
2017 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2018 info->fpu);
2019 if (info->prstatus->pr_fpvalid)
2020 fill_note(info->notes + info->numnote++,
2021 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2022 #ifdef ELF_CORE_COPY_XFPREGS
2023 if (elf_core_copy_task_xfpregs(current, info->xfpu))
2024 fill_note(info->notes + info->numnote++,
2025 "LINUX", ELF_CORE_XFPREG_TYPE,
2026 sizeof(*info->xfpu), info->xfpu);
2027 #endif
2028
2029 return 1;
2030 }
2031
2032 static size_t get_note_info_size(struct elf_note_info *info)
2033 {
2034 int sz = 0;
2035 int i;
2036
2037 for (i = 0; i < info->numnote; i++)
2038 sz += notesize(info->notes + i);
2039
2040 sz += info->thread_status_size;
2041
2042 return sz;
2043 }
2044
2045 static int write_note_info(struct elf_note_info *info,
2046 struct coredump_params *cprm)
2047 {
2048 int i;
2049 struct list_head *t;
2050
2051 for (i = 0; i < info->numnote; i++)
2052 if (!writenote(info->notes + i, cprm))
2053 return 0;
2054
2055 /* write out the thread status notes section */
2056 list_for_each(t, &info->thread_list) {
2057 struct elf_thread_status *tmp =
2058 list_entry(t, struct elf_thread_status, list);
2059
2060 for (i = 0; i < tmp->num_notes; i++)
2061 if (!writenote(&tmp->notes[i], cprm))
2062 return 0;
2063 }
2064
2065 return 1;
2066 }
2067
2068 static void free_note_info(struct elf_note_info *info)
2069 {
2070 while (!list_empty(&info->thread_list)) {
2071 struct list_head *tmp = info->thread_list.next;
2072 list_del(tmp);
2073 kfree(list_entry(tmp, struct elf_thread_status, list));
2074 }
2075
2076 /* Free data possibly allocated by fill_files_note(): */
2077 if (info->notes_files)
2078 vfree(info->notes_files->data);
2079
2080 kfree(info->prstatus);
2081 kfree(info->psinfo);
2082 kfree(info->notes);
2083 kfree(info->fpu);
2084 #ifdef ELF_CORE_COPY_XFPREGS
2085 kfree(info->xfpu);
2086 #endif
2087 }
2088
2089 #endif
2090
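/*
 * Start of the VMA walk for the dump: the first mapped VMA, or the
 * gate VMA if the mm has no mappings at all.
 */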
2091 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2092 struct vm_area_struct *gate_vma)
2093 {
2094 struct vm_area_struct *ret = tsk->mm->mmap;
2095
2096 if (ret)
2097 return ret;
2098 return gate_vma;
2099 }
2100 /*
2101 * Helper function for iterating across a vma list. It ensures that the caller
2102 * will visit `gate_vma' prior to terminating the search.
2103 */
2104 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2105 struct vm_area_struct *gate_vma)
2106 {
2107 struct vm_area_struct *ret;
2108
2109 ret = this_vma->vm_next;
2110 if (ret)
2111 return ret;
2112 if (this_vma == gate_vma)
2113 return NULL;
2114 return gate_vma;
2115 }
2116
2117 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2118 elf_addr_t e_shoff, int segs)
2119 {
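/*
 * Extended numbering: the real segment count does not fit in the
 * 16-bit e_phnum, so a single section header is emitted and the true
 * count is stored in its sh_info field.
 */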
2120 elf->e_shoff = e_shoff;
2121 elf->e_shentsize = sizeof(*shdr4extnum);
2122 elf->e_shnum = 1;
2123 elf->e_shstrndx = SHN_UNDEF;
2124
2125 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2126
2127 shdr4extnum->sh_type = SHT_NULL;
2128 shdr4extnum->sh_size = elf->e_shnum;
2129 shdr4extnum->sh_link = elf->e_shstrndx;
2130 shdr4extnum->sh_info = segs;
2131 }
2132
2133 /*
2134 * Actual dumper
2135 *
2136 * This is a two-pass process; first we find the offsets of the bits,
2137 * and then they are actually written out. If we run out of core limit
2138 * we just truncate.
2139 */
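/*
 * Resulting core file layout:
 *
 *   ELF header
 *   program headers (PT_NOTE first, one PT_LOAD per dumped VMA,
 *                    then any arch-specific extra phdrs)
 *   note data
 *   padding up to the next ELF_EXEC_PAGESIZE boundary
 *   VMA contents, in program header order
 *   arch-specific extra data and, if e_phnum == PN_XNUM, the
 *   extended-numbering section header
 */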
2140 static int elf_core_dump(struct coredump_params *cprm)
2141 {
2142 int has_dumped = 0;
2143 mm_segment_t fs;
2144 int segs, i;
2145 size_t vma_data_size = 0;
2146 struct vm_area_struct *vma, *gate_vma;
2147 struct elfhdr *elf = NULL;
2148 loff_t offset = 0, dataoff;
2149 struct elf_note_info info = { };
2150 struct elf_phdr *phdr4note = NULL;
2151 struct elf_shdr *shdr4extnum = NULL;
2152 Elf_Half e_phnum;
2153 elf_addr_t e_shoff;
2154 elf_addr_t *vma_filesz = NULL;
2155
2156 /*
2157 * We no longer stop all VM operations.
2158 *
2159 * This is because those processes that could possibly change map_count
2160 * or the mmap / vma pages are now blocked in do_exit on current
2161 * finishing this core dump.
2162 *
2163 * Only ptrace can touch these memory addresses, but it doesn't change
2164 * the map_count or the pages allocated. So no possibility of crashing
2165 * exists while dumping the mm->vm_next areas to the core file.
2166 */
2167
2168 /* alloc memory for large data structures: too large to be on stack */
2169 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2170 if (!elf)
2171 goto out;
2172 /*
2173 * The number of segs is recorded in the ELF header as a 16-bit value.
2174 * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying this.
2175 */
2176 segs = current->mm->map_count;
2177 segs += elf_core_extra_phdrs();
2178
2179 gate_vma = get_gate_vma(current->mm);
2180 if (gate_vma != NULL)
2181 segs++;
2182
2183 /* for notes section */
2184 segs++;
2185
2186 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2187 * this, the kernel supports extended numbering. Have a look at
2188 * include/linux/elf.h for further information. */
2189 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2190
2191 /*
2192 * Collect all the non-memory information about the process for the
2193 * notes. This also sets up the file header.
2194 */
2195 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2196 goto cleanup;
2197
2198 has_dumped = 1;
2199
2200 fs = get_fs();
2201 set_fs(KERNEL_DS);
2202
2203 offset += sizeof(*elf); /* Elf header */
2204 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2205
2206 /* Write notes phdr entry */
2207 {
2208 size_t sz = get_note_info_size(&info);
2209
2210 sz += elf_coredump_extra_notes_size();
2211
2212 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2213 if (!phdr4note)
2214 goto end_coredump;
2215
2216 fill_elf_note_phdr(phdr4note, sz, offset);
2217 offset += sz;
2218 }
2219
2220 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2221
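/*
 * One file-size slot per dumped VMA; segs - 1 (everything except the
 * notes segment) is an upper bound on that count, and the
 * multiplication is checked for overflow first.
 */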
2222 if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2223 goto end_coredump;
2224 vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
2225 if (!vma_filesz)
2226 goto end_coredump;
2227
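/* First pass over the VMAs: decide how much of each one to dump. */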
2228 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2229 vma = next_vma(vma, gate_vma)) {
2230 unsigned long dump_size;
2231
2232 dump_size = vma_dump_size(vma, cprm->mm_flags);
2233 vma_filesz[i++] = dump_size;
2234 vma_data_size += dump_size;
2235 }
2236
2237 offset += vma_data_size;
2238 offset += elf_core_extra_data_size();
2239 e_shoff = offset;
2240
2241 if (e_phnum == PN_XNUM) {
2242 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2243 if (!shdr4extnum)
2244 goto end_coredump;
2245 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2246 }
2247
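/*
 * Second pass: from here on, 'offset' tracks the file position of
 * each PT_LOAD segment's contents, starting at the page-aligned
 * dataoff.
 */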
2248 offset = dataoff;
2249
2250 if (!dump_emit(cprm, elf, sizeof(*elf)))
2251 goto end_coredump;
2252
2253 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2254 goto end_coredump;
2255
2256 /* Write program headers for segments dump */
2257 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2258 vma = next_vma(vma, gate_vma)) {
2259 struct elf_phdr phdr;
2260
2261 phdr.p_type = PT_LOAD;
2262 phdr.p_offset = offset;
2263 phdr.p_vaddr = vma->vm_start;
2264 phdr.p_paddr = 0;
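/* p_filesz may be smaller than p_memsz; the undumped tail of the
 * VMA reads back as zeroes from the core file. */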
2265 phdr.p_filesz = vma_filesz[i++];
2266 phdr.p_memsz = vma->vm_end - vma->vm_start;
2267 offset += phdr.p_filesz;
2268 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2269 if (vma->vm_flags & VM_WRITE)
2270 phdr.p_flags |= PF_W;
2271 if (vma->vm_flags & VM_EXEC)
2272 phdr.p_flags |= PF_X;
2273 phdr.p_align = ELF_EXEC_PAGESIZE;
2274
2275 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2276 goto end_coredump;
2277 }
2278
2279 if (!elf_core_write_extra_phdrs(cprm, offset))
2280 goto end_coredump;
2281
2282 /* write out the notes section */
2283 if (!write_note_info(&info, cprm))
2284 goto end_coredump;
2285
2286 if (elf_coredump_extra_notes_write(cprm))
2287 goto end_coredump;
2288
2289 /* Align to page */
2290 if (!dump_skip(cprm, dataoff - cprm->pos))
2291 goto end_coredump;
2292
2293 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2294 vma = next_vma(vma, gate_vma)) {
2295 unsigned long addr;
2296 unsigned long end;
2297
2298 end = vma->vm_start + vma_filesz[i++];
2299
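/*
 * Copy out each page; pages that get_dump_page() cannot provide
 * become zero-filled holes via dump_skip().
 */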
2300 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2301 struct page *page;
2302 int stop;
2303
2304 page = get_dump_page(addr);
2305 if (page) {
2306 void *kaddr = kmap(page);
2307 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2308 kunmap(page);
2309 put_page(page);
2310 } else
2311 stop = !dump_skip(cprm, PAGE_SIZE);
2312 if (stop)
2313 goto end_coredump;
2314 }
2315 }
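/*
 * Make sure a trailing sparse region created by dump_skip() is
 * reflected in the final file size.
 */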
2316 dump_truncate(cprm);
2317
2318 if (!elf_core_write_extra_data(cprm))
2319 goto end_coredump;
2320
2321 if (e_phnum == PN_XNUM) {
2322 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2323 goto end_coredump;
2324 }
2325
2326 end_coredump:
2327 set_fs(fs);
2328
2329 cleanup:
2330 free_note_info(&info);
2331 kfree(shdr4extnum);
2332 vfree(vma_filesz);
2333 kfree(phdr4note);
2334 kfree(elf);
2335 out:
2336 return has_dumped;
2337 }
2338
2339 #endif /* CONFIG_ELF_CORE */
2340
2341 static int __init init_elf_binfmt(void)
2342 {
2343 register_binfmt(&elf_format);
2344 return 0;
2345 }
2346
2347 static void __exit exit_elf_binfmt(void)
2348 {
2349 /* Remove the ELF loader. */
2350 unregister_binfmt(&elf_format);
2351 }
2352
2353 core_initcall(init_elf_binfmt);
2354 module_exit(exit_elf_binfmt);
2355 MODULE_LICENSE("GPL");