]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - fs/binfmt_elf.c
sched/headers: Prepare for new header dependencies before moving code to <linux/sched...
[mirror_ubuntu-artful-kernel.git] / fs / binfmt_elf.c
1 /*
2 * linux/fs/binfmt_elf.c
3 *
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
8 *
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10 */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/sched/coredump.h>
39 #include <linux/sched/task_stack.h>
40 #include <linux/cred.h>
41 #include <linux/dax.h>
42 #include <linux/uaccess.h>
43 #include <asm/param.h>
44 #include <asm/page.h>
45
/*
 * Fallback type names: unless user_long_t / user_siginfo_t were already
 * defined before this point, they are plain long and siginfo_t.
 */
46 #ifndef user_long_t
47 #define user_long_t long
48 #endif
49 #ifndef user_siginfo_t
50 #define user_siginfo_t siginfo_t
51 #endif
52
/* Forward declarations; both are referenced before their definitions. */
53 static int load_elf_binary(struct linux_binprm *bprm);
54 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
55 int, int, unsigned long);
56
/*
 * Without CONFIG_USELIB no library loader is declared; load_elf_library
 * then expands to NULL so it can still be used as the .load_shlib
 * initializer of elf_format.
 */
57 #ifdef CONFIG_USELIB
58 static int load_elf_library(struct file *);
59 #else
60 #define load_elf_library NULL
61 #endif
62
63 /*
64 * If we don't support core dumping, then supply a NULL so we
65 * don't even try.
66 */
67 #ifdef CONFIG_ELF_CORE
68 static int elf_core_dump(struct coredump_params *cprm);
69 #else
70 #define elf_core_dump NULL
71 #endif
72
/* ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE. */
73 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
74 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
75 #else
76 #define ELF_MIN_ALIGN PAGE_SIZE
77 #endif
78
/* Default to 0 unless ELF_CORE_EFLAGS was defined earlier. */
79 #ifndef ELF_CORE_EFLAGS
80 #define ELF_CORE_EFLAGS 0
81 #endif
82
/*
 * Alignment helpers built on ELF_MIN_ALIGN (the masks assume it is a power
 * of two -- NOTE(review): not enforced here, confirm per architecture):
 *   ELF_PAGESTART(v)  - v with the low (ELF_MIN_ALIGN - 1) bits cleared
 *   ELF_PAGEOFFSET(v) - only the low (ELF_MIN_ALIGN - 1) bits of v
 *   ELF_PAGEALIGN(v)  - v rounded up to the next ELF_MIN_ALIGN multiple
 */
83 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
84 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
85 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
86
/*
 * Binary-format descriptor for ELF. load_elf_binary() hands it to
 * set_binfmt() once an image has been mapped. The .load_shlib and
 * .core_dump entries are NULL when CONFIG_USELIB / CONFIG_ELF_CORE are not
 * set (see the fallback #defines above).
 */
87 static struct linux_binfmt elf_format = {
88 .module = THIS_MODULE,
89 .load_binary = load_elf_binary,
90 .load_shlib = load_elf_library,
91 .core_dump = elf_core_dump,
92 .min_coredump = ELF_EXEC_PAGESIZE,
93 };
94
/*
 * True when x, converted to unsigned long, is at or above TASK_SIZE.
 * Callers in this file apply it to mapping results and then use IS_ERR()
 * to tell an error code apart from a merely out-of-range address.
 */
95 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
96
97 static int set_brk(unsigned long start, unsigned long end, int prot)
98 {
99 start = ELF_PAGEALIGN(start);
100 end = ELF_PAGEALIGN(end);
101 if (end > start) {
102 /*
103 * Map the last of the bss segment.
104 * If the header is requesting these pages to be
105 * executable, honour that (ppc32 needs this).
106 */
107 int error = vm_brk_flags(start, end - start,
108 prot & PROT_EXEC ? VM_EXEC : 0);
109 if (error)
110 return error;
111 }
112 current->mm->start_brk = current->mm->brk = end;
113 return 0;
114 }
115
116 /* We need to explicitly zero any fractional pages
117 after the data section (i.e. bss). This would
118 contain the junk from the file that should not
119 be in memory
120 */
121 static int padzero(unsigned long elf_bss)
122 {
123 unsigned long nbyte;
124
125 nbyte = ELF_PAGEOFFSET(elf_bss);
126 if (nbyte) {
127 nbyte = ELF_MIN_ALIGN - nbyte;
128 if (clear_user((void __user *) elf_bss, nbyte))
129 return -EFAULT;
130 }
131 return 0;
132 }
133
/*
 * Helpers that hide the stack growth direction from create_elf_tables().
 *
 *   STACK_ADD(sp, items)   - sp viewed as an elf_addr_t pointer, moved by
 *                            items slots in the growth direction.
 *   STACK_ROUND(sp, items) - address of sp moved by items elements (scaled
 *                            by sp's own pointer type), rounded to a
 *                            16-byte boundary: up when the stack grows up,
 *                            down otherwise.
 *   STACK_ALLOC(sp, len)   - reserve len on the stack, updating sp in
 *                            place; evaluates to the start of the
 *                            reserved area.
 *
 * Every macro argument is parenthesized so that an argument which is
 * itself an expression (e.g. a conditional) is not re-associated by
 * operator precedence inside the expansion.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
	(sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif
157
/*
 * create_elf_tables() - lay out the initial user stack for a new ELF image.
 * @bprm: exec state; bprm->p, argc, envc, exec, interp_flags and
 *        interp_data are read, and bprm->p is updated to the final
 *        (16-byte rounded) stack pointer.
 * @exec: ELF header of the executable; e_phoff, e_phnum and e_entry are
 *        exported through the auxiliary vector.
 * @load_addr: added to exec->e_phoff to form AT_PHDR.
 * @interp_load_addr: exported as AT_BASE.
 *
 * Copies the platform strings and 16 random bytes onto the stack, fills
 * current->mm->saved_auxv, then writes argc, the argv[] and envp[] pointer
 * arrays and finally the auxiliary vector at the new stack pointer. Also
 * records arg_start/arg_end/env_start/env_end in current->mm.
 *
 * Return: 0 on success, -EFAULT if a user-space write or the stack
 * extension fails, -EINVAL if an argument or environment string has a
 * length of zero or above MAX_ARG_STRLEN.
 */
158 static int
159 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
160 unsigned long load_addr, unsigned long interp_load_addr)
161 {
162 unsigned long p = bprm->p;
163 int argc = bprm->argc;
164 int envc = bprm->envc;
165 elf_addr_t __user *argv;
166 elf_addr_t __user *envp;
167 elf_addr_t __user *sp;
168 elf_addr_t __user *u_platform;
169 elf_addr_t __user *u_base_platform;
170 elf_addr_t __user *u_rand_bytes;
171 const char *k_platform = ELF_PLATFORM;
172 const char *k_base_platform = ELF_BASE_PLATFORM;
173 unsigned char k_rand_bytes[16];
174 int items;
175 elf_addr_t *elf_info;
176 int ei_index = 0;
177 const struct cred *cred = current_cred();
178 struct vm_area_struct *vma;
179
180 /*
181 * In some cases (e.g. Hyper-Threading), we want to avoid L1
182 * evictions by the processes running on the same package. One
183 * thing we can do is to shuffle the initial stack for them.
184 */
185
186 p = arch_align_stack(p);
187
188 /*
189 * If this architecture has a platform capability string, copy it
190 * to userspace. In some cases (Sparc), this info is impossible
191 * for userspace to get any other way, in others (i386) it is
192 * merely difficult.
193 */
194 u_platform = NULL;
195 if (k_platform) {
196 size_t len = strlen(k_platform) + 1;
197
198 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
199 if (__copy_to_user(u_platform, k_platform, len))
200 return -EFAULT;
201 }
202
203 /*
204 * If this architecture has a "base" platform capability
205 * string, copy it to userspace.
206 */
207 u_base_platform = NULL;
208 if (k_base_platform) {
209 size_t len = strlen(k_base_platform) + 1;
210
211 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
212 if (__copy_to_user(u_base_platform, k_base_platform, len))
213 return -EFAULT;
214 }
215
216 /*
217 * Generate 16 random bytes for userspace PRNG seeding.
218 */
219 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
220 u_rand_bytes = (elf_addr_t __user *)
221 STACK_ALLOC(p, sizeof(k_rand_bytes));
222 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
223 return -EFAULT;
224
225 /* Create the ELF interpreter info */
/* The vector is built in mm->saved_auxv first and copied to the stack last. */
226 elf_info = (elf_addr_t *)current->mm->saved_auxv;
227 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
228 #define NEW_AUX_ENT(id, val) \
229 do { \
230 elf_info[ei_index++] = id; \
231 elf_info[ei_index++] = val; \
232 } while (0)
233
234 #ifdef ARCH_DLINFO
235 /*
236 * ARCH_DLINFO must come first so PPC can do its special alignment of
237 * AUXV.
238 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
239 * ARCH_DLINFO changes
240 */
241 ARCH_DLINFO;
242 #endif
243 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
244 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
245 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
246 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
247 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
248 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
249 NEW_AUX_ENT(AT_BASE, interp_load_addr);
250 NEW_AUX_ENT(AT_FLAGS, 0);
251 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
252 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
253 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
254 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
255 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
256 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
257 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
258 #ifdef ELF_HWCAP2
259 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
260 #endif
261 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
262 if (k_platform) {
263 NEW_AUX_ENT(AT_PLATFORM,
264 (elf_addr_t)(unsigned long)u_platform);
265 }
266 if (k_base_platform) {
267 NEW_AUX_ENT(AT_BASE_PLATFORM,
268 (elf_addr_t)(unsigned long)u_base_platform);
269 }
270 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
271 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
272 }
273 #undef NEW_AUX_ENT
274 /* AT_NULL is zero; clear the rest too */
275 memset(&elf_info[ei_index], 0,
276 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
277
278 /* And advance past the AT_NULL entry. */
279 ei_index += 2;
280
281 sp = STACK_ADD(p, ei_index);
282
/* Slots besides auxv: argv[] + NULL, envp[] + NULL, and one for argc. */
283 items = (argc + 1) + (envc + 1) + 1;
284 bprm->p = STACK_ROUND(sp, items);
285
286 /* Point sp at the lowest address on the stack */
287 #ifdef CONFIG_STACK_GROWSUP
288 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
289 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
290 #else
291 sp = (elf_addr_t __user *)bprm->p;
292 #endif
293
294
295 /*
296 * Grow the stack manually; some architectures have a limit on how
297 * far ahead a user-space access may be in order to grow the stack.
298 */
299 vma = find_extend_vma(current->mm, bprm->p);
300 if (!vma)
301 return -EFAULT;
302
303 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
304 if (__put_user(argc, sp++))
305 return -EFAULT;
306 argv = sp;
/* envp[] starts right after argv[] and its NULL terminator. */
307 envp = argv + argc + 1;
308
309 /* Populate argv and envp */
/* p now walks the argument strings, starting at mm->arg_start. */
310 p = current->mm->arg_end = current->mm->arg_start;
311 while (argc-- > 0) {
312 size_t len;
313 if (__put_user((elf_addr_t)p, argv++))
314 return -EFAULT;
315 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
316 if (!len || len > MAX_ARG_STRLEN)
317 return -EINVAL;
318 p += len;
319 }
/* NULL-terminate argv[]. */
320 if (__put_user(0, argv))
321 return -EFAULT;
/* The environment strings begin where the argument strings ended. */
322 current->mm->arg_end = current->mm->env_start = p;
323 while (envc-- > 0) {
324 size_t len;
325 if (__put_user((elf_addr_t)p, envp++))
326 return -EFAULT;
327 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
328 if (!len || len > MAX_ARG_STRLEN)
329 return -EINVAL;
330 p += len;
331 }
/* NULL-terminate envp[]. */
332 if (__put_user(0, envp))
333 return -EFAULT;
334 current->mm->env_end = p;
335
336 /* Put the elf_info on the stack in the right place. */
/* envp points at its NULL terminator here; the auxv follows that slot. */
337 sp = (elf_addr_t __user *)envp + 1;
338 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
339 return -EFAULT;
340 return 0;
341 }
342
343 #ifndef elf_map
344
345 static unsigned long elf_map(struct file *filep, unsigned long addr,
346 struct elf_phdr *eppnt, int prot, int type,
347 unsigned long total_size)
348 {
349 unsigned long map_addr;
350 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
351 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
352 addr = ELF_PAGESTART(addr);
353 size = ELF_PAGEALIGN(size);
354
355 /* mmap() will return -EINVAL if given a zero size, but a
356 * segment with zero filesize is perfectly valid */
357 if (!size)
358 return addr;
359
360 /*
361 * total_size is the size of the ELF (interpreter) image.
362 * The _first_ mmap needs to know the full size, otherwise
363 * randomization might put this image into an overlapping
364 * position with the ELF binary image. (since size < total_size)
365 * So we first map the 'big' image - and unmap the remainder at
366 * the end. (which unmap is needed for ELF images with holes.)
367 */
368 if (total_size) {
369 total_size = ELF_PAGEALIGN(total_size);
370 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
371 if (!BAD_ADDR(map_addr))
372 vm_munmap(map_addr+size, total_size-size);
373 } else
374 map_addr = vm_mmap(filep, addr, size, prot, type, off);
375
376 return(map_addr);
377 }
378
379 #endif /* !elf_map */
380
381 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
382 {
383 int i, first_idx = -1, last_idx = -1;
384
385 for (i = 0; i < nr; i++) {
386 if (cmds[i].p_type == PT_LOAD) {
387 last_idx = i;
388 if (first_idx == -1)
389 first_idx = i;
390 }
391 }
392 if (first_idx == -1)
393 return 0;
394
395 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
396 ELF_PAGESTART(cmds[first_idx].p_vaddr);
397 }
398
399 /**
400 * load_elf_phdrs() - load ELF program headers
401 * @elf_ex: ELF header of the binary whose program headers should be loaded
402 * @elf_file: the opened ELF binary file
403 *
404 * Loads ELF program headers from the binary file elf_file, which has the ELF
405 * header pointed to by elf_ex, into a newly allocated array. The caller is
406 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
407 */
408 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
409 struct file *elf_file)
410 {
411 struct elf_phdr *elf_phdata = NULL;
412 int retval, size, err = -1;
413
414 /*
415 * If the size of this structure has changed, then punt, since
416 * we will be doing the wrong thing.
417 */
418 if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
419 goto out;
420
421 /* Sanity check the number of program headers... */
422 if (elf_ex->e_phnum < 1 ||
423 elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
424 goto out;
425
426 /* ...and their total size. */
427 size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
428 if (size > ELF_MIN_ALIGN)
429 goto out;
430
431 elf_phdata = kmalloc(size, GFP_KERNEL);
432 if (!elf_phdata)
433 goto out;
434
435 /* Read in the program headers */
436 retval = kernel_read(elf_file, elf_ex->e_phoff,
437 (char *)elf_phdata, size);
438 if (retval != size) {
439 err = (retval < 0) ? retval : -EIO;
440 goto out;
441 }
442
443 /* Success! */
444 err = 0;
445 out:
446 if (err) {
447 kfree(elf_phdata);
448 elf_phdata = NULL;
449 }
450 return elf_phdata;
451 }
452
453 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
454
455 /**
456 * struct arch_elf_state - arch-specific ELF loading state
457 *
458 * This structure is used to preserve architecture specific data during
459 * the loading of an ELF file, throughout the checking of architecture
460 * specific ELF headers & through to the point where the ELF load is
461 * known to be proceeding (ie. SET_PERSONALITY).
462 *
463 * This implementation is a dummy for architectures which require no
464 * specific state.
465 */
/* Deliberately empty: this dummy variant carries no per-architecture state. */
466 struct arch_elf_state {
467 };
468
/* Initializer matching the empty structure above. */
469 #define INIT_ARCH_ELF_STATE {}
470
471 /**
472 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
473 * @ehdr: The main ELF header
474 * @phdr: The program header to check
475 * @elf: The open ELF file
476 * @is_interp: True if the phdr is from the interpreter of the ELF being
477 * loaded, else false.
478 * @state: Architecture-specific state preserved throughout the process
479 * of loading the ELF.
480 *
481 * Inspects the program header phdr to validate its correctness and/or
482 * suitability for the system. Called once per ELF program header in the
483 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
484 * interpreter.
485 *
486 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
487 * with that return code.
488 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/*
	 * Generic fallback used when CONFIG_ARCH_BINFMT_ELF_STATE is not
	 * set: no header is inspected and the load is always allowed.
	 */
	return 0;
}
497
498 /**
499 * arch_check_elf() - check an ELF executable
500 * @ehdr: The main ELF header
501 * @has_interp: True if the ELF has an interpreter, else false.
502 * @interp_ehdr: The interpreter's ELF header
503 * @state: Architecture-specific state preserved throughout the process
504 * of loading the ELF.
505 *
506 * Provides a final opportunity for architecture code to reject the loading
507 * of the ELF & cause an exec syscall to return an error. This is called after
508 * all program headers to be checked by arch_elf_pt_proc have been.
509 *
510 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
511 * with that return code.
512 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct elfhdr *interp_ehdr,
				 struct arch_elf_state *state)
{
	/*
	 * Generic fallback used when CONFIG_ARCH_BINFMT_ELF_STATE is not
	 * set: nothing is checked and the ELF is never rejected.
	 */
	return 0;
}
520
521 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
522
523 /* This is much more generalized than the library routine read function,
524 so we keep this separate. Technically the library read function
525 is only provided so that we can read a.out libraries that have
526 an ELF header */
527
528 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
529 struct file *interpreter, unsigned long *interp_map_addr,
530 unsigned long no_base, struct elf_phdr *interp_elf_phdata)
531 {
532 struct elf_phdr *eppnt;
533 unsigned long load_addr = 0;
534 int load_addr_set = 0;
535 unsigned long last_bss = 0, elf_bss = 0;
536 int bss_prot = 0;
537 unsigned long error = ~0UL;
538 unsigned long total_size;
539 int i;
540
541 /* First of all, some simple consistency checks */
542 if (interp_elf_ex->e_type != ET_EXEC &&
543 interp_elf_ex->e_type != ET_DYN)
544 goto out;
545 if (!elf_check_arch(interp_elf_ex))
546 goto out;
547 if (!interpreter->f_op->mmap)
548 goto out;
549
550 total_size = total_mapping_size(interp_elf_phdata,
551 interp_elf_ex->e_phnum);
552 if (!total_size) {
553 error = -EINVAL;
554 goto out;
555 }
556
557 eppnt = interp_elf_phdata;
558 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
559 if (eppnt->p_type == PT_LOAD) {
560 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
561 int elf_prot = 0;
562 unsigned long vaddr = 0;
563 unsigned long k, map_addr;
564
565 if (eppnt->p_flags & PF_R)
566 elf_prot = PROT_READ;
567 if (eppnt->p_flags & PF_W)
568 elf_prot |= PROT_WRITE;
569 if (eppnt->p_flags & PF_X)
570 elf_prot |= PROT_EXEC;
571 vaddr = eppnt->p_vaddr;
572 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
573 elf_type |= MAP_FIXED;
574 else if (no_base && interp_elf_ex->e_type == ET_DYN)
575 load_addr = -vaddr;
576
577 map_addr = elf_map(interpreter, load_addr + vaddr,
578 eppnt, elf_prot, elf_type, total_size);
579 total_size = 0;
580 if (!*interp_map_addr)
581 *interp_map_addr = map_addr;
582 error = map_addr;
583 if (BAD_ADDR(map_addr))
584 goto out;
585
586 if (!load_addr_set &&
587 interp_elf_ex->e_type == ET_DYN) {
588 load_addr = map_addr - ELF_PAGESTART(vaddr);
589 load_addr_set = 1;
590 }
591
592 /*
593 * Check to see if the section's size will overflow the
594 * allowed task size. Note that p_filesz must always be
595 * <= p_memsize so it's only necessary to check p_memsz.
596 */
597 k = load_addr + eppnt->p_vaddr;
598 if (BAD_ADDR(k) ||
599 eppnt->p_filesz > eppnt->p_memsz ||
600 eppnt->p_memsz > TASK_SIZE ||
601 TASK_SIZE - eppnt->p_memsz < k) {
602 error = -ENOMEM;
603 goto out;
604 }
605
606 /*
607 * Find the end of the file mapping for this phdr, and
608 * keep track of the largest address we see for this.
609 */
610 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
611 if (k > elf_bss)
612 elf_bss = k;
613
614 /*
615 * Do the same thing for the memory mapping - between
616 * elf_bss and last_bss is the bss section.
617 */
618 k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
619 if (k > last_bss) {
620 last_bss = k;
621 bss_prot = elf_prot;
622 }
623 }
624 }
625
626 /*
627 * Now fill out the bss section: first pad the last page from
628 * the file up to the page boundary, and zero it from elf_bss
629 * up to the end of the page.
630 */
631 if (padzero(elf_bss)) {
632 error = -EFAULT;
633 goto out;
634 }
635 /*
636 * Next, align both the file and mem bss up to the page size,
637 * since this is where elf_bss was just zeroed up to, and where
638 * last_bss will end after the vm_brk_flags() below.
639 */
640 elf_bss = ELF_PAGEALIGN(elf_bss);
641 last_bss = ELF_PAGEALIGN(last_bss);
642 /* Finally, if there is still more bss to allocate, do it. */
643 if (last_bss > elf_bss) {
644 error = vm_brk_flags(elf_bss, last_bss - elf_bss,
645 bss_prot & PROT_EXEC ? VM_EXEC : 0);
646 if (error)
647 goto out;
648 }
649
650 error = load_addr;
651 out:
652 return error;
653 }
654
655 /*
656 * These are the functions used to load ELF style executables and shared
657 * libraries. There is no binary dependent code anywhere else.
658 */
659
660 #ifndef STACK_RND_MASK
661 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
662 #endif
663
664 static unsigned long randomize_stack_top(unsigned long stack_top)
665 {
666 unsigned long random_variable = 0;
667
668 if ((current->flags & PF_RANDOMIZE) &&
669 !(current->personality & ADDR_NO_RANDOMIZE)) {
670 random_variable = get_random_long();
671 random_variable &= STACK_RND_MASK;
672 random_variable <<= PAGE_SHIFT;
673 }
674 #ifdef CONFIG_STACK_GROWSUP
675 return PAGE_ALIGN(stack_top) + random_variable;
676 #else
677 return PAGE_ALIGN(stack_top) - random_variable;
678 #endif
679 }
680
681 static int load_elf_binary(struct linux_binprm *bprm)
682 {
683 struct file *interpreter = NULL; /* to shut gcc up */
684 unsigned long load_addr = 0, load_bias = 0;
685 int load_addr_set = 0;
686 char * elf_interpreter = NULL;
687 unsigned long error;
688 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
689 unsigned long elf_bss, elf_brk;
690 int bss_prot = 0;
691 int retval, i;
692 unsigned long elf_entry;
693 unsigned long interp_load_addr = 0;
694 unsigned long start_code, end_code, start_data, end_data;
695 unsigned long reloc_func_desc __maybe_unused = 0;
696 int executable_stack = EXSTACK_DEFAULT;
697 struct pt_regs *regs = current_pt_regs();
698 struct {
699 struct elfhdr elf_ex;
700 struct elfhdr interp_elf_ex;
701 } *loc;
702 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
703
704 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
705 if (!loc) {
706 retval = -ENOMEM;
707 goto out_ret;
708 }
709
710 /* Get the exec-header */
711 loc->elf_ex = *((struct elfhdr *)bprm->buf);
712
713 retval = -ENOEXEC;
714 /* First of all, some simple consistency checks */
715 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
716 goto out;
717
718 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
719 goto out;
720 if (!elf_check_arch(&loc->elf_ex))
721 goto out;
722 if (!bprm->file->f_op->mmap)
723 goto out;
724
725 elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
726 if (!elf_phdata)
727 goto out;
728
729 elf_ppnt = elf_phdata;
730 elf_bss = 0;
731 elf_brk = 0;
732
733 start_code = ~0UL;
734 end_code = 0;
735 start_data = 0;
736 end_data = 0;
737
738 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
739 if (elf_ppnt->p_type == PT_INTERP) {
740 /* This is the program interpreter used for
741 * shared libraries - for now assume that this
742 * is an a.out format binary
743 */
744 retval = -ENOEXEC;
745 if (elf_ppnt->p_filesz > PATH_MAX ||
746 elf_ppnt->p_filesz < 2)
747 goto out_free_ph;
748
749 retval = -ENOMEM;
750 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
751 GFP_KERNEL);
752 if (!elf_interpreter)
753 goto out_free_ph;
754
755 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
756 elf_interpreter,
757 elf_ppnt->p_filesz);
758 if (retval != elf_ppnt->p_filesz) {
759 if (retval >= 0)
760 retval = -EIO;
761 goto out_free_interp;
762 }
763 /* make sure path is NULL terminated */
764 retval = -ENOEXEC;
765 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
766 goto out_free_interp;
767
768 interpreter = open_exec(elf_interpreter);
769 retval = PTR_ERR(interpreter);
770 if (IS_ERR(interpreter))
771 goto out_free_interp;
772
773 /*
774 * If the binary is not readable then enforce
775 * mm->dumpable = 0 regardless of the interpreter's
776 * permissions.
777 */
778 would_dump(bprm, interpreter);
779
780 /* Get the exec headers */
781 retval = kernel_read(interpreter, 0,
782 (void *)&loc->interp_elf_ex,
783 sizeof(loc->interp_elf_ex));
784 if (retval != sizeof(loc->interp_elf_ex)) {
785 if (retval >= 0)
786 retval = -EIO;
787 goto out_free_dentry;
788 }
789
790 break;
791 }
792 elf_ppnt++;
793 }
794
795 elf_ppnt = elf_phdata;
796 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
797 switch (elf_ppnt->p_type) {
798 case PT_GNU_STACK:
799 if (elf_ppnt->p_flags & PF_X)
800 executable_stack = EXSTACK_ENABLE_X;
801 else
802 executable_stack = EXSTACK_DISABLE_X;
803 break;
804
805 case PT_LOPROC ... PT_HIPROC:
806 retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
807 bprm->file, false,
808 &arch_state);
809 if (retval)
810 goto out_free_dentry;
811 break;
812 }
813
814 /* Some simple consistency checks for the interpreter */
815 if (elf_interpreter) {
816 retval = -ELIBBAD;
817 /* Not an ELF interpreter */
818 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
819 goto out_free_dentry;
820 /* Verify the interpreter has a valid arch */
821 if (!elf_check_arch(&loc->interp_elf_ex))
822 goto out_free_dentry;
823
824 /* Load the interpreter program headers */
825 interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
826 interpreter);
827 if (!interp_elf_phdata)
828 goto out_free_dentry;
829
830 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
831 elf_ppnt = interp_elf_phdata;
832 for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
833 switch (elf_ppnt->p_type) {
834 case PT_LOPROC ... PT_HIPROC:
835 retval = arch_elf_pt_proc(&loc->interp_elf_ex,
836 elf_ppnt, interpreter,
837 true, &arch_state);
838 if (retval)
839 goto out_free_dentry;
840 break;
841 }
842 }
843
844 /*
845 * Allow arch code to reject the ELF at this point, whilst it's
846 * still possible to return an error to the code that invoked
847 * the exec syscall.
848 */
849 retval = arch_check_elf(&loc->elf_ex,
850 !!interpreter, &loc->interp_elf_ex,
851 &arch_state);
852 if (retval)
853 goto out_free_dentry;
854
855 /* Flush all traces of the currently running executable */
856 retval = flush_old_exec(bprm);
857 if (retval)
858 goto out_free_dentry;
859
860 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
861 may depend on the personality. */
862 SET_PERSONALITY2(loc->elf_ex, &arch_state);
863 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
864 current->personality |= READ_IMPLIES_EXEC;
865
866 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
867 current->flags |= PF_RANDOMIZE;
868
869 setup_new_exec(bprm);
870 install_exec_creds(bprm);
871
872 /* Do this so that we can load the interpreter, if need be. We will
873 change some of these later */
874 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
875 executable_stack);
876 if (retval < 0)
877 goto out_free_dentry;
878
879 current->mm->start_stack = bprm->p;
880
881 /* Now we do a little grungy work by mmapping the ELF image into
882 the correct location in memory. */
883 for(i = 0, elf_ppnt = elf_phdata;
884 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
885 int elf_prot = 0, elf_flags;
886 unsigned long k, vaddr;
887 unsigned long total_size = 0;
888
889 if (elf_ppnt->p_type != PT_LOAD)
890 continue;
891
892 if (unlikely (elf_brk > elf_bss)) {
893 unsigned long nbyte;
894
895 /* There was a PT_LOAD segment with p_memsz > p_filesz
896 before this one. Map anonymous pages, if needed,
897 and clear the area. */
898 retval = set_brk(elf_bss + load_bias,
899 elf_brk + load_bias,
900 bss_prot);
901 if (retval)
902 goto out_free_dentry;
903 nbyte = ELF_PAGEOFFSET(elf_bss);
904 if (nbyte) {
905 nbyte = ELF_MIN_ALIGN - nbyte;
906 if (nbyte > elf_brk - elf_bss)
907 nbyte = elf_brk - elf_bss;
908 if (clear_user((void __user *)elf_bss +
909 load_bias, nbyte)) {
910 /*
911 * This bss-zeroing can fail if the ELF
912 * file specifies odd protections. So
913 * we don't check the return value
914 */
915 }
916 }
917 }
918
919 if (elf_ppnt->p_flags & PF_R)
920 elf_prot |= PROT_READ;
921 if (elf_ppnt->p_flags & PF_W)
922 elf_prot |= PROT_WRITE;
923 if (elf_ppnt->p_flags & PF_X)
924 elf_prot |= PROT_EXEC;
925
926 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
927
928 vaddr = elf_ppnt->p_vaddr;
929 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
930 elf_flags |= MAP_FIXED;
931 } else if (loc->elf_ex.e_type == ET_DYN) {
932 /* Try and get dynamic programs out of the way of the
933 * default mmap base, as well as whatever program they
934 * might try to exec. This is because the brk will
935 * follow the loader, and is not movable. */
936 load_bias = ELF_ET_DYN_BASE - vaddr;
937 if (current->flags & PF_RANDOMIZE)
938 load_bias += arch_mmap_rnd();
939 load_bias = ELF_PAGESTART(load_bias);
940 total_size = total_mapping_size(elf_phdata,
941 loc->elf_ex.e_phnum);
942 if (!total_size) {
943 retval = -EINVAL;
944 goto out_free_dentry;
945 }
946 }
947
948 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
949 elf_prot, elf_flags, total_size);
950 if (BAD_ADDR(error)) {
951 retval = IS_ERR((void *)error) ?
952 PTR_ERR((void*)error) : -EINVAL;
953 goto out_free_dentry;
954 }
955
956 if (!load_addr_set) {
957 load_addr_set = 1;
958 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
959 if (loc->elf_ex.e_type == ET_DYN) {
960 load_bias += error -
961 ELF_PAGESTART(load_bias + vaddr);
962 load_addr += load_bias;
963 reloc_func_desc = load_bias;
964 }
965 }
966 k = elf_ppnt->p_vaddr;
967 if (k < start_code)
968 start_code = k;
969 if (start_data < k)
970 start_data = k;
971
972 /*
973 * Check to see if the section's size will overflow the
974 * allowed task size. Note that p_filesz must always be
975 * <= p_memsz so it is only necessary to check p_memsz.
976 */
977 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
978 elf_ppnt->p_memsz > TASK_SIZE ||
979 TASK_SIZE - elf_ppnt->p_memsz < k) {
980 /* set_brk can never work. Avoid overflows. */
981 retval = -EINVAL;
982 goto out_free_dentry;
983 }
984
985 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
986
987 if (k > elf_bss)
988 elf_bss = k;
989 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
990 end_code = k;
991 if (end_data < k)
992 end_data = k;
993 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
994 if (k > elf_brk) {
995 bss_prot = elf_prot;
996 elf_brk = k;
997 }
998 }
999
1000 loc->elf_ex.e_entry += load_bias;
1001 elf_bss += load_bias;
1002 elf_brk += load_bias;
1003 start_code += load_bias;
1004 end_code += load_bias;
1005 start_data += load_bias;
1006 end_data += load_bias;
1007
1008 /* Calling set_brk effectively mmaps the pages that we need
1009 * for the bss and break sections. We must do this before
1010 * mapping in the interpreter, to make sure it doesn't wind
1011 * up getting placed where the bss needs to go.
1012 */
1013 retval = set_brk(elf_bss, elf_brk, bss_prot);
1014 if (retval)
1015 goto out_free_dentry;
1016 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1017 retval = -EFAULT; /* Nobody gets to see this, but.. */
1018 goto out_free_dentry;
1019 }
1020
1021 if (elf_interpreter) {
1022 unsigned long interp_map_addr = 0;
1023
1024 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1025 interpreter,
1026 &interp_map_addr,
1027 load_bias, interp_elf_phdata);
1028 if (!IS_ERR((void *)elf_entry)) {
1029 /*
1030 * load_elf_interp() returns relocation
1031 * adjustment
1032 */
1033 interp_load_addr = elf_entry;
1034 elf_entry += loc->interp_elf_ex.e_entry;
1035 }
1036 if (BAD_ADDR(elf_entry)) {
1037 retval = IS_ERR((void *)elf_entry) ?
1038 (int)elf_entry : -EINVAL;
1039 goto out_free_dentry;
1040 }
1041 reloc_func_desc = interp_load_addr;
1042
1043 allow_write_access(interpreter);
1044 fput(interpreter);
1045 kfree(elf_interpreter);
1046 } else {
1047 elf_entry = loc->elf_ex.e_entry;
1048 if (BAD_ADDR(elf_entry)) {
1049 retval = -EINVAL;
1050 goto out_free_dentry;
1051 }
1052 }
1053
1054 kfree(interp_elf_phdata);
1055 kfree(elf_phdata);
1056
1057 set_binfmt(&elf_format);
1058
1059 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1060 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1061 if (retval < 0)
1062 goto out;
1063 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1064
1065 retval = create_elf_tables(bprm, &loc->elf_ex,
1066 load_addr, interp_load_addr);
1067 if (retval < 0)
1068 goto out;
1069 /* N.B. passed_fileno might not be initialized? */
1070 current->mm->end_code = end_code;
1071 current->mm->start_code = start_code;
1072 current->mm->start_data = start_data;
1073 current->mm->end_data = end_data;
1074 current->mm->start_stack = bprm->p;
1075
1076 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1077 current->mm->brk = current->mm->start_brk =
1078 arch_randomize_brk(current->mm);
1079 #ifdef compat_brk_randomized
1080 current->brk_randomized = 1;
1081 #endif
1082 }
1083
1084 if (current->personality & MMAP_PAGE_ZERO) {
1085 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1086 and some applications "depend" upon this behavior.
1087 Since we do not have the power to recompile these, we
1088 emulate the SVr4 behavior. Sigh. */
1089 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1090 MAP_FIXED | MAP_PRIVATE, 0);
1091 }
1092
1093 #ifdef ELF_PLAT_INIT
1094 /*
1095 * The ABI may specify that certain registers be set up in special
1096 * ways (on i386 %edx is the address of a DT_FINI function, for
1097 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1098 * that the e_entry field is the address of the function descriptor
1099 * for the startup routine, rather than the address of the startup
1100 * routine itself. This macro performs whatever initialization to
1101 * the regs structure is required as well as any relocations to the
1102 * function descriptor entries when executing dynamically linked apps.
1103 */
1104 ELF_PLAT_INIT(regs, reloc_func_desc);
1105 #endif
1106
1107 start_thread(regs, elf_entry, bprm->p);
1108 retval = 0;
1109 out:
1110 kfree(loc);
1111 out_ret:
1112 return retval;
1113
1114 /* error cleanup */
1115 out_free_dentry:
1116 kfree(interp_elf_phdata);
1117 allow_write_access(interpreter);
1118 if (interpreter)
1119 fput(interpreter);
1120 out_free_interp:
1121 kfree(elf_interpreter);
1122 out_free_ph:
1123 kfree(elf_phdata);
1124 goto out;
1125 }
1126
1127 #ifdef CONFIG_USELIB
1128 /* This is really simpleminded and specialized - we are loading an
1129 a.out library that is given an ELF header. */
1130 static int load_elf_library(struct file *file)
1131 {
1132 struct elf_phdr *elf_phdata;
1133 struct elf_phdr *eppnt;
1134 unsigned long elf_bss, bss, len;
1135 int retval, error, i, j;
1136 struct elfhdr elf_ex;
1137
1138 error = -ENOEXEC;
1139 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1140 if (retval != sizeof(elf_ex))
1141 goto out;
1142
1143 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1144 goto out;
1145
1146 /* First of all, some simple consistency checks */
1147 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1148 !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1149 goto out;
1150
1151 /* Now read in all of the header information */
1152
1153 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1154 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1155
1156 error = -ENOMEM;
1157 elf_phdata = kmalloc(j, GFP_KERNEL);
1158 if (!elf_phdata)
1159 goto out;
1160
1161 eppnt = elf_phdata;
1162 error = -ENOEXEC;
1163 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1164 if (retval != j)
1165 goto out_free_ph;
1166
1167 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1168 if ((eppnt + i)->p_type == PT_LOAD)
1169 j++;
1170 if (j != 1)
1171 goto out_free_ph;
1172
1173 while (eppnt->p_type != PT_LOAD)
1174 eppnt++;
1175
1176 /* Now use mmap to map the library into memory. */
1177 error = vm_mmap(file,
1178 ELF_PAGESTART(eppnt->p_vaddr),
1179 (eppnt->p_filesz +
1180 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1181 PROT_READ | PROT_WRITE | PROT_EXEC,
1182 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1183 (eppnt->p_offset -
1184 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1185 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1186 goto out_free_ph;
1187
1188 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1189 if (padzero(elf_bss)) {
1190 error = -EFAULT;
1191 goto out_free_ph;
1192 }
1193
1194 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1195 ELF_MIN_ALIGN - 1);
1196 bss = eppnt->p_memsz + eppnt->p_vaddr;
1197 if (bss > len) {
1198 error = vm_brk(len, bss - len);
1199 if (error)
1200 goto out_free_ph;
1201 }
1202 error = 0;
1203
1204 out_free_ph:
1205 kfree(elf_phdata);
1206 out:
1207 return error;
1208 }
1209 #endif /* #ifdef CONFIG_USELIB */
1210
1211 #ifdef CONFIG_ELF_CORE
1212 /*
1213 * ELF core dumper
1214 *
1215 * Modelled on fs/exec.c:aout_core_dump()
1216 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1217 */
1218
1219 /*
1220 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1221 * that are useful for post-mortem analysis are included in every core dump.
1222 * In that way we ensure that the core dump is fully interpretable later
1223 * without matching up the same kernel and hardware config to see what PC values
1224 * meant. These special mappings include - vDSO, vsyscall, and other
1225 * architecture specific mappings
1226 */
1227 static bool always_dump_vma(struct vm_area_struct *vma)
1228 {
1229 /* Any vsyscall mappings? */
1230 if (vma == get_gate_vma(vma->vm_mm))
1231 return true;
1232
1233 /*
1234 * Assume that all vmas with a .name op should always be dumped.
1235 * If this changes, a new vm_ops field can easily be added.
1236 */
1237 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1238 return true;
1239
1240 /*
1241 * arch_vma_name() returns non-NULL for special architecture mappings,
1242 * such as vDSO sections.
1243 */
1244 if (arch_vma_name(vma))
1245 return true;
1246
1247 return false;
1248 }
1249
1250 /*
1251 * Decide what to dump of a segment, part, all or none.
1252 */
1253 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1254 unsigned long mm_flags)
1255 {
1256 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1257
1258 /* always dump the vdso and vsyscall sections */
1259 if (always_dump_vma(vma))
1260 goto whole;
1261
1262 if (vma->vm_flags & VM_DONTDUMP)
1263 return 0;
1264
1265 /* support for DAX */
1266 if (vma_is_dax(vma)) {
1267 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1268 goto whole;
1269 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1270 goto whole;
1271 return 0;
1272 }
1273
1274 /* Hugetlb memory check */
1275 if (vma->vm_flags & VM_HUGETLB) {
1276 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1277 goto whole;
1278 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1279 goto whole;
1280 return 0;
1281 }
1282
1283 /* Do not dump I/O mapped devices or special mappings */
1284 if (vma->vm_flags & VM_IO)
1285 return 0;
1286
1287 /* By default, dump shared memory if mapped from an anonymous file. */
1288 if (vma->vm_flags & VM_SHARED) {
1289 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1290 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1291 goto whole;
1292 return 0;
1293 }
1294
1295 /* Dump segments that have been written to. */
1296 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1297 goto whole;
1298 if (vma->vm_file == NULL)
1299 return 0;
1300
1301 if (FILTER(MAPPED_PRIVATE))
1302 goto whole;
1303
1304 /*
1305 * If this looks like the beginning of a DSO or executable mapping,
1306 * check for an ELF header. If we find one, dump the first page to
1307 * aid in determining what was mapped here.
1308 */
1309 if (FILTER(ELF_HEADERS) &&
1310 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1311 u32 __user *header = (u32 __user *) vma->vm_start;
1312 u32 word;
1313 mm_segment_t fs = get_fs();
1314 /*
1315 * Doing it this way gets the constant folded by GCC.
1316 */
1317 union {
1318 u32 cmp;
1319 char elfmag[SELFMAG];
1320 } magic;
1321 BUILD_BUG_ON(SELFMAG != sizeof word);
1322 magic.elfmag[EI_MAG0] = ELFMAG0;
1323 magic.elfmag[EI_MAG1] = ELFMAG1;
1324 magic.elfmag[EI_MAG2] = ELFMAG2;
1325 magic.elfmag[EI_MAG3] = ELFMAG3;
1326 /*
1327 * Switch to the user "segment" for get_user(),
1328 * then put back what elf_core_dump() had in place.
1329 */
1330 set_fs(USER_DS);
1331 if (unlikely(get_user(word, header)))
1332 word = 0;
1333 set_fs(fs);
1334 if (word == magic.cmp)
1335 return PAGE_SIZE;
1336 }
1337
1338 #undef FILTER
1339
1340 return 0;
1341
1342 whole:
1343 return vma->vm_end - vma->vm_start;
1344 }
1345
/*
 * An ELF note in memory: the pieces writenote() needs to emit one note
 * record.  The struct only points at the name and payload.
 */
struct memelfnote {
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type, stored as n_type */
	unsigned int datasz;	/* payload size in bytes, stored as n_descsz */
	void *data;		/* payload */
};
1354
1355 static int notesize(struct memelfnote *en)
1356 {
1357 int sz;
1358
1359 sz = sizeof(struct elf_note);
1360 sz += roundup(strlen(en->name) + 1, 4);
1361 sz += roundup(en->datasz, 4);
1362
1363 return sz;
1364 }
1365
1366 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1367 {
1368 struct elf_note en;
1369 en.n_namesz = strlen(men->name) + 1;
1370 en.n_descsz = men->datasz;
1371 en.n_type = men->type;
1372
1373 return dump_emit(cprm, &en, sizeof(en)) &&
1374 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1375 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1376 }
1377
1378 static void fill_elf_header(struct elfhdr *elf, int segs,
1379 u16 machine, u32 flags)
1380 {
1381 memset(elf, 0, sizeof(*elf));
1382
1383 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1384 elf->e_ident[EI_CLASS] = ELF_CLASS;
1385 elf->e_ident[EI_DATA] = ELF_DATA;
1386 elf->e_ident[EI_VERSION] = EV_CURRENT;
1387 elf->e_ident[EI_OSABI] = ELF_OSABI;
1388
1389 elf->e_type = ET_CORE;
1390 elf->e_machine = machine;
1391 elf->e_version = EV_CURRENT;
1392 elf->e_phoff = sizeof(struct elfhdr);
1393 elf->e_flags = flags;
1394 elf->e_ehsize = sizeof(struct elfhdr);
1395 elf->e_phentsize = sizeof(struct elf_phdr);
1396 elf->e_phnum = segs;
1397
1398 return;
1399 }
1400
1401 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1402 {
1403 phdr->p_type = PT_NOTE;
1404 phdr->p_offset = offset;
1405 phdr->p_vaddr = 0;
1406 phdr->p_paddr = 0;
1407 phdr->p_filesz = sz;
1408 phdr->p_memsz = 0;
1409 phdr->p_flags = 0;
1410 phdr->p_align = 0;
1411 return;
1412 }
1413
1414 static void fill_note(struct memelfnote *note, const char *name, int type,
1415 unsigned int sz, void *data)
1416 {
1417 note->name = name;
1418 note->type = type;
1419 note->datasz = sz;
1420 note->data = data;
1421 return;
1422 }
1423
1424 /*
1425 * fill up all the fields in prstatus from the given task struct, except
1426 * registers which need to be filled up separately.
1427 */
1428 static void fill_prstatus(struct elf_prstatus *prstatus,
1429 struct task_struct *p, long signr)
1430 {
1431 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1432 prstatus->pr_sigpend = p->pending.signal.sig[0];
1433 prstatus->pr_sighold = p->blocked.sig[0];
1434 rcu_read_lock();
1435 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1436 rcu_read_unlock();
1437 prstatus->pr_pid = task_pid_vnr(p);
1438 prstatus->pr_pgrp = task_pgrp_vnr(p);
1439 prstatus->pr_sid = task_session_vnr(p);
1440 if (thread_group_leader(p)) {
1441 struct task_cputime cputime;
1442
1443 /*
1444 * This is the record for the group leader. It shows the
1445 * group-wide total, not its individual thread total.
1446 */
1447 thread_group_cputime(p, &cputime);
1448 prstatus->pr_utime = ns_to_timeval(cputime.utime);
1449 prstatus->pr_stime = ns_to_timeval(cputime.stime);
1450 } else {
1451 u64 utime, stime;
1452
1453 task_cputime(p, &utime, &stime);
1454 prstatus->pr_utime = ns_to_timeval(utime);
1455 prstatus->pr_stime = ns_to_timeval(stime);
1456 }
1457
1458 prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
1459 prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
1460 }
1461
1462 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1463 struct mm_struct *mm)
1464 {
1465 const struct cred *cred;
1466 unsigned int i, len;
1467
1468 /* first copy the parameters from user space */
1469 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1470
1471 len = mm->arg_end - mm->arg_start;
1472 if (len >= ELF_PRARGSZ)
1473 len = ELF_PRARGSZ-1;
1474 if (copy_from_user(&psinfo->pr_psargs,
1475 (const char __user *)mm->arg_start, len))
1476 return -EFAULT;
1477 for(i = 0; i < len; i++)
1478 if (psinfo->pr_psargs[i] == 0)
1479 psinfo->pr_psargs[i] = ' ';
1480 psinfo->pr_psargs[len] = 0;
1481
1482 rcu_read_lock();
1483 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1484 rcu_read_unlock();
1485 psinfo->pr_pid = task_pid_vnr(p);
1486 psinfo->pr_pgrp = task_pgrp_vnr(p);
1487 psinfo->pr_sid = task_session_vnr(p);
1488
1489 i = p->state ? ffz(~p->state) + 1 : 0;
1490 psinfo->pr_state = i;
1491 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1492 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1493 psinfo->pr_nice = task_nice(p);
1494 psinfo->pr_flag = p->flags;
1495 rcu_read_lock();
1496 cred = __task_cred(p);
1497 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1498 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1499 rcu_read_unlock();
1500 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1501
1502 return 0;
1503 }
1504
1505 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1506 {
1507 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1508 int i = 0;
1509 do
1510 i += 2;
1511 while (auxv[i - 2] != AT_NULL);
1512 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1513 }
1514
1515 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1516 const siginfo_t *siginfo)
1517 {
1518 mm_segment_t old_fs = get_fs();
1519 set_fs(KERNEL_DS);
1520 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1521 set_fs(old_fs);
1522 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1523 }
1524
1525 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1526 /*
1527 * Format of NT_FILE note:
1528 *
1529 * long count -- how many files are mapped
1530 * long page_size -- units for file_ofs
1531 * array of [COUNT] elements of
1532 * long start
1533 * long end
1534 * long file_ofs
1535 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1536 */
1537 static int fill_files_note(struct memelfnote *note)
1538 {
1539 struct vm_area_struct *vma;
1540 unsigned count, size, names_ofs, remaining, n;
1541 user_long_t *data;
1542 user_long_t *start_end_ofs;
1543 char *name_base, *name_curpos;
1544
1545 /* *Estimated* file count and total data size needed */
1546 count = current->mm->map_count;
1547 size = count * 64;
1548
1549 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1550 alloc:
1551 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1552 return -EINVAL;
1553 size = round_up(size, PAGE_SIZE);
1554 data = vmalloc(size);
1555 if (!data)
1556 return -ENOMEM;
1557
1558 start_end_ofs = data + 2;
1559 name_base = name_curpos = ((char *)data) + names_ofs;
1560 remaining = size - names_ofs;
1561 count = 0;
1562 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1563 struct file *file;
1564 const char *filename;
1565
1566 file = vma->vm_file;
1567 if (!file)
1568 continue;
1569 filename = file_path(file, name_curpos, remaining);
1570 if (IS_ERR(filename)) {
1571 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1572 vfree(data);
1573 size = size * 5 / 4;
1574 goto alloc;
1575 }
1576 continue;
1577 }
1578
1579 /* file_path() fills at the end, move name down */
1580 /* n = strlen(filename) + 1: */
1581 n = (name_curpos + remaining) - filename;
1582 remaining = filename - name_curpos;
1583 memmove(name_curpos, filename, n);
1584 name_curpos += n;
1585
1586 *start_end_ofs++ = vma->vm_start;
1587 *start_end_ofs++ = vma->vm_end;
1588 *start_end_ofs++ = vma->vm_pgoff;
1589 count++;
1590 }
1591
1592 /* Now we know exact count of files, can store it */
1593 data[0] = count;
1594 data[1] = PAGE_SIZE;
1595 /*
1596 * Count usually is less than current->mm->map_count,
1597 * we need to move filenames down.
1598 */
1599 n = current->mm->map_count - count;
1600 if (n != 0) {
1601 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1602 memmove(name_base - shift_bytes, name_base,
1603 name_curpos - name_base);
1604 name_curpos -= shift_bytes;
1605 }
1606
1607 size = name_curpos - (char *)data;
1608 fill_note(note, "CORE", NT_FILE, size, data);
1609 return 0;
1610 }
1611
1612 #ifdef CORE_DUMP_USE_REGSET
1613 #include <linux/regset.h>
1614
1615 struct elf_thread_core_info {
1616 struct elf_thread_core_info *next;
1617 struct task_struct *task;
1618 struct elf_prstatus prstatus;
1619 struct memelfnote notes[0];
1620 };
1621
/*
 * Everything collected for the notes segment on the regset path: the list
 * of per-thread notes plus the process-wide notes.
 */
struct elf_note_info {
	struct elf_thread_core_info *thread;	/* head of the thread list */
	struct memelfnote psinfo;	/* NT_PRPSINFO */
	struct memelfnote signote;	/* NT_SIGINFO */
	struct memelfnote auxv;		/* NT_AUXV */
	struct memelfnote files;	/* NT_FILE; data stays NULL if not built */
	user_siginfo_t csigdata;	/* payload storage for signote */
	size_t size;			/* running total of notesize() of all notes */
	int thread_notes;		/* number of notes[] slots per thread */
};
1632
1633 /*
1634 * When a regset has a writeback hook, we call it on each thread before
1635 * dumping user memory. On register window machines, this makes sure the
1636 * user memory backing the register data is up to date before we read it.
1637 */
1638 static void do_thread_regset_writeback(struct task_struct *task,
1639 const struct user_regset *regset)
1640 {
1641 if (regset->writeback)
1642 regset->writeback(task, regset, 1);
1643 }
1644
/*
 * Default size of the NT_PRSTATUS payload: the whole structure S.  The
 * regset size R is ignored here; a definition supplied before this point
 * takes precedence.
 */
#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S, R) sizeof(S)
#endif

/*
 * Default way to record FP-register validity: store V in S->pr_fpvalid.
 * R is ignored here; a definition supplied before this point takes
 * precedence.
 */
#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
#endif
1652
1653 static int fill_thread_core_info(struct elf_thread_core_info *t,
1654 const struct user_regset_view *view,
1655 long signr, size_t *total)
1656 {
1657 unsigned int i;
1658 unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
1659
1660 /*
1661 * NT_PRSTATUS is the one special case, because the regset data
1662 * goes into the pr_reg field inside the note contents, rather
1663 * than being the whole note contents. We fill the reset in here.
1664 * We assume that regset 0 is NT_PRSTATUS.
1665 */
1666 fill_prstatus(&t->prstatus, t->task, signr);
1667 (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
1668 &t->prstatus.pr_reg, NULL);
1669
1670 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1671 PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
1672 *total += notesize(&t->notes[0]);
1673
1674 do_thread_regset_writeback(t->task, &view->regsets[0]);
1675
1676 /*
1677 * Each other regset might generate a note too. For each regset
1678 * that has no core_note_type or is inactive, we leave t->notes[i]
1679 * all zero and we'll know to skip writing it later.
1680 */
1681 for (i = 1; i < view->n; ++i) {
1682 const struct user_regset *regset = &view->regsets[i];
1683 do_thread_regset_writeback(t->task, regset);
1684 if (regset->core_note_type && regset->get &&
1685 (!regset->active || regset->active(t->task, regset))) {
1686 int ret;
1687 size_t size = regset->n * regset->size;
1688 void *data = kmalloc(size, GFP_KERNEL);
1689 if (unlikely(!data))
1690 return 0;
1691 ret = regset->get(t->task, regset,
1692 0, size, data, NULL);
1693 if (unlikely(ret))
1694 kfree(data);
1695 else {
1696 if (regset->core_note_type != NT_PRFPREG)
1697 fill_note(&t->notes[i], "LINUX",
1698 regset->core_note_type,
1699 size, data);
1700 else {
1701 SET_PR_FPVALID(&t->prstatus,
1702 1, regset_size);
1703 fill_note(&t->notes[i], "CORE",
1704 NT_PRFPREG, size, data);
1705 }
1706 *total += notesize(&t->notes[i]);
1707 }
1708 }
1709 }
1710
1711 return 1;
1712 }
1713
1714 static int fill_note_info(struct elfhdr *elf, int phdrs,
1715 struct elf_note_info *info,
1716 const siginfo_t *siginfo, struct pt_regs *regs)
1717 {
1718 struct task_struct *dump_task = current;
1719 const struct user_regset_view *view = task_user_regset_view(dump_task);
1720 struct elf_thread_core_info *t;
1721 struct elf_prpsinfo *psinfo;
1722 struct core_thread *ct;
1723 unsigned int i;
1724
1725 info->size = 0;
1726 info->thread = NULL;
1727
1728 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1729 if (psinfo == NULL) {
1730 info->psinfo.data = NULL; /* So we don't free this wrongly */
1731 return 0;
1732 }
1733
1734 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1735
1736 /*
1737 * Figure out how many notes we're going to need for each thread.
1738 */
1739 info->thread_notes = 0;
1740 for (i = 0; i < view->n; ++i)
1741 if (view->regsets[i].core_note_type != 0)
1742 ++info->thread_notes;
1743
1744 /*
1745 * Sanity check. We rely on regset 0 being in NT_PRSTATUS,
1746 * since it is our one special case.
1747 */
1748 if (unlikely(info->thread_notes == 0) ||
1749 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1750 WARN_ON(1);
1751 return 0;
1752 }
1753
1754 /*
1755 * Initialize the ELF file header.
1756 */
1757 fill_elf_header(elf, phdrs,
1758 view->e_machine, view->e_flags);
1759
1760 /*
1761 * Allocate a structure for each thread.
1762 */
1763 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1764 t = kzalloc(offsetof(struct elf_thread_core_info,
1765 notes[info->thread_notes]),
1766 GFP_KERNEL);
1767 if (unlikely(!t))
1768 return 0;
1769
1770 t->task = ct->task;
1771 if (ct->task == dump_task || !info->thread) {
1772 t->next = info->thread;
1773 info->thread = t;
1774 } else {
1775 /*
1776 * Make sure to keep the original task at
1777 * the head of the list.
1778 */
1779 t->next = info->thread->next;
1780 info->thread->next = t;
1781 }
1782 }
1783
1784 /*
1785 * Now fill in each thread's information.
1786 */
1787 for (t = info->thread; t != NULL; t = t->next)
1788 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1789 return 0;
1790
1791 /*
1792 * Fill in the two process-wide notes.
1793 */
1794 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1795 info->size += notesize(&info->psinfo);
1796
1797 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1798 info->size += notesize(&info->signote);
1799
1800 fill_auxv_note(&info->auxv, current->mm);
1801 info->size += notesize(&info->auxv);
1802
1803 if (fill_files_note(&info->files) == 0)
1804 info->size += notesize(&info->files);
1805
1806 return 1;
1807 }
1808
/*
 * Total size in bytes of all collected notes: the value accumulated in
 * info->size while the notes were being filled in.
 */
static size_t get_note_info_size(struct elf_note_info *info)
{
	return info->size;
}
1813
1814 /*
1815 * Write all the notes for each thread. When writing the first thread, the
1816 * process-wide notes are interleaved after the first thread-specific note.
1817 */
1818 static int write_note_info(struct elf_note_info *info,
1819 struct coredump_params *cprm)
1820 {
1821 bool first = true;
1822 struct elf_thread_core_info *t = info->thread;
1823
1824 do {
1825 int i;
1826
1827 if (!writenote(&t->notes[0], cprm))
1828 return 0;
1829
1830 if (first && !writenote(&info->psinfo, cprm))
1831 return 0;
1832 if (first && !writenote(&info->signote, cprm))
1833 return 0;
1834 if (first && !writenote(&info->auxv, cprm))
1835 return 0;
1836 if (first && info->files.data &&
1837 !writenote(&info->files, cprm))
1838 return 0;
1839
1840 for (i = 1; i < info->thread_notes; ++i)
1841 if (t->notes[i].data &&
1842 !writenote(&t->notes[i], cprm))
1843 return 0;
1844
1845 first = false;
1846 t = t->next;
1847 } while (t);
1848
1849 return 1;
1850 }
1851
1852 static void free_note_info(struct elf_note_info *info)
1853 {
1854 struct elf_thread_core_info *threads = info->thread;
1855 while (threads) {
1856 unsigned int i;
1857 struct elf_thread_core_info *t = threads;
1858 threads = t->next;
1859 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1860 for (i = 1; i < info->thread_notes; ++i)
1861 kfree(t->notes[i].data);
1862 kfree(t);
1863 }
1864 kfree(info->psinfo.data);
1865 vfree(info->files.data);
1866 }
1867
1868 #else
1869
1870 /* Here is the structure in which status of each thread is captured. */
1871 struct elf_thread_status
1872 {
1873 struct list_head list;
1874 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1875 elf_fpregset_t fpu; /* NT_PRFPREG */
1876 struct task_struct *thread;
1877 #ifdef ELF_CORE_COPY_XFPREGS
1878 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
1879 #endif
1880 struct memelfnote notes[3];
1881 int num_notes;
1882 };
1883
1884 /*
1885 * In order to add the specific thread information for the elf file format,
1886 * we need to keep a linked list of every threads pr_status and then create
1887 * a single section for them in the final core file.
1888 */
1889 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1890 {
1891 int sz = 0;
1892 struct task_struct *p = t->thread;
1893 t->num_notes = 0;
1894
1895 fill_prstatus(&t->prstatus, p, signr);
1896 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1897
1898 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1899 &(t->prstatus));
1900 t->num_notes++;
1901 sz += notesize(&t->notes[0]);
1902
1903 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1904 &t->fpu))) {
1905 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1906 &(t->fpu));
1907 t->num_notes++;
1908 sz += notesize(&t->notes[1]);
1909 }
1910
1911 #ifdef ELF_CORE_COPY_XFPREGS
1912 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1913 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1914 sizeof(t->xfpu), &t->xfpu);
1915 t->num_notes++;
1916 sz += notesize(&t->notes[2]);
1917 }
1918 #endif
1919 return sz;
1920 }
1921
/*
 * Everything collected for the notes segment when regsets are not used:
 * the notes of the dumping task plus a list with the status of every other
 * thread.
 */
struct elf_note_info {
	struct memelfnote *notes;	/* array of notes for the dumping task */
	struct memelfnote *notes_files;	/* the NT_FILE entry in @notes, if built */
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;	/* list of struct elf_thread_status */
	elf_fpregset_t *fpu;		/* NT_PRFPREG payload */
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu;		/* ELF_CORE_XFPREG_TYPE payload */
#endif
	user_siginfo_t csigdata;	/* NT_SIGINFO payload */
	int thread_status_size;		/* total note size of the listed threads */
	int numnote;			/* number of entries used in @notes */
};
1936
1937 static int elf_note_info_init(struct elf_note_info *info)
1938 {
1939 memset(info, 0, sizeof(*info));
1940 INIT_LIST_HEAD(&info->thread_list);
1941
1942 /* Allocate space for ELF notes */
1943 info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1944 if (!info->notes)
1945 return 0;
1946 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1947 if (!info->psinfo)
1948 return 0;
1949 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1950 if (!info->prstatus)
1951 return 0;
1952 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1953 if (!info->fpu)
1954 return 0;
1955 #ifdef ELF_CORE_COPY_XFPREGS
1956 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1957 if (!info->xfpu)
1958 return 0;
1959 #endif
1960 return 1;
1961 }
1962
1963 static int fill_note_info(struct elfhdr *elf, int phdrs,
1964 struct elf_note_info *info,
1965 const siginfo_t *siginfo, struct pt_regs *regs)
1966 {
1967 struct list_head *t;
1968 struct core_thread *ct;
1969 struct elf_thread_status *ets;
1970
1971 if (!elf_note_info_init(info))
1972 return 0;
1973
1974 for (ct = current->mm->core_state->dumper.next;
1975 ct; ct = ct->next) {
1976 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1977 if (!ets)
1978 return 0;
1979
1980 ets->thread = ct->task;
1981 list_add(&ets->list, &info->thread_list);
1982 }
1983
1984 list_for_each(t, &info->thread_list) {
1985 int sz;
1986
1987 ets = list_entry(t, struct elf_thread_status, list);
1988 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1989 info->thread_status_size += sz;
1990 }
1991 /* now collect the dump for the current */
1992 memset(info->prstatus, 0, sizeof(*info->prstatus));
1993 fill_prstatus(info->prstatus, current, siginfo->si_signo);
1994 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1995
1996 /* Set up header */
1997 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1998
1999 /*
2000 * Set up the notes in similar form to SVR4 core dumps made
2001 * with info from their /proc.
2002 */
2003
2004 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2005 sizeof(*info->prstatus), info->prstatus);
2006 fill_psinfo(info->psinfo, current->group_leader, current->mm);
2007 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2008 sizeof(*info->psinfo), info->psinfo);
2009
2010 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2011 fill_auxv_note(info->notes + 3, current->mm);
2012 info->numnote = 4;
2013
2014 if (fill_files_note(info->notes + info->numnote) == 0) {
2015 info->notes_files = info->notes + info->numnote;
2016 info->numnote++;
2017 }
2018
2019 /* Try to dump the FPU. */
2020 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2021 info->fpu);
2022 if (info->prstatus->pr_fpvalid)
2023 fill_note(info->notes + info->numnote++,
2024 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2025 #ifdef ELF_CORE_COPY_XFPREGS
2026 if (elf_core_copy_task_xfpregs(current, info->xfpu))
2027 fill_note(info->notes + info->numnote++,
2028 "LINUX", ELF_CORE_XFPREG_TYPE,
2029 sizeof(*info->xfpu), info->xfpu);
2030 #endif
2031
2032 return 1;
2033 }
2034
2035 static size_t get_note_info_size(struct elf_note_info *info)
2036 {
2037 int sz = 0;
2038 int i;
2039
2040 for (i = 0; i < info->numnote; i++)
2041 sz += notesize(info->notes + i);
2042
2043 sz += info->thread_status_size;
2044
2045 return sz;
2046 }
2047
2048 static int write_note_info(struct elf_note_info *info,
2049 struct coredump_params *cprm)
2050 {
2051 int i;
2052 struct list_head *t;
2053
2054 for (i = 0; i < info->numnote; i++)
2055 if (!writenote(info->notes + i, cprm))
2056 return 0;
2057
2058 /* write out the thread status notes section */
2059 list_for_each(t, &info->thread_list) {
2060 struct elf_thread_status *tmp =
2061 list_entry(t, struct elf_thread_status, list);
2062
2063 for (i = 0; i < tmp->num_notes; i++)
2064 if (!writenote(&tmp->notes[i], cprm))
2065 return 0;
2066 }
2067
2068 return 1;
2069 }
2070
2071 static void free_note_info(struct elf_note_info *info)
2072 {
2073 while (!list_empty(&info->thread_list)) {
2074 struct list_head *tmp = info->thread_list.next;
2075 list_del(tmp);
2076 kfree(list_entry(tmp, struct elf_thread_status, list));
2077 }
2078
2079 /* Free data possibly allocated by fill_files_note(): */
2080 if (info->notes_files)
2081 vfree(info->notes_files->data);
2082
2083 kfree(info->prstatus);
2084 kfree(info->psinfo);
2085 kfree(info->notes);
2086 kfree(info->fpu);
2087 #ifdef ELF_CORE_COPY_XFPREGS
2088 kfree(info->xfpu);
2089 #endif
2090 }
2091
2092 #endif
2093
2094 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2095 struct vm_area_struct *gate_vma)
2096 {
2097 struct vm_area_struct *ret = tsk->mm->mmap;
2098
2099 if (ret)
2100 return ret;
2101 return gate_vma;
2102 }
2103 /*
2104 * Helper function for iterating across a vma list. It ensures that the caller
2105 * will visit `gate_vma' prior to terminating the search.
2106 */
2107 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2108 struct vm_area_struct *gate_vma)
2109 {
2110 struct vm_area_struct *ret;
2111
2112 ret = this_vma->vm_next;
2113 if (ret)
2114 return ret;
2115 if (this_vma == gate_vma)
2116 return NULL;
2117 return gate_vma;
2118 }
2119
2120 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2121 elf_addr_t e_shoff, int segs)
2122 {
2123 elf->e_shoff = e_shoff;
2124 elf->e_shentsize = sizeof(*shdr4extnum);
2125 elf->e_shnum = 1;
2126 elf->e_shstrndx = SHN_UNDEF;
2127
2128 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2129
2130 shdr4extnum->sh_type = SHT_NULL;
2131 shdr4extnum->sh_size = elf->e_shnum;
2132 shdr4extnum->sh_link = elf->e_shstrndx;
2133 shdr4extnum->sh_info = segs;
2134 }
2135
2136 /*
2137 * Actual dumper
2138 *
2139 * This is a two-pass process; first we find the offsets of the bits,
2140 * and then they are actually written out. If we run out of core limit
2141 * we just truncate.
2142 */
2143 static int elf_core_dump(struct coredump_params *cprm)
2144 {
2145 int has_dumped = 0;
2146 mm_segment_t fs;
2147 int segs, i;
2148 size_t vma_data_size = 0;
2149 struct vm_area_struct *vma, *gate_vma;
2150 struct elfhdr *elf = NULL;
2151 loff_t offset = 0, dataoff;
2152 struct elf_note_info info = { };
2153 struct elf_phdr *phdr4note = NULL;
2154 struct elf_shdr *shdr4extnum = NULL;
2155 Elf_Half e_phnum;
2156 elf_addr_t e_shoff;
2157 elf_addr_t *vma_filesz = NULL;
2158
2159 /*
2160 * We no longer stop all VM operations.
2161 *
2162 * This is because those proceses that could possibly change map_count
2163 * or the mmap / vma pages are now blocked in do_exit on current
2164 * finishing this core dump.
2165 *
2166 * Only ptrace can touch these memory addresses, but it doesn't change
2167 * the map_count or the pages allocated. So no possibility of crashing
2168 * exists while dumping the mm->vm_next areas to the core file.
2169 */
2170
2171 /* alloc memory for large data structures: too large to be on stack */
2172 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2173 if (!elf)
2174 goto out;
2175 /*
2176 * The number of segs are recored into ELF header as 16bit value.
2177 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2178 */
2179 segs = current->mm->map_count;
2180 segs += elf_core_extra_phdrs();
2181
2182 gate_vma = get_gate_vma(current->mm);
2183 if (gate_vma != NULL)
2184 segs++;
2185
2186 /* for notes section */
2187 segs++;
2188
2189 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2190 * this, kernel supports extended numbering. Have a look at
2191 * include/linux/elf.h for further information. */
2192 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2193
2194 /*
2195 * Collect all the non-memory information about the process for the
2196 * notes. This also sets up the file header.
2197 */
2198 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2199 goto cleanup;
2200
2201 has_dumped = 1;
2202
2203 fs = get_fs();
2204 set_fs(KERNEL_DS);
2205
2206 offset += sizeof(*elf); /* Elf header */
2207 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2208
2209 /* Write notes phdr entry */
2210 {
2211 size_t sz = get_note_info_size(&info);
2212
2213 sz += elf_coredump_extra_notes_size();
2214
2215 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2216 if (!phdr4note)
2217 goto end_coredump;
2218
2219 fill_elf_note_phdr(phdr4note, sz, offset);
2220 offset += sz;
2221 }
2222
2223 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2224
2225 if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2226 goto end_coredump;
2227 vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
2228 if (!vma_filesz)
2229 goto end_coredump;
2230
2231 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2232 vma = next_vma(vma, gate_vma)) {
2233 unsigned long dump_size;
2234
2235 dump_size = vma_dump_size(vma, cprm->mm_flags);
2236 vma_filesz[i++] = dump_size;
2237 vma_data_size += dump_size;
2238 }
2239
2240 offset += vma_data_size;
2241 offset += elf_core_extra_data_size();
2242 e_shoff = offset;
2243
2244 if (e_phnum == PN_XNUM) {
2245 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2246 if (!shdr4extnum)
2247 goto end_coredump;
2248 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2249 }
2250
2251 offset = dataoff;
2252
2253 if (!dump_emit(cprm, elf, sizeof(*elf)))
2254 goto end_coredump;
2255
2256 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2257 goto end_coredump;
2258
2259 /* Write program headers for segments dump */
2260 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2261 vma = next_vma(vma, gate_vma)) {
2262 struct elf_phdr phdr;
2263
2264 phdr.p_type = PT_LOAD;
2265 phdr.p_offset = offset;
2266 phdr.p_vaddr = vma->vm_start;
2267 phdr.p_paddr = 0;
2268 phdr.p_filesz = vma_filesz[i++];
2269 phdr.p_memsz = vma->vm_end - vma->vm_start;
2270 offset += phdr.p_filesz;
2271 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2272 if (vma->vm_flags & VM_WRITE)
2273 phdr.p_flags |= PF_W;
2274 if (vma->vm_flags & VM_EXEC)
2275 phdr.p_flags |= PF_X;
2276 phdr.p_align = ELF_EXEC_PAGESIZE;
2277
2278 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2279 goto end_coredump;
2280 }
2281
2282 if (!elf_core_write_extra_phdrs(cprm, offset))
2283 goto end_coredump;
2284
2285 /* write out the notes section */
2286 if (!write_note_info(&info, cprm))
2287 goto end_coredump;
2288
2289 if (elf_coredump_extra_notes_write(cprm))
2290 goto end_coredump;
2291
2292 /* Align to page */
2293 if (!dump_skip(cprm, dataoff - cprm->pos))
2294 goto end_coredump;
2295
2296 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2297 vma = next_vma(vma, gate_vma)) {
2298 unsigned long addr;
2299 unsigned long end;
2300
2301 end = vma->vm_start + vma_filesz[i++];
2302
2303 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2304 struct page *page;
2305 int stop;
2306
2307 page = get_dump_page(addr);
2308 if (page) {
2309 void *kaddr = kmap(page);
2310 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2311 kunmap(page);
2312 put_page(page);
2313 } else
2314 stop = !dump_skip(cprm, PAGE_SIZE);
2315 if (stop)
2316 goto end_coredump;
2317 }
2318 }
2319 dump_truncate(cprm);
2320
2321 if (!elf_core_write_extra_data(cprm))
2322 goto end_coredump;
2323
2324 if (e_phnum == PN_XNUM) {
2325 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2326 goto end_coredump;
2327 }
2328
2329 end_coredump:
2330 set_fs(fs);
2331
2332 cleanup:
2333 free_note_info(&info);
2334 kfree(shdr4extnum);
2335 vfree(vma_filesz);
2336 kfree(phdr4note);
2337 kfree(elf);
2338 out:
2339 return has_dumped;
2340 }
2341
2342 #endif /* CONFIG_ELF_CORE */
2343
2344 static int __init init_elf_binfmt(void)
2345 {
2346 register_binfmt(&elf_format);
2347 return 0;
2348 }
2349
2350 static void __exit exit_elf_binfmt(void)
2351 {
2352 /* Remove the COFF and ELF loaders. */
2353 unregister_binfmt(&elf_format);
2354 }
2355
/*
 * Hook the ELF handler's init and exit functions into the kernel's
 * initcall / module-exit machinery and declare the licence.
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");