/*
 * fs/proc/base.c — from mirror_ubuntu-artful-kernel.git
 * (snapshot at commit "prctl: Add force disable speculation")
 */
1 /*
2 * linux/fs/proc/base.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 *
6 * proc base directory handling functions
7 *
8 * 1999, Al Viro. Rewritten. Now it covers the whole per-process part.
9 * Instead of using magical inumbers to determine the kind of object
10 * we allocate and fill in-core inodes upon lookup. They don't even
11 * go into icache. We cache the reference to task_struct upon lookup too.
12 * Eventually it should become a filesystem in its own. We don't use the
13 * rest of procfs anymore.
14 *
15 *
16 * Changelog:
17 * 17-Jan-2005
18 * Allan Bezerra
19 * Bruna Moreira <bruna.moreira@indt.org.br>
20 * Edjard Mota <edjard.mota@indt.org.br>
21 * Ilias Biris <ilias.biris@indt.org.br>
22 * Mauricio Lin <mauricio.lin@indt.org.br>
23 *
24 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
25 *
26 * A new process specific entry (smaps) included in /proc. It shows the
27 * size of rss for each memory area. The maps entry lacks information
28 * about physical memory size (rss) for each mapped file, i.e.,
29 * rss information for executables and library files.
30 * This additional information is useful for any tools that need to know
31 * about physical memory consumption for a process specific library.
32 *
33 * Changelog:
34 * 21-Feb-2005
35 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
36 * Pud inclusion in the page table walking.
37 *
38 * ChangeLog:
39 * 10-Mar-2005
40 * 10LE Instituto Nokia de Tecnologia - INdT:
41 * A better way to walks through the page table as suggested by Hugh Dickins.
42 *
43 * Simo Piiroinen <simo.piiroinen@nokia.com>:
44 * Smaps information related to shared, private, clean and dirty pages.
45 *
46 * Paul Mundt <paul.mundt@nokia.com>:
47 * Overall revision about smaps.
48 */
49
50 #include <linux/uaccess.h>
51
52 #include <linux/errno.h>
53 #include <linux/time.h>
54 #include <linux/proc_fs.h>
55 #include <linux/stat.h>
56 #include <linux/task_io_accounting_ops.h>
57 #include <linux/init.h>
58 #include <linux/capability.h>
59 #include <linux/file.h>
60 #include <linux/fdtable.h>
61 #include <linux/string.h>
62 #include <linux/seq_file.h>
63 #include <linux/namei.h>
64 #include <linux/mnt_namespace.h>
65 #include <linux/mm.h>
66 #include <linux/swap.h>
67 #include <linux/rcupdate.h>
68 #include <linux/kallsyms.h>
69 #include <linux/stacktrace.h>
70 #include <linux/resource.h>
71 #include <linux/module.h>
72 #include <linux/mount.h>
73 #include <linux/security.h>
74 #include <linux/ptrace.h>
75 #include <linux/tracehook.h>
76 #include <linux/printk.h>
77 #include <linux/cgroup.h>
78 #include <linux/cpuset.h>
79 #include <linux/audit.h>
80 #include <linux/poll.h>
81 #include <linux/nsproxy.h>
82 #include <linux/oom.h>
83 #include <linux/elf.h>
84 #include <linux/pid_namespace.h>
85 #include <linux/user_namespace.h>
86 #include <linux/fs_struct.h>
87 #include <linux/slab.h>
88 #include <linux/sched/autogroup.h>
89 #include <linux/sched/mm.h>
90 #include <linux/sched/coredump.h>
91 #include <linux/sched/debug.h>
92 #include <linux/sched/stat.h>
93 #include <linux/flex_array.h>
94 #include <linux/posix-timers.h>
95 #ifdef CONFIG_HARDWALL
96 #include <asm/hardwall.h>
97 #endif
98 #include <trace/events/oom.h>
99 #include "internal.h"
100 #include "fd.h"
101
102 /* NOTE:
103 * Implementing inode permission operations in /proc is almost
104 * certainly an error. Permission checks need to happen during
105 * each system call not at open time. The reason is that most of
106 * what we wish to check for permissions in /proc varies at runtime.
107 *
108 * The classic example of a problem is opening file descriptors
109 * in /proc for a task before it execs a suid executable.
110 */
111
/*
 * Cached link counts for the /proc/<pid>/task/<tid> and /proc/<pid>
 * directories, computed once at boot by pid_entry_nlink().
 */
static u8 nlink_tid;
static u8 nlink_tgid;

/*
 * One entry in a /proc/<pid> (or per-thread) directory table: the entry
 * name, its mode, and the inode/file operations plus type-specific
 * payload used when the entry is instantiated.
 */
struct pid_entry {
	const char *name;	/* entry name, not NUL-counted (see len) */
	unsigned int len;	/* strlen(name), precomputed by NOD() */
	umode_t mode;		/* file type and permission bits */
	const struct inode_operations *iop;
	const struct file_operations *fop;
	union proc_op op;	/* show callback, link callback, ... */
};
123
/*
 * NOD() is the generic pid_entry initializer; the macros below are
 * shorthands for the common entry kinds.
 */
#define NOD(NAME, MODE, IOP, FOP, OP) { \
	.name = (NAME), \
	.len = sizeof(NAME) - 1, \
	.mode = MODE, \
	.iop = IOP, \
	.fop = FOP, \
	.op = OP, \
}

/* Subdirectory with its own inode and file operations. */
#define DIR(NAME, MODE, iops, fops) \
	NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
/* Symlink whose target is computed by get_link() at traversal time. */
#define LNK(NAME, get_link) \
	NOD(NAME, (S_IFLNK|S_IRWXUGO), \
		&proc_pid_link_inode_operations, NULL, \
		{ .proc_get_link = get_link } )
/* Regular file backed by a dedicated file_operations. */
#define REG(NAME, MODE, fops) \
	NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
/* One-value file rendered entirely by a single show() callback. */
#define ONE(NAME, MODE, show) \
	NOD(NAME, (S_IFREG|(MODE)), \
		NULL, &proc_single_file_operations, \
		{ .proc_show = show } )
/* LSM attribute file under /proc/<pid>/attr/. */
#define ATTR(LSM, NAME, MODE) \
	NOD(NAME, (S_IFREG|(MODE)), \
		NULL, &proc_pid_attr_operations, \
		{ .lsm = LSM })
149
150 /*
151 * Count the number of hardlinks for the pid_entry table, excluding the .
152 * and .. links.
153 */
154 static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
155 unsigned int n)
156 {
157 unsigned int i;
158 unsigned int count;
159
160 count = 2;
161 for (i = 0; i < n; ++i) {
162 if (S_ISDIR(entries[i].mode))
163 ++count;
164 }
165
166 return count;
167 }
168
169 static int get_task_root(struct task_struct *task, struct path *root)
170 {
171 int result = -ENOENT;
172
173 task_lock(task);
174 if (task->fs) {
175 get_fs_root(task->fs, root);
176 result = 0;
177 }
178 task_unlock(task);
179 return result;
180 }
181
182 static int proc_cwd_link(struct dentry *dentry, struct path *path)
183 {
184 struct task_struct *task = get_proc_task(d_inode(dentry));
185 int result = -ENOENT;
186
187 if (task) {
188 task_lock(task);
189 if (task->fs) {
190 get_fs_pwd(task->fs, path);
191 result = 0;
192 }
193 task_unlock(task);
194 put_task_struct(task);
195 }
196 return result;
197 }
198
199 static int proc_root_link(struct dentry *dentry, struct path *path)
200 {
201 struct task_struct *task = get_proc_task(d_inode(dentry));
202 int result = -ENOENT;
203
204 if (task) {
205 result = get_task_root(task, path);
206 put_task_struct(task);
207 }
208 return result;
209 }
210
/*
 * Read handler for /proc/<pid>/cmdline.
 *
 * Two layouts of the command line are handled, distinguished by the last
 * byte of the ARGV area:
 *   - it is '\0': the argument strings occupy exactly [arg_start, arg_end)
 *     and are copied out verbatim;
 *   - otherwise (e.g. after the task rewrote its own argv): a single
 *     string is assumed to start at arg_start and extend into the
 *     environment area [env_start, env_end), stopping at the first '\0'.
 * Reads are inherently racy against the target modifying its own address
 * space.
 */
static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
				     size_t _count, loff_t *pos)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	char *page;
	unsigned long count = _count;
	unsigned long arg_start, arg_end, env_start, env_end;
	unsigned long len1, len2, len;
	unsigned long p;
	char c;
	ssize_t rv;

	BUG_ON(*pos < 0);

	tsk = get_proc_task(file_inode(file));
	if (!tsk)
		return -ESRCH;
	mm = get_task_mm(tsk);
	put_task_struct(tsk);
	if (!mm)
		return 0;
	/* Check if process spawned far enough to have cmdline. */
	if (!mm->env_end) {
		rv = 0;
		goto out_mmput;
	}

	/* Bounce buffer between the target mm and userspace. */
	page = (char *)__get_free_page(GFP_TEMPORARY);
	if (!page) {
		rv = -ENOMEM;
		goto out_mmput;
	}

	/* Snapshot the argv/env boundaries under mmap_sem. */
	down_read(&mm->mmap_sem);
	arg_start = mm->arg_start;
	arg_end = mm->arg_end;
	env_start = mm->env_start;
	env_end = mm->env_end;
	up_read(&mm->mmap_sem);

	BUG_ON(arg_start > arg_end);
	BUG_ON(env_start > env_end);

	len1 = arg_end - arg_start;
	len2 = env_end - env_start;

	/* Empty ARGV. */
	if (len1 == 0) {
		rv = 0;
		goto out_free_page;
	}
	/*
	 * Inherently racy -- command line shares address space
	 * with code and data.
	 */
	rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0);
	if (rv <= 0)
		goto out_free_page;

	rv = 0;

	if (c == '\0') {
		/* Command line (set of strings) occupies whole ARGV. */
		if (len1 <= *pos)
			goto out_free_page;

		p = arg_start + *pos;
		len = len1 - *pos;
		while (count > 0 && len > 0) {
			unsigned int _count;
			int nr_read;

			/* Copy at most one page per iteration. */
			_count = min3(count, len, PAGE_SIZE);
			nr_read = access_remote_vm(mm, p, page, _count, 0);
			if (nr_read < 0)
				rv = nr_read;
			if (nr_read <= 0)
				goto out_free_page;

			if (copy_to_user(buf, page, nr_read)) {
				rv = -EFAULT;
				goto out_free_page;
			}

			p += nr_read;
			len -= nr_read;
			buf += nr_read;
			count -= nr_read;
			rv += nr_read;
		}
	} else {
		/*
		 * Command line (1 string) occupies ARGV and
		 * extends into ENVP.
		 */
		struct {
			unsigned long p;
			unsigned long len;
		} cmdline[2] = {
			{ .p = arg_start, .len = len1 },
			{ .p = env_start, .len = len2 },
		};
		loff_t pos1 = *pos;
		unsigned int i;

		/* Skip whole regions lying entirely before *pos. */
		i = 0;
		while (i < 2 && pos1 >= cmdline[i].len) {
			pos1 -= cmdline[i].len;
			i++;
		}
		while (i < 2) {
			p = cmdline[i].p + pos1;
			len = cmdline[i].len - pos1;
			while (count > 0 && len > 0) {
				unsigned int _count, l;
				int nr_read;
				bool final;

				_count = min3(count, len, PAGE_SIZE);
				nr_read = access_remote_vm(mm, p, page, _count, 0);
				if (nr_read < 0)
					rv = nr_read;
				if (nr_read <= 0)
					goto out_free_page;

				/*
				 * Command line can be shorter than whole ARGV
				 * even if last "marker" byte says it is not.
				 */
				final = false;
				l = strnlen(page, nr_read);
				if (l < nr_read) {
					nr_read = l;
					final = true;
				}

				if (copy_to_user(buf, page, nr_read)) {
					rv = -EFAULT;
					goto out_free_page;
				}

				p += nr_read;
				len -= nr_read;
				buf += nr_read;
				count -= nr_read;
				rv += nr_read;

				/* Hit the terminating NUL: stop here. */
				if (final)
					goto out_free_page;
			}

			/* Only first chunk can be read partially. */
			pos1 = 0;
			i++;
		}
	}

out_free_page:
	free_page((unsigned long)page);
out_mmput:
	mmput(mm);
	if (rv > 0)
		*pos += rv;
	return rv;
}
377
/* File operations for /proc/<pid>/cmdline. */
static const struct file_operations proc_pid_cmdline_ops = {
	.read	= proc_pid_cmdline_read,
	.llseek	= generic_file_llseek,
};
382
#ifdef CONFIG_KALLSYMS
/*
 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
 * Returns the resolved symbol. If that fails, simply return the address.
 */
static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
{
	unsigned long wchan;
	char symname[KSYM_NAME_LEN];

	wchan = get_wchan(task);

	/*
	 * Only reveal the symbol name to callers allowed PTRACE_MODE_READ
	 * on the task; everyone else just sees "0".
	 */
	if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)
			&& !lookup_symbol_name(wchan, symname))
		seq_printf(m, "%s", symname);
	else
		seq_putc(m, '0');

	return 0;
}
#endif /* CONFIG_KALLSYMS */
405
/*
 * Take the task's ->cred_guard_mutex (serializing against exec) and verify
 * the caller has ptrace-attach rights over @task. Returns 0 with the mutex
 * held (pair with unlock_trace()), -EPERM, or -EINTR if killed while
 * waiting.
 */
static int lock_trace(struct task_struct *task)
{
	int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
	if (err)
		return err;
	if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
		mutex_unlock(&task->signal->cred_guard_mutex);
		return -EPERM;
	}
	return 0;
}

/* Drop the mutex taken by a successful lock_trace(). */
static void unlock_trace(struct task_struct *task)
{
	mutex_unlock(&task->signal->cred_guard_mutex);
}
422
#ifdef CONFIG_STACKTRACE

/* Maximum number of stack frames reported by /proc/<pid>/stack. */
#define MAX_STACK_TRACE_DEPTH	64

/*
 * /proc/<pid>/stack: dump the task's kernel stack trace, one frame per
 * line as "[<address>] symbol". Requires ptrace-attach rights via
 * lock_trace(); %pK additionally applies kernel pointer restrictions.
 */
static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
{
	struct stack_trace trace;
	unsigned long *entries;
	int err;
	int i;

	entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	trace.nr_entries	= 0;
	trace.max_entries	= MAX_STACK_TRACE_DEPTH;
	trace.entries		= entries;
	trace.skip		= 0;

	err = lock_trace(task);
	if (!err) {
		save_stack_trace_tsk(task, &trace);

		for (i = 0; i < trace.nr_entries; i++) {
			seq_printf(m, "[<%pK>] %pB\n",
				   (void *)entries[i], (void *)entries[i]);
		}
		unlock_trace(task);
	}
	kfree(entries);

	return err;
}
#endif
459
#ifdef CONFIG_SCHED_INFO
/*
 * Provides /proc/PID/schedstat
 */
static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
			      struct pid *pid, struct task_struct *task)
{
	/*
	 * Three fields: se.sum_exec_runtime, sched_info.run_delay and
	 * sched_info.pcount; all zeros when schedstats are disabled.
	 */
	if (unlikely(!sched_info_on()))
		seq_printf(m, "0 0 0\n");
	else
		seq_printf(m, "%llu %llu %lu\n",
		   (unsigned long long)task->se.sum_exec_runtime,
		   (unsigned long long)task->sched_info.run_delay,
		   task->sched_info.pcount);

	return 0;
}
#endif
478
#ifdef CONFIG_LATENCYTOP
/*
 * seq_file show for /proc/<pid>/latency: dump the task's latency-top
 * records, one line per used slot: count, total time, max time, then the
 * recorded backtrace symbols.
 */
static int lstats_show_proc(struct seq_file *m, void *v)
{
	int i;
	struct inode *inode = m->private;
	struct task_struct *task = get_proc_task(inode);

	if (!task)
		return -ESRCH;
	seq_puts(m, "Latency Top version : v0.1\n");
	/* latency_record[] is a fixed array of 32 slots on the task. */
	for (i = 0; i < 32; i++) {
		struct latency_record *lr = &task->latency_record[i];
		/* An empty backtrace marks an unused slot. */
		if (lr->backtrace[0]) {
			int q;
			seq_printf(m, "%i %li %li",
				   lr->count, lr->time, lr->max);
			for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
				unsigned long bt = lr->backtrace[q];
				/* 0 and ULONG_MAX both terminate the trace. */
				if (!bt)
					break;
				if (bt == ULONG_MAX)
					break;
				seq_printf(m, " %ps", (void *)bt);
			}
			seq_putc(m, '\n');
		}

	}
	put_task_struct(task);
	return 0;
}

static int lstats_open(struct inode *inode, struct file *file)
{
	return single_open(file, lstats_show_proc, inode);
}

/* Any write to /proc/<pid>/latency clears the task's latency records. */
static ssize_t lstats_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *offs)
{
	struct task_struct *task = get_proc_task(file_inode(file));

	if (!task)
		return -ESRCH;
	clear_all_latency_tracing(task);
	put_task_struct(task);

	return count;
}

static const struct file_operations proc_lstats_operations = {
	.open		= lstats_open,
	.read		= seq_read,
	.write		= lstats_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

#endif
538
/*
 * /proc/<pid>/oom_score: the task's OOM badness heuristic, normalized to
 * 0..1000 against total RAM plus swap pages.
 */
static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
{
	unsigned long totalpages = totalram_pages + total_swap_pages;
	unsigned long points = 0;

	points = oom_badness(task, NULL, NULL, totalpages) *
					1000 / totalpages;
	seq_printf(m, "%lu\n", points);

	return 0;
}
551
/*
 * Human-readable label and unit for each RLIMIT_* index, used by
 * proc_pid_limits(); a NULL unit means the limit is a bare number.
 */
struct limit_names {
	const char *name;
	const char *unit;
};

static const struct limit_names lnames[RLIM_NLIMITS] = {
	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
	[RLIMIT_DATA] = {"Max data size", "bytes"},
	[RLIMIT_STACK] = {"Max stack size", "bytes"},
	[RLIMIT_CORE] = {"Max core file size", "bytes"},
	[RLIMIT_RSS] = {"Max resident set", "bytes"},
	[RLIMIT_NPROC] = {"Max processes", "processes"},
	[RLIMIT_NOFILE] = {"Max open files", "files"},
	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
	[RLIMIT_AS] = {"Max address space", "bytes"},
	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
	[RLIMIT_NICE] = {"Max nice priority", NULL},
	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
};
575
/* Display limits for a process */
static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
			   struct pid *pid, struct task_struct *task)
{
	unsigned int i;
	unsigned long flags;

	struct rlimit rlim[RLIM_NLIMITS];

	/* Snapshot all rlimits under the sighand lock for consistency. */
	if (!lock_task_sighand(task, &flags))
		return 0;
	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
	unlock_task_sighand(task, &flags);

	/*
	 * print the file header
	 */
	seq_printf(m, "%-25s %-20s %-20s %-10s\n",
		   "Limit", "Soft Limit", "Hard Limit", "Units");

	/* One row per limit: label, soft, hard, unit (from lnames[]). */
	for (i = 0; i < RLIM_NLIMITS; i++) {
		if (rlim[i].rlim_cur == RLIM_INFINITY)
			seq_printf(m, "%-25s %-20s ",
				   lnames[i].name, "unlimited");
		else
			seq_printf(m, "%-25s %-20lu ",
				   lnames[i].name, rlim[i].rlim_cur);

		if (rlim[i].rlim_max == RLIM_INFINITY)
			seq_printf(m, "%-20s ", "unlimited");
		else
			seq_printf(m, "%-20lu ", rlim[i].rlim_max);

		if (lnames[i].unit)
			seq_printf(m, "%-10s\n", lnames[i].unit);
		else
			seq_putc(m, '\n');
	}

	return 0;
}
617
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
/*
 * /proc/<pid>/syscall: report the syscall the task is blocked in, its six
 * arguments, and the stack pointer / program counter. Prints "running"
 * when the task is not inside a syscall, and only "<nr> sp pc" when the
 * reported number is negative. Requires ptrace-attach rights via
 * lock_trace().
 */
static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
			    struct pid *pid, struct task_struct *task)
{
	long nr;
	unsigned long args[6], sp, pc;
	int res;

	res = lock_trace(task);
	if (res)
		return res;

	if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
		seq_puts(m, "running\n");
	else if (nr < 0)
		seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
	else
		seq_printf(m,
		       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
		       nr,
		       args[0], args[1], args[2], args[3], args[4], args[5],
		       sp, pc);
	unlock_trace(task);

	return 0;
}
#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
645
646 /************************************************************************/
647 /* Here the fs part begins */
648 /************************************************************************/
649
650 /* permission checks */
651 static int proc_fd_access_allowed(struct inode *inode)
652 {
653 struct task_struct *task;
654 int allowed = 0;
655 /* Allow access to a task's file descriptors if it is us or we
656 * may use ptrace attach to the process and find out that
657 * information.
658 */
659 task = get_proc_task(inode);
660 if (task) {
661 allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
662 put_task_struct(task);
663 }
664 return allowed;
665 }
666
/*
 * setattr for /proc/<pid> inodes. chmod is always refused (ATTR_MODE),
 * and inodes whose uid/gid have no mapping in the superblock's user
 * namespace cannot be modified at all.
 */
int proc_setattr(struct dentry *dentry, struct iattr *attr)
{
	int error;
	struct inode *inode = d_inode(dentry);
	struct user_namespace *s_user_ns;

	if (attr->ia_valid & ATTR_MODE)
		return -EPERM;

	/* Don't let anyone mess with weird proc files */
	s_user_ns = inode->i_sb->s_user_ns;
	if (!kuid_has_mapping(s_user_ns, inode->i_uid) ||
	    !kgid_has_mapping(s_user_ns, inode->i_gid))
		return -EPERM;

	error = setattr_prepare(dentry, attr);
	if (error)
		return error;

	setattr_copy(inode, attr);
	mark_inode_dirty(inode);
	return 0;
}
690
/*
 * May current process learn task's sched/cmdline info (for hide_pid_min=1)
 * or euid/egid (for hide_pid_min=2)?
 */
static bool has_pid_permissions(struct pid_namespace *pid,
				 struct task_struct *task,
				 int hide_pid_min)
{
	/* hide_pid below the threshold: no restriction applies. */
	if (pid->hide_pid < hide_pid_min)
		return true;
	/* Members of the configured gid= mount group are exempt. */
	if (in_group_p(pid->pid_gid))
		return true;
	/* Otherwise fall back to the ptrace-read access check. */
	return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
}
705
706
/*
 * permission() for /proc/<pid> directories: enforce the hidepid= mount
 * option before falling back to normal mode-bit checking.
 */
static int proc_pid_permission(struct inode *inode, int mask)
{
	struct pid_namespace *pid = inode->i_sb->s_fs_info;
	struct task_struct *task;
	bool has_perms;

	task = get_proc_task(inode);
	if (!task)
		return -ESRCH;
	has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
	put_task_struct(task);

	if (!has_perms) {
		if (pid->hide_pid == HIDEPID_INVISIBLE) {
			/*
			 * Let's make getdents(), stat(), and open()
			 * consistent with each other.  If a process
			 * may not stat() a file, it shouldn't be seen
			 * in procfs at all.
			 */
			return -ENOENT;
		}

		return -EPERM;
	}
	return generic_permission(inode, mask);
}
734
735
736
/* Default inode operations for proc entries: only setattr is special. */
static const struct inode_operations proc_def_inode_operations = {
	.setattr	= proc_setattr,
};

/*
 * seq_file show for ONE() entries: resolve the pid namespace, pid and
 * task for this inode and delegate to the entry's proc_show() callback.
 */
static int proc_single_show(struct seq_file *m, void *v)
{
	struct inode *inode = m->private;
	struct pid_namespace *ns;
	struct pid *pid;
	struct task_struct *task;
	int ret;

	ns = inode->i_sb->s_fs_info;
	pid = proc_pid(inode);
	task = get_pid_task(pid, PIDTYPE_PID);
	if (!task)
		return -ESRCH;

	ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);

	put_task_struct(task);
	return ret;
}

static int proc_single_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, proc_single_show, inode);
}

/* Shared file operations for all one-value ONE() proc files. */
static const struct file_operations proc_single_file_operations = {
	.open		= proc_single_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
772
773
/*
 * Resolve and pin the target task's mm for a /proc/<pid> file, after an
 * access check in @mode (PTRACE_MODE_*). Returns an ERR_PTR on failure or
 * NULL if the task has no mm. On success the mm_struct itself is pinned
 * with mmgrab() so it cannot be freed, but its address space is NOT kept
 * alive -- users must take a temporary mmget_not_zero() reference before
 * touching memory.
 */
struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
{
	struct task_struct *task = get_proc_task(inode);
	struct mm_struct *mm = ERR_PTR(-ESRCH);

	if (task) {
		mm = mm_access(task, mode | PTRACE_MODE_FSCREDS);
		put_task_struct(task);

		if (!IS_ERR_OR_NULL(mm)) {
			/* ensure this mm_struct can't be freed */
			mmgrab(mm);
			/* but do not pin its memory */
			mmput(mm);
		}
	}

	return mm;
}
793
/*
 * Common open helper: stash the access-checked mm in file->private_data
 * (may be NULL if the task has no mm; readers treat that as EOF).
 */
static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
{
	struct mm_struct *mm = proc_mem_open(inode, mode);

	if (IS_ERR(mm))
		return PTR_ERR(mm);

	file->private_data = mm;
	return 0;
}

/* open() for /proc/<pid>/mem: requires full ptrace-attach rights. */
static int mem_open(struct inode *inode, struct file *file)
{
	int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH);

	/* OK to pass negative loff_t, we can catch out-of-range */
	file->f_mode |= FMODE_UNSIGNED_OFFSET;

	return ret;
}
814
/*
 * Common read/write engine for /proc/<pid>/mem: shuttle up to @count
 * bytes between userspace and the target mm at offset *ppos, one page at
 * a time through a bounce buffer. @write selects direction. FOLL_FORCE
 * allows access regardless of the target's VMA protections. Returns bytes
 * transferred, or a negative error if nothing was transferred.
 */
static ssize_t mem_rw(struct file *file, char __user *buf,
			size_t count, loff_t *ppos, int write)
{
	struct mm_struct *mm = file->private_data;
	unsigned long addr = *ppos;
	ssize_t copied;
	char *page;
	unsigned int flags;

	if (!mm)
		return 0;

	page = (char *)__get_free_page(GFP_TEMPORARY);
	if (!page)
		return -ENOMEM;

	copied = 0;
	/* The open-time mmgrab() does not pin the address space; do so now. */
	if (!mmget_not_zero(mm))
		goto free;

	flags = FOLL_FORCE | (write ? FOLL_WRITE : 0);

	while (count > 0) {
		int this_len = min_t(int, count, PAGE_SIZE);

		if (write && copy_from_user(page, buf, this_len)) {
			copied = -EFAULT;
			break;
		}

		this_len = access_remote_vm(mm, addr, page, this_len, flags);
		if (!this_len) {
			/* Only report -EIO if no bytes were moved at all. */
			if (!copied)
				copied = -EIO;
			break;
		}

		if (!write && copy_to_user(buf, page, this_len)) {
			copied = -EFAULT;
			break;
		}

		buf += this_len;
		addr += this_len;
		copied += this_len;
		count -= this_len;
	}
	*ppos = addr;

	mmput(mm);
free:
	free_page((unsigned long) page);
	return copied;
}
869
/* read() for /proc/<pid>/mem: thin wrapper over mem_rw(). */
static ssize_t mem_read(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	return mem_rw(file, buf, count, ppos, 0);
}

/* write() for /proc/<pid>/mem; the cast only drops const for mem_rw(). */
static ssize_t mem_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	return mem_rw(file, (char __user*)buf, count, ppos, 1);
}
881
882 loff_t mem_lseek(struct file *file, loff_t offset, int orig)
883 {
884 switch (orig) {
885 case 0:
886 file->f_pos = offset;
887 break;
888 case 1:
889 file->f_pos += offset;
890 break;
891 default:
892 return -EINVAL;
893 }
894 force_successful_syscall_return();
895 return file->f_pos;
896 }
897
/* release(): drop the mmgrab() reference taken at open time. */
static int mem_release(struct inode *inode, struct file *file)
{
	struct mm_struct *mm = file->private_data;
	if (mm)
		mmdrop(mm);
	return 0;
}

/* File operations for /proc/<pid>/mem. */
static const struct file_operations proc_mem_operations = {
	.llseek		= mem_lseek,
	.read		= mem_read,
	.write		= mem_write,
	.open		= mem_open,
	.release	= mem_release,
};
913
/* open() for /proc/<pid>/environ: read-level ptrace access suffices. */
static int environ_open(struct inode *inode, struct file *file)
{
	return __mem_open(inode, file, PTRACE_MODE_READ);
}

/*
 * Read handler for /proc/<pid>/environ: copy the target's environment
 * block [env_start, env_end) out through a bounce page. *ppos is a byte
 * offset into that block.
 */
static ssize_t environ_read(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	char *page;
	unsigned long src = *ppos;
	int ret = 0;
	struct mm_struct *mm = file->private_data;
	unsigned long env_start, env_end;

	/* Ensure the process spawned far enough to have an environment. */
	if (!mm || !mm->env_end)
		return 0;

	page = (char *)__get_free_page(GFP_TEMPORARY);
	if (!page)
		return -ENOMEM;

	ret = 0;
	/* Pin the address space before touching it (open only grabbed it). */
	if (!mmget_not_zero(mm))
		goto free;

	/* Snapshot the environment boundaries under mmap_sem. */
	down_read(&mm->mmap_sem);
	env_start = mm->env_start;
	env_end = mm->env_end;
	up_read(&mm->mmap_sem);

	while (count > 0) {
		size_t this_len, max_len;
		int retval;

		/* Stop at the end of the environment block. */
		if (src >= (env_end - env_start))
			break;

		this_len = env_end - (env_start + src);

		/* Copy at most one page per iteration. */
		max_len = min_t(size_t, PAGE_SIZE, count);
		this_len = min(max_len, this_len);

		retval = access_remote_vm(mm, (env_start + src), page, this_len, 0);

		if (retval <= 0) {
			ret = retval;
			break;
		}

		if (copy_to_user(buf, page, retval)) {
			ret = -EFAULT;
			break;
		}

		ret += retval;
		src += retval;
		buf += retval;
		count -= retval;
	}
	*ppos = src;
	mmput(mm);

free:
	free_page((unsigned long) page);
	return ret;
}

/* File operations for /proc/<pid>/environ. */
static const struct file_operations proc_environ_operations = {
	.open		= environ_open,
	.read		= environ_read,
	.llseek		= generic_file_llseek,
	.release	= mem_release,
};
988
/* open() for /proc/<pid>/auxv: read-level ptrace access suffices. */
static int auxv_open(struct inode *inode, struct file *file)
{
	return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
}

/*
 * Read handler for /proc/<pid>/auxv: dump the saved ELF auxiliary vector.
 * saved_auxv is a flat array of (type, value) pairs terminated by an
 * AT_NULL entry; scan for the terminator to find how much to expose.
 */
static ssize_t auxv_read(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	struct mm_struct *mm = file->private_data;
	unsigned int nwords = 0;

	if (!mm)
		return 0;
	do {
		nwords += 2;
	} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
	return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
				       nwords * sizeof(mm->saved_auxv[0]));
}

/* File operations for /proc/<pid>/auxv. */
static const struct file_operations proc_auxv_operations = {
	.open		= auxv_open,
	.read		= auxv_read,
	.llseek		= generic_file_llseek,
	.release	= mem_release,
};
1015
/*
 * Read handler for the legacy /proc/<pid>/oom_adj: map the effective
 * oom_score_adj back onto the old oom_adj scale (OOM_ADJUST_MIN..MAX,
 * with OOM_SCORE_ADJ_MAX reported as OOM_ADJUST_MAX).
 */
static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
			    loff_t *ppos)
{
	struct task_struct *task = get_proc_task(file_inode(file));
	char buffer[PROC_NUMBUF];
	int oom_adj = OOM_ADJUST_MIN;
	size_t len;

	if (!task)
		return -ESRCH;
	if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
		oom_adj = OOM_ADJUST_MAX;
	else
		oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
			  OOM_SCORE_ADJ_MAX;
	put_task_struct(task);
	len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
	return simple_read_from_buffer(buf, count, ppos, buffer, len);
}
1035
/*
 * Common setter for oom_adj (@legacy=true) and oom_score_adj. Lowering
 * the score below the task's oom_score_adj_min requires CAP_SYS_RESOURCE.
 * After updating the target, the new value is propagated to all other
 * processes (not threads) that share the same mm, so the whole sharing
 * group keeps a consistent OOM score. Serialized by a local mutex.
 */
static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
{
	static DEFINE_MUTEX(oom_adj_mutex);
	struct mm_struct *mm = NULL;
	struct task_struct *task;
	int err = 0;

	task = get_proc_task(file_inode(file));
	if (!task)
		return -ESRCH;

	mutex_lock(&oom_adj_mutex);
	if (legacy) {
		if (oom_adj < task->signal->oom_score_adj &&
				!capable(CAP_SYS_RESOURCE)) {
			err = -EACCES;
			goto err_unlock;
		}
		/*
		 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
		 * /proc/pid/oom_score_adj instead.
		 */
		pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
			  current->comm, task_pid_nr(current), task_pid_nr(task),
			  task_pid_nr(task));
	} else {
		if ((short)oom_adj < task->signal->oom_score_adj_min &&
				!capable(CAP_SYS_RESOURCE)) {
			err = -EACCES;
			goto err_unlock;
		}
	}

	/*
	 * Make sure we will check other processes sharing the mm if this is
	 * not vfork which wants its own oom_score_adj.
	 * pin the mm so it doesn't go away and get reused after task_unlock
	 */
	if (!task->vfork_done) {
		struct task_struct *p = find_lock_task_mm(task);

		if (p) {
			if (atomic_read(&p->mm->mm_users) > 1) {
				mm = p->mm;
				mmgrab(mm);
			}
			task_unlock(p);
		}
	}

	task->signal->oom_score_adj = oom_adj;
	if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
		task->signal->oom_score_adj_min = (short)oom_adj;
	trace_oom_score_adj_update(task);

	if (mm) {
		struct task_struct *p;

		/* Propagate the new score to every other sharer of the mm. */
		rcu_read_lock();
		for_each_process(p) {
			if (same_thread_group(task, p))
				continue;

			/* do not touch kernel threads or the global init */
			if (p->flags & PF_KTHREAD || is_global_init(p))
				continue;

			task_lock(p);
			if (!p->vfork_done && process_shares_mm(p, mm)) {
				pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n",
						task_pid_nr(p), p->comm,
						p->signal->oom_score_adj, oom_adj,
						task_pid_nr(task), task->comm);
				p->signal->oom_score_adj = oom_adj;
				if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
					p->signal->oom_score_adj_min = (short)oom_adj;
			}
			task_unlock(p);
		}
		rcu_read_unlock();
		mmdrop(mm);
	}
err_unlock:
	mutex_unlock(&oom_adj_mutex);
	put_task_struct(task);
	return err;
}
1123
/*
 * /proc/pid/oom_adj exists solely for backwards compatibility with previous
 * kernels.  The effective policy is defined by oom_score_adj, which has a
 * different scale: oom_adj grew exponentially and oom_score_adj grows linearly.
 * Values written to oom_adj are simply mapped linearly to oom_score_adj.
 * Processes that become oom disabled via oom_adj will still be oom disabled
 * with this implementation.
 *
 * oom_adj cannot be removed since existing userspace binaries use it.
 */
static ssize_t oom_adj_write(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
	char buffer[PROC_NUMBUF];
	int oom_adj;
	int err;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count)) {
		err = -EFAULT;
		goto out;
	}

	err = kstrtoint(strstrip(buffer), 0, &oom_adj);
	if (err)
		goto out;
	/* Valid range is OOM_ADJUST_MIN..OOM_ADJUST_MAX plus OOM_DISABLE. */
	if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
	     oom_adj != OOM_DISABLE) {
		err = -EINVAL;
		goto out;
	}

	/*
	 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
	 * value is always attainable.
	 */
	if (oom_adj == OOM_ADJUST_MAX)
		oom_adj = OOM_SCORE_ADJ_MAX;
	else
		oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;

	err = __set_oom_adj(file, oom_adj, true);
out:
	return err < 0 ? err : count;
}

/* File operations for the legacy /proc/<pid>/oom_adj. */
static const struct file_operations proc_oom_adj_operations = {
	.read		= oom_adj_read,
	.write		= oom_adj_write,
	.llseek		= generic_file_llseek,
};
1177
/* Read handler for /proc/<pid>/oom_score_adj: print the current value. */
static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
					size_t count, loff_t *ppos)
{
	struct task_struct *task = get_proc_task(file_inode(file));
	char buffer[PROC_NUMBUF];
	short oom_score_adj = OOM_SCORE_ADJ_MIN;
	size_t len;

	if (!task)
		return -ESRCH;
	oom_score_adj = task->signal->oom_score_adj;
	put_task_struct(task);
	len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
	return simple_read_from_buffer(buf, count, ppos, buffer, len);
}
1193
1194 static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1195 size_t count, loff_t *ppos)
1196 {
1197 char buffer[PROC_NUMBUF];
1198 int oom_score_adj;
1199 int err;
1200
1201 memset(buffer, 0, sizeof(buffer));
1202 if (count > sizeof(buffer) - 1)
1203 count = sizeof(buffer) - 1;
1204 if (copy_from_user(buffer, buf, count)) {
1205 err = -EFAULT;
1206 goto out;
1207 }
1208
1209 err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
1210 if (err)
1211 goto out;
1212 if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
1213 oom_score_adj > OOM_SCORE_ADJ_MAX) {
1214 err = -EINVAL;
1215 goto out;
1216 }
1217
1218 err = __set_oom_adj(file, oom_score_adj, false);
1219 out:
1220 return err < 0 ? err : count;
1221 }
1222
/* File operations for /proc/<pid>/oom_score_adj. */
static const struct file_operations proc_oom_score_adj_operations = {
	.read = oom_score_adj_read,
	.write = oom_score_adj_write,
	.llseek = default_llseek,
};
1228
1229 #ifdef CONFIG_AUDITSYSCALL
1230 #define TMPBUFLEN 11
1231 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
1232 size_t count, loff_t *ppos)
1233 {
1234 struct inode * inode = file_inode(file);
1235 struct task_struct *task = get_proc_task(inode);
1236 ssize_t length;
1237 char tmpbuf[TMPBUFLEN];
1238
1239 if (!task)
1240 return -ESRCH;
1241 length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1242 from_kuid(file->f_cred->user_ns,
1243 audit_get_loginuid(task)));
1244 put_task_struct(task);
1245 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1246 }
1247
/*
 * Write /proc/<pid>/loginuid: set the audit loginuid of the calling
 * task.  Only the task itself may write it, partial writes are
 * rejected, and the value must map to a valid kuid in the opener's
 * user namespace (or be AUDIT_UID_UNSET to clear it).
 */
static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
				   size_t count, loff_t *ppos)
{
	struct inode * inode = file_inode(file);
	uid_t loginuid;
	kuid_t kloginuid;
	int rv;

	/* A task may only set its own loginuid. */
	rcu_read_lock();
	if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
		rcu_read_unlock();
		return -EPERM;
	}
	rcu_read_unlock();

	if (*ppos != 0) {
		/* No partial writes. */
		return -EINVAL;
	}

	rv = kstrtou32_from_user(buf, count, 10, &loginuid);
	if (rv < 0)
		return rv;

	/* Is userspace trying to explicitly UNSET the loginuid? */
	if (loginuid == AUDIT_UID_UNSET) {
		kloginuid = INVALID_UID;
	} else {
		kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
		if (!uid_valid(kloginuid))
			return -EINVAL;
	}

	rv = audit_set_loginuid(kloginuid);
	if (rv < 0)
		return rv;
	return count;
}
1286
/* File operations for /proc/<pid>/loginuid (audit login uid). */
static const struct file_operations proc_loginuid_operations = {
	.read = proc_loginuid_read,
	.write = proc_loginuid_write,
	.llseek = generic_file_llseek,
};
1292
1293 static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
1294 size_t count, loff_t *ppos)
1295 {
1296 struct inode * inode = file_inode(file);
1297 struct task_struct *task = get_proc_task(inode);
1298 ssize_t length;
1299 char tmpbuf[TMPBUFLEN];
1300
1301 if (!task)
1302 return -ESRCH;
1303 length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1304 audit_get_sessionid(task));
1305 put_task_struct(task);
1306 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1307 }
1308
/* File operations for the read-only /proc/<pid>/sessionid file. */
static const struct file_operations proc_sessionid_operations = {
	.read = proc_sessionid_read,
	.llseek = generic_file_llseek,
};
1313 #endif
1314
1315 #ifdef CONFIG_FAULT_INJECTION
1316 static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
1317 size_t count, loff_t *ppos)
1318 {
1319 struct task_struct *task = get_proc_task(file_inode(file));
1320 char buffer[PROC_NUMBUF];
1321 size_t len;
1322 int make_it_fail;
1323
1324 if (!task)
1325 return -ESRCH;
1326 make_it_fail = task->make_it_fail;
1327 put_task_struct(task);
1328
1329 len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
1330
1331 return simple_read_from_buffer(buf, count, ppos, buffer, len);
1332 }
1333
1334 static ssize_t proc_fault_inject_write(struct file * file,
1335 const char __user * buf, size_t count, loff_t *ppos)
1336 {
1337 struct task_struct *task;
1338 char buffer[PROC_NUMBUF];
1339 int make_it_fail;
1340 int rv;
1341
1342 if (!capable(CAP_SYS_RESOURCE))
1343 return -EPERM;
1344 memset(buffer, 0, sizeof(buffer));
1345 if (count > sizeof(buffer) - 1)
1346 count = sizeof(buffer) - 1;
1347 if (copy_from_user(buffer, buf, count))
1348 return -EFAULT;
1349 rv = kstrtoint(strstrip(buffer), 0, &make_it_fail);
1350 if (rv < 0)
1351 return rv;
1352 if (make_it_fail < 0 || make_it_fail > 1)
1353 return -EINVAL;
1354
1355 task = get_proc_task(file_inode(file));
1356 if (!task)
1357 return -ESRCH;
1358 task->make_it_fail = make_it_fail;
1359 put_task_struct(task);
1360
1361 return count;
1362 }
1363
/* File operations for /proc/<pid>/make-it-fail. */
static const struct file_operations proc_fault_inject_operations = {
	.read = proc_fault_inject_read,
	.write = proc_fault_inject_write,
	.llseek = generic_file_llseek,
};
1369
1370 static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
1371 size_t count, loff_t *ppos)
1372 {
1373 struct task_struct *task;
1374 int err;
1375 unsigned int n;
1376
1377 err = kstrtouint_from_user(buf, count, 0, &n);
1378 if (err)
1379 return err;
1380
1381 task = get_proc_task(file_inode(file));
1382 if (!task)
1383 return -ESRCH;
1384 WRITE_ONCE(task->fail_nth, n);
1385 put_task_struct(task);
1386
1387 return count;
1388 }
1389
1390 static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
1391 size_t count, loff_t *ppos)
1392 {
1393 struct task_struct *task;
1394 char numbuf[PROC_NUMBUF];
1395 ssize_t len;
1396
1397 task = get_proc_task(file_inode(file));
1398 if (!task)
1399 return -ESRCH;
1400 len = snprintf(numbuf, sizeof(numbuf), "%u\n",
1401 READ_ONCE(task->fail_nth));
1402 len = simple_read_from_buffer(buf, count, ppos, numbuf, len);
1403 put_task_struct(task);
1404
1405 return len;
1406 }
1407
/*
 * File operations for /proc/<pid>/fail-nth.
 * NOTE(review): no .llseek is set, unlike the other fault-injection
 * files above — presumably intentional, but worth confirming.
 */
static const struct file_operations proc_fail_nth_operations = {
	.read = proc_fail_nth_read,
	.write = proc_fail_nth_write,
};
1412 #endif
1413
1414
1415 #ifdef CONFIG_SCHED_DEBUG
1416 /*
1417 * Print out various scheduling related per-task fields:
1418 */
1419 static int sched_show(struct seq_file *m, void *v)
1420 {
1421 struct inode *inode = m->private;
1422 struct task_struct *p;
1423
1424 p = get_proc_task(inode);
1425 if (!p)
1426 return -ESRCH;
1427 proc_sched_show_task(p, m);
1428
1429 put_task_struct(p);
1430
1431 return 0;
1432 }
1433
1434 static ssize_t
1435 sched_write(struct file *file, const char __user *buf,
1436 size_t count, loff_t *offset)
1437 {
1438 struct inode *inode = file_inode(file);
1439 struct task_struct *p;
1440
1441 p = get_proc_task(inode);
1442 if (!p)
1443 return -ESRCH;
1444 proc_sched_set_task(p);
1445
1446 put_task_struct(p);
1447
1448 return count;
1449 }
1450
/* Open /proc/<pid>/sched: bind sched_show() to this task's inode. */
static int sched_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_show, inode);
}
1455
/* File operations for /proc/<pid>/sched (read stats, write resets them). */
static const struct file_operations proc_pid_sched_operations = {
	.open = sched_open,
	.read = seq_read,
	.write = sched_write,
	.llseek = seq_lseek,
	.release = single_release,
};
1463
1464 #endif
1465
1466 #ifdef CONFIG_SCHED_AUTOGROUP
1467 /*
1468 * Print out autogroup related information:
1469 */
1470 static int sched_autogroup_show(struct seq_file *m, void *v)
1471 {
1472 struct inode *inode = m->private;
1473 struct task_struct *p;
1474
1475 p = get_proc_task(inode);
1476 if (!p)
1477 return -ESRCH;
1478 proc_sched_autogroup_show_task(p, m);
1479
1480 put_task_struct(p);
1481
1482 return 0;
1483 }
1484
1485 static ssize_t
1486 sched_autogroup_write(struct file *file, const char __user *buf,
1487 size_t count, loff_t *offset)
1488 {
1489 struct inode *inode = file_inode(file);
1490 struct task_struct *p;
1491 char buffer[PROC_NUMBUF];
1492 int nice;
1493 int err;
1494
1495 memset(buffer, 0, sizeof(buffer));
1496 if (count > sizeof(buffer) - 1)
1497 count = sizeof(buffer) - 1;
1498 if (copy_from_user(buffer, buf, count))
1499 return -EFAULT;
1500
1501 err = kstrtoint(strstrip(buffer), 0, &nice);
1502 if (err < 0)
1503 return err;
1504
1505 p = get_proc_task(inode);
1506 if (!p)
1507 return -ESRCH;
1508
1509 err = proc_sched_autogroup_set_nice(p, nice);
1510 if (err)
1511 count = err;
1512
1513 put_task_struct(p);
1514
1515 return count;
1516 }
1517
/*
 * Open /proc/<pid>/autogroup.  single_open() is called with a NULL
 * private pointer and the inode is attached afterwards, via the
 * seq_file reachable through filp->private_data once single_open()
 * has succeeded.
 */
static int sched_autogroup_open(struct inode *inode, struct file *filp)
{
	int ret;

	ret = single_open(filp, sched_autogroup_show, NULL);
	if (!ret) {
		struct seq_file *m = filp->private_data;

		m->private = inode;
	}
	return ret;
}
1530
/* File operations for /proc/<pid>/autogroup. */
static const struct file_operations proc_pid_sched_autogroup_operations = {
	.open = sched_autogroup_open,
	.read = seq_read,
	.write = sched_autogroup_write,
	.llseek = seq_lseek,
	.release = single_release,
};
1538
1539 #endif /* CONFIG_SCHED_AUTOGROUP */
1540
1541 static ssize_t comm_write(struct file *file, const char __user *buf,
1542 size_t count, loff_t *offset)
1543 {
1544 struct inode *inode = file_inode(file);
1545 struct task_struct *p;
1546 char buffer[TASK_COMM_LEN];
1547 const size_t maxlen = sizeof(buffer) - 1;
1548
1549 memset(buffer, 0, sizeof(buffer));
1550 if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count))
1551 return -EFAULT;
1552
1553 p = get_proc_task(inode);
1554 if (!p)
1555 return -ESRCH;
1556
1557 if (same_thread_group(current, p))
1558 set_task_comm(p, buffer);
1559 else
1560 count = -EINVAL;
1561
1562 put_task_struct(p);
1563
1564 return count;
1565 }
1566
1567 static int comm_show(struct seq_file *m, void *v)
1568 {
1569 struct inode *inode = m->private;
1570 struct task_struct *p;
1571
1572 p = get_proc_task(inode);
1573 if (!p)
1574 return -ESRCH;
1575
1576 task_lock(p);
1577 seq_printf(m, "%s\n", p->comm);
1578 task_unlock(p);
1579
1580 put_task_struct(p);
1581
1582 return 0;
1583 }
1584
/* Open /proc/<pid>/comm: bind comm_show() to this task's inode. */
static int comm_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, comm_show, inode);
}
1589
/* File operations for /proc/<pid>/comm. */
static const struct file_operations proc_pid_set_comm_operations = {
	.open = comm_open,
	.read = seq_read,
	.write = comm_write,
	.llseek = seq_lseek,
	.release = single_release,
};
1597
1598 static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
1599 {
1600 struct task_struct *task;
1601 struct file *exe_file;
1602
1603 task = get_proc_task(d_inode(dentry));
1604 if (!task)
1605 return -ENOENT;
1606 exe_file = get_task_exe_file(task);
1607 put_task_struct(task);
1608 if (exe_file) {
1609 *exe_path = exe_file->f_path;
1610 path_get(&exe_file->f_path);
1611 fput(exe_file);
1612 return 0;
1613 } else
1614 return -ENOENT;
1615 }
1616
/*
 * ->get_link() for /proc/<pid> symlinks (exe, cwd, root, fd/*).
 * Permission-checks the caller, resolves the target via the per-inode
 * proc_get_link() callback and jumps the walk to that path.
 */
static const char *proc_pid_get_link(struct dentry *dentry,
				     struct inode *inode,
				     struct delayed_call *done)
{
	struct path path;
	int error = -EACCES;

	/* A NULL dentry means RCU-walk; we need to block, so retry in ref-walk. */
	if (!dentry)
		return ERR_PTR(-ECHILD);

	/* Are we allowed to snoop on the tasks file descriptors? */
	if (!proc_fd_access_allowed(inode))
		goto out;

	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
	if (error)
		goto out;

	nd_jump_link(&path);
	return NULL;
out:
	return ERR_PTR(error);
}
1640
/*
 * Copy the textual path of @path into the user buffer for readlink().
 * Returns the number of bytes copied or a negative errno.  As with
 * readlink(2), the result is not NUL-terminated and is silently
 * truncated to @buflen.
 */
static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
{
	char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
	char *pathname;
	int len;

	if (!tmp)
		return -ENOMEM;

	pathname = d_path(path, tmp, PAGE_SIZE);
	len = PTR_ERR(pathname);
	if (IS_ERR(pathname))
		goto out;
	/* d_path() builds the string at the tail of the page. */
	len = tmp + PAGE_SIZE - 1 - pathname;

	if (len > buflen)
		len = buflen;
	if (copy_to_user(buffer, pathname, len))
		len = -EFAULT;
out:
	free_page((unsigned long)tmp);
	return len;
}
1664
1665 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
1666 {
1667 int error = -EACCES;
1668 struct inode *inode = d_inode(dentry);
1669 struct path path;
1670
1671 /* Are we allowed to snoop on the tasks file descriptors? */
1672 if (!proc_fd_access_allowed(inode))
1673 goto out;
1674
1675 error = PROC_I(inode)->op.proc_get_link(dentry, &path);
1676 if (error)
1677 goto out;
1678
1679 error = do_proc_readlink(&path, buffer, buflen);
1680 path_put(&path);
1681 out:
1682 return error;
1683 }
1684
/* Shared inode operations for all /proc/<pid> symlink inodes. */
const struct inode_operations proc_pid_link_inode_operations = {
	.readlink = proc_pid_readlink,
	.get_link = proc_pid_get_link,
	.setattr = proc_setattr,
};
1690
1691
1692 /* building an inode */
1693
/*
 * Compute the uid/gid that should own a /proc file for @task, based on
 * the task's credentials and the dumpability of its mm.  Results are
 * returned through @ruid/@rgid.
 */
void task_dump_owner(struct task_struct *task, mode_t mode,
		     kuid_t *ruid, kgid_t *rgid)
{
	/* Depending on the state of dumpable compute who should own a
	 * proc file for a task.
	 */
	const struct cred *cred;
	kuid_t uid;
	kgid_t gid;

	/* Default to the tasks effective ownership */
	rcu_read_lock();
	cred = __task_cred(task);
	uid = cred->euid;
	gid = cred->egid;
	rcu_read_unlock();

	/*
	 * Before the /proc/pid/status file was created the only way to read
	 * the effective uid of a /process was to stat /proc/pid.  Reading
	 * /proc/pid/status is slow enough that procps and other packages
	 * kept stating /proc/pid.  To keep the rules in /proc simple I have
	 * made this apply to all per process world readable and executable
	 * directories.
	 */
	if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) {
		struct mm_struct *mm;
		/* task_lock() stabilizes task->mm. */
		task_lock(task);
		mm = task->mm;
		/* Make non-dumpable tasks owned by some root */
		if (mm) {
			if (get_dumpable(mm) != SUID_DUMP_USER) {
				struct user_namespace *user_ns = mm->user_ns;

				/* Root of the mm's user namespace, falling
				 * back to global root if unmapped. */
				uid = make_kuid(user_ns, 0);
				if (!uid_valid(uid))
					uid = GLOBAL_ROOT_UID;

				gid = make_kgid(user_ns, 0);
				if (!gid_valid(gid))
					gid = GLOBAL_ROOT_GID;
			}
		} else {
			/* No mm (kernel thread or exiting task). */
			uid = GLOBAL_ROOT_UID;
			gid = GLOBAL_ROOT_GID;
		}
		task_unlock(task);
	}
	*ruid = uid;
	*rgid = gid;
}
1745
/*
 * Allocate and initialize a /proc inode bound to @task.  Returns the
 * new inode (holding a struct pid reference to the task) or NULL on
 * allocation failure or if the task has no pid.
 */
struct inode *proc_pid_make_inode(struct super_block * sb,
				  struct task_struct *task, umode_t mode)
{
	struct inode * inode;
	struct proc_inode *ei;

	/* We need a new inode */

	inode = new_inode(sb);
	if (!inode)
		goto out;

	/* Common stuff */
	ei = PROC_I(inode);
	inode->i_mode = mode;
	inode->i_ino = get_next_ino();
	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
	inode->i_op = &proc_def_inode_operations;

	/*
	 * grab the reference to task.
	 */
	ei->pid = get_task_pid(task, PIDTYPE_PID);
	if (!ei->pid)
		goto out_unlock;

	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
	security_task_to_inode(task, inode);

out:
	return inode;

out_unlock:
	/* Drop the half-initialized inode; evicting it releases ei->pid. */
	iput(inode);
	return NULL;
}
1782
/*
 * ->getattr() for /proc/<pid> inodes.  Ownership defaults to global
 * root and is refreshed from the live task (if still present and
 * visible under the mount's hidepid setting).
 */
int pid_getattr(const struct path *path, struct kstat *stat,
		u32 request_mask, unsigned int query_flags)
{
	struct inode *inode = d_inode(path->dentry);
	struct task_struct *task;
	struct pid_namespace *pid = path->dentry->d_sb->s_fs_info;

	generic_fillattr(inode, stat);

	rcu_read_lock();
	stat->uid = GLOBAL_ROOT_UID;
	stat->gid = GLOBAL_ROOT_GID;
	task = pid_task(proc_pid(inode), PIDTYPE_PID);
	if (task) {
		if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
			rcu_read_unlock();
			/*
			 * This doesn't prevent learning whether PID exists,
			 * it only makes getattr() consistent with readdir().
			 */
			return -ENOENT;
		}
		task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
	}
	rcu_read_unlock();
	return 0;
}
1810
1811 /* dentry stuff */
1812
1813 /*
1814 * Exceptional case: normally we are not allowed to unhash a busy
1815 * directory. In this case, however, we can do it - no aliasing problems
1816 * due to the way we treat inodes.
1817 *
1818 * Rewrite the inode's ownerships here because the owning task may have
1819 * performed a setuid(), etc.
1820 *
1821 */
/*
 * d_revalidate for /proc/<pid> dentries.  The name cannot go stale,
 * but the inode's ownership can (setuid() etc.): refresh uid/gid and
 * strip setuid/setgid bits.  Returns 0 once the task is gone so the
 * dentry gets dropped.
 */
int pid_revalidate(struct dentry *dentry, unsigned int flags)
{
	struct inode *inode;
	struct task_struct *task;

	/* get_proc_task() may block; bail out of RCU-walk. */
	if (flags & LOOKUP_RCU)
		return -ECHILD;

	inode = d_inode(dentry);
	task = get_proc_task(inode);

	if (task) {
		task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);

		inode->i_mode &= ~(S_ISUID | S_ISGID);
		security_task_to_inode(task, inode);
		put_task_struct(task);
		return 1;
	}
	return 0;
}
1843
1844 static inline bool proc_inode_is_dead(struct inode *inode)
1845 {
1846 return !proc_pid(inode)->tasks[PIDTYPE_PID].first;
1847 }
1848
/* d_delete callback: report whether the dentry should be killed now. */
int pid_delete_dentry(const struct dentry *dentry)
{
	/* Is the task we represent dead?
	 * If so, then don't put the dentry on the lru list,
	 * kill it immediately.
	 */
	return proc_inode_is_dead(d_inode(dentry));
}
1857
/* Default dentry operations for /proc/<pid> entries. */
const struct dentry_operations pid_dentry_operations =
{
	.d_revalidate = pid_revalidate,
	.d_delete = pid_delete_dentry,
};
1863
1864 /* Lookups */
1865
1866 /*
1867 * Fill a directory entry.
1868 *
1869 * If possible create the dcache entry and derive our inode number and
1870 * file type from dcache entry.
1871 *
1872 * Since all of the proc inode numbers are dynamically generated, the inode
1873 * numbers do not exist until the inode is cache. This means creating the
1874 * the dcache entry in readdir is necessary to keep the inode numbers
1875 * reported by readdir in sync with the inode numbers reported
1876 * by stat.
1877 */
bool proc_fill_cache(struct file *file, struct dir_context *ctx,
	const char *name, int len,
	instantiate_t instantiate, struct task_struct *task, const void *ptr)
{
	struct dentry *child, *dir = file->f_path.dentry;
	struct qstr qname = QSTR_INIT(name, len);
	struct inode *inode;
	unsigned type;
	ino_t ino;

	child = d_hash_and_lookup(dir, &qname);
	if (!child) {
		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
		child = d_alloc_parallel(dir, &qname, &wq);
		if (IS_ERR(child))
			goto end_instantiate;
		if (d_in_lookup(child)) {
			/* We won the in-lookup race: populate the dentry. */
			int err = instantiate(d_inode(dir), child, task, ptr);
			d_lookup_done(child);
			if (err < 0) {
				dput(child);
				goto end_instantiate;
			}
		}
	}
	inode = d_inode(child);
	ino = inode->i_ino;
	/* Top nibble of i_mode is the DT_* file type. */
	type = inode->i_mode >> 12;
	dput(child);
	return dir_emit(ctx, name, len, ino, type);

end_instantiate:
	/* Could not build a dentry: emit with a dummy inode number. */
	return dir_emit(ctx, name, len, 1, DT_UNKNOWN);
}
1912
1913 /*
1914 * dname_to_vma_addr - maps a dentry name into two unsigned longs
1915 * which represent vma start and end addresses.
1916 */
1917 static int dname_to_vma_addr(struct dentry *dentry,
1918 unsigned long *start, unsigned long *end)
1919 {
1920 if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
1921 return -EINVAL;
1922
1923 return 0;
1924 }
1925
/*
 * d_revalidate for /proc/<pid>/map_files entries: the dentry stays
 * valid only while a VMA with exactly the named start-end range still
 * exists in a task mm we are allowed to access.
 */
static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
{
	unsigned long vm_start, vm_end;
	bool exact_vma_exists = false;
	struct mm_struct *mm = NULL;
	struct task_struct *task;
	struct inode *inode;
	int status = 0;

	/* We take locks below; bail out of RCU-walk. */
	if (flags & LOOKUP_RCU)
		return -ECHILD;

	inode = d_inode(dentry);
	task = get_proc_task(inode);
	if (!task)
		goto out_notask;

	/* Re-checks ptrace permission on every revalidation. */
	mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
	if (IS_ERR_OR_NULL(mm))
		goto out;

	if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
		down_read(&mm->mmap_sem);
		exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
		up_read(&mm->mmap_sem);
	}

	mmput(mm);

	if (exact_vma_exists) {
		/* Refresh ownership/security as the task may have changed creds. */
		task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);

		security_task_to_inode(task, inode);
		status = 1;
	}

out:
	put_task_struct(task);

out_notask:
	return status;
}
1968
/* Dentry operations for /proc/<pid>/map_files entries. */
static const struct dentry_operations tid_map_files_dentry_operations = {
	.d_revalidate = map_files_d_revalidate,
	.d_delete = pid_delete_dentry,
};
1973
/*
 * Resolve a /proc/<pid>/map_files/<start>-<end> link to the path of
 * the file mapped by that exact VMA.  Returns -ENOENT if the task, mm
 * or VMA is gone.
 */
static int map_files_get_link(struct dentry *dentry, struct path *path)
{
	unsigned long vm_start, vm_end;
	struct vm_area_struct *vma;
	struct task_struct *task;
	struct mm_struct *mm;
	int rc;

	rc = -ENOENT;
	task = get_proc_task(d_inode(dentry));
	if (!task)
		goto out;

	mm = get_task_mm(task);
	put_task_struct(task);
	if (!mm)
		goto out;

	rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
	if (rc)
		goto out_mmput;

	rc = -ENOENT;
	down_read(&mm->mmap_sem);
	vma = find_exact_vma(mm, vm_start, vm_end);
	if (vma && vma->vm_file) {
		/*
		 * NOTE(review): vma_pr_or_file() is a non-mainline (aufs)
		 * helper; presumably it picks the "real" backing file for
		 * an overlaid mapping — confirm against the aufs patchset.
		 */
		*path = vma_pr_or_file(vma)->f_path;
		path_get(path);
		rc = 0;
	}
	up_read(&mm->mmap_sem);

out_mmput:
	mmput(mm);
out:
	return rc;
}
2011
/*
 * Snapshot of one file-backed VMA, collected under mmap_sem and
 * consumed after the lock is dropped (see proc_map_files_readdir()).
 */
struct map_files_info {
	fmode_t mode;
	unsigned int len;
	unsigned char name[4*sizeof(long)+2];	/* max: %lx-%lx\0 */
};
2017
2018 /*
2019 * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the
2020 * symlinks may be used to bypass permissions on ancestor directories in the
2021 * path to the file in question.
2022 */
/*
 * ->get_link() for map_files symlinks: CAP_SYS_ADMIN only (see the
 * comment above), then defer to the common proc_pid_get_link().
 */
static const char *
proc_map_files_get_link(struct dentry *dentry,
			struct inode *inode,
			struct delayed_call *done)
{
	if (!capable(CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);

	return proc_pid_get_link(dentry, inode, done);
}
2033
2034 /*
2035 * Identical to proc_pid_link_inode_operations except for get_link()
2036 */
/*
 * Identical to proc_pid_link_inode_operations except for get_link(),
 * which adds the CAP_SYS_ADMIN restriction.
 */
static const struct inode_operations proc_map_files_link_inode_operations = {
	.readlink = proc_pid_readlink,
	.get_link = proc_map_files_get_link,
	.setattr = proc_setattr,
};
2042
/*
 * Build the inode + dentry for one map_files symlink.  @ptr smuggles
 * the mapped file's fmode_t, which determines the link's r/w bits.
 */
static int
proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
			   struct task_struct *task, const void *ptr)
{
	fmode_t mode = (fmode_t)(unsigned long)ptr;
	struct proc_inode *ei;
	struct inode *inode;

	inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK |
				    ((mode & FMODE_READ ) ? S_IRUSR : 0) |
				    ((mode & FMODE_WRITE) ? S_IWUSR : 0));
	if (!inode)
		return -ENOENT;

	ei = PROC_I(inode);
	ei->op.proc_get_link = map_files_get_link;

	inode->i_op = &proc_map_files_link_inode_operations;
	inode->i_size = 64;

	d_set_d_op(dentry, &tid_map_files_dentry_operations);
	d_add(dentry, inode);

	return 0;
}
2068
/*
 * ->lookup() for /proc/<pid>/map_files: parse the "<start>-<end>" name
 * and instantiate a symlink only when a file-backed VMA with exactly
 * that range exists.  Requires ptrace read access to the task.
 */
static struct dentry *proc_map_files_lookup(struct inode *dir,
		struct dentry *dentry, unsigned int flags)
{
	unsigned long vm_start, vm_end;
	struct vm_area_struct *vma;
	struct task_struct *task;
	int result;
	struct mm_struct *mm;

	result = -ENOENT;
	task = get_proc_task(dir);
	if (!task)
		goto out;

	result = -EACCES;
	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
		goto out_put_task;

	result = -ENOENT;
	if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
		goto out_put_task;

	mm = get_task_mm(task);
	if (!mm)
		goto out_put_task;

	down_read(&mm->mmap_sem);
	vma = find_exact_vma(mm, vm_start, vm_end);
	if (!vma)
		goto out_no_vma;

	if (vma->vm_file)
		result = proc_map_files_instantiate(dir, dentry, task,
				(void *)(unsigned long)vma->vm_file->f_mode);

out_no_vma:
	up_read(&mm->mmap_sem);
	mmput(mm);
out_put_task:
	put_task_struct(task);
out:
	return ERR_PTR(result);
}
2112
/* Inode operations for the /proc/<pid>/map_files directory itself. */
static const struct inode_operations proc_map_files_inode_operations = {
	.lookup = proc_map_files_lookup,
	.permission = proc_fd_permission,
	.setattr = proc_setattr,
};
2118
/*
 * ->iterate() for /proc/<pid>/map_files: emit one "<start>-<end>"
 * entry per file-backed VMA.  Requires ptrace read access.
 */
static int
proc_map_files_readdir(struct file *file, struct dir_context *ctx)
{
	struct vm_area_struct *vma;
	struct task_struct *task;
	struct mm_struct *mm;
	unsigned long nr_files, pos, i;
	struct flex_array *fa = NULL;
	struct map_files_info info;
	struct map_files_info *p;
	int ret;

	ret = -ENOENT;
	task = get_proc_task(file_inode(file));
	if (!task)
		goto out;

	ret = -EACCES;
	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
		goto out_put_task;

	ret = 0;
	if (!dir_emit_dots(file, ctx))
		goto out_put_task;

	mm = get_task_mm(task);
	if (!mm)
		goto out_put_task;
	down_read(&mm->mmap_sem);

	nr_files = 0;

	/*
	 * We need two passes here:
	 *
	 * 1) Collect vmas of mapped files with mmap_sem taken
	 * 2) Release mmap_sem and instantiate entries
	 *
	 * otherwise we get lockdep complained, since filldir()
	 * routine might require mmap_sem taken in might_fault().
	 */

	/* Pass 1a: count entries at or beyond the current position. */
	for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
		if (vma->vm_file && ++pos > ctx->pos)
			nr_files++;
	}

	if (nr_files) {
		fa = flex_array_alloc(sizeof(info), nr_files,
					GFP_KERNEL);
		if (!fa || flex_array_prealloc(fa, 0, nr_files,
						GFP_KERNEL)) {
			ret = -ENOMEM;
			if (fa)
				flex_array_free(fa);
			up_read(&mm->mmap_sem);
			mmput(mm);
			goto out_put_task;
		}
		/* Pass 1b: snapshot name and mode of each such VMA. */
		for (i = 0, vma = mm->mmap, pos = 2; vma;
				vma = vma->vm_next) {
			if (!vma->vm_file)
				continue;
			if (++pos <= ctx->pos)
				continue;

			info.mode = vma->vm_file->f_mode;
			info.len = snprintf(info.name,
					sizeof(info.name), "%lx-%lx",
					vma->vm_start, vma->vm_end);
			if (flex_array_put(fa, i++, &info, GFP_KERNEL))
				BUG();
		}
	}
	up_read(&mm->mmap_sem);

	/* Pass 2: emit the snapshotted entries without mmap_sem held. */
	for (i = 0; i < nr_files; i++) {
		p = flex_array_get(fa, i);
		if (!proc_fill_cache(file, ctx,
				      p->name, p->len,
				      proc_map_files_instantiate,
				      task,
				      (void *)(unsigned long)p->mode))
			break;
		ctx->pos++;
	}
	if (fa)
		flex_array_free(fa);
	mmput(mm);

out_put_task:
	put_task_struct(task);
out:
	return ret;
}
2214
/* File operations for the /proc/<pid>/map_files directory. */
static const struct file_operations proc_map_files_operations = {
	.read = generic_read_dir,
	.iterate_shared = proc_map_files_readdir,
	.llseek = generic_file_llseek,
};
2220
2221 #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
/* Per-open state for /proc/<pid>/timers seq_file iteration. */
struct timers_private {
	struct pid *pid;		/* pid being inspected */
	struct task_struct *task;	/* pinned in timers_start() */
	struct sighand_struct *sighand;	/* locked across the walk */
	struct pid_namespace *ns;	/* for pid translation in show_timer() */
	unsigned long flags;		/* saved irq flags for sighand lock */
};
2229
/*
 * seq_file ->start: pin the task and take its sighand lock for the
 * whole walk of signal->posix_timers.  Both are released in
 * timers_stop().
 */
static void *timers_start(struct seq_file *m, loff_t *pos)
{
	struct timers_private *tp = m->private;

	tp->task = get_pid_task(tp->pid, PIDTYPE_PID);
	if (!tp->task)
		return ERR_PTR(-ESRCH);

	tp->sighand = lock_task_sighand(tp->task, &tp->flags);
	if (!tp->sighand)
		return ERR_PTR(-ESRCH);

	return seq_list_start(&tp->task->signal->posix_timers, *pos);
}
2244
/* seq_file ->next: advance along the posix_timers list. */
static void *timers_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct timers_private *tp = m->private;
	return seq_list_next(v, &tp->task->signal->posix_timers, pos);
}
2250
/*
 * seq_file ->stop: drop the sighand lock first, then the task
 * reference taken in timers_start() (order matters: the lock lives in
 * the task's sighand).
 */
static void timers_stop(struct seq_file *m, void *v)
{
	struct timers_private *tp = m->private;

	if (tp->sighand) {
		unlock_task_sighand(tp->task, &tp->flags);
		tp->sighand = NULL;
	}

	if (tp->task) {
		put_task_struct(tp->task);
		tp->task = NULL;
	}
}
2265
/* seq_file ->show: print one posix timer (id, signal, notify, clock). */
static int show_timer(struct seq_file *m, void *v)
{
	struct k_itimer *timer;
	struct timers_private *tp = m->private;
	int notify;
	static const char * const nstr[] = {
		[SIGEV_SIGNAL] = "signal",
		[SIGEV_NONE] = "none",
		[SIGEV_THREAD] = "thread",
	};

	timer = list_entry((struct list_head *)v, struct k_itimer, list);
	notify = timer->it_sigev_notify;

	seq_printf(m, "ID: %d\n", timer->it_id);
	seq_printf(m, "signal: %d/%p\n",
		   timer->sigq->info.si_signo,
		   timer->sigq->info.si_value.sival_ptr);
	/* SIGEV_THREAD_ID is a flag on top of the base notify mode. */
	seq_printf(m, "notify: %s/%s.%d\n",
		   nstr[notify & ~SIGEV_THREAD_ID],
		   (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
		   pid_nr_ns(timer->it_pid, tp->ns));
	seq_printf(m, "ClockID: %d\n", timer->it_clock);

	return 0;
}
2292
/* seq_file operations for /proc/<pid>/timers. */
static const struct seq_operations proc_timers_seq_ops = {
	.start = timers_start,
	.next = timers_next,
	.stop = timers_stop,
	.show = show_timer,
};
2299
2300 static int proc_timers_open(struct inode *inode, struct file *file)
2301 {
2302 struct timers_private *tp;
2303
2304 tp = __seq_open_private(file, &proc_timers_seq_ops,
2305 sizeof(struct timers_private));
2306 if (!tp)
2307 return -ENOMEM;
2308
2309 tp->pid = proc_pid(inode);
2310 tp->ns = inode->i_sb->s_fs_info;
2311 return 0;
2312 }
2313
/* File operations for /proc/<pid>/timers. */
static const struct file_operations proc_timers_operations = {
	.open = proc_timers_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_private,
};
2320 #endif
2321
/*
 * Write /proc/<pid>/timerslack_ns: set the task's timer slack.
 * Writing 0 restores the task's default slack.  Changing another
 * task's slack requires CAP_SYS_NICE and an LSM setscheduler check.
 */
static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
					size_t count, loff_t *offset)
{
	struct inode *inode = file_inode(file);
	struct task_struct *p;
	u64 slack_ns;
	int err;

	err = kstrtoull_from_user(buf, count, 10, &slack_ns);
	if (err < 0)
		return err;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;

	if (p != current) {
		if (!capable(CAP_SYS_NICE)) {
			count = -EPERM;
			goto out;
		}

		err = security_task_setscheduler(p);
		if (err) {
			count = err;
			goto out;
		}
	}

	/* task_lock() serializes against concurrent slack updates. */
	task_lock(p);
	if (slack_ns == 0)
		p->timer_slack_ns = p->default_timer_slack_ns;
	else
		p->timer_slack_ns = slack_ns;
	task_unlock(p);

out:
	put_task_struct(p);

	return count;
}
2363
/*
 * Show /proc/<pid>/timerslack_ns.  Reading another task's slack
 * requires CAP_SYS_NICE and an LSM getscheduler check.
 */
static int timerslack_ns_show(struct seq_file *m, void *v)
{
	struct inode *inode = m->private;
	struct task_struct *p;
	int err = 0;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;

	if (p != current) {

		if (!capable(CAP_SYS_NICE)) {
			err = -EPERM;
			goto out;
		}
		err = security_task_getscheduler(p);
		if (err)
			goto out;
	}

	task_lock(p);
	seq_printf(m, "%llu\n", p->timer_slack_ns);
	task_unlock(p);

out:
	put_task_struct(p);

	return err;
}
2394
/* Open /proc/<pid>/timerslack_ns via the single_open() helper. */
static int timerslack_ns_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, timerslack_ns_show, inode);
}
2399
/* File operations for /proc/<pid>/timerslack_ns. */
static const struct file_operations proc_pid_set_timerslack_ns_operations = {
	.open = timerslack_ns_open,
	.read = seq_read,
	.write = timerslack_ns_write,
	.llseek = seq_lseek,
	.release = single_release,
};
2407
/*
 * Build the inode + dentry for one static /proc/<pid> entry described
 * by a pid_entry table row (@ptr).
 */
static int proc_pident_instantiate(struct inode *dir,
	struct dentry *dentry, struct task_struct *task, const void *ptr)
{
	const struct pid_entry *p = ptr;
	struct inode *inode;
	struct proc_inode *ei;

	inode = proc_pid_make_inode(dir->i_sb, task, p->mode);
	if (!inode)
		goto out;

	ei = PROC_I(inode);
	if (S_ISDIR(inode->i_mode))
		set_nlink(inode, 2);	/* Use getattr to fix if necessary */
	if (p->iop)
		inode->i_op = p->iop;
	if (p->fop)
		inode->i_fop = p->fop;
	ei->op = p->op;
	d_set_d_op(dentry, &pid_dentry_operations);
	d_add(dentry, inode);
	/* Close the race of the process dying before we return the dentry */
	if (pid_revalidate(dentry, 0))
		return 0;
out:
	return -ENOENT;
}
2435
/*
 * Look @dentry's name up in the static pid_entry table @ents and
 * instantiate the matching entry, if any.
 */
static struct dentry *proc_pident_lookup(struct inode *dir,
					 struct dentry *dentry,
					 const struct pid_entry *ents,
					 unsigned int nents)
{
	int error;
	struct task_struct *task = get_proc_task(dir);
	const struct pid_entry *p, *last;

	error = -ENOENT;

	if (!task)
		goto out_no_task;

	/*
	 * Yes, it does not scale. And it should not. Don't add
	 * new entries into /proc/<tgid>/ without very good reasons.
	 */
	last = &ents[nents];
	for (p = ents; p < last; p++) {
		if (p->len != dentry->d_name.len)
			continue;
		if (!memcmp(dentry->d_name.name, p->name, p->len))
			break;
	}
	if (p >= last)
		goto out;

	error = proc_pident_instantiate(dir, dentry, task, p);
out:
	put_task_struct(task);
out_no_task:
	return ERR_PTR(error);
}
2470
/*
 * Generic readdir over a static pid_entry table.  Positions 0 and 1
 * are "." and "..", so the table index is ctx->pos - 2.
 */
static int proc_pident_readdir(struct file *file, struct dir_context *ctx,
		const struct pid_entry *ents, unsigned int nents)
{
	struct task_struct *task = get_proc_task(file_inode(file));
	const struct pid_entry *p;

	if (!task)
		return -ENOENT;

	if (!dir_emit_dots(file, ctx))
		goto out;

	if (ctx->pos >= nents + 2)
		goto out;

	/* Resume from where the previous read stopped. */
	for (p = ents + (ctx->pos - 2); p < ents + nents; p++) {
		if (!proc_fill_cache(file, ctx, p->name, p->len,
				proc_pident_instantiate, task, p))
			break;
		ctx->pos++;
	}
out:
	put_task_struct(task);
	return 0;
}
2496
2497 #ifdef CONFIG_SECURITY
/*
 * Read /proc/<pid>/attr/<name>: ask the security module for the task
 * attribute named after the dentry.  The LSM allocates the buffer (@p),
 * which we must free whether or not the copy-out succeeds.
 */
static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
				  size_t count, loff_t *ppos)
{
	struct inode * inode = file_inode(file);
	char *p = NULL;
	ssize_t length;
	struct task_struct *task = get_proc_task(inode);

	if (!task)
		return -ESRCH;

	length = security_getprocattr(task, PROC_I(inode)->op.lsm,
				      (char*)file->f_path.dentry->d_name.name,
				      &p);
	put_task_struct(task);
	if (length > 0)
		length = simple_read_from_buffer(buf, count, ppos, p, length);
	/* kfree(NULL) is a no-op on the LSM error path. */
	kfree(p);
	return length;
}
2518
/*
 * Write /proc/<pid>/attr/<name>: hand a userspace buffer to the LSM to
 * set the current task's attribute.  Only a task may write its own
 * attributes, writes must start at offset 0, and at most one page is
 * accepted.  cred_guard_mutex serializes against exec/ptrace credential
 * changes while the LSM updates the attribute.
 */
static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
				   size_t count, loff_t *ppos)
{
	struct inode * inode = file_inode(file);
	void *page;
	ssize_t length;
	struct task_struct *task = get_proc_task(inode);

	length = -ESRCH;
	if (!task)
		goto out_no_task;

	/* A task may only write its own attributes. */
	length = -EACCES;
	if (current != task)
		goto out;

	if (count > PAGE_SIZE)
		count = PAGE_SIZE;

	/* No partial writes. */
	length = -EINVAL;
	if (*ppos != 0)
		goto out;

	page = memdup_user(buf, count);
	if (IS_ERR(page)) {
		length = PTR_ERR(page);
		goto out;
	}

	/* Guard against adverse ptrace interaction */
	length = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
	if (length < 0)
		goto out_free;

	length = security_setprocattr(PROC_I(inode)->op.lsm,
				      file->f_path.dentry->d_name.name,
				      page, count);
	mutex_unlock(&current->signal->cred_guard_mutex);
out_free:
	kfree(page);
out:
	put_task_struct(task);
out_no_task:
	return length;
}
2566
/* File operations shared by all /proc/<pid>/attr/<name> entries. */
static const struct file_operations proc_pid_attr_operations = {
	.read		= proc_pid_attr_read,
	.write		= proc_pid_attr_write,
	.llseek		= generic_file_llseek,
};
2572
/*
 * LSM_DIR_OPS(LSM) generates the directory file_operations and
 * inode_operations for a per-LSM attribute subdirectory
 * (/proc/<pid>/attr/<lsm>/), all backed by the generic proc_pident_*
 * helpers over the LSM##_attr_dir_stuff[] table.
 *
 * Note: iteration is registered as ->iterate_shared, matching every
 * other proc_pident_readdir()-backed directory in this file
 * (attr/, tgid base, tid base); proc_pident_readdir() takes no
 * directory locks of its own, so shared iteration is safe.
 */
#define LSM_DIR_OPS(LSM) \
static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
			     struct dir_context *ctx) \
{ \
	return proc_pident_readdir(filp, ctx, \
				   LSM##_attr_dir_stuff, \
				   ARRAY_SIZE(LSM##_attr_dir_stuff)); \
} \
\
static const struct file_operations proc_##LSM##_attr_dir_ops = { \
	.read		= generic_read_dir, \
	.iterate_shared	= proc_##LSM##_attr_dir_iterate, \
	.llseek		= default_llseek, \
}; \
\
static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \
				struct dentry *dentry, unsigned int flags) \
{ \
	return proc_pident_lookup(dir, dentry, \
				  LSM##_attr_dir_stuff, \
				  ARRAY_SIZE(LSM##_attr_dir_stuff)); \
} \
\
static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \
	.lookup		= proc_##LSM##_attr_dir_lookup, \
	.getattr	= pid_getattr, \
	.setattr	= proc_setattr, \
}
2601
2602 #ifdef CONFIG_SECURITY_SELINUX
/* Entries under /proc/<pid>/attr/selinux/. */
static const struct pid_entry selinux_attr_dir_stuff[] = {
	ATTR("selinux", "current", 0666),
	ATTR("selinux", "prev", 0444),
	ATTR("selinux", "exec", 0666),
	ATTR("selinux", "fscreate", 0666),
	ATTR("selinux", "keycreate", 0666),
	ATTR("selinux", "sockcreate", 0666),
	ATTR("selinux", "context", 0666),
};
LSM_DIR_OPS(selinux);
2613 #endif
2614
2615 #ifdef CONFIG_SECURITY_SMACK
/* Entries under /proc/<pid>/attr/smack/. */
static const struct pid_entry smack_attr_dir_stuff[] = {
	ATTR("smack", "current", 0666),
	ATTR("smack", "context", 0666),
};
LSM_DIR_OPS(smack);
2621 #endif
2622
2623 #ifdef CONFIG_SECURITY_APPARMOR
/* Entries under /proc/<pid>/attr/apparmor/. */
static const struct pid_entry apparmor_attr_dir_stuff[] = {
	ATTR("apparmor", "current", 0666),
	ATTR("apparmor", "prev", 0444),
	ATTR("apparmor", "exec", 0666),
	ATTR("apparmor", "context", 0666),
};
LSM_DIR_OPS(apparmor);
2631 #endif
2632
/*
 * Top-level /proc/<pid>/attr/ entries.  The NULL lsm means "the
 * display LSM"; per-LSM subdirectories are added when the
 * corresponding security module is configured.
 */
static const struct pid_entry attr_dir_stuff[] = {
	ATTR(NULL, "current", 0666),
	ATTR(NULL, "prev", 0444),
	ATTR(NULL, "exec", 0666),
	ATTR(NULL, "fscreate", 0666),
	ATTR(NULL, "keycreate", 0666),
	ATTR(NULL, "sockcreate", 0666),
	ATTR(NULL, "context", 0666),
	ATTR(NULL, "display_lsm", 0666),

#ifdef CONFIG_SECURITY_SELINUX
	DIR("selinux", 0555,
	    proc_selinux_attr_dir_inode_ops, proc_selinux_attr_dir_ops),
#endif
#ifdef CONFIG_SECURITY_SMACK
	DIR("smack", 0555,
	    proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops),
#endif
#ifdef CONFIG_SECURITY_APPARMOR
	DIR("apparmor", 0555,
	    proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops),
#endif
};
2656
/* readdir for /proc/<pid>/attr/, driven by the attr_dir_stuff table. */
static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
{
	return proc_pident_readdir(file, ctx,
				   attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
}
2662
/* Directory file operations for /proc/<pid>/attr/. */
static const struct file_operations proc_attr_dir_operations = {
	.read		= generic_read_dir,
	.iterate_shared	= proc_attr_dir_readdir,
	.llseek		= generic_file_llseek,
};
2668
/* lookup for /proc/<pid>/attr/, driven by the attr_dir_stuff table. */
static struct dentry *proc_attr_dir_lookup(struct inode *dir,
				struct dentry *dentry, unsigned int flags)
{
	return proc_pident_lookup(dir, dentry,
				  attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
}
2675
/* Inode operations for /proc/<pid>/attr/. */
static const struct inode_operations proc_attr_dir_inode_operations = {
	.lookup		= proc_attr_dir_lookup,
	.getattr	= pid_getattr,
	.setattr	= proc_setattr,
};
2681
2682 #endif
2683
2684 #ifdef CONFIG_ELF_CORE
2685 static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
2686 size_t count, loff_t *ppos)
2687 {
2688 struct task_struct *task = get_proc_task(file_inode(file));
2689 struct mm_struct *mm;
2690 char buffer[PROC_NUMBUF];
2691 size_t len;
2692 int ret;
2693
2694 if (!task)
2695 return -ESRCH;
2696
2697 ret = 0;
2698 mm = get_task_mm(task);
2699 if (mm) {
2700 len = snprintf(buffer, sizeof(buffer), "%08lx\n",
2701 ((mm->flags & MMF_DUMP_FILTER_MASK) >>
2702 MMF_DUMP_FILTER_SHIFT));
2703 mmput(mm);
2704 ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
2705 }
2706
2707 put_task_struct(task);
2708
2709 return ret;
2710 }
2711
2712 static ssize_t proc_coredump_filter_write(struct file *file,
2713 const char __user *buf,
2714 size_t count,
2715 loff_t *ppos)
2716 {
2717 struct task_struct *task;
2718 struct mm_struct *mm;
2719 unsigned int val;
2720 int ret;
2721 int i;
2722 unsigned long mask;
2723
2724 ret = kstrtouint_from_user(buf, count, 0, &val);
2725 if (ret < 0)
2726 return ret;
2727
2728 ret = -ESRCH;
2729 task = get_proc_task(file_inode(file));
2730 if (!task)
2731 goto out_no_task;
2732
2733 mm = get_task_mm(task);
2734 if (!mm)
2735 goto out_no_mm;
2736 ret = 0;
2737
2738 for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
2739 if (val & mask)
2740 set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
2741 else
2742 clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
2743 }
2744
2745 mmput(mm);
2746 out_no_mm:
2747 put_task_struct(task);
2748 out_no_task:
2749 if (ret < 0)
2750 return ret;
2751 return count;
2752 }
2753
/* File operations for /proc/<pid>/coredump_filter. */
static const struct file_operations proc_coredump_filter_operations = {
	.read		= proc_coredump_filter_read,
	.write		= proc_coredump_filter_write,
	.llseek		= generic_file_llseek,
};
2759 #endif
2760
2761 #ifdef CONFIG_TASK_IO_ACCOUNTING
/*
 * Emit I/O accounting counters for /proc/<pid>/io.  @whole selects
 * whole-process accounting (the task's own counters plus the signal
 * struct's accumulated counters for dead threads plus every live
 * thread) versus per-thread accounting.  cred_guard_mutex is held
 * across the ptrace access check so the target cannot exec to a more
 * privileged image while we read it.
 */
static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole)
{
	struct task_io_accounting acct = task->ioac;
	unsigned long flags;
	int result;

	result = mutex_lock_killable(&task->signal->cred_guard_mutex);
	if (result)
		return result;

	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
		result = -EACCES;
		goto out_unlock;
	}

	/* Sum in the rest of the thread group under sighand lock. */
	if (whole && lock_task_sighand(task, &flags)) {
		struct task_struct *t = task;

		task_io_accounting_add(&acct, &task->signal->ioac);
		while_each_thread(task, t)
			task_io_accounting_add(&acct, &t->ioac);

		unlock_task_sighand(task, &flags);
	}
	seq_printf(m,
		   "rchar: %llu\n"
		   "wchar: %llu\n"
		   "syscr: %llu\n"
		   "syscw: %llu\n"
		   "read_bytes: %llu\n"
		   "write_bytes: %llu\n"
		   "cancelled_write_bytes: %llu\n",
		   (unsigned long long)acct.rchar,
		   (unsigned long long)acct.wchar,
		   (unsigned long long)acct.syscr,
		   (unsigned long long)acct.syscw,
		   (unsigned long long)acct.read_bytes,
		   (unsigned long long)acct.write_bytes,
		   (unsigned long long)acct.cancelled_write_bytes);
	result = 0;

out_unlock:
	mutex_unlock(&task->signal->cred_guard_mutex);
	return result;
}
2807
/* /proc/<pid>/task/<tid>/io: per-thread counters only. */
static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
				  struct pid *pid, struct task_struct *task)
{
	return do_io_accounting(task, m, 0);
}
2813
/* /proc/<pid>/io: whole thread-group counters. */
static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
				   struct pid *pid, struct task_struct *task)
{
	return do_io_accounting(task, m, 1);
}
2819 #endif /* CONFIG_TASK_IO_ACCOUNTING */
2820
2821 #ifdef CONFIG_USER_NS
/*
 * Common open() for uid_map/gid_map/projid_map: pin the task's user
 * namespace and hand it to the seq_file as private data.  The
 * reference is dropped either on the error path here or later in
 * proc_id_map_release().
 */
static int proc_id_map_open(struct inode *inode, struct file *file,
	const struct seq_operations *seq_ops)
{
	struct user_namespace *ns = NULL;
	struct task_struct *task;
	struct seq_file *seq;
	int ret = -EINVAL;

	task = get_proc_task(inode);
	if (task) {
		rcu_read_lock();
		ns = get_user_ns(task_cred_xxx(task, user_ns));
		rcu_read_unlock();
		put_task_struct(task);
	}
	if (!ns)
		goto err;

	ret = seq_open(file, seq_ops);
	if (ret)
		goto err_put_ns;

	seq = file->private_data;
	seq->private = ns;

	return 0;
err_put_ns:
	put_user_ns(ns);
err:
	return ret;
}
2853
2854 static int proc_id_map_release(struct inode *inode, struct file *file)
2855 {
2856 struct seq_file *seq = file->private_data;
2857 struct user_namespace *ns = seq->private;
2858 put_user_ns(ns);
2859 return seq_release(inode, file);
2860 }
2861
/* open() for /proc/<pid>/uid_map. */
static int proc_uid_map_open(struct inode *inode, struct file *file)
{
	return proc_id_map_open(inode, file, &proc_uid_seq_operations);
}

/* open() for /proc/<pid>/gid_map. */
static int proc_gid_map_open(struct inode *inode, struct file *file)
{
	return proc_id_map_open(inode, file, &proc_gid_seq_operations);
}

/* open() for /proc/<pid>/projid_map. */
static int proc_projid_map_open(struct inode *inode, struct file *file)
{
	return proc_id_map_open(inode, file, &proc_projid_seq_operations);
}
2876
/* File operations for the three /proc/<pid>/{uid,gid,projid}_map files. */
static const struct file_operations proc_uid_map_operations = {
	.open		= proc_uid_map_open,
	.write		= proc_uid_map_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_id_map_release,
};

static const struct file_operations proc_gid_map_operations = {
	.open		= proc_gid_map_open,
	.write		= proc_gid_map_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_id_map_release,
};

static const struct file_operations proc_projid_map_operations = {
	.open		= proc_projid_map_open,
	.write		= proc_projid_map_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_id_map_release,
};
2900
/*
 * open() for /proc/<pid>/setgroups.  Pins the task's user namespace;
 * opening for write additionally requires CAP_SYS_ADMIN in that
 * namespace.  The namespace reference is handed to single_open() as
 * seq_file private data and released in proc_setgroups_release().
 */
static int proc_setgroups_open(struct inode *inode, struct file *file)
{
	struct user_namespace *ns = NULL;
	struct task_struct *task;
	int ret;

	ret = -ESRCH;
	task = get_proc_task(inode);
	if (task) {
		rcu_read_lock();
		ns = get_user_ns(task_cred_xxx(task, user_ns));
		rcu_read_unlock();
		put_task_struct(task);
	}
	if (!ns)
		goto err;

	if (file->f_mode & FMODE_WRITE) {
		ret = -EACCES;
		if (!ns_capable(ns, CAP_SYS_ADMIN))
			goto err_put_ns;
	}

	ret = single_open(file, &proc_setgroups_show, ns);
	if (ret)
		goto err_put_ns;

	return 0;
err_put_ns:
	put_user_ns(ns);
err:
	return ret;
}
2934
/*
 * release() for /proc/<pid>/setgroups: grab the namespace pointer out
 * of the seq_file *before* single_release() frees it, then drop our
 * reference.
 */
static int proc_setgroups_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	int ret = single_release(inode, file);
	put_user_ns(ns);
	return ret;
}
2943
/* File operations for /proc/<pid>/setgroups. */
static const struct file_operations proc_setgroups_operations = {
	.open		= proc_setgroups_open,
	.write		= proc_setgroups_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_setgroups_release,
};
2951 #endif /* CONFIG_USER_NS */
2952
2953 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
2954 struct pid *pid, struct task_struct *task)
2955 {
2956 int err = lock_trace(task);
2957 if (!err) {
2958 seq_printf(m, "%08x\n", task->personality);
2959 unlock_trace(task);
2960 }
2961 return err;
2962 }
2963
2964 #ifdef CONFIG_LIVEPATCH
/* Show /proc/<pid>/patch_state: the task's livepatch transition state. */
static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
				struct pid *pid, struct task_struct *task)
{
	seq_printf(m, "%d\n", task->patch_state);
	return 0;
}
2971 #endif /* CONFIG_LIVEPATCH */
2972
2973 /*
2974 * Thread groups
2975 */
2976 static const struct file_operations proc_task_operations;
2977 static const struct inode_operations proc_task_inode_operations;
2978
/*
 * Entries in /proc/<tgid>/ (thread-group leader directory).  Keep in
 * rough sync with tid_base_stuff below; see the warning in
 * proc_pident_lookup() before adding entries.
 */
static const struct pid_entry tgid_base_stuff[] = {
	DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
	DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
	DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
	DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
	DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
	DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
	REG("environ", S_IRUSR, proc_environ_operations),
	REG("auxv", S_IRUSR, proc_auxv_operations),
	ONE("status", S_IRUGO, proc_pid_status),
	ONE("personality", S_IRUSR, proc_pid_personality),
	ONE("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
	REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
#ifdef CONFIG_SCHED_AUTOGROUP
	REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
#endif
	REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
	ONE("syscall", S_IRUSR, proc_pid_syscall),
#endif
	REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
	ONE("stat", S_IRUGO, proc_tgid_stat),
	ONE("statm", S_IRUGO, proc_pid_statm),
	REG("maps", S_IRUGO, proc_pid_maps_operations),
#ifdef CONFIG_NUMA
	REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
#endif
	REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
	LNK("cwd", proc_cwd_link),
	LNK("root", proc_root_link),
	LNK("exe", proc_exe_link),
	REG("mounts", S_IRUGO, proc_mounts_operations),
	REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
	REG("mountstats", S_IRUSR, proc_mountstats_operations),
#ifdef CONFIG_PROC_PAGE_MONITOR
	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
	REG("smaps", S_IRUGO, proc_pid_smaps_operations),
	REG("pagemap", S_IRUSR, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
	DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
#endif
#ifdef CONFIG_KALLSYMS
	ONE("wchan", S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
	ONE("stack", S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHED_INFO
	ONE("schedstat", S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
	REG("latency", S_IRUGO, proc_lstats_operations),
#endif
#ifdef CONFIG_PROC_PID_CPUSET
	ONE("cpuset", S_IRUGO, proc_cpuset_show),
#endif
#ifdef CONFIG_CGROUPS
	ONE("cgroup", S_IRUGO, proc_cgroup_show),
#endif
	ONE("oom_score", S_IRUGO, proc_oom_score),
	REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
	REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
	REG("sessionid", S_IRUGO, proc_sessionid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
	REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
	REG("fail-nth", 0644, proc_fail_nth_operations),
#endif
#ifdef CONFIG_ELF_CORE
	REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
	ONE("io", S_IRUSR, proc_tgid_io_accounting),
#endif
#ifdef CONFIG_HARDWALL
	ONE("hardwall", S_IRUGO, proc_pid_hardwall),
#endif
#ifdef CONFIG_USER_NS
	REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
	REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
	REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
	REG("timers", S_IRUGO, proc_timers_operations),
#endif
	REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
#ifdef CONFIG_LIVEPATCH
	ONE("patch_state", S_IRUSR, proc_pid_patch_state),
#endif
};
3077
/* readdir for /proc/<tgid>/, driven by the tgid_base_stuff table. */
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
{
	return proc_pident_readdir(file, ctx,
				   tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
}
3083
/* Directory file operations for /proc/<tgid>/. */
static const struct file_operations proc_tgid_base_operations = {
	.read		= generic_read_dir,
	.iterate_shared	= proc_tgid_base_readdir,
	.llseek		= generic_file_llseek,
};
3089
/* lookup for /proc/<tgid>/, driven by the tgid_base_stuff table. */
static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
	return proc_pident_lookup(dir, dentry,
				  tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
}
3095
/* Inode operations for /proc/<tgid>/ (hidepid checks via ->permission). */
static const struct inode_operations proc_tgid_base_inode_operations = {
	.lookup		= proc_tgid_base_lookup,
	.getattr	= pid_getattr,
	.setattr	= proc_setattr,
	.permission	= proc_pid_permission,
};
3102
/*
 * Invalidate the cached dentries for @pid on one proc mount:
 * /proc/<pid> and, for a non-leader thread, /proc/<tgid>/task/<pid>.
 * Purely best-effort dcache cleanup; missing dentries are fine.
 */
static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
{
	struct dentry *dentry, *leader, *dir;
	char buf[PROC_NUMBUF];
	struct qstr name;

	name.name = buf;
	name.len = snprintf(buf, sizeof(buf), "%d", pid);
	/* no ->d_hash() rejects on procfs */
	dentry = d_hash_and_lookup(mnt->mnt_root, &name);
	if (dentry) {
		d_invalidate(dentry);
		dput(dentry);
	}

	/* A group leader has no separate task/<pid> alias. */
	if (pid == tgid)
		return;

	name.name = buf;
	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
	leader = d_hash_and_lookup(mnt->mnt_root, &name);
	if (!leader)
		goto out;

	name.name = "task";
	name.len = strlen(name.name);
	dir = d_hash_and_lookup(leader, &name);
	if (!dir)
		goto out_put_leader;

	name.name = buf;
	name.len = snprintf(buf, sizeof(buf), "%d", pid);
	dentry = d_hash_and_lookup(dir, &name);
	if (dentry) {
		d_invalidate(dentry);
		dput(dentry);
	}

	dput(dir);
out_put_leader:
	dput(leader);
out:
	return;
}
3147
3148 /**
3149 * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
3150 * @task: task that should be flushed.
3151 *
3152 * When flushing dentries from proc, one needs to flush them from global
3153 * proc (proc_mnt) and from all the namespaces' procs this task was seen
3154 * in. This call is supposed to do all of this job.
3155 *
3156 * Looks in the dcache for
3157 * /proc/@pid
3158 * /proc/@tgid/task/@pid
 * if either directory is present, flushes it and all of its children
3160 * from the dcache.
3161 *
3162 * It is safe and reasonable to cache /proc entries for a task until
3163 * that task exits. After that they just clog up the dcache with
3164 * useless entries, possibly causing useful dcache entries to be
 * flushed instead. This routine is provided to flush those useless
3166 * dcache entries at process exit time.
3167 *
3168 * NOTE: This routine is just an optimization so it does not guarantee
3169 * that no dcache entries will exist at process exit time it
3170 * just makes it very unlikely that any will persist.
3171 */
3172
3173 void proc_flush_task(struct task_struct *task)
3174 {
3175 int i;
3176 struct pid *pid, *tgid;
3177 struct upid *upid;
3178
3179 pid = task_pid(task);
3180 tgid = task_tgid(task);
3181
3182 for (i = 0; i <= pid->level; i++) {
3183 upid = &pid->numbers[i];
3184 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
3185 tgid->numbers[i].nr);
3186 }
3187 }
3188
/*
 * Instantiate the /proc/<tgid> directory dentry itself.  Returns 0 on
 * success, -ENOENT if the inode could not be made or the task died
 * before we could hand back the dentry.
 */
static int proc_pid_instantiate(struct inode *dir,
				   struct dentry * dentry,
				   struct task_struct *task, const void *ptr)
{
	struct inode *inode;

	inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
	if (!inode)
		goto out;

	inode->i_op = &proc_tgid_base_inode_operations;
	inode->i_fop = &proc_tgid_base_operations;
	inode->i_flags|=S_IMMUTABLE;

	/* Pre-counted link count for the static tgid entry table. */
	set_nlink(inode, nlink_tgid);

	d_set_d_op(dentry, &pid_dentry_operations);

	d_add(dentry, inode);
	/* Close the race of the process dying before we return the dentry */
	if (pid_revalidate(dentry, 0))
		return 0;
out:
	return -ENOENT;
}
3214
/*
 * Look up a numeric /proc/<tgid> name in this mount's pid namespace.
 * The task reference is taken under rcu_read_lock() so the task cannot
 * be freed between find and get.
 */
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
	int result = -ENOENT;
	struct task_struct *task;
	unsigned tgid;
	struct pid_namespace *ns;

	/* name_to_int() returns ~0U for anything that isn't a plain number. */
	tgid = name_to_int(&dentry->d_name);
	if (tgid == ~0U)
		goto out;

	ns = dentry->d_sb->s_fs_info;
	rcu_read_lock();
	task = find_task_by_pid_ns(tgid, ns);
	if (task)
		get_task_struct(task);
	rcu_read_unlock();
	if (!task)
		goto out;

	result = proc_pid_instantiate(dir, dentry, task, NULL);
	put_task_struct(task);
out:
	return ERR_PTR(result);
}
3240
3241 /*
3242 * Find the first task with tgid >= tgid
3243 *
3244 */
/* Cursor for iterating thread-group leaders; task holds a reference. */
struct tgid_iter {
	unsigned int tgid;
	struct task_struct *task;
};
/*
 * Advance the iterator to the first thread-group leader with
 * tgid >= iter.tgid, dropping the reference on the previous task and
 * taking one on the new task (iter.task is NULL when iteration ends).
 */
static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
{
	struct pid *pid;

	if (iter.task)
		put_task_struct(iter.task);
	rcu_read_lock();
retry:
	iter.task = NULL;
	pid = find_ge_pid(iter.tgid, ns);
	if (pid) {
		iter.tgid = pid_nr_ns(pid, ns);
		iter.task = pid_task(pid, PIDTYPE_PID);
		/* What we want to know is whether the pid we have
		 * found is the pid of a thread_group_leader.  Testing
		 * for task being a thread_group_leader is the obvious
		 * thing to do but there is a window when it fails,
		 * due to the pid transfer logic in de_thread.
		 *
		 * So we perform the straight forward test of seeing
		 * if the pid we have found is the pid of a thread
		 * group leader, and don't worry if the task we have
		 * found doesn't happen to be a thread group leader.
		 * As we don't care in the case of readdir.
		 */
		if (!iter.task || !has_group_leader_pid(iter.task)) {
			iter.tgid += 1;
			goto retry;
		}
		get_task_struct(iter.task);
	}
	rcu_read_unlock();
	return iter;
}
3283
3284 #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
3285
3286 /* for the /proc/ directory itself, after non-process stuff has been done */
/* for the /proc/ directory itself, after non-process stuff has been done */
int proc_pid_readdir(struct file *file, struct dir_context *ctx)
{
	struct tgid_iter iter;
	struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info;
	loff_t pos = ctx->pos;

	/* Position encodes tgid + TGID_OFFSET; past the end means done. */
	if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
		return 0;

	/* Two fixed symlinks precede the numeric pid entries. */
	if (pos == TGID_OFFSET - 2) {
		struct inode *inode = d_inode(ns->proc_self);
		if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
			return 0;
		ctx->pos = pos = pos + 1;
	}
	if (pos == TGID_OFFSET - 1) {
		struct inode *inode = d_inode(ns->proc_thread_self);
		if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
			return 0;
		ctx->pos = pos = pos + 1;
	}
	iter.tgid = pos - TGID_OFFSET;
	iter.task = NULL;
	for (iter = next_tgid(ns, iter);
	     iter.task;
	     iter.tgid += 1, iter = next_tgid(ns, iter)) {
		char name[PROC_NUMBUF];
		int len;

		cond_resched();
		/* Skip tasks hidden by the mount's hidepid= option. */
		if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
			continue;

		len = snprintf(name, sizeof(name), "%d", iter.tgid);
		ctx->pos = iter.tgid + TGID_OFFSET;
		if (!proc_fill_cache(file, ctx, name, len,
				     proc_pid_instantiate, iter.task, NULL)) {
			/* Buffer full: drop the iterator's task reference. */
			put_task_struct(iter.task);
			return 0;
		}
	}
	ctx->pos = PID_MAX_LIMIT + TGID_OFFSET;
	return 0;
}
3331
3332 /*
3333 * proc_tid_comm_permission is a special permission function exclusively
3334 * used for the node /proc/<pid>/task/<tid>/comm.
3335 * It bypasses generic permission checks in the case where a task of the same
3336 * task group attempts to access the node.
3337 * The rationale behind this is that glibc and bionic access this node for
3338 * cross thread naming (pthread_set/getname_np(!self)). However, if
3339 * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
3340 * which locks out the cross thread naming implementation.
3341 * This function makes sure that the node is always accessible for members of
3342 * same thread group.
3343 */
3344 static int proc_tid_comm_permission(struct inode *inode, int mask)
3345 {
3346 bool is_same_tgroup;
3347 struct task_struct *task;
3348
3349 task = get_proc_task(inode);
3350 if (!task)
3351 return -ESRCH;
3352 is_same_tgroup = same_thread_group(current, task);
3353 put_task_struct(task);
3354
3355 if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
3356 /* This file (/proc/<pid>/task/<tid>/comm) can always be
3357 * read or written by the members of the corresponding
3358 * thread group.
3359 */
3360 return 0;
3361 }
3362
3363 return generic_permission(inode, mask);
3364 }
3365
/* Inode operations for /proc/<pid>/task/<tid>/comm (see comment above). */
static const struct inode_operations proc_tid_comm_inode_operations = {
		.permission = proc_tid_comm_permission,
};
3369
3370 /*
3371 * Tasks
3372 */
/*
 * Entries in /proc/<tgid>/task/<tid>/.  Keep in rough sync with
 * tgid_base_stuff above; per-thread entries use the proc_tid_*
 * variants where they exist.
 */
static const struct pid_entry tid_base_stuff[] = {
	DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
	DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
	DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
	DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
	REG("environ", S_IRUSR, proc_environ_operations),
	REG("auxv", S_IRUSR, proc_auxv_operations),
	ONE("status", S_IRUGO, proc_pid_status),
	ONE("personality", S_IRUSR, proc_pid_personality),
	ONE("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
	REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
	NOD("comm", S_IFREG|S_IRUGO|S_IWUSR,
			 &proc_tid_comm_inode_operations,
			 &proc_pid_set_comm_operations, {}),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
	ONE("syscall", S_IRUSR, proc_pid_syscall),
#endif
	REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
	ONE("stat", S_IRUGO, proc_tid_stat),
	ONE("statm", S_IRUGO, proc_pid_statm),
	REG("maps", S_IRUGO, proc_tid_maps_operations),
#ifdef CONFIG_PROC_CHILDREN
	REG("children", S_IRUGO, proc_tid_children_operations),
#endif
#ifdef CONFIG_NUMA
	REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
#endif
	REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
	LNK("cwd", proc_cwd_link),
	LNK("root", proc_root_link),
	LNK("exe", proc_exe_link),
	REG("mounts", S_IRUGO, proc_mounts_operations),
	REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
#ifdef CONFIG_PROC_PAGE_MONITOR
	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
	REG("smaps", S_IRUGO, proc_tid_smaps_operations),
	REG("pagemap", S_IRUSR, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
	DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
#endif
#ifdef CONFIG_KALLSYMS
	ONE("wchan", S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
	ONE("stack", S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHED_INFO
	ONE("schedstat", S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
	REG("latency", S_IRUGO, proc_lstats_operations),
#endif
#ifdef CONFIG_PROC_PID_CPUSET
	ONE("cpuset", S_IRUGO, proc_cpuset_show),
#endif
#ifdef CONFIG_CGROUPS
	ONE("cgroup", S_IRUGO, proc_cgroup_show),
#endif
	ONE("oom_score", S_IRUGO, proc_oom_score),
	REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
	REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
	REG("sessionid", S_IRUGO, proc_sessionid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
	REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
	REG("fail-nth", 0644, proc_fail_nth_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
	ONE("io", S_IRUSR, proc_tid_io_accounting),
#endif
#ifdef CONFIG_HARDWALL
	ONE("hardwall", S_IRUGO, proc_pid_hardwall),
#endif
#ifdef CONFIG_USER_NS
	REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
	REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
	REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
#ifdef CONFIG_LIVEPATCH
	ONE("patch_state", S_IRUSR, proc_pid_patch_state),
#endif
};
3463
/* readdir for /proc/<tgid>/task/<tid>/, driven by tid_base_stuff. */
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
{
	return proc_pident_readdir(file, ctx,
				   tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
}
3469
/* lookup for /proc/<tgid>/task/<tid>/, driven by tid_base_stuff. */
static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
	return proc_pident_lookup(dir, dentry,
				  tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
}
3475
/* Directory file operations for /proc/<tgid>/task/<tid>/. */
static const struct file_operations proc_tid_base_operations = {
	.read		= generic_read_dir,
	.iterate_shared	= proc_tid_base_readdir,
	.llseek		= generic_file_llseek,
};
3481
/* Inode operations for /proc/<tgid>/task/<tid>/. */
static const struct inode_operations proc_tid_base_inode_operations = {
	.lookup		= proc_tid_base_lookup,
	.getattr	= pid_getattr,
	.setattr	= proc_setattr,
};
3487
/*
 * Instantiate a /proc/<tgid>/task/<tid> directory dentry.  Returns 0
 * on success, -ENOENT if the inode could not be made or the task died
 * before we could hand back the dentry.
 */
static int proc_task_instantiate(struct inode *dir,
	struct dentry *dentry, struct task_struct *task, const void *ptr)
{
	struct inode *inode;
	inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);

	if (!inode)
		goto out;
	inode->i_op = &proc_tid_base_inode_operations;
	inode->i_fop = &proc_tid_base_operations;
	inode->i_flags|=S_IMMUTABLE;

	/* Pre-counted link count for the static tid entry table. */
	set_nlink(inode, nlink_tid);

	d_set_d_op(dentry, &pid_dentry_operations);

	d_add(dentry, inode);
	/* Close the race of the process dying before we return the dentry */
	if (pid_revalidate(dentry, 0))
		return 0;
out:
	return -ENOENT;
}
3511
/*
 * Look up a single tid entry inside /proc/[pid]/task.
 *
 * The dentry name must be a plain decimal tid in this mount's pid
 * namespace, and the named task must be in the same thread group as the
 * directory's task; anything else yields -ENOENT.
 */
static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
	int result = -ENOENT;
	struct task_struct *task;
	struct task_struct *leader = get_proc_task(dir);
	unsigned tid;
	struct pid_namespace *ns;

	if (!leader)
		goto out_no_task;

	/* name_to_int() returns ~0U when the name is not a valid number */
	tid = name_to_int(&dentry->d_name);
	if (tid == ~0U)
		goto out;

	ns = dentry->d_sb->s_fs_info;
	rcu_read_lock();
	task = find_task_by_pid_ns(tid, ns);
	if (task)
		get_task_struct(task);	/* pin the task beyond the RCU section */
	rcu_read_unlock();
	if (!task)
		goto out;
	if (!same_thread_group(leader, task))
		goto out_drop_task;

	result = proc_task_instantiate(dir, dentry, task, NULL);
out_drop_task:
	put_task_struct(task);
out:
	put_task_struct(leader);
out_no_task:
	return ERR_PTR(result);
}
3546
/*
 * Find the first tid of a thread group to return to user space.
 *
 * Usually this is just the thread group leader, but if the user's
 * buffer was too small or there was a seek into the middle of the
 * directory we have more work to do.
 *
 * In the case of a short read we start with find_task_by_pid_ns.
 *
 * In the case of a seek we start with the leader and walk nr
 * threads past it.
 */
/*
 * Returns the thread at position f_pos in pid's thread group, preferring
 * to resume at the cached tid when it is still valid. The caller owns a
 * reference on the returned task; NULL means no such thread (or overflow).
 */
static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos,
	struct pid_namespace *ns)
{
	struct task_struct *pos, *task;
	unsigned long nr = f_pos;

	if (nr != f_pos) /* 32bit overflow? */
		return NULL;

	rcu_read_lock();
	task = pid_task(pid, PIDTYPE_PID);
	if (!task)
		goto fail;

	/* Attempt to start with the tid of a thread */
	if (tid && nr) {
		pos = find_task_by_pid_ns(tid, ns);
		/* the cached tid must still be in the same thread group */
		if (pos && same_thread_group(pos, task))
			goto found;
	}

	/* If nr exceeds the number of threads there is nothing to do */
	if (nr >= get_nr_threads(task))
		goto fail;

	/* If we haven't found our starting place yet start
	 * with the leader and walk nr threads forward.
	 */
	pos = task = task->group_leader;
	do {
		if (!nr--)
			goto found;
	} while_each_thread(task, pos);
fail:
	pos = NULL;
	goto out;
found:
	get_task_struct(pos);	/* hand a reference to the caller */
out:
	rcu_read_unlock();
	return pos;
}
3601
3602 /*
3603 * Find the next thread in the thread list.
3604 * Return NULL if there is an error or no next thread.
3605 *
3606 * The reference to the input task_struct is released.
3607 */
static struct task_struct *next_tid(struct task_struct *start)
{
	struct task_struct *pos = NULL;
	rcu_read_lock();
	/* a dead task has been unlinked; its thread list is not walkable */
	if (pid_alive(start)) {
		pos = next_thread(start);
		/* wrapping around to the group leader means we are done */
		if (thread_group_leader(pos))
			pos = NULL;
		else
			get_task_struct(pos);	/* reference for the caller */
	}
	rcu_read_unlock();
	put_task_struct(start);	/* drop the reference taken by our caller */
	return pos;
}
3623
/* for the /proc/TGID/task/ directories */
static int proc_task_readdir(struct file *file, struct dir_context *ctx)
{
	struct inode *inode = file_inode(file);
	struct task_struct *task;
	struct pid_namespace *ns;
	int tid;

	if (proc_inode_is_dead(inode))
		return -ENOENT;

	if (!dir_emit_dots(file, ctx))
		return 0;

	/* f_version caches the tid value that the last readdir call couldn't
	 * return. lseek aka telldir automagically resets f_version to 0.
	 */
	ns = inode->i_sb->s_fs_info;
	tid = (int)file->f_version;
	file->f_version = 0;
	/* ctx->pos - 2 accounts for the "." and ".." entries emitted above */
	for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
	     task;
	     task = next_tid(task), ctx->pos++) {
		char name[PROC_NUMBUF];
		int len;
		tid = task_pid_nr_ns(task, ns);
		len = snprintf(name, sizeof(name), "%d", tid);
		if (!proc_fill_cache(file, ctx, name, len,
				proc_task_instantiate, task, NULL)) {
			/* returning this tid failed, save it as the first
			 * tid for the next readdir call */
			file->f_version = (u64)tid;
			put_task_struct(task);
			break;
		}
	}

	return 0;
}
3663
3664 static int proc_task_getattr(const struct path *path, struct kstat *stat,
3665 u32 request_mask, unsigned int query_flags)
3666 {
3667 struct inode *inode = d_inode(path->dentry);
3668 struct task_struct *p = get_proc_task(inode);
3669 generic_fillattr(inode, stat);
3670
3671 if (p) {
3672 stat->nlink += get_nr_threads(p);
3673 put_task_struct(p);
3674 }
3675
3676 return 0;
3677 }
3678
/* Inode operations for the /proc/[pid]/task directory. */
static const struct inode_operations proc_task_inode_operations = {
	.lookup = proc_task_lookup,
	.getattr = proc_task_getattr,
	.setattr = proc_setattr,
	.permission = proc_pid_permission,
};
3685
/* File operations for the /proc/[pid]/task directory. */
static const struct file_operations proc_task_operations = {
	.read = generic_read_dir,
	.iterate_shared = proc_task_readdir,
	.llseek = generic_file_llseek,
};
3691
3692 void __init set_proc_pid_nlink(void)
3693 {
3694 nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
3695 nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
3696 }