]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - kernel/sysctl.c
x86/bugs: Expose x86_spec_ctrl_base directly
[mirror_ubuntu-artful-kernel.git] / kernel / sysctl.c
1 /*
2 * sysctl.c: General linux system control interface
3 *
4 * Begun 24 March 1995, Stephen Tweedie
5 * Added /proc support, Dec 1995
6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9 * Dynamic registration fixes, Stephen Tweedie.
10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12 * Horn.
13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16 * Wendling.
17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
18 * Removed it and replaced it with older style, 03/23/00, Bill Wendling
19 */
20
21 #include <linux/module.h>
22 #include <linux/aio.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/printk.h>
30 #include <linux/proc_fs.h>
31 #include <linux/security.h>
32 #include <linux/ctype.h>
33 #include <linux/kmemcheck.h>
34 #include <linux/kmemleak.h>
35 #include <linux/fs.h>
36 #include <linux/init.h>
37 #include <linux/kernel.h>
38 #include <linux/kobject.h>
39 #include <linux/net.h>
40 #include <linux/sysrq.h>
41 #include <linux/highuid.h>
42 #include <linux/writeback.h>
43 #include <linux/ratelimit.h>
44 #include <linux/compaction.h>
45 #include <linux/hugetlb.h>
46 #include <linux/initrd.h>
47 #include <linux/key.h>
48 #include <linux/times.h>
49 #include <linux/limits.h>
50 #include <linux/dcache.h>
51 #include <linux/dnotify.h>
52 #include <linux/syscalls.h>
53 #include <linux/vmstat.h>
54 #include <linux/nfs_fs.h>
55 #include <linux/acpi.h>
56 #include <linux/reboot.h>
57 #include <linux/ftrace.h>
58 #include <linux/perf_event.h>
59 #include <linux/kprobes.h>
60 #include <linux/pipe_fs_i.h>
61 #include <linux/oom.h>
62 #include <linux/kmod.h>
63 #include <linux/capability.h>
64 #include <linux/binfmts.h>
65 #include <linux/sched/sysctl.h>
66 #include <linux/sched/coredump.h>
67 #include <linux/kexec.h>
68 #include <linux/bpf.h>
69 #include <linux/mount.h>
70
71 #include <linux/uaccess.h>
72 #include <linux/mutex.h>
73 #include <asm/processor.h>
74
75 #ifdef CONFIG_X86
76 #include <asm/msr.h>
77 #include <asm/nmi.h>
78 #include <asm/stacktrace.h>
79 #include <asm/io.h>
80 #include <asm/spec-ctrl.h>
81 #endif
82 #ifdef CONFIG_SPARC
83 #include <asm/setup.h>
84 #endif
85 #ifdef CONFIG_BSD_PROCESS_ACCT
86 #include <linux/acct.h>
87 #endif
88 #ifdef CONFIG_RT_MUTEXES
89 #include <linux/rtmutex.h>
90 #endif
91 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
92 #include <linux/lockdep.h>
93 #endif
94 #ifdef CONFIG_CHR_DEV_SG
95 #include <scsi/sg.h>
96 #endif
97
98 #ifdef CONFIG_LOCKUP_DETECTOR
99 #include <linux/nmi.h>
100 #endif
101
102 #if defined(CONFIG_SYSCTL)
103
104 /* External variables not in a header file. */
105 extern int suid_dumpable;
/* Core dump settings; backing storage for the core_* entries in kern_table. */
106 #ifdef CONFIG_COREDUMP
107 extern int core_uses_pid;
108 extern char core_pattern[];
109 extern unsigned int core_pipe_limit;
110 #endif
111 #ifdef CONFIG_USER_NS
112 extern int unprivileged_userns_clone;
113 #endif
/* pid_max is the tunable; pid_max_min/pid_max_max are its .extra1/.extra2 bounds. */
114 extern int pid_max;
115 extern int pid_max_min, pid_max_max;
116 extern int percpu_pagelist_fraction;
117 extern int latencytop_enabled;
118 extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
119 #ifndef CONFIG_MMU
120 extern int sysctl_nr_trim_pages;
121 #endif
122
123 /*
 * Constants used for minimum and maximum. The tables below point at them
 * through .extra1 (lower bound) and .extra2 (upper bound).
 */
124 #ifdef CONFIG_LOCKUP_DETECTOR
125 static int sixty = 60;
126 #endif
127
128 static int __maybe_unused neg_one = -1;
129
/* No initializer: file-scope statics start out as 0. */
130 static int zero;
131 static int __maybe_unused one = 1;
132 static int __maybe_unused two = 2;
133 static int __maybe_unused four = 4;
134 static unsigned long one_ul = 1;
135 static int one_hundred = 100;
136 static int one_thousand = 1000;
137 #ifdef CONFIG_PRINTK
138 static int ten_thousand = 10000;
139 #endif
140 #ifdef CONFIG_PERF_EVENTS
141 static int six_hundred_forty_kb = 640 * 1024;
142 #endif
143
144 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
145 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
146
147 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
148 static int maxolduid = 65535;
149 static int minolduid;
150
/* Exposed read-only (mode 0444) by the ngroups_max and cap_last_cap entries. */
151 static int ngroups_max = NGROUPS_MAX;
152 static const int cap_last_cap = CAP_LAST_CAP;
153
154 /* this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */
155 #ifdef CONFIG_DETECT_HUNG_TASK
156 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
157 #endif
158
159 #ifdef CONFIG_INOTIFY_USER
160 #include <linux/inotify.h>
161 #endif
/* NOTE(review): this CONFIG_SPARC conditional is empty and has no effect. */
162 #ifdef CONFIG_SPARC
163 #endif
164
/* Backing variable for the "soft-power" entry in kern_table. */
165 #ifdef __hppa__
166 extern int pwrsw_enabled;
167 #endif
168
/* Backing variable for the "unaligned-trap" entry in kern_table. */
169 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
170 extern int unaligned_enabled;
171 #endif
172
/* Backing variable for the "unaligned-dump-stack" entry in kern_table. */
173 #ifdef CONFIG_IA64
174 extern int unaligned_dump_stack;
175 #endif
176
/* Backing variable for the "ignore-unaligned-usertrap" entry in kern_table. */
177 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
178 extern int no_unaligned_warning;
179 #endif
180
181 #ifdef CONFIG_PROC_SYSCTL
182
183 /**
184 * enum sysctl_writes_mode - supported sysctl write modes
185 *
186 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
187 * to be written, and multiple writes on the same sysctl file descriptor
188 * will rewrite the sysctl value, regardless of file position. No warning
189 * is issued when the initial position is not 0.
190 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
191 * not 0.
192 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
193 * file position 0 and the value must be fully contained in the buffer
194 * sent to the write syscall. If dealing with strings respect the file
195 * position, but restrict this to the max length of the buffer, anything
196 * past the max length will be ignored. Multiple writes will append
197 * to the buffer.
198 *
199 * These write modes control how current file position affects the behavior of
200 * updating sysctl values through the proc interface on each write.
201 */
202 enum sysctl_writes_mode {
203 SYSCTL_WRITES_LEGACY = -1,
204 SYSCTL_WRITES_WARN = 0,
205 SYSCTL_WRITES_STRICT = 1,
206 };
207
/*
 * Current write mode; starts out strict. It is exposed as the
 * "sysctl_writes_strict" entry in kern_table, bounded to -1..1.
 */
208 static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
209
/*
 * Forward declarations for the handlers of the "cad_pid" and "tainted"
 * entries in kern_table; both are static, so their bodies belong to this file.
 */
210 static int proc_do_cad_pid(struct ctl_table *table, int write,
211 void __user *buffer, size_t *lenp, loff_t *ppos);
212 static int proc_taint(struct ctl_table *table, int write,
213 void __user *buffer, size_t *lenp, loff_t *ppos);
214 #endif
215
/* Handler used by the "dmesg_restrict" and "kptr_restrict" entries in kern_table. */
216 #ifdef CONFIG_PRINTK
217 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
218 void __user *buffer, size_t *lenp, loff_t *ppos);
219 #endif
220
221 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
222 void __user *buffer, size_t *lenp, loff_t *ppos);
/* Handler used by the "core_pattern" entry in kern_table. */
223 #ifdef CONFIG_COREDUMP
224 static int proc_dostring_coredump(struct ctl_table *table, int write,
225 void __user *buffer, size_t *lenp, loff_t *ppos);
226 #endif
227
/*
 * Handlers for the "ibrs_enabled" and "ibpb_enabled" entries in kern_table.
 * NOTE(review): these are not static, so they are presumably defined in
 * another file - confirm where.
 */
228 #ifdef CONFIG_X86
229 int proc_dointvec_ibrs_ctrl(struct ctl_table *table, int write,
230 void __user *buffer, size_t *lenp, loff_t *ppos);
231 int proc_dointvec_ibpb_ctrl(struct ctl_table *table, int write,
232 void __user *buffer, size_t *lenp, loff_t *ppos);
233 #endif
234
#ifdef CONFIG_MAGIC_SYSRQ
/* Note: the sysrq code keeps its own private copy of this setting. */
static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;

/*
 * proc handler for the "sysrq" entry in kern_table.
 *
 * The integer transfer itself is delegated to proc_dointvec(). When that
 * succeeds and the access was a write, the new value of __sysrq_enabled is
 * handed to sysrq_toggle_support().
 *
 * Returns 0 on success, or the error reported by proc_dointvec().
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp,
				loff_t *ppos)
{
	int ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (!ret && write)
		sysrq_toggle_support(__sysrq_enabled);

	return ret;
}

#endif
256
/*
 * Forward declarations: sysctl_base_table below names these arrays as
 * .child before their definitions appear.
 */
257 static struct ctl_table kern_table[];
258 static struct ctl_table vm_table[];
259 static struct ctl_table fs_table[];
260 static struct ctl_table debug_table[];
261 static struct ctl_table dev_table[];
/* Defined in another file; used as .child of the "random" entry in kern_table. */
262 extern struct ctl_table random_table[];
263 #ifdef CONFIG_EPOLL
264 extern struct ctl_table epoll_table[];
265 #endif
266
267 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
268 int sysctl_legacy_va_layout;
269 #endif
270
/*
 * Backing storage for the "ibrs_enabled" and "ibpb_enabled" entries in
 * kern_table; both start at 0 and are exported to other kernel code.
 */
271 u32 sysctl_ibrs_enabled = 0;
272 EXPORT_SYMBOL(sysctl_ibrs_enabled);
273 u32 sysctl_ibpb_enabled = 0;
274 EXPORT_SYMBOL(sysctl_ibpb_enabled);
275
276 /* The default sysctl tables: */
277
278 static struct ctl_table sysctl_base_table[] = {
279 {
280 .procname = "kernel",
281 .mode = 0555,
282 .child = kern_table,
283 },
284 {
285 .procname = "vm",
286 .mode = 0555,
287 .child = vm_table,
288 },
289 {
290 .procname = "fs",
291 .mode = 0555,
292 .child = fs_table,
293 },
294 {
295 .procname = "debug",
296 .mode = 0555,
297 .child = debug_table,
298 },
299 {
300 .procname = "dev",
301 .mode = 0555,
302 .child = dev_table,
303 },
304 { }
305 };
306
/*
 * Bounds for the scheduler tunables in kern_table. The granularity pair is
 * shared by the sched_min_granularity_ns and sched_latency_ns entries.
 */
307 #ifdef CONFIG_SCHED_DEBUG
308 static int min_sched_granularity_ns = 100000; /* 100 usecs */
309 static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
310 static int min_wakeup_granularity_ns; /* 0 usecs */
311 static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
312 #ifdef CONFIG_SMP
313 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
314 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
315 #endif /* CONFIG_SMP */
316 #endif /* CONFIG_SCHED_DEBUG */
317
/* NOTE(review): presumably bounds for a compaction tunable defined further down - confirm. */
318 #ifdef CONFIG_COMPACTION
319 static int min_extfrag_threshold;
320 static int max_extfrag_threshold = 1000;
321 #endif
322
/*
 * Entries of the "kernel" sysctl directory: the "kernel" entry of
 * sysctl_base_table names this array as its .child.
 *
 * Each entry gives the file name (.procname), the backing object (.data)
 * and its size (.maxlen), the file mode, and the handler that services
 * reads and writes. Where present, .extra1 and .extra2 point at the lower
 * and upper bound constants defined earlier in this file. Many entries
 * are compiled in only under a config option. The array is terminated by
 * an empty entry.
 */
323 static struct ctl_table kern_table[] = {
324 {
325 .procname = "sched_child_runs_first",
326 .data = &sysctl_sched_child_runs_first,
327 .maxlen = sizeof(unsigned int),
328 .mode = 0644,
329 .proc_handler = proc_dointvec,
330 },
331 #ifdef CONFIG_SCHED_DEBUG
332 {
333 .procname = "sched_min_granularity_ns",
334 .data = &sysctl_sched_min_granularity,
335 .maxlen = sizeof(unsigned int),
336 .mode = 0644,
337 .proc_handler = sched_proc_update_handler,
338 .extra1 = &min_sched_granularity_ns,
339 .extra2 = &max_sched_granularity_ns,
340 },
341 {
342 .procname = "sched_latency_ns",
343 .data = &sysctl_sched_latency,
344 .maxlen = sizeof(unsigned int),
345 .mode = 0644,
346 .proc_handler = sched_proc_update_handler,
347 .extra1 = &min_sched_granularity_ns,
348 .extra2 = &max_sched_granularity_ns,
349 },
350 {
351 .procname = "sched_wakeup_granularity_ns",
352 .data = &sysctl_sched_wakeup_granularity,
353 .maxlen = sizeof(unsigned int),
354 .mode = 0644,
355 .proc_handler = sched_proc_update_handler,
356 .extra1 = &min_wakeup_granularity_ns,
357 .extra2 = &max_wakeup_granularity_ns,
358 },
359 #ifdef CONFIG_SMP
360 {
361 .procname = "sched_tunable_scaling",
362 .data = &sysctl_sched_tunable_scaling,
363 .maxlen = sizeof(enum sched_tunable_scaling),
364 .mode = 0644,
365 .proc_handler = sched_proc_update_handler,
366 .extra1 = &min_sched_tunable_scaling,
367 .extra2 = &max_sched_tunable_scaling,
368 },
369 {
370 .procname = "sched_migration_cost_ns",
371 .data = &sysctl_sched_migration_cost,
372 .maxlen = sizeof(unsigned int),
373 .mode = 0644,
374 .proc_handler = proc_dointvec,
375 },
376 {
377 .procname = "sched_nr_migrate",
378 .data = &sysctl_sched_nr_migrate,
379 .maxlen = sizeof(unsigned int),
380 .mode = 0644,
381 .proc_handler = proc_dointvec,
382 },
383 {
384 .procname = "sched_time_avg_ms",
385 .data = &sysctl_sched_time_avg,
386 .maxlen = sizeof(unsigned int),
387 .mode = 0644,
388 .proc_handler = proc_dointvec_minmax,
389 .extra1 = &one,
390 },
391 #ifdef CONFIG_SCHEDSTATS
392 {
393 .procname = "sched_schedstats",
394 .data = NULL,
395 .maxlen = sizeof(unsigned int),
396 .mode = 0644,
397 .proc_handler = sysctl_schedstats,
398 .extra1 = &zero,
399 .extra2 = &one,
400 },
401 #endif /* CONFIG_SCHEDSTATS */
402 #endif /* CONFIG_SMP */
403 #ifdef CONFIG_NUMA_BALANCING
404 {
405 .procname = "numa_balancing_scan_delay_ms",
406 .data = &sysctl_numa_balancing_scan_delay,
407 .maxlen = sizeof(unsigned int),
408 .mode = 0644,
409 .proc_handler = proc_dointvec,
410 },
411 {
412 .procname = "numa_balancing_scan_period_min_ms",
413 .data = &sysctl_numa_balancing_scan_period_min,
414 .maxlen = sizeof(unsigned int),
415 .mode = 0644,
416 .proc_handler = proc_dointvec,
417 },
418 {
419 .procname = "numa_balancing_scan_period_max_ms",
420 .data = &sysctl_numa_balancing_scan_period_max,
421 .maxlen = sizeof(unsigned int),
422 .mode = 0644,
423 .proc_handler = proc_dointvec,
424 },
425 {
426 .procname = "numa_balancing_scan_size_mb",
427 .data = &sysctl_numa_balancing_scan_size,
428 .maxlen = sizeof(unsigned int),
429 .mode = 0644,
430 .proc_handler = proc_dointvec_minmax,
431 .extra1 = &one,
432 },
433 {
434 .procname = "numa_balancing",
435 .data = NULL, /* filled in by handler */
436 .maxlen = sizeof(unsigned int),
437 .mode = 0644,
438 .proc_handler = sysctl_numa_balancing,
439 .extra1 = &zero,
440 .extra2 = &one,
441 },
442 #endif /* CONFIG_NUMA_BALANCING */
443 #endif /* CONFIG_SCHED_DEBUG */
444 {
445 .procname = "sched_rt_period_us",
446 .data = &sysctl_sched_rt_period,
447 .maxlen = sizeof(unsigned int),
448 .mode = 0644,
449 .proc_handler = sched_rt_handler,
450 },
451 {
452 .procname = "sched_rt_runtime_us",
453 .data = &sysctl_sched_rt_runtime,
454 .maxlen = sizeof(int),
455 .mode = 0644,
456 .proc_handler = sched_rt_handler,
457 },
458 {
459 .procname = "sched_rr_timeslice_ms",
460 .data = &sysctl_sched_rr_timeslice,
461 .maxlen = sizeof(int),
462 .mode = 0644,
463 .proc_handler = sched_rr_handler,
464 },
465 #ifdef CONFIG_SCHED_AUTOGROUP
466 {
467 .procname = "sched_autogroup_enabled",
468 .data = &sysctl_sched_autogroup_enabled,
469 .maxlen = sizeof(unsigned int),
470 .mode = 0644,
471 .proc_handler = proc_dointvec_minmax,
472 .extra1 = &zero,
473 .extra2 = &one,
474 },
475 #endif
476 #ifdef CONFIG_CFS_BANDWIDTH
477 {
478 .procname = "sched_cfs_bandwidth_slice_us",
479 .data = &sysctl_sched_cfs_bandwidth_slice,
480 .maxlen = sizeof(unsigned int),
481 .mode = 0644,
482 .proc_handler = proc_dointvec_minmax,
483 .extra1 = &one,
484 },
485 #endif
486 #ifdef CONFIG_PROVE_LOCKING
487 {
488 .procname = "prove_locking",
489 .data = &prove_locking,
490 .maxlen = sizeof(int),
491 .mode = 0644,
492 .proc_handler = proc_dointvec,
493 },
494 #endif
495 #ifdef CONFIG_LOCK_STAT
496 {
497 .procname = "lock_stat",
498 .data = &lock_stat,
499 .maxlen = sizeof(int),
500 .mode = 0644,
501 .proc_handler = proc_dointvec,
502 },
503 #endif
504 {
505 .procname = "panic",
506 .data = &panic_timeout,
507 .maxlen = sizeof(int),
508 .mode = 0644,
509 .proc_handler = proc_dointvec,
510 },
511 #ifdef CONFIG_COREDUMP
512 {
513 .procname = "core_uses_pid",
514 .data = &core_uses_pid,
515 .maxlen = sizeof(int),
516 .mode = 0644,
517 .proc_handler = proc_dointvec,
518 },
519 {
520 .procname = "core_pattern",
521 .data = core_pattern,
522 .maxlen = CORENAME_MAX_SIZE,
523 .mode = 0644,
524 .proc_handler = proc_dostring_coredump,
525 },
526 {
527 .procname = "core_pipe_limit",
528 .data = &core_pipe_limit,
529 .maxlen = sizeof(unsigned int),
530 .mode = 0644,
531 .proc_handler = proc_dointvec,
532 },
533 #endif
534 #ifdef CONFIG_USER_NS
535 {
536 .procname = "unprivileged_userns_clone",
537 .data = &unprivileged_userns_clone,
538 .maxlen = sizeof(int),
539 .mode = 0644,
540 .proc_handler = proc_dointvec,
541 },
542 #endif
543 #ifdef CONFIG_PROC_SYSCTL
544 {
/* No .data is set here; presumably proc_taint supplies the value itself. */
545 .procname = "tainted",
546 .maxlen = sizeof(long),
547 .mode = 0644,
548 .proc_handler = proc_taint,
549 },
550 {
/* Bounds -1..1 match the three values of enum sysctl_writes_mode. */
551 .procname = "sysctl_writes_strict",
552 .data = &sysctl_writes_strict,
553 .maxlen = sizeof(int),
554 .mode = 0644,
555 .proc_handler = proc_dointvec_minmax,
556 .extra1 = &neg_one,
557 .extra2 = &one,
558 },
559 #endif
560 #ifdef CONFIG_LATENCYTOP
561 {
562 .procname = "latencytop",
563 .data = &latencytop_enabled,
564 .maxlen = sizeof(int),
565 .mode = 0644,
566 .proc_handler = sysctl_latencytop,
567 },
568 #endif
569 #ifdef CONFIG_BLK_DEV_INITRD
570 {
571 .procname = "real-root-dev",
572 .data = &real_root_dev,
573 .maxlen = sizeof(int),
574 .mode = 0644,
575 .proc_handler = proc_dointvec,
576 },
577 #endif
578 {
579 .procname = "print-fatal-signals",
580 .data = &print_fatal_signals,
581 .maxlen = sizeof(int),
582 .mode = 0644,
583 .proc_handler = proc_dointvec,
584 },
585 #ifdef CONFIG_SPARC
586 {
587 .procname = "reboot-cmd",
588 .data = reboot_command,
589 .maxlen = 256,
590 .mode = 0644,
591 .proc_handler = proc_dostring,
592 },
593 {
594 .procname = "stop-a",
595 .data = &stop_a_enabled,
596 .maxlen = sizeof (int),
597 .mode = 0644,
598 .proc_handler = proc_dointvec,
599 },
600 {
601 .procname = "scons-poweroff",
602 .data = &scons_pwroff,
603 .maxlen = sizeof (int),
604 .mode = 0644,
605 .proc_handler = proc_dointvec,
606 },
607 #endif
608 #ifdef CONFIG_SPARC64
609 {
610 .procname = "tsb-ratio",
611 .data = &sysctl_tsb_ratio,
612 .maxlen = sizeof (int),
613 .mode = 0644,
614 .proc_handler = proc_dointvec,
615 },
616 #endif
617 #ifdef __hppa__
618 {
619 .procname = "soft-power",
620 .data = &pwrsw_enabled,
621 .maxlen = sizeof (int),
622 .mode = 0644,
623 .proc_handler = proc_dointvec,
624 },
625 #endif
626 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
627 {
628 .procname = "unaligned-trap",
629 .data = &unaligned_enabled,
630 .maxlen = sizeof (int),
631 .mode = 0644,
632 .proc_handler = proc_dointvec,
633 },
634 #endif
635 {
636 .procname = "ctrl-alt-del",
637 .data = &C_A_D,
638 .maxlen = sizeof(int),
639 .mode = 0644,
640 .proc_handler = proc_dointvec,
641 },
642 #ifdef CONFIG_FUNCTION_TRACER
643 {
644 .procname = "ftrace_enabled",
645 .data = &ftrace_enabled,
646 .maxlen = sizeof(int),
647 .mode = 0644,
648 .proc_handler = ftrace_enable_sysctl,
649 },
650 #endif
651 #ifdef CONFIG_STACK_TRACER
652 {
653 .procname = "stack_tracer_enabled",
654 .data = &stack_tracer_enabled,
655 .maxlen = sizeof(int),
656 .mode = 0644,
657 .proc_handler = stack_trace_sysctl,
658 },
659 #endif
660 #ifdef CONFIG_TRACING
661 {
662 .procname = "ftrace_dump_on_oops",
663 .data = &ftrace_dump_on_oops,
664 .maxlen = sizeof(int),
665 .mode = 0644,
666 .proc_handler = proc_dointvec,
667 },
668 {
669 .procname = "traceoff_on_warning",
670 .data = &__disable_trace_on_warning,
671 .maxlen = sizeof(__disable_trace_on_warning),
672 .mode = 0644,
673 .proc_handler = proc_dointvec,
674 },
675 {
676 .procname = "tracepoint_printk",
677 .data = &tracepoint_printk,
678 .maxlen = sizeof(tracepoint_printk),
679 .mode = 0644,
680 .proc_handler = tracepoint_printk_sysctl,
681 },
682 #endif
683 #ifdef CONFIG_KEXEC_CORE
684 {
685 .procname = "kexec_load_disabled",
686 .data = &kexec_load_disabled,
687 .maxlen = sizeof(int),
688 .mode = 0644,
689 /* only handle a transition from default "0" to "1" */
690 .proc_handler = proc_dointvec_minmax,
691 .extra1 = &one,
692 .extra2 = &one,
693 },
694 #endif
695 #ifdef CONFIG_MODULES
696 {
697 .procname = "modprobe",
698 .data = &modprobe_path,
699 .maxlen = KMOD_PATH_LEN,
700 .mode = 0644,
701 .proc_handler = proc_dostring,
702 },
703 {
704 .procname = "modules_disabled",
705 .data = &modules_disabled,
706 .maxlen = sizeof(int),
707 .mode = 0644,
708 /* only handle a transition from default "0" to "1" */
709 .proc_handler = proc_dointvec_minmax,
710 .extra1 = &one,
711 .extra2 = &one,
712 },
713 #endif
714 #ifdef CONFIG_UEVENT_HELPER
715 {
716 .procname = "hotplug",
717 .data = &uevent_helper,
718 .maxlen = UEVENT_HELPER_PATH_LEN,
719 .mode = 0644,
720 .proc_handler = proc_dostring,
721 },
722 #endif
723 #ifdef CONFIG_CHR_DEV_SG
724 {
725 .procname = "sg-big-buff",
726 .data = &sg_big_buff,
727 .maxlen = sizeof (int),
728 .mode = 0444,
729 .proc_handler = proc_dointvec,
730 },
731 #endif
732 #ifdef CONFIG_BSD_PROCESS_ACCT
733 {
734 .procname = "acct",
735 .data = &acct_parm,
736 .maxlen = 3*sizeof(int),
737 .mode = 0644,
738 .proc_handler = proc_dointvec,
739 },
740 #endif
741 #ifdef CONFIG_MAGIC_SYSRQ
742 {
743 .procname = "sysrq",
744 .data = &__sysrq_enabled,
745 .maxlen = sizeof (int),
746 .mode = 0644,
747 .proc_handler = sysrq_sysctl_handler,
748 },
749 #endif
750 #ifdef CONFIG_PROC_SYSCTL
751 {
752 .procname = "cad_pid",
753 .data = NULL,
754 .maxlen = sizeof (int),
755 .mode = 0600,
756 .proc_handler = proc_do_cad_pid,
757 },
758 #endif
759 {
760 .procname = "threads-max",
761 .data = NULL,
762 .maxlen = sizeof(int),
763 .mode = 0644,
764 .proc_handler = sysctl_max_threads,
765 },
766 {
767 .procname = "random",
768 .mode = 0555,
769 .child = random_table,
770 },
771 {
772 .procname = "usermodehelper",
773 .mode = 0555,
774 .child = usermodehelper_table,
775 },
776 {
777 .procname = "overflowuid",
778 .data = &overflowuid,
779 .maxlen = sizeof(int),
780 .mode = 0644,
781 .proc_handler = proc_dointvec_minmax,
782 .extra1 = &minolduid,
783 .extra2 = &maxolduid,
784 },
785 {
786 .procname = "overflowgid",
787 .data = &overflowgid,
788 .maxlen = sizeof(int),
789 .mode = 0644,
790 .proc_handler = proc_dointvec_minmax,
791 .extra1 = &minolduid,
792 .extra2 = &maxolduid,
793 },
794 #ifdef CONFIG_S390
795 #ifdef CONFIG_MATHEMU
796 {
797 .procname = "ieee_emulation_warnings",
798 .data = &sysctl_ieee_emulation_warnings,
799 .maxlen = sizeof(int),
800 .mode = 0644,
801 .proc_handler = proc_dointvec,
802 },
803 #endif
804 {
805 .procname = "userprocess_debug",
806 .data = &show_unhandled_signals,
807 .maxlen = sizeof(int),
808 .mode = 0644,
809 .proc_handler = proc_dointvec,
810 },
811 #endif
812 {
813 .procname = "pid_max",
814 .data = &pid_max,
815 .maxlen = sizeof (int),
816 .mode = 0644,
817 .proc_handler = proc_dointvec_minmax,
818 .extra1 = &pid_max_min,
819 .extra2 = &pid_max_max,
820 },
821 {
822 .procname = "panic_on_oops",
823 .data = &panic_on_oops,
824 .maxlen = sizeof(int),
825 .mode = 0644,
826 .proc_handler = proc_dointvec,
827 },
828 #if defined CONFIG_PRINTK
829 {
830 .procname = "printk",
831 .data = &console_loglevel,
832 .maxlen = 4*sizeof(int),
833 .mode = 0644,
834 .proc_handler = proc_dointvec,
835 },
836 {
837 .procname = "printk_ratelimit",
838 .data = &printk_ratelimit_state.interval,
839 .maxlen = sizeof(int),
840 .mode = 0644,
841 .proc_handler = proc_dointvec_jiffies,
842 },
843 {
844 .procname = "printk_ratelimit_burst",
845 .data = &printk_ratelimit_state.burst,
846 .maxlen = sizeof(int),
847 .mode = 0644,
848 .proc_handler = proc_dointvec,
849 },
850 {
851 .procname = "printk_delay",
852 .data = &printk_delay_msec,
853 .maxlen = sizeof(int),
854 .mode = 0644,
855 .proc_handler = proc_dointvec_minmax,
856 .extra1 = &zero,
857 .extra2 = &ten_thousand,
858 },
859 {
860 .procname = "printk_devkmsg",
861 .data = devkmsg_log_str,
862 .maxlen = DEVKMSG_STR_MAX_SIZE,
863 .mode = 0644,
864 .proc_handler = devkmsg_sysctl_set_loglvl,
865 },
866 {
867 .procname = "dmesg_restrict",
868 .data = &dmesg_restrict,
869 .maxlen = sizeof(int),
870 .mode = 0644,
871 .proc_handler = proc_dointvec_minmax_sysadmin,
872 .extra1 = &zero,
873 .extra2 = &one,
874 },
875 {
876 .procname = "kptr_restrict",
877 .data = &kptr_restrict,
878 .maxlen = sizeof(int),
879 .mode = 0644,
880 .proc_handler = proc_dointvec_minmax_sysadmin,
881 .extra1 = &zero,
882 .extra2 = &two,
883 },
884 #endif
885 {
886 .procname = "ngroups_max",
887 .data = &ngroups_max,
888 .maxlen = sizeof (int),
889 .mode = 0444,
890 .proc_handler = proc_dointvec,
891 },
892 {
/* cap_last_cap is declared const; the cast drops that and the mode is 0444. */
893 .procname = "cap_last_cap",
894 .data = (void *)&cap_last_cap,
895 .maxlen = sizeof(int),
896 .mode = 0444,
897 .proc_handler = proc_dointvec,
898 },
899 #if defined(CONFIG_LOCKUP_DETECTOR)
900 {
901 .procname = "watchdog",
902 .data = &watchdog_user_enabled,
903 .maxlen = sizeof (int),
904 .mode = 0644,
905 .proc_handler = proc_watchdog,
906 .extra1 = &zero,
907 .extra2 = &one,
908 },
909 {
910 .procname = "watchdog_thresh",
911 .data = &watchdog_thresh,
912 .maxlen = sizeof(int),
913 .mode = 0644,
914 .proc_handler = proc_watchdog_thresh,
915 .extra1 = &zero,
916 .extra2 = &sixty,
917 },
918 {
919 .procname = "nmi_watchdog",
920 .data = &nmi_watchdog_enabled,
921 .maxlen = sizeof (int),
922 .mode = 0644,
923 .proc_handler = proc_nmi_watchdog,
924 .extra1 = &zero,
/*
 * The upper bound is 1 only when CONFIG_HAVE_NMI_WATCHDOG or
 * CONFIG_HARDLOCKUP_DETECTOR is set; otherwise both bounds point at zero.
 */
925 #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
926 .extra2 = &one,
927 #else
928 .extra2 = &zero,
929 #endif
930 },
931 {
932 .procname = "watchdog_cpumask",
933 .data = &watchdog_cpumask_bits,
934 .maxlen = NR_CPUS,
935 .mode = 0644,
936 .proc_handler = proc_watchdog_cpumask,
937 },
938 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
939 {
940 .procname = "soft_watchdog",
941 .data = &soft_watchdog_enabled,
942 .maxlen = sizeof (int),
943 .mode = 0644,
944 .proc_handler = proc_soft_watchdog,
945 .extra1 = &zero,
946 .extra2 = &one,
947 },
948 {
949 .procname = "softlockup_panic",
950 .data = &softlockup_panic,
951 .maxlen = sizeof(int),
952 .mode = 0644,
953 .proc_handler = proc_dointvec_minmax,
954 .extra1 = &zero,
955 .extra2 = &one,
956 },
957 #ifdef CONFIG_SMP
958 {
959 .procname = "softlockup_all_cpu_backtrace",
960 .data = &sysctl_softlockup_all_cpu_backtrace,
961 .maxlen = sizeof(int),
962 .mode = 0644,
963 .proc_handler = proc_dointvec_minmax,
964 .extra1 = &zero,
965 .extra2 = &one,
966 },
967 #endif /* CONFIG_SMP */
968 #endif
969 #ifdef CONFIG_HARDLOCKUP_DETECTOR
970 {
971 .procname = "hardlockup_panic",
972 .data = &hardlockup_panic,
973 .maxlen = sizeof(int),
974 .mode = 0644,
975 .proc_handler = proc_dointvec_minmax,
976 .extra1 = &zero,
977 .extra2 = &one,
978 },
979 #ifdef CONFIG_SMP
980 {
981 .procname = "hardlockup_all_cpu_backtrace",
982 .data = &sysctl_hardlockup_all_cpu_backtrace,
983 .maxlen = sizeof(int),
984 .mode = 0644,
985 .proc_handler = proc_dointvec_minmax,
986 .extra1 = &zero,
987 .extra2 = &one,
988 },
989 #endif /* CONFIG_SMP */
990 #endif
991 #endif
992
993 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
994 {
995 .procname = "unknown_nmi_panic",
996 .data = &unknown_nmi_panic,
997 .maxlen = sizeof (int),
998 .mode = 0644,
999 .proc_handler = proc_dointvec,
1000 },
1001 #endif
1002 #if defined(CONFIG_X86)
1003 {
1004 .procname = "panic_on_unrecovered_nmi",
1005 .data = &panic_on_unrecovered_nmi,
1006 .maxlen = sizeof(int),
1007 .mode = 0644,
1008 .proc_handler = proc_dointvec,
1009 },
1010 {
1011 .procname = "panic_on_io_nmi",
1012 .data = &panic_on_io_nmi,
1013 .maxlen = sizeof(int),
1014 .mode = 0644,
1015 .proc_handler = proc_dointvec,
1016 },
1017 #ifdef CONFIG_DEBUG_STACKOVERFLOW
1018 {
1019 .procname = "panic_on_stackoverflow",
1020 .data = &sysctl_panic_on_stackoverflow,
1021 .maxlen = sizeof(int),
1022 .mode = 0644,
1023 .proc_handler = proc_dointvec,
1024 },
1025 #endif
1026 {
1027 .procname = "bootloader_type",
1028 .data = &bootloader_type,
1029 .maxlen = sizeof (int),
1030 .mode = 0444,
1031 .proc_handler = proc_dointvec,
1032 },
1033 {
1034 .procname = "bootloader_version",
1035 .data = &bootloader_version,
1036 .maxlen = sizeof (int),
1037 .mode = 0444,
1038 .proc_handler = proc_dointvec,
1039 },
1040 {
1041 .procname = "io_delay_type",
1042 .data = &io_delay_type,
1043 .maxlen = sizeof(int),
1044 .mode = 0644,
1045 .proc_handler = proc_dointvec,
1046 },
1047 #endif
1048 #if defined(CONFIG_MMU)
1049 {
1050 .procname = "randomize_va_space",
1051 .data = &randomize_va_space,
1052 .maxlen = sizeof(int),
1053 .mode = 0644,
1054 .proc_handler = proc_dointvec,
1055 },
1056 #endif
1057 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1058 {
1059 .procname = "spin_retry",
1060 .data = &spin_retry,
1061 .maxlen = sizeof (int),
1062 .mode = 0644,
1063 .proc_handler = proc_dointvec,
1064 },
1065 #endif
1066 #if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1067 {
1068 .procname = "acpi_video_flags",
1069 .data = &acpi_realmode_flags,
1070 .maxlen = sizeof (unsigned long),
1071 .mode = 0644,
1072 .proc_handler = proc_doulongvec_minmax,
1073 },
1074 #endif
1075 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1076 {
1077 .procname = "ignore-unaligned-usertrap",
1078 .data = &no_unaligned_warning,
1079 .maxlen = sizeof (int),
1080 .mode = 0644,
1081 .proc_handler = proc_dointvec,
1082 },
1083 #endif
1084 #ifdef CONFIG_IA64
1085 {
1086 .procname = "unaligned-dump-stack",
1087 .data = &unaligned_dump_stack,
1088 .maxlen = sizeof (int),
1089 .mode = 0644,
1090 .proc_handler = proc_dointvec,
1091 },
1092 #endif
1093 #ifdef CONFIG_DETECT_HUNG_TASK
1094 {
1095 .procname = "hung_task_panic",
1096 .data = &sysctl_hung_task_panic,
1097 .maxlen = sizeof(int),
1098 .mode = 0644,
1099 .proc_handler = proc_dointvec_minmax,
1100 .extra1 = &zero,
1101 .extra2 = &one,
1102 },
1103 {
1104 .procname = "hung_task_check_count",
1105 .data = &sysctl_hung_task_check_count,
1106 .maxlen = sizeof(int),
1107 .mode = 0644,
1108 .proc_handler = proc_dointvec_minmax,
1109 .extra1 = &zero,
1110 },
1111 {
1112 .procname = "hung_task_timeout_secs",
1113 .data = &sysctl_hung_task_timeout_secs,
1114 .maxlen = sizeof(unsigned long),
1115 .mode = 0644,
1116 .proc_handler = proc_dohung_task_timeout_secs,
1117 .extra2 = &hung_task_timeout_max,
1118 },
1119 {
1120 .procname = "hung_task_warnings",
1121 .data = &sysctl_hung_task_warnings,
1122 .maxlen = sizeof(int),
1123 .mode = 0644,
1124 .proc_handler = proc_dointvec_minmax,
1125 .extra1 = &neg_one,
1126 },
1127 #endif
1128 #ifdef CONFIG_RT_MUTEXES
1129 {
1130 .procname = "max_lock_depth",
1131 .data = &max_lock_depth,
1132 .maxlen = sizeof(int),
1133 .mode = 0644,
1134 .proc_handler = proc_dointvec,
1135 },
1136 #endif
1137 {
1138 .procname = "poweroff_cmd",
1139 .data = &poweroff_cmd,
1140 .maxlen = POWEROFF_CMD_PATH_LEN,
1141 .mode = 0644,
1142 .proc_handler = proc_dostring,
1143 },
1144 #ifdef CONFIG_KEYS
1145 {
1146 .procname = "keys",
1147 .mode = 0555,
1148 .child = key_sysctls,
1149 },
1150 #endif
1151 #ifdef CONFIG_PERF_EVENTS
1152 /*
1153 * User-space scripts rely on the existence of this file
1154 * as a feature check for perf_events being enabled.
1155 *
1156 * So it's an ABI, do not remove!
1157 */
1158 {
1159 .procname = "perf_event_paranoid",
1160 .data = &sysctl_perf_event_paranoid,
1161 .maxlen = sizeof(sysctl_perf_event_paranoid),
1162 .mode = 0644,
1163 .proc_handler = proc_dointvec,
1164 },
1165 {
1166 .procname = "perf_event_mlock_kb",
1167 .data = &sysctl_perf_event_mlock,
1168 .maxlen = sizeof(sysctl_perf_event_mlock),
1169 .mode = 0644,
1170 .proc_handler = proc_dointvec,
1171 },
1172 {
1173 .procname = "perf_event_max_sample_rate",
1174 .data = &sysctl_perf_event_sample_rate,
1175 .maxlen = sizeof(sysctl_perf_event_sample_rate),
1176 .mode = 0644,
1177 .proc_handler = perf_proc_update_handler,
1178 .extra1 = &one,
1179 },
1180 {
1181 .procname = "perf_cpu_time_max_percent",
1182 .data = &sysctl_perf_cpu_time_max_percent,
1183 .maxlen = sizeof(sysctl_perf_cpu_time_max_percent),
1184 .mode = 0644,
1185 .proc_handler = perf_cpu_time_max_percent_handler,
1186 .extra1 = &zero,
1187 .extra2 = &one_hundred,
1188 },
1189 {
1190 .procname = "perf_event_max_stack",
1191 .data = &sysctl_perf_event_max_stack,
1192 .maxlen = sizeof(sysctl_perf_event_max_stack),
1193 .mode = 0644,
1194 .proc_handler = perf_event_max_stack_handler,
1195 .extra1 = &zero,
1196 .extra2 = &six_hundred_forty_kb,
1197 },
1198 {
1199 .procname = "perf_event_max_contexts_per_stack",
1200 .data = &sysctl_perf_event_max_contexts_per_stack,
1201 .maxlen = sizeof(sysctl_perf_event_max_contexts_per_stack),
1202 .mode = 0644,
1203 .proc_handler = perf_event_max_stack_handler,
1204 .extra1 = &zero,
1205 .extra2 = &one_thousand,
1206 },
1207 #endif
1208 #ifdef CONFIG_KMEMCHECK
1209 {
1210 .procname = "kmemcheck",
1211 .data = &kmemcheck_enabled,
1212 .maxlen = sizeof(int),
1213 .mode = 0644,
1214 .proc_handler = proc_dointvec,
1215 },
1216 #endif
1217 {
1218 .procname = "panic_on_warn",
1219 .data = &panic_on_warn,
1220 .maxlen = sizeof(int),
1221 .mode = 0644,
1222 .proc_handler = proc_dointvec_minmax,
1223 .extra1 = &zero,
1224 .extra2 = &one,
1225 },
1226 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1227 {
1228 .procname = "timer_migration",
1229 .data = &sysctl_timer_migration,
1230 .maxlen = sizeof(unsigned int),
1231 .mode = 0644,
1232 .proc_handler = timer_migration_handler,
1233 .extra1 = &zero,
1234 .extra2 = &one,
1235 },
1236 #endif
1237 #ifdef CONFIG_BPF_SYSCALL
1238 {
1239 .procname = "unprivileged_bpf_disabled",
1240 .data = &sysctl_unprivileged_bpf_disabled,
1241 .maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
1242 .mode = 0644,
1243 /* only handle a transition from default "0" to "1" */
1244 .proc_handler = proc_dointvec_minmax,
1245 .extra1 = &one,
1246 .extra2 = &one,
1247 },
1248 #endif
1249 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1250 {
1251 .procname = "panic_on_rcu_stall",
1252 .data = &sysctl_panic_on_rcu_stall,
1253 .maxlen = sizeof(sysctl_panic_on_rcu_stall),
1254 .mode = 0644,
1255 .proc_handler = proc_dointvec_minmax,
1256 .extra1 = &zero,
1257 .extra2 = &one,
1258 },
1259 #endif
1260 #ifdef CONFIG_X86
1261 {
1262 .procname = "ibrs_enabled",
1263 .data = &sysctl_ibrs_enabled,
1264 .maxlen = sizeof(unsigned int),
1265 .mode = 0644,
1266 .proc_handler = proc_dointvec_ibrs_ctrl,
1267 .extra1 = &zero,
1268 .extra2 = &two,
1269 },
1270 {
1271 .procname = "ibpb_enabled",
1272 .data = &sysctl_ibpb_enabled,
1273 .maxlen = sizeof(unsigned int),
1274 .mode = 0644,
1275 .proc_handler = proc_dointvec_ibpb_ctrl,
1276 .extra1 = &zero,
1277 .extra2 = &one,
1278 },
1279 #endif
1280 { }
1281 };
1282
1283 static struct ctl_table vm_table[] = {
1284 {
1285 .procname = "overcommit_memory",
1286 .data = &sysctl_overcommit_memory,
1287 .maxlen = sizeof(sysctl_overcommit_memory),
1288 .mode = 0644,
1289 .proc_handler = proc_dointvec_minmax,
1290 .extra1 = &zero,
1291 .extra2 = &two,
1292 },
1293 {
1294 .procname = "panic_on_oom",
1295 .data = &sysctl_panic_on_oom,
1296 .maxlen = sizeof(sysctl_panic_on_oom),
1297 .mode = 0644,
1298 .proc_handler = proc_dointvec_minmax,
1299 .extra1 = &zero,
1300 .extra2 = &two,
1301 },
1302 {
1303 .procname = "oom_kill_allocating_task",
1304 .data = &sysctl_oom_kill_allocating_task,
1305 .maxlen = sizeof(sysctl_oom_kill_allocating_task),
1306 .mode = 0644,
1307 .proc_handler = proc_dointvec,
1308 },
1309 {
1310 .procname = "oom_dump_tasks",
1311 .data = &sysctl_oom_dump_tasks,
1312 .maxlen = sizeof(sysctl_oom_dump_tasks),
1313 .mode = 0644,
1314 .proc_handler = proc_dointvec,
1315 },
1316 {
1317 .procname = "overcommit_ratio",
1318 .data = &sysctl_overcommit_ratio,
1319 .maxlen = sizeof(sysctl_overcommit_ratio),
1320 .mode = 0644,
1321 .proc_handler = overcommit_ratio_handler,
1322 },
1323 {
1324 .procname = "overcommit_kbytes",
1325 .data = &sysctl_overcommit_kbytes,
1326 .maxlen = sizeof(sysctl_overcommit_kbytes),
1327 .mode = 0644,
1328 .proc_handler = overcommit_kbytes_handler,
1329 },
1330 {
1331 .procname = "page-cluster",
1332 .data = &page_cluster,
1333 .maxlen = sizeof(int),
1334 .mode = 0644,
1335 .proc_handler = proc_dointvec_minmax,
1336 .extra1 = &zero,
1337 },
1338 {
1339 .procname = "dirty_background_ratio",
1340 .data = &dirty_background_ratio,
1341 .maxlen = sizeof(dirty_background_ratio),
1342 .mode = 0644,
1343 .proc_handler = dirty_background_ratio_handler,
1344 .extra1 = &zero,
1345 .extra2 = &one_hundred,
1346 },
1347 {
1348 .procname = "dirty_background_bytes",
1349 .data = &dirty_background_bytes,
1350 .maxlen = sizeof(dirty_background_bytes),
1351 .mode = 0644,
1352 .proc_handler = dirty_background_bytes_handler,
1353 .extra1 = &one_ul,
1354 },
1355 {
1356 .procname = "dirty_ratio",
1357 .data = &vm_dirty_ratio,
1358 .maxlen = sizeof(vm_dirty_ratio),
1359 .mode = 0644,
1360 .proc_handler = dirty_ratio_handler,
1361 .extra1 = &zero,
1362 .extra2 = &one_hundred,
1363 },
1364 {
1365 .procname = "dirty_bytes",
1366 .data = &vm_dirty_bytes,
1367 .maxlen = sizeof(vm_dirty_bytes),
1368 .mode = 0644,
1369 .proc_handler = dirty_bytes_handler,
1370 .extra1 = &dirty_bytes_min,
1371 },
1372 {
1373 .procname = "dirty_writeback_centisecs",
1374 .data = &dirty_writeback_interval,
1375 .maxlen = sizeof(dirty_writeback_interval),
1376 .mode = 0644,
1377 .proc_handler = dirty_writeback_centisecs_handler,
1378 },
1379 {
1380 .procname = "dirty_expire_centisecs",
1381 .data = &dirty_expire_interval,
1382 .maxlen = sizeof(dirty_expire_interval),
1383 .mode = 0644,
1384 .proc_handler = proc_dointvec_minmax,
1385 .extra1 = &zero,
1386 },
1387 {
1388 .procname = "dirtytime_expire_seconds",
1389 .data = &dirtytime_expire_interval,
1390 .maxlen = sizeof(dirty_expire_interval),
1391 .mode = 0644,
1392 .proc_handler = dirtytime_interval_handler,
1393 .extra1 = &zero,
1394 },
1395 {
1396 .procname = "nr_pdflush_threads",
1397 .mode = 0444 /* read-only */,
1398 .proc_handler = pdflush_proc_obsolete,
1399 },
1400 {
1401 .procname = "swappiness",
1402 .data = &vm_swappiness,
1403 .maxlen = sizeof(vm_swappiness),
1404 .mode = 0644,
1405 .proc_handler = proc_dointvec_minmax,
1406 .extra1 = &zero,
1407 .extra2 = &one_hundred,
1408 },
1409 #ifdef CONFIG_HUGETLB_PAGE
1410 {
1411 .procname = "nr_hugepages",
1412 .data = NULL,
1413 .maxlen = sizeof(unsigned long),
1414 .mode = 0644,
1415 .proc_handler = hugetlb_sysctl_handler,
1416 },
1417 #ifdef CONFIG_NUMA
1418 {
1419 .procname = "nr_hugepages_mempolicy",
1420 .data = NULL,
1421 .maxlen = sizeof(unsigned long),
1422 .mode = 0644,
1423 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1424 },
1425 #endif
1426 {
1427 .procname = "hugetlb_shm_group",
1428 .data = &sysctl_hugetlb_shm_group,
1429 .maxlen = sizeof(gid_t),
1430 .mode = 0644,
1431 .proc_handler = proc_dointvec,
1432 },
1433 {
1434 .procname = "hugepages_treat_as_movable",
1435 .data = &hugepages_treat_as_movable,
1436 .maxlen = sizeof(int),
1437 .mode = 0644,
1438 .proc_handler = proc_dointvec,
1439 },
1440 {
1441 .procname = "nr_overcommit_hugepages",
1442 .data = NULL,
1443 .maxlen = sizeof(unsigned long),
1444 .mode = 0644,
1445 .proc_handler = hugetlb_overcommit_handler,
1446 },
1447 #endif
1448 {
1449 .procname = "lowmem_reserve_ratio",
1450 .data = &sysctl_lowmem_reserve_ratio,
1451 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
1452 .mode = 0644,
1453 .proc_handler = lowmem_reserve_ratio_sysctl_handler,
1454 },
1455 {
1456 .procname = "drop_caches",
1457 .data = &sysctl_drop_caches,
1458 .maxlen = sizeof(int),
1459 .mode = 0644,
1460 .proc_handler = drop_caches_sysctl_handler,
1461 .extra1 = &one,
1462 .extra2 = &four,
1463 },
1464 #ifdef CONFIG_COMPACTION
1465 {
1466 .procname = "compact_memory",
1467 .data = &sysctl_compact_memory,
1468 .maxlen = sizeof(int),
1469 .mode = 0200,
1470 .proc_handler = sysctl_compaction_handler,
1471 },
1472 {
1473 .procname = "extfrag_threshold",
1474 .data = &sysctl_extfrag_threshold,
1475 .maxlen = sizeof(int),
1476 .mode = 0644,
1477 .proc_handler = sysctl_extfrag_handler,
1478 .extra1 = &min_extfrag_threshold,
1479 .extra2 = &max_extfrag_threshold,
1480 },
1481 {
1482 .procname = "compact_unevictable_allowed",
1483 .data = &sysctl_compact_unevictable_allowed,
1484 .maxlen = sizeof(int),
1485 .mode = 0644,
1486 .proc_handler = proc_dointvec,
1487 .extra1 = &zero,
1488 .extra2 = &one,
1489 },
1490
1491 #endif /* CONFIG_COMPACTION */
1492 {
1493 .procname = "min_free_kbytes",
1494 .data = &min_free_kbytes,
1495 .maxlen = sizeof(min_free_kbytes),
1496 .mode = 0644,
1497 .proc_handler = min_free_kbytes_sysctl_handler,
1498 .extra1 = &zero,
1499 },
1500 {
1501 .procname = "watermark_scale_factor",
1502 .data = &watermark_scale_factor,
1503 .maxlen = sizeof(watermark_scale_factor),
1504 .mode = 0644,
1505 .proc_handler = watermark_scale_factor_sysctl_handler,
1506 .extra1 = &one,
1507 .extra2 = &one_thousand,
1508 },
1509 {
1510 .procname = "percpu_pagelist_fraction",
1511 .data = &percpu_pagelist_fraction,
1512 .maxlen = sizeof(percpu_pagelist_fraction),
1513 .mode = 0644,
1514 .proc_handler = percpu_pagelist_fraction_sysctl_handler,
1515 .extra1 = &zero,
1516 },
1517 #ifdef CONFIG_MMU
1518 {
1519 .procname = "max_map_count",
1520 .data = &sysctl_max_map_count,
1521 .maxlen = sizeof(sysctl_max_map_count),
1522 .mode = 0644,
1523 .proc_handler = proc_dointvec_minmax,
1524 .extra1 = &zero,
1525 },
1526 #else
1527 {
1528 .procname = "nr_trim_pages",
1529 .data = &sysctl_nr_trim_pages,
1530 .maxlen = sizeof(sysctl_nr_trim_pages),
1531 .mode = 0644,
1532 .proc_handler = proc_dointvec_minmax,
1533 .extra1 = &zero,
1534 },
1535 #endif
1536 {
1537 .procname = "laptop_mode",
1538 .data = &laptop_mode,
1539 .maxlen = sizeof(laptop_mode),
1540 .mode = 0644,
1541 .proc_handler = proc_dointvec_jiffies,
1542 },
1543 {
1544 .procname = "block_dump",
1545 .data = &block_dump,
1546 .maxlen = sizeof(block_dump),
1547 .mode = 0644,
1548 .proc_handler = proc_dointvec,
1549 .extra1 = &zero,
1550 },
1551 {
1552 .procname = "vfs_cache_pressure",
1553 .data = &sysctl_vfs_cache_pressure,
1554 .maxlen = sizeof(sysctl_vfs_cache_pressure),
1555 .mode = 0644,
1556 .proc_handler = proc_dointvec,
1557 .extra1 = &zero,
1558 },
1559 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1560 {
1561 .procname = "legacy_va_layout",
1562 .data = &sysctl_legacy_va_layout,
1563 .maxlen = sizeof(sysctl_legacy_va_layout),
1564 .mode = 0644,
1565 .proc_handler = proc_dointvec,
1566 .extra1 = &zero,
1567 },
1568 #endif
1569 #ifdef CONFIG_NUMA
1570 {
1571 .procname = "zone_reclaim_mode",
1572 .data = &node_reclaim_mode,
1573 .maxlen = sizeof(node_reclaim_mode),
1574 .mode = 0644,
1575 .proc_handler = proc_dointvec,
1576 .extra1 = &zero,
1577 },
1578 {
1579 .procname = "min_unmapped_ratio",
1580 .data = &sysctl_min_unmapped_ratio,
1581 .maxlen = sizeof(sysctl_min_unmapped_ratio),
1582 .mode = 0644,
1583 .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
1584 .extra1 = &zero,
1585 .extra2 = &one_hundred,
1586 },
1587 {
1588 .procname = "min_slab_ratio",
1589 .data = &sysctl_min_slab_ratio,
1590 .maxlen = sizeof(sysctl_min_slab_ratio),
1591 .mode = 0644,
1592 .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
1593 .extra1 = &zero,
1594 .extra2 = &one_hundred,
1595 },
1596 #endif
1597 #ifdef CONFIG_SMP
1598 {
1599 .procname = "stat_interval",
1600 .data = &sysctl_stat_interval,
1601 .maxlen = sizeof(sysctl_stat_interval),
1602 .mode = 0644,
1603 .proc_handler = proc_dointvec_jiffies,
1604 },
1605 {
1606 .procname = "stat_refresh",
1607 .data = NULL,
1608 .maxlen = 0,
1609 .mode = 0600,
1610 .proc_handler = vmstat_refresh,
1611 },
1612 #endif
1613 #ifdef CONFIG_MMU
1614 {
1615 .procname = "mmap_min_addr",
1616 .data = &dac_mmap_min_addr,
1617 .maxlen = sizeof(unsigned long),
1618 .mode = 0644,
1619 .proc_handler = mmap_min_addr_handler,
1620 },
1621 #endif
1622 #ifdef CONFIG_NUMA
1623 {
1624 .procname = "numa_zonelist_order",
1625 .data = &numa_zonelist_order,
1626 .maxlen = NUMA_ZONELIST_ORDER_LEN,
1627 .mode = 0644,
1628 .proc_handler = numa_zonelist_order_handler,
1629 },
1630 #endif
1631 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1632 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1633 {
1634 .procname = "vdso_enabled",
1635 #ifdef CONFIG_X86_32
1636 .data = &vdso32_enabled,
1637 .maxlen = sizeof(vdso32_enabled),
1638 #else
1639 .data = &vdso_enabled,
1640 .maxlen = sizeof(vdso_enabled),
1641 #endif
1642 .mode = 0644,
1643 .proc_handler = proc_dointvec,
1644 .extra1 = &zero,
1645 },
1646 #endif
1647 #ifdef CONFIG_HIGHMEM
1648 {
1649 .procname = "highmem_is_dirtyable",
1650 .data = &vm_highmem_is_dirtyable,
1651 .maxlen = sizeof(vm_highmem_is_dirtyable),
1652 .mode = 0644,
1653 .proc_handler = proc_dointvec_minmax,
1654 .extra1 = &zero,
1655 .extra2 = &one,
1656 },
1657 #endif
1658 #ifdef CONFIG_MEMORY_FAILURE
1659 {
1660 .procname = "memory_failure_early_kill",
1661 .data = &sysctl_memory_failure_early_kill,
1662 .maxlen = sizeof(sysctl_memory_failure_early_kill),
1663 .mode = 0644,
1664 .proc_handler = proc_dointvec_minmax,
1665 .extra1 = &zero,
1666 .extra2 = &one,
1667 },
1668 {
1669 .procname = "memory_failure_recovery",
1670 .data = &sysctl_memory_failure_recovery,
1671 .maxlen = sizeof(sysctl_memory_failure_recovery),
1672 .mode = 0644,
1673 .proc_handler = proc_dointvec_minmax,
1674 .extra1 = &zero,
1675 .extra2 = &one,
1676 },
1677 #endif
1678 {
1679 .procname = "user_reserve_kbytes",
1680 .data = &sysctl_user_reserve_kbytes,
1681 .maxlen = sizeof(sysctl_user_reserve_kbytes),
1682 .mode = 0644,
1683 .proc_handler = proc_doulongvec_minmax,
1684 },
1685 {
1686 .procname = "admin_reserve_kbytes",
1687 .data = &sysctl_admin_reserve_kbytes,
1688 .maxlen = sizeof(sysctl_admin_reserve_kbytes),
1689 .mode = 0644,
1690 .proc_handler = proc_doulongvec_minmax,
1691 },
1692 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1693 {
1694 .procname = "mmap_rnd_bits",
1695 .data = &mmap_rnd_bits,
1696 .maxlen = sizeof(mmap_rnd_bits),
1697 .mode = 0600,
1698 .proc_handler = proc_dointvec_minmax,
1699 .extra1 = (void *)&mmap_rnd_bits_min,
1700 .extra2 = (void *)&mmap_rnd_bits_max,
1701 },
1702 #endif
1703 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1704 {
1705 .procname = "mmap_rnd_compat_bits",
1706 .data = &mmap_rnd_compat_bits,
1707 .maxlen = sizeof(mmap_rnd_compat_bits),
1708 .mode = 0600,
1709 .proc_handler = proc_dointvec_minmax,
1710 .extra1 = (void *)&mmap_rnd_compat_bits_min,
1711 .extra2 = (void *)&mmap_rnd_compat_bits_max,
1712 },
1713 #endif
1714 { }
1715 };
1716
1717 static struct ctl_table fs_table[] = {
1718 {
1719 .procname = "inode-nr",
1720 .data = &inodes_stat,
1721 .maxlen = 2*sizeof(long),
1722 .mode = 0444,
1723 .proc_handler = proc_nr_inodes,
1724 },
1725 {
1726 .procname = "inode-state",
1727 .data = &inodes_stat,
1728 .maxlen = 7*sizeof(long),
1729 .mode = 0444,
1730 .proc_handler = proc_nr_inodes,
1731 },
1732 {
1733 .procname = "file-nr",
1734 .data = &files_stat,
1735 .maxlen = sizeof(files_stat),
1736 .mode = 0444,
1737 .proc_handler = proc_nr_files,
1738 },
1739 {
1740 .procname = "file-max",
1741 .data = &files_stat.max_files,
1742 .maxlen = sizeof(files_stat.max_files),
1743 .mode = 0644,
1744 .proc_handler = proc_doulongvec_minmax,
1745 },
1746 {
1747 .procname = "nr_open",
1748 .data = &sysctl_nr_open,
1749 .maxlen = sizeof(unsigned int),
1750 .mode = 0644,
1751 .proc_handler = proc_dointvec_minmax,
1752 .extra1 = &sysctl_nr_open_min,
1753 .extra2 = &sysctl_nr_open_max,
1754 },
1755 {
1756 .procname = "dentry-state",
1757 .data = &dentry_stat,
1758 .maxlen = 6*sizeof(long),
1759 .mode = 0444,
1760 .proc_handler = proc_nr_dentry,
1761 },
1762 {
1763 .procname = "overflowuid",
1764 .data = &fs_overflowuid,
1765 .maxlen = sizeof(int),
1766 .mode = 0644,
1767 .proc_handler = proc_dointvec_minmax,
1768 .extra1 = &minolduid,
1769 .extra2 = &maxolduid,
1770 },
1771 {
1772 .procname = "overflowgid",
1773 .data = &fs_overflowgid,
1774 .maxlen = sizeof(int),
1775 .mode = 0644,
1776 .proc_handler = proc_dointvec_minmax,
1777 .extra1 = &minolduid,
1778 .extra2 = &maxolduid,
1779 },
1780 #ifdef CONFIG_FILE_LOCKING
1781 {
1782 .procname = "leases-enable",
1783 .data = &leases_enable,
1784 .maxlen = sizeof(int),
1785 .mode = 0644,
1786 .proc_handler = proc_dointvec,
1787 },
1788 #endif
1789 #ifdef CONFIG_DNOTIFY
1790 {
1791 .procname = "dir-notify-enable",
1792 .data = &dir_notify_enable,
1793 .maxlen = sizeof(int),
1794 .mode = 0644,
1795 .proc_handler = proc_dointvec,
1796 },
1797 #endif
1798 #ifdef CONFIG_MMU
1799 #ifdef CONFIG_FILE_LOCKING
1800 {
1801 .procname = "lease-break-time",
1802 .data = &lease_break_time,
1803 .maxlen = sizeof(int),
1804 .mode = 0644,
1805 .proc_handler = proc_dointvec,
1806 },
1807 #endif
1808 #ifdef CONFIG_AIO
1809 {
1810 .procname = "aio-nr",
1811 .data = &aio_nr,
1812 .maxlen = sizeof(aio_nr),
1813 .mode = 0444,
1814 .proc_handler = proc_doulongvec_minmax,
1815 },
1816 {
1817 .procname = "aio-max-nr",
1818 .data = &aio_max_nr,
1819 .maxlen = sizeof(aio_max_nr),
1820 .mode = 0644,
1821 .proc_handler = proc_doulongvec_minmax,
1822 },
1823 #endif /* CONFIG_AIO */
1824 #ifdef CONFIG_INOTIFY_USER
1825 {
1826 .procname = "inotify",
1827 .mode = 0555,
1828 .child = inotify_table,
1829 },
1830 #endif
1831 #ifdef CONFIG_EPOLL
1832 {
1833 .procname = "epoll",
1834 .mode = 0555,
1835 .child = epoll_table,
1836 },
1837 #endif
1838 #endif
1839 {
1840 .procname = "protected_symlinks",
1841 .data = &sysctl_protected_symlinks,
1842 .maxlen = sizeof(int),
1843 .mode = 0600,
1844 .proc_handler = proc_dointvec_minmax,
1845 .extra1 = &zero,
1846 .extra2 = &one,
1847 },
1848 {
1849 .procname = "protected_hardlinks",
1850 .data = &sysctl_protected_hardlinks,
1851 .maxlen = sizeof(int),
1852 .mode = 0600,
1853 .proc_handler = proc_dointvec_minmax,
1854 .extra1 = &zero,
1855 .extra2 = &one,
1856 },
1857 {
1858 .procname = "suid_dumpable",
1859 .data = &suid_dumpable,
1860 .maxlen = sizeof(int),
1861 .mode = 0644,
1862 .proc_handler = proc_dointvec_minmax_coredump,
1863 .extra1 = &zero,
1864 .extra2 = &two,
1865 },
1866 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1867 {
1868 .procname = "binfmt_misc",
1869 .mode = 0555,
1870 .child = sysctl_mount_point,
1871 },
1872 #endif
1873 {
1874 .procname = "pipe-max-size",
1875 .data = &pipe_max_size,
1876 .maxlen = sizeof(int),
1877 .mode = 0644,
1878 .proc_handler = &pipe_proc_fn,
1879 .extra1 = &pipe_min_size,
1880 },
1881 {
1882 .procname = "pipe-user-pages-hard",
1883 .data = &pipe_user_pages_hard,
1884 .maxlen = sizeof(pipe_user_pages_hard),
1885 .mode = 0644,
1886 .proc_handler = proc_doulongvec_minmax,
1887 },
1888 {
1889 .procname = "pipe-user-pages-soft",
1890 .data = &pipe_user_pages_soft,
1891 .maxlen = sizeof(pipe_user_pages_soft),
1892 .mode = 0644,
1893 .proc_handler = proc_doulongvec_minmax,
1894 },
1895 {
1896 .procname = "mount-max",
1897 .data = &sysctl_mount_max,
1898 .maxlen = sizeof(unsigned int),
1899 .mode = 0644,
1900 .proc_handler = proc_dointvec_minmax,
1901 .extra1 = &one,
1902 },
1903 { }
1904 };
1905
1906 static struct ctl_table debug_table[] = {
1907 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1908 {
1909 .procname = "exception-trace",
1910 .data = &show_unhandled_signals,
1911 .maxlen = sizeof(int),
1912 .mode = 0644,
1913 .proc_handler = proc_dointvec
1914 },
1915 #endif
1916 #if defined(CONFIG_OPTPROBES)
1917 {
1918 .procname = "kprobes-optimization",
1919 .data = &sysctl_kprobes_optimization,
1920 .maxlen = sizeof(int),
1921 .mode = 0644,
1922 .proc_handler = proc_kprobes_optimization_handler,
1923 .extra1 = &zero,
1924 .extra2 = &one,
1925 },
1926 #endif
1927 { }
1928 };
1929
1930 static struct ctl_table dev_table[] = {
1931 { }
1932 };
1933
1934 int __init sysctl_init(void)
1935 {
1936 struct ctl_table_header *hdr;
1937
1938 hdr = register_sysctl_table(sysctl_base_table);
1939 kmemleak_not_leak(hdr);
1940 return 0;
1941 }
1942
1943 #endif /* CONFIG_SYSCTL */
1944
1945 /*
1946 * /proc/sys support
1947 */
1948
1949 #ifdef CONFIG_PROC_SYSCTL
1950
1951 static int _proc_do_string(char *data, int maxlen, int write,
1952 char __user *buffer,
1953 size_t *lenp, loff_t *ppos)
1954 {
1955 size_t len;
1956 char __user *p;
1957 char c;
1958
1959 if (!data || !maxlen || !*lenp) {
1960 *lenp = 0;
1961 return 0;
1962 }
1963
1964 if (write) {
1965 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
1966 /* Only continue writes not past the end of buffer. */
1967 len = strlen(data);
1968 if (len > maxlen - 1)
1969 len = maxlen - 1;
1970
1971 if (*ppos > len)
1972 return 0;
1973 len = *ppos;
1974 } else {
1975 /* Start writing from beginning of buffer. */
1976 len = 0;
1977 }
1978
1979 *ppos += *lenp;
1980 p = buffer;
1981 while ((p - buffer) < *lenp && len < maxlen - 1) {
1982 if (get_user(c, p++))
1983 return -EFAULT;
1984 if (c == 0 || c == '\n')
1985 break;
1986 data[len++] = c;
1987 }
1988 data[len] = 0;
1989 } else {
1990 len = strlen(data);
1991 if (len > maxlen)
1992 len = maxlen;
1993
1994 if (*ppos > len) {
1995 *lenp = 0;
1996 return 0;
1997 }
1998
1999 data += *ppos;
2000 len -= *ppos;
2001
2002 if (len > *lenp)
2003 len = *lenp;
2004 if (len)
2005 if (copy_to_user(buffer, data, len))
2006 return -EFAULT;
2007 if (len < *lenp) {
2008 if (put_user('\n', buffer + len))
2009 return -EFAULT;
2010 len++;
2011 }
2012 *lenp = len;
2013 *ppos += len;
2014 }
2015 return 0;
2016 }
2017
2018 static void warn_sysctl_write(struct ctl_table *table)
2019 {
2020 pr_warn_once("%s wrote to %s when file position was not 0!\n"
2021 "This will not be supported in the future. To silence this\n"
2022 "warning, set kernel.sysctl_writes_strict = -1\n",
2023 current->comm, table->procname);
2024 }
2025
2026 /**
2027 * proc_first_pos_non_zero_ignore - check if firs position is allowed
2028 * @ppos: file position
2029 * @table: the sysctl table
2030 *
2031 * Returns true if the first position is non-zero and the sysctl_writes_strict
2032 * mode indicates this is not allowed for numeric input types. String proc
2033 * hadlers can ignore the return value.
2034 */
2035 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2036 struct ctl_table *table)
2037 {
2038 if (!*ppos)
2039 return false;
2040
2041 switch (sysctl_writes_strict) {
2042 case SYSCTL_WRITES_STRICT:
2043 return true;
2044 case SYSCTL_WRITES_WARN:
2045 warn_sysctl_write(table);
2046 return false;
2047 default:
2048 return false;
2049 }
2050 }
2051
2052 /**
2053 * proc_dostring - read a string sysctl
2054 * @table: the sysctl table
2055 * @write: %TRUE if this is a write to the sysctl file
2056 * @buffer: the user buffer
2057 * @lenp: the size of the user buffer
2058 * @ppos: file position
2059 *
2060 * Reads/writes a string from/to the user buffer. If the kernel
2061 * buffer provided is not large enough to hold the string, the
2062 * string is truncated. The copied string is %NULL-terminated.
2063 * If the string is being read by the user process, it is copied
2064 * and a newline '\n' is added. It is truncated if the buffer is
2065 * not large enough.
2066 *
2067 * Returns 0 on success.
2068 */
2069 int proc_dostring(struct ctl_table *table, int write,
2070 void __user *buffer, size_t *lenp, loff_t *ppos)
2071 {
2072 if (write)
2073 proc_first_pos_non_zero_ignore(ppos, table);
2074
2075 return _proc_do_string((char *)(table->data), table->maxlen, write,
2076 (char __user *)buffer, lenp, ppos);
2077 }
2078
2079 static size_t proc_skip_spaces(char **buf)
2080 {
2081 size_t ret;
2082 char *tmp = skip_spaces(*buf);
2083 ret = tmp - *buf;
2084 *buf = tmp;
2085 return ret;
2086 }
2087
/*
 * Step *buf over a leading run of the character @v, decrementing *size for
 * every byte skipped.  Stops at the first different byte or when *size
 * reaches zero; the buffer is never read once *size is zero.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size && **buf == v; (*buf)++)
		(*size)--;
}
2097
2098 #define TMPBUFLEN 22
2099 /**
2100 * proc_get_long - reads an ASCII formatted integer from a user buffer
2101 *
2102 * @buf: a kernel buffer
2103 * @size: size of the kernel buffer
2104 * @val: this is where the number will be stored
2105 * @neg: set to %TRUE if number is negative
2106 * @perm_tr: a vector which contains the allowed trailers
2107 * @perm_tr_len: size of the perm_tr vector
2108 * @tr: pointer to store the trailer character
2109 *
2110 * In case of success %0 is returned and @buf and @size are updated with
2111 * the amount of bytes read. If @tr is non-NULL and a trailing
2112 * character exists (size is non-zero after returning from this
2113 * function), @tr is updated with the trailing character.
2114 */
2115 static int proc_get_long(char **buf, size_t *size,
2116 unsigned long *val, bool *neg,
2117 const char *perm_tr, unsigned perm_tr_len, char *tr)
2118 {
2119 int len;
2120 char *p, tmp[TMPBUFLEN];
2121
2122 if (!*size)
2123 return -EINVAL;
2124
2125 len = *size;
2126 if (len > TMPBUFLEN - 1)
2127 len = TMPBUFLEN - 1;
2128
2129 memcpy(tmp, *buf, len);
2130
2131 tmp[len] = 0;
2132 p = tmp;
2133 if (*p == '-' && *size > 1) {
2134 *neg = true;
2135 p++;
2136 } else
2137 *neg = false;
2138 if (!isdigit(*p))
2139 return -EINVAL;
2140
2141 *val = simple_strtoul(p, &p, 0);
2142
2143 len = p - tmp;
2144
2145 /* We don't know if the next char is whitespace thus we may accept
2146 * invalid integers (e.g. 1234...a) or two integers instead of one
2147 * (e.g. 123...1). So lets not allow such large numbers. */
2148 if (len == TMPBUFLEN - 1)
2149 return -EINVAL;
2150
2151 if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
2152 return -EINVAL;
2153
2154 if (tr && (len < *size))
2155 *tr = *p;
2156
2157 *buf += len;
2158 *size -= len;
2159
2160 return 0;
2161 }
2162
2163 /**
2164 * proc_put_long - converts an integer to a decimal ASCII formatted string
2165 *
2166 * @buf: the user buffer
2167 * @size: the size of the user buffer
2168 * @val: the integer to be converted
2169 * @neg: sign of the number, %TRUE for negative
2170 *
2171 * In case of success %0 is returned and @buf and @size are updated with
2172 * the amount of bytes written.
2173 */
2174 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2175 bool neg)
2176 {
2177 int len;
2178 char tmp[TMPBUFLEN], *p = tmp;
2179
2180 sprintf(p, "%s%lu", neg ? "-" : "", val);
2181 len = strlen(tmp);
2182 if (len > *size)
2183 len = *size;
2184 if (copy_to_user(*buf, tmp, len))
2185 return -EFAULT;
2186 *size -= len;
2187 *buf += len;
2188 return 0;
2189 }
2190 #undef TMPBUFLEN
2191
2192 static int proc_put_char(void __user **buf, size_t *size, char c)
2193 {
2194 if (*size) {
2195 char __user **buffer = (char __user **)buf;
2196 if (put_user(c, *buffer))
2197 return -EFAULT;
2198 (*size)--, (*buffer)++;
2199 *buf = *buffer;
2200 }
2201 return 0;
2202 }
2203
/*
 * Default converter between an int and the (sign, magnitude) pair used by
 * the parsing/printing helpers.
 *
 * write != 0: store the value described by *negp / *lvalp into *valp,
 *             returning -EINVAL if it does not fit in an int.
 * write == 0: split *valp into *negp and *lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	unsigned long limit;

	if (!write) {
		const int cur = *valp;

		*negp = cur < 0;
		*lvalp = *negp ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	/* A negative int may have a magnitude one larger than INT_MAX. */
	limit = (unsigned long) INT_MAX;
	if (*negp)
		limit++;
	if (*lvalp > limit)
		return -EINVAL;

	*valp = *negp ? -*lvalp : *lvalp;
	return 0;
}
2230
/*
 * Default converter between an unsigned int and the unsigned long used by
 * the parsing/printing helpers.
 *
 * write != 0: store *lvalp into *valp, returning -EINVAL if the value does
 *             not fit in an unsigned int (*valp is left untouched then).
 * write == 0: widen *valp into *lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (write) {
		/* One range check is enough; it used to be duplicated. */
		if (*lvalp > UINT_MAX)
			return -EINVAL;
		*valp = *lvalp;
	} else {
		unsigned int val = *valp;

		*lvalp = (unsigned long)val;
	}
	return 0;
}
2247
/*
 * Characters allowed to follow a parsed number.  Deliberately a plain char
 * array without a terminating NUL: users pass sizeof() as its length.
 */
static const char proc_wspace_sep[] = {
	' ',
	'\t',
	'\n',
};
2249
2250 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2251 int write, void __user *buffer,
2252 size_t *lenp, loff_t *ppos,
2253 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2254 int write, void *data),
2255 void *data)
2256 {
2257 int *i, vleft, first = 1, err = 0;
2258 size_t left;
2259 char *kbuf = NULL, *p;
2260
2261 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2262 *lenp = 0;
2263 return 0;
2264 }
2265
2266 i = (int *) tbl_data;
2267 vleft = table->maxlen / sizeof(*i);
2268 left = *lenp;
2269
2270 if (!conv)
2271 conv = do_proc_dointvec_conv;
2272
2273 if (write) {
2274 if (proc_first_pos_non_zero_ignore(ppos, table))
2275 goto out;
2276
2277 if (left > PAGE_SIZE - 1)
2278 left = PAGE_SIZE - 1;
2279 p = kbuf = memdup_user_nul(buffer, left);
2280 if (IS_ERR(kbuf))
2281 return PTR_ERR(kbuf);
2282 }
2283
2284 for (; left && vleft--; i++, first=0) {
2285 unsigned long lval;
2286 bool neg;
2287
2288 if (write) {
2289 left -= proc_skip_spaces(&p);
2290
2291 if (!left)
2292 break;
2293 err = proc_get_long(&p, &left, &lval, &neg,
2294 proc_wspace_sep,
2295 sizeof(proc_wspace_sep), NULL);
2296 if (err)
2297 break;
2298 if (conv(&neg, &lval, i, 1, data)) {
2299 err = -EINVAL;
2300 break;
2301 }
2302 } else {
2303 if (conv(&neg, &lval, i, 0, data)) {
2304 err = -EINVAL;
2305 break;
2306 }
2307 if (!first)
2308 err = proc_put_char(&buffer, &left, '\t');
2309 if (err)
2310 break;
2311 err = proc_put_long(&buffer, &left, lval, neg);
2312 if (err)
2313 break;
2314 }
2315 }
2316
2317 if (!write && !first && left && !err)
2318 err = proc_put_char(&buffer, &left, '\n');
2319 if (write && !err && left)
2320 left -= proc_skip_spaces(&p);
2321 if (write) {
2322 kfree(kbuf);
2323 if (first)
2324 return err ? : -EINVAL;
2325 }
2326 *lenp -= left;
2327 out:
2328 *ppos += *lenp;
2329 return err;
2330 }
2331
2332 static int do_proc_dointvec(struct ctl_table *table, int write,
2333 void __user *buffer, size_t *lenp, loff_t *ppos,
2334 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2335 int write, void *data),
2336 void *data)
2337 {
2338 return __do_proc_dointvec(table->data, table, write,
2339 buffer, lenp, ppos, conv, data);
2340 }
2341
2342 static int do_proc_douintvec_w(unsigned int *tbl_data,
2343 struct ctl_table *table,
2344 void __user *buffer,
2345 size_t *lenp, loff_t *ppos,
2346 int (*conv)(unsigned long *lvalp,
2347 unsigned int *valp,
2348 int write, void *data),
2349 void *data)
2350 {
2351 unsigned long lval;
2352 int err = 0;
2353 size_t left;
2354 bool neg;
2355 char *kbuf = NULL, *p;
2356
2357 left = *lenp;
2358
2359 if (proc_first_pos_non_zero_ignore(ppos, table))
2360 goto bail_early;
2361
2362 if (left > PAGE_SIZE - 1)
2363 left = PAGE_SIZE - 1;
2364
2365 p = kbuf = memdup_user_nul(buffer, left);
2366 if (IS_ERR(kbuf))
2367 return -EINVAL;
2368
2369 left -= proc_skip_spaces(&p);
2370 if (!left) {
2371 err = -EINVAL;
2372 goto out_free;
2373 }
2374
2375 err = proc_get_long(&p, &left, &lval, &neg,
2376 proc_wspace_sep,
2377 sizeof(proc_wspace_sep), NULL);
2378 if (err || neg) {
2379 err = -EINVAL;
2380 goto out_free;
2381 }
2382
2383 if (conv(&lval, tbl_data, 1, data)) {
2384 err = -EINVAL;
2385 goto out_free;
2386 }
2387
2388 if (!err && left)
2389 left -= proc_skip_spaces(&p);
2390
2391 out_free:
2392 kfree(kbuf);
2393 if (err)
2394 return -EINVAL;
2395
2396 return 0;
2397
2398 /* This is in keeping with old __do_proc_dointvec() */
2399 bail_early:
2400 *ppos += *lenp;
2401 return err;
2402 }
2403
2404 static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2405 size_t *lenp, loff_t *ppos,
2406 int (*conv)(unsigned long *lvalp,
2407 unsigned int *valp,
2408 int write, void *data),
2409 void *data)
2410 {
2411 unsigned long lval;
2412 int err = 0;
2413 size_t left;
2414
2415 left = *lenp;
2416
2417 if (conv(&lval, tbl_data, 0, data)) {
2418 err = -EINVAL;
2419 goto out;
2420 }
2421
2422 err = proc_put_long(&buffer, &left, lval, false);
2423 if (err || !left)
2424 goto out;
2425
2426 err = proc_put_char(&buffer, &left, '\n');
2427
2428 out:
2429 *lenp -= left;
2430 *ppos += *lenp;
2431
2432 return err;
2433 }
2434
2435 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2436 int write, void __user *buffer,
2437 size_t *lenp, loff_t *ppos,
2438 int (*conv)(unsigned long *lvalp,
2439 unsigned int *valp,
2440 int write, void *data),
2441 void *data)
2442 {
2443 unsigned int *i, vleft;
2444
2445 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2446 *lenp = 0;
2447 return 0;
2448 }
2449
2450 i = (unsigned int *) tbl_data;
2451 vleft = table->maxlen / sizeof(*i);
2452
2453 /*
2454 * Arrays are not supported, keep this simple. *Do not* add
2455 * support for them.
2456 */
2457 if (vleft != 1) {
2458 *lenp = 0;
2459 return -EINVAL;
2460 }
2461
2462 if (!conv)
2463 conv = do_proc_douintvec_conv;
2464
2465 if (write)
2466 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2467 conv, data);
2468 return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2469 }
2470
2471 static int do_proc_douintvec(struct ctl_table *table, int write,
2472 void __user *buffer, size_t *lenp, loff_t *ppos,
2473 int (*conv)(unsigned long *lvalp,
2474 unsigned int *valp,
2475 int write, void *data),
2476 void *data)
2477 {
2478 return __do_proc_douintvec(table->data, table, write,
2479 buffer, lenp, ppos, conv, data);
2480 }
2481
2482 /**
2483 * proc_dointvec - read a vector of integers
2484 * @table: the sysctl table
2485 * @write: %TRUE if this is a write to the sysctl file
2486 * @buffer: the user buffer
2487 * @lenp: the size of the user buffer
2488 * @ppos: file position
2489 *
2490 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2491 * values from/to the user buffer, treated as an ASCII string.
2492 *
2493 * Returns 0 on success.
2494 */
2495 int proc_dointvec(struct ctl_table *table, int write,
2496 void __user *buffer, size_t *lenp, loff_t *ppos)
2497 {
2498 return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2499 }
2500
2501 /**
2502 * proc_douintvec - read a vector of unsigned integers
2503 * @table: the sysctl table
2504 * @write: %TRUE if this is a write to the sysctl file
2505 * @buffer: the user buffer
2506 * @lenp: the size of the user buffer
2507 * @ppos: file position
2508 *
2509 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2510 * values from/to the user buffer, treated as an ASCII string.
2511 *
2512 * Returns 0 on success.
2513 */
2514 int proc_douintvec(struct ctl_table *table, int write,
2515 void __user *buffer, size_t *lenp, loff_t *ppos)
2516 {
2517 return do_proc_douintvec(table, write, buffer, lenp, ppos,
2518 do_proc_douintvec_conv, NULL);
2519 }
2520
2521 /*
2522 * Taint values can only be increased
2523 * This means we can safely use a temporary.
2524 */
2525 static int proc_taint(struct ctl_table *table, int write,
2526 void __user *buffer, size_t *lenp, loff_t *ppos)
2527 {
2528 struct ctl_table t;
2529 unsigned long tmptaint = get_taint();
2530 int err;
2531
2532 if (write && !capable(CAP_SYS_ADMIN))
2533 return -EPERM;
2534
2535 t = *table;
2536 t.data = &tmptaint;
2537 err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2538 if (err < 0)
2539 return err;
2540
2541 if (write) {
2542 /*
2543 * Poor man's atomic or. Not worth adding a primitive
2544 * to everyone's atomic.h for this
2545 */
2546 int i;
2547 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2548 if ((tmptaint >> i) & 1)
2549 add_taint(i, LOCKDEP_STILL_OK);
2550 }
2551 }
2552
2553 return err;
2554 }
2555
2556 #ifdef CONFIG_PRINTK
2557 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
2558 void __user *buffer, size_t *lenp, loff_t *ppos)
2559 {
2560 if (write && !capable(CAP_SYS_ADMIN))
2561 return -EPERM;
2562
2563 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2564 }
2565 #endif
2566
/**
 * struct do_proc_dointvec_minmax_conv_param - bounds for the int min/max converter
 * @min: pointer to the smallest value accepted on write, or NULL for no
 *       lower bound
 * @max: pointer to the largest value accepted on write, or NULL for no
 *       upper bound
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Convert between the parsed (sign, magnitude) form and a stored int,
 * enforcing the optional bounds on write.
 *
 * @negp:  sign of the value (in on write, out on read)
 * @lvalp: magnitude of the value (in on write, out on read)
 * @valp:  the stored int (out on write, in on read)
 * @write: non-zero to store *@lvalp into *@valp, zero for the reverse
 * @data:  a struct do_proc_dointvec_minmax_conv_param; only dereferenced
 *         on write
 *
 * On write the magnitude is first checked to be representable as an int.
 * Without that check the narrowing conversion would wrap an oversized
 * input into an arbitrary int that may happen to lie inside [min, max]
 * and be stored.
 *
 * Returns 0 on success, -EINVAL when the value does not fit in an int or
 * violates a bound; *@valp is left untouched in that case.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;

	if (write) {
		int val;

		if (*negp) {
			/* |INT_MIN| is one larger than INT_MAX. */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			val = (*lvalp == (unsigned long) INT_MAX + 1) ?
				INT_MIN : -(int) *lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			val = (int) *lvalp;
		}

		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;

		if (val < 0) {
			*negp = true;
			/* Negate as unsigned so INT_MIN does not overflow. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2595
2596 /**
2597 * proc_dointvec_minmax - read a vector of integers with min/max values
2598 * @table: the sysctl table
2599 * @write: %TRUE if this is a write to the sysctl file
2600 * @buffer: the user buffer
2601 * @lenp: the size of the user buffer
2602 * @ppos: file position
2603 *
2604 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2605 * values from/to the user buffer, treated as an ASCII string.
2606 *
2607 * This routine will ensure the values are within the range specified by
2608 * table->extra1 (min) and table->extra2 (max).
2609 *
2610 * Returns 0 on success.
2611 */
2612 int proc_dointvec_minmax(struct ctl_table *table, int write,
2613 void __user *buffer, size_t *lenp, loff_t *ppos)
2614 {
2615 struct do_proc_dointvec_minmax_conv_param param = {
2616 .min = (int *) table->extra1,
2617 .max = (int *) table->extra2,
2618 };
2619 return do_proc_dointvec(table, write, buffer, lenp, ppos,
2620 do_proc_dointvec_minmax_conv, &param);
2621 }
2622
2623 #ifdef CONFIG_X86
/*
 * sysctl handler for the IBRS control knob.
 *
 * The request is first handed to proc_dointvec_minmax(); afterwards the
 * global sysctl_ibrs_enabled is examined under spec_ctrl_mutex and acted on:
 *   0 - set_ibrs_disabled(); if ibrs_supported, write x86_spec_ctrl_base to
 *       MSR_IA32_SPEC_CTRL on every online CPU
 *   2 - clear_ibrs_disabled(); if ibrs_supported, write
 *       x86_spec_ctrl_base | SPEC_CTRL_IBRS on every online CPU, otherwise
 *       reset sysctl_ibrs_enabled to 0
 *   1 - clear_ibrs_disabled(); reset sysctl_ibrs_enabled to 0 if ibrs_inuse
 *       is false afterwards
 *
 * NOTE(review): the handler reads sysctl_ibrs_enabled directly rather than
 * table->data; presumably the table entry points at that variable - confirm
 * against the ctl_table definition.
 * NOTE(review): the state update runs regardless of @write and of the
 * result in ret, so reads and failed writes also take the mutex and may
 * issue the MSR writes - confirm this is intended.
 *
 * Returns the result of proc_dointvec_minmax().
 */
int proc_dointvec_ibrs_ctrl(struct ctl_table *table, int write,
	void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;
	unsigned int cpu;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	pr_debug("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
	pr_debug("before:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
	mutex_lock(&spec_ctrl_mutex);
	if (sysctl_ibrs_enabled == 0) {
		/* always set IBRS off */
		set_ibrs_disabled();
		if (ibrs_supported) {
			/* program the base value, without the IBRS bit or-ed in */
			for_each_online_cpu(cpu)
				wrmsrl_on_cpu(cpu, MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
		}
	} else if (sysctl_ibrs_enabled == 2) {
		/* always set IBRS on, even in user space */
		clear_ibrs_disabled();
		if (ibrs_supported) {
			for_each_online_cpu(cpu)
				wrmsrl_on_cpu(cpu, MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | SPEC_CTRL_IBRS);
		} else {
			/* not supported: report the knob as off */
			sysctl_ibrs_enabled = 0;
		}
	} else if (sysctl_ibrs_enabled == 1) {
		/* use IBRS in kernel */
		clear_ibrs_disabled();
		if (!ibrs_inuse)
			/* platform doesn't support ibrs */
			sysctl_ibrs_enabled = 0;
	}
	mutex_unlock(&spec_ctrl_mutex);
	pr_debug("after:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
	return ret;
}
2661
/*
 * sysctl handler for the IBPB control knob.
 *
 * The request is first handed to proc_dointvec_minmax(); afterwards the
 * global sysctl_ibpb_enabled is examined under spec_ctrl_mutex:
 *   0 - set_ibpb_disabled()
 *   1 - clear_ibpb_disabled(); reset sysctl_ibpb_enabled to 0 if ibpb_inuse
 *       is false afterwards
 *
 * NOTE(review): as written the state update runs regardless of @write and
 * of the result in ret - confirm this is intended.
 *
 * Returns the result of proc_dointvec_minmax().
 */
int proc_dointvec_ibpb_ctrl(struct ctl_table *table, int write,
	void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	pr_debug("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
	pr_debug("before:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
	mutex_lock(&spec_ctrl_mutex);
	if (sysctl_ibpb_enabled == 0)
		set_ibpb_disabled();
	else if (sysctl_ibpb_enabled == 1) {
		clear_ibpb_disabled();
		if (!ibpb_inuse)
			/* platform doesn't support ibpb */
			sysctl_ibpb_enabled = 0;
	}
	mutex_unlock(&spec_ctrl_mutex);
	pr_debug("after:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
	return ret;
}
2683 #endif
2684
2685
/**
 * struct do_proc_douintvec_minmax_conv_param - bounds for the uint min/max converter
 * @min: pointer to the smallest value accepted on write, or NULL for no
 *       lower bound
 * @max: pointer to the largest value accepted on write, or NULL for no
 *       upper bound
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};

/*
 * Convert between a parsed unsigned long and a stored unsigned int,
 * enforcing the optional bounds on write.
 *
 * @lvalp: the parsed value (in on write, out on read)
 * @valp:  the stored unsigned int (out on write, in on read)
 * @write: non-zero to store *@lvalp into *@valp, zero for the reverse
 * @data:  a struct do_proc_douintvec_minmax_conv_param; only dereferenced
 *         on write
 *
 * Returns 0 on success, -ERANGE when the (narrowed) value violates a
 * bound, -EINVAL when the parsed value exceeds UINT_MAX.  *@valp is left
 * untouched on failure.
 */
static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
					 unsigned int *valp,
					 int write, void *data)
{
	struct do_proc_douintvec_minmax_conv_param *limits = data;
	unsigned int narrowed;

	if (!write) {
		*lvalp = (unsigned long) *valp;
		return 0;
	}

	/* The bounds are compared against the value as narrowed to 32 bits. */
	narrowed = *lvalp;

	if (limits->min && narrowed < *limits->min)
		return -ERANGE;
	if (limits->max && narrowed > *limits->max)
		return -ERANGE;

	/* Final sanity check so wrapped-around input is never stored. */
	if (*lvalp > UINT_MAX)
		return -EINVAL;

	*valp = narrowed;
	return 0;
}
2714
2715 /**
2716 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2717 * @table: the sysctl table
2718 * @write: %TRUE if this is a write to the sysctl file
2719 * @buffer: the user buffer
2720 * @lenp: the size of the user buffer
2721 * @ppos: file position
2722 *
2723 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2724 * values from/to the user buffer, treated as an ASCII string. Negative
2725 * strings are not allowed.
2726 *
2727 * This routine will ensure the values are within the range specified by
2728 * table->extra1 (min) and table->extra2 (max). There is a final sanity
2729 * check for UINT_MAX to avoid having to support wrap around uses from
2730 * userspace.
2731 *
2732 * Returns 0 on success.
2733 */
2734 int proc_douintvec_minmax(struct ctl_table *table, int write,
2735 void __user *buffer, size_t *lenp, loff_t *ppos)
2736 {
2737 struct do_proc_douintvec_minmax_conv_param param = {
2738 .min = (unsigned int *) table->extra1,
2739 .max = (unsigned int *) table->extra2,
2740 };
2741 return do_proc_douintvec(table, write, buffer, lenp, ppos,
2742 do_proc_douintvec_minmax_conv, &param);
2743 }
2744
/*
 * Warn when suid_dumpable is SUID_DUMP_ROOT while core_pattern is neither
 * an absolute path nor a pipe handler.  Does nothing without
 * CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;

	/* A leading '/' or '|' is what makes the pattern acceptable. */
	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
2758
2759 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2760 void __user *buffer, size_t *lenp, loff_t *ppos)
2761 {
2762 int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2763 if (!error)
2764 validate_coredump_safety();
2765 return error;
2766 }
2767
2768 #ifdef CONFIG_COREDUMP
2769 static int proc_dostring_coredump(struct ctl_table *table, int write,
2770 void __user *buffer, size_t *lenp, loff_t *ppos)
2771 {
2772 int error = proc_dostring(table, write, buffer, lenp, ppos);
2773 if (!error)
2774 validate_coredump_safety();
2775 return error;
2776 }
2777 #endif
2778
/*
 * Read or write a vector of unsigned long values with optional scaling and
 * optional min/max limits.
 *
 * @data:    storage of the values (may differ from table->data)
 * @table:   the sysctl table entry; ->maxlen gives the vector size in
 *           bytes, ->extra1/->extra2 the optional min/max
 * @write:   non-zero for a write to the sysctl file
 * @buffer:  the user buffer
 * @lenp:    size of the user buffer; updated on return
 * @ppos:    file position; advanced by the final *@lenp
 * @convmul: on write the parsed value is multiplied by this ...
 * @convdiv: ... and divided by this; on read the inverse is applied
 *
 * Returns 0 on success or when there is nothing to do, -EINVAL when a
 * write did not get past the first value without an error code of its
 * own, otherwise the error of the failing helper.
 */
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
				     void __user *buffer,
				     size_t *lenp, loff_t *ppos,
				     unsigned long convmul,
				     unsigned long convdiv)
{
	unsigned long *i, *min, *max;
	int vleft, first = 1, err = 0;
	size_t left;
	char *kbuf = NULL, *p;

	/* No storage, empty entry, empty buffer, or a read past offset 0. */
	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	i = (unsigned long *) data;
	min = (unsigned long *) table->extra1;
	max = (unsigned long *) table->extra2;
	vleft = table->maxlen / sizeof(unsigned long);
	left = *lenp;

	if (write) {
		if (proc_first_pos_non_zero_ignore(ppos, table))
			goto out;

		/* At most PAGE_SIZE - 1 bytes of input are copied and parsed. */
		if (left > PAGE_SIZE - 1)
			left = PAGE_SIZE - 1;
		p = kbuf = memdup_user_nul(buffer, left);
		if (IS_ERR(kbuf))
			return PTR_ERR(kbuf);
	}

	/*
	 * Note that "continue" below still runs the increment expression, so
	 * a skipped input value still advances to the next slot and clears
	 * "first".
	 */
	for (; left && vleft--; i++, first = 0) {
		unsigned long val;

		if (write) {
			bool neg;

			left -= proc_skip_spaces(&p);

			err = proc_get_long(&p, &left, &val, &neg,
					     proc_wspace_sep,
					     sizeof(proc_wspace_sep), NULL);
			if (err)
				break;
			/* Negative input is silently ignored, not rejected. */
			if (neg)
				continue;
			val = convmul * val / convdiv;
			/*
			 * NOTE(review): out-of-range input is silently skipped
			 * as well, so such a write can still return success
			 * without storing anything.
			 */
			if ((min && val < *min) || (max && val > *max))
				continue;
			*i = val;
		} else {
			val = convdiv * (*i) / convmul;
			/* Values after the first are separated by a tab. */
			if (!first) {
				err = proc_put_char(&buffer, &left, '\t');
				if (err)
					break;
			}
			err = proc_put_long(&buffer, &left, val, false);
			if (err)
				break;
		}
	}

	/* Terminate the output line if something was printed and room is left. */
	if (!write && !first && left && !err)
		err = proc_put_char(&buffer, &left, '\n');
	if (write && !err)
		left -= proc_skip_spaces(&p);
	if (write) {
		kfree(kbuf);
		/* Nothing got past the first value: report an error. */
		if (first)
			return err ? : -EINVAL;
	}
	*lenp -= left;
out:
	*ppos += *lenp;
	return err;
}
2858
2859 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2860 void __user *buffer,
2861 size_t *lenp, loff_t *ppos,
2862 unsigned long convmul,
2863 unsigned long convdiv)
2864 {
2865 return __do_proc_doulongvec_minmax(table->data, table, write,
2866 buffer, lenp, ppos, convmul, convdiv);
2867 }
2868
2869 /**
2870 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2871 * @table: the sysctl table
2872 * @write: %TRUE if this is a write to the sysctl file
2873 * @buffer: the user buffer
2874 * @lenp: the size of the user buffer
2875 * @ppos: file position
2876 *
2877 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2878 * values from/to the user buffer, treated as an ASCII string.
2879 *
2880 * This routine will ensure the values are within the range specified by
2881 * table->extra1 (min) and table->extra2 (max).
2882 *
2883 * Returns 0 on success.
2884 */
2885 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2886 void __user *buffer, size_t *lenp, loff_t *ppos)
2887 {
2888 return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2889 }
2890
2891 /**
2892 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2893 * @table: the sysctl table
2894 * @write: %TRUE if this is a write to the sysctl file
2895 * @buffer: the user buffer
2896 * @lenp: the size of the user buffer
2897 * @ppos: file position
2898 *
2899 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2900 * values from/to the user buffer, treated as an ASCII string. The values
2901 * are treated as milliseconds, and converted to jiffies when they are stored.
2902 *
2903 * This routine will ensure the values are within the range specified by
2904 * table->extra1 (min) and table->extra2 (max).
2905 *
2906 * Returns 0 on success.
2907 */
2908 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2909 void __user *buffer,
2910 size_t *lenp, loff_t *ppos)
2911 {
2912 return do_proc_doulongvec_minmax(table, write, buffer,
2913 lenp, ppos, HZ, 1000l);
2914 }
2915
2916
2917 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2918 int *valp,
2919 int write, void *data)
2920 {
2921 if (write) {
2922 if (*lvalp > INT_MAX / HZ)
2923 return 1;
2924 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2925 } else {
2926 int val = *valp;
2927 unsigned long lval;
2928 if (val < 0) {
2929 *negp = true;
2930 lval = -(unsigned long)val;
2931 } else {
2932 *negp = false;
2933 lval = (unsigned long)val;
2934 }
2935 *lvalp = lval / HZ;
2936 }
2937 return 0;
2938 }
2939
2940 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2941 int *valp,
2942 int write, void *data)
2943 {
2944 if (write) {
2945 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2946 return 1;
2947 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2948 } else {
2949 int val = *valp;
2950 unsigned long lval;
2951 if (val < 0) {
2952 *negp = true;
2953 lval = -(unsigned long)val;
2954 } else {
2955 *negp = false;
2956 lval = (unsigned long)val;
2957 }
2958 *lvalp = jiffies_to_clock_t(lval);
2959 }
2960 return 0;
2961 }
2962
2963 static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
2964 int *valp,
2965 int write, void *data)
2966 {
2967 if (write) {
2968 unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
2969
2970 if (jif > INT_MAX)
2971 return 1;
2972 *valp = (int)jif;
2973 } else {
2974 int val = *valp;
2975 unsigned long lval;
2976 if (val < 0) {
2977 *negp = true;
2978 lval = -(unsigned long)val;
2979 } else {
2980 *negp = false;
2981 lval = (unsigned long)val;
2982 }
2983 *lvalp = jiffies_to_msecs(lval);
2984 }
2985 return 0;
2986 }
2987
2988 /**
2989 * proc_dointvec_jiffies - read a vector of integers as seconds
2990 * @table: the sysctl table
2991 * @write: %TRUE if this is a write to the sysctl file
2992 * @buffer: the user buffer
2993 * @lenp: the size of the user buffer
2994 * @ppos: file position
2995 *
2996 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2997 * values from/to the user buffer, treated as an ASCII string.
2998 * The values read are assumed to be in seconds, and are converted into
2999 * jiffies.
3000 *
3001 * Returns 0 on success.
3002 */
3003 int proc_dointvec_jiffies(struct ctl_table *table, int write,
3004 void __user *buffer, size_t *lenp, loff_t *ppos)
3005 {
3006 return do_proc_dointvec(table,write,buffer,lenp,ppos,
3007 do_proc_dointvec_jiffies_conv,NULL);
3008 }
3009
3010 /**
3011 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
3012 * @table: the sysctl table
3013 * @write: %TRUE if this is a write to the sysctl file
3014 * @buffer: the user buffer
3015 * @lenp: the size of the user buffer
3016 * @ppos: pointer to the file position
3017 *
3018 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3019 * values from/to the user buffer, treated as an ASCII string.
3020 * The values read are assumed to be in 1/USER_HZ seconds, and
3021 * are converted into jiffies.
3022 *
3023 * Returns 0 on success.
3024 */
3025 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3026 void __user *buffer, size_t *lenp, loff_t *ppos)
3027 {
3028 return do_proc_dointvec(table,write,buffer,lenp,ppos,
3029 do_proc_dointvec_userhz_jiffies_conv,NULL);
3030 }
3031
3032 /**
3033 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
3034 * @table: the sysctl table
3035 * @write: %TRUE if this is a write to the sysctl file
3036 * @buffer: the user buffer
3037 * @lenp: the size of the user buffer
3038 * @ppos: file position
3039 * @ppos: the current position in the file
3040 *
3041 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3042 * values from/to the user buffer, treated as an ASCII string.
3043 * The values read are assumed to be in 1/1000 seconds, and
3044 * are converted into jiffies.
3045 *
3046 * Returns 0 on success.
3047 */
3048 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3049 void __user *buffer, size_t *lenp, loff_t *ppos)
3050 {
3051 return do_proc_dointvec(table, write, buffer, lenp, ppos,
3052 do_proc_dointvec_ms_jiffies_conv, NULL);
3053 }
3054
3055 static int proc_do_cad_pid(struct ctl_table *table, int write,
3056 void __user *buffer, size_t *lenp, loff_t *ppos)
3057 {
3058 struct pid *new_pid;
3059 pid_t tmp;
3060 int r;
3061
3062 tmp = pid_vnr(cad_pid);
3063
3064 r = __do_proc_dointvec(&tmp, table, write, buffer,
3065 lenp, ppos, NULL, NULL);
3066 if (r || !write)
3067 return r;
3068
3069 new_pid = find_get_pid(tmp);
3070 if (!new_pid)
3071 return -ESRCH;
3072
3073 put_pid(xchg(&cad_pid, new_pid));
3074 return 0;
3075 }
3076
3077 /**
3078 * proc_do_large_bitmap - read/write from/to a large bitmap
3079 * @table: the sysctl table
3080 * @write: %TRUE if this is a write to the sysctl file
3081 * @buffer: the user buffer
3082 * @lenp: the size of the user buffer
3083 * @ppos: file position
3084 *
3085 * The bitmap is stored at table->data and the bitmap length (in bits)
3086 * in table->maxlen.
3087 *
3088 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3089 * large bitmaps may be represented in a compact manner. Writing into
3090 * the file will clear the bitmap then update it with the given input.
3091 *
3092 * Returns 0 on success.
3093 */
3094 int proc_do_large_bitmap(struct ctl_table *table, int write,
3095 void __user *buffer, size_t *lenp, loff_t *ppos)
3096 {
3097 int err = 0;
3098 bool first = 1;
3099 size_t left = *lenp;
3100 unsigned long bitmap_len = table->maxlen;
3101 unsigned long *bitmap = *(unsigned long **) table->data;
3102 unsigned long *tmp_bitmap = NULL;
3103 char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3104
3105 if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3106 *lenp = 0;
3107 return 0;
3108 }
3109
3110 if (write) {
3111 char *kbuf, *p;
3112
3113 if (left > PAGE_SIZE - 1)
3114 left = PAGE_SIZE - 1;
3115
3116 p = kbuf = memdup_user_nul(buffer, left);
3117 if (IS_ERR(kbuf))
3118 return PTR_ERR(kbuf);
3119
3120 tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
3121 GFP_KERNEL);
3122 if (!tmp_bitmap) {
3123 kfree(kbuf);
3124 return -ENOMEM;
3125 }
3126 proc_skip_char(&p, &left, '\n');
3127 while (!err && left) {
3128 unsigned long val_a, val_b;
3129 bool neg;
3130
3131 err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3132 sizeof(tr_a), &c);
3133 if (err)
3134 break;
3135 if (val_a >= bitmap_len || neg) {
3136 err = -EINVAL;
3137 break;
3138 }
3139
3140 val_b = val_a;
3141 if (left) {
3142 p++;
3143 left--;
3144 }
3145
3146 if (c == '-') {
3147 err = proc_get_long(&p, &left, &val_b,
3148 &neg, tr_b, sizeof(tr_b),
3149 &c);
3150 if (err)
3151 break;
3152 if (val_b >= bitmap_len || neg ||
3153 val_a > val_b) {
3154 err = -EINVAL;
3155 break;
3156 }
3157 if (left) {
3158 p++;
3159 left--;
3160 }
3161 }
3162
3163 bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3164 first = 0;
3165 proc_skip_char(&p, &left, '\n');
3166 }
3167 kfree(kbuf);
3168 } else {
3169 unsigned long bit_a, bit_b = 0;
3170
3171 while (left) {
3172 bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3173 if (bit_a >= bitmap_len)
3174 break;
3175 bit_b = find_next_zero_bit(bitmap, bitmap_len,
3176 bit_a + 1) - 1;
3177
3178 if (!first) {
3179 err = proc_put_char(&buffer, &left, ',');
3180 if (err)
3181 break;
3182 }
3183 err = proc_put_long(&buffer, &left, bit_a, false);
3184 if (err)
3185 break;
3186 if (bit_a != bit_b) {
3187 err = proc_put_char(&buffer, &left, '-');
3188 if (err)
3189 break;
3190 err = proc_put_long(&buffer, &left, bit_b, false);
3191 if (err)
3192 break;
3193 }
3194
3195 first = 0; bit_b++;
3196 }
3197 if (!err)
3198 err = proc_put_char(&buffer, &left, '\n');
3199 }
3200
3201 if (!err) {
3202 if (write) {
3203 if (*ppos)
3204 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3205 else
3206 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3207 }
3208 kfree(tmp_bitmap);
3209 *lenp -= left;
3210 *ppos += *lenp;
3211 return 0;
3212 } else {
3213 kfree(tmp_bitmap);
3214 return err;
3215 }
3216 }
3217
3218 #else /* CONFIG_PROC_SYSCTL */
3219
/*
 * Stub handlers for builds without CONFIG_PROC_SYSCTL: each has the same
 * signature as the real handler and simply reports -ENOSYS.
 */

/* Stub: string handler is unavailable. */
int proc_dostring(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: integer vector handler is unavailable. */
int proc_dointvec(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: unsigned integer handler is unavailable. */
int proc_douintvec(struct ctl_table *table, int write,
		   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: bounded integer vector handler is unavailable. */
int proc_dointvec_minmax(struct ctl_table *table, int write,
			 void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: bounded unsigned integer handler is unavailable. */
int proc_douintvec_minmax(struct ctl_table *table, int write,
			  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: seconds/jiffies handler is unavailable. */
int proc_dointvec_jiffies(struct ctl_table *table, int write,
			  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: USER_HZ/jiffies handler is unavailable. */
int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
				 void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: milliseconds/jiffies handler is unavailable. */
int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: bounded unsigned long vector handler is unavailable. */
int proc_doulongvec_minmax(struct ctl_table *table, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: bounded milliseconds/jiffies unsigned long handler is unavailable. */
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
				      void __user *buffer,
				      size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3280
3281
3282 #endif /* CONFIG_PROC_SYSCTL */
3283
3284 /*
3285 * No sense putting this after each symbol definition, twice,
3286 * exception granted :-)
3287 */
3288 EXPORT_SYMBOL(proc_dointvec);
3289 EXPORT_SYMBOL(proc_douintvec);
3290 EXPORT_SYMBOL(proc_dointvec_jiffies);
3291 EXPORT_SYMBOL(proc_dointvec_minmax);
3292 EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
3293 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
3294 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
3295 EXPORT_SYMBOL(proc_dostring);
3296 EXPORT_SYMBOL(proc_doulongvec_minmax);
3297 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);