]> git.proxmox.com Git - mirror_ubuntu-eoan-kernel.git/blame_incremental - kernel/sysctl.c
UBUNTU: Ubuntu-5.3.0-29.31
[mirror_ubuntu-eoan-kernel.git] / kernel / sysctl.c
... / ...
CommitLineData
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * sysctl.c: General linux system control interface
4 *
5 * Begun 24 March 1995, Stephen Tweedie
6 * Added /proc support, Dec 1995
7 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10 * Dynamic registration fixes, Stephen Tweedie.
11 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13 * Horn.
14 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17 * Wendling.
18 * The list_for_each() macro wasn't appropriate for the sysctl loop.
19 * Removed it and replaced it with older style, 03/23/00, Bill Wendling
20 */
21
22#include <linux/module.h>
23#include <linux/aio.h>
24#include <linux/mm.h>
25#include <linux/swap.h>
26#include <linux/slab.h>
27#include <linux/sysctl.h>
28#include <linux/bitmap.h>
29#include <linux/signal.h>
30#include <linux/printk.h>
31#include <linux/proc_fs.h>
32#include <linux/security.h>
33#include <linux/ctype.h>
34#include <linux/kmemleak.h>
35#include <linux/fs.h>
36#include <linux/init.h>
37#include <linux/kernel.h>
38#include <linux/kobject.h>
39#include <linux/net.h>
40#include <linux/sysrq.h>
41#include <linux/highuid.h>
42#include <linux/writeback.h>
43#include <linux/ratelimit.h>
44#include <linux/compaction.h>
45#include <linux/hugetlb.h>
46#include <linux/initrd.h>
47#include <linux/key.h>
48#include <linux/times.h>
49#include <linux/limits.h>
50#include <linux/dcache.h>
51#include <linux/dnotify.h>
52#include <linux/syscalls.h>
53#include <linux/vmstat.h>
54#include <linux/nfs_fs.h>
55#include <linux/acpi.h>
56#include <linux/reboot.h>
57#include <linux/ftrace.h>
58#include <linux/perf_event.h>
59#include <linux/kprobes.h>
60#include <linux/pipe_fs_i.h>
61#include <linux/oom.h>
62#include <linux/kmod.h>
63#include <linux/capability.h>
64#include <linux/binfmts.h>
65#include <linux/sched/sysctl.h>
66#include <linux/sched/coredump.h>
67#include <linux/kexec.h>
68#include <linux/bpf.h>
69#include <linux/mount.h>
70#include <linux/userfaultfd_k.h>
71
72#include "../lib/kstrtox.h"
73
74#include <linux/uaccess.h>
75#include <asm/processor.h>
76
77#ifdef CONFIG_X86
78#include <asm/nmi.h>
79#include <asm/stacktrace.h>
80#include <asm/io.h>
81#endif
82#ifdef CONFIG_SPARC
83#include <asm/setup.h>
84#endif
85#ifdef CONFIG_BSD_PROCESS_ACCT
86#include <linux/acct.h>
87#endif
88#ifdef CONFIG_RT_MUTEXES
89#include <linux/rtmutex.h>
90#endif
91#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
92#include <linux/lockdep.h>
93#endif
94#ifdef CONFIG_CHR_DEV_SG
95#include <scsi/sg.h>
96#endif
97#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
98#include <linux/stackleak.h>
99#endif
100#ifdef CONFIG_LOCKUP_DETECTOR
101#include <linux/nmi.h>
102#endif
103
104#if defined(CONFIG_SYSCTL)
105
/* Variables defined elsewhere for which no header provides a declaration. */
extern int suid_dumpable;
#ifdef CONFIG_COREDUMP
extern int core_uses_pid;
extern char core_pattern[];
extern unsigned int core_pipe_limit;
#endif /* CONFIG_COREDUMP */
#ifdef CONFIG_USER_NS
extern int unprivileged_userns_clone;
#endif /* CONFIG_USER_NS */
extern int pid_max;
extern int pid_max_min;
extern int pid_max_max;
extern int percpu_pagelist_fraction;
extern int latencytop_enabled;
extern unsigned int sysctl_nr_open_min;
extern unsigned int sysctl_nr_open_max;
#ifndef CONFIG_MMU
extern int sysctl_nr_trim_pages;
#endif /* !CONFIG_MMU */
124
125/* Constants used for minimum and maximum */
126#ifdef CONFIG_LOCKUP_DETECTOR
127static int sixty = 60;
128#endif
129
130static int __maybe_unused neg_one = -1;
131static int __maybe_unused two = 2;
132static int __maybe_unused four = 4;
133static unsigned long zero_ul;
134static unsigned long one_ul = 1;
135static unsigned long long_max = LONG_MAX;
136static int one_hundred = 100;
137static int one_thousand = 1000;
138#ifdef CONFIG_PRINTK
139static int ten_thousand = 10000;
140#endif
141#ifdef CONFIG_PERF_EVENTS
142static int six_hundred_forty_kb = 640 * 1024;
143#endif
144
145/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
146static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
147
148/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
149static int maxolduid = 65535;
150static int minolduid;
151
152static int ngroups_max = NGROUPS_MAX;
153static const int cap_last_cap = CAP_LAST_CAP;
154
155/*
156 * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
157 * and hung_task_check_interval_secs
158 */
159#ifdef CONFIG_DETECT_HUNG_TASK
160static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
161#endif
162
#ifdef CONFIG_INOTIFY_USER
#include <linux/inotify.h>
#endif /* CONFIG_INOTIFY_USER */

/*
 * Externally defined variables, each declared only under the architecture
 * or configuration guard it belongs to.
 */
#ifdef __hppa__
extern int pwrsw_enabled;
#endif /* __hppa__ */

#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
extern int unaligned_enabled;
#endif /* CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW */

#ifdef CONFIG_IA64
extern int unaligned_dump_stack;
#endif /* CONFIG_IA64 */

#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
extern int no_unaligned_warning;
#endif /* CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN */
184
185#ifdef CONFIG_PROC_SYSCTL
186
/**
 * enum sysctl_writes_mode - how the file position affects sysctl writes
 *
 * @SYSCTL_WRITES_LEGACY: every write syscall has to carry the complete
 *	sysctl value.  Repeated writes through the same file descriptor each
 *	overwrite the value, whatever the file position is, and a non-zero
 *	initial position does not produce a warning.
 * @SYSCTL_WRITES_WARN: behaves like the legacy mode, except that a warning
 *	is issued when the initial file position is not 0.
 * @SYSCTL_WRITES_STRICT: numeric entries may only be written at file
 *	position 0, with the whole value contained in the buffer handed to
 *	the write syscall.  String entries honour the file position, limited
 *	to the maximum length of the buffer: anything past the maximum length
 *	is ignored, and successive writes append to the buffer.
 *
 * The selected mode determines how the current file position influences
 * updates of sysctl values made through the proc interface on each write.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY = -1,
	SYSCTL_WRITES_WARN,		/*  0 */
	SYSCTL_WRITES_STRICT,		/*  1 */
};
211
212static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
213
214static int proc_do_cad_pid(struct ctl_table *table, int write,
215 void __user *buffer, size_t *lenp, loff_t *ppos);
216static int proc_taint(struct ctl_table *table, int write,
217 void __user *buffer, size_t *lenp, loff_t *ppos);
218#endif
219
220#ifdef CONFIG_PRINTK
221static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
222 void __user *buffer, size_t *lenp, loff_t *ppos);
223#endif
224
225static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
226 void __user *buffer, size_t *lenp, loff_t *ppos);
227#ifdef CONFIG_COREDUMP
228static int proc_dostring_coredump(struct ctl_table *table, int write,
229 void __user *buffer, size_t *lenp, loff_t *ppos);
230#endif
231static int proc_dopipe_max_size(struct ctl_table *table, int write,
232 void __user *buffer, size_t *lenp, loff_t *ppos);
233
234#ifdef CONFIG_MAGIC_SYSRQ
235/* Note: sysrq code uses its own private copy */
236static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
237
238static int sysrq_sysctl_handler(struct ctl_table *table, int write,
239 void __user *buffer, size_t *lenp,
240 loff_t *ppos)
241{
242 int error;
243
244 error = proc_dointvec(table, write, buffer, lenp, ppos);
245 if (error)
246 return error;
247
248 if (write)
249 sysrq_toggle_support(__sysrq_enabled);
250
251 return 0;
252}
253
254#endif
255
256static struct ctl_table kern_table[];
257static struct ctl_table vm_table[];
258static struct ctl_table fs_table[];
259static struct ctl_table debug_table[];
260static struct ctl_table dev_table[];
261extern struct ctl_table random_table[];
262#ifdef CONFIG_EPOLL
263extern struct ctl_table epoll_table[];
264#endif
265
266#ifdef CONFIG_FW_LOADER_USER_HELPER
267extern struct ctl_table firmware_config_table[];
268#endif
269
270#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
271int sysctl_legacy_va_layout;
272#endif
273
274/* The default sysctl tables: */
275
276static struct ctl_table sysctl_base_table[] = {
277 {
278 .procname = "kernel",
279 .mode = 0555,
280 .child = kern_table,
281 },
282 {
283 .procname = "vm",
284 .mode = 0555,
285 .child = vm_table,
286 },
287 {
288 .procname = "fs",
289 .mode = 0555,
290 .child = fs_table,
291 },
292 {
293 .procname = "debug",
294 .mode = 0555,
295 .child = debug_table,
296 },
297 {
298 .procname = "dev",
299 .mode = 0555,
300 .child = dev_table,
301 },
302 { }
303};
304
#ifdef CONFIG_SCHED_DEBUG
/* Bounds for the scheduler granularity/latency entries: 100 usecs .. 1 second. */
static int min_sched_granularity_ns = 100000;
static int max_sched_granularity_ns = NSEC_PER_SEC;

/* Bounds for sched_wakeup_granularity_ns: 0 usecs .. 1 second. */
static int min_wakeup_granularity_ns;
static int max_wakeup_granularity_ns = NSEC_PER_SEC;

#ifdef CONFIG_SMP
/* Bounds for sched_tunable_scaling: first .. last valid enumerator. */
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END - 1;
#endif /* CONFIG_SMP */
#endif /* CONFIG_SCHED_DEBUG */

#ifdef CONFIG_COMPACTION
/* Lower and upper extfrag threshold limits (0 and 1000). */
static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000;
#endif /* CONFIG_COMPACTION */
320
321static struct ctl_table kern_table[] = {
322 {
323 .procname = "sched_child_runs_first",
324 .data = &sysctl_sched_child_runs_first,
325 .maxlen = sizeof(unsigned int),
326 .mode = 0644,
327 .proc_handler = proc_dointvec,
328 },
329#ifdef CONFIG_SCHED_DEBUG
330 {
331 .procname = "sched_min_granularity_ns",
332 .data = &sysctl_sched_min_granularity,
333 .maxlen = sizeof(unsigned int),
334 .mode = 0644,
335 .proc_handler = sched_proc_update_handler,
336 .extra1 = &min_sched_granularity_ns,
337 .extra2 = &max_sched_granularity_ns,
338 },
339 {
340 .procname = "sched_latency_ns",
341 .data = &sysctl_sched_latency,
342 .maxlen = sizeof(unsigned int),
343 .mode = 0644,
344 .proc_handler = sched_proc_update_handler,
345 .extra1 = &min_sched_granularity_ns,
346 .extra2 = &max_sched_granularity_ns,
347 },
348 {
349 .procname = "sched_wakeup_granularity_ns",
350 .data = &sysctl_sched_wakeup_granularity,
351 .maxlen = sizeof(unsigned int),
352 .mode = 0644,
353 .proc_handler = sched_proc_update_handler,
354 .extra1 = &min_wakeup_granularity_ns,
355 .extra2 = &max_wakeup_granularity_ns,
356 },
357#ifdef CONFIG_SMP
358 {
359 .procname = "sched_tunable_scaling",
360 .data = &sysctl_sched_tunable_scaling,
361 .maxlen = sizeof(enum sched_tunable_scaling),
362 .mode = 0644,
363 .proc_handler = sched_proc_update_handler,
364 .extra1 = &min_sched_tunable_scaling,
365 .extra2 = &max_sched_tunable_scaling,
366 },
367 {
368 .procname = "sched_migration_cost_ns",
369 .data = &sysctl_sched_migration_cost,
370 .maxlen = sizeof(unsigned int),
371 .mode = 0644,
372 .proc_handler = proc_dointvec,
373 },
374 {
375 .procname = "sched_nr_migrate",
376 .data = &sysctl_sched_nr_migrate,
377 .maxlen = sizeof(unsigned int),
378 .mode = 0644,
379 .proc_handler = proc_dointvec,
380 },
381#ifdef CONFIG_SCHEDSTATS
382 {
383 .procname = "sched_schedstats",
384 .data = NULL,
385 .maxlen = sizeof(unsigned int),
386 .mode = 0644,
387 .proc_handler = sysctl_schedstats,
388 .extra1 = SYSCTL_ZERO,
389 .extra2 = SYSCTL_ONE,
390 },
391#endif /* CONFIG_SCHEDSTATS */
392#endif /* CONFIG_SMP */
393#ifdef CONFIG_NUMA_BALANCING
394 {
395 .procname = "numa_balancing_scan_delay_ms",
396 .data = &sysctl_numa_balancing_scan_delay,
397 .maxlen = sizeof(unsigned int),
398 .mode = 0644,
399 .proc_handler = proc_dointvec,
400 },
401 {
402 .procname = "numa_balancing_scan_period_min_ms",
403 .data = &sysctl_numa_balancing_scan_period_min,
404 .maxlen = sizeof(unsigned int),
405 .mode = 0644,
406 .proc_handler = proc_dointvec,
407 },
408 {
409 .procname = "numa_balancing_scan_period_max_ms",
410 .data = &sysctl_numa_balancing_scan_period_max,
411 .maxlen = sizeof(unsigned int),
412 .mode = 0644,
413 .proc_handler = proc_dointvec,
414 },
415 {
416 .procname = "numa_balancing_scan_size_mb",
417 .data = &sysctl_numa_balancing_scan_size,
418 .maxlen = sizeof(unsigned int),
419 .mode = 0644,
420 .proc_handler = proc_dointvec_minmax,
421 .extra1 = SYSCTL_ONE,
422 },
423 {
424 .procname = "numa_balancing",
425 .data = NULL, /* filled in by handler */
426 .maxlen = sizeof(unsigned int),
427 .mode = 0644,
428 .proc_handler = sysctl_numa_balancing,
429 .extra1 = SYSCTL_ZERO,
430 .extra2 = SYSCTL_ONE,
431 },
432#endif /* CONFIG_NUMA_BALANCING */
433#endif /* CONFIG_SCHED_DEBUG */
434 {
435 .procname = "sched_rt_period_us",
436 .data = &sysctl_sched_rt_period,
437 .maxlen = sizeof(unsigned int),
438 .mode = 0644,
439 .proc_handler = sched_rt_handler,
440 },
441 {
442 .procname = "sched_rt_runtime_us",
443 .data = &sysctl_sched_rt_runtime,
444 .maxlen = sizeof(int),
445 .mode = 0644,
446 .proc_handler = sched_rt_handler,
447 },
448 {
449 .procname = "sched_rr_timeslice_ms",
450 .data = &sysctl_sched_rr_timeslice,
451 .maxlen = sizeof(int),
452 .mode = 0644,
453 .proc_handler = sched_rr_handler,
454 },
455#ifdef CONFIG_UCLAMP_TASK
456 {
457 .procname = "sched_util_clamp_min",
458 .data = &sysctl_sched_uclamp_util_min,
459 .maxlen = sizeof(unsigned int),
460 .mode = 0644,
461 .proc_handler = sysctl_sched_uclamp_handler,
462 },
463 {
464 .procname = "sched_util_clamp_max",
465 .data = &sysctl_sched_uclamp_util_max,
466 .maxlen = sizeof(unsigned int),
467 .mode = 0644,
468 .proc_handler = sysctl_sched_uclamp_handler,
469 },
470#endif
471#ifdef CONFIG_SCHED_AUTOGROUP
472 {
473 .procname = "sched_autogroup_enabled",
474 .data = &sysctl_sched_autogroup_enabled,
475 .maxlen = sizeof(unsigned int),
476 .mode = 0644,
477 .proc_handler = proc_dointvec_minmax,
478 .extra1 = SYSCTL_ZERO,
479 .extra2 = SYSCTL_ONE,
480 },
481#endif
482#ifdef CONFIG_CFS_BANDWIDTH
483 {
484 .procname = "sched_cfs_bandwidth_slice_us",
485 .data = &sysctl_sched_cfs_bandwidth_slice,
486 .maxlen = sizeof(unsigned int),
487 .mode = 0644,
488 .proc_handler = proc_dointvec_minmax,
489 .extra1 = SYSCTL_ONE,
490 },
491#endif
492#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
493 {
494 .procname = "sched_energy_aware",
495 .data = &sysctl_sched_energy_aware,
496 .maxlen = sizeof(unsigned int),
497 .mode = 0644,
498 .proc_handler = sched_energy_aware_handler,
499 .extra1 = SYSCTL_ZERO,
500 .extra2 = SYSCTL_ONE,
501 },
502#endif
503#ifdef CONFIG_PROVE_LOCKING
504 {
505 .procname = "prove_locking",
506 .data = &prove_locking,
507 .maxlen = sizeof(int),
508 .mode = 0644,
509 .proc_handler = proc_dointvec,
510 },
511#endif
512#ifdef CONFIG_LOCK_STAT
513 {
514 .procname = "lock_stat",
515 .data = &lock_stat,
516 .maxlen = sizeof(int),
517 .mode = 0644,
518 .proc_handler = proc_dointvec,
519 },
520#endif
521 {
522 .procname = "panic",
523 .data = &panic_timeout,
524 .maxlen = sizeof(int),
525 .mode = 0644,
526 .proc_handler = proc_dointvec,
527 },
528#ifdef CONFIG_COREDUMP
529 {
530 .procname = "core_uses_pid",
531 .data = &core_uses_pid,
532 .maxlen = sizeof(int),
533 .mode = 0644,
534 .proc_handler = proc_dointvec,
535 },
536 {
537 .procname = "core_pattern",
538 .data = core_pattern,
539 .maxlen = CORENAME_MAX_SIZE,
540 .mode = 0644,
541 .proc_handler = proc_dostring_coredump,
542 },
543 {
544 .procname = "core_pipe_limit",
545 .data = &core_pipe_limit,
546 .maxlen = sizeof(unsigned int),
547 .mode = 0644,
548 .proc_handler = proc_dointvec,
549 },
550#endif
551#ifdef CONFIG_USER_NS
552 {
553 .procname = "unprivileged_userns_clone",
554 .data = &unprivileged_userns_clone,
555 .maxlen = sizeof(int),
556 .mode = 0644,
557 .proc_handler = proc_dointvec,
558 },
559#endif
560#ifdef CONFIG_PROC_SYSCTL
561 {
562 .procname = "tainted",
563 .maxlen = sizeof(long),
564 .mode = 0644,
565 .proc_handler = proc_taint,
566 },
567 {
568 .procname = "sysctl_writes_strict",
569 .data = &sysctl_writes_strict,
570 .maxlen = sizeof(int),
571 .mode = 0644,
572 .proc_handler = proc_dointvec_minmax,
573 .extra1 = &neg_one,
574 .extra2 = SYSCTL_ONE,
575 },
576#endif
577#ifdef CONFIG_LATENCYTOP
578 {
579 .procname = "latencytop",
580 .data = &latencytop_enabled,
581 .maxlen = sizeof(int),
582 .mode = 0644,
583 .proc_handler = sysctl_latencytop,
584 },
585#endif
586#ifdef CONFIG_BLK_DEV_INITRD
587 {
588 .procname = "real-root-dev",
589 .data = &real_root_dev,
590 .maxlen = sizeof(int),
591 .mode = 0644,
592 .proc_handler = proc_dointvec,
593 },
594#endif
595 {
596 .procname = "print-fatal-signals",
597 .data = &print_fatal_signals,
598 .maxlen = sizeof(int),
599 .mode = 0644,
600 .proc_handler = proc_dointvec,
601 },
602#ifdef CONFIG_SPARC
603 {
604 .procname = "reboot-cmd",
605 .data = reboot_command,
606 .maxlen = 256,
607 .mode = 0644,
608 .proc_handler = proc_dostring,
609 },
610 {
611 .procname = "stop-a",
612 .data = &stop_a_enabled,
613 .maxlen = sizeof (int),
614 .mode = 0644,
615 .proc_handler = proc_dointvec,
616 },
617 {
618 .procname = "scons-poweroff",
619 .data = &scons_pwroff,
620 .maxlen = sizeof (int),
621 .mode = 0644,
622 .proc_handler = proc_dointvec,
623 },
624#endif
625#ifdef CONFIG_SPARC64
626 {
627 .procname = "tsb-ratio",
628 .data = &sysctl_tsb_ratio,
629 .maxlen = sizeof (int),
630 .mode = 0644,
631 .proc_handler = proc_dointvec,
632 },
633#endif
634#ifdef __hppa__
635 {
636 .procname = "soft-power",
637 .data = &pwrsw_enabled,
638 .maxlen = sizeof (int),
639 .mode = 0644,
640 .proc_handler = proc_dointvec,
641 },
642#endif
643#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
644 {
645 .procname = "unaligned-trap",
646 .data = &unaligned_enabled,
647 .maxlen = sizeof (int),
648 .mode = 0644,
649 .proc_handler = proc_dointvec,
650 },
651#endif
652 {
653 .procname = "ctrl-alt-del",
654 .data = &C_A_D,
655 .maxlen = sizeof(int),
656 .mode = 0644,
657 .proc_handler = proc_dointvec,
658 },
659#ifdef CONFIG_FUNCTION_TRACER
660 {
661 .procname = "ftrace_enabled",
662 .data = &ftrace_enabled,
663 .maxlen = sizeof(int),
664 .mode = 0644,
665 .proc_handler = ftrace_enable_sysctl,
666 },
667#endif
668#ifdef CONFIG_STACK_TRACER
669 {
670 .procname = "stack_tracer_enabled",
671 .data = &stack_tracer_enabled,
672 .maxlen = sizeof(int),
673 .mode = 0644,
674 .proc_handler = stack_trace_sysctl,
675 },
676#endif
677#ifdef CONFIG_TRACING
678 {
679 .procname = "ftrace_dump_on_oops",
680 .data = &ftrace_dump_on_oops,
681 .maxlen = sizeof(int),
682 .mode = 0644,
683 .proc_handler = proc_dointvec,
684 },
685 {
686 .procname = "traceoff_on_warning",
687 .data = &__disable_trace_on_warning,
688 .maxlen = sizeof(__disable_trace_on_warning),
689 .mode = 0644,
690 .proc_handler = proc_dointvec,
691 },
692 {
693 .procname = "tracepoint_printk",
694 .data = &tracepoint_printk,
695 .maxlen = sizeof(tracepoint_printk),
696 .mode = 0644,
697 .proc_handler = tracepoint_printk_sysctl,
698 },
699#endif
700#ifdef CONFIG_KEXEC_CORE
701 {
702 .procname = "kexec_load_disabled",
703 .data = &kexec_load_disabled,
704 .maxlen = sizeof(int),
705 .mode = 0644,
706 /* only handle a transition from default "0" to "1" */
707 .proc_handler = proc_dointvec_minmax,
708 .extra1 = SYSCTL_ONE,
709 .extra2 = SYSCTL_ONE,
710 },
711#endif
712#ifdef CONFIG_MODULES
713 {
714 .procname = "modprobe",
715 .data = &modprobe_path,
716 .maxlen = KMOD_PATH_LEN,
717 .mode = 0644,
718 .proc_handler = proc_dostring,
719 },
720 {
721 .procname = "modules_disabled",
722 .data = &modules_disabled,
723 .maxlen = sizeof(int),
724 .mode = 0644,
725 /* only handle a transition from default "0" to "1" */
726 .proc_handler = proc_dointvec_minmax,
727 .extra1 = SYSCTL_ONE,
728 .extra2 = SYSCTL_ONE,
729 },
730#endif
731#ifdef CONFIG_UEVENT_HELPER
732 {
733 .procname = "hotplug",
734 .data = &uevent_helper,
735 .maxlen = UEVENT_HELPER_PATH_LEN,
736 .mode = 0644,
737 .proc_handler = proc_dostring,
738 },
739#endif
740#ifdef CONFIG_CHR_DEV_SG
741 {
742 .procname = "sg-big-buff",
743 .data = &sg_big_buff,
744 .maxlen = sizeof (int),
745 .mode = 0444,
746 .proc_handler = proc_dointvec,
747 },
748#endif
749#ifdef CONFIG_BSD_PROCESS_ACCT
750 {
751 .procname = "acct",
752 .data = &acct_parm,
753 .maxlen = 3*sizeof(int),
754 .mode = 0644,
755 .proc_handler = proc_dointvec,
756 },
757#endif
758#ifdef CONFIG_MAGIC_SYSRQ
759 {
760 .procname = "sysrq",
761 .data = &__sysrq_enabled,
762 .maxlen = sizeof (int),
763 .mode = 0644,
764 .proc_handler = sysrq_sysctl_handler,
765 },
766#endif
767#ifdef CONFIG_PROC_SYSCTL
768 {
769 .procname = "cad_pid",
770 .data = NULL,
771 .maxlen = sizeof (int),
772 .mode = 0600,
773 .proc_handler = proc_do_cad_pid,
774 },
775#endif
776 {
777 .procname = "threads-max",
778 .data = NULL,
779 .maxlen = sizeof(int),
780 .mode = 0644,
781 .proc_handler = sysctl_max_threads,
782 },
783 {
784 .procname = "random",
785 .mode = 0555,
786 .child = random_table,
787 },
788 {
789 .procname = "usermodehelper",
790 .mode = 0555,
791 .child = usermodehelper_table,
792 },
793#ifdef CONFIG_FW_LOADER_USER_HELPER
794 {
795 .procname = "firmware_config",
796 .mode = 0555,
797 .child = firmware_config_table,
798 },
799#endif
800 {
801 .procname = "overflowuid",
802 .data = &overflowuid,
803 .maxlen = sizeof(int),
804 .mode = 0644,
805 .proc_handler = proc_dointvec_minmax,
806 .extra1 = &minolduid,
807 .extra2 = &maxolduid,
808 },
809 {
810 .procname = "overflowgid",
811 .data = &overflowgid,
812 .maxlen = sizeof(int),
813 .mode = 0644,
814 .proc_handler = proc_dointvec_minmax,
815 .extra1 = &minolduid,
816 .extra2 = &maxolduid,
817 },
818#ifdef CONFIG_S390
819#ifdef CONFIG_MATHEMU
820 {
821 .procname = "ieee_emulation_warnings",
822 .data = &sysctl_ieee_emulation_warnings,
823 .maxlen = sizeof(int),
824 .mode = 0644,
825 .proc_handler = proc_dointvec,
826 },
827#endif
828 {
829 .procname = "userprocess_debug",
830 .data = &show_unhandled_signals,
831 .maxlen = sizeof(int),
832 .mode = 0644,
833 .proc_handler = proc_dointvec,
834 },
835#endif
836 {
837 .procname = "pid_max",
838 .data = &pid_max,
839 .maxlen = sizeof (int),
840 .mode = 0644,
841 .proc_handler = proc_dointvec_minmax,
842 .extra1 = &pid_max_min,
843 .extra2 = &pid_max_max,
844 },
845 {
846 .procname = "panic_on_oops",
847 .data = &panic_on_oops,
848 .maxlen = sizeof(int),
849 .mode = 0644,
850 .proc_handler = proc_dointvec,
851 },
852 {
853 .procname = "panic_print",
854 .data = &panic_print,
855 .maxlen = sizeof(unsigned long),
856 .mode = 0644,
857 .proc_handler = proc_doulongvec_minmax,
858 },
859#if defined CONFIG_PRINTK
860 {
861 .procname = "printk",
862 .data = &console_loglevel,
863 .maxlen = 4*sizeof(int),
864 .mode = 0644,
865 .proc_handler = proc_dointvec,
866 },
867 {
868 .procname = "printk_ratelimit",
869 .data = &printk_ratelimit_state.interval,
870 .maxlen = sizeof(int),
871 .mode = 0644,
872 .proc_handler = proc_dointvec_jiffies,
873 },
874 {
875 .procname = "printk_ratelimit_burst",
876 .data = &printk_ratelimit_state.burst,
877 .maxlen = sizeof(int),
878 .mode = 0644,
879 .proc_handler = proc_dointvec,
880 },
881 {
882 .procname = "printk_delay",
883 .data = &printk_delay_msec,
884 .maxlen = sizeof(int),
885 .mode = 0644,
886 .proc_handler = proc_dointvec_minmax,
887 .extra1 = SYSCTL_ZERO,
888 .extra2 = &ten_thousand,
889 },
890 {
891 .procname = "printk_devkmsg",
892 .data = devkmsg_log_str,
893 .maxlen = DEVKMSG_STR_MAX_SIZE,
894 .mode = 0644,
895 .proc_handler = devkmsg_sysctl_set_loglvl,
896 },
897 {
898 .procname = "dmesg_restrict",
899 .data = &dmesg_restrict,
900 .maxlen = sizeof(int),
901 .mode = 0644,
902 .proc_handler = proc_dointvec_minmax_sysadmin,
903 .extra1 = SYSCTL_ZERO,
904 .extra2 = SYSCTL_ONE,
905 },
906 {
907 .procname = "kptr_restrict",
908 .data = &kptr_restrict,
909 .maxlen = sizeof(int),
910 .mode = 0644,
911 .proc_handler = proc_dointvec_minmax_sysadmin,
912 .extra1 = SYSCTL_ZERO,
913 .extra2 = &two,
914 },
915#endif
916 {
917 .procname = "ngroups_max",
918 .data = &ngroups_max,
919 .maxlen = sizeof (int),
920 .mode = 0444,
921 .proc_handler = proc_dointvec,
922 },
923 {
924 .procname = "cap_last_cap",
925 .data = (void *)&cap_last_cap,
926 .maxlen = sizeof(int),
927 .mode = 0444,
928 .proc_handler = proc_dointvec,
929 },
930#if defined(CONFIG_LOCKUP_DETECTOR)
931 {
932 .procname = "watchdog",
933 .data = &watchdog_user_enabled,
934 .maxlen = sizeof(int),
935 .mode = 0644,
936 .proc_handler = proc_watchdog,
937 .extra1 = SYSCTL_ZERO,
938 .extra2 = SYSCTL_ONE,
939 },
940 {
941 .procname = "watchdog_thresh",
942 .data = &watchdog_thresh,
943 .maxlen = sizeof(int),
944 .mode = 0644,
945 .proc_handler = proc_watchdog_thresh,
946 .extra1 = SYSCTL_ZERO,
947 .extra2 = &sixty,
948 },
949 {
950 .procname = "nmi_watchdog",
951 .data = &nmi_watchdog_user_enabled,
952 .maxlen = sizeof(int),
953 .mode = NMI_WATCHDOG_SYSCTL_PERM,
954 .proc_handler = proc_nmi_watchdog,
955 .extra1 = SYSCTL_ZERO,
956 .extra2 = SYSCTL_ONE,
957 },
958 {
959 .procname = "watchdog_cpumask",
960 .data = &watchdog_cpumask_bits,
961 .maxlen = NR_CPUS,
962 .mode = 0644,
963 .proc_handler = proc_watchdog_cpumask,
964 },
965#ifdef CONFIG_SOFTLOCKUP_DETECTOR
966 {
967 .procname = "soft_watchdog",
968 .data = &soft_watchdog_user_enabled,
969 .maxlen = sizeof(int),
970 .mode = 0644,
971 .proc_handler = proc_soft_watchdog,
972 .extra1 = SYSCTL_ZERO,
973 .extra2 = SYSCTL_ONE,
974 },
975 {
976 .procname = "softlockup_panic",
977 .data = &softlockup_panic,
978 .maxlen = sizeof(int),
979 .mode = 0644,
980 .proc_handler = proc_dointvec_minmax,
981 .extra1 = SYSCTL_ZERO,
982 .extra2 = SYSCTL_ONE,
983 },
984#ifdef CONFIG_SMP
985 {
986 .procname = "softlockup_all_cpu_backtrace",
987 .data = &sysctl_softlockup_all_cpu_backtrace,
988 .maxlen = sizeof(int),
989 .mode = 0644,
990 .proc_handler = proc_dointvec_minmax,
991 .extra1 = SYSCTL_ZERO,
992 .extra2 = SYSCTL_ONE,
993 },
994#endif /* CONFIG_SMP */
995#endif
996#ifdef CONFIG_HARDLOCKUP_DETECTOR
997 {
998 .procname = "hardlockup_panic",
999 .data = &hardlockup_panic,
1000 .maxlen = sizeof(int),
1001 .mode = 0644,
1002 .proc_handler = proc_dointvec_minmax,
1003 .extra1 = SYSCTL_ZERO,
1004 .extra2 = SYSCTL_ONE,
1005 },
1006#ifdef CONFIG_SMP
1007 {
1008 .procname = "hardlockup_all_cpu_backtrace",
1009 .data = &sysctl_hardlockup_all_cpu_backtrace,
1010 .maxlen = sizeof(int),
1011 .mode = 0644,
1012 .proc_handler = proc_dointvec_minmax,
1013 .extra1 = SYSCTL_ZERO,
1014 .extra2 = SYSCTL_ONE,
1015 },
1016#endif /* CONFIG_SMP */
1017#endif
1018#endif
1019
1020#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
1021 {
1022 .procname = "unknown_nmi_panic",
1023 .data = &unknown_nmi_panic,
1024 .maxlen = sizeof (int),
1025 .mode = 0644,
1026 .proc_handler = proc_dointvec,
1027 },
1028#endif
1029#if defined(CONFIG_X86)
1030 {
1031 .procname = "panic_on_unrecovered_nmi",
1032 .data = &panic_on_unrecovered_nmi,
1033 .maxlen = sizeof(int),
1034 .mode = 0644,
1035 .proc_handler = proc_dointvec,
1036 },
1037 {
1038 .procname = "panic_on_io_nmi",
1039 .data = &panic_on_io_nmi,
1040 .maxlen = sizeof(int),
1041 .mode = 0644,
1042 .proc_handler = proc_dointvec,
1043 },
1044#ifdef CONFIG_DEBUG_STACKOVERFLOW
1045 {
1046 .procname = "panic_on_stackoverflow",
1047 .data = &sysctl_panic_on_stackoverflow,
1048 .maxlen = sizeof(int),
1049 .mode = 0644,
1050 .proc_handler = proc_dointvec,
1051 },
1052#endif
1053 {
1054 .procname = "bootloader_type",
1055 .data = &bootloader_type,
1056 .maxlen = sizeof (int),
1057 .mode = 0444,
1058 .proc_handler = proc_dointvec,
1059 },
1060 {
1061 .procname = "bootloader_version",
1062 .data = &bootloader_version,
1063 .maxlen = sizeof (int),
1064 .mode = 0444,
1065 .proc_handler = proc_dointvec,
1066 },
1067 {
1068 .procname = "io_delay_type",
1069 .data = &io_delay_type,
1070 .maxlen = sizeof(int),
1071 .mode = 0644,
1072 .proc_handler = proc_dointvec,
1073 },
1074#endif
1075#if defined(CONFIG_MMU)
1076 {
1077 .procname = "randomize_va_space",
1078 .data = &randomize_va_space,
1079 .maxlen = sizeof(int),
1080 .mode = 0644,
1081 .proc_handler = proc_dointvec,
1082 },
1083#endif
1084#if defined(CONFIG_S390) && defined(CONFIG_SMP)
1085 {
1086 .procname = "spin_retry",
1087 .data = &spin_retry,
1088 .maxlen = sizeof (int),
1089 .mode = 0644,
1090 .proc_handler = proc_dointvec,
1091 },
1092#endif
1093#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1094 {
1095 .procname = "acpi_video_flags",
1096 .data = &acpi_realmode_flags,
1097 .maxlen = sizeof (unsigned long),
1098 .mode = 0644,
1099 .proc_handler = proc_doulongvec_minmax,
1100 },
1101#endif
1102#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1103 {
1104 .procname = "ignore-unaligned-usertrap",
1105 .data = &no_unaligned_warning,
1106 .maxlen = sizeof (int),
1107 .mode = 0644,
1108 .proc_handler = proc_dointvec,
1109 },
1110#endif
1111#ifdef CONFIG_IA64
1112 {
1113 .procname = "unaligned-dump-stack",
1114 .data = &unaligned_dump_stack,
1115 .maxlen = sizeof (int),
1116 .mode = 0644,
1117 .proc_handler = proc_dointvec,
1118 },
1119#endif
1120#ifdef CONFIG_DETECT_HUNG_TASK
1121 {
1122 .procname = "hung_task_panic",
1123 .data = &sysctl_hung_task_panic,
1124 .maxlen = sizeof(int),
1125 .mode = 0644,
1126 .proc_handler = proc_dointvec_minmax,
1127 .extra1 = SYSCTL_ZERO,
1128 .extra2 = SYSCTL_ONE,
1129 },
1130 {
1131 .procname = "hung_task_check_count",
1132 .data = &sysctl_hung_task_check_count,
1133 .maxlen = sizeof(int),
1134 .mode = 0644,
1135 .proc_handler = proc_dointvec_minmax,
1136 .extra1 = SYSCTL_ZERO,
1137 },
1138 {
1139 .procname = "hung_task_timeout_secs",
1140 .data = &sysctl_hung_task_timeout_secs,
1141 .maxlen = sizeof(unsigned long),
1142 .mode = 0644,
1143 .proc_handler = proc_dohung_task_timeout_secs,
1144 .extra2 = &hung_task_timeout_max,
1145 },
1146 {
1147 .procname = "hung_task_check_interval_secs",
1148 .data = &sysctl_hung_task_check_interval_secs,
1149 .maxlen = sizeof(unsigned long),
1150 .mode = 0644,
1151 .proc_handler = proc_dohung_task_timeout_secs,
1152 .extra2 = &hung_task_timeout_max,
1153 },
1154 {
1155 .procname = "hung_task_warnings",
1156 .data = &sysctl_hung_task_warnings,
1157 .maxlen = sizeof(int),
1158 .mode = 0644,
1159 .proc_handler = proc_dointvec_minmax,
1160 .extra1 = &neg_one,
1161 },
1162#endif
1163#ifdef CONFIG_RT_MUTEXES
1164 {
1165 .procname = "max_lock_depth",
1166 .data = &max_lock_depth,
1167 .maxlen = sizeof(int),
1168 .mode = 0644,
1169 .proc_handler = proc_dointvec,
1170 },
1171#endif
1172 {
1173 .procname = "poweroff_cmd",
1174 .data = &poweroff_cmd,
1175 .maxlen = POWEROFF_CMD_PATH_LEN,
1176 .mode = 0644,
1177 .proc_handler = proc_dostring,
1178 },
1179#ifdef CONFIG_KEYS
1180 {
1181 .procname = "keys",
1182 .mode = 0555,
1183 .child = key_sysctls,
1184 },
1185#endif
1186#ifdef CONFIG_PERF_EVENTS
1187 /*
1188 * User-space scripts rely on the existence of this file
1189 * as a feature check for perf_events being enabled.
1190 *
1191 * So it's an ABI, do not remove!
1192 */
1193 {
1194 .procname = "perf_event_paranoid",
1195 .data = &sysctl_perf_event_paranoid,
1196 .maxlen = sizeof(sysctl_perf_event_paranoid),
1197 .mode = 0644,
1198 .proc_handler = proc_dointvec,
1199 },
1200 {
1201 .procname = "perf_event_mlock_kb",
1202 .data = &sysctl_perf_event_mlock,
1203 .maxlen = sizeof(sysctl_perf_event_mlock),
1204 .mode = 0644,
1205 .proc_handler = proc_dointvec,
1206 },
1207 {
1208 .procname = "perf_event_max_sample_rate",
1209 .data = &sysctl_perf_event_sample_rate,
1210 .maxlen = sizeof(sysctl_perf_event_sample_rate),
1211 .mode = 0644,
1212 .proc_handler = perf_proc_update_handler,
1213 .extra1 = SYSCTL_ONE,
1214 },
1215 {
1216 .procname = "perf_cpu_time_max_percent",
1217 .data = &sysctl_perf_cpu_time_max_percent,
1218 .maxlen = sizeof(sysctl_perf_cpu_time_max_percent),
1219 .mode = 0644,
1220 .proc_handler = perf_cpu_time_max_percent_handler,
1221 .extra1 = SYSCTL_ZERO,
1222 .extra2 = &one_hundred,
1223 },
1224 {
1225 .procname = "perf_event_max_stack",
1226 .data = &sysctl_perf_event_max_stack,
1227 .maxlen = sizeof(sysctl_perf_event_max_stack),
1228 .mode = 0644,
1229 .proc_handler = perf_event_max_stack_handler,
1230 .extra1 = SYSCTL_ZERO,
1231 .extra2 = &six_hundred_forty_kb,
1232 },
1233 {
1234 .procname = "perf_event_max_contexts_per_stack",
1235 .data = &sysctl_perf_event_max_contexts_per_stack,
1236 .maxlen = sizeof(sysctl_perf_event_max_contexts_per_stack),
1237 .mode = 0644,
1238 .proc_handler = perf_event_max_stack_handler,
1239 .extra1 = SYSCTL_ZERO,
1240 .extra2 = &one_thousand,
1241 },
1242#endif
1243 {
1244 .procname = "panic_on_warn",
1245 .data = &panic_on_warn,
1246 .maxlen = sizeof(int),
1247 .mode = 0644,
1248 .proc_handler = proc_dointvec_minmax,
1249 .extra1 = SYSCTL_ZERO,
1250 .extra2 = SYSCTL_ONE,
1251 },
1252#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1253 {
1254 .procname = "timer_migration",
1255 .data = &sysctl_timer_migration,
1256 .maxlen = sizeof(unsigned int),
1257 .mode = 0644,
1258 .proc_handler = timer_migration_handler,
1259 .extra1 = SYSCTL_ZERO,
1260 .extra2 = SYSCTL_ONE,
1261 },
1262#endif
1263#ifdef CONFIG_BPF_SYSCALL
1264 {
1265 .procname = "unprivileged_bpf_disabled",
1266 .data = &sysctl_unprivileged_bpf_disabled,
1267 .maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
1268 .mode = 0644,
1269 /* only handle a transition from default "0" to "1" */
1270 .proc_handler = proc_dointvec_minmax,
1271 .extra1 = SYSCTL_ONE,
1272 .extra2 = SYSCTL_ONE,
1273 },
1274 {
1275 .procname = "bpf_stats_enabled",
1276 .data = &bpf_stats_enabled_key.key,
1277 .maxlen = sizeof(bpf_stats_enabled_key),
1278 .mode = 0644,
1279 .proc_handler = proc_do_static_key,
1280 },
1281#endif
1282#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1283 {
1284 .procname = "panic_on_rcu_stall",
1285 .data = &sysctl_panic_on_rcu_stall,
1286 .maxlen = sizeof(sysctl_panic_on_rcu_stall),
1287 .mode = 0644,
1288 .proc_handler = proc_dointvec_minmax,
1289 .extra1 = SYSCTL_ZERO,
1290 .extra2 = SYSCTL_ONE,
1291 },
1292#endif
1293#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
1294 {
1295 .procname = "stack_erasing",
1296 .data = NULL,
1297 .maxlen = sizeof(int),
1298 .mode = 0600,
1299 .proc_handler = stack_erasing_sysctl,
1300 .extra1 = SYSCTL_ZERO,
1301 .extra2 = SYSCTL_ONE,
1302 },
1303#endif
1304 { }
1305};
1306
1307static struct ctl_table vm_table[] = {
1308 {
1309 .procname = "overcommit_memory",
1310 .data = &sysctl_overcommit_memory,
1311 .maxlen = sizeof(sysctl_overcommit_memory),
1312 .mode = 0644,
1313 .proc_handler = proc_dointvec_minmax,
1314 .extra1 = SYSCTL_ZERO,
1315 .extra2 = &two,
1316 },
1317 {
1318 .procname = "panic_on_oom",
1319 .data = &sysctl_panic_on_oom,
1320 .maxlen = sizeof(sysctl_panic_on_oom),
1321 .mode = 0644,
1322 .proc_handler = proc_dointvec_minmax,
1323 .extra1 = SYSCTL_ZERO,
1324 .extra2 = &two,
1325 },
1326 {
1327 .procname = "oom_kill_allocating_task",
1328 .data = &sysctl_oom_kill_allocating_task,
1329 .maxlen = sizeof(sysctl_oom_kill_allocating_task),
1330 .mode = 0644,
1331 .proc_handler = proc_dointvec,
1332 },
1333 {
1334 .procname = "oom_dump_tasks",
1335 .data = &sysctl_oom_dump_tasks,
1336 .maxlen = sizeof(sysctl_oom_dump_tasks),
1337 .mode = 0644,
1338 .proc_handler = proc_dointvec,
1339 },
1340 {
1341 .procname = "overcommit_ratio",
1342 .data = &sysctl_overcommit_ratio,
1343 .maxlen = sizeof(sysctl_overcommit_ratio),
1344 .mode = 0644,
1345 .proc_handler = overcommit_ratio_handler,
1346 },
1347 {
1348 .procname = "overcommit_kbytes",
1349 .data = &sysctl_overcommit_kbytes,
1350 .maxlen = sizeof(sysctl_overcommit_kbytes),
1351 .mode = 0644,
1352 .proc_handler = overcommit_kbytes_handler,
1353 },
1354 {
1355 .procname = "page-cluster",
1356 .data = &page_cluster,
1357 .maxlen = sizeof(int),
1358 .mode = 0644,
1359 .proc_handler = proc_dointvec_minmax,
1360 .extra1 = SYSCTL_ZERO,
1361 },
1362 {
1363 .procname = "dirty_background_ratio",
1364 .data = &dirty_background_ratio,
1365 .maxlen = sizeof(dirty_background_ratio),
1366 .mode = 0644,
1367 .proc_handler = dirty_background_ratio_handler,
1368 .extra1 = SYSCTL_ZERO,
1369 .extra2 = &one_hundred,
1370 },
1371 {
1372 .procname = "dirty_background_bytes",
1373 .data = &dirty_background_bytes,
1374 .maxlen = sizeof(dirty_background_bytes),
1375 .mode = 0644,
1376 .proc_handler = dirty_background_bytes_handler,
1377 .extra1 = &one_ul,
1378 },
1379 {
1380 .procname = "dirty_ratio",
1381 .data = &vm_dirty_ratio,
1382 .maxlen = sizeof(vm_dirty_ratio),
1383 .mode = 0644,
1384 .proc_handler = dirty_ratio_handler,
1385 .extra1 = SYSCTL_ZERO,
1386 .extra2 = &one_hundred,
1387 },
1388 {
1389 .procname = "dirty_bytes",
1390 .data = &vm_dirty_bytes,
1391 .maxlen = sizeof(vm_dirty_bytes),
1392 .mode = 0644,
1393 .proc_handler = dirty_bytes_handler,
1394 .extra1 = &dirty_bytes_min,
1395 },
1396 {
1397 .procname = "dirty_writeback_centisecs",
1398 .data = &dirty_writeback_interval,
1399 .maxlen = sizeof(dirty_writeback_interval),
1400 .mode = 0644,
1401 .proc_handler = dirty_writeback_centisecs_handler,
1402 },
1403 {
1404 .procname = "dirty_expire_centisecs",
1405 .data = &dirty_expire_interval,
1406 .maxlen = sizeof(dirty_expire_interval),
1407 .mode = 0644,
1408 .proc_handler = proc_dointvec_minmax,
1409 .extra1 = SYSCTL_ZERO,
1410 },
1411 {
1412 .procname = "dirtytime_expire_seconds",
1413 .data = &dirtytime_expire_interval,
1414 .maxlen = sizeof(dirtytime_expire_interval),
1415 .mode = 0644,
1416 .proc_handler = dirtytime_interval_handler,
1417 .extra1 = SYSCTL_ZERO,
1418 },
1419 {
1420 .procname = "swappiness",
1421 .data = &vm_swappiness,
1422 .maxlen = sizeof(vm_swappiness),
1423 .mode = 0644,
1424 .proc_handler = proc_dointvec_minmax,
1425 .extra1 = SYSCTL_ZERO,
1426 .extra2 = &one_hundred,
1427 },
1428#ifdef CONFIG_HUGETLB_PAGE
1429 {
1430 .procname = "nr_hugepages",
1431 .data = NULL,
1432 .maxlen = sizeof(unsigned long),
1433 .mode = 0644,
1434 .proc_handler = hugetlb_sysctl_handler,
1435 },
1436#ifdef CONFIG_NUMA
1437 {
1438 .procname = "nr_hugepages_mempolicy",
1439 .data = NULL,
1440 .maxlen = sizeof(unsigned long),
1441 .mode = 0644,
1442 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1443 },
1444 {
1445 .procname = "numa_stat",
1446 .data = &sysctl_vm_numa_stat,
1447 .maxlen = sizeof(int),
1448 .mode = 0644,
1449 .proc_handler = sysctl_vm_numa_stat_handler,
1450 .extra1 = SYSCTL_ZERO,
1451 .extra2 = SYSCTL_ONE,
1452 },
1453#endif
1454 {
1455 .procname = "hugetlb_shm_group",
1456 .data = &sysctl_hugetlb_shm_group,
1457 .maxlen = sizeof(gid_t),
1458 .mode = 0644,
1459 .proc_handler = proc_dointvec,
1460 },
1461 {
1462 .procname = "nr_overcommit_hugepages",
1463 .data = NULL,
1464 .maxlen = sizeof(unsigned long),
1465 .mode = 0644,
1466 .proc_handler = hugetlb_overcommit_handler,
1467 },
1468#endif
1469 {
1470 .procname = "lowmem_reserve_ratio",
1471 .data = &sysctl_lowmem_reserve_ratio,
1472 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
1473 .mode = 0644,
1474 .proc_handler = lowmem_reserve_ratio_sysctl_handler,
1475 },
1476 {
1477 .procname = "drop_caches",
1478 .data = &sysctl_drop_caches,
1479 .maxlen = sizeof(int),
1480 .mode = 0644,
1481 .proc_handler = drop_caches_sysctl_handler,
1482 .extra1 = SYSCTL_ONE,
1483 .extra2 = &four,
1484 },
1485#ifdef CONFIG_COMPACTION
1486 {
1487 .procname = "compact_memory",
1488 .data = &sysctl_compact_memory,
1489 .maxlen = sizeof(int),
1490 .mode = 0200,
1491 .proc_handler = sysctl_compaction_handler,
1492 },
1493 {
1494 .procname = "extfrag_threshold",
1495 .data = &sysctl_extfrag_threshold,
1496 .maxlen = sizeof(int),
1497 .mode = 0644,
1498 .proc_handler = proc_dointvec_minmax,
1499 .extra1 = &min_extfrag_threshold,
1500 .extra2 = &max_extfrag_threshold,
1501 },
1502 {
1503 .procname = "compact_unevictable_allowed",
1504 .data = &sysctl_compact_unevictable_allowed,
1505 .maxlen = sizeof(int),
1506 .mode = 0644,
1507 .proc_handler = proc_dointvec,
1508 .extra1 = SYSCTL_ZERO,
1509 .extra2 = SYSCTL_ONE,
1510 },
1511
1512#endif /* CONFIG_COMPACTION */
1513 {
1514 .procname = "min_free_kbytes",
1515 .data = &min_free_kbytes,
1516 .maxlen = sizeof(min_free_kbytes),
1517 .mode = 0644,
1518 .proc_handler = min_free_kbytes_sysctl_handler,
1519 .extra1 = SYSCTL_ZERO,
1520 },
1521 {
1522 .procname = "watermark_boost_factor",
1523 .data = &watermark_boost_factor,
1524 .maxlen = sizeof(watermark_boost_factor),
1525 .mode = 0644,
1526 .proc_handler = watermark_boost_factor_sysctl_handler,
1527 .extra1 = SYSCTL_ZERO,
1528 },
1529 {
1530 .procname = "watermark_scale_factor",
1531 .data = &watermark_scale_factor,
1532 .maxlen = sizeof(watermark_scale_factor),
1533 .mode = 0644,
1534 .proc_handler = watermark_scale_factor_sysctl_handler,
1535 .extra1 = SYSCTL_ONE,
1536 .extra2 = &one_thousand,
1537 },
1538 {
1539 .procname = "percpu_pagelist_fraction",
1540 .data = &percpu_pagelist_fraction,
1541 .maxlen = sizeof(percpu_pagelist_fraction),
1542 .mode = 0644,
1543 .proc_handler = percpu_pagelist_fraction_sysctl_handler,
1544 .extra1 = SYSCTL_ZERO,
1545 },
1546#ifdef CONFIG_MMU
1547 {
1548 .procname = "max_map_count",
1549 .data = &sysctl_max_map_count,
1550 .maxlen = sizeof(sysctl_max_map_count),
1551 .mode = 0644,
1552 .proc_handler = proc_dointvec_minmax,
1553 .extra1 = SYSCTL_ZERO,
1554 },
1555#else
1556 {
1557 .procname = "nr_trim_pages",
1558 .data = &sysctl_nr_trim_pages,
1559 .maxlen = sizeof(sysctl_nr_trim_pages),
1560 .mode = 0644,
1561 .proc_handler = proc_dointvec_minmax,
1562 .extra1 = SYSCTL_ZERO,
1563 },
1564#endif
1565 {
1566 .procname = "laptop_mode",
1567 .data = &laptop_mode,
1568 .maxlen = sizeof(laptop_mode),
1569 .mode = 0644,
1570 .proc_handler = proc_dointvec_jiffies,
1571 },
1572 {
1573 .procname = "block_dump",
1574 .data = &block_dump,
1575 .maxlen = sizeof(block_dump),
1576 .mode = 0644,
1577 .proc_handler = proc_dointvec,
1578 .extra1 = SYSCTL_ZERO,
1579 },
1580 {
1581 .procname = "vfs_cache_pressure",
1582 .data = &sysctl_vfs_cache_pressure,
1583 .maxlen = sizeof(sysctl_vfs_cache_pressure),
1584 .mode = 0644,
1585 .proc_handler = proc_dointvec,
1586 .extra1 = SYSCTL_ZERO,
1587 },
1588#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1589 {
1590 .procname = "legacy_va_layout",
1591 .data = &sysctl_legacy_va_layout,
1592 .maxlen = sizeof(sysctl_legacy_va_layout),
1593 .mode = 0644,
1594 .proc_handler = proc_dointvec,
1595 .extra1 = SYSCTL_ZERO,
1596 },
1597#endif
1598#ifdef CONFIG_NUMA
1599 {
1600 .procname = "zone_reclaim_mode",
1601 .data = &node_reclaim_mode,
1602 .maxlen = sizeof(node_reclaim_mode),
1603 .mode = 0644,
1604 .proc_handler = proc_dointvec,
1605 .extra1 = SYSCTL_ZERO,
1606 },
1607 {
1608 .procname = "min_unmapped_ratio",
1609 .data = &sysctl_min_unmapped_ratio,
1610 .maxlen = sizeof(sysctl_min_unmapped_ratio),
1611 .mode = 0644,
1612 .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
1613 .extra1 = SYSCTL_ZERO,
1614 .extra2 = &one_hundred,
1615 },
1616 {
1617 .procname = "min_slab_ratio",
1618 .data = &sysctl_min_slab_ratio,
1619 .maxlen = sizeof(sysctl_min_slab_ratio),
1620 .mode = 0644,
1621 .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
1622 .extra1 = SYSCTL_ZERO,
1623 .extra2 = &one_hundred,
1624 },
1625#endif
1626#ifdef CONFIG_SMP
1627 {
1628 .procname = "stat_interval",
1629 .data = &sysctl_stat_interval,
1630 .maxlen = sizeof(sysctl_stat_interval),
1631 .mode = 0644,
1632 .proc_handler = proc_dointvec_jiffies,
1633 },
1634 {
1635 .procname = "stat_refresh",
1636 .data = NULL,
1637 .maxlen = 0,
1638 .mode = 0600,
1639 .proc_handler = vmstat_refresh,
1640 },
1641#endif
1642#ifdef CONFIG_MMU
1643 {
1644 .procname = "mmap_min_addr",
1645 .data = &dac_mmap_min_addr,
1646 .maxlen = sizeof(unsigned long),
1647 .mode = 0644,
1648 .proc_handler = mmap_min_addr_handler,
1649 },
1650#endif
1651#ifdef CONFIG_NUMA
1652 {
1653 .procname = "numa_zonelist_order",
1654 .data = &numa_zonelist_order,
1655 .maxlen = NUMA_ZONELIST_ORDER_LEN,
1656 .mode = 0644,
1657 .proc_handler = numa_zonelist_order_handler,
1658 },
1659#endif
1660#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1661 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1662 {
1663 .procname = "vdso_enabled",
1664#ifdef CONFIG_X86_32
1665 .data = &vdso32_enabled,
1666 .maxlen = sizeof(vdso32_enabled),
1667#else
1668 .data = &vdso_enabled,
1669 .maxlen = sizeof(vdso_enabled),
1670#endif
1671 .mode = 0644,
1672 .proc_handler = proc_dointvec,
1673 .extra1 = SYSCTL_ZERO,
1674 },
1675#endif
1676#ifdef CONFIG_HIGHMEM
1677 {
1678 .procname = "highmem_is_dirtyable",
1679 .data = &vm_highmem_is_dirtyable,
1680 .maxlen = sizeof(vm_highmem_is_dirtyable),
1681 .mode = 0644,
1682 .proc_handler = proc_dointvec_minmax,
1683 .extra1 = SYSCTL_ZERO,
1684 .extra2 = SYSCTL_ONE,
1685 },
1686#endif
1687#ifdef CONFIG_MEMORY_FAILURE
1688 {
1689 .procname = "memory_failure_early_kill",
1690 .data = &sysctl_memory_failure_early_kill,
1691 .maxlen = sizeof(sysctl_memory_failure_early_kill),
1692 .mode = 0644,
1693 .proc_handler = proc_dointvec_minmax,
1694 .extra1 = SYSCTL_ZERO,
1695 .extra2 = SYSCTL_ONE,
1696 },
1697 {
1698 .procname = "memory_failure_recovery",
1699 .data = &sysctl_memory_failure_recovery,
1700 .maxlen = sizeof(sysctl_memory_failure_recovery),
1701 .mode = 0644,
1702 .proc_handler = proc_dointvec_minmax,
1703 .extra1 = SYSCTL_ZERO,
1704 .extra2 = SYSCTL_ONE,
1705 },
1706#endif
1707 {
1708 .procname = "user_reserve_kbytes",
1709 .data = &sysctl_user_reserve_kbytes,
1710 .maxlen = sizeof(sysctl_user_reserve_kbytes),
1711 .mode = 0644,
1712 .proc_handler = proc_doulongvec_minmax,
1713 },
1714 {
1715 .procname = "admin_reserve_kbytes",
1716 .data = &sysctl_admin_reserve_kbytes,
1717 .maxlen = sizeof(sysctl_admin_reserve_kbytes),
1718 .mode = 0644,
1719 .proc_handler = proc_doulongvec_minmax,
1720 },
1721#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1722 {
1723 .procname = "mmap_rnd_bits",
1724 .data = &mmap_rnd_bits,
1725 .maxlen = sizeof(mmap_rnd_bits),
1726 .mode = 0600,
1727 .proc_handler = proc_dointvec_minmax,
1728 .extra1 = (void *)&mmap_rnd_bits_min,
1729 .extra2 = (void *)&mmap_rnd_bits_max,
1730 },
1731#endif
1732#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1733 {
1734 .procname = "mmap_rnd_compat_bits",
1735 .data = &mmap_rnd_compat_bits,
1736 .maxlen = sizeof(mmap_rnd_compat_bits),
1737 .mode = 0600,
1738 .proc_handler = proc_dointvec_minmax,
1739 .extra1 = (void *)&mmap_rnd_compat_bits_min,
1740 .extra2 = (void *)&mmap_rnd_compat_bits_max,
1741 },
1742#endif
1743#ifdef CONFIG_USERFAULTFD
1744 {
1745 .procname = "unprivileged_userfaultfd",
1746 .data = &sysctl_unprivileged_userfaultfd,
1747 .maxlen = sizeof(sysctl_unprivileged_userfaultfd),
1748 .mode = 0644,
1749 .proc_handler = proc_dointvec_minmax,
1750 .extra1 = SYSCTL_ZERO,
1751 .extra2 = SYSCTL_ONE,
1752 },
1753#endif
1754 { }
1755};
1756
1757static struct ctl_table fs_table[] = {
1758 {
1759 .procname = "inode-nr",
1760 .data = &inodes_stat,
1761 .maxlen = 2*sizeof(long),
1762 .mode = 0444,
1763 .proc_handler = proc_nr_inodes,
1764 },
1765 {
1766 .procname = "inode-state",
1767 .data = &inodes_stat,
1768 .maxlen = 7*sizeof(long),
1769 .mode = 0444,
1770 .proc_handler = proc_nr_inodes,
1771 },
1772 {
1773 .procname = "file-nr",
1774 .data = &files_stat,
1775 .maxlen = sizeof(files_stat),
1776 .mode = 0444,
1777 .proc_handler = proc_nr_files,
1778 },
1779 {
1780 .procname = "file-max",
1781 .data = &files_stat.max_files,
1782 .maxlen = sizeof(files_stat.max_files),
1783 .mode = 0644,
1784 .proc_handler = proc_doulongvec_minmax,
1785 .extra1 = &zero_ul,
1786 .extra2 = &long_max,
1787 },
1788 {
1789 .procname = "nr_open",
1790 .data = &sysctl_nr_open,
1791 .maxlen = sizeof(unsigned int),
1792 .mode = 0644,
1793 .proc_handler = proc_dointvec_minmax,
1794 .extra1 = &sysctl_nr_open_min,
1795 .extra2 = &sysctl_nr_open_max,
1796 },
1797 {
1798 .procname = "dentry-state",
1799 .data = &dentry_stat,
1800 .maxlen = 6*sizeof(long),
1801 .mode = 0444,
1802 .proc_handler = proc_nr_dentry,
1803 },
1804 {
1805 .procname = "overflowuid",
1806 .data = &fs_overflowuid,
1807 .maxlen = sizeof(int),
1808 .mode = 0644,
1809 .proc_handler = proc_dointvec_minmax,
1810 .extra1 = &minolduid,
1811 .extra2 = &maxolduid,
1812 },
1813 {
1814 .procname = "overflowgid",
1815 .data = &fs_overflowgid,
1816 .maxlen = sizeof(int),
1817 .mode = 0644,
1818 .proc_handler = proc_dointvec_minmax,
1819 .extra1 = &minolduid,
1820 .extra2 = &maxolduid,
1821 },
1822#ifdef CONFIG_FILE_LOCKING
1823 {
1824 .procname = "leases-enable",
1825 .data = &leases_enable,
1826 .maxlen = sizeof(int),
1827 .mode = 0644,
1828 .proc_handler = proc_dointvec,
1829 },
1830#endif
1831#ifdef CONFIG_DNOTIFY
1832 {
1833 .procname = "dir-notify-enable",
1834 .data = &dir_notify_enable,
1835 .maxlen = sizeof(int),
1836 .mode = 0644,
1837 .proc_handler = proc_dointvec,
1838 },
1839#endif
1840#ifdef CONFIG_MMU
1841#ifdef CONFIG_FILE_LOCKING
1842 {
1843 .procname = "lease-break-time",
1844 .data = &lease_break_time,
1845 .maxlen = sizeof(int),
1846 .mode = 0644,
1847 .proc_handler = proc_dointvec,
1848 },
1849#endif
1850#ifdef CONFIG_AIO
1851 {
1852 .procname = "aio-nr",
1853 .data = &aio_nr,
1854 .maxlen = sizeof(aio_nr),
1855 .mode = 0444,
1856 .proc_handler = proc_doulongvec_minmax,
1857 },
1858 {
1859 .procname = "aio-max-nr",
1860 .data = &aio_max_nr,
1861 .maxlen = sizeof(aio_max_nr),
1862 .mode = 0644,
1863 .proc_handler = proc_doulongvec_minmax,
1864 },
1865#endif /* CONFIG_AIO */
1866#ifdef CONFIG_INOTIFY_USER
1867 {
1868 .procname = "inotify",
1869 .mode = 0555,
1870 .child = inotify_table,
1871 },
1872#endif
1873#ifdef CONFIG_EPOLL
1874 {
1875 .procname = "epoll",
1876 .mode = 0555,
1877 .child = epoll_table,
1878 },
1879#endif
1880#endif
1881 {
1882 .procname = "protected_symlinks",
1883 .data = &sysctl_protected_symlinks,
1884 .maxlen = sizeof(int),
1885 .mode = 0600,
1886 .proc_handler = proc_dointvec_minmax,
1887 .extra1 = SYSCTL_ZERO,
1888 .extra2 = SYSCTL_ONE,
1889 },
1890 {
1891 .procname = "protected_hardlinks",
1892 .data = &sysctl_protected_hardlinks,
1893 .maxlen = sizeof(int),
1894 .mode = 0600,
1895 .proc_handler = proc_dointvec_minmax,
1896 .extra1 = SYSCTL_ZERO,
1897 .extra2 = SYSCTL_ONE,
1898 },
1899 {
1900 .procname = "protected_fifos",
1901 .data = &sysctl_protected_fifos,
1902 .maxlen = sizeof(int),
1903 .mode = 0600,
1904 .proc_handler = proc_dointvec_minmax,
1905 .extra1 = SYSCTL_ZERO,
1906 .extra2 = &two,
1907 },
1908 {
1909 .procname = "protected_regular",
1910 .data = &sysctl_protected_regular,
1911 .maxlen = sizeof(int),
1912 .mode = 0600,
1913 .proc_handler = proc_dointvec_minmax,
1914 .extra1 = SYSCTL_ZERO,
1915 .extra2 = &two,
1916 },
1917 {
1918 .procname = "suid_dumpable",
1919 .data = &suid_dumpable,
1920 .maxlen = sizeof(int),
1921 .mode = 0644,
1922 .proc_handler = proc_dointvec_minmax_coredump,
1923 .extra1 = SYSCTL_ZERO,
1924 .extra2 = &two,
1925 },
1926#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1927 {
1928 .procname = "binfmt_misc",
1929 .mode = 0555,
1930 .child = sysctl_mount_point,
1931 },
1932#endif
1933 {
1934 .procname = "pipe-max-size",
1935 .data = &pipe_max_size,
1936 .maxlen = sizeof(pipe_max_size),
1937 .mode = 0644,
1938 .proc_handler = proc_dopipe_max_size,
1939 },
1940 {
1941 .procname = "pipe-user-pages-hard",
1942 .data = &pipe_user_pages_hard,
1943 .maxlen = sizeof(pipe_user_pages_hard),
1944 .mode = 0644,
1945 .proc_handler = proc_doulongvec_minmax,
1946 },
1947 {
1948 .procname = "pipe-user-pages-soft",
1949 .data = &pipe_user_pages_soft,
1950 .maxlen = sizeof(pipe_user_pages_soft),
1951 .mode = 0644,
1952 .proc_handler = proc_doulongvec_minmax,
1953 },
1954 {
1955 .procname = "mount-max",
1956 .data = &sysctl_mount_max,
1957 .maxlen = sizeof(unsigned int),
1958 .mode = 0644,
1959 .proc_handler = proc_dointvec_minmax,
1960 .extra1 = SYSCTL_ONE,
1961 },
1962 { }
1963};
1964
1965static struct ctl_table debug_table[] = {
1966#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1967 {
1968 .procname = "exception-trace",
1969 .data = &show_unhandled_signals,
1970 .maxlen = sizeof(int),
1971 .mode = 0644,
1972 .proc_handler = proc_dointvec
1973 },
1974#endif
1975#if defined(CONFIG_OPTPROBES)
1976 {
1977 .procname = "kprobes-optimization",
1978 .data = &sysctl_kprobes_optimization,
1979 .maxlen = sizeof(int),
1980 .mode = 0644,
1981 .proc_handler = proc_kprobes_optimization_handler,
1982 .extra1 = SYSCTL_ZERO,
1983 .extra2 = SYSCTL_ONE,
1984 },
1985#endif
1986 { }
1987};
1988
1989static struct ctl_table dev_table[] = {
1990 { }
1991};
1992
1993int __init sysctl_init(void)
1994{
1995 struct ctl_table_header *hdr;
1996
1997 hdr = register_sysctl_table(sysctl_base_table);
1998 kmemleak_not_leak(hdr);
1999 return 0;
2000}
2001
2002#endif /* CONFIG_SYSCTL */
2003
2004/*
2005 * /proc/sys support
2006 */
2007
2008#ifdef CONFIG_PROC_SYSCTL
2009
2010static int _proc_do_string(char *data, int maxlen, int write,
2011 char __user *buffer,
2012 size_t *lenp, loff_t *ppos)
2013{
2014 size_t len;
2015 char __user *p;
2016 char c;
2017
2018 if (!data || !maxlen || !*lenp) {
2019 *lenp = 0;
2020 return 0;
2021 }
2022
2023 if (write) {
2024 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
2025 /* Only continue writes not past the end of buffer. */
2026 len = strlen(data);
2027 if (len > maxlen - 1)
2028 len = maxlen - 1;
2029
2030 if (*ppos > len)
2031 return 0;
2032 len = *ppos;
2033 } else {
2034 /* Start writing from beginning of buffer. */
2035 len = 0;
2036 }
2037
2038 *ppos += *lenp;
2039 p = buffer;
2040 while ((p - buffer) < *lenp && len < maxlen - 1) {
2041 if (get_user(c, p++))
2042 return -EFAULT;
2043 if (c == 0 || c == '\n')
2044 break;
2045 data[len++] = c;
2046 }
2047 data[len] = 0;
2048 } else {
2049 len = strlen(data);
2050 if (len > maxlen)
2051 len = maxlen;
2052
2053 if (*ppos > len) {
2054 *lenp = 0;
2055 return 0;
2056 }
2057
2058 data += *ppos;
2059 len -= *ppos;
2060
2061 if (len > *lenp)
2062 len = *lenp;
2063 if (len)
2064 if (copy_to_user(buffer, data, len))
2065 return -EFAULT;
2066 if (len < *lenp) {
2067 if (put_user('\n', buffer + len))
2068 return -EFAULT;
2069 len++;
2070 }
2071 *lenp = len;
2072 *ppos += len;
2073 }
2074 return 0;
2075}
2076
2077static void warn_sysctl_write(struct ctl_table *table)
2078{
2079 pr_warn_once("%s wrote to %s when file position was not 0!\n"
2080 "This will not be supported in the future. To silence this\n"
2081 "warning, set kernel.sysctl_writes_strict = -1\n",
2082 current->comm, table->procname);
2083}
2084
2085/**
2086 * proc_first_pos_non_zero_ignore - check if first position is allowed
2087 * @ppos: file position
2088 * @table: the sysctl table
2089 *
2090 * Returns true if the first position is non-zero and the sysctl_writes_strict
2091 * mode indicates this is not allowed for numeric input types. String proc
2092 * handlers can ignore the return value.
2093 */
2094static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2095 struct ctl_table *table)
2096{
2097 if (!*ppos)
2098 return false;
2099
2100 switch (sysctl_writes_strict) {
2101 case SYSCTL_WRITES_STRICT:
2102 return true;
2103 case SYSCTL_WRITES_WARN:
2104 warn_sysctl_write(table);
2105 return false;
2106 default:
2107 return false;
2108 }
2109}
2110
2111/**
2112 * proc_dostring - read a string sysctl
2113 * @table: the sysctl table
2114 * @write: %TRUE if this is a write to the sysctl file
2115 * @buffer: the user buffer
2116 * @lenp: the size of the user buffer
2117 * @ppos: file position
2118 *
2119 * Reads/writes a string from/to the user buffer. If the kernel
2120 * buffer provided is not large enough to hold the string, the
2121 * string is truncated. The copied string is %NULL-terminated.
2122 * If the string is being read by the user process, it is copied
2123 * and a newline '\n' is added. It is truncated if the buffer is
2124 * not large enough.
2125 *
2126 * Returns 0 on success.
2127 */
2128int proc_dostring(struct ctl_table *table, int write,
2129 void __user *buffer, size_t *lenp, loff_t *ppos)
2130{
2131 if (write)
2132 proc_first_pos_non_zero_ignore(ppos, table);
2133
2134 return _proc_do_string((char *)(table->data), table->maxlen, write,
2135 (char __user *)buffer, lenp, ppos);
2136}
2137
/*
 * Advance *buf past whatever skip_spaces() skips and return how many
 * characters were stepped over.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);
	return *buf - start;
}
2146
/*
 * Consume leading occurrences of @v: for each one, advance *buf and
 * decrement *size.  Stops at the first other character or when *size
 * reaches zero.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size && **buf == v; (*buf)++)
		(*size)--;
}
2156
2157/**
2158 * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
2159 * fail on overflow
2160 *
2161 * @cp: kernel buffer containing the string to parse
2162 * @endp: pointer to store the trailing characters
2163 * @base: the base to use
2164 * @res: where the parsed integer will be stored
2165 *
2166 * In case of success 0 is returned and @res will contain the parsed integer,
2167 * @endp will hold any trailing characters.
2168 * This function will fail the parse on overflow. If there wasn't an overflow
2169 * the function will defer the decision what characters count as invalid to the
2170 * caller.
2171 */
2172static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
2173 unsigned long *res)
2174{
2175 unsigned long long result;
2176 unsigned int rv;
2177
2178 cp = _parse_integer_fixup_radix(cp, &base);
2179 rv = _parse_integer(cp, base, &result);
2180 if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
2181 return -ERANGE;
2182
2183 cp += rv;
2184
2185 if (endp)
2186 *endp = (char *)cp;
2187
2188 *res = (unsigned long)result;
2189 return 0;
2190}
2191
2192#define TMPBUFLEN 22
2193/**
2194 * proc_get_long - reads an ASCII formatted integer from a user buffer
2195 *
2196 * @buf: a kernel buffer
2197 * @size: size of the kernel buffer
2198 * @val: this is where the number will be stored
2199 * @neg: set to %TRUE if number is negative
2200 * @perm_tr: a vector which contains the allowed trailers
2201 * @perm_tr_len: size of the perm_tr vector
2202 * @tr: pointer to store the trailer character
2203 *
2204 * In case of success %0 is returned and @buf and @size are updated with
2205 * the amount of bytes read. If @tr is non-NULL and a trailing
2206 * character exists (size is non-zero after returning from this
2207 * function), @tr is updated with the trailing character.
2208 */
2209static int proc_get_long(char **buf, size_t *size,
2210 unsigned long *val, bool *neg,
2211 const char *perm_tr, unsigned perm_tr_len, char *tr)
2212{
2213 int len;
2214 char *p, tmp[TMPBUFLEN];
2215
2216 if (!*size)
2217 return -EINVAL;
2218
2219 len = *size;
2220 if (len > TMPBUFLEN - 1)
2221 len = TMPBUFLEN - 1;
2222
2223 memcpy(tmp, *buf, len);
2224
2225 tmp[len] = 0;
2226 p = tmp;
2227 if (*p == '-' && *size > 1) {
2228 *neg = true;
2229 p++;
2230 } else
2231 *neg = false;
2232 if (!isdigit(*p))
2233 return -EINVAL;
2234
2235 if (strtoul_lenient(p, &p, 0, val))
2236 return -EINVAL;
2237
2238 len = p - tmp;
2239
2240 /* We don't know if the next char is whitespace thus we may accept
2241 * invalid integers (e.g. 1234...a) or two integers instead of one
2242 * (e.g. 123...1). So lets not allow such large numbers. */
2243 if (len == TMPBUFLEN - 1)
2244 return -EINVAL;
2245
2246 if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
2247 return -EINVAL;
2248
2249 if (tr && (len < *size))
2250 *tr = *p;
2251
2252 *buf += len;
2253 *size -= len;
2254
2255 return 0;
2256}
2257
2258/**
2259 * proc_put_long - converts an integer to a decimal ASCII formatted string
2260 *
2261 * @buf: the user buffer
2262 * @size: the size of the user buffer
2263 * @val: the integer to be converted
2264 * @neg: sign of the number, %TRUE for negative
2265 *
2266 * In case of success %0 is returned and @buf and @size are updated with
2267 * the amount of bytes written.
2268 */
2269static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2270 bool neg)
2271{
2272 int len;
2273 char tmp[TMPBUFLEN], *p = tmp;
2274
2275 sprintf(p, "%s%lu", neg ? "-" : "", val);
2276 len = strlen(tmp);
2277 if (len > *size)
2278 len = *size;
2279 if (copy_to_user(*buf, tmp, len))
2280 return -EFAULT;
2281 *size -= len;
2282 *buf += len;
2283 return 0;
2284}
2285#undef TMPBUFLEN
2286
2287static int proc_put_char(void __user **buf, size_t *size, char c)
2288{
2289 if (*size) {
2290 char __user **buffer = (char __user **)buf;
2291 if (put_user(c, *buffer))
2292 return -EFAULT;
2293 (*size)--, (*buffer)++;
2294 *buf = *buffer;
2295 }
2296 return 0;
2297}
2298
/*
 * Default converter between a sign/magnitude pair and an int.
 *
 * write != 0: store (*negp, *lvalp) into *valp; -EINVAL if the magnitude
 *             exceeds INT_MAX (or INT_MAX + 1 for a negative number).
 * write == 0: split *valp into *negp and *lvalp.
 *
 * @data is unused.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (!write) {
		const int cur = *valp;

		if (cur < 0) {
			*negp = true;
			*lvalp = -(unsigned long)cur;
		} else {
			*negp = false;
			*lvalp = (unsigned long)cur;
		}
		return 0;
	}

	if (*negp) {
		/* |INT_MIN| is one larger than INT_MAX. */
		if (*lvalp > (unsigned long) INT_MAX + 1)
			return -EINVAL;
		*valp = -*lvalp;
		return 0;
	}

	if (*lvalp > (unsigned long) INT_MAX)
		return -EINVAL;
	*valp = *lvalp;
	return 0;
}
2325
/*
 * Default converter between an unsigned long and an unsigned int.
 *
 * write != 0: store *lvalp into *valp, -EINVAL if it exceeds UINT_MAX.
 * write == 0: widen *valp into *lvalp.
 *
 * @data is unused.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)*valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	*valp = *lvalp;
	return 0;
}
2340
/*
 * Characters accepted as a trailer after a number.  Deliberately a plain
 * char array rather than a string literal: users pass sizeof() as the
 * element count, so a terminating NUL must not be part of it.
 */
static const char proc_wspace_sep[] = {
	' ',
	'\t',
	'\n',
};
2342
2343static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2344 int write, void __user *buffer,
2345 size_t *lenp, loff_t *ppos,
2346 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2347 int write, void *data),
2348 void *data)
2349{
2350 int *i, vleft, first = 1, err = 0;
2351 size_t left;
2352 char *kbuf = NULL, *p;
2353
2354 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2355 *lenp = 0;
2356 return 0;
2357 }
2358
2359 i = (int *) tbl_data;
2360 vleft = table->maxlen / sizeof(*i);
2361 left = *lenp;
2362
2363 if (!conv)
2364 conv = do_proc_dointvec_conv;
2365
2366 if (write) {
2367 if (proc_first_pos_non_zero_ignore(ppos, table))
2368 goto out;
2369
2370 if (left > PAGE_SIZE - 1)
2371 left = PAGE_SIZE - 1;
2372 p = kbuf = memdup_user_nul(buffer, left);
2373 if (IS_ERR(kbuf))
2374 return PTR_ERR(kbuf);
2375 }
2376
2377 for (; left && vleft--; i++, first=0) {
2378 unsigned long lval;
2379 bool neg;
2380
2381 if (write) {
2382 left -= proc_skip_spaces(&p);
2383
2384 if (!left)
2385 break;
2386 err = proc_get_long(&p, &left, &lval, &neg,
2387 proc_wspace_sep,
2388 sizeof(proc_wspace_sep), NULL);
2389 if (err)
2390 break;
2391 if (conv(&neg, &lval, i, 1, data)) {
2392 err = -EINVAL;
2393 break;
2394 }
2395 } else {
2396 if (conv(&neg, &lval, i, 0, data)) {
2397 err = -EINVAL;
2398 break;
2399 }
2400 if (!first)
2401 err = proc_put_char(&buffer, &left, '\t');
2402 if (err)
2403 break;
2404 err = proc_put_long(&buffer, &left, lval, neg);
2405 if (err)
2406 break;
2407 }
2408 }
2409
2410 if (!write && !first && left && !err)
2411 err = proc_put_char(&buffer, &left, '\n');
2412 if (write && !err && left)
2413 left -= proc_skip_spaces(&p);
2414 if (write) {
2415 kfree(kbuf);
2416 if (first)
2417 return err ? : -EINVAL;
2418 }
2419 *lenp -= left;
2420out:
2421 *ppos += *lenp;
2422 return err;
2423}
2424
2425static int do_proc_dointvec(struct ctl_table *table, int write,
2426 void __user *buffer, size_t *lenp, loff_t *ppos,
2427 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2428 int write, void *data),
2429 void *data)
2430{
2431 return __do_proc_dointvec(table->data, table, write,
2432 buffer, lenp, ppos, conv, data);
2433}
2434
2435static int do_proc_douintvec_w(unsigned int *tbl_data,
2436 struct ctl_table *table,
2437 void __user *buffer,
2438 size_t *lenp, loff_t *ppos,
2439 int (*conv)(unsigned long *lvalp,
2440 unsigned int *valp,
2441 int write, void *data),
2442 void *data)
2443{
2444 unsigned long lval;
2445 int err = 0;
2446 size_t left;
2447 bool neg;
2448 char *kbuf = NULL, *p;
2449
2450 left = *lenp;
2451
2452 if (proc_first_pos_non_zero_ignore(ppos, table))
2453 goto bail_early;
2454
2455 if (left > PAGE_SIZE - 1)
2456 left = PAGE_SIZE - 1;
2457
2458 p = kbuf = memdup_user_nul(buffer, left);
2459 if (IS_ERR(kbuf))
2460 return -EINVAL;
2461
2462 left -= proc_skip_spaces(&p);
2463 if (!left) {
2464 err = -EINVAL;
2465 goto out_free;
2466 }
2467
2468 err = proc_get_long(&p, &left, &lval, &neg,
2469 proc_wspace_sep,
2470 sizeof(proc_wspace_sep), NULL);
2471 if (err || neg) {
2472 err = -EINVAL;
2473 goto out_free;
2474 }
2475
2476 if (conv(&lval, tbl_data, 1, data)) {
2477 err = -EINVAL;
2478 goto out_free;
2479 }
2480
2481 if (!err && left)
2482 left -= proc_skip_spaces(&p);
2483
2484out_free:
2485 kfree(kbuf);
2486 if (err)
2487 return -EINVAL;
2488
2489 return 0;
2490
2491 /* This is in keeping with old __do_proc_dointvec() */
2492bail_early:
2493 *ppos += *lenp;
2494 return err;
2495}
2496
2497static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2498 size_t *lenp, loff_t *ppos,
2499 int (*conv)(unsigned long *lvalp,
2500 unsigned int *valp,
2501 int write, void *data),
2502 void *data)
2503{
2504 unsigned long lval;
2505 int err = 0;
2506 size_t left;
2507
2508 left = *lenp;
2509
2510 if (conv(&lval, tbl_data, 0, data)) {
2511 err = -EINVAL;
2512 goto out;
2513 }
2514
2515 err = proc_put_long(&buffer, &left, lval, false);
2516 if (err || !left)
2517 goto out;
2518
2519 err = proc_put_char(&buffer, &left, '\n');
2520
2521out:
2522 *lenp -= left;
2523 *ppos += *lenp;
2524
2525 return err;
2526}
2527
2528static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2529 int write, void __user *buffer,
2530 size_t *lenp, loff_t *ppos,
2531 int (*conv)(unsigned long *lvalp,
2532 unsigned int *valp,
2533 int write, void *data),
2534 void *data)
2535{
2536 unsigned int *i, vleft;
2537
2538 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2539 *lenp = 0;
2540 return 0;
2541 }
2542
2543 i = (unsigned int *) tbl_data;
2544 vleft = table->maxlen / sizeof(*i);
2545
2546 /*
2547 * Arrays are not supported, keep this simple. *Do not* add
2548 * support for them.
2549 */
2550 if (vleft != 1) {
2551 *lenp = 0;
2552 return -EINVAL;
2553 }
2554
2555 if (!conv)
2556 conv = do_proc_douintvec_conv;
2557
2558 if (write)
2559 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2560 conv, data);
2561 return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2562}
2563
2564static int do_proc_douintvec(struct ctl_table *table, int write,
2565 void __user *buffer, size_t *lenp, loff_t *ppos,
2566 int (*conv)(unsigned long *lvalp,
2567 unsigned int *valp,
2568 int write, void *data),
2569 void *data)
2570{
2571 return __do_proc_douintvec(table->data, table, write,
2572 buffer, lenp, ppos, conv, data);
2573}
2574
2575/**
2576 * proc_dointvec - read a vector of integers
2577 * @table: the sysctl table
2578 * @write: %TRUE if this is a write to the sysctl file
2579 * @buffer: the user buffer
2580 * @lenp: the size of the user buffer
2581 * @ppos: file position
2582 *
2583 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2584 * values from/to the user buffer, treated as an ASCII string.
2585 *
2586 * Returns 0 on success.
2587 */
2588int proc_dointvec(struct ctl_table *table, int write,
2589 void __user *buffer, size_t *lenp, loff_t *ppos)
2590{
2591 return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2592}
2593
2594/**
2595 * proc_douintvec - read a vector of unsigned integers
2596 * @table: the sysctl table
2597 * @write: %TRUE if this is a write to the sysctl file
2598 * @buffer: the user buffer
2599 * @lenp: the size of the user buffer
2600 * @ppos: file position
2601 *
2602 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2603 * values from/to the user buffer, treated as an ASCII string.
2604 *
2605 * Returns 0 on success.
2606 */
2607int proc_douintvec(struct ctl_table *table, int write,
2608 void __user *buffer, size_t *lenp, loff_t *ppos)
2609{
2610 return do_proc_douintvec(table, write, buffer, lenp, ppos,
2611 do_proc_douintvec_conv, NULL);
2612}
2613
2614/*
2615 * Taint values can only be increased
2616 * This means we can safely use a temporary.
2617 */
2618static int proc_taint(struct ctl_table *table, int write,
2619 void __user *buffer, size_t *lenp, loff_t *ppos)
2620{
2621 struct ctl_table t;
2622 unsigned long tmptaint = get_taint();
2623 int err;
2624
2625 if (write && !capable(CAP_SYS_ADMIN))
2626 return -EPERM;
2627
2628 t = *table;
2629 t.data = &tmptaint;
2630 err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2631 if (err < 0)
2632 return err;
2633
2634 if (write) {
2635 /*
2636 * Poor man's atomic or. Not worth adding a primitive
2637 * to everyone's atomic.h for this
2638 */
2639 int i;
2640 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2641 if ((tmptaint >> i) & 1)
2642 add_taint(i, LOCKDEP_STILL_OK);
2643 }
2644 }
2645
2646 return err;
2647}
2648
2649#ifdef CONFIG_PRINTK
2650static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
2651 void __user *buffer, size_t *lenp, loff_t *ppos)
2652{
2653 if (write && !capable(CAP_SYS_ADMIN))
2654 return -EPERM;
2655
2656 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2657}
2658#endif
2659
/**
 * struct do_proc_dointvec_minmax_conv_param - bounds for proc_dointvec_minmax()
 * @min: points at the lowest value a write may store; NULL for no lower bound
 * @max: points at the highest value a write may store; NULL for no upper bound
 *
 * An instance is passed as the opaque data argument of
 * do_proc_dointvec_minmax_conv(), which dereferences each non-NULL
 * pointer to range-check a value before it is stored.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};
2673
2674static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
2675 int *valp,
2676 int write, void *data)
2677{
2678 int tmp, ret;
2679 struct do_proc_dointvec_minmax_conv_param *param = data;
2680 /*
2681 * If writing, first do so via a temporary local int so we can
2682 * bounds-check it before touching *valp.
2683 */
2684 int *ip = write ? &tmp : valp;
2685
2686 ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
2687 if (ret)
2688 return ret;
2689
2690 if (write) {
2691 if ((param->min && *param->min > tmp) ||
2692 (param->max && *param->max < tmp))
2693 return -EINVAL;
2694 *valp = tmp;
2695 }
2696
2697 return 0;
2698}
2699
2700/**
2701 * proc_dointvec_minmax - read a vector of integers with min/max values
2702 * @table: the sysctl table
2703 * @write: %TRUE if this is a write to the sysctl file
2704 * @buffer: the user buffer
2705 * @lenp: the size of the user buffer
2706 * @ppos: file position
2707 *
2708 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2709 * values from/to the user buffer, treated as an ASCII string.
2710 *
2711 * This routine will ensure the values are within the range specified by
2712 * table->extra1 (min) and table->extra2 (max).
2713 *
2714 * Returns 0 on success or -EINVAL on write when the range check fails.
2715 */
2716int proc_dointvec_minmax(struct ctl_table *table, int write,
2717 void __user *buffer, size_t *lenp, loff_t *ppos)
2718{
2719 struct do_proc_dointvec_minmax_conv_param param = {
2720 .min = (int *) table->extra1,
2721 .max = (int *) table->extra2,
2722 };
2723 return do_proc_dointvec(table, write, buffer, lenp, ppos,
2724 do_proc_dointvec_minmax_conv, &param);
2725}
2726
/**
 * struct do_proc_douintvec_minmax_conv_param - bounds for proc_douintvec_minmax()
 * @min: points at the lowest value a write may store; NULL for no lower bound
 * @max: points at the highest value a write may store; NULL for no upper bound
 *
 * An instance is passed as the opaque data argument of
 * do_proc_douintvec_minmax_conv(), which dereferences each non-NULL
 * pointer to range-check a value before it is stored.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};
2740
2741static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
2742 unsigned int *valp,
2743 int write, void *data)
2744{
2745 int ret;
2746 unsigned int tmp;
2747 struct do_proc_douintvec_minmax_conv_param *param = data;
2748 /* write via temporary local uint for bounds-checking */
2749 unsigned int *up = write ? &tmp : valp;
2750
2751 ret = do_proc_douintvec_conv(lvalp, up, write, data);
2752 if (ret)
2753 return ret;
2754
2755 if (write) {
2756 if ((param->min && *param->min > tmp) ||
2757 (param->max && *param->max < tmp))
2758 return -ERANGE;
2759
2760 *valp = tmp;
2761 }
2762
2763 return 0;
2764}
2765
2766/**
2767 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2768 * @table: the sysctl table
2769 * @write: %TRUE if this is a write to the sysctl file
2770 * @buffer: the user buffer
2771 * @lenp: the size of the user buffer
2772 * @ppos: file position
2773 *
2774 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2775 * values from/to the user buffer, treated as an ASCII string. Negative
2776 * strings are not allowed.
2777 *
2778 * This routine will ensure the values are within the range specified by
2779 * table->extra1 (min) and table->extra2 (max). There is a final sanity
2780 * check for UINT_MAX to avoid having to support wrap around uses from
2781 * userspace.
2782 *
2783 * Returns 0 on success or -ERANGE on write when the range check fails.
2784 */
2785int proc_douintvec_minmax(struct ctl_table *table, int write,
2786 void __user *buffer, size_t *lenp, loff_t *ppos)
2787{
2788 struct do_proc_douintvec_minmax_conv_param param = {
2789 .min = (unsigned int *) table->extra1,
2790 .max = (unsigned int *) table->extra2,
2791 };
2792 return do_proc_douintvec(table, write, buffer, lenp, ppos,
2793 do_proc_douintvec_minmax_conv, &param);
2794}
2795
2796static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
2797 unsigned int *valp,
2798 int write, void *data)
2799{
2800 if (write) {
2801 unsigned int val;
2802
2803 val = round_pipe_size(*lvalp);
2804 if (val == 0)
2805 return -EINVAL;
2806
2807 *valp = val;
2808 } else {
2809 unsigned int val = *valp;
2810 *lvalp = (unsigned long) val;
2811 }
2812
2813 return 0;
2814}
2815
2816static int proc_dopipe_max_size(struct ctl_table *table, int write,
2817 void __user *buffer, size_t *lenp, loff_t *ppos)
2818{
2819 return do_proc_douintvec(table, write, buffer, lenp, ppos,
2820 do_proc_dopipe_max_size_conv, NULL);
2821}
2822
/*
 * Log a warning when suid_dumpable is SUID_DUMP_ROOT while core_pattern
 * starts with neither '/' nor '|'. Does nothing in any other case, and
 * compiles to an empty function without CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;

	/* An absolute path or a pipe handler is considered safe. */
	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
2836
2837static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2838 void __user *buffer, size_t *lenp, loff_t *ppos)
2839{
2840 int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2841 if (!error)
2842 validate_coredump_safety();
2843 return error;
2844}
2845
2846#ifdef CONFIG_COREDUMP
2847static int proc_dostring_coredump(struct ctl_table *table, int write,
2848 void __user *buffer, size_t *lenp, loff_t *ppos)
2849{
2850 int error = proc_dostring(table, write, buffer, lenp, ppos);
2851 if (!error)
2852 validate_coredump_safety();
2853 return error;
2854}
2855#endif
2856
2857static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2858 void __user *buffer,
2859 size_t *lenp, loff_t *ppos,
2860 unsigned long convmul,
2861 unsigned long convdiv)
2862{
2863 unsigned long *i, *min, *max;
2864 int vleft, first = 1, err = 0;
2865 size_t left;
2866 char *kbuf = NULL, *p;
2867
2868 if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2869 *lenp = 0;
2870 return 0;
2871 }
2872
2873 i = (unsigned long *) data;
2874 min = (unsigned long *) table->extra1;
2875 max = (unsigned long *) table->extra2;
2876 vleft = table->maxlen / sizeof(unsigned long);
2877 left = *lenp;
2878
2879 if (write) {
2880 if (proc_first_pos_non_zero_ignore(ppos, table))
2881 goto out;
2882
2883 if (left > PAGE_SIZE - 1)
2884 left = PAGE_SIZE - 1;
2885 p = kbuf = memdup_user_nul(buffer, left);
2886 if (IS_ERR(kbuf))
2887 return PTR_ERR(kbuf);
2888 }
2889
2890 for (; left && vleft--; i++, first = 0) {
2891 unsigned long val;
2892
2893 if (write) {
2894 bool neg;
2895
2896 left -= proc_skip_spaces(&p);
2897 if (!left)
2898 break;
2899
2900 err = proc_get_long(&p, &left, &val, &neg,
2901 proc_wspace_sep,
2902 sizeof(proc_wspace_sep), NULL);
2903 if (err)
2904 break;
2905 if (neg)
2906 continue;
2907 val = convmul * val / convdiv;
2908 if ((min && val < *min) || (max && val > *max)) {
2909 err = -EINVAL;
2910 break;
2911 }
2912 *i = val;
2913 } else {
2914 val = convdiv * (*i) / convmul;
2915 if (!first) {
2916 err = proc_put_char(&buffer, &left, '\t');
2917 if (err)
2918 break;
2919 }
2920 err = proc_put_long(&buffer, &left, val, false);
2921 if (err)
2922 break;
2923 }
2924 }
2925
2926 if (!write && !first && left && !err)
2927 err = proc_put_char(&buffer, &left, '\n');
2928 if (write && !err)
2929 left -= proc_skip_spaces(&p);
2930 if (write) {
2931 kfree(kbuf);
2932 if (first)
2933 return err ? : -EINVAL;
2934 }
2935 *lenp -= left;
2936out:
2937 *ppos += *lenp;
2938 return err;
2939}
2940
2941static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2942 void __user *buffer,
2943 size_t *lenp, loff_t *ppos,
2944 unsigned long convmul,
2945 unsigned long convdiv)
2946{
2947 return __do_proc_doulongvec_minmax(table->data, table, write,
2948 buffer, lenp, ppos, convmul, convdiv);
2949}
2950
2951/**
2952 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2953 * @table: the sysctl table
2954 * @write: %TRUE if this is a write to the sysctl file
2955 * @buffer: the user buffer
2956 * @lenp: the size of the user buffer
2957 * @ppos: file position
2958 *
2959 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2960 * values from/to the user buffer, treated as an ASCII string.
2961 *
2962 * This routine will ensure the values are within the range specified by
2963 * table->extra1 (min) and table->extra2 (max).
2964 *
2965 * Returns 0 on success.
2966 */
2967int proc_doulongvec_minmax(struct ctl_table *table, int write,
2968 void __user *buffer, size_t *lenp, loff_t *ppos)
2969{
2970 return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2971}
2972
2973/**
2974 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2975 * @table: the sysctl table
2976 * @write: %TRUE if this is a write to the sysctl file
2977 * @buffer: the user buffer
2978 * @lenp: the size of the user buffer
2979 * @ppos: file position
2980 *
2981 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2982 * values from/to the user buffer, treated as an ASCII string. The values
2983 * are treated as milliseconds, and converted to jiffies when they are stored.
2984 *
2985 * This routine will ensure the values are within the range specified by
2986 * table->extra1 (min) and table->extra2 (max).
2987 *
2988 * Returns 0 on success.
2989 */
2990int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2991 void __user *buffer,
2992 size_t *lenp, loff_t *ppos)
2993{
2994 return do_proc_doulongvec_minmax(table, write, buffer,
2995 lenp, ppos, HZ, 1000l);
2996}
2997
2998
2999static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
3000 int *valp,
3001 int write, void *data)
3002{
3003 if (write) {
3004 if (*lvalp > INT_MAX / HZ)
3005 return 1;
3006 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
3007 } else {
3008 int val = *valp;
3009 unsigned long lval;
3010 if (val < 0) {
3011 *negp = true;
3012 lval = -(unsigned long)val;
3013 } else {
3014 *negp = false;
3015 lval = (unsigned long)val;
3016 }
3017 *lvalp = lval / HZ;
3018 }
3019 return 0;
3020}
3021
3022static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
3023 int *valp,
3024 int write, void *data)
3025{
3026 if (write) {
3027 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
3028 return 1;
3029 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
3030 } else {
3031 int val = *valp;
3032 unsigned long lval;
3033 if (val < 0) {
3034 *negp = true;
3035 lval = -(unsigned long)val;
3036 } else {
3037 *negp = false;
3038 lval = (unsigned long)val;
3039 }
3040 *lvalp = jiffies_to_clock_t(lval);
3041 }
3042 return 0;
3043}
3044
/*
 * Conversion callback: milliseconds in user space, jiffies in the kernel.
 *
 * Write: convert the signed user value with msecs_to_jiffies(); a result
 * above INT_MAX is refused (returns 1), otherwise it is stored as an int.
 * Read: split the stored int into sign (*@negp) and magnitude, and report
 * jiffies_to_msecs(magnitude) in *@lvalp.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long magnitude;
	int stored;

	if (write) {
		unsigned long jif;

		jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
		if (jif > INT_MAX)
			return 1;

		*valp = (int)jif;
		return 0;
	}

	stored = *valp;
	*negp = stored < 0;
	if (stored < 0)
		magnitude = -(unsigned long)stored;
	else
		magnitude = (unsigned long)stored;

	*lvalp = jiffies_to_msecs(magnitude);
	return 0;
}
3069
3070/**
3071 * proc_dointvec_jiffies - read a vector of integers as seconds
3072 * @table: the sysctl table
3073 * @write: %TRUE if this is a write to the sysctl file
3074 * @buffer: the user buffer
3075 * @lenp: the size of the user buffer
3076 * @ppos: file position
3077 *
3078 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3079 * values from/to the user buffer, treated as an ASCII string.
3080 * The values read are assumed to be in seconds, and are converted into
3081 * jiffies.
3082 *
3083 * Returns 0 on success.
3084 */
3085int proc_dointvec_jiffies(struct ctl_table *table, int write,
3086 void __user *buffer, size_t *lenp, loff_t *ppos)
3087{
3088 return do_proc_dointvec(table,write,buffer,lenp,ppos,
3089 do_proc_dointvec_jiffies_conv,NULL);
3090}
3091
3092/**
3093 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
3094 * @table: the sysctl table
3095 * @write: %TRUE if this is a write to the sysctl file
3096 * @buffer: the user buffer
3097 * @lenp: the size of the user buffer
3098 * @ppos: pointer to the file position
3099 *
3100 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3101 * values from/to the user buffer, treated as an ASCII string.
3102 * The values read are assumed to be in 1/USER_HZ seconds, and
3103 * are converted into jiffies.
3104 *
3105 * Returns 0 on success.
3106 */
3107int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3108 void __user *buffer, size_t *lenp, loff_t *ppos)
3109{
3110 return do_proc_dointvec(table,write,buffer,lenp,ppos,
3111 do_proc_dointvec_userhz_jiffies_conv,NULL);
3112}
3113
3114/**
3115 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
3116 * @table: the sysctl table
3117 * @write: %TRUE if this is a write to the sysctl file
3118 * @buffer: the user buffer
3119 * @lenp: the size of the user buffer
3120 * @ppos: file position
3121 * @ppos: the current position in the file
3122 *
3123 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3124 * values from/to the user buffer, treated as an ASCII string.
3125 * The values read are assumed to be in 1/1000 seconds, and
3126 * are converted into jiffies.
3127 *
3128 * Returns 0 on success.
3129 */
3130int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3131 void __user *buffer, size_t *lenp, loff_t *ppos)
3132{
3133 return do_proc_dointvec(table, write, buffer, lenp, ppos,
3134 do_proc_dointvec_ms_jiffies_conv, NULL);
3135}
3136
3137static int proc_do_cad_pid(struct ctl_table *table, int write,
3138 void __user *buffer, size_t *lenp, loff_t *ppos)
3139{
3140 struct pid *new_pid;
3141 pid_t tmp;
3142 int r;
3143
3144 tmp = pid_vnr(cad_pid);
3145
3146 r = __do_proc_dointvec(&tmp, table, write, buffer,
3147 lenp, ppos, NULL, NULL);
3148 if (r || !write)
3149 return r;
3150
3151 new_pid = find_get_pid(tmp);
3152 if (!new_pid)
3153 return -ESRCH;
3154
3155 put_pid(xchg(&cad_pid, new_pid));
3156 return 0;
3157}
3158
3159/**
3160 * proc_do_large_bitmap - read/write from/to a large bitmap
3161 * @table: the sysctl table
3162 * @write: %TRUE if this is a write to the sysctl file
3163 * @buffer: the user buffer
3164 * @lenp: the size of the user buffer
3165 * @ppos: file position
3166 *
3167 * The bitmap is stored at table->data and the bitmap length (in bits)
3168 * in table->maxlen.
3169 *
3170 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3171 * large bitmaps may be represented in a compact manner. Writing into
3172 * the file will clear the bitmap then update it with the given input.
3173 *
3174 * Returns 0 on success.
3175 */
3176int proc_do_large_bitmap(struct ctl_table *table, int write,
3177 void __user *buffer, size_t *lenp, loff_t *ppos)
3178{
3179 int err = 0;
3180 bool first = 1;
3181 size_t left = *lenp;
3182 unsigned long bitmap_len = table->maxlen;
3183 unsigned long *bitmap = *(unsigned long **) table->data;
3184 unsigned long *tmp_bitmap = NULL;
3185 char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3186
3187 if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3188 *lenp = 0;
3189 return 0;
3190 }
3191
3192 if (write) {
3193 char *kbuf, *p;
3194 size_t skipped = 0;
3195
3196 if (left > PAGE_SIZE - 1) {
3197 left = PAGE_SIZE - 1;
3198 /* How much of the buffer we'll skip this pass */
3199 skipped = *lenp - left;
3200 }
3201
3202 p = kbuf = memdup_user_nul(buffer, left);
3203 if (IS_ERR(kbuf))
3204 return PTR_ERR(kbuf);
3205
3206 tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
3207 if (!tmp_bitmap) {
3208 kfree(kbuf);
3209 return -ENOMEM;
3210 }
3211 proc_skip_char(&p, &left, '\n');
3212 while (!err && left) {
3213 unsigned long val_a, val_b;
3214 bool neg;
3215 size_t saved_left;
3216
3217 /* In case we stop parsing mid-number, we can reset */
3218 saved_left = left;
3219 err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3220 sizeof(tr_a), &c);
3221 /*
3222 * If we consumed the entirety of a truncated buffer or
3223 * only one char is left (may be a "-"), then stop here,
3224 * reset, & come back for more.
3225 */
3226 if ((left <= 1) && skipped) {
3227 left = saved_left;
3228 break;
3229 }
3230
3231 if (err)
3232 break;
3233 if (val_a >= bitmap_len || neg) {
3234 err = -EINVAL;
3235 break;
3236 }
3237
3238 val_b = val_a;
3239 if (left) {
3240 p++;
3241 left--;
3242 }
3243
3244 if (c == '-') {
3245 err = proc_get_long(&p, &left, &val_b,
3246 &neg, tr_b, sizeof(tr_b),
3247 &c);
3248 /*
3249 * If we consumed all of a truncated buffer or
3250 * then stop here, reset, & come back for more.
3251 */
3252 if (!left && skipped) {
3253 left = saved_left;
3254 break;
3255 }
3256
3257 if (err)
3258 break;
3259 if (val_b >= bitmap_len || neg ||
3260 val_a > val_b) {
3261 err = -EINVAL;
3262 break;
3263 }
3264 if (left) {
3265 p++;
3266 left--;
3267 }
3268 }
3269
3270 bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3271 first = 0;
3272 proc_skip_char(&p, &left, '\n');
3273 }
3274 kfree(kbuf);
3275 left += skipped;
3276 } else {
3277 unsigned long bit_a, bit_b = 0;
3278
3279 while (left) {
3280 bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3281 if (bit_a >= bitmap_len)
3282 break;
3283 bit_b = find_next_zero_bit(bitmap, bitmap_len,
3284 bit_a + 1) - 1;
3285
3286 if (!first) {
3287 err = proc_put_char(&buffer, &left, ',');
3288 if (err)
3289 break;
3290 }
3291 err = proc_put_long(&buffer, &left, bit_a, false);
3292 if (err)
3293 break;
3294 if (bit_a != bit_b) {
3295 err = proc_put_char(&buffer, &left, '-');
3296 if (err)
3297 break;
3298 err = proc_put_long(&buffer, &left, bit_b, false);
3299 if (err)
3300 break;
3301 }
3302
3303 first = 0; bit_b++;
3304 }
3305 if (!err)
3306 err = proc_put_char(&buffer, &left, '\n');
3307 }
3308
3309 if (!err) {
3310 if (write) {
3311 if (*ppos)
3312 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3313 else
3314 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3315 }
3316 *lenp -= left;
3317 *ppos += *lenp;
3318 }
3319
3320 bitmap_free(tmp_bitmap);
3321 return err;
3322}
3323
3324#else /* CONFIG_PROC_SYSCTL */
3325
3326int proc_dostring(struct ctl_table *table, int write,
3327 void __user *buffer, size_t *lenp, loff_t *ppos)
3328{
3329 return -ENOSYS;
3330}
3331
3332int proc_dointvec(struct ctl_table *table, int write,
3333 void __user *buffer, size_t *lenp, loff_t *ppos)
3334{
3335 return -ENOSYS;
3336}
3337
3338int proc_douintvec(struct ctl_table *table, int write,
3339 void __user *buffer, size_t *lenp, loff_t *ppos)
3340{
3341 return -ENOSYS;
3342}
3343
3344int proc_dointvec_minmax(struct ctl_table *table, int write,
3345 void __user *buffer, size_t *lenp, loff_t *ppos)
3346{
3347 return -ENOSYS;
3348}
3349
3350int proc_douintvec_minmax(struct ctl_table *table, int write,
3351 void __user *buffer, size_t *lenp, loff_t *ppos)
3352{
3353 return -ENOSYS;
3354}
3355
3356int proc_dointvec_jiffies(struct ctl_table *table, int write,
3357 void __user *buffer, size_t *lenp, loff_t *ppos)
3358{
3359 return -ENOSYS;
3360}
3361
3362int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3363 void __user *buffer, size_t *lenp, loff_t *ppos)
3364{
3365 return -ENOSYS;
3366}
3367
3368int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3369 void __user *buffer, size_t *lenp, loff_t *ppos)
3370{
3371 return -ENOSYS;
3372}
3373
3374int proc_doulongvec_minmax(struct ctl_table *table, int write,
3375 void __user *buffer, size_t *lenp, loff_t *ppos)
3376{
3377 return -ENOSYS;
3378}
3379
3380int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3381 void __user *buffer,
3382 size_t *lenp, loff_t *ppos)
3383{
3384 return -ENOSYS;
3385}
3386
3387int proc_do_large_bitmap(struct ctl_table *table, int write,
3388 void __user *buffer, size_t *lenp, loff_t *ppos)
3389{
3390 return -ENOSYS;
3391}
3392
3393#endif /* CONFIG_PROC_SYSCTL */
3394
3395#if defined(CONFIG_SYSCTL)
3396int proc_do_static_key(struct ctl_table *table, int write,
3397 void __user *buffer, size_t *lenp,
3398 loff_t *ppos)
3399{
3400 struct static_key *key = (struct static_key *)table->data;
3401 static DEFINE_MUTEX(static_key_mutex);
3402 int val, ret;
3403 struct ctl_table tmp = {
3404 .data = &val,
3405 .maxlen = sizeof(val),
3406 .mode = table->mode,
3407 .extra1 = SYSCTL_ZERO,
3408 .extra2 = SYSCTL_ONE,
3409 };
3410
3411 if (write && !capable(CAP_SYS_ADMIN))
3412 return -EPERM;
3413
3414 mutex_lock(&static_key_mutex);
3415 val = static_key_enabled(key);
3416 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3417 if (write && !ret) {
3418 if (val)
3419 static_key_enable(key);
3420 else
3421 static_key_disable(key);
3422 }
3423 mutex_unlock(&static_key_mutex);
3424 return ret;
3425}
3426#endif
/*
 * The handlers above are defined twice (once for real, once as stubs),
 * so the exports are collected here in one place instead of following
 * each definition - exception granted :-)
 */

/* string handler */
EXPORT_SYMBOL(proc_dostring);

/* int / unsigned int handlers */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);

/* unsigned long handlers */
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* bitmap handler */
EXPORT_SYMBOL(proc_do_large_bitmap);