]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - kernel/sysctl.c
[PATCH] IPC namespace - shm
[mirror_ubuntu-zesty-kernel.git] / kernel / sysctl.c
CommitLineData
1da177e4
LT
1/*
2 * sysctl.c: General linux system control interface
3 *
4 * Begun 24 March 1995, Stephen Tweedie
5 * Added /proc support, Dec 1995
6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9 * Dynamic registration fixes, Stephen Tweedie.
10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12 * Horn.
13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16 * Wendling.
17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
18 * Removed it and replaced it with older style, 03/23/00, Bill Wendling
19 */
20
1da177e4
LT
21#include <linux/module.h>
22#include <linux/mm.h>
23#include <linux/swap.h>
24#include <linux/slab.h>
25#include <linux/sysctl.h>
26#include <linux/proc_fs.h>
c59ede7b 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/ctype.h>
29#include <linux/utsname.h>
30#include <linux/capability.h>
31#include <linux/smp_lock.h>
32#include <linux/init.h>
33#include <linux/kernel.h>
0296b228 34#include <linux/kobject.h>
20380731 35#include <linux/net.h>
1da177e4
LT
36#include <linux/sysrq.h>
37#include <linux/highuid.h>
38#include <linux/writeback.h>
39#include <linux/hugetlb.h>
40#include <linux/security.h>
41#include <linux/initrd.h>
42#include <linux/times.h>
43#include <linux/limits.h>
44#include <linux/dcache.h>
45#include <linux/syscalls.h>
c255d844
PM
46#include <linux/nfs_fs.h>
47#include <linux/acpi.h>
1da177e4
LT
48
49#include <asm/uaccess.h>
50#include <asm/processor.h>
51
529bf6be
DS
52extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
53 void __user *buffer, size_t *lenp, loff_t *ppos);
54
29cbc78b
AK
55#ifdef CONFIG_X86
56#include <asm/nmi.h>
57#endif
58
1da177e4
LT
59#if defined(CONFIG_SYSCTL)
60
61/* External variables not in a header file. */
62extern int C_A_D;
63extern int sysctl_overcommit_memory;
64extern int sysctl_overcommit_ratio;
fadd8fbd 65extern int sysctl_panic_on_oom;
1da177e4
LT
66extern int max_threads;
67extern int sysrq_enabled;
68extern int core_uses_pid;
d6e71144 69extern int suid_dumpable;
1da177e4
LT
70extern char core_pattern[];
71extern int cad_pid;
72extern int pid_max;
73extern int min_free_kbytes;
74extern int printk_ratelimit_jiffies;
75extern int printk_ratelimit_burst;
76extern int pid_max_min, pid_max_max;
9d0243bc 77extern int sysctl_drop_caches;
8ad4b1fb 78extern int percpu_pagelist_fraction;
bebfa101 79extern int compat_log;
1da177e4 80
1da177e4
LT
/*
 * Bounds handed to proc_dointvec_minmax via .extra1/.extra2 for the
 * [fs_]overflow UID and GID entries, plus the lower bound used by the
 * percpu_pagelist_fraction entry.
 */
static int minolduid = 0;
static int maxolduid = 65535;
static int min_percpu_pagelist_fract = 8;
1da177e4
LT
85
86static int ngroups_max = NGROUPS_MAX;
87
88#ifdef CONFIG_KMOD
89extern char modprobe_path[];
90#endif
1da177e4
LT
91#ifdef CONFIG_CHR_DEV_SG
92extern int sg_big_buff;
93#endif
94#ifdef CONFIG_SYSVIPC
95extern size_t shm_ctlmax;
96extern size_t shm_ctlall;
97extern int shm_ctlmni;
98extern int msg_ctlmax;
99extern int msg_ctlmnb;
100extern int msg_ctlmni;
101extern int sem_ctls[];
102#endif
103
104#ifdef __sparc__
105extern char reboot_command [];
106extern int stop_a_enabled;
107extern int scons_pwroff;
108#endif
109
110#ifdef __hppa__
111extern int pwrsw_enabled;
112extern int unaligned_enabled;
113#endif
114
347a8dc3 115#ifdef CONFIG_S390
1da177e4
LT
116#ifdef CONFIG_MATHEMU
117extern int sysctl_ieee_emulation_warnings;
118#endif
119extern int sysctl_userprocess_debug;
951f22d5 120extern int spin_retry;
1da177e4
LT
121#endif
122
123extern int sysctl_hz_timer;
124
125#ifdef CONFIG_BSD_PROCESS_ACCT
126extern int acct_parm[];
127#endif
128
d2b176ed
JS
129#ifdef CONFIG_IA64
130extern int no_unaligned_warning;
131#endif
132
23f78d4a
IM
133#ifdef CONFIG_RT_MUTEXES
134extern int max_lock_depth;
135#endif
136
b89a8171
EB
137#ifdef CONFIG_SYSCTL_SYSCALL
138static int parse_table(int __user *, int, void __user *, size_t __user *,
139 void __user *, size_t, ctl_table *, void **);
140#endif
141
8218c74c 142static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
1da177e4
LT
143 void __user *buffer, size_t *lenp, loff_t *ppos);
144
145static ctl_table root_table[];
146static struct ctl_table_header root_table_header =
147 { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
148
149static ctl_table kern_table[];
150static ctl_table vm_table[];
1da177e4
LT
151static ctl_table fs_table[];
152static ctl_table debug_table[];
153static ctl_table dev_table[];
154extern ctl_table random_table[];
155#ifdef CONFIG_UNIX98_PTYS
156extern ctl_table pty_table[];
157#endif
2d9048e2 158#ifdef CONFIG_INOTIFY_USER
0399cb08
RL
159extern ctl_table inotify_table[];
160#endif
1da177e4
LT
161
162#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
163int sysctl_legacy_va_layout;
164#endif
165
166/* /proc declarations: */
167
b89a8171 168#ifdef CONFIG_PROC_SYSCTL
1da177e4
LT
169
170static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *);
171static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *);
172static int proc_opensys(struct inode *, struct file *);
173
174struct file_operations proc_sys_file_operations = {
175 .open = proc_opensys,
176 .read = proc_readsys,
177 .write = proc_writesys,
178};
179
180extern struct proc_dir_entry *proc_sys_root;
181
330d57fb 182static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *);
1da177e4
LT
183static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
184#endif
185
186/* The default sysctl tables: */
187
188static ctl_table root_table[] = {
189 {
190 .ctl_name = CTL_KERN,
191 .procname = "kernel",
192 .mode = 0555,
193 .child = kern_table,
194 },
195 {
196 .ctl_name = CTL_VM,
197 .procname = "vm",
198 .mode = 0555,
199 .child = vm_table,
200 },
201#ifdef CONFIG_NET
202 {
203 .ctl_name = CTL_NET,
204 .procname = "net",
205 .mode = 0555,
206 .child = net_table,
207 },
208#endif
1da177e4
LT
209 {
210 .ctl_name = CTL_FS,
211 .procname = "fs",
212 .mode = 0555,
213 .child = fs_table,
214 },
215 {
216 .ctl_name = CTL_DEBUG,
217 .procname = "debug",
218 .mode = 0555,
219 .child = debug_table,
220 },
221 {
222 .ctl_name = CTL_DEV,
223 .procname = "dev",
224 .mode = 0555,
225 .child = dev_table,
226 },
0eeca283 227
1da177e4
LT
228 { .ctl_name = 0 }
229};
230
231static ctl_table kern_table[] = {
8218c74c 232#ifndef CONFIG_UTS_NS
1da177e4
LT
233 {
234 .ctl_name = KERN_OSTYPE,
235 .procname = "ostype",
8218c74c
SH
236 .data = init_uts_ns.name.sysname,
237 .maxlen = sizeof(init_uts_ns.name.sysname),
1da177e4 238 .mode = 0444,
8218c74c 239 .proc_handler = &proc_do_uts_string,
1da177e4
LT
240 .strategy = &sysctl_string,
241 },
242 {
243 .ctl_name = KERN_OSRELEASE,
244 .procname = "osrelease",
8218c74c
SH
245 .data = init_uts_ns.name.release,
246 .maxlen = sizeof(init_uts_ns.name.release),
1da177e4 247 .mode = 0444,
8218c74c 248 .proc_handler = &proc_do_uts_string,
1da177e4
LT
249 .strategy = &sysctl_string,
250 },
251 {
252 .ctl_name = KERN_VERSION,
253 .procname = "version",
8218c74c
SH
254 .data = init_uts_ns.name.version,
255 .maxlen = sizeof(init_uts_ns.name.version),
1da177e4 256 .mode = 0444,
8218c74c 257 .proc_handler = &proc_do_uts_string,
1da177e4
LT
258 .strategy = &sysctl_string,
259 },
260 {
261 .ctl_name = KERN_NODENAME,
262 .procname = "hostname",
8218c74c
SH
263 .data = init_uts_ns.name.nodename,
264 .maxlen = sizeof(init_uts_ns.name.nodename),
1da177e4 265 .mode = 0644,
8218c74c 266 .proc_handler = &proc_do_uts_string,
1da177e4
LT
267 .strategy = &sysctl_string,
268 },
269 {
270 .ctl_name = KERN_DOMAINNAME,
271 .procname = "domainname",
8218c74c
SH
272 .data = init_uts_ns.name.domainname,
273 .maxlen = sizeof(init_uts_ns.name.domainname),
1da177e4 274 .mode = 0644,
8218c74c 275 .proc_handler = &proc_do_uts_string,
1da177e4
LT
276 .strategy = &sysctl_string,
277 },
8218c74c
SH
278#else /* !CONFIG_UTS_NS */
279 {
280 .ctl_name = KERN_OSTYPE,
281 .procname = "ostype",
282 .data = NULL,
283 /* could maybe use __NEW_UTS_LEN here? */
284 .maxlen = FIELD_SIZEOF(struct new_utsname, sysname),
285 .mode = 0444,
286 .proc_handler = &proc_do_uts_string,
287 .strategy = &sysctl_string,
288 },
289 {
290 .ctl_name = KERN_OSRELEASE,
291 .procname = "osrelease",
292 .data = NULL,
293 .maxlen = FIELD_SIZEOF(struct new_utsname, release),
294 .mode = 0444,
295 .proc_handler = &proc_do_uts_string,
296 .strategy = &sysctl_string,
297 },
298 {
299 .ctl_name = KERN_VERSION,
300 .procname = "version",
301 .data = NULL,
302 .maxlen = FIELD_SIZEOF(struct new_utsname, version),
303 .mode = 0444,
304 .proc_handler = &proc_do_uts_string,
305 .strategy = &sysctl_string,
306 },
307 {
308 .ctl_name = KERN_NODENAME,
309 .procname = "hostname",
310 .data = NULL,
311 .maxlen = FIELD_SIZEOF(struct new_utsname, nodename),
312 .mode = 0644,
313 .proc_handler = &proc_do_uts_string,
314 .strategy = &sysctl_string,
315 },
316 {
317 .ctl_name = KERN_DOMAINNAME,
318 .procname = "domainname",
319 .data = NULL,
320 .maxlen = FIELD_SIZEOF(struct new_utsname, domainname),
321 .mode = 0644,
322 .proc_handler = &proc_do_uts_string,
323 .strategy = &sysctl_string,
324 },
325#endif /* !CONFIG_UTS_NS */
1da177e4
LT
326 {
327 .ctl_name = KERN_PANIC,
328 .procname = "panic",
329 .data = &panic_timeout,
330 .maxlen = sizeof(int),
331 .mode = 0644,
332 .proc_handler = &proc_dointvec,
333 },
334 {
335 .ctl_name = KERN_CORE_USES_PID,
336 .procname = "core_uses_pid",
337 .data = &core_uses_pid,
338 .maxlen = sizeof(int),
339 .mode = 0644,
340 .proc_handler = &proc_dointvec,
341 },
342 {
343 .ctl_name = KERN_CORE_PATTERN,
344 .procname = "core_pattern",
345 .data = core_pattern,
d025c9db 346 .maxlen = 128,
1da177e4
LT
347 .mode = 0644,
348 .proc_handler = &proc_dostring,
349 .strategy = &sysctl_string,
350 },
351 {
352 .ctl_name = KERN_TAINTED,
353 .procname = "tainted",
354 .data = &tainted,
355 .maxlen = sizeof(int),
356 .mode = 0444,
357 .proc_handler = &proc_dointvec,
358 },
359 {
360 .ctl_name = KERN_CAP_BSET,
361 .procname = "cap-bound",
362 .data = &cap_bset,
363 .maxlen = sizeof(kernel_cap_t),
364 .mode = 0600,
365 .proc_handler = &proc_dointvec_bset,
366 },
367#ifdef CONFIG_BLK_DEV_INITRD
368 {
369 .ctl_name = KERN_REALROOTDEV,
370 .procname = "real-root-dev",
371 .data = &real_root_dev,
372 .maxlen = sizeof(int),
373 .mode = 0644,
374 .proc_handler = &proc_dointvec,
375 },
376#endif
377#ifdef __sparc__
378 {
379 .ctl_name = KERN_SPARC_REBOOT,
380 .procname = "reboot-cmd",
381 .data = reboot_command,
382 .maxlen = 256,
383 .mode = 0644,
384 .proc_handler = &proc_dostring,
385 .strategy = &sysctl_string,
386 },
387 {
388 .ctl_name = KERN_SPARC_STOP_A,
389 .procname = "stop-a",
390 .data = &stop_a_enabled,
391 .maxlen = sizeof (int),
392 .mode = 0644,
393 .proc_handler = &proc_dointvec,
394 },
395 {
396 .ctl_name = KERN_SPARC_SCONS_PWROFF,
397 .procname = "scons-poweroff",
398 .data = &scons_pwroff,
399 .maxlen = sizeof (int),
400 .mode = 0644,
401 .proc_handler = &proc_dointvec,
402 },
403#endif
404#ifdef __hppa__
405 {
406 .ctl_name = KERN_HPPA_PWRSW,
407 .procname = "soft-power",
408 .data = &pwrsw_enabled,
409 .maxlen = sizeof (int),
410 .mode = 0644,
411 .proc_handler = &proc_dointvec,
412 },
413 {
414 .ctl_name = KERN_HPPA_UNALIGNED,
415 .procname = "unaligned-trap",
416 .data = &unaligned_enabled,
417 .maxlen = sizeof (int),
418 .mode = 0644,
419 .proc_handler = &proc_dointvec,
420 },
421#endif
422 {
423 .ctl_name = KERN_CTLALTDEL,
424 .procname = "ctrl-alt-del",
425 .data = &C_A_D,
426 .maxlen = sizeof(int),
427 .mode = 0644,
428 .proc_handler = &proc_dointvec,
429 },
430 {
431 .ctl_name = KERN_PRINTK,
432 .procname = "printk",
433 .data = &console_loglevel,
434 .maxlen = 4*sizeof(int),
435 .mode = 0644,
436 .proc_handler = &proc_dointvec,
437 },
438#ifdef CONFIG_KMOD
439 {
440 .ctl_name = KERN_MODPROBE,
441 .procname = "modprobe",
442 .data = &modprobe_path,
443 .maxlen = KMOD_PATH_LEN,
444 .mode = 0644,
445 .proc_handler = &proc_dostring,
446 .strategy = &sysctl_string,
447 },
448#endif
57ae2508 449#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
1da177e4
LT
450 {
451 .ctl_name = KERN_HOTPLUG,
452 .procname = "hotplug",
312c004d
KS
453 .data = &uevent_helper,
454 .maxlen = UEVENT_HELPER_PATH_LEN,
1da177e4
LT
455 .mode = 0644,
456 .proc_handler = &proc_dostring,
457 .strategy = &sysctl_string,
458 },
459#endif
460#ifdef CONFIG_CHR_DEV_SG
461 {
462 .ctl_name = KERN_SG_BIG_BUFF,
463 .procname = "sg-big-buff",
464 .data = &sg_big_buff,
465 .maxlen = sizeof (int),
466 .mode = 0444,
467 .proc_handler = &proc_dointvec,
468 },
469#endif
470#ifdef CONFIG_BSD_PROCESS_ACCT
471 {
472 .ctl_name = KERN_ACCT,
473 .procname = "acct",
474 .data = &acct_parm,
475 .maxlen = 3*sizeof(int),
476 .mode = 0644,
477 .proc_handler = &proc_dointvec,
478 },
479#endif
480#ifdef CONFIG_SYSVIPC
481 {
482 .ctl_name = KERN_SHMMAX,
483 .procname = "shmmax",
484 .data = &shm_ctlmax,
485 .maxlen = sizeof (size_t),
486 .mode = 0644,
487 .proc_handler = &proc_doulongvec_minmax,
488 },
489 {
490 .ctl_name = KERN_SHMALL,
491 .procname = "shmall",
492 .data = &shm_ctlall,
493 .maxlen = sizeof (size_t),
494 .mode = 0644,
495 .proc_handler = &proc_doulongvec_minmax,
496 },
497 {
498 .ctl_name = KERN_SHMMNI,
499 .procname = "shmmni",
500 .data = &shm_ctlmni,
501 .maxlen = sizeof (int),
502 .mode = 0644,
503 .proc_handler = &proc_dointvec,
504 },
505 {
506 .ctl_name = KERN_MSGMAX,
507 .procname = "msgmax",
508 .data = &msg_ctlmax,
509 .maxlen = sizeof (int),
510 .mode = 0644,
511 .proc_handler = &proc_dointvec,
512 },
513 {
514 .ctl_name = KERN_MSGMNI,
515 .procname = "msgmni",
516 .data = &msg_ctlmni,
517 .maxlen = sizeof (int),
518 .mode = 0644,
519 .proc_handler = &proc_dointvec,
520 },
521 {
522 .ctl_name = KERN_MSGMNB,
523 .procname = "msgmnb",
524 .data = &msg_ctlmnb,
525 .maxlen = sizeof (int),
526 .mode = 0644,
527 .proc_handler = &proc_dointvec,
528 },
529 {
530 .ctl_name = KERN_SEM,
531 .procname = "sem",
532 .data = &sem_ctls,
533 .maxlen = 4*sizeof (int),
534 .mode = 0644,
535 .proc_handler = &proc_dointvec,
536 },
537#endif
538#ifdef CONFIG_MAGIC_SYSRQ
539 {
540 .ctl_name = KERN_SYSRQ,
541 .procname = "sysrq",
542 .data = &sysrq_enabled,
543 .maxlen = sizeof (int),
544 .mode = 0644,
545 .proc_handler = &proc_dointvec,
546 },
547#endif
548 {
549 .ctl_name = KERN_CADPID,
550 .procname = "cad_pid",
551 .data = &cad_pid,
552 .maxlen = sizeof (int),
553 .mode = 0600,
554 .proc_handler = &proc_dointvec,
555 },
556 {
557 .ctl_name = KERN_MAX_THREADS,
558 .procname = "threads-max",
559 .data = &max_threads,
560 .maxlen = sizeof(int),
561 .mode = 0644,
562 .proc_handler = &proc_dointvec,
563 },
564 {
565 .ctl_name = KERN_RANDOM,
566 .procname = "random",
567 .mode = 0555,
568 .child = random_table,
569 },
570#ifdef CONFIG_UNIX98_PTYS
571 {
572 .ctl_name = KERN_PTY,
573 .procname = "pty",
574 .mode = 0555,
575 .child = pty_table,
576 },
577#endif
578 {
579 .ctl_name = KERN_OVERFLOWUID,
580 .procname = "overflowuid",
581 .data = &overflowuid,
582 .maxlen = sizeof(int),
583 .mode = 0644,
584 .proc_handler = &proc_dointvec_minmax,
585 .strategy = &sysctl_intvec,
586 .extra1 = &minolduid,
587 .extra2 = &maxolduid,
588 },
589 {
590 .ctl_name = KERN_OVERFLOWGID,
591 .procname = "overflowgid",
592 .data = &overflowgid,
593 .maxlen = sizeof(int),
594 .mode = 0644,
595 .proc_handler = &proc_dointvec_minmax,
596 .strategy = &sysctl_intvec,
597 .extra1 = &minolduid,
598 .extra2 = &maxolduid,
599 },
347a8dc3 600#ifdef CONFIG_S390
1da177e4
LT
601#ifdef CONFIG_MATHEMU
602 {
603 .ctl_name = KERN_IEEE_EMULATION_WARNINGS,
604 .procname = "ieee_emulation_warnings",
605 .data = &sysctl_ieee_emulation_warnings,
606 .maxlen = sizeof(int),
607 .mode = 0644,
608 .proc_handler = &proc_dointvec,
609 },
610#endif
611#ifdef CONFIG_NO_IDLE_HZ
612 {
613 .ctl_name = KERN_HZ_TIMER,
614 .procname = "hz_timer",
615 .data = &sysctl_hz_timer,
616 .maxlen = sizeof(int),
617 .mode = 0644,
618 .proc_handler = &proc_dointvec,
619 },
620#endif
621 {
622 .ctl_name = KERN_S390_USER_DEBUG_LOGGING,
623 .procname = "userprocess_debug",
624 .data = &sysctl_userprocess_debug,
625 .maxlen = sizeof(int),
626 .mode = 0644,
627 .proc_handler = &proc_dointvec,
628 },
629#endif
630 {
631 .ctl_name = KERN_PIDMAX,
632 .procname = "pid_max",
633 .data = &pid_max,
634 .maxlen = sizeof (int),
635 .mode = 0644,
636 .proc_handler = &proc_dointvec_minmax,
637 .strategy = sysctl_intvec,
638 .extra1 = &pid_max_min,
639 .extra2 = &pid_max_max,
640 },
641 {
642 .ctl_name = KERN_PANIC_ON_OOPS,
643 .procname = "panic_on_oops",
644 .data = &panic_on_oops,
645 .maxlen = sizeof(int),
646 .mode = 0644,
647 .proc_handler = &proc_dointvec,
648 },
649 {
650 .ctl_name = KERN_PRINTK_RATELIMIT,
651 .procname = "printk_ratelimit",
652 .data = &printk_ratelimit_jiffies,
653 .maxlen = sizeof(int),
654 .mode = 0644,
655 .proc_handler = &proc_dointvec_jiffies,
656 .strategy = &sysctl_jiffies,
657 },
658 {
659 .ctl_name = KERN_PRINTK_RATELIMIT_BURST,
660 .procname = "printk_ratelimit_burst",
661 .data = &printk_ratelimit_burst,
662 .maxlen = sizeof(int),
663 .mode = 0644,
664 .proc_handler = &proc_dointvec,
665 },
666 {
667 .ctl_name = KERN_NGROUPS_MAX,
668 .procname = "ngroups_max",
669 .data = &ngroups_max,
670 .maxlen = sizeof (int),
671 .mode = 0444,
672 .proc_handler = &proc_dointvec,
673 },
674#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
675 {
676 .ctl_name = KERN_UNKNOWN_NMI_PANIC,
677 .procname = "unknown_nmi_panic",
678 .data = &unknown_nmi_panic,
679 .maxlen = sizeof (int),
680 .mode = 0644,
2fbe7b25 681 .proc_handler = &proc_dointvec,
1da177e4 682 },
407984f1
DZ
683 {
684 .ctl_name = KERN_NMI_WATCHDOG,
685 .procname = "nmi_watchdog",
686 .data = &nmi_watchdog_enabled,
687 .maxlen = sizeof (int),
688 .mode = 0644,
689 .proc_handler = &proc_nmi_enabled,
1da177e4
LT
690 },
691#endif
692#if defined(CONFIG_X86)
8da5adda
DZ
693 {
694 .ctl_name = KERN_PANIC_ON_NMI,
695 .procname = "panic_on_unrecovered_nmi",
696 .data = &panic_on_unrecovered_nmi,
697 .maxlen = sizeof(int),
698 .mode = 0644,
699 .proc_handler = &proc_dointvec,
700 },
1da177e4
LT
701 {
702 .ctl_name = KERN_BOOTLOADER_TYPE,
703 .procname = "bootloader_type",
704 .data = &bootloader_type,
705 .maxlen = sizeof (int),
706 .mode = 0444,
707 .proc_handler = &proc_dointvec,
708 },
709#endif
7a9166e3 710#if defined(CONFIG_MMU)
1da177e4
LT
711 {
712 .ctl_name = KERN_RANDOMIZE,
713 .procname = "randomize_va_space",
714 .data = &randomize_va_space,
715 .maxlen = sizeof(int),
716 .mode = 0644,
717 .proc_handler = &proc_dointvec,
718 },
7a9166e3 719#endif
0152fb37 720#if defined(CONFIG_S390) && defined(CONFIG_SMP)
951f22d5
MS
721 {
722 .ctl_name = KERN_SPIN_RETRY,
723 .procname = "spin_retry",
724 .data = &spin_retry,
725 .maxlen = sizeof (int),
726 .mode = 0644,
727 .proc_handler = &proc_dointvec,
728 },
c255d844
PM
729#endif
730#ifdef CONFIG_ACPI_SLEEP
731 {
732 .ctl_name = KERN_ACPI_VIDEO_FLAGS,
733 .procname = "acpi_video_flags",
734 .data = &acpi_video_flags,
735 .maxlen = sizeof (unsigned long),
736 .mode = 0644,
7f99f06f 737 .proc_handler = &proc_doulongvec_minmax,
c255d844 738 },
d2b176ed
JS
739#endif
740#ifdef CONFIG_IA64
741 {
742 .ctl_name = KERN_IA64_UNALIGNED,
743 .procname = "ignore-unaligned-usertrap",
744 .data = &no_unaligned_warning,
745 .maxlen = sizeof (int),
746 .mode = 0644,
747 .proc_handler = &proc_dointvec,
748 },
bebfa101
AK
749#endif
750#ifdef CONFIG_COMPAT
751 {
752 .ctl_name = KERN_COMPAT_LOG,
753 .procname = "compat-log",
754 .data = &compat_log,
755 .maxlen = sizeof (int),
756 .mode = 0644,
757 .proc_handler = &proc_dointvec,
758 },
951f22d5 759#endif
23f78d4a
IM
760#ifdef CONFIG_RT_MUTEXES
761 {
762 .ctl_name = KERN_MAX_LOCK_DEPTH,
763 .procname = "max_lock_depth",
764 .data = &max_lock_depth,
765 .maxlen = sizeof(int),
766 .mode = 0644,
767 .proc_handler = &proc_dointvec,
768 },
769#endif
770
1da177e4
LT
771 { .ctl_name = 0 }
772};
773
/*
 * Lower and upper bounds referenced through .extra1/.extra2 by vm_table
 * entries.  Each is used as a one-element integer vector.
 */
static int zero = 0;
static int one_hundred = 100;
778
779
780static ctl_table vm_table[] = {
781 {
782 .ctl_name = VM_OVERCOMMIT_MEMORY,
783 .procname = "overcommit_memory",
784 .data = &sysctl_overcommit_memory,
785 .maxlen = sizeof(sysctl_overcommit_memory),
786 .mode = 0644,
787 .proc_handler = &proc_dointvec,
788 },
fadd8fbd
KH
789 {
790 .ctl_name = VM_PANIC_ON_OOM,
791 .procname = "panic_on_oom",
792 .data = &sysctl_panic_on_oom,
793 .maxlen = sizeof(sysctl_panic_on_oom),
794 .mode = 0644,
795 .proc_handler = &proc_dointvec,
796 },
1da177e4
LT
797 {
798 .ctl_name = VM_OVERCOMMIT_RATIO,
799 .procname = "overcommit_ratio",
800 .data = &sysctl_overcommit_ratio,
801 .maxlen = sizeof(sysctl_overcommit_ratio),
802 .mode = 0644,
803 .proc_handler = &proc_dointvec,
804 },
805 {
806 .ctl_name = VM_PAGE_CLUSTER,
807 .procname = "page-cluster",
808 .data = &page_cluster,
809 .maxlen = sizeof(int),
810 .mode = 0644,
811 .proc_handler = &proc_dointvec,
812 },
813 {
814 .ctl_name = VM_DIRTY_BACKGROUND,
815 .procname = "dirty_background_ratio",
816 .data = &dirty_background_ratio,
817 .maxlen = sizeof(dirty_background_ratio),
818 .mode = 0644,
819 .proc_handler = &proc_dointvec_minmax,
820 .strategy = &sysctl_intvec,
821 .extra1 = &zero,
822 .extra2 = &one_hundred,
823 },
824 {
825 .ctl_name = VM_DIRTY_RATIO,
826 .procname = "dirty_ratio",
827 .data = &vm_dirty_ratio,
828 .maxlen = sizeof(vm_dirty_ratio),
829 .mode = 0644,
830 .proc_handler = &proc_dointvec_minmax,
831 .strategy = &sysctl_intvec,
832 .extra1 = &zero,
833 .extra2 = &one_hundred,
834 },
835 {
836 .ctl_name = VM_DIRTY_WB_CS,
837 .procname = "dirty_writeback_centisecs",
f6ef9438
BS
838 .data = &dirty_writeback_interval,
839 .maxlen = sizeof(dirty_writeback_interval),
1da177e4
LT
840 .mode = 0644,
841 .proc_handler = &dirty_writeback_centisecs_handler,
842 },
843 {
844 .ctl_name = VM_DIRTY_EXPIRE_CS,
845 .procname = "dirty_expire_centisecs",
f6ef9438
BS
846 .data = &dirty_expire_interval,
847 .maxlen = sizeof(dirty_expire_interval),
1da177e4 848 .mode = 0644,
f6ef9438 849 .proc_handler = &proc_dointvec_userhz_jiffies,
1da177e4
LT
850 },
851 {
852 .ctl_name = VM_NR_PDFLUSH_THREADS,
853 .procname = "nr_pdflush_threads",
854 .data = &nr_pdflush_threads,
855 .maxlen = sizeof nr_pdflush_threads,
856 .mode = 0444 /* read-only*/,
857 .proc_handler = &proc_dointvec,
858 },
859 {
860 .ctl_name = VM_SWAPPINESS,
861 .procname = "swappiness",
862 .data = &vm_swappiness,
863 .maxlen = sizeof(vm_swappiness),
864 .mode = 0644,
865 .proc_handler = &proc_dointvec_minmax,
866 .strategy = &sysctl_intvec,
867 .extra1 = &zero,
868 .extra2 = &one_hundred,
869 },
870#ifdef CONFIG_HUGETLB_PAGE
871 {
872 .ctl_name = VM_HUGETLB_PAGES,
873 .procname = "nr_hugepages",
874 .data = &max_huge_pages,
875 .maxlen = sizeof(unsigned long),
876 .mode = 0644,
877 .proc_handler = &hugetlb_sysctl_handler,
878 .extra1 = (void *)&hugetlb_zero,
879 .extra2 = (void *)&hugetlb_infinity,
880 },
881 {
882 .ctl_name = VM_HUGETLB_GROUP,
883 .procname = "hugetlb_shm_group",
884 .data = &sysctl_hugetlb_shm_group,
885 .maxlen = sizeof(gid_t),
886 .mode = 0644,
887 .proc_handler = &proc_dointvec,
888 },
889#endif
890 {
891 .ctl_name = VM_LOWMEM_RESERVE_RATIO,
892 .procname = "lowmem_reserve_ratio",
893 .data = &sysctl_lowmem_reserve_ratio,
894 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
895 .mode = 0644,
896 .proc_handler = &lowmem_reserve_ratio_sysctl_handler,
897 .strategy = &sysctl_intvec,
898 },
9d0243bc
AM
899 {
900 .ctl_name = VM_DROP_PAGECACHE,
901 .procname = "drop_caches",
902 .data = &sysctl_drop_caches,
903 .maxlen = sizeof(int),
904 .mode = 0644,
905 .proc_handler = drop_caches_sysctl_handler,
906 .strategy = &sysctl_intvec,
907 },
1da177e4
LT
908 {
909 .ctl_name = VM_MIN_FREE_KBYTES,
910 .procname = "min_free_kbytes",
911 .data = &min_free_kbytes,
912 .maxlen = sizeof(min_free_kbytes),
913 .mode = 0644,
914 .proc_handler = &min_free_kbytes_sysctl_handler,
915 .strategy = &sysctl_intvec,
916 .extra1 = &zero,
917 },
8ad4b1fb
RS
918 {
919 .ctl_name = VM_PERCPU_PAGELIST_FRACTION,
920 .procname = "percpu_pagelist_fraction",
921 .data = &percpu_pagelist_fraction,
922 .maxlen = sizeof(percpu_pagelist_fraction),
923 .mode = 0644,
924 .proc_handler = &percpu_pagelist_fraction_sysctl_handler,
925 .strategy = &sysctl_intvec,
926 .extra1 = &min_percpu_pagelist_fract,
927 },
1da177e4
LT
928#ifdef CONFIG_MMU
929 {
930 .ctl_name = VM_MAX_MAP_COUNT,
931 .procname = "max_map_count",
932 .data = &sysctl_max_map_count,
933 .maxlen = sizeof(sysctl_max_map_count),
934 .mode = 0644,
935 .proc_handler = &proc_dointvec
936 },
937#endif
938 {
939 .ctl_name = VM_LAPTOP_MODE,
940 .procname = "laptop_mode",
941 .data = &laptop_mode,
942 .maxlen = sizeof(laptop_mode),
943 .mode = 0644,
ed5b43f1
BS
944 .proc_handler = &proc_dointvec_jiffies,
945 .strategy = &sysctl_jiffies,
1da177e4
LT
946 },
947 {
948 .ctl_name = VM_BLOCK_DUMP,
949 .procname = "block_dump",
950 .data = &block_dump,
951 .maxlen = sizeof(block_dump),
952 .mode = 0644,
953 .proc_handler = &proc_dointvec,
954 .strategy = &sysctl_intvec,
955 .extra1 = &zero,
956 },
957 {
958 .ctl_name = VM_VFS_CACHE_PRESSURE,
959 .procname = "vfs_cache_pressure",
960 .data = &sysctl_vfs_cache_pressure,
961 .maxlen = sizeof(sysctl_vfs_cache_pressure),
962 .mode = 0644,
963 .proc_handler = &proc_dointvec,
964 .strategy = &sysctl_intvec,
965 .extra1 = &zero,
966 },
967#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
968 {
969 .ctl_name = VM_LEGACY_VA_LAYOUT,
970 .procname = "legacy_va_layout",
971 .data = &sysctl_legacy_va_layout,
972 .maxlen = sizeof(sysctl_legacy_va_layout),
973 .mode = 0644,
974 .proc_handler = &proc_dointvec,
975 .strategy = &sysctl_intvec,
976 .extra1 = &zero,
977 },
978#endif
979#ifdef CONFIG_SWAP
980 {
981 .ctl_name = VM_SWAP_TOKEN_TIMEOUT,
982 .procname = "swap_token_timeout",
983 .data = &swap_token_default_timeout,
984 .maxlen = sizeof(swap_token_default_timeout),
985 .mode = 0644,
986 .proc_handler = &proc_dointvec_jiffies,
987 .strategy = &sysctl_jiffies,
988 },
1743660b
CL
989#endif
990#ifdef CONFIG_NUMA
991 {
992 .ctl_name = VM_ZONE_RECLAIM_MODE,
993 .procname = "zone_reclaim_mode",
994 .data = &zone_reclaim_mode,
995 .maxlen = sizeof(zone_reclaim_mode),
996 .mode = 0644,
997 .proc_handler = &proc_dointvec,
c84db23c
CL
998 .strategy = &sysctl_intvec,
999 .extra1 = &zero,
1743660b 1000 },
9614634f
CL
1001 {
1002 .ctl_name = VM_MIN_UNMAPPED,
1003 .procname = "min_unmapped_ratio",
1004 .data = &sysctl_min_unmapped_ratio,
1005 .maxlen = sizeof(sysctl_min_unmapped_ratio),
1006 .mode = 0644,
1007 .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler,
1008 .strategy = &sysctl_intvec,
1009 .extra1 = &zero,
1010 .extra2 = &one_hundred,
1011 },
0ff38490
CL
1012 {
1013 .ctl_name = VM_MIN_SLAB,
1014 .procname = "min_slab_ratio",
1015 .data = &sysctl_min_slab_ratio,
1016 .maxlen = sizeof(sysctl_min_slab_ratio),
1017 .mode = 0644,
1018 .proc_handler = &sysctl_min_slab_ratio_sysctl_handler,
1019 .strategy = &sysctl_intvec,
1020 .extra1 = &zero,
1021 .extra2 = &one_hundred,
1022 },
e6e5494c
IM
1023#endif
1024#ifdef CONFIG_X86_32
1025 {
1026 .ctl_name = VM_VDSO_ENABLED,
1027 .procname = "vdso_enabled",
1028 .data = &vdso_enabled,
1029 .maxlen = sizeof(vdso_enabled),
1030 .mode = 0644,
1031 .proc_handler = &proc_dointvec,
1032 .strategy = &sysctl_intvec,
1033 .extra1 = &zero,
1034 },
1da177e4
LT
1035#endif
1036 { .ctl_name = 0 }
1037};
1038
1da177e4
LT
1039static ctl_table fs_table[] = {
1040 {
1041 .ctl_name = FS_NRINODE,
1042 .procname = "inode-nr",
1043 .data = &inodes_stat,
1044 .maxlen = 2*sizeof(int),
1045 .mode = 0444,
1046 .proc_handler = &proc_dointvec,
1047 },
1048 {
1049 .ctl_name = FS_STATINODE,
1050 .procname = "inode-state",
1051 .data = &inodes_stat,
1052 .maxlen = 7*sizeof(int),
1053 .mode = 0444,
1054 .proc_handler = &proc_dointvec,
1055 },
1056 {
1057 .ctl_name = FS_NRFILE,
1058 .procname = "file-nr",
1059 .data = &files_stat,
1060 .maxlen = 3*sizeof(int),
1061 .mode = 0444,
529bf6be 1062 .proc_handler = &proc_nr_files,
1da177e4
LT
1063 },
1064 {
1065 .ctl_name = FS_MAXFILE,
1066 .procname = "file-max",
1067 .data = &files_stat.max_files,
1068 .maxlen = sizeof(int),
1069 .mode = 0644,
1070 .proc_handler = &proc_dointvec,
1071 },
1072 {
1073 .ctl_name = FS_DENTRY,
1074 .procname = "dentry-state",
1075 .data = &dentry_stat,
1076 .maxlen = 6*sizeof(int),
1077 .mode = 0444,
1078 .proc_handler = &proc_dointvec,
1079 },
1080 {
1081 .ctl_name = FS_OVERFLOWUID,
1082 .procname = "overflowuid",
1083 .data = &fs_overflowuid,
1084 .maxlen = sizeof(int),
1085 .mode = 0644,
1086 .proc_handler = &proc_dointvec_minmax,
1087 .strategy = &sysctl_intvec,
1088 .extra1 = &minolduid,
1089 .extra2 = &maxolduid,
1090 },
1091 {
1092 .ctl_name = FS_OVERFLOWGID,
1093 .procname = "overflowgid",
1094 .data = &fs_overflowgid,
1095 .maxlen = sizeof(int),
1096 .mode = 0644,
1097 .proc_handler = &proc_dointvec_minmax,
1098 .strategy = &sysctl_intvec,
1099 .extra1 = &minolduid,
1100 .extra2 = &maxolduid,
1101 },
1102 {
1103 .ctl_name = FS_LEASES,
1104 .procname = "leases-enable",
1105 .data = &leases_enable,
1106 .maxlen = sizeof(int),
1107 .mode = 0644,
1108 .proc_handler = &proc_dointvec,
1109 },
1110#ifdef CONFIG_DNOTIFY
1111 {
1112 .ctl_name = FS_DIR_NOTIFY,
1113 .procname = "dir-notify-enable",
1114 .data = &dir_notify_enable,
1115 .maxlen = sizeof(int),
1116 .mode = 0644,
1117 .proc_handler = &proc_dointvec,
1118 },
1119#endif
1120#ifdef CONFIG_MMU
1121 {
1122 .ctl_name = FS_LEASE_TIME,
1123 .procname = "lease-break-time",
1124 .data = &lease_break_time,
1125 .maxlen = sizeof(int),
1126 .mode = 0644,
1127 .proc_handler = &proc_dointvec,
1128 },
1129 {
1130 .ctl_name = FS_AIO_NR,
1131 .procname = "aio-nr",
1132 .data = &aio_nr,
1133 .maxlen = sizeof(aio_nr),
1134 .mode = 0444,
d55b5fda 1135 .proc_handler = &proc_doulongvec_minmax,
1da177e4
LT
1136 },
1137 {
1138 .ctl_name = FS_AIO_MAX_NR,
1139 .procname = "aio-max-nr",
1140 .data = &aio_max_nr,
1141 .maxlen = sizeof(aio_max_nr),
1142 .mode = 0644,
d55b5fda 1143 .proc_handler = &proc_doulongvec_minmax,
1da177e4 1144 },
2d9048e2 1145#ifdef CONFIG_INOTIFY_USER
0399cb08
RL
1146 {
1147 .ctl_name = FS_INOTIFY,
1148 .procname = "inotify",
1149 .mode = 0555,
1150 .child = inotify_table,
1151 },
1152#endif
1da177e4 1153#endif
d6e71144
AC
1154 {
1155 .ctl_name = KERN_SETUID_DUMPABLE,
1156 .procname = "suid_dumpable",
1157 .data = &suid_dumpable,
1158 .maxlen = sizeof(int),
1159 .mode = 0644,
1160 .proc_handler = &proc_dointvec,
1161 },
1da177e4
LT
1162 { .ctl_name = 0 }
1163};
1164
1165static ctl_table debug_table[] = {
1166 { .ctl_name = 0 }
1167};
1168
1169static ctl_table dev_table[] = {
1170 { .ctl_name = 0 }
0eeca283 1171};
1da177e4
LT
1172
1173extern void init_irq_proc (void);
1174
330d57fb
AV
1175static DEFINE_SPINLOCK(sysctl_lock);
1176
1177/* called under sysctl_lock */
1178static int use_table(struct ctl_table_header *p)
1179{
1180 if (unlikely(p->unregistering))
1181 return 0;
1182 p->used++;
1183 return 1;
1184}
1185
1186/* called under sysctl_lock */
1187static void unuse_table(struct ctl_table_header *p)
1188{
1189 if (!--p->used)
1190 if (unlikely(p->unregistering))
1191 complete(p->unregistering);
1192}
1193
/*
 * Wait until nobody uses the header any more, then unlink it from the
 * list of registered tables.
 *
 * If p->used is non-zero, an on-stack completion is published through
 * p->unregistering, sysctl_lock is dropped while waiting for it, and the
 * lock is taken again before returning.
 */
1194/* called under sysctl_lock, will reacquire if has to wait */
1195static void start_unregistering(struct ctl_table_header *p)
1196{
1197 /*
1198 * if p->used is 0, nobody will ever touch that entry again;
1199 * we'll eliminate all paths to it before dropping sysctl_lock
1200 */
1201 if (unlikely(p->used)) {
1202 struct completion wait;
1203 init_completion(&wait);
/* once set, use_table() refuses new users and unuse_table() completes it */
1204 p->unregistering = &wait;
1205 spin_unlock(&sysctl_lock);
1206 wait_for_completion(&wait);
1207 spin_lock(&sysctl_lock);
1208 }
1209 /*
1210 * do not remove from the list until nobody holds it; walking the
1211 * list in do_sysctl() relies on that.
1212 */
1213 list_del_init(&p->ctl_entry);
1214}
1215
1da177e4
LT
/*
 * Initialisation hook: when CONFIG_PROC_SYSCTL is enabled, mirror
 * root_table under proc_sys_root and call init_irq_proc(); otherwise
 * this function does nothing.
 */
1216void __init sysctl_init(void)
1217{
b89a8171 1218#ifdef CONFIG_PROC_SYSCTL
330d57fb 1219 register_proc_table(root_table, proc_sys_root, &root_table_header);
1da177e4
LT
1220 init_irq_proc();
1221#endif
1222}
1223
b89a8171 1224#ifdef CONFIG_SYSCTL_SYSCALL
1da177e4
LT
/*
 * Resolve a numeric sysctl name against every registered table.
 *
 * Walks the circular list that starts at root_table_header and hands the
 * request to parse_table() for each header that can still be used.  The
 * walk stops at the first table that answers with anything other than
 * -ENOTDIR.  sysctl_lock is dropped around parse_table() and retaken
 * afterwards; the use count taken by use_table() covers that window.
 *
 * Returns -ENOTDIR for an out-of-range nlen or when no table matched,
 * -EFAULT when oldval is given but oldlenp is missing or unreadable,
 * otherwise the result of parse_table().
 */
1225int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1226 void __user *newval, size_t newlen)
1227{
1228 struct list_head *tmp;
330d57fb 1229 int error = -ENOTDIR;
1da177e4
LT
1230
1231 if (nlen <= 0 || nlen >= CTL_MAXNAME)
1232 return -ENOTDIR;
1233 if (oldval) {
/* old_len is only fetched to check that oldlenp is readable */
1234 int old_len;
1235 if (!oldlenp || get_user(old_len, oldlenp))
1236 return -EFAULT;
1237 }
330d57fb 1238 spin_lock(&sysctl_lock);
1da177e4
LT
1239 tmp = &root_table_header.ctl_entry;
1240 do {
1241 struct ctl_table_header *head =
1242 list_entry(tmp, struct ctl_table_header, ctl_entry);
1243 void *context = NULL;
330d57fb
AV
1244
/* header is being unregistered: skip it (continue still advances tmp) */
1245 if (!use_table(head))
1246 continue;
1247
1248 spin_unlock(&sysctl_lock);
1249
1250 error = parse_table(name, nlen, oldval, oldlenp,
1da177e4
LT
1251 newval, newlen, head->ctl_table,
1252 &context);
5a6b454f 1253 kfree(context);
330d57fb
AV
1254
1255 spin_lock(&sysctl_lock);
1256 unuse_table(head);
1da177e4 1257 if (error != -ENOTDIR)
330d57fb
AV
1258 break;
1259 } while ((tmp = tmp->next) != &root_table_header.ctl_entry);
1260 spin_unlock(&sysctl_lock);
1261 return error;
1da177e4
LT
1262}
1263
1264asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1265{
1266 struct __sysctl_args tmp;
1267 int error;
1268
1269 if (copy_from_user(&tmp, args, sizeof(tmp)))
1270 return -EFAULT;
1271
1272 lock_kernel();
1273 error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1274 tmp.newval, tmp.newlen);
1275 unlock_kernel();
1276 return error;
1277}
b89a8171 1278#endif /* CONFIG_SYSCTL_SYSCALL */
1da177e4
LT
1279
1280/*
1281 * ctl_perm does NOT grant the superuser all rights automatically, because
1282 * some sysctl variables are readonly even to root.
1283 */
1284
1285static int test_perm(int mode, int op)
1286{
1287 if (!current->euid)
1288 mode >>= 6;
1289 else if (in_egroup_p(0))
1290 mode >>= 3;
1291 if ((mode & op & 0007) == op)
1292 return 0;
1293 return -EACCES;
1294}
1295
1296static inline int ctl_perm(ctl_table *table, int op)
1297{
1298 int error;
1299 error = security_sysctl(table, op);
1300 if (error)
1301 return error;
1302 return test_perm(table->mode, op);
1303}
1304
b89a8171 1305#ifdef CONFIG_SYSCTL_SYSCALL
1da177e4
LT
/*
 * Walk one registered table along the numeric name vector.
 *
 * Each pass reads the next name component from user space and scans the
 * current table level for an entry whose ctl_name equals it (or is
 * CTL_ANY).  A matching directory entry is checked for search permission,
 * its strategy routine (if any) is run, and the walk descends into its
 * child table.  A matching leaf is handed to do_sysctl_strategy().
 *
 * Returns -ENOTDIR when the name runs out or nothing matches, -EFAULT
 * when a name component cannot be read, -EPERM when a directory may not
 * be searched, otherwise the strategy / do_sysctl_strategy() result.
 */
1306static int parse_table(int __user *name, int nlen,
1307 void __user *oldval, size_t __user *oldlenp,
1308 void __user *newval, size_t newlen,
1309 ctl_table *table, void **context)
1310{
1311 int n;
1312repeat:
1313 if (!nlen)
1314 return -ENOTDIR;
1315 if (get_user(n, name))
1316 return -EFAULT;
1317 for ( ; table->ctl_name; table++) {
1318 if (n == table->ctl_name || table->ctl_name == CTL_ANY) {
1319 int error;
1320 if (table->child) {
/* 001 is the execute (search) permission bit */
1321 if (ctl_perm(table, 001))
1322 return -EPERM;
1323 if (table->strategy) {
1324 error = table->strategy(
1325 table, name, nlen,
1326 oldval, oldlenp,
1327 newval, newlen, context);
1328 if (error)
1329 return error;
1330 }
/* consume this component and restart the scan one level down */
1331 name++;
1332 nlen--;
1333 table = table->child;
1334 goto repeat;
1335 }
1336 error = do_sysctl_strategy(table, name, nlen,
1337 oldval, oldlenp,
1338 newval, newlen, context);
1339 return error;
1340 }
1341 }
1342 return -ENOTDIR;
1343}
1344
/*
 * Permission is checked first: read (004) when oldval is given, write
 * (002) when newval is given.  The entry's strategy routine, if present,
 * then runs: a negative result is returned as is and a positive result
 * ends the call with 0.  Otherwise, provided the entry has data and
 * maxlen, the old value is copied out and/or the new value copied in,
 * each clamped to table->maxlen bytes.
 *
 * Returns 0 on success, -EPERM when permission is denied, -EFAULT on a
 * failed user-space access, or the strategy routine's negative result.
 */
1345/* Perform the actual read/write of a sysctl table entry. */
1346int do_sysctl_strategy (ctl_table *table,
1347 int __user *name, int nlen,
1348 void __user *oldval, size_t __user *oldlenp,
1349 void __user *newval, size_t newlen, void **context)
1350{
1351 int op = 0, rc;
1352 size_t len;
1353
1354 if (oldval)
1355 op |= 004;
1356 if (newval)
1357 op |= 002;
1358 if (ctl_perm(table, op))
1359 return -EPERM;
1360
1361 if (table->strategy) {
1362 rc = table->strategy(table, name, nlen, oldval, oldlenp,
1363 newval, newlen, context);
1364 if (rc < 0)
1365 return rc;
1366 if (rc > 0)
1367 return 0;
1368 }
1369
1370 /* If there is no strategy routine, or if the strategy returns
1371 * zero, proceed with automatic r/w */
1372 if (table->data && table->maxlen) {
1373 if (oldval && oldlenp) {
1374 if (get_user(len, oldlenp))
1375 return -EFAULT;
/* a caller-supplied length of 0 copies nothing and leaves *oldlenp alone */
1376 if (len) {
1377 if (len > table->maxlen)
1378 len = table->maxlen;
1379 if(copy_to_user(oldval, table->data, len))
1380 return -EFAULT;
/* report how many bytes were actually copied out */
1381 if(put_user(len, oldlenp))
1382 return -EFAULT;
1383 }
1384 }
1385 if (newval && newlen) {
1386 len = newlen;
1387 if (len > table->maxlen)
1388 len = table->maxlen;
1389 if(copy_from_user(table->data, newval, len))
1390 return -EFAULT;
1391 }
1392 }
1393 return 0;
1394}
b89a8171 1395#endif /* CONFIG_SYSCTL_SYSCALL */
1da177e4
LT
1396
1397/**
1398 * register_sysctl_table - register a sysctl hierarchy
1399 * @table: the top-level table structure
1400 * @insert_at_head: whether the entry should be inserted in front or at the end
1401 *
1402 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1403 * array. An entry with a ctl_name of 0 terminates the table.
1404 *
1405 * The members of the &ctl_table structure are used as follows:
1406 *
1407 * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1408 * must be unique within that level of sysctl
1409 *
1410 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1411 * enter a sysctl file
1412 *
1413 * data - a pointer to data for use by proc_handler
1414 *
1415 * maxlen - the maximum size in bytes of the data
1416 *
1417 * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1418 *
1419 * child - a pointer to the child sysctl table if this entry is a directory, or
1420 * %NULL.
1421 *
1422 * proc_handler - the text handler routine (described below)
1423 *
1424 * strategy - the strategy routine (described below)
1425 *
1426 * de - for internal use by the sysctl routines
1427 *
1428 * extra1, extra2 - extra pointers usable by the proc handler routines
1429 *
1430 * Leaf nodes in the sysctl tree will be represented by a single file
1431 * under /proc; non-leaf nodes will be represented by directories.
1432 *
1433 * sysctl(2) can automatically manage read and write requests through
1434 * the sysctl table. The data and maxlen fields of the ctl_table
1435 * struct enable minimal validation of the values being written to be
1436 * performed, and the mode field allows minimal authentication.
1437 *
1438 * More sophisticated management can be enabled by the provision of a
1439 * strategy routine with the table entry. This will be called before
1440 * any automatic read or write of the data is performed.
1441 *
1442 * The strategy routine may return
1443 *
1444 * < 0 - Error occurred (error is passed to user process)
1445 *
1446 * 0 - OK - proceed with automatic read or write.
1447 *
1448 * > 0 - OK - read or write has been done by the strategy routine, so
1449 * return immediately.
1450 *
1451 * There must be a proc_handler routine for any terminal nodes
1452 * mirrored under /proc/sys (non-terminals are handled by a built-in
1453 * directory handler). Several default handlers are available to
1454 * cover common cases -
1455 *
1456 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1457 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1458 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1459 *
1460 * It is the handler's job to read the input buffer from user memory
1461 * and process it. The handler should return 0 on success.
1462 *
1463 * This routine returns %NULL on a failure to register, and a pointer
1464 * to the table header on success.
1465 */
1466struct ctl_table_header *register_sysctl_table(ctl_table * table,
1467 int insert_at_head)
1468{
1469 struct ctl_table_header *tmp;
1470 tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1471 if (!tmp)
1472 return NULL;
1473 tmp->ctl_table = table;
1474 INIT_LIST_HEAD(&tmp->ctl_entry);
330d57fb
AV
1475 tmp->used = 0;
1476 tmp->unregistering = NULL;
1477 spin_lock(&sysctl_lock);
1da177e4
LT
1478 if (insert_at_head)
1479 list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
1480 else
1481 list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
330d57fb 1482 spin_unlock(&sysctl_lock);
b89a8171 1483#ifdef CONFIG_PROC_SYSCTL
330d57fb 1484 register_proc_table(table, proc_sys_root, tmp);
1da177e4
LT
1485#endif
1486 return tmp;
1487}
1488
1489/**
1490 * unregister_sysctl_table - unregister a sysctl table hierarchy
1491 * @header: the header returned from register_sysctl_table
1492 *
1493 * Unregisters the sysctl table and all children. proc entries may not
1494 * actually be removed until they are no longer used by anyone.
1495 */
1496void unregister_sysctl_table(struct ctl_table_header * header)
1497{
330d57fb
AV
/* start_unregistering() may wait for current users of the header */
1498 might_sleep();
1499 spin_lock(&sysctl_lock);
/* returns with sysctl_lock held and the header unlinked from the list */
1500 start_unregistering(header);
b89a8171 1501#ifdef CONFIG_PROC_SYSCTL
1da177e4
LT
1502 unregister_proc_table(header->ctl_table, proc_sys_root);
1503#endif
330d57fb 1504 spin_unlock(&sysctl_lock);
1da177e4
LT
1505 kfree(header);
1506}
1507
b89a8171
EB
1508#else /* !CONFIG_SYSCTL */
/* Stub for builds without CONFIG_SYSCTL: registers nothing and returns NULL. */
1509struct ctl_table_header * register_sysctl_table(ctl_table * table,
1510 int insert_at_head)
1511{
1512 return NULL;
1513}
1514
/* Stub for builds without CONFIG_SYSCTL: nothing was registered, so nothing to undo. */
1515void unregister_sysctl_table(struct ctl_table_header * table)
1516{
1517}
1518
1519#endif /* CONFIG_SYSCTL */
1520
1da177e4
LT
1521/*
1522 * /proc/sys support
1523 */
1524
b89a8171 1525#ifdef CONFIG_PROC_SYSCTL
1da177e4
LT
1526
/*
 * Create the /proc mirror of a sysctl table, recursing into child tables.
 *
 * @table: table to mirror; the walk stops at the ctl_name == 0 terminator
 * @root:  proc directory the entries are created under
 * @set:   value stored in de->set of every entry created here
 *
 * Entries without a procname are skipped silently; entries with neither a
 * proc_handler nor a child are skipped with a warning.  An entry with a
 * proc_handler becomes a regular file, any other becomes a directory.  A
 * directory that already exists under @root is reused rather than
 * recreated.
 */
1527/* Scan the sysctl entries in table and add them all into /proc */
330d57fb 1528static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set)
1da177e4
LT
1529{
1530 struct proc_dir_entry *de;
1531 int len;
1532 mode_t mode;
1533
1534 for (; table->ctl_name; table++) {
1535 /* Can't do anything without a proc name. */
1536 if (!table->procname)
1537 continue;
1538 /* Maybe we can't do anything with it... */
1539 if (!table->proc_handler && !table->child) {
1540 printk(KERN_WARNING "SYSCTL: Can't register %s\n",
1541 table->procname);
1542 continue;
1543 }
1544
1545 len = strlen(table->procname);
1546 mode = table->mode;
1547
1548 de = NULL;
1549 if (table->proc_handler)
1550 mode |= S_IFREG;
1551 else {
1552 mode |= S_IFDIR;
1553 for (de = root->subdir; de; de = de->next) {
1554 if (proc_match(len, table->procname, de))
1555 break;
1556 }
1557 /* If the subdir exists already, de is non-NULL */
1558 }
1559
1560 if (!de) {
1561 de = create_proc_entry(table->procname, mode, root);
1562 if (!de)
1563 continue;
330d57fb 1564 de->set = set;
1da177e4
LT
1565 de->data = (void *) table;
1566 if (table->proc_handler)
1567 de->proc_fops = &proc_sys_file_operations;
1568 }
/* remember the proc entry so unregister_proc_table() can find it */
1569 table->de = de;
1570 if (de->mode & S_IFDIR)
330d57fb 1571 register_proc_table(table->child, de, set);
1da177e4
LT
1572 }
1573}
1574
/*
 * @table: table whose proc entries are to be removed
 * @root:  proc directory the entries live under
 *
 * Child tables are handled first.  A directory that still has entries
 * afterwards is left in place.  Every other entry has de->data cleared;
 * it is removed from /proc only if its use count is zero.
 */
1575/*
1576 * Unregister a /proc sysctl table and any subdirectories.
1577 */
1578static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root)
1579{
1580 struct proc_dir_entry *de;
1581 for (; table->ctl_name; table++) {
/* entries without a recorded proc entry have nothing to remove */
1582 if (!(de = table->de))
1583 continue;
1584 if (de->mode & S_IFDIR) {
1585 if (!table->child) {
1586 printk (KERN_ALERT "Help - malformed sysctl tree on free\n");
1587 continue;
1588 }
1589 unregister_proc_table(table->child, de);
1590
1591 /* Don't unregister directories which still have entries.. */
1592 if (de->subdir)
1593 continue;
1594 }
1595
330d57fb
AV
1596 /*
1597 * In any case, mark the entry as goner; we'll keep it
1598 * around if it's busy, but we'll know to do nothing with
1599 * its fields. We are under sysctl_lock here.
1600 */
1601 de->data = NULL;
1602
1da177e4
LT
1603 /* Don't unregister proc entries that are still being used.. */
1604 if (atomic_read(&de->count))
1605 continue;
1606
1607 table->de = NULL;
1608 remove_proc_entry(table->procname, root);
1609 }
1610}
1611
/*
 * Common body of the /proc/sys read and write file operations.
 *
 * @write: non-zero for a write, zero for a read
 * @file:  the open /proc/sys file
 * @buf:   user buffer
 * @count: size of the user buffer
 * @ppos:  file position, passed through to the handler
 *
 * Under sysctl_lock the proc entry is checked for still carrying a table
 * (de->data) and a use reference is taken on its header.  The lock is
 * then dropped while permissions are checked and the entry's
 * proc_handler runs.
 *
 * Returns the byte count reported by the handler on success, -ENOTDIR
 * when the entry is gone, unusable or has no handler, -EPERM when access
 * is denied, otherwise the handler's error.
 */
1612static ssize_t do_rw_proc(int write, struct file * file, char __user * buf,
1613 size_t count, loff_t *ppos)
1614{
1615 int op;
330d57fb 1616 struct proc_dir_entry *de = PDE(file->f_dentry->d_inode);
1da177e4
LT
1617 struct ctl_table *table;
1618 size_t res;
330d57fb 1619 ssize_t error = -ENOTDIR;
1da177e4 1620
330d57fb
AV
1621 spin_lock(&sysctl_lock);
1622 if (de && de->data && use_table(de->set)) {
1623 /*
1624 * at that point we know that sysctl was not unregistered
1625 * and won't be until we finish
1626 */
1627 spin_unlock(&sysctl_lock);
1628 table = (struct ctl_table *) de->data;
1629 if (!table || !table->proc_handler)
1630 goto out;
1631 error = -EPERM;
1632 op = (write ? 002 : 004);
1633 if (ctl_perm(table, op))
1634 goto out;
1635
1636 /* careful: calling conventions are nasty here */
/* the handler gets the buffer size in res and overwrites it with the bytes handled */
1637 res = count;
1638 error = (*table->proc_handler)(table, write, file,
1639 buf, &res, ppos);
1640 if (!error)
1641 error = res;
1642 out:
/* retake the lock to drop the use reference; it is released just below */
1643 spin_lock(&sysctl_lock);
1644 unuse_table(de->set);
1645 }
1646 spin_unlock(&sysctl_lock);
1647 return error;
1da177e4
LT
1648}
1649
1650static int proc_opensys(struct inode *inode, struct file *file)
1651{
1652 if (file->f_mode & FMODE_WRITE) {
1653 /*
1654 * sysctl entries that are not writable,
1655 * are _NOT_ writable, capabilities or not.
1656 */
1657 if (!(inode->i_mode & S_IWUSR))
1658 return -EPERM;
1659 }
1660
1661 return 0;
1662}
1663
/* read() hook for /proc/sys files: do_rw_proc() with write == 0. */
1664static ssize_t proc_readsys(struct file * file, char __user * buf,
1665 size_t count, loff_t *ppos)
1666{
1667 return do_rw_proc(0, file, buf, count, ppos);
1668}
1669
/*
 * write() hook for /proc/sys files: do_rw_proc() with write == 1.
 * The const qualifier is cast away because do_rw_proc() shares one buffer
 * parameter between reads and writes.
 */
1670static ssize_t proc_writesys(struct file * file, const char __user * buf,
1671 size_t count, loff_t *ppos)
1672{
1673 return do_rw_proc(1, file, (char __user *) buf, count, ppos);
1674}
1675
b1ba4ddd
AB
/*
 * Read or write a NUL-terminated string held in a kernel buffer.
 *
 * @data:   kernel buffer holding the string
 * @maxlen: size of @data in bytes
 * @write:  non-zero to store user data into @data, zero to copy it out
 * @filp:   the file structure (not used here)
 * @buffer: the user buffer
 * @lenp:   in: size of the user buffer; out on read: bytes produced
 * @ppos:   file position, advanced by the amount consumed or produced
 *
 * A write takes user bytes up to the first NUL or newline, truncates them
 * to @maxlen - 1 and NUL-terminates the result.  A read copies out the
 * string, truncated to the user buffer, and appends a newline when there
 * is room for one.  A read at a non-zero position returns no data.
 *
 * Returns 0 on success or -EFAULT when user memory cannot be accessed.
 */
1676static int _proc_do_string(void* data, int maxlen, int write,
1677 struct file *filp, void __user *buffer,
1678 size_t *lenp, loff_t *ppos)
1da177e4
LT
1679{
1680 size_t len;
1681 char __user *p;
1682 char c;
1683
f5dd3d6f 1684 if (!data || !maxlen || !*lenp ||
1da177e4
LT
1685 (*ppos && !write)) {
1686 *lenp = 0;
1687 return 0;
1688 }
1689
1690 if (write) {
/* measure the input up to the first NUL or newline */
1691 len = 0;
1692 p = buffer;
1693 while (len < *lenp) {
1694 if (get_user(c, p++))
1695 return -EFAULT;
1696 if (c == 0 || c == '\n')
1697 break;
1698 len++;
1699 }
f5dd3d6f
SV
/* leave room for the terminating NUL */
1700 if (len >= maxlen)
1701 len = maxlen-1;
1702 if(copy_from_user(data, buffer, len))
1da177e4 1703 return -EFAULT;
f5dd3d6f 1704 ((char *) data)[len] = 0;
1da177e4
LT
/* the whole user buffer counts as consumed, even if it was truncated */
1705 *ppos += *lenp;
1706 } else {
f5dd3d6f
SV
1707 len = strlen(data);
1708 if (len > maxlen)
1709 len = maxlen;
1da177e4
LT
1710 if (len > *lenp)
1711 len = *lenp;
1712 if (len)
f5dd3d6f 1713 if(copy_to_user(buffer, data, len))
1da177e4
LT
1714 return -EFAULT;
1715 if (len < *lenp) {
1716 if(put_user('\n', ((char __user *) buffer) + len))
1717 return -EFAULT;
1718 len++;
1719 }
1720 *lenp = len;
1721 *ppos += len;
1722 }
1723 return 0;
1724}
1725
f5dd3d6f
SV
1726/**
1727 * proc_dostring - read a string sysctl
1728 * @table: the sysctl table
1729 * @write: %TRUE if this is a write to the sysctl file
1730 * @filp: the file structure
1731 * @buffer: the user buffer
1732 * @lenp: the size of the user buffer
1733 * @ppos: file position
1734 *
1735 * Reads/writes a string from/to the user buffer. If the kernel
1736 * buffer provided is not large enough to hold the string, the
1737 * string is truncated. The copied string is %NULL-terminated.
1738 * If the string is being read by the user process, it is copied
1739 * and a newline '\n' is added. It is truncated if the buffer is
1740 * not large enough.
1741 *
1742 * Returns 0 on success.
1743 */
1744int proc_dostring(ctl_table *table, int write, struct file *filp,
1745 void __user *buffer, size_t *lenp, loff_t *ppos)
1746{
1747 return _proc_do_string(table->data, table->maxlen, write, filp,
1748 buffer, lenp, ppos);
1749}
1750
1da177e4
LT
1751/*
1752 * Special case of dostring for the UTS structure. This has locks
1753 * to observe. Should this be in kernel/sys.c ????
1754 */
1755
8218c74c
SH
1756#ifndef CONFIG_UTS_NS
1757static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
1da177e4
LT
1758 void __user *buffer, size_t *lenp, loff_t *ppos)
1759{
1760 int r;
1761
1762 if (!write) {
1763 down_read(&uts_sem);
1764 r=proc_dostring(table,0,filp,buffer,lenp, ppos);
1765 up_read(&uts_sem);
1766 } else {
1767 down_write(&uts_sem);
1768 r=proc_dostring(table,1,filp,buffer,lenp, ppos);
1769 up_write(&uts_sem);
1770 }
1771 return r;
1772}
8218c74c
SH
1773#else /* !CONFIG_UTS_NS */
1774static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
1775 void __user *buffer, size_t *lenp, loff_t *ppos)
1776{
1777 int r;
1778 struct uts_namespace* uts_ns = current->nsproxy->uts_ns;
1779 char* which;
1780
1781 switch (table->ctl_name) {
1782 case KERN_OSTYPE:
1783 which = uts_ns->name.sysname;
1784 break;
1785 case KERN_NODENAME:
1786 which = uts_ns->name.nodename;
1787 break;
1788 case KERN_OSRELEASE:
1789 which = uts_ns->name.release;
1790 break;
1791 case KERN_VERSION:
1792 which = uts_ns->name.version;
1793 break;
1794 case KERN_DOMAINNAME:
1795 which = uts_ns->name.domainname;
1796 break;
1797 default:
1798 r = -EINVAL;
1799 goto out;
1800 }
1801
1802 if (!write) {
1803 down_read(&uts_sem);
1804 r=_proc_do_string(which,table->maxlen,0,filp,buffer,lenp, ppos);
1805 up_read(&uts_sem);
1806 } else {
1807 down_write(&uts_sem);
1808 r=_proc_do_string(which,table->maxlen,1,filp,buffer,lenp, ppos);
1809 up_write(&uts_sem);
1810 }
1811 out:
1812 return r;
1813}
1814#endif /* !CONFIG_UTS_NS */
1da177e4
LT
1815
/*
 * Default converter between a stored int and the (sign, magnitude) pair
 * that do_proc_dointvec() parses and prints.
 *
 * @negp:  sign flag; consumed on write, set to -1 or 0 on read
 * @lvalp: magnitude; consumed on write, produced on read
 * @valp:  the int being stored to (write) or loaded from (read)
 * @write: non-zero to store negp/lvalp into valp, zero to load from it
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Widen before negating: -val overflows for INT_MIN
			 * and would sign-extend into a huge magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1834
1835static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1836 void __user *buffer, size_t *lenp, loff_t *ppos,
1837 int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1838 int write, void *data),
1839 void *data)
1840{
1841#define TMPBUFLEN 21
1842 int *i, vleft, first=1, neg, val;
1843 unsigned long lval;
1844 size_t left, len;
1845
1846 char buf[TMPBUFLEN], *p;
1847 char __user *s = buffer;
1848
1849 if (!table->data || !table->maxlen || !*lenp ||
1850 (*ppos && !write)) {
1851 *lenp = 0;
1852 return 0;
1853 }
1854
1855 i = (int *) table->data;
1856 vleft = table->maxlen / sizeof(*i);
1857 left = *lenp;
1858
1859 if (!conv)
1860 conv = do_proc_dointvec_conv;
1861
1862 for (; left && vleft--; i++, first=0) {
1863 if (write) {
1864 while (left) {
1865 char c;
1866 if (get_user(c, s))
1867 return -EFAULT;
1868 if (!isspace(c))
1869 break;
1870 left--;
1871 s++;
1872 }
1873 if (!left)
1874 break;
1875 neg = 0;
1876 len = left;
1877 if (len > sizeof(buf) - 1)
1878 len = sizeof(buf) - 1;
1879 if (copy_from_user(buf, s, len))
1880 return -EFAULT;
1881 buf[len] = 0;
1882 p = buf;
1883 if (*p == '-' && left > 1) {
1884 neg = 1;
1885 left--, p++;
1886 }
1887 if (*p < '0' || *p > '9')
1888 break;
1889
1890 lval = simple_strtoul(p, &p, 0);
1891
1892 len = p-buf;
1893 if ((len < left) && *p && !isspace(*p))
1894 break;
1895 if (neg)
1896 val = -val;
1897 s += len;
1898 left -= len;
1899
1900 if (conv(&neg, &lval, i, 1, data))
1901 break;
1902 } else {
1903 p = buf;
1904 if (!first)
1905 *p++ = '\t';
1906
1907 if (conv(&neg, &lval, i, 0, data))
1908 break;
1909
1910 sprintf(p, "%s%lu", neg ? "-" : "", lval);
1911 len = strlen(buf);
1912 if (len > left)
1913 len = left;
1914 if(copy_to_user(s, buf, len))
1915 return -EFAULT;
1916 left -= len;
1917 s += len;
1918 }
1919 }
1920
1921 if (!write && !first && left) {
1922 if(put_user('\n', s))
1923 return -EFAULT;
1924 left--, s++;
1925 }
1926 if (write) {
1927 while (left) {
1928 char c;
1929 if (get_user(c, s++))
1930 return -EFAULT;
1931 if (!isspace(c))
1932 break;
1933 left--;
1934 }
1935 }
1936 if (write && first)
1937 return -EINVAL;
1938 *lenp -= left;
1939 *ppos += *lenp;
1940 return 0;
1941#undef TMPBUFLEN
1942}
1943
1944/**
1945 * proc_dointvec - read a vector of integers
1946 * @table: the sysctl table
1947 * @write: %TRUE if this is a write to the sysctl file
1948 * @filp: the file structure
1949 * @buffer: the user buffer
1950 * @lenp: the size of the user buffer
1951 * @ppos: file position
1952 *
1953 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1954 * values from/to the user buffer, treated as an ASCII string.
1955 *
1956 * Returns 0 on success.
1957 */
1958int proc_dointvec(ctl_table *table, int write, struct file *filp,
1959 void __user *buffer, size_t *lenp, loff_t *ppos)
1960{
1961 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1962 NULL,NULL);
1963}
1964
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2
#define OP_MAX	3
#define OP_MIN	4

/*
 * Converter that combines a written value with the stored one.
 *
 * @negp:  sign flag; consumed on write, set to -1 or 0 on read
 * @lvalp: magnitude; consumed on write, produced on read
 * @valp:  the int being updated (write) or loaded (read)
 * @write: non-zero to apply the written value, zero to load the stored one
 * @data:  points to an int holding one of the OP_* codes
 *
 * On a write, the new value replaces (OP_SET), is and-ed into (OP_AND),
 * is or-ed into (OP_OR), raises (OP_MAX) or lowers (OP_MIN) the stored
 * value.  Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		case OP_MAX:	if(*valp < val)
					*valp = val;
				break;
		case OP_MIN:	if(*valp > val)
					*valp = val;
				break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/* widen before negating: -val overflows for INT_MIN */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2001
2002/*
2003 * init may raise the set.
2004 */
2005
2006int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2007 void __user *buffer, size_t *lenp, loff_t *ppos)
2008{
2009 int op;
2010
2011 if (!capable(CAP_SYS_MODULE)) {
2012 return -EPERM;
2013 }
2014
f400e198 2015 op = is_init(current) ? OP_SET : OP_AND;
1da177e4
LT
2016 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2017 do_proc_dointvec_bset_conv,&op);
2018}
2019
/* Bounds for do_proc_dointvec_minmax_conv(); a NULL pointer means "no bound". */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Converter that range-checks written values.
 *
 * @negp:  sign flag; consumed on write, set to -1 or 0 on read
 * @lvalp: magnitude; consumed on write, produced on read
 * @valp:  the int being stored to (write) or loaded from (read)
 * @write: non-zero to store, zero to load
 * @data:  points to a struct do_proc_dointvec_minmax_conv_param
 *
 * Returns 0 on success, or -EINVAL (leaving valp untouched) when a
 * written value lies below *min or above *max.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/* widen before negating: -val overflows for INT_MIN */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2048
2049/**
2050 * proc_dointvec_minmax - read a vector of integers with min/max values
2051 * @table: the sysctl table
2052 * @write: %TRUE if this is a write to the sysctl file
2053 * @filp: the file structure
2054 * @buffer: the user buffer
2055 * @lenp: the size of the user buffer
2056 * @ppos: file position
2057 *
2058 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2059 * values from/to the user buffer, treated as an ASCII string.
2060 *
2061 * This routine will ensure the values are within the range specified by
2062 * table->extra1 (min) and table->extra2 (max).
2063 *
2064 * Returns 0 on success.
2065 */
2066int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2067 void __user *buffer, size_t *lenp, loff_t *ppos)
2068{
2069 struct do_proc_dointvec_minmax_conv_param param = {
2070 .min = (int *) table->extra1,
2071 .max = (int *) table->extra2,
2072 };
2073 return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2074 do_proc_dointvec_minmax_conv, &param);
2075}
2076
2077static int do_proc_doulongvec_minmax(ctl_table *table, int write,
2078 struct file *filp,
2079 void __user *buffer,
2080 size_t *lenp, loff_t *ppos,
2081 unsigned long convmul,
2082 unsigned long convdiv)
2083{
2084#define TMPBUFLEN 21
2085 unsigned long *i, *min, *max, val;
2086 int vleft, first=1, neg;
2087 size_t len, left;
2088 char buf[TMPBUFLEN], *p;
2089 char __user *s = buffer;
2090
2091 if (!table->data || !table->maxlen || !*lenp ||
2092 (*ppos && !write)) {
2093 *lenp = 0;
2094 return 0;
2095 }
2096
2097 i = (unsigned long *) table->data;
2098 min = (unsigned long *) table->extra1;
2099 max = (unsigned long *) table->extra2;
2100 vleft = table->maxlen / sizeof(unsigned long);
2101 left = *lenp;
2102
2103 for (; left && vleft--; i++, min++, max++, first=0) {
2104 if (write) {
2105 while (left) {
2106 char c;
2107 if (get_user(c, s))
2108 return -EFAULT;
2109 if (!isspace(c))
2110 break;
2111 left--;
2112 s++;
2113 }
2114 if (!left)
2115 break;
2116 neg = 0;
2117 len = left;
2118 if (len > TMPBUFLEN-1)
2119 len = TMPBUFLEN-1;
2120 if (copy_from_user(buf, s, len))
2121 return -EFAULT;
2122 buf[len] = 0;
2123 p = buf;
2124 if (*p == '-' && left > 1) {
2125 neg = 1;
2126 left--, p++;
2127 }
2128 if (*p < '0' || *p > '9')
2129 break;
2130 val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2131 len = p-buf;
2132 if ((len < left) && *p && !isspace(*p))
2133 break;
2134 if (neg)
2135 val = -val;
2136 s += len;
2137 left -= len;
2138
2139 if(neg)
2140 continue;
2141 if ((min && val < *min) || (max && val > *max))
2142 continue;
2143 *i = val;
2144 } else {
2145 p = buf;
2146 if (!first)
2147 *p++ = '\t';
2148 sprintf(p, "%lu", convdiv * (*i) / convmul);
2149 len = strlen(buf);
2150 if (len > left)
2151 len = left;
2152 if(copy_to_user(s, buf, len))
2153 return -EFAULT;
2154 left -= len;
2155 s += len;
2156 }
2157 }
2158
2159 if (!write && !first && left) {
2160 if(put_user('\n', s))
2161 return -EFAULT;
2162 left--, s++;
2163 }
2164 if (write) {
2165 while (left) {
2166 char c;
2167 if (get_user(c, s++))
2168 return -EFAULT;
2169 if (!isspace(c))
2170 break;
2171 left--;
2172 }
2173 }
2174 if (write && first)
2175 return -EINVAL;
2176 *lenp -= left;
2177 *ppos += *lenp;
2178 return 0;
2179#undef TMPBUFLEN
2180}
2181
2182/**
2183 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2184 * @table: the sysctl table
2185 * @write: %TRUE if this is a write to the sysctl file
2186 * @filp: the file structure
2187 * @buffer: the user buffer
2188 * @lenp: the size of the user buffer
2189 * @ppos: file position
2190 *
2191 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2192 * values from/to the user buffer, treated as an ASCII string.
2193 *
2194 * This routine will ensure the values are within the range specified by
2195 * table->extra1 (min) and table->extra2 (max).
2196 *
2197 * Returns 0 on success.
2198 */
2199int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2200 void __user *buffer, size_t *lenp, loff_t *ppos)
2201{
2202 return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2203}
2204
2205/**
2206 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2207 * @table: the sysctl table
2208 * @write: %TRUE if this is a write to the sysctl file
2209 * @filp: the file structure
2210 * @buffer: the user buffer
2211 * @lenp: the size of the user buffer
2212 * @ppos: file position
2213 *
2214 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2215 * values from/to the user buffer, treated as an ASCII string. The values
2216 * are treated as milliseconds, and converted to jiffies when they are stored.
2217 *
2218 * This routine will ensure the values are within the range specified by
2219 * table->extra1 (min) and table->extra2 (max).
2220 *
2221 * Returns 0 on success.
2222 */
2223int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2224 struct file *filp,
2225 void __user *buffer,
2226 size_t *lenp, loff_t *ppos)
2227{
2228 return do_proc_doulongvec_minmax(table, write, filp, buffer,
2229 lenp, ppos, HZ, 1000l);
2230}
2231
2232
2233static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2234 int *valp,
2235 int write, void *data)
2236{
2237 if (write) {
cba9f33d
BS
2238 if (*lvalp > LONG_MAX / HZ)
2239 return 1;
1da177e4
LT
2240 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2241 } else {
2242 int val = *valp;
2243 unsigned long lval;
2244 if (val < 0) {
2245 *negp = -1;
2246 lval = (unsigned long)-val;
2247 } else {
2248 *negp = 0;
2249 lval = (unsigned long)val;
2250 }
2251 *lvalp = lval / HZ;
2252 }
2253 return 0;
2254}
2255
2256static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2257 int *valp,
2258 int write, void *data)
2259{
2260 if (write) {
cba9f33d
BS
2261 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2262 return 1;
1da177e4
LT
2263 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2264 } else {
2265 int val = *valp;
2266 unsigned long lval;
2267 if (val < 0) {
2268 *negp = -1;
2269 lval = (unsigned long)-val;
2270 } else {
2271 *negp = 0;
2272 lval = (unsigned long)val;
2273 }
2274 *lvalp = jiffies_to_clock_t(lval);
2275 }
2276 return 0;
2277}
2278
/*
 * Converter between user-visible milliseconds and stored jiffies.
 *
 * @negp:  sign flag; consumed on write, set to -1 or 0 on read
 * @lvalp: magnitude in milliseconds; consumed on write, produced on read
 * @valp:  the stored jiffies value
 * @write: non-zero to store (via msecs_to_jiffies()), zero to load
 *         (via jiffies_to_msecs())
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/* widen before negating: -val overflows for INT_MIN */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2299
2300/**
2301 * proc_dointvec_jiffies - read a vector of integers as seconds
2302 * @table: the sysctl table
2303 * @write: %TRUE if this is a write to the sysctl file
2304 * @filp: the file structure
2305 * @buffer: the user buffer
2306 * @lenp: the size of the user buffer
2307 * @ppos: file position
2308 *
2309 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2310 * values from/to the user buffer, treated as an ASCII string.
2311 * The values read are assumed to be in seconds, and are converted into
2312 * jiffies.
2313 *
2314 * Returns 0 on success.
2315 */
2316int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2317 void __user *buffer, size_t *lenp, loff_t *ppos)
2318{
2319 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2320 do_proc_dointvec_jiffies_conv,NULL);
2321}
2322
2323/**
2324 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2325 * @table: the sysctl table
2326 * @write: %TRUE if this is a write to the sysctl file
2327 * @filp: the file structure
2328 * @buffer: the user buffer
2329 * @lenp: the size of the user buffer
1e5d5331 2330 * @ppos: pointer to the file position
1da177e4
LT
2331 *
2332 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2333 * values from/to the user buffer, treated as an ASCII string.
2334 * The values read are assumed to be in 1/USER_HZ seconds, and
2335 * are converted into jiffies.
2336 *
2337 * Returns 0 on success.
2338 */
2339int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2340 void __user *buffer, size_t *lenp, loff_t *ppos)
2341{
2342 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2343 do_proc_dointvec_userhz_jiffies_conv,NULL);
2344}
2345
2346/**
2347 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2348 * @table: the sysctl table
2349 * @write: %TRUE if this is a write to the sysctl file
2350 * @filp: the file structure
2351 * @buffer: the user buffer
2352 * @lenp: the size of the user buffer
67be2dd1
MW
2353 * @ppos: file position
2354 * @ppos: the current position in the file
1da177e4
LT
2355 *
2356 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2357 * values from/to the user buffer, treated as an ASCII string.
2358 * The values read are assumed to be in 1/1000 seconds, and
2359 * are converted into jiffies.
2360 *
2361 * Returns 0 on success.
2362 */
2363int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2364 void __user *buffer, size_t *lenp, loff_t *ppos)
2365{
2366 return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2367 do_proc_dointvec_ms_jiffies_conv, NULL);
2368}
2369
2370#else /* CONFIG_PROC_FS */
2371
2372int proc_dostring(ctl_table *table, int write, struct file *filp,
2373 void __user *buffer, size_t *lenp, loff_t *ppos)
2374{
2375 return -ENOSYS;
2376}
2377
8218c74c
SH
2378static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
2379 void __user *buffer, size_t *lenp, loff_t *ppos)
1da177e4
LT
2380{
2381 return -ENOSYS;
2382}
2383
8218c74c
SH
2384#ifdef CONFIG_SYSVIPC
2385static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
2386 void __user *buffer, size_t *lenp, loff_t *ppos)
2387{
2388 return -ENOSYS;
2389}
2390#endif
2391
1da177e4
LT
2392int proc_dointvec(ctl_table *table, int write, struct file *filp,
2393 void __user *buffer, size_t *lenp, loff_t *ppos)
2394{
2395 return -ENOSYS;
2396}
2397
2398int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2399 void __user *buffer, size_t *lenp, loff_t *ppos)
2400{
2401 return -ENOSYS;
2402}
2403
2404int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2405 void __user *buffer, size_t *lenp, loff_t *ppos)
2406{
2407 return -ENOSYS;
2408}
2409
2410int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2411 void __user *buffer, size_t *lenp, loff_t *ppos)
2412{
2413 return -ENOSYS;
2414}
2415
2416int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2417 void __user *buffer, size_t *lenp, loff_t *ppos)
2418{
2419 return -ENOSYS;
2420}
2421
2422int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2423 void __user *buffer, size_t *lenp, loff_t *ppos)
2424{
2425 return -ENOSYS;
2426}
2427
2428int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2429 void __user *buffer, size_t *lenp, loff_t *ppos)
2430{
2431 return -ENOSYS;
2432}
2433
2434int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2435 struct file *filp,
2436 void __user *buffer,
2437 size_t *lenp, loff_t *ppos)
2438{
2439 return -ENOSYS;
2440}
2441
2442
2443#endif /* CONFIG_PROC_FS */
2444
2445
b89a8171 2446#ifdef CONFIG_SYSCTL_SYSCALL
1da177e4
LT
2447/*
2448 * General sysctl support routines
2449 */
2450
2451/* The generic string strategy routine: */
2452int sysctl_string(ctl_table *table, int __user *name, int nlen,
2453 void __user *oldval, size_t __user *oldlenp,
2454 void __user *newval, size_t newlen, void **context)
2455{
1da177e4
LT
2456 if (!table->data || !table->maxlen)
2457 return -ENOTDIR;
2458
2459 if (oldval && oldlenp) {
de9e007d
LT
2460 size_t bufsize;
2461 if (get_user(bufsize, oldlenp))
1da177e4 2462 return -EFAULT;
de9e007d
LT
2463 if (bufsize) {
2464 size_t len = strlen(table->data), copied;
2465
2466 /* This shouldn't trigger for a well-formed sysctl */
2467 if (len > table->maxlen)
1da177e4 2468 len = table->maxlen;
de9e007d
LT
2469
2470 /* Copy up to a max of bufsize-1 bytes of the string */
2471 copied = (len >= bufsize) ? bufsize - 1 : len;
2472
2473 if (copy_to_user(oldval, table->data, copied) ||
2474 put_user(0, (char __user *)(oldval + copied)))
1da177e4 2475 return -EFAULT;
de9e007d 2476 if (put_user(len, oldlenp))
1da177e4
LT
2477 return -EFAULT;
2478 }
2479 }
2480 if (newval && newlen) {
de9e007d 2481 size_t len = newlen;
1da177e4
LT
2482 if (len > table->maxlen)
2483 len = table->maxlen;
2484 if(copy_from_user(table->data, newval, len))
2485 return -EFAULT;
2486 if (len == table->maxlen)
2487 len--;
2488 ((char *) table->data)[len] = 0;
2489 }
82c9df82 2490 return 1;
1da177e4
LT
2491}
2492
2493/*
2494 * This function makes sure that all of the integers in the vector
2495 * are between the minimum and maximum values given in the arrays
2496 * table->extra1 and table->extra2, respectively.
2497 */
2498int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2499 void __user *oldval, size_t __user *oldlenp,
2500 void __user *newval, size_t newlen, void **context)
2501{
2502
2503 if (newval && newlen) {
2504 int __user *vec = (int __user *) newval;
2505 int *min = (int *) table->extra1;
2506 int *max = (int *) table->extra2;
2507 size_t length;
2508 int i;
2509
2510 if (newlen % sizeof(int) != 0)
2511 return -EINVAL;
2512
2513 if (!table->extra1 && !table->extra2)
2514 return 0;
2515
2516 if (newlen > table->maxlen)
2517 newlen = table->maxlen;
2518 length = newlen / sizeof(int);
2519
2520 for (i = 0; i < length; i++) {
2521 int value;
2522 if (get_user(value, vec + i))
2523 return -EFAULT;
2524 if (min && value < min[i])
2525 return -EINVAL;
2526 if (max && value > max[i])
2527 return -EINVAL;
2528 }
2529 }
2530 return 0;
2531}
2532
2533/* Strategy function to convert jiffies to seconds */
2534int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2535 void __user *oldval, size_t __user *oldlenp,
2536 void __user *newval, size_t newlen, void **context)
2537{
2538 if (oldval) {
2539 size_t olen;
2540 if (oldlenp) {
2541 if (get_user(olen, oldlenp))
2542 return -EFAULT;
2543 if (olen!=sizeof(int))
2544 return -EINVAL;
2545 }
2546 if (put_user(*(int *)(table->data)/HZ, (int __user *)oldval) ||
2547 (oldlenp && put_user(sizeof(int),oldlenp)))
2548 return -EFAULT;
2549 }
2550 if (newval && newlen) {
2551 int new;
2552 if (newlen != sizeof(int))
2553 return -EINVAL;
2554 if (get_user(new, (int __user *)newval))
2555 return -EFAULT;
2556 *(int *)(table->data) = new*HZ;
2557 }
2558 return 1;
2559}
2560
2561/* Strategy function to convert jiffies to seconds */
2562int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2563 void __user *oldval, size_t __user *oldlenp,
2564 void __user *newval, size_t newlen, void **context)
2565{
2566 if (oldval) {
2567 size_t olen;
2568 if (oldlenp) {
2569 if (get_user(olen, oldlenp))
2570 return -EFAULT;
2571 if (olen!=sizeof(int))
2572 return -EINVAL;
2573 }
2574 if (put_user(jiffies_to_msecs(*(int *)(table->data)), (int __user *)oldval) ||
2575 (oldlenp && put_user(sizeof(int),oldlenp)))
2576 return -EFAULT;
2577 }
2578 if (newval && newlen) {
2579 int new;
2580 if (newlen != sizeof(int))
2581 return -EINVAL;
2582 if (get_user(new, (int __user *)newval))
2583 return -EFAULT;
2584 *(int *)(table->data) = msecs_to_jiffies(new);
2585 }
2586 return 1;
2587}
2588
b89a8171 2589#else /* CONFIG_SYSCTL_SYSCALL */
1da177e4
LT
2590
2591
2592asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2593{
b89a8171
EB
2594 static int msg_count;
2595
2596 if (msg_count < 5) {
2597 msg_count++;
2598 printk(KERN_INFO
2599 "warning: process `%s' used the removed sysctl "
2600 "system call\n", current->comm);
2601 }
1da177e4
LT
2602 return -ENOSYS;
2603}
2604
2605int sysctl_string(ctl_table *table, int __user *name, int nlen,
2606 void __user *oldval, size_t __user *oldlenp,
2607 void __user *newval, size_t newlen, void **context)
2608{
2609 return -ENOSYS;
2610}
2611
2612int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2613 void __user *oldval, size_t __user *oldlenp,
2614 void __user *newval, size_t newlen, void **context)
2615{
2616 return -ENOSYS;
2617}
2618
2619int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2620 void __user *oldval, size_t __user *oldlenp,
2621 void __user *newval, size_t newlen, void **context)
2622{
2623 return -ENOSYS;
2624}
2625
2626int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2627 void __user *oldval, size_t __user *oldlenp,
2628 void __user *newval, size_t newlen, void **context)
2629{
2630 return -ENOSYS;
2631}
2632
b89a8171 2633#endif /* CONFIG_SYSCTL_SYSCALL */
1da177e4
LT
2634
/*
 * The handlers above are each defined twice (a real implementation and an
 * -ENOSYS stub, selected by the config options), so the exports are
 * collected here once rather than following each definition.
 * Exception granted :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(unregister_sysctl_table);