]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/socket.c
UBUNTU: Ubuntu-snapdragon-4.4.0-1082.87
[mirror_ubuntu-artful-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1da177e4
LT
290static int init_inodecache(void)
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
297 SLAB_MEM_SPREAD),
20c2df83 298 init_once);
1da177e4
LT
299 if (sock_inode_cachep == NULL)
300 return -ENOMEM;
301 return 0;
302}
303
b87221de 304static const struct super_operations sockfs_ops = {
c6d409cf
ED
305 .alloc_inode = sock_alloc_inode,
306 .destroy_inode = sock_destroy_inode,
307 .statfs = simple_statfs,
1da177e4
LT
308};
309
c23fbb6b
ED
310/*
311 * sockfs_dname() is called from d_path().
312 */
313static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
314{
315 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 316 d_inode(dentry)->i_ino);
c23fbb6b
ED
317}
318
3ba13d17 319static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 320 .d_dname = sockfs_dname,
1da177e4
LT
321};
322
c74a1cbb
AV
323static struct dentry *sockfs_mount(struct file_system_type *fs_type,
324 int flags, const char *dev_name, void *data)
325{
326 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
327 &sockfs_dentry_operations, SOCKFS_MAGIC);
328}
329
330static struct vfsmount *sock_mnt __read_mostly;
331
332static struct file_system_type sock_fs_type = {
333 .name = "sockfs",
334 .mount = sockfs_mount,
335 .kill_sb = kill_anon_super,
336};
337
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create a file structure and map it into the fd space
 *	of the current process. On success the file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we no longer
 *	refer to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
354
aab174f0 355struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 356{
7cbe66b6 357 struct qstr name = { .name = "" };
2c48b9c4 358 struct path path;
7cbe66b6 359 struct file *file;
1da177e4 360
600e1779
MY
361 if (dname) {
362 name.name = dname;
363 name.len = strlen(name.name);
364 } else if (sock->sk) {
365 name.name = sock->sk->sk_prot_creator->name;
366 name.len = strlen(name.name);
367 }
4b936885 368 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
369 if (unlikely(!path.dentry))
370 return ERR_PTR(-ENOMEM);
2c48b9c4 371 path.mnt = mntget(sock_mnt);
39d8c1b6 372
2c48b9c4 373 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 374
2c48b9c4 375 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 376 &socket_file_ops);
b5ffe634 377 if (IS_ERR(file)) {
cc3808f8 378 /* drop dentry, keep inode */
c5ef6035 379 ihold(d_inode(path.dentry));
2c48b9c4 380 path_put(&path);
39b65252 381 return file;
cc3808f8
AV
382 }
383
384 sock->file = file;
77d27200 385 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 386 file->private_data = sock;
28407630 387 return file;
39d8c1b6 388}
56b31d1c 389EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 390
56b31d1c 391static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
392{
393 struct file *newfile;
28407630
AV
394 int fd = get_unused_fd_flags(flags);
395 if (unlikely(fd < 0))
396 return fd;
39d8c1b6 397
aab174f0 398 newfile = sock_alloc_file(sock, flags, NULL);
28407630 399 if (likely(!IS_ERR(newfile))) {
39d8c1b6 400 fd_install(fd, newfile);
28407630
AV
401 return fd;
402 }
7cbe66b6 403
28407630
AV
404 put_unused_fd(fd);
405 return PTR_ERR(newfile);
1da177e4
LT
406}
407
406a3c63 408struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 409{
6cb153ca
BL
410 if (file->f_op == &socket_file_ops)
411 return file->private_data; /* set in sock_map_fd */
412
23bb80d2
ED
413 *err = -ENOTSOCK;
414 return NULL;
6cb153ca 415}
406a3c63 416EXPORT_SYMBOL(sock_from_file);
6cb153ca 417
1da177e4 418/**
c6d409cf 419 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
420 * @fd: file handle
421 * @err: pointer to an error code return
422 *
423 * The file handle passed in is locked and the socket it is bound
424 * too is returned. If an error occurs the err pointer is overwritten
425 * with a negative errno code and NULL is returned. The function checks
426 * for both invalid handles and passing a handle which is not a socket.
427 *
428 * On a success the socket object pointer is returned.
429 */
430
431struct socket *sockfd_lookup(int fd, int *err)
432{
433 struct file *file;
1da177e4
LT
434 struct socket *sock;
435
89bddce5
SH
436 file = fget(fd);
437 if (!file) {
1da177e4
LT
438 *err = -EBADF;
439 return NULL;
440 }
89bddce5 441
6cb153ca
BL
442 sock = sock_from_file(file, err);
443 if (!sock)
1da177e4 444 fput(file);
6cb153ca
BL
445 return sock;
446}
c6d409cf 447EXPORT_SYMBOL(sockfd_lookup);
1da177e4 448
6cb153ca
BL
449static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
450{
00e188ef 451 struct fd f = fdget(fd);
6cb153ca
BL
452 struct socket *sock;
453
3672558c 454 *err = -EBADF;
00e188ef
AV
455 if (f.file) {
456 sock = sock_from_file(f.file, err);
457 if (likely(sock)) {
458 *fput_needed = f.flags;
6cb153ca 459 return sock;
00e188ef
AV
460 }
461 fdput(f);
1da177e4 462 }
6cb153ca 463 return NULL;
1da177e4
LT
464}
465
600e1779
MY
466#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
467#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
468#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
469static ssize_t sockfs_getxattr(struct dentry *dentry,
470 const char *name, void *value, size_t size)
471{
472 const char *proto_name;
473 size_t proto_size;
474 int error;
475
476 error = -ENODATA;
477 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
478 proto_name = dentry->d_name.name;
479 proto_size = strlen(proto_name);
480
481 if (value) {
482 error = -ERANGE;
483 if (proto_size + 1 > size)
484 goto out;
485
486 strncpy(value, proto_name, proto_size + 1);
487 }
488 error = proto_size + 1;
489 }
490
491out:
492 return error;
493}
494
495static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
496 size_t size)
497{
498 ssize_t len;
499 ssize_t used = 0;
500
c5ef6035 501 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
502 if (len < 0)
503 return len;
504 used += len;
505 if (buffer) {
506 if (size < used)
507 return -ERANGE;
508 buffer += len;
509 }
510
511 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
512 used += len;
513 if (buffer) {
514 if (size < used)
515 return -ERANGE;
516 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
517 buffer += len;
518 }
519
520 return used;
521}
522
523static const struct inode_operations sockfs_inode_ops = {
524 .getxattr = sockfs_getxattr,
525 .listxattr = sockfs_listxattr,
526};
527
1da177e4
LT
528/**
529 * sock_alloc - allocate a socket
89bddce5 530 *
1da177e4
LT
531 * Allocate a new inode and socket object. The two are bound together
532 * and initialised. The socket is then returned. If we are out of inodes
533 * NULL is returned.
534 */
535
536static struct socket *sock_alloc(void)
537{
89bddce5
SH
538 struct inode *inode;
539 struct socket *sock;
1da177e4 540
a209dfc7 541 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
542 if (!inode)
543 return NULL;
544
545 sock = SOCKET_I(inode);
546
29a020d3 547 kmemcheck_annotate_bitfield(sock, type);
85fe4025 548 inode->i_ino = get_next_ino();
89bddce5 549 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
550 inode->i_uid = current_fsuid();
551 inode->i_gid = current_fsgid();
600e1779 552 inode->i_op = &sockfs_inode_ops;
1da177e4 553
19e8d69c 554 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
555 return sock;
556}
557
1da177e4
LT
558/**
559 * sock_release - close a socket
560 * @sock: socket to close
561 *
562 * The socket is released from the protocol stack if it has a release
563 * callback, and the inode is then released if the socket is bound to
89bddce5 564 * an inode not a file.
1da177e4 565 */
89bddce5 566
1da177e4
LT
567void sock_release(struct socket *sock)
568{
569 if (sock->ops) {
570 struct module *owner = sock->ops->owner;
571
572 sock->ops->release(sock);
573 sock->ops = NULL;
574 module_put(owner);
575 }
576
eaefd110 577 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 578 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 579
19e8d69c 580 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
581 if (!sock->file) {
582 iput(SOCK_INODE(sock));
583 return;
584 }
89bddce5 585 sock->file = NULL;
1da177e4 586}
c6d409cf 587EXPORT_SYMBOL(sock_release);
1da177e4 588
67cc0d40 589void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
20d49473 590{
140c55d4
ED
591 u8 flags = *tx_flags;
592
b9f40e21 593 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
594 flags |= SKBTX_HW_TSTAMP;
595
b9f40e21 596 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
597 flags |= SKBTX_SW_TSTAMP;
598
e7fd2885 599 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
600 flags |= SKBTX_SCHED_TSTAMP;
601
e1c8a607 602 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
140c55d4 603 flags |= SKBTX_ACK_TSTAMP;
e7fd2885 604
140c55d4 605 *tx_flags = flags;
20d49473 606}
67cc0d40 607EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 608
d8725c86 609static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 610{
01e97e65 611 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
612 BUG_ON(ret == -EIOCBQUEUED);
613 return ret;
1da177e4
LT
614}
615
/*
 * Send @msg on @sock.  The security hook is asked first; a non-zero
 * answer is returned as-is, otherwise the message goes to the protocol.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
c6d409cf 623EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
624
625int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
626 struct kvec *vec, size_t num, size_t size)
627{
6aa24814 628 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 629 return sock_sendmsg(sock, msg);
1da177e4 630}
c6d409cf 631EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 632
92f37fd2
ED
633/*
634 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
635 */
636void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
637 struct sk_buff *skb)
638{
20d49473 639 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 640 struct scm_timestamping tss;
20d49473
PO
641 int empty = 1;
642 struct skb_shared_hwtstamps *shhwtstamps =
643 skb_hwtstamps(skb);
644
645 /* Race occurred between timestamp enabling and packet
646 receiving. Fill in the current time for now. */
647 if (need_software_tstamp && skb->tstamp.tv64 == 0)
648 __net_timestamp(skb);
649
650 if (need_software_tstamp) {
651 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
652 struct timeval tv;
653 skb_get_timestamp(skb, &tv);
654 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
655 sizeof(tv), &tv);
656 } else {
f24b9be5
WB
657 struct timespec ts;
658 skb_get_timestampns(skb, &ts);
20d49473 659 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 660 sizeof(ts), &ts);
20d49473
PO
661 }
662 }
663
f24b9be5 664 memset(&tss, 0, sizeof(tss));
c199105d 665 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 666 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 667 empty = 0;
4d276eb6 668 if (shhwtstamps &&
b9f40e21 669 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 670 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 671 empty = 0;
20d49473
PO
672 if (!empty)
673 put_cmsg(msg, SOL_SOCKET,
f24b9be5 674 SCM_TIMESTAMPING, sizeof(tss), &tss);
92f37fd2 675}
7c81fd8b
ACM
676EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
677
6e3e939f
JB
678void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
679 struct sk_buff *skb)
680{
681 int ack;
682
683 if (!sock_flag(sk, SOCK_WIFI_STATUS))
684 return;
685 if (!skb->wifi_acked_valid)
686 return;
687
688 ack = skb->wifi_acked;
689
690 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
691}
692EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
693
11165f14 694static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
695 struct sk_buff *skb)
3b885787 696{
744d5a3e 697 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 698 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 699 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
700}
701
/* Emit the timestamp control messages, then the drop-count one. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
767dd033 708EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 709
1b784140 710static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
b9b7745a 711 int flags)
1da177e4 712{
b9b7745a 713 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
714}
715
/*
 * Receive into @msg from @sock.  The security hook is asked first; a
 * non-zero answer is returned as-is, otherwise the protocol is called.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
c6d409cf 722EXPORT_SYMBOL(sock_recvmsg);
1da177e4 723
c1249c0a
ML
724/**
725 * kernel_recvmsg - Receive a message from a socket (kernel space)
726 * @sock: The socket to receive the message from
727 * @msg: Received message
728 * @vec: Input s/g array for message data
729 * @num: Size of input s/g array
730 * @size: Number of bytes to read
731 * @flags: Message flags (MSG_DONTWAIT, etc...)
732 *
733 * On return the msg structure contains the scatter/gather array passed in the
734 * vec argument. The array is modified so that it consists of the unfilled
735 * portion of the original array.
736 *
737 * The returned value is the total number of bytes received, or an error.
738 */
89bddce5
SH
739int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
740 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
741{
742 mm_segment_t oldfs = get_fs();
743 int result;
744
6aa24814 745 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 746 set_fs(KERNEL_DS);
b9b7745a 747 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
748 set_fs(oldfs);
749 return result;
750}
c6d409cf 751EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 752
ce1d4d3e
CH
753static ssize_t sock_sendpage(struct file *file, struct page *page,
754 int offset, size_t size, loff_t *ppos, int more)
1da177e4 755{
1da177e4
LT
756 struct socket *sock;
757 int flags;
758
ce1d4d3e
CH
759 sock = file->private_data;
760
35f9c09f
ED
761 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
762 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
763 flags |= more;
ce1d4d3e 764
e6949583 765 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 766}
1da177e4 767
9c55e01c 768static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 769 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
770 unsigned int flags)
771{
772 struct socket *sock = file->private_data;
773
997b37da
RDC
774 if (unlikely(!sock->ops->splice_read))
775 return -EINVAL;
776
9c55e01c
JA
777 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
778}
779
8ae5e030 780static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 781{
6d652330
AV
782 struct file *file = iocb->ki_filp;
783 struct socket *sock = file->private_data;
0345f931 784 struct msghdr msg = {.msg_iter = *to,
785 .msg_iocb = iocb};
8ae5e030 786 ssize_t res;
ce1d4d3e 787
8ae5e030
AV
788 if (file->f_flags & O_NONBLOCK)
789 msg.msg_flags = MSG_DONTWAIT;
790
791 if (iocb->ki_pos != 0)
1da177e4 792 return -ESPIPE;
027445c3 793
66ee59af 794 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
795 return 0;
796
b9b7745a 797 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
798 *to = msg.msg_iter;
799 return res;
1da177e4
LT
800}
801
8ae5e030 802static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 803{
6d652330
AV
804 struct file *file = iocb->ki_filp;
805 struct socket *sock = file->private_data;
0345f931 806 struct msghdr msg = {.msg_iter = *from,
807 .msg_iocb = iocb};
8ae5e030 808 ssize_t res;
1da177e4 809
8ae5e030 810 if (iocb->ki_pos != 0)
ce1d4d3e 811 return -ESPIPE;
027445c3 812
8ae5e030
AV
813 if (file->f_flags & O_NONBLOCK)
814 msg.msg_flags = MSG_DONTWAIT;
815
6d652330
AV
816 if (sock->type == SOCK_SEQPACKET)
817 msg.msg_flags |= MSG_EOR;
818
d8725c86 819 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
820 *from = msg.msg_iter;
821 return res;
1da177e4
LT
822}
823
1da177e4
LT
824/*
825 * Atomic setting of ioctl hooks to avoid race
826 * with module unload.
827 */
828
4a3e2f71 829static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 830static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 831
881d966b 832void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 833{
4a3e2f71 834 mutex_lock(&br_ioctl_mutex);
1da177e4 835 br_ioctl_hook = hook;
4a3e2f71 836 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
837}
838EXPORT_SYMBOL(brioctl_set);
839
4a3e2f71 840static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 841static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 842
881d966b 843void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 844{
4a3e2f71 845 mutex_lock(&vlan_ioctl_mutex);
1da177e4 846 vlan_ioctl_hook = hook;
4a3e2f71 847 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
848}
849EXPORT_SYMBOL(vlan_ioctl_set);
850
4a3e2f71 851static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 852static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 853
89bddce5 854void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 855{
4a3e2f71 856 mutex_lock(&dlci_ioctl_mutex);
1da177e4 857 dlci_ioctl_hook = hook;
4a3e2f71 858 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
859}
860EXPORT_SYMBOL(dlci_ioctl_set);
861
6b96018b
AB
862static long sock_do_ioctl(struct net *net, struct socket *sock,
863 unsigned int cmd, unsigned long arg)
864{
865 int err;
866 void __user *argp = (void __user *)arg;
867
868 err = sock->ops->ioctl(sock, cmd, arg);
869
870 /*
871 * If this ioctl is unknown try to hand it down
872 * to the NIC driver.
873 */
874 if (err == -ENOIOCTLCMD)
875 err = dev_ioctl(net, cmd, argp);
876
877 return err;
878}
879
1da177e4
LT
880/*
881 * With an ioctl, arg may well be a user mode pointer, but we don't know
882 * what to do with it - that's up to the protocol still.
883 */
884
885static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
886{
887 struct socket *sock;
881d966b 888 struct sock *sk;
1da177e4
LT
889 void __user *argp = (void __user *)arg;
890 int pid, err;
881d966b 891 struct net *net;
1da177e4 892
b69aee04 893 sock = file->private_data;
881d966b 894 sk = sock->sk;
3b1e0a65 895 net = sock_net(sk);
1da177e4 896 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 897 err = dev_ioctl(net, cmd, argp);
1da177e4 898 } else
3d23e349 899#ifdef CONFIG_WEXT_CORE
1da177e4 900 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 901 err = dev_ioctl(net, cmd, argp);
1da177e4 902 } else
3d23e349 903#endif
89bddce5 904 switch (cmd) {
1da177e4
LT
905 case FIOSETOWN:
906 case SIOCSPGRP:
907 err = -EFAULT;
908 if (get_user(pid, (int __user *)argp))
909 break;
e0b93edd
JL
910 f_setown(sock->file, pid, 1);
911 err = 0;
1da177e4
LT
912 break;
913 case FIOGETOWN:
914 case SIOCGPGRP:
609d7fa9 915 err = put_user(f_getown(sock->file),
89bddce5 916 (int __user *)argp);
1da177e4
LT
917 break;
918 case SIOCGIFBR:
919 case SIOCSIFBR:
920 case SIOCBRADDBR:
921 case SIOCBRDELBR:
922 err = -ENOPKG;
923 if (!br_ioctl_hook)
924 request_module("bridge");
925
4a3e2f71 926 mutex_lock(&br_ioctl_mutex);
89bddce5 927 if (br_ioctl_hook)
881d966b 928 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 929 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
930 break;
931 case SIOCGIFVLAN:
932 case SIOCSIFVLAN:
933 err = -ENOPKG;
934 if (!vlan_ioctl_hook)
935 request_module("8021q");
936
4a3e2f71 937 mutex_lock(&vlan_ioctl_mutex);
1da177e4 938 if (vlan_ioctl_hook)
881d966b 939 err = vlan_ioctl_hook(net, argp);
4a3e2f71 940 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 941 break;
1da177e4
LT
942 case SIOCADDDLCI:
943 case SIOCDELDLCI:
944 err = -ENOPKG;
945 if (!dlci_ioctl_hook)
946 request_module("dlci");
947
7512cbf6
PE
948 mutex_lock(&dlci_ioctl_mutex);
949 if (dlci_ioctl_hook)
1da177e4 950 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 951 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
952 break;
953 default:
6b96018b 954 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 955 break;
89bddce5 956 }
1da177e4
LT
957 return err;
958}
959
960int sock_create_lite(int family, int type, int protocol, struct socket **res)
961{
962 int err;
963 struct socket *sock = NULL;
89bddce5 964
1da177e4
LT
965 err = security_socket_create(family, type, protocol, 1);
966 if (err)
967 goto out;
968
969 sock = sock_alloc();
970 if (!sock) {
971 err = -ENOMEM;
972 goto out;
973 }
974
1da177e4 975 sock->type = type;
7420ed23
VY
976 err = security_socket_post_create(sock, family, type, protocol, 1);
977 if (err)
978 goto out_release;
979
1da177e4
LT
980out:
981 *res = sock;
982 return err;
7420ed23
VY
983out_release:
984 sock_release(sock);
985 sock = NULL;
986 goto out;
1da177e4 987}
c6d409cf 988EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
989
990/* No kernel lock held - perfect */
89bddce5 991static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 992{
cbf55001 993 unsigned int busy_flag = 0;
1da177e4
LT
994 struct socket *sock;
995
996 /*
89bddce5 997 * We can't return errors to poll, so it's either yes or no.
1da177e4 998 */
b69aee04 999 sock = file->private_data;
2d48d67f 1000
cbf55001 1001 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1002 /* this socket can poll_ll so tell the system call */
cbf55001 1003 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1004
1005 /* once, only if requested by syscall */
cbf55001
ET
1006 if (wait && (wait->_key & POLL_BUSY_LOOP))
1007 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1008 }
1009
cbf55001 1010 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1011}
1012
89bddce5 1013static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1014{
b69aee04 1015 struct socket *sock = file->private_data;
1da177e4
LT
1016
1017 return sock->ops->mmap(file, sock, vma);
1018}
1019
/*
 * release file operation: releases the socket embedded in @inode.
 * Always returns 0; @filp is unused.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1025
1026/*
1027 * Update the socket async list
1028 *
1029 * Fasync_list locking strategy.
1030 *
1031 * 1. fasync_list is modified only under process context socket lock
1032 * i.e. under semaphore.
1033 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1034 * or under socket lock
1da177e4
LT
1035 */
1036
1037static int sock_fasync(int fd, struct file *filp, int on)
1038{
989a2979
ED
1039 struct socket *sock = filp->private_data;
1040 struct sock *sk = sock->sk;
eaefd110 1041 struct socket_wq *wq;
1da177e4 1042
989a2979 1043 if (sk == NULL)
1da177e4 1044 return -EINVAL;
1da177e4
LT
1045
1046 lock_sock(sk);
eaefd110
ED
1047 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1048 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1049
eaefd110 1050 if (!wq->fasync_list)
989a2979
ED
1051 sock_reset_flag(sk, SOCK_FASYNC);
1052 else
bcdce719 1053 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1054
989a2979 1055 release_sock(sk);
1da177e4
LT
1056 return 0;
1057}
1058
ceb5d58b 1059/* This function may be called only under rcu_lock */
1da177e4 1060
ceb5d58b 1061int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1062{
ceb5d58b 1063 if (!wq || !wq->fasync_list)
1da177e4 1064 return -1;
ceb5d58b 1065
89bddce5 1066 switch (how) {
8d8ad9d7 1067 case SOCK_WAKE_WAITD:
ceb5d58b 1068 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1069 break;
1070 goto call_kill;
8d8ad9d7 1071 case SOCK_WAKE_SPACE:
ceb5d58b 1072 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1073 break;
1074 /* fall through */
8d8ad9d7 1075 case SOCK_WAKE_IO:
89bddce5 1076call_kill:
43815482 1077 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1078 break;
8d8ad9d7 1079 case SOCK_WAKE_URG:
43815482 1080 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1081 }
ceb5d58b 1082
1da177e4
LT
1083 return 0;
1084}
c6d409cf 1085EXPORT_SYMBOL(sock_wake_async);
1da177e4 1086
721db93a 1087int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1088 struct socket **res, int kern)
1da177e4
LT
1089{
1090 int err;
1091 struct socket *sock;
55737fda 1092 const struct net_proto_family *pf;
1da177e4
LT
1093
1094 /*
89bddce5 1095 * Check protocol is in range
1da177e4
LT
1096 */
1097 if (family < 0 || family >= NPROTO)
1098 return -EAFNOSUPPORT;
1099 if (type < 0 || type >= SOCK_MAX)
1100 return -EINVAL;
1101
1102 /* Compatibility.
1103
1104 This uglymoron is moved from INET layer to here to avoid
1105 deadlock in module load.
1106 */
1107 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1108 static int warned;
1da177e4
LT
1109 if (!warned) {
1110 warned = 1;
3410f22e
YY
1111 pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1112 current->comm);
1da177e4
LT
1113 }
1114 family = PF_PACKET;
1115 }
1116
1117 err = security_socket_create(family, type, protocol, kern);
1118 if (err)
1119 return err;
89bddce5 1120
55737fda
SH
1121 /*
1122 * Allocate the socket and allow the family to set things up. if
1123 * the protocol is 0, the family is instructed to select an appropriate
1124 * default.
1125 */
1126 sock = sock_alloc();
1127 if (!sock) {
e87cc472 1128 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1129 return -ENFILE; /* Not exactly a match, but its the
1130 closest posix thing */
1131 }
1132
1133 sock->type = type;
1134
95a5afca 1135#ifdef CONFIG_MODULES
89bddce5
SH
1136 /* Attempt to load a protocol module if the find failed.
1137 *
1138 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1139 * requested real, full-featured networking support upon configuration.
1140 * Otherwise module support will break!
1141 */
190683a9 1142 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1143 request_module("net-pf-%d", family);
1da177e4
LT
1144#endif
1145
55737fda
SH
1146 rcu_read_lock();
1147 pf = rcu_dereference(net_families[family]);
1148 err = -EAFNOSUPPORT;
1149 if (!pf)
1150 goto out_release;
1da177e4
LT
1151
1152 /*
1153 * We will call the ->create function, that possibly is in a loadable
1154 * module, so we have to bump that loadable module refcnt first.
1155 */
55737fda 1156 if (!try_module_get(pf->owner))
1da177e4
LT
1157 goto out_release;
1158
55737fda
SH
1159 /* Now protected by module ref count */
1160 rcu_read_unlock();
1161
3f378b68 1162 err = pf->create(net, sock, protocol, kern);
55737fda 1163 if (err < 0)
1da177e4 1164 goto out_module_put;
a79af59e 1165
1da177e4
LT
1166 /*
1167 * Now to bump the refcnt of the [loadable] module that owns this
1168 * socket at sock_release time we decrement its refcnt.
1169 */
55737fda
SH
1170 if (!try_module_get(sock->ops->owner))
1171 goto out_module_busy;
1172
1da177e4
LT
1173 /*
1174 * Now that we're done with the ->create function, the [loadable]
1175 * module can have its refcnt decremented
1176 */
55737fda 1177 module_put(pf->owner);
7420ed23
VY
1178 err = security_socket_post_create(sock, family, type, protocol, kern);
1179 if (err)
3b185525 1180 goto out_sock_release;
55737fda 1181 *res = sock;
1da177e4 1182
55737fda
SH
1183 return 0;
1184
1185out_module_busy:
1186 err = -EAFNOSUPPORT;
1da177e4 1187out_module_put:
55737fda
SH
1188 sock->ops = NULL;
1189 module_put(pf->owner);
1190out_sock_release:
1da177e4 1191 sock_release(sock);
55737fda
SH
1192 return err;
1193
1194out_release:
1195 rcu_read_unlock();
1196 goto out_sock_release;
1da177e4 1197}
721db93a 1198EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1199
1200int sock_create(int family, int type, int protocol, struct socket **res)
1201{
1b8d7ae4 1202 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1203}
c6d409cf 1204EXPORT_SYMBOL(sock_create);
1da177e4 1205
/*
 * Create a socket in namespace @net, passing kern == 1 to __sock_create().
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1211
3e0fa65f 1212SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1213{
1214 int retval;
1215 struct socket *sock;
a677a039
UD
1216 int flags;
1217
e38b36f3
UD
1218 /* Check the SOCK_* constants for consistency. */
1219 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1220 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1221 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1222 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1223
a677a039 1224 flags = type & ~SOCK_TYPE_MASK;
77d27200 1225 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1226 return -EINVAL;
1227 type &= SOCK_TYPE_MASK;
1da177e4 1228
aaca0bdc
UD
1229 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1230 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1231
1da177e4
LT
1232 retval = sock_create(family, type, protocol, &sock);
1233 if (retval < 0)
1234 goto out;
1235
77d27200 1236 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1237 if (retval < 0)
1238 goto out_release;
1239
1240out:
1241 /* It may be already another descriptor 8) Not kernel problem. */
1242 return retval;
1243
1244out_release:
1245 sock_release(sock);
1246 return retval;
1247}
1248
1249/*
1250 * Create a pair of connected sockets.
1251 */
1252
3e0fa65f
HC
1253SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1254 int __user *, usockvec)
1da177e4
LT
1255{
1256 struct socket *sock1, *sock2;
1257 int fd1, fd2, err;
db349509 1258 struct file *newfile1, *newfile2;
a677a039
UD
1259 int flags;
1260
1261 flags = type & ~SOCK_TYPE_MASK;
77d27200 1262 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1263 return -EINVAL;
1264 type &= SOCK_TYPE_MASK;
1da177e4 1265
aaca0bdc
UD
1266 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1267 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1268
1da177e4
LT
1269 /*
1270 * Obtain the first socket and check if the underlying protocol
1271 * supports the socketpair call.
1272 */
1273
1274 err = sock_create(family, type, protocol, &sock1);
1275 if (err < 0)
1276 goto out;
1277
1278 err = sock_create(family, type, protocol, &sock2);
1279 if (err < 0)
1280 goto out_release_1;
1281
1282 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1283 if (err < 0)
1da177e4
LT
1284 goto out_release_both;
1285
28407630 1286 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1287 if (unlikely(fd1 < 0)) {
1288 err = fd1;
db349509 1289 goto out_release_both;
bf3c23d1 1290 }
d73aa286 1291
28407630 1292 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1293 if (unlikely(fd2 < 0)) {
1294 err = fd2;
d73aa286 1295 goto out_put_unused_1;
28407630
AV
1296 }
1297
aab174f0 1298 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1299 if (IS_ERR(newfile1)) {
28407630 1300 err = PTR_ERR(newfile1);
d73aa286 1301 goto out_put_unused_both;
28407630
AV
1302 }
1303
aab174f0 1304 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1305 if (IS_ERR(newfile2)) {
1306 err = PTR_ERR(newfile2);
d73aa286 1307 goto out_fput_1;
db349509
AV
1308 }
1309
d73aa286
YD
1310 err = put_user(fd1, &usockvec[0]);
1311 if (err)
1312 goto out_fput_both;
1313
1314 err = put_user(fd2, &usockvec[1]);
1315 if (err)
1316 goto out_fput_both;
1317
157cf649 1318 audit_fd_pair(fd1, fd2);
d73aa286 1319
db349509
AV
1320 fd_install(fd1, newfile1);
1321 fd_install(fd2, newfile2);
1da177e4
LT
1322 /* fd1 and fd2 may be already another descriptors.
1323 * Not kernel problem.
1324 */
1325
d73aa286 1326 return 0;
1da177e4 1327
d73aa286
YD
1328out_fput_both:
1329 fput(newfile2);
1330 fput(newfile1);
1331 put_unused_fd(fd2);
1332 put_unused_fd(fd1);
1333 goto out;
1334
1335out_fput_1:
1336 fput(newfile1);
1337 put_unused_fd(fd2);
1338 put_unused_fd(fd1);
1339 sock_release(sock2);
1340 goto out;
1da177e4 1341
d73aa286
YD
1342out_put_unused_both:
1343 put_unused_fd(fd2);
1344out_put_unused_1:
1345 put_unused_fd(fd1);
1da177e4 1346out_release_both:
89bddce5 1347 sock_release(sock2);
1da177e4 1348out_release_1:
89bddce5 1349 sock_release(sock1);
1da177e4
LT
1350out:
1351 return err;
1352}
1353
1da177e4
LT
1354/*
1355 * Bind a name to a socket. Nothing much to do here since it's
1356 * the protocol's responsibility to handle the local address.
1357 *
1358 * We move the socket address to kernel space before we call
1359 * the protocol layer (having also checked the address is ok).
1360 */
1361
20f37034 1362SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1363{
1364 struct socket *sock;
230b1839 1365 struct sockaddr_storage address;
6cb153ca 1366 int err, fput_needed;
1da177e4 1367
89bddce5 1368 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1369 if (sock) {
43db362d 1370 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1371 if (err >= 0) {
1372 err = security_socket_bind(sock,
230b1839 1373 (struct sockaddr *)&address,
89bddce5 1374 addrlen);
6cb153ca
BL
1375 if (!err)
1376 err = sock->ops->bind(sock,
89bddce5 1377 (struct sockaddr *)
230b1839 1378 &address, addrlen);
1da177e4 1379 }
6cb153ca 1380 fput_light(sock->file, fput_needed);
89bddce5 1381 }
1da177e4
LT
1382 return err;
1383}
1384
1da177e4
LT
1385/*
1386 * Perform a listen. Basically, we allow the protocol to do anything
1387 * necessary for a listen, and if that works, we mark the socket as
1388 * ready for listening.
1389 */
1390
3e0fa65f 1391SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1392{
1393 struct socket *sock;
6cb153ca 1394 int err, fput_needed;
b8e1f9b5 1395 int somaxconn;
89bddce5
SH
1396
1397 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1398 if (sock) {
8efa6e93 1399 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1400 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1401 backlog = somaxconn;
1da177e4
LT
1402
1403 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1404 if (!err)
1405 err = sock->ops->listen(sock, backlog);
1da177e4 1406
6cb153ca 1407 fput_light(sock->file, fput_needed);
1da177e4
LT
1408 }
1409 return err;
1410}
1411
1da177e4
LT
1412/*
1413 * For accept, we attempt to create a new socket, set up the link
1414 * with the client, wake up the client, then return the new
1415 * connected fd. We collect the address of the connector in kernel
1416 * space and move it to user at the very end. This is unclean because
1417 * we open the socket then return an error.
1418 *
1419 * 1003.1g adds the ability to recvmsg() to query connection pending
1420 * status to recvmsg. We need to add that support in a way that's
1421 * clean when we restructure accept also.
1422 */
1423
20f37034
HC
1424SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1425 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1426{
1427 struct socket *sock, *newsock;
39d8c1b6 1428 struct file *newfile;
6cb153ca 1429 int err, len, newfd, fput_needed;
230b1839 1430 struct sockaddr_storage address;
1da177e4 1431
77d27200 1432 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1433 return -EINVAL;
1434
1435 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1436 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1437
6cb153ca 1438 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1439 if (!sock)
1440 goto out;
1441
1442 err = -ENFILE;
c6d409cf
ED
1443 newsock = sock_alloc();
1444 if (!newsock)
1da177e4
LT
1445 goto out_put;
1446
1447 newsock->type = sock->type;
1448 newsock->ops = sock->ops;
1449
1da177e4
LT
1450 /*
1451 * We don't need try_module_get here, as the listening socket (sock)
1452 * has the protocol module (sock->ops->owner) held.
1453 */
1454 __module_get(newsock->ops->owner);
1455
28407630 1456 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1457 if (unlikely(newfd < 0)) {
1458 err = newfd;
9a1875e6
DM
1459 sock_release(newsock);
1460 goto out_put;
39d8c1b6 1461 }
aab174f0 1462 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1463 if (IS_ERR(newfile)) {
28407630
AV
1464 err = PTR_ERR(newfile);
1465 put_unused_fd(newfd);
1466 sock_release(newsock);
1467 goto out_put;
1468 }
39d8c1b6 1469
a79af59e
FF
1470 err = security_socket_accept(sock, newsock);
1471 if (err)
39d8c1b6 1472 goto out_fd;
a79af59e 1473
1da177e4
LT
1474 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1475 if (err < 0)
39d8c1b6 1476 goto out_fd;
1da177e4
LT
1477
1478 if (upeer_sockaddr) {
230b1839 1479 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1480 &len, 2) < 0) {
1da177e4 1481 err = -ECONNABORTED;
39d8c1b6 1482 goto out_fd;
1da177e4 1483 }
43db362d 1484 err = move_addr_to_user(&address,
230b1839 1485 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1486 if (err < 0)
39d8c1b6 1487 goto out_fd;
1da177e4
LT
1488 }
1489
1490 /* File flags are not inherited via accept() unlike another OSes. */
1491
39d8c1b6
DM
1492 fd_install(newfd, newfile);
1493 err = newfd;
1da177e4 1494
1da177e4 1495out_put:
6cb153ca 1496 fput_light(sock->file, fput_needed);
1da177e4
LT
1497out:
1498 return err;
39d8c1b6 1499out_fd:
9606a216 1500 fput(newfile);
39d8c1b6 1501 put_unused_fd(newfd);
1da177e4
LT
1502 goto out_put;
1503}
1504
20f37034
HC
1505SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1506 int __user *, upeer_addrlen)
aaca0bdc 1507{
de11defe 1508 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1509}
1510
1da177e4
LT
1511/*
1512 * Attempt to connect to a socket with the server address. The address
1513 * is in user space so we verify it is OK and move it to kernel space.
1514 *
1515 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1516 * break bindings
1517 *
1518 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1519 * other SEQPACKET protocols that take time to connect() as it doesn't
1520 * include the -EINPROGRESS status for such sockets.
1521 */
1522
20f37034
HC
1523SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1524 int, addrlen)
1da177e4
LT
1525{
1526 struct socket *sock;
230b1839 1527 struct sockaddr_storage address;
6cb153ca 1528 int err, fput_needed;
1da177e4 1529
6cb153ca 1530 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1531 if (!sock)
1532 goto out;
43db362d 1533 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1534 if (err < 0)
1535 goto out_put;
1536
89bddce5 1537 err =
230b1839 1538 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1539 if (err)
1540 goto out_put;
1541
230b1839 1542 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1543 sock->file->f_flags);
1544out_put:
6cb153ca 1545 fput_light(sock->file, fput_needed);
1da177e4
LT
1546out:
1547 return err;
1548}
1549
1550/*
1551 * Get the local address ('name') of a socket object. Move the obtained
1552 * name to user space.
1553 */
1554
20f37034
HC
1555SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1556 int __user *, usockaddr_len)
1da177e4
LT
1557{
1558 struct socket *sock;
230b1839 1559 struct sockaddr_storage address;
6cb153ca 1560 int len, err, fput_needed;
89bddce5 1561
6cb153ca 1562 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1563 if (!sock)
1564 goto out;
1565
1566 err = security_socket_getsockname(sock);
1567 if (err)
1568 goto out_put;
1569
230b1839 1570 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1571 if (err)
1572 goto out_put;
43db362d 1573 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1574
1575out_put:
6cb153ca 1576 fput_light(sock->file, fput_needed);
1da177e4
LT
1577out:
1578 return err;
1579}
1580
1581/*
1582 * Get the remote address ('name') of a socket object. Move the obtained
1583 * name to user space.
1584 */
1585
20f37034
HC
1586SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1587 int __user *, usockaddr_len)
1da177e4
LT
1588{
1589 struct socket *sock;
230b1839 1590 struct sockaddr_storage address;
6cb153ca 1591 int len, err, fput_needed;
1da177e4 1592
89bddce5
SH
1593 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1594 if (sock != NULL) {
1da177e4
LT
1595 err = security_socket_getpeername(sock);
1596 if (err) {
6cb153ca 1597 fput_light(sock->file, fput_needed);
1da177e4
LT
1598 return err;
1599 }
1600
89bddce5 1601 err =
230b1839 1602 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1603 1);
1da177e4 1604 if (!err)
43db362d 1605 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1606 usockaddr_len);
6cb153ca 1607 fput_light(sock->file, fput_needed);
1da177e4
LT
1608 }
1609 return err;
1610}
1611
1612/*
1613 * Send a datagram to a given address. We move the address into kernel
1614 * space and check the user space data area is readable before invoking
1615 * the protocol.
1616 */
1617
3e0fa65f 1618SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1619 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1620 int, addr_len)
1da177e4
LT
1621{
1622 struct socket *sock;
230b1839 1623 struct sockaddr_storage address;
1da177e4
LT
1624 int err;
1625 struct msghdr msg;
1626 struct iovec iov;
6cb153ca 1627 int fput_needed;
6cb153ca 1628
602bd0e9
AV
1629 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1630 if (unlikely(err))
1631 return err;
de0fa95c
PE
1632 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1633 if (!sock)
4387ff75 1634 goto out;
6cb153ca 1635
89bddce5 1636 msg.msg_name = NULL;
89bddce5
SH
1637 msg.msg_control = NULL;
1638 msg.msg_controllen = 0;
1639 msg.msg_namelen = 0;
6cb153ca 1640 if (addr) {
43db362d 1641 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1642 if (err < 0)
1643 goto out_put;
230b1839 1644 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1645 msg.msg_namelen = addr_len;
1da177e4
LT
1646 }
1647 if (sock->file->f_flags & O_NONBLOCK)
1648 flags |= MSG_DONTWAIT;
1649 msg.msg_flags = flags;
d8725c86 1650 err = sock_sendmsg(sock, &msg);
1da177e4 1651
89bddce5 1652out_put:
de0fa95c 1653 fput_light(sock->file, fput_needed);
4387ff75 1654out:
1da177e4
LT
1655 return err;
1656}
1657
1658/*
89bddce5 1659 * Send a datagram down a socket.
1da177e4
LT
1660 */
1661
3e0fa65f 1662SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1663 unsigned int, flags)
1da177e4
LT
1664{
1665 return sys_sendto(fd, buff, len, flags, NULL, 0);
1666}
1667
1668/*
89bddce5 1669 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1670 * sender. We verify the buffers are writable and if needed move the
1671 * sender address from kernel to user space.
1672 */
1673
3e0fa65f 1674SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1675 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1676 int __user *, addr_len)
1da177e4
LT
1677{
1678 struct socket *sock;
1679 struct iovec iov;
1680 struct msghdr msg;
230b1839 1681 struct sockaddr_storage address;
89bddce5 1682 int err, err2;
6cb153ca
BL
1683 int fput_needed;
1684
602bd0e9
AV
1685 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1686 if (unlikely(err))
1687 return err;
de0fa95c 1688 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1689 if (!sock)
de0fa95c 1690 goto out;
1da177e4 1691
89bddce5
SH
1692 msg.msg_control = NULL;
1693 msg.msg_controllen = 0;
f3d33426
HFS
1694 /* Save some cycles and don't copy the address if not needed */
1695 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1696 /* We assume all kernel code knows the size of sockaddr_storage */
1697 msg.msg_namelen = 0;
130ed5d1 1698 msg.msg_iocb = NULL;
667d348a 1699 msg.msg_flags = 0;
1da177e4
LT
1700 if (sock->file->f_flags & O_NONBLOCK)
1701 flags |= MSG_DONTWAIT;
b9b7745a 1702 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1703
89bddce5 1704 if (err >= 0 && addr != NULL) {
43db362d 1705 err2 = move_addr_to_user(&address,
230b1839 1706 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1707 if (err2 < 0)
1708 err = err2;
1da177e4 1709 }
de0fa95c
PE
1710
1711 fput_light(sock->file, fput_needed);
4387ff75 1712out:
1da177e4
LT
1713 return err;
1714}
1715
1716/*
89bddce5 1717 * Receive a datagram from a socket.
1da177e4
LT
1718 */
1719
b7c0ddf5
JG
1720SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1721 unsigned int, flags)
1da177e4
LT
1722{
1723 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1724}
1725
1726/*
1727 * Set a socket option. Because we don't know the option lengths we have
1728 * to pass the user mode parameter for the protocols to sort out.
1729 */
1730
20f37034
HC
1731SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1732 char __user *, optval, int, optlen)
1da177e4 1733{
6cb153ca 1734 int err, fput_needed;
1da177e4
LT
1735 struct socket *sock;
1736
1737 if (optlen < 0)
1738 return -EINVAL;
89bddce5
SH
1739
1740 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1741 if (sock != NULL) {
1742 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1743 if (err)
1744 goto out_put;
1da177e4
LT
1745
1746 if (level == SOL_SOCKET)
89bddce5
SH
1747 err =
1748 sock_setsockopt(sock, level, optname, optval,
1749 optlen);
1da177e4 1750 else
89bddce5
SH
1751 err =
1752 sock->ops->setsockopt(sock, level, optname, optval,
1753 optlen);
6cb153ca
BL
1754out_put:
1755 fput_light(sock->file, fput_needed);
1da177e4
LT
1756 }
1757 return err;
1758}
1759
1760/*
1761 * Get a socket option. Because we don't know the option lengths we have
1762 * to pass a user mode parameter for the protocols to sort out.
1763 */
1764
20f37034
HC
1765SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1766 char __user *, optval, int __user *, optlen)
1da177e4 1767{
6cb153ca 1768 int err, fput_needed;
1da177e4
LT
1769 struct socket *sock;
1770
89bddce5
SH
1771 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1772 if (sock != NULL) {
6cb153ca
BL
1773 err = security_socket_getsockopt(sock, level, optname);
1774 if (err)
1775 goto out_put;
1da177e4
LT
1776
1777 if (level == SOL_SOCKET)
89bddce5
SH
1778 err =
1779 sock_getsockopt(sock, level, optname, optval,
1780 optlen);
1da177e4 1781 else
89bddce5
SH
1782 err =
1783 sock->ops->getsockopt(sock, level, optname, optval,
1784 optlen);
6cb153ca
BL
1785out_put:
1786 fput_light(sock->file, fput_needed);
1da177e4
LT
1787 }
1788 return err;
1789}
1790
1da177e4
LT
1791/*
1792 * Shutdown a socket.
1793 */
1794
754fe8d2 1795SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1796{
6cb153ca 1797 int err, fput_needed;
1da177e4
LT
1798 struct socket *sock;
1799
89bddce5
SH
1800 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1801 if (sock != NULL) {
1da177e4 1802 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1803 if (!err)
1804 err = sock->ops->shutdown(sock, how);
1805 fput_light(sock->file, fput_needed);
1da177e4
LT
1806 }
1807 return err;
1808}
1809
89bddce5 1810/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1811 * fields which are the same type (int / unsigned) on our platforms.
1812 */
/*
 * COMPAT_MSG() yields the address of @member in either <msg>_compat or <msg>,
 * chosen by MSG_CMSG_COMPAT in a variable named `flags` that must be in scope
 * at the point of use, along with both pointer variables.
 */
#define COMPAT_MSG(msg, member)				\
	((MSG_CMSG_COMPAT & flags) ?			\
		&msg##_compat->member :			\
		&msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1816
c71d8ebe
TH
1817struct used_address {
1818 struct sockaddr_storage name;
1819 unsigned int name_len;
1820};
1821
da184284
AV
1822static int copy_msghdr_from_user(struct msghdr *kmsg,
1823 struct user_msghdr __user *umsg,
1824 struct sockaddr __user **save_addr,
1825 struct iovec **iov)
1661bf36 1826{
08adb7da
AV
1827 struct sockaddr __user *uaddr;
1828 struct iovec __user *uiov;
c0371da6 1829 size_t nr_segs;
08adb7da
AV
1830 ssize_t err;
1831
1832 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1833 __get_user(uaddr, &umsg->msg_name) ||
1834 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1835 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1836 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1837 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1838 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1839 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1840 return -EFAULT;
dbb490b9 1841
08adb7da 1842 if (!uaddr)
6a2a2b3a
AS
1843 kmsg->msg_namelen = 0;
1844
dbb490b9
ML
1845 if (kmsg->msg_namelen < 0)
1846 return -EINVAL;
1847
1661bf36 1848 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1849 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1850
1851 if (save_addr)
1852 *save_addr = uaddr;
1853
1854 if (uaddr && kmsg->msg_namelen) {
1855 if (!save_addr) {
1856 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1857 kmsg->msg_name);
1858 if (err < 0)
1859 return err;
1860 }
1861 } else {
1862 kmsg->msg_name = NULL;
1863 kmsg->msg_namelen = 0;
1864 }
1865
c0371da6 1866 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1867 return -EMSGSIZE;
1868
0345f931 1869 kmsg->msg_iocb = NULL;
1870
da184284
AV
1871 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1872 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1873}
1874
666547ff 1875static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1876 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1877 struct used_address *used_address)
1da177e4 1878{
89bddce5
SH
1879 struct compat_msghdr __user *msg_compat =
1880 (struct compat_msghdr __user *)msg;
230b1839 1881 struct sockaddr_storage address;
1da177e4 1882 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1883 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1884 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1885 /* 20 is size of ipv6_pktinfo */
1da177e4 1886 unsigned char *ctl_buf = ctl;
d8725c86 1887 int ctl_len;
08adb7da 1888 ssize_t err;
89bddce5 1889
08adb7da 1890 msg_sys->msg_name = &address;
1da177e4 1891
08449320 1892 if (MSG_CMSG_COMPAT & flags)
08adb7da 1893 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1894 else
08adb7da 1895 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1896 if (err < 0)
da184284 1897 return err;
1da177e4
LT
1898
1899 err = -ENOBUFS;
1900
228e548e 1901 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1902 goto out_freeiov;
228e548e 1903 ctl_len = msg_sys->msg_controllen;
1da177e4 1904 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1905 err =
228e548e 1906 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1907 sizeof(ctl));
1da177e4
LT
1908 if (err)
1909 goto out_freeiov;
228e548e
AB
1910 ctl_buf = msg_sys->msg_control;
1911 ctl_len = msg_sys->msg_controllen;
1da177e4 1912 } else if (ctl_len) {
89bddce5 1913 if (ctl_len > sizeof(ctl)) {
1da177e4 1914 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1915 if (ctl_buf == NULL)
1da177e4
LT
1916 goto out_freeiov;
1917 }
1918 err = -EFAULT;
1919 /*
228e548e 1920 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1921 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1922 * checking falls down on this.
1923 */
fb8621bb 1924 if (copy_from_user(ctl_buf,
228e548e 1925 (void __user __force *)msg_sys->msg_control,
89bddce5 1926 ctl_len))
1da177e4 1927 goto out_freectl;
228e548e 1928 msg_sys->msg_control = ctl_buf;
1da177e4 1929 }
228e548e 1930 msg_sys->msg_flags = flags;
1da177e4
LT
1931
1932 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1933 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1934 /*
1935 * If this is sendmmsg() and current destination address is same as
1936 * previously succeeded address, omit asking LSM's decision.
1937 * used_address->name_len is initialized to UINT_MAX so that the first
1938 * destination address never matches.
1939 */
bc909d9d
MD
1940 if (used_address && msg_sys->msg_name &&
1941 used_address->name_len == msg_sys->msg_namelen &&
1942 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1943 used_address->name_len)) {
d8725c86 1944 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1945 goto out_freectl;
1946 }
d8725c86 1947 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1948 /*
1949 * If this is sendmmsg() and sending to current destination address was
1950 * successful, remember it.
1951 */
1952 if (used_address && err >= 0) {
1953 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1954 if (msg_sys->msg_name)
1955 memcpy(&used_address->name, msg_sys->msg_name,
1956 used_address->name_len);
c71d8ebe 1957 }
1da177e4
LT
1958
1959out_freectl:
89bddce5 1960 if (ctl_buf != ctl)
1da177e4
LT
1961 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1962out_freeiov:
da184284 1963 kfree(iov);
228e548e
AB
1964 return err;
1965}
1966
1967/*
1968 * BSD sendmsg interface
1969 */
1970
666547ff 1971long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
1972{
1973 int fput_needed, err;
1974 struct msghdr msg_sys;
1be374a0
AL
1975 struct socket *sock;
1976
1be374a0 1977 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
1978 if (!sock)
1979 goto out;
1980
a7526eb5 1981 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 1982
6cb153ca 1983 fput_light(sock->file, fput_needed);
89bddce5 1984out:
1da177e4
LT
1985 return err;
1986}
1987
666547ff 1988SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
1989{
1990 if (flags & MSG_CMSG_COMPAT)
1991 return -EINVAL;
1992 return __sys_sendmsg(fd, msg, flags);
1993}
1994
228e548e
AB
1995/*
1996 * Linux sendmmsg interface
1997 */
1998
1999int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2000 unsigned int flags)
2001{
2002 int fput_needed, err, datagrams;
2003 struct socket *sock;
2004 struct mmsghdr __user *entry;
2005 struct compat_mmsghdr __user *compat_entry;
2006 struct msghdr msg_sys;
c71d8ebe 2007 struct used_address used_address;
228e548e 2008
98382f41
AB
2009 if (vlen > UIO_MAXIOV)
2010 vlen = UIO_MAXIOV;
228e548e
AB
2011
2012 datagrams = 0;
2013
2014 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2015 if (!sock)
2016 return err;
2017
c71d8ebe 2018 used_address.name_len = UINT_MAX;
228e548e
AB
2019 entry = mmsg;
2020 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2021 err = 0;
228e548e
AB
2022
2023 while (datagrams < vlen) {
228e548e 2024 if (MSG_CMSG_COMPAT & flags) {
666547ff 2025 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5 2026 &msg_sys, flags, &used_address);
228e548e
AB
2027 if (err < 0)
2028 break;
2029 err = __put_user(err, &compat_entry->msg_len);
2030 ++compat_entry;
2031 } else {
a7526eb5 2032 err = ___sys_sendmsg(sock,
666547ff 2033 (struct user_msghdr __user *)entry,
a7526eb5 2034 &msg_sys, flags, &used_address);
228e548e
AB
2035 if (err < 0)
2036 break;
2037 err = put_user(err, &entry->msg_len);
2038 ++entry;
2039 }
2040
2041 if (err)
2042 break;
2043 ++datagrams;
0f5d2bc2
SHY
2044 if (msg_data_left(&msg_sys))
2045 break;
228e548e
AB
2046 }
2047
228e548e
AB
2048 fput_light(sock->file, fput_needed);
2049
728ffb86
AB
2050 /* We only return an error if no datagrams were able to be sent */
2051 if (datagrams != 0)
228e548e
AB
2052 return datagrams;
2053
228e548e
AB
2054 return err;
2055}
2056
2057SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2058 unsigned int, vlen, unsigned int, flags)
2059{
1be374a0
AL
2060 if (flags & MSG_CMSG_COMPAT)
2061 return -EINVAL;
228e548e
AB
2062 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2063}
2064
666547ff 2065static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2066 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2067{
89bddce5
SH
2068 struct compat_msghdr __user *msg_compat =
2069 (struct compat_msghdr __user *)msg;
1da177e4 2070 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2071 struct iovec *iov = iovstack;
1da177e4 2072 unsigned long cmsg_ptr;
b9b7745a 2073 int len;
08adb7da 2074 ssize_t err;
1da177e4
LT
2075
2076 /* kernel mode address */
230b1839 2077 struct sockaddr_storage addr;
1da177e4
LT
2078
2079 /* user mode address pointers */
2080 struct sockaddr __user *uaddr;
08adb7da 2081 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2082
08adb7da 2083 msg_sys->msg_name = &addr;
1da177e4 2084
f3d33426 2085 if (MSG_CMSG_COMPAT & flags)
08adb7da 2086 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2087 else
08adb7da 2088 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2089 if (err < 0)
da184284 2090 return err;
1da177e4 2091
a2e27255
ACM
2092 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2093 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2094
f3d33426
HFS
2095 /* We assume all kernel code knows the size of sockaddr_storage */
2096 msg_sys->msg_namelen = 0;
2097
1da177e4
LT
2098 if (sock->file->f_flags & O_NONBLOCK)
2099 flags |= MSG_DONTWAIT;
b9b7745a 2100 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2101 if (err < 0)
2102 goto out_freeiov;
2103 len = err;
2104
2105 if (uaddr != NULL) {
43db362d 2106 err = move_addr_to_user(&addr,
a2e27255 2107 msg_sys->msg_namelen, uaddr,
89bddce5 2108 uaddr_len);
1da177e4
LT
2109 if (err < 0)
2110 goto out_freeiov;
2111 }
a2e27255 2112 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2113 COMPAT_FLAGS(msg));
1da177e4
LT
2114 if (err)
2115 goto out_freeiov;
2116 if (MSG_CMSG_COMPAT & flags)
a2e27255 2117 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2118 &msg_compat->msg_controllen);
2119 else
a2e27255 2120 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2121 &msg->msg_controllen);
2122 if (err)
2123 goto out_freeiov;
2124 err = len;
2125
2126out_freeiov:
da184284 2127 kfree(iov);
a2e27255
ACM
2128 return err;
2129}
2130
2131/*
2132 * BSD recvmsg interface
2133 */
2134
666547ff 2135long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2136{
2137 int fput_needed, err;
2138 struct msghdr msg_sys;
1be374a0
AL
2139 struct socket *sock;
2140
1be374a0 2141 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2142 if (!sock)
2143 goto out;
2144
a7526eb5 2145 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2146
6cb153ca 2147 fput_light(sock->file, fput_needed);
1da177e4
LT
2148out:
2149 return err;
2150}
2151
666547ff 2152SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2153 unsigned int, flags)
2154{
2155 if (flags & MSG_CMSG_COMPAT)
2156 return -EINVAL;
2157 return __sys_recvmsg(fd, msg, flags);
2158}
2159
a2e27255
ACM
2160/*
2161 * Linux recvmmsg interface
2162 */
2163
2164int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2165 unsigned int flags, struct timespec *timeout)
2166{
2167 int fput_needed, err, datagrams;
2168 struct socket *sock;
2169 struct mmsghdr __user *entry;
d7256d0e 2170 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2171 struct msghdr msg_sys;
2172 struct timespec end_time;
2173
2174 if (timeout &&
2175 poll_select_set_timeout(&end_time, timeout->tv_sec,
2176 timeout->tv_nsec))
2177 return -EINVAL;
2178
2179 datagrams = 0;
2180
2181 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2182 if (!sock)
2183 return err;
2184
2185 err = sock_error(sock->sk);
ef7f0a2e
MJ
2186 if (err) {
2187 datagrams = err;
a2e27255 2188 goto out_put;
ef7f0a2e 2189 }
a2e27255
ACM
2190
2191 entry = mmsg;
d7256d0e 2192 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2193
2194 while (datagrams < vlen) {
2195 /*
2196 * No need to ask LSM for more than the first datagram.
2197 */
d7256d0e 2198 if (MSG_CMSG_COMPAT & flags) {
666547ff 2199 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2200 &msg_sys, flags & ~MSG_WAITFORONE,
2201 datagrams);
d7256d0e
JMG
2202 if (err < 0)
2203 break;
2204 err = __put_user(err, &compat_entry->msg_len);
2205 ++compat_entry;
2206 } else {
a7526eb5 2207 err = ___sys_recvmsg(sock,
666547ff 2208 (struct user_msghdr __user *)entry,
a7526eb5
AL
2209 &msg_sys, flags & ~MSG_WAITFORONE,
2210 datagrams);
d7256d0e
JMG
2211 if (err < 0)
2212 break;
2213 err = put_user(err, &entry->msg_len);
2214 ++entry;
2215 }
2216
a2e27255
ACM
2217 if (err)
2218 break;
a2e27255
ACM
2219 ++datagrams;
2220
71c5c159
BB
2221 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2222 if (flags & MSG_WAITFORONE)
2223 flags |= MSG_DONTWAIT;
2224
a2e27255
ACM
2225 if (timeout) {
2226 ktime_get_ts(timeout);
2227 *timeout = timespec_sub(end_time, *timeout);
2228 if (timeout->tv_sec < 0) {
2229 timeout->tv_sec = timeout->tv_nsec = 0;
2230 break;
2231 }
2232
2233 /* Timeout, return less than vlen datagrams */
2234 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2235 break;
2236 }
2237
2238 /* Out of band data, return right away */
2239 if (msg_sys.msg_flags & MSG_OOB)
2240 break;
2241 }
2242
a2e27255 2243 if (err == 0)
9cae0234
ACM
2244 goto out_put;
2245
2246 if (datagrams == 0) {
2247 datagrams = err;
2248 goto out_put;
2249 }
a2e27255 2250
9cae0234
ACM
2251 /*
2252 * We may return less entries than requested (vlen) if the
2253 * sock is non block and there aren't enough datagrams...
2254 */
2255 if (err != -EAGAIN) {
a2e27255 2256 /*
9cae0234
ACM
2257 * ... or if recvmsg returns an error after we
2258 * received some datagrams, where we record the
2259 * error to return on the next call or if the
2260 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2261 */
9cae0234 2262 sock->sk->sk_err = -err;
a2e27255 2263 }
9cae0234
ACM
2264out_put:
2265 fput_light(sock->file, fput_needed);
a2e27255 2266
9cae0234 2267 return datagrams;
a2e27255
ACM
2268}
2269
2270SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2271 unsigned int, vlen, unsigned int, flags,
2272 struct timespec __user *, timeout)
2273{
2274 int datagrams;
2275 struct timespec timeout_sys;
2276
1be374a0
AL
2277 if (flags & MSG_CMSG_COMPAT)
2278 return -EINVAL;
2279
a2e27255
ACM
2280 if (!timeout)
2281 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2282
2283 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2284 return -EFAULT;
2285
2286 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2287
2288 if (datagrams > 0 &&
2289 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2290 datagrams = -EFAULT;
2291
2292 return datagrams;
2293}
2294
2295#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
2296/* Argument list sizes for sys_socketcall */
2297#define AL(x) ((x) * sizeof(unsigned long))
228e548e 2298static const unsigned char nargs[21] = {
c6d409cf
ED
2299 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2300 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2301 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
228e548e 2302 AL(4), AL(5), AL(4)
89bddce5
SH
2303};
2304
1da177e4
LT
2305#undef AL
2306
2307/*
89bddce5 2308 * System call vectors.
1da177e4
LT
2309 *
2310 * Argument checking cleaned up. Saved 20% in size.
2311 * This function doesn't need to set the kernel lock because
89bddce5 2312 * it is set by the callees.
1da177e4
LT
2313 */
2314
3e0fa65f 2315SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2316{
2950fa9d 2317 unsigned long a[AUDITSC_ARGS];
89bddce5 2318 unsigned long a0, a1;
1da177e4 2319 int err;
47379052 2320 unsigned int len;
1da177e4 2321
228e548e 2322 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2323 return -EINVAL;
2324
47379052
AV
2325 len = nargs[call];
2326 if (len > sizeof(a))
2327 return -EINVAL;
2328
1da177e4 2329 /* copy_from_user should be SMP safe. */
47379052 2330 if (copy_from_user(a, args, len))
1da177e4 2331 return -EFAULT;
3ec3b2fb 2332
2950fa9d
CG
2333 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2334 if (err)
2335 return err;
3ec3b2fb 2336
89bddce5
SH
2337 a0 = a[0];
2338 a1 = a[1];
2339
2340 switch (call) {
2341 case SYS_SOCKET:
2342 err = sys_socket(a0, a1, a[2]);
2343 break;
2344 case SYS_BIND:
2345 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2346 break;
2347 case SYS_CONNECT:
2348 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2349 break;
2350 case SYS_LISTEN:
2351 err = sys_listen(a0, a1);
2352 break;
2353 case SYS_ACCEPT:
de11defe
UD
2354 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2355 (int __user *)a[2], 0);
89bddce5
SH
2356 break;
2357 case SYS_GETSOCKNAME:
2358 err =
2359 sys_getsockname(a0, (struct sockaddr __user *)a1,
2360 (int __user *)a[2]);
2361 break;
2362 case SYS_GETPEERNAME:
2363 err =
2364 sys_getpeername(a0, (struct sockaddr __user *)a1,
2365 (int __user *)a[2]);
2366 break;
2367 case SYS_SOCKETPAIR:
2368 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2369 break;
2370 case SYS_SEND:
2371 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2372 break;
2373 case SYS_SENDTO:
2374 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2375 (struct sockaddr __user *)a[4], a[5]);
2376 break;
2377 case SYS_RECV:
2378 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2379 break;
2380 case SYS_RECVFROM:
2381 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2382 (struct sockaddr __user *)a[4],
2383 (int __user *)a[5]);
2384 break;
2385 case SYS_SHUTDOWN:
2386 err = sys_shutdown(a0, a1);
2387 break;
2388 case SYS_SETSOCKOPT:
2389 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2390 break;
2391 case SYS_GETSOCKOPT:
2392 err =
2393 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2394 (int __user *)a[4]);
2395 break;
2396 case SYS_SENDMSG:
666547ff 2397 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2398 break;
228e548e
AB
2399 case SYS_SENDMMSG:
2400 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2401 break;
89bddce5 2402 case SYS_RECVMSG:
666547ff 2403 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2404 break;
a2e27255
ACM
2405 case SYS_RECVMMSG:
2406 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2407 (struct timespec __user *)a[4]);
2408 break;
de11defe
UD
2409 case SYS_ACCEPT4:
2410 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2411 (int __user *)a[2], a[3]);
aaca0bdc 2412 break;
89bddce5
SH
2413 default:
2414 err = -EINVAL;
2415 break;
1da177e4
LT
2416 }
2417 return err;
2418}
2419
89bddce5 2420#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2421
55737fda
SH
2422/**
2423 * sock_register - add a socket protocol handler
2424 * @ops: description of protocol
2425 *
1da177e4
LT
2426 * This function is called by a protocol handler that wants to
2427 * advertise its address family, and have it linked into the
e793c0f7 2428 * socket interface. The value ops->family corresponds to the
55737fda 2429 * socket system call protocol family.
1da177e4 2430 */
f0fd27d4 2431int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2432{
2433 int err;
2434
2435 if (ops->family >= NPROTO) {
3410f22e 2436 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2437 return -ENOBUFS;
2438 }
55737fda
SH
2439
2440 spin_lock(&net_family_lock);
190683a9
ED
2441 if (rcu_dereference_protected(net_families[ops->family],
2442 lockdep_is_held(&net_family_lock)))
55737fda
SH
2443 err = -EEXIST;
2444 else {
cf778b00 2445 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2446 err = 0;
2447 }
55737fda
SH
2448 spin_unlock(&net_family_lock);
2449
3410f22e 2450 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2451 return err;
2452}
c6d409cf 2453EXPORT_SYMBOL(sock_register);
1da177e4 2454
55737fda
SH
2455/**
2456 * sock_unregister - remove a protocol handler
2457 * @family: protocol family to remove
2458 *
1da177e4
LT
2459 * This function is called by a protocol handler that wants to
2460 * remove its address family, and have it unlinked from the
55737fda
SH
2461 * new socket creation.
2462 *
2463 * If protocol handler is a module, then it can use module reference
2464 * counts to protect against new references. If protocol handler is not
2465 * a module then it needs to provide its own protection in
2466 * the ops->create routine.
1da177e4 2467 */
f0fd27d4 2468void sock_unregister(int family)
1da177e4 2469{
f0fd27d4 2470 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2471
55737fda 2472 spin_lock(&net_family_lock);
a9b3cd7f 2473 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2474 spin_unlock(&net_family_lock);
2475
2476 synchronize_rcu();
2477
3410f22e 2478 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2479}
c6d409cf 2480EXPORT_SYMBOL(sock_unregister);
1da177e4 2481
77d76ea3 2482static int __init sock_init(void)
1da177e4 2483{
b3e19d92 2484 int err;
2ca794e5
EB
2485 /*
2486 * Initialize the network sysctl infrastructure.
2487 */
2488 err = net_sysctl_init();
2489 if (err)
2490 goto out;
b3e19d92 2491
1da177e4 2492 /*
89bddce5 2493 * Initialize skbuff SLAB cache
1da177e4
LT
2494 */
2495 skb_init();
1da177e4
LT
2496
2497 /*
89bddce5 2498 * Initialize the protocols module.
1da177e4
LT
2499 */
2500
2501 init_inodecache();
b3e19d92
NP
2502
2503 err = register_filesystem(&sock_fs_type);
2504 if (err)
2505 goto out_fs;
1da177e4 2506 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2507 if (IS_ERR(sock_mnt)) {
2508 err = PTR_ERR(sock_mnt);
2509 goto out_mount;
2510 }
77d76ea3
AK
2511
2512 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2513 */
2514
2515#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2516 err = netfilter_init();
2517 if (err)
2518 goto out;
1da177e4 2519#endif
cbeb321a 2520
408eccce 2521 ptp_classifier_init();
c1f19b51 2522
b3e19d92
NP
2523out:
2524 return err;
2525
2526out_mount:
2527 unregister_filesystem(&sock_fs_type);
2528out_fs:
2529 goto out;
1da177e4
LT
2530}
2531
77d76ea3
AK
2532core_initcall(sock_init); /* early initcall */
2533
79e83d1a
AS
2534static int __init jit_init(void)
2535{
2536#ifdef CONFIG_BPF_JIT_ALWAYS_ON
2537 bpf_jit_enable = 1;
2538#endif
2539 return 0;
2540}
2541pure_initcall(jit_init);
2542
1da177e4
LT
2543#ifdef CONFIG_PROC_FS
2544void socket_seq_show(struct seq_file *seq)
2545{
2546 int cpu;
2547 int counter = 0;
2548
6f912042 2549 for_each_possible_cpu(cpu)
89bddce5 2550 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2551
2552 /* It can be negative, by the way. 8) */
2553 if (counter < 0)
2554 counter = 0;
2555
2556 seq_printf(seq, "sockets: used %d\n", counter);
2557}
89bddce5 2558#endif /* CONFIG_PROC_FS */
1da177e4 2559
89bbfc95 2560#ifdef CONFIG_COMPAT
6b96018b 2561static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2562 unsigned int cmd, void __user *up)
7a229387 2563{
7a229387
AB
2564 mm_segment_t old_fs = get_fs();
2565 struct timeval ktv;
2566 int err;
2567
2568 set_fs(KERNEL_DS);
6b96018b 2569 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2570 set_fs(old_fs);
644595f8 2571 if (!err)
ed6fe9d6 2572 err = compat_put_timeval(&ktv, up);
644595f8 2573
7a229387
AB
2574 return err;
2575}
2576
6b96018b 2577static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2578 unsigned int cmd, void __user *up)
7a229387 2579{
7a229387
AB
2580 mm_segment_t old_fs = get_fs();
2581 struct timespec kts;
2582 int err;
2583
2584 set_fs(KERNEL_DS);
6b96018b 2585 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2586 set_fs(old_fs);
644595f8 2587 if (!err)
ed6fe9d6 2588 err = compat_put_timespec(&kts, up);
644595f8 2589
7a229387
AB
2590 return err;
2591}
2592
6b96018b 2593static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2594{
2595 struct ifreq __user *uifr;
2596 int err;
2597
2598 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2599 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2600 return -EFAULT;
2601
6b96018b 2602 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2603 if (err)
2604 return err;
2605
6b96018b 2606 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2607 return -EFAULT;
2608
2609 return 0;
2610}
2611
6b96018b 2612static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2613{
6b96018b 2614 struct compat_ifconf ifc32;
7a229387
AB
2615 struct ifconf ifc;
2616 struct ifconf __user *uifc;
6b96018b 2617 struct compat_ifreq __user *ifr32;
7a229387
AB
2618 struct ifreq __user *ifr;
2619 unsigned int i, j;
2620 int err;
2621
6b96018b 2622 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2623 return -EFAULT;
2624
43da5f2e 2625 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2626 if (ifc32.ifcbuf == 0) {
2627 ifc32.ifc_len = 0;
2628 ifc.ifc_len = 0;
2629 ifc.ifc_req = NULL;
2630 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2631 } else {
c6d409cf
ED
2632 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2633 sizeof(struct ifreq);
7a229387
AB
2634 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2635 ifc.ifc_len = len;
2636 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2637 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2638 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2639 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2640 return -EFAULT;
2641 ifr++;
2642 ifr32++;
2643 }
2644 }
2645 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2646 return -EFAULT;
2647
6b96018b 2648 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2649 if (err)
2650 return err;
2651
2652 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2653 return -EFAULT;
2654
2655 ifr = ifc.ifc_req;
2656 ifr32 = compat_ptr(ifc32.ifcbuf);
2657 for (i = 0, j = 0;
c6d409cf
ED
2658 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2659 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2660 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2661 return -EFAULT;
2662 ifr32++;
2663 ifr++;
2664 }
2665
2666 if (ifc32.ifcbuf == 0) {
2667 /* Translate from 64-bit structure multiple to
2668 * a 32-bit one.
2669 */
2670 i = ifc.ifc_len;
6b96018b 2671 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2672 ifc32.ifc_len = i;
2673 } else {
2674 ifc32.ifc_len = i;
2675 }
6b96018b 2676 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2677 return -EFAULT;
2678
2679 return 0;
2680}
2681
6b96018b 2682static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2683{
3a7da39d
BH
2684 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2685 bool convert_in = false, convert_out = false;
2686 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2687 struct ethtool_rxnfc __user *rxnfc;
7a229387 2688 struct ifreq __user *ifr;
3a7da39d
BH
2689 u32 rule_cnt = 0, actual_rule_cnt;
2690 u32 ethcmd;
7a229387 2691 u32 data;
3a7da39d 2692 int ret;
7a229387 2693
3a7da39d
BH
2694 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2695 return -EFAULT;
7a229387 2696
3a7da39d
BH
2697 compat_rxnfc = compat_ptr(data);
2698
2699 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2700 return -EFAULT;
2701
3a7da39d
BH
2702 /* Most ethtool structures are defined without padding.
2703 * Unfortunately struct ethtool_rxnfc is an exception.
2704 */
2705 switch (ethcmd) {
2706 default:
2707 break;
2708 case ETHTOOL_GRXCLSRLALL:
2709 /* Buffer size is variable */
2710 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2711 return -EFAULT;
2712 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2713 return -ENOMEM;
2714 buf_size += rule_cnt * sizeof(u32);
2715 /* fall through */
2716 case ETHTOOL_GRXRINGS:
2717 case ETHTOOL_GRXCLSRLCNT:
2718 case ETHTOOL_GRXCLSRULE:
55664f32 2719 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2720 convert_out = true;
2721 /* fall through */
2722 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2723 buf_size += sizeof(struct ethtool_rxnfc);
2724 convert_in = true;
2725 break;
2726 }
2727
2728 ifr = compat_alloc_user_space(buf_size);
954b1244 2729 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2730
2731 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2732 return -EFAULT;
2733
3a7da39d
BH
2734 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2735 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2736 return -EFAULT;
2737
3a7da39d 2738 if (convert_in) {
127fe533 2739 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2740 * fs.ring_cookie and at the end of fs, but nowhere else.
2741 */
127fe533
AD
2742 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2743 sizeof(compat_rxnfc->fs.m_ext) !=
2744 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2745 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2746 BUILD_BUG_ON(
2747 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2748 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2749 offsetof(struct ethtool_rxnfc, fs.location) -
2750 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2751
2752 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2753 (void __user *)(&rxnfc->fs.m_ext + 1) -
2754 (void __user *)rxnfc) ||
3a7da39d
BH
2755 copy_in_user(&rxnfc->fs.ring_cookie,
2756 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2757 (void __user *)(&rxnfc->fs.location + 1) -
2758 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2759 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2760 sizeof(rxnfc->rule_cnt)))
2761 return -EFAULT;
2762 }
2763
2764 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2765 if (ret)
2766 return ret;
2767
2768 if (convert_out) {
2769 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2770 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2771 (const void __user *)rxnfc) ||
3a7da39d
BH
2772 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2773 &rxnfc->fs.ring_cookie,
954b1244
SH
2774 (const void __user *)(&rxnfc->fs.location + 1) -
2775 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2776 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2777 sizeof(rxnfc->rule_cnt)))
2778 return -EFAULT;
2779
2780 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2781 /* As an optimisation, we only copy the actual
2782 * number of rules that the underlying
2783 * function returned. Since Mallory might
2784 * change the rule count in user memory, we
2785 * check that it is less than the rule count
2786 * originally given (as the user buffer size),
2787 * which has been range-checked.
2788 */
2789 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2790 return -EFAULT;
2791 if (actual_rule_cnt < rule_cnt)
2792 rule_cnt = actual_rule_cnt;
2793 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2794 &rxnfc->rule_locs[0],
2795 rule_cnt * sizeof(u32)))
2796 return -EFAULT;
2797 }
2798 }
2799
2800 return 0;
7a229387
AB
2801}
2802
7a50a240
AB
2803static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2804{
2805 void __user *uptr;
2806 compat_uptr_t uptr32;
2807 struct ifreq __user *uifr;
2808
c6d409cf 2809 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2810 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2811 return -EFAULT;
2812
2813 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2814 return -EFAULT;
2815
2816 uptr = compat_ptr(uptr32);
2817
2818 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2819 return -EFAULT;
2820
2821 return dev_ioctl(net, SIOCWANDEV, uifr);
2822}
2823
6b96018b
AB
2824static int bond_ioctl(struct net *net, unsigned int cmd,
2825 struct compat_ifreq __user *ifr32)
7a229387
AB
2826{
2827 struct ifreq kifr;
7a229387
AB
2828 mm_segment_t old_fs;
2829 int err;
7a229387
AB
2830
2831 switch (cmd) {
2832 case SIOCBONDENSLAVE:
2833 case SIOCBONDRELEASE:
2834 case SIOCBONDSETHWADDR:
2835 case SIOCBONDCHANGEACTIVE:
6b96018b 2836 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2837 return -EFAULT;
2838
2839 old_fs = get_fs();
c6d409cf 2840 set_fs(KERNEL_DS);
c3f52ae6 2841 err = dev_ioctl(net, cmd,
2842 (struct ifreq __user __force *) &kifr);
c6d409cf 2843 set_fs(old_fs);
7a229387
AB
2844
2845 return err;
7a229387 2846 default:
07d106d0 2847 return -ENOIOCTLCMD;
ccbd6a5a 2848 }
7a229387
AB
2849}
2850
590d4693
BH
2851/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2852static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2853 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2854{
2855 struct ifreq __user *u_ifreq64;
7a229387
AB
2856 char tmp_buf[IFNAMSIZ];
2857 void __user *data64;
2858 u32 data32;
2859
2860 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2861 IFNAMSIZ))
2862 return -EFAULT;
417c3522 2863 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2864 return -EFAULT;
2865 data64 = compat_ptr(data32);
2866
2867 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2868
7a229387
AB
2869 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2870 IFNAMSIZ))
2871 return -EFAULT;
417c3522 2872 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2873 return -EFAULT;
2874
6b96018b 2875 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2876}
2877
6b96018b
AB
2878static int dev_ifsioc(struct net *net, struct socket *sock,
2879 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2880{
a2116ed2 2881 struct ifreq __user *uifr;
7a229387
AB
2882 int err;
2883
a2116ed2
AB
2884 uifr = compat_alloc_user_space(sizeof(*uifr));
2885 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2886 return -EFAULT;
2887
2888 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2889
7a229387
AB
2890 if (!err) {
2891 switch (cmd) {
2892 case SIOCGIFFLAGS:
2893 case SIOCGIFMETRIC:
2894 case SIOCGIFMTU:
2895 case SIOCGIFMEM:
2896 case SIOCGIFHWADDR:
2897 case SIOCGIFINDEX:
2898 case SIOCGIFADDR:
2899 case SIOCGIFBRDADDR:
2900 case SIOCGIFDSTADDR:
2901 case SIOCGIFNETMASK:
fab2532b 2902 case SIOCGIFPFLAGS:
7a229387 2903 case SIOCGIFTXQLEN:
fab2532b
AB
2904 case SIOCGMIIPHY:
2905 case SIOCGMIIREG:
a2116ed2 2906 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2907 err = -EFAULT;
2908 break;
2909 }
2910 }
2911 return err;
2912}
2913
a2116ed2
AB
2914static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2915 struct compat_ifreq __user *uifr32)
2916{
2917 struct ifreq ifr;
2918 struct compat_ifmap __user *uifmap32;
2919 mm_segment_t old_fs;
2920 int err;
2921
2922 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2923 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2924 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2925 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2926 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2927 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2928 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2929 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2930 if (err)
2931 return -EFAULT;
2932
2933 old_fs = get_fs();
c6d409cf 2934 set_fs(KERNEL_DS);
c3f52ae6 2935 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2936 set_fs(old_fs);
a2116ed2
AB
2937
2938 if (cmd == SIOCGIFMAP && !err) {
2939 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2940 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2941 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2942 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2943 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2944 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2945 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2946 if (err)
2947 err = -EFAULT;
2948 }
2949 return err;
2950}
2951
7a229387 2952struct rtentry32 {
c6d409cf 2953 u32 rt_pad1;
7a229387
AB
2954 struct sockaddr rt_dst; /* target address */
2955 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2956 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2957 unsigned short rt_flags;
2958 short rt_pad2;
2959 u32 rt_pad3;
2960 unsigned char rt_tos;
2961 unsigned char rt_class;
2962 short rt_pad4;
2963 short rt_metric; /* +1 for binary compatibility! */
7a229387 2964 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
2965 u32 rt_mtu; /* per route MTU/Window */
2966 u32 rt_window; /* Window clamping */
7a229387
AB
2967 unsigned short rt_irtt; /* Initial RTT */
2968};
2969
2970struct in6_rtmsg32 {
2971 struct in6_addr rtmsg_dst;
2972 struct in6_addr rtmsg_src;
2973 struct in6_addr rtmsg_gateway;
2974 u32 rtmsg_type;
2975 u16 rtmsg_dst_len;
2976 u16 rtmsg_src_len;
2977 u32 rtmsg_metric;
2978 u32 rtmsg_info;
2979 u32 rtmsg_flags;
2980 s32 rtmsg_ifindex;
2981};
2982
6b96018b
AB
2983static int routing_ioctl(struct net *net, struct socket *sock,
2984 unsigned int cmd, void __user *argp)
7a229387
AB
2985{
2986 int ret;
2987 void *r = NULL;
2988 struct in6_rtmsg r6;
2989 struct rtentry r4;
2990 char devname[16];
2991 u32 rtdev;
2992 mm_segment_t old_fs = get_fs();
2993
6b96018b
AB
2994 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2995 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 2996 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 2997 3 * sizeof(struct in6_addr));
3ddc5b46
MD
2998 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
2999 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3000 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3001 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3002 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3003 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3004 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3005
3006 r = (void *) &r6;
3007 } else { /* ipv4 */
6b96018b 3008 struct rtentry32 __user *ur4 = argp;
c6d409cf 3009 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3010 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3011 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3012 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3013 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3014 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3015 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3016 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3017 if (rtdev) {
c6d409cf 3018 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3019 r4.rt_dev = (char __user __force *)devname;
3020 devname[15] = 0;
7a229387
AB
3021 } else
3022 r4.rt_dev = NULL;
3023
3024 r = (void *) &r4;
3025 }
3026
3027 if (ret) {
3028 ret = -EFAULT;
3029 goto out;
3030 }
3031
c6d409cf 3032 set_fs(KERNEL_DS);
6b96018b 3033 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3034 set_fs(old_fs);
7a229387
AB
3035
3036out:
7a229387
AB
3037 return ret;
3038}
3039
3040/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3041 * for some operations; this forces use of the newer bridge-utils that
25985edc 3042 * use compatible ioctls
7a229387 3043 */
6b96018b 3044static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3045{
6b96018b 3046 compat_ulong_t tmp;
7a229387 3047
6b96018b 3048 if (get_user(tmp, argp))
7a229387
AB
3049 return -EFAULT;
3050 if (tmp == BRCTL_GET_VERSION)
3051 return BRCTL_VERSION + 1;
3052 return -EINVAL;
3053}
3054
6b96018b
AB
3055static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3056 unsigned int cmd, unsigned long arg)
3057{
3058 void __user *argp = compat_ptr(arg);
3059 struct sock *sk = sock->sk;
3060 struct net *net = sock_net(sk);
7a229387 3061
6b96018b 3062 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3063 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3064
3065 switch (cmd) {
3066 case SIOCSIFBR:
3067 case SIOCGIFBR:
3068 return old_bridge_ioctl(argp);
3069 case SIOCGIFNAME:
3070 return dev_ifname32(net, argp);
3071 case SIOCGIFCONF:
3072 return dev_ifconf(net, argp);
3073 case SIOCETHTOOL:
3074 return ethtool_ioctl(net, argp);
7a50a240
AB
3075 case SIOCWANDEV:
3076 return compat_siocwandev(net, argp);
a2116ed2
AB
3077 case SIOCGIFMAP:
3078 case SIOCSIFMAP:
3079 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3080 case SIOCBONDENSLAVE:
3081 case SIOCBONDRELEASE:
3082 case SIOCBONDSETHWADDR:
6b96018b
AB
3083 case SIOCBONDCHANGEACTIVE:
3084 return bond_ioctl(net, cmd, argp);
3085 case SIOCADDRT:
3086 case SIOCDELRT:
3087 return routing_ioctl(net, sock, cmd, argp);
3088 case SIOCGSTAMP:
3089 return do_siocgstamp(net, sock, cmd, argp);
3090 case SIOCGSTAMPNS:
3091 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3092 case SIOCBONDSLAVEINFOQUERY:
3093 case SIOCBONDINFOQUERY:
a2116ed2 3094 case SIOCSHWTSTAMP:
fd468c74 3095 case SIOCGHWTSTAMP:
590d4693 3096 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3097
3098 case FIOSETOWN:
3099 case SIOCSPGRP:
3100 case FIOGETOWN:
3101 case SIOCGPGRP:
3102 case SIOCBRADDBR:
3103 case SIOCBRDELBR:
3104 case SIOCGIFVLAN:
3105 case SIOCSIFVLAN:
3106 case SIOCADDDLCI:
3107 case SIOCDELDLCI:
3108 return sock_ioctl(file, cmd, arg);
3109
3110 case SIOCGIFFLAGS:
3111 case SIOCSIFFLAGS:
3112 case SIOCGIFMETRIC:
3113 case SIOCSIFMETRIC:
3114 case SIOCGIFMTU:
3115 case SIOCSIFMTU:
3116 case SIOCGIFMEM:
3117 case SIOCSIFMEM:
3118 case SIOCGIFHWADDR:
3119 case SIOCSIFHWADDR:
3120 case SIOCADDMULTI:
3121 case SIOCDELMULTI:
3122 case SIOCGIFINDEX:
6b96018b
AB
3123 case SIOCGIFADDR:
3124 case SIOCSIFADDR:
3125 case SIOCSIFHWBROADCAST:
6b96018b 3126 case SIOCDIFADDR:
6b96018b
AB
3127 case SIOCGIFBRDADDR:
3128 case SIOCSIFBRDADDR:
3129 case SIOCGIFDSTADDR:
3130 case SIOCSIFDSTADDR:
3131 case SIOCGIFNETMASK:
3132 case SIOCSIFNETMASK:
3133 case SIOCSIFPFLAGS:
3134 case SIOCGIFPFLAGS:
3135 case SIOCGIFTXQLEN:
3136 case SIOCSIFTXQLEN:
3137 case SIOCBRADDIF:
3138 case SIOCBRDELIF:
9177efd3
AB
3139 case SIOCSIFNAME:
3140 case SIOCGMIIPHY:
3141 case SIOCGMIIREG:
3142 case SIOCSMIIREG:
6b96018b 3143 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3144
6b96018b
AB
3145 case SIOCSARP:
3146 case SIOCGARP:
3147 case SIOCDARP:
6b96018b 3148 case SIOCATMARK:
9177efd3
AB
3149 return sock_do_ioctl(net, sock, cmd, arg);
3150 }
3151
6b96018b
AB
3152 return -ENOIOCTLCMD;
3153}
7a229387 3154
95c96174 3155static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3156 unsigned long arg)
89bbfc95
SP
3157{
3158 struct socket *sock = file->private_data;
3159 int ret = -ENOIOCTLCMD;
87de87d5
DM
3160 struct sock *sk;
3161 struct net *net;
3162
3163 sk = sock->sk;
3164 net = sock_net(sk);
89bbfc95
SP
3165
3166 if (sock->ops->compat_ioctl)
3167 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3168
87de87d5
DM
3169 if (ret == -ENOIOCTLCMD &&
3170 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3171 ret = compat_wext_handle_ioctl(net, cmd, arg);
3172
6b96018b
AB
3173 if (ret == -ENOIOCTLCMD)
3174 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3175
89bbfc95
SP
3176 return ret;
3177}
3178#endif
3179
ac5a488e
SS
3180int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3181{
3182 return sock->ops->bind(sock, addr, addrlen);
3183}
c6d409cf 3184EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3185
3186int kernel_listen(struct socket *sock, int backlog)
3187{
3188 return sock->ops->listen(sock, backlog);
3189}
c6d409cf 3190EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3191
3192int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3193{
3194 struct sock *sk = sock->sk;
3195 int err;
3196
3197 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3198 newsock);
3199 if (err < 0)
3200 goto done;
3201
3202 err = sock->ops->accept(sock, *newsock, flags);
3203 if (err < 0) {
3204 sock_release(*newsock);
fa8705b0 3205 *newsock = NULL;
ac5a488e
SS
3206 goto done;
3207 }
3208
3209 (*newsock)->ops = sock->ops;
1b08534e 3210 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3211
3212done:
3213 return err;
3214}
c6d409cf 3215EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3216
3217int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3218 int flags)
ac5a488e
SS
3219{
3220 return sock->ops->connect(sock, addr, addrlen, flags);
3221}
c6d409cf 3222EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3223
3224int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3225 int *addrlen)
3226{
3227 return sock->ops->getname(sock, addr, addrlen, 0);
3228}
c6d409cf 3229EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3230
3231int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3232 int *addrlen)
3233{
3234 return sock->ops->getname(sock, addr, addrlen, 1);
3235}
c6d409cf 3236EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3237
3238int kernel_getsockopt(struct socket *sock, int level, int optname,
3239 char *optval, int *optlen)
3240{
3241 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3242 char __user *uoptval;
3243 int __user *uoptlen;
ac5a488e
SS
3244 int err;
3245
fb8621bb
NK
3246 uoptval = (char __user __force *) optval;
3247 uoptlen = (int __user __force *) optlen;
3248
ac5a488e
SS
3249 set_fs(KERNEL_DS);
3250 if (level == SOL_SOCKET)
fb8621bb 3251 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3252 else
fb8621bb
NK
3253 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3254 uoptlen);
ac5a488e
SS
3255 set_fs(oldfs);
3256 return err;
3257}
c6d409cf 3258EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3259
3260int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3261 char *optval, unsigned int optlen)
ac5a488e
SS
3262{
3263 mm_segment_t oldfs = get_fs();
fb8621bb 3264 char __user *uoptval;
ac5a488e
SS
3265 int err;
3266
fb8621bb
NK
3267 uoptval = (char __user __force *) optval;
3268
ac5a488e
SS
3269 set_fs(KERNEL_DS);
3270 if (level == SOL_SOCKET)
fb8621bb 3271 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3272 else
fb8621bb 3273 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3274 optlen);
3275 set_fs(oldfs);
3276 return err;
3277}
c6d409cf 3278EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3279
3280int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3281 size_t size, int flags)
3282{
3283 if (sock->ops->sendpage)
3284 return sock->ops->sendpage(sock, page, offset, size, flags);
3285
3286 return sock_no_sendpage(sock, page, offset, size, flags);
3287}
c6d409cf 3288EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3289
3290int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3291{
3292 mm_segment_t oldfs = get_fs();
3293 int err;
3294
3295 set_fs(KERNEL_DS);
3296 err = sock->ops->ioctl(sock, cmd, arg);
3297 set_fs(oldfs);
3298
3299 return err;
3300}
c6d409cf 3301EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3302
91cf45f0
TM
3303int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3304{
3305 return sock->ops->shutdown(sock, how);
3306}
91cf45f0 3307EXPORT_SYMBOL(kernel_sock_shutdown);