]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/socket.c
Merge branch 'for-4.3/upstream' into for-4.3/wacom
[mirror_ubuntu-artful-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll globals; they only exist when CONFIG_NET_RX_BUSY_POLL is set. */
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
260 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 261
1da177e4
LT
262 ei->socket.state = SS_UNCONNECTED;
263 ei->socket.flags = 0;
264 ei->socket.ops = NULL;
265 ei->socket.sk = NULL;
266 ei->socket.file = NULL;
1da177e4
LT
267
268 return &ei->vfs_inode;
269}
270
271static void sock_destroy_inode(struct inode *inode)
272{
43815482 273 struct socket_alloc *ei;
eaefd110 274 struct socket_wq *wq;
43815482
ED
275
276 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 277 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 278 kfree_rcu(wq, rcu);
43815482 279 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
280}
281
51cc5068 282static void init_once(void *foo)
1da177e4 283{
89bddce5 284 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 285
a35afb83 286 inode_init_once(&ei->vfs_inode);
1da177e4 287}
89bddce5 288
1da177e4
LT
289static int init_inodecache(void)
290{
291 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
292 sizeof(struct socket_alloc),
293 0,
294 (SLAB_HWCACHE_ALIGN |
295 SLAB_RECLAIM_ACCOUNT |
296 SLAB_MEM_SPREAD),
20c2df83 297 init_once);
1da177e4
LT
298 if (sock_inode_cachep == NULL)
299 return -ENOMEM;
300 return 0;
301}
302
b87221de 303static const struct super_operations sockfs_ops = {
c6d409cf
ED
304 .alloc_inode = sock_alloc_inode,
305 .destroy_inode = sock_destroy_inode,
306 .statfs = simple_statfs,
1da177e4
LT
307};
308
c23fbb6b
ED
309/*
310 * sockfs_dname() is called from d_path().
311 */
312static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
313{
314 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 315 d_inode(dentry)->i_ino);
c23fbb6b
ED
316}
317
3ba13d17 318static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 319 .d_dname = sockfs_dname,
1da177e4
LT
320};
321
c74a1cbb
AV
322static struct dentry *sockfs_mount(struct file_system_type *fs_type,
323 int flags, const char *dev_name, void *data)
324{
325 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
326 &sockfs_dentry_operations, SOCKFS_MAGIC);
327}
328
329static struct vfsmount *sock_mnt __read_mostly;
330
331static struct file_system_type sock_fs_type = {
332 .name = "sockfs",
333 .mount = sockfs_mount,
334 .kill_sb = kill_anon_super,
335};
336
1da177e4
LT
337/*
338 * Obtains the first available file descriptor and sets it up for use.
339 *
39d8c1b6
DM
340 * These functions create file structures and map them to fd space
341 * of the current process. On success it returns file descriptor
1da177e4
LT
342 * and file struct implicitly stored in sock->file.
343 * Note that another thread may close file descriptor before we return
344 * from this function. We use the fact that now we do not refer
345 * to socket after mapping. If one day we will need it, this
346 * function will increment ref. count on file by 1.
347 *
348 * In any case returned fd MAY BE not valid!
349 * This race condition is unavoidable
350 * with shared fd spaces, we cannot solve it inside kernel,
351 * but we take care of internal coherence yet.
352 */
353
aab174f0 354struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 355{
7cbe66b6 356 struct qstr name = { .name = "" };
2c48b9c4 357 struct path path;
7cbe66b6 358 struct file *file;
1da177e4 359
600e1779
MY
360 if (dname) {
361 name.name = dname;
362 name.len = strlen(name.name);
363 } else if (sock->sk) {
364 name.name = sock->sk->sk_prot_creator->name;
365 name.len = strlen(name.name);
366 }
4b936885 367 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
368 if (unlikely(!path.dentry))
369 return ERR_PTR(-ENOMEM);
2c48b9c4 370 path.mnt = mntget(sock_mnt);
39d8c1b6 371
2c48b9c4 372 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 373
2c48b9c4 374 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 375 &socket_file_ops);
39b65252 376 if (unlikely(IS_ERR(file))) {
cc3808f8 377 /* drop dentry, keep inode */
c5ef6035 378 ihold(d_inode(path.dentry));
2c48b9c4 379 path_put(&path);
39b65252 380 return file;
cc3808f8
AV
381 }
382
383 sock->file = file;
77d27200 384 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 385 file->private_data = sock;
28407630 386 return file;
39d8c1b6 387}
56b31d1c 388EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 389
56b31d1c 390static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
391{
392 struct file *newfile;
28407630
AV
393 int fd = get_unused_fd_flags(flags);
394 if (unlikely(fd < 0))
395 return fd;
39d8c1b6 396
aab174f0 397 newfile = sock_alloc_file(sock, flags, NULL);
28407630 398 if (likely(!IS_ERR(newfile))) {
39d8c1b6 399 fd_install(fd, newfile);
28407630
AV
400 return fd;
401 }
7cbe66b6 402
28407630
AV
403 put_unused_fd(fd);
404 return PTR_ERR(newfile);
1da177e4
LT
405}
406
406a3c63 407struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 408{
6cb153ca
BL
409 if (file->f_op == &socket_file_ops)
410 return file->private_data; /* set in sock_map_fd */
411
23bb80d2
ED
412 *err = -ENOTSOCK;
413 return NULL;
6cb153ca 414}
406a3c63 415EXPORT_SYMBOL(sock_from_file);
6cb153ca 416
1da177e4 417/**
c6d409cf 418 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
419 * @fd: file handle
420 * @err: pointer to an error code return
421 *
422 * The file handle passed in is locked and the socket it is bound
423 * too is returned. If an error occurs the err pointer is overwritten
424 * with a negative errno code and NULL is returned. The function checks
425 * for both invalid handles and passing a handle which is not a socket.
426 *
427 * On a success the socket object pointer is returned.
428 */
429
430struct socket *sockfd_lookup(int fd, int *err)
431{
432 struct file *file;
1da177e4
LT
433 struct socket *sock;
434
89bddce5
SH
435 file = fget(fd);
436 if (!file) {
1da177e4
LT
437 *err = -EBADF;
438 return NULL;
439 }
89bddce5 440
6cb153ca
BL
441 sock = sock_from_file(file, err);
442 if (!sock)
1da177e4 443 fput(file);
6cb153ca
BL
444 return sock;
445}
c6d409cf 446EXPORT_SYMBOL(sockfd_lookup);
1da177e4 447
6cb153ca
BL
448static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
449{
00e188ef 450 struct fd f = fdget(fd);
6cb153ca
BL
451 struct socket *sock;
452
3672558c 453 *err = -EBADF;
00e188ef
AV
454 if (f.file) {
455 sock = sock_from_file(f.file, err);
456 if (likely(sock)) {
457 *fput_needed = f.flags;
6cb153ca 458 return sock;
00e188ef
AV
459 }
460 fdput(f);
1da177e4 461 }
6cb153ca 462 return NULL;
1da177e4
LT
463}
464
600e1779
MY
465#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
466#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
467#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
468static ssize_t sockfs_getxattr(struct dentry *dentry,
469 const char *name, void *value, size_t size)
470{
471 const char *proto_name;
472 size_t proto_size;
473 int error;
474
475 error = -ENODATA;
476 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
477 proto_name = dentry->d_name.name;
478 proto_size = strlen(proto_name);
479
480 if (value) {
481 error = -ERANGE;
482 if (proto_size + 1 > size)
483 goto out;
484
485 strncpy(value, proto_name, proto_size + 1);
486 }
487 error = proto_size + 1;
488 }
489
490out:
491 return error;
492}
493
494static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
495 size_t size)
496{
497 ssize_t len;
498 ssize_t used = 0;
499
c5ef6035 500 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
501 if (len < 0)
502 return len;
503 used += len;
504 if (buffer) {
505 if (size < used)
506 return -ERANGE;
507 buffer += len;
508 }
509
510 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
511 used += len;
512 if (buffer) {
513 if (size < used)
514 return -ERANGE;
515 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
516 buffer += len;
517 }
518
519 return used;
520}
521
522static const struct inode_operations sockfs_inode_ops = {
523 .getxattr = sockfs_getxattr,
524 .listxattr = sockfs_listxattr,
525};
526
1da177e4
LT
527/**
528 * sock_alloc - allocate a socket
89bddce5 529 *
1da177e4
LT
530 * Allocate a new inode and socket object. The two are bound together
531 * and initialised. The socket is then returned. If we are out of inodes
532 * NULL is returned.
533 */
534
535static struct socket *sock_alloc(void)
536{
89bddce5
SH
537 struct inode *inode;
538 struct socket *sock;
1da177e4 539
a209dfc7 540 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
541 if (!inode)
542 return NULL;
543
544 sock = SOCKET_I(inode);
545
29a020d3 546 kmemcheck_annotate_bitfield(sock, type);
85fe4025 547 inode->i_ino = get_next_ino();
89bddce5 548 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
549 inode->i_uid = current_fsuid();
550 inode->i_gid = current_fsgid();
600e1779 551 inode->i_op = &sockfs_inode_ops;
1da177e4 552
19e8d69c 553 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
554 return sock;
555}
556
1da177e4
LT
557/**
558 * sock_release - close a socket
559 * @sock: socket to close
560 *
561 * The socket is released from the protocol stack if it has a release
562 * callback, and the inode is then released if the socket is bound to
89bddce5 563 * an inode not a file.
1da177e4 564 */
89bddce5 565
1da177e4
LT
566void sock_release(struct socket *sock)
567{
568 if (sock->ops) {
569 struct module *owner = sock->ops->owner;
570
571 sock->ops->release(sock);
572 sock->ops = NULL;
573 module_put(owner);
574 }
575
eaefd110 576 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 577 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 578
b09e786b
MP
579 if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
580 return;
581
19e8d69c 582 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
583 if (!sock->file) {
584 iput(SOCK_INODE(sock));
585 return;
586 }
89bddce5 587 sock->file = NULL;
1da177e4 588}
c6d409cf 589EXPORT_SYMBOL(sock_release);
1da177e4 590
67cc0d40 591void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
20d49473 592{
140c55d4
ED
593 u8 flags = *tx_flags;
594
b9f40e21 595 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
596 flags |= SKBTX_HW_TSTAMP;
597
b9f40e21 598 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
599 flags |= SKBTX_SW_TSTAMP;
600
e7fd2885 601 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
602 flags |= SKBTX_SCHED_TSTAMP;
603
e1c8a607 604 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
140c55d4 605 flags |= SKBTX_ACK_TSTAMP;
e7fd2885 606
140c55d4 607 *tx_flags = flags;
20d49473 608}
67cc0d40 609EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 610
d8725c86 611static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 612{
01e97e65 613 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
614 BUG_ON(ret == -EIOCBQUEUED);
615 return ret;
1da177e4
LT
616}
617
/*
 * Send @msg on @sock after consulting security_socket_sendmsg().
 * Returns the security hook's error if it is non-zero, otherwise the
 * result of the protocol's sendmsg().
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
626
627int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
628 struct kvec *vec, size_t num, size_t size)
629{
6aa24814 630 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 631 return sock_sendmsg(sock, msg);
1da177e4 632}
c6d409cf 633EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 634
92f37fd2
ED
635/*
636 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
637 */
638void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
639 struct sk_buff *skb)
640{
20d49473 641 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 642 struct scm_timestamping tss;
20d49473
PO
643 int empty = 1;
644 struct skb_shared_hwtstamps *shhwtstamps =
645 skb_hwtstamps(skb);
646
647 /* Race occurred between timestamp enabling and packet
648 receiving. Fill in the current time for now. */
649 if (need_software_tstamp && skb->tstamp.tv64 == 0)
650 __net_timestamp(skb);
651
652 if (need_software_tstamp) {
653 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
654 struct timeval tv;
655 skb_get_timestamp(skb, &tv);
656 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
657 sizeof(tv), &tv);
658 } else {
f24b9be5
WB
659 struct timespec ts;
660 skb_get_timestampns(skb, &ts);
20d49473 661 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 662 sizeof(ts), &ts);
20d49473
PO
663 }
664 }
665
f24b9be5 666 memset(&tss, 0, sizeof(tss));
c199105d 667 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 668 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 669 empty = 0;
4d276eb6 670 if (shhwtstamps &&
b9f40e21 671 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 672 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 673 empty = 0;
20d49473
PO
674 if (!empty)
675 put_cmsg(msg, SOL_SOCKET,
f24b9be5 676 SCM_TIMESTAMPING, sizeof(tss), &tss);
92f37fd2 677}
7c81fd8b
ACM
678EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
679
6e3e939f
JB
680void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
681 struct sk_buff *skb)
682{
683 int ack;
684
685 if (!sock_flag(sk, SOCK_WIFI_STATUS))
686 return;
687 if (!skb->wifi_acked_valid)
688 return;
689
690 ack = skb->wifi_acked;
691
692 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
693}
694EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
695
11165f14 696static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
697 struct sk_buff *skb)
3b885787 698{
744d5a3e 699 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 700 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 701 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
702}
703
/*
 * Convenience wrapper: add the timestamp control messages first, then
 * the drop-count control message, for @skb.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	/* timestamps */
	sock_recv_timestamp(msg, sk, skb);

	/* receive queue overflow count */
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 711
1b784140
YX
712static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
713 size_t size, int flags)
1da177e4 714{
1b784140 715 return sock->ops->recvmsg(sock, msg, size, flags);
1da177e4
LT
716}
717
1b784140
YX
718int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
719 int flags)
a2e27255
ACM
720{
721 int err = security_socket_recvmsg(sock, msg, size, flags);
722
1b784140 723 return err ?: sock_recvmsg_nosec(sock, msg, size, flags);
1da177e4 724}
c6d409cf 725EXPORT_SYMBOL(sock_recvmsg);
1da177e4 726
c1249c0a
ML
727/**
728 * kernel_recvmsg - Receive a message from a socket (kernel space)
729 * @sock: The socket to receive the message from
730 * @msg: Received message
731 * @vec: Input s/g array for message data
732 * @num: Size of input s/g array
733 * @size: Number of bytes to read
734 * @flags: Message flags (MSG_DONTWAIT, etc...)
735 *
736 * On return the msg structure contains the scatter/gather array passed in the
737 * vec argument. The array is modified so that it consists of the unfilled
738 * portion of the original array.
739 *
740 * The returned value is the total number of bytes received, or an error.
741 */
89bddce5
SH
742int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
743 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
744{
745 mm_segment_t oldfs = get_fs();
746 int result;
747
6aa24814 748 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 749 set_fs(KERNEL_DS);
1da177e4
LT
750 result = sock_recvmsg(sock, msg, size, flags);
751 set_fs(oldfs);
752 return result;
753}
c6d409cf 754EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 755
ce1d4d3e
CH
756static ssize_t sock_sendpage(struct file *file, struct page *page,
757 int offset, size_t size, loff_t *ppos, int more)
1da177e4 758{
1da177e4
LT
759 struct socket *sock;
760 int flags;
761
ce1d4d3e
CH
762 sock = file->private_data;
763
35f9c09f
ED
764 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
765 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
766 flags |= more;
ce1d4d3e 767
e6949583 768 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 769}
1da177e4 770
9c55e01c 771static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 772 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
773 unsigned int flags)
774{
775 struct socket *sock = file->private_data;
776
997b37da
RDC
777 if (unlikely(!sock->ops->splice_read))
778 return -EINVAL;
779
9c55e01c
JA
780 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
781}
782
8ae5e030 783static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 784{
6d652330
AV
785 struct file *file = iocb->ki_filp;
786 struct socket *sock = file->private_data;
0345f931 787 struct msghdr msg = {.msg_iter = *to,
788 .msg_iocb = iocb};
8ae5e030 789 ssize_t res;
ce1d4d3e 790
8ae5e030
AV
791 if (file->f_flags & O_NONBLOCK)
792 msg.msg_flags = MSG_DONTWAIT;
793
794 if (iocb->ki_pos != 0)
1da177e4 795 return -ESPIPE;
027445c3 796
66ee59af 797 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
798 return 0;
799
237dae88 800 res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags);
8ae5e030
AV
801 *to = msg.msg_iter;
802 return res;
1da177e4
LT
803}
804
8ae5e030 805static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 806{
6d652330
AV
807 struct file *file = iocb->ki_filp;
808 struct socket *sock = file->private_data;
0345f931 809 struct msghdr msg = {.msg_iter = *from,
810 .msg_iocb = iocb};
8ae5e030 811 ssize_t res;
1da177e4 812
8ae5e030 813 if (iocb->ki_pos != 0)
ce1d4d3e 814 return -ESPIPE;
027445c3 815
8ae5e030
AV
816 if (file->f_flags & O_NONBLOCK)
817 msg.msg_flags = MSG_DONTWAIT;
818
6d652330
AV
819 if (sock->type == SOCK_SEQPACKET)
820 msg.msg_flags |= MSG_EOR;
821
d8725c86 822 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
823 *from = msg.msg_iter;
824 return res;
1da177e4
LT
825}
826
1da177e4
LT
827/*
828 * Atomic setting of ioctl hooks to avoid race
829 * with module unload.
830 */
831
4a3e2f71 832static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 833static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 834
881d966b 835void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 836{
4a3e2f71 837 mutex_lock(&br_ioctl_mutex);
1da177e4 838 br_ioctl_hook = hook;
4a3e2f71 839 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
840}
841EXPORT_SYMBOL(brioctl_set);
842
4a3e2f71 843static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 844static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 845
881d966b 846void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 847{
4a3e2f71 848 mutex_lock(&vlan_ioctl_mutex);
1da177e4 849 vlan_ioctl_hook = hook;
4a3e2f71 850 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
851}
852EXPORT_SYMBOL(vlan_ioctl_set);
853
4a3e2f71 854static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 855static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 856
89bddce5 857void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 858{
4a3e2f71 859 mutex_lock(&dlci_ioctl_mutex);
1da177e4 860 dlci_ioctl_hook = hook;
4a3e2f71 861 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
862}
863EXPORT_SYMBOL(dlci_ioctl_set);
864
6b96018b
AB
865static long sock_do_ioctl(struct net *net, struct socket *sock,
866 unsigned int cmd, unsigned long arg)
867{
868 int err;
869 void __user *argp = (void __user *)arg;
870
871 err = sock->ops->ioctl(sock, cmd, arg);
872
873 /*
874 * If this ioctl is unknown try to hand it down
875 * to the NIC driver.
876 */
877 if (err == -ENOIOCTLCMD)
878 err = dev_ioctl(net, cmd, argp);
879
880 return err;
881}
882
1da177e4
LT
883/*
884 * With an ioctl, arg may well be a user mode pointer, but we don't know
885 * what to do with it - that's up to the protocol still.
886 */
887
888static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
889{
890 struct socket *sock;
881d966b 891 struct sock *sk;
1da177e4
LT
892 void __user *argp = (void __user *)arg;
893 int pid, err;
881d966b 894 struct net *net;
1da177e4 895
b69aee04 896 sock = file->private_data;
881d966b 897 sk = sock->sk;
3b1e0a65 898 net = sock_net(sk);
1da177e4 899 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 900 err = dev_ioctl(net, cmd, argp);
1da177e4 901 } else
3d23e349 902#ifdef CONFIG_WEXT_CORE
1da177e4 903 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 904 err = dev_ioctl(net, cmd, argp);
1da177e4 905 } else
3d23e349 906#endif
89bddce5 907 switch (cmd) {
1da177e4
LT
908 case FIOSETOWN:
909 case SIOCSPGRP:
910 err = -EFAULT;
911 if (get_user(pid, (int __user *)argp))
912 break;
e0b93edd
JL
913 f_setown(sock->file, pid, 1);
914 err = 0;
1da177e4
LT
915 break;
916 case FIOGETOWN:
917 case SIOCGPGRP:
609d7fa9 918 err = put_user(f_getown(sock->file),
89bddce5 919 (int __user *)argp);
1da177e4
LT
920 break;
921 case SIOCGIFBR:
922 case SIOCSIFBR:
923 case SIOCBRADDBR:
924 case SIOCBRDELBR:
925 err = -ENOPKG;
926 if (!br_ioctl_hook)
927 request_module("bridge");
928
4a3e2f71 929 mutex_lock(&br_ioctl_mutex);
89bddce5 930 if (br_ioctl_hook)
881d966b 931 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 932 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
933 break;
934 case SIOCGIFVLAN:
935 case SIOCSIFVLAN:
936 err = -ENOPKG;
937 if (!vlan_ioctl_hook)
938 request_module("8021q");
939
4a3e2f71 940 mutex_lock(&vlan_ioctl_mutex);
1da177e4 941 if (vlan_ioctl_hook)
881d966b 942 err = vlan_ioctl_hook(net, argp);
4a3e2f71 943 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 944 break;
1da177e4
LT
945 case SIOCADDDLCI:
946 case SIOCDELDLCI:
947 err = -ENOPKG;
948 if (!dlci_ioctl_hook)
949 request_module("dlci");
950
7512cbf6
PE
951 mutex_lock(&dlci_ioctl_mutex);
952 if (dlci_ioctl_hook)
1da177e4 953 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 954 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
955 break;
956 default:
6b96018b 957 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 958 break;
89bddce5 959 }
1da177e4
LT
960 return err;
961}
962
963int sock_create_lite(int family, int type, int protocol, struct socket **res)
964{
965 int err;
966 struct socket *sock = NULL;
89bddce5 967
1da177e4
LT
968 err = security_socket_create(family, type, protocol, 1);
969 if (err)
970 goto out;
971
972 sock = sock_alloc();
973 if (!sock) {
974 err = -ENOMEM;
975 goto out;
976 }
977
1da177e4 978 sock->type = type;
7420ed23
VY
979 err = security_socket_post_create(sock, family, type, protocol, 1);
980 if (err)
981 goto out_release;
982
1da177e4
LT
983out:
984 *res = sock;
985 return err;
7420ed23
VY
986out_release:
987 sock_release(sock);
988 sock = NULL;
989 goto out;
1da177e4 990}
c6d409cf 991EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
992
993/* No kernel lock held - perfect */
89bddce5 994static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 995{
cbf55001 996 unsigned int busy_flag = 0;
1da177e4
LT
997 struct socket *sock;
998
999 /*
89bddce5 1000 * We can't return errors to poll, so it's either yes or no.
1da177e4 1001 */
b69aee04 1002 sock = file->private_data;
2d48d67f 1003
cbf55001 1004 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1005 /* this socket can poll_ll so tell the system call */
cbf55001 1006 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1007
1008 /* once, only if requested by syscall */
cbf55001
ET
1009 if (wait && (wait->_key & POLL_BUSY_LOOP))
1010 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1011 }
1012
cbf55001 1013 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1014}
1015
89bddce5 1016static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1017{
b69aee04 1018 struct socket *sock = file->private_data;
1da177e4
LT
1019
1020 return sock->ops->mmap(file, sock, vma);
1021}
1022
/* release() entry point: releases the socket embedded in @inode. Always 0. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1028
1029/*
1030 * Update the socket async list
1031 *
1032 * Fasync_list locking strategy.
1033 *
1034 * 1. fasync_list is modified only under process context socket lock
1035 * i.e. under semaphore.
1036 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1037 * or under socket lock
1da177e4
LT
1038 */
1039
1040static int sock_fasync(int fd, struct file *filp, int on)
1041{
989a2979
ED
1042 struct socket *sock = filp->private_data;
1043 struct sock *sk = sock->sk;
eaefd110 1044 struct socket_wq *wq;
1da177e4 1045
989a2979 1046 if (sk == NULL)
1da177e4 1047 return -EINVAL;
1da177e4
LT
1048
1049 lock_sock(sk);
eaefd110
ED
1050 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1051 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1052
eaefd110 1053 if (!wq->fasync_list)
989a2979
ED
1054 sock_reset_flag(sk, SOCK_FASYNC);
1055 else
bcdce719 1056 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1057
989a2979 1058 release_sock(sk);
1da177e4
LT
1059 return 0;
1060}
1061
43815482 1062/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1063
1064int sock_wake_async(struct socket *sock, int how, int band)
1065{
43815482
ED
1066 struct socket_wq *wq;
1067
1068 if (!sock)
1069 return -1;
1070 rcu_read_lock();
1071 wq = rcu_dereference(sock->wq);
1072 if (!wq || !wq->fasync_list) {
1073 rcu_read_unlock();
1da177e4 1074 return -1;
43815482 1075 }
89bddce5 1076 switch (how) {
8d8ad9d7 1077 case SOCK_WAKE_WAITD:
1da177e4
LT
1078 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1079 break;
1080 goto call_kill;
8d8ad9d7 1081 case SOCK_WAKE_SPACE:
1da177e4
LT
1082 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1083 break;
1084 /* fall through */
8d8ad9d7 1085 case SOCK_WAKE_IO:
89bddce5 1086call_kill:
43815482 1087 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1088 break;
8d8ad9d7 1089 case SOCK_WAKE_URG:
43815482 1090 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1091 }
43815482 1092 rcu_read_unlock();
1da177e4
LT
1093 return 0;
1094}
c6d409cf 1095EXPORT_SYMBOL(sock_wake_async);
1da177e4 1096
721db93a 1097int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1098 struct socket **res, int kern)
1da177e4
LT
1099{
1100 int err;
1101 struct socket *sock;
55737fda 1102 const struct net_proto_family *pf;
1da177e4
LT
1103
1104 /*
89bddce5 1105 * Check protocol is in range
1da177e4
LT
1106 */
1107 if (family < 0 || family >= NPROTO)
1108 return -EAFNOSUPPORT;
1109 if (type < 0 || type >= SOCK_MAX)
1110 return -EINVAL;
1111
1112 /* Compatibility.
1113
1114 This uglymoron is moved from INET layer to here to avoid
1115 deadlock in module load.
1116 */
1117 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1118 static int warned;
1da177e4
LT
1119 if (!warned) {
1120 warned = 1;
3410f22e
YY
1121 pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1122 current->comm);
1da177e4
LT
1123 }
1124 family = PF_PACKET;
1125 }
1126
1127 err = security_socket_create(family, type, protocol, kern);
1128 if (err)
1129 return err;
89bddce5 1130
55737fda
SH
1131 /*
1132 * Allocate the socket and allow the family to set things up. if
1133 * the protocol is 0, the family is instructed to select an appropriate
1134 * default.
1135 */
1136 sock = sock_alloc();
1137 if (!sock) {
e87cc472 1138 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1139 return -ENFILE; /* Not exactly a match, but its the
1140 closest posix thing */
1141 }
1142
1143 sock->type = type;
1144
95a5afca 1145#ifdef CONFIG_MODULES
89bddce5
SH
1146 /* Attempt to load a protocol module if the find failed.
1147 *
1148 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1149 * requested real, full-featured networking support upon configuration.
1150 * Otherwise module support will break!
1151 */
190683a9 1152 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1153 request_module("net-pf-%d", family);
1da177e4
LT
1154#endif
1155
55737fda
SH
1156 rcu_read_lock();
1157 pf = rcu_dereference(net_families[family]);
1158 err = -EAFNOSUPPORT;
1159 if (!pf)
1160 goto out_release;
1da177e4
LT
1161
1162 /*
1163 * We will call the ->create function, that possibly is in a loadable
1164 * module, so we have to bump that loadable module refcnt first.
1165 */
55737fda 1166 if (!try_module_get(pf->owner))
1da177e4
LT
1167 goto out_release;
1168
55737fda
SH
1169 /* Now protected by module ref count */
1170 rcu_read_unlock();
1171
3f378b68 1172 err = pf->create(net, sock, protocol, kern);
55737fda 1173 if (err < 0)
1da177e4 1174 goto out_module_put;
a79af59e 1175
1da177e4
LT
1176 /*
1177 * Now to bump the refcnt of the [loadable] module that owns this
1178 * socket at sock_release time we decrement its refcnt.
1179 */
55737fda
SH
1180 if (!try_module_get(sock->ops->owner))
1181 goto out_module_busy;
1182
1da177e4
LT
1183 /*
1184 * Now that we're done with the ->create function, the [loadable]
1185 * module can have its refcnt decremented
1186 */
55737fda 1187 module_put(pf->owner);
7420ed23
VY
1188 err = security_socket_post_create(sock, family, type, protocol, kern);
1189 if (err)
3b185525 1190 goto out_sock_release;
55737fda 1191 *res = sock;
1da177e4 1192
55737fda
SH
1193 return 0;
1194
1195out_module_busy:
1196 err = -EAFNOSUPPORT;
1da177e4 1197out_module_put:
55737fda
SH
1198 sock->ops = NULL;
1199 module_put(pf->owner);
1200out_sock_release:
1da177e4 1201 sock_release(sock);
55737fda
SH
1202 return err;
1203
1204out_release:
1205 rcu_read_unlock();
1206 goto out_sock_release;
1da177e4 1207}
721db93a 1208EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1209
1210int sock_create(int family, int type, int protocol, struct socket **res)
1211{
1b8d7ae4 1212 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1213}
c6d409cf 1214EXPORT_SYMBOL(sock_create);
1da177e4
LT
1215
1216int sock_create_kern(int family, int type, int protocol, struct socket **res)
1217{
1b8d7ae4 1218 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1219}
c6d409cf 1220EXPORT_SYMBOL(sock_create_kern);
1da177e4 1221
3e0fa65f 1222SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1223{
1224 int retval;
1225 struct socket *sock;
a677a039
UD
1226 int flags;
1227
e38b36f3
UD
1228 /* Check the SOCK_* constants for consistency. */
1229 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1230 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1231 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1232 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1233
a677a039 1234 flags = type & ~SOCK_TYPE_MASK;
77d27200 1235 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1236 return -EINVAL;
1237 type &= SOCK_TYPE_MASK;
1da177e4 1238
aaca0bdc
UD
1239 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1240 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1241
1da177e4
LT
1242 retval = sock_create(family, type, protocol, &sock);
1243 if (retval < 0)
1244 goto out;
1245
77d27200 1246 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1247 if (retval < 0)
1248 goto out_release;
1249
1250out:
1251 /* It may be already another descriptor 8) Not kernel problem. */
1252 return retval;
1253
1254out_release:
1255 sock_release(sock);
1256 return retval;
1257}
1258
1259/*
1260 * Create a pair of connected sockets.
1261 */
1262
3e0fa65f
HC
1263SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1264 int __user *, usockvec)
1da177e4
LT
1265{
1266 struct socket *sock1, *sock2;
1267 int fd1, fd2, err;
db349509 1268 struct file *newfile1, *newfile2;
a677a039
UD
1269 int flags;
1270
1271 flags = type & ~SOCK_TYPE_MASK;
77d27200 1272 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1273 return -EINVAL;
1274 type &= SOCK_TYPE_MASK;
1da177e4 1275
aaca0bdc
UD
1276 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1277 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1278
1da177e4
LT
1279 /*
1280 * Obtain the first socket and check if the underlying protocol
1281 * supports the socketpair call.
1282 */
1283
1284 err = sock_create(family, type, protocol, &sock1);
1285 if (err < 0)
1286 goto out;
1287
1288 err = sock_create(family, type, protocol, &sock2);
1289 if (err < 0)
1290 goto out_release_1;
1291
1292 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1293 if (err < 0)
1da177e4
LT
1294 goto out_release_both;
1295
28407630 1296 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1297 if (unlikely(fd1 < 0)) {
1298 err = fd1;
db349509 1299 goto out_release_both;
bf3c23d1 1300 }
d73aa286 1301
28407630 1302 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1303 if (unlikely(fd2 < 0)) {
1304 err = fd2;
d73aa286 1305 goto out_put_unused_1;
28407630
AV
1306 }
1307
aab174f0 1308 newfile1 = sock_alloc_file(sock1, flags, NULL);
28407630
AV
1309 if (unlikely(IS_ERR(newfile1))) {
1310 err = PTR_ERR(newfile1);
d73aa286 1311 goto out_put_unused_both;
28407630
AV
1312 }
1313
aab174f0 1314 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1315 if (IS_ERR(newfile2)) {
1316 err = PTR_ERR(newfile2);
d73aa286 1317 goto out_fput_1;
db349509
AV
1318 }
1319
d73aa286
YD
1320 err = put_user(fd1, &usockvec[0]);
1321 if (err)
1322 goto out_fput_both;
1323
1324 err = put_user(fd2, &usockvec[1]);
1325 if (err)
1326 goto out_fput_both;
1327
157cf649 1328 audit_fd_pair(fd1, fd2);
d73aa286 1329
db349509
AV
1330 fd_install(fd1, newfile1);
1331 fd_install(fd2, newfile2);
1da177e4
LT
1332 /* fd1 and fd2 may be already another descriptors.
1333 * Not kernel problem.
1334 */
1335
d73aa286 1336 return 0;
1da177e4 1337
d73aa286
YD
1338out_fput_both:
1339 fput(newfile2);
1340 fput(newfile1);
1341 put_unused_fd(fd2);
1342 put_unused_fd(fd1);
1343 goto out;
1344
1345out_fput_1:
1346 fput(newfile1);
1347 put_unused_fd(fd2);
1348 put_unused_fd(fd1);
1349 sock_release(sock2);
1350 goto out;
1da177e4 1351
d73aa286
YD
1352out_put_unused_both:
1353 put_unused_fd(fd2);
1354out_put_unused_1:
1355 put_unused_fd(fd1);
1da177e4 1356out_release_both:
89bddce5 1357 sock_release(sock2);
1da177e4 1358out_release_1:
89bddce5 1359 sock_release(sock1);
1da177e4
LT
1360out:
1361 return err;
1362}
1363
1da177e4
LT
1364/*
1365 * Bind a name to a socket. Nothing much to do here since it's
1366 * the protocol's responsibility to handle the local address.
1367 *
1368 * We move the socket address to kernel space before we call
1369 * the protocol layer (having also checked the address is ok).
1370 */
1371
20f37034 1372SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1373{
1374 struct socket *sock;
230b1839 1375 struct sockaddr_storage address;
6cb153ca 1376 int err, fput_needed;
1da177e4 1377
89bddce5 1378 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1379 if (sock) {
43db362d 1380 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1381 if (err >= 0) {
1382 err = security_socket_bind(sock,
230b1839 1383 (struct sockaddr *)&address,
89bddce5 1384 addrlen);
6cb153ca
BL
1385 if (!err)
1386 err = sock->ops->bind(sock,
89bddce5 1387 (struct sockaddr *)
230b1839 1388 &address, addrlen);
1da177e4 1389 }
6cb153ca 1390 fput_light(sock->file, fput_needed);
89bddce5 1391 }
1da177e4
LT
1392 return err;
1393}
1394
1da177e4
LT
1395/*
1396 * Perform a listen. Basically, we allow the protocol to do anything
1397 * necessary for a listen, and if that works, we mark the socket as
1398 * ready for listening.
1399 */
1400
3e0fa65f 1401SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1402{
1403 struct socket *sock;
6cb153ca 1404 int err, fput_needed;
b8e1f9b5 1405 int somaxconn;
89bddce5
SH
1406
1407 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1408 if (sock) {
8efa6e93 1409 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1410 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1411 backlog = somaxconn;
1da177e4
LT
1412
1413 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1414 if (!err)
1415 err = sock->ops->listen(sock, backlog);
1da177e4 1416
6cb153ca 1417 fput_light(sock->file, fput_needed);
1da177e4
LT
1418 }
1419 return err;
1420}
1421
1da177e4
LT
1422/*
1423 * For accept, we attempt to create a new socket, set up the link
1424 * with the client, wake up the client, then return the new
1425 * connected fd. We collect the address of the connector in kernel
1426 * space and move it to user at the very end. This is unclean because
1427 * we open the socket then return an error.
1428 *
1429 * 1003.1g adds the ability to recvmsg() to query connection pending
1430 * status to recvmsg. We need to add that support in a way that's
1431 * clean when we restructure accept also.
1432 */
1433
20f37034
HC
1434SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1435 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1436{
1437 struct socket *sock, *newsock;
39d8c1b6 1438 struct file *newfile;
6cb153ca 1439 int err, len, newfd, fput_needed;
230b1839 1440 struct sockaddr_storage address;
1da177e4 1441
77d27200 1442 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1443 return -EINVAL;
1444
1445 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1446 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1447
6cb153ca 1448 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1449 if (!sock)
1450 goto out;
1451
1452 err = -ENFILE;
c6d409cf
ED
1453 newsock = sock_alloc();
1454 if (!newsock)
1da177e4
LT
1455 goto out_put;
1456
1457 newsock->type = sock->type;
1458 newsock->ops = sock->ops;
1459
1da177e4
LT
1460 /*
1461 * We don't need try_module_get here, as the listening socket (sock)
1462 * has the protocol module (sock->ops->owner) held.
1463 */
1464 __module_get(newsock->ops->owner);
1465
28407630 1466 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1467 if (unlikely(newfd < 0)) {
1468 err = newfd;
9a1875e6
DM
1469 sock_release(newsock);
1470 goto out_put;
39d8c1b6 1471 }
aab174f0 1472 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
28407630
AV
1473 if (unlikely(IS_ERR(newfile))) {
1474 err = PTR_ERR(newfile);
1475 put_unused_fd(newfd);
1476 sock_release(newsock);
1477 goto out_put;
1478 }
39d8c1b6 1479
a79af59e
FF
1480 err = security_socket_accept(sock, newsock);
1481 if (err)
39d8c1b6 1482 goto out_fd;
a79af59e 1483
1da177e4
LT
1484 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1485 if (err < 0)
39d8c1b6 1486 goto out_fd;
1da177e4
LT
1487
1488 if (upeer_sockaddr) {
230b1839 1489 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1490 &len, 2) < 0) {
1da177e4 1491 err = -ECONNABORTED;
39d8c1b6 1492 goto out_fd;
1da177e4 1493 }
43db362d 1494 err = move_addr_to_user(&address,
230b1839 1495 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1496 if (err < 0)
39d8c1b6 1497 goto out_fd;
1da177e4
LT
1498 }
1499
1500 /* File flags are not inherited via accept() unlike another OSes. */
1501
39d8c1b6
DM
1502 fd_install(newfd, newfile);
1503 err = newfd;
1da177e4 1504
1da177e4 1505out_put:
6cb153ca 1506 fput_light(sock->file, fput_needed);
1da177e4
LT
1507out:
1508 return err;
39d8c1b6 1509out_fd:
9606a216 1510 fput(newfile);
39d8c1b6 1511 put_unused_fd(newfd);
1da177e4
LT
1512 goto out_put;
1513}
1514
20f37034
HC
1515SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1516 int __user *, upeer_addrlen)
aaca0bdc 1517{
de11defe 1518 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1519}
1520
1da177e4
LT
1521/*
1522 * Attempt to connect to a socket with the server address. The address
1523 * is in user space so we verify it is OK and move it to kernel space.
1524 *
1525 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1526 * break bindings
1527 *
1528 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1529 * other SEQPACKET protocols that take time to connect() as it doesn't
1530 * include the -EINPROGRESS status for such sockets.
1531 */
1532
20f37034
HC
1533SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1534 int, addrlen)
1da177e4
LT
1535{
1536 struct socket *sock;
230b1839 1537 struct sockaddr_storage address;
6cb153ca 1538 int err, fput_needed;
1da177e4 1539
6cb153ca 1540 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1541 if (!sock)
1542 goto out;
43db362d 1543 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1544 if (err < 0)
1545 goto out_put;
1546
89bddce5 1547 err =
230b1839 1548 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1549 if (err)
1550 goto out_put;
1551
230b1839 1552 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1553 sock->file->f_flags);
1554out_put:
6cb153ca 1555 fput_light(sock->file, fput_needed);
1da177e4
LT
1556out:
1557 return err;
1558}
1559
1560/*
1561 * Get the local address ('name') of a socket object. Move the obtained
1562 * name to user space.
1563 */
1564
20f37034
HC
1565SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1566 int __user *, usockaddr_len)
1da177e4
LT
1567{
1568 struct socket *sock;
230b1839 1569 struct sockaddr_storage address;
6cb153ca 1570 int len, err, fput_needed;
89bddce5 1571
6cb153ca 1572 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1573 if (!sock)
1574 goto out;
1575
1576 err = security_socket_getsockname(sock);
1577 if (err)
1578 goto out_put;
1579
230b1839 1580 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1581 if (err)
1582 goto out_put;
43db362d 1583 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1584
1585out_put:
6cb153ca 1586 fput_light(sock->file, fput_needed);
1da177e4
LT
1587out:
1588 return err;
1589}
1590
1591/*
1592 * Get the remote address ('name') of a socket object. Move the obtained
1593 * name to user space.
1594 */
1595
20f37034
HC
1596SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1597 int __user *, usockaddr_len)
1da177e4
LT
1598{
1599 struct socket *sock;
230b1839 1600 struct sockaddr_storage address;
6cb153ca 1601 int len, err, fput_needed;
1da177e4 1602
89bddce5
SH
1603 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1604 if (sock != NULL) {
1da177e4
LT
1605 err = security_socket_getpeername(sock);
1606 if (err) {
6cb153ca 1607 fput_light(sock->file, fput_needed);
1da177e4
LT
1608 return err;
1609 }
1610
89bddce5 1611 err =
230b1839 1612 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1613 1);
1da177e4 1614 if (!err)
43db362d 1615 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1616 usockaddr_len);
6cb153ca 1617 fput_light(sock->file, fput_needed);
1da177e4
LT
1618 }
1619 return err;
1620}
1621
1622/*
1623 * Send a datagram to a given address. We move the address into kernel
1624 * space and check the user space data area is readable before invoking
1625 * the protocol.
1626 */
1627
3e0fa65f 1628SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1629 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1630 int, addr_len)
1da177e4
LT
1631{
1632 struct socket *sock;
230b1839 1633 struct sockaddr_storage address;
1da177e4
LT
1634 int err;
1635 struct msghdr msg;
1636 struct iovec iov;
6cb153ca 1637 int fput_needed;
6cb153ca 1638
602bd0e9
AV
1639 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1640 if (unlikely(err))
1641 return err;
de0fa95c
PE
1642 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1643 if (!sock)
4387ff75 1644 goto out;
6cb153ca 1645
89bddce5 1646 msg.msg_name = NULL;
89bddce5
SH
1647 msg.msg_control = NULL;
1648 msg.msg_controllen = 0;
1649 msg.msg_namelen = 0;
6cb153ca 1650 if (addr) {
43db362d 1651 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1652 if (err < 0)
1653 goto out_put;
230b1839 1654 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1655 msg.msg_namelen = addr_len;
1da177e4
LT
1656 }
1657 if (sock->file->f_flags & O_NONBLOCK)
1658 flags |= MSG_DONTWAIT;
1659 msg.msg_flags = flags;
d8725c86 1660 err = sock_sendmsg(sock, &msg);
1da177e4 1661
89bddce5 1662out_put:
de0fa95c 1663 fput_light(sock->file, fput_needed);
4387ff75 1664out:
1da177e4
LT
1665 return err;
1666}
1667
1668/*
89bddce5 1669 * Send a datagram down a socket.
1da177e4
LT
1670 */
1671
3e0fa65f 1672SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1673 unsigned int, flags)
1da177e4
LT
1674{
1675 return sys_sendto(fd, buff, len, flags, NULL, 0);
1676}
1677
1678/*
89bddce5 1679 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1680 * sender. We verify the buffers are writable and if needed move the
1681 * sender address from kernel to user space.
1682 */
1683
3e0fa65f 1684SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1685 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1686 int __user *, addr_len)
1da177e4
LT
1687{
1688 struct socket *sock;
1689 struct iovec iov;
1690 struct msghdr msg;
230b1839 1691 struct sockaddr_storage address;
89bddce5 1692 int err, err2;
6cb153ca
BL
1693 int fput_needed;
1694
602bd0e9
AV
1695 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1696 if (unlikely(err))
1697 return err;
de0fa95c 1698 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1699 if (!sock)
de0fa95c 1700 goto out;
1da177e4 1701
89bddce5
SH
1702 msg.msg_control = NULL;
1703 msg.msg_controllen = 0;
f3d33426
HFS
1704 /* Save some cycles and don't copy the address if not needed */
1705 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1706 /* We assume all kernel code knows the size of sockaddr_storage */
1707 msg.msg_namelen = 0;
1da177e4
LT
1708 if (sock->file->f_flags & O_NONBLOCK)
1709 flags |= MSG_DONTWAIT;
602bd0e9 1710 err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags);
1da177e4 1711
89bddce5 1712 if (err >= 0 && addr != NULL) {
43db362d 1713 err2 = move_addr_to_user(&address,
230b1839 1714 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1715 if (err2 < 0)
1716 err = err2;
1da177e4 1717 }
de0fa95c
PE
1718
1719 fput_light(sock->file, fput_needed);
4387ff75 1720out:
1da177e4
LT
1721 return err;
1722}
1723
1724/*
89bddce5 1725 * Receive a datagram from a socket.
1da177e4
LT
1726 */
1727
b7c0ddf5
JG
1728SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1729 unsigned int, flags)
1da177e4
LT
1730{
1731 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1732}
1733
1734/*
1735 * Set a socket option. Because we don't know the option lengths we have
1736 * to pass the user mode parameter for the protocols to sort out.
1737 */
1738
20f37034
HC
1739SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1740 char __user *, optval, int, optlen)
1da177e4 1741{
6cb153ca 1742 int err, fput_needed;
1da177e4
LT
1743 struct socket *sock;
1744
1745 if (optlen < 0)
1746 return -EINVAL;
89bddce5
SH
1747
1748 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1749 if (sock != NULL) {
1750 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1751 if (err)
1752 goto out_put;
1da177e4
LT
1753
1754 if (level == SOL_SOCKET)
89bddce5
SH
1755 err =
1756 sock_setsockopt(sock, level, optname, optval,
1757 optlen);
1da177e4 1758 else
89bddce5
SH
1759 err =
1760 sock->ops->setsockopt(sock, level, optname, optval,
1761 optlen);
6cb153ca
BL
1762out_put:
1763 fput_light(sock->file, fput_needed);
1da177e4
LT
1764 }
1765 return err;
1766}
1767
1768/*
1769 * Get a socket option. Because we don't know the option lengths we have
1770 * to pass a user mode parameter for the protocols to sort out.
1771 */
1772
20f37034
HC
1773SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1774 char __user *, optval, int __user *, optlen)
1da177e4 1775{
6cb153ca 1776 int err, fput_needed;
1da177e4
LT
1777 struct socket *sock;
1778
89bddce5
SH
1779 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1780 if (sock != NULL) {
6cb153ca
BL
1781 err = security_socket_getsockopt(sock, level, optname);
1782 if (err)
1783 goto out_put;
1da177e4
LT
1784
1785 if (level == SOL_SOCKET)
89bddce5
SH
1786 err =
1787 sock_getsockopt(sock, level, optname, optval,
1788 optlen);
1da177e4 1789 else
89bddce5
SH
1790 err =
1791 sock->ops->getsockopt(sock, level, optname, optval,
1792 optlen);
6cb153ca
BL
1793out_put:
1794 fput_light(sock->file, fput_needed);
1da177e4
LT
1795 }
1796 return err;
1797}
1798
1da177e4
LT
1799/*
1800 * Shutdown a socket.
1801 */
1802
754fe8d2 1803SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1804{
6cb153ca 1805 int err, fput_needed;
1da177e4
LT
1806 struct socket *sock;
1807
89bddce5
SH
1808 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1809 if (sock != NULL) {
1da177e4 1810 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1811 if (!err)
1812 err = sock->ops->shutdown(sock, how);
1813 fput_light(sock->file, fput_needed);
1da177e4
LT
1814 }
1815 return err;
1816}
1817
89bddce5 1818/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1819 * fields which are the same type (int / unsigned) on our platforms.
1820 */
/*
 * COMPAT_MSG(msg, member) yields the address of @member in either
 * <msg>_compat or <msg>, depending on MSG_CMSG_COMPAT in `flags`.
 * The expansion names `flags` and `<msg>_compat` directly, so both must
 * be in scope wherever these macros are used.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1824
c71d8ebe
TH
1825struct used_address {
1826 struct sockaddr_storage name;
1827 unsigned int name_len;
1828};
1829
da184284
AV
1830static int copy_msghdr_from_user(struct msghdr *kmsg,
1831 struct user_msghdr __user *umsg,
1832 struct sockaddr __user **save_addr,
1833 struct iovec **iov)
1661bf36 1834{
08adb7da
AV
1835 struct sockaddr __user *uaddr;
1836 struct iovec __user *uiov;
c0371da6 1837 size_t nr_segs;
08adb7da
AV
1838 ssize_t err;
1839
1840 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1841 __get_user(uaddr, &umsg->msg_name) ||
1842 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1843 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1844 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1845 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1846 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1847 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1848 return -EFAULT;
dbb490b9 1849
08adb7da 1850 if (!uaddr)
6a2a2b3a
AS
1851 kmsg->msg_namelen = 0;
1852
dbb490b9
ML
1853 if (kmsg->msg_namelen < 0)
1854 return -EINVAL;
1855
1661bf36 1856 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1857 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1858
1859 if (save_addr)
1860 *save_addr = uaddr;
1861
1862 if (uaddr && kmsg->msg_namelen) {
1863 if (!save_addr) {
1864 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1865 kmsg->msg_name);
1866 if (err < 0)
1867 return err;
1868 }
1869 } else {
1870 kmsg->msg_name = NULL;
1871 kmsg->msg_namelen = 0;
1872 }
1873
c0371da6 1874 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1875 return -EMSGSIZE;
1876
0345f931 1877 kmsg->msg_iocb = NULL;
1878
da184284
AV
1879 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1880 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1881}
1882
666547ff 1883static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1884 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1885 struct used_address *used_address)
1da177e4 1886{
89bddce5
SH
1887 struct compat_msghdr __user *msg_compat =
1888 (struct compat_msghdr __user *)msg;
230b1839 1889 struct sockaddr_storage address;
1da177e4 1890 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1891 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1892 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1893 /* 20 is size of ipv6_pktinfo */
1da177e4 1894 unsigned char *ctl_buf = ctl;
d8725c86 1895 int ctl_len;
08adb7da 1896 ssize_t err;
89bddce5 1897
08adb7da 1898 msg_sys->msg_name = &address;
1da177e4 1899
08449320 1900 if (MSG_CMSG_COMPAT & flags)
08adb7da 1901 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1902 else
08adb7da 1903 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1904 if (err < 0)
da184284 1905 return err;
1da177e4
LT
1906
1907 err = -ENOBUFS;
1908
228e548e 1909 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1910 goto out_freeiov;
228e548e 1911 ctl_len = msg_sys->msg_controllen;
1da177e4 1912 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1913 err =
228e548e 1914 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1915 sizeof(ctl));
1da177e4
LT
1916 if (err)
1917 goto out_freeiov;
228e548e
AB
1918 ctl_buf = msg_sys->msg_control;
1919 ctl_len = msg_sys->msg_controllen;
1da177e4 1920 } else if (ctl_len) {
89bddce5 1921 if (ctl_len > sizeof(ctl)) {
1da177e4 1922 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1923 if (ctl_buf == NULL)
1da177e4
LT
1924 goto out_freeiov;
1925 }
1926 err = -EFAULT;
1927 /*
228e548e 1928 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1929 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1930 * checking falls down on this.
1931 */
fb8621bb 1932 if (copy_from_user(ctl_buf,
228e548e 1933 (void __user __force *)msg_sys->msg_control,
89bddce5 1934 ctl_len))
1da177e4 1935 goto out_freectl;
228e548e 1936 msg_sys->msg_control = ctl_buf;
1da177e4 1937 }
228e548e 1938 msg_sys->msg_flags = flags;
1da177e4
LT
1939
1940 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1941 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1942 /*
1943 * If this is sendmmsg() and current destination address is same as
1944 * previously succeeded address, omit asking LSM's decision.
1945 * used_address->name_len is initialized to UINT_MAX so that the first
1946 * destination address never matches.
1947 */
bc909d9d
MD
1948 if (used_address && msg_sys->msg_name &&
1949 used_address->name_len == msg_sys->msg_namelen &&
1950 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1951 used_address->name_len)) {
d8725c86 1952 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1953 goto out_freectl;
1954 }
d8725c86 1955 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1956 /*
1957 * If this is sendmmsg() and sending to current destination address was
1958 * successful, remember it.
1959 */
1960 if (used_address && err >= 0) {
1961 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1962 if (msg_sys->msg_name)
1963 memcpy(&used_address->name, msg_sys->msg_name,
1964 used_address->name_len);
c71d8ebe 1965 }
1da177e4
LT
1966
1967out_freectl:
89bddce5 1968 if (ctl_buf != ctl)
1da177e4
LT
1969 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1970out_freeiov:
da184284 1971 kfree(iov);
228e548e
AB
1972 return err;
1973}
1974
1975/*
1976 * BSD sendmsg interface
1977 */
1978
666547ff 1979long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
1980{
1981 int fput_needed, err;
1982 struct msghdr msg_sys;
1be374a0
AL
1983 struct socket *sock;
1984
1be374a0 1985 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
1986 if (!sock)
1987 goto out;
1988
a7526eb5 1989 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 1990
6cb153ca 1991 fput_light(sock->file, fput_needed);
89bddce5 1992out:
1da177e4
LT
1993 return err;
1994}
1995
666547ff 1996SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
1997{
1998 if (flags & MSG_CMSG_COMPAT)
1999 return -EINVAL;
2000 return __sys_sendmsg(fd, msg, flags);
2001}
2002
228e548e
AB
2003/*
2004 * Linux sendmmsg interface
2005 */
2006
2007int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2008 unsigned int flags)
2009{
2010 int fput_needed, err, datagrams;
2011 struct socket *sock;
2012 struct mmsghdr __user *entry;
2013 struct compat_mmsghdr __user *compat_entry;
2014 struct msghdr msg_sys;
c71d8ebe 2015 struct used_address used_address;
228e548e 2016
98382f41
AB
2017 if (vlen > UIO_MAXIOV)
2018 vlen = UIO_MAXIOV;
228e548e
AB
2019
2020 datagrams = 0;
2021
2022 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2023 if (!sock)
2024 return err;
2025
c71d8ebe 2026 used_address.name_len = UINT_MAX;
228e548e
AB
2027 entry = mmsg;
2028 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2029 err = 0;
228e548e
AB
2030
2031 while (datagrams < vlen) {
228e548e 2032 if (MSG_CMSG_COMPAT & flags) {
666547ff 2033 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5 2034 &msg_sys, flags, &used_address);
228e548e
AB
2035 if (err < 0)
2036 break;
2037 err = __put_user(err, &compat_entry->msg_len);
2038 ++compat_entry;
2039 } else {
a7526eb5 2040 err = ___sys_sendmsg(sock,
666547ff 2041 (struct user_msghdr __user *)entry,
a7526eb5 2042 &msg_sys, flags, &used_address);
228e548e
AB
2043 if (err < 0)
2044 break;
2045 err = put_user(err, &entry->msg_len);
2046 ++entry;
2047 }
2048
2049 if (err)
2050 break;
2051 ++datagrams;
2052 }
2053
228e548e
AB
2054 fput_light(sock->file, fput_needed);
2055
728ffb86
AB
2056 /* We only return an error if no datagrams were able to be sent */
2057 if (datagrams != 0)
228e548e
AB
2058 return datagrams;
2059
228e548e
AB
2060 return err;
2061}
2062
2063SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2064 unsigned int, vlen, unsigned int, flags)
2065{
1be374a0
AL
2066 if (flags & MSG_CMSG_COMPAT)
2067 return -EINVAL;
228e548e
AB
2068 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2069}
2070
/*
 * ___sys_recvmsg - receive one message on @sock into the user msghdr @msg.
 *
 * @msg_sys is the caller-provided kernel msghdr that is filled from @msg
 * (via get_compat_msghdr() when MSG_CMSG_COMPAT is set in @flags, otherwise
 * via copy_msghdr_from_user()).  A non-zero @nosec selects
 * sock_recvmsg_nosec() instead of sock_recvmsg().
 *
 * On success the source address (if the caller supplied a buffer), the
 * resulting msg_flags and the consumed control length are written back to
 * user space and the value returned by the receive call is returned.
 * On failure a negative error code is returned.
 *
 * NOTE(review): COMPAT_NAMELEN() and COMPAT_FLAGS() are defined outside
 * this view; they presumably pick the compat or native field based on the
 * locals `flags` and `msg_compat` - confirm before renaming either local
 * or moving the `flags |= MSG_DONTWAIT` statement.
 */
666547ff 2071static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2072 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2073{
89bddce5
SH
2074 struct compat_msghdr __user *msg_compat =
2075 (struct compat_msghdr __user *)msg;
1da177e4 2076 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2077 struct iovec *iov = iovstack;
1da177e4 2078 unsigned long cmsg_ptr;
08adb7da
AV
2079 int total_len, len;
2080 ssize_t err;
1da177e4
LT
2081
2082 /* kernel mode address */
230b1839 2083 struct sockaddr_storage addr;
1da177e4
LT
2084
2085 /* user mode address pointers */
2086 struct sockaddr __user *uaddr;
08adb7da 2087 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2088
/* The protocol writes the peer address into the on-stack buffer. */
08adb7da 2089 msg_sys->msg_name = &addr;
1da177e4 2090
f3d33426 2091 if (MSG_CMSG_COMPAT & flags)
08adb7da 2092 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2093 else
08adb7da 2094 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2095 if (err < 0)
da184284
AV
2096 return err;
2097 total_len = iov_iter_count(&msg_sys->msg_iter);
1da177e4 2098
/* Remember where the control buffer started so the amount consumed can
 * be computed after the receive call. */
a2e27255
ACM
2099 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2100 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2101
f3d33426
HFS
2102 /* We assume all kernel code knows the size of sockaddr_storage */
2103 msg_sys->msg_namelen = 0;
2104
1da177e4
LT
2105 if (sock->file->f_flags & O_NONBLOCK)
2106 flags |= MSG_DONTWAIT;
a2e27255
ACM
2107 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2108 total_len, flags);
1da177e4
LT
2109 if (err < 0)
2110 goto out_freeiov;
/* Keep the receive result; err is reused for the copy-out steps below. */
2111 len = err;
2112
2113 if (uaddr != NULL) {
43db362d 2114 err = move_addr_to_user(&addr,
a2e27255 2115 msg_sys->msg_namelen, uaddr,
89bddce5 2116 uaddr_len);
1da177e4
LT
2117 if (err < 0)
2118 goto out_freeiov;
2119 }
/* MSG_CMSG_COMPAT is masked out of the flags reported to user space. */
a2e27255 2120 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2121 COMPAT_FLAGS(msg));
1da177e4
LT
2122 if (err)
2123 goto out_freeiov;
/* Report how far msg_control advanced, i.e. the control bytes written. */
2124 if (MSG_CMSG_COMPAT & flags)
a2e27255 2125 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2126 &msg_compat->msg_controllen);
2127 else
a2e27255 2128 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2129 &msg->msg_controllen);
2130 if (err)
2131 goto out_freeiov;
2132 err = len;
2133
2134out_freeiov:
da184284 2135 kfree(iov);
a2e27255
ACM
2136 return err;
2137}
2138
2139/*
2140 * BSD recvmsg interface
2141 */
2142
666547ff 2143long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2144{
2145 int fput_needed, err;
2146 struct msghdr msg_sys;
1be374a0
AL
2147 struct socket *sock;
2148
1be374a0 2149 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2150 if (!sock)
2151 goto out;
2152
a7526eb5 2153 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2154
6cb153ca 2155 fput_light(sock->file, fput_needed);
1da177e4
LT
2156out:
2157 return err;
2158}
2159
666547ff 2160SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2161 unsigned int, flags)
2162{
2163 if (flags & MSG_CMSG_COMPAT)
2164 return -EINVAL;
2165 return __sys_recvmsg(fd, msg, flags);
2166}
2167
a2e27255
ACM
2168/*
2169 * Linux recvmmsg interface
2170 */
2171
2172int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2173 unsigned int flags, struct timespec *timeout)
2174{
2175 int fput_needed, err, datagrams;
2176 struct socket *sock;
2177 struct mmsghdr __user *entry;
d7256d0e 2178 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2179 struct msghdr msg_sys;
2180 struct timespec end_time;
2181
2182 if (timeout &&
2183 poll_select_set_timeout(&end_time, timeout->tv_sec,
2184 timeout->tv_nsec))
2185 return -EINVAL;
2186
2187 datagrams = 0;
2188
2189 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2190 if (!sock)
2191 return err;
2192
2193 err = sock_error(sock->sk);
2194 if (err)
2195 goto out_put;
2196
2197 entry = mmsg;
d7256d0e 2198 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2199
2200 while (datagrams < vlen) {
2201 /*
2202 * No need to ask LSM for more than the first datagram.
2203 */
d7256d0e 2204 if (MSG_CMSG_COMPAT & flags) {
666547ff 2205 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2206 &msg_sys, flags & ~MSG_WAITFORONE,
2207 datagrams);
d7256d0e
JMG
2208 if (err < 0)
2209 break;
2210 err = __put_user(err, &compat_entry->msg_len);
2211 ++compat_entry;
2212 } else {
a7526eb5 2213 err = ___sys_recvmsg(sock,
666547ff 2214 (struct user_msghdr __user *)entry,
a7526eb5
AL
2215 &msg_sys, flags & ~MSG_WAITFORONE,
2216 datagrams);
d7256d0e
JMG
2217 if (err < 0)
2218 break;
2219 err = put_user(err, &entry->msg_len);
2220 ++entry;
2221 }
2222
a2e27255
ACM
2223 if (err)
2224 break;
a2e27255
ACM
2225 ++datagrams;
2226
71c5c159
BB
2227 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2228 if (flags & MSG_WAITFORONE)
2229 flags |= MSG_DONTWAIT;
2230
a2e27255
ACM
2231 if (timeout) {
2232 ktime_get_ts(timeout);
2233 *timeout = timespec_sub(end_time, *timeout);
2234 if (timeout->tv_sec < 0) {
2235 timeout->tv_sec = timeout->tv_nsec = 0;
2236 break;
2237 }
2238
2239 /* Timeout, return less than vlen datagrams */
2240 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2241 break;
2242 }
2243
2244 /* Out of band data, return right away */
2245 if (msg_sys.msg_flags & MSG_OOB)
2246 break;
2247 }
2248
2249out_put:
2250 fput_light(sock->file, fput_needed);
1da177e4 2251
a2e27255
ACM
2252 if (err == 0)
2253 return datagrams;
2254
2255 if (datagrams != 0) {
2256 /*
2257 * We may return less entries than requested (vlen) if the
2258 * sock is non block and there aren't enough datagrams...
2259 */
2260 if (err != -EAGAIN) {
2261 /*
2262 * ... or if recvmsg returns an error after we
2263 * received some datagrams, where we record the
2264 * error to return on the next call or if the
2265 * app asks about it using getsockopt(SO_ERROR).
2266 */
2267 sock->sk->sk_err = -err;
2268 }
2269
2270 return datagrams;
2271 }
2272
2273 return err;
2274}
2275
2276SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2277 unsigned int, vlen, unsigned int, flags,
2278 struct timespec __user *, timeout)
2279{
2280 int datagrams;
2281 struct timespec timeout_sys;
2282
1be374a0
AL
2283 if (flags & MSG_CMSG_COMPAT)
2284 return -EINVAL;
2285
a2e27255
ACM
2286 if (!timeout)
2287 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2288
2289 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2290 return -EFAULT;
2291
2292 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2293
2294 if (datagrams > 0 &&
2295 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2296 datagrams = -EFAULT;
2297
2298 return datagrams;
2299}
2300
2301#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
2302/* Argument list sizes for sys_socketcall */
/*
 * nargs[] is indexed by the socketcall `call` number and holds the number
 * of BYTES of arguments to copy from user space for that call; AL(n)
 * expands to n * sizeof(unsigned long).  Entry 0 is AL(0); socketcall
 * rejects call numbers below 1 before indexing.
 * NOTE(review): the position-to-SYS_* mapping relies on the numeric values
 * of the SYS_* constants, which are defined outside this view - verify
 * before reordering or extending the table.
 */
2303#define AL(x) ((x) * sizeof(unsigned long))
228e548e 2304static const unsigned char nargs[21] = {
c6d409cf
ED
2305 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2306 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2307 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
228e548e 2308 AL(4), AL(5), AL(4)
89bddce5
SH
2309};
2310
1da177e4
LT
2311#undef AL
2312
2313/*
89bddce5 2314 * System call vectors.
1da177e4
LT
2315 *
2316 * Argument checking cleaned up. Saved 20% in size.
2317 * This function doesn't need to set the kernel lock because
89bddce5 2318 * it is set by the callees.
1da177e4
LT
2319 */
2320
3e0fa65f 2321SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2322{
2950fa9d 2323 unsigned long a[AUDITSC_ARGS];
89bddce5 2324 unsigned long a0, a1;
1da177e4 2325 int err;
47379052 2326 unsigned int len;
1da177e4 2327
228e548e 2328 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2329 return -EINVAL;
2330
47379052
AV
2331 len = nargs[call];
2332 if (len > sizeof(a))
2333 return -EINVAL;
2334
1da177e4 2335 /* copy_from_user should be SMP safe. */
47379052 2336 if (copy_from_user(a, args, len))
1da177e4 2337 return -EFAULT;
3ec3b2fb 2338
2950fa9d
CG
2339 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2340 if (err)
2341 return err;
3ec3b2fb 2342
89bddce5
SH
2343 a0 = a[0];
2344 a1 = a[1];
2345
2346 switch (call) {
2347 case SYS_SOCKET:
2348 err = sys_socket(a0, a1, a[2]);
2349 break;
2350 case SYS_BIND:
2351 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2352 break;
2353 case SYS_CONNECT:
2354 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2355 break;
2356 case SYS_LISTEN:
2357 err = sys_listen(a0, a1);
2358 break;
2359 case SYS_ACCEPT:
de11defe
UD
2360 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2361 (int __user *)a[2], 0);
89bddce5
SH
2362 break;
2363 case SYS_GETSOCKNAME:
2364 err =
2365 sys_getsockname(a0, (struct sockaddr __user *)a1,
2366 (int __user *)a[2]);
2367 break;
2368 case SYS_GETPEERNAME:
2369 err =
2370 sys_getpeername(a0, (struct sockaddr __user *)a1,
2371 (int __user *)a[2]);
2372 break;
2373 case SYS_SOCKETPAIR:
2374 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2375 break;
2376 case SYS_SEND:
2377 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2378 break;
2379 case SYS_SENDTO:
2380 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2381 (struct sockaddr __user *)a[4], a[5]);
2382 break;
2383 case SYS_RECV:
2384 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2385 break;
2386 case SYS_RECVFROM:
2387 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2388 (struct sockaddr __user *)a[4],
2389 (int __user *)a[5]);
2390 break;
2391 case SYS_SHUTDOWN:
2392 err = sys_shutdown(a0, a1);
2393 break;
2394 case SYS_SETSOCKOPT:
2395 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2396 break;
2397 case SYS_GETSOCKOPT:
2398 err =
2399 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2400 (int __user *)a[4]);
2401 break;
2402 case SYS_SENDMSG:
666547ff 2403 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2404 break;
228e548e
AB
2405 case SYS_SENDMMSG:
2406 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2407 break;
89bddce5 2408 case SYS_RECVMSG:
666547ff 2409 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2410 break;
a2e27255
ACM
2411 case SYS_RECVMMSG:
2412 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2413 (struct timespec __user *)a[4]);
2414 break;
de11defe
UD
2415 case SYS_ACCEPT4:
2416 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2417 (int __user *)a[2], a[3]);
aaca0bdc 2418 break;
89bddce5
SH
2419 default:
2420 err = -EINVAL;
2421 break;
1da177e4
LT
2422 }
2423 return err;
2424}
2425
89bddce5 2426#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2427
55737fda
SH
2428/**
2429 * sock_register - add a socket protocol handler
2430 * @ops: description of protocol
2431 *
1da177e4
LT
2432 * This function is called by a protocol handler that wants to
2433 * advertise its address family, and have it linked into the
e793c0f7 2434 * socket interface. The value ops->family corresponds to the
55737fda 2435 * socket system call protocol family.
1da177e4 2436 */
f0fd27d4 2437int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2438{
2439 int err;
2440
2441 if (ops->family >= NPROTO) {
3410f22e 2442 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2443 return -ENOBUFS;
2444 }
55737fda
SH
2445
2446 spin_lock(&net_family_lock);
190683a9
ED
2447 if (rcu_dereference_protected(net_families[ops->family],
2448 lockdep_is_held(&net_family_lock)))
55737fda
SH
2449 err = -EEXIST;
2450 else {
cf778b00 2451 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2452 err = 0;
2453 }
55737fda
SH
2454 spin_unlock(&net_family_lock);
2455
3410f22e 2456 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2457 return err;
2458}
c6d409cf 2459EXPORT_SYMBOL(sock_register);
1da177e4 2460
55737fda
SH
2461/**
2462 * sock_unregister - remove a protocol handler
2463 * @family: protocol family to remove
2464 *
1da177e4
LT
2465 * This function is called by a protocol handler that wants to
2466 * remove its address family, and have it unlinked from the
55737fda
SH
2467 * new socket creation.
2468 *
2469 * If protocol handler is a module, then it can use module reference
2470 * counts to protect against new references. If protocol handler is not
2471 * a module then it needs to provide its own protection in
2472 * the ops->create routine.
1da177e4 2473 */
f0fd27d4 2474void sock_unregister(int family)
1da177e4 2475{
f0fd27d4 2476 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2477
55737fda 2478 spin_lock(&net_family_lock);
a9b3cd7f 2479 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2480 spin_unlock(&net_family_lock);
2481
2482 synchronize_rcu();
2483
3410f22e 2484 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2485}
c6d409cf 2486EXPORT_SYMBOL(sock_unregister);
1da177e4 2487
77d76ea3 2488static int __init sock_init(void)
1da177e4 2489{
b3e19d92 2490 int err;
2ca794e5
EB
2491 /*
2492 * Initialize the network sysctl infrastructure.
2493 */
2494 err = net_sysctl_init();
2495 if (err)
2496 goto out;
b3e19d92 2497
1da177e4 2498 /*
89bddce5 2499 * Initialize skbuff SLAB cache
1da177e4
LT
2500 */
2501 skb_init();
1da177e4
LT
2502
2503 /*
89bddce5 2504 * Initialize the protocols module.
1da177e4
LT
2505 */
2506
2507 init_inodecache();
b3e19d92
NP
2508
2509 err = register_filesystem(&sock_fs_type);
2510 if (err)
2511 goto out_fs;
1da177e4 2512 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2513 if (IS_ERR(sock_mnt)) {
2514 err = PTR_ERR(sock_mnt);
2515 goto out_mount;
2516 }
77d76ea3
AK
2517
2518 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2519 */
2520
2521#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2522 err = netfilter_init();
2523 if (err)
2524 goto out;
1da177e4 2525#endif
cbeb321a 2526
408eccce 2527 ptp_classifier_init();
c1f19b51 2528
b3e19d92
NP
2529out:
2530 return err;
2531
2532out_mount:
2533 unregister_filesystem(&sock_fs_type);
2534out_fs:
2535 goto out;
1da177e4
LT
2536}
2537
77d76ea3
AK
2538core_initcall(sock_init); /* early initcall */
2539
1da177e4
LT
2540#ifdef CONFIG_PROC_FS
2541void socket_seq_show(struct seq_file *seq)
2542{
2543 int cpu;
2544 int counter = 0;
2545
6f912042 2546 for_each_possible_cpu(cpu)
89bddce5 2547 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2548
2549 /* It can be negative, by the way. 8) */
2550 if (counter < 0)
2551 counter = 0;
2552
2553 seq_printf(seq, "sockets: used %d\n", counter);
2554}
89bddce5 2555#endif /* CONFIG_PROC_FS */
1da177e4 2556
89bbfc95 2557#ifdef CONFIG_COMPAT
6b96018b 2558static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2559 unsigned int cmd, void __user *up)
7a229387 2560{
7a229387
AB
2561 mm_segment_t old_fs = get_fs();
2562 struct timeval ktv;
2563 int err;
2564
2565 set_fs(KERNEL_DS);
6b96018b 2566 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2567 set_fs(old_fs);
644595f8 2568 if (!err)
ed6fe9d6 2569 err = compat_put_timeval(&ktv, up);
644595f8 2570
7a229387
AB
2571 return err;
2572}
2573
6b96018b 2574static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2575 unsigned int cmd, void __user *up)
7a229387 2576{
7a229387
AB
2577 mm_segment_t old_fs = get_fs();
2578 struct timespec kts;
2579 int err;
2580
2581 set_fs(KERNEL_DS);
6b96018b 2582 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2583 set_fs(old_fs);
644595f8 2584 if (!err)
ed6fe9d6 2585 err = compat_put_timespec(&kts, up);
644595f8 2586
7a229387
AB
2587 return err;
2588}
2589
6b96018b 2590static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2591{
2592 struct ifreq __user *uifr;
2593 int err;
2594
2595 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2596 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2597 return -EFAULT;
2598
6b96018b 2599 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2600 if (err)
2601 return err;
2602
6b96018b 2603 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2604 return -EFAULT;
2605
2606 return 0;
2607}
2608
/*
 * dev_ifconf - compat wrapper for SIOCGIFCONF.
 *
 * Reads the compat ifconf at @uifc32, builds a native struct ifconf (plus
 * a native ifreq array when the caller supplied a buffer) in space from
 * compat_alloc_user_space(), runs dev_ioctl(SIOCGIFCONF) on it and then
 * copies the resulting entries and length back in compat layout.
 *
 * Returns 0 on success, -EFAULT on any user-copy failure, or the error
 * returned by dev_ioctl().
 */
6b96018b 2609static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2610{
6b96018b 2611 struct compat_ifconf ifc32;
7a229387
AB
2612 struct ifconf ifc;
2613 struct ifconf __user *uifc;
6b96018b 2614 struct compat_ifreq __user *ifr32;
7a229387
AB
2615 struct ifreq __user *ifr;
2616 unsigned int i, j;
2617 int err;
2618
6b96018b 2619 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2620 return -EFAULT;
2621
43da5f2e 2622 memset(&ifc, 0, sizeof(ifc));
/* A zero buffer pointer means no entry buffer: pass a NULL ifc_req with
 * zero length to the ioctl. */
7a229387
AB
2623 if (ifc32.ifcbuf == 0) {
2624 ifc32.ifc_len = 0;
2625 ifc.ifc_len = 0;
2626 ifc.ifc_req = NULL;
2627 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2628 } else {
/* Size the native array for one more entry than the compat buffer holds. */
c6d409cf
ED
2629 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2630 sizeof(struct ifreq);
7a229387
AB
2631 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2632 ifc.ifc_len = len;
/* The native ifreq array lives directly behind the struct ifconf. */
2633 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2634 ifr32 = compat_ptr(ifc32.ifcbuf);
/* NOTE(review): i is unsigned while ifc32.ifc_len's type is declared
 * outside this view; if it is signed, a negative length is compared as a
 * large unsigned value here - confirm. */
c6d409cf 2635 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2636 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2637 return -EFAULT;
2638 ifr++;
2639 ifr32++;
2640 }
2641 }
2642 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2643 return -EFAULT;
2644
6b96018b 2645 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2646 if (err)
2647 return err;
2648
/* Re-read the header: the ioctl updates ifc_len. */
2649 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2650 return -EFAULT;
2651
2652 ifr = ifc.ifc_req;
2653 ifr32 = compat_ptr(ifc32.ifcbuf);
/* Copy entries back while both the compat buffer (i) and the native
 * result (j) have room; i ends as the number of compat bytes written. */
2654 for (i = 0, j = 0;
c6d409cf
ED
2655 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2656 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2657 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2658 return -EFAULT;
2659 ifr32++;
2660 ifr++;
2661 }
2662
2663 if (ifc32.ifcbuf == 0) {
2664 /* Translate from 64-bit structure multiple to
2665 * a 32-bit one.
2666 */
2667 i = ifc.ifc_len;
6b96018b 2668 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2669 ifc32.ifc_len = i;
2670 } else {
2671 ifc32.ifc_len = i;
2672 }
6b96018b 2673 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2674 return -EFAULT;
2675
2676 return 0;
2677}
2678
/*
 * ethtool_ioctl - compat wrapper for SIOCETHTOOL.
 *
 * Reads the ethtool command word through the data pointer stored in the
 * compat ifreq.  For the rxnfc commands listed in the switch below, the
 * compat structure is converted into a native struct ethtool_rxnfc placed
 * behind a native ifreq in compat_alloc_user_space() memory (convert_in),
 * and for most of them the result is converted back afterwards
 * (convert_out).  All other commands pass the original data pointer
 * through unchanged.
 *
 * Returns 0 on success, -EFAULT on a user-copy failure, -ENOMEM when the
 * requested rule count is too large, or the error from dev_ioctl().
 */
6b96018b 2679static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2680{
3a7da39d
BH
2681 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2682 bool convert_in = false, convert_out = false;
2683 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2684 struct ethtool_rxnfc __user *rxnfc;
7a229387 2685 struct ifreq __user *ifr;
3a7da39d
BH
2686 u32 rule_cnt = 0, actual_rule_cnt;
2687 u32 ethcmd;
7a229387 2688 u32 data;
3a7da39d 2689 int ret;
7a229387 2690
3a7da39d
BH
2691 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2692 return -EFAULT;
7a229387 2693
3a7da39d
BH
2694 compat_rxnfc = compat_ptr(data);
2695
2696 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2697 return -EFAULT;
2698
3a7da39d
BH
2699 /* Most ethtool structures are defined without padding.
2700 * Unfortunately struct ethtool_rxnfc is an exception.
2701 */
/* The cases fall through on purpose: each one adds its share of buffer
 * size and conversion flags to those of the cases below it. */
2702 switch (ethcmd) {
2703 default:
2704 break;
2705 case ETHTOOL_GRXCLSRLALL:
2706 /* Buffer size is variable */
2707 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2708 return -EFAULT;
2709 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2710 return -ENOMEM;
2711 buf_size += rule_cnt * sizeof(u32);
2712 /* fall through */
2713 case ETHTOOL_GRXRINGS:
2714 case ETHTOOL_GRXCLSRLCNT:
2715 case ETHTOOL_GRXCLSRULE:
55664f32 2716 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2717 convert_out = true;
2718 /* fall through */
2719 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2720 buf_size += sizeof(struct ethtool_rxnfc);
2721 convert_in = true;
2722 break;
2723 }
2724
/* Scratch layout: native ifreq first, then (8-byte aligned) the native
 * ethtool_rxnfc, then any rule locations. */
2725 ifr = compat_alloc_user_space(buf_size);
954b1244 2726 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2727
2728 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2729 return -EFAULT;
2730
/* Point the native ifreq at the converted copy, or at the caller's
 * original data when no conversion is needed. */
3a7da39d
BH
2731 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2732 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2733 return -EFAULT;
2734
3a7da39d 2735 if (convert_in) {
127fe533 2736 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2737 * fs.ring_cookie and at the end of fs, but nowhere else.
2738 */
127fe533
AD
2739 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2740 sizeof(compat_rxnfc->fs.m_ext) !=
2741 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2742 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2743 BUILD_BUG_ON(
2744 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2745 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2746 offsetof(struct ethtool_rxnfc, fs.location) -
2747 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2748
/* Three piecewise copies: start of struct through fs.m_ext,
 * fs.ring_cookie through fs.location, then rule_cnt. */
2749 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2750 (void __user *)(&rxnfc->fs.m_ext + 1) -
2751 (void __user *)rxnfc) ||
3a7da39d
BH
2752 copy_in_user(&rxnfc->fs.ring_cookie,
2753 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2754 (void __user *)(&rxnfc->fs.location + 1) -
2755 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2756 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2757 sizeof(rxnfc->rule_cnt)))
2758 return -EFAULT;
2759 }
2760
2761 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2762 if (ret)
2763 return ret;
2764
2765 if (convert_out) {
/* Mirror of the convert_in copies, native -> compat. */
2766 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2767 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2768 (const void __user *)rxnfc) ||
3a7da39d
BH
2769 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2770 &rxnfc->fs.ring_cookie,
954b1244
SH
2771 (const void __user *)(&rxnfc->fs.location + 1) -
2772 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2773 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2774 sizeof(rxnfc->rule_cnt)))
2775 return -EFAULT;
2776
2777 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2778 /* As an optimisation, we only copy the actual
2779 * number of rules that the underlying
2780 * function returned. Since Mallory might
2781 * change the rule count in user memory, we
2782 * check that it is less than the rule count
2783 * originally given (as the user buffer size),
2784 * which has been range-checked.
2785 */
2786 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2787 return -EFAULT;
2788 if (actual_rule_cnt < rule_cnt)
2789 rule_cnt = actual_rule_cnt;
2790 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2791 &rxnfc->rule_locs[0],
2792 rule_cnt * sizeof(u32)))
2793 return -EFAULT;
2794 }
2795 }
2796
2797 return 0;
7a229387
AB
2798}
2799
7a50a240
AB
2800static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2801{
2802 void __user *uptr;
2803 compat_uptr_t uptr32;
2804 struct ifreq __user *uifr;
2805
c6d409cf 2806 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2807 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2808 return -EFAULT;
2809
2810 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2811 return -EFAULT;
2812
2813 uptr = compat_ptr(uptr32);
2814
2815 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2816 return -EFAULT;
2817
2818 return dev_ioctl(net, SIOCWANDEV, uifr);
2819}
2820
6b96018b
AB
2821static int bond_ioctl(struct net *net, unsigned int cmd,
2822 struct compat_ifreq __user *ifr32)
7a229387
AB
2823{
2824 struct ifreq kifr;
7a229387
AB
2825 mm_segment_t old_fs;
2826 int err;
7a229387
AB
2827
2828 switch (cmd) {
2829 case SIOCBONDENSLAVE:
2830 case SIOCBONDRELEASE:
2831 case SIOCBONDSETHWADDR:
2832 case SIOCBONDCHANGEACTIVE:
6b96018b 2833 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2834 return -EFAULT;
2835
2836 old_fs = get_fs();
c6d409cf 2837 set_fs(KERNEL_DS);
c3f52ae6 2838 err = dev_ioctl(net, cmd,
2839 (struct ifreq __user __force *) &kifr);
c6d409cf 2840 set_fs(old_fs);
7a229387
AB
2841
2842 return err;
7a229387 2843 default:
07d106d0 2844 return -ENOIOCTLCMD;
ccbd6a5a 2845 }
7a229387
AB
2846}
2847
590d4693
BH
2848/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2849static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2850 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2851{
2852 struct ifreq __user *u_ifreq64;
7a229387
AB
2853 char tmp_buf[IFNAMSIZ];
2854 void __user *data64;
2855 u32 data32;
2856
2857 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2858 IFNAMSIZ))
2859 return -EFAULT;
417c3522 2860 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2861 return -EFAULT;
2862 data64 = compat_ptr(data32);
2863
2864 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2865
7a229387
AB
2866 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2867 IFNAMSIZ))
2868 return -EFAULT;
417c3522 2869 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2870 return -EFAULT;
2871
6b96018b 2872 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2873}
2874
6b96018b
AB
2875static int dev_ifsioc(struct net *net, struct socket *sock,
2876 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2877{
a2116ed2 2878 struct ifreq __user *uifr;
7a229387
AB
2879 int err;
2880
a2116ed2
AB
2881 uifr = compat_alloc_user_space(sizeof(*uifr));
2882 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2883 return -EFAULT;
2884
2885 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2886
7a229387
AB
2887 if (!err) {
2888 switch (cmd) {
2889 case SIOCGIFFLAGS:
2890 case SIOCGIFMETRIC:
2891 case SIOCGIFMTU:
2892 case SIOCGIFMEM:
2893 case SIOCGIFHWADDR:
2894 case SIOCGIFINDEX:
2895 case SIOCGIFADDR:
2896 case SIOCGIFBRDADDR:
2897 case SIOCGIFDSTADDR:
2898 case SIOCGIFNETMASK:
fab2532b 2899 case SIOCGIFPFLAGS:
7a229387 2900 case SIOCGIFTXQLEN:
fab2532b
AB
2901 case SIOCGMIIPHY:
2902 case SIOCGMIIREG:
a2116ed2 2903 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2904 err = -EFAULT;
2905 break;
2906 }
2907 }
2908 return err;
2909}
2910
a2116ed2
AB
2911static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2912 struct compat_ifreq __user *uifr32)
2913{
2914 struct ifreq ifr;
2915 struct compat_ifmap __user *uifmap32;
2916 mm_segment_t old_fs;
2917 int err;
2918
2919 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2920 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2921 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2922 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2923 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2924 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2925 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2926 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2927 if (err)
2928 return -EFAULT;
2929
2930 old_fs = get_fs();
c6d409cf 2931 set_fs(KERNEL_DS);
c3f52ae6 2932 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2933 set_fs(old_fs);
a2116ed2
AB
2934
2935 if (cmd == SIOCGIFMAP && !err) {
2936 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2937 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2938 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2939 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2940 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2941 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2942 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2943 if (err)
2944 err = -EFAULT;
2945 }
2946 return err;
2947}
2948
/*
 * User-space layout that routing_ioctl() reads from on its IPv4 path
 * (`struct rtentry32 __user *ur4 = argp`).  Field order and sizes define
 * that layout and must not change.  rt_dev carries a 32-bit user pointer,
 * which routing_ioctl() widens with compat_ptr().
 */
7a229387 2949struct rtentry32 {
c6d409cf 2950 u32 rt_pad1;
7a229387
AB
2951 struct sockaddr rt_dst; /* target address */
2952 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2953 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2954 unsigned short rt_flags;
2955 short rt_pad2;
2956 u32 rt_pad3;
2957 unsigned char rt_tos;
2958 unsigned char rt_class;
2959 short rt_pad4;
2960 short rt_metric; /* +1 for binary compatibility! */
7a229387 2961 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
2962 u32 rt_mtu; /* per route MTU/Window */
2963 u32 rt_window; /* Window clamping */
7a229387
AB
2964 unsigned short rt_irtt; /* Initial RTT */
2965};
2966
/*
 * User-space layout that routing_ioctl() reads from on its AF_INET6 path
 * (`struct in6_rtmsg32 __user *ur6 = argp`).  The three leading addresses
 * are copied in a single copy_from_user() of 3 * sizeof(struct in6_addr),
 * so they must stay adjacent and in this order.
 */
2967struct in6_rtmsg32 {
2968 struct in6_addr rtmsg_dst;
2969 struct in6_addr rtmsg_src;
2970 struct in6_addr rtmsg_gateway;
2971 u32 rtmsg_type;
2972 u16 rtmsg_dst_len;
2973 u16 rtmsg_src_len;
2974 u32 rtmsg_metric;
2975 u32 rtmsg_info;
2976 u32 rtmsg_flags;
2977 s32 rtmsg_ifindex;
2978};
2979
6b96018b
AB
/*
 * Translate a route add/delete request (dispatched for SIOCADDRT and
 * SIOCDELRT by compat_sock_ioctl_trans()) from the 32-bit user layout
 * into a native structure on the kernel stack, then hand that structure
 * to sock_do_ioctl().
 *
 * @net:  network namespace passed through to sock_do_ioctl()
 * @sock: socket the ioctl was issued on; its family selects the layout
 * @cmd:  ioctl command, passed through unchanged
 * @argp: user pointer to a struct in6_rtmsg32 (AF_INET6 sockets) or a
 *        struct rtentry32 (everything else)
 *
 * Returns -EFAULT if any read from user space failed, otherwise whatever
 * sock_do_ioctl() returns.
 */
2980static int routing_ioctl(struct net *net, struct socket *sock,
2981 unsigned int cmd, void __user *argp)
7a229387
AB
2982{
2983 int ret;
2984 void *r = NULL;
2985 struct in6_rtmsg r6;
2986 struct rtentry r4;
2987 char devname[16];
2988 u32 rtdev;
2989 mm_segment_t old_fs = get_fs();
2990
6b96018b
AB
2991 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2992 struct in6_rtmsg32 __user *ur6 = argp;
	/*
	 * dst, src and gateway are adjacent in struct in6_rtmsg32, so one
	 * copy covers all three. Assumes the native struct in6_rtmsg starts
	 * with the same three addresses in the same order - TODO confirm.
	 */
c6d409cf 2993 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 2994 3 * sizeof(struct in6_addr));
3ddc5b46
MD
	/* Failures are OR-ed together; any non-zero result becomes -EFAULT below. */
2995 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
2996 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2997 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
2998 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
2999 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3000 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3001 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3002
3003 r = (void *) &r6;
3004 } else { /* ipv4 */
6b96018b 3005 struct rtentry32 __user *ur4 = argp;
	/* rt_dst, rt_gateway and rt_genmask are adjacent in struct rtentry32. */
c6d409cf 3006 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3007 3 * sizeof(struct sockaddr));
3ddc5b46
MD
	/*
	 * NOTE(review): only the members assigned below are written; r4 has
	 * no initializer, so rt_pad1..rt_pad4, rt_tos and rt_class keep
	 * whatever was on the stack when r4 is passed on.
	 */
3008 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3009 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3010 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3011 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3012 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3013 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3014 if (rtdev) {
	/*
	 * Pull a fixed 15 bytes of device name into the local buffer and
	 * force termination in the 16th byte; rt_dev then points at that
	 * kernel-stack copy rather than at user memory.
	 */
c6d409cf 3015 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3016 r4.rt_dev = (char __user __force *)devname;
3017 devname[15] = 0;
7a229387
AB
3018 } else
3019 r4.rt_dev = NULL;
3020
3021 r = (void *) &r4;
3022 }
3023
3024 if (ret) {
3025 ret = -EFAULT;
3026 goto out;
3027 }
3028
	/*
	 * r (and, for IPv4, rt_dev) now refer to kernel-stack memory, so the
	 * address limit is switched to KERNEL_DS for the duration of the
	 * call and restored right after.
	 */
c6d409cf 3029 set_fs(KERNEL_DS);
6b96018b 3030 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3031 set_fs(old_fs);
7a229387
AB
3032
3033out:
7a229387
AB
3034 return ret;
3035}
3036
3037/* Since old-style bridge ioctls end up using SIOCDEVPRIVATE
3038 * for some operations, this forces use of the newer bridge-utils that
25985edc 3039 * use compatible ioctls.
7a229387 3040 */
/*
 * Reads one compat_ulong_t from @argp. Returns -EFAULT if that read
 * fails, BRCTL_VERSION + 1 if the value is BRCTL_GET_VERSION, and
 * -EINVAL for every other value.
 * NOTE(review): reporting a version one above BRCTL_VERSION is presumably
 * what makes old tools give up - confirm against bridge-utils.
 */
6b96018b 3041static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3042{
6b96018b 3043 compat_ulong_t tmp;
7a229387 3044
6b96018b 3045 if (get_user(tmp, argp))
7a229387
AB
3046 return -EFAULT;
3047 if (tmp == BRCTL_GET_VERSION)
3048 return BRCTL_VERSION + 1;
3049 return -EINVAL;
3050}
3051
6b96018b
AB
/*
 * Dispatch a socket ioctl coming from compat_sock_ioctl() to the helper
 * that handles its argument.
 *
 * @file: file the ioctl was issued on (needed by sock_ioctl())
 * @sock: socket behind @file
 * @cmd:  ioctl command number
 * @arg:  raw argument; also converted once to a user pointer with
 *        compat_ptr() for the helpers that take one
 *
 * Returns the selected helper's result, or -ENOIOCTLCMD when @cmd is not
 * listed here.
 */
3052static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3053 unsigned int cmd, unsigned long arg)
3054{
3055 void __user *argp = compat_ptr(arg);
3056 struct sock *sk = sock->sk;
3057 struct net *net = sock_net(sk);
7a229387 3058
	/* The 16 device-private commands all go through the ifr_data helper. */
6b96018b 3059 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3060 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3061
3062 switch (cmd) {
	/* Commands with a dedicated translation helper. */
3063 case SIOCSIFBR:
3064 case SIOCGIFBR:
3065 return old_bridge_ioctl(argp);
3066 case SIOCGIFNAME:
3067 return dev_ifname32(net, argp);
3068 case SIOCGIFCONF:
3069 return dev_ifconf(net, argp);
3070 case SIOCETHTOOL:
3071 return ethtool_ioctl(net, argp);
7a50a240
AB
3072 case SIOCWANDEV:
3073 return compat_siocwandev(net, argp);
a2116ed2
AB
3074 case SIOCGIFMAP:
3075 case SIOCSIFMAP:
3076 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3077 case SIOCBONDENSLAVE:
3078 case SIOCBONDRELEASE:
3079 case SIOCBONDSETHWADDR:
6b96018b
AB
3080 case SIOCBONDCHANGEACTIVE:
3081 return bond_ioctl(net, cmd, argp);
3082 case SIOCADDRT:
3083 case SIOCDELRT:
3084 return routing_ioctl(net, sock, cmd, argp);
3085 case SIOCGSTAMP:
3086 return do_siocgstamp(net, sock, cmd, argp);
3087 case SIOCGSTAMPNS:
3088 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3089 case SIOCBONDSLAVEINFOQUERY:
3090 case SIOCBONDINFOQUERY:
a2116ed2 3091 case SIOCSHWTSTAMP:
fd468c74 3092 case SIOCGHWTSTAMP:
590d4693 3093 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3094
	/*
	 * Forwarded to the regular sock_ioctl() with the raw @arg.
	 * NOTE(review): presumably these need no argument translation -
	 * confirm.
	 */
3095 case FIOSETOWN:
3096 case SIOCSPGRP:
3097 case FIOGETOWN:
3098 case SIOCGPGRP:
3099 case SIOCBRADDBR:
3100 case SIOCBRDELBR:
3101 case SIOCGIFVLAN:
3102 case SIOCSIFVLAN:
3103 case SIOCADDDLCI:
3104 case SIOCDELDLCI:
3105 return sock_ioctl(file, cmd, arg);
3106
	/* Interface requests handled by dev_ifsioc(). */
3107 case SIOCGIFFLAGS:
3108 case SIOCSIFFLAGS:
3109 case SIOCGIFMETRIC:
3110 case SIOCSIFMETRIC:
3111 case SIOCGIFMTU:
3112 case SIOCSIFMTU:
3113 case SIOCGIFMEM:
3114 case SIOCSIFMEM:
3115 case SIOCGIFHWADDR:
3116 case SIOCSIFHWADDR:
3117 case SIOCADDMULTI:
3118 case SIOCDELMULTI:
3119 case SIOCGIFINDEX:
6b96018b
AB
3120 case SIOCGIFADDR:
3121 case SIOCSIFADDR:
3122 case SIOCSIFHWBROADCAST:
6b96018b 3123 case SIOCDIFADDR:
6b96018b
AB
3124 case SIOCGIFBRDADDR:
3125 case SIOCSIFBRDADDR:
3126 case SIOCGIFDSTADDR:
3127 case SIOCSIFDSTADDR:
3128 case SIOCGIFNETMASK:
3129 case SIOCSIFNETMASK:
3130 case SIOCSIFPFLAGS:
3131 case SIOCGIFPFLAGS:
3132 case SIOCGIFTXQLEN:
3133 case SIOCSIFTXQLEN:
3134 case SIOCBRADDIF:
3135 case SIOCBRDELIF:
9177efd3
AB
3136 case SIOCSIFNAME:
3137 case SIOCGMIIPHY:
3138 case SIOCGMIIREG:
3139 case SIOCSMIIREG:
6b96018b 3140 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3141
	/* Passed straight to sock_do_ioctl() with the raw @arg, not @argp. */
6b96018b
AB
3142 case SIOCSARP:
3143 case SIOCGARP:
3144 case SIOCDARP:
6b96018b 3145 case SIOCATMARK:
9177efd3
AB
3146 return sock_do_ioctl(net, sock, cmd, arg);
3147 }
3148
	/* Anything not listed above is reported as unhandled. */
6b96018b
AB
3149 return -ENOIOCTLCMD;
3150}
7a229387 3151
/*
 * ioctl entry point for sockets that tries up to three handlers in order,
 * moving to the next one only while the result is still -ENOIOCTLCMD:
 *   1. the protocol's own ->compat_ioctl, if it provides one;
 *   2. compat_wext_handle_ioctl(), only for commands in the
 *      SIOCIWFIRST..SIOCIWLAST range;
 *   3. compat_sock_ioctl_trans().
 *
 * The socket is taken from file->private_data. Returns the first result
 * that is not -ENOIOCTLCMD, or -ENOIOCTLCMD if no handler claimed @cmd.
 */
95c96174 3152static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3153 unsigned long arg)
89bbfc95
SP
3154{
3155 struct socket *sock = file->private_data;
3156 int ret = -ENOIOCTLCMD;
87de87d5
DM
3157 struct sock *sk;
3158 struct net *net;
3159
3160 sk = sock->sk;
3161 net = sock_net(sk);
89bbfc95
SP
3162
3163 if (sock->ops->compat_ioctl)
3164 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3165
87de87d5
DM
3166 if (ret == -ENOIOCTLCMD &&
3167 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3168 ret = compat_wext_handle_ioctl(net, cmd, arg);
3169
6b96018b
AB
3170 if (ret == -ENOIOCTLCMD)
3171 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3172
89bbfc95
SP
3173 return ret;
3174}
3175#endif
3176
ac5a488e
SS
/*
 * Call the socket's ->bind operation with @addr/@addrlen and return its
 * result unchanged. Exported for use outside this file.
 */
3177int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3178{
3179 return sock->ops->bind(sock, addr, addrlen);
3180}
c6d409cf 3181EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3182
/*
 * Call the socket's ->listen operation with @backlog and return its
 * result unchanged. Exported for use outside this file.
 */
3183int kernel_listen(struct socket *sock, int backlog)
3184{
3185 return sock->ops->listen(sock, backlog);
3186}
c6d409cf 3187EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3188
/*
 * Accept a connection on @sock into a newly created socket.
 *
 * @sock:    socket to accept on
 * @newsock: receives the new socket; set to NULL here if ->accept fails
 * @flags:   passed through to the protocol's ->accept
 *
 * Returns a negative error from sock_create_lite() or ->accept on
 * failure, otherwise the value ->accept returned.
 */
3189int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3190{
3191 struct sock *sk = sock->sk;
3192 int err;
3193
	/* The new socket copies family, type and protocol from @sock's sk. */
3194 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3195 newsock);
3196 if (err < 0)
3197 goto done;
3198
3199 err = sock->ops->accept(sock, *newsock, flags);
3200 if (err < 0) {
	/* Drop the socket created above and make sure the caller sees NULL. */
3201 sock_release(*newsock);
fa8705b0 3202 *newsock = NULL;
ac5a488e
SS
3203 goto done;
3204 }
3205
	/*
	 * The accepted socket shares the listener's ops table; take a
	 * reference on the module that owns it.
	 */
3206 (*newsock)->ops = sock->ops;
1b08534e 3207 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3208
3209done:
3210 return err;
3211}
c6d409cf 3212EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3213
/*
 * Call the socket's ->connect operation with @addr/@addrlen/@flags and
 * return its result unchanged. Exported for use outside this file.
 */
3214int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3215 int flags)
ac5a488e
SS
3216{
3217 return sock->ops->connect(sock, addr, addrlen, flags);
3218}
c6d409cf 3219EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3220
/*
 * Call the socket's ->getname operation with its last argument set to 0
 * (kernel_getpeername() passes 1 instead) and return its result
 * unchanged. @addr and @addrlen are handed to ->getname as-is.
 */
3221int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3222 int *addrlen)
3223{
3224 return sock->ops->getname(sock, addr, addrlen, 0);
3225}
c6d409cf 3226EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3227
/*
 * Call the socket's ->getname operation with its last argument set to 1
 * (kernel_getsockname() passes 0 instead) and return its result
 * unchanged. @addr and @addrlen are handed to ->getname as-is.
 */
3228int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3229 int *addrlen)
3230{
3231 return sock->ops->getname(sock, addr, addrlen, 1);
3232}
c6d409cf 3233EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3234
/*
 * Read a socket option into kernel buffers.
 *
 * @sock:    socket to query
 * @level:   SOL_SOCKET is served by sock_getsockopt(); any other level
 *           goes to the protocol's ->getsockopt
 * @optname: option to read
 * @optval:  kernel buffer for the value
 * @optlen:  kernel pointer to the buffer length
 *
 * Returns the result of whichever handler was called.
 */
3235int kernel_getsockopt(struct socket *sock, int level, int optname,
3236 char *optval, int *optlen)
3237{
3238 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3239 char __user *uoptval;
3240 int __user *uoptlen;
ac5a488e
SS
3241 int err;
3242
	/*
	 * The handlers are declared with __user pointers; the __force casts
	 * relabel the kernel pointers so they can be passed in.
	 */
fb8621bb
NK
3243 uoptval = (char __user __force *) optval;
3244 uoptlen = (int __user __force *) optlen;
3245
	/* Address limit is KERNEL_DS only for the call, then restored. */
ac5a488e
SS
3246 set_fs(KERNEL_DS);
3247 if (level == SOL_SOCKET)
fb8621bb 3248 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3249 else
fb8621bb
NK
3250 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3251 uoptlen);
ac5a488e
SS
3252 set_fs(oldfs);
3253 return err;
3254}
c6d409cf 3255EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3256
/*
 * Set a socket option from a kernel buffer.
 *
 * @sock:    socket to modify
 * @level:   SOL_SOCKET is served by sock_setsockopt(); any other level
 *           goes to the protocol's ->setsockopt
 * @optname: option to set
 * @optval:  kernel buffer holding the value
 * @optlen:  length passed through to the handler
 *
 * Returns the result of whichever handler was called.
 */
3257int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3258 char *optval, unsigned int optlen)
ac5a488e
SS
3259{
3260 mm_segment_t oldfs = get_fs();
fb8621bb 3261 char __user *uoptval;
ac5a488e
SS
3262 int err;
3263
	/*
	 * The handlers are declared with a __user pointer; the __force cast
	 * relabels the kernel pointer so it can be passed in.
	 */
fb8621bb
NK
3264 uoptval = (char __user __force *) optval;
3265
	/* Address limit is KERNEL_DS only for the call, then restored. */
ac5a488e
SS
3266 set_fs(KERNEL_DS);
3267 if (level == SOL_SOCKET)
fb8621bb 3268 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3269 else
fb8621bb 3270 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3271 optlen);
3272 set_fs(oldfs);
3273 return err;
3274}
c6d409cf 3275EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3276
/*
 * Send @size bytes of @page starting at @offset. Uses the protocol's
 * ->sendpage when it provides one and falls back to sock_no_sendpage()
 * otherwise; returns the result of whichever was called.
 */
3277int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3278 size_t size, int flags)
3279{
3280 if (sock->ops->sendpage)
3281 return sock->ops->sendpage(sock, page, offset, size, flags);
3282
3283 return sock_no_sendpage(sock, page, offset, size, flags);
3284}
c6d409cf 3285EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3286
/*
 * Call the socket's ->ioctl operation with the address limit switched to
 * KERNEL_DS for the duration of the call, then restore the previous
 * limit. Returns the result of ->ioctl.
 * NOTE(review): @arg is presumably a kernel address when it is a
 * pointer - confirm against callers.
 */
3287int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3288{
3289 mm_segment_t oldfs = get_fs();
3290 int err;
3291
3292 set_fs(KERNEL_DS);
3293 err = sock->ops->ioctl(sock, cmd, arg);
3294 set_fs(oldfs);
3295
3296 return err;
3297}
c6d409cf 3298EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3299
91cf45f0
TM
/*
 * Call the socket's ->shutdown operation with @how and return its result
 * unchanged. Exported for use outside this file.
 */
3300int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3301{
3302 return sock->ops->shutdown(sock, how);
3303}
91cf45f0 3304EXPORT_SYMBOL(kernel_sock_shutdown);