]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/socket.c
Merge tag 'powerpc-4.15-6' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...
[mirror_ubuntu-bionic-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4 92
7c0f6ba6 93#include <linux/uaccess.h>
1da177e4
LT
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1e911632 290static void init_inodecache(void)
1da177e4
LT
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1e911632 299 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
300}
301
b87221de 302static const struct super_operations sockfs_ops = {
c6d409cf
ED
303 .alloc_inode = sock_alloc_inode,
304 .destroy_inode = sock_destroy_inode,
305 .statfs = simple_statfs,
1da177e4
LT
306};
307
c23fbb6b
ED
308/*
309 * sockfs_dname() is called from d_path().
310 */
311static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
312{
313 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 314 d_inode(dentry)->i_ino);
c23fbb6b
ED
315}
316
3ba13d17 317static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 318 .d_dname = sockfs_dname,
1da177e4
LT
319};
320
bba0bd31
AG
321static int sockfs_xattr_get(const struct xattr_handler *handler,
322 struct dentry *dentry, struct inode *inode,
323 const char *suffix, void *value, size_t size)
324{
325 if (value) {
326 if (dentry->d_name.len + 1 > size)
327 return -ERANGE;
328 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
329 }
330 return dentry->d_name.len + 1;
331}
332
333#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
334#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
335#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
336
337static const struct xattr_handler sockfs_xattr_handler = {
338 .name = XATTR_NAME_SOCKPROTONAME,
339 .get = sockfs_xattr_get,
340};
341
4a590153
AG
342static int sockfs_security_xattr_set(const struct xattr_handler *handler,
343 struct dentry *dentry, struct inode *inode,
344 const char *suffix, const void *value,
345 size_t size, int flags)
346{
347 /* Handled by LSM. */
348 return -EAGAIN;
349}
350
351static const struct xattr_handler sockfs_security_xattr_handler = {
352 .prefix = XATTR_SECURITY_PREFIX,
353 .set = sockfs_security_xattr_set,
354};
355
bba0bd31
AG
356static const struct xattr_handler *sockfs_xattr_handlers[] = {
357 &sockfs_xattr_handler,
4a590153 358 &sockfs_security_xattr_handler,
bba0bd31
AG
359 NULL
360};
361
c74a1cbb
AV
362static struct dentry *sockfs_mount(struct file_system_type *fs_type,
363 int flags, const char *dev_name, void *data)
364{
bba0bd31
AG
365 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
366 sockfs_xattr_handlers,
367 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
368}
369
370static struct vfsmount *sock_mnt __read_mostly;
371
372static struct file_system_type sock_fs_type = {
373 .name = "sockfs",
374 .mount = sockfs_mount,
375 .kill_sb = kill_anon_super,
376};
377
1da177e4
LT
378/*
379 * Obtains the first available file descriptor and sets it up for use.
380 *
39d8c1b6
DM
381 * These functions create file structures and maps them to fd space
382 * of the current process. On success it returns file descriptor
1da177e4
LT
383 * and file struct implicitly stored in sock->file.
384 * Note that another thread may close file descriptor before we return
385 * from this function. We use the fact that now we do not refer
386 * to socket after mapping. If one day we will need it, this
387 * function will increment ref. count on file by 1.
388 *
389 * In any case returned fd MAY BE not valid!
390 * This race condition is unavoidable
391 * with shared fd spaces, we cannot solve it inside kernel,
392 * but we take care of internal coherence yet.
393 */
394
aab174f0 395struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 396{
7cbe66b6 397 struct qstr name = { .name = "" };
2c48b9c4 398 struct path path;
7cbe66b6 399 struct file *file;
1da177e4 400
600e1779
MY
401 if (dname) {
402 name.name = dname;
403 name.len = strlen(name.name);
404 } else if (sock->sk) {
405 name.name = sock->sk->sk_prot_creator->name;
406 name.len = strlen(name.name);
407 }
4b936885 408 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
8e1611e2
AV
409 if (unlikely(!path.dentry)) {
410 sock_release(sock);
28407630 411 return ERR_PTR(-ENOMEM);
8e1611e2 412 }
2c48b9c4 413 path.mnt = mntget(sock_mnt);
39d8c1b6 414
2c48b9c4 415 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 416
2c48b9c4 417 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 418 &socket_file_ops);
b5ffe634 419 if (IS_ERR(file)) {
8e1611e2 420 /* drop dentry, keep inode for a bit */
c5ef6035 421 ihold(d_inode(path.dentry));
2c48b9c4 422 path_put(&path);
8e1611e2
AV
423 /* ... and now kill it properly */
424 sock_release(sock);
39b65252 425 return file;
cc3808f8
AV
426 }
427
428 sock->file = file;
77d27200 429 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 430 file->private_data = sock;
28407630 431 return file;
39d8c1b6 432}
56b31d1c 433EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 434
56b31d1c 435static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
436{
437 struct file *newfile;
28407630
AV
438 int fd = get_unused_fd_flags(flags);
439 if (unlikely(fd < 0))
440 return fd;
39d8c1b6 441
aab174f0 442 newfile = sock_alloc_file(sock, flags, NULL);
28407630 443 if (likely(!IS_ERR(newfile))) {
39d8c1b6 444 fd_install(fd, newfile);
28407630
AV
445 return fd;
446 }
7cbe66b6 447
28407630
AV
448 put_unused_fd(fd);
449 return PTR_ERR(newfile);
1da177e4
LT
450}
451
406a3c63 452struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 453{
6cb153ca
BL
454 if (file->f_op == &socket_file_ops)
455 return file->private_data; /* set in sock_map_fd */
456
23bb80d2
ED
457 *err = -ENOTSOCK;
458 return NULL;
6cb153ca 459}
406a3c63 460EXPORT_SYMBOL(sock_from_file);
6cb153ca 461
1da177e4 462/**
c6d409cf 463 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
464 * @fd: file handle
465 * @err: pointer to an error code return
466 *
467 * The file handle passed in is locked and the socket it is bound
241c4667 468 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
469 * with a negative errno code and NULL is returned. The function checks
470 * for both invalid handles and passing a handle which is not a socket.
471 *
472 * On a success the socket object pointer is returned.
473 */
474
475struct socket *sockfd_lookup(int fd, int *err)
476{
477 struct file *file;
1da177e4
LT
478 struct socket *sock;
479
89bddce5
SH
480 file = fget(fd);
481 if (!file) {
1da177e4
LT
482 *err = -EBADF;
483 return NULL;
484 }
89bddce5 485
6cb153ca
BL
486 sock = sock_from_file(file, err);
487 if (!sock)
1da177e4 488 fput(file);
6cb153ca
BL
489 return sock;
490}
c6d409cf 491EXPORT_SYMBOL(sockfd_lookup);
1da177e4 492
6cb153ca
BL
493static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
494{
00e188ef 495 struct fd f = fdget(fd);
6cb153ca
BL
496 struct socket *sock;
497
3672558c 498 *err = -EBADF;
00e188ef
AV
499 if (f.file) {
500 sock = sock_from_file(f.file, err);
501 if (likely(sock)) {
502 *fput_needed = f.flags;
6cb153ca 503 return sock;
00e188ef
AV
504 }
505 fdput(f);
1da177e4 506 }
6cb153ca 507 return NULL;
1da177e4
LT
508}
509
600e1779
MY
510static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
511 size_t size)
512{
513 ssize_t len;
514 ssize_t used = 0;
515
c5ef6035 516 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
517 if (len < 0)
518 return len;
519 used += len;
520 if (buffer) {
521 if (size < used)
522 return -ERANGE;
523 buffer += len;
524 }
525
526 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
527 used += len;
528 if (buffer) {
529 if (size < used)
530 return -ERANGE;
531 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
532 buffer += len;
533 }
534
535 return used;
536}
537
dc647ec8 538static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
539{
540 int err = simple_setattr(dentry, iattr);
541
e1a3a60a 542 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
543 struct socket *sock = SOCKET_I(d_inode(dentry));
544
545 sock->sk->sk_uid = iattr->ia_uid;
546 }
547
548 return err;
549}
550
600e1779 551static const struct inode_operations sockfs_inode_ops = {
600e1779 552 .listxattr = sockfs_listxattr,
86741ec2 553 .setattr = sockfs_setattr,
600e1779
MY
554};
555
1da177e4
LT
556/**
557 * sock_alloc - allocate a socket
89bddce5 558 *
1da177e4
LT
559 * Allocate a new inode and socket object. The two are bound together
560 * and initialised. The socket is then returned. If we are out of inodes
561 * NULL is returned.
562 */
563
f4a00aac 564struct socket *sock_alloc(void)
1da177e4 565{
89bddce5
SH
566 struct inode *inode;
567 struct socket *sock;
1da177e4 568
a209dfc7 569 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
570 if (!inode)
571 return NULL;
572
573 sock = SOCKET_I(inode);
574
85fe4025 575 inode->i_ino = get_next_ino();
89bddce5 576 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
577 inode->i_uid = current_fsuid();
578 inode->i_gid = current_fsgid();
600e1779 579 inode->i_op = &sockfs_inode_ops;
1da177e4 580
19e8d69c 581 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
582 return sock;
583}
f4a00aac 584EXPORT_SYMBOL(sock_alloc);
1da177e4 585
1da177e4
LT
586/**
587 * sock_release - close a socket
588 * @sock: socket to close
589 *
590 * The socket is released from the protocol stack if it has a release
591 * callback, and the inode is then released if the socket is bound to
89bddce5 592 * an inode not a file.
1da177e4 593 */
89bddce5 594
1da177e4
LT
595void sock_release(struct socket *sock)
596{
597 if (sock->ops) {
598 struct module *owner = sock->ops->owner;
599
600 sock->ops->release(sock);
601 sock->ops = NULL;
602 module_put(owner);
603 }
604
eaefd110 605 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 606 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 607
19e8d69c 608 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
609 if (!sock->file) {
610 iput(SOCK_INODE(sock));
611 return;
612 }
89bddce5 613 sock->file = NULL;
1da177e4 614}
c6d409cf 615EXPORT_SYMBOL(sock_release);
1da177e4 616
c14ac945 617void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 618{
140c55d4
ED
619 u8 flags = *tx_flags;
620
c14ac945 621 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
622 flags |= SKBTX_HW_TSTAMP;
623
c14ac945 624 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
625 flags |= SKBTX_SW_TSTAMP;
626
c14ac945 627 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
628 flags |= SKBTX_SCHED_TSTAMP;
629
140c55d4 630 *tx_flags = flags;
20d49473 631}
67cc0d40 632EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 633
d8725c86 634static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 635{
01e97e65 636 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
637 BUG_ON(ret == -EIOCBQUEUED);
638 return ret;
1da177e4
LT
639}
640
/*
 * Send @msg on @sock. The security hook is consulted first; a non-zero
 * result from it is returned and nothing is sent.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err = security_socket_sendmsg(sock, msg, msg_data_left(msg));

	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
649
650int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
651 struct kvec *vec, size_t num, size_t size)
652{
6aa24814 653 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 654 return sock_sendmsg(sock, msg);
1da177e4 655}
c6d409cf 656EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 657
306b13eb
TH
658int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
659 struct kvec *vec, size_t num, size_t size)
660{
661 struct socket *sock = sk->sk_socket;
662
663 if (!sock->ops->sendmsg_locked)
db5980d8 664 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb
TH
665
666 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
667
668 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
669}
670EXPORT_SYMBOL(kernel_sendmsg_locked);
671
8605330a
SHY
672static bool skb_is_err_queue(const struct sk_buff *skb)
673{
674 /* pkt_type of skbs enqueued on the error queue are set to
675 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
676 * in recvmsg, since skbs received on a local socket will never
677 * have a pkt_type of PACKET_OUTGOING.
678 */
679 return skb->pkt_type == PACKET_OUTGOING;
680}
681
b50a5c70
ML
682/* On transmit, software and hardware timestamps are returned independently.
683 * As the two skb clones share the hardware timestamp, which may be updated
684 * before the software timestamp is received, a hardware TX timestamp may be
685 * returned only if there is no software TX timestamp. Ignore false software
686 * timestamps, which may be made in the __sock_recv_timestamp() call when the
687 * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
688 * hardware timestamp.
689 */
690static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
691{
692 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
693}
694
aad9c8c4
ML
695static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
696{
697 struct scm_ts_pktinfo ts_pktinfo;
698 struct net_device *orig_dev;
699
700 if (!skb_mac_header_was_set(skb))
701 return;
702
703 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
704
705 rcu_read_lock();
706 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
707 if (orig_dev)
708 ts_pktinfo.if_index = orig_dev->ifindex;
709 rcu_read_unlock();
710
711 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
712 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
713 sizeof(ts_pktinfo), &ts_pktinfo);
714}
715
92f37fd2
ED
716/*
717 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
718 */
719void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
720 struct sk_buff *skb)
721{
20d49473 722 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 723 struct scm_timestamping tss;
b50a5c70 724 int empty = 1, false_tstamp = 0;
20d49473
PO
725 struct skb_shared_hwtstamps *shhwtstamps =
726 skb_hwtstamps(skb);
727
728 /* Race occurred between timestamp enabling and packet
729 receiving. Fill in the current time for now. */
b50a5c70 730 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 731 __net_timestamp(skb);
b50a5c70
ML
732 false_tstamp = 1;
733 }
20d49473
PO
734
735 if (need_software_tstamp) {
736 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
737 struct timeval tv;
738 skb_get_timestamp(skb, &tv);
739 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
740 sizeof(tv), &tv);
741 } else {
f24b9be5
WB
742 struct timespec ts;
743 skb_get_timestampns(skb, &ts);
20d49473 744 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 745 sizeof(ts), &ts);
20d49473
PO
746 }
747 }
748
f24b9be5 749 memset(&tss, 0, sizeof(tss));
c199105d 750 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 751 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 752 empty = 0;
4d276eb6 753 if (shhwtstamps &&
b9f40e21 754 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 755 !skb_is_swtx_tstamp(skb, false_tstamp) &&
aad9c8c4 756 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 757 empty = 0;
aad9c8c4
ML
758 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
759 !skb_is_err_queue(skb))
760 put_ts_pktinfo(msg, skb);
761 }
1c885808 762 if (!empty) {
20d49473 763 put_cmsg(msg, SOL_SOCKET,
f24b9be5 764 SCM_TIMESTAMPING, sizeof(tss), &tss);
1c885808 765
8605330a 766 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 767 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
768 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
769 skb->len, skb->data);
770 }
92f37fd2 771}
7c81fd8b
ACM
772EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
773
6e3e939f
JB
774void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
775 struct sk_buff *skb)
776{
777 int ack;
778
779 if (!sock_flag(sk, SOCK_WIFI_STATUS))
780 return;
781 if (!skb->wifi_acked_valid)
782 return;
783
784 ack = skb->wifi_acked;
785
786 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
787}
788EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
789
11165f14 790static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
791 struct sk_buff *skb)
3b885787 792{
744d5a3e 793 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 794 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 795 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
796}
797
/*
 * Attach the receive-side ancillary data for @skb to @msg: timestamps
 * first, then the drop counter.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 805
1b784140 806static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 807 int flags)
1da177e4 808{
2da62906 809 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
810}
811
/*
 * Receive into @msg from @sock. The security hook is consulted first; a
 * non-zero result from it is returned and nothing is received.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err = security_socket_recvmsg(sock, msg, msg_data_left(msg),
					  flags);

	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 819
c1249c0a
ML
820/**
821 * kernel_recvmsg - Receive a message from a socket (kernel space)
822 * @sock: The socket to receive the message from
823 * @msg: Received message
824 * @vec: Input s/g array for message data
825 * @num: Size of input s/g array
826 * @size: Number of bytes to read
827 * @flags: Message flags (MSG_DONTWAIT, etc...)
828 *
829 * On return the msg structure contains the scatter/gather array passed in the
830 * vec argument. The array is modified so that it consists of the unfilled
831 * portion of the original array.
832 *
833 * The returned value is the total number of bytes received, or an error.
834 */
89bddce5
SH
835int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
836 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
837{
838 mm_segment_t oldfs = get_fs();
839 int result;
840
6aa24814 841 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 842 set_fs(KERNEL_DS);
2da62906 843 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
844 set_fs(oldfs);
845 return result;
846}
c6d409cf 847EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 848
ce1d4d3e
CH
849static ssize_t sock_sendpage(struct file *file, struct page *page,
850 int offset, size_t size, loff_t *ppos, int more)
1da177e4 851{
1da177e4
LT
852 struct socket *sock;
853 int flags;
854
ce1d4d3e
CH
855 sock = file->private_data;
856
35f9c09f
ED
857 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
858 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
859 flags |= more;
ce1d4d3e 860
e6949583 861 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 862}
1da177e4 863
9c55e01c 864static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 865 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
866 unsigned int flags)
867{
868 struct socket *sock = file->private_data;
869
997b37da
RDC
870 if (unlikely(!sock->ops->splice_read))
871 return -EINVAL;
872
9c55e01c
JA
873 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
874}
875
8ae5e030 876static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 877{
6d652330
AV
878 struct file *file = iocb->ki_filp;
879 struct socket *sock = file->private_data;
0345f931 880 struct msghdr msg = {.msg_iter = *to,
881 .msg_iocb = iocb};
8ae5e030 882 ssize_t res;
ce1d4d3e 883
8ae5e030
AV
884 if (file->f_flags & O_NONBLOCK)
885 msg.msg_flags = MSG_DONTWAIT;
886
887 if (iocb->ki_pos != 0)
1da177e4 888 return -ESPIPE;
027445c3 889
66ee59af 890 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
891 return 0;
892
2da62906 893 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
894 *to = msg.msg_iter;
895 return res;
1da177e4
LT
896}
897
8ae5e030 898static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 899{
6d652330
AV
900 struct file *file = iocb->ki_filp;
901 struct socket *sock = file->private_data;
0345f931 902 struct msghdr msg = {.msg_iter = *from,
903 .msg_iocb = iocb};
8ae5e030 904 ssize_t res;
1da177e4 905
8ae5e030 906 if (iocb->ki_pos != 0)
ce1d4d3e 907 return -ESPIPE;
027445c3 908
8ae5e030
AV
909 if (file->f_flags & O_NONBLOCK)
910 msg.msg_flags = MSG_DONTWAIT;
911
6d652330
AV
912 if (sock->type == SOCK_SEQPACKET)
913 msg.msg_flags |= MSG_EOR;
914
d8725c86 915 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
916 *from = msg.msg_iter;
917 return res;
1da177e4
LT
918}
919
1da177e4
LT
920/*
921 * Atomic setting of ioctl hooks to avoid race
922 * with module unload.
923 */
924
4a3e2f71 925static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 926static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 927
881d966b 928void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 929{
4a3e2f71 930 mutex_lock(&br_ioctl_mutex);
1da177e4 931 br_ioctl_hook = hook;
4a3e2f71 932 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
933}
934EXPORT_SYMBOL(brioctl_set);
935
4a3e2f71 936static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 937static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 938
881d966b 939void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 940{
4a3e2f71 941 mutex_lock(&vlan_ioctl_mutex);
1da177e4 942 vlan_ioctl_hook = hook;
4a3e2f71 943 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
944}
945EXPORT_SYMBOL(vlan_ioctl_set);
946
4a3e2f71 947static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 948static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 949
89bddce5 950void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 951{
4a3e2f71 952 mutex_lock(&dlci_ioctl_mutex);
1da177e4 953 dlci_ioctl_hook = hook;
4a3e2f71 954 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
955}
956EXPORT_SYMBOL(dlci_ioctl_set);
957
6b96018b
AB
958static long sock_do_ioctl(struct net *net, struct socket *sock,
959 unsigned int cmd, unsigned long arg)
960{
961 int err;
962 void __user *argp = (void __user *)arg;
963
964 err = sock->ops->ioctl(sock, cmd, arg);
965
966 /*
967 * If this ioctl is unknown try to hand it down
968 * to the NIC driver.
969 */
970 if (err == -ENOIOCTLCMD)
971 err = dev_ioctl(net, cmd, argp);
972
973 return err;
974}
975
1da177e4
LT
976/*
977 * With an ioctl, arg may well be a user mode pointer, but we don't know
978 * what to do with it - that's up to the protocol still.
979 */
980
c62cce2c
AV
981static struct ns_common *get_net_ns(struct ns_common *ns)
982{
983 return &get_net(container_of(ns, struct net, ns))->ns;
984}
985
1da177e4
LT
986static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
987{
988 struct socket *sock;
881d966b 989 struct sock *sk;
1da177e4
LT
990 void __user *argp = (void __user *)arg;
991 int pid, err;
881d966b 992 struct net *net;
1da177e4 993
b69aee04 994 sock = file->private_data;
881d966b 995 sk = sock->sk;
3b1e0a65 996 net = sock_net(sk);
1da177e4 997 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 998 err = dev_ioctl(net, cmd, argp);
1da177e4 999 } else
3d23e349 1000#ifdef CONFIG_WEXT_CORE
1da177e4 1001 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1002 err = dev_ioctl(net, cmd, argp);
1da177e4 1003 } else
3d23e349 1004#endif
89bddce5 1005 switch (cmd) {
1da177e4
LT
1006 case FIOSETOWN:
1007 case SIOCSPGRP:
1008 err = -EFAULT;
1009 if (get_user(pid, (int __user *)argp))
1010 break;
393cc3f5 1011 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1012 break;
1013 case FIOGETOWN:
1014 case SIOCGPGRP:
609d7fa9 1015 err = put_user(f_getown(sock->file),
89bddce5 1016 (int __user *)argp);
1da177e4
LT
1017 break;
1018 case SIOCGIFBR:
1019 case SIOCSIFBR:
1020 case SIOCBRADDBR:
1021 case SIOCBRDELBR:
1022 err = -ENOPKG;
1023 if (!br_ioctl_hook)
1024 request_module("bridge");
1025
4a3e2f71 1026 mutex_lock(&br_ioctl_mutex);
89bddce5 1027 if (br_ioctl_hook)
881d966b 1028 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1029 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1030 break;
1031 case SIOCGIFVLAN:
1032 case SIOCSIFVLAN:
1033 err = -ENOPKG;
1034 if (!vlan_ioctl_hook)
1035 request_module("8021q");
1036
4a3e2f71 1037 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1038 if (vlan_ioctl_hook)
881d966b 1039 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1040 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1041 break;
1da177e4
LT
1042 case SIOCADDDLCI:
1043 case SIOCDELDLCI:
1044 err = -ENOPKG;
1045 if (!dlci_ioctl_hook)
1046 request_module("dlci");
1047
7512cbf6
PE
1048 mutex_lock(&dlci_ioctl_mutex);
1049 if (dlci_ioctl_hook)
1da177e4 1050 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1051 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1052 break;
c62cce2c
AV
1053 case SIOCGSKNS:
1054 err = -EPERM;
1055 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1056 break;
1057
1058 err = open_related_ns(&net->ns, get_net_ns);
1059 break;
1da177e4 1060 default:
6b96018b 1061 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1062 break;
89bddce5 1063 }
1da177e4
LT
1064 return err;
1065}
1066
1067int sock_create_lite(int family, int type, int protocol, struct socket **res)
1068{
1069 int err;
1070 struct socket *sock = NULL;
89bddce5 1071
1da177e4
LT
1072 err = security_socket_create(family, type, protocol, 1);
1073 if (err)
1074 goto out;
1075
1076 sock = sock_alloc();
1077 if (!sock) {
1078 err = -ENOMEM;
1079 goto out;
1080 }
1081
1da177e4 1082 sock->type = type;
7420ed23
VY
1083 err = security_socket_post_create(sock, family, type, protocol, 1);
1084 if (err)
1085 goto out_release;
1086
1da177e4
LT
1087out:
1088 *res = sock;
1089 return err;
7420ed23
VY
1090out_release:
1091 sock_release(sock);
1092 sock = NULL;
1093 goto out;
1da177e4 1094}
c6d409cf 1095EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1096
1097/* No kernel lock held - perfect */
89bddce5 1098static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1099{
cbf55001 1100 unsigned int busy_flag = 0;
1da177e4
LT
1101 struct socket *sock;
1102
1103 /*
89bddce5 1104 * We can't return errors to poll, so it's either yes or no.
1da177e4 1105 */
b69aee04 1106 sock = file->private_data;
2d48d67f 1107
cbf55001 1108 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1109 /* this socket can poll_ll so tell the system call */
cbf55001 1110 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1111
1112 /* once, only if requested by syscall */
cbf55001
ET
1113 if (wait && (wait->_key & POLL_BUSY_LOOP))
1114 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1115 }
1116
cbf55001 1117 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1118}
1119
89bddce5 1120static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1121{
b69aee04 1122 struct socket *sock = file->private_data;
1da177e4
LT
1123
1124 return sock->ops->mmap(file, sock, vma);
1125}
1126
/*
 * release handler for socket files: release the socket that lives in
 * @inode.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1132
1133/*
1134 * Update the socket async list
1135 *
1136 * Fasync_list locking strategy.
1137 *
1138 * 1. fasync_list is modified only under process context socket lock
1139 * i.e. under semaphore.
1140 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1141 * or under socket lock
1da177e4
LT
1142 */
1143
1144static int sock_fasync(int fd, struct file *filp, int on)
1145{
989a2979
ED
1146 struct socket *sock = filp->private_data;
1147 struct sock *sk = sock->sk;
eaefd110 1148 struct socket_wq *wq;
1da177e4 1149
989a2979 1150 if (sk == NULL)
1da177e4 1151 return -EINVAL;
1da177e4
LT
1152
1153 lock_sock(sk);
1e1d04e6 1154 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1155 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1156
eaefd110 1157 if (!wq->fasync_list)
989a2979
ED
1158 sock_reset_flag(sk, SOCK_FASYNC);
1159 else
bcdce719 1160 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1161
989a2979 1162 release_sock(sk);
1da177e4
LT
1163 return 0;
1164}
1165
ceb5d58b 1166/* This function may be called only under rcu_lock */
1da177e4 1167
ceb5d58b 1168int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1169{
ceb5d58b 1170 if (!wq || !wq->fasync_list)
1da177e4 1171 return -1;
ceb5d58b 1172
89bddce5 1173 switch (how) {
8d8ad9d7 1174 case SOCK_WAKE_WAITD:
ceb5d58b 1175 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1176 break;
1177 goto call_kill;
8d8ad9d7 1178 case SOCK_WAKE_SPACE:
ceb5d58b 1179 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1180 break;
1181 /* fall through */
8d8ad9d7 1182 case SOCK_WAKE_IO:
89bddce5 1183call_kill:
43815482 1184 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1185 break;
8d8ad9d7 1186 case SOCK_WAKE_URG:
43815482 1187 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1188 }
ceb5d58b 1189
1da177e4
LT
1190 return 0;
1191}
c6d409cf 1192EXPORT_SYMBOL(sock_wake_async);
1da177e4 1193
721db93a 1194int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1195 struct socket **res, int kern)
1da177e4
LT
1196{
1197 int err;
1198 struct socket *sock;
55737fda 1199 const struct net_proto_family *pf;
1da177e4
LT
1200
1201 /*
89bddce5 1202 * Check protocol is in range
1da177e4
LT
1203 */
1204 if (family < 0 || family >= NPROTO)
1205 return -EAFNOSUPPORT;
1206 if (type < 0 || type >= SOCK_MAX)
1207 return -EINVAL;
1208
1209 /* Compatibility.
1210
1211 This uglymoron is moved from INET layer to here to avoid
1212 deadlock in module load.
1213 */
1214 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1215 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1216 current->comm);
1da177e4
LT
1217 family = PF_PACKET;
1218 }
1219
1220 err = security_socket_create(family, type, protocol, kern);
1221 if (err)
1222 return err;
89bddce5 1223
55737fda
SH
1224 /*
1225 * Allocate the socket and allow the family to set things up. if
1226 * the protocol is 0, the family is instructed to select an appropriate
1227 * default.
1228 */
1229 sock = sock_alloc();
1230 if (!sock) {
e87cc472 1231 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1232 return -ENFILE; /* Not exactly a match, but its the
1233 closest posix thing */
1234 }
1235
1236 sock->type = type;
1237
95a5afca 1238#ifdef CONFIG_MODULES
89bddce5
SH
1239 /* Attempt to load a protocol module if the find failed.
1240 *
1241 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1242 * requested real, full-featured networking support upon configuration.
1243 * Otherwise module support will break!
1244 */
190683a9 1245 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1246 request_module("net-pf-%d", family);
1da177e4
LT
1247#endif
1248
55737fda
SH
1249 rcu_read_lock();
1250 pf = rcu_dereference(net_families[family]);
1251 err = -EAFNOSUPPORT;
1252 if (!pf)
1253 goto out_release;
1da177e4
LT
1254
1255 /*
1256 * We will call the ->create function, that possibly is in a loadable
1257 * module, so we have to bump that loadable module refcnt first.
1258 */
55737fda 1259 if (!try_module_get(pf->owner))
1da177e4
LT
1260 goto out_release;
1261
55737fda
SH
1262 /* Now protected by module ref count */
1263 rcu_read_unlock();
1264
3f378b68 1265 err = pf->create(net, sock, protocol, kern);
55737fda 1266 if (err < 0)
1da177e4 1267 goto out_module_put;
a79af59e 1268
1da177e4
LT
1269 /*
1270 * Now to bump the refcnt of the [loadable] module that owns this
1271 * socket at sock_release time we decrement its refcnt.
1272 */
55737fda
SH
1273 if (!try_module_get(sock->ops->owner))
1274 goto out_module_busy;
1275
1da177e4
LT
1276 /*
1277 * Now that we're done with the ->create function, the [loadable]
1278 * module can have its refcnt decremented
1279 */
55737fda 1280 module_put(pf->owner);
7420ed23
VY
1281 err = security_socket_post_create(sock, family, type, protocol, kern);
1282 if (err)
3b185525 1283 goto out_sock_release;
55737fda 1284 *res = sock;
1da177e4 1285
55737fda
SH
1286 return 0;
1287
1288out_module_busy:
1289 err = -EAFNOSUPPORT;
1da177e4 1290out_module_put:
55737fda
SH
1291 sock->ops = NULL;
1292 module_put(pf->owner);
1293out_sock_release:
1da177e4 1294 sock_release(sock);
55737fda
SH
1295 return err;
1296
1297out_release:
1298 rcu_read_unlock();
1299 goto out_sock_release;
1da177e4 1300}
721db93a 1301EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1302
1303int sock_create(int family, int type, int protocol, struct socket **res)
1304{
1b8d7ae4 1305 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1306}
c6d409cf 1307EXPORT_SYMBOL(sock_create);
1da177e4 1308
/*
 * sock_create_kern - create a socket in @net.
 *
 * Wrapper around __sock_create() with kern == 1.
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1314
3e0fa65f 1315SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1316{
1317 int retval;
1318 struct socket *sock;
a677a039
UD
1319 int flags;
1320
e38b36f3
UD
1321 /* Check the SOCK_* constants for consistency. */
1322 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1323 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1324 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1325 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1326
a677a039 1327 flags = type & ~SOCK_TYPE_MASK;
77d27200 1328 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1329 return -EINVAL;
1330 type &= SOCK_TYPE_MASK;
1da177e4 1331
aaca0bdc
UD
1332 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1333 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1334
1da177e4
LT
1335 retval = sock_create(family, type, protocol, &sock);
1336 if (retval < 0)
8e1611e2 1337 return retval;
1da177e4 1338
8e1611e2 1339 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1340}
1341
1342/*
1343 * Create a pair of connected sockets.
1344 */
1345
3e0fa65f
HC
1346SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1347 int __user *, usockvec)
1da177e4
LT
1348{
1349 struct socket *sock1, *sock2;
1350 int fd1, fd2, err;
db349509 1351 struct file *newfile1, *newfile2;
a677a039
UD
1352 int flags;
1353
1354 flags = type & ~SOCK_TYPE_MASK;
77d27200 1355 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1356 return -EINVAL;
1357 type &= SOCK_TYPE_MASK;
1da177e4 1358
aaca0bdc
UD
1359 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1360 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1361
016a266b
AV
1362 /*
1363 * reserve descriptors and make sure we won't fail
1364 * to return them to userland.
1365 */
1366 fd1 = get_unused_fd_flags(flags);
1367 if (unlikely(fd1 < 0))
1368 return fd1;
1369
1370 fd2 = get_unused_fd_flags(flags);
1371 if (unlikely(fd2 < 0)) {
1372 put_unused_fd(fd1);
1373 return fd2;
1374 }
1375
1376 err = put_user(fd1, &usockvec[0]);
1377 if (err)
1378 goto out;
1379
1380 err = put_user(fd2, &usockvec[1]);
1381 if (err)
1382 goto out;
1383
1da177e4
LT
1384 /*
1385 * Obtain the first socket and check if the underlying protocol
1386 * supports the socketpair call.
1387 */
1388
1389 err = sock_create(family, type, protocol, &sock1);
016a266b 1390 if (unlikely(err < 0))
1da177e4
LT
1391 goto out;
1392
1393 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1394 if (unlikely(err < 0)) {
1395 sock_release(sock1);
1396 goto out;
bf3c23d1 1397 }
d73aa286 1398
016a266b
AV
1399 err = sock1->ops->socketpair(sock1, sock2);
1400 if (unlikely(err < 0)) {
1401 sock_release(sock2);
1402 sock_release(sock1);
1403 goto out;
28407630
AV
1404 }
1405
aab174f0 1406 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1407 if (IS_ERR(newfile1)) {
28407630 1408 err = PTR_ERR(newfile1);
016a266b
AV
1409 sock_release(sock2);
1410 goto out;
28407630
AV
1411 }
1412
aab174f0 1413 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1414 if (IS_ERR(newfile2)) {
1415 err = PTR_ERR(newfile2);
016a266b
AV
1416 fput(newfile1);
1417 goto out;
db349509
AV
1418 }
1419
157cf649 1420 audit_fd_pair(fd1, fd2);
d73aa286 1421
db349509
AV
1422 fd_install(fd1, newfile1);
1423 fd_install(fd2, newfile2);
d73aa286 1424 return 0;
1da177e4 1425
016a266b 1426out:
d73aa286 1427 put_unused_fd(fd2);
d73aa286 1428 put_unused_fd(fd1);
1da177e4
LT
1429 return err;
1430}
1431
1da177e4
LT
1432/*
1433 * Bind a name to a socket. Nothing much to do here since it's
1434 * the protocol's responsibility to handle the local address.
1435 *
1436 * We move the socket address to kernel space before we call
1437 * the protocol layer (having also checked the address is ok).
1438 */
1439
20f37034 1440SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1441{
1442 struct socket *sock;
230b1839 1443 struct sockaddr_storage address;
6cb153ca 1444 int err, fput_needed;
1da177e4 1445
89bddce5 1446 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1447 if (sock) {
43db362d 1448 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1449 if (err >= 0) {
1450 err = security_socket_bind(sock,
230b1839 1451 (struct sockaddr *)&address,
89bddce5 1452 addrlen);
6cb153ca
BL
1453 if (!err)
1454 err = sock->ops->bind(sock,
89bddce5 1455 (struct sockaddr *)
230b1839 1456 &address, addrlen);
1da177e4 1457 }
6cb153ca 1458 fput_light(sock->file, fput_needed);
89bddce5 1459 }
1da177e4
LT
1460 return err;
1461}
1462
1da177e4
LT
1463/*
1464 * Perform a listen. Basically, we allow the protocol to do anything
1465 * necessary for a listen, and if that works, we mark the socket as
1466 * ready for listening.
1467 */
1468
3e0fa65f 1469SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1470{
1471 struct socket *sock;
6cb153ca 1472 int err, fput_needed;
b8e1f9b5 1473 int somaxconn;
89bddce5
SH
1474
1475 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1476 if (sock) {
8efa6e93 1477 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1478 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1479 backlog = somaxconn;
1da177e4
LT
1480
1481 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1482 if (!err)
1483 err = sock->ops->listen(sock, backlog);
1da177e4 1484
6cb153ca 1485 fput_light(sock->file, fput_needed);
1da177e4
LT
1486 }
1487 return err;
1488}
1489
1da177e4
LT
1490/*
1491 * For accept, we attempt to create a new socket, set up the link
1492 * with the client, wake up the client, then return the new
1493 * connected fd. We collect the address of the connector in kernel
1494 * space and move it to user at the very end. This is unclean because
1495 * we open the socket then return an error.
1496 *
1497 * 1003.1g adds the ability to recvmsg() to query connection pending
1498 * status to recvmsg. We need to add that support in a way thats
1499 * clean when we restucture accept also.
1500 */
1501
20f37034
HC
1502SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1503 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1504{
1505 struct socket *sock, *newsock;
39d8c1b6 1506 struct file *newfile;
6cb153ca 1507 int err, len, newfd, fput_needed;
230b1839 1508 struct sockaddr_storage address;
1da177e4 1509
77d27200 1510 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1511 return -EINVAL;
1512
1513 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1514 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1515
6cb153ca 1516 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1517 if (!sock)
1518 goto out;
1519
1520 err = -ENFILE;
c6d409cf
ED
1521 newsock = sock_alloc();
1522 if (!newsock)
1da177e4
LT
1523 goto out_put;
1524
1525 newsock->type = sock->type;
1526 newsock->ops = sock->ops;
1527
1da177e4
LT
1528 /*
1529 * We don't need try_module_get here, as the listening socket (sock)
1530 * has the protocol module (sock->ops->owner) held.
1531 */
1532 __module_get(newsock->ops->owner);
1533
28407630 1534 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1535 if (unlikely(newfd < 0)) {
1536 err = newfd;
9a1875e6
DM
1537 sock_release(newsock);
1538 goto out_put;
39d8c1b6 1539 }
aab174f0 1540 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1541 if (IS_ERR(newfile)) {
28407630
AV
1542 err = PTR_ERR(newfile);
1543 put_unused_fd(newfd);
28407630
AV
1544 goto out_put;
1545 }
39d8c1b6 1546
a79af59e
FF
1547 err = security_socket_accept(sock, newsock);
1548 if (err)
39d8c1b6 1549 goto out_fd;
a79af59e 1550
cdfbabfb 1551 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1552 if (err < 0)
39d8c1b6 1553 goto out_fd;
1da177e4
LT
1554
1555 if (upeer_sockaddr) {
230b1839 1556 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1557 &len, 2) < 0) {
1da177e4 1558 err = -ECONNABORTED;
39d8c1b6 1559 goto out_fd;
1da177e4 1560 }
43db362d 1561 err = move_addr_to_user(&address,
230b1839 1562 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1563 if (err < 0)
39d8c1b6 1564 goto out_fd;
1da177e4
LT
1565 }
1566
1567 /* File flags are not inherited via accept() unlike another OSes. */
1568
39d8c1b6
DM
1569 fd_install(newfd, newfile);
1570 err = newfd;
1da177e4 1571
1da177e4 1572out_put:
6cb153ca 1573 fput_light(sock->file, fput_needed);
1da177e4
LT
1574out:
1575 return err;
39d8c1b6 1576out_fd:
9606a216 1577 fput(newfile);
39d8c1b6 1578 put_unused_fd(newfd);
1da177e4
LT
1579 goto out_put;
1580}
1581
20f37034
HC
1582SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1583 int __user *, upeer_addrlen)
aaca0bdc 1584{
de11defe 1585 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1586}
1587
1da177e4
LT
1588/*
1589 * Attempt to connect to a socket with the server address. The address
1590 * is in user space so we verify it is OK and move it to kernel space.
1591 *
1592 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1593 * break bindings
1594 *
1595 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1596 * other SEQPACKET protocols that take time to connect() as it doesn't
1597 * include the -EINPROGRESS status for such sockets.
1598 */
1599
20f37034
HC
1600SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1601 int, addrlen)
1da177e4
LT
1602{
1603 struct socket *sock;
230b1839 1604 struct sockaddr_storage address;
6cb153ca 1605 int err, fput_needed;
1da177e4 1606
6cb153ca 1607 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1608 if (!sock)
1609 goto out;
43db362d 1610 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1611 if (err < 0)
1612 goto out_put;
1613
89bddce5 1614 err =
230b1839 1615 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1616 if (err)
1617 goto out_put;
1618
230b1839 1619 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1620 sock->file->f_flags);
1621out_put:
6cb153ca 1622 fput_light(sock->file, fput_needed);
1da177e4
LT
1623out:
1624 return err;
1625}
1626
1627/*
1628 * Get the local address ('name') of a socket object. Move the obtained
1629 * name to user space.
1630 */
1631
20f37034
HC
1632SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1633 int __user *, usockaddr_len)
1da177e4
LT
1634{
1635 struct socket *sock;
230b1839 1636 struct sockaddr_storage address;
6cb153ca 1637 int len, err, fput_needed;
89bddce5 1638
6cb153ca 1639 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1640 if (!sock)
1641 goto out;
1642
1643 err = security_socket_getsockname(sock);
1644 if (err)
1645 goto out_put;
1646
230b1839 1647 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1648 if (err)
1649 goto out_put;
43db362d 1650 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1651
1652out_put:
6cb153ca 1653 fput_light(sock->file, fput_needed);
1da177e4
LT
1654out:
1655 return err;
1656}
1657
1658/*
1659 * Get the remote address ('name') of a socket object. Move the obtained
1660 * name to user space.
1661 */
1662
20f37034
HC
1663SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1664 int __user *, usockaddr_len)
1da177e4
LT
1665{
1666 struct socket *sock;
230b1839 1667 struct sockaddr_storage address;
6cb153ca 1668 int len, err, fput_needed;
1da177e4 1669
89bddce5
SH
1670 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1671 if (sock != NULL) {
1da177e4
LT
1672 err = security_socket_getpeername(sock);
1673 if (err) {
6cb153ca 1674 fput_light(sock->file, fput_needed);
1da177e4
LT
1675 return err;
1676 }
1677
89bddce5 1678 err =
230b1839 1679 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1680 1);
1da177e4 1681 if (!err)
43db362d 1682 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1683 usockaddr_len);
6cb153ca 1684 fput_light(sock->file, fput_needed);
1da177e4
LT
1685 }
1686 return err;
1687}
1688
1689/*
1690 * Send a datagram to a given address. We move the address into kernel
1691 * space and check the user space data area is readable before invoking
1692 * the protocol.
1693 */
1694
3e0fa65f 1695SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1696 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1697 int, addr_len)
1da177e4
LT
1698{
1699 struct socket *sock;
230b1839 1700 struct sockaddr_storage address;
1da177e4
LT
1701 int err;
1702 struct msghdr msg;
1703 struct iovec iov;
6cb153ca 1704 int fput_needed;
6cb153ca 1705
602bd0e9
AV
1706 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1707 if (unlikely(err))
1708 return err;
de0fa95c
PE
1709 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1710 if (!sock)
4387ff75 1711 goto out;
6cb153ca 1712
89bddce5 1713 msg.msg_name = NULL;
89bddce5
SH
1714 msg.msg_control = NULL;
1715 msg.msg_controllen = 0;
1716 msg.msg_namelen = 0;
6cb153ca 1717 if (addr) {
43db362d 1718 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1719 if (err < 0)
1720 goto out_put;
230b1839 1721 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1722 msg.msg_namelen = addr_len;
1da177e4
LT
1723 }
1724 if (sock->file->f_flags & O_NONBLOCK)
1725 flags |= MSG_DONTWAIT;
1726 msg.msg_flags = flags;
d8725c86 1727 err = sock_sendmsg(sock, &msg);
1da177e4 1728
89bddce5 1729out_put:
de0fa95c 1730 fput_light(sock->file, fput_needed);
4387ff75 1731out:
1da177e4
LT
1732 return err;
1733}
1734
1735/*
89bddce5 1736 * Send a datagram down a socket.
1da177e4
LT
1737 */
1738
3e0fa65f 1739SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1740 unsigned int, flags)
1da177e4
LT
1741{
1742 return sys_sendto(fd, buff, len, flags, NULL, 0);
1743}
1744
1745/*
89bddce5 1746 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1747 * sender. We verify the buffers are writable and if needed move the
1748 * sender address from kernel to user space.
1749 */
1750
3e0fa65f 1751SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1752 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1753 int __user *, addr_len)
1da177e4
LT
1754{
1755 struct socket *sock;
1756 struct iovec iov;
1757 struct msghdr msg;
230b1839 1758 struct sockaddr_storage address;
89bddce5 1759 int err, err2;
6cb153ca
BL
1760 int fput_needed;
1761
602bd0e9
AV
1762 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1763 if (unlikely(err))
1764 return err;
de0fa95c 1765 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1766 if (!sock)
de0fa95c 1767 goto out;
1da177e4 1768
89bddce5
SH
1769 msg.msg_control = NULL;
1770 msg.msg_controllen = 0;
f3d33426
HFS
1771 /* Save some cycles and don't copy the address if not needed */
1772 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1773 /* We assume all kernel code knows the size of sockaddr_storage */
1774 msg.msg_namelen = 0;
130ed5d1 1775 msg.msg_iocb = NULL;
9f138fa6 1776 msg.msg_flags = 0;
1da177e4
LT
1777 if (sock->file->f_flags & O_NONBLOCK)
1778 flags |= MSG_DONTWAIT;
2da62906 1779 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1780
89bddce5 1781 if (err >= 0 && addr != NULL) {
43db362d 1782 err2 = move_addr_to_user(&address,
230b1839 1783 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1784 if (err2 < 0)
1785 err = err2;
1da177e4 1786 }
de0fa95c
PE
1787
1788 fput_light(sock->file, fput_needed);
4387ff75 1789out:
1da177e4
LT
1790 return err;
1791}
1792
1793/*
89bddce5 1794 * Receive a datagram from a socket.
1da177e4
LT
1795 */
1796
b7c0ddf5
JG
1797SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1798 unsigned int, flags)
1da177e4
LT
1799{
1800 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1801}
1802
1803/*
1804 * Set a socket option. Because we don't know the option lengths we have
1805 * to pass the user mode parameter for the protocols to sort out.
1806 */
1807
20f37034
HC
1808SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1809 char __user *, optval, int, optlen)
1da177e4 1810{
6cb153ca 1811 int err, fput_needed;
1da177e4
LT
1812 struct socket *sock;
1813
1814 if (optlen < 0)
1815 return -EINVAL;
89bddce5
SH
1816
1817 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1818 if (sock != NULL) {
1819 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1820 if (err)
1821 goto out_put;
1da177e4
LT
1822
1823 if (level == SOL_SOCKET)
89bddce5
SH
1824 err =
1825 sock_setsockopt(sock, level, optname, optval,
1826 optlen);
1da177e4 1827 else
89bddce5
SH
1828 err =
1829 sock->ops->setsockopt(sock, level, optname, optval,
1830 optlen);
6cb153ca
BL
1831out_put:
1832 fput_light(sock->file, fput_needed);
1da177e4
LT
1833 }
1834 return err;
1835}
1836
1837/*
1838 * Get a socket option. Because we don't know the option lengths we have
1839 * to pass a user mode parameter for the protocols to sort out.
1840 */
1841
20f37034
HC
1842SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1843 char __user *, optval, int __user *, optlen)
1da177e4 1844{
6cb153ca 1845 int err, fput_needed;
1da177e4
LT
1846 struct socket *sock;
1847
89bddce5
SH
1848 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1849 if (sock != NULL) {
6cb153ca
BL
1850 err = security_socket_getsockopt(sock, level, optname);
1851 if (err)
1852 goto out_put;
1da177e4
LT
1853
1854 if (level == SOL_SOCKET)
89bddce5
SH
1855 err =
1856 sock_getsockopt(sock, level, optname, optval,
1857 optlen);
1da177e4 1858 else
89bddce5
SH
1859 err =
1860 sock->ops->getsockopt(sock, level, optname, optval,
1861 optlen);
6cb153ca
BL
1862out_put:
1863 fput_light(sock->file, fput_needed);
1da177e4
LT
1864 }
1865 return err;
1866}
1867
1da177e4
LT
1868/*
1869 * Shutdown a socket.
1870 */
1871
754fe8d2 1872SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1873{
6cb153ca 1874 int err, fput_needed;
1da177e4
LT
1875 struct socket *sock;
1876
89bddce5
SH
1877 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1878 if (sock != NULL) {
1da177e4 1879 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1880 if (!err)
1881 err = sock->ops->shutdown(sock, how);
1882 fput_light(sock->file, fput_needed);
1da177e4
LT
1883 }
1884 return err;
1885}
1886
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * They pick between 'msg' and 'msg##_compat' based on MSG_CMSG_COMPAT in
 * a variable named 'flags', so all three names must be in scope wherever
 * the macros are expanded.
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1893
c71d8ebe
TH
1894struct used_address {
1895 struct sockaddr_storage name;
1896 unsigned int name_len;
1897};
1898
da184284
AV
1899static int copy_msghdr_from_user(struct msghdr *kmsg,
1900 struct user_msghdr __user *umsg,
1901 struct sockaddr __user **save_addr,
1902 struct iovec **iov)
1661bf36 1903{
ffb07550 1904 struct user_msghdr msg;
08adb7da
AV
1905 ssize_t err;
1906
ffb07550 1907 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 1908 return -EFAULT;
dbb490b9 1909
864d9664 1910 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
1911 kmsg->msg_controllen = msg.msg_controllen;
1912 kmsg->msg_flags = msg.msg_flags;
1913
1914 kmsg->msg_namelen = msg.msg_namelen;
1915 if (!msg.msg_name)
6a2a2b3a
AS
1916 kmsg->msg_namelen = 0;
1917
dbb490b9
ML
1918 if (kmsg->msg_namelen < 0)
1919 return -EINVAL;
1920
1661bf36 1921 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1922 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1923
1924 if (save_addr)
ffb07550 1925 *save_addr = msg.msg_name;
08adb7da 1926
ffb07550 1927 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 1928 if (!save_addr) {
864d9664
PA
1929 err = move_addr_to_kernel(msg.msg_name,
1930 kmsg->msg_namelen,
08adb7da
AV
1931 kmsg->msg_name);
1932 if (err < 0)
1933 return err;
1934 }
1935 } else {
1936 kmsg->msg_name = NULL;
1937 kmsg->msg_namelen = 0;
1938 }
1939
ffb07550 1940 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
1941 return -EMSGSIZE;
1942
0345f931 1943 kmsg->msg_iocb = NULL;
1944
ffb07550
AV
1945 return import_iovec(save_addr ? READ : WRITE,
1946 msg.msg_iov, msg.msg_iovlen,
da184284 1947 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1948}
1949
666547ff 1950static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1951 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1952 struct used_address *used_address,
1953 unsigned int allowed_msghdr_flags)
1da177e4 1954{
89bddce5
SH
1955 struct compat_msghdr __user *msg_compat =
1956 (struct compat_msghdr __user *)msg;
230b1839 1957 struct sockaddr_storage address;
1da177e4 1958 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1959 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 1960 __aligned(sizeof(__kernel_size_t));
89bddce5 1961 /* 20 is size of ipv6_pktinfo */
1da177e4 1962 unsigned char *ctl_buf = ctl;
d8725c86 1963 int ctl_len;
08adb7da 1964 ssize_t err;
89bddce5 1965
08adb7da 1966 msg_sys->msg_name = &address;
1da177e4 1967
08449320 1968 if (MSG_CMSG_COMPAT & flags)
08adb7da 1969 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1970 else
08adb7da 1971 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1972 if (err < 0)
da184284 1973 return err;
1da177e4
LT
1974
1975 err = -ENOBUFS;
1976
228e548e 1977 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1978 goto out_freeiov;
28a94d8f 1979 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 1980 ctl_len = msg_sys->msg_controllen;
1da177e4 1981 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1982 err =
228e548e 1983 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1984 sizeof(ctl));
1da177e4
LT
1985 if (err)
1986 goto out_freeiov;
228e548e
AB
1987 ctl_buf = msg_sys->msg_control;
1988 ctl_len = msg_sys->msg_controllen;
1da177e4 1989 } else if (ctl_len) {
ac4340fc
DM
1990 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
1991 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 1992 if (ctl_len > sizeof(ctl)) {
1da177e4 1993 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1994 if (ctl_buf == NULL)
1da177e4
LT
1995 goto out_freeiov;
1996 }
1997 err = -EFAULT;
1998 /*
228e548e 1999 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2000 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2001 * checking falls down on this.
2002 */
fb8621bb 2003 if (copy_from_user(ctl_buf,
228e548e 2004 (void __user __force *)msg_sys->msg_control,
89bddce5 2005 ctl_len))
1da177e4 2006 goto out_freectl;
228e548e 2007 msg_sys->msg_control = ctl_buf;
1da177e4 2008 }
228e548e 2009 msg_sys->msg_flags = flags;
1da177e4
LT
2010
2011 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2012 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2013 /*
2014 * If this is sendmmsg() and current destination address is same as
2015 * previously succeeded address, omit asking LSM's decision.
2016 * used_address->name_len is initialized to UINT_MAX so that the first
2017 * destination address never matches.
2018 */
bc909d9d
MD
2019 if (used_address && msg_sys->msg_name &&
2020 used_address->name_len == msg_sys->msg_namelen &&
2021 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2022 used_address->name_len)) {
d8725c86 2023 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2024 goto out_freectl;
2025 }
d8725c86 2026 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2027 /*
2028 * If this is sendmmsg() and sending to current destination address was
2029 * successful, remember it.
2030 */
2031 if (used_address && err >= 0) {
2032 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2033 if (msg_sys->msg_name)
2034 memcpy(&used_address->name, msg_sys->msg_name,
2035 used_address->name_len);
c71d8ebe 2036 }
1da177e4
LT
2037
2038out_freectl:
89bddce5 2039 if (ctl_buf != ctl)
1da177e4
LT
2040 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2041out_freeiov:
da184284 2042 kfree(iov);
228e548e
AB
2043 return err;
2044}
2045
2046/*
2047 * BSD sendmsg interface
2048 */
2049
666547ff 2050long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
2051{
2052 int fput_needed, err;
2053 struct msghdr msg_sys;
1be374a0
AL
2054 struct socket *sock;
2055
1be374a0 2056 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2057 if (!sock)
2058 goto out;
2059
28a94d8f 2060 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2061
6cb153ca 2062 fput_light(sock->file, fput_needed);
89bddce5 2063out:
1da177e4
LT
2064 return err;
2065}
2066
666547ff 2067SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
2068{
2069 if (flags & MSG_CMSG_COMPAT)
2070 return -EINVAL;
2071 return __sys_sendmsg(fd, msg, flags);
2072}
2073
228e548e
AB
2074/*
2075 * Linux sendmmsg interface
2076 */
2077
2078int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2079 unsigned int flags)
2080{
2081 int fput_needed, err, datagrams;
2082 struct socket *sock;
2083 struct mmsghdr __user *entry;
2084 struct compat_mmsghdr __user *compat_entry;
2085 struct msghdr msg_sys;
c71d8ebe 2086 struct used_address used_address;
f092276d 2087 unsigned int oflags = flags;
228e548e 2088
98382f41
AB
2089 if (vlen > UIO_MAXIOV)
2090 vlen = UIO_MAXIOV;
228e548e
AB
2091
2092 datagrams = 0;
2093
2094 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2095 if (!sock)
2096 return err;
2097
c71d8ebe 2098 used_address.name_len = UINT_MAX;
228e548e
AB
2099 entry = mmsg;
2100 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2101 err = 0;
f092276d 2102 flags |= MSG_BATCH;
228e548e
AB
2103
2104 while (datagrams < vlen) {
f092276d
TH
2105 if (datagrams == vlen - 1)
2106 flags = oflags;
2107
228e548e 2108 if (MSG_CMSG_COMPAT & flags) {
666547ff 2109 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2110 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2111 if (err < 0)
2112 break;
2113 err = __put_user(err, &compat_entry->msg_len);
2114 ++compat_entry;
2115 } else {
a7526eb5 2116 err = ___sys_sendmsg(sock,
666547ff 2117 (struct user_msghdr __user *)entry,
28a94d8f 2118 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2119 if (err < 0)
2120 break;
2121 err = put_user(err, &entry->msg_len);
2122 ++entry;
2123 }
2124
2125 if (err)
2126 break;
2127 ++datagrams;
3023898b
SHY
2128 if (msg_data_left(&msg_sys))
2129 break;
a78cb84c 2130 cond_resched();
228e548e
AB
2131 }
2132
228e548e
AB
2133 fput_light(sock->file, fput_needed);
2134
728ffb86
AB
2135 /* We only return an error if no datagrams were able to be sent */
2136 if (datagrams != 0)
228e548e
AB
2137 return datagrams;
2138
228e548e
AB
2139 return err;
2140}
2141
2142SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2143 unsigned int, vlen, unsigned int, flags)
2144{
1be374a0
AL
2145 if (flags & MSG_CMSG_COMPAT)
2146 return -EINVAL;
228e548e
AB
2147 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2148}
2149
666547ff 2150static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2151 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2152{
89bddce5
SH
2153 struct compat_msghdr __user *msg_compat =
2154 (struct compat_msghdr __user *)msg;
1da177e4 2155 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2156 struct iovec *iov = iovstack;
1da177e4 2157 unsigned long cmsg_ptr;
2da62906 2158 int len;
08adb7da 2159 ssize_t err;
1da177e4
LT
2160
2161 /* kernel mode address */
230b1839 2162 struct sockaddr_storage addr;
1da177e4
LT
2163
2164 /* user mode address pointers */
2165 struct sockaddr __user *uaddr;
08adb7da 2166 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2167
08adb7da 2168 msg_sys->msg_name = &addr;
1da177e4 2169
f3d33426 2170 if (MSG_CMSG_COMPAT & flags)
08adb7da 2171 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2172 else
08adb7da 2173 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2174 if (err < 0)
da184284 2175 return err;
1da177e4 2176
a2e27255
ACM
2177 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2178 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2179
f3d33426
HFS
2180 /* We assume all kernel code knows the size of sockaddr_storage */
2181 msg_sys->msg_namelen = 0;
2182
1da177e4
LT
2183 if (sock->file->f_flags & O_NONBLOCK)
2184 flags |= MSG_DONTWAIT;
2da62906 2185 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2186 if (err < 0)
2187 goto out_freeiov;
2188 len = err;
2189
2190 if (uaddr != NULL) {
43db362d 2191 err = move_addr_to_user(&addr,
a2e27255 2192 msg_sys->msg_namelen, uaddr,
89bddce5 2193 uaddr_len);
1da177e4
LT
2194 if (err < 0)
2195 goto out_freeiov;
2196 }
a2e27255 2197 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2198 COMPAT_FLAGS(msg));
1da177e4
LT
2199 if (err)
2200 goto out_freeiov;
2201 if (MSG_CMSG_COMPAT & flags)
a2e27255 2202 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2203 &msg_compat->msg_controllen);
2204 else
a2e27255 2205 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2206 &msg->msg_controllen);
2207 if (err)
2208 goto out_freeiov;
2209 err = len;
2210
2211out_freeiov:
da184284 2212 kfree(iov);
a2e27255
ACM
2213 return err;
2214}
2215
2216/*
2217 * BSD recvmsg interface
2218 */
2219
666547ff 2220long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2221{
2222 int fput_needed, err;
2223 struct msghdr msg_sys;
1be374a0
AL
2224 struct socket *sock;
2225
1be374a0 2226 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2227 if (!sock)
2228 goto out;
2229
a7526eb5 2230 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2231
6cb153ca 2232 fput_light(sock->file, fput_needed);
1da177e4
LT
2233out:
2234 return err;
2235}
2236
666547ff 2237SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2238 unsigned int, flags)
2239{
2240 if (flags & MSG_CMSG_COMPAT)
2241 return -EINVAL;
2242 return __sys_recvmsg(fd, msg, flags);
2243}
2244
a2e27255
ACM
2245/*
2246 * Linux recvmmsg interface
2247 */
2248
2249int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2250 unsigned int flags, struct timespec *timeout)
2251{
2252 int fput_needed, err, datagrams;
2253 struct socket *sock;
2254 struct mmsghdr __user *entry;
d7256d0e 2255 struct compat_mmsghdr __user *compat_entry;
a2e27255 2256 struct msghdr msg_sys;
766b9f92
DD
2257 struct timespec64 end_time;
2258 struct timespec64 timeout64;
a2e27255
ACM
2259
2260 if (timeout &&
2261 poll_select_set_timeout(&end_time, timeout->tv_sec,
2262 timeout->tv_nsec))
2263 return -EINVAL;
2264
2265 datagrams = 0;
2266
2267 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2268 if (!sock)
2269 return err;
2270
2271 err = sock_error(sock->sk);
e623a9e9
MJ
2272 if (err) {
2273 datagrams = err;
a2e27255 2274 goto out_put;
e623a9e9 2275 }
a2e27255
ACM
2276
2277 entry = mmsg;
d7256d0e 2278 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2279
2280 while (datagrams < vlen) {
2281 /*
2282 * No need to ask LSM for more than the first datagram.
2283 */
d7256d0e 2284 if (MSG_CMSG_COMPAT & flags) {
666547ff 2285 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2286 &msg_sys, flags & ~MSG_WAITFORONE,
2287 datagrams);
d7256d0e
JMG
2288 if (err < 0)
2289 break;
2290 err = __put_user(err, &compat_entry->msg_len);
2291 ++compat_entry;
2292 } else {
a7526eb5 2293 err = ___sys_recvmsg(sock,
666547ff 2294 (struct user_msghdr __user *)entry,
a7526eb5
AL
2295 &msg_sys, flags & ~MSG_WAITFORONE,
2296 datagrams);
d7256d0e
JMG
2297 if (err < 0)
2298 break;
2299 err = put_user(err, &entry->msg_len);
2300 ++entry;
2301 }
2302
a2e27255
ACM
2303 if (err)
2304 break;
a2e27255
ACM
2305 ++datagrams;
2306
71c5c159
BB
2307 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2308 if (flags & MSG_WAITFORONE)
2309 flags |= MSG_DONTWAIT;
2310
a2e27255 2311 if (timeout) {
766b9f92
DD
2312 ktime_get_ts64(&timeout64);
2313 *timeout = timespec64_to_timespec(
2314 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2315 if (timeout->tv_sec < 0) {
2316 timeout->tv_sec = timeout->tv_nsec = 0;
2317 break;
2318 }
2319
2320 /* Timeout, return less than vlen datagrams */
2321 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2322 break;
2323 }
2324
2325 /* Out of band data, return right away */
2326 if (msg_sys.msg_flags & MSG_OOB)
2327 break;
a78cb84c 2328 cond_resched();
a2e27255
ACM
2329 }
2330
a2e27255 2331 if (err == 0)
34b88a68
ACM
2332 goto out_put;
2333
2334 if (datagrams == 0) {
2335 datagrams = err;
2336 goto out_put;
2337 }
a2e27255 2338
34b88a68
ACM
2339 /*
2340 * We may return less entries than requested (vlen) if the
2341 * sock is non block and there aren't enough datagrams...
2342 */
2343 if (err != -EAGAIN) {
a2e27255 2344 /*
34b88a68
ACM
2345 * ... or if recvmsg returns an error after we
2346 * received some datagrams, where we record the
2347 * error to return on the next call or if the
2348 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2349 */
34b88a68 2350 sock->sk->sk_err = -err;
a2e27255 2351 }
34b88a68
ACM
2352out_put:
2353 fput_light(sock->file, fput_needed);
a2e27255 2354
34b88a68 2355 return datagrams;
a2e27255
ACM
2356}
2357
2358SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2359 unsigned int, vlen, unsigned int, flags,
2360 struct timespec __user *, timeout)
2361{
2362 int datagrams;
2363 struct timespec timeout_sys;
2364
1be374a0
AL
2365 if (flags & MSG_CMSG_COMPAT)
2366 return -EINVAL;
2367
a2e27255
ACM
2368 if (!timeout)
2369 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2370
2371 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2372 return -EFAULT;
2373
2374 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2375
2376 if (datagrams > 0 &&
2377 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2378 datagrams = -EFAULT;
2379
2380 return datagrams;
2381}
2382
2383#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall: entry N is the number of bytes
 * of arguments fetched from user space for call number N.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3),	/*  0 ..  6 */
	AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2),	/*  7 .. 13 */
	AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4),	/* 14 .. 20 */
};

#undef AL
2394
2395/*
89bddce5 2396 * System call vectors.
1da177e4
LT
2397 *
2398 * Argument checking cleaned up. Saved 20% in size.
2399 * This function doesn't need to set the kernel lock because
89bddce5 2400 * it is set by the callees.
1da177e4
LT
2401 */
2402
3e0fa65f 2403SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2404{
2950fa9d 2405 unsigned long a[AUDITSC_ARGS];
89bddce5 2406 unsigned long a0, a1;
1da177e4 2407 int err;
47379052 2408 unsigned int len;
1da177e4 2409
228e548e 2410 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2411 return -EINVAL;
2412
47379052
AV
2413 len = nargs[call];
2414 if (len > sizeof(a))
2415 return -EINVAL;
2416
1da177e4 2417 /* copy_from_user should be SMP safe. */
47379052 2418 if (copy_from_user(a, args, len))
1da177e4 2419 return -EFAULT;
3ec3b2fb 2420
2950fa9d
CG
2421 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2422 if (err)
2423 return err;
3ec3b2fb 2424
89bddce5
SH
2425 a0 = a[0];
2426 a1 = a[1];
2427
2428 switch (call) {
2429 case SYS_SOCKET:
2430 err = sys_socket(a0, a1, a[2]);
2431 break;
2432 case SYS_BIND:
2433 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2434 break;
2435 case SYS_CONNECT:
2436 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2437 break;
2438 case SYS_LISTEN:
2439 err = sys_listen(a0, a1);
2440 break;
2441 case SYS_ACCEPT:
de11defe
UD
2442 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2443 (int __user *)a[2], 0);
89bddce5
SH
2444 break;
2445 case SYS_GETSOCKNAME:
2446 err =
2447 sys_getsockname(a0, (struct sockaddr __user *)a1,
2448 (int __user *)a[2]);
2449 break;
2450 case SYS_GETPEERNAME:
2451 err =
2452 sys_getpeername(a0, (struct sockaddr __user *)a1,
2453 (int __user *)a[2]);
2454 break;
2455 case SYS_SOCKETPAIR:
2456 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2457 break;
2458 case SYS_SEND:
2459 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2460 break;
2461 case SYS_SENDTO:
2462 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2463 (struct sockaddr __user *)a[4], a[5]);
2464 break;
2465 case SYS_RECV:
2466 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2467 break;
2468 case SYS_RECVFROM:
2469 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2470 (struct sockaddr __user *)a[4],
2471 (int __user *)a[5]);
2472 break;
2473 case SYS_SHUTDOWN:
2474 err = sys_shutdown(a0, a1);
2475 break;
2476 case SYS_SETSOCKOPT:
2477 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2478 break;
2479 case SYS_GETSOCKOPT:
2480 err =
2481 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2482 (int __user *)a[4]);
2483 break;
2484 case SYS_SENDMSG:
666547ff 2485 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2486 break;
228e548e
AB
2487 case SYS_SENDMMSG:
2488 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2489 break;
89bddce5 2490 case SYS_RECVMSG:
666547ff 2491 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2492 break;
a2e27255
ACM
2493 case SYS_RECVMMSG:
2494 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2495 (struct timespec __user *)a[4]);
2496 break;
de11defe
UD
2497 case SYS_ACCEPT4:
2498 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2499 (int __user *)a[2], a[3]);
aaca0bdc 2500 break;
89bddce5
SH
2501 default:
2502 err = -EINVAL;
2503 break;
1da177e4
LT
2504 }
2505 return err;
2506}
2507
89bddce5 2508#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2509
55737fda
SH
2510/**
2511 * sock_register - add a socket protocol handler
2512 * @ops: description of protocol
2513 *
1da177e4
LT
2514 * This function is called by a protocol handler that wants to
2515 * advertise its address family, and have it linked into the
e793c0f7 2516 * socket interface. The value ops->family corresponds to the
55737fda 2517 * socket system call protocol family.
1da177e4 2518 */
f0fd27d4 2519int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2520{
2521 int err;
2522
2523 if (ops->family >= NPROTO) {
3410f22e 2524 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2525 return -ENOBUFS;
2526 }
55737fda
SH
2527
2528 spin_lock(&net_family_lock);
190683a9
ED
2529 if (rcu_dereference_protected(net_families[ops->family],
2530 lockdep_is_held(&net_family_lock)))
55737fda
SH
2531 err = -EEXIST;
2532 else {
cf778b00 2533 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2534 err = 0;
2535 }
55737fda
SH
2536 spin_unlock(&net_family_lock);
2537
3410f22e 2538 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2539 return err;
2540}
c6d409cf 2541EXPORT_SYMBOL(sock_register);
1da177e4 2542
55737fda
SH
2543/**
2544 * sock_unregister - remove a protocol handler
2545 * @family: protocol family to remove
2546 *
1da177e4
LT
2547 * This function is called by a protocol handler that wants to
2548 * remove its address family, and have it unlinked from the
55737fda
SH
2549 * new socket creation.
2550 *
2551 * If protocol handler is a module, then it can use module reference
2552 * counts to protect against new references. If protocol handler is not
2553 * a module then it needs to provide its own protection in
2554 * the ops->create routine.
1da177e4 2555 */
f0fd27d4 2556void sock_unregister(int family)
1da177e4 2557{
f0fd27d4 2558 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2559
55737fda 2560 spin_lock(&net_family_lock);
a9b3cd7f 2561 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2562 spin_unlock(&net_family_lock);
2563
2564 synchronize_rcu();
2565
3410f22e 2566 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2567}
c6d409cf 2568EXPORT_SYMBOL(sock_unregister);
1da177e4 2569
77d76ea3 2570static int __init sock_init(void)
1da177e4 2571{
b3e19d92 2572 int err;
2ca794e5
EB
2573 /*
2574 * Initialize the network sysctl infrastructure.
2575 */
2576 err = net_sysctl_init();
2577 if (err)
2578 goto out;
b3e19d92 2579
1da177e4 2580 /*
89bddce5 2581 * Initialize skbuff SLAB cache
1da177e4
LT
2582 */
2583 skb_init();
1da177e4
LT
2584
2585 /*
89bddce5 2586 * Initialize the protocols module.
1da177e4
LT
2587 */
2588
2589 init_inodecache();
b3e19d92
NP
2590
2591 err = register_filesystem(&sock_fs_type);
2592 if (err)
2593 goto out_fs;
1da177e4 2594 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2595 if (IS_ERR(sock_mnt)) {
2596 err = PTR_ERR(sock_mnt);
2597 goto out_mount;
2598 }
77d76ea3
AK
2599
2600 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2601 */
2602
2603#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2604 err = netfilter_init();
2605 if (err)
2606 goto out;
1da177e4 2607#endif
cbeb321a 2608
408eccce 2609 ptp_classifier_init();
c1f19b51 2610
b3e19d92
NP
2611out:
2612 return err;
2613
2614out_mount:
2615 unregister_filesystem(&sock_fs_type);
2616out_fs:
2617 goto out;
1da177e4
LT
2618}
2619
77d76ea3
AK
2620core_initcall(sock_init); /* early initcall */
2621
1da177e4
LT
2622#ifdef CONFIG_PROC_FS
2623void socket_seq_show(struct seq_file *seq)
2624{
2625 int cpu;
2626 int counter = 0;
2627
6f912042 2628 for_each_possible_cpu(cpu)
89bddce5 2629 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2630
2631 /* It can be negative, by the way. 8) */
2632 if (counter < 0)
2633 counter = 0;
2634
2635 seq_printf(seq, "sockets: used %d\n", counter);
2636}
89bddce5 2637#endif /* CONFIG_PROC_FS */
1da177e4 2638
89bbfc95 2639#ifdef CONFIG_COMPAT
6b96018b 2640static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2641 unsigned int cmd, void __user *up)
7a229387 2642{
7a229387
AB
2643 mm_segment_t old_fs = get_fs();
2644 struct timeval ktv;
2645 int err;
2646
2647 set_fs(KERNEL_DS);
6b96018b 2648 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2649 set_fs(old_fs);
644595f8 2650 if (!err)
ed6fe9d6 2651 err = compat_put_timeval(&ktv, up);
644595f8 2652
7a229387
AB
2653 return err;
2654}
2655
6b96018b 2656static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2657 unsigned int cmd, void __user *up)
7a229387 2658{
7a229387
AB
2659 mm_segment_t old_fs = get_fs();
2660 struct timespec kts;
2661 int err;
2662
2663 set_fs(KERNEL_DS);
6b96018b 2664 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2665 set_fs(old_fs);
644595f8 2666 if (!err)
ed6fe9d6 2667 err = compat_put_timespec(&kts, up);
644595f8 2668
7a229387
AB
2669 return err;
2670}
2671
6b96018b 2672static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2673{
2674 struct ifreq __user *uifr;
2675 int err;
2676
2677 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2678 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2679 return -EFAULT;
2680
6b96018b 2681 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2682 if (err)
2683 return err;
2684
6b96018b 2685 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2686 return -EFAULT;
2687
2688 return 0;
2689}
2690
6b96018b 2691static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2692{
6b96018b 2693 struct compat_ifconf ifc32;
7a229387
AB
2694 struct ifconf ifc;
2695 struct ifconf __user *uifc;
6b96018b 2696 struct compat_ifreq __user *ifr32;
7a229387
AB
2697 struct ifreq __user *ifr;
2698 unsigned int i, j;
2699 int err;
2700
6b96018b 2701 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2702 return -EFAULT;
2703
43da5f2e 2704 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2705 if (ifc32.ifcbuf == 0) {
2706 ifc32.ifc_len = 0;
2707 ifc.ifc_len = 0;
2708 ifc.ifc_req = NULL;
2709 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2710 } else {
c6d409cf
ED
2711 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2712 sizeof(struct ifreq);
7a229387
AB
2713 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2714 ifc.ifc_len = len;
2715 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2716 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2717 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2718 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2719 return -EFAULT;
2720 ifr++;
2721 ifr32++;
2722 }
2723 }
2724 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2725 return -EFAULT;
2726
6b96018b 2727 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2728 if (err)
2729 return err;
2730
2731 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2732 return -EFAULT;
2733
2734 ifr = ifc.ifc_req;
2735 ifr32 = compat_ptr(ifc32.ifcbuf);
2736 for (i = 0, j = 0;
c6d409cf
ED
2737 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2738 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2739 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2740 return -EFAULT;
2741 ifr32++;
2742 ifr++;
2743 }
2744
2745 if (ifc32.ifcbuf == 0) {
2746 /* Translate from 64-bit structure multiple to
2747 * a 32-bit one.
2748 */
2749 i = ifc.ifc_len;
6b96018b 2750 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2751 ifc32.ifc_len = i;
2752 } else {
2753 ifc32.ifc_len = i;
2754 }
6b96018b 2755 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2756 return -EFAULT;
2757
2758 return 0;
2759}
2760
6b96018b 2761static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2762{
3a7da39d
BH
2763 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2764 bool convert_in = false, convert_out = false;
2765 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2766 struct ethtool_rxnfc __user *rxnfc;
7a229387 2767 struct ifreq __user *ifr;
3a7da39d
BH
2768 u32 rule_cnt = 0, actual_rule_cnt;
2769 u32 ethcmd;
7a229387 2770 u32 data;
3a7da39d 2771 int ret;
7a229387 2772
3a7da39d
BH
2773 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2774 return -EFAULT;
7a229387 2775
3a7da39d
BH
2776 compat_rxnfc = compat_ptr(data);
2777
2778 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2779 return -EFAULT;
2780
3a7da39d
BH
2781 /* Most ethtool structures are defined without padding.
2782 * Unfortunately struct ethtool_rxnfc is an exception.
2783 */
2784 switch (ethcmd) {
2785 default:
2786 break;
2787 case ETHTOOL_GRXCLSRLALL:
2788 /* Buffer size is variable */
2789 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2790 return -EFAULT;
2791 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2792 return -ENOMEM;
2793 buf_size += rule_cnt * sizeof(u32);
2794 /* fall through */
2795 case ETHTOOL_GRXRINGS:
2796 case ETHTOOL_GRXCLSRLCNT:
2797 case ETHTOOL_GRXCLSRULE:
55664f32 2798 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2799 convert_out = true;
2800 /* fall through */
2801 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2802 buf_size += sizeof(struct ethtool_rxnfc);
2803 convert_in = true;
2804 break;
2805 }
2806
2807 ifr = compat_alloc_user_space(buf_size);
954b1244 2808 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2809
2810 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2811 return -EFAULT;
2812
3a7da39d
BH
2813 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2814 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2815 return -EFAULT;
2816
3a7da39d 2817 if (convert_in) {
127fe533 2818 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2819 * fs.ring_cookie and at the end of fs, but nowhere else.
2820 */
127fe533
AD
2821 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2822 sizeof(compat_rxnfc->fs.m_ext) !=
2823 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2824 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2825 BUILD_BUG_ON(
2826 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2827 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2828 offsetof(struct ethtool_rxnfc, fs.location) -
2829 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2830
2831 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2832 (void __user *)(&rxnfc->fs.m_ext + 1) -
2833 (void __user *)rxnfc) ||
3a7da39d
BH
2834 copy_in_user(&rxnfc->fs.ring_cookie,
2835 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2836 (void __user *)(&rxnfc->fs.location + 1) -
2837 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2838 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2839 sizeof(rxnfc->rule_cnt)))
2840 return -EFAULT;
2841 }
2842
2843 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2844 if (ret)
2845 return ret;
2846
2847 if (convert_out) {
2848 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2849 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2850 (const void __user *)rxnfc) ||
3a7da39d
BH
2851 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2852 &rxnfc->fs.ring_cookie,
954b1244
SH
2853 (const void __user *)(&rxnfc->fs.location + 1) -
2854 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2855 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2856 sizeof(rxnfc->rule_cnt)))
2857 return -EFAULT;
2858
2859 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2860 /* As an optimisation, we only copy the actual
2861 * number of rules that the underlying
2862 * function returned. Since Mallory might
2863 * change the rule count in user memory, we
2864 * check that it is less than the rule count
2865 * originally given (as the user buffer size),
2866 * which has been range-checked.
2867 */
2868 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2869 return -EFAULT;
2870 if (actual_rule_cnt < rule_cnt)
2871 rule_cnt = actual_rule_cnt;
2872 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2873 &rxnfc->rule_locs[0],
2874 rule_cnt * sizeof(u32)))
2875 return -EFAULT;
2876 }
2877 }
2878
2879 return 0;
7a229387
AB
2880}
2881
7a50a240
AB
2882static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2883{
2884 void __user *uptr;
2885 compat_uptr_t uptr32;
2886 struct ifreq __user *uifr;
2887
c6d409cf 2888 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2889 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2890 return -EFAULT;
2891
2892 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2893 return -EFAULT;
2894
2895 uptr = compat_ptr(uptr32);
2896
2897 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2898 return -EFAULT;
2899
2900 return dev_ioctl(net, SIOCWANDEV, uifr);
2901}
2902
6b96018b
AB
2903static int bond_ioctl(struct net *net, unsigned int cmd,
2904 struct compat_ifreq __user *ifr32)
7a229387
AB
2905{
2906 struct ifreq kifr;
7a229387
AB
2907 mm_segment_t old_fs;
2908 int err;
7a229387
AB
2909
2910 switch (cmd) {
2911 case SIOCBONDENSLAVE:
2912 case SIOCBONDRELEASE:
2913 case SIOCBONDSETHWADDR:
2914 case SIOCBONDCHANGEACTIVE:
6b96018b 2915 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2916 return -EFAULT;
2917
2918 old_fs = get_fs();
c6d409cf 2919 set_fs(KERNEL_DS);
c3f52ae6 2920 err = dev_ioctl(net, cmd,
2921 (struct ifreq __user __force *) &kifr);
c6d409cf 2922 set_fs(old_fs);
7a229387
AB
2923
2924 return err;
7a229387 2925 default:
07d106d0 2926 return -ENOIOCTLCMD;
ccbd6a5a 2927 }
7a229387
AB
2928}
2929
590d4693
BH
2930/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2931static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2932 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2933{
2934 struct ifreq __user *u_ifreq64;
7a229387
AB
2935 char tmp_buf[IFNAMSIZ];
2936 void __user *data64;
2937 u32 data32;
2938
2939 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2940 IFNAMSIZ))
2941 return -EFAULT;
417c3522 2942 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2943 return -EFAULT;
2944 data64 = compat_ptr(data32);
2945
2946 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2947
7a229387
AB
2948 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2949 IFNAMSIZ))
2950 return -EFAULT;
417c3522 2951 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2952 return -EFAULT;
2953
6b96018b 2954 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2955}
2956
6b96018b
AB
2957static int dev_ifsioc(struct net *net, struct socket *sock,
2958 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2959{
a2116ed2 2960 struct ifreq __user *uifr;
7a229387
AB
2961 int err;
2962
a2116ed2
AB
2963 uifr = compat_alloc_user_space(sizeof(*uifr));
2964 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2965 return -EFAULT;
2966
2967 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2968
7a229387
AB
2969 if (!err) {
2970 switch (cmd) {
2971 case SIOCGIFFLAGS:
2972 case SIOCGIFMETRIC:
2973 case SIOCGIFMTU:
2974 case SIOCGIFMEM:
2975 case SIOCGIFHWADDR:
2976 case SIOCGIFINDEX:
2977 case SIOCGIFADDR:
2978 case SIOCGIFBRDADDR:
2979 case SIOCGIFDSTADDR:
2980 case SIOCGIFNETMASK:
fab2532b 2981 case SIOCGIFPFLAGS:
7a229387 2982 case SIOCGIFTXQLEN:
fab2532b
AB
2983 case SIOCGMIIPHY:
2984 case SIOCGMIIREG:
a2116ed2 2985 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2986 err = -EFAULT;
2987 break;
2988 }
2989 }
2990 return err;
2991}
2992
a2116ed2
AB
2993static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2994 struct compat_ifreq __user *uifr32)
2995{
2996 struct ifreq ifr;
2997 struct compat_ifmap __user *uifmap32;
2998 mm_segment_t old_fs;
2999 int err;
3000
3001 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3002 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3003 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3004 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3005 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3006 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3007 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3008 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3009 if (err)
3010 return -EFAULT;
3011
3012 old_fs = get_fs();
c6d409cf 3013 set_fs(KERNEL_DS);
c3f52ae6 3014 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3015 set_fs(old_fs);
a2116ed2
AB
3016
3017 if (cmd == SIOCGIFMAP && !err) {
3018 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3019 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3020 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3021 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3022 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3023 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3024 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3025 if (err)
3026 err = -EFAULT;
3027 }
3028 return err;
3029}
3030
7a229387 3031struct rtentry32 {
c6d409cf 3032 u32 rt_pad1;
7a229387
AB
3033 struct sockaddr rt_dst; /* target address */
3034 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3035 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3036 unsigned short rt_flags;
3037 short rt_pad2;
3038 u32 rt_pad3;
3039 unsigned char rt_tos;
3040 unsigned char rt_class;
3041 short rt_pad4;
3042 short rt_metric; /* +1 for binary compatibility! */
7a229387 3043 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3044 u32 rt_mtu; /* per route MTU/Window */
3045 u32 rt_window; /* Window clamping */
7a229387
AB
3046 unsigned short rt_irtt; /* Initial RTT */
3047};
3048
3049struct in6_rtmsg32 {
3050 struct in6_addr rtmsg_dst;
3051 struct in6_addr rtmsg_src;
3052 struct in6_addr rtmsg_gateway;
3053 u32 rtmsg_type;
3054 u16 rtmsg_dst_len;
3055 u16 rtmsg_src_len;
3056 u32 rtmsg_metric;
3057 u32 rtmsg_info;
3058 u32 rtmsg_flags;
3059 s32 rtmsg_ifindex;
3060};
3061
6b96018b
AB
3062static int routing_ioctl(struct net *net, struct socket *sock,
3063 unsigned int cmd, void __user *argp)
7a229387
AB
3064{
3065 int ret;
3066 void *r = NULL;
3067 struct in6_rtmsg r6;
3068 struct rtentry r4;
3069 char devname[16];
3070 u32 rtdev;
3071 mm_segment_t old_fs = get_fs();
3072
6b96018b
AB
3073 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3074 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3075 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3076 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3077 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3078 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3079 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3080 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3081 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3082 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3083 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3084
3085 r = (void *) &r6;
3086 } else { /* ipv4 */
6b96018b 3087 struct rtentry32 __user *ur4 = argp;
c6d409cf 3088 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3089 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3090 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3091 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3092 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3093 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3094 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3095 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3096 if (rtdev) {
c6d409cf 3097 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3098 r4.rt_dev = (char __user __force *)devname;
3099 devname[15] = 0;
7a229387
AB
3100 } else
3101 r4.rt_dev = NULL;
3102
3103 r = (void *) &r4;
3104 }
3105
3106 if (ret) {
3107 ret = -EFAULT;
3108 goto out;
3109 }
3110
c6d409cf 3111 set_fs(KERNEL_DS);
6b96018b 3112 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3113 set_fs(old_fs);
7a229387
AB
3114
3115out:
7a229387
AB
3116 return ret;
3117}
3118
3119/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3120 * for some operations; this forces use of the newer bridge-utils that
25985edc 3121 * use compatible ioctls
7a229387 3122 */
6b96018b 3123static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3124{
6b96018b 3125 compat_ulong_t tmp;
7a229387 3126
6b96018b 3127 if (get_user(tmp, argp))
7a229387
AB
3128 return -EFAULT;
3129 if (tmp == BRCTL_GET_VERSION)
3130 return BRCTL_VERSION + 1;
3131 return -EINVAL;
3132}
3133
6b96018b
AB
3134static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3135 unsigned int cmd, unsigned long arg)
3136{
3137 void __user *argp = compat_ptr(arg);
3138 struct sock *sk = sock->sk;
3139 struct net *net = sock_net(sk);
7a229387 3140
6b96018b 3141 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3142 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3143
3144 switch (cmd) {
3145 case SIOCSIFBR:
3146 case SIOCGIFBR:
3147 return old_bridge_ioctl(argp);
3148 case SIOCGIFNAME:
3149 return dev_ifname32(net, argp);
3150 case SIOCGIFCONF:
3151 return dev_ifconf(net, argp);
3152 case SIOCETHTOOL:
3153 return ethtool_ioctl(net, argp);
7a50a240
AB
3154 case SIOCWANDEV:
3155 return compat_siocwandev(net, argp);
a2116ed2
AB
3156 case SIOCGIFMAP:
3157 case SIOCSIFMAP:
3158 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3159 case SIOCBONDENSLAVE:
3160 case SIOCBONDRELEASE:
3161 case SIOCBONDSETHWADDR:
6b96018b
AB
3162 case SIOCBONDCHANGEACTIVE:
3163 return bond_ioctl(net, cmd, argp);
3164 case SIOCADDRT:
3165 case SIOCDELRT:
3166 return routing_ioctl(net, sock, cmd, argp);
3167 case SIOCGSTAMP:
3168 return do_siocgstamp(net, sock, cmd, argp);
3169 case SIOCGSTAMPNS:
3170 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3171 case SIOCBONDSLAVEINFOQUERY:
3172 case SIOCBONDINFOQUERY:
a2116ed2 3173 case SIOCSHWTSTAMP:
fd468c74 3174 case SIOCGHWTSTAMP:
590d4693 3175 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3176
3177 case FIOSETOWN:
3178 case SIOCSPGRP:
3179 case FIOGETOWN:
3180 case SIOCGPGRP:
3181 case SIOCBRADDBR:
3182 case SIOCBRDELBR:
3183 case SIOCGIFVLAN:
3184 case SIOCSIFVLAN:
3185 case SIOCADDDLCI:
3186 case SIOCDELDLCI:
c62cce2c 3187 case SIOCGSKNS:
6b96018b
AB
3188 return sock_ioctl(file, cmd, arg);
3189
3190 case SIOCGIFFLAGS:
3191 case SIOCSIFFLAGS:
3192 case SIOCGIFMETRIC:
3193 case SIOCSIFMETRIC:
3194 case SIOCGIFMTU:
3195 case SIOCSIFMTU:
3196 case SIOCGIFMEM:
3197 case SIOCSIFMEM:
3198 case SIOCGIFHWADDR:
3199 case SIOCSIFHWADDR:
3200 case SIOCADDMULTI:
3201 case SIOCDELMULTI:
3202 case SIOCGIFINDEX:
6b96018b
AB
3203 case SIOCGIFADDR:
3204 case SIOCSIFADDR:
3205 case SIOCSIFHWBROADCAST:
6b96018b 3206 case SIOCDIFADDR:
6b96018b
AB
3207 case SIOCGIFBRDADDR:
3208 case SIOCSIFBRDADDR:
3209 case SIOCGIFDSTADDR:
3210 case SIOCSIFDSTADDR:
3211 case SIOCGIFNETMASK:
3212 case SIOCSIFNETMASK:
3213 case SIOCSIFPFLAGS:
3214 case SIOCGIFPFLAGS:
3215 case SIOCGIFTXQLEN:
3216 case SIOCSIFTXQLEN:
3217 case SIOCBRADDIF:
3218 case SIOCBRDELIF:
9177efd3
AB
3219 case SIOCSIFNAME:
3220 case SIOCGMIIPHY:
3221 case SIOCGMIIREG:
3222 case SIOCSMIIREG:
6b96018b 3223 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3224
6b96018b
AB
3225 case SIOCSARP:
3226 case SIOCGARP:
3227 case SIOCDARP:
6b96018b 3228 case SIOCATMARK:
9177efd3
AB
3229 return sock_do_ioctl(net, sock, cmd, arg);
3230 }
3231
6b96018b
AB
3232 return -ENOIOCTLCMD;
3233}
7a229387 3234
95c96174 3235static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3236 unsigned long arg)
89bbfc95
SP
3237{
3238 struct socket *sock = file->private_data;
3239 int ret = -ENOIOCTLCMD;
87de87d5
DM
3240 struct sock *sk;
3241 struct net *net;
3242
3243 sk = sock->sk;
3244 net = sock_net(sk);
89bbfc95
SP
3245
3246 if (sock->ops->compat_ioctl)
3247 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3248
87de87d5
DM
3249 if (ret == -ENOIOCTLCMD &&
3250 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3251 ret = compat_wext_handle_ioctl(net, cmd, arg);
3252
6b96018b
AB
3253 if (ret == -ENOIOCTLCMD)
3254 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3255
89bbfc95
SP
3256 return ret;
3257}
3258#endif
3259
ac5a488e
SS
3260int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3261{
3262 return sock->ops->bind(sock, addr, addrlen);
3263}
c6d409cf 3264EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3265
3266int kernel_listen(struct socket *sock, int backlog)
3267{
3268 return sock->ops->listen(sock, backlog);
3269}
c6d409cf 3270EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3271
3272int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3273{
3274 struct sock *sk = sock->sk;
3275 int err;
3276
3277 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3278 newsock);
3279 if (err < 0)
3280 goto done;
3281
cdfbabfb 3282 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3283 if (err < 0) {
3284 sock_release(*newsock);
fa8705b0 3285 *newsock = NULL;
ac5a488e
SS
3286 goto done;
3287 }
3288
3289 (*newsock)->ops = sock->ops;
1b08534e 3290 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3291
3292done:
3293 return err;
3294}
c6d409cf 3295EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3296
3297int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3298 int flags)
ac5a488e
SS
3299{
3300 return sock->ops->connect(sock, addr, addrlen, flags);
3301}
c6d409cf 3302EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3303
3304int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3305 int *addrlen)
3306{
3307 return sock->ops->getname(sock, addr, addrlen, 0);
3308}
c6d409cf 3309EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3310
3311int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3312 int *addrlen)
3313{
3314 return sock->ops->getname(sock, addr, addrlen, 1);
3315}
c6d409cf 3316EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3317
3318int kernel_getsockopt(struct socket *sock, int level, int optname,
3319 char *optval, int *optlen)
3320{
3321 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3322 char __user *uoptval;
3323 int __user *uoptlen;
ac5a488e
SS
3324 int err;
3325
fb8621bb
NK
3326 uoptval = (char __user __force *) optval;
3327 uoptlen = (int __user __force *) optlen;
3328
ac5a488e
SS
3329 set_fs(KERNEL_DS);
3330 if (level == SOL_SOCKET)
fb8621bb 3331 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3332 else
fb8621bb
NK
3333 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3334 uoptlen);
ac5a488e
SS
3335 set_fs(oldfs);
3336 return err;
3337}
c6d409cf 3338EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3339
3340int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3341 char *optval, unsigned int optlen)
ac5a488e
SS
3342{
3343 mm_segment_t oldfs = get_fs();
fb8621bb 3344 char __user *uoptval;
ac5a488e
SS
3345 int err;
3346
fb8621bb
NK
3347 uoptval = (char __user __force *) optval;
3348
ac5a488e
SS
3349 set_fs(KERNEL_DS);
3350 if (level == SOL_SOCKET)
fb8621bb 3351 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3352 else
fb8621bb 3353 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3354 optlen);
3355 set_fs(oldfs);
3356 return err;
3357}
c6d409cf 3358EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3359
3360int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3361 size_t size, int flags)
3362{
3363 if (sock->ops->sendpage)
3364 return sock->ops->sendpage(sock, page, offset, size, flags);
3365
3366 return sock_no_sendpage(sock, page, offset, size, flags);
3367}
c6d409cf 3368EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3369
306b13eb
TH
3370int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3371 size_t size, int flags)
3372{
3373 struct socket *sock = sk->sk_socket;
3374
3375 if (sock->ops->sendpage_locked)
3376 return sock->ops->sendpage_locked(sk, page, offset, size,
3377 flags);
3378
3379 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3380}
3381EXPORT_SYMBOL(kernel_sendpage_locked);
3382
ac5a488e
SS
3383int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3384{
3385 mm_segment_t oldfs = get_fs();
3386 int err;
3387
3388 set_fs(KERNEL_DS);
3389 err = sock->ops->ioctl(sock, cmd, arg);
3390 set_fs(oldfs);
3391
3392 return err;
3393}
c6d409cf 3394EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3395
91cf45f0
TM
3396int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3397{
3398 return sock->ops->shutdown(sock, how);
3399}
91cf45f0 3400EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075
P
3401
3402/* This routine returns the IP overhead imposed by a socket i.e.
3403 * the length of the underlying IP header, depending on whether
3404 * this is an IPv4 or IPv6 socket and the length from IP options turned
57240d00 3405 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075
P
3406 */
3407u32 kernel_sock_ip_overhead(struct sock *sk)
3408{
3409 struct inet_sock *inet;
3410 struct ip_options_rcu *opt;
3411 u32 overhead = 0;
113c3075
P
3412#if IS_ENABLED(CONFIG_IPV6)
3413 struct ipv6_pinfo *np;
3414 struct ipv6_txoptions *optv6 = NULL;
3415#endif /* IS_ENABLED(CONFIG_IPV6) */
3416
3417 if (!sk)
3418 return overhead;
3419
113c3075
P
3420 switch (sk->sk_family) {
3421 case AF_INET:
3422 inet = inet_sk(sk);
3423 overhead += sizeof(struct iphdr);
3424 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3425 sock_owned_by_user(sk));
113c3075
P
3426 if (opt)
3427 overhead += opt->opt.optlen;
3428 return overhead;
3429#if IS_ENABLED(CONFIG_IPV6)
3430 case AF_INET6:
3431 np = inet6_sk(sk);
3432 overhead += sizeof(struct ipv6hdr);
3433 if (np)
3434 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3435 sock_owned_by_user(sk));
113c3075
P
3436 if (optv6)
3437 overhead += (optv6->opt_flen + optv6->opt_nflen);
3438 return overhead;
3439#endif /* IS_ENABLED(CONFIG_IPV6) */
3440 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3441 return overhead;
3442 }
3443}
3444EXPORT_SYMBOL(kernel_sock_ip_overhead);