]> git.proxmox.com Git - mirror_ubuntu-kernels.git/blame - net/socket.c
socket: Add SO_TIMESTAMP[NS]_NEW
[mirror_ubuntu-kernels.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
c8e8cd57 92#include <linux/nospec.h>
1da177e4 93
7c0f6ba6 94#include <linux/uaccess.h>
1da177e4
LT
95#include <asm/unistd.h>
96
97#include <net/compat.h>
87de87d5 98#include <net/wext.h>
f8451725 99#include <net/cls_cgroup.h>
1da177e4
LT
100
101#include <net/sock.h>
102#include <linux/netfilter.h>
103
6b96018b
AB
104#include <linux/if_tun.h>
105#include <linux/ipv6_route.h>
106#include <linux/route.h>
6b96018b 107#include <linux/sockios.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
121static __poll_t sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4 165/*
89bddce5
SH
166 * Support routines.
167 * Move socket addresses back and forth across the kernel/user
168 * divide and look after the messy bits.
1da177e4
LT
169 */
170
1da177e4
LT
171/**
172 * move_addr_to_kernel - copy a socket address into kernel space
173 * @uaddr: Address in user space
174 * @kaddr: Address in kernel space
175 * @ulen: Length in user space
176 *
177 * The address is copied into kernel space. If the provided address is
178 * too long an error code of -EINVAL is returned. If the copy gives
179 * invalid addresses -EFAULT is returned. On a success 0 is returned.
180 */
181
43db362d 182int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 183{
230b1839 184 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 185 return -EINVAL;
89bddce5 186 if (ulen == 0)
1da177e4 187 return 0;
89bddce5 188 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 189 return -EFAULT;
3ec3b2fb 190 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
191}
192
193/**
194 * move_addr_to_user - copy an address to user space
195 * @kaddr: kernel space address
196 * @klen: length of address in kernel
197 * @uaddr: user space address
198 * @ulen: pointer to user length field
199 *
200 * The value pointed to by ulen on entry is the buffer length available.
201 * This is overwritten with the buffer space used. -EINVAL is returned
202 * if an overlong buffer is specified or a negative buffer size. -EFAULT
203 * is returned if either the buffer or the length field are not
204 * accessible.
205 * After copying the data up to the limit the user specifies, the true
206 * length of the data is written over the length limit the user
207 * specified. Zero is returned for a success.
208 */
89bddce5 209
43db362d 210static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 211 void __user *uaddr, int __user *ulen)
1da177e4
LT
212{
213 int err;
214 int len;
215
68c6beb3 216 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
217 err = get_user(len, ulen);
218 if (err)
1da177e4 219 return err;
89bddce5
SH
220 if (len > klen)
221 len = klen;
68c6beb3 222 if (len < 0)
1da177e4 223 return -EINVAL;
89bddce5 224 if (len) {
d6fe3945
SG
225 if (audit_sockaddr(klen, kaddr))
226 return -ENOMEM;
89bddce5 227 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
228 return -EFAULT;
229 }
230 /*
89bddce5
SH
231 * "fromlen shall refer to the value before truncation.."
232 * 1003.1g
1da177e4
LT
233 */
234 return __put_user(klen, ulen);
235}
236
08009a76 237static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
238
239static struct inode *sock_alloc_inode(struct super_block *sb)
240{
241 struct socket_alloc *ei;
eaefd110 242 struct socket_wq *wq;
89bddce5 243
e94b1766 244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
245 if (!ei)
246 return NULL;
eaefd110
ED
247 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
248 if (!wq) {
43815482
ED
249 kmem_cache_free(sock_inode_cachep, ei);
250 return NULL;
251 }
eaefd110
ED
252 init_waitqueue_head(&wq->wait);
253 wq->fasync_list = NULL;
574aab1e 254 wq->flags = 0;
e6476c21 255 ei->socket.wq = wq;
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482
ED
268 struct socket_alloc *ei;
269
270 ei = container_of(inode, struct socket_alloc, vfs_inode);
e6476c21 271 kfree_rcu(ei->socket.wq, rcu);
43815482 272 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
273}
274
51cc5068 275static void init_once(void *foo)
1da177e4 276{
89bddce5 277 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 278
a35afb83 279 inode_init_once(&ei->vfs_inode);
1da177e4 280}
89bddce5 281
1e911632 282static void init_inodecache(void)
1da177e4
LT
283{
284 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
285 sizeof(struct socket_alloc),
286 0,
287 (SLAB_HWCACHE_ALIGN |
288 SLAB_RECLAIM_ACCOUNT |
5d097056 289 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 290 init_once);
1e911632 291 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
292}
293
b87221de 294static const struct super_operations sockfs_ops = {
c6d409cf
ED
295 .alloc_inode = sock_alloc_inode,
296 .destroy_inode = sock_destroy_inode,
297 .statfs = simple_statfs,
1da177e4
LT
298};
299
c23fbb6b
ED
300/*
301 * sockfs_dname() is called from d_path().
302 */
303static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
304{
305 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 306 d_inode(dentry)->i_ino);
c23fbb6b
ED
307}
308
3ba13d17 309static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 310 .d_dname = sockfs_dname,
1da177e4
LT
311};
312
bba0bd31
AG
313static int sockfs_xattr_get(const struct xattr_handler *handler,
314 struct dentry *dentry, struct inode *inode,
315 const char *suffix, void *value, size_t size)
316{
317 if (value) {
318 if (dentry->d_name.len + 1 > size)
319 return -ERANGE;
320 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
321 }
322 return dentry->d_name.len + 1;
323}
324
325#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
326#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
327#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
328
329static const struct xattr_handler sockfs_xattr_handler = {
330 .name = XATTR_NAME_SOCKPROTONAME,
331 .get = sockfs_xattr_get,
332};
333
4a590153
AG
334static int sockfs_security_xattr_set(const struct xattr_handler *handler,
335 struct dentry *dentry, struct inode *inode,
336 const char *suffix, const void *value,
337 size_t size, int flags)
338{
339 /* Handled by LSM. */
340 return -EAGAIN;
341}
342
343static const struct xattr_handler sockfs_security_xattr_handler = {
344 .prefix = XATTR_SECURITY_PREFIX,
345 .set = sockfs_security_xattr_set,
346};
347
bba0bd31
AG
348static const struct xattr_handler *sockfs_xattr_handlers[] = {
349 &sockfs_xattr_handler,
4a590153 350 &sockfs_security_xattr_handler,
bba0bd31
AG
351 NULL
352};
353
c74a1cbb
AV
354static struct dentry *sockfs_mount(struct file_system_type *fs_type,
355 int flags, const char *dev_name, void *data)
356{
bba0bd31
AG
357 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
358 sockfs_xattr_handlers,
359 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
360}
361
362static struct vfsmount *sock_mnt __read_mostly;
363
364static struct file_system_type sock_fs_type = {
365 .name = "sockfs",
366 .mount = sockfs_mount,
367 .kill_sb = kill_anon_super,
368};
369
1da177e4
LT
370/*
371 * Obtains the first available file descriptor and sets it up for use.
372 *
39d8c1b6
DM
373 * These functions create file structures and maps them to fd space
374 * of the current process. On success it returns file descriptor
1da177e4
LT
375 * and file struct implicitly stored in sock->file.
376 * Note that another thread may close file descriptor before we return
377 * from this function. We use the fact that now we do not refer
378 * to socket after mapping. If one day we will need it, this
379 * function will increment ref. count on file by 1.
380 *
381 * In any case returned fd MAY BE not valid!
382 * This race condition is unavoidable
383 * with shared fd spaces, we cannot solve it inside kernel,
384 * but we take care of internal coherence yet.
385 */
386
aab174f0 387struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 388{
7cbe66b6 389 struct file *file;
1da177e4 390
d93aa9d8
AV
391 if (!dname)
392 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 393
d93aa9d8
AV
394 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
395 O_RDWR | (flags & O_NONBLOCK),
396 &socket_file_ops);
b5ffe634 397 if (IS_ERR(file)) {
8e1611e2 398 sock_release(sock);
39b65252 399 return file;
cc3808f8
AV
400 }
401
402 sock->file = file;
39d8c1b6 403 file->private_data = sock;
28407630 404 return file;
39d8c1b6 405}
56b31d1c 406EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 407
56b31d1c 408static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
409{
410 struct file *newfile;
28407630 411 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
412 if (unlikely(fd < 0)) {
413 sock_release(sock);
28407630 414 return fd;
ce4bb04c 415 }
39d8c1b6 416
aab174f0 417 newfile = sock_alloc_file(sock, flags, NULL);
28407630 418 if (likely(!IS_ERR(newfile))) {
39d8c1b6 419 fd_install(fd, newfile);
28407630
AV
420 return fd;
421 }
7cbe66b6 422
28407630
AV
423 put_unused_fd(fd);
424 return PTR_ERR(newfile);
1da177e4
LT
425}
426
406a3c63 427struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 428{
6cb153ca
BL
429 if (file->f_op == &socket_file_ops)
430 return file->private_data; /* set in sock_map_fd */
431
23bb80d2
ED
432 *err = -ENOTSOCK;
433 return NULL;
6cb153ca 434}
406a3c63 435EXPORT_SYMBOL(sock_from_file);
6cb153ca 436
1da177e4 437/**
c6d409cf 438 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
439 * @fd: file handle
440 * @err: pointer to an error code return
441 *
442 * The file handle passed in is locked and the socket it is bound
241c4667 443 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
444 * with a negative errno code and NULL is returned. The function checks
445 * for both invalid handles and passing a handle which is not a socket.
446 *
447 * On a success the socket object pointer is returned.
448 */
449
450struct socket *sockfd_lookup(int fd, int *err)
451{
452 struct file *file;
1da177e4
LT
453 struct socket *sock;
454
89bddce5
SH
455 file = fget(fd);
456 if (!file) {
1da177e4
LT
457 *err = -EBADF;
458 return NULL;
459 }
89bddce5 460
6cb153ca
BL
461 sock = sock_from_file(file, err);
462 if (!sock)
1da177e4 463 fput(file);
6cb153ca
BL
464 return sock;
465}
c6d409cf 466EXPORT_SYMBOL(sockfd_lookup);
1da177e4 467
6cb153ca
BL
468static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
469{
00e188ef 470 struct fd f = fdget(fd);
6cb153ca
BL
471 struct socket *sock;
472
3672558c 473 *err = -EBADF;
00e188ef
AV
474 if (f.file) {
475 sock = sock_from_file(f.file, err);
476 if (likely(sock)) {
477 *fput_needed = f.flags;
6cb153ca 478 return sock;
00e188ef
AV
479 }
480 fdput(f);
1da177e4 481 }
6cb153ca 482 return NULL;
1da177e4
LT
483}
484
600e1779
MY
485static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
486 size_t size)
487{
488 ssize_t len;
489 ssize_t used = 0;
490
c5ef6035 491 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
492 if (len < 0)
493 return len;
494 used += len;
495 if (buffer) {
496 if (size < used)
497 return -ERANGE;
498 buffer += len;
499 }
500
501 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
502 used += len;
503 if (buffer) {
504 if (size < used)
505 return -ERANGE;
506 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
507 buffer += len;
508 }
509
510 return used;
511}
512
dc647ec8 513static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
514{
515 int err = simple_setattr(dentry, iattr);
516
e1a3a60a 517 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
518 struct socket *sock = SOCKET_I(d_inode(dentry));
519
6d8c50dc
CW
520 if (sock->sk)
521 sock->sk->sk_uid = iattr->ia_uid;
522 else
523 err = -ENOENT;
86741ec2
LC
524 }
525
526 return err;
527}
528
600e1779 529static const struct inode_operations sockfs_inode_ops = {
600e1779 530 .listxattr = sockfs_listxattr,
86741ec2 531 .setattr = sockfs_setattr,
600e1779
MY
532};
533
1da177e4
LT
534/**
535 * sock_alloc - allocate a socket
89bddce5 536 *
1da177e4
LT
537 * Allocate a new inode and socket object. The two are bound together
538 * and initialised. The socket is then returned. If we are out of inodes
539 * NULL is returned.
540 */
541
f4a00aac 542struct socket *sock_alloc(void)
1da177e4 543{
89bddce5
SH
544 struct inode *inode;
545 struct socket *sock;
1da177e4 546
a209dfc7 547 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
548 if (!inode)
549 return NULL;
550
551 sock = SOCKET_I(inode);
552
85fe4025 553 inode->i_ino = get_next_ino();
89bddce5 554 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
555 inode->i_uid = current_fsuid();
556 inode->i_gid = current_fsgid();
600e1779 557 inode->i_op = &sockfs_inode_ops;
1da177e4 558
1da177e4
LT
559 return sock;
560}
f4a00aac 561EXPORT_SYMBOL(sock_alloc);
1da177e4 562
1da177e4
LT
563/**
564 * sock_release - close a socket
565 * @sock: socket to close
566 *
567 * The socket is released from the protocol stack if it has a release
568 * callback, and the inode is then released if the socket is bound to
89bddce5 569 * an inode not a file.
1da177e4 570 */
89bddce5 571
6d8c50dc 572static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
573{
574 if (sock->ops) {
575 struct module *owner = sock->ops->owner;
576
6d8c50dc
CW
577 if (inode)
578 inode_lock(inode);
1da177e4 579 sock->ops->release(sock);
6d8c50dc
CW
580 if (inode)
581 inode_unlock(inode);
1da177e4
LT
582 sock->ops = NULL;
583 module_put(owner);
584 }
585
e6476c21 586 if (sock->wq->fasync_list)
3410f22e 587 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 588
1da177e4
LT
589 if (!sock->file) {
590 iput(SOCK_INODE(sock));
591 return;
592 }
89bddce5 593 sock->file = NULL;
1da177e4 594}
6d8c50dc
CW
595
596void sock_release(struct socket *sock)
597{
598 __sock_release(sock, NULL);
599}
c6d409cf 600EXPORT_SYMBOL(sock_release);
1da177e4 601
c14ac945 602void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 603{
140c55d4
ED
604 u8 flags = *tx_flags;
605
c14ac945 606 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
607 flags |= SKBTX_HW_TSTAMP;
608
c14ac945 609 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
610 flags |= SKBTX_SW_TSTAMP;
611
c14ac945 612 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
613 flags |= SKBTX_SCHED_TSTAMP;
614
140c55d4 615 *tx_flags = flags;
20d49473 616}
67cc0d40 617EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 618
d8725c86 619static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 620{
01e97e65 621 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
622 BUG_ON(ret == -EIOCBQUEUED);
623 return ret;
1da177e4
LT
624}
625
d8725c86 626int sock_sendmsg(struct socket *sock, struct msghdr *msg)
228e548e 627{
d8725c86 628 int err = security_socket_sendmsg(sock, msg,
01e97e65 629 msg_data_left(msg));
228e548e 630
d8725c86 631 return err ?: sock_sendmsg_nosec(sock, msg);
0cf00c6f 632}
c6d409cf 633EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
634
635int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
636 struct kvec *vec, size_t num, size_t size)
637{
aa563d7b 638 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 639 return sock_sendmsg(sock, msg);
1da177e4 640}
c6d409cf 641EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 642
306b13eb
TH
643int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
644 struct kvec *vec, size_t num, size_t size)
645{
646 struct socket *sock = sk->sk_socket;
647
648 if (!sock->ops->sendmsg_locked)
db5980d8 649 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 650
aa563d7b 651 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
652
653 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
654}
655EXPORT_SYMBOL(kernel_sendmsg_locked);
656
8605330a
SHY
657static bool skb_is_err_queue(const struct sk_buff *skb)
658{
659 /* pkt_type of skbs enqueued on the error queue are set to
660 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
661 * in recvmsg, since skbs received on a local socket will never
662 * have a pkt_type of PACKET_OUTGOING.
663 */
664 return skb->pkt_type == PACKET_OUTGOING;
665}
666
b50a5c70
ML
667/* On transmit, software and hardware timestamps are returned independently.
668 * As the two skb clones share the hardware timestamp, which may be updated
669 * before the software timestamp is received, a hardware TX timestamp may be
670 * returned only if there is no software TX timestamp. Ignore false software
671 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 672 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
673 * hardware timestamp.
674 */
675static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
676{
677 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
678}
679
aad9c8c4
ML
680static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
681{
682 struct scm_ts_pktinfo ts_pktinfo;
683 struct net_device *orig_dev;
684
685 if (!skb_mac_header_was_set(skb))
686 return;
687
688 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
689
690 rcu_read_lock();
691 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
692 if (orig_dev)
693 ts_pktinfo.if_index = orig_dev->ifindex;
694 rcu_read_unlock();
695
696 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
697 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
698 sizeof(ts_pktinfo), &ts_pktinfo);
699}
700
92f37fd2
ED
701/*
702 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
703 */
704void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
705 struct sk_buff *skb)
706{
20d49473 707 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 708 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
f24b9be5 709 struct scm_timestamping tss;
b50a5c70 710 int empty = 1, false_tstamp = 0;
20d49473
PO
711 struct skb_shared_hwtstamps *shhwtstamps =
712 skb_hwtstamps(skb);
713
714 /* Race occurred between timestamp enabling and packet
715 receiving. Fill in the current time for now. */
b50a5c70 716 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 717 __net_timestamp(skb);
b50a5c70
ML
718 false_tstamp = 1;
719 }
20d49473
PO
720
721 if (need_software_tstamp) {
722 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
723 if (new_tstamp) {
724 struct __kernel_sock_timeval tv;
725
726 skb_get_new_timestamp(skb, &tv);
727 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
728 sizeof(tv), &tv);
729 } else {
730 struct __kernel_old_timeval tv;
731
732 skb_get_timestamp(skb, &tv);
733 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
734 sizeof(tv), &tv);
735 }
20d49473 736 } else {
887feae3
DD
737 if (new_tstamp) {
738 struct __kernel_timespec ts;
739
740 skb_get_new_timestampns(skb, &ts);
741 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
742 sizeof(ts), &ts);
743 } else {
744 struct timespec ts;
745
746 skb_get_timestampns(skb, &ts);
747 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
748 sizeof(ts), &ts);
749 }
20d49473
PO
750 }
751 }
752
f24b9be5 753 memset(&tss, 0, sizeof(tss));
c199105d 754 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 755 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 756 empty = 0;
4d276eb6 757 if (shhwtstamps &&
b9f40e21 758 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 759 !skb_is_swtx_tstamp(skb, false_tstamp) &&
aad9c8c4 760 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 761 empty = 0;
aad9c8c4
ML
762 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
763 !skb_is_err_queue(skb))
764 put_ts_pktinfo(msg, skb);
765 }
1c885808 766 if (!empty) {
20d49473 767 put_cmsg(msg, SOL_SOCKET,
7f1bc6e9 768 SO_TIMESTAMPING_OLD, sizeof(tss), &tss);
1c885808 769
8605330a 770 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 771 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
772 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
773 skb->len, skb->data);
774 }
92f37fd2 775}
7c81fd8b
ACM
776EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
777
6e3e939f
JB
778void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
779 struct sk_buff *skb)
780{
781 int ack;
782
783 if (!sock_flag(sk, SOCK_WIFI_STATUS))
784 return;
785 if (!skb->wifi_acked_valid)
786 return;
787
788 ack = skb->wifi_acked;
789
790 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
791}
792EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
793
11165f14 794static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
795 struct sk_buff *skb)
3b885787 796{
744d5a3e 797 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 798 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 799 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
800}
801
767dd033 802void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
3b885787
NH
803 struct sk_buff *skb)
804{
805 sock_recv_timestamp(msg, sk, skb);
806 sock_recv_drops(msg, sk, skb);
807}
767dd033 808EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 809
1b784140 810static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 811 int flags)
1da177e4 812{
2da62906 813 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
814}
815
2da62906 816int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
a2e27255 817{
2da62906 818 int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
a2e27255 819
2da62906 820 return err ?: sock_recvmsg_nosec(sock, msg, flags);
1da177e4 821}
c6d409cf 822EXPORT_SYMBOL(sock_recvmsg);
1da177e4 823
c1249c0a
ML
824/**
825 * kernel_recvmsg - Receive a message from a socket (kernel space)
826 * @sock: The socket to receive the message from
827 * @msg: Received message
828 * @vec: Input s/g array for message data
829 * @num: Size of input s/g array
830 * @size: Number of bytes to read
831 * @flags: Message flags (MSG_DONTWAIT, etc...)
832 *
833 * On return the msg structure contains the scatter/gather array passed in the
834 * vec argument. The array is modified so that it consists of the unfilled
835 * portion of the original array.
836 *
837 * The returned value is the total number of bytes received, or an error.
838 */
89bddce5
SH
839int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
840 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
841{
842 mm_segment_t oldfs = get_fs();
843 int result;
844
aa563d7b 845 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 846 set_fs(KERNEL_DS);
2da62906 847 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
848 set_fs(oldfs);
849 return result;
850}
c6d409cf 851EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 852
ce1d4d3e
CH
853static ssize_t sock_sendpage(struct file *file, struct page *page,
854 int offset, size_t size, loff_t *ppos, int more)
1da177e4 855{
1da177e4
LT
856 struct socket *sock;
857 int flags;
858
ce1d4d3e
CH
859 sock = file->private_data;
860
35f9c09f
ED
861 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
862 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
863 flags |= more;
ce1d4d3e 864
e6949583 865 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 866}
1da177e4 867
9c55e01c 868static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 869 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
870 unsigned int flags)
871{
872 struct socket *sock = file->private_data;
873
997b37da 874 if (unlikely(!sock->ops->splice_read))
95506588 875 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 876
9c55e01c
JA
877 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
878}
879
8ae5e030 880static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 881{
6d652330
AV
882 struct file *file = iocb->ki_filp;
883 struct socket *sock = file->private_data;
0345f931 884 struct msghdr msg = {.msg_iter = *to,
885 .msg_iocb = iocb};
8ae5e030 886 ssize_t res;
ce1d4d3e 887
8ae5e030
AV
888 if (file->f_flags & O_NONBLOCK)
889 msg.msg_flags = MSG_DONTWAIT;
890
891 if (iocb->ki_pos != 0)
1da177e4 892 return -ESPIPE;
027445c3 893
66ee59af 894 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
895 return 0;
896
2da62906 897 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
898 *to = msg.msg_iter;
899 return res;
1da177e4
LT
900}
901
8ae5e030 902static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 903{
6d652330
AV
904 struct file *file = iocb->ki_filp;
905 struct socket *sock = file->private_data;
0345f931 906 struct msghdr msg = {.msg_iter = *from,
907 .msg_iocb = iocb};
8ae5e030 908 ssize_t res;
1da177e4 909
8ae5e030 910 if (iocb->ki_pos != 0)
ce1d4d3e 911 return -ESPIPE;
027445c3 912
8ae5e030
AV
913 if (file->f_flags & O_NONBLOCK)
914 msg.msg_flags = MSG_DONTWAIT;
915
6d652330
AV
916 if (sock->type == SOCK_SEQPACKET)
917 msg.msg_flags |= MSG_EOR;
918
d8725c86 919 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
920 *from = msg.msg_iter;
921 return res;
1da177e4
LT
922}
923
1da177e4
LT
924/*
925 * Atomic setting of ioctl hooks to avoid race
926 * with module unload.
927 */
928
4a3e2f71 929static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 930static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 931
881d966b 932void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 933{
4a3e2f71 934 mutex_lock(&br_ioctl_mutex);
1da177e4 935 br_ioctl_hook = hook;
4a3e2f71 936 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
937}
938EXPORT_SYMBOL(brioctl_set);
939
4a3e2f71 940static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 941static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 942
881d966b 943void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 944{
4a3e2f71 945 mutex_lock(&vlan_ioctl_mutex);
1da177e4 946 vlan_ioctl_hook = hook;
4a3e2f71 947 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
948}
949EXPORT_SYMBOL(vlan_ioctl_set);
950
4a3e2f71 951static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 952static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 953
89bddce5 954void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 955{
4a3e2f71 956 mutex_lock(&dlci_ioctl_mutex);
1da177e4 957 dlci_ioctl_hook = hook;
4a3e2f71 958 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
959}
960EXPORT_SYMBOL(dlci_ioctl_set);
961
6b96018b 962static long sock_do_ioctl(struct net *net, struct socket *sock,
1cebf8f1
JB
963 unsigned int cmd, unsigned long arg,
964 unsigned int ifreq_size)
6b96018b
AB
965{
966 int err;
967 void __user *argp = (void __user *)arg;
968
969 err = sock->ops->ioctl(sock, cmd, arg);
970
971 /*
972 * If this ioctl is unknown try to hand it down
973 * to the NIC driver.
974 */
36fd633e
AV
975 if (err != -ENOIOCTLCMD)
976 return err;
6b96018b 977
36fd633e
AV
978 if (cmd == SIOCGIFCONF) {
979 struct ifconf ifc;
980 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
981 return -EFAULT;
982 rtnl_lock();
983 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
984 rtnl_unlock();
985 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
986 err = -EFAULT;
44c02a2c
AV
987 } else {
988 struct ifreq ifr;
989 bool need_copyout;
1cebf8f1 990 if (copy_from_user(&ifr, argp, ifreq_size))
44c02a2c
AV
991 return -EFAULT;
992 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
993 if (!err && need_copyout)
1cebf8f1 994 if (copy_to_user(argp, &ifr, ifreq_size))
44c02a2c 995 return -EFAULT;
36fd633e 996 }
6b96018b
AB
997 return err;
998}
999
1da177e4
LT
1000/*
1001 * With an ioctl, arg may well be a user mode pointer, but we don't know
1002 * what to do with it - that's up to the protocol still.
1003 */
1004
d8d211a2 1005struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1006{
1007 return &get_net(container_of(ns, struct net, ns))->ns;
1008}
d8d211a2 1009EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1010
1da177e4
LT
1011static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1012{
1013 struct socket *sock;
881d966b 1014 struct sock *sk;
1da177e4
LT
1015 void __user *argp = (void __user *)arg;
1016 int pid, err;
881d966b 1017 struct net *net;
1da177e4 1018
b69aee04 1019 sock = file->private_data;
881d966b 1020 sk = sock->sk;
3b1e0a65 1021 net = sock_net(sk);
44c02a2c
AV
1022 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1023 struct ifreq ifr;
1024 bool need_copyout;
1025 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1026 return -EFAULT;
1027 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1028 if (!err && need_copyout)
1029 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1030 return -EFAULT;
1da177e4 1031 } else
3d23e349 1032#ifdef CONFIG_WEXT_CORE
1da177e4 1033 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1034 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1035 } else
3d23e349 1036#endif
89bddce5 1037 switch (cmd) {
1da177e4
LT
1038 case FIOSETOWN:
1039 case SIOCSPGRP:
1040 err = -EFAULT;
1041 if (get_user(pid, (int __user *)argp))
1042 break;
393cc3f5 1043 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1044 break;
1045 case FIOGETOWN:
1046 case SIOCGPGRP:
609d7fa9 1047 err = put_user(f_getown(sock->file),
89bddce5 1048 (int __user *)argp);
1da177e4
LT
1049 break;
1050 case SIOCGIFBR:
1051 case SIOCSIFBR:
1052 case SIOCBRADDBR:
1053 case SIOCBRDELBR:
1054 err = -ENOPKG;
1055 if (!br_ioctl_hook)
1056 request_module("bridge");
1057
4a3e2f71 1058 mutex_lock(&br_ioctl_mutex);
89bddce5 1059 if (br_ioctl_hook)
881d966b 1060 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1061 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1062 break;
1063 case SIOCGIFVLAN:
1064 case SIOCSIFVLAN:
1065 err = -ENOPKG;
1066 if (!vlan_ioctl_hook)
1067 request_module("8021q");
1068
4a3e2f71 1069 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1070 if (vlan_ioctl_hook)
881d966b 1071 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1072 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1073 break;
1da177e4
LT
1074 case SIOCADDDLCI:
1075 case SIOCDELDLCI:
1076 err = -ENOPKG;
1077 if (!dlci_ioctl_hook)
1078 request_module("dlci");
1079
7512cbf6
PE
1080 mutex_lock(&dlci_ioctl_mutex);
1081 if (dlci_ioctl_hook)
1da177e4 1082 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1083 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1084 break;
c62cce2c
AV
1085 case SIOCGSKNS:
1086 err = -EPERM;
1087 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1088 break;
1089
1090 err = open_related_ns(&net->ns, get_net_ns);
1091 break;
1da177e4 1092 default:
1cebf8f1
JB
1093 err = sock_do_ioctl(net, sock, cmd, arg,
1094 sizeof(struct ifreq));
1da177e4 1095 break;
89bddce5 1096 }
1da177e4
LT
1097 return err;
1098}
1099
1100int sock_create_lite(int family, int type, int protocol, struct socket **res)
1101{
1102 int err;
1103 struct socket *sock = NULL;
89bddce5 1104
1da177e4
LT
1105 err = security_socket_create(family, type, protocol, 1);
1106 if (err)
1107 goto out;
1108
1109 sock = sock_alloc();
1110 if (!sock) {
1111 err = -ENOMEM;
1112 goto out;
1113 }
1114
1da177e4 1115 sock->type = type;
7420ed23
VY
1116 err = security_socket_post_create(sock, family, type, protocol, 1);
1117 if (err)
1118 goto out_release;
1119
1da177e4
LT
1120out:
1121 *res = sock;
1122 return err;
7420ed23
VY
1123out_release:
1124 sock_release(sock);
1125 sock = NULL;
1126 goto out;
1da177e4 1127}
c6d409cf 1128EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1129
1130/* No kernel lock held - perfect */
ade994f4 1131static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1132{
3cafb376 1133 struct socket *sock = file->private_data;
a331de3b 1134 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1135
e88958e6
CH
1136 if (!sock->ops->poll)
1137 return 0;
f641f13b 1138
a331de3b
CH
1139 if (sk_can_busy_loop(sock->sk)) {
1140 /* poll once if requested by the syscall */
1141 if (events & POLL_BUSY_LOOP)
1142 sk_busy_loop(sock->sk, 1);
1143
1144 /* if this socket can poll_ll, tell the system call */
1145 flag = POLL_BUSY_LOOP;
1146 }
1147
1148 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1149}
1150
89bddce5 1151static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1152{
b69aee04 1153 struct socket *sock = file->private_data;
1da177e4
LT
1154
1155 return sock->ops->mmap(file, sock, vma);
1156}
1157
/*
 * release handler for socket files: releases the socket that lives in
 * @inode.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1163
1164/*
1165 * Update the socket async list
1166 *
1167 * Fasync_list locking strategy.
1168 *
1169 * 1. fasync_list is modified only under process context socket lock
1170 * i.e. under semaphore.
1171 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1172 * or under socket lock
1da177e4
LT
1173 */
1174
1175static int sock_fasync(int fd, struct file *filp, int on)
1176{
989a2979
ED
1177 struct socket *sock = filp->private_data;
1178 struct sock *sk = sock->sk;
eaefd110 1179 struct socket_wq *wq;
1da177e4 1180
989a2979 1181 if (sk == NULL)
1da177e4 1182 return -EINVAL;
1da177e4
LT
1183
1184 lock_sock(sk);
e6476c21 1185 wq = sock->wq;
eaefd110 1186 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1187
eaefd110 1188 if (!wq->fasync_list)
989a2979
ED
1189 sock_reset_flag(sk, SOCK_FASYNC);
1190 else
bcdce719 1191 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1192
989a2979 1193 release_sock(sk);
1da177e4
LT
1194 return 0;
1195}
1196
ceb5d58b 1197/* This function may be called only under rcu_lock */
1da177e4 1198
ceb5d58b 1199int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1200{
ceb5d58b 1201 if (!wq || !wq->fasync_list)
1da177e4 1202 return -1;
ceb5d58b 1203
89bddce5 1204 switch (how) {
8d8ad9d7 1205 case SOCK_WAKE_WAITD:
ceb5d58b 1206 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1207 break;
1208 goto call_kill;
8d8ad9d7 1209 case SOCK_WAKE_SPACE:
ceb5d58b 1210 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1211 break;
1212 /* fall through */
8d8ad9d7 1213 case SOCK_WAKE_IO:
89bddce5 1214call_kill:
43815482 1215 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1216 break;
8d8ad9d7 1217 case SOCK_WAKE_URG:
43815482 1218 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1219 }
ceb5d58b 1220
1da177e4
LT
1221 return 0;
1222}
c6d409cf 1223EXPORT_SYMBOL(sock_wake_async);
1da177e4 1224
721db93a 1225int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1226 struct socket **res, int kern)
1da177e4
LT
1227{
1228 int err;
1229 struct socket *sock;
55737fda 1230 const struct net_proto_family *pf;
1da177e4
LT
1231
1232 /*
89bddce5 1233 * Check protocol is in range
1da177e4
LT
1234 */
1235 if (family < 0 || family >= NPROTO)
1236 return -EAFNOSUPPORT;
1237 if (type < 0 || type >= SOCK_MAX)
1238 return -EINVAL;
1239
1240 /* Compatibility.
1241
1242 This uglymoron is moved from INET layer to here to avoid
1243 deadlock in module load.
1244 */
1245 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1246 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1247 current->comm);
1da177e4
LT
1248 family = PF_PACKET;
1249 }
1250
1251 err = security_socket_create(family, type, protocol, kern);
1252 if (err)
1253 return err;
89bddce5 1254
55737fda
SH
1255 /*
1256 * Allocate the socket and allow the family to set things up. if
1257 * the protocol is 0, the family is instructed to select an appropriate
1258 * default.
1259 */
1260 sock = sock_alloc();
1261 if (!sock) {
e87cc472 1262 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1263 return -ENFILE; /* Not exactly a match, but its the
1264 closest posix thing */
1265 }
1266
1267 sock->type = type;
1268
95a5afca 1269#ifdef CONFIG_MODULES
89bddce5
SH
1270 /* Attempt to load a protocol module if the find failed.
1271 *
1272 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1273 * requested real, full-featured networking support upon configuration.
1274 * Otherwise module support will break!
1275 */
190683a9 1276 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1277 request_module("net-pf-%d", family);
1da177e4
LT
1278#endif
1279
55737fda
SH
1280 rcu_read_lock();
1281 pf = rcu_dereference(net_families[family]);
1282 err = -EAFNOSUPPORT;
1283 if (!pf)
1284 goto out_release;
1da177e4
LT
1285
1286 /*
1287 * We will call the ->create function, that possibly is in a loadable
1288 * module, so we have to bump that loadable module refcnt first.
1289 */
55737fda 1290 if (!try_module_get(pf->owner))
1da177e4
LT
1291 goto out_release;
1292
55737fda
SH
1293 /* Now protected by module ref count */
1294 rcu_read_unlock();
1295
3f378b68 1296 err = pf->create(net, sock, protocol, kern);
55737fda 1297 if (err < 0)
1da177e4 1298 goto out_module_put;
a79af59e 1299
1da177e4
LT
1300 /*
1301 * Now to bump the refcnt of the [loadable] module that owns this
1302 * socket at sock_release time we decrement its refcnt.
1303 */
55737fda
SH
1304 if (!try_module_get(sock->ops->owner))
1305 goto out_module_busy;
1306
1da177e4
LT
1307 /*
1308 * Now that we're done with the ->create function, the [loadable]
1309 * module can have its refcnt decremented
1310 */
55737fda 1311 module_put(pf->owner);
7420ed23
VY
1312 err = security_socket_post_create(sock, family, type, protocol, kern);
1313 if (err)
3b185525 1314 goto out_sock_release;
55737fda 1315 *res = sock;
1da177e4 1316
55737fda
SH
1317 return 0;
1318
1319out_module_busy:
1320 err = -EAFNOSUPPORT;
1da177e4 1321out_module_put:
55737fda
SH
1322 sock->ops = NULL;
1323 module_put(pf->owner);
1324out_sock_release:
1da177e4 1325 sock_release(sock);
55737fda
SH
1326 return err;
1327
1328out_release:
1329 rcu_read_unlock();
1330 goto out_sock_release;
1da177e4 1331}
721db93a 1332EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1333
1334int sock_create(int family, int type, int protocol, struct socket **res)
1335{
1b8d7ae4 1336 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1337}
c6d409cf 1338EXPORT_SYMBOL(sock_create);
1da177e4 1339
/*
 * Create a socket in @net with kern = 1.  See __sock_create() for the
 * return value and @res.
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1345
9d6a15c3 1346int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1347{
1348 int retval;
1349 struct socket *sock;
a677a039
UD
1350 int flags;
1351
e38b36f3
UD
1352 /* Check the SOCK_* constants for consistency. */
1353 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1354 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1355 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1356 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1357
a677a039 1358 flags = type & ~SOCK_TYPE_MASK;
77d27200 1359 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1360 return -EINVAL;
1361 type &= SOCK_TYPE_MASK;
1da177e4 1362
aaca0bdc
UD
1363 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1364 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1365
1da177e4
LT
1366 retval = sock_create(family, type, protocol, &sock);
1367 if (retval < 0)
8e1611e2 1368 return retval;
1da177e4 1369
8e1611e2 1370 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1371}
1372
9d6a15c3
DB
1373SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1374{
1375 return __sys_socket(family, type, protocol);
1376}
1377
1da177e4
LT
1378/*
1379 * Create a pair of connected sockets.
1380 */
1381
6debc8d8 1382int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1383{
1384 struct socket *sock1, *sock2;
1385 int fd1, fd2, err;
db349509 1386 struct file *newfile1, *newfile2;
a677a039
UD
1387 int flags;
1388
1389 flags = type & ~SOCK_TYPE_MASK;
77d27200 1390 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1391 return -EINVAL;
1392 type &= SOCK_TYPE_MASK;
1da177e4 1393
aaca0bdc
UD
1394 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1395 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1396
016a266b
AV
1397 /*
1398 * reserve descriptors and make sure we won't fail
1399 * to return them to userland.
1400 */
1401 fd1 = get_unused_fd_flags(flags);
1402 if (unlikely(fd1 < 0))
1403 return fd1;
1404
1405 fd2 = get_unused_fd_flags(flags);
1406 if (unlikely(fd2 < 0)) {
1407 put_unused_fd(fd1);
1408 return fd2;
1409 }
1410
1411 err = put_user(fd1, &usockvec[0]);
1412 if (err)
1413 goto out;
1414
1415 err = put_user(fd2, &usockvec[1]);
1416 if (err)
1417 goto out;
1418
1da177e4
LT
1419 /*
1420 * Obtain the first socket and check if the underlying protocol
1421 * supports the socketpair call.
1422 */
1423
1424 err = sock_create(family, type, protocol, &sock1);
016a266b 1425 if (unlikely(err < 0))
1da177e4
LT
1426 goto out;
1427
1428 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1429 if (unlikely(err < 0)) {
1430 sock_release(sock1);
1431 goto out;
bf3c23d1 1432 }
d73aa286 1433
d47cd945
DH
1434 err = security_socket_socketpair(sock1, sock2);
1435 if (unlikely(err)) {
1436 sock_release(sock2);
1437 sock_release(sock1);
1438 goto out;
1439 }
1440
016a266b
AV
1441 err = sock1->ops->socketpair(sock1, sock2);
1442 if (unlikely(err < 0)) {
1443 sock_release(sock2);
1444 sock_release(sock1);
1445 goto out;
28407630
AV
1446 }
1447
aab174f0 1448 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1449 if (IS_ERR(newfile1)) {
28407630 1450 err = PTR_ERR(newfile1);
016a266b
AV
1451 sock_release(sock2);
1452 goto out;
28407630
AV
1453 }
1454
aab174f0 1455 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1456 if (IS_ERR(newfile2)) {
1457 err = PTR_ERR(newfile2);
016a266b
AV
1458 fput(newfile1);
1459 goto out;
db349509
AV
1460 }
1461
157cf649 1462 audit_fd_pair(fd1, fd2);
d73aa286 1463
db349509
AV
1464 fd_install(fd1, newfile1);
1465 fd_install(fd2, newfile2);
d73aa286 1466 return 0;
1da177e4 1467
016a266b 1468out:
d73aa286 1469 put_unused_fd(fd2);
d73aa286 1470 put_unused_fd(fd1);
1da177e4
LT
1471 return err;
1472}
1473
6debc8d8
DB
1474SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1475 int __user *, usockvec)
1476{
1477 return __sys_socketpair(family, type, protocol, usockvec);
1478}
1479
1da177e4
LT
1480/*
1481 * Bind a name to a socket. Nothing much to do here since it's
1482 * the protocol's responsibility to handle the local address.
1483 *
1484 * We move the socket address to kernel space before we call
1485 * the protocol layer (having also checked the address is ok).
1486 */
1487
a87d35d8 1488int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1489{
1490 struct socket *sock;
230b1839 1491 struct sockaddr_storage address;
6cb153ca 1492 int err, fput_needed;
1da177e4 1493
89bddce5 1494 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1495 if (sock) {
43db362d 1496 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1497 if (!err) {
89bddce5 1498 err = security_socket_bind(sock,
230b1839 1499 (struct sockaddr *)&address,
89bddce5 1500 addrlen);
6cb153ca
BL
1501 if (!err)
1502 err = sock->ops->bind(sock,
89bddce5 1503 (struct sockaddr *)
230b1839 1504 &address, addrlen);
1da177e4 1505 }
6cb153ca 1506 fput_light(sock->file, fput_needed);
89bddce5 1507 }
1da177e4
LT
1508 return err;
1509}
1510
a87d35d8
DB
1511SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1512{
1513 return __sys_bind(fd, umyaddr, addrlen);
1514}
1515
1da177e4
LT
1516/*
1517 * Perform a listen. Basically, we allow the protocol to do anything
1518 * necessary for a listen, and if that works, we mark the socket as
1519 * ready for listening.
1520 */
1521
25e290ee 1522int __sys_listen(int fd, int backlog)
1da177e4
LT
1523{
1524 struct socket *sock;
6cb153ca 1525 int err, fput_needed;
b8e1f9b5 1526 int somaxconn;
89bddce5
SH
1527
1528 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1529 if (sock) {
8efa6e93 1530 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1531 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1532 backlog = somaxconn;
1da177e4
LT
1533
1534 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1535 if (!err)
1536 err = sock->ops->listen(sock, backlog);
1da177e4 1537
6cb153ca 1538 fput_light(sock->file, fput_needed);
1da177e4
LT
1539 }
1540 return err;
1541}
1542
25e290ee
DB
1543SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1544{
1545 return __sys_listen(fd, backlog);
1546}
1547
1da177e4
LT
1548/*
1549 * For accept, we attempt to create a new socket, set up the link
1550 * with the client, wake up the client, then return the new
1551 * connected fd. We collect the address of the connector in kernel
1552 * space and move it to user at the very end. This is unclean because
1553 * we open the socket then return an error.
1554 *
1555 * 1003.1g adds the ability to recvmsg() to query connection pending
1556 * status to recvmsg. We need to add that support in a way thats
b903036a 1557 * clean when we restructure accept also.
1da177e4
LT
1558 */
1559
4541e805
DB
1560int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1561 int __user *upeer_addrlen, int flags)
1da177e4
LT
1562{
1563 struct socket *sock, *newsock;
39d8c1b6 1564 struct file *newfile;
6cb153ca 1565 int err, len, newfd, fput_needed;
230b1839 1566 struct sockaddr_storage address;
1da177e4 1567
77d27200 1568 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1569 return -EINVAL;
1570
1571 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1572 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1573
6cb153ca 1574 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1575 if (!sock)
1576 goto out;
1577
1578 err = -ENFILE;
c6d409cf
ED
1579 newsock = sock_alloc();
1580 if (!newsock)
1da177e4
LT
1581 goto out_put;
1582
1583 newsock->type = sock->type;
1584 newsock->ops = sock->ops;
1585
1da177e4
LT
1586 /*
1587 * We don't need try_module_get here, as the listening socket (sock)
1588 * has the protocol module (sock->ops->owner) held.
1589 */
1590 __module_get(newsock->ops->owner);
1591
28407630 1592 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1593 if (unlikely(newfd < 0)) {
1594 err = newfd;
9a1875e6
DM
1595 sock_release(newsock);
1596 goto out_put;
39d8c1b6 1597 }
aab174f0 1598 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1599 if (IS_ERR(newfile)) {
28407630
AV
1600 err = PTR_ERR(newfile);
1601 put_unused_fd(newfd);
28407630
AV
1602 goto out_put;
1603 }
39d8c1b6 1604
a79af59e
FF
1605 err = security_socket_accept(sock, newsock);
1606 if (err)
39d8c1b6 1607 goto out_fd;
a79af59e 1608
cdfbabfb 1609 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1610 if (err < 0)
39d8c1b6 1611 goto out_fd;
1da177e4
LT
1612
1613 if (upeer_sockaddr) {
9b2c45d4
DV
1614 len = newsock->ops->getname(newsock,
1615 (struct sockaddr *)&address, 2);
1616 if (len < 0) {
1da177e4 1617 err = -ECONNABORTED;
39d8c1b6 1618 goto out_fd;
1da177e4 1619 }
43db362d 1620 err = move_addr_to_user(&address,
230b1839 1621 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1622 if (err < 0)
39d8c1b6 1623 goto out_fd;
1da177e4
LT
1624 }
1625
1626 /* File flags are not inherited via accept() unlike another OSes. */
1627
39d8c1b6
DM
1628 fd_install(newfd, newfile);
1629 err = newfd;
1da177e4 1630
1da177e4 1631out_put:
6cb153ca 1632 fput_light(sock->file, fput_needed);
1da177e4
LT
1633out:
1634 return err;
39d8c1b6 1635out_fd:
9606a216 1636 fput(newfile);
39d8c1b6 1637 put_unused_fd(newfd);
1da177e4
LT
1638 goto out_put;
1639}
1640
4541e805
DB
1641SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1642 int __user *, upeer_addrlen, int, flags)
1643{
1644 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1645}
1646
20f37034
HC
1647SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1648 int __user *, upeer_addrlen)
aaca0bdc 1649{
4541e805 1650 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1651}
1652
1da177e4
LT
1653/*
1654 * Attempt to connect to a socket with the server address. The address
1655 * is in user space so we verify it is OK and move it to kernel space.
1656 *
1657 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1658 * break bindings
1659 *
1660 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1661 * other SEQPACKET protocols that take time to connect() as it doesn't
1662 * include the -EINPROGRESS status for such sockets.
1663 */
1664
1387c2c2 1665int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1666{
1667 struct socket *sock;
230b1839 1668 struct sockaddr_storage address;
6cb153ca 1669 int err, fput_needed;
1da177e4 1670
6cb153ca 1671 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1672 if (!sock)
1673 goto out;
43db362d 1674 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1675 if (err < 0)
1676 goto out_put;
1677
89bddce5 1678 err =
230b1839 1679 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1680 if (err)
1681 goto out_put;
1682
230b1839 1683 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1684 sock->file->f_flags);
1685out_put:
6cb153ca 1686 fput_light(sock->file, fput_needed);
1da177e4
LT
1687out:
1688 return err;
1689}
1690
1387c2c2
DB
1691SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1692 int, addrlen)
1693{
1694 return __sys_connect(fd, uservaddr, addrlen);
1695}
1696
1da177e4
LT
1697/*
1698 * Get the local address ('name') of a socket object. Move the obtained
1699 * name to user space.
1700 */
1701
8882a107
DB
1702int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1703 int __user *usockaddr_len)
1da177e4
LT
1704{
1705 struct socket *sock;
230b1839 1706 struct sockaddr_storage address;
9b2c45d4 1707 int err, fput_needed;
89bddce5 1708
6cb153ca 1709 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1710 if (!sock)
1711 goto out;
1712
1713 err = security_socket_getsockname(sock);
1714 if (err)
1715 goto out_put;
1716
9b2c45d4
DV
1717 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1718 if (err < 0)
1da177e4 1719 goto out_put;
9b2c45d4
DV
1720 /* "err" is actually length in this case */
1721 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1722
1723out_put:
6cb153ca 1724 fput_light(sock->file, fput_needed);
1da177e4
LT
1725out:
1726 return err;
1727}
1728
8882a107
DB
1729SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1730 int __user *, usockaddr_len)
1731{
1732 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1733}
1734
1da177e4
LT
1735/*
1736 * Get the remote address ('name') of a socket object. Move the obtained
1737 * name to user space.
1738 */
1739
b21c8f83
DB
1740int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1741 int __user *usockaddr_len)
1da177e4
LT
1742{
1743 struct socket *sock;
230b1839 1744 struct sockaddr_storage address;
9b2c45d4 1745 int err, fput_needed;
1da177e4 1746
89bddce5
SH
1747 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1748 if (sock != NULL) {
1da177e4
LT
1749 err = security_socket_getpeername(sock);
1750 if (err) {
6cb153ca 1751 fput_light(sock->file, fput_needed);
1da177e4
LT
1752 return err;
1753 }
1754
9b2c45d4
DV
1755 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1756 if (err >= 0)
1757 /* "err" is actually length in this case */
1758 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1759 usockaddr_len);
6cb153ca 1760 fput_light(sock->file, fput_needed);
1da177e4
LT
1761 }
1762 return err;
1763}
1764
b21c8f83
DB
1765SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1766 int __user *, usockaddr_len)
1767{
1768 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1769}
1770
1da177e4
LT
1771/*
1772 * Send a datagram to a given address. We move the address into kernel
1773 * space and check the user space data area is readable before invoking
1774 * the protocol.
1775 */
211b634b
DB
1776int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1777 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1778{
1779 struct socket *sock;
230b1839 1780 struct sockaddr_storage address;
1da177e4
LT
1781 int err;
1782 struct msghdr msg;
1783 struct iovec iov;
6cb153ca 1784 int fput_needed;
6cb153ca 1785
602bd0e9
AV
1786 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1787 if (unlikely(err))
1788 return err;
de0fa95c
PE
1789 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1790 if (!sock)
4387ff75 1791 goto out;
6cb153ca 1792
89bddce5 1793 msg.msg_name = NULL;
89bddce5
SH
1794 msg.msg_control = NULL;
1795 msg.msg_controllen = 0;
1796 msg.msg_namelen = 0;
6cb153ca 1797 if (addr) {
43db362d 1798 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1799 if (err < 0)
1800 goto out_put;
230b1839 1801 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1802 msg.msg_namelen = addr_len;
1da177e4
LT
1803 }
1804 if (sock->file->f_flags & O_NONBLOCK)
1805 flags |= MSG_DONTWAIT;
1806 msg.msg_flags = flags;
d8725c86 1807 err = sock_sendmsg(sock, &msg);
1da177e4 1808
89bddce5 1809out_put:
de0fa95c 1810 fput_light(sock->file, fput_needed);
4387ff75 1811out:
1da177e4
LT
1812 return err;
1813}
1814
211b634b
DB
1815SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1816 unsigned int, flags, struct sockaddr __user *, addr,
1817 int, addr_len)
1818{
1819 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1820}
1821
1da177e4 1822/*
89bddce5 1823 * Send a datagram down a socket.
1da177e4
LT
1824 */
1825
3e0fa65f 1826SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1827 unsigned int, flags)
1da177e4 1828{
211b634b 1829 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1830}
1831
1832/*
89bddce5 1833 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1834 * sender. We verify the buffers are writable and if needed move the
1835 * sender address from kernel to user space.
1836 */
7a09e1eb
DB
1837int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1838 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
1839{
1840 struct socket *sock;
1841 struct iovec iov;
1842 struct msghdr msg;
230b1839 1843 struct sockaddr_storage address;
89bddce5 1844 int err, err2;
6cb153ca
BL
1845 int fput_needed;
1846
602bd0e9
AV
1847 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1848 if (unlikely(err))
1849 return err;
de0fa95c 1850 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1851 if (!sock)
de0fa95c 1852 goto out;
1da177e4 1853
89bddce5
SH
1854 msg.msg_control = NULL;
1855 msg.msg_controllen = 0;
f3d33426
HFS
1856 /* Save some cycles and don't copy the address if not needed */
1857 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1858 /* We assume all kernel code knows the size of sockaddr_storage */
1859 msg.msg_namelen = 0;
130ed5d1 1860 msg.msg_iocb = NULL;
9f138fa6 1861 msg.msg_flags = 0;
1da177e4
LT
1862 if (sock->file->f_flags & O_NONBLOCK)
1863 flags |= MSG_DONTWAIT;
2da62906 1864 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1865
89bddce5 1866 if (err >= 0 && addr != NULL) {
43db362d 1867 err2 = move_addr_to_user(&address,
230b1839 1868 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1869 if (err2 < 0)
1870 err = err2;
1da177e4 1871 }
de0fa95c
PE
1872
1873 fput_light(sock->file, fput_needed);
4387ff75 1874out:
1da177e4
LT
1875 return err;
1876}
1877
7a09e1eb
DB
1878SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1879 unsigned int, flags, struct sockaddr __user *, addr,
1880 int __user *, addr_len)
1881{
1882 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
1883}
1884
1da177e4 1885/*
89bddce5 1886 * Receive a datagram from a socket.
1da177e4
LT
1887 */
1888
b7c0ddf5
JG
1889SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1890 unsigned int, flags)
1da177e4 1891{
7a09e1eb 1892 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
1893}
1894
1895/*
1896 * Set a socket option. Because we don't know the option lengths we have
1897 * to pass the user mode parameter for the protocols to sort out.
1898 */
1899
cc36dca0
DB
1900static int __sys_setsockopt(int fd, int level, int optname,
1901 char __user *optval, int optlen)
1da177e4 1902{
6cb153ca 1903 int err, fput_needed;
1da177e4
LT
1904 struct socket *sock;
1905
1906 if (optlen < 0)
1907 return -EINVAL;
89bddce5
SH
1908
1909 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1910 if (sock != NULL) {
1911 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1912 if (err)
1913 goto out_put;
1da177e4
LT
1914
1915 if (level == SOL_SOCKET)
89bddce5
SH
1916 err =
1917 sock_setsockopt(sock, level, optname, optval,
1918 optlen);
1da177e4 1919 else
89bddce5
SH
1920 err =
1921 sock->ops->setsockopt(sock, level, optname, optval,
1922 optlen);
6cb153ca
BL
1923out_put:
1924 fput_light(sock->file, fput_needed);
1da177e4
LT
1925 }
1926 return err;
1927}
1928
cc36dca0
DB
1929SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1930 char __user *, optval, int, optlen)
1931{
1932 return __sys_setsockopt(fd, level, optname, optval, optlen);
1933}
1934
1da177e4
LT
1935/*
1936 * Get a socket option. Because we don't know the option lengths we have
1937 * to pass a user mode parameter for the protocols to sort out.
1938 */
1939
13a2d70e
DB
1940static int __sys_getsockopt(int fd, int level, int optname,
1941 char __user *optval, int __user *optlen)
1da177e4 1942{
6cb153ca 1943 int err, fput_needed;
1da177e4
LT
1944 struct socket *sock;
1945
89bddce5
SH
1946 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1947 if (sock != NULL) {
6cb153ca
BL
1948 err = security_socket_getsockopt(sock, level, optname);
1949 if (err)
1950 goto out_put;
1da177e4
LT
1951
1952 if (level == SOL_SOCKET)
89bddce5
SH
1953 err =
1954 sock_getsockopt(sock, level, optname, optval,
1955 optlen);
1da177e4 1956 else
89bddce5
SH
1957 err =
1958 sock->ops->getsockopt(sock, level, optname, optval,
1959 optlen);
6cb153ca
BL
1960out_put:
1961 fput_light(sock->file, fput_needed);
1da177e4
LT
1962 }
1963 return err;
1964}
1965
13a2d70e
DB
1966SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1967 char __user *, optval, int __user *, optlen)
1968{
1969 return __sys_getsockopt(fd, level, optname, optval, optlen);
1970}
1971
1da177e4
LT
1972/*
1973 * Shutdown a socket.
1974 */
1975
005a1aea 1976int __sys_shutdown(int fd, int how)
1da177e4 1977{
6cb153ca 1978 int err, fput_needed;
1da177e4
LT
1979 struct socket *sock;
1980
89bddce5
SH
1981 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1982 if (sock != NULL) {
1da177e4 1983 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1984 if (!err)
1985 err = sock->ops->shutdown(sock, how);
1986 fput_light(sock->file, fput_needed);
1da177e4
LT
1987 }
1988 return err;
1989}
1990
005a1aea
DB
1991SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1992{
1993 return __sys_shutdown(fd, how);
1994}
1995
/*
 * Helper macros yielding the address of a message-header field in either
 * the native header (<msg>) or its compat twin (<msg>_compat), selected by
 * MSG_CMSG_COMPAT in the local variable `flags` of the using function.
 * The fields have the same type (int / unsigned) on our platforms.
 */
#define COMPAT_MSG(msg, member)	\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2002
c71d8ebe
TH
2003struct used_address {
2004 struct sockaddr_storage name;
2005 unsigned int name_len;
2006};
2007
da184284
AV
2008static int copy_msghdr_from_user(struct msghdr *kmsg,
2009 struct user_msghdr __user *umsg,
2010 struct sockaddr __user **save_addr,
2011 struct iovec **iov)
1661bf36 2012{
ffb07550 2013 struct user_msghdr msg;
08adb7da
AV
2014 ssize_t err;
2015
ffb07550 2016 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2017 return -EFAULT;
dbb490b9 2018
864d9664 2019 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2020 kmsg->msg_controllen = msg.msg_controllen;
2021 kmsg->msg_flags = msg.msg_flags;
2022
2023 kmsg->msg_namelen = msg.msg_namelen;
2024 if (!msg.msg_name)
6a2a2b3a
AS
2025 kmsg->msg_namelen = 0;
2026
dbb490b9
ML
2027 if (kmsg->msg_namelen < 0)
2028 return -EINVAL;
2029
1661bf36 2030 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2031 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2032
2033 if (save_addr)
ffb07550 2034 *save_addr = msg.msg_name;
08adb7da 2035
ffb07550 2036 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2037 if (!save_addr) {
864d9664
PA
2038 err = move_addr_to_kernel(msg.msg_name,
2039 kmsg->msg_namelen,
08adb7da
AV
2040 kmsg->msg_name);
2041 if (err < 0)
2042 return err;
2043 }
2044 } else {
2045 kmsg->msg_name = NULL;
2046 kmsg->msg_namelen = 0;
2047 }
2048
ffb07550 2049 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2050 return -EMSGSIZE;
2051
0345f931 2052 kmsg->msg_iocb = NULL;
2053
ffb07550
AV
2054 return import_iovec(save_addr ? READ : WRITE,
2055 msg.msg_iov, msg.msg_iovlen,
da184284 2056 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
2057}
2058
666547ff 2059static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2060 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2061 struct used_address *used_address,
2062 unsigned int allowed_msghdr_flags)
1da177e4 2063{
89bddce5
SH
2064 struct compat_msghdr __user *msg_compat =
2065 (struct compat_msghdr __user *)msg;
230b1839 2066 struct sockaddr_storage address;
1da177e4 2067 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2068 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2069 __aligned(sizeof(__kernel_size_t));
89bddce5 2070 /* 20 is size of ipv6_pktinfo */
1da177e4 2071 unsigned char *ctl_buf = ctl;
d8725c86 2072 int ctl_len;
08adb7da 2073 ssize_t err;
89bddce5 2074
08adb7da 2075 msg_sys->msg_name = &address;
1da177e4 2076
08449320 2077 if (MSG_CMSG_COMPAT & flags)
08adb7da 2078 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2079 else
08adb7da 2080 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2081 if (err < 0)
da184284 2082 return err;
1da177e4
LT
2083
2084 err = -ENOBUFS;
2085
228e548e 2086 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2087 goto out_freeiov;
28a94d8f 2088 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2089 ctl_len = msg_sys->msg_controllen;
1da177e4 2090 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2091 err =
228e548e 2092 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2093 sizeof(ctl));
1da177e4
LT
2094 if (err)
2095 goto out_freeiov;
228e548e
AB
2096 ctl_buf = msg_sys->msg_control;
2097 ctl_len = msg_sys->msg_controllen;
1da177e4 2098 } else if (ctl_len) {
ac4340fc
DM
2099 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2100 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2101 if (ctl_len > sizeof(ctl)) {
1da177e4 2102 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2103 if (ctl_buf == NULL)
1da177e4
LT
2104 goto out_freeiov;
2105 }
2106 err = -EFAULT;
2107 /*
228e548e 2108 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2109 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2110 * checking falls down on this.
2111 */
fb8621bb 2112 if (copy_from_user(ctl_buf,
228e548e 2113 (void __user __force *)msg_sys->msg_control,
89bddce5 2114 ctl_len))
1da177e4 2115 goto out_freectl;
228e548e 2116 msg_sys->msg_control = ctl_buf;
1da177e4 2117 }
228e548e 2118 msg_sys->msg_flags = flags;
1da177e4
LT
2119
2120 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2121 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2122 /*
2123 * If this is sendmmsg() and current destination address is same as
2124 * previously succeeded address, omit asking LSM's decision.
2125 * used_address->name_len is initialized to UINT_MAX so that the first
2126 * destination address never matches.
2127 */
bc909d9d
MD
2128 if (used_address && msg_sys->msg_name &&
2129 used_address->name_len == msg_sys->msg_namelen &&
2130 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2131 used_address->name_len)) {
d8725c86 2132 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2133 goto out_freectl;
2134 }
d8725c86 2135 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2136 /*
2137 * If this is sendmmsg() and sending to current destination address was
2138 * successful, remember it.
2139 */
2140 if (used_address && err >= 0) {
2141 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2142 if (msg_sys->msg_name)
2143 memcpy(&used_address->name, msg_sys->msg_name,
2144 used_address->name_len);
c71d8ebe 2145 }
1da177e4
LT
2146
2147out_freectl:
89bddce5 2148 if (ctl_buf != ctl)
1da177e4
LT
2149 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2150out_freeiov:
da184284 2151 kfree(iov);
228e548e
AB
2152 return err;
2153}
2154
2155/*
2156 * BSD sendmsg interface
2157 */
2158
e1834a32
DB
2159long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2160 bool forbid_cmsg_compat)
228e548e
AB
2161{
2162 int fput_needed, err;
2163 struct msghdr msg_sys;
1be374a0
AL
2164 struct socket *sock;
2165
e1834a32
DB
2166 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2167 return -EINVAL;
2168
1be374a0 2169 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2170 if (!sock)
2171 goto out;
2172
28a94d8f 2173 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2174
6cb153ca 2175 fput_light(sock->file, fput_needed);
89bddce5 2176out:
1da177e4
LT
2177 return err;
2178}
2179
666547ff 2180SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2181{
e1834a32 2182 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2183}
2184
228e548e
AB
2185/*
2186 * Linux sendmmsg interface
2187 */
2188
2189int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2190 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2191{
2192 int fput_needed, err, datagrams;
2193 struct socket *sock;
2194 struct mmsghdr __user *entry;
2195 struct compat_mmsghdr __user *compat_entry;
2196 struct msghdr msg_sys;
c71d8ebe 2197 struct used_address used_address;
f092276d 2198 unsigned int oflags = flags;
228e548e 2199
e1834a32
DB
2200 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2201 return -EINVAL;
2202
98382f41
AB
2203 if (vlen > UIO_MAXIOV)
2204 vlen = UIO_MAXIOV;
228e548e
AB
2205
2206 datagrams = 0;
2207
2208 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2209 if (!sock)
2210 return err;
2211
c71d8ebe 2212 used_address.name_len = UINT_MAX;
228e548e
AB
2213 entry = mmsg;
2214 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2215 err = 0;
f092276d 2216 flags |= MSG_BATCH;
228e548e
AB
2217
2218 while (datagrams < vlen) {
f092276d
TH
2219 if (datagrams == vlen - 1)
2220 flags = oflags;
2221
228e548e 2222 if (MSG_CMSG_COMPAT & flags) {
666547ff 2223 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2224 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2225 if (err < 0)
2226 break;
2227 err = __put_user(err, &compat_entry->msg_len);
2228 ++compat_entry;
2229 } else {
a7526eb5 2230 err = ___sys_sendmsg(sock,
666547ff 2231 (struct user_msghdr __user *)entry,
28a94d8f 2232 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2233 if (err < 0)
2234 break;
2235 err = put_user(err, &entry->msg_len);
2236 ++entry;
2237 }
2238
2239 if (err)
2240 break;
2241 ++datagrams;
3023898b
SHY
2242 if (msg_data_left(&msg_sys))
2243 break;
a78cb84c 2244 cond_resched();
228e548e
AB
2245 }
2246
228e548e
AB
2247 fput_light(sock->file, fput_needed);
2248
728ffb86
AB
2249 /* We only return an error if no datagrams were able to be sent */
2250 if (datagrams != 0)
228e548e
AB
2251 return datagrams;
2252
228e548e
AB
2253 return err;
2254}
2255
2256SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2257 unsigned int, vlen, unsigned int, flags)
2258{
e1834a32 2259 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2260}
2261
666547ff 2262static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2263 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2264{
89bddce5
SH
2265 struct compat_msghdr __user *msg_compat =
2266 (struct compat_msghdr __user *)msg;
1da177e4 2267 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2268 struct iovec *iov = iovstack;
1da177e4 2269 unsigned long cmsg_ptr;
2da62906 2270 int len;
08adb7da 2271 ssize_t err;
1da177e4
LT
2272
2273 /* kernel mode address */
230b1839 2274 struct sockaddr_storage addr;
1da177e4
LT
2275
2276 /* user mode address pointers */
2277 struct sockaddr __user *uaddr;
08adb7da 2278 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2279
08adb7da 2280 msg_sys->msg_name = &addr;
1da177e4 2281
f3d33426 2282 if (MSG_CMSG_COMPAT & flags)
08adb7da 2283 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2284 else
08adb7da 2285 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2286 if (err < 0)
da184284 2287 return err;
1da177e4 2288
a2e27255
ACM
2289 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2290 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2291
f3d33426
HFS
2292 /* We assume all kernel code knows the size of sockaddr_storage */
2293 msg_sys->msg_namelen = 0;
2294
1da177e4
LT
2295 if (sock->file->f_flags & O_NONBLOCK)
2296 flags |= MSG_DONTWAIT;
2da62906 2297 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2298 if (err < 0)
2299 goto out_freeiov;
2300 len = err;
2301
2302 if (uaddr != NULL) {
43db362d 2303 err = move_addr_to_user(&addr,
a2e27255 2304 msg_sys->msg_namelen, uaddr,
89bddce5 2305 uaddr_len);
1da177e4
LT
2306 if (err < 0)
2307 goto out_freeiov;
2308 }
a2e27255 2309 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2310 COMPAT_FLAGS(msg));
1da177e4
LT
2311 if (err)
2312 goto out_freeiov;
2313 if (MSG_CMSG_COMPAT & flags)
a2e27255 2314 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2315 &msg_compat->msg_controllen);
2316 else
a2e27255 2317 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2318 &msg->msg_controllen);
2319 if (err)
2320 goto out_freeiov;
2321 err = len;
2322
2323out_freeiov:
da184284 2324 kfree(iov);
a2e27255
ACM
2325 return err;
2326}
2327
2328/*
2329 * BSD recvmsg interface
2330 */
2331
e1834a32
DB
2332long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2333 bool forbid_cmsg_compat)
a2e27255
ACM
2334{
2335 int fput_needed, err;
2336 struct msghdr msg_sys;
1be374a0
AL
2337 struct socket *sock;
2338
e1834a32
DB
2339 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2340 return -EINVAL;
2341
1be374a0 2342 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2343 if (!sock)
2344 goto out;
2345
a7526eb5 2346 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2347
6cb153ca 2348 fput_light(sock->file, fput_needed);
1da177e4
LT
2349out:
2350 return err;
2351}
2352
666547ff 2353SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2354 unsigned int, flags)
2355{
e1834a32 2356 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2357}
2358
a2e27255
ACM
2359/*
2360 * Linux recvmmsg interface
2361 */
2362
e11d4284
AB
2363static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2364 unsigned int vlen, unsigned int flags,
2365 struct timespec64 *timeout)
a2e27255
ACM
2366{
2367 int fput_needed, err, datagrams;
2368 struct socket *sock;
2369 struct mmsghdr __user *entry;
d7256d0e 2370 struct compat_mmsghdr __user *compat_entry;
a2e27255 2371 struct msghdr msg_sys;
766b9f92
DD
2372 struct timespec64 end_time;
2373 struct timespec64 timeout64;
a2e27255
ACM
2374
2375 if (timeout &&
2376 poll_select_set_timeout(&end_time, timeout->tv_sec,
2377 timeout->tv_nsec))
2378 return -EINVAL;
2379
2380 datagrams = 0;
2381
2382 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2383 if (!sock)
2384 return err;
2385
7797dc41
SHY
2386 if (likely(!(flags & MSG_ERRQUEUE))) {
2387 err = sock_error(sock->sk);
2388 if (err) {
2389 datagrams = err;
2390 goto out_put;
2391 }
e623a9e9 2392 }
a2e27255
ACM
2393
2394 entry = mmsg;
d7256d0e 2395 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2396
2397 while (datagrams < vlen) {
2398 /*
2399 * No need to ask LSM for more than the first datagram.
2400 */
d7256d0e 2401 if (MSG_CMSG_COMPAT & flags) {
666547ff 2402 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2403 &msg_sys, flags & ~MSG_WAITFORONE,
2404 datagrams);
d7256d0e
JMG
2405 if (err < 0)
2406 break;
2407 err = __put_user(err, &compat_entry->msg_len);
2408 ++compat_entry;
2409 } else {
a7526eb5 2410 err = ___sys_recvmsg(sock,
666547ff 2411 (struct user_msghdr __user *)entry,
a7526eb5
AL
2412 &msg_sys, flags & ~MSG_WAITFORONE,
2413 datagrams);
d7256d0e
JMG
2414 if (err < 0)
2415 break;
2416 err = put_user(err, &entry->msg_len);
2417 ++entry;
2418 }
2419
a2e27255
ACM
2420 if (err)
2421 break;
a2e27255
ACM
2422 ++datagrams;
2423
71c5c159
BB
2424 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2425 if (flags & MSG_WAITFORONE)
2426 flags |= MSG_DONTWAIT;
2427
a2e27255 2428 if (timeout) {
766b9f92 2429 ktime_get_ts64(&timeout64);
c2e6c856 2430 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2431 if (timeout->tv_sec < 0) {
2432 timeout->tv_sec = timeout->tv_nsec = 0;
2433 break;
2434 }
2435
2436 /* Timeout, return less than vlen datagrams */
2437 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2438 break;
2439 }
2440
2441 /* Out of band data, return right away */
2442 if (msg_sys.msg_flags & MSG_OOB)
2443 break;
a78cb84c 2444 cond_resched();
a2e27255
ACM
2445 }
2446
a2e27255 2447 if (err == 0)
34b88a68
ACM
2448 goto out_put;
2449
2450 if (datagrams == 0) {
2451 datagrams = err;
2452 goto out_put;
2453 }
a2e27255 2454
34b88a68
ACM
2455 /*
2456 * We may return less entries than requested (vlen) if the
2457 * sock is non block and there aren't enough datagrams...
2458 */
2459 if (err != -EAGAIN) {
a2e27255 2460 /*
34b88a68
ACM
2461 * ... or if recvmsg returns an error after we
2462 * received some datagrams, where we record the
2463 * error to return on the next call or if the
2464 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2465 */
34b88a68 2466 sock->sk->sk_err = -err;
a2e27255 2467 }
34b88a68
ACM
2468out_put:
2469 fput_light(sock->file, fput_needed);
a2e27255 2470
34b88a68 2471 return datagrams;
a2e27255
ACM
2472}
2473
e11d4284
AB
2474int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2475 unsigned int vlen, unsigned int flags,
2476 struct __kernel_timespec __user *timeout,
2477 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2478{
2479 int datagrams;
c2e6c856 2480 struct timespec64 timeout_sys;
a2e27255 2481
e11d4284
AB
2482 if (timeout && get_timespec64(&timeout_sys, timeout))
2483 return -EFAULT;
a2e27255 2484
e11d4284 2485 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2486 return -EFAULT;
2487
e11d4284
AB
2488 if (!timeout && !timeout32)
2489 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2490
2491 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2492
e11d4284
AB
2493 if (datagrams <= 0)
2494 return datagrams;
2495
2496 if (timeout && put_timespec64(&timeout_sys, timeout))
2497 datagrams = -EFAULT;
2498
2499 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2500 datagrams = -EFAULT;
2501
2502 return datagrams;
2503}
2504
1255e269
DB
2505SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2506 unsigned int, vlen, unsigned int, flags,
c2e6c856 2507 struct __kernel_timespec __user *, timeout)
1255e269 2508{
e11d4284
AB
2509 if (flags & MSG_CMSG_COMPAT)
2510 return -EINVAL;
2511
2512 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2513}
2514
#ifdef CONFIG_COMPAT_32BIT_TIME
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	/* MSG_CMSG_COMPAT is not accepted from this entry point. */
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	/* Same as recvmmsg, but the timeout uses the old_timespec32 layout. */
	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2526
a2e27255 2527#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by the call
 * number. Entry names below presumably follow the SYS_* numbering in
 * <linux/net.h>; verify against that header.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0: unused */
	AL(3),	/*  1: socket */
	AL(3),	/*  2: bind */
	AL(3),	/*  3: connect */
	AL(2),	/*  4: listen */
	AL(3),	/*  5: accept */
	AL(3),	/*  6: getsockname */
	AL(3),	/*  7: getpeername */
	AL(4),	/*  8: socketpair */
	AL(4),	/*  9: send */
	AL(4),	/* 10: recv */
	AL(6),	/* 11: sendto */
	AL(6),	/* 12: recvfrom */
	AL(2),	/* 13: shutdown */
	AL(5),	/* 14: setsockopt */
	AL(5),	/* 15: getsockopt */
	AL(3),	/* 16: sendmsg */
	AL(3),	/* 17: recvmsg */
	AL(4),	/* 18: accept4 */
	AL(5),	/* 19: recvmmsg */
	AL(4)	/* 20: sendmmsg */
};

#undef AL
2538
2539/*
89bddce5 2540 * System call vectors.
1da177e4
LT
2541 *
2542 * Argument checking cleaned up. Saved 20% in size.
2543 * This function doesn't need to set the kernel lock because
89bddce5 2544 * it is set by the callees.
1da177e4
LT
2545 */
2546
3e0fa65f 2547SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2548{
2950fa9d 2549 unsigned long a[AUDITSC_ARGS];
89bddce5 2550 unsigned long a0, a1;
1da177e4 2551 int err;
47379052 2552 unsigned int len;
1da177e4 2553
228e548e 2554 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2555 return -EINVAL;
c8e8cd57 2556 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2557
47379052
AV
2558 len = nargs[call];
2559 if (len > sizeof(a))
2560 return -EINVAL;
2561
1da177e4 2562 /* copy_from_user should be SMP safe. */
47379052 2563 if (copy_from_user(a, args, len))
1da177e4 2564 return -EFAULT;
3ec3b2fb 2565
2950fa9d
CG
2566 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2567 if (err)
2568 return err;
3ec3b2fb 2569
89bddce5
SH
2570 a0 = a[0];
2571 a1 = a[1];
2572
2573 switch (call) {
2574 case SYS_SOCKET:
9d6a15c3 2575 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2576 break;
2577 case SYS_BIND:
a87d35d8 2578 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2579 break;
2580 case SYS_CONNECT:
1387c2c2 2581 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2582 break;
2583 case SYS_LISTEN:
25e290ee 2584 err = __sys_listen(a0, a1);
89bddce5
SH
2585 break;
2586 case SYS_ACCEPT:
4541e805
DB
2587 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2588 (int __user *)a[2], 0);
89bddce5
SH
2589 break;
2590 case SYS_GETSOCKNAME:
2591 err =
8882a107
DB
2592 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2593 (int __user *)a[2]);
89bddce5
SH
2594 break;
2595 case SYS_GETPEERNAME:
2596 err =
b21c8f83
DB
2597 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2598 (int __user *)a[2]);
89bddce5
SH
2599 break;
2600 case SYS_SOCKETPAIR:
6debc8d8 2601 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2602 break;
2603 case SYS_SEND:
f3bf896b
DB
2604 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2605 NULL, 0);
89bddce5
SH
2606 break;
2607 case SYS_SENDTO:
211b634b
DB
2608 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2609 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2610 break;
2611 case SYS_RECV:
d27e9afc
DB
2612 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2613 NULL, NULL);
89bddce5
SH
2614 break;
2615 case SYS_RECVFROM:
7a09e1eb
DB
2616 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2617 (struct sockaddr __user *)a[4],
2618 (int __user *)a[5]);
89bddce5
SH
2619 break;
2620 case SYS_SHUTDOWN:
005a1aea 2621 err = __sys_shutdown(a0, a1);
89bddce5
SH
2622 break;
2623 case SYS_SETSOCKOPT:
cc36dca0
DB
2624 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2625 a[4]);
89bddce5
SH
2626 break;
2627 case SYS_GETSOCKOPT:
2628 err =
13a2d70e
DB
2629 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2630 (int __user *)a[4]);
89bddce5
SH
2631 break;
2632 case SYS_SENDMSG:
e1834a32
DB
2633 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2634 a[2], true);
89bddce5 2635 break;
228e548e 2636 case SYS_SENDMMSG:
e1834a32
DB
2637 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2638 a[3], true);
228e548e 2639 break;
89bddce5 2640 case SYS_RECVMSG:
e1834a32
DB
2641 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2642 a[2], true);
89bddce5 2643 break;
a2e27255 2644 case SYS_RECVMMSG:
e11d4284
AB
2645 if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
2646 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2647 a[2], a[3],
2648 (struct __kernel_timespec __user *)a[4],
2649 NULL);
2650 else
2651 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2652 a[2], a[3], NULL,
2653 (struct old_timespec32 __user *)a[4]);
a2e27255 2654 break;
de11defe 2655 case SYS_ACCEPT4:
4541e805
DB
2656 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2657 (int __user *)a[2], a[3]);
aaca0bdc 2658 break;
89bddce5
SH
2659 default:
2660 err = -EINVAL;
2661 break;
1da177e4
LT
2662 }
2663 return err;
2664}
2665
89bddce5 2666#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2667
55737fda
SH
2668/**
2669 * sock_register - add a socket protocol handler
2670 * @ops: description of protocol
2671 *
1da177e4
LT
2672 * This function is called by a protocol handler that wants to
2673 * advertise its address family, and have it linked into the
e793c0f7 2674 * socket interface. The value ops->family corresponds to the
55737fda 2675 * socket system call protocol family.
1da177e4 2676 */
f0fd27d4 2677int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2678{
2679 int err;
2680
2681 if (ops->family >= NPROTO) {
3410f22e 2682 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2683 return -ENOBUFS;
2684 }
55737fda
SH
2685
2686 spin_lock(&net_family_lock);
190683a9
ED
2687 if (rcu_dereference_protected(net_families[ops->family],
2688 lockdep_is_held(&net_family_lock)))
55737fda
SH
2689 err = -EEXIST;
2690 else {
cf778b00 2691 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2692 err = 0;
2693 }
55737fda
SH
2694 spin_unlock(&net_family_lock);
2695
3410f22e 2696 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2697 return err;
2698}
c6d409cf 2699EXPORT_SYMBOL(sock_register);
1da177e4 2700
55737fda
SH
2701/**
2702 * sock_unregister - remove a protocol handler
2703 * @family: protocol family to remove
2704 *
1da177e4
LT
2705 * This function is called by a protocol handler that wants to
2706 * remove its address family, and have it unlinked from the
55737fda
SH
2707 * new socket creation.
2708 *
2709 * If protocol handler is a module, then it can use module reference
2710 * counts to protect against new references. If protocol handler is not
2711 * a module then it needs to provide its own protection in
2712 * the ops->create routine.
1da177e4 2713 */
f0fd27d4 2714void sock_unregister(int family)
1da177e4 2715{
f0fd27d4 2716 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2717
55737fda 2718 spin_lock(&net_family_lock);
a9b3cd7f 2719 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2720 spin_unlock(&net_family_lock);
2721
2722 synchronize_rcu();
2723
3410f22e 2724 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2725}
c6d409cf 2726EXPORT_SYMBOL(sock_unregister);
1da177e4 2727
bf2ae2e4
XL
2728bool sock_is_registered(int family)
2729{
66b51b0a 2730 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2731}
2732
77d76ea3 2733static int __init sock_init(void)
1da177e4 2734{
b3e19d92 2735 int err;
2ca794e5
EB
2736 /*
2737 * Initialize the network sysctl infrastructure.
2738 */
2739 err = net_sysctl_init();
2740 if (err)
2741 goto out;
b3e19d92 2742
1da177e4 2743 /*
89bddce5 2744 * Initialize skbuff SLAB cache
1da177e4
LT
2745 */
2746 skb_init();
1da177e4
LT
2747
2748 /*
89bddce5 2749 * Initialize the protocols module.
1da177e4
LT
2750 */
2751
2752 init_inodecache();
b3e19d92
NP
2753
2754 err = register_filesystem(&sock_fs_type);
2755 if (err)
2756 goto out_fs;
1da177e4 2757 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2758 if (IS_ERR(sock_mnt)) {
2759 err = PTR_ERR(sock_mnt);
2760 goto out_mount;
2761 }
77d76ea3
AK
2762
2763 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2764 */
2765
2766#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2767 err = netfilter_init();
2768 if (err)
2769 goto out;
1da177e4 2770#endif
cbeb321a 2771
408eccce 2772 ptp_classifier_init();
c1f19b51 2773
b3e19d92
NP
2774out:
2775 return err;
2776
2777out_mount:
2778 unregister_filesystem(&sock_fs_type);
2779out_fs:
2780 goto out;
1da177e4
LT
2781}
2782
77d76ea3
AK
2783core_initcall(sock_init); /* early initcall */
2784
1da177e4
LT
#ifdef CONFIG_PROC_FS
/* Emit the "sockets: used N" line into @seq. */
void socket_seq_show(struct seq_file *seq)
{
	seq_printf(seq, "sockets: used %d\n",
		   sock_inuse_get(seq->private));
}
#endif				/* CONFIG_PROC_FS */
1da177e4 2792
89bbfc95 2793#ifdef CONFIG_COMPAT
6b96018b 2794static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2795 unsigned int cmd, void __user *up)
7a229387 2796{
7a229387
AB
2797 mm_segment_t old_fs = get_fs();
2798 struct timeval ktv;
2799 int err;
2800
2801 set_fs(KERNEL_DS);
1cebf8f1
JB
2802 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv,
2803 sizeof(struct compat_ifreq));
7a229387 2804 set_fs(old_fs);
644595f8 2805 if (!err)
ed6fe9d6 2806 err = compat_put_timeval(&ktv, up);
644595f8 2807
7a229387
AB
2808 return err;
2809}
2810
6b96018b 2811static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2812 unsigned int cmd, void __user *up)
7a229387 2813{
7a229387
AB
2814 mm_segment_t old_fs = get_fs();
2815 struct timespec kts;
2816 int err;
2817
2818 set_fs(KERNEL_DS);
1cebf8f1
JB
2819 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts,
2820 sizeof(struct compat_ifreq));
7a229387 2821 set_fs(old_fs);
644595f8 2822 if (!err)
ed6fe9d6 2823 err = compat_put_timespec(&kts, up);
644595f8 2824
7a229387
AB
2825 return err;
2826}
2827
36fd633e 2828static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2829{
6b96018b 2830 struct compat_ifconf ifc32;
7a229387 2831 struct ifconf ifc;
7a229387
AB
2832 int err;
2833
6b96018b 2834 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2835 return -EFAULT;
2836
36fd633e
AV
2837 ifc.ifc_len = ifc32.ifc_len;
2838 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2839
36fd633e
AV
2840 rtnl_lock();
2841 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2842 rtnl_unlock();
7a229387
AB
2843 if (err)
2844 return err;
2845
36fd633e 2846 ifc32.ifc_len = ifc.ifc_len;
6b96018b 2847 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2848 return -EFAULT;
2849
2850 return 0;
2851}
2852
6b96018b 2853static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2854{
3a7da39d
BH
2855 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2856 bool convert_in = false, convert_out = false;
44c02a2c
AV
2857 size_t buf_size = 0;
2858 struct ethtool_rxnfc __user *rxnfc = NULL;
2859 struct ifreq ifr;
3a7da39d
BH
2860 u32 rule_cnt = 0, actual_rule_cnt;
2861 u32 ethcmd;
7a229387 2862 u32 data;
3a7da39d 2863 int ret;
7a229387 2864
3a7da39d
BH
2865 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2866 return -EFAULT;
7a229387 2867
3a7da39d
BH
2868 compat_rxnfc = compat_ptr(data);
2869
2870 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2871 return -EFAULT;
2872
3a7da39d
BH
2873 /* Most ethtool structures are defined without padding.
2874 * Unfortunately struct ethtool_rxnfc is an exception.
2875 */
2876 switch (ethcmd) {
2877 default:
2878 break;
2879 case ETHTOOL_GRXCLSRLALL:
2880 /* Buffer size is variable */
2881 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2882 return -EFAULT;
2883 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2884 return -ENOMEM;
2885 buf_size += rule_cnt * sizeof(u32);
2886 /* fall through */
2887 case ETHTOOL_GRXRINGS:
2888 case ETHTOOL_GRXCLSRLCNT:
2889 case ETHTOOL_GRXCLSRULE:
55664f32 2890 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2891 convert_out = true;
2892 /* fall through */
2893 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2894 buf_size += sizeof(struct ethtool_rxnfc);
2895 convert_in = true;
44c02a2c 2896 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
2897 break;
2898 }
2899
44c02a2c 2900 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2901 return -EFAULT;
2902
44c02a2c 2903 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 2904
3a7da39d 2905 if (convert_in) {
127fe533 2906 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2907 * fs.ring_cookie and at the end of fs, but nowhere else.
2908 */
127fe533
AD
2909 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2910 sizeof(compat_rxnfc->fs.m_ext) !=
2911 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2912 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2913 BUILD_BUG_ON(
2914 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2915 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2916 offsetof(struct ethtool_rxnfc, fs.location) -
2917 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2918
2919 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2920 (void __user *)(&rxnfc->fs.m_ext + 1) -
2921 (void __user *)rxnfc) ||
3a7da39d
BH
2922 copy_in_user(&rxnfc->fs.ring_cookie,
2923 &compat_rxnfc->fs.ring_cookie,
954b1244 2924 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
2925 (void __user *)&rxnfc->fs.ring_cookie))
2926 return -EFAULT;
2927 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2928 if (put_user(rule_cnt, &rxnfc->rule_cnt))
2929 return -EFAULT;
2930 } else if (copy_in_user(&rxnfc->rule_cnt,
2931 &compat_rxnfc->rule_cnt,
2932 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
2933 return -EFAULT;
2934 }
2935
44c02a2c 2936 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
2937 if (ret)
2938 return ret;
2939
2940 if (convert_out) {
2941 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2942 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2943 (const void __user *)rxnfc) ||
3a7da39d
BH
2944 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2945 &rxnfc->fs.ring_cookie,
954b1244
SH
2946 (const void __user *)(&rxnfc->fs.location + 1) -
2947 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2948 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2949 sizeof(rxnfc->rule_cnt)))
2950 return -EFAULT;
2951
2952 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2953 /* As an optimisation, we only copy the actual
2954 * number of rules that the underlying
2955 * function returned. Since Mallory might
2956 * change the rule count in user memory, we
2957 * check that it is less than the rule count
2958 * originally given (as the user buffer size),
2959 * which has been range-checked.
2960 */
2961 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2962 return -EFAULT;
2963 if (actual_rule_cnt < rule_cnt)
2964 rule_cnt = actual_rule_cnt;
2965 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2966 &rxnfc->rule_locs[0],
2967 rule_cnt * sizeof(u32)))
2968 return -EFAULT;
2969 }
2970 }
2971
2972 return 0;
7a229387
AB
2973}
2974
7a50a240
AB
2975static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2976{
7a50a240 2977 compat_uptr_t uptr32;
44c02a2c
AV
2978 struct ifreq ifr;
2979 void __user *saved;
2980 int err;
7a50a240 2981
44c02a2c 2982 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
2983 return -EFAULT;
2984
2985 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2986 return -EFAULT;
2987
44c02a2c
AV
2988 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
2989 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 2990
44c02a2c
AV
2991 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
2992 if (!err) {
2993 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
2994 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
2995 err = -EFAULT;
ccbd6a5a 2996 }
44c02a2c 2997 return err;
7a229387
AB
2998}
2999
590d4693
BH
3000/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3001static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3002 struct compat_ifreq __user *u_ifreq32)
7a229387 3003{
44c02a2c 3004 struct ifreq ifreq;
7a229387
AB
3005 u32 data32;
3006
44c02a2c 3007 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3008 return -EFAULT;
44c02a2c 3009 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3010 return -EFAULT;
44c02a2c 3011 ifreq.ifr_data = compat_ptr(data32);
7a229387 3012
44c02a2c 3013 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3014}
3015
a2116ed2
AB
3016static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3017 struct compat_ifreq __user *uifr32)
3018{
3019 struct ifreq ifr;
3020 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3021 int err;
3022
3023 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3024 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3025 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3026 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3027 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3028 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3029 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3030 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3031 if (err)
3032 return -EFAULT;
3033
44c02a2c 3034 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3035
3036 if (cmd == SIOCGIFMAP && !err) {
3037 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3038 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3039 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3040 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3041 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3042 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3043 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3044 if (err)
3045 err = -EFAULT;
3046 }
3047 return err;
3048}
3049
7a229387 3050struct rtentry32 {
c6d409cf 3051 u32 rt_pad1;
7a229387
AB
3052 struct sockaddr rt_dst; /* target address */
3053 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3054 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3055 unsigned short rt_flags;
3056 short rt_pad2;
3057 u32 rt_pad3;
3058 unsigned char rt_tos;
3059 unsigned char rt_class;
3060 short rt_pad4;
3061 short rt_metric; /* +1 for binary compatibility! */
7a229387 3062 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3063 u32 rt_mtu; /* per route MTU/Window */
3064 u32 rt_window; /* Window clamping */
7a229387
AB
3065 unsigned short rt_irtt; /* Initial RTT */
3066};
3067
3068struct in6_rtmsg32 {
3069 struct in6_addr rtmsg_dst;
3070 struct in6_addr rtmsg_src;
3071 struct in6_addr rtmsg_gateway;
3072 u32 rtmsg_type;
3073 u16 rtmsg_dst_len;
3074 u16 rtmsg_src_len;
3075 u32 rtmsg_metric;
3076 u32 rtmsg_info;
3077 u32 rtmsg_flags;
3078 s32 rtmsg_ifindex;
3079};
3080
6b96018b
AB
3081static int routing_ioctl(struct net *net, struct socket *sock,
3082 unsigned int cmd, void __user *argp)
7a229387
AB
3083{
3084 int ret;
3085 void *r = NULL;
3086 struct in6_rtmsg r6;
3087 struct rtentry r4;
3088 char devname[16];
3089 u32 rtdev;
3090 mm_segment_t old_fs = get_fs();
3091
6b96018b
AB
3092 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3093 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3094 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3095 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3096 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3097 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3098 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3099 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3100 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3101 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3102 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3103
3104 r = (void *) &r6;
3105 } else { /* ipv4 */
6b96018b 3106 struct rtentry32 __user *ur4 = argp;
c6d409cf 3107 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3108 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3109 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3110 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3111 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3112 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3113 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3114 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3115 if (rtdev) {
c6d409cf 3116 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3117 r4.rt_dev = (char __user __force *)devname;
3118 devname[15] = 0;
7a229387
AB
3119 } else
3120 r4.rt_dev = NULL;
3121
3122 r = (void *) &r4;
3123 }
3124
3125 if (ret) {
3126 ret = -EFAULT;
3127 goto out;
3128 }
3129
c6d409cf 3130 set_fs(KERNEL_DS);
1cebf8f1
JB
3131 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r,
3132 sizeof(struct compat_ifreq));
c6d409cf 3133 set_fs(old_fs);
7a229387
AB
3134
3135out:
7a229387
AB
3136 return ret;
3137}
3138
3139/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3140 * for some operations; this forces use of the newer bridge-utils that
25985edc 3141 * use compatible ioctls
7a229387 3142 */
6b96018b 3143static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3144{
6b96018b 3145 compat_ulong_t tmp;
7a229387 3146
6b96018b 3147 if (get_user(tmp, argp))
7a229387
AB
3148 return -EFAULT;
3149 if (tmp == BRCTL_GET_VERSION)
3150 return BRCTL_VERSION + 1;
3151 return -EINVAL;
3152}
3153
6b96018b
AB
3154static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3155 unsigned int cmd, unsigned long arg)
3156{
3157 void __user *argp = compat_ptr(arg);
3158 struct sock *sk = sock->sk;
3159 struct net *net = sock_net(sk);
7a229387 3160
6b96018b 3161 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3162 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3163
3164 switch (cmd) {
3165 case SIOCSIFBR:
3166 case SIOCGIFBR:
3167 return old_bridge_ioctl(argp);
6b96018b 3168 case SIOCGIFCONF:
36fd633e 3169 return compat_dev_ifconf(net, argp);
6b96018b
AB
3170 case SIOCETHTOOL:
3171 return ethtool_ioctl(net, argp);
7a50a240
AB
3172 case SIOCWANDEV:
3173 return compat_siocwandev(net, argp);
a2116ed2
AB
3174 case SIOCGIFMAP:
3175 case SIOCSIFMAP:
3176 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3177 case SIOCADDRT:
3178 case SIOCDELRT:
3179 return routing_ioctl(net, sock, cmd, argp);
3180 case SIOCGSTAMP:
3181 return do_siocgstamp(net, sock, cmd, argp);
3182 case SIOCGSTAMPNS:
3183 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3184 case SIOCBONDSLAVEINFOQUERY:
3185 case SIOCBONDINFOQUERY:
a2116ed2 3186 case SIOCSHWTSTAMP:
fd468c74 3187 case SIOCGHWTSTAMP:
590d4693 3188 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3189
3190 case FIOSETOWN:
3191 case SIOCSPGRP:
3192 case FIOGETOWN:
3193 case SIOCGPGRP:
3194 case SIOCBRADDBR:
3195 case SIOCBRDELBR:
3196 case SIOCGIFVLAN:
3197 case SIOCSIFVLAN:
3198 case SIOCADDDLCI:
3199 case SIOCDELDLCI:
c62cce2c 3200 case SIOCGSKNS:
6b96018b
AB
3201 return sock_ioctl(file, cmd, arg);
3202
3203 case SIOCGIFFLAGS:
3204 case SIOCSIFFLAGS:
3205 case SIOCGIFMETRIC:
3206 case SIOCSIFMETRIC:
3207 case SIOCGIFMTU:
3208 case SIOCSIFMTU:
3209 case SIOCGIFMEM:
3210 case SIOCSIFMEM:
3211 case SIOCGIFHWADDR:
3212 case SIOCSIFHWADDR:
3213 case SIOCADDMULTI:
3214 case SIOCDELMULTI:
3215 case SIOCGIFINDEX:
6b96018b
AB
3216 case SIOCGIFADDR:
3217 case SIOCSIFADDR:
3218 case SIOCSIFHWBROADCAST:
6b96018b 3219 case SIOCDIFADDR:
6b96018b
AB
3220 case SIOCGIFBRDADDR:
3221 case SIOCSIFBRDADDR:
3222 case SIOCGIFDSTADDR:
3223 case SIOCSIFDSTADDR:
3224 case SIOCGIFNETMASK:
3225 case SIOCSIFNETMASK:
3226 case SIOCSIFPFLAGS:
3227 case SIOCGIFPFLAGS:
3228 case SIOCGIFTXQLEN:
3229 case SIOCSIFTXQLEN:
3230 case SIOCBRADDIF:
3231 case SIOCBRDELIF:
9177efd3
AB
3232 case SIOCSIFNAME:
3233 case SIOCGMIIPHY:
3234 case SIOCGMIIREG:
3235 case SIOCSMIIREG:
6b96018b
AB
3236 case SIOCSARP:
3237 case SIOCGARP:
3238 case SIOCDARP:
6b96018b 3239 case SIOCATMARK:
f92d4fc9
AV
3240 case SIOCBONDENSLAVE:
3241 case SIOCBONDRELEASE:
3242 case SIOCBONDSETHWADDR:
3243 case SIOCBONDCHANGEACTIVE:
4cf808e7 3244 case SIOCGIFNAME:
1cebf8f1
JB
3245 return sock_do_ioctl(net, sock, cmd, arg,
3246 sizeof(struct compat_ifreq));
9177efd3
AB
3247 }
3248
6b96018b
AB
3249 return -ENOIOCTLCMD;
3250}
7a229387 3251
95c96174 3252static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3253 unsigned long arg)
89bbfc95
SP
3254{
3255 struct socket *sock = file->private_data;
3256 int ret = -ENOIOCTLCMD;
87de87d5
DM
3257 struct sock *sk;
3258 struct net *net;
3259
3260 sk = sock->sk;
3261 net = sock_net(sk);
89bbfc95
SP
3262
3263 if (sock->ops->compat_ioctl)
3264 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3265
87de87d5
DM
3266 if (ret == -ENOIOCTLCMD &&
3267 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3268 ret = compat_wext_handle_ioctl(net, cmd, arg);
3269
6b96018b
AB
3270 if (ret == -ENOIOCTLCMD)
3271 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3272
89bbfc95
SP
3273 return ret;
3274}
3275#endif
3276
ac5a488e
SS
3277int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3278{
3279 return sock->ops->bind(sock, addr, addrlen);
3280}
c6d409cf 3281EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3282
3283int kernel_listen(struct socket *sock, int backlog)
3284{
3285 return sock->ops->listen(sock, backlog);
3286}
c6d409cf 3287EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3288
3289int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3290{
3291 struct sock *sk = sock->sk;
3292 int err;
3293
3294 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3295 newsock);
3296 if (err < 0)
3297 goto done;
3298
cdfbabfb 3299 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3300 if (err < 0) {
3301 sock_release(*newsock);
fa8705b0 3302 *newsock = NULL;
ac5a488e
SS
3303 goto done;
3304 }
3305
3306 (*newsock)->ops = sock->ops;
1b08534e 3307 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3308
3309done:
3310 return err;
3311}
c6d409cf 3312EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3313
3314int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3315 int flags)
ac5a488e
SS
3316{
3317 return sock->ops->connect(sock, addr, addrlen, flags);
3318}
c6d409cf 3319EXPORT_SYMBOL(kernel_connect);
ac5a488e 3320
9b2c45d4 3321int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3322{
9b2c45d4 3323 return sock->ops->getname(sock, addr, 0);
ac5a488e 3324}
c6d409cf 3325EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3326
9b2c45d4 3327int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3328{
9b2c45d4 3329 return sock->ops->getname(sock, addr, 1);
ac5a488e 3330}
c6d409cf 3331EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3332
3333int kernel_getsockopt(struct socket *sock, int level, int optname,
3334 char *optval, int *optlen)
3335{
3336 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3337 char __user *uoptval;
3338 int __user *uoptlen;
ac5a488e
SS
3339 int err;
3340
fb8621bb
NK
3341 uoptval = (char __user __force *) optval;
3342 uoptlen = (int __user __force *) optlen;
3343
ac5a488e
SS
3344 set_fs(KERNEL_DS);
3345 if (level == SOL_SOCKET)
fb8621bb 3346 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3347 else
fb8621bb
NK
3348 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3349 uoptlen);
ac5a488e
SS
3350 set_fs(oldfs);
3351 return err;
3352}
c6d409cf 3353EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3354
3355int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3356 char *optval, unsigned int optlen)
ac5a488e
SS
3357{
3358 mm_segment_t oldfs = get_fs();
fb8621bb 3359 char __user *uoptval;
ac5a488e
SS
3360 int err;
3361
fb8621bb
NK
3362 uoptval = (char __user __force *) optval;
3363
ac5a488e
SS
3364 set_fs(KERNEL_DS);
3365 if (level == SOL_SOCKET)
fb8621bb 3366 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3367 else
fb8621bb 3368 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3369 optlen);
3370 set_fs(oldfs);
3371 return err;
3372}
c6d409cf 3373EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3374
3375int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3376 size_t size, int flags)
3377{
3378 if (sock->ops->sendpage)
3379 return sock->ops->sendpage(sock, page, offset, size, flags);
3380
3381 return sock_no_sendpage(sock, page, offset, size, flags);
3382}
c6d409cf 3383EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3384
306b13eb
TH
3385int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3386 size_t size, int flags)
3387{
3388 struct socket *sock = sk->sk_socket;
3389
3390 if (sock->ops->sendpage_locked)
3391 return sock->ops->sendpage_locked(sk, page, offset, size,
3392 flags);
3393
3394 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3395}
3396EXPORT_SYMBOL(kernel_sendpage_locked);
3397
91cf45f0
TM
3398int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3399{
3400 return sock->ops->shutdown(sock, how);
3401}
91cf45f0 3402EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075
P
3403
3404/* This routine returns the IP overhead imposed by a socket i.e.
3405 * the length of the underlying IP header, depending on whether
3406 * this is an IPv4 or IPv6 socket and the length from IP options turned
57240d00 3407 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075
P
3408 */
3409u32 kernel_sock_ip_overhead(struct sock *sk)
3410{
3411 struct inet_sock *inet;
3412 struct ip_options_rcu *opt;
3413 u32 overhead = 0;
113c3075
P
3414#if IS_ENABLED(CONFIG_IPV6)
3415 struct ipv6_pinfo *np;
3416 struct ipv6_txoptions *optv6 = NULL;
3417#endif /* IS_ENABLED(CONFIG_IPV6) */
3418
3419 if (!sk)
3420 return overhead;
3421
113c3075
P
3422 switch (sk->sk_family) {
3423 case AF_INET:
3424 inet = inet_sk(sk);
3425 overhead += sizeof(struct iphdr);
3426 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3427 sock_owned_by_user(sk));
113c3075
P
3428 if (opt)
3429 overhead += opt->opt.optlen;
3430 return overhead;
3431#if IS_ENABLED(CONFIG_IPV6)
3432 case AF_INET6:
3433 np = inet6_sk(sk);
3434 overhead += sizeof(struct ipv6hdr);
3435 if (np)
3436 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3437 sock_owned_by_user(sk));
113c3075
P
3438 if (optv6)
3439 overhead += (optv6->opt_flen + optv6->opt_nflen);
3440 return overhead;
3441#endif /* IS_ENABLED(CONFIG_IPV6) */
3442 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3443 return overhead;
3444 }
3445}
3446EXPORT_SYMBOL(kernel_sock_ip_overhead);