]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - net/socket.c
io_uring: add support for sendmsg()
[mirror_ubuntu-hirsute-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
c8e8cd57 92#include <linux/nospec.h>
8c3c447b 93#include <linux/indirect_call_wrapper.h>
1da177e4 94
7c0f6ba6 95#include <linux/uaccess.h>
1da177e4
LT
96#include <asm/unistd.h>
97
98#include <net/compat.h>
87de87d5 99#include <net/wext.h>
f8451725 100#include <net/cls_cgroup.h>
1da177e4
LT
101
102#include <net/sock.h>
103#include <linux/netfilter.h>
104
6b96018b
AB
105#include <linux/if_tun.h>
106#include <linux/ipv6_route.h>
107#include <linux/route.h>
6b96018b 108#include <linux/sockios.h>
076bb0c8 109#include <net/busy_poll.h>
f24b9be5 110#include <linux/errqueue.h>
06021292 111
8c3c447b
PA
112/* proto_ops for ipv4 and ipv6 use the same {recv,send}msg function */
113#if IS_ENABLED(CONFIG_INET)
114#define INDIRECT_CALL_INET4(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
115#else
116#define INDIRECT_CALL_INET4(f, f1, ...) f(__VA_ARGS__)
117#endif
118
e0d1095a 119#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
120unsigned int sysctl_net_busy_read __read_mostly;
121unsigned int sysctl_net_busy_poll __read_mostly;
06021292 122#endif
6b96018b 123
8ae5e030
AV
124static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
125static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 126static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
127
128static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
129static __poll_t sock_poll(struct file *file,
130 struct poll_table_struct *wait);
89bddce5 131static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
132#ifdef CONFIG_COMPAT
133static long compat_sock_ioctl(struct file *file,
89bddce5 134 unsigned int cmd, unsigned long arg);
89bbfc95 135#endif
1da177e4 136static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
137static ssize_t sock_sendpage(struct file *file, struct page *page,
138 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 139static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 140 struct pipe_inode_info *pipe, size_t len,
9c55e01c 141 unsigned int flags);
1da177e4 142
1da177e4
LT
143/*
144 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
145 * in the operation structures but are done directly via the socketcall() multiplexor.
146 */
147
da7071d7 148static const struct file_operations socket_file_ops = {
1da177e4
LT
149 .owner = THIS_MODULE,
150 .llseek = no_llseek,
8ae5e030
AV
151 .read_iter = sock_read_iter,
152 .write_iter = sock_write_iter,
1da177e4
LT
153 .poll = sock_poll,
154 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
155#ifdef CONFIG_COMPAT
156 .compat_ioctl = compat_sock_ioctl,
157#endif
1da177e4 158 .mmap = sock_mmap,
1da177e4
LT
159 .release = sock_close,
160 .fasync = sock_fasync,
5274f052
JA
161 .sendpage = sock_sendpage,
162 .splice_write = generic_splice_sendpage,
9c55e01c 163 .splice_read = sock_splice_read,
1da177e4
LT
164};
165
166/*
167 * The protocol list. Each protocol is registered in here.
168 */
169
1da177e4 170static DEFINE_SPINLOCK(net_family_lock);
190683a9 171static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 172
/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */
178
1da177e4
LT
179/**
180 * move_addr_to_kernel - copy a socket address into kernel space
181 * @uaddr: Address in user space
182 * @kaddr: Address in kernel space
183 * @ulen: Length in user space
184 *
185 * The address is copied into kernel space. If the provided address is
186 * too long an error code of -EINVAL is returned. If the copy gives
187 * invalid addresses -EFAULT is returned. On a success 0 is returned.
188 */
189
43db362d 190int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 191{
230b1839 192 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 193 return -EINVAL;
89bddce5 194 if (ulen == 0)
1da177e4 195 return 0;
89bddce5 196 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 197 return -EFAULT;
3ec3b2fb 198 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
199}
200
201/**
202 * move_addr_to_user - copy an address to user space
203 * @kaddr: kernel space address
204 * @klen: length of address in kernel
205 * @uaddr: user space address
206 * @ulen: pointer to user length field
207 *
208 * The value pointed to by ulen on entry is the buffer length available.
209 * This is overwritten with the buffer space used. -EINVAL is returned
210 * if an overlong buffer is specified or a negative buffer size. -EFAULT
211 * is returned if either the buffer or the length field are not
212 * accessible.
213 * After copying the data up to the limit the user specifies, the true
214 * length of the data is written over the length limit the user
215 * specified. Zero is returned for a success.
216 */
89bddce5 217
43db362d 218static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 219 void __user *uaddr, int __user *ulen)
1da177e4
LT
220{
221 int err;
222 int len;
223
68c6beb3 224 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
225 err = get_user(len, ulen);
226 if (err)
1da177e4 227 return err;
89bddce5
SH
228 if (len > klen)
229 len = klen;
68c6beb3 230 if (len < 0)
1da177e4 231 return -EINVAL;
89bddce5 232 if (len) {
d6fe3945
SG
233 if (audit_sockaddr(klen, kaddr))
234 return -ENOMEM;
89bddce5 235 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
236 return -EFAULT;
237 }
238 /*
89bddce5
SH
239 * "fromlen shall refer to the value before truncation.."
240 * 1003.1g
1da177e4
LT
241 */
242 return __put_user(klen, ulen);
243}
244
08009a76 245static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
246
247static struct inode *sock_alloc_inode(struct super_block *sb)
248{
249 struct socket_alloc *ei;
eaefd110 250 struct socket_wq *wq;
89bddce5 251
e94b1766 252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
253 if (!ei)
254 return NULL;
eaefd110
ED
255 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
256 if (!wq) {
43815482
ED
257 kmem_cache_free(sock_inode_cachep, ei);
258 return NULL;
259 }
eaefd110
ED
260 init_waitqueue_head(&wq->wait);
261 wq->fasync_list = NULL;
574aab1e 262 wq->flags = 0;
e6476c21 263 ei->socket.wq = wq;
89bddce5 264
1da177e4
LT
265 ei->socket.state = SS_UNCONNECTED;
266 ei->socket.flags = 0;
267 ei->socket.ops = NULL;
268 ei->socket.sk = NULL;
269 ei->socket.file = NULL;
1da177e4
LT
270
271 return &ei->vfs_inode;
272}
273
274static void sock_destroy_inode(struct inode *inode)
275{
43815482
ED
276 struct socket_alloc *ei;
277
278 ei = container_of(inode, struct socket_alloc, vfs_inode);
e6476c21 279 kfree_rcu(ei->socket.wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1e911632 290static void init_inodecache(void)
1da177e4
LT
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1e911632 299 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
300}
301
b87221de 302static const struct super_operations sockfs_ops = {
c6d409cf
ED
303 .alloc_inode = sock_alloc_inode,
304 .destroy_inode = sock_destroy_inode,
305 .statfs = simple_statfs,
1da177e4
LT
306};
307
c23fbb6b
ED
308/*
309 * sockfs_dname() is called from d_path().
310 */
311static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
312{
313 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 314 d_inode(dentry)->i_ino);
c23fbb6b
ED
315}
316
3ba13d17 317static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 318 .d_dname = sockfs_dname,
1da177e4
LT
319};
320
bba0bd31
AG
321static int sockfs_xattr_get(const struct xattr_handler *handler,
322 struct dentry *dentry, struct inode *inode,
323 const char *suffix, void *value, size_t size)
324{
325 if (value) {
326 if (dentry->d_name.len + 1 > size)
327 return -ERANGE;
328 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
329 }
330 return dentry->d_name.len + 1;
331}
332
333#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
334#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
335#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
336
337static const struct xattr_handler sockfs_xattr_handler = {
338 .name = XATTR_NAME_SOCKPROTONAME,
339 .get = sockfs_xattr_get,
340};
341
4a590153
AG
342static int sockfs_security_xattr_set(const struct xattr_handler *handler,
343 struct dentry *dentry, struct inode *inode,
344 const char *suffix, const void *value,
345 size_t size, int flags)
346{
347 /* Handled by LSM. */
348 return -EAGAIN;
349}
350
351static const struct xattr_handler sockfs_security_xattr_handler = {
352 .prefix = XATTR_SECURITY_PREFIX,
353 .set = sockfs_security_xattr_set,
354};
355
bba0bd31
AG
356static const struct xattr_handler *sockfs_xattr_handlers[] = {
357 &sockfs_xattr_handler,
4a590153 358 &sockfs_security_xattr_handler,
bba0bd31
AG
359 NULL
360};
361
c74a1cbb
AV
362static struct dentry *sockfs_mount(struct file_system_type *fs_type,
363 int flags, const char *dev_name, void *data)
364{
bba0bd31
AG
365 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
366 sockfs_xattr_handlers,
367 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
368}
369
370static struct vfsmount *sock_mnt __read_mostly;
371
372static struct file_system_type sock_fs_type = {
373 .name = "sockfs",
374 .mount = sockfs_mount,
375 .kill_sb = kill_anon_super,
376};
377
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success the file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we return
 *	from this function. We rely on the fact that we do not refer
 *	to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY BE not valid!
 *	This race condition is unavoidable
 *	with shared fd spaces; we cannot solve it inside the kernel,
 *	but we still take care of internal coherence.
 */
394
8a3c245c
PT
395/**
396 * sock_alloc_file - Bind a &socket to a &file
397 * @sock: socket
398 * @flags: file status flags
399 * @dname: protocol name
400 *
401 * Returns the &file bound with @sock, implicitly storing it
402 * in sock->file. If dname is %NULL, sets to "".
403 * On failure the return is a ERR pointer (see linux/err.h).
404 * This function uses GFP_KERNEL internally.
405 */
406
aab174f0 407struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 408{
7cbe66b6 409 struct file *file;
1da177e4 410
d93aa9d8
AV
411 if (!dname)
412 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 413
d93aa9d8
AV
414 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
415 O_RDWR | (flags & O_NONBLOCK),
416 &socket_file_ops);
b5ffe634 417 if (IS_ERR(file)) {
8e1611e2 418 sock_release(sock);
39b65252 419 return file;
cc3808f8
AV
420 }
421
422 sock->file = file;
39d8c1b6 423 file->private_data = sock;
28407630 424 return file;
39d8c1b6 425}
56b31d1c 426EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 427
56b31d1c 428static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
429{
430 struct file *newfile;
28407630 431 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
432 if (unlikely(fd < 0)) {
433 sock_release(sock);
28407630 434 return fd;
ce4bb04c 435 }
39d8c1b6 436
aab174f0 437 newfile = sock_alloc_file(sock, flags, NULL);
28407630 438 if (likely(!IS_ERR(newfile))) {
39d8c1b6 439 fd_install(fd, newfile);
28407630
AV
440 return fd;
441 }
7cbe66b6 442
28407630
AV
443 put_unused_fd(fd);
444 return PTR_ERR(newfile);
1da177e4
LT
445}
446
8a3c245c
PT
447/**
448 * sock_from_file - Return the &socket bounded to @file.
449 * @file: file
450 * @err: pointer to an error code return
451 *
452 * On failure returns %NULL and assigns -ENOTSOCK to @err.
453 */
454
406a3c63 455struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 456{
6cb153ca
BL
457 if (file->f_op == &socket_file_ops)
458 return file->private_data; /* set in sock_map_fd */
459
23bb80d2
ED
460 *err = -ENOTSOCK;
461 return NULL;
6cb153ca 462}
406a3c63 463EXPORT_SYMBOL(sock_from_file);
6cb153ca 464
1da177e4 465/**
c6d409cf 466 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
467 * @fd: file handle
468 * @err: pointer to an error code return
469 *
470 * The file handle passed in is locked and the socket it is bound
241c4667 471 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
472 * with a negative errno code and NULL is returned. The function checks
473 * for both invalid handles and passing a handle which is not a socket.
474 *
475 * On a success the socket object pointer is returned.
476 */
477
478struct socket *sockfd_lookup(int fd, int *err)
479{
480 struct file *file;
1da177e4
LT
481 struct socket *sock;
482
89bddce5
SH
483 file = fget(fd);
484 if (!file) {
1da177e4
LT
485 *err = -EBADF;
486 return NULL;
487 }
89bddce5 488
6cb153ca
BL
489 sock = sock_from_file(file, err);
490 if (!sock)
1da177e4 491 fput(file);
6cb153ca
BL
492 return sock;
493}
c6d409cf 494EXPORT_SYMBOL(sockfd_lookup);
1da177e4 495
6cb153ca
BL
496static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
497{
00e188ef 498 struct fd f = fdget(fd);
6cb153ca
BL
499 struct socket *sock;
500
3672558c 501 *err = -EBADF;
00e188ef
AV
502 if (f.file) {
503 sock = sock_from_file(f.file, err);
504 if (likely(sock)) {
505 *fput_needed = f.flags;
6cb153ca 506 return sock;
00e188ef
AV
507 }
508 fdput(f);
1da177e4 509 }
6cb153ca 510 return NULL;
1da177e4
LT
511}
512
600e1779
MY
513static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
514 size_t size)
515{
516 ssize_t len;
517 ssize_t used = 0;
518
c5ef6035 519 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
520 if (len < 0)
521 return len;
522 used += len;
523 if (buffer) {
524 if (size < used)
525 return -ERANGE;
526 buffer += len;
527 }
528
529 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
530 used += len;
531 if (buffer) {
532 if (size < used)
533 return -ERANGE;
534 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
535 buffer += len;
536 }
537
538 return used;
539}
540
dc647ec8 541static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
542{
543 int err = simple_setattr(dentry, iattr);
544
e1a3a60a 545 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
546 struct socket *sock = SOCKET_I(d_inode(dentry));
547
6d8c50dc
CW
548 if (sock->sk)
549 sock->sk->sk_uid = iattr->ia_uid;
550 else
551 err = -ENOENT;
86741ec2
LC
552 }
553
554 return err;
555}
556
600e1779 557static const struct inode_operations sockfs_inode_ops = {
600e1779 558 .listxattr = sockfs_listxattr,
86741ec2 559 .setattr = sockfs_setattr,
600e1779
MY
560};
561
1da177e4 562/**
8a3c245c 563 * sock_alloc - allocate a socket
89bddce5 564 *
1da177e4
LT
565 * Allocate a new inode and socket object. The two are bound together
566 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 567 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
568 */
569
f4a00aac 570struct socket *sock_alloc(void)
1da177e4 571{
89bddce5
SH
572 struct inode *inode;
573 struct socket *sock;
1da177e4 574
a209dfc7 575 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
576 if (!inode)
577 return NULL;
578
579 sock = SOCKET_I(inode);
580
85fe4025 581 inode->i_ino = get_next_ino();
89bddce5 582 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
583 inode->i_uid = current_fsuid();
584 inode->i_gid = current_fsgid();
600e1779 585 inode->i_op = &sockfs_inode_ops;
1da177e4 586
1da177e4
LT
587 return sock;
588}
f4a00aac 589EXPORT_SYMBOL(sock_alloc);
1da177e4 590
1da177e4 591/**
8a3c245c 592 * sock_release - close a socket
1da177e4
LT
593 * @sock: socket to close
594 *
595 * The socket is released from the protocol stack if it has a release
596 * callback, and the inode is then released if the socket is bound to
89bddce5 597 * an inode not a file.
1da177e4 598 */
89bddce5 599
6d8c50dc 600static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
601{
602 if (sock->ops) {
603 struct module *owner = sock->ops->owner;
604
6d8c50dc
CW
605 if (inode)
606 inode_lock(inode);
1da177e4 607 sock->ops->release(sock);
ff7b11aa 608 sock->sk = NULL;
6d8c50dc
CW
609 if (inode)
610 inode_unlock(inode);
1da177e4
LT
611 sock->ops = NULL;
612 module_put(owner);
613 }
614
e6476c21 615 if (sock->wq->fasync_list)
3410f22e 616 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 617
1da177e4
LT
618 if (!sock->file) {
619 iput(SOCK_INODE(sock));
620 return;
621 }
89bddce5 622 sock->file = NULL;
1da177e4 623}
6d8c50dc
CW
624
625void sock_release(struct socket *sock)
626{
627 __sock_release(sock, NULL);
628}
c6d409cf 629EXPORT_SYMBOL(sock_release);
1da177e4 630
c14ac945 631void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 632{
140c55d4
ED
633 u8 flags = *tx_flags;
634
c14ac945 635 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
636 flags |= SKBTX_HW_TSTAMP;
637
c14ac945 638 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
639 flags |= SKBTX_SW_TSTAMP;
640
c14ac945 641 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
642 flags |= SKBTX_SCHED_TSTAMP;
643
140c55d4 644 *tx_flags = flags;
20d49473 645}
67cc0d40 646EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 647
8c3c447b
PA
648INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
649 size_t));
d8725c86 650static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 651{
8c3c447b
PA
652 int ret = INDIRECT_CALL_INET4(sock->ops->sendmsg, inet_sendmsg, sock,
653 msg, msg_data_left(msg));
d8725c86
AV
654 BUG_ON(ret == -EIOCBQUEUED);
655 return ret;
1da177e4
LT
656}
657
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM: if
 *	security_socket_sendmsg() returns non-zero, that value is returned
 *	and nothing is sent.
 *	Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 674
8a3c245c
PT
675/**
676 * kernel_sendmsg - send a message through @sock (kernel-space)
677 * @sock: socket
678 * @msg: message header
679 * @vec: kernel vec
680 * @num: vec array length
681 * @size: total message data size
682 *
683 * Builds the message data with @vec and sends it through @sock.
684 * Returns the number of bytes sent, or an error code.
685 */
686
1da177e4
LT
687int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
688 struct kvec *vec, size_t num, size_t size)
689{
aa563d7b 690 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 691 return sock_sendmsg(sock, msg);
1da177e4 692}
c6d409cf 693EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 694
8a3c245c
PT
695/**
696 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
697 * @sk: sock
698 * @msg: message header
699 * @vec: output s/g array
700 * @num: output s/g array length
701 * @size: total message data size
702 *
703 * Builds the message data with @vec and sends it through @sock.
704 * Returns the number of bytes sent, or an error code.
705 * Caller must hold @sk.
706 */
707
306b13eb
TH
708int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
709 struct kvec *vec, size_t num, size_t size)
710{
711 struct socket *sock = sk->sk_socket;
712
713 if (!sock->ops->sendmsg_locked)
db5980d8 714 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 715
aa563d7b 716 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
717
718 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
719}
720EXPORT_SYMBOL(kernel_sendmsg_locked);
721
8605330a
SHY
722static bool skb_is_err_queue(const struct sk_buff *skb)
723{
724 /* pkt_type of skbs enqueued on the error queue are set to
725 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
726 * in recvmsg, since skbs received on a local socket will never
727 * have a pkt_type of PACKET_OUTGOING.
728 */
729 return skb->pkt_type == PACKET_OUTGOING;
730}
731
b50a5c70
ML
732/* On transmit, software and hardware timestamps are returned independently.
733 * As the two skb clones share the hardware timestamp, which may be updated
734 * before the software timestamp is received, a hardware TX timestamp may be
735 * returned only if there is no software TX timestamp. Ignore false software
736 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 737 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
738 * hardware timestamp.
739 */
740static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
741{
742 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
743}
744
aad9c8c4
ML
745static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
746{
747 struct scm_ts_pktinfo ts_pktinfo;
748 struct net_device *orig_dev;
749
750 if (!skb_mac_header_was_set(skb))
751 return;
752
753 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
754
755 rcu_read_lock();
756 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
757 if (orig_dev)
758 ts_pktinfo.if_index = orig_dev->ifindex;
759 rcu_read_unlock();
760
761 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
762 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
763 sizeof(ts_pktinfo), &ts_pktinfo);
764}
765
92f37fd2
ED
766/*
767 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
768 */
769void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
770 struct sk_buff *skb)
771{
20d49473 772 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 773 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
774 struct scm_timestamping_internal tss;
775
b50a5c70 776 int empty = 1, false_tstamp = 0;
20d49473
PO
777 struct skb_shared_hwtstamps *shhwtstamps =
778 skb_hwtstamps(skb);
779
780 /* Race occurred between timestamp enabling and packet
781 receiving. Fill in the current time for now. */
b50a5c70 782 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 783 __net_timestamp(skb);
b50a5c70
ML
784 false_tstamp = 1;
785 }
20d49473
PO
786
787 if (need_software_tstamp) {
788 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
789 if (new_tstamp) {
790 struct __kernel_sock_timeval tv;
791
792 skb_get_new_timestamp(skb, &tv);
793 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
794 sizeof(tv), &tv);
795 } else {
796 struct __kernel_old_timeval tv;
797
798 skb_get_timestamp(skb, &tv);
799 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
800 sizeof(tv), &tv);
801 }
20d49473 802 } else {
887feae3
DD
803 if (new_tstamp) {
804 struct __kernel_timespec ts;
805
806 skb_get_new_timestampns(skb, &ts);
807 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
808 sizeof(ts), &ts);
809 } else {
810 struct timespec ts;
811
812 skb_get_timestampns(skb, &ts);
813 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
814 sizeof(ts), &ts);
815 }
20d49473
PO
816 }
817 }
818
f24b9be5 819 memset(&tss, 0, sizeof(tss));
c199105d 820 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 821 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 822 empty = 0;
4d276eb6 823 if (shhwtstamps &&
b9f40e21 824 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 825 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 826 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 827 empty = 0;
aad9c8c4
ML
828 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
829 !skb_is_err_queue(skb))
830 put_ts_pktinfo(msg, skb);
831 }
1c885808 832 if (!empty) {
9718475e
DD
833 if (sock_flag(sk, SOCK_TSTAMP_NEW))
834 put_cmsg_scm_timestamping64(msg, &tss);
835 else
836 put_cmsg_scm_timestamping(msg, &tss);
1c885808 837
8605330a 838 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 839 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
840 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
841 skb->len, skb->data);
842 }
92f37fd2 843}
7c81fd8b
ACM
844EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
845
6e3e939f
JB
846void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
847 struct sk_buff *skb)
848{
849 int ack;
850
851 if (!sock_flag(sk, SOCK_WIFI_STATUS))
852 return;
853 if (!skb->wifi_acked_valid)
854 return;
855
856 ack = skb->wifi_acked;
857
858 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
859}
860EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
861
11165f14 862static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
863 struct sk_buff *skb)
3b885787 864{
744d5a3e 865 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 866 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 867 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
868}
869
/*
 * Convenience wrapper: emit the timestamp control messages first, then the
 * drop-count control message, for @skb on @msg.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);

	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 877
8c3c447b
PA
878INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
879 size_t , int ));
1b784140 880static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 881 int flags)
1da177e4 882{
8c3c447b
PA
883 return INDIRECT_CALL_INET4(sock->ops->recvmsg, inet_recvmsg, sock, msg,
884 msg_data_left(msg), flags);
1da177e4
LT
885}
886
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM: if
 *	security_socket_recvmsg() returns non-zero, that value is returned
 *	and nothing is received. Returns the total number of bytes
 *	received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 903
c1249c0a 904/**
8a3c245c
PT
905 * kernel_recvmsg - Receive a message from a socket (kernel space)
906 * @sock: The socket to receive the message from
907 * @msg: Received message
908 * @vec: Input s/g array for message data
909 * @num: Size of input s/g array
910 * @size: Number of bytes to read
911 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 912 *
8a3c245c
PT
913 * On return the msg structure contains the scatter/gather array passed in the
914 * vec argument. The array is modified so that it consists of the unfilled
915 * portion of the original array.
c1249c0a 916 *
8a3c245c 917 * The returned value is the total number of bytes received, or an error.
c1249c0a 918 */
8a3c245c 919
89bddce5
SH
920int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
921 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
922{
923 mm_segment_t oldfs = get_fs();
924 int result;
925
aa563d7b 926 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 927 set_fs(KERNEL_DS);
2da62906 928 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
929 set_fs(oldfs);
930 return result;
931}
c6d409cf 932EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 933
ce1d4d3e
CH
934static ssize_t sock_sendpage(struct file *file, struct page *page,
935 int offset, size_t size, loff_t *ppos, int more)
1da177e4 936{
1da177e4
LT
937 struct socket *sock;
938 int flags;
939
ce1d4d3e
CH
940 sock = file->private_data;
941
35f9c09f
ED
942 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
943 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
944 flags |= more;
ce1d4d3e 945
e6949583 946 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 947}
1da177e4 948
9c55e01c 949static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 950 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
951 unsigned int flags)
952{
953 struct socket *sock = file->private_data;
954
997b37da 955 if (unlikely(!sock->ops->splice_read))
95506588 956 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 957
9c55e01c
JA
958 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
959}
960
8ae5e030 961static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 962{
6d652330
AV
963 struct file *file = iocb->ki_filp;
964 struct socket *sock = file->private_data;
0345f931 965 struct msghdr msg = {.msg_iter = *to,
966 .msg_iocb = iocb};
8ae5e030 967 ssize_t res;
ce1d4d3e 968
8ae5e030
AV
969 if (file->f_flags & O_NONBLOCK)
970 msg.msg_flags = MSG_DONTWAIT;
971
972 if (iocb->ki_pos != 0)
1da177e4 973 return -ESPIPE;
027445c3 974
66ee59af 975 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
976 return 0;
977
2da62906 978 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
979 *to = msg.msg_iter;
980 return res;
1da177e4
LT
981}
982
8ae5e030 983static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 984{
6d652330
AV
985 struct file *file = iocb->ki_filp;
986 struct socket *sock = file->private_data;
0345f931 987 struct msghdr msg = {.msg_iter = *from,
988 .msg_iocb = iocb};
8ae5e030 989 ssize_t res;
1da177e4 990
8ae5e030 991 if (iocb->ki_pos != 0)
ce1d4d3e 992 return -ESPIPE;
027445c3 993
8ae5e030
AV
994 if (file->f_flags & O_NONBLOCK)
995 msg.msg_flags = MSG_DONTWAIT;
996
6d652330
AV
997 if (sock->type == SOCK_SEQPACKET)
998 msg.msg_flags |= MSG_EOR;
999
d8725c86 1000 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1001 *from = msg.msg_iter;
1002 return res;
1da177e4
LT
1003}
1004
1da177e4
LT
1005/*
1006 * Atomic setting of ioctl hooks to avoid race
1007 * with module unload.
1008 */
1009
4a3e2f71 1010static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1011static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1012
881d966b 1013void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1014{
4a3e2f71 1015 mutex_lock(&br_ioctl_mutex);
1da177e4 1016 br_ioctl_hook = hook;
4a3e2f71 1017 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1018}
1019EXPORT_SYMBOL(brioctl_set);
1020
4a3e2f71 1021static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1022static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1023
881d966b 1024void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1025{
4a3e2f71 1026 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1027 vlan_ioctl_hook = hook;
4a3e2f71 1028 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1029}
1030EXPORT_SYMBOL(vlan_ioctl_set);
1031
4a3e2f71 1032static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1033static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1034
89bddce5 1035void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1036{
4a3e2f71 1037 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1038 dlci_ioctl_hook = hook;
4a3e2f71 1039 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1040}
1041EXPORT_SYMBOL(dlci_ioctl_set);
1042
6b96018b 1043static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1044 unsigned int cmd, unsigned long arg)
6b96018b
AB
1045{
1046 int err;
1047 void __user *argp = (void __user *)arg;
1048
1049 err = sock->ops->ioctl(sock, cmd, arg);
1050
1051 /*
1052 * If this ioctl is unknown try to hand it down
1053 * to the NIC driver.
1054 */
36fd633e
AV
1055 if (err != -ENOIOCTLCMD)
1056 return err;
6b96018b 1057
36fd633e
AV
1058 if (cmd == SIOCGIFCONF) {
1059 struct ifconf ifc;
1060 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1061 return -EFAULT;
1062 rtnl_lock();
1063 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1064 rtnl_unlock();
1065 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1066 err = -EFAULT;
44c02a2c
AV
1067 } else {
1068 struct ifreq ifr;
1069 bool need_copyout;
63ff03ab 1070 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1071 return -EFAULT;
1072 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1073 if (!err && need_copyout)
63ff03ab 1074 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1075 return -EFAULT;
36fd633e 1076 }
6b96018b
AB
1077 return err;
1078}
1079
1da177e4
LT
1080/*
1081 * With an ioctl, arg may well be a user mode pointer, but we don't know
1082 * what to do with it - that's up to the protocol still.
1083 */
1084
8a3c245c
PT
1085/**
1086 * get_net_ns - increment the refcount of the network namespace
1087 * @ns: common namespace (net)
1088 *
1089 * Returns the net's common namespace.
1090 */
1091
d8d211a2 1092struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1093{
1094 return &get_net(container_of(ns, struct net, ns))->ns;
1095}
d8d211a2 1096EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1097
1da177e4
LT
1098static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1099{
1100 struct socket *sock;
881d966b 1101 struct sock *sk;
1da177e4
LT
1102 void __user *argp = (void __user *)arg;
1103 int pid, err;
881d966b 1104 struct net *net;
1da177e4 1105
b69aee04 1106 sock = file->private_data;
881d966b 1107 sk = sock->sk;
3b1e0a65 1108 net = sock_net(sk);
44c02a2c
AV
1109 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1110 struct ifreq ifr;
1111 bool need_copyout;
1112 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1113 return -EFAULT;
1114 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1115 if (!err && need_copyout)
1116 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1117 return -EFAULT;
1da177e4 1118 } else
3d23e349 1119#ifdef CONFIG_WEXT_CORE
1da177e4 1120 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1121 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1122 } else
3d23e349 1123#endif
89bddce5 1124 switch (cmd) {
1da177e4
LT
1125 case FIOSETOWN:
1126 case SIOCSPGRP:
1127 err = -EFAULT;
1128 if (get_user(pid, (int __user *)argp))
1129 break;
393cc3f5 1130 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1131 break;
1132 case FIOGETOWN:
1133 case SIOCGPGRP:
609d7fa9 1134 err = put_user(f_getown(sock->file),
89bddce5 1135 (int __user *)argp);
1da177e4
LT
1136 break;
1137 case SIOCGIFBR:
1138 case SIOCSIFBR:
1139 case SIOCBRADDBR:
1140 case SIOCBRDELBR:
1141 err = -ENOPKG;
1142 if (!br_ioctl_hook)
1143 request_module("bridge");
1144
4a3e2f71 1145 mutex_lock(&br_ioctl_mutex);
89bddce5 1146 if (br_ioctl_hook)
881d966b 1147 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1148 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1149 break;
1150 case SIOCGIFVLAN:
1151 case SIOCSIFVLAN:
1152 err = -ENOPKG;
1153 if (!vlan_ioctl_hook)
1154 request_module("8021q");
1155
4a3e2f71 1156 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1157 if (vlan_ioctl_hook)
881d966b 1158 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1159 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1160 break;
1da177e4
LT
1161 case SIOCADDDLCI:
1162 case SIOCDELDLCI:
1163 err = -ENOPKG;
1164 if (!dlci_ioctl_hook)
1165 request_module("dlci");
1166
7512cbf6
PE
1167 mutex_lock(&dlci_ioctl_mutex);
1168 if (dlci_ioctl_hook)
1da177e4 1169 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1170 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1171 break;
c62cce2c
AV
1172 case SIOCGSKNS:
1173 err = -EPERM;
1174 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1175 break;
1176
1177 err = open_related_ns(&net->ns, get_net_ns);
1178 break;
0768e170
AB
1179 case SIOCGSTAMP_OLD:
1180 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1181 if (!sock->ops->gettstamp) {
1182 err = -ENOIOCTLCMD;
1183 break;
1184 }
1185 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1186 cmd == SIOCGSTAMP_OLD,
1187 !IS_ENABLED(CONFIG_64BIT));
60747828 1188 break;
0768e170
AB
1189 case SIOCGSTAMP_NEW:
1190 case SIOCGSTAMPNS_NEW:
1191 if (!sock->ops->gettstamp) {
1192 err = -ENOIOCTLCMD;
1193 break;
1194 }
1195 err = sock->ops->gettstamp(sock, argp,
1196 cmd == SIOCGSTAMP_NEW,
1197 false);
c7cbdbf2 1198 break;
1da177e4 1199 default:
63ff03ab 1200 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1201 break;
89bddce5 1202 }
1da177e4
LT
1203 return err;
1204}
1205
8a3c245c
PT
1206/**
1207 * sock_create_lite - creates a socket
1208 * @family: protocol family (AF_INET, ...)
1209 * @type: communication type (SOCK_STREAM, ...)
1210 * @protocol: protocol (0, ...)
1211 * @res: new socket
1212 *
1213 * Creates a new socket and assigns it to @res, passing through LSM.
1214 * The new socket initialization is not complete, see kernel_accept().
1215 * Returns 0 or an error. On failure @res is set to %NULL.
1216 * This function internally uses GFP_KERNEL.
1217 */
1218
1da177e4
LT
1219int sock_create_lite(int family, int type, int protocol, struct socket **res)
1220{
1221 int err;
1222 struct socket *sock = NULL;
89bddce5 1223
1da177e4
LT
1224 err = security_socket_create(family, type, protocol, 1);
1225 if (err)
1226 goto out;
1227
1228 sock = sock_alloc();
1229 if (!sock) {
1230 err = -ENOMEM;
1231 goto out;
1232 }
1233
1da177e4 1234 sock->type = type;
7420ed23
VY
1235 err = security_socket_post_create(sock, family, type, protocol, 1);
1236 if (err)
1237 goto out_release;
1238
1da177e4
LT
1239out:
1240 *res = sock;
1241 return err;
7420ed23
VY
1242out_release:
1243 sock_release(sock);
1244 sock = NULL;
1245 goto out;
1da177e4 1246}
c6d409cf 1247EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1248
1249/* No kernel lock held - perfect */
ade994f4 1250static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1251{
3cafb376 1252 struct socket *sock = file->private_data;
a331de3b 1253 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1254
e88958e6
CH
1255 if (!sock->ops->poll)
1256 return 0;
f641f13b 1257
a331de3b
CH
1258 if (sk_can_busy_loop(sock->sk)) {
1259 /* poll once if requested by the syscall */
1260 if (events & POLL_BUSY_LOOP)
1261 sk_busy_loop(sock->sk, 1);
1262
1263 /* if this socket can poll_ll, tell the system call */
1264 flag = POLL_BUSY_LOOP;
1265 }
1266
1267 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1268}
1269
89bddce5 1270static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1271{
b69aee04 1272 struct socket *sock = file->private_data;
1da177e4
LT
1273
1274 return sock->ops->mmap(file, sock, vma);
1275}
1276
/* ->release for socket files: release the socket embedded in @inode. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1282
1283/*
1284 * Update the socket async list
1285 *
1286 * Fasync_list locking strategy.
1287 *
1288 * 1. fasync_list is modified only under process context socket lock
1289 * i.e. under semaphore.
1290 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1291 * or under socket lock
1da177e4
LT
1292 */
1293
1294static int sock_fasync(int fd, struct file *filp, int on)
1295{
989a2979
ED
1296 struct socket *sock = filp->private_data;
1297 struct sock *sk = sock->sk;
eaefd110 1298 struct socket_wq *wq;
1da177e4 1299
989a2979 1300 if (sk == NULL)
1da177e4 1301 return -EINVAL;
1da177e4
LT
1302
1303 lock_sock(sk);
e6476c21 1304 wq = sock->wq;
eaefd110 1305 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1306
eaefd110 1307 if (!wq->fasync_list)
989a2979
ED
1308 sock_reset_flag(sk, SOCK_FASYNC);
1309 else
bcdce719 1310 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1311
989a2979 1312 release_sock(sk);
1da177e4
LT
1313 return 0;
1314}
1315
ceb5d58b 1316/* This function may be called only under rcu_lock */
1da177e4 1317
ceb5d58b 1318int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1319{
ceb5d58b 1320 if (!wq || !wq->fasync_list)
1da177e4 1321 return -1;
ceb5d58b 1322
89bddce5 1323 switch (how) {
8d8ad9d7 1324 case SOCK_WAKE_WAITD:
ceb5d58b 1325 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1326 break;
1327 goto call_kill;
8d8ad9d7 1328 case SOCK_WAKE_SPACE:
ceb5d58b 1329 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1330 break;
1331 /* fall through */
8d8ad9d7 1332 case SOCK_WAKE_IO:
89bddce5 1333call_kill:
43815482 1334 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1335 break;
8d8ad9d7 1336 case SOCK_WAKE_URG:
43815482 1337 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1338 }
ceb5d58b 1339
1da177e4
LT
1340 return 0;
1341}
c6d409cf 1342EXPORT_SYMBOL(sock_wake_async);
1da177e4 1343
8a3c245c
PT
1344/**
1345 * __sock_create - creates a socket
1346 * @net: net namespace
1347 * @family: protocol family (AF_INET, ...)
1348 * @type: communication type (SOCK_STREAM, ...)
1349 * @protocol: protocol (0, ...)
1350 * @res: new socket
1351 * @kern: boolean for kernel space sockets
1352 *
1353 * Creates a new socket and assigns it to @res, passing through LSM.
1354 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1355 * be set to true if the socket resides in kernel space.
1356 * This function internally uses GFP_KERNEL.
1357 */
1358
721db93a 1359int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1360 struct socket **res, int kern)
1da177e4
LT
1361{
1362 int err;
1363 struct socket *sock;
55737fda 1364 const struct net_proto_family *pf;
1da177e4
LT
1365
1366 /*
89bddce5 1367 * Check protocol is in range
1da177e4
LT
1368 */
1369 if (family < 0 || family >= NPROTO)
1370 return -EAFNOSUPPORT;
1371 if (type < 0 || type >= SOCK_MAX)
1372 return -EINVAL;
1373
1374 /* Compatibility.
1375
1376 This uglymoron is moved from INET layer to here to avoid
1377 deadlock in module load.
1378 */
1379 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1380 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1381 current->comm);
1da177e4
LT
1382 family = PF_PACKET;
1383 }
1384
1385 err = security_socket_create(family, type, protocol, kern);
1386 if (err)
1387 return err;
89bddce5 1388
55737fda
SH
1389 /*
1390 * Allocate the socket and allow the family to set things up. if
1391 * the protocol is 0, the family is instructed to select an appropriate
1392 * default.
1393 */
1394 sock = sock_alloc();
1395 if (!sock) {
e87cc472 1396 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1397 return -ENFILE; /* Not exactly a match, but its the
1398 closest posix thing */
1399 }
1400
1401 sock->type = type;
1402
95a5afca 1403#ifdef CONFIG_MODULES
89bddce5
SH
1404 /* Attempt to load a protocol module if the find failed.
1405 *
1406 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1407 * requested real, full-featured networking support upon configuration.
1408 * Otherwise module support will break!
1409 */
190683a9 1410 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1411 request_module("net-pf-%d", family);
1da177e4
LT
1412#endif
1413
55737fda
SH
1414 rcu_read_lock();
1415 pf = rcu_dereference(net_families[family]);
1416 err = -EAFNOSUPPORT;
1417 if (!pf)
1418 goto out_release;
1da177e4
LT
1419
1420 /*
1421 * We will call the ->create function, that possibly is in a loadable
1422 * module, so we have to bump that loadable module refcnt first.
1423 */
55737fda 1424 if (!try_module_get(pf->owner))
1da177e4
LT
1425 goto out_release;
1426
55737fda
SH
1427 /* Now protected by module ref count */
1428 rcu_read_unlock();
1429
3f378b68 1430 err = pf->create(net, sock, protocol, kern);
55737fda 1431 if (err < 0)
1da177e4 1432 goto out_module_put;
a79af59e 1433
1da177e4
LT
1434 /*
1435 * Now to bump the refcnt of the [loadable] module that owns this
1436 * socket at sock_release time we decrement its refcnt.
1437 */
55737fda
SH
1438 if (!try_module_get(sock->ops->owner))
1439 goto out_module_busy;
1440
1da177e4
LT
1441 /*
1442 * Now that we're done with the ->create function, the [loadable]
1443 * module can have its refcnt decremented
1444 */
55737fda 1445 module_put(pf->owner);
7420ed23
VY
1446 err = security_socket_post_create(sock, family, type, protocol, kern);
1447 if (err)
3b185525 1448 goto out_sock_release;
55737fda 1449 *res = sock;
1da177e4 1450
55737fda
SH
1451 return 0;
1452
1453out_module_busy:
1454 err = -EAFNOSUPPORT;
1da177e4 1455out_module_put:
55737fda
SH
1456 sock->ops = NULL;
1457 module_put(pf->owner);
1458out_sock_release:
1da177e4 1459 sock_release(sock);
55737fda
SH
1460 return err;
1461
1462out_release:
1463 rcu_read_unlock();
1464 goto out_sock_release;
1da177e4 1465}
721db93a 1466EXPORT_SYMBOL(__sock_create);
1da177e4 1467
8a3c245c
PT
1468/**
1469 * sock_create - creates a socket
1470 * @family: protocol family (AF_INET, ...)
1471 * @type: communication type (SOCK_STREAM, ...)
1472 * @protocol: protocol (0, ...)
1473 * @res: new socket
1474 *
1475 * A wrapper around __sock_create().
1476 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1477 */
1478
1da177e4
LT
1479int sock_create(int family, int type, int protocol, struct socket **res)
1480{
1b8d7ae4 1481 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1482}
c6d409cf 1483EXPORT_SYMBOL(sock_create);
1da177e4 1484
8a3c245c
PT
1485/**
1486 * sock_create_kern - creates a socket (kernel space)
1487 * @net: net namespace
1488 * @family: protocol family (AF_INET, ...)
1489 * @type: communication type (SOCK_STREAM, ...)
1490 * @protocol: protocol (0, ...)
1491 * @res: new socket
1492 *
1493 * A wrapper around __sock_create().
1494 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1495 */
1496
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	/* Same as __sock_create() with the kern argument fixed to 1. */
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1502
9d6a15c3 1503int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1504{
1505 int retval;
1506 struct socket *sock;
a677a039
UD
1507 int flags;
1508
e38b36f3
UD
1509 /* Check the SOCK_* constants for consistency. */
1510 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1511 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1512 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1513 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1514
a677a039 1515 flags = type & ~SOCK_TYPE_MASK;
77d27200 1516 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1517 return -EINVAL;
1518 type &= SOCK_TYPE_MASK;
1da177e4 1519
aaca0bdc
UD
1520 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1521 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1522
1da177e4
LT
1523 retval = sock_create(family, type, protocol, &sock);
1524 if (retval < 0)
8e1611e2 1525 return retval;
1da177e4 1526
8e1611e2 1527 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1528}
1529
9d6a15c3
DB
1530SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1531{
1532 return __sys_socket(family, type, protocol);
1533}
1534
1da177e4
LT
1535/*
1536 * Create a pair of connected sockets.
1537 */
1538
6debc8d8 1539int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1540{
1541 struct socket *sock1, *sock2;
1542 int fd1, fd2, err;
db349509 1543 struct file *newfile1, *newfile2;
a677a039
UD
1544 int flags;
1545
1546 flags = type & ~SOCK_TYPE_MASK;
77d27200 1547 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1548 return -EINVAL;
1549 type &= SOCK_TYPE_MASK;
1da177e4 1550
aaca0bdc
UD
1551 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1552 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1553
016a266b
AV
1554 /*
1555 * reserve descriptors and make sure we won't fail
1556 * to return them to userland.
1557 */
1558 fd1 = get_unused_fd_flags(flags);
1559 if (unlikely(fd1 < 0))
1560 return fd1;
1561
1562 fd2 = get_unused_fd_flags(flags);
1563 if (unlikely(fd2 < 0)) {
1564 put_unused_fd(fd1);
1565 return fd2;
1566 }
1567
1568 err = put_user(fd1, &usockvec[0]);
1569 if (err)
1570 goto out;
1571
1572 err = put_user(fd2, &usockvec[1]);
1573 if (err)
1574 goto out;
1575
1da177e4
LT
1576 /*
1577 * Obtain the first socket and check if the underlying protocol
1578 * supports the socketpair call.
1579 */
1580
1581 err = sock_create(family, type, protocol, &sock1);
016a266b 1582 if (unlikely(err < 0))
1da177e4
LT
1583 goto out;
1584
1585 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1586 if (unlikely(err < 0)) {
1587 sock_release(sock1);
1588 goto out;
bf3c23d1 1589 }
d73aa286 1590
d47cd945
DH
1591 err = security_socket_socketpair(sock1, sock2);
1592 if (unlikely(err)) {
1593 sock_release(sock2);
1594 sock_release(sock1);
1595 goto out;
1596 }
1597
016a266b
AV
1598 err = sock1->ops->socketpair(sock1, sock2);
1599 if (unlikely(err < 0)) {
1600 sock_release(sock2);
1601 sock_release(sock1);
1602 goto out;
28407630
AV
1603 }
1604
aab174f0 1605 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1606 if (IS_ERR(newfile1)) {
28407630 1607 err = PTR_ERR(newfile1);
016a266b
AV
1608 sock_release(sock2);
1609 goto out;
28407630
AV
1610 }
1611
aab174f0 1612 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1613 if (IS_ERR(newfile2)) {
1614 err = PTR_ERR(newfile2);
016a266b
AV
1615 fput(newfile1);
1616 goto out;
db349509
AV
1617 }
1618
157cf649 1619 audit_fd_pair(fd1, fd2);
d73aa286 1620
db349509
AV
1621 fd_install(fd1, newfile1);
1622 fd_install(fd2, newfile2);
d73aa286 1623 return 0;
1da177e4 1624
016a266b 1625out:
d73aa286 1626 put_unused_fd(fd2);
d73aa286 1627 put_unused_fd(fd1);
1da177e4
LT
1628 return err;
1629}
1630
6debc8d8
DB
1631SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1632 int __user *, usockvec)
1633{
1634 return __sys_socketpair(family, type, protocol, usockvec);
1635}
1636
1da177e4
LT
1637/*
1638 * Bind a name to a socket. Nothing much to do here since it's
1639 * the protocol's responsibility to handle the local address.
1640 *
1641 * We move the socket address to kernel space before we call
1642 * the protocol layer (having also checked the address is ok).
1643 */
1644
a87d35d8 1645int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1646{
1647 struct socket *sock;
230b1839 1648 struct sockaddr_storage address;
6cb153ca 1649 int err, fput_needed;
1da177e4 1650
89bddce5 1651 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1652 if (sock) {
43db362d 1653 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1654 if (!err) {
89bddce5 1655 err = security_socket_bind(sock,
230b1839 1656 (struct sockaddr *)&address,
89bddce5 1657 addrlen);
6cb153ca
BL
1658 if (!err)
1659 err = sock->ops->bind(sock,
89bddce5 1660 (struct sockaddr *)
230b1839 1661 &address, addrlen);
1da177e4 1662 }
6cb153ca 1663 fput_light(sock->file, fput_needed);
89bddce5 1664 }
1da177e4
LT
1665 return err;
1666}
1667
a87d35d8
DB
1668SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1669{
1670 return __sys_bind(fd, umyaddr, addrlen);
1671}
1672
1da177e4
LT
1673/*
1674 * Perform a listen. Basically, we allow the protocol to do anything
1675 * necessary for a listen, and if that works, we mark the socket as
1676 * ready for listening.
1677 */
1678
25e290ee 1679int __sys_listen(int fd, int backlog)
1da177e4
LT
1680{
1681 struct socket *sock;
6cb153ca 1682 int err, fput_needed;
b8e1f9b5 1683 int somaxconn;
89bddce5
SH
1684
1685 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1686 if (sock) {
8efa6e93 1687 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1688 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1689 backlog = somaxconn;
1da177e4
LT
1690
1691 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1692 if (!err)
1693 err = sock->ops->listen(sock, backlog);
1da177e4 1694
6cb153ca 1695 fput_light(sock->file, fput_needed);
1da177e4
LT
1696 }
1697 return err;
1698}
1699
25e290ee
DB
1700SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1701{
1702 return __sys_listen(fd, backlog);
1703}
1704
1da177e4
LT
1705/*
1706 * For accept, we attempt to create a new socket, set up the link
1707 * with the client, wake up the client, then return the new
1708 * connected fd. We collect the address of the connector in kernel
1709 * space and move it to user at the very end. This is unclean because
1710 * we open the socket then return an error.
1711 *
1712 * 1003.1g adds the ability to recvmsg() to query connection pending
1713 * status to recvmsg. We need to add that support in a way that's
b903036a 1714 * clean when we restructure accept also.
1da177e4
LT
1715 */
1716
4541e805
DB
1717int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1718 int __user *upeer_addrlen, int flags)
1da177e4
LT
1719{
1720 struct socket *sock, *newsock;
39d8c1b6 1721 struct file *newfile;
6cb153ca 1722 int err, len, newfd, fput_needed;
230b1839 1723 struct sockaddr_storage address;
1da177e4 1724
77d27200 1725 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1726 return -EINVAL;
1727
1728 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1729 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1730
6cb153ca 1731 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1732 if (!sock)
1733 goto out;
1734
1735 err = -ENFILE;
c6d409cf
ED
1736 newsock = sock_alloc();
1737 if (!newsock)
1da177e4
LT
1738 goto out_put;
1739
1740 newsock->type = sock->type;
1741 newsock->ops = sock->ops;
1742
1da177e4
LT
1743 /*
1744 * We don't need try_module_get here, as the listening socket (sock)
1745 * has the protocol module (sock->ops->owner) held.
1746 */
1747 __module_get(newsock->ops->owner);
1748
28407630 1749 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1750 if (unlikely(newfd < 0)) {
1751 err = newfd;
9a1875e6
DM
1752 sock_release(newsock);
1753 goto out_put;
39d8c1b6 1754 }
aab174f0 1755 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1756 if (IS_ERR(newfile)) {
28407630
AV
1757 err = PTR_ERR(newfile);
1758 put_unused_fd(newfd);
28407630
AV
1759 goto out_put;
1760 }
39d8c1b6 1761
a79af59e
FF
1762 err = security_socket_accept(sock, newsock);
1763 if (err)
39d8c1b6 1764 goto out_fd;
a79af59e 1765
cdfbabfb 1766 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1767 if (err < 0)
39d8c1b6 1768 goto out_fd;
1da177e4
LT
1769
1770 if (upeer_sockaddr) {
9b2c45d4
DV
1771 len = newsock->ops->getname(newsock,
1772 (struct sockaddr *)&address, 2);
1773 if (len < 0) {
1da177e4 1774 err = -ECONNABORTED;
39d8c1b6 1775 goto out_fd;
1da177e4 1776 }
43db362d 1777 err = move_addr_to_user(&address,
230b1839 1778 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1779 if (err < 0)
39d8c1b6 1780 goto out_fd;
1da177e4
LT
1781 }
1782
1783 /* File flags are not inherited via accept() unlike another OSes. */
1784
39d8c1b6
DM
1785 fd_install(newfd, newfile);
1786 err = newfd;
1da177e4 1787
1da177e4 1788out_put:
6cb153ca 1789 fput_light(sock->file, fput_needed);
1da177e4
LT
1790out:
1791 return err;
39d8c1b6 1792out_fd:
9606a216 1793 fput(newfile);
39d8c1b6 1794 put_unused_fd(newfd);
1da177e4
LT
1795 goto out_put;
1796}
1797
4541e805
DB
1798SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1799 int __user *, upeer_addrlen, int, flags)
1800{
1801 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1802}
1803
20f37034
HC
1804SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1805 int __user *, upeer_addrlen)
aaca0bdc 1806{
4541e805 1807 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1808}
1809
1da177e4
LT
1810/*
1811 * Attempt to connect to a socket with the server address. The address
1812 * is in user space so we verify it is OK and move it to kernel space.
1813 *
1814 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1815 * break bindings
1816 *
1817 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1818 * other SEQPACKET protocols that take time to connect() as it doesn't
1819 * include the -EINPROGRESS status for such sockets.
1820 */
1821
1387c2c2 1822int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1823{
1824 struct socket *sock;
230b1839 1825 struct sockaddr_storage address;
6cb153ca 1826 int err, fput_needed;
1da177e4 1827
6cb153ca 1828 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1829 if (!sock)
1830 goto out;
43db362d 1831 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1832 if (err < 0)
1833 goto out_put;
1834
89bddce5 1835 err =
230b1839 1836 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1837 if (err)
1838 goto out_put;
1839
230b1839 1840 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1841 sock->file->f_flags);
1842out_put:
6cb153ca 1843 fput_light(sock->file, fput_needed);
1da177e4
LT
1844out:
1845 return err;
1846}
1847
1387c2c2
DB
1848SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1849 int, addrlen)
1850{
1851 return __sys_connect(fd, uservaddr, addrlen);
1852}
1853
1da177e4
LT
1854/*
1855 * Get the local address ('name') of a socket object. Move the obtained
1856 * name to user space.
1857 */
1858
8882a107
DB
1859int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1860 int __user *usockaddr_len)
1da177e4
LT
1861{
1862 struct socket *sock;
230b1839 1863 struct sockaddr_storage address;
9b2c45d4 1864 int err, fput_needed;
89bddce5 1865
6cb153ca 1866 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1867 if (!sock)
1868 goto out;
1869
1870 err = security_socket_getsockname(sock);
1871 if (err)
1872 goto out_put;
1873
9b2c45d4
DV
1874 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1875 if (err < 0)
1da177e4 1876 goto out_put;
9b2c45d4
DV
1877 /* "err" is actually length in this case */
1878 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1879
1880out_put:
6cb153ca 1881 fput_light(sock->file, fput_needed);
1da177e4
LT
1882out:
1883 return err;
1884}
1885
8882a107
DB
1886SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1887 int __user *, usockaddr_len)
1888{
1889 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1890}
1891
1da177e4
LT
1892/*
1893 * Get the remote address ('name') of a socket object. Move the obtained
1894 * name to user space.
1895 */
1896
b21c8f83
DB
1897int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1898 int __user *usockaddr_len)
1da177e4
LT
1899{
1900 struct socket *sock;
230b1839 1901 struct sockaddr_storage address;
9b2c45d4 1902 int err, fput_needed;
1da177e4 1903
89bddce5
SH
1904 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1905 if (sock != NULL) {
1da177e4
LT
1906 err = security_socket_getpeername(sock);
1907 if (err) {
6cb153ca 1908 fput_light(sock->file, fput_needed);
1da177e4
LT
1909 return err;
1910 }
1911
9b2c45d4
DV
1912 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1913 if (err >= 0)
1914 /* "err" is actually length in this case */
1915 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1916 usockaddr_len);
6cb153ca 1917 fput_light(sock->file, fput_needed);
1da177e4
LT
1918 }
1919 return err;
1920}
1921
b21c8f83
DB
1922SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1923 int __user *, usockaddr_len)
1924{
1925 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1926}
1927
1da177e4
LT
1928/*
1929 * Send a datagram to a given address. We move the address into kernel
1930 * space and check the user space data area is readable before invoking
1931 * the protocol.
1932 */
211b634b
DB
1933int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1934 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1935{
1936 struct socket *sock;
230b1839 1937 struct sockaddr_storage address;
1da177e4
LT
1938 int err;
1939 struct msghdr msg;
1940 struct iovec iov;
6cb153ca 1941 int fput_needed;
6cb153ca 1942
602bd0e9
AV
1943 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1944 if (unlikely(err))
1945 return err;
de0fa95c
PE
1946 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1947 if (!sock)
4387ff75 1948 goto out;
6cb153ca 1949
89bddce5 1950 msg.msg_name = NULL;
89bddce5
SH
1951 msg.msg_control = NULL;
1952 msg.msg_controllen = 0;
1953 msg.msg_namelen = 0;
6cb153ca 1954 if (addr) {
43db362d 1955 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1956 if (err < 0)
1957 goto out_put;
230b1839 1958 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1959 msg.msg_namelen = addr_len;
1da177e4
LT
1960 }
1961 if (sock->file->f_flags & O_NONBLOCK)
1962 flags |= MSG_DONTWAIT;
1963 msg.msg_flags = flags;
d8725c86 1964 err = sock_sendmsg(sock, &msg);
1da177e4 1965
89bddce5 1966out_put:
de0fa95c 1967 fput_light(sock->file, fput_needed);
4387ff75 1968out:
1da177e4
LT
1969 return err;
1970}
1971
211b634b
DB
1972SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1973 unsigned int, flags, struct sockaddr __user *, addr,
1974 int, addr_len)
1975{
1976 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1977}
1978
1da177e4 1979/*
89bddce5 1980 * Send a datagram down a socket.
1da177e4
LT
1981 */
1982
3e0fa65f 1983SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1984 unsigned int, flags)
1da177e4 1985{
211b634b 1986 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1987}
1988
1989/*
89bddce5 1990 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1991 * sender. We verify the buffers are writable and if needed move the
1992 * sender address from kernel to user space.
1993 */
7a09e1eb
DB
1994int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1995 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
1996{
1997 struct socket *sock;
1998 struct iovec iov;
1999 struct msghdr msg;
230b1839 2000 struct sockaddr_storage address;
89bddce5 2001 int err, err2;
6cb153ca
BL
2002 int fput_needed;
2003
602bd0e9
AV
2004 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2005 if (unlikely(err))
2006 return err;
de0fa95c 2007 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2008 if (!sock)
de0fa95c 2009 goto out;
1da177e4 2010
89bddce5
SH
2011 msg.msg_control = NULL;
2012 msg.msg_controllen = 0;
f3d33426
HFS
2013 /* Save some cycles and don't copy the address if not needed */
2014 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2015 /* We assume all kernel code knows the size of sockaddr_storage */
2016 msg.msg_namelen = 0;
130ed5d1 2017 msg.msg_iocb = NULL;
9f138fa6 2018 msg.msg_flags = 0;
1da177e4
LT
2019 if (sock->file->f_flags & O_NONBLOCK)
2020 flags |= MSG_DONTWAIT;
2da62906 2021 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2022
89bddce5 2023 if (err >= 0 && addr != NULL) {
43db362d 2024 err2 = move_addr_to_user(&address,
230b1839 2025 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2026 if (err2 < 0)
2027 err = err2;
1da177e4 2028 }
de0fa95c
PE
2029
2030 fput_light(sock->file, fput_needed);
4387ff75 2031out:
1da177e4
LT
2032 return err;
2033}
2034
7a09e1eb
DB
2035SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2036 unsigned int, flags, struct sockaddr __user *, addr,
2037 int __user *, addr_len)
2038{
2039 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2040}
2041
1da177e4 2042/*
89bddce5 2043 * Receive a datagram from a socket.
1da177e4
LT
2044 */
2045
b7c0ddf5
JG
2046SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2047 unsigned int, flags)
1da177e4 2048{
7a09e1eb 2049 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2050}
2051
2052/*
2053 * Set a socket option. Because we don't know the option lengths we have
2054 * to pass the user mode parameter for the protocols to sort out.
2055 */
2056
cc36dca0
DB
2057static int __sys_setsockopt(int fd, int level, int optname,
2058 char __user *optval, int optlen)
1da177e4 2059{
6cb153ca 2060 int err, fput_needed;
1da177e4
LT
2061 struct socket *sock;
2062
2063 if (optlen < 0)
2064 return -EINVAL;
89bddce5
SH
2065
2066 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2067 if (sock != NULL) {
2068 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2069 if (err)
2070 goto out_put;
1da177e4
LT
2071
2072 if (level == SOL_SOCKET)
89bddce5
SH
2073 err =
2074 sock_setsockopt(sock, level, optname, optval,
2075 optlen);
1da177e4 2076 else
89bddce5
SH
2077 err =
2078 sock->ops->setsockopt(sock, level, optname, optval,
2079 optlen);
6cb153ca
BL
2080out_put:
2081 fput_light(sock->file, fput_needed);
1da177e4
LT
2082 }
2083 return err;
2084}
2085
cc36dca0
DB
2086SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2087 char __user *, optval, int, optlen)
2088{
2089 return __sys_setsockopt(fd, level, optname, optval, optlen);
2090}
2091
1da177e4
LT
2092/*
2093 * Get a socket option. Because we don't know the option lengths we have
2094 * to pass a user mode parameter for the protocols to sort out.
2095 */
2096
13a2d70e
DB
2097static int __sys_getsockopt(int fd, int level, int optname,
2098 char __user *optval, int __user *optlen)
1da177e4 2099{
6cb153ca 2100 int err, fput_needed;
1da177e4
LT
2101 struct socket *sock;
2102
89bddce5
SH
2103 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2104 if (sock != NULL) {
6cb153ca
BL
2105 err = security_socket_getsockopt(sock, level, optname);
2106 if (err)
2107 goto out_put;
1da177e4
LT
2108
2109 if (level == SOL_SOCKET)
89bddce5
SH
2110 err =
2111 sock_getsockopt(sock, level, optname, optval,
2112 optlen);
1da177e4 2113 else
89bddce5
SH
2114 err =
2115 sock->ops->getsockopt(sock, level, optname, optval,
2116 optlen);
6cb153ca
BL
2117out_put:
2118 fput_light(sock->file, fput_needed);
1da177e4
LT
2119 }
2120 return err;
2121}
2122
13a2d70e
DB
2123SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2124 char __user *, optval, int __user *, optlen)
2125{
2126 return __sys_getsockopt(fd, level, optname, optval, optlen);
2127}
2128
1da177e4
LT
2129/*
2130 * Shutdown a socket.
2131 */
2132
005a1aea 2133int __sys_shutdown(int fd, int how)
1da177e4 2134{
6cb153ca 2135 int err, fput_needed;
1da177e4
LT
2136 struct socket *sock;
2137
89bddce5
SH
2138 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2139 if (sock != NULL) {
1da177e4 2140 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2141 if (!err)
2142 err = sock->ops->shutdown(sock, how);
2143 fput_light(sock->file, fput_needed);
1da177e4
LT
2144 }
2145 return err;
2146}
2147
005a1aea
DB
2148SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2149{
2150 return __sys_shutdown(fd, how);
2151}
2152
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * These expand in the caller's scope: they test a variable named 'flags'
 * for MSG_CMSG_COMPAT and, for an argument 'msg', dereference either 'msg'
 * itself or a companion variable named 'msg_compat'. Both must be visible
 * at the point of use.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2159
c71d8ebe
TH
2160struct used_address {
2161 struct sockaddr_storage name;
2162 unsigned int name_len;
2163};
2164
da184284
AV
2165static int copy_msghdr_from_user(struct msghdr *kmsg,
2166 struct user_msghdr __user *umsg,
2167 struct sockaddr __user **save_addr,
2168 struct iovec **iov)
1661bf36 2169{
ffb07550 2170 struct user_msghdr msg;
08adb7da
AV
2171 ssize_t err;
2172
ffb07550 2173 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2174 return -EFAULT;
dbb490b9 2175
864d9664 2176 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2177 kmsg->msg_controllen = msg.msg_controllen;
2178 kmsg->msg_flags = msg.msg_flags;
2179
2180 kmsg->msg_namelen = msg.msg_namelen;
2181 if (!msg.msg_name)
6a2a2b3a
AS
2182 kmsg->msg_namelen = 0;
2183
dbb490b9
ML
2184 if (kmsg->msg_namelen < 0)
2185 return -EINVAL;
2186
1661bf36 2187 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2188 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2189
2190 if (save_addr)
ffb07550 2191 *save_addr = msg.msg_name;
08adb7da 2192
ffb07550 2193 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2194 if (!save_addr) {
864d9664
PA
2195 err = move_addr_to_kernel(msg.msg_name,
2196 kmsg->msg_namelen,
08adb7da
AV
2197 kmsg->msg_name);
2198 if (err < 0)
2199 return err;
2200 }
2201 } else {
2202 kmsg->msg_name = NULL;
2203 kmsg->msg_namelen = 0;
2204 }
2205
ffb07550 2206 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2207 return -EMSGSIZE;
2208
0345f931 2209 kmsg->msg_iocb = NULL;
2210
87e5e6da 2211 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2212 msg.msg_iov, msg.msg_iovlen,
da184284 2213 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2214 return err < 0 ? err : 0;
1661bf36
DC
2215}
2216
666547ff 2217static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2218 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2219 struct used_address *used_address,
2220 unsigned int allowed_msghdr_flags)
1da177e4 2221{
89bddce5
SH
2222 struct compat_msghdr __user *msg_compat =
2223 (struct compat_msghdr __user *)msg;
230b1839 2224 struct sockaddr_storage address;
1da177e4 2225 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2226 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2227 __aligned(sizeof(__kernel_size_t));
89bddce5 2228 /* 20 is size of ipv6_pktinfo */
1da177e4 2229 unsigned char *ctl_buf = ctl;
d8725c86 2230 int ctl_len;
08adb7da 2231 ssize_t err;
89bddce5 2232
08adb7da 2233 msg_sys->msg_name = &address;
1da177e4 2234
08449320 2235 if (MSG_CMSG_COMPAT & flags)
08adb7da 2236 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2237 else
08adb7da 2238 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2239 if (err < 0)
da184284 2240 return err;
1da177e4
LT
2241
2242 err = -ENOBUFS;
2243
228e548e 2244 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2245 goto out_freeiov;
28a94d8f 2246 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2247 ctl_len = msg_sys->msg_controllen;
1da177e4 2248 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2249 err =
228e548e 2250 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2251 sizeof(ctl));
1da177e4
LT
2252 if (err)
2253 goto out_freeiov;
228e548e
AB
2254 ctl_buf = msg_sys->msg_control;
2255 ctl_len = msg_sys->msg_controllen;
1da177e4 2256 } else if (ctl_len) {
ac4340fc
DM
2257 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2258 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2259 if (ctl_len > sizeof(ctl)) {
1da177e4 2260 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2261 if (ctl_buf == NULL)
1da177e4
LT
2262 goto out_freeiov;
2263 }
2264 err = -EFAULT;
2265 /*
228e548e 2266 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2267 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2268 * checking falls down on this.
2269 */
fb8621bb 2270 if (copy_from_user(ctl_buf,
228e548e 2271 (void __user __force *)msg_sys->msg_control,
89bddce5 2272 ctl_len))
1da177e4 2273 goto out_freectl;
228e548e 2274 msg_sys->msg_control = ctl_buf;
1da177e4 2275 }
228e548e 2276 msg_sys->msg_flags = flags;
1da177e4
LT
2277
2278 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2279 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2280 /*
2281 * If this is sendmmsg() and current destination address is same as
2282 * previously succeeded address, omit asking LSM's decision.
2283 * used_address->name_len is initialized to UINT_MAX so that the first
2284 * destination address never matches.
2285 */
bc909d9d
MD
2286 if (used_address && msg_sys->msg_name &&
2287 used_address->name_len == msg_sys->msg_namelen &&
2288 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2289 used_address->name_len)) {
d8725c86 2290 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2291 goto out_freectl;
2292 }
d8725c86 2293 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2294 /*
2295 * If this is sendmmsg() and sending to current destination address was
2296 * successful, remember it.
2297 */
2298 if (used_address && err >= 0) {
2299 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2300 if (msg_sys->msg_name)
2301 memcpy(&used_address->name, msg_sys->msg_name,
2302 used_address->name_len);
c71d8ebe 2303 }
1da177e4
LT
2304
2305out_freectl:
89bddce5 2306 if (ctl_buf != ctl)
1da177e4
LT
2307 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2308out_freeiov:
da184284 2309 kfree(iov);
228e548e
AB
2310 return err;
2311}
2312
2313/*
2314 * BSD sendmsg interface
2315 */
0fa03c62
JA
2316long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
2317 unsigned int flags)
2318{
2319 struct msghdr msg_sys;
2320
2321 return ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
2322}
228e548e 2323
e1834a32
DB
2324long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2325 bool forbid_cmsg_compat)
228e548e
AB
2326{
2327 int fput_needed, err;
2328 struct msghdr msg_sys;
1be374a0
AL
2329 struct socket *sock;
2330
e1834a32
DB
2331 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2332 return -EINVAL;
2333
1be374a0 2334 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2335 if (!sock)
2336 goto out;
2337
28a94d8f 2338 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2339
6cb153ca 2340 fput_light(sock->file, fput_needed);
89bddce5 2341out:
1da177e4
LT
2342 return err;
2343}
2344
666547ff 2345SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2346{
e1834a32 2347 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2348}
2349
228e548e
AB
2350/*
2351 * Linux sendmmsg interface
2352 */
2353
2354int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2355 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2356{
2357 int fput_needed, err, datagrams;
2358 struct socket *sock;
2359 struct mmsghdr __user *entry;
2360 struct compat_mmsghdr __user *compat_entry;
2361 struct msghdr msg_sys;
c71d8ebe 2362 struct used_address used_address;
f092276d 2363 unsigned int oflags = flags;
228e548e 2364
e1834a32
DB
2365 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2366 return -EINVAL;
2367
98382f41
AB
2368 if (vlen > UIO_MAXIOV)
2369 vlen = UIO_MAXIOV;
228e548e
AB
2370
2371 datagrams = 0;
2372
2373 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2374 if (!sock)
2375 return err;
2376
c71d8ebe 2377 used_address.name_len = UINT_MAX;
228e548e
AB
2378 entry = mmsg;
2379 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2380 err = 0;
f092276d 2381 flags |= MSG_BATCH;
228e548e
AB
2382
2383 while (datagrams < vlen) {
f092276d
TH
2384 if (datagrams == vlen - 1)
2385 flags = oflags;
2386
228e548e 2387 if (MSG_CMSG_COMPAT & flags) {
666547ff 2388 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2389 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2390 if (err < 0)
2391 break;
2392 err = __put_user(err, &compat_entry->msg_len);
2393 ++compat_entry;
2394 } else {
a7526eb5 2395 err = ___sys_sendmsg(sock,
666547ff 2396 (struct user_msghdr __user *)entry,
28a94d8f 2397 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2398 if (err < 0)
2399 break;
2400 err = put_user(err, &entry->msg_len);
2401 ++entry;
2402 }
2403
2404 if (err)
2405 break;
2406 ++datagrams;
3023898b
SHY
2407 if (msg_data_left(&msg_sys))
2408 break;
a78cb84c 2409 cond_resched();
228e548e
AB
2410 }
2411
228e548e
AB
2412 fput_light(sock->file, fput_needed);
2413
728ffb86
AB
2414 /* We only return an error if no datagrams were able to be sent */
2415 if (datagrams != 0)
228e548e
AB
2416 return datagrams;
2417
228e548e
AB
2418 return err;
2419}
2420
2421SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2422 unsigned int, vlen, unsigned int, flags)
2423{
e1834a32 2424 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2425}
2426
666547ff 2427static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2428 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2429{
89bddce5
SH
2430 struct compat_msghdr __user *msg_compat =
2431 (struct compat_msghdr __user *)msg;
1da177e4 2432 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2433 struct iovec *iov = iovstack;
1da177e4 2434 unsigned long cmsg_ptr;
2da62906 2435 int len;
08adb7da 2436 ssize_t err;
1da177e4
LT
2437
2438 /* kernel mode address */
230b1839 2439 struct sockaddr_storage addr;
1da177e4
LT
2440
2441 /* user mode address pointers */
2442 struct sockaddr __user *uaddr;
08adb7da 2443 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2444
08adb7da 2445 msg_sys->msg_name = &addr;
1da177e4 2446
f3d33426 2447 if (MSG_CMSG_COMPAT & flags)
08adb7da 2448 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2449 else
08adb7da 2450 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2451 if (err < 0)
da184284 2452 return err;
1da177e4 2453
a2e27255
ACM
2454 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2455 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2456
f3d33426
HFS
2457 /* We assume all kernel code knows the size of sockaddr_storage */
2458 msg_sys->msg_namelen = 0;
2459
1da177e4
LT
2460 if (sock->file->f_flags & O_NONBLOCK)
2461 flags |= MSG_DONTWAIT;
2da62906 2462 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2463 if (err < 0)
2464 goto out_freeiov;
2465 len = err;
2466
2467 if (uaddr != NULL) {
43db362d 2468 err = move_addr_to_user(&addr,
a2e27255 2469 msg_sys->msg_namelen, uaddr,
89bddce5 2470 uaddr_len);
1da177e4
LT
2471 if (err < 0)
2472 goto out_freeiov;
2473 }
a2e27255 2474 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2475 COMPAT_FLAGS(msg));
1da177e4
LT
2476 if (err)
2477 goto out_freeiov;
2478 if (MSG_CMSG_COMPAT & flags)
a2e27255 2479 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2480 &msg_compat->msg_controllen);
2481 else
a2e27255 2482 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2483 &msg->msg_controllen);
2484 if (err)
2485 goto out_freeiov;
2486 err = len;
2487
2488out_freeiov:
da184284 2489 kfree(iov);
a2e27255
ACM
2490 return err;
2491}
2492
2493/*
2494 * BSD recvmsg interface
2495 */
2496
e1834a32
DB
2497long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2498 bool forbid_cmsg_compat)
a2e27255
ACM
2499{
2500 int fput_needed, err;
2501 struct msghdr msg_sys;
1be374a0
AL
2502 struct socket *sock;
2503
e1834a32
DB
2504 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2505 return -EINVAL;
2506
1be374a0 2507 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2508 if (!sock)
2509 goto out;
2510
a7526eb5 2511 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2512
6cb153ca 2513 fput_light(sock->file, fput_needed);
1da177e4
LT
2514out:
2515 return err;
2516}
2517
666547ff 2518SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2519 unsigned int, flags)
2520{
e1834a32 2521 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2522}
2523
a2e27255
ACM
2524/*
2525 * Linux recvmmsg interface
2526 */
2527
e11d4284
AB
2528static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2529 unsigned int vlen, unsigned int flags,
2530 struct timespec64 *timeout)
a2e27255
ACM
2531{
2532 int fput_needed, err, datagrams;
2533 struct socket *sock;
2534 struct mmsghdr __user *entry;
d7256d0e 2535 struct compat_mmsghdr __user *compat_entry;
a2e27255 2536 struct msghdr msg_sys;
766b9f92
DD
2537 struct timespec64 end_time;
2538 struct timespec64 timeout64;
a2e27255
ACM
2539
2540 if (timeout &&
2541 poll_select_set_timeout(&end_time, timeout->tv_sec,
2542 timeout->tv_nsec))
2543 return -EINVAL;
2544
2545 datagrams = 0;
2546
2547 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2548 if (!sock)
2549 return err;
2550
7797dc41
SHY
2551 if (likely(!(flags & MSG_ERRQUEUE))) {
2552 err = sock_error(sock->sk);
2553 if (err) {
2554 datagrams = err;
2555 goto out_put;
2556 }
e623a9e9 2557 }
a2e27255
ACM
2558
2559 entry = mmsg;
d7256d0e 2560 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2561
2562 while (datagrams < vlen) {
2563 /*
2564 * No need to ask LSM for more than the first datagram.
2565 */
d7256d0e 2566 if (MSG_CMSG_COMPAT & flags) {
666547ff 2567 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2568 &msg_sys, flags & ~MSG_WAITFORONE,
2569 datagrams);
d7256d0e
JMG
2570 if (err < 0)
2571 break;
2572 err = __put_user(err, &compat_entry->msg_len);
2573 ++compat_entry;
2574 } else {
a7526eb5 2575 err = ___sys_recvmsg(sock,
666547ff 2576 (struct user_msghdr __user *)entry,
a7526eb5
AL
2577 &msg_sys, flags & ~MSG_WAITFORONE,
2578 datagrams);
d7256d0e
JMG
2579 if (err < 0)
2580 break;
2581 err = put_user(err, &entry->msg_len);
2582 ++entry;
2583 }
2584
a2e27255
ACM
2585 if (err)
2586 break;
a2e27255
ACM
2587 ++datagrams;
2588
71c5c159
BB
2589 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2590 if (flags & MSG_WAITFORONE)
2591 flags |= MSG_DONTWAIT;
2592
a2e27255 2593 if (timeout) {
766b9f92 2594 ktime_get_ts64(&timeout64);
c2e6c856 2595 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2596 if (timeout->tv_sec < 0) {
2597 timeout->tv_sec = timeout->tv_nsec = 0;
2598 break;
2599 }
2600
2601 /* Timeout, return less than vlen datagrams */
2602 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2603 break;
2604 }
2605
2606 /* Out of band data, return right away */
2607 if (msg_sys.msg_flags & MSG_OOB)
2608 break;
a78cb84c 2609 cond_resched();
a2e27255
ACM
2610 }
2611
a2e27255 2612 if (err == 0)
34b88a68
ACM
2613 goto out_put;
2614
2615 if (datagrams == 0) {
2616 datagrams = err;
2617 goto out_put;
2618 }
a2e27255 2619
34b88a68
ACM
2620 /*
2621 * We may return less entries than requested (vlen) if the
2622 * sock is non block and there aren't enough datagrams...
2623 */
2624 if (err != -EAGAIN) {
a2e27255 2625 /*
34b88a68
ACM
2626 * ... or if recvmsg returns an error after we
2627 * received some datagrams, where we record the
2628 * error to return on the next call or if the
2629 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2630 */
34b88a68 2631 sock->sk->sk_err = -err;
a2e27255 2632 }
34b88a68
ACM
2633out_put:
2634 fput_light(sock->file, fput_needed);
a2e27255 2635
34b88a68 2636 return datagrams;
a2e27255
ACM
2637}
2638
e11d4284
AB
2639int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2640 unsigned int vlen, unsigned int flags,
2641 struct __kernel_timespec __user *timeout,
2642 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2643{
2644 int datagrams;
c2e6c856 2645 struct timespec64 timeout_sys;
a2e27255 2646
e11d4284
AB
2647 if (timeout && get_timespec64(&timeout_sys, timeout))
2648 return -EFAULT;
a2e27255 2649
e11d4284 2650 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2651 return -EFAULT;
2652
e11d4284
AB
2653 if (!timeout && !timeout32)
2654 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2655
2656 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2657
e11d4284
AB
2658 if (datagrams <= 0)
2659 return datagrams;
2660
2661 if (timeout && put_timespec64(&timeout_sys, timeout))
2662 datagrams = -EFAULT;
2663
2664 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2665 datagrams = -EFAULT;
2666
2667 return datagrams;
2668}
2669
1255e269
DB
2670SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2671 unsigned int, vlen, unsigned int, flags,
c2e6c856 2672 struct __kernel_timespec __user *, timeout)
1255e269 2673{
e11d4284
AB
2674 if (flags & MSG_CMSG_COMPAT)
2675 return -EINVAL;
2676
2677 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2678}
2679
#ifdef CONFIG_COMPAT_32BIT_TIME
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	/* MSG_CMSG_COMPAT may not be supplied through the native syscall. */
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	/* Old 32-bit timespec layout: pass it in the 'timeout32' slot. */
	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2691
a2e27255 2692#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, indexed by call number.
 * Each entry is the number of bytes of arguments to copy from user space:
 * AL(n) is n arguments of sizeof(unsigned long) each.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	AL(4), AL(5), AL(4)
};

#undef AL
2703
2704/*
89bddce5 2705 * System call vectors.
1da177e4
LT
2706 *
2707 * Argument checking cleaned up. Saved 20% in size.
2708 * This function doesn't need to set the kernel lock because
89bddce5 2709 * it is set by the callees.
1da177e4
LT
2710 */
2711
3e0fa65f 2712SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2713{
2950fa9d 2714 unsigned long a[AUDITSC_ARGS];
89bddce5 2715 unsigned long a0, a1;
1da177e4 2716 int err;
47379052 2717 unsigned int len;
1da177e4 2718
228e548e 2719 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2720 return -EINVAL;
c8e8cd57 2721 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2722
47379052
AV
2723 len = nargs[call];
2724 if (len > sizeof(a))
2725 return -EINVAL;
2726
1da177e4 2727 /* copy_from_user should be SMP safe. */
47379052 2728 if (copy_from_user(a, args, len))
1da177e4 2729 return -EFAULT;
3ec3b2fb 2730
2950fa9d
CG
2731 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2732 if (err)
2733 return err;
3ec3b2fb 2734
89bddce5
SH
2735 a0 = a[0];
2736 a1 = a[1];
2737
2738 switch (call) {
2739 case SYS_SOCKET:
9d6a15c3 2740 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2741 break;
2742 case SYS_BIND:
a87d35d8 2743 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2744 break;
2745 case SYS_CONNECT:
1387c2c2 2746 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2747 break;
2748 case SYS_LISTEN:
25e290ee 2749 err = __sys_listen(a0, a1);
89bddce5
SH
2750 break;
2751 case SYS_ACCEPT:
4541e805
DB
2752 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2753 (int __user *)a[2], 0);
89bddce5
SH
2754 break;
2755 case SYS_GETSOCKNAME:
2756 err =
8882a107
DB
2757 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2758 (int __user *)a[2]);
89bddce5
SH
2759 break;
2760 case SYS_GETPEERNAME:
2761 err =
b21c8f83
DB
2762 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2763 (int __user *)a[2]);
89bddce5
SH
2764 break;
2765 case SYS_SOCKETPAIR:
6debc8d8 2766 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2767 break;
2768 case SYS_SEND:
f3bf896b
DB
2769 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2770 NULL, 0);
89bddce5
SH
2771 break;
2772 case SYS_SENDTO:
211b634b
DB
2773 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2774 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2775 break;
2776 case SYS_RECV:
d27e9afc
DB
2777 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2778 NULL, NULL);
89bddce5
SH
2779 break;
2780 case SYS_RECVFROM:
7a09e1eb
DB
2781 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2782 (struct sockaddr __user *)a[4],
2783 (int __user *)a[5]);
89bddce5
SH
2784 break;
2785 case SYS_SHUTDOWN:
005a1aea 2786 err = __sys_shutdown(a0, a1);
89bddce5
SH
2787 break;
2788 case SYS_SETSOCKOPT:
cc36dca0
DB
2789 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2790 a[4]);
89bddce5
SH
2791 break;
2792 case SYS_GETSOCKOPT:
2793 err =
13a2d70e
DB
2794 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2795 (int __user *)a[4]);
89bddce5
SH
2796 break;
2797 case SYS_SENDMSG:
e1834a32
DB
2798 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2799 a[2], true);
89bddce5 2800 break;
228e548e 2801 case SYS_SENDMMSG:
e1834a32
DB
2802 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2803 a[3], true);
228e548e 2804 break;
89bddce5 2805 case SYS_RECVMSG:
e1834a32
DB
2806 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2807 a[2], true);
89bddce5 2808 break;
a2e27255 2809 case SYS_RECVMMSG:
e11d4284
AB
2810 if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
2811 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2812 a[2], a[3],
2813 (struct __kernel_timespec __user *)a[4],
2814 NULL);
2815 else
2816 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2817 a[2], a[3], NULL,
2818 (struct old_timespec32 __user *)a[4]);
a2e27255 2819 break;
de11defe 2820 case SYS_ACCEPT4:
4541e805
DB
2821 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2822 (int __user *)a[2], a[3]);
aaca0bdc 2823 break;
89bddce5
SH
2824 default:
2825 err = -EINVAL;
2826 break;
1da177e4
LT
2827 }
2828 return err;
2829}
2830
89bddce5 2831#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2832
55737fda
SH
2833/**
2834 * sock_register - add a socket protocol handler
2835 * @ops: description of protocol
2836 *
1da177e4
LT
2837 * This function is called by a protocol handler that wants to
2838 * advertise its address family, and have it linked into the
e793c0f7 2839 * socket interface. The value ops->family corresponds to the
55737fda 2840 * socket system call protocol family.
1da177e4 2841 */
f0fd27d4 2842int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2843{
2844 int err;
2845
2846 if (ops->family >= NPROTO) {
3410f22e 2847 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2848 return -ENOBUFS;
2849 }
55737fda
SH
2850
2851 spin_lock(&net_family_lock);
190683a9
ED
2852 if (rcu_dereference_protected(net_families[ops->family],
2853 lockdep_is_held(&net_family_lock)))
55737fda
SH
2854 err = -EEXIST;
2855 else {
cf778b00 2856 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2857 err = 0;
2858 }
55737fda
SH
2859 spin_unlock(&net_family_lock);
2860
3410f22e 2861 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2862 return err;
2863}
c6d409cf 2864EXPORT_SYMBOL(sock_register);
1da177e4 2865
55737fda
SH
2866/**
2867 * sock_unregister - remove a protocol handler
2868 * @family: protocol family to remove
2869 *
1da177e4
LT
2870 * This function is called by a protocol handler that wants to
2871 * remove its address family, and have it unlinked from the
55737fda
SH
2872 * new socket creation.
2873 *
2874 * If protocol handler is a module, then it can use module reference
2875 * counts to protect against new references. If protocol handler is not
2876 * a module then it needs to provide its own protection in
2877 * the ops->create routine.
1da177e4 2878 */
f0fd27d4 2879void sock_unregister(int family)
1da177e4 2880{
f0fd27d4 2881 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2882
55737fda 2883 spin_lock(&net_family_lock);
a9b3cd7f 2884 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2885 spin_unlock(&net_family_lock);
2886
2887 synchronize_rcu();
2888
3410f22e 2889 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2890}
c6d409cf 2891EXPORT_SYMBOL(sock_unregister);
1da177e4 2892
bf2ae2e4
XL
2893bool sock_is_registered(int family)
2894{
66b51b0a 2895 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2896}
2897
77d76ea3 2898static int __init sock_init(void)
1da177e4 2899{
b3e19d92 2900 int err;
2ca794e5
EB
2901 /*
2902 * Initialize the network sysctl infrastructure.
2903 */
2904 err = net_sysctl_init();
2905 if (err)
2906 goto out;
b3e19d92 2907
1da177e4 2908 /*
89bddce5 2909 * Initialize skbuff SLAB cache
1da177e4
LT
2910 */
2911 skb_init();
1da177e4
LT
2912
2913 /*
89bddce5 2914 * Initialize the protocols module.
1da177e4
LT
2915 */
2916
2917 init_inodecache();
b3e19d92
NP
2918
2919 err = register_filesystem(&sock_fs_type);
2920 if (err)
2921 goto out_fs;
1da177e4 2922 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2923 if (IS_ERR(sock_mnt)) {
2924 err = PTR_ERR(sock_mnt);
2925 goto out_mount;
2926 }
77d76ea3
AK
2927
2928 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2929 */
2930
2931#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2932 err = netfilter_init();
2933 if (err)
2934 goto out;
1da177e4 2935#endif
cbeb321a 2936
408eccce 2937 ptp_classifier_init();
c1f19b51 2938
b3e19d92
NP
2939out:
2940 return err;
2941
2942out_mount:
2943 unregister_filesystem(&sock_fs_type);
2944out_fs:
2945 goto out;
1da177e4
LT
2946}
2947
77d76ea3
AK
2948core_initcall(sock_init); /* early initcall */
2949
1da177e4
LT
2950#ifdef CONFIG_PROC_FS
2951void socket_seq_show(struct seq_file *seq)
2952{
648845ab
TZ
2953 seq_printf(seq, "sockets: used %d\n",
2954 sock_inuse_get(seq->private));
1da177e4 2955}
89bddce5 2956#endif /* CONFIG_PROC_FS */
1da177e4 2957
89bbfc95 2958#ifdef CONFIG_COMPAT
36fd633e 2959static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2960{
6b96018b 2961 struct compat_ifconf ifc32;
7a229387 2962 struct ifconf ifc;
7a229387
AB
2963 int err;
2964
6b96018b 2965 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2966 return -EFAULT;
2967
36fd633e
AV
2968 ifc.ifc_len = ifc32.ifc_len;
2969 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2970
36fd633e
AV
2971 rtnl_lock();
2972 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2973 rtnl_unlock();
7a229387
AB
2974 if (err)
2975 return err;
2976
36fd633e 2977 ifc32.ifc_len = ifc.ifc_len;
6b96018b 2978 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2979 return -EFAULT;
2980
2981 return 0;
2982}
2983
6b96018b 2984static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2985{
3a7da39d
BH
2986 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2987 bool convert_in = false, convert_out = false;
44c02a2c
AV
2988 size_t buf_size = 0;
2989 struct ethtool_rxnfc __user *rxnfc = NULL;
2990 struct ifreq ifr;
3a7da39d
BH
2991 u32 rule_cnt = 0, actual_rule_cnt;
2992 u32 ethcmd;
7a229387 2993 u32 data;
3a7da39d 2994 int ret;
7a229387 2995
3a7da39d
BH
2996 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2997 return -EFAULT;
7a229387 2998
3a7da39d
BH
2999 compat_rxnfc = compat_ptr(data);
3000
3001 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
3002 return -EFAULT;
3003
3a7da39d
BH
3004 /* Most ethtool structures are defined without padding.
3005 * Unfortunately struct ethtool_rxnfc is an exception.
3006 */
3007 switch (ethcmd) {
3008 default:
3009 break;
3010 case ETHTOOL_GRXCLSRLALL:
3011 /* Buffer size is variable */
3012 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3013 return -EFAULT;
3014 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3015 return -ENOMEM;
3016 buf_size += rule_cnt * sizeof(u32);
3017 /* fall through */
3018 case ETHTOOL_GRXRINGS:
3019 case ETHTOOL_GRXCLSRLCNT:
3020 case ETHTOOL_GRXCLSRULE:
55664f32 3021 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3022 convert_out = true;
3023 /* fall through */
3024 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3025 buf_size += sizeof(struct ethtool_rxnfc);
3026 convert_in = true;
44c02a2c 3027 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3028 break;
3029 }
3030
44c02a2c 3031 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3032 return -EFAULT;
3033
44c02a2c 3034 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3035
3a7da39d 3036 if (convert_in) {
127fe533 3037 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3038 * fs.ring_cookie and at the end of fs, but nowhere else.
3039 */
127fe533
AD
3040 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3041 sizeof(compat_rxnfc->fs.m_ext) !=
3042 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3043 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3044 BUILD_BUG_ON(
3045 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3046 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3047 offsetof(struct ethtool_rxnfc, fs.location) -
3048 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3049
3050 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3051 (void __user *)(&rxnfc->fs.m_ext + 1) -
3052 (void __user *)rxnfc) ||
3a7da39d
BH
3053 copy_in_user(&rxnfc->fs.ring_cookie,
3054 &compat_rxnfc->fs.ring_cookie,
954b1244 3055 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3056 (void __user *)&rxnfc->fs.ring_cookie))
3057 return -EFAULT;
3058 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3059 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3060 return -EFAULT;
3061 } else if (copy_in_user(&rxnfc->rule_cnt,
3062 &compat_rxnfc->rule_cnt,
3063 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3064 return -EFAULT;
3065 }
3066
44c02a2c 3067 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3068 if (ret)
3069 return ret;
3070
3071 if (convert_out) {
3072 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3073 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3074 (const void __user *)rxnfc) ||
3a7da39d
BH
3075 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3076 &rxnfc->fs.ring_cookie,
954b1244
SH
3077 (const void __user *)(&rxnfc->fs.location + 1) -
3078 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3079 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3080 sizeof(rxnfc->rule_cnt)))
3081 return -EFAULT;
3082
3083 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3084 /* As an optimisation, we only copy the actual
3085 * number of rules that the underlying
3086 * function returned. Since Mallory might
3087 * change the rule count in user memory, we
3088 * check that it is less than the rule count
3089 * originally given (as the user buffer size),
3090 * which has been range-checked.
3091 */
3092 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3093 return -EFAULT;
3094 if (actual_rule_cnt < rule_cnt)
3095 rule_cnt = actual_rule_cnt;
3096 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3097 &rxnfc->rule_locs[0],
3098 rule_cnt * sizeof(u32)))
3099 return -EFAULT;
3100 }
3101 }
3102
3103 return 0;
7a229387
AB
3104}
3105
7a50a240
AB
3106static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3107{
7a50a240 3108 compat_uptr_t uptr32;
44c02a2c
AV
3109 struct ifreq ifr;
3110 void __user *saved;
3111 int err;
7a50a240 3112
44c02a2c 3113 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3114 return -EFAULT;
3115
3116 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3117 return -EFAULT;
3118
44c02a2c
AV
3119 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3120 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3121
44c02a2c
AV
3122 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3123 if (!err) {
3124 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3125 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3126 err = -EFAULT;
ccbd6a5a 3127 }
44c02a2c 3128 return err;
7a229387
AB
3129}
3130
590d4693
BH
3131/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3132static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3133 struct compat_ifreq __user *u_ifreq32)
7a229387 3134{
44c02a2c 3135 struct ifreq ifreq;
7a229387
AB
3136 u32 data32;
3137
44c02a2c 3138 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3139 return -EFAULT;
44c02a2c 3140 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3141 return -EFAULT;
44c02a2c 3142 ifreq.ifr_data = compat_ptr(data32);
7a229387 3143
44c02a2c 3144 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3145}
3146
37ac39bd
JB
3147static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3148 unsigned int cmd,
3149 struct compat_ifreq __user *uifr32)
3150{
3151 struct ifreq __user *uifr;
3152 int err;
3153
3154 /* Handle the fact that while struct ifreq has the same *layout* on
3155 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3156 * which are handled elsewhere, it still has different *size* due to
3157 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3158 * resulting in struct ifreq being 32 and 40 bytes respectively).
3159 * As a result, if the struct happens to be at the end of a page and
3160 * the next page isn't readable/writable, we get a fault. To prevent
3161 * that, copy back and forth to the full size.
3162 */
3163
3164 uifr = compat_alloc_user_space(sizeof(*uifr));
3165 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3166 return -EFAULT;
3167
3168 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3169
3170 if (!err) {
3171 switch (cmd) {
3172 case SIOCGIFFLAGS:
3173 case SIOCGIFMETRIC:
3174 case SIOCGIFMTU:
3175 case SIOCGIFMEM:
3176 case SIOCGIFHWADDR:
3177 case SIOCGIFINDEX:
3178 case SIOCGIFADDR:
3179 case SIOCGIFBRDADDR:
3180 case SIOCGIFDSTADDR:
3181 case SIOCGIFNETMASK:
3182 case SIOCGIFPFLAGS:
3183 case SIOCGIFTXQLEN:
3184 case SIOCGMIIPHY:
3185 case SIOCGMIIREG:
c6c9fee3 3186 case SIOCGIFNAME:
37ac39bd
JB
3187 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3188 err = -EFAULT;
3189 break;
3190 }
3191 }
3192 return err;
3193}
3194
a2116ed2
AB
3195static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3196 struct compat_ifreq __user *uifr32)
3197{
3198 struct ifreq ifr;
3199 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3200 int err;
3201
3202 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3203 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3204 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3205 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3206 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3207 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3208 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3209 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3210 if (err)
3211 return -EFAULT;
3212
44c02a2c 3213 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3214
3215 if (cmd == SIOCGIFMAP && !err) {
3216 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3217 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3218 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3219 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3220 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3221 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3222 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3223 if (err)
3224 err = -EFAULT;
3225 }
3226 return err;
3227}
3228
7a229387 3229struct rtentry32 {
c6d409cf 3230 u32 rt_pad1;
7a229387
AB
3231 struct sockaddr rt_dst; /* target address */
3232 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3233 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3234 unsigned short rt_flags;
3235 short rt_pad2;
3236 u32 rt_pad3;
3237 unsigned char rt_tos;
3238 unsigned char rt_class;
3239 short rt_pad4;
3240 short rt_metric; /* +1 for binary compatibility! */
7a229387 3241 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3242 u32 rt_mtu; /* per route MTU/Window */
3243 u32 rt_window; /* Window clamping */
7a229387
AB
3244 unsigned short rt_irtt; /* Initial RTT */
3245};
3246
3247struct in6_rtmsg32 {
3248 struct in6_addr rtmsg_dst;
3249 struct in6_addr rtmsg_src;
3250 struct in6_addr rtmsg_gateway;
3251 u32 rtmsg_type;
3252 u16 rtmsg_dst_len;
3253 u16 rtmsg_src_len;
3254 u32 rtmsg_metric;
3255 u32 rtmsg_info;
3256 u32 rtmsg_flags;
3257 s32 rtmsg_ifindex;
3258};
3259
6b96018b
AB
3260static int routing_ioctl(struct net *net, struct socket *sock,
3261 unsigned int cmd, void __user *argp)
7a229387
AB
3262{
3263 int ret;
3264 void *r = NULL;
3265 struct in6_rtmsg r6;
3266 struct rtentry r4;
3267 char devname[16];
3268 u32 rtdev;
3269 mm_segment_t old_fs = get_fs();
3270
6b96018b
AB
3271 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3272 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3273 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3274 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3275 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3276 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3277 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3278 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3279 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3280 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3281 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3282
3283 r = (void *) &r6;
3284 } else { /* ipv4 */
6b96018b 3285 struct rtentry32 __user *ur4 = argp;
c6d409cf 3286 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3287 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3288 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3289 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3290 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3291 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3292 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3293 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3294 if (rtdev) {
c6d409cf 3295 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3296 r4.rt_dev = (char __user __force *)devname;
3297 devname[15] = 0;
7a229387
AB
3298 } else
3299 r4.rt_dev = NULL;
3300
3301 r = (void *) &r4;
3302 }
3303
3304 if (ret) {
3305 ret = -EFAULT;
3306 goto out;
3307 }
3308
c6d409cf 3309 set_fs(KERNEL_DS);
63ff03ab 3310 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3311 set_fs(old_fs);
7a229387
AB
3312
3313out:
7a229387
AB
3314 return ret;
3315}
3316
3317/* Since old style bridge ioctls end up using SIOCDEVPRIVATE
3318 * for some operations; this forces use of the newer bridge-utils that
25985edc 3319 * use compatible ioctls
7a229387 3320 */
6b96018b 3321static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3322{
6b96018b 3323 compat_ulong_t tmp;
7a229387 3324
6b96018b 3325 if (get_user(tmp, argp))
7a229387
AB
3326 return -EFAULT;
3327 if (tmp == BRCTL_GET_VERSION)
3328 return BRCTL_VERSION + 1;
3329 return -EINVAL;
3330}
3331
6b96018b
AB
3332static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3333 unsigned int cmd, unsigned long arg)
3334{
3335 void __user *argp = compat_ptr(arg);
3336 struct sock *sk = sock->sk;
3337 struct net *net = sock_net(sk);
7a229387 3338
6b96018b 3339 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3340 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3341
3342 switch (cmd) {
3343 case SIOCSIFBR:
3344 case SIOCGIFBR:
3345 return old_bridge_ioctl(argp);
6b96018b 3346 case SIOCGIFCONF:
36fd633e 3347 return compat_dev_ifconf(net, argp);
6b96018b
AB
3348 case SIOCETHTOOL:
3349 return ethtool_ioctl(net, argp);
7a50a240
AB
3350 case SIOCWANDEV:
3351 return compat_siocwandev(net, argp);
a2116ed2
AB
3352 case SIOCGIFMAP:
3353 case SIOCSIFMAP:
3354 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3355 case SIOCADDRT:
3356 case SIOCDELRT:
3357 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3358 case SIOCGSTAMP_OLD:
3359 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3360 if (!sock->ops->gettstamp)
3361 return -ENOIOCTLCMD;
0768e170 3362 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3363 !COMPAT_USE_64BIT_TIME);
3364
590d4693
BH
3365 case SIOCBONDSLAVEINFOQUERY:
3366 case SIOCBONDINFOQUERY:
a2116ed2 3367 case SIOCSHWTSTAMP:
fd468c74 3368 case SIOCGHWTSTAMP:
590d4693 3369 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3370
3371 case FIOSETOWN:
3372 case SIOCSPGRP:
3373 case FIOGETOWN:
3374 case SIOCGPGRP:
3375 case SIOCBRADDBR:
3376 case SIOCBRDELBR:
3377 case SIOCGIFVLAN:
3378 case SIOCSIFVLAN:
3379 case SIOCADDDLCI:
3380 case SIOCDELDLCI:
c62cce2c 3381 case SIOCGSKNS:
0768e170
AB
3382 case SIOCGSTAMP_NEW:
3383 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3384 return sock_ioctl(file, cmd, arg);
3385
3386 case SIOCGIFFLAGS:
3387 case SIOCSIFFLAGS:
3388 case SIOCGIFMETRIC:
3389 case SIOCSIFMETRIC:
3390 case SIOCGIFMTU:
3391 case SIOCSIFMTU:
3392 case SIOCGIFMEM:
3393 case SIOCSIFMEM:
3394 case SIOCGIFHWADDR:
3395 case SIOCSIFHWADDR:
3396 case SIOCADDMULTI:
3397 case SIOCDELMULTI:
3398 case SIOCGIFINDEX:
6b96018b
AB
3399 case SIOCGIFADDR:
3400 case SIOCSIFADDR:
3401 case SIOCSIFHWBROADCAST:
6b96018b 3402 case SIOCDIFADDR:
6b96018b
AB
3403 case SIOCGIFBRDADDR:
3404 case SIOCSIFBRDADDR:
3405 case SIOCGIFDSTADDR:
3406 case SIOCSIFDSTADDR:
3407 case SIOCGIFNETMASK:
3408 case SIOCSIFNETMASK:
3409 case SIOCSIFPFLAGS:
3410 case SIOCGIFPFLAGS:
3411 case SIOCGIFTXQLEN:
3412 case SIOCSIFTXQLEN:
3413 case SIOCBRADDIF:
3414 case SIOCBRDELIF:
c6c9fee3 3415 case SIOCGIFNAME:
9177efd3
AB
3416 case SIOCSIFNAME:
3417 case SIOCGMIIPHY:
3418 case SIOCGMIIREG:
3419 case SIOCSMIIREG:
f92d4fc9
AV
3420 case SIOCBONDENSLAVE:
3421 case SIOCBONDRELEASE:
3422 case SIOCBONDSETHWADDR:
3423 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3424 return compat_ifreq_ioctl(net, sock, cmd, argp);
3425
6b96018b
AB
3426 case SIOCSARP:
3427 case SIOCGARP:
3428 case SIOCDARP:
6b96018b 3429 case SIOCATMARK:
63ff03ab 3430 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3431 }
3432
6b96018b
AB
3433 return -ENOIOCTLCMD;
3434}
7a229387 3435
95c96174 3436static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3437 unsigned long arg)
89bbfc95
SP
3438{
3439 struct socket *sock = file->private_data;
3440 int ret = -ENOIOCTLCMD;
87de87d5
DM
3441 struct sock *sk;
3442 struct net *net;
3443
3444 sk = sock->sk;
3445 net = sock_net(sk);
89bbfc95
SP
3446
3447 if (sock->ops->compat_ioctl)
3448 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3449
87de87d5
DM
3450 if (ret == -ENOIOCTLCMD &&
3451 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3452 ret = compat_wext_handle_ioctl(net, cmd, arg);
3453
6b96018b
AB
3454 if (ret == -ENOIOCTLCMD)
3455 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3456
89bbfc95
SP
3457 return ret;
3458}
3459#endif
3460
8a3c245c
PT
3461/**
3462 * kernel_bind - bind an address to a socket (kernel space)
3463 * @sock: socket
3464 * @addr: address
3465 * @addrlen: length of address
3466 *
3467 * Returns 0 or an error.
3468 */
3469
ac5a488e
SS
3470int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3471{
3472 return sock->ops->bind(sock, addr, addrlen);
3473}
c6d409cf 3474EXPORT_SYMBOL(kernel_bind);
ac5a488e 3475
8a3c245c
PT
3476/**
3477 * kernel_listen - move socket to listening state (kernel space)
3478 * @sock: socket
3479 * @backlog: pending connections queue size
3480 *
3481 * Returns 0 or an error.
3482 */
3483
ac5a488e
SS
3484int kernel_listen(struct socket *sock, int backlog)
3485{
3486 return sock->ops->listen(sock, backlog);
3487}
c6d409cf 3488EXPORT_SYMBOL(kernel_listen);
ac5a488e 3489
8a3c245c
PT
3490/**
3491 * kernel_accept - accept a connection (kernel space)
3492 * @sock: listening socket
3493 * @newsock: new connected socket
3494 * @flags: flags
3495 *
3496 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3497 * If it fails, @newsock is guaranteed to be %NULL.
3498 * Returns 0 or an error.
3499 */
3500
ac5a488e
SS
3501int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3502{
3503 struct sock *sk = sock->sk;
3504 int err;
3505
3506 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3507 newsock);
3508 if (err < 0)
3509 goto done;
3510
cdfbabfb 3511 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3512 if (err < 0) {
3513 sock_release(*newsock);
fa8705b0 3514 *newsock = NULL;
ac5a488e
SS
3515 goto done;
3516 }
3517
3518 (*newsock)->ops = sock->ops;
1b08534e 3519 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3520
3521done:
3522 return err;
3523}
c6d409cf 3524EXPORT_SYMBOL(kernel_accept);
ac5a488e 3525
8a3c245c
PT
3526/**
3527 * kernel_connect - connect a socket (kernel space)
3528 * @sock: socket
3529 * @addr: address
3530 * @addrlen: address length
3531 * @flags: flags (O_NONBLOCK, ...)
3532 *
3533 * For datagram sockets, @addr is the address to which datagrams are sent
3534 * by default, and the only address from which datagrams are received.
3535 * For stream sockets, attempts to connect to @addr.
3536 * Returns 0 or an error code.
3537 */
3538
ac5a488e 3539int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3540 int flags)
ac5a488e
SS
3541{
3542 return sock->ops->connect(sock, addr, addrlen, flags);
3543}
c6d409cf 3544EXPORT_SYMBOL(kernel_connect);
ac5a488e 3545
8a3c245c
PT
3546/**
3547 * kernel_getsockname - get the address to which the socket is bound (kernel space)
3548 * @sock: socket
3549 * @addr: address holder
3550 *
3551 * Fills the @addr pointer with the address to which the socket is bound.
3552 * Returns 0 or an error code.
3553 */
3554
9b2c45d4 3555int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3556{
9b2c45d4 3557 return sock->ops->getname(sock, addr, 0);
ac5a488e 3558}
c6d409cf 3559EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3560
8a3c245c
PT
3561/**
3562 * kernel_getpeername - get the address to which the socket is connected (kernel space)
3563 * @sock: socket
3564 * @addr: address holder
3565 *
3566 * Fills the @addr pointer with the address to which the socket is connected.
3567 * Returns 0 or an error code.
3568 */
3569
9b2c45d4 3570int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3571{
9b2c45d4 3572 return sock->ops->getname(sock, addr, 1);
ac5a488e 3573}
c6d409cf 3574EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3575
8a3c245c
PT
3576/**
3577 * kernel_getsockopt - get a socket option (kernel space)
3578 * @sock: socket
3579 * @level: API level (SOL_SOCKET, ...)
3580 * @optname: option tag
3581 * @optval: option value
3582 * @optlen: option length
3583 *
3584 * Assigns the option length to @optlen.
3585 * Returns 0 or an error.
3586 */
3587
ac5a488e
SS
3588int kernel_getsockopt(struct socket *sock, int level, int optname,
3589 char *optval, int *optlen)
3590{
3591 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3592 char __user *uoptval;
3593 int __user *uoptlen;
ac5a488e
SS
3594 int err;
3595
fb8621bb
NK
3596 uoptval = (char __user __force *) optval;
3597 uoptlen = (int __user __force *) optlen;
3598
ac5a488e
SS
3599 set_fs(KERNEL_DS);
3600 if (level == SOL_SOCKET)
fb8621bb 3601 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3602 else
fb8621bb
NK
3603 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3604 uoptlen);
ac5a488e
SS
3605 set_fs(oldfs);
3606 return err;
3607}
c6d409cf 3608EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3609
8a3c245c
PT
3610/**
3611 * kernel_setsockopt - set a socket option (kernel space)
3612 * @sock: socket
3613 * @level: API level (SOL_SOCKET, ...)
3614 * @optname: option tag
3615 * @optval: option value
3616 * @optlen: option length
3617 *
3618 * Returns 0 or an error.
3619 */
3620
ac5a488e 3621int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3622 char *optval, unsigned int optlen)
ac5a488e
SS
3623{
3624 mm_segment_t oldfs = get_fs();
fb8621bb 3625 char __user *uoptval;
ac5a488e
SS
3626 int err;
3627
fb8621bb
NK
3628 uoptval = (char __user __force *) optval;
3629
ac5a488e
SS
3630 set_fs(KERNEL_DS);
3631 if (level == SOL_SOCKET)
fb8621bb 3632 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3633 else
fb8621bb 3634 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3635 optlen);
3636 set_fs(oldfs);
3637 return err;
3638}
c6d409cf 3639EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3640
8a3c245c
PT
3641/**
3642 * kernel_sendpage - send a &page through a socket (kernel space)
3643 * @sock: socket
3644 * @page: page
3645 * @offset: page offset
3646 * @size: total size in bytes
3647 * @flags: flags (MSG_DONTWAIT, ...)
3648 *
3649 * Returns the total amount sent in bytes or an error.
3650 */
3651
ac5a488e
SS
3652int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3653 size_t size, int flags)
3654{
3655 if (sock->ops->sendpage)
3656 return sock->ops->sendpage(sock, page, offset, size, flags);
3657
3658 return sock_no_sendpage(sock, page, offset, size, flags);
3659}
c6d409cf 3660EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3661
8a3c245c
PT
3662/**
3663 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3664 * @sk: sock
3665 * @page: page
3666 * @offset: page offset
3667 * @size: total size in bytes
3668 * @flags: flags (MSG_DONTWAIT, ...)
3669 *
3670 * Returns the total amount sent in bytes or an error.
3671 * Caller must hold @sk.
3672 */
3673
306b13eb
TH
3674int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3675 size_t size, int flags)
3676{
3677 struct socket *sock = sk->sk_socket;
3678
3679 if (sock->ops->sendpage_locked)
3680 return sock->ops->sendpage_locked(sk, page, offset, size,
3681 flags);
3682
3683 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3684}
3685EXPORT_SYMBOL(kernel_sendpage_locked);
3686
8a3c245c
PT
3687/**
3688 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
3689 * @sock: socket
3690 * @how: connection part
3691 *
3692 * Returns 0 or an error.
3693 */
3694
91cf45f0
TM
3695int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3696{
3697 return sock->ops->shutdown(sock, how);
3698}
91cf45f0 3699EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3700
8a3c245c
PT
3701/**
3702 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3703 * @sk: socket
3704 *
3705 * This routine returns the IP overhead imposed by a socket i.e.
3706 * the length of the underlying IP header, depending on whether
3707 * this is an IPv4 or IPv6 socket and the length from IP options turned
3708 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3709 */
8a3c245c 3710
113c3075
P
3711u32 kernel_sock_ip_overhead(struct sock *sk)
3712{
3713 struct inet_sock *inet;
3714 struct ip_options_rcu *opt;
3715 u32 overhead = 0;
113c3075
P
3716#if IS_ENABLED(CONFIG_IPV6)
3717 struct ipv6_pinfo *np;
3718 struct ipv6_txoptions *optv6 = NULL;
3719#endif /* IS_ENABLED(CONFIG_IPV6) */
3720
3721 if (!sk)
3722 return overhead;
3723
113c3075
P
3724 switch (sk->sk_family) {
3725 case AF_INET:
3726 inet = inet_sk(sk);
3727 overhead += sizeof(struct iphdr);
3728 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3729 sock_owned_by_user(sk));
113c3075
P
3730 if (opt)
3731 overhead += opt->opt.optlen;
3732 return overhead;
3733#if IS_ENABLED(CONFIG_IPV6)
3734 case AF_INET6:
3735 np = inet6_sk(sk);
3736 overhead += sizeof(struct ipv6hdr);
3737 if (np)
3738 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3739 sock_owned_by_user(sk));
113c3075
P
3740 if (optv6)
3741 overhead += (optv6->opt_flen + optv6->opt_nflen);
3742 return overhead;
3743#endif /* IS_ENABLED(CONFIG_IPV6) */
3744 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3745 return overhead;
3746 }
3747}
3748EXPORT_SYMBOL(kernel_sock_ip_overhead);