]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/socket.c
io_uring: ensure async punted read/write requests copy iovec
[mirror_ubuntu-jammy-kernel.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
1da177e4 55#include <linux/mm.h>
1da177e4
LT
56#include <linux/socket.h>
57#include <linux/file.h>
58#include <linux/net.h>
59#include <linux/interrupt.h>
aaca0bdc 60#include <linux/thread_info.h>
55737fda 61#include <linux/rcupdate.h>
1da177e4
LT
62#include <linux/netdevice.h>
63#include <linux/proc_fs.h>
64#include <linux/seq_file.h>
4a3e2f71 65#include <linux/mutex.h>
1da177e4 66#include <linux/if_bridge.h>
20380731
ACM
67#include <linux/if_frad.h>
68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
c7dc504e 103#include <linux/termios.h>
6b96018b 104#include <linux/sockios.h>
076bb0c8 105#include <net/busy_poll.h>
f24b9be5 106#include <linux/errqueue.h>
06021292 107
/*
 * Busy-poll tunables.  They are only compiled in when
 * CONFIG_NET_RX_BUSY_POLL is set and start out as zero.
 */
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
6b96018b 112
8ae5e030
AV
113static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
114static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 115static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
116
117static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
118static __poll_t sock_poll(struct file *file,
119 struct poll_table_struct *wait);
89bddce5 120static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
121#ifdef CONFIG_COMPAT
122static long compat_sock_ioctl(struct file *file,
89bddce5 123 unsigned int cmd, unsigned long arg);
89bbfc95 124#endif
1da177e4 125static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
126static ssize_t sock_sendpage(struct file *file, struct page *page,
127 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 128static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 129 struct pipe_inode_info *pipe, size_t len,
9c55e01c 130 unsigned int flags);
1da177e4 131
1da177e4
LT
132/*
133 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
134 * in the operation structures but are done directly via the socketcall() multiplexor.
135 */
136
da7071d7 137static const struct file_operations socket_file_ops = {
1da177e4
LT
138 .owner = THIS_MODULE,
139 .llseek = no_llseek,
8ae5e030
AV
140 .read_iter = sock_read_iter,
141 .write_iter = sock_write_iter,
1da177e4
LT
142 .poll = sock_poll,
143 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
144#ifdef CONFIG_COMPAT
145 .compat_ioctl = compat_sock_ioctl,
146#endif
1da177e4 147 .mmap = sock_mmap,
1da177e4
LT
148 .release = sock_close,
149 .fasync = sock_fasync,
5274f052
JA
150 .sendpage = sock_sendpage,
151 .splice_write = generic_splice_sendpage,
9c55e01c 152 .splice_read = sock_splice_read,
1da177e4
LT
153};
154
155/*
156 * The protocol list. Each protocol is registered in here.
157 */
158
1da177e4 159static DEFINE_SPINLOCK(net_family_lock);
190683a9 160static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 161
1da177e4 162/*
89bddce5
SH
163 * Support routines.
164 * Move socket addresses back and forth across the kernel/user
165 * divide and look after the messy bits.
1da177e4
LT
166 */
167
1da177e4
LT
168/**
169 * move_addr_to_kernel - copy a socket address into kernel space
170 * @uaddr: Address in user space
171 * @kaddr: Address in kernel space
172 * @ulen: Length in user space
173 *
174 * The address is copied into kernel space. If the provided address is
175 * too long an error code of -EINVAL is returned. If the copy gives
176 * invalid addresses -EFAULT is returned. On a success 0 is returned.
177 */
178
43db362d 179int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 180{
230b1839 181 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 182 return -EINVAL;
89bddce5 183 if (ulen == 0)
1da177e4 184 return 0;
89bddce5 185 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 186 return -EFAULT;
3ec3b2fb 187 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
188}
189
190/**
191 * move_addr_to_user - copy an address to user space
192 * @kaddr: kernel space address
193 * @klen: length of address in kernel
194 * @uaddr: user space address
195 * @ulen: pointer to user length field
196 *
197 * The value pointed to by ulen on entry is the buffer length available.
198 * This is overwritten with the buffer space used. -EINVAL is returned
199 * if an overlong buffer is specified or a negative buffer size. -EFAULT
200 * is returned if either the buffer or the length field are not
201 * accessible.
202 * After copying the data up to the limit the user specifies, the true
203 * length of the data is written over the length limit the user
204 * specified. Zero is returned for a success.
205 */
89bddce5 206
43db362d 207static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 208 void __user *uaddr, int __user *ulen)
1da177e4
LT
209{
210 int err;
211 int len;
212
68c6beb3 213 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
214 err = get_user(len, ulen);
215 if (err)
1da177e4 216 return err;
89bddce5
SH
217 if (len > klen)
218 len = klen;
68c6beb3 219 if (len < 0)
1da177e4 220 return -EINVAL;
89bddce5 221 if (len) {
d6fe3945
SG
222 if (audit_sockaddr(klen, kaddr))
223 return -ENOMEM;
89bddce5 224 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
225 return -EFAULT;
226 }
227 /*
89bddce5
SH
228 * "fromlen shall refer to the value before truncation.."
229 * 1003.1g
1da177e4
LT
230 */
231 return __put_user(klen, ulen);
232}
233
08009a76 234static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
235
236static struct inode *sock_alloc_inode(struct super_block *sb)
237{
238 struct socket_alloc *ei;
89bddce5 239
e94b1766 240 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
241 if (!ei)
242 return NULL;
333f7909
AV
243 init_waitqueue_head(&ei->socket.wq.wait);
244 ei->socket.wq.fasync_list = NULL;
245 ei->socket.wq.flags = 0;
89bddce5 246
1da177e4
LT
247 ei->socket.state = SS_UNCONNECTED;
248 ei->socket.flags = 0;
249 ei->socket.ops = NULL;
250 ei->socket.sk = NULL;
251 ei->socket.file = NULL;
1da177e4
LT
252
253 return &ei->vfs_inode;
254}
255
6d7855c5 256static void sock_free_inode(struct inode *inode)
1da177e4 257{
43815482
ED
258 struct socket_alloc *ei;
259
260 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 261 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
262}
263
51cc5068 264static void init_once(void *foo)
1da177e4 265{
89bddce5 266 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 267
a35afb83 268 inode_init_once(&ei->vfs_inode);
1da177e4 269}
89bddce5 270
1e911632 271static void init_inodecache(void)
1da177e4
LT
272{
273 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
274 sizeof(struct socket_alloc),
275 0,
276 (SLAB_HWCACHE_ALIGN |
277 SLAB_RECLAIM_ACCOUNT |
5d097056 278 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 279 init_once);
1e911632 280 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
281}
282
b87221de 283static const struct super_operations sockfs_ops = {
c6d409cf 284 .alloc_inode = sock_alloc_inode,
6d7855c5 285 .free_inode = sock_free_inode,
c6d409cf 286 .statfs = simple_statfs,
1da177e4
LT
287};
288
c23fbb6b
ED
289/*
290 * sockfs_dname() is called from d_path().
291 */
292static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
293{
294 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 295 d_inode(dentry)->i_ino);
c23fbb6b
ED
296}
297
3ba13d17 298static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 299 .d_dname = sockfs_dname,
1da177e4
LT
300};
301
bba0bd31
AG
302static int sockfs_xattr_get(const struct xattr_handler *handler,
303 struct dentry *dentry, struct inode *inode,
304 const char *suffix, void *value, size_t size)
305{
306 if (value) {
307 if (dentry->d_name.len + 1 > size)
308 return -ERANGE;
309 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
310 }
311 return dentry->d_name.len + 1;
312}
313
314#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
315#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
316#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
317
318static const struct xattr_handler sockfs_xattr_handler = {
319 .name = XATTR_NAME_SOCKPROTONAME,
320 .get = sockfs_xattr_get,
321};
322
4a590153
AG
323static int sockfs_security_xattr_set(const struct xattr_handler *handler,
324 struct dentry *dentry, struct inode *inode,
325 const char *suffix, const void *value,
326 size_t size, int flags)
327{
328 /* Handled by LSM. */
329 return -EAGAIN;
330}
331
332static const struct xattr_handler sockfs_security_xattr_handler = {
333 .prefix = XATTR_SECURITY_PREFIX,
334 .set = sockfs_security_xattr_set,
335};
336
bba0bd31
AG
337static const struct xattr_handler *sockfs_xattr_handlers[] = {
338 &sockfs_xattr_handler,
4a590153 339 &sockfs_security_xattr_handler,
bba0bd31
AG
340 NULL
341};
342
fba9be49 343static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 344{
fba9be49
DH
345 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
346 if (!ctx)
347 return -ENOMEM;
348 ctx->ops = &sockfs_ops;
349 ctx->dops = &sockfs_dentry_operations;
350 ctx->xattr = sockfs_xattr_handlers;
351 return 0;
c74a1cbb
AV
352}
353
354static struct vfsmount *sock_mnt __read_mostly;
355
356static struct file_system_type sock_fs_type = {
357 .name = "sockfs",
fba9be49 358 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
359 .kill_sb = kill_anon_super,
360};
361
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success they return the file descriptor,
 *	with the file struct implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we no longer
 *	refer to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
378
8a3c245c
PT
379/**
380 * sock_alloc_file - Bind a &socket to a &file
381 * @sock: socket
382 * @flags: file status flags
383 * @dname: protocol name
384 *
385 * Returns the &file bound with @sock, implicitly storing it
386 * in sock->file. If dname is %NULL, sets to "".
387 * On failure the return is a ERR pointer (see linux/err.h).
388 * This function uses GFP_KERNEL internally.
389 */
390
aab174f0 391struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 392{
7cbe66b6 393 struct file *file;
1da177e4 394
d93aa9d8
AV
395 if (!dname)
396 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 397
d93aa9d8
AV
398 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
399 O_RDWR | (flags & O_NONBLOCK),
400 &socket_file_ops);
b5ffe634 401 if (IS_ERR(file)) {
8e1611e2 402 sock_release(sock);
39b65252 403 return file;
cc3808f8
AV
404 }
405
406 sock->file = file;
39d8c1b6 407 file->private_data = sock;
d8e464ec 408 stream_open(SOCK_INODE(sock), file);
28407630 409 return file;
39d8c1b6 410}
56b31d1c 411EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 412
56b31d1c 413static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
414{
415 struct file *newfile;
28407630 416 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
417 if (unlikely(fd < 0)) {
418 sock_release(sock);
28407630 419 return fd;
ce4bb04c 420 }
39d8c1b6 421
aab174f0 422 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 423 if (!IS_ERR(newfile)) {
39d8c1b6 424 fd_install(fd, newfile);
28407630
AV
425 return fd;
426 }
7cbe66b6 427
28407630
AV
428 put_unused_fd(fd);
429 return PTR_ERR(newfile);
1da177e4
LT
430}
431
8a3c245c
PT
432/**
433 * sock_from_file - Return the &socket bounded to @file.
434 * @file: file
435 * @err: pointer to an error code return
436 *
437 * On failure returns %NULL and assigns -ENOTSOCK to @err.
438 */
439
406a3c63 440struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 441{
6cb153ca
BL
442 if (file->f_op == &socket_file_ops)
443 return file->private_data; /* set in sock_map_fd */
444
23bb80d2
ED
445 *err = -ENOTSOCK;
446 return NULL;
6cb153ca 447}
406a3c63 448EXPORT_SYMBOL(sock_from_file);
6cb153ca 449
1da177e4 450/**
c6d409cf 451 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
452 * @fd: file handle
453 * @err: pointer to an error code return
454 *
455 * The file handle passed in is locked and the socket it is bound
241c4667 456 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
457 * with a negative errno code and NULL is returned. The function checks
458 * for both invalid handles and passing a handle which is not a socket.
459 *
460 * On a success the socket object pointer is returned.
461 */
462
463struct socket *sockfd_lookup(int fd, int *err)
464{
465 struct file *file;
1da177e4
LT
466 struct socket *sock;
467
89bddce5
SH
468 file = fget(fd);
469 if (!file) {
1da177e4
LT
470 *err = -EBADF;
471 return NULL;
472 }
89bddce5 473
6cb153ca
BL
474 sock = sock_from_file(file, err);
475 if (!sock)
1da177e4 476 fput(file);
6cb153ca
BL
477 return sock;
478}
c6d409cf 479EXPORT_SYMBOL(sockfd_lookup);
1da177e4 480
6cb153ca
BL
481static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
482{
00e188ef 483 struct fd f = fdget(fd);
6cb153ca
BL
484 struct socket *sock;
485
3672558c 486 *err = -EBADF;
00e188ef
AV
487 if (f.file) {
488 sock = sock_from_file(f.file, err);
489 if (likely(sock)) {
490 *fput_needed = f.flags;
6cb153ca 491 return sock;
00e188ef
AV
492 }
493 fdput(f);
1da177e4 494 }
6cb153ca 495 return NULL;
1da177e4
LT
496}
497
600e1779
MY
498static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
499 size_t size)
500{
501 ssize_t len;
502 ssize_t used = 0;
503
c5ef6035 504 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
505 if (len < 0)
506 return len;
507 used += len;
508 if (buffer) {
509 if (size < used)
510 return -ERANGE;
511 buffer += len;
512 }
513
514 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
515 used += len;
516 if (buffer) {
517 if (size < used)
518 return -ERANGE;
519 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
520 buffer += len;
521 }
522
523 return used;
524}
525
dc647ec8 526static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
527{
528 int err = simple_setattr(dentry, iattr);
529
e1a3a60a 530 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
531 struct socket *sock = SOCKET_I(d_inode(dentry));
532
6d8c50dc
CW
533 if (sock->sk)
534 sock->sk->sk_uid = iattr->ia_uid;
535 else
536 err = -ENOENT;
86741ec2
LC
537 }
538
539 return err;
540}
541
600e1779 542static const struct inode_operations sockfs_inode_ops = {
600e1779 543 .listxattr = sockfs_listxattr,
86741ec2 544 .setattr = sockfs_setattr,
600e1779
MY
545};
546
1da177e4 547/**
8a3c245c 548 * sock_alloc - allocate a socket
89bddce5 549 *
1da177e4
LT
550 * Allocate a new inode and socket object. The two are bound together
551 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 552 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
553 */
554
f4a00aac 555struct socket *sock_alloc(void)
1da177e4 556{
89bddce5
SH
557 struct inode *inode;
558 struct socket *sock;
1da177e4 559
a209dfc7 560 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
561 if (!inode)
562 return NULL;
563
564 sock = SOCKET_I(inode);
565
85fe4025 566 inode->i_ino = get_next_ino();
89bddce5 567 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
568 inode->i_uid = current_fsuid();
569 inode->i_gid = current_fsgid();
600e1779 570 inode->i_op = &sockfs_inode_ops;
1da177e4 571
1da177e4
LT
572 return sock;
573}
f4a00aac 574EXPORT_SYMBOL(sock_alloc);
1da177e4 575
1da177e4 576/**
8a3c245c 577 * sock_release - close a socket
1da177e4
LT
578 * @sock: socket to close
579 *
580 * The socket is released from the protocol stack if it has a release
581 * callback, and the inode is then released if the socket is bound to
89bddce5 582 * an inode not a file.
1da177e4 583 */
89bddce5 584
6d8c50dc 585static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
586{
587 if (sock->ops) {
588 struct module *owner = sock->ops->owner;
589
6d8c50dc
CW
590 if (inode)
591 inode_lock(inode);
1da177e4 592 sock->ops->release(sock);
ff7b11aa 593 sock->sk = NULL;
6d8c50dc
CW
594 if (inode)
595 inode_unlock(inode);
1da177e4
LT
596 sock->ops = NULL;
597 module_put(owner);
598 }
599
333f7909 600 if (sock->wq.fasync_list)
3410f22e 601 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 602
1da177e4
LT
603 if (!sock->file) {
604 iput(SOCK_INODE(sock));
605 return;
606 }
89bddce5 607 sock->file = NULL;
1da177e4 608}
6d8c50dc
CW
609
610void sock_release(struct socket *sock)
611{
612 __sock_release(sock, NULL);
613}
c6d409cf 614EXPORT_SYMBOL(sock_release);
1da177e4 615
c14ac945 616void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 617{
140c55d4
ED
618 u8 flags = *tx_flags;
619
c14ac945 620 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
621 flags |= SKBTX_HW_TSTAMP;
622
c14ac945 623 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
624 flags |= SKBTX_SW_TSTAMP;
625
c14ac945 626 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
627 flags |= SKBTX_SCHED_TSTAMP;
628
140c55d4 629 *tx_flags = flags;
20d49473 630}
67cc0d40 631EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 632
8c3c447b
PA
633INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
634 size_t));
a648a592
PA
635INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
636 size_t));
d8725c86 637static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 638{
a648a592
PA
639 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
640 inet_sendmsg, sock, msg,
641 msg_data_left(msg));
d8725c86
AV
642 BUG_ON(ret == -EIOCBQUEUED);
643 return ret;
1da177e4
LT
644}
645
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM.  If the security
 *	hook returns a non-zero value, that value is returned and nothing
 *	is sent.
 *	Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 662
8a3c245c
PT
663/**
664 * kernel_sendmsg - send a message through @sock (kernel-space)
665 * @sock: socket
666 * @msg: message header
667 * @vec: kernel vec
668 * @num: vec array length
669 * @size: total message data size
670 *
671 * Builds the message data with @vec and sends it through @sock.
672 * Returns the number of bytes sent, or an error code.
673 */
674
1da177e4
LT
675int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
676 struct kvec *vec, size_t num, size_t size)
677{
aa563d7b 678 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 679 return sock_sendmsg(sock, msg);
1da177e4 680}
c6d409cf 681EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 682
8a3c245c
PT
683/**
684 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
685 * @sk: sock
686 * @msg: message header
687 * @vec: output s/g array
688 * @num: output s/g array length
689 * @size: total message data size
690 *
691 * Builds the message data with @vec and sends it through @sock.
692 * Returns the number of bytes sent, or an error code.
693 * Caller must hold @sk.
694 */
695
306b13eb
TH
696int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
697 struct kvec *vec, size_t num, size_t size)
698{
699 struct socket *sock = sk->sk_socket;
700
701 if (!sock->ops->sendmsg_locked)
db5980d8 702 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 703
aa563d7b 704 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
705
706 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
707}
708EXPORT_SYMBOL(kernel_sendmsg_locked);
709
8605330a
SHY
710static bool skb_is_err_queue(const struct sk_buff *skb)
711{
712 /* pkt_type of skbs enqueued on the error queue are set to
713 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
714 * in recvmsg, since skbs received on a local socket will never
715 * have a pkt_type of PACKET_OUTGOING.
716 */
717 return skb->pkt_type == PACKET_OUTGOING;
718}
719
b50a5c70
ML
720/* On transmit, software and hardware timestamps are returned independently.
721 * As the two skb clones share the hardware timestamp, which may be updated
722 * before the software timestamp is received, a hardware TX timestamp may be
723 * returned only if there is no software TX timestamp. Ignore false software
724 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 725 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
726 * hardware timestamp.
727 */
728static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
729{
730 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
731}
732
aad9c8c4
ML
733static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
734{
735 struct scm_ts_pktinfo ts_pktinfo;
736 struct net_device *orig_dev;
737
738 if (!skb_mac_header_was_set(skb))
739 return;
740
741 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
742
743 rcu_read_lock();
744 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
745 if (orig_dev)
746 ts_pktinfo.if_index = orig_dev->ifindex;
747 rcu_read_unlock();
748
749 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
750 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
751 sizeof(ts_pktinfo), &ts_pktinfo);
752}
753
92f37fd2
ED
754/*
755 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
756 */
757void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
758 struct sk_buff *skb)
759{
20d49473 760 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 761 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
762 struct scm_timestamping_internal tss;
763
b50a5c70 764 int empty = 1, false_tstamp = 0;
20d49473
PO
765 struct skb_shared_hwtstamps *shhwtstamps =
766 skb_hwtstamps(skb);
767
768 /* Race occurred between timestamp enabling and packet
769 receiving. Fill in the current time for now. */
b50a5c70 770 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 771 __net_timestamp(skb);
b50a5c70
ML
772 false_tstamp = 1;
773 }
20d49473
PO
774
775 if (need_software_tstamp) {
776 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
777 if (new_tstamp) {
778 struct __kernel_sock_timeval tv;
779
780 skb_get_new_timestamp(skb, &tv);
781 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
782 sizeof(tv), &tv);
783 } else {
784 struct __kernel_old_timeval tv;
785
786 skb_get_timestamp(skb, &tv);
787 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
788 sizeof(tv), &tv);
789 }
20d49473 790 } else {
887feae3
DD
791 if (new_tstamp) {
792 struct __kernel_timespec ts;
793
794 skb_get_new_timestampns(skb, &ts);
795 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
796 sizeof(ts), &ts);
797 } else {
df1b4ba9 798 struct __kernel_old_timespec ts;
887feae3
DD
799
800 skb_get_timestampns(skb, &ts);
801 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
802 sizeof(ts), &ts);
803 }
20d49473
PO
804 }
805 }
806
f24b9be5 807 memset(&tss, 0, sizeof(tss));
c199105d 808 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 809 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 810 empty = 0;
4d276eb6 811 if (shhwtstamps &&
b9f40e21 812 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 813 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 814 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 815 empty = 0;
aad9c8c4
ML
816 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
817 !skb_is_err_queue(skb))
818 put_ts_pktinfo(msg, skb);
819 }
1c885808 820 if (!empty) {
9718475e
DD
821 if (sock_flag(sk, SOCK_TSTAMP_NEW))
822 put_cmsg_scm_timestamping64(msg, &tss);
823 else
824 put_cmsg_scm_timestamping(msg, &tss);
1c885808 825
8605330a 826 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 827 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
828 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
829 skb->len, skb->data);
830 }
92f37fd2 831}
7c81fd8b
ACM
832EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
833
6e3e939f
JB
834void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
835 struct sk_buff *skb)
836{
837 int ack;
838
839 if (!sock_flag(sk, SOCK_WIFI_STATUS))
840 return;
841 if (!skb->wifi_acked_valid)
842 return;
843
844 ack = skb->wifi_acked;
845
846 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
847}
848EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
849
11165f14 850static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
851 struct sk_buff *skb)
3b885787 852{
744d5a3e 853 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 854 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 855 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
856}
857
/*
 * Convenience wrapper: emit the timestamp control messages for @skb
 * first, then the drop-count one.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);

	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 865
8c3c447b 866INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
867 size_t, int));
868INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
869 size_t, int));
1b784140 870static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 871 int flags)
1da177e4 872{
a648a592
PA
873 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
874 inet_recvmsg, sock, msg, msg_data_left(msg),
875 flags);
1da177e4
LT
876}
877
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM.  If the security
 *	hook returns a non-zero value, that value is returned and nothing
 *	is received.  Returns the total number of bytes received, or an
 *	error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 894
c1249c0a 895/**
8a3c245c
PT
896 * kernel_recvmsg - Receive a message from a socket (kernel space)
897 * @sock: The socket to receive the message from
898 * @msg: Received message
899 * @vec: Input s/g array for message data
900 * @num: Size of input s/g array
901 * @size: Number of bytes to read
902 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 903 *
8a3c245c
PT
904 * On return the msg structure contains the scatter/gather array passed in the
905 * vec argument. The array is modified so that it consists of the unfilled
906 * portion of the original array.
c1249c0a 907 *
8a3c245c 908 * The returned value is the total number of bytes received, or an error.
c1249c0a 909 */
8a3c245c 910
89bddce5
SH
911int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
912 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
913{
914 mm_segment_t oldfs = get_fs();
915 int result;
916
aa563d7b 917 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 918 set_fs(KERNEL_DS);
2da62906 919 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
920 set_fs(oldfs);
921 return result;
922}
c6d409cf 923EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 924
ce1d4d3e
CH
925static ssize_t sock_sendpage(struct file *file, struct page *page,
926 int offset, size_t size, loff_t *ppos, int more)
1da177e4 927{
1da177e4
LT
928 struct socket *sock;
929 int flags;
930
ce1d4d3e
CH
931 sock = file->private_data;
932
35f9c09f
ED
933 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
934 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
935 flags |= more;
ce1d4d3e 936
e6949583 937 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 938}
1da177e4 939
9c55e01c 940static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 941 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
942 unsigned int flags)
943{
944 struct socket *sock = file->private_data;
945
997b37da 946 if (unlikely(!sock->ops->splice_read))
95506588 947 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 948
9c55e01c
JA
949 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
950}
951
8ae5e030 952static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 953{
6d652330
AV
954 struct file *file = iocb->ki_filp;
955 struct socket *sock = file->private_data;
0345f931 956 struct msghdr msg = {.msg_iter = *to,
957 .msg_iocb = iocb};
8ae5e030 958 ssize_t res;
ce1d4d3e 959
8ae5e030
AV
960 if (file->f_flags & O_NONBLOCK)
961 msg.msg_flags = MSG_DONTWAIT;
962
963 if (iocb->ki_pos != 0)
1da177e4 964 return -ESPIPE;
027445c3 965
66ee59af 966 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
967 return 0;
968
2da62906 969 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
970 *to = msg.msg_iter;
971 return res;
1da177e4
LT
972}
973
8ae5e030 974static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 975{
6d652330
AV
976 struct file *file = iocb->ki_filp;
977 struct socket *sock = file->private_data;
0345f931 978 struct msghdr msg = {.msg_iter = *from,
979 .msg_iocb = iocb};
8ae5e030 980 ssize_t res;
1da177e4 981
8ae5e030 982 if (iocb->ki_pos != 0)
ce1d4d3e 983 return -ESPIPE;
027445c3 984
8ae5e030
AV
985 if (file->f_flags & O_NONBLOCK)
986 msg.msg_flags = MSG_DONTWAIT;
987
6d652330
AV
988 if (sock->type == SOCK_SEQPACKET)
989 msg.msg_flags |= MSG_EOR;
990
d8725c86 991 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
992 *from = msg.msg_iter;
993 return res;
1da177e4
LT
994}
995
1da177e4
LT
996/*
997 * Atomic setting of ioctl hooks to avoid race
998 * with module unload.
999 */
1000
4a3e2f71 1001static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1002static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1003
881d966b 1004void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1005{
4a3e2f71 1006 mutex_lock(&br_ioctl_mutex);
1da177e4 1007 br_ioctl_hook = hook;
4a3e2f71 1008 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1009}
1010EXPORT_SYMBOL(brioctl_set);
1011
4a3e2f71 1012static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1013static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1014
881d966b 1015void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1016{
4a3e2f71 1017 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1018 vlan_ioctl_hook = hook;
4a3e2f71 1019 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1020}
1021EXPORT_SYMBOL(vlan_ioctl_set);
1022
4a3e2f71 1023static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1024static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1025
89bddce5 1026void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1027{
4a3e2f71 1028 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1029 dlci_ioctl_hook = hook;
4a3e2f71 1030 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1031}
1032EXPORT_SYMBOL(dlci_ioctl_set);
1033
6b96018b 1034static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1035 unsigned int cmd, unsigned long arg)
6b96018b
AB
1036{
1037 int err;
1038 void __user *argp = (void __user *)arg;
1039
1040 err = sock->ops->ioctl(sock, cmd, arg);
1041
1042 /*
1043 * If this ioctl is unknown try to hand it down
1044 * to the NIC driver.
1045 */
36fd633e
AV
1046 if (err != -ENOIOCTLCMD)
1047 return err;
6b96018b 1048
36fd633e
AV
1049 if (cmd == SIOCGIFCONF) {
1050 struct ifconf ifc;
1051 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1052 return -EFAULT;
1053 rtnl_lock();
1054 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1055 rtnl_unlock();
1056 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1057 err = -EFAULT;
44c02a2c
AV
1058 } else {
1059 struct ifreq ifr;
1060 bool need_copyout;
63ff03ab 1061 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1062 return -EFAULT;
1063 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1064 if (!err && need_copyout)
63ff03ab 1065 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1066 return -EFAULT;
36fd633e 1067 }
6b96018b
AB
1068 return err;
1069}
1070
1da177e4
LT
1071/*
1072 * With an ioctl, arg may well be a user mode pointer, but we don't know
1073 * what to do with it - that's up to the protocol still.
1074 */
1075
8a3c245c
PT
1076/**
1077 * get_net_ns - increment the refcount of the network namespace
1078 * @ns: common namespace (net)
1079 *
1080 * Returns the net's common namespace.
1081 */
1082
d8d211a2 1083struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1084{
1085 return &get_net(container_of(ns, struct net, ns))->ns;
1086}
d8d211a2 1087EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1088
1da177e4
LT
1089static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1090{
1091 struct socket *sock;
881d966b 1092 struct sock *sk;
1da177e4
LT
1093 void __user *argp = (void __user *)arg;
1094 int pid, err;
881d966b 1095 struct net *net;
1da177e4 1096
b69aee04 1097 sock = file->private_data;
881d966b 1098 sk = sock->sk;
3b1e0a65 1099 net = sock_net(sk);
44c02a2c
AV
1100 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1101 struct ifreq ifr;
1102 bool need_copyout;
1103 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1104 return -EFAULT;
1105 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1106 if (!err && need_copyout)
1107 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1108 return -EFAULT;
1da177e4 1109 } else
3d23e349 1110#ifdef CONFIG_WEXT_CORE
1da177e4 1111 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1112 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1113 } else
3d23e349 1114#endif
89bddce5 1115 switch (cmd) {
1da177e4
LT
1116 case FIOSETOWN:
1117 case SIOCSPGRP:
1118 err = -EFAULT;
1119 if (get_user(pid, (int __user *)argp))
1120 break;
393cc3f5 1121 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1122 break;
1123 case FIOGETOWN:
1124 case SIOCGPGRP:
609d7fa9 1125 err = put_user(f_getown(sock->file),
89bddce5 1126 (int __user *)argp);
1da177e4
LT
1127 break;
1128 case SIOCGIFBR:
1129 case SIOCSIFBR:
1130 case SIOCBRADDBR:
1131 case SIOCBRDELBR:
1132 err = -ENOPKG;
1133 if (!br_ioctl_hook)
1134 request_module("bridge");
1135
4a3e2f71 1136 mutex_lock(&br_ioctl_mutex);
89bddce5 1137 if (br_ioctl_hook)
881d966b 1138 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1139 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1140 break;
1141 case SIOCGIFVLAN:
1142 case SIOCSIFVLAN:
1143 err = -ENOPKG;
1144 if (!vlan_ioctl_hook)
1145 request_module("8021q");
1146
4a3e2f71 1147 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1148 if (vlan_ioctl_hook)
881d966b 1149 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1150 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1151 break;
1da177e4
LT
1152 case SIOCADDDLCI:
1153 case SIOCDELDLCI:
1154 err = -ENOPKG;
1155 if (!dlci_ioctl_hook)
1156 request_module("dlci");
1157
7512cbf6
PE
1158 mutex_lock(&dlci_ioctl_mutex);
1159 if (dlci_ioctl_hook)
1da177e4 1160 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1161 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1162 break;
c62cce2c
AV
1163 case SIOCGSKNS:
1164 err = -EPERM;
1165 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1166 break;
1167
1168 err = open_related_ns(&net->ns, get_net_ns);
1169 break;
0768e170
AB
1170 case SIOCGSTAMP_OLD:
1171 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1172 if (!sock->ops->gettstamp) {
1173 err = -ENOIOCTLCMD;
1174 break;
1175 }
1176 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1177 cmd == SIOCGSTAMP_OLD,
1178 !IS_ENABLED(CONFIG_64BIT));
60747828 1179 break;
0768e170
AB
1180 case SIOCGSTAMP_NEW:
1181 case SIOCGSTAMPNS_NEW:
1182 if (!sock->ops->gettstamp) {
1183 err = -ENOIOCTLCMD;
1184 break;
1185 }
1186 err = sock->ops->gettstamp(sock, argp,
1187 cmd == SIOCGSTAMP_NEW,
1188 false);
c7cbdbf2 1189 break;
1da177e4 1190 default:
63ff03ab 1191 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1192 break;
89bddce5 1193 }
1da177e4
LT
1194 return err;
1195}
1196
8a3c245c
PT
1197/**
1198 * sock_create_lite - creates a socket
1199 * @family: protocol family (AF_INET, ...)
1200 * @type: communication type (SOCK_STREAM, ...)
1201 * @protocol: protocol (0, ...)
1202 * @res: new socket
1203 *
1204 * Creates a new socket and assigns it to @res, passing through LSM.
1205 * The new socket initialization is not complete, see kernel_accept().
1206 * Returns 0 or an error. On failure @res is set to %NULL.
1207 * This function internally uses GFP_KERNEL.
1208 */
1209
1da177e4
LT
1210int sock_create_lite(int family, int type, int protocol, struct socket **res)
1211{
1212 int err;
1213 struct socket *sock = NULL;
89bddce5 1214
1da177e4
LT
1215 err = security_socket_create(family, type, protocol, 1);
1216 if (err)
1217 goto out;
1218
1219 sock = sock_alloc();
1220 if (!sock) {
1221 err = -ENOMEM;
1222 goto out;
1223 }
1224
1da177e4 1225 sock->type = type;
7420ed23
VY
1226 err = security_socket_post_create(sock, family, type, protocol, 1);
1227 if (err)
1228 goto out_release;
1229
1da177e4
LT
1230out:
1231 *res = sock;
1232 return err;
7420ed23
VY
1233out_release:
1234 sock_release(sock);
1235 sock = NULL;
1236 goto out;
1da177e4 1237}
c6d409cf 1238EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1239
1240/* No kernel lock held - perfect */
ade994f4 1241static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1242{
3cafb376 1243 struct socket *sock = file->private_data;
a331de3b 1244 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1245
e88958e6
CH
1246 if (!sock->ops->poll)
1247 return 0;
f641f13b 1248
a331de3b
CH
1249 if (sk_can_busy_loop(sock->sk)) {
1250 /* poll once if requested by the syscall */
1251 if (events & POLL_BUSY_LOOP)
1252 sk_busy_loop(sock->sk, 1);
1253
1254 /* if this socket can poll_ll, tell the system call */
1255 flag = POLL_BUSY_LOOP;
1256 }
1257
1258 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1259}
1260
89bddce5 1261static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1262{
b69aee04 1263 struct socket *sock = file->private_data;
1da177e4
LT
1264
1265 return sock->ops->mmap(file, sock, vma);
1266}
1267
/*
 * Release handler for a socket file: releases the socket embedded in
 * @inode. @filp is not used. Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1273
1274/*
1275 * Update the socket async list
1276 *
1277 * Fasync_list locking strategy.
1278 *
1279 * 1. fasync_list is modified only under process context socket lock
1280 * i.e. under semaphore.
1281 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1282 * or under socket lock
1da177e4
LT
1283 */
1284
1285static int sock_fasync(int fd, struct file *filp, int on)
1286{
989a2979
ED
1287 struct socket *sock = filp->private_data;
1288 struct sock *sk = sock->sk;
333f7909 1289 struct socket_wq *wq = &sock->wq;
1da177e4 1290
989a2979 1291 if (sk == NULL)
1da177e4 1292 return -EINVAL;
1da177e4
LT
1293
1294 lock_sock(sk);
eaefd110 1295 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1296
eaefd110 1297 if (!wq->fasync_list)
989a2979
ED
1298 sock_reset_flag(sk, SOCK_FASYNC);
1299 else
bcdce719 1300 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1301
989a2979 1302 release_sock(sk);
1da177e4
LT
1303 return 0;
1304}
1305
ceb5d58b 1306/* This function may be called only under rcu_lock */
1da177e4 1307
ceb5d58b 1308int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1309{
ceb5d58b 1310 if (!wq || !wq->fasync_list)
1da177e4 1311 return -1;
ceb5d58b 1312
89bddce5 1313 switch (how) {
8d8ad9d7 1314 case SOCK_WAKE_WAITD:
ceb5d58b 1315 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1316 break;
1317 goto call_kill;
8d8ad9d7 1318 case SOCK_WAKE_SPACE:
ceb5d58b 1319 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1320 break;
1321 /* fall through */
8d8ad9d7 1322 case SOCK_WAKE_IO:
89bddce5 1323call_kill:
43815482 1324 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1325 break;
8d8ad9d7 1326 case SOCK_WAKE_URG:
43815482 1327 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1328 }
ceb5d58b 1329
1da177e4
LT
1330 return 0;
1331}
c6d409cf 1332EXPORT_SYMBOL(sock_wake_async);
1da177e4 1333
8a3c245c
PT
1334/**
1335 * __sock_create - creates a socket
1336 * @net: net namespace
1337 * @family: protocol family (AF_INET, ...)
1338 * @type: communication type (SOCK_STREAM, ...)
1339 * @protocol: protocol (0, ...)
1340 * @res: new socket
1341 * @kern: boolean for kernel space sockets
1342 *
1343 * Creates a new socket and assigns it to @res, passing through LSM.
1344 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1345 * be set to true if the socket resides in kernel space.
1346 * This function internally uses GFP_KERNEL.
1347 */
1348
721db93a 1349int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1350 struct socket **res, int kern)
1da177e4
LT
1351{
1352 int err;
1353 struct socket *sock;
55737fda 1354 const struct net_proto_family *pf;
1da177e4
LT
1355
1356 /*
89bddce5 1357 * Check protocol is in range
1da177e4
LT
1358 */
1359 if (family < 0 || family >= NPROTO)
1360 return -EAFNOSUPPORT;
1361 if (type < 0 || type >= SOCK_MAX)
1362 return -EINVAL;
1363
1364 /* Compatibility.
1365
1366 This uglymoron is moved from INET layer to here to avoid
1367 deadlock in module load.
1368 */
1369 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1370 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1371 current->comm);
1da177e4
LT
1372 family = PF_PACKET;
1373 }
1374
1375 err = security_socket_create(family, type, protocol, kern);
1376 if (err)
1377 return err;
89bddce5 1378
55737fda
SH
1379 /*
1380 * Allocate the socket and allow the family to set things up. if
1381 * the protocol is 0, the family is instructed to select an appropriate
1382 * default.
1383 */
1384 sock = sock_alloc();
1385 if (!sock) {
e87cc472 1386 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1387 return -ENFILE; /* Not exactly a match, but its the
1388 closest posix thing */
1389 }
1390
1391 sock->type = type;
1392
95a5afca 1393#ifdef CONFIG_MODULES
89bddce5
SH
1394 /* Attempt to load a protocol module if the find failed.
1395 *
1396 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1397 * requested real, full-featured networking support upon configuration.
1398 * Otherwise module support will break!
1399 */
190683a9 1400 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1401 request_module("net-pf-%d", family);
1da177e4
LT
1402#endif
1403
55737fda
SH
1404 rcu_read_lock();
1405 pf = rcu_dereference(net_families[family]);
1406 err = -EAFNOSUPPORT;
1407 if (!pf)
1408 goto out_release;
1da177e4
LT
1409
1410 /*
1411 * We will call the ->create function, that possibly is in a loadable
1412 * module, so we have to bump that loadable module refcnt first.
1413 */
55737fda 1414 if (!try_module_get(pf->owner))
1da177e4
LT
1415 goto out_release;
1416
55737fda
SH
1417 /* Now protected by module ref count */
1418 rcu_read_unlock();
1419
3f378b68 1420 err = pf->create(net, sock, protocol, kern);
55737fda 1421 if (err < 0)
1da177e4 1422 goto out_module_put;
a79af59e 1423
1da177e4
LT
1424 /*
1425 * Now to bump the refcnt of the [loadable] module that owns this
1426 * socket at sock_release time we decrement its refcnt.
1427 */
55737fda
SH
1428 if (!try_module_get(sock->ops->owner))
1429 goto out_module_busy;
1430
1da177e4
LT
1431 /*
1432 * Now that we're done with the ->create function, the [loadable]
1433 * module can have its refcnt decremented
1434 */
55737fda 1435 module_put(pf->owner);
7420ed23
VY
1436 err = security_socket_post_create(sock, family, type, protocol, kern);
1437 if (err)
3b185525 1438 goto out_sock_release;
55737fda 1439 *res = sock;
1da177e4 1440
55737fda
SH
1441 return 0;
1442
1443out_module_busy:
1444 err = -EAFNOSUPPORT;
1da177e4 1445out_module_put:
55737fda
SH
1446 sock->ops = NULL;
1447 module_put(pf->owner);
1448out_sock_release:
1da177e4 1449 sock_release(sock);
55737fda
SH
1450 return err;
1451
1452out_release:
1453 rcu_read_unlock();
1454 goto out_sock_release;
1da177e4 1455}
721db93a 1456EXPORT_SYMBOL(__sock_create);
1da177e4 1457
8a3c245c
PT
1458/**
1459 * sock_create - creates a socket
1460 * @family: protocol family (AF_INET, ...)
1461 * @type: communication type (SOCK_STREAM, ...)
1462 * @protocol: protocol (0, ...)
1463 * @res: new socket
1464 *
1465 * A wrapper around __sock_create().
1466 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1467 */
1468
1da177e4
LT
1469int sock_create(int family, int type, int protocol, struct socket **res)
1470{
1b8d7ae4 1471 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1472}
c6d409cf 1473EXPORT_SYMBOL(sock_create);
1da177e4 1474
8a3c245c
PT
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() with the kernel-socket flag set.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1492
9d6a15c3 1493int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1494{
1495 int retval;
1496 struct socket *sock;
a677a039
UD
1497 int flags;
1498
e38b36f3
UD
1499 /* Check the SOCK_* constants for consistency. */
1500 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1501 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1502 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1503 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1504
a677a039 1505 flags = type & ~SOCK_TYPE_MASK;
77d27200 1506 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1507 return -EINVAL;
1508 type &= SOCK_TYPE_MASK;
1da177e4 1509
aaca0bdc
UD
1510 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1511 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1512
1da177e4
LT
1513 retval = sock_create(family, type, protocol, &sock);
1514 if (retval < 0)
8e1611e2 1515 return retval;
1da177e4 1516
8e1611e2 1517 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1518}
1519
9d6a15c3
DB
1520SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1521{
1522 return __sys_socket(family, type, protocol);
1523}
1524
1da177e4
LT
1525/*
1526 * Create a pair of connected sockets.
1527 */
1528
6debc8d8 1529int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1530{
1531 struct socket *sock1, *sock2;
1532 int fd1, fd2, err;
db349509 1533 struct file *newfile1, *newfile2;
a677a039
UD
1534 int flags;
1535
1536 flags = type & ~SOCK_TYPE_MASK;
77d27200 1537 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1538 return -EINVAL;
1539 type &= SOCK_TYPE_MASK;
1da177e4 1540
aaca0bdc
UD
1541 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1542 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1543
016a266b
AV
1544 /*
1545 * reserve descriptors and make sure we won't fail
1546 * to return them to userland.
1547 */
1548 fd1 = get_unused_fd_flags(flags);
1549 if (unlikely(fd1 < 0))
1550 return fd1;
1551
1552 fd2 = get_unused_fd_flags(flags);
1553 if (unlikely(fd2 < 0)) {
1554 put_unused_fd(fd1);
1555 return fd2;
1556 }
1557
1558 err = put_user(fd1, &usockvec[0]);
1559 if (err)
1560 goto out;
1561
1562 err = put_user(fd2, &usockvec[1]);
1563 if (err)
1564 goto out;
1565
1da177e4
LT
1566 /*
1567 * Obtain the first socket and check if the underlying protocol
1568 * supports the socketpair call.
1569 */
1570
1571 err = sock_create(family, type, protocol, &sock1);
016a266b 1572 if (unlikely(err < 0))
1da177e4
LT
1573 goto out;
1574
1575 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1576 if (unlikely(err < 0)) {
1577 sock_release(sock1);
1578 goto out;
bf3c23d1 1579 }
d73aa286 1580
d47cd945
DH
1581 err = security_socket_socketpair(sock1, sock2);
1582 if (unlikely(err)) {
1583 sock_release(sock2);
1584 sock_release(sock1);
1585 goto out;
1586 }
1587
016a266b
AV
1588 err = sock1->ops->socketpair(sock1, sock2);
1589 if (unlikely(err < 0)) {
1590 sock_release(sock2);
1591 sock_release(sock1);
1592 goto out;
28407630
AV
1593 }
1594
aab174f0 1595 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1596 if (IS_ERR(newfile1)) {
28407630 1597 err = PTR_ERR(newfile1);
016a266b
AV
1598 sock_release(sock2);
1599 goto out;
28407630
AV
1600 }
1601
aab174f0 1602 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1603 if (IS_ERR(newfile2)) {
1604 err = PTR_ERR(newfile2);
016a266b
AV
1605 fput(newfile1);
1606 goto out;
db349509
AV
1607 }
1608
157cf649 1609 audit_fd_pair(fd1, fd2);
d73aa286 1610
db349509
AV
1611 fd_install(fd1, newfile1);
1612 fd_install(fd2, newfile2);
d73aa286 1613 return 0;
1da177e4 1614
016a266b 1615out:
d73aa286 1616 put_unused_fd(fd2);
d73aa286 1617 put_unused_fd(fd1);
1da177e4
LT
1618 return err;
1619}
1620
6debc8d8
DB
1621SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1622 int __user *, usockvec)
1623{
1624 return __sys_socketpair(family, type, protocol, usockvec);
1625}
1626
1da177e4
LT
1627/*
1628 * Bind a name to a socket. Nothing much to do here since it's
1629 * the protocol's responsibility to handle the local address.
1630 *
1631 * We move the socket address to kernel space before we call
1632 * the protocol layer (having also checked the address is ok).
1633 */
1634
a87d35d8 1635int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1636{
1637 struct socket *sock;
230b1839 1638 struct sockaddr_storage address;
6cb153ca 1639 int err, fput_needed;
1da177e4 1640
89bddce5 1641 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1642 if (sock) {
43db362d 1643 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1644 if (!err) {
89bddce5 1645 err = security_socket_bind(sock,
230b1839 1646 (struct sockaddr *)&address,
89bddce5 1647 addrlen);
6cb153ca
BL
1648 if (!err)
1649 err = sock->ops->bind(sock,
89bddce5 1650 (struct sockaddr *)
230b1839 1651 &address, addrlen);
1da177e4 1652 }
6cb153ca 1653 fput_light(sock->file, fput_needed);
89bddce5 1654 }
1da177e4
LT
1655 return err;
1656}
1657
a87d35d8
DB
1658SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1659{
1660 return __sys_bind(fd, umyaddr, addrlen);
1661}
1662
1da177e4
LT
1663/*
1664 * Perform a listen. Basically, we allow the protocol to do anything
1665 * necessary for a listen, and if that works, we mark the socket as
1666 * ready for listening.
1667 */
1668
25e290ee 1669int __sys_listen(int fd, int backlog)
1da177e4
LT
1670{
1671 struct socket *sock;
6cb153ca 1672 int err, fput_needed;
b8e1f9b5 1673 int somaxconn;
89bddce5
SH
1674
1675 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1676 if (sock) {
8efa6e93 1677 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1678 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1679 backlog = somaxconn;
1da177e4
LT
1680
1681 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1682 if (!err)
1683 err = sock->ops->listen(sock, backlog);
1da177e4 1684
6cb153ca 1685 fput_light(sock->file, fput_needed);
1da177e4
LT
1686 }
1687 return err;
1688}
1689
25e290ee
DB
1690SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1691{
1692 return __sys_listen(fd, backlog);
1693}
1694
de2ea4b6
JA
1695int __sys_accept4_file(struct file *file, unsigned file_flags,
1696 struct sockaddr __user *upeer_sockaddr,
1697 int __user *upeer_addrlen, int flags)
1da177e4
LT
1698{
1699 struct socket *sock, *newsock;
39d8c1b6 1700 struct file *newfile;
de2ea4b6 1701 int err, len, newfd;
230b1839 1702 struct sockaddr_storage address;
1da177e4 1703
77d27200 1704 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1705 return -EINVAL;
1706
1707 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1708 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1709
de2ea4b6 1710 sock = sock_from_file(file, &err);
1da177e4
LT
1711 if (!sock)
1712 goto out;
1713
1714 err = -ENFILE;
c6d409cf
ED
1715 newsock = sock_alloc();
1716 if (!newsock)
de2ea4b6 1717 goto out;
1da177e4
LT
1718
1719 newsock->type = sock->type;
1720 newsock->ops = sock->ops;
1721
1da177e4
LT
1722 /*
1723 * We don't need try_module_get here, as the listening socket (sock)
1724 * has the protocol module (sock->ops->owner) held.
1725 */
1726 __module_get(newsock->ops->owner);
1727
28407630 1728 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1729 if (unlikely(newfd < 0)) {
1730 err = newfd;
9a1875e6 1731 sock_release(newsock);
de2ea4b6 1732 goto out;
39d8c1b6 1733 }
aab174f0 1734 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1735 if (IS_ERR(newfile)) {
28407630
AV
1736 err = PTR_ERR(newfile);
1737 put_unused_fd(newfd);
de2ea4b6 1738 goto out;
28407630 1739 }
39d8c1b6 1740
a79af59e
FF
1741 err = security_socket_accept(sock, newsock);
1742 if (err)
39d8c1b6 1743 goto out_fd;
a79af59e 1744
de2ea4b6
JA
1745 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1746 false);
1da177e4 1747 if (err < 0)
39d8c1b6 1748 goto out_fd;
1da177e4
LT
1749
1750 if (upeer_sockaddr) {
9b2c45d4
DV
1751 len = newsock->ops->getname(newsock,
1752 (struct sockaddr *)&address, 2);
1753 if (len < 0) {
1da177e4 1754 err = -ECONNABORTED;
39d8c1b6 1755 goto out_fd;
1da177e4 1756 }
43db362d 1757 err = move_addr_to_user(&address,
230b1839 1758 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1759 if (err < 0)
39d8c1b6 1760 goto out_fd;
1da177e4
LT
1761 }
1762
1763 /* File flags are not inherited via accept() unlike another OSes. */
1764
39d8c1b6
DM
1765 fd_install(newfd, newfile);
1766 err = newfd;
1da177e4
LT
1767out:
1768 return err;
39d8c1b6 1769out_fd:
9606a216 1770 fput(newfile);
39d8c1b6 1771 put_unused_fd(newfd);
de2ea4b6
JA
1772 goto out;
1773
1774}
1775
1776/*
1777 * For accept, we attempt to create a new socket, set up the link
1778 * with the client, wake up the client, then return the new
1779 * connected fd. We collect the address of the connector in kernel
1780 * space and move it to user at the very end. This is unclean because
1781 * we open the socket then return an error.
1782 *
1783 * 1003.1g adds the ability to recvmsg() to query connection pending
1784 * status to recvmsg. We need to add that support in a way that's
1785 * clean when we restructure accept also.
1786 */
1787
1788int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1789 int __user *upeer_addrlen, int flags)
1790{
1791 int ret = -EBADF;
1792 struct fd f;
1793
1794 f = fdget(fd);
1795 if (f.file) {
1796 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
1797 upeer_addrlen, flags);
1798 if (f.flags)
1799 fput(f.file);
1800 }
1801
1802 return ret;
1da177e4
LT
1803}
1804
4541e805
DB
1805SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1806 int __user *, upeer_addrlen, int, flags)
1807{
1808 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1809}
1810
20f37034
HC
1811SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1812 int __user *, upeer_addrlen)
aaca0bdc 1813{
4541e805 1814 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1815}
1816
1da177e4
LT
1817/*
1818 * Attempt to connect to a socket with the server address. The address
1819 * is in user space so we verify it is OK and move it to kernel space.
1820 *
1821 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1822 * break bindings
1823 *
1824 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1825 * other SEQPACKET protocols that take time to connect() as it doesn't
1826 * include the -EINPROGRESS status for such sockets.
1827 */
1828
bd3ded31
JA
1829int __sys_connect_file(struct file *file, struct sockaddr __user *uservaddr,
1830 int addrlen, int file_flags)
1da177e4
LT
1831{
1832 struct socket *sock;
230b1839 1833 struct sockaddr_storage address;
bd3ded31 1834 int err;
1da177e4 1835
bd3ded31 1836 sock = sock_from_file(file, &err);
1da177e4
LT
1837 if (!sock)
1838 goto out;
43db362d 1839 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4 1840 if (err < 0)
bd3ded31 1841 goto out;
1da177e4 1842
89bddce5 1843 err =
230b1839 1844 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4 1845 if (err)
bd3ded31 1846 goto out;
1da177e4 1847
230b1839 1848 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
bd3ded31 1849 sock->file->f_flags | file_flags);
1da177e4
LT
1850out:
1851 return err;
1852}
1853
bd3ded31
JA
1854int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1855{
1856 int ret = -EBADF;
1857 struct fd f;
1858
1859 f = fdget(fd);
1860 if (f.file) {
1861 ret = __sys_connect_file(f.file, uservaddr, addrlen, 0);
1862 if (f.flags)
1863 fput(f.file);
1864 }
1865
1866 return ret;
1867}
1868
1387c2c2
DB
1869SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1870 int, addrlen)
1871{
1872 return __sys_connect(fd, uservaddr, addrlen);
1873}
1874
1da177e4
LT
1875/*
1876 * Get the local address ('name') of a socket object. Move the obtained
1877 * name to user space.
1878 */
1879
8882a107
DB
1880int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1881 int __user *usockaddr_len)
1da177e4
LT
1882{
1883 struct socket *sock;
230b1839 1884 struct sockaddr_storage address;
9b2c45d4 1885 int err, fput_needed;
89bddce5 1886
6cb153ca 1887 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1888 if (!sock)
1889 goto out;
1890
1891 err = security_socket_getsockname(sock);
1892 if (err)
1893 goto out_put;
1894
9b2c45d4
DV
1895 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1896 if (err < 0)
1da177e4 1897 goto out_put;
9b2c45d4
DV
1898 /* "err" is actually length in this case */
1899 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1900
1901out_put:
6cb153ca 1902 fput_light(sock->file, fput_needed);
1da177e4
LT
1903out:
1904 return err;
1905}
1906
8882a107
DB
1907SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1908 int __user *, usockaddr_len)
1909{
1910 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1911}
1912
1da177e4
LT
1913/*
1914 * Get the remote address ('name') of a socket object. Move the obtained
1915 * name to user space.
1916 */
1917
b21c8f83
DB
1918int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1919 int __user *usockaddr_len)
1da177e4
LT
1920{
1921 struct socket *sock;
230b1839 1922 struct sockaddr_storage address;
9b2c45d4 1923 int err, fput_needed;
1da177e4 1924
89bddce5
SH
1925 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1926 if (sock != NULL) {
1da177e4
LT
1927 err = security_socket_getpeername(sock);
1928 if (err) {
6cb153ca 1929 fput_light(sock->file, fput_needed);
1da177e4
LT
1930 return err;
1931 }
1932
9b2c45d4
DV
1933 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1934 if (err >= 0)
1935 /* "err" is actually length in this case */
1936 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1937 usockaddr_len);
6cb153ca 1938 fput_light(sock->file, fput_needed);
1da177e4
LT
1939 }
1940 return err;
1941}
1942
b21c8f83
DB
1943SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1944 int __user *, usockaddr_len)
1945{
1946 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1947}
1948
1da177e4
LT
1949/*
1950 * Send a datagram to a given address. We move the address into kernel
1951 * space and check the user space data area is readable before invoking
1952 * the protocol.
1953 */
211b634b
DB
1954int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1955 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1956{
1957 struct socket *sock;
230b1839 1958 struct sockaddr_storage address;
1da177e4
LT
1959 int err;
1960 struct msghdr msg;
1961 struct iovec iov;
6cb153ca 1962 int fput_needed;
6cb153ca 1963
602bd0e9
AV
1964 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1965 if (unlikely(err))
1966 return err;
de0fa95c
PE
1967 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1968 if (!sock)
4387ff75 1969 goto out;
6cb153ca 1970
89bddce5 1971 msg.msg_name = NULL;
89bddce5
SH
1972 msg.msg_control = NULL;
1973 msg.msg_controllen = 0;
1974 msg.msg_namelen = 0;
6cb153ca 1975 if (addr) {
43db362d 1976 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1977 if (err < 0)
1978 goto out_put;
230b1839 1979 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1980 msg.msg_namelen = addr_len;
1da177e4
LT
1981 }
1982 if (sock->file->f_flags & O_NONBLOCK)
1983 flags |= MSG_DONTWAIT;
1984 msg.msg_flags = flags;
d8725c86 1985 err = sock_sendmsg(sock, &msg);
1da177e4 1986
89bddce5 1987out_put:
de0fa95c 1988 fput_light(sock->file, fput_needed);
4387ff75 1989out:
1da177e4
LT
1990 return err;
1991}
1992
211b634b
DB
1993SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1994 unsigned int, flags, struct sockaddr __user *, addr,
1995 int, addr_len)
1996{
1997 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1998}
1999
1da177e4 2000/*
89bddce5 2001 * Send a datagram down a socket.
1da177e4
LT
2002 */
2003
3e0fa65f 2004SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2005 unsigned int, flags)
1da177e4 2006{
211b634b 2007 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2008}
2009
2010/*
89bddce5 2011 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2012 * sender. We verify the buffers are writable and if needed move the
2013 * sender address from kernel to user space.
2014 */
7a09e1eb
DB
2015int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2016 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2017{
2018 struct socket *sock;
2019 struct iovec iov;
2020 struct msghdr msg;
230b1839 2021 struct sockaddr_storage address;
89bddce5 2022 int err, err2;
6cb153ca
BL
2023 int fput_needed;
2024
602bd0e9
AV
2025 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2026 if (unlikely(err))
2027 return err;
de0fa95c 2028 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2029 if (!sock)
de0fa95c 2030 goto out;
1da177e4 2031
89bddce5
SH
2032 msg.msg_control = NULL;
2033 msg.msg_controllen = 0;
f3d33426
HFS
2034 /* Save some cycles and don't copy the address if not needed */
2035 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2036 /* We assume all kernel code knows the size of sockaddr_storage */
2037 msg.msg_namelen = 0;
130ed5d1 2038 msg.msg_iocb = NULL;
9f138fa6 2039 msg.msg_flags = 0;
1da177e4
LT
2040 if (sock->file->f_flags & O_NONBLOCK)
2041 flags |= MSG_DONTWAIT;
2da62906 2042 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2043
89bddce5 2044 if (err >= 0 && addr != NULL) {
43db362d 2045 err2 = move_addr_to_user(&address,
230b1839 2046 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2047 if (err2 < 0)
2048 err = err2;
1da177e4 2049 }
de0fa95c
PE
2050
2051 fput_light(sock->file, fput_needed);
4387ff75 2052out:
1da177e4
LT
2053 return err;
2054}
2055
7a09e1eb
DB
2056SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2057 unsigned int, flags, struct sockaddr __user *, addr,
2058 int __user *, addr_len)
2059{
2060 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2061}
2062
1da177e4 2063/*
89bddce5 2064 * Receive a datagram from a socket.
1da177e4
LT
2065 */
2066
b7c0ddf5
JG
2067SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2068 unsigned int, flags)
1da177e4 2069{
7a09e1eb 2070 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2071}
2072
2073/*
2074 * Set a socket option. Because we don't know the option lengths we have
2075 * to pass the user mode parameter for the protocols to sort out.
2076 */
2077
cc36dca0
DB
2078static int __sys_setsockopt(int fd, int level, int optname,
2079 char __user *optval, int optlen)
1da177e4 2080{
0d01da6a
SF
2081 mm_segment_t oldfs = get_fs();
2082 char *kernel_optval = NULL;
6cb153ca 2083 int err, fput_needed;
1da177e4
LT
2084 struct socket *sock;
2085
2086 if (optlen < 0)
2087 return -EINVAL;
89bddce5
SH
2088
2089 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2090 if (sock != NULL) {
2091 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2092 if (err)
2093 goto out_put;
1da177e4 2094
0d01da6a
SF
2095 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
2096 &optname, optval, &optlen,
2097 &kernel_optval);
2098
2099 if (err < 0) {
2100 goto out_put;
2101 } else if (err > 0) {
2102 err = 0;
2103 goto out_put;
2104 }
2105
2106 if (kernel_optval) {
2107 set_fs(KERNEL_DS);
2108 optval = (char __user __force *)kernel_optval;
2109 }
2110
1da177e4 2111 if (level == SOL_SOCKET)
89bddce5
SH
2112 err =
2113 sock_setsockopt(sock, level, optname, optval,
2114 optlen);
1da177e4 2115 else
89bddce5
SH
2116 err =
2117 sock->ops->setsockopt(sock, level, optname, optval,
2118 optlen);
0d01da6a
SF
2119
2120 if (kernel_optval) {
2121 set_fs(oldfs);
2122 kfree(kernel_optval);
2123 }
6cb153ca
BL
2124out_put:
2125 fput_light(sock->file, fput_needed);
1da177e4
LT
2126 }
2127 return err;
2128}
2129
cc36dca0
DB
2130SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2131 char __user *, optval, int, optlen)
2132{
2133 return __sys_setsockopt(fd, level, optname, optval, optlen);
2134}
2135
1da177e4
LT
2136/*
2137 * Get a socket option. Because we don't know the option lengths we have
2138 * to pass a user mode parameter for the protocols to sort out.
2139 */
2140
13a2d70e
DB
2141static int __sys_getsockopt(int fd, int level, int optname,
2142 char __user *optval, int __user *optlen)
1da177e4 2143{
6cb153ca 2144 int err, fput_needed;
1da177e4 2145 struct socket *sock;
0d01da6a 2146 int max_optlen;
1da177e4 2147
89bddce5
SH
2148 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2149 if (sock != NULL) {
6cb153ca
BL
2150 err = security_socket_getsockopt(sock, level, optname);
2151 if (err)
2152 goto out_put;
1da177e4 2153
0d01da6a
SF
2154 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
2155
1da177e4 2156 if (level == SOL_SOCKET)
89bddce5
SH
2157 err =
2158 sock_getsockopt(sock, level, optname, optval,
2159 optlen);
1da177e4 2160 else
89bddce5
SH
2161 err =
2162 sock->ops->getsockopt(sock, level, optname, optval,
2163 optlen);
0d01da6a
SF
2164
2165 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2166 optval, optlen,
2167 max_optlen, err);
6cb153ca
BL
2168out_put:
2169 fput_light(sock->file, fput_needed);
1da177e4
LT
2170 }
2171 return err;
2172}
2173
13a2d70e
DB
2174SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2175 char __user *, optval, int __user *, optlen)
2176{
2177 return __sys_getsockopt(fd, level, optname, optval, optlen);
2178}
2179
1da177e4
LT
2180/*
2181 * Shutdown a socket.
2182 */
2183
005a1aea 2184int __sys_shutdown(int fd, int how)
1da177e4 2185{
6cb153ca 2186 int err, fput_needed;
1da177e4
LT
2187 struct socket *sock;
2188
89bddce5
SH
2189 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2190 if (sock != NULL) {
1da177e4 2191 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2192 if (!err)
2193 err = sock->ops->shutdown(sock, how);
2194 fput_light(sock->file, fput_needed);
1da177e4
LT
2195 }
2196 return err;
2197}
2198
005a1aea
DB
2199SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2200{
2201 return __sys_shutdown(fd, how);
2202}
2203
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() is not self-contained: it reads a variable named "flags"
 * from the expansion site and, by token pasting, a variable named
 * "<msg>_compat" next to the "<msg>" pointer passed in.  Both must be in
 * scope wherever these macros are used.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2210
c71d8ebe
TH
/*
 * Destination address of the most recent successful send within one
 * sendmmsg() call.  ____sys_sendmsg() compares the next destination
 * against it to decide whether the LSM check can be skipped.
 */
struct used_address {
	struct sockaddr_storage name;	/* copy of the destination address */
	unsigned int name_len;		/* bytes of @name in use; UINT_MAX means "nothing recorded yet" */
};
2215
da184284
AV
/*
 * copy_msghdr_from_user - build a kernel msghdr from a user_msghdr
 * @kmsg:      kernel msghdr to fill in; on the send path (@save_addr NULL)
 *             kmsg->msg_name must already point at storage for the address
 * @umsg:      user-space message header
 * @save_addr: if non-NULL (receive path), receives the user address
 *             pointer and the address itself is NOT copied in
 * @iov:       in: caller's fast iovec array; updated by import_iovec()
 *
 * Returns 0 on success or a negative errno (-EFAULT, -EINVAL, -EMSGSIZE,
 * or whatever move_addr_to_kernel()/import_iovec() report).
 */
static int copy_msghdr_from_user(struct msghdr *kmsg,
				 struct user_msghdr __user *umsg,
				 struct sockaddr __user **save_addr,
				 struct iovec **iov)
{
	struct user_msghdr msg;
	ssize_t err;

	if (copy_from_user(&msg, umsg, sizeof(*umsg)))
		return -EFAULT;

	/* still a user pointer at this point, hence the __force cast */
	kmsg->msg_control = (void __force *)msg.msg_control;
	kmsg->msg_controllen = msg.msg_controllen;
	kmsg->msg_flags = msg.msg_flags;

	/* a NULL name pointer means "no address" regardless of the length */
	kmsg->msg_namelen = msg.msg_namelen;
	if (!msg.msg_name)
		kmsg->msg_namelen = 0;

	if (kmsg->msg_namelen < 0)
		return -EINVAL;

	/* oversized lengths are clamped rather than rejected */
	if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
		kmsg->msg_namelen = sizeof(struct sockaddr_storage);

	if (save_addr)
		*save_addr = msg.msg_name;

	if (msg.msg_name && kmsg->msg_namelen) {
		/* only the send path copies the address into the kernel */
		if (!save_addr) {
			err = move_addr_to_kernel(msg.msg_name,
						  kmsg->msg_namelen,
						  kmsg->msg_name);
			if (err < 0)
				return err;
		}
	} else {
		kmsg->msg_name = NULL;
		kmsg->msg_namelen = 0;
	}

	if (msg.msg_iovlen > UIO_MAXIOV)
		return -EMSGSIZE;

	kmsg->msg_iocb = NULL;

	/* direction follows the path: READ when receiving, WRITE when sending */
	err = import_iovec(save_addr ? READ : WRITE,
			    msg.msg_iov, msg.msg_iovlen,
			    UIO_FASTIOV, iov, &kmsg->msg_iter);
	/* a non-negative result from import_iovec() is collapsed to 0 */
	return err < 0 ? err : 0;
}
2267
4257c8ca
JA
/*
 * ____sys_sendmsg - send an already-imported message on @sock
 * @sock:                 socket to send on
 * @msg_sys:              kernel msghdr; msg_control still holds the user
 *                        pointer on entry and is replaced by a kernel copy
 * @flags:                send flags from the caller
 * @used_address:         sendmmsg() destination cache, or NULL
 * @allowed_msghdr_flags: bits of msg_sys->msg_flags that may be merged
 *                        into @flags
 *
 * Returns the result of sock_sendmsg()/sock_sendmsg_nosec(), or a negative
 * errno if the control data could not be brought into the kernel.
 */
static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
			   unsigned int flags, struct used_address *used_address,
			   unsigned int allowed_msghdr_flags)
{
	unsigned char ctl[sizeof(struct cmsghdr) + 20]
				__aligned(sizeof(__kernel_size_t));
	/* 20 is size of ipv6_pktinfo */
	unsigned char *ctl_buf = ctl;
	int ctl_len;
	ssize_t err;

	/* default error for the oversize check and the allocation failure below */
	err = -ENOBUFS;

	if (msg_sys->msg_controllen > INT_MAX)
		goto out;
	flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
	ctl_len = msg_sys->msg_controllen;
	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
		err =
		    cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
						     sizeof(ctl));
		if (err)
			goto out;
		/* the compat helper updated msg_control/msg_controllen; pick up its results */
		ctl_buf = msg_sys->msg_control;
		ctl_len = msg_sys->msg_controllen;
	} else if (ctl_len) {
		BUILD_BUG_ON(sizeof(struct cmsghdr) !=
			     CMSG_ALIGN(sizeof(struct cmsghdr)));
		/* small control messages use the on-stack buffer */
		if (ctl_len > sizeof(ctl)) {
			ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
			if (ctl_buf == NULL)
				goto out;
		}
		err = -EFAULT;
		/*
		 * Careful! Before this, msg_sys->msg_control contains a user pointer.
		 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
		 * checking falls down on this.
		 */
		if (copy_from_user(ctl_buf,
				   (void __user __force *)msg_sys->msg_control,
				   ctl_len))
			goto out_freectl;
		msg_sys->msg_control = ctl_buf;
	}
	msg_sys->msg_flags = flags;

	if (sock->file->f_flags & O_NONBLOCK)
		msg_sys->msg_flags |= MSG_DONTWAIT;
	/*
	 * If this is sendmmsg() and current destination address is same as
	 * previously succeeded address, omit asking LSM's decision.
	 * used_address->name_len is initialized to UINT_MAX so that the first
	 * destination address never matches.
	 */
	if (used_address && msg_sys->msg_name &&
	    used_address->name_len == msg_sys->msg_namelen &&
	    !memcmp(&used_address->name, msg_sys->msg_name,
		    used_address->name_len)) {
		err = sock_sendmsg_nosec(sock, msg_sys);
		goto out_freectl;
	}
	err = sock_sendmsg(sock, msg_sys);
	/*
	 * If this is sendmmsg() and sending to current destination address was
	 * successful, remember it.
	 */
	if (used_address && err >= 0) {
		used_address->name_len = msg_sys->msg_namelen;
		if (msg_sys->msg_name)
			memcpy(&used_address->name, msg_sys->msg_name,
			       used_address->name_len);
	}

out_freectl:
	/* release the control buffer unless it is the on-stack one */
	if (ctl_buf != ctl)
		sock_kfree_s(sock->sk, ctl_buf, ctl_len);
out:
	return err;
}
2348
2349static int sendmsg_copy_msghdr(struct msghdr *msg,
2350 struct user_msghdr __user *umsg, unsigned flags,
2351 struct iovec **iov)
2352{
2353 int err;
2354
2355 if (flags & MSG_CMSG_COMPAT) {
2356 struct compat_msghdr __user *msg_compat;
2357
2358 msg_compat = (struct compat_msghdr __user *) umsg;
2359 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2360 } else {
2361 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2362 }
2363 if (err < 0)
2364 return err;
2365
2366 return 0;
2367}
2368
2369static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2370 struct msghdr *msg_sys, unsigned int flags,
2371 struct used_address *used_address,
2372 unsigned int allowed_msghdr_flags)
2373{
2374 struct sockaddr_storage address;
2375 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2376 ssize_t err;
2377
2378 msg_sys->msg_name = &address;
2379
2380 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2381 if (err < 0)
2382 return err;
2383
2384 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2385 allowed_msghdr_flags);
da184284 2386 kfree(iov);
228e548e
AB
2387 return err;
2388}
2389
2390/*
2391 * BSD sendmsg interface
2392 */
d69e0779 2393long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *umsg,
0fa03c62
JA
2394 unsigned int flags)
2395{
d69e0779
JA
2396 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2397 struct sockaddr_storage address;
2398 struct msghdr msg = { .msg_name = &address };
2399 ssize_t err;
0fa03c62 2400
d69e0779
JA
2401 err = sendmsg_copy_msghdr(&msg, umsg, flags, &iov);
2402 if (err)
2403 return err;
2404 /* disallow ancillary data requests from this path */
2405 if (msg.msg_control || msg.msg_controllen) {
2406 err = -EINVAL;
2407 goto out;
2408 }
2409
2410 err = ____sys_sendmsg(sock, &msg, flags, NULL, 0);
2411out:
2412 kfree(iov);
2413 return err;
0fa03c62 2414}
228e548e 2415
e1834a32
DB
2416long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2417 bool forbid_cmsg_compat)
228e548e
AB
2418{
2419 int fput_needed, err;
2420 struct msghdr msg_sys;
1be374a0
AL
2421 struct socket *sock;
2422
e1834a32
DB
2423 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2424 return -EINVAL;
2425
1be374a0 2426 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2427 if (!sock)
2428 goto out;
2429
28a94d8f 2430 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2431
6cb153ca 2432 fput_light(sock->file, fput_needed);
89bddce5 2433out:
1da177e4
LT
2434 return err;
2435}
2436
666547ff 2437SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2438{
e1834a32 2439 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2440}
2441
228e548e
AB
2442/*
2443 * Linux sendmmsg interface
2444 */
2445
2446int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2447 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2448{
2449 int fput_needed, err, datagrams;
2450 struct socket *sock;
2451 struct mmsghdr __user *entry;
2452 struct compat_mmsghdr __user *compat_entry;
2453 struct msghdr msg_sys;
c71d8ebe 2454 struct used_address used_address;
f092276d 2455 unsigned int oflags = flags;
228e548e 2456
e1834a32
DB
2457 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2458 return -EINVAL;
2459
98382f41
AB
2460 if (vlen > UIO_MAXIOV)
2461 vlen = UIO_MAXIOV;
228e548e
AB
2462
2463 datagrams = 0;
2464
2465 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2466 if (!sock)
2467 return err;
2468
c71d8ebe 2469 used_address.name_len = UINT_MAX;
228e548e
AB
2470 entry = mmsg;
2471 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2472 err = 0;
f092276d 2473 flags |= MSG_BATCH;
228e548e
AB
2474
2475 while (datagrams < vlen) {
f092276d
TH
2476 if (datagrams == vlen - 1)
2477 flags = oflags;
2478
228e548e 2479 if (MSG_CMSG_COMPAT & flags) {
666547ff 2480 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2481 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2482 if (err < 0)
2483 break;
2484 err = __put_user(err, &compat_entry->msg_len);
2485 ++compat_entry;
2486 } else {
a7526eb5 2487 err = ___sys_sendmsg(sock,
666547ff 2488 (struct user_msghdr __user *)entry,
28a94d8f 2489 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2490 if (err < 0)
2491 break;
2492 err = put_user(err, &entry->msg_len);
2493 ++entry;
2494 }
2495
2496 if (err)
2497 break;
2498 ++datagrams;
3023898b
SHY
2499 if (msg_data_left(&msg_sys))
2500 break;
a78cb84c 2501 cond_resched();
228e548e
AB
2502 }
2503
228e548e
AB
2504 fput_light(sock->file, fput_needed);
2505
728ffb86
AB
2506 /* We only return an error if no datagrams were able to be sent */
2507 if (datagrams != 0)
228e548e
AB
2508 return datagrams;
2509
228e548e
AB
2510 return err;
2511}
2512
2513SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2514 unsigned int, vlen, unsigned int, flags)
2515{
e1834a32 2516 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2517}
2518
4257c8ca
JA
2519static int recvmsg_copy_msghdr(struct msghdr *msg,
2520 struct user_msghdr __user *umsg, unsigned flags,
2521 struct sockaddr __user **uaddr,
2522 struct iovec **iov)
1da177e4 2523{
08adb7da 2524 ssize_t err;
1da177e4 2525
4257c8ca
JA
2526 if (MSG_CMSG_COMPAT & flags) {
2527 struct compat_msghdr __user *msg_compat;
1da177e4 2528
4257c8ca
JA
2529 msg_compat = (struct compat_msghdr __user *) umsg;
2530 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2531 } else {
2532 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2533 }
1da177e4 2534 if (err < 0)
da184284 2535 return err;
1da177e4 2536
4257c8ca
JA
2537 return 0;
2538}
2539
/*
 * ____sys_recvmsg - receive into an already-imported message
 * @sock:    socket to receive from
 * @msg_sys: kernel msghdr prepared by recvmsg_copy_msghdr()
 * @msg:     user message header; namelen, flags and controllen are
 *           written back into it
 * @uaddr:   user buffer for the sender address, or NULL
 * @flags:   receive flags
 * @nosec:   non-zero selects sock_recvmsg_nosec() instead of sock_recvmsg()
 *
 * Returns the number of bytes received or a negative errno.
 *
 * Note: the local names "msg", "msg_compat" and "flags" are referenced by
 * the COMPAT_NAMELEN()/COMPAT_FLAGS() macros and must not be renamed.
 */
static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
			   struct user_msghdr __user *msg,
			   struct sockaddr __user *uaddr,
			   unsigned int flags, int nosec)
{
	struct compat_msghdr __user *msg_compat =
					(struct compat_msghdr __user *) msg;
	int __user *uaddr_len = COMPAT_NAMELEN(msg);
	struct sockaddr_storage addr;
	unsigned long cmsg_ptr;
	int len;
	ssize_t err;

	msg_sys->msg_name = &addr;
	/* remember where control data started to compute how much was written */
	cmsg_ptr = (unsigned long)msg_sys->msg_control;
	msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);

	/* We assume all kernel code knows the size of sockaddr_storage */
	msg_sys->msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;
	err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
	if (err < 0)
		goto out;
	/* keep the byte count; err is reused for the copy-out steps below */
	len = err;

	if (uaddr != NULL) {
		err = move_addr_to_user(&addr,
					msg_sys->msg_namelen, uaddr,
					uaddr_len);
		if (err < 0)
			goto out;
	}
	/* MSG_CMSG_COMPAT is masked out of the flags reported to user space */
	err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
			 COMPAT_FLAGS(msg));
	if (err)
		goto out;
	/* report how far msg_control advanced during the receive */
	if (MSG_CMSG_COMPAT & flags)
		err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
				 &msg_compat->msg_controllen);
	else
		err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
				 &msg->msg_controllen);
	if (err)
		goto out;
	err = len;
out:
	return err;
}
2590
2591static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2592 struct msghdr *msg_sys, unsigned int flags, int nosec)
2593{
2594 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2595 /* user mode address pointers */
2596 struct sockaddr __user *uaddr;
2597 ssize_t err;
2598
2599 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2600 if (err < 0)
2601 return err;
1da177e4 2602
4257c8ca 2603 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2604 kfree(iov);
a2e27255
ACM
2605 return err;
2606}
2607
2608/*
2609 * BSD recvmsg interface
2610 */
2611
d69e0779 2612long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *umsg,
aa1fa28f
JA
2613 unsigned int flags)
2614{
d69e0779
JA
2615 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2616 struct sockaddr_storage address;
2617 struct msghdr msg = { .msg_name = &address };
2618 struct sockaddr __user *uaddr;
2619 ssize_t err;
2620
2621 err = recvmsg_copy_msghdr(&msg, umsg, flags, &uaddr, &iov);
2622 if (err)
2623 return err;
2624 /* disallow ancillary data requests from this path */
2625 if (msg.msg_control || msg.msg_controllen) {
2626 err = -EINVAL;
2627 goto out;
2628 }
aa1fa28f 2629
d69e0779
JA
2630 err = ____sys_recvmsg(sock, &msg, umsg, uaddr, flags, 0);
2631out:
2632 kfree(iov);
2633 return err;
aa1fa28f
JA
2634}
2635
e1834a32
DB
2636long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2637 bool forbid_cmsg_compat)
a2e27255
ACM
2638{
2639 int fput_needed, err;
2640 struct msghdr msg_sys;
1be374a0
AL
2641 struct socket *sock;
2642
e1834a32
DB
2643 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2644 return -EINVAL;
2645
1be374a0 2646 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2647 if (!sock)
2648 goto out;
2649
a7526eb5 2650 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2651
6cb153ca 2652 fput_light(sock->file, fput_needed);
1da177e4
LT
2653out:
2654 return err;
2655}
2656
666547ff 2657SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2658 unsigned int, flags)
2659{
e1834a32 2660 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2661}
2662
a2e27255
ACM
2663/*
2664 * Linux recvmmsg interface
2665 */
2666
e11d4284
AB
2667static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2668 unsigned int vlen, unsigned int flags,
2669 struct timespec64 *timeout)
a2e27255
ACM
2670{
2671 int fput_needed, err, datagrams;
2672 struct socket *sock;
2673 struct mmsghdr __user *entry;
d7256d0e 2674 struct compat_mmsghdr __user *compat_entry;
a2e27255 2675 struct msghdr msg_sys;
766b9f92
DD
2676 struct timespec64 end_time;
2677 struct timespec64 timeout64;
a2e27255
ACM
2678
2679 if (timeout &&
2680 poll_select_set_timeout(&end_time, timeout->tv_sec,
2681 timeout->tv_nsec))
2682 return -EINVAL;
2683
2684 datagrams = 0;
2685
2686 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2687 if (!sock)
2688 return err;
2689
7797dc41
SHY
2690 if (likely(!(flags & MSG_ERRQUEUE))) {
2691 err = sock_error(sock->sk);
2692 if (err) {
2693 datagrams = err;
2694 goto out_put;
2695 }
e623a9e9 2696 }
a2e27255
ACM
2697
2698 entry = mmsg;
d7256d0e 2699 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2700
2701 while (datagrams < vlen) {
2702 /*
2703 * No need to ask LSM for more than the first datagram.
2704 */
d7256d0e 2705 if (MSG_CMSG_COMPAT & flags) {
666547ff 2706 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2707 &msg_sys, flags & ~MSG_WAITFORONE,
2708 datagrams);
d7256d0e
JMG
2709 if (err < 0)
2710 break;
2711 err = __put_user(err, &compat_entry->msg_len);
2712 ++compat_entry;
2713 } else {
a7526eb5 2714 err = ___sys_recvmsg(sock,
666547ff 2715 (struct user_msghdr __user *)entry,
a7526eb5
AL
2716 &msg_sys, flags & ~MSG_WAITFORONE,
2717 datagrams);
d7256d0e
JMG
2718 if (err < 0)
2719 break;
2720 err = put_user(err, &entry->msg_len);
2721 ++entry;
2722 }
2723
a2e27255
ACM
2724 if (err)
2725 break;
a2e27255
ACM
2726 ++datagrams;
2727
71c5c159
BB
2728 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2729 if (flags & MSG_WAITFORONE)
2730 flags |= MSG_DONTWAIT;
2731
a2e27255 2732 if (timeout) {
766b9f92 2733 ktime_get_ts64(&timeout64);
c2e6c856 2734 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2735 if (timeout->tv_sec < 0) {
2736 timeout->tv_sec = timeout->tv_nsec = 0;
2737 break;
2738 }
2739
2740 /* Timeout, return less than vlen datagrams */
2741 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2742 break;
2743 }
2744
2745 /* Out of band data, return right away */
2746 if (msg_sys.msg_flags & MSG_OOB)
2747 break;
a78cb84c 2748 cond_resched();
a2e27255
ACM
2749 }
2750
a2e27255 2751 if (err == 0)
34b88a68
ACM
2752 goto out_put;
2753
2754 if (datagrams == 0) {
2755 datagrams = err;
2756 goto out_put;
2757 }
a2e27255 2758
34b88a68
ACM
2759 /*
2760 * We may return less entries than requested (vlen) if the
2761 * sock is non block and there aren't enough datagrams...
2762 */
2763 if (err != -EAGAIN) {
a2e27255 2764 /*
34b88a68
ACM
2765 * ... or if recvmsg returns an error after we
2766 * received some datagrams, where we record the
2767 * error to return on the next call or if the
2768 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2769 */
34b88a68 2770 sock->sk->sk_err = -err;
a2e27255 2771 }
34b88a68
ACM
2772out_put:
2773 fput_light(sock->file, fput_needed);
a2e27255 2774
34b88a68 2775 return datagrams;
a2e27255
ACM
2776}
2777
e11d4284
AB
2778int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2779 unsigned int vlen, unsigned int flags,
2780 struct __kernel_timespec __user *timeout,
2781 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2782{
2783 int datagrams;
c2e6c856 2784 struct timespec64 timeout_sys;
a2e27255 2785
e11d4284
AB
2786 if (timeout && get_timespec64(&timeout_sys, timeout))
2787 return -EFAULT;
a2e27255 2788
e11d4284 2789 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2790 return -EFAULT;
2791
e11d4284
AB
2792 if (!timeout && !timeout32)
2793 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2794
2795 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2796
e11d4284
AB
2797 if (datagrams <= 0)
2798 return datagrams;
2799
2800 if (timeout && put_timespec64(&timeout_sys, timeout))
2801 datagrams = -EFAULT;
2802
2803 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2804 datagrams = -EFAULT;
2805
2806 return datagrams;
2807}
2808
1255e269
DB
2809SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2810 unsigned int, vlen, unsigned int, flags,
c2e6c856 2811 struct __kernel_timespec __user *, timeout)
1255e269 2812{
e11d4284
AB
2813 if (flags & MSG_CMSG_COMPAT)
2814 return -EINVAL;
2815
2816 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2817}
2818
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg entry point taking an old_timespec32 timeout;
 * MSG_CMSG_COMPAT is not accepted from callers.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	return (flags & MSG_CMSG_COMPAT) ?
		-EINVAL :
		__sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2830
a2e27255 2831#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall.
 *
 * Indexed by the socketcall number; each entry is the size in bytes of
 * the unsigned long argument array that sys_socketcall copies in from
 * user space for that call.  Entry 0 is unused (call numbers start at 1).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	AL(4), AL(5), AL(4)
};

#undef AL
2842
/*
 *	System call vectors.
 *
 *	Multiplexer for the socket system calls: @call selects the
 *	operation and @args points to a user-space array of unsigned long
 *	arguments whose length is taken from nargs[].
 *
 *	Argument checking cleaned up. Saved 20% in size.
 *	This function doesn't need to set the kernel lock because
 *	it is set by the callees.
 */

SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
{
	unsigned long a[AUDITSC_ARGS];
	unsigned long a0, a1;
	int err;
	unsigned int len;

	if (call < 1 || call > SYS_SENDMMSG)
		return -EINVAL;
	/* clamp the index under speculation before it is used on nargs[] */
	call = array_index_nospec(call, SYS_SENDMMSG + 1);

	len = nargs[call];
	if (len > sizeof(a))
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(a, args, len))
		return -EFAULT;

	/* audit gets the argument count, not the byte length */
	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
	if (err)
		return err;

	a0 = a[0];
	a1 = a[1];

	switch (call) {
	case SYS_SOCKET:
		err = __sys_socket(a0, a1, a[2]);
		break;
	case SYS_BIND:
		err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
		break;
	case SYS_CONNECT:
		err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
		break;
	case SYS_LISTEN:
		err = __sys_listen(a0, a1);
		break;
	case SYS_ACCEPT:
		/* plain accept is accept4 with no flags */
		err = __sys_accept4(a0, (struct sockaddr __user *)a1,
				    (int __user *)a[2], 0);
		break;
	case SYS_GETSOCKNAME:
		err =
		    __sys_getsockname(a0, (struct sockaddr __user *)a1,
				      (int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err =
		    __sys_getpeername(a0, (struct sockaddr __user *)a1,
				      (int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		/* send is sendto without a destination address */
		err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
				   NULL, 0);
		break;
	case SYS_SENDTO:
		err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
				   (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		/* recv is recvfrom without a sender address buffer */
		err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
				     NULL, NULL);
		break;
	case SYS_RECVFROM:
		err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
				     (struct sockaddr __user *)a[4],
				     (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = __sys_shutdown(a0, a1);
		break;
	case SYS_SETSOCKOPT:
		err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
				       a[4]);
		break;
	case SYS_GETSOCKOPT:
		err =
		    __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
				     (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
				    a[2], true);
		break;
	case SYS_SENDMMSG:
		err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
				     a[3], true);
		break;
	case SYS_RECVMSG:
		err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
				    a[2], true);
		break;
	case SYS_RECVMMSG:
		/* the timeout argument's layout depends on the kernel's word size */
		if (IS_ENABLED(CONFIG_64BIT))
			err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
					     a[2], a[3],
					     (struct __kernel_timespec __user *)a[4],
					     NULL);
		else
			err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
					     a[2], a[3], NULL,
					     (struct old_timespec32 __user *)a[4]);
		break;
	case SYS_ACCEPT4:
		err = __sys_accept4(a0, (struct sockaddr __user *)a1,
				    (int __user *)a[2], a[3]);
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}
2969
89bddce5 2970#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2971
55737fda
SH
2972/**
2973 * sock_register - add a socket protocol handler
2974 * @ops: description of protocol
2975 *
1da177e4
LT
2976 * This function is called by a protocol handler that wants to
2977 * advertise its address family, and have it linked into the
e793c0f7 2978 * socket interface. The value ops->family corresponds to the
55737fda 2979 * socket system call protocol family.
1da177e4 2980 */
f0fd27d4 2981int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2982{
2983 int err;
2984
2985 if (ops->family >= NPROTO) {
3410f22e 2986 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2987 return -ENOBUFS;
2988 }
55737fda
SH
2989
2990 spin_lock(&net_family_lock);
190683a9
ED
2991 if (rcu_dereference_protected(net_families[ops->family],
2992 lockdep_is_held(&net_family_lock)))
55737fda
SH
2993 err = -EEXIST;
2994 else {
cf778b00 2995 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2996 err = 0;
2997 }
55737fda
SH
2998 spin_unlock(&net_family_lock);
2999
3410f22e 3000 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
3001 return err;
3002}
c6d409cf 3003EXPORT_SYMBOL(sock_register);
1da177e4 3004
55737fda
SH
3005/**
3006 * sock_unregister - remove a protocol handler
3007 * @family: protocol family to remove
3008 *
1da177e4
LT
3009 * This function is called by a protocol handler that wants to
3010 * remove its address family, and have it unlinked from the
55737fda
SH
3011 * new socket creation.
3012 *
3013 * If protocol handler is a module, then it can use module reference
3014 * counts to protect against new references. If protocol handler is not
3015 * a module then it needs to provide its own protection in
3016 * the ops->create routine.
1da177e4 3017 */
f0fd27d4 3018void sock_unregister(int family)
1da177e4 3019{
f0fd27d4 3020 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3021
55737fda 3022 spin_lock(&net_family_lock);
a9b3cd7f 3023 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3024 spin_unlock(&net_family_lock);
3025
3026 synchronize_rcu();
3027
3410f22e 3028 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 3029}
c6d409cf 3030EXPORT_SYMBOL(sock_unregister);
1da177e4 3031
bf2ae2e4
XL
3032bool sock_is_registered(int family)
3033{
66b51b0a 3034 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3035}
3036
77d76ea3 3037static int __init sock_init(void)
1da177e4 3038{
b3e19d92 3039 int err;
2ca794e5
EB
3040 /*
3041 * Initialize the network sysctl infrastructure.
3042 */
3043 err = net_sysctl_init();
3044 if (err)
3045 goto out;
b3e19d92 3046
1da177e4 3047 /*
89bddce5 3048 * Initialize skbuff SLAB cache
1da177e4
LT
3049 */
3050 skb_init();
1da177e4
LT
3051
3052 /*
89bddce5 3053 * Initialize the protocols module.
1da177e4
LT
3054 */
3055
3056 init_inodecache();
b3e19d92
NP
3057
3058 err = register_filesystem(&sock_fs_type);
3059 if (err)
3060 goto out_fs;
1da177e4 3061 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3062 if (IS_ERR(sock_mnt)) {
3063 err = PTR_ERR(sock_mnt);
3064 goto out_mount;
3065 }
77d76ea3
AK
3066
3067 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3068 */
3069
3070#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3071 err = netfilter_init();
3072 if (err)
3073 goto out;
1da177e4 3074#endif
cbeb321a 3075
408eccce 3076 ptp_classifier_init();
c1f19b51 3077
b3e19d92
NP
3078out:
3079 return err;
3080
3081out_mount:
3082 unregister_filesystem(&sock_fs_type);
3083out_fs:
3084 goto out;
1da177e4
LT
3085}
3086
77d76ea3
AK
3087core_initcall(sock_init); /* early initcall */
3088
1da177e4
LT
3089#ifdef CONFIG_PROC_FS
3090void socket_seq_show(struct seq_file *seq)
3091{
648845ab
TZ
3092 seq_printf(seq, "sockets: used %d\n",
3093 sock_inuse_get(seq->private));
1da177e4 3094}
89bddce5 3095#endif /* CONFIG_PROC_FS */
1da177e4 3096
89bbfc95 3097#ifdef CONFIG_COMPAT
36fd633e 3098static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 3099{
6b96018b 3100 struct compat_ifconf ifc32;
7a229387 3101 struct ifconf ifc;
7a229387
AB
3102 int err;
3103
6b96018b 3104 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3105 return -EFAULT;
3106
36fd633e
AV
3107 ifc.ifc_len = ifc32.ifc_len;
3108 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 3109
36fd633e
AV
3110 rtnl_lock();
3111 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
3112 rtnl_unlock();
7a229387
AB
3113 if (err)
3114 return err;
3115
36fd633e 3116 ifc32.ifc_len = ifc.ifc_len;
6b96018b 3117 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3118 return -EFAULT;
3119
3120 return 0;
3121}
3122
6b96018b 3123static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 3124{
3a7da39d
BH
3125 struct compat_ethtool_rxnfc __user *compat_rxnfc;
3126 bool convert_in = false, convert_out = false;
44c02a2c
AV
3127 size_t buf_size = 0;
3128 struct ethtool_rxnfc __user *rxnfc = NULL;
3129 struct ifreq ifr;
3a7da39d
BH
3130 u32 rule_cnt = 0, actual_rule_cnt;
3131 u32 ethcmd;
7a229387 3132 u32 data;
3a7da39d 3133 int ret;
7a229387 3134
3a7da39d
BH
3135 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
3136 return -EFAULT;
7a229387 3137
3a7da39d
BH
3138 compat_rxnfc = compat_ptr(data);
3139
3140 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
3141 return -EFAULT;
3142
3a7da39d
BH
3143 /* Most ethtool structures are defined without padding.
3144 * Unfortunately struct ethtool_rxnfc is an exception.
3145 */
3146 switch (ethcmd) {
3147 default:
3148 break;
3149 case ETHTOOL_GRXCLSRLALL:
3150 /* Buffer size is variable */
3151 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3152 return -EFAULT;
3153 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3154 return -ENOMEM;
3155 buf_size += rule_cnt * sizeof(u32);
3156 /* fall through */
3157 case ETHTOOL_GRXRINGS:
3158 case ETHTOOL_GRXCLSRLCNT:
3159 case ETHTOOL_GRXCLSRULE:
55664f32 3160 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3161 convert_out = true;
3162 /* fall through */
3163 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3164 buf_size += sizeof(struct ethtool_rxnfc);
3165 convert_in = true;
44c02a2c 3166 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3167 break;
3168 }
3169
44c02a2c 3170 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3171 return -EFAULT;
3172
44c02a2c 3173 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3174
3a7da39d 3175 if (convert_in) {
127fe533 3176 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3177 * fs.ring_cookie and at the end of fs, but nowhere else.
3178 */
127fe533
AD
3179 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3180 sizeof(compat_rxnfc->fs.m_ext) !=
3181 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3182 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3183 BUILD_BUG_ON(
3184 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3185 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3186 offsetof(struct ethtool_rxnfc, fs.location) -
3187 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3188
3189 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3190 (void __user *)(&rxnfc->fs.m_ext + 1) -
3191 (void __user *)rxnfc) ||
3a7da39d
BH
3192 copy_in_user(&rxnfc->fs.ring_cookie,
3193 &compat_rxnfc->fs.ring_cookie,
954b1244 3194 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3195 (void __user *)&rxnfc->fs.ring_cookie))
3196 return -EFAULT;
3197 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3198 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3199 return -EFAULT;
3200 } else if (copy_in_user(&rxnfc->rule_cnt,
3201 &compat_rxnfc->rule_cnt,
3202 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3203 return -EFAULT;
3204 }
3205
44c02a2c 3206 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3207 if (ret)
3208 return ret;
3209
3210 if (convert_out) {
3211 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3212 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3213 (const void __user *)rxnfc) ||
3a7da39d
BH
3214 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3215 &rxnfc->fs.ring_cookie,
954b1244
SH
3216 (const void __user *)(&rxnfc->fs.location + 1) -
3217 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3218 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3219 sizeof(rxnfc->rule_cnt)))
3220 return -EFAULT;
3221
3222 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3223 /* As an optimisation, we only copy the actual
3224 * number of rules that the underlying
3225 * function returned. Since Mallory might
3226 * change the rule count in user memory, we
3227 * check that it is less than the rule count
3228 * originally given (as the user buffer size),
3229 * which has been range-checked.
3230 */
3231 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3232 return -EFAULT;
3233 if (actual_rule_cnt < rule_cnt)
3234 rule_cnt = actual_rule_cnt;
3235 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3236 &rxnfc->rule_locs[0],
3237 rule_cnt * sizeof(u32)))
3238 return -EFAULT;
3239 }
3240 }
3241
3242 return 0;
7a229387
AB
3243}
3244
7a50a240
AB
3245static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3246{
7a50a240 3247 compat_uptr_t uptr32;
44c02a2c
AV
3248 struct ifreq ifr;
3249 void __user *saved;
3250 int err;
7a50a240 3251
44c02a2c 3252 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3253 return -EFAULT;
3254
3255 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3256 return -EFAULT;
3257
44c02a2c
AV
3258 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3259 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3260
44c02a2c
AV
3261 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3262 if (!err) {
3263 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3264 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3265 err = -EFAULT;
ccbd6a5a 3266 }
44c02a2c 3267 return err;
7a229387
AB
3268}
3269
590d4693
BH
3270/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3271static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3272 struct compat_ifreq __user *u_ifreq32)
7a229387 3273{
44c02a2c 3274 struct ifreq ifreq;
7a229387
AB
3275 u32 data32;
3276
44c02a2c 3277 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3278 return -EFAULT;
44c02a2c 3279 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3280 return -EFAULT;
44c02a2c 3281 ifreq.ifr_data = compat_ptr(data32);
7a229387 3282
44c02a2c 3283 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3284}
3285
37ac39bd
JB
3286static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3287 unsigned int cmd,
3288 struct compat_ifreq __user *uifr32)
3289{
3290 struct ifreq __user *uifr;
3291 int err;
3292
3293 /* Handle the fact that while struct ifreq has the same *layout* on
3294 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3295 * which are handled elsewhere, it still has different *size* due to
3296 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3297 * resulting in struct ifreq being 32 and 40 bytes respectively).
3298 * As a result, if the struct happens to be at the end of a page and
3299 * the next page isn't readable/writable, we get a fault. To prevent
3300 * that, copy back and forth to the full size.
3301 */
3302
3303 uifr = compat_alloc_user_space(sizeof(*uifr));
3304 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3305 return -EFAULT;
3306
3307 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3308
3309 if (!err) {
3310 switch (cmd) {
3311 case SIOCGIFFLAGS:
3312 case SIOCGIFMETRIC:
3313 case SIOCGIFMTU:
3314 case SIOCGIFMEM:
3315 case SIOCGIFHWADDR:
3316 case SIOCGIFINDEX:
3317 case SIOCGIFADDR:
3318 case SIOCGIFBRDADDR:
3319 case SIOCGIFDSTADDR:
3320 case SIOCGIFNETMASK:
3321 case SIOCGIFPFLAGS:
3322 case SIOCGIFTXQLEN:
3323 case SIOCGMIIPHY:
3324 case SIOCGMIIREG:
c6c9fee3 3325 case SIOCGIFNAME:
37ac39bd
JB
3326 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3327 err = -EFAULT;
3328 break;
3329 }
3330 }
3331 return err;
3332}
3333
a2116ed2
AB
3334static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3335 struct compat_ifreq __user *uifr32)
3336{
3337 struct ifreq ifr;
3338 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3339 int err;
3340
3341 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3342 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3343 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3344 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3345 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3346 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3347 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3348 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3349 if (err)
3350 return -EFAULT;
3351
44c02a2c 3352 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3353
3354 if (cmd == SIOCGIFMAP && !err) {
3355 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3356 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3357 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3358 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3359 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3360 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3361 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3362 if (err)
3363 err = -EFAULT;
3364 }
3365 return err;
3366}
3367
7a229387 3368struct rtentry32 {
c6d409cf 3369 u32 rt_pad1;
7a229387
AB
3370 struct sockaddr rt_dst; /* target address */
3371 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3372 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3373 unsigned short rt_flags;
3374 short rt_pad2;
3375 u32 rt_pad3;
3376 unsigned char rt_tos;
3377 unsigned char rt_class;
3378 short rt_pad4;
3379 short rt_metric; /* +1 for binary compatibility! */
7a229387 3380 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3381 u32 rt_mtu; /* per route MTU/Window */
3382 u32 rt_window; /* Window clamping */
7a229387
AB
3383 unsigned short rt_irtt; /* Initial RTT */
3384};
3385
3386struct in6_rtmsg32 {
3387 struct in6_addr rtmsg_dst;
3388 struct in6_addr rtmsg_src;
3389 struct in6_addr rtmsg_gateway;
3390 u32 rtmsg_type;
3391 u16 rtmsg_dst_len;
3392 u16 rtmsg_src_len;
3393 u32 rtmsg_metric;
3394 u32 rtmsg_info;
3395 u32 rtmsg_flags;
3396 s32 rtmsg_ifindex;
3397};
3398
6b96018b
AB
3399static int routing_ioctl(struct net *net, struct socket *sock,
3400 unsigned int cmd, void __user *argp)
7a229387
AB
3401{
3402 int ret;
3403 void *r = NULL;
3404 struct in6_rtmsg r6;
3405 struct rtentry r4;
3406 char devname[16];
3407 u32 rtdev;
3408 mm_segment_t old_fs = get_fs();
3409
6b96018b
AB
3410 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3411 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3412 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3413 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3414 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3415 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3416 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3417 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3418 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3419 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3420 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3421
3422 r = (void *) &r6;
3423 } else { /* ipv4 */
6b96018b 3424 struct rtentry32 __user *ur4 = argp;
c6d409cf 3425 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3426 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3427 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3428 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3429 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3430 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3431 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3432 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3433 if (rtdev) {
c6d409cf 3434 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3435 r4.rt_dev = (char __user __force *)devname;
3436 devname[15] = 0;
7a229387
AB
3437 } else
3438 r4.rt_dev = NULL;
3439
3440 r = (void *) &r4;
3441 }
3442
3443 if (ret) {
3444 ret = -EFAULT;
3445 goto out;
3446 }
3447
c6d409cf 3448 set_fs(KERNEL_DS);
63ff03ab 3449 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3450 set_fs(old_fs);
7a229387
AB
3451
3452out:
7a229387
AB
3453 return ret;
3454}
3455
3456/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3457 * for some operations; this forces use of the newer bridge-utils that
25985edc 3458 * use compatible ioctls
7a229387 3459 */
6b96018b 3460static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3461{
6b96018b 3462 compat_ulong_t tmp;
7a229387 3463
6b96018b 3464 if (get_user(tmp, argp))
7a229387
AB
3465 return -EFAULT;
3466 if (tmp == BRCTL_GET_VERSION)
3467 return BRCTL_VERSION + 1;
3468 return -EINVAL;
3469}
3470
6b96018b
AB
3471static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3472 unsigned int cmd, unsigned long arg)
3473{
3474 void __user *argp = compat_ptr(arg);
3475 struct sock *sk = sock->sk;
3476 struct net *net = sock_net(sk);
7a229387 3477
6b96018b 3478 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3479 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3480
3481 switch (cmd) {
3482 case SIOCSIFBR:
3483 case SIOCGIFBR:
3484 return old_bridge_ioctl(argp);
6b96018b 3485 case SIOCGIFCONF:
36fd633e 3486 return compat_dev_ifconf(net, argp);
6b96018b
AB
3487 case SIOCETHTOOL:
3488 return ethtool_ioctl(net, argp);
7a50a240
AB
3489 case SIOCWANDEV:
3490 return compat_siocwandev(net, argp);
a2116ed2
AB
3491 case SIOCGIFMAP:
3492 case SIOCSIFMAP:
3493 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3494 case SIOCADDRT:
3495 case SIOCDELRT:
3496 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3497 case SIOCGSTAMP_OLD:
3498 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3499 if (!sock->ops->gettstamp)
3500 return -ENOIOCTLCMD;
0768e170 3501 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3502 !COMPAT_USE_64BIT_TIME);
3503
590d4693
BH
3504 case SIOCBONDSLAVEINFOQUERY:
3505 case SIOCBONDINFOQUERY:
a2116ed2 3506 case SIOCSHWTSTAMP:
fd468c74 3507 case SIOCGHWTSTAMP:
590d4693 3508 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3509
3510 case FIOSETOWN:
3511 case SIOCSPGRP:
3512 case FIOGETOWN:
3513 case SIOCGPGRP:
3514 case SIOCBRADDBR:
3515 case SIOCBRDELBR:
3516 case SIOCGIFVLAN:
3517 case SIOCSIFVLAN:
3518 case SIOCADDDLCI:
3519 case SIOCDELDLCI:
c62cce2c 3520 case SIOCGSKNS:
0768e170
AB
3521 case SIOCGSTAMP_NEW:
3522 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3523 return sock_ioctl(file, cmd, arg);
3524
3525 case SIOCGIFFLAGS:
3526 case SIOCSIFFLAGS:
3527 case SIOCGIFMETRIC:
3528 case SIOCSIFMETRIC:
3529 case SIOCGIFMTU:
3530 case SIOCSIFMTU:
3531 case SIOCGIFMEM:
3532 case SIOCSIFMEM:
3533 case SIOCGIFHWADDR:
3534 case SIOCSIFHWADDR:
3535 case SIOCADDMULTI:
3536 case SIOCDELMULTI:
3537 case SIOCGIFINDEX:
6b96018b
AB
3538 case SIOCGIFADDR:
3539 case SIOCSIFADDR:
3540 case SIOCSIFHWBROADCAST:
6b96018b 3541 case SIOCDIFADDR:
6b96018b
AB
3542 case SIOCGIFBRDADDR:
3543 case SIOCSIFBRDADDR:
3544 case SIOCGIFDSTADDR:
3545 case SIOCSIFDSTADDR:
3546 case SIOCGIFNETMASK:
3547 case SIOCSIFNETMASK:
3548 case SIOCSIFPFLAGS:
3549 case SIOCGIFPFLAGS:
3550 case SIOCGIFTXQLEN:
3551 case SIOCSIFTXQLEN:
3552 case SIOCBRADDIF:
3553 case SIOCBRDELIF:
c6c9fee3 3554 case SIOCGIFNAME:
9177efd3
AB
3555 case SIOCSIFNAME:
3556 case SIOCGMIIPHY:
3557 case SIOCGMIIREG:
3558 case SIOCSMIIREG:
f92d4fc9
AV
3559 case SIOCBONDENSLAVE:
3560 case SIOCBONDRELEASE:
3561 case SIOCBONDSETHWADDR:
3562 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3563 return compat_ifreq_ioctl(net, sock, cmd, argp);
3564
6b96018b
AB
3565 case SIOCSARP:
3566 case SIOCGARP:
3567 case SIOCDARP:
c7dc504e 3568 case SIOCOUTQ:
9d7bf41f 3569 case SIOCOUTQNSD:
6b96018b 3570 case SIOCATMARK:
63ff03ab 3571 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3572 }
3573
6b96018b
AB
3574 return -ENOIOCTLCMD;
3575}
7a229387 3576
95c96174 3577static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3578 unsigned long arg)
89bbfc95
SP
3579{
3580 struct socket *sock = file->private_data;
3581 int ret = -ENOIOCTLCMD;
87de87d5
DM
3582 struct sock *sk;
3583 struct net *net;
3584
3585 sk = sock->sk;
3586 net = sock_net(sk);
89bbfc95
SP
3587
3588 if (sock->ops->compat_ioctl)
3589 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3590
87de87d5
DM
3591 if (ret == -ENOIOCTLCMD &&
3592 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3593 ret = compat_wext_handle_ioctl(net, cmd, arg);
3594
6b96018b
AB
3595 if (ret == -ENOIOCTLCMD)
3596 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3597
89bbfc95
SP
3598 return ret;
3599}
3600#endif
3601
8a3c245c
PT
3602/**
3603 * kernel_bind - bind an address to a socket (kernel space)
3604 * @sock: socket
3605 * @addr: address
3606 * @addrlen: length of address
3607 *
3608 * Returns 0 or an error.
3609 */
3610
ac5a488e
SS
3611int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3612{
3613 return sock->ops->bind(sock, addr, addrlen);
3614}
c6d409cf 3615EXPORT_SYMBOL(kernel_bind);
ac5a488e 3616
8a3c245c
PT
3617/**
3618 * kernel_listen - move socket to listening state (kernel space)
3619 * @sock: socket
3620 * @backlog: pending connections queue size
3621 *
3622 * Returns 0 or an error.
3623 */
3624
ac5a488e
SS
3625int kernel_listen(struct socket *sock, int backlog)
3626{
3627 return sock->ops->listen(sock, backlog);
3628}
c6d409cf 3629EXPORT_SYMBOL(kernel_listen);
ac5a488e 3630
8a3c245c
PT
3631/**
3632 * kernel_accept - accept a connection (kernel space)
3633 * @sock: listening socket
3634 * @newsock: new connected socket
3635 * @flags: flags
3636 *
3637 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3638 * If it fails, @newsock is guaranteed to be %NULL.
3639 * Returns 0 or an error.
3640 */
3641
ac5a488e
SS
3642int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3643{
3644 struct sock *sk = sock->sk;
3645 int err;
3646
3647 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3648 newsock);
3649 if (err < 0)
3650 goto done;
3651
cdfbabfb 3652 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3653 if (err < 0) {
3654 sock_release(*newsock);
fa8705b0 3655 *newsock = NULL;
ac5a488e
SS
3656 goto done;
3657 }
3658
3659 (*newsock)->ops = sock->ops;
1b08534e 3660 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3661
3662done:
3663 return err;
3664}
c6d409cf 3665EXPORT_SYMBOL(kernel_accept);
ac5a488e 3666
8a3c245c
PT
3667/**
3668 * kernel_connect - connect a socket (kernel space)
3669 * @sock: socket
3670 * @addr: address
3671 * @addrlen: address length
3672 * @flags: flags (O_NONBLOCK, ...)
3673 *
3674 * For datagram sockets, @addr is the addres to which datagrams are sent
3675 * by default, and the only address from which datagrams are received.
3676 * For stream sockets, attempts to connect to @addr.
3677 * Returns 0 or an error code.
3678 */
3679
ac5a488e 3680int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3681 int flags)
ac5a488e
SS
3682{
3683 return sock->ops->connect(sock, addr, addrlen, flags);
3684}
c6d409cf 3685EXPORT_SYMBOL(kernel_connect);
ac5a488e 3686
8a3c245c
PT
3687/**
3688 * kernel_getsockname - get the address which the socket is bound (kernel space)
3689 * @sock: socket
3690 * @addr: address holder
3691 *
3692 * Fills the @addr pointer with the address which the socket is bound.
3693 * Returns 0 or an error code.
3694 */
3695
9b2c45d4 3696int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3697{
9b2c45d4 3698 return sock->ops->getname(sock, addr, 0);
ac5a488e 3699}
c6d409cf 3700EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3701
8a3c245c
PT
3702/**
3703 * kernel_peername - get the address which the socket is connected (kernel space)
3704 * @sock: socket
3705 * @addr: address holder
3706 *
3707 * Fills the @addr pointer with the address which the socket is connected.
3708 * Returns 0 or an error code.
3709 */
3710
9b2c45d4 3711int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3712{
9b2c45d4 3713 return sock->ops->getname(sock, addr, 1);
ac5a488e 3714}
c6d409cf 3715EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3716
8a3c245c
PT
3717/**
3718 * kernel_getsockopt - get a socket option (kernel space)
3719 * @sock: socket
3720 * @level: API level (SOL_SOCKET, ...)
3721 * @optname: option tag
3722 * @optval: option value
3723 * @optlen: option length
3724 *
3725 * Assigns the option length to @optlen.
3726 * Returns 0 or an error.
3727 */
3728
ac5a488e
SS
3729int kernel_getsockopt(struct socket *sock, int level, int optname,
3730 char *optval, int *optlen)
3731{
3732 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3733 char __user *uoptval;
3734 int __user *uoptlen;
ac5a488e
SS
3735 int err;
3736
fb8621bb
NK
3737 uoptval = (char __user __force *) optval;
3738 uoptlen = (int __user __force *) optlen;
3739
ac5a488e
SS
3740 set_fs(KERNEL_DS);
3741 if (level == SOL_SOCKET)
fb8621bb 3742 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3743 else
fb8621bb
NK
3744 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3745 uoptlen);
ac5a488e
SS
3746 set_fs(oldfs);
3747 return err;
3748}
c6d409cf 3749EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3750
8a3c245c
PT
3751/**
3752 * kernel_setsockopt - set a socket option (kernel space)
3753 * @sock: socket
3754 * @level: API level (SOL_SOCKET, ...)
3755 * @optname: option tag
3756 * @optval: option value
3757 * @optlen: option length
3758 *
3759 * Returns 0 or an error.
3760 */
3761
ac5a488e 3762int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3763 char *optval, unsigned int optlen)
ac5a488e
SS
3764{
3765 mm_segment_t oldfs = get_fs();
fb8621bb 3766 char __user *uoptval;
ac5a488e
SS
3767 int err;
3768
fb8621bb
NK
3769 uoptval = (char __user __force *) optval;
3770
ac5a488e
SS
3771 set_fs(KERNEL_DS);
3772 if (level == SOL_SOCKET)
fb8621bb 3773 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3774 else
fb8621bb 3775 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3776 optlen);
3777 set_fs(oldfs);
3778 return err;
3779}
c6d409cf 3780EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3781
8a3c245c
PT
3782/**
3783 * kernel_sendpage - send a &page through a socket (kernel space)
3784 * @sock: socket
3785 * @page: page
3786 * @offset: page offset
3787 * @size: total size in bytes
3788 * @flags: flags (MSG_DONTWAIT, ...)
3789 *
3790 * Returns the total amount sent in bytes or an error.
3791 */
3792
ac5a488e
SS
3793int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3794 size_t size, int flags)
3795{
3796 if (sock->ops->sendpage)
3797 return sock->ops->sendpage(sock, page, offset, size, flags);
3798
3799 return sock_no_sendpage(sock, page, offset, size, flags);
3800}
c6d409cf 3801EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3802
8a3c245c
PT
3803/**
3804 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3805 * @sk: sock
3806 * @page: page
3807 * @offset: page offset
3808 * @size: total size in bytes
3809 * @flags: flags (MSG_DONTWAIT, ...)
3810 *
3811 * Returns the total amount sent in bytes or an error.
3812 * Caller must hold @sk.
3813 */
3814
306b13eb
TH
3815int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3816 size_t size, int flags)
3817{
3818 struct socket *sock = sk->sk_socket;
3819
3820 if (sock->ops->sendpage_locked)
3821 return sock->ops->sendpage_locked(sk, page, offset, size,
3822 flags);
3823
3824 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3825}
3826EXPORT_SYMBOL(kernel_sendpage_locked);
3827
8a3c245c
PT
3828/**
3829 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3830 * @sock: socket
3831 * @how: connection part
3832 *
3833 * Returns 0 or an error.
3834 */
3835
91cf45f0
TM
3836int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3837{
3838 return sock->ops->shutdown(sock, how);
3839}
91cf45f0 3840EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3841
8a3c245c
PT
3842/**
3843 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3844 * @sk: socket
3845 *
3846 * This routine returns the IP overhead imposed by a socket i.e.
3847 * the length of the underlying IP header, depending on whether
3848 * this is an IPv4 or IPv6 socket and the length from IP options turned
3849 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3850 */
8a3c245c 3851
113c3075
P
3852u32 kernel_sock_ip_overhead(struct sock *sk)
3853{
3854 struct inet_sock *inet;
3855 struct ip_options_rcu *opt;
3856 u32 overhead = 0;
113c3075
P
3857#if IS_ENABLED(CONFIG_IPV6)
3858 struct ipv6_pinfo *np;
3859 struct ipv6_txoptions *optv6 = NULL;
3860#endif /* IS_ENABLED(CONFIG_IPV6) */
3861
3862 if (!sk)
3863 return overhead;
3864
113c3075
P
3865 switch (sk->sk_family) {
3866 case AF_INET:
3867 inet = inet_sk(sk);
3868 overhead += sizeof(struct iphdr);
3869 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3870 sock_owned_by_user(sk));
113c3075
P
3871 if (opt)
3872 overhead += opt->opt.optlen;
3873 return overhead;
3874#if IS_ENABLED(CONFIG_IPV6)
3875 case AF_INET6:
3876 np = inet6_sk(sk);
3877 overhead += sizeof(struct ipv6hdr);
3878 if (np)
3879 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3880 sock_owned_by_user(sk));
113c3075
P
3881 if (optv6)
3882 overhead += (optv6->opt_flen + optv6->opt_nflen);
3883 return overhead;
3884#endif /* IS_ENABLED(CONFIG_IPV6) */
3885 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3886 return overhead;
3887 }
3888}
3889EXPORT_SYMBOL(kernel_sock_ip_overhead);