]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/socket.c
Merge branch 'vsock-add-local-transport-support'
[mirror_ubuntu-jammy-kernel.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
1da177e4 55#include <linux/mm.h>
1da177e4
LT
56#include <linux/socket.h>
57#include <linux/file.h>
58#include <linux/net.h>
59#include <linux/interrupt.h>
aaca0bdc 60#include <linux/thread_info.h>
55737fda 61#include <linux/rcupdate.h>
1da177e4
LT
62#include <linux/netdevice.h>
63#include <linux/proc_fs.h>
64#include <linux/seq_file.h>
4a3e2f71 65#include <linux/mutex.h>
1da177e4 66#include <linux/if_bridge.h>
20380731
ACM
67#include <linux/if_frad.h>
68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
c7dc504e 103#include <linux/termios.h>
6b96018b 104#include <linux/sockios.h>
076bb0c8 105#include <net/busy_poll.h>
f24b9be5 106#include <linux/errqueue.h>
06021292 107
e0d1095a 108#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
109unsigned int sysctl_net_busy_read __read_mostly;
110unsigned int sysctl_net_busy_poll __read_mostly;
06021292 111#endif
6b96018b 112
8ae5e030
AV
113static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
114static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 115static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
116
117static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
118static __poll_t sock_poll(struct file *file,
119 struct poll_table_struct *wait);
89bddce5 120static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
121#ifdef CONFIG_COMPAT
122static long compat_sock_ioctl(struct file *file,
89bddce5 123 unsigned int cmd, unsigned long arg);
89bbfc95 124#endif
1da177e4 125static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
126static ssize_t sock_sendpage(struct file *file, struct page *page,
127 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 128static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 129 struct pipe_inode_info *pipe, size_t len,
9c55e01c 130 unsigned int flags);
1da177e4 131
1da177e4
LT
132/*
133 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
134 * in the operation structures but are done directly via the socketcall() multiplexor.
135 */
136
da7071d7 137static const struct file_operations socket_file_ops = {
1da177e4
LT
138 .owner = THIS_MODULE,
139 .llseek = no_llseek,
8ae5e030
AV
140 .read_iter = sock_read_iter,
141 .write_iter = sock_write_iter,
1da177e4
LT
142 .poll = sock_poll,
143 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
144#ifdef CONFIG_COMPAT
145 .compat_ioctl = compat_sock_ioctl,
146#endif
1da177e4 147 .mmap = sock_mmap,
1da177e4
LT
148 .release = sock_close,
149 .fasync = sock_fasync,
5274f052
JA
150 .sendpage = sock_sendpage,
151 .splice_write = generic_splice_sendpage,
9c55e01c 152 .splice_read = sock_splice_read,
1da177e4
LT
153};
154
155/*
156 * The protocol list. Each protocol is registered in here.
157 */
158
1da177e4 159static DEFINE_SPINLOCK(net_family_lock);
190683a9 160static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 161
1da177e4 162/*
89bddce5
SH
163 * Support routines.
164 * Move socket addresses back and forth across the kernel/user
165 * divide and look after the messy bits.
1da177e4
LT
166 */
167
1da177e4
LT
168/**
169 * move_addr_to_kernel - copy a socket address into kernel space
170 * @uaddr: Address in user space
171 * @kaddr: Address in kernel space
172 * @ulen: Length in user space
173 *
174 * The address is copied into kernel space. If the provided address is
175 * too long an error code of -EINVAL is returned. If the copy gives
176 * invalid addresses -EFAULT is returned. On a success 0 is returned.
177 */
178
43db362d 179int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 180{
230b1839 181 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 182 return -EINVAL;
89bddce5 183 if (ulen == 0)
1da177e4 184 return 0;
89bddce5 185 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 186 return -EFAULT;
3ec3b2fb 187 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
188}
189
190/**
191 * move_addr_to_user - copy an address to user space
192 * @kaddr: kernel space address
193 * @klen: length of address in kernel
194 * @uaddr: user space address
195 * @ulen: pointer to user length field
196 *
197 * The value pointed to by ulen on entry is the buffer length available.
198 * This is overwritten with the buffer space used. -EINVAL is returned
199 * if an overlong buffer is specified or a negative buffer size. -EFAULT
200 * is returned if either the buffer or the length field are not
201 * accessible.
202 * After copying the data up to the limit the user specifies, the true
203 * length of the data is written over the length limit the user
204 * specified. Zero is returned for a success.
205 */
89bddce5 206
43db362d 207static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 208 void __user *uaddr, int __user *ulen)
1da177e4
LT
209{
210 int err;
211 int len;
212
68c6beb3 213 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
214 err = get_user(len, ulen);
215 if (err)
1da177e4 216 return err;
89bddce5
SH
217 if (len > klen)
218 len = klen;
68c6beb3 219 if (len < 0)
1da177e4 220 return -EINVAL;
89bddce5 221 if (len) {
d6fe3945
SG
222 if (audit_sockaddr(klen, kaddr))
223 return -ENOMEM;
89bddce5 224 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
225 return -EFAULT;
226 }
227 /*
89bddce5
SH
228 * "fromlen shall refer to the value before truncation.."
229 * 1003.1g
1da177e4
LT
230 */
231 return __put_user(klen, ulen);
232}
233
08009a76 234static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
235
236static struct inode *sock_alloc_inode(struct super_block *sb)
237{
238 struct socket_alloc *ei;
89bddce5 239
e94b1766 240 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
241 if (!ei)
242 return NULL;
333f7909
AV
243 init_waitqueue_head(&ei->socket.wq.wait);
244 ei->socket.wq.fasync_list = NULL;
245 ei->socket.wq.flags = 0;
89bddce5 246
1da177e4
LT
247 ei->socket.state = SS_UNCONNECTED;
248 ei->socket.flags = 0;
249 ei->socket.ops = NULL;
250 ei->socket.sk = NULL;
251 ei->socket.file = NULL;
1da177e4
LT
252
253 return &ei->vfs_inode;
254}
255
6d7855c5 256static void sock_free_inode(struct inode *inode)
1da177e4 257{
43815482
ED
258 struct socket_alloc *ei;
259
260 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 261 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
262}
263
51cc5068 264static void init_once(void *foo)
1da177e4 265{
89bddce5 266 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 267
a35afb83 268 inode_init_once(&ei->vfs_inode);
1da177e4 269}
89bddce5 270
1e911632 271static void init_inodecache(void)
1da177e4
LT
272{
273 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
274 sizeof(struct socket_alloc),
275 0,
276 (SLAB_HWCACHE_ALIGN |
277 SLAB_RECLAIM_ACCOUNT |
5d097056 278 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 279 init_once);
1e911632 280 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
281}
282
b87221de 283static const struct super_operations sockfs_ops = {
c6d409cf 284 .alloc_inode = sock_alloc_inode,
6d7855c5 285 .free_inode = sock_free_inode,
c6d409cf 286 .statfs = simple_statfs,
1da177e4
LT
287};
288
c23fbb6b
ED
289/*
290 * sockfs_dname() is called from d_path().
291 */
292static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
293{
294 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 295 d_inode(dentry)->i_ino);
c23fbb6b
ED
296}
297
3ba13d17 298static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 299 .d_dname = sockfs_dname,
1da177e4
LT
300};
301
bba0bd31
AG
302static int sockfs_xattr_get(const struct xattr_handler *handler,
303 struct dentry *dentry, struct inode *inode,
304 const char *suffix, void *value, size_t size)
305{
306 if (value) {
307 if (dentry->d_name.len + 1 > size)
308 return -ERANGE;
309 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
310 }
311 return dentry->d_name.len + 1;
312}
313
314#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
315#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
316#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
317
318static const struct xattr_handler sockfs_xattr_handler = {
319 .name = XATTR_NAME_SOCKPROTONAME,
320 .get = sockfs_xattr_get,
321};
322
4a590153
AG
323static int sockfs_security_xattr_set(const struct xattr_handler *handler,
324 struct dentry *dentry, struct inode *inode,
325 const char *suffix, const void *value,
326 size_t size, int flags)
327{
328 /* Handled by LSM. */
329 return -EAGAIN;
330}
331
332static const struct xattr_handler sockfs_security_xattr_handler = {
333 .prefix = XATTR_SECURITY_PREFIX,
334 .set = sockfs_security_xattr_set,
335};
336
bba0bd31
AG
337static const struct xattr_handler *sockfs_xattr_handlers[] = {
338 &sockfs_xattr_handler,
4a590153 339 &sockfs_security_xattr_handler,
bba0bd31
AG
340 NULL
341};
342
fba9be49 343static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 344{
fba9be49
DH
345 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
346 if (!ctx)
347 return -ENOMEM;
348 ctx->ops = &sockfs_ops;
349 ctx->dops = &sockfs_dentry_operations;
350 ctx->xattr = sockfs_xattr_handlers;
351 return 0;
c74a1cbb
AV
352}
353
354static struct vfsmount *sock_mnt __read_mostly;
355
356static struct file_system_type sock_fs_type = {
357 .name = "sockfs",
fba9be49 358 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
359 .kill_sb = kill_anon_super,
360};
361
1da177e4
LT
362/*
363 * Obtains the first available file descriptor and sets it up for use.
364 *
39d8c1b6
DM
365 * These functions create file structures and maps them to fd space
366 * of the current process. On success it returns file descriptor
1da177e4
LT
367 * and file struct implicitly stored in sock->file.
368 * Note that another thread may close file descriptor before we return
369 * from this function. We use the fact that now we do not refer
370 * to socket after mapping. If one day we will need it, this
371 * function will increment ref. count on file by 1.
372 *
373 * In any case returned fd MAY BE not valid!
374 * This race condition is unavoidable
375 * with shared fd spaces, we cannot solve it inside kernel,
376 * but we take care of internal coherence yet.
377 */
378
8a3c245c
PT
379/**
380 * sock_alloc_file - Bind a &socket to a &file
381 * @sock: socket
382 * @flags: file status flags
383 * @dname: protocol name
384 *
385 * Returns the &file bound with @sock, implicitly storing it
386 * in sock->file. If dname is %NULL, sets to "".
387 * On failure the return is a ERR pointer (see linux/err.h).
388 * This function uses GFP_KERNEL internally.
389 */
390
aab174f0 391struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 392{
7cbe66b6 393 struct file *file;
1da177e4 394
d93aa9d8
AV
395 if (!dname)
396 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 397
d93aa9d8
AV
398 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
399 O_RDWR | (flags & O_NONBLOCK),
400 &socket_file_ops);
b5ffe634 401 if (IS_ERR(file)) {
8e1611e2 402 sock_release(sock);
39b65252 403 return file;
cc3808f8
AV
404 }
405
406 sock->file = file;
39d8c1b6 407 file->private_data = sock;
d8e464ec 408 stream_open(SOCK_INODE(sock), file);
28407630 409 return file;
39d8c1b6 410}
56b31d1c 411EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 412
56b31d1c 413static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
414{
415 struct file *newfile;
28407630 416 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
417 if (unlikely(fd < 0)) {
418 sock_release(sock);
28407630 419 return fd;
ce4bb04c 420 }
39d8c1b6 421
aab174f0 422 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 423 if (!IS_ERR(newfile)) {
39d8c1b6 424 fd_install(fd, newfile);
28407630
AV
425 return fd;
426 }
7cbe66b6 427
28407630
AV
428 put_unused_fd(fd);
429 return PTR_ERR(newfile);
1da177e4
LT
430}
431
8a3c245c
PT
432/**
433 * sock_from_file - Return the &socket bounded to @file.
434 * @file: file
435 * @err: pointer to an error code return
436 *
437 * On failure returns %NULL and assigns -ENOTSOCK to @err.
438 */
439
406a3c63 440struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 441{
6cb153ca
BL
442 if (file->f_op == &socket_file_ops)
443 return file->private_data; /* set in sock_map_fd */
444
23bb80d2
ED
445 *err = -ENOTSOCK;
446 return NULL;
6cb153ca 447}
406a3c63 448EXPORT_SYMBOL(sock_from_file);
6cb153ca 449
1da177e4 450/**
c6d409cf 451 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
452 * @fd: file handle
453 * @err: pointer to an error code return
454 *
455 * The file handle passed in is locked and the socket it is bound
241c4667 456 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
457 * with a negative errno code and NULL is returned. The function checks
458 * for both invalid handles and passing a handle which is not a socket.
459 *
460 * On a success the socket object pointer is returned.
461 */
462
463struct socket *sockfd_lookup(int fd, int *err)
464{
465 struct file *file;
1da177e4
LT
466 struct socket *sock;
467
89bddce5
SH
468 file = fget(fd);
469 if (!file) {
1da177e4
LT
470 *err = -EBADF;
471 return NULL;
472 }
89bddce5 473
6cb153ca
BL
474 sock = sock_from_file(file, err);
475 if (!sock)
1da177e4 476 fput(file);
6cb153ca
BL
477 return sock;
478}
c6d409cf 479EXPORT_SYMBOL(sockfd_lookup);
1da177e4 480
6cb153ca
BL
481static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
482{
00e188ef 483 struct fd f = fdget(fd);
6cb153ca
BL
484 struct socket *sock;
485
3672558c 486 *err = -EBADF;
00e188ef
AV
487 if (f.file) {
488 sock = sock_from_file(f.file, err);
489 if (likely(sock)) {
490 *fput_needed = f.flags;
6cb153ca 491 return sock;
00e188ef
AV
492 }
493 fdput(f);
1da177e4 494 }
6cb153ca 495 return NULL;
1da177e4
LT
496}
497
600e1779
MY
498static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
499 size_t size)
500{
501 ssize_t len;
502 ssize_t used = 0;
503
c5ef6035 504 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
505 if (len < 0)
506 return len;
507 used += len;
508 if (buffer) {
509 if (size < used)
510 return -ERANGE;
511 buffer += len;
512 }
513
514 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
515 used += len;
516 if (buffer) {
517 if (size < used)
518 return -ERANGE;
519 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
520 buffer += len;
521 }
522
523 return used;
524}
525
dc647ec8 526static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
527{
528 int err = simple_setattr(dentry, iattr);
529
e1a3a60a 530 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
531 struct socket *sock = SOCKET_I(d_inode(dentry));
532
6d8c50dc
CW
533 if (sock->sk)
534 sock->sk->sk_uid = iattr->ia_uid;
535 else
536 err = -ENOENT;
86741ec2
LC
537 }
538
539 return err;
540}
541
600e1779 542static const struct inode_operations sockfs_inode_ops = {
600e1779 543 .listxattr = sockfs_listxattr,
86741ec2 544 .setattr = sockfs_setattr,
600e1779
MY
545};
546
1da177e4 547/**
8a3c245c 548 * sock_alloc - allocate a socket
89bddce5 549 *
1da177e4
LT
550 * Allocate a new inode and socket object. The two are bound together
551 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 552 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
553 */
554
f4a00aac 555struct socket *sock_alloc(void)
1da177e4 556{
89bddce5
SH
557 struct inode *inode;
558 struct socket *sock;
1da177e4 559
a209dfc7 560 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
561 if (!inode)
562 return NULL;
563
564 sock = SOCKET_I(inode);
565
85fe4025 566 inode->i_ino = get_next_ino();
89bddce5 567 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
568 inode->i_uid = current_fsuid();
569 inode->i_gid = current_fsgid();
600e1779 570 inode->i_op = &sockfs_inode_ops;
1da177e4 571
1da177e4
LT
572 return sock;
573}
f4a00aac 574EXPORT_SYMBOL(sock_alloc);
1da177e4 575
1da177e4 576/**
8a3c245c 577 * sock_release - close a socket
1da177e4
LT
578 * @sock: socket to close
579 *
580 * The socket is released from the protocol stack if it has a release
581 * callback, and the inode is then released if the socket is bound to
89bddce5 582 * an inode not a file.
1da177e4 583 */
89bddce5 584
6d8c50dc 585static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
586{
587 if (sock->ops) {
588 struct module *owner = sock->ops->owner;
589
6d8c50dc
CW
590 if (inode)
591 inode_lock(inode);
1da177e4 592 sock->ops->release(sock);
ff7b11aa 593 sock->sk = NULL;
6d8c50dc
CW
594 if (inode)
595 inode_unlock(inode);
1da177e4
LT
596 sock->ops = NULL;
597 module_put(owner);
598 }
599
333f7909 600 if (sock->wq.fasync_list)
3410f22e 601 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 602
1da177e4
LT
603 if (!sock->file) {
604 iput(SOCK_INODE(sock));
605 return;
606 }
89bddce5 607 sock->file = NULL;
1da177e4 608}
6d8c50dc
CW
609
610void sock_release(struct socket *sock)
611{
612 __sock_release(sock, NULL);
613}
c6d409cf 614EXPORT_SYMBOL(sock_release);
1da177e4 615
c14ac945 616void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 617{
140c55d4
ED
618 u8 flags = *tx_flags;
619
c14ac945 620 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
621 flags |= SKBTX_HW_TSTAMP;
622
c14ac945 623 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
624 flags |= SKBTX_SW_TSTAMP;
625
c14ac945 626 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
627 flags |= SKBTX_SCHED_TSTAMP;
628
140c55d4 629 *tx_flags = flags;
20d49473 630}
67cc0d40 631EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 632
8c3c447b
PA
633INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
634 size_t));
a648a592
PA
635INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
636 size_t));
d8725c86 637static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 638{
a648a592
PA
639 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
640 inet_sendmsg, sock, msg,
641 msg_data_left(msg));
d8725c86
AV
642 BUG_ON(ret == -EIOCBQUEUED);
643 return ret;
1da177e4
LT
644}
645
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM.  If the security
 *	hook rejects the send its error is returned and nothing is sent.
 *	Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 662
8a3c245c
PT
663/**
664 * kernel_sendmsg - send a message through @sock (kernel-space)
665 * @sock: socket
666 * @msg: message header
667 * @vec: kernel vec
668 * @num: vec array length
669 * @size: total message data size
670 *
671 * Builds the message data with @vec and sends it through @sock.
672 * Returns the number of bytes sent, or an error code.
673 */
674
1da177e4
LT
675int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
676 struct kvec *vec, size_t num, size_t size)
677{
aa563d7b 678 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 679 return sock_sendmsg(sock, msg);
1da177e4 680}
c6d409cf 681EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 682
8a3c245c
PT
683/**
684 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
685 * @sk: sock
686 * @msg: message header
687 * @vec: output s/g array
688 * @num: output s/g array length
689 * @size: total message data size
690 *
691 * Builds the message data with @vec and sends it through @sock.
692 * Returns the number of bytes sent, or an error code.
693 * Caller must hold @sk.
694 */
695
306b13eb
TH
696int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
697 struct kvec *vec, size_t num, size_t size)
698{
699 struct socket *sock = sk->sk_socket;
700
701 if (!sock->ops->sendmsg_locked)
db5980d8 702 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 703
aa563d7b 704 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
705
706 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
707}
708EXPORT_SYMBOL(kernel_sendmsg_locked);
709
8605330a
SHY
710static bool skb_is_err_queue(const struct sk_buff *skb)
711{
712 /* pkt_type of skbs enqueued on the error queue are set to
713 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
714 * in recvmsg, since skbs received on a local socket will never
715 * have a pkt_type of PACKET_OUTGOING.
716 */
717 return skb->pkt_type == PACKET_OUTGOING;
718}
719
b50a5c70
ML
720/* On transmit, software and hardware timestamps are returned independently.
721 * As the two skb clones share the hardware timestamp, which may be updated
722 * before the software timestamp is received, a hardware TX timestamp may be
723 * returned only if there is no software TX timestamp. Ignore false software
724 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 725 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
726 * hardware timestamp.
727 */
728static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
729{
730 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
731}
732
aad9c8c4
ML
733static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
734{
735 struct scm_ts_pktinfo ts_pktinfo;
736 struct net_device *orig_dev;
737
738 if (!skb_mac_header_was_set(skb))
739 return;
740
741 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
742
743 rcu_read_lock();
744 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
745 if (orig_dev)
746 ts_pktinfo.if_index = orig_dev->ifindex;
747 rcu_read_unlock();
748
749 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
750 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
751 sizeof(ts_pktinfo), &ts_pktinfo);
752}
753
92f37fd2
ED
754/*
755 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
756 */
757void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
758 struct sk_buff *skb)
759{
20d49473 760 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 761 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
762 struct scm_timestamping_internal tss;
763
b50a5c70 764 int empty = 1, false_tstamp = 0;
20d49473
PO
765 struct skb_shared_hwtstamps *shhwtstamps =
766 skb_hwtstamps(skb);
767
768 /* Race occurred between timestamp enabling and packet
769 receiving. Fill in the current time for now. */
b50a5c70 770 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 771 __net_timestamp(skb);
b50a5c70
ML
772 false_tstamp = 1;
773 }
20d49473
PO
774
775 if (need_software_tstamp) {
776 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
777 if (new_tstamp) {
778 struct __kernel_sock_timeval tv;
779
780 skb_get_new_timestamp(skb, &tv);
781 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
782 sizeof(tv), &tv);
783 } else {
784 struct __kernel_old_timeval tv;
785
786 skb_get_timestamp(skb, &tv);
787 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
788 sizeof(tv), &tv);
789 }
20d49473 790 } else {
887feae3
DD
791 if (new_tstamp) {
792 struct __kernel_timespec ts;
793
794 skb_get_new_timestampns(skb, &ts);
795 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
796 sizeof(ts), &ts);
797 } else {
df1b4ba9 798 struct __kernel_old_timespec ts;
887feae3
DD
799
800 skb_get_timestampns(skb, &ts);
801 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
802 sizeof(ts), &ts);
803 }
20d49473
PO
804 }
805 }
806
f24b9be5 807 memset(&tss, 0, sizeof(tss));
c199105d 808 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 809 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 810 empty = 0;
4d276eb6 811 if (shhwtstamps &&
b9f40e21 812 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 813 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 814 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 815 empty = 0;
aad9c8c4
ML
816 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
817 !skb_is_err_queue(skb))
818 put_ts_pktinfo(msg, skb);
819 }
1c885808 820 if (!empty) {
9718475e
DD
821 if (sock_flag(sk, SOCK_TSTAMP_NEW))
822 put_cmsg_scm_timestamping64(msg, &tss);
823 else
824 put_cmsg_scm_timestamping(msg, &tss);
1c885808 825
8605330a 826 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 827 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
828 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
829 skb->len, skb->data);
830 }
92f37fd2 831}
7c81fd8b
ACM
832EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
833
6e3e939f
JB
834void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
835 struct sk_buff *skb)
836{
837 int ack;
838
839 if (!sock_flag(sk, SOCK_WIFI_STATUS))
840 return;
841 if (!skb->wifi_acked_valid)
842 return;
843
844 ack = skb->wifi_acked;
845
846 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
847}
848EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
849
11165f14 850static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
851 struct sk_buff *skb)
3b885787 852{
744d5a3e 853 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 854 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 855 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
856}
857
/*
 * Emit the receive-side control messages for @skb: first the timestamp
 * (via sock_recv_timestamp()), then the drop counter.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 865
8c3c447b 866INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
867 size_t, int));
868INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
869 size_t, int));
1b784140 870static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 871 int flags)
1da177e4 872{
a648a592
PA
873 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
874 inet_recvmsg, sock, msg, msg_data_left(msg),
875 flags);
1da177e4
LT
876}
877
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM.  If the security
 *	hook rejects the receive its error is returned and the protocol is
 *	not called.  Returns the total number of bytes received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 894
c1249c0a 895/**
8a3c245c
PT
896 * kernel_recvmsg - Receive a message from a socket (kernel space)
897 * @sock: The socket to receive the message from
898 * @msg: Received message
899 * @vec: Input s/g array for message data
900 * @num: Size of input s/g array
901 * @size: Number of bytes to read
902 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 903 *
8a3c245c
PT
904 * On return the msg structure contains the scatter/gather array passed in the
905 * vec argument. The array is modified so that it consists of the unfilled
906 * portion of the original array.
c1249c0a 907 *
8a3c245c 908 * The returned value is the total number of bytes received, or an error.
c1249c0a 909 */
8a3c245c 910
89bddce5
SH
911int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
912 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
913{
914 mm_segment_t oldfs = get_fs();
915 int result;
916
aa563d7b 917 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 918 set_fs(KERNEL_DS);
2da62906 919 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
920 set_fs(oldfs);
921 return result;
922}
c6d409cf 923EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 924
ce1d4d3e
CH
925static ssize_t sock_sendpage(struct file *file, struct page *page,
926 int offset, size_t size, loff_t *ppos, int more)
1da177e4 927{
1da177e4
LT
928 struct socket *sock;
929 int flags;
930
ce1d4d3e
CH
931 sock = file->private_data;
932
35f9c09f
ED
933 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
934 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
935 flags |= more;
ce1d4d3e 936
e6949583 937 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 938}
1da177e4 939
9c55e01c 940static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 941 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
942 unsigned int flags)
943{
944 struct socket *sock = file->private_data;
945
997b37da 946 if (unlikely(!sock->ops->splice_read))
95506588 947 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 948
9c55e01c
JA
949 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
950}
951
8ae5e030 952static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 953{
6d652330
AV
954 struct file *file = iocb->ki_filp;
955 struct socket *sock = file->private_data;
0345f931 956 struct msghdr msg = {.msg_iter = *to,
957 .msg_iocb = iocb};
8ae5e030 958 ssize_t res;
ce1d4d3e 959
8ae5e030
AV
960 if (file->f_flags & O_NONBLOCK)
961 msg.msg_flags = MSG_DONTWAIT;
962
963 if (iocb->ki_pos != 0)
1da177e4 964 return -ESPIPE;
027445c3 965
66ee59af 966 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
967 return 0;
968
2da62906 969 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
970 *to = msg.msg_iter;
971 return res;
1da177e4
LT
972}
973
8ae5e030 974static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 975{
6d652330
AV
976 struct file *file = iocb->ki_filp;
977 struct socket *sock = file->private_data;
0345f931 978 struct msghdr msg = {.msg_iter = *from,
979 .msg_iocb = iocb};
8ae5e030 980 ssize_t res;
1da177e4 981
8ae5e030 982 if (iocb->ki_pos != 0)
ce1d4d3e 983 return -ESPIPE;
027445c3 984
8ae5e030
AV
985 if (file->f_flags & O_NONBLOCK)
986 msg.msg_flags = MSG_DONTWAIT;
987
6d652330
AV
988 if (sock->type == SOCK_SEQPACKET)
989 msg.msg_flags |= MSG_EOR;
990
d8725c86 991 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
992 *from = msg.msg_iter;
993 return res;
1da177e4
LT
994}
995
1da177e4
LT
996/*
997 * Atomic setting of ioctl hooks to avoid race
998 * with module unload.
999 */
1000
4a3e2f71 1001static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1002static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1003
881d966b 1004void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1005{
4a3e2f71 1006 mutex_lock(&br_ioctl_mutex);
1da177e4 1007 br_ioctl_hook = hook;
4a3e2f71 1008 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1009}
1010EXPORT_SYMBOL(brioctl_set);
1011
4a3e2f71 1012static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1013static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1014
881d966b 1015void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1016{
4a3e2f71 1017 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1018 vlan_ioctl_hook = hook;
4a3e2f71 1019 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1020}
1021EXPORT_SYMBOL(vlan_ioctl_set);
1022
4a3e2f71 1023static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1024static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1025
89bddce5 1026void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1027{
4a3e2f71 1028 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1029 dlci_ioctl_hook = hook;
4a3e2f71 1030 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1031}
1032EXPORT_SYMBOL(dlci_ioctl_set);
1033
6b96018b 1034static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1035 unsigned int cmd, unsigned long arg)
6b96018b
AB
1036{
1037 int err;
1038 void __user *argp = (void __user *)arg;
1039
1040 err = sock->ops->ioctl(sock, cmd, arg);
1041
1042 /*
1043 * If this ioctl is unknown try to hand it down
1044 * to the NIC driver.
1045 */
36fd633e
AV
1046 if (err != -ENOIOCTLCMD)
1047 return err;
6b96018b 1048
36fd633e
AV
1049 if (cmd == SIOCGIFCONF) {
1050 struct ifconf ifc;
1051 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1052 return -EFAULT;
1053 rtnl_lock();
1054 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1055 rtnl_unlock();
1056 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1057 err = -EFAULT;
44c02a2c
AV
1058 } else {
1059 struct ifreq ifr;
1060 bool need_copyout;
63ff03ab 1061 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1062 return -EFAULT;
1063 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1064 if (!err && need_copyout)
63ff03ab 1065 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1066 return -EFAULT;
36fd633e 1067 }
6b96018b
AB
1068 return err;
1069}
1070
1da177e4
LT
1071/*
1072 * With an ioctl, arg may well be a user mode pointer, but we don't know
1073 * what to do with it - that's up to the protocol still.
1074 */
1075
8a3c245c
PT
1076/**
1077 * get_net_ns - increment the refcount of the network namespace
1078 * @ns: common namespace (net)
1079 *
1080 * Returns the net's common namespace.
1081 */
1082
d8d211a2 1083struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1084{
1085 return &get_net(container_of(ns, struct net, ns))->ns;
1086}
d8d211a2 1087EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1088
1da177e4
LT
1089static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1090{
1091 struct socket *sock;
881d966b 1092 struct sock *sk;
1da177e4
LT
1093 void __user *argp = (void __user *)arg;
1094 int pid, err;
881d966b 1095 struct net *net;
1da177e4 1096
b69aee04 1097 sock = file->private_data;
881d966b 1098 sk = sock->sk;
3b1e0a65 1099 net = sock_net(sk);
44c02a2c
AV
1100 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1101 struct ifreq ifr;
1102 bool need_copyout;
1103 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1104 return -EFAULT;
1105 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1106 if (!err && need_copyout)
1107 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1108 return -EFAULT;
1da177e4 1109 } else
3d23e349 1110#ifdef CONFIG_WEXT_CORE
1da177e4 1111 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1112 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1113 } else
3d23e349 1114#endif
89bddce5 1115 switch (cmd) {
1da177e4
LT
1116 case FIOSETOWN:
1117 case SIOCSPGRP:
1118 err = -EFAULT;
1119 if (get_user(pid, (int __user *)argp))
1120 break;
393cc3f5 1121 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1122 break;
1123 case FIOGETOWN:
1124 case SIOCGPGRP:
609d7fa9 1125 err = put_user(f_getown(sock->file),
89bddce5 1126 (int __user *)argp);
1da177e4
LT
1127 break;
1128 case SIOCGIFBR:
1129 case SIOCSIFBR:
1130 case SIOCBRADDBR:
1131 case SIOCBRDELBR:
1132 err = -ENOPKG;
1133 if (!br_ioctl_hook)
1134 request_module("bridge");
1135
4a3e2f71 1136 mutex_lock(&br_ioctl_mutex);
89bddce5 1137 if (br_ioctl_hook)
881d966b 1138 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1139 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1140 break;
1141 case SIOCGIFVLAN:
1142 case SIOCSIFVLAN:
1143 err = -ENOPKG;
1144 if (!vlan_ioctl_hook)
1145 request_module("8021q");
1146
4a3e2f71 1147 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1148 if (vlan_ioctl_hook)
881d966b 1149 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1150 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1151 break;
1da177e4
LT
1152 case SIOCADDDLCI:
1153 case SIOCDELDLCI:
1154 err = -ENOPKG;
1155 if (!dlci_ioctl_hook)
1156 request_module("dlci");
1157
7512cbf6
PE
1158 mutex_lock(&dlci_ioctl_mutex);
1159 if (dlci_ioctl_hook)
1da177e4 1160 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1161 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1162 break;
c62cce2c
AV
1163 case SIOCGSKNS:
1164 err = -EPERM;
1165 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1166 break;
1167
1168 err = open_related_ns(&net->ns, get_net_ns);
1169 break;
0768e170
AB
1170 case SIOCGSTAMP_OLD:
1171 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1172 if (!sock->ops->gettstamp) {
1173 err = -ENOIOCTLCMD;
1174 break;
1175 }
1176 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1177 cmd == SIOCGSTAMP_OLD,
1178 !IS_ENABLED(CONFIG_64BIT));
60747828 1179 break;
0768e170
AB
1180 case SIOCGSTAMP_NEW:
1181 case SIOCGSTAMPNS_NEW:
1182 if (!sock->ops->gettstamp) {
1183 err = -ENOIOCTLCMD;
1184 break;
1185 }
1186 err = sock->ops->gettstamp(sock, argp,
1187 cmd == SIOCGSTAMP_NEW,
1188 false);
c7cbdbf2 1189 break;
1da177e4 1190 default:
63ff03ab 1191 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1192 break;
89bddce5 1193 }
1da177e4
LT
1194 return err;
1195}
1196
8a3c245c
PT
1197/**
1198 * sock_create_lite - creates a socket
1199 * @family: protocol family (AF_INET, ...)
1200 * @type: communication type (SOCK_STREAM, ...)
1201 * @protocol: protocol (0, ...)
1202 * @res: new socket
1203 *
1204 * Creates a new socket and assigns it to @res, passing through LSM.
1205 * The new socket initialization is not complete, see kernel_accept().
1206 * Returns 0 or an error. On failure @res is set to %NULL.
1207 * This function internally uses GFP_KERNEL.
1208 */
1209
1da177e4
LT
1210int sock_create_lite(int family, int type, int protocol, struct socket **res)
1211{
1212 int err;
1213 struct socket *sock = NULL;
89bddce5 1214
1da177e4
LT
1215 err = security_socket_create(family, type, protocol, 1);
1216 if (err)
1217 goto out;
1218
1219 sock = sock_alloc();
1220 if (!sock) {
1221 err = -ENOMEM;
1222 goto out;
1223 }
1224
1da177e4 1225 sock->type = type;
7420ed23
VY
1226 err = security_socket_post_create(sock, family, type, protocol, 1);
1227 if (err)
1228 goto out_release;
1229
1da177e4
LT
1230out:
1231 *res = sock;
1232 return err;
7420ed23
VY
1233out_release:
1234 sock_release(sock);
1235 sock = NULL;
1236 goto out;
1da177e4 1237}
c6d409cf 1238EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1239
1240/* No kernel lock held - perfect */
ade994f4 1241static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1242{
3cafb376 1243 struct socket *sock = file->private_data;
a331de3b 1244 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1245
e88958e6
CH
1246 if (!sock->ops->poll)
1247 return 0;
f641f13b 1248
a331de3b
CH
1249 if (sk_can_busy_loop(sock->sk)) {
1250 /* poll once if requested by the syscall */
1251 if (events & POLL_BUSY_LOOP)
1252 sk_busy_loop(sock->sk, 1);
1253
1254 /* if this socket can poll_ll, tell the system call */
1255 flag = POLL_BUSY_LOOP;
1256 }
1257
1258 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1259}
1260
89bddce5 1261static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1262{
b69aee04 1263 struct socket *sock = file->private_data;
1da177e4
LT
1264
1265 return sock->ops->mmap(file, sock, vma);
1266}
1267
/*
 * ->release() file operation for sockets: release the socket embedded
 * in @inode. Always reports success.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1273
1274/*
1275 * Update the socket async list
1276 *
1277 * Fasync_list locking strategy.
1278 *
1279 * 1. fasync_list is modified only under process context socket lock
1280 * i.e. under semaphore.
1281 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1282 * or under socket lock
1da177e4
LT
1283 */
1284
1285static int sock_fasync(int fd, struct file *filp, int on)
1286{
989a2979
ED
1287 struct socket *sock = filp->private_data;
1288 struct sock *sk = sock->sk;
333f7909 1289 struct socket_wq *wq = &sock->wq;
1da177e4 1290
989a2979 1291 if (sk == NULL)
1da177e4 1292 return -EINVAL;
1da177e4
LT
1293
1294 lock_sock(sk);
eaefd110 1295 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1296
eaefd110 1297 if (!wq->fasync_list)
989a2979
ED
1298 sock_reset_flag(sk, SOCK_FASYNC);
1299 else
bcdce719 1300 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1301
989a2979 1302 release_sock(sk);
1da177e4
LT
1303 return 0;
1304}
1305
ceb5d58b 1306/* This function may be called only under rcu_lock */
1da177e4 1307
ceb5d58b 1308int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1309{
ceb5d58b 1310 if (!wq || !wq->fasync_list)
1da177e4 1311 return -1;
ceb5d58b 1312
89bddce5 1313 switch (how) {
8d8ad9d7 1314 case SOCK_WAKE_WAITD:
ceb5d58b 1315 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1316 break;
1317 goto call_kill;
8d8ad9d7 1318 case SOCK_WAKE_SPACE:
ceb5d58b 1319 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1320 break;
1321 /* fall through */
8d8ad9d7 1322 case SOCK_WAKE_IO:
89bddce5 1323call_kill:
43815482 1324 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1325 break;
8d8ad9d7 1326 case SOCK_WAKE_URG:
43815482 1327 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1328 }
ceb5d58b 1329
1da177e4
LT
1330 return 0;
1331}
c6d409cf 1332EXPORT_SYMBOL(sock_wake_async);
1da177e4 1333
8a3c245c
PT
1334/**
1335 * __sock_create - creates a socket
1336 * @net: net namespace
1337 * @family: protocol family (AF_INET, ...)
1338 * @type: communication type (SOCK_STREAM, ...)
1339 * @protocol: protocol (0, ...)
1340 * @res: new socket
1341 * @kern: boolean for kernel space sockets
1342 *
1343 * Creates a new socket and assigns it to @res, passing through LSM.
1344 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1345 * be set to true if the socket resides in kernel space.
1346 * This function internally uses GFP_KERNEL.
1347 */
1348
721db93a 1349int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1350 struct socket **res, int kern)
1da177e4
LT
1351{
1352 int err;
1353 struct socket *sock;
55737fda 1354 const struct net_proto_family *pf;
1da177e4
LT
1355
1356 /*
89bddce5 1357 * Check protocol is in range
1da177e4
LT
1358 */
1359 if (family < 0 || family >= NPROTO)
1360 return -EAFNOSUPPORT;
1361 if (type < 0 || type >= SOCK_MAX)
1362 return -EINVAL;
1363
1364 /* Compatibility.
1365
1366 This uglymoron is moved from INET layer to here to avoid
1367 deadlock in module load.
1368 */
1369 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1370 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1371 current->comm);
1da177e4
LT
1372 family = PF_PACKET;
1373 }
1374
1375 err = security_socket_create(family, type, protocol, kern);
1376 if (err)
1377 return err;
89bddce5 1378
55737fda
SH
1379 /*
1380 * Allocate the socket and allow the family to set things up. if
1381 * the protocol is 0, the family is instructed to select an appropriate
1382 * default.
1383 */
1384 sock = sock_alloc();
1385 if (!sock) {
e87cc472 1386 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1387 return -ENFILE; /* Not exactly a match, but its the
1388 closest posix thing */
1389 }
1390
1391 sock->type = type;
1392
95a5afca 1393#ifdef CONFIG_MODULES
89bddce5
SH
1394 /* Attempt to load a protocol module if the find failed.
1395 *
1396 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1397 * requested real, full-featured networking support upon configuration.
1398 * Otherwise module support will break!
1399 */
190683a9 1400 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1401 request_module("net-pf-%d", family);
1da177e4
LT
1402#endif
1403
55737fda
SH
1404 rcu_read_lock();
1405 pf = rcu_dereference(net_families[family]);
1406 err = -EAFNOSUPPORT;
1407 if (!pf)
1408 goto out_release;
1da177e4
LT
1409
1410 /*
1411 * We will call the ->create function, that possibly is in a loadable
1412 * module, so we have to bump that loadable module refcnt first.
1413 */
55737fda 1414 if (!try_module_get(pf->owner))
1da177e4
LT
1415 goto out_release;
1416
55737fda
SH
1417 /* Now protected by module ref count */
1418 rcu_read_unlock();
1419
3f378b68 1420 err = pf->create(net, sock, protocol, kern);
55737fda 1421 if (err < 0)
1da177e4 1422 goto out_module_put;
a79af59e 1423
1da177e4
LT
1424 /*
1425 * Now to bump the refcnt of the [loadable] module that owns this
1426 * socket at sock_release time we decrement its refcnt.
1427 */
55737fda
SH
1428 if (!try_module_get(sock->ops->owner))
1429 goto out_module_busy;
1430
1da177e4
LT
1431 /*
1432 * Now that we're done with the ->create function, the [loadable]
1433 * module can have its refcnt decremented
1434 */
55737fda 1435 module_put(pf->owner);
7420ed23
VY
1436 err = security_socket_post_create(sock, family, type, protocol, kern);
1437 if (err)
3b185525 1438 goto out_sock_release;
55737fda 1439 *res = sock;
1da177e4 1440
55737fda
SH
1441 return 0;
1442
1443out_module_busy:
1444 err = -EAFNOSUPPORT;
1da177e4 1445out_module_put:
55737fda
SH
1446 sock->ops = NULL;
1447 module_put(pf->owner);
1448out_sock_release:
1da177e4 1449 sock_release(sock);
55737fda
SH
1450 return err;
1451
1452out_release:
1453 rcu_read_unlock();
1454 goto out_sock_release;
1da177e4 1455}
721db93a 1456EXPORT_SYMBOL(__sock_create);
1da177e4 1457
8a3c245c
PT
1458/**
1459 * sock_create - creates a socket
1460 * @family: protocol family (AF_INET, ...)
1461 * @type: communication type (SOCK_STREAM, ...)
1462 * @protocol: protocol (0, ...)
1463 * @res: new socket
1464 *
1465 * A wrapper around __sock_create().
1466 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1467 */
1468
1da177e4
LT
1469int sock_create(int family, int type, int protocol, struct socket **res)
1470{
1b8d7ae4 1471 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1472}
c6d409cf 1473EXPORT_SYMBOL(sock_create);
1da177e4 1474
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes @kern as 1.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1492
9d6a15c3 1493int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1494{
1495 int retval;
1496 struct socket *sock;
a677a039
UD
1497 int flags;
1498
e38b36f3
UD
1499 /* Check the SOCK_* constants for consistency. */
1500 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1501 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1502 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1503 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1504
a677a039 1505 flags = type & ~SOCK_TYPE_MASK;
77d27200 1506 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1507 return -EINVAL;
1508 type &= SOCK_TYPE_MASK;
1da177e4 1509
aaca0bdc
UD
1510 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1511 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1512
1da177e4
LT
1513 retval = sock_create(family, type, protocol, &sock);
1514 if (retval < 0)
8e1611e2 1515 return retval;
1da177e4 1516
8e1611e2 1517 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1518}
1519
9d6a15c3
DB
1520SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1521{
1522 return __sys_socket(family, type, protocol);
1523}
1524
1da177e4
LT
1525/*
1526 * Create a pair of connected sockets.
1527 */
1528
6debc8d8 1529int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1530{
1531 struct socket *sock1, *sock2;
1532 int fd1, fd2, err;
db349509 1533 struct file *newfile1, *newfile2;
a677a039
UD
1534 int flags;
1535
1536 flags = type & ~SOCK_TYPE_MASK;
77d27200 1537 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1538 return -EINVAL;
1539 type &= SOCK_TYPE_MASK;
1da177e4 1540
aaca0bdc
UD
1541 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1542 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1543
016a266b
AV
1544 /*
1545 * reserve descriptors and make sure we won't fail
1546 * to return them to userland.
1547 */
1548 fd1 = get_unused_fd_flags(flags);
1549 if (unlikely(fd1 < 0))
1550 return fd1;
1551
1552 fd2 = get_unused_fd_flags(flags);
1553 if (unlikely(fd2 < 0)) {
1554 put_unused_fd(fd1);
1555 return fd2;
1556 }
1557
1558 err = put_user(fd1, &usockvec[0]);
1559 if (err)
1560 goto out;
1561
1562 err = put_user(fd2, &usockvec[1]);
1563 if (err)
1564 goto out;
1565
1da177e4
LT
1566 /*
1567 * Obtain the first socket and check if the underlying protocol
1568 * supports the socketpair call.
1569 */
1570
1571 err = sock_create(family, type, protocol, &sock1);
016a266b 1572 if (unlikely(err < 0))
1da177e4
LT
1573 goto out;
1574
1575 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1576 if (unlikely(err < 0)) {
1577 sock_release(sock1);
1578 goto out;
bf3c23d1 1579 }
d73aa286 1580
d47cd945
DH
1581 err = security_socket_socketpair(sock1, sock2);
1582 if (unlikely(err)) {
1583 sock_release(sock2);
1584 sock_release(sock1);
1585 goto out;
1586 }
1587
016a266b
AV
1588 err = sock1->ops->socketpair(sock1, sock2);
1589 if (unlikely(err < 0)) {
1590 sock_release(sock2);
1591 sock_release(sock1);
1592 goto out;
28407630
AV
1593 }
1594
aab174f0 1595 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1596 if (IS_ERR(newfile1)) {
28407630 1597 err = PTR_ERR(newfile1);
016a266b
AV
1598 sock_release(sock2);
1599 goto out;
28407630
AV
1600 }
1601
aab174f0 1602 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1603 if (IS_ERR(newfile2)) {
1604 err = PTR_ERR(newfile2);
016a266b
AV
1605 fput(newfile1);
1606 goto out;
db349509
AV
1607 }
1608
157cf649 1609 audit_fd_pair(fd1, fd2);
d73aa286 1610
db349509
AV
1611 fd_install(fd1, newfile1);
1612 fd_install(fd2, newfile2);
d73aa286 1613 return 0;
1da177e4 1614
016a266b 1615out:
d73aa286 1616 put_unused_fd(fd2);
d73aa286 1617 put_unused_fd(fd1);
1da177e4
LT
1618 return err;
1619}
1620
6debc8d8
DB
1621SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1622 int __user *, usockvec)
1623{
1624 return __sys_socketpair(family, type, protocol, usockvec);
1625}
1626
1da177e4
LT
1627/*
1628 * Bind a name to a socket. Nothing much to do here since it's
1629 * the protocol's responsibility to handle the local address.
1630 *
1631 * We move the socket address to kernel space before we call
1632 * the protocol layer (having also checked the address is ok).
1633 */
1634
a87d35d8 1635int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1636{
1637 struct socket *sock;
230b1839 1638 struct sockaddr_storage address;
6cb153ca 1639 int err, fput_needed;
1da177e4 1640
89bddce5 1641 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1642 if (sock) {
43db362d 1643 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1644 if (!err) {
89bddce5 1645 err = security_socket_bind(sock,
230b1839 1646 (struct sockaddr *)&address,
89bddce5 1647 addrlen);
6cb153ca
BL
1648 if (!err)
1649 err = sock->ops->bind(sock,
89bddce5 1650 (struct sockaddr *)
230b1839 1651 &address, addrlen);
1da177e4 1652 }
6cb153ca 1653 fput_light(sock->file, fput_needed);
89bddce5 1654 }
1da177e4
LT
1655 return err;
1656}
1657
a87d35d8
DB
1658SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1659{
1660 return __sys_bind(fd, umyaddr, addrlen);
1661}
1662
1da177e4
LT
1663/*
1664 * Perform a listen. Basically, we allow the protocol to do anything
1665 * necessary for a listen, and if that works, we mark the socket as
1666 * ready for listening.
1667 */
1668
25e290ee 1669int __sys_listen(int fd, int backlog)
1da177e4
LT
1670{
1671 struct socket *sock;
6cb153ca 1672 int err, fput_needed;
b8e1f9b5 1673 int somaxconn;
89bddce5
SH
1674
1675 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1676 if (sock) {
8efa6e93 1677 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1678 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1679 backlog = somaxconn;
1da177e4
LT
1680
1681 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1682 if (!err)
1683 err = sock->ops->listen(sock, backlog);
1da177e4 1684
6cb153ca 1685 fput_light(sock->file, fput_needed);
1da177e4
LT
1686 }
1687 return err;
1688}
1689
25e290ee
DB
1690SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1691{
1692 return __sys_listen(fd, backlog);
1693}
1694
de2ea4b6
JA
1695int __sys_accept4_file(struct file *file, unsigned file_flags,
1696 struct sockaddr __user *upeer_sockaddr,
1697 int __user *upeer_addrlen, int flags)
1da177e4
LT
1698{
1699 struct socket *sock, *newsock;
39d8c1b6 1700 struct file *newfile;
de2ea4b6 1701 int err, len, newfd;
230b1839 1702 struct sockaddr_storage address;
1da177e4 1703
77d27200 1704 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1705 return -EINVAL;
1706
1707 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1708 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1709
de2ea4b6 1710 sock = sock_from_file(file, &err);
1da177e4
LT
1711 if (!sock)
1712 goto out;
1713
1714 err = -ENFILE;
c6d409cf
ED
1715 newsock = sock_alloc();
1716 if (!newsock)
de2ea4b6 1717 goto out;
1da177e4
LT
1718
1719 newsock->type = sock->type;
1720 newsock->ops = sock->ops;
1721
1da177e4
LT
1722 /*
1723 * We don't need try_module_get here, as the listening socket (sock)
1724 * has the protocol module (sock->ops->owner) held.
1725 */
1726 __module_get(newsock->ops->owner);
1727
28407630 1728 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1729 if (unlikely(newfd < 0)) {
1730 err = newfd;
9a1875e6 1731 sock_release(newsock);
de2ea4b6 1732 goto out;
39d8c1b6 1733 }
aab174f0 1734 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1735 if (IS_ERR(newfile)) {
28407630
AV
1736 err = PTR_ERR(newfile);
1737 put_unused_fd(newfd);
de2ea4b6 1738 goto out;
28407630 1739 }
39d8c1b6 1740
a79af59e
FF
1741 err = security_socket_accept(sock, newsock);
1742 if (err)
39d8c1b6 1743 goto out_fd;
a79af59e 1744
de2ea4b6
JA
1745 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1746 false);
1da177e4 1747 if (err < 0)
39d8c1b6 1748 goto out_fd;
1da177e4
LT
1749
1750 if (upeer_sockaddr) {
9b2c45d4
DV
1751 len = newsock->ops->getname(newsock,
1752 (struct sockaddr *)&address, 2);
1753 if (len < 0) {
1da177e4 1754 err = -ECONNABORTED;
39d8c1b6 1755 goto out_fd;
1da177e4 1756 }
43db362d 1757 err = move_addr_to_user(&address,
230b1839 1758 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1759 if (err < 0)
39d8c1b6 1760 goto out_fd;
1da177e4
LT
1761 }
1762
1763 /* File flags are not inherited via accept() unlike another OSes. */
1764
39d8c1b6
DM
1765 fd_install(newfd, newfile);
1766 err = newfd;
1da177e4
LT
1767out:
1768 return err;
39d8c1b6 1769out_fd:
9606a216 1770 fput(newfile);
39d8c1b6 1771 put_unused_fd(newfd);
de2ea4b6
JA
1772 goto out;
1773
1774}
1775
1776/*
1777 * For accept, we attempt to create a new socket, set up the link
1778 * with the client, wake up the client, then return the new
1779 * connected fd. We collect the address of the connector in kernel
1780 * space and move it to user at the very end. This is unclean because
1781 * we open the socket then return an error.
1782 *
1783 * 1003.1g adds the ability to recvmsg() to query connection pending
1784 * status to recvmsg. We need to add that support in a way thats
1785 * clean when we restructure accept also.
1786 */
1787
1788int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1789 int __user *upeer_addrlen, int flags)
1790{
1791 int ret = -EBADF;
1792 struct fd f;
1793
1794 f = fdget(fd);
1795 if (f.file) {
1796 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
1797 upeer_addrlen, flags);
1798 if (f.flags)
1799 fput(f.file);
1800 }
1801
1802 return ret;
1da177e4
LT
1803}
1804
4541e805
DB
1805SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1806 int __user *, upeer_addrlen, int, flags)
1807{
1808 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1809}
1810
20f37034
HC
1811SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1812 int __user *, upeer_addrlen)
aaca0bdc 1813{
4541e805 1814 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1815}
1816
1da177e4
LT
1817/*
1818 * Attempt to connect to a socket with the server address. The address
1819 * is in user space so we verify it is OK and move it to kernel space.
1820 *
1821 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1822 * break bindings
1823 *
1824 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1825 * other SEQPACKET protocols that take time to connect() as it doesn't
1826 * include the -EINPROGRESS status for such sockets.
1827 */
1828
f499a021 1829int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1830 int addrlen, int file_flags)
1da177e4
LT
1831{
1832 struct socket *sock;
bd3ded31 1833 int err;
1da177e4 1834
bd3ded31 1835 sock = sock_from_file(file, &err);
1da177e4
LT
1836 if (!sock)
1837 goto out;
1da177e4 1838
89bddce5 1839 err =
f499a021 1840 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 1841 if (err)
bd3ded31 1842 goto out;
1da177e4 1843
f499a021 1844 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 1845 sock->file->f_flags | file_flags);
1da177e4
LT
1846out:
1847 return err;
1848}
1849
bd3ded31
JA
1850int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1851{
1852 int ret = -EBADF;
1853 struct fd f;
1854
1855 f = fdget(fd);
1856 if (f.file) {
f499a021
JA
1857 struct sockaddr_storage address;
1858
1859 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1860 if (!ret)
1861 ret = __sys_connect_file(f.file, &address, addrlen, 0);
bd3ded31
JA
1862 if (f.flags)
1863 fput(f.file);
1864 }
1865
1866 return ret;
1867}
1868
1387c2c2
DB
1869SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1870 int, addrlen)
1871{
1872 return __sys_connect(fd, uservaddr, addrlen);
1873}
1874
1da177e4
LT
1875/*
1876 * Get the local address ('name') of a socket object. Move the obtained
1877 * name to user space.
1878 */
1879
8882a107
DB
1880int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1881 int __user *usockaddr_len)
1da177e4
LT
1882{
1883 struct socket *sock;
230b1839 1884 struct sockaddr_storage address;
9b2c45d4 1885 int err, fput_needed;
89bddce5 1886
6cb153ca 1887 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1888 if (!sock)
1889 goto out;
1890
1891 err = security_socket_getsockname(sock);
1892 if (err)
1893 goto out_put;
1894
9b2c45d4
DV
1895 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1896 if (err < 0)
1da177e4 1897 goto out_put;
9b2c45d4
DV
1898 /* "err" is actually length in this case */
1899 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1900
1901out_put:
6cb153ca 1902 fput_light(sock->file, fput_needed);
1da177e4
LT
1903out:
1904 return err;
1905}
1906
8882a107
DB
1907SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1908 int __user *, usockaddr_len)
1909{
1910 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1911}
1912
1da177e4
LT
1913/*
1914 * Get the remote address ('name') of a socket object. Move the obtained
1915 * name to user space.
1916 */
1917
b21c8f83
DB
1918int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1919 int __user *usockaddr_len)
1da177e4
LT
1920{
1921 struct socket *sock;
230b1839 1922 struct sockaddr_storage address;
9b2c45d4 1923 int err, fput_needed;
1da177e4 1924
89bddce5
SH
1925 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1926 if (sock != NULL) {
1da177e4
LT
1927 err = security_socket_getpeername(sock);
1928 if (err) {
6cb153ca 1929 fput_light(sock->file, fput_needed);
1da177e4
LT
1930 return err;
1931 }
1932
9b2c45d4
DV
1933 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1934 if (err >= 0)
1935 /* "err" is actually length in this case */
1936 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1937 usockaddr_len);
6cb153ca 1938 fput_light(sock->file, fput_needed);
1da177e4
LT
1939 }
1940 return err;
1941}
1942
b21c8f83
DB
1943SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1944 int __user *, usockaddr_len)
1945{
1946 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1947}
1948
1da177e4
LT
1949/*
1950 * Send a datagram to a given address. We move the address into kernel
1951 * space and check the user space data area is readable before invoking
1952 * the protocol.
1953 */
211b634b
DB
1954int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1955 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1956{
1957 struct socket *sock;
230b1839 1958 struct sockaddr_storage address;
1da177e4
LT
1959 int err;
1960 struct msghdr msg;
1961 struct iovec iov;
6cb153ca 1962 int fput_needed;
6cb153ca 1963
602bd0e9
AV
1964 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1965 if (unlikely(err))
1966 return err;
de0fa95c
PE
1967 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1968 if (!sock)
4387ff75 1969 goto out;
6cb153ca 1970
89bddce5 1971 msg.msg_name = NULL;
89bddce5
SH
1972 msg.msg_control = NULL;
1973 msg.msg_controllen = 0;
1974 msg.msg_namelen = 0;
6cb153ca 1975 if (addr) {
43db362d 1976 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1977 if (err < 0)
1978 goto out_put;
230b1839 1979 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1980 msg.msg_namelen = addr_len;
1da177e4
LT
1981 }
1982 if (sock->file->f_flags & O_NONBLOCK)
1983 flags |= MSG_DONTWAIT;
1984 msg.msg_flags = flags;
d8725c86 1985 err = sock_sendmsg(sock, &msg);
1da177e4 1986
89bddce5 1987out_put:
de0fa95c 1988 fput_light(sock->file, fput_needed);
4387ff75 1989out:
1da177e4
LT
1990 return err;
1991}
1992
211b634b
DB
1993SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1994 unsigned int, flags, struct sockaddr __user *, addr,
1995 int, addr_len)
1996{
1997 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1998}
1999
1da177e4 2000/*
89bddce5 2001 * Send a datagram down a socket.
1da177e4
LT
2002 */
2003
3e0fa65f 2004SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2005 unsigned int, flags)
1da177e4 2006{
211b634b 2007 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2008}
2009
2010/*
89bddce5 2011 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2012 * sender. We verify the buffers are writable and if needed move the
2013 * sender address from kernel to user space.
2014 */
7a09e1eb
DB
2015int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2016 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2017{
2018 struct socket *sock;
2019 struct iovec iov;
2020 struct msghdr msg;
230b1839 2021 struct sockaddr_storage address;
89bddce5 2022 int err, err2;
6cb153ca
BL
2023 int fput_needed;
2024
602bd0e9
AV
2025 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2026 if (unlikely(err))
2027 return err;
de0fa95c 2028 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2029 if (!sock)
de0fa95c 2030 goto out;
1da177e4 2031
89bddce5
SH
2032 msg.msg_control = NULL;
2033 msg.msg_controllen = 0;
f3d33426
HFS
2034 /* Save some cycles and don't copy the address if not needed */
2035 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2036 /* We assume all kernel code knows the size of sockaddr_storage */
2037 msg.msg_namelen = 0;
130ed5d1 2038 msg.msg_iocb = NULL;
9f138fa6 2039 msg.msg_flags = 0;
1da177e4
LT
2040 if (sock->file->f_flags & O_NONBLOCK)
2041 flags |= MSG_DONTWAIT;
2da62906 2042 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2043
89bddce5 2044 if (err >= 0 && addr != NULL) {
43db362d 2045 err2 = move_addr_to_user(&address,
230b1839 2046 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2047 if (err2 < 0)
2048 err = err2;
1da177e4 2049 }
de0fa95c
PE
2050
2051 fput_light(sock->file, fput_needed);
4387ff75 2052out:
1da177e4
LT
2053 return err;
2054}
2055
7a09e1eb
DB
2056SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2057 unsigned int, flags, struct sockaddr __user *, addr,
2058 int __user *, addr_len)
2059{
2060 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2061}
2062
1da177e4 2063/*
89bddce5 2064 * Receive a datagram from a socket.
1da177e4
LT
2065 */
2066
b7c0ddf5
JG
2067SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2068 unsigned int, flags)
1da177e4 2069{
7a09e1eb 2070 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2071}
2072
2073/*
2074 * Set a socket option. Because we don't know the option lengths we have
2075 * to pass the user mode parameter for the protocols to sort out.
2076 */
2077
cc36dca0
DB
2078static int __sys_setsockopt(int fd, int level, int optname,
2079 char __user *optval, int optlen)
1da177e4 2080{
0d01da6a
SF
2081 mm_segment_t oldfs = get_fs();
2082 char *kernel_optval = NULL;
6cb153ca 2083 int err, fput_needed;
1da177e4
LT
2084 struct socket *sock;
2085
2086 if (optlen < 0)
2087 return -EINVAL;
89bddce5
SH
2088
2089 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2090 if (sock != NULL) {
2091 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2092 if (err)
2093 goto out_put;
1da177e4 2094
0d01da6a
SF
2095 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
2096 &optname, optval, &optlen,
2097 &kernel_optval);
2098
2099 if (err < 0) {
2100 goto out_put;
2101 } else if (err > 0) {
2102 err = 0;
2103 goto out_put;
2104 }
2105
2106 if (kernel_optval) {
2107 set_fs(KERNEL_DS);
2108 optval = (char __user __force *)kernel_optval;
2109 }
2110
1da177e4 2111 if (level == SOL_SOCKET)
89bddce5
SH
2112 err =
2113 sock_setsockopt(sock, level, optname, optval,
2114 optlen);
1da177e4 2115 else
89bddce5
SH
2116 err =
2117 sock->ops->setsockopt(sock, level, optname, optval,
2118 optlen);
0d01da6a
SF
2119
2120 if (kernel_optval) {
2121 set_fs(oldfs);
2122 kfree(kernel_optval);
2123 }
6cb153ca
BL
2124out_put:
2125 fput_light(sock->file, fput_needed);
1da177e4
LT
2126 }
2127 return err;
2128}
2129
cc36dca0
DB
2130SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2131 char __user *, optval, int, optlen)
2132{
2133 return __sys_setsockopt(fd, level, optname, optval, optlen);
2134}
2135
1da177e4
LT
2136/*
2137 * Get a socket option. Because we don't know the option lengths we have
2138 * to pass a user mode parameter for the protocols to sort out.
2139 */
2140
13a2d70e
DB
2141static int __sys_getsockopt(int fd, int level, int optname,
2142 char __user *optval, int __user *optlen)
1da177e4 2143{
6cb153ca 2144 int err, fput_needed;
1da177e4 2145 struct socket *sock;
0d01da6a 2146 int max_optlen;
1da177e4 2147
89bddce5
SH
2148 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2149 if (sock != NULL) {
6cb153ca
BL
2150 err = security_socket_getsockopt(sock, level, optname);
2151 if (err)
2152 goto out_put;
1da177e4 2153
0d01da6a
SF
2154 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
2155
1da177e4 2156 if (level == SOL_SOCKET)
89bddce5
SH
2157 err =
2158 sock_getsockopt(sock, level, optname, optval,
2159 optlen);
1da177e4 2160 else
89bddce5
SH
2161 err =
2162 sock->ops->getsockopt(sock, level, optname, optval,
2163 optlen);
0d01da6a
SF
2164
2165 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2166 optval, optlen,
2167 max_optlen, err);
6cb153ca
BL
2168out_put:
2169 fput_light(sock->file, fput_needed);
1da177e4
LT
2170 }
2171 return err;
2172}
2173
13a2d70e
DB
2174SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2175 char __user *, optval, int __user *, optlen)
2176{
2177 return __sys_getsockopt(fd, level, optname, optval, optlen);
2178}
2179
1da177e4
LT
2180/*
2181 * Shutdown a socket.
2182 */
2183
005a1aea 2184int __sys_shutdown(int fd, int how)
1da177e4 2185{
6cb153ca 2186 int err, fput_needed;
1da177e4
LT
2187 struct socket *sock;
2188
89bddce5
SH
2189 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2190 if (sock != NULL) {
1da177e4 2191 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2192 if (!err)
2193 err = sock->ops->shutdown(sock, how);
2194 fput_light(sock->file, fput_needed);
1da177e4
LT
2195 }
2196 return err;
2197}
2198
005a1aea
DB
2199SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2200{
2201 return __sys_shutdown(fd, how);
2202}
2203
/*
 * Helpers that yield the address of a msghdr field which has the same type
 * (int / unsigned) in the 32-bit and the 64-bit layout on our platforms.
 *
 * These expand in the caller's scope and rely on three names being visible
 * there: `flags` (tested for MSG_CMSG_COMPAT), `msg` and `msg_compat`
 * (the latter is formed by pasting `_compat` onto the first argument).
 */
#define COMPAT_MSG(msg, member)	\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2210
c71d8ebe
TH
2211struct used_address {
2212 struct sockaddr_storage name;
2213 unsigned int name_len;
2214};
2215
da184284
AV
2216static int copy_msghdr_from_user(struct msghdr *kmsg,
2217 struct user_msghdr __user *umsg,
2218 struct sockaddr __user **save_addr,
2219 struct iovec **iov)
1661bf36 2220{
ffb07550 2221 struct user_msghdr msg;
08adb7da
AV
2222 ssize_t err;
2223
ffb07550 2224 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2225 return -EFAULT;
dbb490b9 2226
864d9664 2227 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2228 kmsg->msg_controllen = msg.msg_controllen;
2229 kmsg->msg_flags = msg.msg_flags;
2230
2231 kmsg->msg_namelen = msg.msg_namelen;
2232 if (!msg.msg_name)
6a2a2b3a
AS
2233 kmsg->msg_namelen = 0;
2234
dbb490b9
ML
2235 if (kmsg->msg_namelen < 0)
2236 return -EINVAL;
2237
1661bf36 2238 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2239 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2240
2241 if (save_addr)
ffb07550 2242 *save_addr = msg.msg_name;
08adb7da 2243
ffb07550 2244 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2245 if (!save_addr) {
864d9664
PA
2246 err = move_addr_to_kernel(msg.msg_name,
2247 kmsg->msg_namelen,
08adb7da
AV
2248 kmsg->msg_name);
2249 if (err < 0)
2250 return err;
2251 }
2252 } else {
2253 kmsg->msg_name = NULL;
2254 kmsg->msg_namelen = 0;
2255 }
2256
ffb07550 2257 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2258 return -EMSGSIZE;
2259
0345f931 2260 kmsg->msg_iocb = NULL;
2261
87e5e6da 2262 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2263 msg.msg_iov, msg.msg_iovlen,
da184284 2264 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2265 return err < 0 ? err : 0;
1661bf36
DC
2266}
2267
4257c8ca
JA
2268static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2269 unsigned int flags, struct used_address *used_address,
2270 unsigned int allowed_msghdr_flags)
1da177e4 2271{
b9d717a7 2272 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2273 __aligned(sizeof(__kernel_size_t));
89bddce5 2274 /* 20 is size of ipv6_pktinfo */
1da177e4 2275 unsigned char *ctl_buf = ctl;
d8725c86 2276 int ctl_len;
08adb7da 2277 ssize_t err;
89bddce5 2278
1da177e4
LT
2279 err = -ENOBUFS;
2280
228e548e 2281 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2282 goto out;
28a94d8f 2283 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2284 ctl_len = msg_sys->msg_controllen;
1da177e4 2285 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2286 err =
228e548e 2287 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2288 sizeof(ctl));
1da177e4 2289 if (err)
4257c8ca 2290 goto out;
228e548e
AB
2291 ctl_buf = msg_sys->msg_control;
2292 ctl_len = msg_sys->msg_controllen;
1da177e4 2293 } else if (ctl_len) {
ac4340fc
DM
2294 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2295 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2296 if (ctl_len > sizeof(ctl)) {
1da177e4 2297 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2298 if (ctl_buf == NULL)
4257c8ca 2299 goto out;
1da177e4
LT
2300 }
2301 err = -EFAULT;
2302 /*
228e548e 2303 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2304 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2305 * checking falls down on this.
2306 */
fb8621bb 2307 if (copy_from_user(ctl_buf,
228e548e 2308 (void __user __force *)msg_sys->msg_control,
89bddce5 2309 ctl_len))
1da177e4 2310 goto out_freectl;
228e548e 2311 msg_sys->msg_control = ctl_buf;
1da177e4 2312 }
228e548e 2313 msg_sys->msg_flags = flags;
1da177e4
LT
2314
2315 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2316 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2317 /*
2318 * If this is sendmmsg() and current destination address is same as
2319 * previously succeeded address, omit asking LSM's decision.
2320 * used_address->name_len is initialized to UINT_MAX so that the first
2321 * destination address never matches.
2322 */
bc909d9d
MD
2323 if (used_address && msg_sys->msg_name &&
2324 used_address->name_len == msg_sys->msg_namelen &&
2325 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2326 used_address->name_len)) {
d8725c86 2327 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2328 goto out_freectl;
2329 }
d8725c86 2330 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2331 /*
2332 * If this is sendmmsg() and sending to current destination address was
2333 * successful, remember it.
2334 */
2335 if (used_address && err >= 0) {
2336 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2337 if (msg_sys->msg_name)
2338 memcpy(&used_address->name, msg_sys->msg_name,
2339 used_address->name_len);
c71d8ebe 2340 }
1da177e4
LT
2341
2342out_freectl:
89bddce5 2343 if (ctl_buf != ctl)
1da177e4 2344 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2345out:
2346 return err;
2347}
2348
03b1230c
JA
2349int sendmsg_copy_msghdr(struct msghdr *msg,
2350 struct user_msghdr __user *umsg, unsigned flags,
2351 struct iovec **iov)
4257c8ca
JA
2352{
2353 int err;
2354
2355 if (flags & MSG_CMSG_COMPAT) {
2356 struct compat_msghdr __user *msg_compat;
2357
2358 msg_compat = (struct compat_msghdr __user *) umsg;
2359 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2360 } else {
2361 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2362 }
2363 if (err < 0)
2364 return err;
2365
2366 return 0;
2367}
2368
2369static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2370 struct msghdr *msg_sys, unsigned int flags,
2371 struct used_address *used_address,
2372 unsigned int allowed_msghdr_flags)
2373{
2374 struct sockaddr_storage address;
2375 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2376 ssize_t err;
2377
2378 msg_sys->msg_name = &address;
2379
2380 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2381 if (err < 0)
2382 return err;
2383
2384 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2385 allowed_msghdr_flags);
da184284 2386 kfree(iov);
228e548e
AB
2387 return err;
2388}
2389
2390/*
2391 * BSD sendmsg interface
2392 */
03b1230c 2393long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2394 unsigned int flags)
2395{
d69e0779 2396 /* disallow ancillary data requests from this path */
03b1230c
JA
2397 if (msg->msg_control || msg->msg_controllen)
2398 return -EINVAL;
d69e0779 2399
03b1230c 2400 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2401}
228e548e 2402
e1834a32
DB
2403long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2404 bool forbid_cmsg_compat)
228e548e
AB
2405{
2406 int fput_needed, err;
2407 struct msghdr msg_sys;
1be374a0
AL
2408 struct socket *sock;
2409
e1834a32
DB
2410 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2411 return -EINVAL;
2412
1be374a0 2413 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2414 if (!sock)
2415 goto out;
2416
28a94d8f 2417 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2418
6cb153ca 2419 fput_light(sock->file, fput_needed);
89bddce5 2420out:
1da177e4
LT
2421 return err;
2422}
2423
666547ff 2424SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2425{
e1834a32 2426 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2427}
2428
228e548e
AB
2429/*
2430 * Linux sendmmsg interface
2431 */
2432
2433int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2434 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2435{
2436 int fput_needed, err, datagrams;
2437 struct socket *sock;
2438 struct mmsghdr __user *entry;
2439 struct compat_mmsghdr __user *compat_entry;
2440 struct msghdr msg_sys;
c71d8ebe 2441 struct used_address used_address;
f092276d 2442 unsigned int oflags = flags;
228e548e 2443
e1834a32
DB
2444 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2445 return -EINVAL;
2446
98382f41
AB
2447 if (vlen > UIO_MAXIOV)
2448 vlen = UIO_MAXIOV;
228e548e
AB
2449
2450 datagrams = 0;
2451
2452 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2453 if (!sock)
2454 return err;
2455
c71d8ebe 2456 used_address.name_len = UINT_MAX;
228e548e
AB
2457 entry = mmsg;
2458 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2459 err = 0;
f092276d 2460 flags |= MSG_BATCH;
228e548e
AB
2461
2462 while (datagrams < vlen) {
f092276d
TH
2463 if (datagrams == vlen - 1)
2464 flags = oflags;
2465
228e548e 2466 if (MSG_CMSG_COMPAT & flags) {
666547ff 2467 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2468 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2469 if (err < 0)
2470 break;
2471 err = __put_user(err, &compat_entry->msg_len);
2472 ++compat_entry;
2473 } else {
a7526eb5 2474 err = ___sys_sendmsg(sock,
666547ff 2475 (struct user_msghdr __user *)entry,
28a94d8f 2476 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2477 if (err < 0)
2478 break;
2479 err = put_user(err, &entry->msg_len);
2480 ++entry;
2481 }
2482
2483 if (err)
2484 break;
2485 ++datagrams;
3023898b
SHY
2486 if (msg_data_left(&msg_sys))
2487 break;
a78cb84c 2488 cond_resched();
228e548e
AB
2489 }
2490
228e548e
AB
2491 fput_light(sock->file, fput_needed);
2492
728ffb86
AB
2493 /* We only return an error if no datagrams were able to be sent */
2494 if (datagrams != 0)
228e548e
AB
2495 return datagrams;
2496
228e548e
AB
2497 return err;
2498}
2499
2500SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2501 unsigned int, vlen, unsigned int, flags)
2502{
e1834a32 2503 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2504}
2505
03b1230c
JA
2506int recvmsg_copy_msghdr(struct msghdr *msg,
2507 struct user_msghdr __user *umsg, unsigned flags,
2508 struct sockaddr __user **uaddr,
2509 struct iovec **iov)
1da177e4 2510{
08adb7da 2511 ssize_t err;
1da177e4 2512
4257c8ca
JA
2513 if (MSG_CMSG_COMPAT & flags) {
2514 struct compat_msghdr __user *msg_compat;
1da177e4 2515
4257c8ca
JA
2516 msg_compat = (struct compat_msghdr __user *) umsg;
2517 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2518 } else {
2519 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2520 }
1da177e4 2521 if (err < 0)
da184284 2522 return err;
1da177e4 2523
4257c8ca
JA
2524 return 0;
2525}
2526
2527static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2528 struct user_msghdr __user *msg,
2529 struct sockaddr __user *uaddr,
2530 unsigned int flags, int nosec)
2531{
2532 struct compat_msghdr __user *msg_compat =
2533 (struct compat_msghdr __user *) msg;
2534 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2535 struct sockaddr_storage addr;
2536 unsigned long cmsg_ptr;
2537 int len;
2538 ssize_t err;
2539
2540 msg_sys->msg_name = &addr;
a2e27255
ACM
2541 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2542 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2543
f3d33426
HFS
2544 /* We assume all kernel code knows the size of sockaddr_storage */
2545 msg_sys->msg_namelen = 0;
2546
1da177e4
LT
2547 if (sock->file->f_flags & O_NONBLOCK)
2548 flags |= MSG_DONTWAIT;
1af66221
ED
2549
2550 if (unlikely(nosec))
2551 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2552 else
2553 err = sock_recvmsg(sock, msg_sys, flags);
2554
1da177e4 2555 if (err < 0)
4257c8ca 2556 goto out;
1da177e4
LT
2557 len = err;
2558
2559 if (uaddr != NULL) {
43db362d 2560 err = move_addr_to_user(&addr,
a2e27255 2561 msg_sys->msg_namelen, uaddr,
89bddce5 2562 uaddr_len);
1da177e4 2563 if (err < 0)
4257c8ca 2564 goto out;
1da177e4 2565 }
a2e27255 2566 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2567 COMPAT_FLAGS(msg));
1da177e4 2568 if (err)
4257c8ca 2569 goto out;
1da177e4 2570 if (MSG_CMSG_COMPAT & flags)
a2e27255 2571 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2572 &msg_compat->msg_controllen);
2573 else
a2e27255 2574 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2575 &msg->msg_controllen);
2576 if (err)
4257c8ca 2577 goto out;
1da177e4 2578 err = len;
4257c8ca
JA
2579out:
2580 return err;
2581}
2582
2583static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2584 struct msghdr *msg_sys, unsigned int flags, int nosec)
2585{
2586 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2587 /* user mode address pointers */
2588 struct sockaddr __user *uaddr;
2589 ssize_t err;
2590
2591 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2592 if (err < 0)
2593 return err;
1da177e4 2594
4257c8ca 2595 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2596 kfree(iov);
a2e27255
ACM
2597 return err;
2598}
2599
2600/*
2601 * BSD recvmsg interface
2602 */
2603
03b1230c
JA
2604long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2605 struct user_msghdr __user *umsg,
2606 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2607{
d69e0779 2608 /* disallow ancillary data requests from this path */
03b1230c
JA
2609 if (msg->msg_control || msg->msg_controllen)
2610 return -EINVAL;
aa1fa28f 2611
03b1230c 2612 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2613}
2614
e1834a32
DB
2615long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2616 bool forbid_cmsg_compat)
a2e27255
ACM
2617{
2618 int fput_needed, err;
2619 struct msghdr msg_sys;
1be374a0
AL
2620 struct socket *sock;
2621
e1834a32
DB
2622 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2623 return -EINVAL;
2624
1be374a0 2625 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2626 if (!sock)
2627 goto out;
2628
a7526eb5 2629 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2630
6cb153ca 2631 fput_light(sock->file, fput_needed);
1da177e4
LT
2632out:
2633 return err;
2634}
2635
666547ff 2636SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2637 unsigned int, flags)
2638{
e1834a32 2639 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2640}
2641
a2e27255
ACM
2642/*
2643 * Linux recvmmsg interface
2644 */
2645
e11d4284
AB
2646static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2647 unsigned int vlen, unsigned int flags,
2648 struct timespec64 *timeout)
a2e27255
ACM
2649{
2650 int fput_needed, err, datagrams;
2651 struct socket *sock;
2652 struct mmsghdr __user *entry;
d7256d0e 2653 struct compat_mmsghdr __user *compat_entry;
a2e27255 2654 struct msghdr msg_sys;
766b9f92
DD
2655 struct timespec64 end_time;
2656 struct timespec64 timeout64;
a2e27255
ACM
2657
2658 if (timeout &&
2659 poll_select_set_timeout(&end_time, timeout->tv_sec,
2660 timeout->tv_nsec))
2661 return -EINVAL;
2662
2663 datagrams = 0;
2664
2665 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2666 if (!sock)
2667 return err;
2668
7797dc41
SHY
2669 if (likely(!(flags & MSG_ERRQUEUE))) {
2670 err = sock_error(sock->sk);
2671 if (err) {
2672 datagrams = err;
2673 goto out_put;
2674 }
e623a9e9 2675 }
a2e27255
ACM
2676
2677 entry = mmsg;
d7256d0e 2678 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2679
2680 while (datagrams < vlen) {
2681 /*
2682 * No need to ask LSM for more than the first datagram.
2683 */
d7256d0e 2684 if (MSG_CMSG_COMPAT & flags) {
666547ff 2685 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2686 &msg_sys, flags & ~MSG_WAITFORONE,
2687 datagrams);
d7256d0e
JMG
2688 if (err < 0)
2689 break;
2690 err = __put_user(err, &compat_entry->msg_len);
2691 ++compat_entry;
2692 } else {
a7526eb5 2693 err = ___sys_recvmsg(sock,
666547ff 2694 (struct user_msghdr __user *)entry,
a7526eb5
AL
2695 &msg_sys, flags & ~MSG_WAITFORONE,
2696 datagrams);
d7256d0e
JMG
2697 if (err < 0)
2698 break;
2699 err = put_user(err, &entry->msg_len);
2700 ++entry;
2701 }
2702
a2e27255
ACM
2703 if (err)
2704 break;
a2e27255
ACM
2705 ++datagrams;
2706
71c5c159
BB
2707 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2708 if (flags & MSG_WAITFORONE)
2709 flags |= MSG_DONTWAIT;
2710
a2e27255 2711 if (timeout) {
766b9f92 2712 ktime_get_ts64(&timeout64);
c2e6c856 2713 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2714 if (timeout->tv_sec < 0) {
2715 timeout->tv_sec = timeout->tv_nsec = 0;
2716 break;
2717 }
2718
2719 /* Timeout, return less than vlen datagrams */
2720 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2721 break;
2722 }
2723
2724 /* Out of band data, return right away */
2725 if (msg_sys.msg_flags & MSG_OOB)
2726 break;
a78cb84c 2727 cond_resched();
a2e27255
ACM
2728 }
2729
a2e27255 2730 if (err == 0)
34b88a68
ACM
2731 goto out_put;
2732
2733 if (datagrams == 0) {
2734 datagrams = err;
2735 goto out_put;
2736 }
a2e27255 2737
34b88a68
ACM
2738 /*
2739 * We may return less entries than requested (vlen) if the
2740 * sock is non block and there aren't enough datagrams...
2741 */
2742 if (err != -EAGAIN) {
a2e27255 2743 /*
34b88a68
ACM
2744 * ... or if recvmsg returns an error after we
2745 * received some datagrams, where we record the
2746 * error to return on the next call or if the
2747 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2748 */
34b88a68 2749 sock->sk->sk_err = -err;
a2e27255 2750 }
34b88a68
ACM
2751out_put:
2752 fput_light(sock->file, fput_needed);
a2e27255 2753
34b88a68 2754 return datagrams;
a2e27255
ACM
2755}
2756
e11d4284
AB
2757int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2758 unsigned int vlen, unsigned int flags,
2759 struct __kernel_timespec __user *timeout,
2760 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2761{
2762 int datagrams;
c2e6c856 2763 struct timespec64 timeout_sys;
a2e27255 2764
e11d4284
AB
2765 if (timeout && get_timespec64(&timeout_sys, timeout))
2766 return -EFAULT;
a2e27255 2767
e11d4284 2768 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2769 return -EFAULT;
2770
e11d4284
AB
2771 if (!timeout && !timeout32)
2772 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2773
2774 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2775
e11d4284
AB
2776 if (datagrams <= 0)
2777 return datagrams;
2778
2779 if (timeout && put_timespec64(&timeout_sys, timeout))
2780 datagrams = -EFAULT;
2781
2782 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2783 datagrams = -EFAULT;
2784
2785 return datagrams;
2786}
2787
1255e269
DB
2788SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2789 unsigned int, vlen, unsigned int, flags,
c2e6c856 2790 struct __kernel_timespec __user *, timeout)
1255e269 2791{
e11d4284
AB
2792 if (flags & MSG_CMSG_COMPAT)
2793 return -EINVAL;
2794
2795 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2796}
2797
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 *	recvmmsg(2) entry point taking an old 32-bit timespec. MSG_CMSG_COMPAT
 *	is rejected with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2809
#ifdef __ARCH_WANT_SYS_SOCKETCALL
/*
 * Argument list sizes for sys_socketcall: the number of bytes to copy from
 * the user argument block, indexed by call number.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	AL(4), AL(5), AL(4)
};

#undef AL

/*
 *	System call vectors.
 *
 *	Multiplexer for the socket calls: @call selects the operation and
 *	@args points at a user array of unsigned long arguments, of which
 *	nargs[call] bytes are copied in and then passed, cast as needed, to
 *	the matching __sys_*() helper.
 *
 *	Argument checking cleaned up. Saved 20% in size.
 *	This function doesn't need to set the kernel lock because
 *	it is set by the callees.
 */

SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
{
	unsigned long a[AUDITSC_ARGS];
	unsigned int len;
	int err;

	if (call < 1 || call > SYS_SENDMMSG)
		return -EINVAL;
	/* Clamp the index under speculation before it is used for a lookup. */
	call = array_index_nospec(call, SYS_SENDMMSG + 1);

	len = nargs[call];
	if (len > sizeof(a))
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(a, args, len))
		return -EFAULT;

	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
	if (err)
		return err;

	switch (call) {
	case SYS_SOCKET:
		err = __sys_socket(a[0], a[1], a[2]);
		break;
	case SYS_BIND:
		err = __sys_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_CONNECT:
		err = __sys_connect(a[0], (struct sockaddr __user *)a[1],
				    a[2]);
		break;
	case SYS_LISTEN:
		err = __sys_listen(a[0], a[1]);
		break;
	case SYS_ACCEPT:
		err = __sys_accept4(a[0], (struct sockaddr __user *)a[1],
				    (int __user *)a[2], 0);
		break;
	case SYS_GETSOCKNAME:
		err = __sys_getsockname(a[0], (struct sockaddr __user *)a[1],
					(int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err = __sys_getpeername(a[0], (struct sockaddr __user *)a[1],
					(int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = __sys_socketpair(a[0], a[1], a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		err = __sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				   NULL, 0);
		break;
	case SYS_SENDTO:
		err = __sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				   (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		err = __sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				     NULL, NULL);
		break;
	case SYS_RECVFROM:
		err = __sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				     (struct sockaddr __user *)a[4],
				     (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = __sys_shutdown(a[0], a[1]);
		break;
	case SYS_SETSOCKOPT:
		err = __sys_setsockopt(a[0], a[1], a[2], (char __user *)a[3],
				       a[4]);
		break;
	case SYS_GETSOCKOPT:
		err = __sys_getsockopt(a[0], a[1], a[2], (char __user *)a[3],
				       (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = __sys_sendmsg(a[0], (struct user_msghdr __user *)a[1],
				    a[2], true);
		break;
	case SYS_SENDMMSG:
		err = __sys_sendmmsg(a[0], (struct mmsghdr __user *)a[1],
				     a[2], a[3], true);
		break;
	case SYS_RECVMSG:
		err = __sys_recvmsg(a[0], (struct user_msghdr __user *)a[1],
				    a[2], true);
		break;
	case SYS_RECVMMSG:
		/* The timeout layout follows the kernel's word size. */
		if (IS_ENABLED(CONFIG_64BIT))
			err = __sys_recvmmsg(a[0],
					     (struct mmsghdr __user *)a[1],
					     a[2], a[3],
					     (struct __kernel_timespec __user *)a[4],
					     NULL);
		else
			err = __sys_recvmmsg(a[0],
					     (struct mmsghdr __user *)a[1],
					     a[2], a[3], NULL,
					     (struct old_timespec32 __user *)a[4]);
		break;
	case SYS_ACCEPT4:
		err = __sys_accept4(a[0], (struct sockaddr __user *)a[1],
				    (int __user *)a[2], a[3]);
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2950
55737fda
SH
2951/**
2952 * sock_register - add a socket protocol handler
2953 * @ops: description of protocol
2954 *
1da177e4
LT
2955 * This function is called by a protocol handler that wants to
2956 * advertise its address family, and have it linked into the
e793c0f7 2957 * socket interface. The value ops->family corresponds to the
55737fda 2958 * socket system call protocol family.
1da177e4 2959 */
f0fd27d4 2960int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2961{
2962 int err;
2963
2964 if (ops->family >= NPROTO) {
3410f22e 2965 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2966 return -ENOBUFS;
2967 }
55737fda
SH
2968
2969 spin_lock(&net_family_lock);
190683a9
ED
2970 if (rcu_dereference_protected(net_families[ops->family],
2971 lockdep_is_held(&net_family_lock)))
55737fda
SH
2972 err = -EEXIST;
2973 else {
cf778b00 2974 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2975 err = 0;
2976 }
55737fda
SH
2977 spin_unlock(&net_family_lock);
2978
3410f22e 2979 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2980 return err;
2981}
c6d409cf 2982EXPORT_SYMBOL(sock_register);
1da177e4 2983
55737fda
SH
2984/**
2985 * sock_unregister - remove a protocol handler
2986 * @family: protocol family to remove
2987 *
1da177e4
LT
2988 * This function is called by a protocol handler that wants to
2989 * remove its address family, and have it unlinked from the
55737fda
SH
2990 * new socket creation.
2991 *
2992 * If protocol handler is a module, then it can use module reference
2993 * counts to protect against new references. If protocol handler is not
2994 * a module then it needs to provide its own protection in
2995 * the ops->create routine.
1da177e4 2996 */
f0fd27d4 2997void sock_unregister(int family)
1da177e4 2998{
f0fd27d4 2999 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3000
55737fda 3001 spin_lock(&net_family_lock);
a9b3cd7f 3002 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3003 spin_unlock(&net_family_lock);
3004
3005 synchronize_rcu();
3006
3410f22e 3007 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 3008}
c6d409cf 3009EXPORT_SYMBOL(sock_unregister);
1da177e4 3010
bf2ae2e4
XL
3011bool sock_is_registered(int family)
3012{
66b51b0a 3013 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3014}
3015
77d76ea3 3016static int __init sock_init(void)
1da177e4 3017{
b3e19d92 3018 int err;
2ca794e5
EB
3019 /*
3020 * Initialize the network sysctl infrastructure.
3021 */
3022 err = net_sysctl_init();
3023 if (err)
3024 goto out;
b3e19d92 3025
1da177e4 3026 /*
89bddce5 3027 * Initialize skbuff SLAB cache
1da177e4
LT
3028 */
3029 skb_init();
1da177e4
LT
3030
3031 /*
89bddce5 3032 * Initialize the protocols module.
1da177e4
LT
3033 */
3034
3035 init_inodecache();
b3e19d92
NP
3036
3037 err = register_filesystem(&sock_fs_type);
3038 if (err)
3039 goto out_fs;
1da177e4 3040 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3041 if (IS_ERR(sock_mnt)) {
3042 err = PTR_ERR(sock_mnt);
3043 goto out_mount;
3044 }
77d76ea3
AK
3045
3046 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3047 */
3048
3049#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3050 err = netfilter_init();
3051 if (err)
3052 goto out;
1da177e4 3053#endif
cbeb321a 3054
408eccce 3055 ptp_classifier_init();
c1f19b51 3056
b3e19d92
NP
3057out:
3058 return err;
3059
3060out_mount:
3061 unregister_filesystem(&sock_fs_type);
3062out_fs:
3063 goto out;
1da177e4
LT
3064}
3065
77d76ea3
AK
3066core_initcall(sock_init); /* early initcall */
3067
1da177e4
LT
3068#ifdef CONFIG_PROC_FS
3069void socket_seq_show(struct seq_file *seq)
3070{
648845ab
TZ
3071 seq_printf(seq, "sockets: used %d\n",
3072 sock_inuse_get(seq->private));
1da177e4 3073}
89bddce5 3074#endif /* CONFIG_PROC_FS */
1da177e4 3075
89bbfc95 3076#ifdef CONFIG_COMPAT
36fd633e 3077static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 3078{
6b96018b 3079 struct compat_ifconf ifc32;
7a229387 3080 struct ifconf ifc;
7a229387
AB
3081 int err;
3082
6b96018b 3083 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3084 return -EFAULT;
3085
36fd633e
AV
3086 ifc.ifc_len = ifc32.ifc_len;
3087 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 3088
36fd633e
AV
3089 rtnl_lock();
3090 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
3091 rtnl_unlock();
7a229387
AB
3092 if (err)
3093 return err;
3094
36fd633e 3095 ifc32.ifc_len = ifc.ifc_len;
6b96018b 3096 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3097 return -EFAULT;
3098
3099 return 0;
3100}
3101
6b96018b 3102static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 3103{
3a7da39d
BH
3104 struct compat_ethtool_rxnfc __user *compat_rxnfc;
3105 bool convert_in = false, convert_out = false;
44c02a2c
AV
3106 size_t buf_size = 0;
3107 struct ethtool_rxnfc __user *rxnfc = NULL;
3108 struct ifreq ifr;
3a7da39d
BH
3109 u32 rule_cnt = 0, actual_rule_cnt;
3110 u32 ethcmd;
7a229387 3111 u32 data;
3a7da39d 3112 int ret;
7a229387 3113
3a7da39d
BH
3114 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
3115 return -EFAULT;
7a229387 3116
3a7da39d
BH
3117 compat_rxnfc = compat_ptr(data);
3118
3119 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
3120 return -EFAULT;
3121
3a7da39d
BH
3122 /* Most ethtool structures are defined without padding.
3123 * Unfortunately struct ethtool_rxnfc is an exception.
3124 */
3125 switch (ethcmd) {
3126 default:
3127 break;
3128 case ETHTOOL_GRXCLSRLALL:
3129 /* Buffer size is variable */
3130 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3131 return -EFAULT;
3132 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3133 return -ENOMEM;
3134 buf_size += rule_cnt * sizeof(u32);
3135 /* fall through */
3136 case ETHTOOL_GRXRINGS:
3137 case ETHTOOL_GRXCLSRLCNT:
3138 case ETHTOOL_GRXCLSRULE:
55664f32 3139 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3140 convert_out = true;
3141 /* fall through */
3142 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3143 buf_size += sizeof(struct ethtool_rxnfc);
3144 convert_in = true;
44c02a2c 3145 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3146 break;
3147 }
3148
44c02a2c 3149 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3150 return -EFAULT;
3151
44c02a2c 3152 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3153
3a7da39d 3154 if (convert_in) {
127fe533 3155 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3156 * fs.ring_cookie and at the end of fs, but nowhere else.
3157 */
127fe533
AD
3158 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3159 sizeof(compat_rxnfc->fs.m_ext) !=
3160 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3161 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3162 BUILD_BUG_ON(
3163 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3164 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3165 offsetof(struct ethtool_rxnfc, fs.location) -
3166 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3167
3168 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3169 (void __user *)(&rxnfc->fs.m_ext + 1) -
3170 (void __user *)rxnfc) ||
3a7da39d
BH
3171 copy_in_user(&rxnfc->fs.ring_cookie,
3172 &compat_rxnfc->fs.ring_cookie,
954b1244 3173 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3174 (void __user *)&rxnfc->fs.ring_cookie))
3175 return -EFAULT;
3176 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3177 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3178 return -EFAULT;
3179 } else if (copy_in_user(&rxnfc->rule_cnt,
3180 &compat_rxnfc->rule_cnt,
3181 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3182 return -EFAULT;
3183 }
3184
44c02a2c 3185 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3186 if (ret)
3187 return ret;
3188
3189 if (convert_out) {
3190 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3191 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3192 (const void __user *)rxnfc) ||
3a7da39d
BH
3193 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3194 &rxnfc->fs.ring_cookie,
954b1244
SH
3195 (const void __user *)(&rxnfc->fs.location + 1) -
3196 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3197 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3198 sizeof(rxnfc->rule_cnt)))
3199 return -EFAULT;
3200
3201 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3202 /* As an optimisation, we only copy the actual
3203 * number of rules that the underlying
3204 * function returned. Since Mallory might
3205 * change the rule count in user memory, we
3206 * check that it is less than the rule count
3207 * originally given (as the user buffer size),
3208 * which has been range-checked.
3209 */
3210 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3211 return -EFAULT;
3212 if (actual_rule_cnt < rule_cnt)
3213 rule_cnt = actual_rule_cnt;
3214 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3215 &rxnfc->rule_locs[0],
3216 rule_cnt * sizeof(u32)))
3217 return -EFAULT;
3218 }
3219 }
3220
3221 return 0;
7a229387
AB
3222}
3223
7a50a240
AB
3224static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3225{
7a50a240 3226 compat_uptr_t uptr32;
44c02a2c
AV
3227 struct ifreq ifr;
3228 void __user *saved;
3229 int err;
7a50a240 3230
44c02a2c 3231 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3232 return -EFAULT;
3233
3234 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3235 return -EFAULT;
3236
44c02a2c
AV
3237 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3238 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3239
44c02a2c
AV
3240 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3241 if (!err) {
3242 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3243 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3244 err = -EFAULT;
ccbd6a5a 3245 }
44c02a2c 3246 return err;
7a229387
AB
3247}
3248
590d4693
BH
3249/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3250static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3251 struct compat_ifreq __user *u_ifreq32)
7a229387 3252{
44c02a2c 3253 struct ifreq ifreq;
7a229387
AB
3254 u32 data32;
3255
44c02a2c 3256 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3257 return -EFAULT;
44c02a2c 3258 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3259 return -EFAULT;
44c02a2c 3260 ifreq.ifr_data = compat_ptr(data32);
7a229387 3261
44c02a2c 3262 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3263}
3264
37ac39bd
JB
3265static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3266 unsigned int cmd,
3267 struct compat_ifreq __user *uifr32)
3268{
3269 struct ifreq __user *uifr;
3270 int err;
3271
3272 /* Handle the fact that while struct ifreq has the same *layout* on
3273 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3274 * which are handled elsewhere, it still has different *size* due to
3275 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3276 * resulting in struct ifreq being 32 and 40 bytes respectively).
3277 * As a result, if the struct happens to be at the end of a page and
3278 * the next page isn't readable/writable, we get a fault. To prevent
3279 * that, copy back and forth to the full size.
3280 */
3281
3282 uifr = compat_alloc_user_space(sizeof(*uifr));
3283 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3284 return -EFAULT;
3285
3286 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3287
3288 if (!err) {
3289 switch (cmd) {
3290 case SIOCGIFFLAGS:
3291 case SIOCGIFMETRIC:
3292 case SIOCGIFMTU:
3293 case SIOCGIFMEM:
3294 case SIOCGIFHWADDR:
3295 case SIOCGIFINDEX:
3296 case SIOCGIFADDR:
3297 case SIOCGIFBRDADDR:
3298 case SIOCGIFDSTADDR:
3299 case SIOCGIFNETMASK:
3300 case SIOCGIFPFLAGS:
3301 case SIOCGIFTXQLEN:
3302 case SIOCGMIIPHY:
3303 case SIOCGMIIREG:
c6c9fee3 3304 case SIOCGIFNAME:
37ac39bd
JB
3305 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3306 err = -EFAULT;
3307 break;
3308 }
3309 }
3310 return err;
3311}
3312
a2116ed2
AB
3313static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3314 struct compat_ifreq __user *uifr32)
3315{
3316 struct ifreq ifr;
3317 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3318 int err;
3319
3320 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3321 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3322 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3323 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3324 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3325 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3326 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3327 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3328 if (err)
3329 return -EFAULT;
3330
44c02a2c 3331 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3332
3333 if (cmd == SIOCGIFMAP && !err) {
3334 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3335 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3336 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3337 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3338 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3339 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3340 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3341 if (err)
3342 err = -EFAULT;
3343 }
3344 return err;
3345}
3346
7a229387 3347struct rtentry32 {
c6d409cf 3348 u32 rt_pad1;
7a229387
AB
3349 struct sockaddr rt_dst; /* target address */
3350 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3351 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3352 unsigned short rt_flags;
3353 short rt_pad2;
3354 u32 rt_pad3;
3355 unsigned char rt_tos;
3356 unsigned char rt_class;
3357 short rt_pad4;
3358 short rt_metric; /* +1 for binary compatibility! */
7a229387 3359 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3360 u32 rt_mtu; /* per route MTU/Window */
3361 u32 rt_window; /* Window clamping */
7a229387
AB
3362 unsigned short rt_irtt; /* Initial RTT */
3363};
3364
3365struct in6_rtmsg32 {
3366 struct in6_addr rtmsg_dst;
3367 struct in6_addr rtmsg_src;
3368 struct in6_addr rtmsg_gateway;
3369 u32 rtmsg_type;
3370 u16 rtmsg_dst_len;
3371 u16 rtmsg_src_len;
3372 u32 rtmsg_metric;
3373 u32 rtmsg_info;
3374 u32 rtmsg_flags;
3375 s32 rtmsg_ifindex;
3376};
3377
6b96018b
AB
3378static int routing_ioctl(struct net *net, struct socket *sock,
3379 unsigned int cmd, void __user *argp)
7a229387
AB
3380{
3381 int ret;
3382 void *r = NULL;
3383 struct in6_rtmsg r6;
3384 struct rtentry r4;
3385 char devname[16];
3386 u32 rtdev;
3387 mm_segment_t old_fs = get_fs();
3388
6b96018b
AB
3389 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3390 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3391 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3392 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3393 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3394 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3395 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3396 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3397 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3398 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3399 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3400
3401 r = (void *) &r6;
3402 } else { /* ipv4 */
6b96018b 3403 struct rtentry32 __user *ur4 = argp;
c6d409cf 3404 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3405 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3406 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3407 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3408 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3409 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3410 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3411 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3412 if (rtdev) {
c6d409cf 3413 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3414 r4.rt_dev = (char __user __force *)devname;
3415 devname[15] = 0;
7a229387
AB
3416 } else
3417 r4.rt_dev = NULL;
3418
3419 r = (void *) &r4;
3420 }
3421
3422 if (ret) {
3423 ret = -EFAULT;
3424 goto out;
3425 }
3426
c6d409cf 3427 set_fs(KERNEL_DS);
63ff03ab 3428 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3429 set_fs(old_fs);
7a229387
AB
3430
3431out:
7a229387
AB
3432 return ret;
3433}
3434
3435/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3436 * for some operations; this forces use of the newer bridge-utils that
25985edc 3437 * use compatible ioctls
7a229387 3438 */
6b96018b 3439static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3440{
6b96018b 3441 compat_ulong_t tmp;
7a229387 3442
6b96018b 3443 if (get_user(tmp, argp))
7a229387
AB
3444 return -EFAULT;
3445 if (tmp == BRCTL_GET_VERSION)
3446 return BRCTL_VERSION + 1;
3447 return -EINVAL;
3448}
3449
6b96018b
AB
3450static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3451 unsigned int cmd, unsigned long arg)
3452{
3453 void __user *argp = compat_ptr(arg);
3454 struct sock *sk = sock->sk;
3455 struct net *net = sock_net(sk);
7a229387 3456
6b96018b 3457 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3458 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3459
3460 switch (cmd) {
3461 case SIOCSIFBR:
3462 case SIOCGIFBR:
3463 return old_bridge_ioctl(argp);
6b96018b 3464 case SIOCGIFCONF:
36fd633e 3465 return compat_dev_ifconf(net, argp);
6b96018b
AB
3466 case SIOCETHTOOL:
3467 return ethtool_ioctl(net, argp);
7a50a240
AB
3468 case SIOCWANDEV:
3469 return compat_siocwandev(net, argp);
a2116ed2
AB
3470 case SIOCGIFMAP:
3471 case SIOCSIFMAP:
3472 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3473 case SIOCADDRT:
3474 case SIOCDELRT:
3475 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3476 case SIOCGSTAMP_OLD:
3477 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3478 if (!sock->ops->gettstamp)
3479 return -ENOIOCTLCMD;
0768e170 3480 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3481 !COMPAT_USE_64BIT_TIME);
3482
590d4693
BH
3483 case SIOCBONDSLAVEINFOQUERY:
3484 case SIOCBONDINFOQUERY:
a2116ed2 3485 case SIOCSHWTSTAMP:
fd468c74 3486 case SIOCGHWTSTAMP:
590d4693 3487 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3488
3489 case FIOSETOWN:
3490 case SIOCSPGRP:
3491 case FIOGETOWN:
3492 case SIOCGPGRP:
3493 case SIOCBRADDBR:
3494 case SIOCBRDELBR:
3495 case SIOCGIFVLAN:
3496 case SIOCSIFVLAN:
3497 case SIOCADDDLCI:
3498 case SIOCDELDLCI:
c62cce2c 3499 case SIOCGSKNS:
0768e170
AB
3500 case SIOCGSTAMP_NEW:
3501 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3502 return sock_ioctl(file, cmd, arg);
3503
3504 case SIOCGIFFLAGS:
3505 case SIOCSIFFLAGS:
3506 case SIOCGIFMETRIC:
3507 case SIOCSIFMETRIC:
3508 case SIOCGIFMTU:
3509 case SIOCSIFMTU:
3510 case SIOCGIFMEM:
3511 case SIOCSIFMEM:
3512 case SIOCGIFHWADDR:
3513 case SIOCSIFHWADDR:
3514 case SIOCADDMULTI:
3515 case SIOCDELMULTI:
3516 case SIOCGIFINDEX:
6b96018b
AB
3517 case SIOCGIFADDR:
3518 case SIOCSIFADDR:
3519 case SIOCSIFHWBROADCAST:
6b96018b 3520 case SIOCDIFADDR:
6b96018b
AB
3521 case SIOCGIFBRDADDR:
3522 case SIOCSIFBRDADDR:
3523 case SIOCGIFDSTADDR:
3524 case SIOCSIFDSTADDR:
3525 case SIOCGIFNETMASK:
3526 case SIOCSIFNETMASK:
3527 case SIOCSIFPFLAGS:
3528 case SIOCGIFPFLAGS:
3529 case SIOCGIFTXQLEN:
3530 case SIOCSIFTXQLEN:
3531 case SIOCBRADDIF:
3532 case SIOCBRDELIF:
c6c9fee3 3533 case SIOCGIFNAME:
9177efd3
AB
3534 case SIOCSIFNAME:
3535 case SIOCGMIIPHY:
3536 case SIOCGMIIREG:
3537 case SIOCSMIIREG:
f92d4fc9
AV
3538 case SIOCBONDENSLAVE:
3539 case SIOCBONDRELEASE:
3540 case SIOCBONDSETHWADDR:
3541 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3542 return compat_ifreq_ioctl(net, sock, cmd, argp);
3543
6b96018b
AB
3544 case SIOCSARP:
3545 case SIOCGARP:
3546 case SIOCDARP:
c7dc504e 3547 case SIOCOUTQ:
9d7bf41f 3548 case SIOCOUTQNSD:
6b96018b 3549 case SIOCATMARK:
63ff03ab 3550 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3551 }
3552
6b96018b
AB
3553 return -ENOIOCTLCMD;
3554}
7a229387 3555
95c96174 3556static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3557 unsigned long arg)
89bbfc95
SP
3558{
3559 struct socket *sock = file->private_data;
3560 int ret = -ENOIOCTLCMD;
87de87d5
DM
3561 struct sock *sk;
3562 struct net *net;
3563
3564 sk = sock->sk;
3565 net = sock_net(sk);
89bbfc95
SP
3566
3567 if (sock->ops->compat_ioctl)
3568 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3569
87de87d5
DM
3570 if (ret == -ENOIOCTLCMD &&
3571 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3572 ret = compat_wext_handle_ioctl(net, cmd, arg);
3573
6b96018b
AB
3574 if (ret == -ENOIOCTLCMD)
3575 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3576
89bbfc95
SP
3577 return ret;
3578}
3579#endif
3580
8a3c245c
PT
3581/**
3582 * kernel_bind - bind an address to a socket (kernel space)
3583 * @sock: socket
3584 * @addr: address
3585 * @addrlen: length of address
3586 *
3587 * Returns 0 or an error.
3588 */
3589
ac5a488e
SS
3590int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3591{
3592 return sock->ops->bind(sock, addr, addrlen);
3593}
c6d409cf 3594EXPORT_SYMBOL(kernel_bind);
ac5a488e 3595
8a3c245c
PT
3596/**
3597 * kernel_listen - move socket to listening state (kernel space)
3598 * @sock: socket
3599 * @backlog: pending connections queue size
3600 *
3601 * Returns 0 or an error.
3602 */
3603
ac5a488e
SS
3604int kernel_listen(struct socket *sock, int backlog)
3605{
3606 return sock->ops->listen(sock, backlog);
3607}
c6d409cf 3608EXPORT_SYMBOL(kernel_listen);
ac5a488e 3609
8a3c245c
PT
3610/**
3611 * kernel_accept - accept a connection (kernel space)
3612 * @sock: listening socket
3613 * @newsock: new connected socket
3614 * @flags: flags
3615 *
3616 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3617 * If it fails, @newsock is guaranteed to be %NULL.
3618 * Returns 0 or an error.
3619 */
3620
ac5a488e
SS
3621int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3622{
3623 struct sock *sk = sock->sk;
3624 int err;
3625
3626 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3627 newsock);
3628 if (err < 0)
3629 goto done;
3630
cdfbabfb 3631 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3632 if (err < 0) {
3633 sock_release(*newsock);
fa8705b0 3634 *newsock = NULL;
ac5a488e
SS
3635 goto done;
3636 }
3637
3638 (*newsock)->ops = sock->ops;
1b08534e 3639 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3640
3641done:
3642 return err;
3643}
c6d409cf 3644EXPORT_SYMBOL(kernel_accept);
ac5a488e 3645
8a3c245c
PT
3646/**
3647 * kernel_connect - connect a socket (kernel space)
3648 * @sock: socket
3649 * @addr: address
3650 * @addrlen: address length
3651 * @flags: flags (O_NONBLOCK, ...)
3652 *
3653 * For datagram sockets, @addr is the addres to which datagrams are sent
3654 * by default, and the only address from which datagrams are received.
3655 * For stream sockets, attempts to connect to @addr.
3656 * Returns 0 or an error code.
3657 */
3658
ac5a488e 3659int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3660 int flags)
ac5a488e
SS
3661{
3662 return sock->ops->connect(sock, addr, addrlen, flags);
3663}
c6d409cf 3664EXPORT_SYMBOL(kernel_connect);
ac5a488e 3665
8a3c245c
PT
3666/**
3667 * kernel_getsockname - get the address which the socket is bound (kernel space)
3668 * @sock: socket
3669 * @addr: address holder
3670 *
3671 * Fills the @addr pointer with the address which the socket is bound.
3672 * Returns 0 or an error code.
3673 */
3674
9b2c45d4 3675int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3676{
9b2c45d4 3677 return sock->ops->getname(sock, addr, 0);
ac5a488e 3678}
c6d409cf 3679EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3680
8a3c245c
PT
3681/**
3682 * kernel_peername - get the address which the socket is connected (kernel space)
3683 * @sock: socket
3684 * @addr: address holder
3685 *
3686 * Fills the @addr pointer with the address which the socket is connected.
3687 * Returns 0 or an error code.
3688 */
3689
9b2c45d4 3690int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3691{
9b2c45d4 3692 return sock->ops->getname(sock, addr, 1);
ac5a488e 3693}
c6d409cf 3694EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3695
8a3c245c
PT
3696/**
3697 * kernel_getsockopt - get a socket option (kernel space)
3698 * @sock: socket
3699 * @level: API level (SOL_SOCKET, ...)
3700 * @optname: option tag
3701 * @optval: option value
3702 * @optlen: option length
3703 *
3704 * Assigns the option length to @optlen.
3705 * Returns 0 or an error.
3706 */
3707
ac5a488e
SS
3708int kernel_getsockopt(struct socket *sock, int level, int optname,
3709 char *optval, int *optlen)
3710{
3711 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3712 char __user *uoptval;
3713 int __user *uoptlen;
ac5a488e
SS
3714 int err;
3715
fb8621bb
NK
3716 uoptval = (char __user __force *) optval;
3717 uoptlen = (int __user __force *) optlen;
3718
ac5a488e
SS
3719 set_fs(KERNEL_DS);
3720 if (level == SOL_SOCKET)
fb8621bb 3721 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3722 else
fb8621bb
NK
3723 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3724 uoptlen);
ac5a488e
SS
3725 set_fs(oldfs);
3726 return err;
3727}
c6d409cf 3728EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3729
8a3c245c
PT
3730/**
3731 * kernel_setsockopt - set a socket option (kernel space)
3732 * @sock: socket
3733 * @level: API level (SOL_SOCKET, ...)
3734 * @optname: option tag
3735 * @optval: option value
3736 * @optlen: option length
3737 *
3738 * Returns 0 or an error.
3739 */
3740
ac5a488e 3741int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3742 char *optval, unsigned int optlen)
ac5a488e
SS
3743{
3744 mm_segment_t oldfs = get_fs();
fb8621bb 3745 char __user *uoptval;
ac5a488e
SS
3746 int err;
3747
fb8621bb
NK
3748 uoptval = (char __user __force *) optval;
3749
ac5a488e
SS
3750 set_fs(KERNEL_DS);
3751 if (level == SOL_SOCKET)
fb8621bb 3752 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3753 else
fb8621bb 3754 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3755 optlen);
3756 set_fs(oldfs);
3757 return err;
3758}
c6d409cf 3759EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3760
8a3c245c
PT
3761/**
3762 * kernel_sendpage - send a &page through a socket (kernel space)
3763 * @sock: socket
3764 * @page: page
3765 * @offset: page offset
3766 * @size: total size in bytes
3767 * @flags: flags (MSG_DONTWAIT, ...)
3768 *
3769 * Returns the total amount sent in bytes or an error.
3770 */
3771
ac5a488e
SS
3772int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3773 size_t size, int flags)
3774{
3775 if (sock->ops->sendpage)
3776 return sock->ops->sendpage(sock, page, offset, size, flags);
3777
3778 return sock_no_sendpage(sock, page, offset, size, flags);
3779}
c6d409cf 3780EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3781
8a3c245c
PT
3782/**
3783 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3784 * @sk: sock
3785 * @page: page
3786 * @offset: page offset
3787 * @size: total size in bytes
3788 * @flags: flags (MSG_DONTWAIT, ...)
3789 *
3790 * Returns the total amount sent in bytes or an error.
3791 * Caller must hold @sk.
3792 */
3793
306b13eb
TH
3794int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3795 size_t size, int flags)
3796{
3797 struct socket *sock = sk->sk_socket;
3798
3799 if (sock->ops->sendpage_locked)
3800 return sock->ops->sendpage_locked(sk, page, offset, size,
3801 flags);
3802
3803 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3804}
3805EXPORT_SYMBOL(kernel_sendpage_locked);
3806
8a3c245c
PT
3807/**
3808 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3809 * @sock: socket
3810 * @how: connection part
3811 *
3812 * Returns 0 or an error.
3813 */
3814
91cf45f0
TM
3815int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3816{
3817 return sock->ops->shutdown(sock, how);
3818}
91cf45f0 3819EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3820
8a3c245c
PT
3821/**
3822 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3823 * @sk: socket
3824 *
3825 * This routine returns the IP overhead imposed by a socket i.e.
3826 * the length of the underlying IP header, depending on whether
3827 * this is an IPv4 or IPv6 socket and the length from IP options turned
3828 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3829 */
8a3c245c 3830
113c3075
P
3831u32 kernel_sock_ip_overhead(struct sock *sk)
3832{
3833 struct inet_sock *inet;
3834 struct ip_options_rcu *opt;
3835 u32 overhead = 0;
113c3075
P
3836#if IS_ENABLED(CONFIG_IPV6)
3837 struct ipv6_pinfo *np;
3838 struct ipv6_txoptions *optv6 = NULL;
3839#endif /* IS_ENABLED(CONFIG_IPV6) */
3840
3841 if (!sk)
3842 return overhead;
3843
113c3075
P
3844 switch (sk->sk_family) {
3845 case AF_INET:
3846 inet = inet_sk(sk);
3847 overhead += sizeof(struct iphdr);
3848 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3849 sock_owned_by_user(sk));
113c3075
P
3850 if (opt)
3851 overhead += opt->opt.optlen;
3852 return overhead;
3853#if IS_ENABLED(CONFIG_IPV6)
3854 case AF_INET6:
3855 np = inet6_sk(sk);
3856 overhead += sizeof(struct ipv6hdr);
3857 if (np)
3858 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3859 sock_owned_by_user(sk));
113c3075
P
3860 if (optv6)
3861 overhead += (optv6->opt_flen + optv6->opt_nflen);
3862 return overhead;
3863#endif /* IS_ENABLED(CONFIG_IPV6) */
3864 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3865 return overhead;
3866 }
3867}
3868EXPORT_SYMBOL(kernel_sock_ip_overhead);