]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - net/socket.c
Merge tag 'for-linus-5.10b-rc1c-tag' of git://git.kernel.org/pub/scm/linux/kernel...
[mirror_ubuntu-hirsute-kernel.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
1da177e4 55#include <linux/mm.h>
1da177e4
LT
56#include <linux/socket.h>
57#include <linux/file.h>
58#include <linux/net.h>
59#include <linux/interrupt.h>
aaca0bdc 60#include <linux/thread_info.h>
55737fda 61#include <linux/rcupdate.h>
1da177e4
LT
62#include <linux/netdevice.h>
63#include <linux/proc_fs.h>
64#include <linux/seq_file.h>
4a3e2f71 65#include <linux/mutex.h>
1da177e4 66#include <linux/if_bridge.h>
20380731
ACM
67#include <linux/if_frad.h>
68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
c7dc504e 103#include <linux/termios.h>
6b96018b 104#include <linux/sockios.h>
076bb0c8 105#include <net/busy_poll.h>
f24b9be5 106#include <linux/errqueue.h>
06021292 107
#ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * Busy-poll tunables, only built when CONFIG_NET_RX_BUSY_POLL is set.
 * NOTE(review): presumably exported through sysctl, judging by the names;
 * the registration is not in this part of the file.
 */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
6b96018b 112
8ae5e030
AV
113static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
114static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 115static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
116
117static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
118static __poll_t sock_poll(struct file *file,
119 struct poll_table_struct *wait);
89bddce5 120static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
121#ifdef CONFIG_COMPAT
122static long compat_sock_ioctl(struct file *file,
89bddce5 123 unsigned int cmd, unsigned long arg);
89bbfc95 124#endif
1da177e4 125static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
126static ssize_t sock_sendpage(struct file *file, struct page *page,
127 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 128static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 129 struct pipe_inode_info *pipe, size_t len,
9c55e01c 130 unsigned int flags);
542d3065
AB
131
132#ifdef CONFIG_PROC_FS
133static void sock_show_fdinfo(struct seq_file *m, struct file *f)
134{
135 struct socket *sock = f->private_data;
136
137 if (sock->ops->show_fdinfo)
138 sock->ops->show_fdinfo(m, sock);
139}
140#else
141#define sock_show_fdinfo NULL
142#endif
1da177e4 143
1da177e4
LT
144/*
145 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
146 * in the operation structures but are done directly via the socketcall() multiplexor.
147 */
148
da7071d7 149static const struct file_operations socket_file_ops = {
1da177e4
LT
150 .owner = THIS_MODULE,
151 .llseek = no_llseek,
8ae5e030
AV
152 .read_iter = sock_read_iter,
153 .write_iter = sock_write_iter,
1da177e4
LT
154 .poll = sock_poll,
155 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
156#ifdef CONFIG_COMPAT
157 .compat_ioctl = compat_sock_ioctl,
158#endif
1da177e4 159 .mmap = sock_mmap,
1da177e4
LT
160 .release = sock_close,
161 .fasync = sock_fasync,
5274f052
JA
162 .sendpage = sock_sendpage,
163 .splice_write = generic_splice_sendpage,
9c55e01c 164 .splice_read = sock_splice_read,
b4653342 165 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
166};
167
168/*
169 * The protocol list. Each protocol is registered in here.
170 */
171
1da177e4 172static DEFINE_SPINLOCK(net_family_lock);
190683a9 173static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 174
1da177e4 175/*
89bddce5
SH
176 * Support routines.
177 * Move socket addresses back and forth across the kernel/user
178 * divide and look after the messy bits.
1da177e4
LT
179 */
180
1da177e4
LT
181/**
182 * move_addr_to_kernel - copy a socket address into kernel space
183 * @uaddr: Address in user space
184 * @kaddr: Address in kernel space
185 * @ulen: Length in user space
186 *
187 * The address is copied into kernel space. If the provided address is
188 * too long an error code of -EINVAL is returned. If the copy gives
189 * invalid addresses -EFAULT is returned. On a success 0 is returned.
190 */
191
43db362d 192int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 193{
230b1839 194 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 195 return -EINVAL;
89bddce5 196 if (ulen == 0)
1da177e4 197 return 0;
89bddce5 198 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 199 return -EFAULT;
3ec3b2fb 200 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
201}
202
203/**
204 * move_addr_to_user - copy an address to user space
205 * @kaddr: kernel space address
206 * @klen: length of address in kernel
207 * @uaddr: user space address
208 * @ulen: pointer to user length field
209 *
210 * The value pointed to by ulen on entry is the buffer length available.
211 * This is overwritten with the buffer space used. -EINVAL is returned
212 * if an overlong buffer is specified or a negative buffer size. -EFAULT
213 * is returned if either the buffer or the length field are not
214 * accessible.
215 * After copying the data up to the limit the user specifies, the true
216 * length of the data is written over the length limit the user
217 * specified. Zero is returned for a success.
218 */
89bddce5 219
43db362d 220static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 221 void __user *uaddr, int __user *ulen)
1da177e4
LT
222{
223 int err;
224 int len;
225
68c6beb3 226 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
227 err = get_user(len, ulen);
228 if (err)
1da177e4 229 return err;
89bddce5
SH
230 if (len > klen)
231 len = klen;
68c6beb3 232 if (len < 0)
1da177e4 233 return -EINVAL;
89bddce5 234 if (len) {
d6fe3945
SG
235 if (audit_sockaddr(klen, kaddr))
236 return -ENOMEM;
89bddce5 237 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
238 return -EFAULT;
239 }
240 /*
89bddce5
SH
241 * "fromlen shall refer to the value before truncation.."
242 * 1003.1g
1da177e4
LT
243 */
244 return __put_user(klen, ulen);
245}
246
08009a76 247static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
248
249static struct inode *sock_alloc_inode(struct super_block *sb)
250{
251 struct socket_alloc *ei;
89bddce5 252
e94b1766 253 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
254 if (!ei)
255 return NULL;
333f7909
AV
256 init_waitqueue_head(&ei->socket.wq.wait);
257 ei->socket.wq.fasync_list = NULL;
258 ei->socket.wq.flags = 0;
89bddce5 259
1da177e4
LT
260 ei->socket.state = SS_UNCONNECTED;
261 ei->socket.flags = 0;
262 ei->socket.ops = NULL;
263 ei->socket.sk = NULL;
264 ei->socket.file = NULL;
1da177e4
LT
265
266 return &ei->vfs_inode;
267}
268
6d7855c5 269static void sock_free_inode(struct inode *inode)
1da177e4 270{
43815482
ED
271 struct socket_alloc *ei;
272
273 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 274 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
275}
276
51cc5068 277static void init_once(void *foo)
1da177e4 278{
89bddce5 279 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 280
a35afb83 281 inode_init_once(&ei->vfs_inode);
1da177e4 282}
89bddce5 283
1e911632 284static void init_inodecache(void)
1da177e4
LT
285{
286 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
287 sizeof(struct socket_alloc),
288 0,
289 (SLAB_HWCACHE_ALIGN |
290 SLAB_RECLAIM_ACCOUNT |
5d097056 291 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 292 init_once);
1e911632 293 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
294}
295
b87221de 296static const struct super_operations sockfs_ops = {
c6d409cf 297 .alloc_inode = sock_alloc_inode,
6d7855c5 298 .free_inode = sock_free_inode,
c6d409cf 299 .statfs = simple_statfs,
1da177e4
LT
300};
301
c23fbb6b
ED
302/*
303 * sockfs_dname() is called from d_path().
304 */
305static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
306{
307 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 308 d_inode(dentry)->i_ino);
c23fbb6b
ED
309}
310
3ba13d17 311static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 312 .d_dname = sockfs_dname,
1da177e4
LT
313};
314
bba0bd31
AG
315static int sockfs_xattr_get(const struct xattr_handler *handler,
316 struct dentry *dentry, struct inode *inode,
317 const char *suffix, void *value, size_t size)
318{
319 if (value) {
320 if (dentry->d_name.len + 1 > size)
321 return -ERANGE;
322 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
323 }
324 return dentry->d_name.len + 1;
325}
326
327#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
328#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
329#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
330
331static const struct xattr_handler sockfs_xattr_handler = {
332 .name = XATTR_NAME_SOCKPROTONAME,
333 .get = sockfs_xattr_get,
334};
335
4a590153
AG
336static int sockfs_security_xattr_set(const struct xattr_handler *handler,
337 struct dentry *dentry, struct inode *inode,
338 const char *suffix, const void *value,
339 size_t size, int flags)
340{
341 /* Handled by LSM. */
342 return -EAGAIN;
343}
344
345static const struct xattr_handler sockfs_security_xattr_handler = {
346 .prefix = XATTR_SECURITY_PREFIX,
347 .set = sockfs_security_xattr_set,
348};
349
bba0bd31
AG
350static const struct xattr_handler *sockfs_xattr_handlers[] = {
351 &sockfs_xattr_handler,
4a590153 352 &sockfs_security_xattr_handler,
bba0bd31
AG
353 NULL
354};
355
fba9be49 356static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 357{
fba9be49
DH
358 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
359 if (!ctx)
360 return -ENOMEM;
361 ctx->ops = &sockfs_ops;
362 ctx->dops = &sockfs_dentry_operations;
363 ctx->xattr = sockfs_xattr_handlers;
364 return 0;
c74a1cbb
AV
365}
366
367static struct vfsmount *sock_mnt __read_mostly;
368
369static struct file_system_type sock_fs_type = {
370 .name = "sockfs",
fba9be49 371 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
372 .kill_sb = kill_anon_super,
373};
374
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success they return the file descriptor,
 *	with the file struct implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we do not refer
 *	to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable
 *	with shared fd spaces; we cannot solve it inside the kernel,
 *	but we still take care of internal coherence.
 */
391
8a3c245c
PT
392/**
393 * sock_alloc_file - Bind a &socket to a &file
394 * @sock: socket
395 * @flags: file status flags
396 * @dname: protocol name
397 *
398 * Returns the &file bound with @sock, implicitly storing it
399 * in sock->file. If dname is %NULL, sets to "".
400 * On failure the return is a ERR pointer (see linux/err.h).
401 * This function uses GFP_KERNEL internally.
402 */
403
aab174f0 404struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 405{
7cbe66b6 406 struct file *file;
1da177e4 407
d93aa9d8
AV
408 if (!dname)
409 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 410
d93aa9d8
AV
411 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
412 O_RDWR | (flags & O_NONBLOCK),
413 &socket_file_ops);
b5ffe634 414 if (IS_ERR(file)) {
8e1611e2 415 sock_release(sock);
39b65252 416 return file;
cc3808f8
AV
417 }
418
419 sock->file = file;
39d8c1b6 420 file->private_data = sock;
d8e464ec 421 stream_open(SOCK_INODE(sock), file);
28407630 422 return file;
39d8c1b6 423}
56b31d1c 424EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 425
/*
 * Allocate a file descriptor and a file for @sock and install the file
 * in the current fd table. Returns the new fd, or a negative errno; in
 * every failure case @sock has been released (directly here, or by
 * sock_alloc_file()).
 */
static int sock_map_fd(struct socket *sock, int flags)
{
	struct file *filp;
	int fd;

	fd = get_unused_fd_flags(flags);
	if (unlikely(fd < 0)) {
		sock_release(sock);
		return fd;
	}

	filp = sock_alloc_file(sock, flags, NULL);
	if (IS_ERR(filp)) {
		/* sock_alloc_file() already released the socket. */
		put_unused_fd(fd);
		return PTR_ERR(filp);
	}

	fd_install(fd, filp);
	return fd;
}
444
8a3c245c
PT
445/**
446 * sock_from_file - Return the &socket bounded to @file.
447 * @file: file
448 * @err: pointer to an error code return
449 *
450 * On failure returns %NULL and assigns -ENOTSOCK to @err.
451 */
452
406a3c63 453struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 454{
6cb153ca
BL
455 if (file->f_op == &socket_file_ops)
456 return file->private_data; /* set in sock_map_fd */
457
23bb80d2
ED
458 *err = -ENOTSOCK;
459 return NULL;
6cb153ca 460}
406a3c63 461EXPORT_SYMBOL(sock_from_file);
6cb153ca 462
1da177e4 463/**
c6d409cf 464 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
465 * @fd: file handle
466 * @err: pointer to an error code return
467 *
468 * The file handle passed in is locked and the socket it is bound
241c4667 469 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
470 * with a negative errno code and NULL is returned. The function checks
471 * for both invalid handles and passing a handle which is not a socket.
472 *
473 * On a success the socket object pointer is returned.
474 */
475
476struct socket *sockfd_lookup(int fd, int *err)
477{
478 struct file *file;
1da177e4
LT
479 struct socket *sock;
480
89bddce5
SH
481 file = fget(fd);
482 if (!file) {
1da177e4
LT
483 *err = -EBADF;
484 return NULL;
485 }
89bddce5 486
6cb153ca
BL
487 sock = sock_from_file(file, err);
488 if (!sock)
1da177e4 489 fput(file);
6cb153ca
BL
490 return sock;
491}
c6d409cf 492EXPORT_SYMBOL(sockfd_lookup);
1da177e4 493
6cb153ca
BL
494static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
495{
00e188ef 496 struct fd f = fdget(fd);
6cb153ca
BL
497 struct socket *sock;
498
3672558c 499 *err = -EBADF;
00e188ef
AV
500 if (f.file) {
501 sock = sock_from_file(f.file, err);
502 if (likely(sock)) {
ce787a5a 503 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 504 return sock;
00e188ef
AV
505 }
506 fdput(f);
1da177e4 507 }
6cb153ca 508 return NULL;
1da177e4
LT
509}
510
600e1779
MY
511static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
512 size_t size)
513{
514 ssize_t len;
515 ssize_t used = 0;
516
c5ef6035 517 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
518 if (len < 0)
519 return len;
520 used += len;
521 if (buffer) {
522 if (size < used)
523 return -ERANGE;
524 buffer += len;
525 }
526
527 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
528 used += len;
529 if (buffer) {
530 if (size < used)
531 return -ERANGE;
532 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
533 buffer += len;
534 }
535
536 return used;
537}
538
dc647ec8 539static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
540{
541 int err = simple_setattr(dentry, iattr);
542
e1a3a60a 543 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
544 struct socket *sock = SOCKET_I(d_inode(dentry));
545
6d8c50dc
CW
546 if (sock->sk)
547 sock->sk->sk_uid = iattr->ia_uid;
548 else
549 err = -ENOENT;
86741ec2
LC
550 }
551
552 return err;
553}
554
600e1779 555static const struct inode_operations sockfs_inode_ops = {
600e1779 556 .listxattr = sockfs_listxattr,
86741ec2 557 .setattr = sockfs_setattr,
600e1779
MY
558};
559
1da177e4 560/**
8a3c245c 561 * sock_alloc - allocate a socket
89bddce5 562 *
1da177e4
LT
563 * Allocate a new inode and socket object. The two are bound together
564 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 565 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
566 */
567
f4a00aac 568struct socket *sock_alloc(void)
1da177e4 569{
89bddce5
SH
570 struct inode *inode;
571 struct socket *sock;
1da177e4 572
a209dfc7 573 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
574 if (!inode)
575 return NULL;
576
577 sock = SOCKET_I(inode);
578
85fe4025 579 inode->i_ino = get_next_ino();
89bddce5 580 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
581 inode->i_uid = current_fsuid();
582 inode->i_gid = current_fsgid();
600e1779 583 inode->i_op = &sockfs_inode_ops;
1da177e4 584
1da177e4
LT
585 return sock;
586}
f4a00aac 587EXPORT_SYMBOL(sock_alloc);
1da177e4 588
6d8c50dc 589static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
590{
591 if (sock->ops) {
592 struct module *owner = sock->ops->owner;
593
6d8c50dc
CW
594 if (inode)
595 inode_lock(inode);
1da177e4 596 sock->ops->release(sock);
ff7b11aa 597 sock->sk = NULL;
6d8c50dc
CW
598 if (inode)
599 inode_unlock(inode);
1da177e4
LT
600 sock->ops = NULL;
601 module_put(owner);
602 }
603
333f7909 604 if (sock->wq.fasync_list)
3410f22e 605 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 606
1da177e4
LT
607 if (!sock->file) {
608 iput(SOCK_INODE(sock));
609 return;
610 }
89bddce5 611 sock->file = NULL;
1da177e4 612}
6d8c50dc 613
9a8ad9ac
AL
/**
 *	sock_release - close a socket
 *	@sock: socket to close
 *
 *	The socket is released from the protocol stack if it has a release
 *	callback, and the inode is then released if the socket is bound to
 *	an inode not a file. This is __sock_release() without an inode to
 *	lock around the protocol's release callback.
 */
void sock_release(struct socket *sock)
{
	__sock_release(sock, NULL);
}
EXPORT_SYMBOL(sock_release);
1da177e4 627
c14ac945 628void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 629{
140c55d4
ED
630 u8 flags = *tx_flags;
631
c14ac945 632 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
633 flags |= SKBTX_HW_TSTAMP;
634
c14ac945 635 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
636 flags |= SKBTX_SW_TSTAMP;
637
c14ac945 638 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
639 flags |= SKBTX_SCHED_TSTAMP;
640
140c55d4 641 *tx_flags = flags;
20d49473 642}
67cc0d40 643EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 644
8c3c447b
PA
645INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
646 size_t));
a648a592
PA
647INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
648 size_t));
d8725c86 649static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 650{
a648a592
PA
651 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
652 inet_sendmsg, sock, msg,
653 msg_data_left(msg));
d8725c86
AV
654 BUG_ON(ret == -EIOCBQUEUED);
655 return ret;
1da177e4
LT
656}
657
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM. If the LSM hook
 *	rejects the send, its error is returned and nothing is sent.
 *	Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 674
8a3c245c
PT
675/**
676 * kernel_sendmsg - send a message through @sock (kernel-space)
677 * @sock: socket
678 * @msg: message header
679 * @vec: kernel vec
680 * @num: vec array length
681 * @size: total message data size
682 *
683 * Builds the message data with @vec and sends it through @sock.
684 * Returns the number of bytes sent, or an error code.
685 */
686
1da177e4
LT
687int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
688 struct kvec *vec, size_t num, size_t size)
689{
aa563d7b 690 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 691 return sock_sendmsg(sock, msg);
1da177e4 692}
c6d409cf 693EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 694
8a3c245c
PT
695/**
696 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
697 * @sk: sock
698 * @msg: message header
699 * @vec: output s/g array
700 * @num: output s/g array length
701 * @size: total message data size
702 *
703 * Builds the message data with @vec and sends it through @sock.
704 * Returns the number of bytes sent, or an error code.
705 * Caller must hold @sk.
706 */
707
306b13eb
TH
708int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
709 struct kvec *vec, size_t num, size_t size)
710{
711 struct socket *sock = sk->sk_socket;
712
713 if (!sock->ops->sendmsg_locked)
db5980d8 714 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 715
aa563d7b 716 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
717
718 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
719}
720EXPORT_SYMBOL(kernel_sendmsg_locked);
721
8605330a
SHY
722static bool skb_is_err_queue(const struct sk_buff *skb)
723{
724 /* pkt_type of skbs enqueued on the error queue are set to
725 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
726 * in recvmsg, since skbs received on a local socket will never
727 * have a pkt_type of PACKET_OUTGOING.
728 */
729 return skb->pkt_type == PACKET_OUTGOING;
730}
731
b50a5c70
ML
732/* On transmit, software and hardware timestamps are returned independently.
733 * As the two skb clones share the hardware timestamp, which may be updated
734 * before the software timestamp is received, a hardware TX timestamp may be
735 * returned only if there is no software TX timestamp. Ignore false software
736 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 737 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
738 * hardware timestamp.
739 */
740static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
741{
742 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
743}
744
aad9c8c4
ML
745static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
746{
747 struct scm_ts_pktinfo ts_pktinfo;
748 struct net_device *orig_dev;
749
750 if (!skb_mac_header_was_set(skb))
751 return;
752
753 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
754
755 rcu_read_lock();
756 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
757 if (orig_dev)
758 ts_pktinfo.if_index = orig_dev->ifindex;
759 rcu_read_unlock();
760
761 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
762 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
763 sizeof(ts_pktinfo), &ts_pktinfo);
764}
765
92f37fd2
ED
766/*
767 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
768 */
769void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
770 struct sk_buff *skb)
771{
20d49473 772 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 773 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
774 struct scm_timestamping_internal tss;
775
b50a5c70 776 int empty = 1, false_tstamp = 0;
20d49473
PO
777 struct skb_shared_hwtstamps *shhwtstamps =
778 skb_hwtstamps(skb);
779
780 /* Race occurred between timestamp enabling and packet
781 receiving. Fill in the current time for now. */
b50a5c70 782 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 783 __net_timestamp(skb);
b50a5c70
ML
784 false_tstamp = 1;
785 }
20d49473
PO
786
787 if (need_software_tstamp) {
788 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
789 if (new_tstamp) {
790 struct __kernel_sock_timeval tv;
791
792 skb_get_new_timestamp(skb, &tv);
793 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
794 sizeof(tv), &tv);
795 } else {
796 struct __kernel_old_timeval tv;
797
798 skb_get_timestamp(skb, &tv);
799 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
800 sizeof(tv), &tv);
801 }
20d49473 802 } else {
887feae3
DD
803 if (new_tstamp) {
804 struct __kernel_timespec ts;
805
806 skb_get_new_timestampns(skb, &ts);
807 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
808 sizeof(ts), &ts);
809 } else {
df1b4ba9 810 struct __kernel_old_timespec ts;
887feae3
DD
811
812 skb_get_timestampns(skb, &ts);
813 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
814 sizeof(ts), &ts);
815 }
20d49473
PO
816 }
817 }
818
f24b9be5 819 memset(&tss, 0, sizeof(tss));
c199105d 820 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 821 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 822 empty = 0;
4d276eb6 823 if (shhwtstamps &&
b9f40e21 824 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 825 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 826 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 827 empty = 0;
aad9c8c4
ML
828 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
829 !skb_is_err_queue(skb))
830 put_ts_pktinfo(msg, skb);
831 }
1c885808 832 if (!empty) {
9718475e
DD
833 if (sock_flag(sk, SOCK_TSTAMP_NEW))
834 put_cmsg_scm_timestamping64(msg, &tss);
835 else
836 put_cmsg_scm_timestamping(msg, &tss);
1c885808 837
8605330a 838 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 839 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
840 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
841 skb->len, skb->data);
842 }
92f37fd2 843}
7c81fd8b
ACM
844EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
845
6e3e939f
JB
846void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
847 struct sk_buff *skb)
848{
849 int ack;
850
851 if (!sock_flag(sk, SOCK_WIFI_STATUS))
852 return;
853 if (!skb->wifi_acked_valid)
854 return;
855
856 ack = skb->wifi_acked;
857
858 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
859}
860EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
861
11165f14 862static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
863 struct sk_buff *skb)
3b885787 864{
744d5a3e 865 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 866 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 867 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
868}
869
/*
 * Attach the receive control messages for @skb to @msg: first the
 * timestamps (sock_recv_timestamp()), then the drop counter
 * (sock_recv_drops()).
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 877
8c3c447b 878INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
879 size_t, int));
880INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
881 size_t, int));
1b784140 882static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 883 int flags)
1da177e4 884{
a648a592
PA
885 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
886 inet_recvmsg, sock, msg, msg_data_left(msg),
887 flags);
1da177e4
LT
888}
889
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM. If the LSM hook
 *	rejects the receive, its error is returned and the protocol is not
 *	called. Returns the total number of bytes received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 906
c1249c0a 907/**
8a3c245c
PT
908 * kernel_recvmsg - Receive a message from a socket (kernel space)
909 * @sock: The socket to receive the message from
910 * @msg: Received message
911 * @vec: Input s/g array for message data
912 * @num: Size of input s/g array
913 * @size: Number of bytes to read
914 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 915 *
8a3c245c
PT
916 * On return the msg structure contains the scatter/gather array passed in the
917 * vec argument. The array is modified so that it consists of the unfilled
918 * portion of the original array.
c1249c0a 919 *
8a3c245c 920 * The returned value is the total number of bytes received, or an error.
c1249c0a 921 */
8a3c245c 922
89bddce5
SH
923int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
924 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 925{
1f466e1f 926 msg->msg_control_is_user = false;
aa563d7b 927 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1f466e1f 928 return sock_recvmsg(sock, msg, flags);
1da177e4 929}
c6d409cf 930EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 931
ce1d4d3e
CH
932static ssize_t sock_sendpage(struct file *file, struct page *page,
933 int offset, size_t size, loff_t *ppos, int more)
1da177e4 934{
1da177e4
LT
935 struct socket *sock;
936 int flags;
937
ce1d4d3e
CH
938 sock = file->private_data;
939
35f9c09f
ED
940 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
941 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
942 flags |= more;
ce1d4d3e 943
e6949583 944 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 945}
1da177e4 946
9c55e01c 947static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 948 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
949 unsigned int flags)
950{
951 struct socket *sock = file->private_data;
952
997b37da 953 if (unlikely(!sock->ops->splice_read))
95506588 954 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 955
9c55e01c
JA
956 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
957}
958
8ae5e030 959static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 960{
6d652330
AV
961 struct file *file = iocb->ki_filp;
962 struct socket *sock = file->private_data;
0345f931 963 struct msghdr msg = {.msg_iter = *to,
964 .msg_iocb = iocb};
8ae5e030 965 ssize_t res;
ce1d4d3e 966
ebfcd895 967 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
968 msg.msg_flags = MSG_DONTWAIT;
969
970 if (iocb->ki_pos != 0)
1da177e4 971 return -ESPIPE;
027445c3 972
66ee59af 973 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
974 return 0;
975
2da62906 976 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
977 *to = msg.msg_iter;
978 return res;
1da177e4
LT
979}
980
8ae5e030 981static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 982{
6d652330
AV
983 struct file *file = iocb->ki_filp;
984 struct socket *sock = file->private_data;
0345f931 985 struct msghdr msg = {.msg_iter = *from,
986 .msg_iocb = iocb};
8ae5e030 987 ssize_t res;
1da177e4 988
8ae5e030 989 if (iocb->ki_pos != 0)
ce1d4d3e 990 return -ESPIPE;
027445c3 991
ebfcd895 992 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
993 msg.msg_flags = MSG_DONTWAIT;
994
6d652330
AV
995 if (sock->type == SOCK_SEQPACKET)
996 msg.msg_flags |= MSG_EOR;
997
d8725c86 998 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
999 *from = msg.msg_iter;
1000 return res;
1da177e4
LT
1001}
1002
1da177e4
LT
1003/*
1004 * Atomic setting of ioctl hooks to avoid race
1005 * with module unload.
1006 */
1007
4a3e2f71 1008static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1009static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1010
881d966b 1011void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1012{
4a3e2f71 1013 mutex_lock(&br_ioctl_mutex);
1da177e4 1014 br_ioctl_hook = hook;
4a3e2f71 1015 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1016}
1017EXPORT_SYMBOL(brioctl_set);
1018
4a3e2f71 1019static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1020static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1021
881d966b 1022void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1023{
4a3e2f71 1024 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1025 vlan_ioctl_hook = hook;
4a3e2f71 1026 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1027}
1028EXPORT_SYMBOL(vlan_ioctl_set);
1029
4a3e2f71 1030static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1031static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1032
89bddce5 1033void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1034{
4a3e2f71 1035 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1036 dlci_ioctl_hook = hook;
4a3e2f71 1037 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1038}
1039EXPORT_SYMBOL(dlci_ioctl_set);
1040
6b96018b 1041static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1042 unsigned int cmd, unsigned long arg)
6b96018b
AB
1043{
1044 int err;
1045 void __user *argp = (void __user *)arg;
1046
1047 err = sock->ops->ioctl(sock, cmd, arg);
1048
1049 /*
1050 * If this ioctl is unknown try to hand it down
1051 * to the NIC driver.
1052 */
36fd633e
AV
1053 if (err != -ENOIOCTLCMD)
1054 return err;
6b96018b 1055
36fd633e
AV
1056 if (cmd == SIOCGIFCONF) {
1057 struct ifconf ifc;
1058 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1059 return -EFAULT;
1060 rtnl_lock();
1061 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1062 rtnl_unlock();
1063 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1064 err = -EFAULT;
44c02a2c
AV
1065 } else {
1066 struct ifreq ifr;
1067 bool need_copyout;
63ff03ab 1068 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1069 return -EFAULT;
1070 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1071 if (!err && need_copyout)
63ff03ab 1072 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1073 return -EFAULT;
36fd633e 1074 }
6b96018b
AB
1075 return err;
1076}
1077
1da177e4
LT
1078/*
1079 * With an ioctl, arg may well be a user mode pointer, but we don't know
1080 * what to do with it - that's up to the protocol still.
1081 */
1082
8a3c245c
PT
1083/**
1084 * get_net_ns - increment the refcount of the network namespace
1085 * @ns: common namespace (net)
1086 *
1087 * Returns the net's common namespace.
1088 */
1089
d8d211a2 1090struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1091{
1092 return &get_net(container_of(ns, struct net, ns))->ns;
1093}
d8d211a2 1094EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1095
1da177e4
LT
1096static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1097{
1098 struct socket *sock;
881d966b 1099 struct sock *sk;
1da177e4
LT
1100 void __user *argp = (void __user *)arg;
1101 int pid, err;
881d966b 1102 struct net *net;
1da177e4 1103
b69aee04 1104 sock = file->private_data;
881d966b 1105 sk = sock->sk;
3b1e0a65 1106 net = sock_net(sk);
44c02a2c
AV
1107 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1108 struct ifreq ifr;
1109 bool need_copyout;
1110 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1111 return -EFAULT;
1112 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1113 if (!err && need_copyout)
1114 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1115 return -EFAULT;
1da177e4 1116 } else
3d23e349 1117#ifdef CONFIG_WEXT_CORE
1da177e4 1118 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1119 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1120 } else
3d23e349 1121#endif
89bddce5 1122 switch (cmd) {
1da177e4
LT
1123 case FIOSETOWN:
1124 case SIOCSPGRP:
1125 err = -EFAULT;
1126 if (get_user(pid, (int __user *)argp))
1127 break;
393cc3f5 1128 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1129 break;
1130 case FIOGETOWN:
1131 case SIOCGPGRP:
609d7fa9 1132 err = put_user(f_getown(sock->file),
89bddce5 1133 (int __user *)argp);
1da177e4
LT
1134 break;
1135 case SIOCGIFBR:
1136 case SIOCSIFBR:
1137 case SIOCBRADDBR:
1138 case SIOCBRDELBR:
1139 err = -ENOPKG;
1140 if (!br_ioctl_hook)
1141 request_module("bridge");
1142
4a3e2f71 1143 mutex_lock(&br_ioctl_mutex);
89bddce5 1144 if (br_ioctl_hook)
881d966b 1145 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1146 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1147 break;
1148 case SIOCGIFVLAN:
1149 case SIOCSIFVLAN:
1150 err = -ENOPKG;
1151 if (!vlan_ioctl_hook)
1152 request_module("8021q");
1153
4a3e2f71 1154 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1155 if (vlan_ioctl_hook)
881d966b 1156 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1157 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1158 break;
1da177e4
LT
1159 case SIOCADDDLCI:
1160 case SIOCDELDLCI:
1161 err = -ENOPKG;
1162 if (!dlci_ioctl_hook)
1163 request_module("dlci");
1164
7512cbf6
PE
1165 mutex_lock(&dlci_ioctl_mutex);
1166 if (dlci_ioctl_hook)
1da177e4 1167 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1168 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1169 break;
c62cce2c
AV
1170 case SIOCGSKNS:
1171 err = -EPERM;
1172 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1173 break;
1174
1175 err = open_related_ns(&net->ns, get_net_ns);
1176 break;
0768e170
AB
1177 case SIOCGSTAMP_OLD:
1178 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1179 if (!sock->ops->gettstamp) {
1180 err = -ENOIOCTLCMD;
1181 break;
1182 }
1183 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1184 cmd == SIOCGSTAMP_OLD,
1185 !IS_ENABLED(CONFIG_64BIT));
60747828 1186 break;
0768e170
AB
1187 case SIOCGSTAMP_NEW:
1188 case SIOCGSTAMPNS_NEW:
1189 if (!sock->ops->gettstamp) {
1190 err = -ENOIOCTLCMD;
1191 break;
1192 }
1193 err = sock->ops->gettstamp(sock, argp,
1194 cmd == SIOCGSTAMP_NEW,
1195 false);
c7cbdbf2 1196 break;
1da177e4 1197 default:
63ff03ab 1198 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1199 break;
89bddce5 1200 }
1da177e4
LT
1201 return err;
1202}
1203
8a3c245c
PT
1204/**
1205 * sock_create_lite - creates a socket
1206 * @family: protocol family (AF_INET, ...)
1207 * @type: communication type (SOCK_STREAM, ...)
1208 * @protocol: protocol (0, ...)
1209 * @res: new socket
1210 *
1211 * Creates a new socket and assigns it to @res, passing through LSM.
1212 * The new socket initialization is not complete, see kernel_accept().
1213 * Returns 0 or an error. On failure @res is set to %NULL.
1214 * This function internally uses GFP_KERNEL.
1215 */
1216
1da177e4
LT
1217int sock_create_lite(int family, int type, int protocol, struct socket **res)
1218{
1219 int err;
1220 struct socket *sock = NULL;
89bddce5 1221
1da177e4
LT
1222 err = security_socket_create(family, type, protocol, 1);
1223 if (err)
1224 goto out;
1225
1226 sock = sock_alloc();
1227 if (!sock) {
1228 err = -ENOMEM;
1229 goto out;
1230 }
1231
1da177e4 1232 sock->type = type;
7420ed23
VY
1233 err = security_socket_post_create(sock, family, type, protocol, 1);
1234 if (err)
1235 goto out_release;
1236
1da177e4
LT
1237out:
1238 *res = sock;
1239 return err;
7420ed23
VY
1240out_release:
1241 sock_release(sock);
1242 sock = NULL;
1243 goto out;
1da177e4 1244}
c6d409cf 1245EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1246
1247/* No kernel lock held - perfect */
ade994f4 1248static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1249{
3cafb376 1250 struct socket *sock = file->private_data;
a331de3b 1251 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1252
e88958e6
CH
1253 if (!sock->ops->poll)
1254 return 0;
f641f13b 1255
a331de3b
CH
1256 if (sk_can_busy_loop(sock->sk)) {
1257 /* poll once if requested by the syscall */
1258 if (events & POLL_BUSY_LOOP)
1259 sk_busy_loop(sock->sk, 1);
1260
1261 /* if this socket can poll_ll, tell the system call */
1262 flag = POLL_BUSY_LOOP;
1263 }
1264
1265 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1266}
1267
89bddce5 1268static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1269{
b69aee04 1270 struct socket *sock = file->private_data;
1da177e4
LT
1271
1272 return sock->ops->mmap(file, sock, vma);
1273}
1274
/* Release the socket embedded in @inode; @filp is unused.  Always returns 0. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1280
1281/*
1282 * Update the socket async list
1283 *
1284 * Fasync_list locking strategy.
1285 *
1286 * 1. fasync_list is modified only under process context socket lock
1287 * i.e. under semaphore.
1288 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1289 * or under socket lock
1da177e4
LT
1290 */
1291
1292static int sock_fasync(int fd, struct file *filp, int on)
1293{
989a2979
ED
1294 struct socket *sock = filp->private_data;
1295 struct sock *sk = sock->sk;
333f7909 1296 struct socket_wq *wq = &sock->wq;
1da177e4 1297
989a2979 1298 if (sk == NULL)
1da177e4 1299 return -EINVAL;
1da177e4
LT
1300
1301 lock_sock(sk);
eaefd110 1302 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1303
eaefd110 1304 if (!wq->fasync_list)
989a2979
ED
1305 sock_reset_flag(sk, SOCK_FASYNC);
1306 else
bcdce719 1307 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1308
989a2979 1309 release_sock(sk);
1da177e4
LT
1310 return 0;
1311}
1312
ceb5d58b 1313/* This function may be called only under rcu_lock */
1da177e4 1314
ceb5d58b 1315int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1316{
ceb5d58b 1317 if (!wq || !wq->fasync_list)
1da177e4 1318 return -1;
ceb5d58b 1319
89bddce5 1320 switch (how) {
8d8ad9d7 1321 case SOCK_WAKE_WAITD:
ceb5d58b 1322 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1323 break;
1324 goto call_kill;
8d8ad9d7 1325 case SOCK_WAKE_SPACE:
ceb5d58b 1326 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1327 break;
7c7ab580 1328 fallthrough;
8d8ad9d7 1329 case SOCK_WAKE_IO:
89bddce5 1330call_kill:
43815482 1331 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1332 break;
8d8ad9d7 1333 case SOCK_WAKE_URG:
43815482 1334 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1335 }
ceb5d58b 1336
1da177e4
LT
1337 return 0;
1338}
c6d409cf 1339EXPORT_SYMBOL(sock_wake_async);
1da177e4 1340
8a3c245c
PT
1341/**
1342 * __sock_create - creates a socket
1343 * @net: net namespace
1344 * @family: protocol family (AF_INET, ...)
1345 * @type: communication type (SOCK_STREAM, ...)
1346 * @protocol: protocol (0, ...)
1347 * @res: new socket
1348 * @kern: boolean for kernel space sockets
1349 *
1350 * Creates a new socket and assigns it to @res, passing through LSM.
1351 * Returns 0 or an error. @res is written only on success. @kern must
1352 * be set to true if the socket resides in kernel space.
1353 * This function internally uses GFP_KERNEL.
1354 */
1355
721db93a 1356int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1357 struct socket **res, int kern)
1da177e4
LT
1358{
1359 int err;
1360 struct socket *sock;
55737fda 1361 const struct net_proto_family *pf;
1da177e4
LT
1362
1363 /*
89bddce5 1364 * Check protocol is in range
1da177e4
LT
1365 */
1366 if (family < 0 || family >= NPROTO)
1367 return -EAFNOSUPPORT;
1368 if (type < 0 || type >= SOCK_MAX)
1369 return -EINVAL;
1370
1371 /* Compatibility.
1372
1373 This uglymoron is moved from INET layer to here to avoid
1374 deadlock in module load.
1375 */
1376 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1377 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1378 current->comm);
1da177e4
LT
1379 family = PF_PACKET;
1380 }
1381
1382 err = security_socket_create(family, type, protocol, kern);
1383 if (err)
1384 return err;
89bddce5 1385
55737fda
SH
1386 /*
1387 * Allocate the socket and allow the family to set things up. if
1388 * the protocol is 0, the family is instructed to select an appropriate
1389 * default.
1390 */
1391 sock = sock_alloc();
1392 if (!sock) {
e87cc472 1393 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1394 return -ENFILE; /* Not exactly a match, but its the
1395 closest posix thing */
1396 }
1397
1398 sock->type = type;
1399
95a5afca 1400#ifdef CONFIG_MODULES
89bddce5
SH
1401 /* Attempt to load a protocol module if the find failed.
1402 *
1403 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1404 * requested real, full-featured networking support upon configuration.
1405 * Otherwise module support will break!
1406 */
190683a9 1407 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1408 request_module("net-pf-%d", family);
1da177e4
LT
1409#endif
1410
55737fda
SH
1411 rcu_read_lock();
1412 pf = rcu_dereference(net_families[family]);
1413 err = -EAFNOSUPPORT;
1414 if (!pf)
1415 goto out_release;
1da177e4
LT
1416
1417 /*
1418 * We will call the ->create function, that possibly is in a loadable
1419 * module, so we have to bump that loadable module refcnt first.
1420 */
55737fda 1421 if (!try_module_get(pf->owner))
1da177e4
LT
1422 goto out_release;
1423
55737fda
SH
1424 /* Now protected by module ref count */
1425 rcu_read_unlock();
1426
3f378b68 1427 err = pf->create(net, sock, protocol, kern);
55737fda 1428 if (err < 0)
1da177e4 1429 goto out_module_put;
a79af59e 1430
1da177e4
LT
1431 /*
1432 * Now to bump the refcnt of the [loadable] module that owns this
1433 * socket at sock_release time we decrement its refcnt.
1434 */
55737fda
SH
1435 if (!try_module_get(sock->ops->owner))
1436 goto out_module_busy;
1437
1da177e4
LT
1438 /*
1439 * Now that we're done with the ->create function, the [loadable]
1440 * module can have its refcnt decremented
1441 */
55737fda 1442 module_put(pf->owner);
7420ed23
VY
1443 err = security_socket_post_create(sock, family, type, protocol, kern);
1444 if (err)
3b185525 1445 goto out_sock_release;
55737fda 1446 *res = sock;
1da177e4 1447
55737fda
SH
1448 return 0;
1449
1450out_module_busy:
1451 err = -EAFNOSUPPORT;
1da177e4 1452out_module_put:
55737fda
SH
1453 sock->ops = NULL;
1454 module_put(pf->owner);
1455out_sock_release:
1da177e4 1456 sock_release(sock);
55737fda
SH
1457 return err;
1458
1459out_release:
1460 rcu_read_unlock();
1461 goto out_sock_release;
1da177e4 1462}
721db93a 1463EXPORT_SYMBOL(__sock_create);
1da177e4 1464
8a3c245c
PT
1465/**
1466 * sock_create - creates a socket
1467 * @family: protocol family (AF_INET, ...)
1468 * @type: communication type (SOCK_STREAM, ...)
1469 * @protocol: protocol (0, ...)
1470 * @res: new socket
1471 *
1472 * A wrapper around __sock_create().
1473 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1474 */
1475
1da177e4
LT
1476int sock_create(int family, int type, int protocol, struct socket **res)
1477{
1b8d7ae4 1478 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1479}
c6d409cf 1480EXPORT_SYMBOL(sock_create);
1da177e4 1481
8a3c245c
PT
1482/**
1483 * sock_create_kern - creates a socket (kernel space)
1484 * @net: net namespace
1485 * @family: protocol family (AF_INET, ...)
1486 * @type: communication type (SOCK_STREAM, ...)
1487 * @protocol: protocol (0, ...)
1488 * @res: new socket
1489 *
1490 * A wrapper around __sock_create().
1491 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1492 */
1493
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	/* Same as sock_create() but in the caller-supplied @net, with kern == 1. */
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1499
9d6a15c3 1500int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1501{
1502 int retval;
1503 struct socket *sock;
a677a039
UD
1504 int flags;
1505
e38b36f3
UD
1506 /* Check the SOCK_* constants for consistency. */
1507 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1508 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1509 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1510 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1511
a677a039 1512 flags = type & ~SOCK_TYPE_MASK;
77d27200 1513 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1514 return -EINVAL;
1515 type &= SOCK_TYPE_MASK;
1da177e4 1516
aaca0bdc
UD
1517 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1518 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1519
1da177e4
LT
1520 retval = sock_create(family, type, protocol, &sock);
1521 if (retval < 0)
8e1611e2 1522 return retval;
1da177e4 1523
8e1611e2 1524 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1525}
1526
9d6a15c3
DB
1527SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1528{
1529 return __sys_socket(family, type, protocol);
1530}
1531
1da177e4
LT
1532/*
1533 * Create a pair of connected sockets.
1534 */
1535
6debc8d8 1536int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1537{
1538 struct socket *sock1, *sock2;
1539 int fd1, fd2, err;
db349509 1540 struct file *newfile1, *newfile2;
a677a039
UD
1541 int flags;
1542
1543 flags = type & ~SOCK_TYPE_MASK;
77d27200 1544 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1545 return -EINVAL;
1546 type &= SOCK_TYPE_MASK;
1da177e4 1547
aaca0bdc
UD
1548 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1549 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1550
016a266b
AV
1551 /*
1552 * reserve descriptors and make sure we won't fail
1553 * to return them to userland.
1554 */
1555 fd1 = get_unused_fd_flags(flags);
1556 if (unlikely(fd1 < 0))
1557 return fd1;
1558
1559 fd2 = get_unused_fd_flags(flags);
1560 if (unlikely(fd2 < 0)) {
1561 put_unused_fd(fd1);
1562 return fd2;
1563 }
1564
1565 err = put_user(fd1, &usockvec[0]);
1566 if (err)
1567 goto out;
1568
1569 err = put_user(fd2, &usockvec[1]);
1570 if (err)
1571 goto out;
1572
1da177e4
LT
1573 /*
1574 * Obtain the first socket and check if the underlying protocol
1575 * supports the socketpair call.
1576 */
1577
1578 err = sock_create(family, type, protocol, &sock1);
016a266b 1579 if (unlikely(err < 0))
1da177e4
LT
1580 goto out;
1581
1582 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1583 if (unlikely(err < 0)) {
1584 sock_release(sock1);
1585 goto out;
bf3c23d1 1586 }
d73aa286 1587
d47cd945
DH
1588 err = security_socket_socketpair(sock1, sock2);
1589 if (unlikely(err)) {
1590 sock_release(sock2);
1591 sock_release(sock1);
1592 goto out;
1593 }
1594
016a266b
AV
1595 err = sock1->ops->socketpair(sock1, sock2);
1596 if (unlikely(err < 0)) {
1597 sock_release(sock2);
1598 sock_release(sock1);
1599 goto out;
28407630
AV
1600 }
1601
aab174f0 1602 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1603 if (IS_ERR(newfile1)) {
28407630 1604 err = PTR_ERR(newfile1);
016a266b
AV
1605 sock_release(sock2);
1606 goto out;
28407630
AV
1607 }
1608
aab174f0 1609 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1610 if (IS_ERR(newfile2)) {
1611 err = PTR_ERR(newfile2);
016a266b
AV
1612 fput(newfile1);
1613 goto out;
db349509
AV
1614 }
1615
157cf649 1616 audit_fd_pair(fd1, fd2);
d73aa286 1617
db349509
AV
1618 fd_install(fd1, newfile1);
1619 fd_install(fd2, newfile2);
d73aa286 1620 return 0;
1da177e4 1621
016a266b 1622out:
d73aa286 1623 put_unused_fd(fd2);
d73aa286 1624 put_unused_fd(fd1);
1da177e4
LT
1625 return err;
1626}
1627
6debc8d8
DB
1628SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1629 int __user *, usockvec)
1630{
1631 return __sys_socketpair(family, type, protocol, usockvec);
1632}
1633
1da177e4
LT
1634/*
1635 * Bind a name to a socket. Nothing much to do here since it's
1636 * the protocol's responsibility to handle the local address.
1637 *
1638 * We move the socket address to kernel space before we call
1639 * the protocol layer (having also checked the address is ok).
1640 */
1641
a87d35d8 1642int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1643{
1644 struct socket *sock;
230b1839 1645 struct sockaddr_storage address;
6cb153ca 1646 int err, fput_needed;
1da177e4 1647
89bddce5 1648 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1649 if (sock) {
43db362d 1650 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1651 if (!err) {
89bddce5 1652 err = security_socket_bind(sock,
230b1839 1653 (struct sockaddr *)&address,
89bddce5 1654 addrlen);
6cb153ca
BL
1655 if (!err)
1656 err = sock->ops->bind(sock,
89bddce5 1657 (struct sockaddr *)
230b1839 1658 &address, addrlen);
1da177e4 1659 }
6cb153ca 1660 fput_light(sock->file, fput_needed);
89bddce5 1661 }
1da177e4
LT
1662 return err;
1663}
1664
a87d35d8
DB
1665SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1666{
1667 return __sys_bind(fd, umyaddr, addrlen);
1668}
1669
1da177e4
LT
1670/*
1671 * Perform a listen. Basically, we allow the protocol to do anything
1672 * necessary for a listen, and if that works, we mark the socket as
1673 * ready for listening.
1674 */
1675
25e290ee 1676int __sys_listen(int fd, int backlog)
1da177e4
LT
1677{
1678 struct socket *sock;
6cb153ca 1679 int err, fput_needed;
b8e1f9b5 1680 int somaxconn;
89bddce5
SH
1681
1682 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1683 if (sock) {
8efa6e93 1684 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1685 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1686 backlog = somaxconn;
1da177e4
LT
1687
1688 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1689 if (!err)
1690 err = sock->ops->listen(sock, backlog);
1da177e4 1691
6cb153ca 1692 fput_light(sock->file, fput_needed);
1da177e4
LT
1693 }
1694 return err;
1695}
1696
25e290ee
DB
1697SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1698{
1699 return __sys_listen(fd, backlog);
1700}
1701
de2ea4b6
JA
1702int __sys_accept4_file(struct file *file, unsigned file_flags,
1703 struct sockaddr __user *upeer_sockaddr,
09952e3e
JA
1704 int __user *upeer_addrlen, int flags,
1705 unsigned long nofile)
1da177e4
LT
1706{
1707 struct socket *sock, *newsock;
39d8c1b6 1708 struct file *newfile;
de2ea4b6 1709 int err, len, newfd;
230b1839 1710 struct sockaddr_storage address;
1da177e4 1711
77d27200 1712 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1713 return -EINVAL;
1714
1715 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1716 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1717
de2ea4b6 1718 sock = sock_from_file(file, &err);
1da177e4
LT
1719 if (!sock)
1720 goto out;
1721
1722 err = -ENFILE;
c6d409cf
ED
1723 newsock = sock_alloc();
1724 if (!newsock)
de2ea4b6 1725 goto out;
1da177e4
LT
1726
1727 newsock->type = sock->type;
1728 newsock->ops = sock->ops;
1729
1da177e4
LT
1730 /*
1731 * We don't need try_module_get here, as the listening socket (sock)
1732 * has the protocol module (sock->ops->owner) held.
1733 */
1734 __module_get(newsock->ops->owner);
1735
09952e3e 1736 newfd = __get_unused_fd_flags(flags, nofile);
39d8c1b6
DM
1737 if (unlikely(newfd < 0)) {
1738 err = newfd;
9a1875e6 1739 sock_release(newsock);
de2ea4b6 1740 goto out;
39d8c1b6 1741 }
aab174f0 1742 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1743 if (IS_ERR(newfile)) {
28407630
AV
1744 err = PTR_ERR(newfile);
1745 put_unused_fd(newfd);
de2ea4b6 1746 goto out;
28407630 1747 }
39d8c1b6 1748
a79af59e
FF
1749 err = security_socket_accept(sock, newsock);
1750 if (err)
39d8c1b6 1751 goto out_fd;
a79af59e 1752
de2ea4b6
JA
1753 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1754 false);
1da177e4 1755 if (err < 0)
39d8c1b6 1756 goto out_fd;
1da177e4
LT
1757
1758 if (upeer_sockaddr) {
9b2c45d4
DV
1759 len = newsock->ops->getname(newsock,
1760 (struct sockaddr *)&address, 2);
1761 if (len < 0) {
1da177e4 1762 err = -ECONNABORTED;
39d8c1b6 1763 goto out_fd;
1da177e4 1764 }
43db362d 1765 err = move_addr_to_user(&address,
230b1839 1766 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1767 if (err < 0)
39d8c1b6 1768 goto out_fd;
1da177e4
LT
1769 }
1770
1771 /* File flags are not inherited via accept() unlike another OSes. */
1772
39d8c1b6
DM
1773 fd_install(newfd, newfile);
1774 err = newfd;
1da177e4
LT
1775out:
1776 return err;
39d8c1b6 1777out_fd:
9606a216 1778 fput(newfile);
39d8c1b6 1779 put_unused_fd(newfd);
de2ea4b6
JA
1780 goto out;
1781
1782}
1783
1784/*
1785 * For accept, we attempt to create a new socket, set up the link
1786 * with the client, wake up the client, then return the new
1787 * connected fd. We collect the address of the connector in kernel
1788 * space and move it to user at the very end. This is unclean because
1789 * we open the socket then return an error.
1790 *
1791 * 1003.1g adds the ability to recvmsg() to query connection pending
1792 * status to recvmsg. We need to add that support in a way that's
1793 * clean when we restructure accept also.
1794 */
1795
1796int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1797 int __user *upeer_addrlen, int flags)
1798{
1799 int ret = -EBADF;
1800 struct fd f;
1801
1802 f = fdget(fd);
1803 if (f.file) {
1804 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
09952e3e
JA
1805 upeer_addrlen, flags,
1806 rlimit(RLIMIT_NOFILE));
6b07edeb 1807 fdput(f);
de2ea4b6
JA
1808 }
1809
1810 return ret;
1da177e4
LT
1811}
1812
4541e805
DB
1813SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1814 int __user *, upeer_addrlen, int, flags)
1815{
1816 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1817}
1818
20f37034
HC
1819SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1820 int __user *, upeer_addrlen)
aaca0bdc 1821{
4541e805 1822 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1823}
1824
1da177e4
LT
1825/*
1826 * Attempt to connect to a socket with the server address. The address
1827 * is in user space so we verify it is OK and move it to kernel space.
1828 *
1829 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1830 * break bindings
1831 *
1832 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1833 * other SEQPACKET protocols that take time to connect() as it doesn't
1834 * include the -EINPROGRESS status for such sockets.
1835 */
1836
f499a021 1837int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1838 int addrlen, int file_flags)
1da177e4
LT
1839{
1840 struct socket *sock;
bd3ded31 1841 int err;
1da177e4 1842
bd3ded31 1843 sock = sock_from_file(file, &err);
1da177e4
LT
1844 if (!sock)
1845 goto out;
1da177e4 1846
89bddce5 1847 err =
f499a021 1848 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 1849 if (err)
bd3ded31 1850 goto out;
1da177e4 1851
f499a021 1852 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 1853 sock->file->f_flags | file_flags);
1da177e4
LT
1854out:
1855 return err;
1856}
1857
bd3ded31
JA
1858int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1859{
1860 int ret = -EBADF;
1861 struct fd f;
1862
1863 f = fdget(fd);
1864 if (f.file) {
f499a021
JA
1865 struct sockaddr_storage address;
1866
1867 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1868 if (!ret)
1869 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 1870 fdput(f);
bd3ded31
JA
1871 }
1872
1873 return ret;
1874}
1875
1387c2c2
DB
1876SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1877 int, addrlen)
1878{
1879 return __sys_connect(fd, uservaddr, addrlen);
1880}
1881
1da177e4
LT
1882/*
1883 * Get the local address ('name') of a socket object. Move the obtained
1884 * name to user space.
1885 */
1886
8882a107
DB
1887int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1888 int __user *usockaddr_len)
1da177e4
LT
1889{
1890 struct socket *sock;
230b1839 1891 struct sockaddr_storage address;
9b2c45d4 1892 int err, fput_needed;
89bddce5 1893
6cb153ca 1894 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1895 if (!sock)
1896 goto out;
1897
1898 err = security_socket_getsockname(sock);
1899 if (err)
1900 goto out_put;
1901
9b2c45d4
DV
1902 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1903 if (err < 0)
1da177e4 1904 goto out_put;
9b2c45d4
DV
1905 /* "err" is actually length in this case */
1906 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1907
1908out_put:
6cb153ca 1909 fput_light(sock->file, fput_needed);
1da177e4
LT
1910out:
1911 return err;
1912}
1913
8882a107
DB
1914SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1915 int __user *, usockaddr_len)
1916{
1917 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1918}
1919
1da177e4
LT
1920/*
1921 * Get the remote address ('name') of a socket object. Move the obtained
1922 * name to user space.
1923 */
1924
b21c8f83
DB
1925int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1926 int __user *usockaddr_len)
1da177e4
LT
1927{
1928 struct socket *sock;
230b1839 1929 struct sockaddr_storage address;
9b2c45d4 1930 int err, fput_needed;
1da177e4 1931
89bddce5
SH
1932 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1933 if (sock != NULL) {
1da177e4
LT
1934 err = security_socket_getpeername(sock);
1935 if (err) {
6cb153ca 1936 fput_light(sock->file, fput_needed);
1da177e4
LT
1937 return err;
1938 }
1939
9b2c45d4
DV
1940 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1941 if (err >= 0)
1942 /* "err" is actually length in this case */
1943 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1944 usockaddr_len);
6cb153ca 1945 fput_light(sock->file, fput_needed);
1da177e4
LT
1946 }
1947 return err;
1948}
1949
b21c8f83
DB
1950SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1951 int __user *, usockaddr_len)
1952{
1953 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1954}
1955
1da177e4
LT
1956/*
1957 * Send a datagram to a given address. We move the address into kernel
1958 * space and check the user space data area is readable before invoking
1959 * the protocol.
1960 */
211b634b
DB
1961int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1962 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1963{
1964 struct socket *sock;
230b1839 1965 struct sockaddr_storage address;
1da177e4
LT
1966 int err;
1967 struct msghdr msg;
1968 struct iovec iov;
6cb153ca 1969 int fput_needed;
6cb153ca 1970
602bd0e9
AV
1971 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1972 if (unlikely(err))
1973 return err;
de0fa95c
PE
1974 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1975 if (!sock)
4387ff75 1976 goto out;
6cb153ca 1977
89bddce5 1978 msg.msg_name = NULL;
89bddce5
SH
1979 msg.msg_control = NULL;
1980 msg.msg_controllen = 0;
1981 msg.msg_namelen = 0;
6cb153ca 1982 if (addr) {
43db362d 1983 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1984 if (err < 0)
1985 goto out_put;
230b1839 1986 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1987 msg.msg_namelen = addr_len;
1da177e4
LT
1988 }
1989 if (sock->file->f_flags & O_NONBLOCK)
1990 flags |= MSG_DONTWAIT;
1991 msg.msg_flags = flags;
d8725c86 1992 err = sock_sendmsg(sock, &msg);
1da177e4 1993
89bddce5 1994out_put:
de0fa95c 1995 fput_light(sock->file, fput_needed);
4387ff75 1996out:
1da177e4
LT
1997 return err;
1998}
1999
211b634b
DB
2000SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2001 unsigned int, flags, struct sockaddr __user *, addr,
2002 int, addr_len)
2003{
2004 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2005}
2006
1da177e4 2007/*
89bddce5 2008 * Send a datagram down a socket.
1da177e4
LT
2009 */
2010
3e0fa65f 2011SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2012 unsigned int, flags)
1da177e4 2013{
211b634b 2014 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2015}
2016
2017/*
89bddce5 2018 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2019 * sender. We verify the buffers are writable and if needed move the
2020 * sender address from kernel to user space.
2021 */
7a09e1eb
DB
2022int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2023 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2024{
2025 struct socket *sock;
2026 struct iovec iov;
2027 struct msghdr msg;
230b1839 2028 struct sockaddr_storage address;
89bddce5 2029 int err, err2;
6cb153ca
BL
2030 int fput_needed;
2031
602bd0e9
AV
2032 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2033 if (unlikely(err))
2034 return err;
de0fa95c 2035 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2036 if (!sock)
de0fa95c 2037 goto out;
1da177e4 2038
89bddce5
SH
2039 msg.msg_control = NULL;
2040 msg.msg_controllen = 0;
f3d33426
HFS
2041 /* Save some cycles and don't copy the address if not needed */
2042 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2043 /* We assume all kernel code knows the size of sockaddr_storage */
2044 msg.msg_namelen = 0;
130ed5d1 2045 msg.msg_iocb = NULL;
9f138fa6 2046 msg.msg_flags = 0;
1da177e4
LT
2047 if (sock->file->f_flags & O_NONBLOCK)
2048 flags |= MSG_DONTWAIT;
2da62906 2049 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2050
89bddce5 2051 if (err >= 0 && addr != NULL) {
43db362d 2052 err2 = move_addr_to_user(&address,
230b1839 2053 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2054 if (err2 < 0)
2055 err = err2;
1da177e4 2056 }
de0fa95c
PE
2057
2058 fput_light(sock->file, fput_needed);
4387ff75 2059out:
1da177e4
LT
2060 return err;
2061}
2062
7a09e1eb
DB
2063SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2064 unsigned int, flags, struct sockaddr __user *, addr,
2065 int __user *, addr_len)
2066{
2067 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2068}
2069
1da177e4 2070/*
89bddce5 2071 * Receive a datagram from a socket.
1da177e4
LT
2072 */
2073
b7c0ddf5
JG
2074SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2075 unsigned int, flags)
1da177e4 2076{
7a09e1eb 2077 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2078}
2079
83f0c10b
FW
2080static bool sock_use_custom_sol_socket(const struct socket *sock)
2081{
2082 const struct sock *sk = sock->sk;
2083
2084 /* Use sock->ops->setsockopt() for MPTCP */
2085 return IS_ENABLED(CONFIG_MPTCP) &&
2086 sk->sk_protocol == IPPROTO_MPTCP &&
2087 sk->sk_type == SOCK_STREAM &&
2088 (sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
2089}
2090
1da177e4
LT
2091/*
2092 * Set a socket option. Because we don't know the option lengths we have
2093 * to pass the user mode parameter for the protocols to sort out.
2094 */
a7b75c5a 2095int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
55db9c0e 2096 int optlen)
1da177e4 2097{
519a8a6c 2098 sockptr_t optval = USER_SOCKPTR(user_optval);
0d01da6a 2099 char *kernel_optval = NULL;
6cb153ca 2100 int err, fput_needed;
1da177e4
LT
2101 struct socket *sock;
2102
2103 if (optlen < 0)
2104 return -EINVAL;
89bddce5
SH
2105
2106 sock = sockfd_lookup_light(fd, &err, &fput_needed);
4a367299
CH
2107 if (!sock)
2108 return err;
1da177e4 2109
4a367299
CH
2110 err = security_socket_setsockopt(sock, level, optname);
2111 if (err)
2112 goto out_put;
0d01da6a 2113
55db9c0e
CH
2114 if (!in_compat_syscall())
2115 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
a7b75c5a 2116 user_optval, &optlen,
55db9c0e 2117 &kernel_optval);
4a367299
CH
2118 if (err < 0)
2119 goto out_put;
2120 if (err > 0) {
2121 err = 0;
2122 goto out_put;
2123 }
0d01da6a 2124
a7b75c5a
CH
2125 if (kernel_optval)
2126 optval = KERNEL_SOCKPTR(kernel_optval);
4a367299 2127 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2128 err = sock_setsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2129 else if (unlikely(!sock->ops->setsockopt))
2130 err = -EOPNOTSUPP;
4a367299
CH
2131 else
2132 err = sock->ops->setsockopt(sock, level, optname, optval,
89bddce5 2133 optlen);
a7b75c5a 2134 kfree(kernel_optval);
4a367299
CH
2135out_put:
2136 fput_light(sock->file, fput_needed);
1da177e4
LT
2137 return err;
2138}
2139
cc36dca0
DB
2140SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2141 char __user *, optval, int, optlen)
2142{
2143 return __sys_setsockopt(fd, level, optname, optval, optlen);
2144}
2145
1da177e4
LT
2146/*
2147 * Get a socket option. Because we don't know the option lengths we have
2148 * to pass a user mode parameter for the protocols to sort out.
2149 */
55db9c0e
CH
2150int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2151 int __user *optlen)
1da177e4 2152{
6cb153ca 2153 int err, fput_needed;
1da177e4 2154 struct socket *sock;
0d01da6a 2155 int max_optlen;
1da177e4 2156
89bddce5 2157 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2158 if (!sock)
2159 return err;
2160
2161 err = security_socket_getsockopt(sock, level, optname);
2162 if (err)
2163 goto out_put;
1da177e4 2164
55db9c0e
CH
2165 if (!in_compat_syscall())
2166 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2167
d8a9b38f
CH
2168 if (level == SOL_SOCKET)
2169 err = sock_getsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2170 else if (unlikely(!sock->ops->getsockopt))
2171 err = -EOPNOTSUPP;
d8a9b38f
CH
2172 else
2173 err = sock->ops->getsockopt(sock, level, optname, optval,
89bddce5 2174 optlen);
0d01da6a 2175
55db9c0e
CH
2176 if (!in_compat_syscall())
2177 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2178 optval, optlen, max_optlen,
2179 err);
6cb153ca 2180out_put:
d8a9b38f 2181 fput_light(sock->file, fput_needed);
1da177e4
LT
2182 return err;
2183}
2184
13a2d70e
DB
2185SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2186 char __user *, optval, int __user *, optlen)
2187{
2188 return __sys_getsockopt(fd, level, optname, optval, optlen);
2189}
2190
1da177e4
LT
2191/*
2192 * Shutdown a socket.
2193 */
2194
005a1aea 2195int __sys_shutdown(int fd, int how)
1da177e4 2196{
6cb153ca 2197 int err, fput_needed;
1da177e4
LT
2198 struct socket *sock;
2199
89bddce5
SH
2200 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2201 if (sock != NULL) {
1da177e4 2202 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2203 if (!err)
2204 err = sock->ops->shutdown(sock, how);
2205 fput_light(sock->file, fput_needed);
1da177e4
LT
2206 }
2207 return err;
2208}
2209
005a1aea
DB
2210SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2211{
2212 return __sys_shutdown(fd, how);
2213}
2214
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() expands to the address of @member in either the native or
 * the compat message header.  It relies on the expansion site having a
 * variable named 'flags' (tested for MSG_CMSG_COMPAT) and, next to the
 * pointer passed as @msg, a second pointer named '<msg>_compat'.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2221
c71d8ebe
TH
2222struct used_address {
2223 struct sockaddr_storage name;
2224 unsigned int name_len;
2225};
2226
0a384abf
JA
2227int __copy_msghdr_from_user(struct msghdr *kmsg,
2228 struct user_msghdr __user *umsg,
2229 struct sockaddr __user **save_addr,
2230 struct iovec __user **uiov, size_t *nsegs)
1661bf36 2231{
ffb07550 2232 struct user_msghdr msg;
08adb7da
AV
2233 ssize_t err;
2234
ffb07550 2235 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2236 return -EFAULT;
dbb490b9 2237
1f466e1f
CH
2238 kmsg->msg_control_is_user = true;
2239 kmsg->msg_control_user = msg.msg_control;
ffb07550
AV
2240 kmsg->msg_controllen = msg.msg_controllen;
2241 kmsg->msg_flags = msg.msg_flags;
2242
2243 kmsg->msg_namelen = msg.msg_namelen;
2244 if (!msg.msg_name)
6a2a2b3a
AS
2245 kmsg->msg_namelen = 0;
2246
dbb490b9
ML
2247 if (kmsg->msg_namelen < 0)
2248 return -EINVAL;
2249
1661bf36 2250 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2251 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2252
2253 if (save_addr)
ffb07550 2254 *save_addr = msg.msg_name;
08adb7da 2255
ffb07550 2256 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2257 if (!save_addr) {
864d9664
PA
2258 err = move_addr_to_kernel(msg.msg_name,
2259 kmsg->msg_namelen,
08adb7da
AV
2260 kmsg->msg_name);
2261 if (err < 0)
2262 return err;
2263 }
2264 } else {
2265 kmsg->msg_name = NULL;
2266 kmsg->msg_namelen = 0;
2267 }
2268
ffb07550 2269 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2270 return -EMSGSIZE;
2271
0345f931 2272 kmsg->msg_iocb = NULL;
0a384abf
JA
2273 *uiov = msg.msg_iov;
2274 *nsegs = msg.msg_iovlen;
2275 return 0;
2276}
2277
2278static int copy_msghdr_from_user(struct msghdr *kmsg,
2279 struct user_msghdr __user *umsg,
2280 struct sockaddr __user **save_addr,
2281 struct iovec **iov)
2282{
2283 struct user_msghdr msg;
2284 ssize_t err;
2285
2286 err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
2287 &msg.msg_iovlen);
2288 if (err)
2289 return err;
0345f931 2290
87e5e6da 2291 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2292 msg.msg_iov, msg.msg_iovlen,
da184284 2293 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2294 return err < 0 ? err : 0;
1661bf36
DC
2295}
2296
4257c8ca
JA
2297static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2298 unsigned int flags, struct used_address *used_address,
2299 unsigned int allowed_msghdr_flags)
1da177e4 2300{
b9d717a7 2301 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2302 __aligned(sizeof(__kernel_size_t));
89bddce5 2303 /* 20 is size of ipv6_pktinfo */
1da177e4 2304 unsigned char *ctl_buf = ctl;
d8725c86 2305 int ctl_len;
08adb7da 2306 ssize_t err;
89bddce5 2307
1da177e4
LT
2308 err = -ENOBUFS;
2309
228e548e 2310 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2311 goto out;
28a94d8f 2312 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2313 ctl_len = msg_sys->msg_controllen;
1da177e4 2314 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2315 err =
228e548e 2316 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2317 sizeof(ctl));
1da177e4 2318 if (err)
4257c8ca 2319 goto out;
228e548e
AB
2320 ctl_buf = msg_sys->msg_control;
2321 ctl_len = msg_sys->msg_controllen;
1da177e4 2322 } else if (ctl_len) {
ac4340fc
DM
2323 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2324 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2325 if (ctl_len > sizeof(ctl)) {
1da177e4 2326 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2327 if (ctl_buf == NULL)
4257c8ca 2328 goto out;
1da177e4
LT
2329 }
2330 err = -EFAULT;
1f466e1f 2331 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2332 goto out_freectl;
228e548e 2333 msg_sys->msg_control = ctl_buf;
1f466e1f 2334 msg_sys->msg_control_is_user = false;
1da177e4 2335 }
228e548e 2336 msg_sys->msg_flags = flags;
1da177e4
LT
2337
2338 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2339 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2340 /*
2341 * If this is sendmmsg() and current destination address is same as
2342 * previously succeeded address, omit asking LSM's decision.
2343 * used_address->name_len is initialized to UINT_MAX so that the first
2344 * destination address never matches.
2345 */
bc909d9d
MD
2346 if (used_address && msg_sys->msg_name &&
2347 used_address->name_len == msg_sys->msg_namelen &&
2348 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2349 used_address->name_len)) {
d8725c86 2350 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2351 goto out_freectl;
2352 }
d8725c86 2353 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2354 /*
2355 * If this is sendmmsg() and sending to current destination address was
2356 * successful, remember it.
2357 */
2358 if (used_address && err >= 0) {
2359 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2360 if (msg_sys->msg_name)
2361 memcpy(&used_address->name, msg_sys->msg_name,
2362 used_address->name_len);
c71d8ebe 2363 }
1da177e4
LT
2364
2365out_freectl:
89bddce5 2366 if (ctl_buf != ctl)
1da177e4 2367 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2368out:
2369 return err;
2370}
2371
03b1230c
JA
2372int sendmsg_copy_msghdr(struct msghdr *msg,
2373 struct user_msghdr __user *umsg, unsigned flags,
2374 struct iovec **iov)
4257c8ca
JA
2375{
2376 int err;
2377
2378 if (flags & MSG_CMSG_COMPAT) {
2379 struct compat_msghdr __user *msg_compat;
2380
2381 msg_compat = (struct compat_msghdr __user *) umsg;
2382 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2383 } else {
2384 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2385 }
2386 if (err < 0)
2387 return err;
2388
2389 return 0;
2390}
2391
2392static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2393 struct msghdr *msg_sys, unsigned int flags,
2394 struct used_address *used_address,
2395 unsigned int allowed_msghdr_flags)
2396{
2397 struct sockaddr_storage address;
2398 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2399 ssize_t err;
2400
2401 msg_sys->msg_name = &address;
2402
2403 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2404 if (err < 0)
2405 return err;
2406
2407 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2408 allowed_msghdr_flags);
da184284 2409 kfree(iov);
228e548e
AB
2410 return err;
2411}
2412
2413/*
2414 * BSD sendmsg interface
2415 */
03b1230c 2416long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2417 unsigned int flags)
2418{
d69e0779 2419 /* disallow ancillary data requests from this path */
03b1230c
JA
2420 if (msg->msg_control || msg->msg_controllen)
2421 return -EINVAL;
d69e0779 2422
03b1230c 2423 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2424}
228e548e 2425
e1834a32
DB
2426long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2427 bool forbid_cmsg_compat)
228e548e
AB
2428{
2429 int fput_needed, err;
2430 struct msghdr msg_sys;
1be374a0
AL
2431 struct socket *sock;
2432
e1834a32
DB
2433 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2434 return -EINVAL;
2435
1be374a0 2436 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2437 if (!sock)
2438 goto out;
2439
28a94d8f 2440 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2441
6cb153ca 2442 fput_light(sock->file, fput_needed);
89bddce5 2443out:
1da177e4
LT
2444 return err;
2445}
2446
666547ff 2447SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2448{
e1834a32 2449 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2450}
2451
228e548e
AB
2452/*
2453 * Linux sendmmsg interface
2454 */
2455
2456int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2457 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2458{
2459 int fput_needed, err, datagrams;
2460 struct socket *sock;
2461 struct mmsghdr __user *entry;
2462 struct compat_mmsghdr __user *compat_entry;
2463 struct msghdr msg_sys;
c71d8ebe 2464 struct used_address used_address;
f092276d 2465 unsigned int oflags = flags;
228e548e 2466
e1834a32
DB
2467 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2468 return -EINVAL;
2469
98382f41
AB
2470 if (vlen > UIO_MAXIOV)
2471 vlen = UIO_MAXIOV;
228e548e
AB
2472
2473 datagrams = 0;
2474
2475 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2476 if (!sock)
2477 return err;
2478
c71d8ebe 2479 used_address.name_len = UINT_MAX;
228e548e
AB
2480 entry = mmsg;
2481 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2482 err = 0;
f092276d 2483 flags |= MSG_BATCH;
228e548e
AB
2484
2485 while (datagrams < vlen) {
f092276d
TH
2486 if (datagrams == vlen - 1)
2487 flags = oflags;
2488
228e548e 2489 if (MSG_CMSG_COMPAT & flags) {
666547ff 2490 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2491 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2492 if (err < 0)
2493 break;
2494 err = __put_user(err, &compat_entry->msg_len);
2495 ++compat_entry;
2496 } else {
a7526eb5 2497 err = ___sys_sendmsg(sock,
666547ff 2498 (struct user_msghdr __user *)entry,
28a94d8f 2499 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2500 if (err < 0)
2501 break;
2502 err = put_user(err, &entry->msg_len);
2503 ++entry;
2504 }
2505
2506 if (err)
2507 break;
2508 ++datagrams;
3023898b
SHY
2509 if (msg_data_left(&msg_sys))
2510 break;
a78cb84c 2511 cond_resched();
228e548e
AB
2512 }
2513
228e548e
AB
2514 fput_light(sock->file, fput_needed);
2515
728ffb86
AB
2516 /* We only return an error if no datagrams were able to be sent */
2517 if (datagrams != 0)
228e548e
AB
2518 return datagrams;
2519
228e548e
AB
2520 return err;
2521}
2522
2523SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2524 unsigned int, vlen, unsigned int, flags)
2525{
e1834a32 2526 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2527}
2528
03b1230c
JA
2529int recvmsg_copy_msghdr(struct msghdr *msg,
2530 struct user_msghdr __user *umsg, unsigned flags,
2531 struct sockaddr __user **uaddr,
2532 struct iovec **iov)
1da177e4 2533{
08adb7da 2534 ssize_t err;
1da177e4 2535
4257c8ca
JA
2536 if (MSG_CMSG_COMPAT & flags) {
2537 struct compat_msghdr __user *msg_compat;
1da177e4 2538
4257c8ca
JA
2539 msg_compat = (struct compat_msghdr __user *) umsg;
2540 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2541 } else {
2542 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2543 }
1da177e4 2544 if (err < 0)
da184284 2545 return err;
1da177e4 2546
4257c8ca
JA
2547 return 0;
2548}
2549
2550static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2551 struct user_msghdr __user *msg,
2552 struct sockaddr __user *uaddr,
2553 unsigned int flags, int nosec)
2554{
2555 struct compat_msghdr __user *msg_compat =
2556 (struct compat_msghdr __user *) msg;
2557 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2558 struct sockaddr_storage addr;
2559 unsigned long cmsg_ptr;
2560 int len;
2561 ssize_t err;
2562
2563 msg_sys->msg_name = &addr;
a2e27255
ACM
2564 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2565 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2566
f3d33426
HFS
2567 /* We assume all kernel code knows the size of sockaddr_storage */
2568 msg_sys->msg_namelen = 0;
2569
1da177e4
LT
2570 if (sock->file->f_flags & O_NONBLOCK)
2571 flags |= MSG_DONTWAIT;
1af66221
ED
2572
2573 if (unlikely(nosec))
2574 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2575 else
2576 err = sock_recvmsg(sock, msg_sys, flags);
2577
1da177e4 2578 if (err < 0)
4257c8ca 2579 goto out;
1da177e4
LT
2580 len = err;
2581
2582 if (uaddr != NULL) {
43db362d 2583 err = move_addr_to_user(&addr,
a2e27255 2584 msg_sys->msg_namelen, uaddr,
89bddce5 2585 uaddr_len);
1da177e4 2586 if (err < 0)
4257c8ca 2587 goto out;
1da177e4 2588 }
a2e27255 2589 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2590 COMPAT_FLAGS(msg));
1da177e4 2591 if (err)
4257c8ca 2592 goto out;
1da177e4 2593 if (MSG_CMSG_COMPAT & flags)
a2e27255 2594 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2595 &msg_compat->msg_controllen);
2596 else
a2e27255 2597 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2598 &msg->msg_controllen);
2599 if (err)
4257c8ca 2600 goto out;
1da177e4 2601 err = len;
4257c8ca
JA
2602out:
2603 return err;
2604}
2605
2606static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2607 struct msghdr *msg_sys, unsigned int flags, int nosec)
2608{
2609 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2610 /* user mode address pointers */
2611 struct sockaddr __user *uaddr;
2612 ssize_t err;
2613
2614 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2615 if (err < 0)
2616 return err;
1da177e4 2617
4257c8ca 2618 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2619 kfree(iov);
a2e27255
ACM
2620 return err;
2621}
2622
2623/*
2624 * BSD recvmsg interface
2625 */
2626
03b1230c
JA
2627long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2628 struct user_msghdr __user *umsg,
2629 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2630{
583bbf06
LH
2631 if (msg->msg_control || msg->msg_controllen) {
2632 /* disallow ancillary data reqs unless cmsg is plain data */
2633 if (!(sock->ops->flags & PROTO_CMSG_DATA_ONLY))
2634 return -EINVAL;
2635 }
aa1fa28f 2636
03b1230c 2637 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2638}
2639
e1834a32
DB
2640long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2641 bool forbid_cmsg_compat)
a2e27255
ACM
2642{
2643 int fput_needed, err;
2644 struct msghdr msg_sys;
1be374a0
AL
2645 struct socket *sock;
2646
e1834a32
DB
2647 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2648 return -EINVAL;
2649
1be374a0 2650 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2651 if (!sock)
2652 goto out;
2653
a7526eb5 2654 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2655
6cb153ca 2656 fput_light(sock->file, fput_needed);
1da177e4
LT
2657out:
2658 return err;
2659}
2660
666547ff 2661SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2662 unsigned int, flags)
2663{
e1834a32 2664 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2665}
2666
a2e27255
ACM
2667/*
2668 * Linux recvmmsg interface
2669 */
2670
e11d4284
AB
2671static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2672 unsigned int vlen, unsigned int flags,
2673 struct timespec64 *timeout)
a2e27255
ACM
2674{
2675 int fput_needed, err, datagrams;
2676 struct socket *sock;
2677 struct mmsghdr __user *entry;
d7256d0e 2678 struct compat_mmsghdr __user *compat_entry;
a2e27255 2679 struct msghdr msg_sys;
766b9f92
DD
2680 struct timespec64 end_time;
2681 struct timespec64 timeout64;
a2e27255
ACM
2682
2683 if (timeout &&
2684 poll_select_set_timeout(&end_time, timeout->tv_sec,
2685 timeout->tv_nsec))
2686 return -EINVAL;
2687
2688 datagrams = 0;
2689
2690 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2691 if (!sock)
2692 return err;
2693
7797dc41
SHY
2694 if (likely(!(flags & MSG_ERRQUEUE))) {
2695 err = sock_error(sock->sk);
2696 if (err) {
2697 datagrams = err;
2698 goto out_put;
2699 }
e623a9e9 2700 }
a2e27255
ACM
2701
2702 entry = mmsg;
d7256d0e 2703 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2704
2705 while (datagrams < vlen) {
2706 /*
2707 * No need to ask LSM for more than the first datagram.
2708 */
d7256d0e 2709 if (MSG_CMSG_COMPAT & flags) {
666547ff 2710 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2711 &msg_sys, flags & ~MSG_WAITFORONE,
2712 datagrams);
d7256d0e
JMG
2713 if (err < 0)
2714 break;
2715 err = __put_user(err, &compat_entry->msg_len);
2716 ++compat_entry;
2717 } else {
a7526eb5 2718 err = ___sys_recvmsg(sock,
666547ff 2719 (struct user_msghdr __user *)entry,
a7526eb5
AL
2720 &msg_sys, flags & ~MSG_WAITFORONE,
2721 datagrams);
d7256d0e
JMG
2722 if (err < 0)
2723 break;
2724 err = put_user(err, &entry->msg_len);
2725 ++entry;
2726 }
2727
a2e27255
ACM
2728 if (err)
2729 break;
a2e27255
ACM
2730 ++datagrams;
2731
71c5c159
BB
2732 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2733 if (flags & MSG_WAITFORONE)
2734 flags |= MSG_DONTWAIT;
2735
a2e27255 2736 if (timeout) {
766b9f92 2737 ktime_get_ts64(&timeout64);
c2e6c856 2738 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2739 if (timeout->tv_sec < 0) {
2740 timeout->tv_sec = timeout->tv_nsec = 0;
2741 break;
2742 }
2743
2744 /* Timeout, return less than vlen datagrams */
2745 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2746 break;
2747 }
2748
2749 /* Out of band data, return right away */
2750 if (msg_sys.msg_flags & MSG_OOB)
2751 break;
a78cb84c 2752 cond_resched();
a2e27255
ACM
2753 }
2754
a2e27255 2755 if (err == 0)
34b88a68
ACM
2756 goto out_put;
2757
2758 if (datagrams == 0) {
2759 datagrams = err;
2760 goto out_put;
2761 }
a2e27255 2762
34b88a68
ACM
2763 /*
2764 * We may return less entries than requested (vlen) if the
2765 * sock is non block and there aren't enough datagrams...
2766 */
2767 if (err != -EAGAIN) {
a2e27255 2768 /*
34b88a68
ACM
2769 * ... or if recvmsg returns an error after we
2770 * received some datagrams, where we record the
2771 * error to return on the next call or if the
2772 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2773 */
34b88a68 2774 sock->sk->sk_err = -err;
a2e27255 2775 }
34b88a68
ACM
2776out_put:
2777 fput_light(sock->file, fput_needed);
a2e27255 2778
34b88a68 2779 return datagrams;
a2e27255
ACM
2780}
2781
e11d4284
AB
2782int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2783 unsigned int vlen, unsigned int flags,
2784 struct __kernel_timespec __user *timeout,
2785 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2786{
2787 int datagrams;
c2e6c856 2788 struct timespec64 timeout_sys;
a2e27255 2789
e11d4284
AB
2790 if (timeout && get_timespec64(&timeout_sys, timeout))
2791 return -EFAULT;
a2e27255 2792
e11d4284 2793 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2794 return -EFAULT;
2795
e11d4284
AB
2796 if (!timeout && !timeout32)
2797 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2798
2799 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2800
e11d4284
AB
2801 if (datagrams <= 0)
2802 return datagrams;
2803
2804 if (timeout && put_timespec64(&timeout_sys, timeout))
2805 datagrams = -EFAULT;
2806
2807 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2808 datagrams = -EFAULT;
2809
2810 return datagrams;
2811}
2812
1255e269
DB
2813SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2814 unsigned int, vlen, unsigned int, flags,
c2e6c856 2815 struct __kernel_timespec __user *, timeout)
1255e269 2816{
e11d4284
AB
2817 if (flags & MSG_CMSG_COMPAT)
2818 return -EINVAL;
2819
2820 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2821}
2822
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg variant taking an old 32-bit timespec timeout.
 * MSG_CMSG_COMPAT from user space is refused with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2834
a2e27255 2835#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall.
 *
 * Indexed by the socketcall number; each entry is the size in bytes of the
 * argument block copied from user space for that call, i.e. the number of
 * unsigned long arguments times sizeof(unsigned long).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	AL(4), AL(5), AL(4)
};

#undef AL
2846
2847/*
89bddce5 2848 * System call vectors.
1da177e4
LT
2849 *
2850 * Argument checking cleaned up. Saved 20% in size.
2851 * This function doesn't need to set the kernel lock because
89bddce5 2852 * it is set by the callees.
1da177e4
LT
2853 */
2854
3e0fa65f 2855SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2856{
2950fa9d 2857 unsigned long a[AUDITSC_ARGS];
89bddce5 2858 unsigned long a0, a1;
1da177e4 2859 int err;
47379052 2860 unsigned int len;
1da177e4 2861
228e548e 2862 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2863 return -EINVAL;
c8e8cd57 2864 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2865
47379052
AV
2866 len = nargs[call];
2867 if (len > sizeof(a))
2868 return -EINVAL;
2869
1da177e4 2870 /* copy_from_user should be SMP safe. */
47379052 2871 if (copy_from_user(a, args, len))
1da177e4 2872 return -EFAULT;
3ec3b2fb 2873
2950fa9d
CG
2874 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2875 if (err)
2876 return err;
3ec3b2fb 2877
89bddce5
SH
2878 a0 = a[0];
2879 a1 = a[1];
2880
2881 switch (call) {
2882 case SYS_SOCKET:
9d6a15c3 2883 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2884 break;
2885 case SYS_BIND:
a87d35d8 2886 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2887 break;
2888 case SYS_CONNECT:
1387c2c2 2889 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2890 break;
2891 case SYS_LISTEN:
25e290ee 2892 err = __sys_listen(a0, a1);
89bddce5
SH
2893 break;
2894 case SYS_ACCEPT:
4541e805
DB
2895 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2896 (int __user *)a[2], 0);
89bddce5
SH
2897 break;
2898 case SYS_GETSOCKNAME:
2899 err =
8882a107
DB
2900 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2901 (int __user *)a[2]);
89bddce5
SH
2902 break;
2903 case SYS_GETPEERNAME:
2904 err =
b21c8f83
DB
2905 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2906 (int __user *)a[2]);
89bddce5
SH
2907 break;
2908 case SYS_SOCKETPAIR:
6debc8d8 2909 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2910 break;
2911 case SYS_SEND:
f3bf896b
DB
2912 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2913 NULL, 0);
89bddce5
SH
2914 break;
2915 case SYS_SENDTO:
211b634b
DB
2916 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2917 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2918 break;
2919 case SYS_RECV:
d27e9afc
DB
2920 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2921 NULL, NULL);
89bddce5
SH
2922 break;
2923 case SYS_RECVFROM:
7a09e1eb
DB
2924 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2925 (struct sockaddr __user *)a[4],
2926 (int __user *)a[5]);
89bddce5
SH
2927 break;
2928 case SYS_SHUTDOWN:
005a1aea 2929 err = __sys_shutdown(a0, a1);
89bddce5
SH
2930 break;
2931 case SYS_SETSOCKOPT:
cc36dca0
DB
2932 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2933 a[4]);
89bddce5
SH
2934 break;
2935 case SYS_GETSOCKOPT:
2936 err =
13a2d70e
DB
2937 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2938 (int __user *)a[4]);
89bddce5
SH
2939 break;
2940 case SYS_SENDMSG:
e1834a32
DB
2941 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2942 a[2], true);
89bddce5 2943 break;
228e548e 2944 case SYS_SENDMMSG:
e1834a32
DB
2945 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2946 a[3], true);
228e548e 2947 break;
89bddce5 2948 case SYS_RECVMSG:
e1834a32
DB
2949 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2950 a[2], true);
89bddce5 2951 break;
a2e27255 2952 case SYS_RECVMMSG:
3ca47e95 2953 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
2954 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2955 a[2], a[3],
2956 (struct __kernel_timespec __user *)a[4],
2957 NULL);
2958 else
2959 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2960 a[2], a[3], NULL,
2961 (struct old_timespec32 __user *)a[4]);
a2e27255 2962 break;
de11defe 2963 case SYS_ACCEPT4:
4541e805
DB
2964 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2965 (int __user *)a[2], a[3]);
aaca0bdc 2966 break;
89bddce5
SH
2967 default:
2968 err = -EINVAL;
2969 break;
1da177e4
LT
2970 }
2971 return err;
2972}
2973
89bddce5 2974#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2975
55737fda
SH
2976/**
2977 * sock_register - add a socket protocol handler
2978 * @ops: description of protocol
2979 *
1da177e4
LT
2980 * This function is called by a protocol handler that wants to
2981 * advertise its address family, and have it linked into the
e793c0f7 2982 * socket interface. The value ops->family corresponds to the
55737fda 2983 * socket system call protocol family.
1da177e4 2984 */
f0fd27d4 2985int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2986{
2987 int err;
2988
2989 if (ops->family >= NPROTO) {
3410f22e 2990 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2991 return -ENOBUFS;
2992 }
55737fda
SH
2993
2994 spin_lock(&net_family_lock);
190683a9
ED
2995 if (rcu_dereference_protected(net_families[ops->family],
2996 lockdep_is_held(&net_family_lock)))
55737fda
SH
2997 err = -EEXIST;
2998 else {
cf778b00 2999 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
3000 err = 0;
3001 }
55737fda
SH
3002 spin_unlock(&net_family_lock);
3003
3410f22e 3004 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
3005 return err;
3006}
c6d409cf 3007EXPORT_SYMBOL(sock_register);
1da177e4 3008
55737fda
SH
3009/**
3010 * sock_unregister - remove a protocol handler
3011 * @family: protocol family to remove
3012 *
1da177e4
LT
3013 * This function is called by a protocol handler that wants to
3014 * remove its address family, and have it unlinked from the
55737fda
SH
3015 * new socket creation.
3016 *
3017 * If protocol handler is a module, then it can use module reference
3018 * counts to protect against new references. If protocol handler is not
3019 * a module then it needs to provide its own protection in
3020 * the ops->create routine.
1da177e4 3021 */
f0fd27d4 3022void sock_unregister(int family)
1da177e4 3023{
f0fd27d4 3024 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3025
55737fda 3026 spin_lock(&net_family_lock);
a9b3cd7f 3027 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3028 spin_unlock(&net_family_lock);
3029
3030 synchronize_rcu();
3031
3410f22e 3032 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 3033}
c6d409cf 3034EXPORT_SYMBOL(sock_unregister);
1da177e4 3035
bf2ae2e4
XL
3036bool sock_is_registered(int family)
3037{
66b51b0a 3038 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3039}
3040
77d76ea3 3041static int __init sock_init(void)
1da177e4 3042{
b3e19d92 3043 int err;
2ca794e5
EB
3044 /*
3045 * Initialize the network sysctl infrastructure.
3046 */
3047 err = net_sysctl_init();
3048 if (err)
3049 goto out;
b3e19d92 3050
1da177e4 3051 /*
89bddce5 3052 * Initialize skbuff SLAB cache
1da177e4
LT
3053 */
3054 skb_init();
1da177e4
LT
3055
3056 /*
89bddce5 3057 * Initialize the protocols module.
1da177e4
LT
3058 */
3059
3060 init_inodecache();
b3e19d92
NP
3061
3062 err = register_filesystem(&sock_fs_type);
3063 if (err)
47260ba9 3064 goto out;
1da177e4 3065 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3066 if (IS_ERR(sock_mnt)) {
3067 err = PTR_ERR(sock_mnt);
3068 goto out_mount;
3069 }
77d76ea3
AK
3070
3071 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3072 */
3073
3074#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3075 err = netfilter_init();
3076 if (err)
3077 goto out;
1da177e4 3078#endif
cbeb321a 3079
408eccce 3080 ptp_classifier_init();
c1f19b51 3081
b3e19d92
NP
3082out:
3083 return err;
3084
3085out_mount:
3086 unregister_filesystem(&sock_fs_type);
b3e19d92 3087 goto out;
1da177e4
LT
3088}
3089
77d76ea3
AK
3090core_initcall(sock_init); /* early initcall */
3091
1da177e4
LT
3092#ifdef CONFIG_PROC_FS
3093void socket_seq_show(struct seq_file *seq)
3094{
648845ab
TZ
3095 seq_printf(seq, "sockets: used %d\n",
3096 sock_inuse_get(seq->private));
1da177e4 3097}
89bddce5 3098#endif /* CONFIG_PROC_FS */
1da177e4 3099
89bbfc95 3100#ifdef CONFIG_COMPAT
36fd633e 3101static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 3102{
6b96018b 3103 struct compat_ifconf ifc32;
7a229387 3104 struct ifconf ifc;
7a229387
AB
3105 int err;
3106
6b96018b 3107 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3108 return -EFAULT;
3109
36fd633e
AV
3110 ifc.ifc_len = ifc32.ifc_len;
3111 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 3112
36fd633e
AV
3113 rtnl_lock();
3114 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
3115 rtnl_unlock();
7a229387
AB
3116 if (err)
3117 return err;
3118
36fd633e 3119 ifc32.ifc_len = ifc.ifc_len;
6b96018b 3120 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3121 return -EFAULT;
3122
3123 return 0;
3124}
3125
6b96018b 3126static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 3127{
3a7da39d
BH
3128 struct compat_ethtool_rxnfc __user *compat_rxnfc;
3129 bool convert_in = false, convert_out = false;
44c02a2c
AV
3130 size_t buf_size = 0;
3131 struct ethtool_rxnfc __user *rxnfc = NULL;
3132 struct ifreq ifr;
3a7da39d
BH
3133 u32 rule_cnt = 0, actual_rule_cnt;
3134 u32 ethcmd;
7a229387 3135 u32 data;
3a7da39d 3136 int ret;
7a229387 3137
3a7da39d
BH
3138 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
3139 return -EFAULT;
7a229387 3140
3a7da39d
BH
3141 compat_rxnfc = compat_ptr(data);
3142
3143 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
3144 return -EFAULT;
3145
3a7da39d
BH
3146 /* Most ethtool structures are defined without padding.
3147 * Unfortunately struct ethtool_rxnfc is an exception.
3148 */
3149 switch (ethcmd) {
3150 default:
3151 break;
3152 case ETHTOOL_GRXCLSRLALL:
3153 /* Buffer size is variable */
3154 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3155 return -EFAULT;
3156 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3157 return -ENOMEM;
3158 buf_size += rule_cnt * sizeof(u32);
7c7ab580 3159 fallthrough;
3a7da39d
BH
3160 case ETHTOOL_GRXRINGS:
3161 case ETHTOOL_GRXCLSRLCNT:
3162 case ETHTOOL_GRXCLSRULE:
55664f32 3163 case ETHTOOL_SRXCLSRLINS:
3a7da39d 3164 convert_out = true;
7c7ab580 3165 fallthrough;
3a7da39d 3166 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3167 buf_size += sizeof(struct ethtool_rxnfc);
3168 convert_in = true;
44c02a2c 3169 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3170 break;
3171 }
3172
44c02a2c 3173 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3174 return -EFAULT;
3175
44c02a2c 3176 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3177
3a7da39d 3178 if (convert_in) {
127fe533 3179 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3180 * fs.ring_cookie and at the end of fs, but nowhere else.
3181 */
127fe533
AD
3182 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3183 sizeof(compat_rxnfc->fs.m_ext) !=
3184 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3185 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3186 BUILD_BUG_ON(
3187 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3188 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3189 offsetof(struct ethtool_rxnfc, fs.location) -
3190 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3191
3192 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3193 (void __user *)(&rxnfc->fs.m_ext + 1) -
3194 (void __user *)rxnfc) ||
3a7da39d
BH
3195 copy_in_user(&rxnfc->fs.ring_cookie,
3196 &compat_rxnfc->fs.ring_cookie,
954b1244 3197 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3198 (void __user *)&rxnfc->fs.ring_cookie))
3199 return -EFAULT;
3200 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3201 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3202 return -EFAULT;
3203 } else if (copy_in_user(&rxnfc->rule_cnt,
3204 &compat_rxnfc->rule_cnt,
3205 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3206 return -EFAULT;
3207 }
3208
44c02a2c 3209 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3210 if (ret)
3211 return ret;
3212
3213 if (convert_out) {
3214 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3215 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3216 (const void __user *)rxnfc) ||
3a7da39d
BH
3217 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3218 &rxnfc->fs.ring_cookie,
954b1244
SH
3219 (const void __user *)(&rxnfc->fs.location + 1) -
3220 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3221 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3222 sizeof(rxnfc->rule_cnt)))
3223 return -EFAULT;
3224
3225 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3226 /* As an optimisation, we only copy the actual
3227 * number of rules that the underlying
3228 * function returned. Since Mallory might
3229 * change the rule count in user memory, we
3230 * check that it is less than the rule count
3231 * originally given (as the user buffer size),
3232 * which has been range-checked.
3233 */
3234 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3235 return -EFAULT;
3236 if (actual_rule_cnt < rule_cnt)
3237 rule_cnt = actual_rule_cnt;
3238 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3239 &rxnfc->rule_locs[0],
3240 rule_cnt * sizeof(u32)))
3241 return -EFAULT;
3242 }
3243 }
3244
3245 return 0;
7a229387
AB
3246}
3247
7a50a240
AB
3248static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3249{
7a50a240 3250 compat_uptr_t uptr32;
44c02a2c
AV
3251 struct ifreq ifr;
3252 void __user *saved;
3253 int err;
7a50a240 3254
44c02a2c 3255 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3256 return -EFAULT;
3257
3258 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3259 return -EFAULT;
3260
44c02a2c
AV
3261 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3262 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3263
44c02a2c
AV
3264 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3265 if (!err) {
3266 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3267 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3268 err = -EFAULT;
ccbd6a5a 3269 }
44c02a2c 3270 return err;
7a229387
AB
3271}
3272
590d4693
BH
3273/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3274static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3275 struct compat_ifreq __user *u_ifreq32)
7a229387 3276{
44c02a2c 3277 struct ifreq ifreq;
7a229387
AB
3278 u32 data32;
3279
44c02a2c 3280 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3281 return -EFAULT;
44c02a2c 3282 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3283 return -EFAULT;
44c02a2c 3284 ifreq.ifr_data = compat_ptr(data32);
7a229387 3285
44c02a2c 3286 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3287}
3288
37ac39bd
JB
3289static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3290 unsigned int cmd,
3291 struct compat_ifreq __user *uifr32)
3292{
3293 struct ifreq __user *uifr;
3294 int err;
3295
3296 /* Handle the fact that while struct ifreq has the same *layout* on
3297 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3298 * which are handled elsewhere, it still has different *size* due to
3299 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3300 * resulting in struct ifreq being 32 and 40 bytes respectively).
3301 * As a result, if the struct happens to be at the end of a page and
3302 * the next page isn't readable/writable, we get a fault. To prevent
3303 * that, copy back and forth to the full size.
3304 */
3305
3306 uifr = compat_alloc_user_space(sizeof(*uifr));
3307 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3308 return -EFAULT;
3309
3310 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3311
3312 if (!err) {
3313 switch (cmd) {
3314 case SIOCGIFFLAGS:
3315 case SIOCGIFMETRIC:
3316 case SIOCGIFMTU:
3317 case SIOCGIFMEM:
3318 case SIOCGIFHWADDR:
3319 case SIOCGIFINDEX:
3320 case SIOCGIFADDR:
3321 case SIOCGIFBRDADDR:
3322 case SIOCGIFDSTADDR:
3323 case SIOCGIFNETMASK:
3324 case SIOCGIFPFLAGS:
3325 case SIOCGIFTXQLEN:
3326 case SIOCGMIIPHY:
3327 case SIOCGMIIREG:
c6c9fee3 3328 case SIOCGIFNAME:
37ac39bd
JB
3329 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3330 err = -EFAULT;
3331 break;
3332 }
3333 }
3334 return err;
3335}
3336
a2116ed2
AB
3337static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3338 struct compat_ifreq __user *uifr32)
3339{
3340 struct ifreq ifr;
3341 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3342 int err;
3343
3344 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3345 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3346 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3347 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3348 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3349 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3350 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3351 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3352 if (err)
3353 return -EFAULT;
3354
44c02a2c 3355 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3356
3357 if (cmd == SIOCGIFMAP && !err) {
3358 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3359 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3360 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3361 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3362 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3363 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3364 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3365 if (err)
3366 err = -EFAULT;
3367 }
3368 return err;
3369}
3370
7a229387
AB
3371/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3372 * for some operations; this forces use of the newer bridge-utils that
25985edc 3373 * use compatible ioctls
7a229387 3374 */
6b96018b 3375static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3376{
6b96018b 3377 compat_ulong_t tmp;
7a229387 3378
6b96018b 3379 if (get_user(tmp, argp))
7a229387
AB
3380 return -EFAULT;
3381 if (tmp == BRCTL_GET_VERSION)
3382 return BRCTL_VERSION + 1;
3383 return -EINVAL;
3384}
3385
6b96018b
AB
3386static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3387 unsigned int cmd, unsigned long arg)
3388{
3389 void __user *argp = compat_ptr(arg);
3390 struct sock *sk = sock->sk;
3391 struct net *net = sock_net(sk);
7a229387 3392
6b96018b 3393 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3394 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3395
3396 switch (cmd) {
3397 case SIOCSIFBR:
3398 case SIOCGIFBR:
3399 return old_bridge_ioctl(argp);
6b96018b 3400 case SIOCGIFCONF:
36fd633e 3401 return compat_dev_ifconf(net, argp);
6b96018b
AB
3402 case SIOCETHTOOL:
3403 return ethtool_ioctl(net, argp);
7a50a240
AB
3404 case SIOCWANDEV:
3405 return compat_siocwandev(net, argp);
a2116ed2
AB
3406 case SIOCGIFMAP:
3407 case SIOCSIFMAP:
3408 return compat_sioc_ifmap(net, cmd, argp);
0768e170
AB
3409 case SIOCGSTAMP_OLD:
3410 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3411 if (!sock->ops->gettstamp)
3412 return -ENOIOCTLCMD;
0768e170 3413 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3414 !COMPAT_USE_64BIT_TIME);
3415
590d4693
BH
3416 case SIOCBONDSLAVEINFOQUERY:
3417 case SIOCBONDINFOQUERY:
a2116ed2 3418 case SIOCSHWTSTAMP:
fd468c74 3419 case SIOCGHWTSTAMP:
590d4693 3420 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3421
3422 case FIOSETOWN:
3423 case SIOCSPGRP:
3424 case FIOGETOWN:
3425 case SIOCGPGRP:
3426 case SIOCBRADDBR:
3427 case SIOCBRDELBR:
3428 case SIOCGIFVLAN:
3429 case SIOCSIFVLAN:
3430 case SIOCADDDLCI:
3431 case SIOCDELDLCI:
c62cce2c 3432 case SIOCGSKNS:
0768e170
AB
3433 case SIOCGSTAMP_NEW:
3434 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3435 return sock_ioctl(file, cmd, arg);
3436
3437 case SIOCGIFFLAGS:
3438 case SIOCSIFFLAGS:
3439 case SIOCGIFMETRIC:
3440 case SIOCSIFMETRIC:
3441 case SIOCGIFMTU:
3442 case SIOCSIFMTU:
3443 case SIOCGIFMEM:
3444 case SIOCSIFMEM:
3445 case SIOCGIFHWADDR:
3446 case SIOCSIFHWADDR:
3447 case SIOCADDMULTI:
3448 case SIOCDELMULTI:
3449 case SIOCGIFINDEX:
6b96018b
AB
3450 case SIOCGIFADDR:
3451 case SIOCSIFADDR:
3452 case SIOCSIFHWBROADCAST:
6b96018b 3453 case SIOCDIFADDR:
6b96018b
AB
3454 case SIOCGIFBRDADDR:
3455 case SIOCSIFBRDADDR:
3456 case SIOCGIFDSTADDR:
3457 case SIOCSIFDSTADDR:
3458 case SIOCGIFNETMASK:
3459 case SIOCSIFNETMASK:
3460 case SIOCSIFPFLAGS:
3461 case SIOCGIFPFLAGS:
3462 case SIOCGIFTXQLEN:
3463 case SIOCSIFTXQLEN:
3464 case SIOCBRADDIF:
3465 case SIOCBRDELIF:
c6c9fee3 3466 case SIOCGIFNAME:
9177efd3
AB
3467 case SIOCSIFNAME:
3468 case SIOCGMIIPHY:
3469 case SIOCGMIIREG:
3470 case SIOCSMIIREG:
f92d4fc9
AV
3471 case SIOCBONDENSLAVE:
3472 case SIOCBONDRELEASE:
3473 case SIOCBONDSETHWADDR:
3474 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3475 return compat_ifreq_ioctl(net, sock, cmd, argp);
3476
6b96018b
AB
3477 case SIOCSARP:
3478 case SIOCGARP:
3479 case SIOCDARP:
c7dc504e 3480 case SIOCOUTQ:
9d7bf41f 3481 case SIOCOUTQNSD:
6b96018b 3482 case SIOCATMARK:
63ff03ab 3483 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3484 }
3485
6b96018b
AB
3486 return -ENOIOCTLCMD;
3487}
7a229387 3488
95c96174 3489static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3490 unsigned long arg)
89bbfc95
SP
3491{
3492 struct socket *sock = file->private_data;
3493 int ret = -ENOIOCTLCMD;
87de87d5
DM
3494 struct sock *sk;
3495 struct net *net;
3496
3497 sk = sock->sk;
3498 net = sock_net(sk);
89bbfc95
SP
3499
3500 if (sock->ops->compat_ioctl)
3501 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3502
87de87d5
DM
3503 if (ret == -ENOIOCTLCMD &&
3504 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3505 ret = compat_wext_handle_ioctl(net, cmd, arg);
3506
6b96018b
AB
3507 if (ret == -ENOIOCTLCMD)
3508 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3509
89bbfc95
SP
3510 return ret;
3511}
3512#endif
3513
8a3c245c
PT
3514/**
3515 * kernel_bind - bind an address to a socket (kernel space)
3516 * @sock: socket
3517 * @addr: address
3518 * @addrlen: length of address
3519 *
3520 * Returns 0 or an error.
3521 */
3522
ac5a488e
SS
3523int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3524{
3525 return sock->ops->bind(sock, addr, addrlen);
3526}
c6d409cf 3527EXPORT_SYMBOL(kernel_bind);
ac5a488e 3528
8a3c245c
PT
3529/**
3530 * kernel_listen - move socket to listening state (kernel space)
3531 * @sock: socket
3532 * @backlog: pending connections queue size
3533 *
3534 * Returns 0 or an error.
3535 */
3536
ac5a488e
SS
3537int kernel_listen(struct socket *sock, int backlog)
3538{
3539 return sock->ops->listen(sock, backlog);
3540}
c6d409cf 3541EXPORT_SYMBOL(kernel_listen);
ac5a488e 3542
8a3c245c
PT
3543/**
3544 * kernel_accept - accept a connection (kernel space)
3545 * @sock: listening socket
3546 * @newsock: new connected socket
3547 * @flags: flags
3548 *
3549 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3550 * If it fails, @newsock is guaranteed to be %NULL.
3551 * Returns 0 or an error.
3552 */
3553
ac5a488e
SS
3554int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3555{
3556 struct sock *sk = sock->sk;
3557 int err;
3558
3559 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3560 newsock);
3561 if (err < 0)
3562 goto done;
3563
cdfbabfb 3564 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3565 if (err < 0) {
3566 sock_release(*newsock);
fa8705b0 3567 *newsock = NULL;
ac5a488e
SS
3568 goto done;
3569 }
3570
3571 (*newsock)->ops = sock->ops;
1b08534e 3572 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3573
3574done:
3575 return err;
3576}
c6d409cf 3577EXPORT_SYMBOL(kernel_accept);
ac5a488e 3578
8a3c245c
PT
3579/**
3580 * kernel_connect - connect a socket (kernel space)
3581 * @sock: socket
3582 * @addr: address
3583 * @addrlen: address length
3584 * @flags: flags (O_NONBLOCK, ...)
3585 *
3586 * For datagram sockets, @addr is the addres to which datagrams are sent
3587 * by default, and the only address from which datagrams are received.
3588 * For stream sockets, attempts to connect to @addr.
3589 * Returns 0 or an error code.
3590 */
3591
ac5a488e 3592int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3593 int flags)
ac5a488e
SS
3594{
3595 return sock->ops->connect(sock, addr, addrlen, flags);
3596}
c6d409cf 3597EXPORT_SYMBOL(kernel_connect);
ac5a488e 3598
8a3c245c
PT
3599/**
3600 * kernel_getsockname - get the address which the socket is bound (kernel space)
3601 * @sock: socket
3602 * @addr: address holder
3603 *
3604 * Fills the @addr pointer with the address which the socket is bound.
3605 * Returns 0 or an error code.
3606 */
3607
9b2c45d4 3608int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3609{
9b2c45d4 3610 return sock->ops->getname(sock, addr, 0);
ac5a488e 3611}
c6d409cf 3612EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3613
8a3c245c 3614/**
645f0897 3615 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3616 * @sock: socket
3617 * @addr: address holder
3618 *
3619 * Fills the @addr pointer with the address which the socket is connected.
3620 * Returns 0 or an error code.
3621 */
3622
9b2c45d4 3623int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3624{
9b2c45d4 3625 return sock->ops->getname(sock, addr, 1);
ac5a488e 3626}
c6d409cf 3627EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3628
8a3c245c
PT
3629/**
3630 * kernel_sendpage - send a &page through a socket (kernel space)
3631 * @sock: socket
3632 * @page: page
3633 * @offset: page offset
3634 * @size: total size in bytes
3635 * @flags: flags (MSG_DONTWAIT, ...)
3636 *
3637 * Returns the total amount sent in bytes or an error.
3638 */
3639
ac5a488e
SS
3640int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3641 size_t size, int flags)
3642{
7b62d31d
CL
3643 if (sock->ops->sendpage) {
3644 /* Warn in case the improper page to zero-copy send */
3645 WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
ac5a488e 3646 return sock->ops->sendpage(sock, page, offset, size, flags);
7b62d31d 3647 }
ac5a488e
SS
3648 return sock_no_sendpage(sock, page, offset, size, flags);
3649}
c6d409cf 3650EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3651
8a3c245c
PT
3652/**
3653 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3654 * @sk: sock
3655 * @page: page
3656 * @offset: page offset
3657 * @size: total size in bytes
3658 * @flags: flags (MSG_DONTWAIT, ...)
3659 *
3660 * Returns the total amount sent in bytes or an error.
3661 * Caller must hold @sk.
3662 */
3663
306b13eb
TH
3664int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3665 size_t size, int flags)
3666{
3667 struct socket *sock = sk->sk_socket;
3668
3669 if (sock->ops->sendpage_locked)
3670 return sock->ops->sendpage_locked(sk, page, offset, size,
3671 flags);
3672
3673 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3674}
3675EXPORT_SYMBOL(kernel_sendpage_locked);
3676
8a3c245c 3677/**
645f0897 3678 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3679 * @sock: socket
3680 * @how: connection part
3681 *
3682 * Returns 0 or an error.
3683 */
3684
91cf45f0
TM
3685int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3686{
3687 return sock->ops->shutdown(sock, how);
3688}
91cf45f0 3689EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3690
8a3c245c
PT
3691/**
3692 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3693 * @sk: socket
3694 *
3695 * This routine returns the IP overhead imposed by a socket i.e.
3696 * the length of the underlying IP header, depending on whether
3697 * this is an IPv4 or IPv6 socket and the length from IP options turned
3698 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3699 */
8a3c245c 3700
113c3075
P
3701u32 kernel_sock_ip_overhead(struct sock *sk)
3702{
3703 struct inet_sock *inet;
3704 struct ip_options_rcu *opt;
3705 u32 overhead = 0;
113c3075
P
3706#if IS_ENABLED(CONFIG_IPV6)
3707 struct ipv6_pinfo *np;
3708 struct ipv6_txoptions *optv6 = NULL;
3709#endif /* IS_ENABLED(CONFIG_IPV6) */
3710
3711 if (!sk)
3712 return overhead;
3713
113c3075
P
3714 switch (sk->sk_family) {
3715 case AF_INET:
3716 inet = inet_sk(sk);
3717 overhead += sizeof(struct iphdr);
3718 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3719 sock_owned_by_user(sk));
113c3075
P
3720 if (opt)
3721 overhead += opt->opt.optlen;
3722 return overhead;
3723#if IS_ENABLED(CONFIG_IPV6)
3724 case AF_INET6:
3725 np = inet6_sk(sk);
3726 overhead += sizeof(struct ipv6hdr);
3727 if (np)
3728 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3729 sock_owned_by_user(sk));
113c3075
P
3730 if (optv6)
3731 overhead += (optv6->opt_flen + optv6->opt_nflen);
3732 return overhead;
3733#endif /* IS_ENABLED(CONFIG_IPV6) */
3734 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3735 return overhead;
3736 }
3737}
3738EXPORT_SYMBOL(kernel_sock_ip_overhead);