]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/socket.c
io_uring: remove superfluous check for sqe->off in io_accept()
[mirror_ubuntu-jammy-kernel.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
1da177e4 55#include <linux/mm.h>
1da177e4
LT
56#include <linux/socket.h>
57#include <linux/file.h>
58#include <linux/net.h>
59#include <linux/interrupt.h>
aaca0bdc 60#include <linux/thread_info.h>
55737fda 61#include <linux/rcupdate.h>
1da177e4
LT
62#include <linux/netdevice.h>
63#include <linux/proc_fs.h>
64#include <linux/seq_file.h>
4a3e2f71 65#include <linux/mutex.h>
1da177e4 66#include <linux/if_bridge.h>
20380731
ACM
67#include <linux/if_frad.h>
68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
6b96018b 103#include <linux/sockios.h>
076bb0c8 104#include <net/busy_poll.h>
f24b9be5 105#include <linux/errqueue.h>
06021292 106
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll tunables; only built when CONFIG_NET_RX_BUSY_POLL is enabled. */
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
6b96018b 111
8ae5e030
AV
112static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
113static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 114static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
115
116static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
117static __poll_t sock_poll(struct file *file,
118 struct poll_table_struct *wait);
89bddce5 119static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
120#ifdef CONFIG_COMPAT
121static long compat_sock_ioctl(struct file *file,
89bddce5 122 unsigned int cmd, unsigned long arg);
89bbfc95 123#endif
1da177e4 124static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
125static ssize_t sock_sendpage(struct file *file, struct page *page,
126 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 127static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 128 struct pipe_inode_info *pipe, size_t len,
9c55e01c 129 unsigned int flags);
1da177e4 130
1da177e4
LT
131/*
132 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
133 * in the operation structures but are done directly via the socketcall() multiplexor.
134 */
135
da7071d7 136static const struct file_operations socket_file_ops = {
1da177e4
LT
137 .owner = THIS_MODULE,
138 .llseek = no_llseek,
8ae5e030
AV
139 .read_iter = sock_read_iter,
140 .write_iter = sock_write_iter,
1da177e4
LT
141 .poll = sock_poll,
142 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
143#ifdef CONFIG_COMPAT
144 .compat_ioctl = compat_sock_ioctl,
145#endif
1da177e4 146 .mmap = sock_mmap,
1da177e4
LT
147 .release = sock_close,
148 .fasync = sock_fasync,
5274f052
JA
149 .sendpage = sock_sendpage,
150 .splice_write = generic_splice_sendpage,
9c55e01c 151 .splice_read = sock_splice_read,
1da177e4
LT
152};
153
154/*
155 * The protocol list. Each protocol is registered in here.
156 */
157
1da177e4 158static DEFINE_SPINLOCK(net_family_lock);
190683a9 159static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 160
/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */
166
1da177e4
LT
167/**
168 * move_addr_to_kernel - copy a socket address into kernel space
169 * @uaddr: Address in user space
170 * @kaddr: Address in kernel space
171 * @ulen: Length in user space
172 *
173 * The address is copied into kernel space. If the provided address is
174 * too long an error code of -EINVAL is returned. If the copy gives
175 * invalid addresses -EFAULT is returned. On a success 0 is returned.
176 */
177
43db362d 178int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 179{
230b1839 180 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 181 return -EINVAL;
89bddce5 182 if (ulen == 0)
1da177e4 183 return 0;
89bddce5 184 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 185 return -EFAULT;
3ec3b2fb 186 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
187}
188
189/**
190 * move_addr_to_user - copy an address to user space
191 * @kaddr: kernel space address
192 * @klen: length of address in kernel
193 * @uaddr: user space address
194 * @ulen: pointer to user length field
195 *
196 * The value pointed to by ulen on entry is the buffer length available.
197 * This is overwritten with the buffer space used. -EINVAL is returned
198 * if an overlong buffer is specified or a negative buffer size. -EFAULT
199 * is returned if either the buffer or the length field are not
200 * accessible.
201 * After copying the data up to the limit the user specifies, the true
202 * length of the data is written over the length limit the user
203 * specified. Zero is returned for a success.
204 */
89bddce5 205
43db362d 206static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 207 void __user *uaddr, int __user *ulen)
1da177e4
LT
208{
209 int err;
210 int len;
211
68c6beb3 212 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
213 err = get_user(len, ulen);
214 if (err)
1da177e4 215 return err;
89bddce5
SH
216 if (len > klen)
217 len = klen;
68c6beb3 218 if (len < 0)
1da177e4 219 return -EINVAL;
89bddce5 220 if (len) {
d6fe3945
SG
221 if (audit_sockaddr(klen, kaddr))
222 return -ENOMEM;
89bddce5 223 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
224 return -EFAULT;
225 }
226 /*
89bddce5
SH
227 * "fromlen shall refer to the value before truncation.."
228 * 1003.1g
1da177e4
LT
229 */
230 return __put_user(klen, ulen);
231}
232
08009a76 233static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
234
235static struct inode *sock_alloc_inode(struct super_block *sb)
236{
237 struct socket_alloc *ei;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
333f7909
AV
242 init_waitqueue_head(&ei->socket.wq.wait);
243 ei->socket.wq.fasync_list = NULL;
244 ei->socket.wq.flags = 0;
89bddce5 245
1da177e4
LT
246 ei->socket.state = SS_UNCONNECTED;
247 ei->socket.flags = 0;
248 ei->socket.ops = NULL;
249 ei->socket.sk = NULL;
250 ei->socket.file = NULL;
1da177e4
LT
251
252 return &ei->vfs_inode;
253}
254
6d7855c5 255static void sock_free_inode(struct inode *inode)
1da177e4 256{
43815482
ED
257 struct socket_alloc *ei;
258
259 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 260 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
261}
262
51cc5068 263static void init_once(void *foo)
1da177e4 264{
89bddce5 265 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 266
a35afb83 267 inode_init_once(&ei->vfs_inode);
1da177e4 268}
89bddce5 269
1e911632 270static void init_inodecache(void)
1da177e4
LT
271{
272 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
273 sizeof(struct socket_alloc),
274 0,
275 (SLAB_HWCACHE_ALIGN |
276 SLAB_RECLAIM_ACCOUNT |
5d097056 277 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 278 init_once);
1e911632 279 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
280}
281
b87221de 282static const struct super_operations sockfs_ops = {
c6d409cf 283 .alloc_inode = sock_alloc_inode,
6d7855c5 284 .free_inode = sock_free_inode,
c6d409cf 285 .statfs = simple_statfs,
1da177e4
LT
286};
287
c23fbb6b
ED
288/*
289 * sockfs_dname() is called from d_path().
290 */
291static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
292{
293 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 294 d_inode(dentry)->i_ino);
c23fbb6b
ED
295}
296
3ba13d17 297static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 298 .d_dname = sockfs_dname,
1da177e4
LT
299};
300
bba0bd31
AG
301static int sockfs_xattr_get(const struct xattr_handler *handler,
302 struct dentry *dentry, struct inode *inode,
303 const char *suffix, void *value, size_t size)
304{
305 if (value) {
306 if (dentry->d_name.len + 1 > size)
307 return -ERANGE;
308 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
309 }
310 return dentry->d_name.len + 1;
311}
312
313#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
314#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
315#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
316
317static const struct xattr_handler sockfs_xattr_handler = {
318 .name = XATTR_NAME_SOCKPROTONAME,
319 .get = sockfs_xattr_get,
320};
321
4a590153
AG
322static int sockfs_security_xattr_set(const struct xattr_handler *handler,
323 struct dentry *dentry, struct inode *inode,
324 const char *suffix, const void *value,
325 size_t size, int flags)
326{
327 /* Handled by LSM. */
328 return -EAGAIN;
329}
330
331static const struct xattr_handler sockfs_security_xattr_handler = {
332 .prefix = XATTR_SECURITY_PREFIX,
333 .set = sockfs_security_xattr_set,
334};
335
bba0bd31
AG
336static const struct xattr_handler *sockfs_xattr_handlers[] = {
337 &sockfs_xattr_handler,
4a590153 338 &sockfs_security_xattr_handler,
bba0bd31
AG
339 NULL
340};
341
fba9be49 342static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 343{
fba9be49
DH
344 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
345 if (!ctx)
346 return -ENOMEM;
347 ctx->ops = &sockfs_ops;
348 ctx->dops = &sockfs_dentry_operations;
349 ctx->xattr = sockfs_xattr_handlers;
350 return 0;
c74a1cbb
AV
351}
352
353static struct vfsmount *sock_mnt __read_mostly;
354
355static struct file_system_type sock_fs_type = {
356 .name = "sockfs",
fba9be49 357 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
358 .kill_sb = kill_anon_super,
359};
360
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them into the fd
 *	space of the current process. On success the file descriptor is
 *	returned and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we do not
 *	refer to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we
 *	cannot solve it inside the kernel, but we still take care of
 *	internal coherence.
 */
377
8a3c245c
PT
378/**
379 * sock_alloc_file - Bind a &socket to a &file
380 * @sock: socket
381 * @flags: file status flags
382 * @dname: protocol name
383 *
384 * Returns the &file bound with @sock, implicitly storing it
385 * in sock->file. If dname is %NULL, sets to "".
386 * On failure the return is a ERR pointer (see linux/err.h).
387 * This function uses GFP_KERNEL internally.
388 */
389
aab174f0 390struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 391{
7cbe66b6 392 struct file *file;
1da177e4 393
d93aa9d8
AV
394 if (!dname)
395 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 396
d93aa9d8
AV
397 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
398 O_RDWR | (flags & O_NONBLOCK),
399 &socket_file_ops);
b5ffe634 400 if (IS_ERR(file)) {
8e1611e2 401 sock_release(sock);
39b65252 402 return file;
cc3808f8
AV
403 }
404
405 sock->file = file;
39d8c1b6 406 file->private_data = sock;
d8e464ec 407 stream_open(SOCK_INODE(sock), file);
28407630 408 return file;
39d8c1b6 409}
56b31d1c 410EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 411
56b31d1c 412static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
413{
414 struct file *newfile;
28407630 415 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
416 if (unlikely(fd < 0)) {
417 sock_release(sock);
28407630 418 return fd;
ce4bb04c 419 }
39d8c1b6 420
aab174f0 421 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 422 if (!IS_ERR(newfile)) {
39d8c1b6 423 fd_install(fd, newfile);
28407630
AV
424 return fd;
425 }
7cbe66b6 426
28407630
AV
427 put_unused_fd(fd);
428 return PTR_ERR(newfile);
1da177e4
LT
429}
430
8a3c245c
PT
431/**
432 * sock_from_file - Return the &socket bounded to @file.
433 * @file: file
434 * @err: pointer to an error code return
435 *
436 * On failure returns %NULL and assigns -ENOTSOCK to @err.
437 */
438
406a3c63 439struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 440{
6cb153ca
BL
441 if (file->f_op == &socket_file_ops)
442 return file->private_data; /* set in sock_map_fd */
443
23bb80d2
ED
444 *err = -ENOTSOCK;
445 return NULL;
6cb153ca 446}
406a3c63 447EXPORT_SYMBOL(sock_from_file);
6cb153ca 448
1da177e4 449/**
c6d409cf 450 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
451 * @fd: file handle
452 * @err: pointer to an error code return
453 *
454 * The file handle passed in is locked and the socket it is bound
241c4667 455 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
456 * with a negative errno code and NULL is returned. The function checks
457 * for both invalid handles and passing a handle which is not a socket.
458 *
459 * On a success the socket object pointer is returned.
460 */
461
462struct socket *sockfd_lookup(int fd, int *err)
463{
464 struct file *file;
1da177e4
LT
465 struct socket *sock;
466
89bddce5
SH
467 file = fget(fd);
468 if (!file) {
1da177e4
LT
469 *err = -EBADF;
470 return NULL;
471 }
89bddce5 472
6cb153ca
BL
473 sock = sock_from_file(file, err);
474 if (!sock)
1da177e4 475 fput(file);
6cb153ca
BL
476 return sock;
477}
c6d409cf 478EXPORT_SYMBOL(sockfd_lookup);
1da177e4 479
6cb153ca
BL
480static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
481{
00e188ef 482 struct fd f = fdget(fd);
6cb153ca
BL
483 struct socket *sock;
484
3672558c 485 *err = -EBADF;
00e188ef
AV
486 if (f.file) {
487 sock = sock_from_file(f.file, err);
488 if (likely(sock)) {
489 *fput_needed = f.flags;
6cb153ca 490 return sock;
00e188ef
AV
491 }
492 fdput(f);
1da177e4 493 }
6cb153ca 494 return NULL;
1da177e4
LT
495}
496
600e1779
MY
497static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
498 size_t size)
499{
500 ssize_t len;
501 ssize_t used = 0;
502
c5ef6035 503 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
504 if (len < 0)
505 return len;
506 used += len;
507 if (buffer) {
508 if (size < used)
509 return -ERANGE;
510 buffer += len;
511 }
512
513 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
514 used += len;
515 if (buffer) {
516 if (size < used)
517 return -ERANGE;
518 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
519 buffer += len;
520 }
521
522 return used;
523}
524
dc647ec8 525static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
526{
527 int err = simple_setattr(dentry, iattr);
528
e1a3a60a 529 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
530 struct socket *sock = SOCKET_I(d_inode(dentry));
531
6d8c50dc
CW
532 if (sock->sk)
533 sock->sk->sk_uid = iattr->ia_uid;
534 else
535 err = -ENOENT;
86741ec2
LC
536 }
537
538 return err;
539}
540
600e1779 541static const struct inode_operations sockfs_inode_ops = {
600e1779 542 .listxattr = sockfs_listxattr,
86741ec2 543 .setattr = sockfs_setattr,
600e1779
MY
544};
545
1da177e4 546/**
8a3c245c 547 * sock_alloc - allocate a socket
89bddce5 548 *
1da177e4
LT
549 * Allocate a new inode and socket object. The two are bound together
550 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 551 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
552 */
553
f4a00aac 554struct socket *sock_alloc(void)
1da177e4 555{
89bddce5
SH
556 struct inode *inode;
557 struct socket *sock;
1da177e4 558
a209dfc7 559 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
560 if (!inode)
561 return NULL;
562
563 sock = SOCKET_I(inode);
564
85fe4025 565 inode->i_ino = get_next_ino();
89bddce5 566 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
567 inode->i_uid = current_fsuid();
568 inode->i_gid = current_fsgid();
600e1779 569 inode->i_op = &sockfs_inode_ops;
1da177e4 570
1da177e4
LT
571 return sock;
572}
f4a00aac 573EXPORT_SYMBOL(sock_alloc);
1da177e4 574
1da177e4 575/**
8a3c245c 576 * sock_release - close a socket
1da177e4
LT
577 * @sock: socket to close
578 *
579 * The socket is released from the protocol stack if it has a release
580 * callback, and the inode is then released if the socket is bound to
89bddce5 581 * an inode not a file.
1da177e4 582 */
89bddce5 583
6d8c50dc 584static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
585{
586 if (sock->ops) {
587 struct module *owner = sock->ops->owner;
588
6d8c50dc
CW
589 if (inode)
590 inode_lock(inode);
1da177e4 591 sock->ops->release(sock);
ff7b11aa 592 sock->sk = NULL;
6d8c50dc
CW
593 if (inode)
594 inode_unlock(inode);
1da177e4
LT
595 sock->ops = NULL;
596 module_put(owner);
597 }
598
333f7909 599 if (sock->wq.fasync_list)
3410f22e 600 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 601
1da177e4
LT
602 if (!sock->file) {
603 iput(SOCK_INODE(sock));
604 return;
605 }
89bddce5 606 sock->file = NULL;
1da177e4 607}
6d8c50dc
CW
608
609void sock_release(struct socket *sock)
610{
611 __sock_release(sock, NULL);
612}
c6d409cf 613EXPORT_SYMBOL(sock_release);
1da177e4 614
c14ac945 615void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 616{
140c55d4
ED
617 u8 flags = *tx_flags;
618
c14ac945 619 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
620 flags |= SKBTX_HW_TSTAMP;
621
c14ac945 622 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
623 flags |= SKBTX_SW_TSTAMP;
624
c14ac945 625 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
626 flags |= SKBTX_SCHED_TSTAMP;
627
140c55d4 628 *tx_flags = flags;
20d49473 629}
67cc0d40 630EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 631
8c3c447b
PA
632INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
633 size_t));
a648a592
PA
634INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
635 size_t));
d8725c86 636static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 637{
a648a592
PA
638 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
639 inet_sendmsg, sock, msg,
640 msg_data_left(msg));
d8725c86
AV
641 BUG_ON(ret == -EIOCBQUEUED);
642 return ret;
1da177e4
LT
643}
644
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM first; if the LSM hook
 *	returns an error nothing is sent and that error is returned.
 *	Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 661
8a3c245c
PT
662/**
663 * kernel_sendmsg - send a message through @sock (kernel-space)
664 * @sock: socket
665 * @msg: message header
666 * @vec: kernel vec
667 * @num: vec array length
668 * @size: total message data size
669 *
670 * Builds the message data with @vec and sends it through @sock.
671 * Returns the number of bytes sent, or an error code.
672 */
673
1da177e4
LT
674int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
675 struct kvec *vec, size_t num, size_t size)
676{
aa563d7b 677 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 678 return sock_sendmsg(sock, msg);
1da177e4 679}
c6d409cf 680EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 681
8a3c245c
PT
682/**
683 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
684 * @sk: sock
685 * @msg: message header
686 * @vec: output s/g array
687 * @num: output s/g array length
688 * @size: total message data size
689 *
690 * Builds the message data with @vec and sends it through @sock.
691 * Returns the number of bytes sent, or an error code.
692 * Caller must hold @sk.
693 */
694
306b13eb
TH
695int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
696 struct kvec *vec, size_t num, size_t size)
697{
698 struct socket *sock = sk->sk_socket;
699
700 if (!sock->ops->sendmsg_locked)
db5980d8 701 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 702
aa563d7b 703 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
704
705 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
706}
707EXPORT_SYMBOL(kernel_sendmsg_locked);
708
8605330a
SHY
709static bool skb_is_err_queue(const struct sk_buff *skb)
710{
711 /* pkt_type of skbs enqueued on the error queue are set to
712 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
713 * in recvmsg, since skbs received on a local socket will never
714 * have a pkt_type of PACKET_OUTGOING.
715 */
716 return skb->pkt_type == PACKET_OUTGOING;
717}
718
b50a5c70
ML
719/* On transmit, software and hardware timestamps are returned independently.
720 * As the two skb clones share the hardware timestamp, which may be updated
721 * before the software timestamp is received, a hardware TX timestamp may be
722 * returned only if there is no software TX timestamp. Ignore false software
723 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 724 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
725 * hardware timestamp.
726 */
727static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
728{
729 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
730}
731
aad9c8c4
ML
732static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
733{
734 struct scm_ts_pktinfo ts_pktinfo;
735 struct net_device *orig_dev;
736
737 if (!skb_mac_header_was_set(skb))
738 return;
739
740 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
741
742 rcu_read_lock();
743 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
744 if (orig_dev)
745 ts_pktinfo.if_index = orig_dev->ifindex;
746 rcu_read_unlock();
747
748 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
749 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
750 sizeof(ts_pktinfo), &ts_pktinfo);
751}
752
92f37fd2
ED
753/*
754 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
755 */
756void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
757 struct sk_buff *skb)
758{
20d49473 759 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 760 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
761 struct scm_timestamping_internal tss;
762
b50a5c70 763 int empty = 1, false_tstamp = 0;
20d49473
PO
764 struct skb_shared_hwtstamps *shhwtstamps =
765 skb_hwtstamps(skb);
766
767 /* Race occurred between timestamp enabling and packet
768 receiving. Fill in the current time for now. */
b50a5c70 769 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 770 __net_timestamp(skb);
b50a5c70
ML
771 false_tstamp = 1;
772 }
20d49473
PO
773
774 if (need_software_tstamp) {
775 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
776 if (new_tstamp) {
777 struct __kernel_sock_timeval tv;
778
779 skb_get_new_timestamp(skb, &tv);
780 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
781 sizeof(tv), &tv);
782 } else {
783 struct __kernel_old_timeval tv;
784
785 skb_get_timestamp(skb, &tv);
786 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
787 sizeof(tv), &tv);
788 }
20d49473 789 } else {
887feae3
DD
790 if (new_tstamp) {
791 struct __kernel_timespec ts;
792
793 skb_get_new_timestampns(skb, &ts);
794 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
795 sizeof(ts), &ts);
796 } else {
797 struct timespec ts;
798
799 skb_get_timestampns(skb, &ts);
800 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
801 sizeof(ts), &ts);
802 }
20d49473
PO
803 }
804 }
805
f24b9be5 806 memset(&tss, 0, sizeof(tss));
c199105d 807 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 808 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 809 empty = 0;
4d276eb6 810 if (shhwtstamps &&
b9f40e21 811 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 812 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 813 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 814 empty = 0;
aad9c8c4
ML
815 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
816 !skb_is_err_queue(skb))
817 put_ts_pktinfo(msg, skb);
818 }
1c885808 819 if (!empty) {
9718475e
DD
820 if (sock_flag(sk, SOCK_TSTAMP_NEW))
821 put_cmsg_scm_timestamping64(msg, &tss);
822 else
823 put_cmsg_scm_timestamping(msg, &tss);
1c885808 824
8605330a 825 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 826 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
827 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
828 skb->len, skb->data);
829 }
92f37fd2 830}
7c81fd8b
ACM
831EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
832
6e3e939f
JB
833void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
834 struct sk_buff *skb)
835{
836 int ack;
837
838 if (!sock_flag(sk, SOCK_WIFI_STATUS))
839 return;
840 if (!skb->wifi_acked_valid)
841 return;
842
843 ack = skb->wifi_acked;
844
845 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
846}
847EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
848
11165f14 849static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
850 struct sk_buff *skb)
3b885787 851{
744d5a3e 852 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 853 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 854 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
855}
856
/* Add the timestamp cmsgs, then the drop-count cmsg, for @skb to @msg. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 864
8c3c447b 865INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
866 size_t, int));
867INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
868 size_t, int));
1b784140 869static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 870 int flags)
1da177e4 871{
a648a592
PA
872 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
873 inet_recvmsg, sock, msg, msg_data_left(msg),
874 flags);
1da177e4
LT
875}
876
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM first; if the LSM hook
 *	returns an error nothing is received and that error is returned.
 *	Returns the total number of bytes received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 893
c1249c0a 894/**
8a3c245c
PT
895 * kernel_recvmsg - Receive a message from a socket (kernel space)
896 * @sock: The socket to receive the message from
897 * @msg: Received message
898 * @vec: Input s/g array for message data
899 * @num: Size of input s/g array
900 * @size: Number of bytes to read
901 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 902 *
8a3c245c
PT
903 * On return the msg structure contains the scatter/gather array passed in the
904 * vec argument. The array is modified so that it consists of the unfilled
905 * portion of the original array.
c1249c0a 906 *
8a3c245c 907 * The returned value is the total number of bytes received, or an error.
c1249c0a 908 */
8a3c245c 909
89bddce5
SH
910int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
911 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
912{
913 mm_segment_t oldfs = get_fs();
914 int result;
915
aa563d7b 916 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 917 set_fs(KERNEL_DS);
2da62906 918 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
919 set_fs(oldfs);
920 return result;
921}
c6d409cf 922EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 923
ce1d4d3e
CH
924static ssize_t sock_sendpage(struct file *file, struct page *page,
925 int offset, size_t size, loff_t *ppos, int more)
1da177e4 926{
1da177e4
LT
927 struct socket *sock;
928 int flags;
929
ce1d4d3e
CH
930 sock = file->private_data;
931
35f9c09f
ED
932 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
933 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
934 flags |= more;
ce1d4d3e 935
e6949583 936 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 937}
1da177e4 938
9c55e01c 939static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 940 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
941 unsigned int flags)
942{
943 struct socket *sock = file->private_data;
944
997b37da 945 if (unlikely(!sock->ops->splice_read))
95506588 946 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 947
9c55e01c
JA
948 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
949}
950
8ae5e030 951static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 952{
6d652330
AV
953 struct file *file = iocb->ki_filp;
954 struct socket *sock = file->private_data;
0345f931 955 struct msghdr msg = {.msg_iter = *to,
956 .msg_iocb = iocb};
8ae5e030 957 ssize_t res;
ce1d4d3e 958
8ae5e030
AV
959 if (file->f_flags & O_NONBLOCK)
960 msg.msg_flags = MSG_DONTWAIT;
961
962 if (iocb->ki_pos != 0)
1da177e4 963 return -ESPIPE;
027445c3 964
66ee59af 965 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
966 return 0;
967
2da62906 968 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
969 *to = msg.msg_iter;
970 return res;
1da177e4
LT
971}
972
8ae5e030 973static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 974{
6d652330
AV
975 struct file *file = iocb->ki_filp;
976 struct socket *sock = file->private_data;
0345f931 977 struct msghdr msg = {.msg_iter = *from,
978 .msg_iocb = iocb};
8ae5e030 979 ssize_t res;
1da177e4 980
8ae5e030 981 if (iocb->ki_pos != 0)
ce1d4d3e 982 return -ESPIPE;
027445c3 983
8ae5e030
AV
984 if (file->f_flags & O_NONBLOCK)
985 msg.msg_flags = MSG_DONTWAIT;
986
6d652330
AV
987 if (sock->type == SOCK_SEQPACKET)
988 msg.msg_flags |= MSG_EOR;
989
d8725c86 990 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
991 *from = msg.msg_iter;
992 return res;
1da177e4
LT
993}
994
1da177e4
LT
995/*
996 * Atomic setting of ioctl hooks to avoid race
997 * with module unload.
998 */
999
4a3e2f71 1000static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1001static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1002
881d966b 1003void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1004{
4a3e2f71 1005 mutex_lock(&br_ioctl_mutex);
1da177e4 1006 br_ioctl_hook = hook;
4a3e2f71 1007 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1008}
1009EXPORT_SYMBOL(brioctl_set);
1010
4a3e2f71 1011static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1012static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1013
881d966b 1014void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1015{
4a3e2f71 1016 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1017 vlan_ioctl_hook = hook;
4a3e2f71 1018 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1019}
1020EXPORT_SYMBOL(vlan_ioctl_set);
1021
4a3e2f71 1022static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1023static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1024
89bddce5 1025void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1026{
4a3e2f71 1027 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1028 dlci_ioctl_hook = hook;
4a3e2f71 1029 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1030}
1031EXPORT_SYMBOL(dlci_ioctl_set);
1032
6b96018b 1033static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1034 unsigned int cmd, unsigned long arg)
6b96018b
AB
1035{
1036 int err;
1037 void __user *argp = (void __user *)arg;
1038
1039 err = sock->ops->ioctl(sock, cmd, arg);
1040
1041 /*
1042 * If this ioctl is unknown try to hand it down
1043 * to the NIC driver.
1044 */
36fd633e
AV
1045 if (err != -ENOIOCTLCMD)
1046 return err;
6b96018b 1047
36fd633e
AV
1048 if (cmd == SIOCGIFCONF) {
1049 struct ifconf ifc;
1050 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1051 return -EFAULT;
1052 rtnl_lock();
1053 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1054 rtnl_unlock();
1055 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1056 err = -EFAULT;
44c02a2c
AV
1057 } else {
1058 struct ifreq ifr;
1059 bool need_copyout;
63ff03ab 1060 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1061 return -EFAULT;
1062 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1063 if (!err && need_copyout)
63ff03ab 1064 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1065 return -EFAULT;
36fd633e 1066 }
6b96018b
AB
1067 return err;
1068}
1069
1da177e4
LT
1070/*
1071 * With an ioctl, arg may well be a user mode pointer, but we don't know
1072 * what to do with it - that's up to the protocol still.
1073 */
1074
8a3c245c
PT
1075/**
1076 * get_net_ns - increment the refcount of the network namespace
1077 * @ns: common namespace (net)
1078 *
1079 * Returns the net's common namespace.
1080 */
1081
d8d211a2 1082struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1083{
1084 return &get_net(container_of(ns, struct net, ns))->ns;
1085}
d8d211a2 1086EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1087
1da177e4
LT
1088static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1089{
1090 struct socket *sock;
881d966b 1091 struct sock *sk;
1da177e4
LT
1092 void __user *argp = (void __user *)arg;
1093 int pid, err;
881d966b 1094 struct net *net;
1da177e4 1095
b69aee04 1096 sock = file->private_data;
881d966b 1097 sk = sock->sk;
3b1e0a65 1098 net = sock_net(sk);
44c02a2c
AV
1099 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1100 struct ifreq ifr;
1101 bool need_copyout;
1102 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1103 return -EFAULT;
1104 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1105 if (!err && need_copyout)
1106 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1107 return -EFAULT;
1da177e4 1108 } else
3d23e349 1109#ifdef CONFIG_WEXT_CORE
1da177e4 1110 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1111 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1112 } else
3d23e349 1113#endif
89bddce5 1114 switch (cmd) {
1da177e4
LT
1115 case FIOSETOWN:
1116 case SIOCSPGRP:
1117 err = -EFAULT;
1118 if (get_user(pid, (int __user *)argp))
1119 break;
393cc3f5 1120 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1121 break;
1122 case FIOGETOWN:
1123 case SIOCGPGRP:
609d7fa9 1124 err = put_user(f_getown(sock->file),
89bddce5 1125 (int __user *)argp);
1da177e4
LT
1126 break;
1127 case SIOCGIFBR:
1128 case SIOCSIFBR:
1129 case SIOCBRADDBR:
1130 case SIOCBRDELBR:
1131 err = -ENOPKG;
1132 if (!br_ioctl_hook)
1133 request_module("bridge");
1134
4a3e2f71 1135 mutex_lock(&br_ioctl_mutex);
89bddce5 1136 if (br_ioctl_hook)
881d966b 1137 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1138 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1139 break;
1140 case SIOCGIFVLAN:
1141 case SIOCSIFVLAN:
1142 err = -ENOPKG;
1143 if (!vlan_ioctl_hook)
1144 request_module("8021q");
1145
4a3e2f71 1146 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1147 if (vlan_ioctl_hook)
881d966b 1148 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1149 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1150 break;
1da177e4
LT
1151 case SIOCADDDLCI:
1152 case SIOCDELDLCI:
1153 err = -ENOPKG;
1154 if (!dlci_ioctl_hook)
1155 request_module("dlci");
1156
7512cbf6
PE
1157 mutex_lock(&dlci_ioctl_mutex);
1158 if (dlci_ioctl_hook)
1da177e4 1159 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1160 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1161 break;
c62cce2c
AV
1162 case SIOCGSKNS:
1163 err = -EPERM;
1164 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1165 break;
1166
1167 err = open_related_ns(&net->ns, get_net_ns);
1168 break;
0768e170
AB
1169 case SIOCGSTAMP_OLD:
1170 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1171 if (!sock->ops->gettstamp) {
1172 err = -ENOIOCTLCMD;
1173 break;
1174 }
1175 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1176 cmd == SIOCGSTAMP_OLD,
1177 !IS_ENABLED(CONFIG_64BIT));
60747828 1178 break;
0768e170
AB
1179 case SIOCGSTAMP_NEW:
1180 case SIOCGSTAMPNS_NEW:
1181 if (!sock->ops->gettstamp) {
1182 err = -ENOIOCTLCMD;
1183 break;
1184 }
1185 err = sock->ops->gettstamp(sock, argp,
1186 cmd == SIOCGSTAMP_NEW,
1187 false);
c7cbdbf2 1188 break;
1da177e4 1189 default:
63ff03ab 1190 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1191 break;
89bddce5 1192 }
1da177e4
LT
1193 return err;
1194}
1195
8a3c245c
PT
1196/**
1197 * sock_create_lite - creates a socket
1198 * @family: protocol family (AF_INET, ...)
1199 * @type: communication type (SOCK_STREAM, ...)
1200 * @protocol: protocol (0, ...)
1201 * @res: new socket
1202 *
1203 * Creates a new socket and assigns it to @res, passing through LSM.
1204 * The new socket initialization is not complete, see kernel_accept().
1205 * Returns 0 or an error. On failure @res is set to %NULL.
1206 * This function internally uses GFP_KERNEL.
1207 */
1208
1da177e4
LT
1209int sock_create_lite(int family, int type, int protocol, struct socket **res)
1210{
1211 int err;
1212 struct socket *sock = NULL;
89bddce5 1213
1da177e4
LT
1214 err = security_socket_create(family, type, protocol, 1);
1215 if (err)
1216 goto out;
1217
1218 sock = sock_alloc();
1219 if (!sock) {
1220 err = -ENOMEM;
1221 goto out;
1222 }
1223
1da177e4 1224 sock->type = type;
7420ed23
VY
1225 err = security_socket_post_create(sock, family, type, protocol, 1);
1226 if (err)
1227 goto out_release;
1228
1da177e4
LT
1229out:
1230 *res = sock;
1231 return err;
7420ed23
VY
1232out_release:
1233 sock_release(sock);
1234 sock = NULL;
1235 goto out;
1da177e4 1236}
c6d409cf 1237EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1238
1239/* No kernel lock held - perfect */
ade994f4 1240static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1241{
3cafb376 1242 struct socket *sock = file->private_data;
a331de3b 1243 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1244
e88958e6
CH
1245 if (!sock->ops->poll)
1246 return 0;
f641f13b 1247
a331de3b
CH
1248 if (sk_can_busy_loop(sock->sk)) {
1249 /* poll once if requested by the syscall */
1250 if (events & POLL_BUSY_LOOP)
1251 sk_busy_loop(sock->sk, 1);
1252
1253 /* if this socket can poll_ll, tell the system call */
1254 flag = POLL_BUSY_LOOP;
1255 }
1256
1257 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1258}
1259
89bddce5 1260static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1261{
b69aee04 1262 struct socket *sock = file->private_data;
1da177e4
LT
1263
1264 return sock->ops->mmap(file, sock, vma);
1265}
1266
/*
 * ->release() for sockets: release the socket embedded in @inode.
 * Always returns 0; @filp is not used.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1272
1273/*
1274 * Update the socket async list
1275 *
1276 * Fasync_list locking strategy.
1277 *
1278 * 1. fasync_list is modified only under process context socket lock
1279 * i.e. under semaphore.
1280 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1281 * or under socket lock
1da177e4
LT
1282 */
1283
1284static int sock_fasync(int fd, struct file *filp, int on)
1285{
989a2979
ED
1286 struct socket *sock = filp->private_data;
1287 struct sock *sk = sock->sk;
333f7909 1288 struct socket_wq *wq = &sock->wq;
1da177e4 1289
989a2979 1290 if (sk == NULL)
1da177e4 1291 return -EINVAL;
1da177e4
LT
1292
1293 lock_sock(sk);
eaefd110 1294 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1295
eaefd110 1296 if (!wq->fasync_list)
989a2979
ED
1297 sock_reset_flag(sk, SOCK_FASYNC);
1298 else
bcdce719 1299 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1300
989a2979 1301 release_sock(sk);
1da177e4
LT
1302 return 0;
1303}
1304
ceb5d58b 1305/* This function may be called only under rcu_lock */
1da177e4 1306
ceb5d58b 1307int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1308{
ceb5d58b 1309 if (!wq || !wq->fasync_list)
1da177e4 1310 return -1;
ceb5d58b 1311
89bddce5 1312 switch (how) {
8d8ad9d7 1313 case SOCK_WAKE_WAITD:
ceb5d58b 1314 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1315 break;
1316 goto call_kill;
8d8ad9d7 1317 case SOCK_WAKE_SPACE:
ceb5d58b 1318 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1319 break;
1320 /* fall through */
8d8ad9d7 1321 case SOCK_WAKE_IO:
89bddce5 1322call_kill:
43815482 1323 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1324 break;
8d8ad9d7 1325 case SOCK_WAKE_URG:
43815482 1326 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1327 }
ceb5d58b 1328
1da177e4
LT
1329 return 0;
1330}
c6d409cf 1331EXPORT_SYMBOL(sock_wake_async);
1da177e4 1332
8a3c245c
PT
1333/**
1334 * __sock_create - creates a socket
1335 * @net: net namespace
1336 * @family: protocol family (AF_INET, ...)
1337 * @type: communication type (SOCK_STREAM, ...)
1338 * @protocol: protocol (0, ...)
1339 * @res: new socket
1340 * @kern: boolean for kernel space sockets
1341 *
1342 * Creates a new socket and assigns it to @res, passing through LSM.
1343 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1344 * be set to true if the socket resides in kernel space.
1345 * This function internally uses GFP_KERNEL.
1346 */
1347
721db93a 1348int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1349 struct socket **res, int kern)
1da177e4
LT
1350{
1351 int err;
1352 struct socket *sock;
55737fda 1353 const struct net_proto_family *pf;
1da177e4
LT
1354
1355 /*
89bddce5 1356 * Check protocol is in range
1da177e4
LT
1357 */
1358 if (family < 0 || family >= NPROTO)
1359 return -EAFNOSUPPORT;
1360 if (type < 0 || type >= SOCK_MAX)
1361 return -EINVAL;
1362
1363 /* Compatibility.
1364
1365 This uglymoron is moved from INET layer to here to avoid
1366 deadlock in module load.
1367 */
1368 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1369 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1370 current->comm);
1da177e4
LT
1371 family = PF_PACKET;
1372 }
1373
1374 err = security_socket_create(family, type, protocol, kern);
1375 if (err)
1376 return err;
89bddce5 1377
55737fda
SH
1378 /*
1379 * Allocate the socket and allow the family to set things up. if
1380 * the protocol is 0, the family is instructed to select an appropriate
1381 * default.
1382 */
1383 sock = sock_alloc();
1384 if (!sock) {
e87cc472 1385 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1386 return -ENFILE; /* Not exactly a match, but its the
1387 closest posix thing */
1388 }
1389
1390 sock->type = type;
1391
95a5afca 1392#ifdef CONFIG_MODULES
89bddce5
SH
1393 /* Attempt to load a protocol module if the find failed.
1394 *
1395 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1396 * requested real, full-featured networking support upon configuration.
1397 * Otherwise module support will break!
1398 */
190683a9 1399 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1400 request_module("net-pf-%d", family);
1da177e4
LT
1401#endif
1402
55737fda
SH
1403 rcu_read_lock();
1404 pf = rcu_dereference(net_families[family]);
1405 err = -EAFNOSUPPORT;
1406 if (!pf)
1407 goto out_release;
1da177e4
LT
1408
1409 /*
1410 * We will call the ->create function, that possibly is in a loadable
1411 * module, so we have to bump that loadable module refcnt first.
1412 */
55737fda 1413 if (!try_module_get(pf->owner))
1da177e4
LT
1414 goto out_release;
1415
55737fda
SH
1416 /* Now protected by module ref count */
1417 rcu_read_unlock();
1418
3f378b68 1419 err = pf->create(net, sock, protocol, kern);
55737fda 1420 if (err < 0)
1da177e4 1421 goto out_module_put;
a79af59e 1422
1da177e4
LT
1423 /*
1424 * Now to bump the refcnt of the [loadable] module that owns this
1425 * socket at sock_release time we decrement its refcnt.
1426 */
55737fda
SH
1427 if (!try_module_get(sock->ops->owner))
1428 goto out_module_busy;
1429
1da177e4
LT
1430 /*
1431 * Now that we're done with the ->create function, the [loadable]
1432 * module can have its refcnt decremented
1433 */
55737fda 1434 module_put(pf->owner);
7420ed23
VY
1435 err = security_socket_post_create(sock, family, type, protocol, kern);
1436 if (err)
3b185525 1437 goto out_sock_release;
55737fda 1438 *res = sock;
1da177e4 1439
55737fda
SH
1440 return 0;
1441
1442out_module_busy:
1443 err = -EAFNOSUPPORT;
1da177e4 1444out_module_put:
55737fda
SH
1445 sock->ops = NULL;
1446 module_put(pf->owner);
1447out_sock_release:
1da177e4 1448 sock_release(sock);
55737fda
SH
1449 return err;
1450
1451out_release:
1452 rcu_read_unlock();
1453 goto out_sock_release;
1da177e4 1454}
721db93a 1455EXPORT_SYMBOL(__sock_create);
1da177e4 1456
8a3c245c
PT
1457/**
1458 * sock_create - creates a socket
1459 * @family: protocol family (AF_INET, ...)
1460 * @type: communication type (SOCK_STREAM, ...)
1461 * @protocol: protocol (0, ...)
1462 * @res: new socket
1463 *
1464 * A wrapper around __sock_create().
1465 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1466 */
1467
1da177e4
LT
1468int sock_create(int family, int type, int protocol, struct socket **res)
1469{
1b8d7ae4 1470 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1471}
c6d409cf 1472EXPORT_SYMBOL(sock_create);
1da177e4 1473
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes kern = 1.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1491
9d6a15c3 1492int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1493{
1494 int retval;
1495 struct socket *sock;
a677a039
UD
1496 int flags;
1497
e38b36f3
UD
1498 /* Check the SOCK_* constants for consistency. */
1499 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1500 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1501 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1502 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1503
a677a039 1504 flags = type & ~SOCK_TYPE_MASK;
77d27200 1505 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1506 return -EINVAL;
1507 type &= SOCK_TYPE_MASK;
1da177e4 1508
aaca0bdc
UD
1509 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1510 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1511
1da177e4
LT
1512 retval = sock_create(family, type, protocol, &sock);
1513 if (retval < 0)
8e1611e2 1514 return retval;
1da177e4 1515
8e1611e2 1516 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1517}
1518
9d6a15c3
DB
1519SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1520{
1521 return __sys_socket(family, type, protocol);
1522}
1523
1da177e4
LT
1524/*
1525 * Create a pair of connected sockets.
1526 */
1527
6debc8d8 1528int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1529{
1530 struct socket *sock1, *sock2;
1531 int fd1, fd2, err;
db349509 1532 struct file *newfile1, *newfile2;
a677a039
UD
1533 int flags;
1534
1535 flags = type & ~SOCK_TYPE_MASK;
77d27200 1536 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1537 return -EINVAL;
1538 type &= SOCK_TYPE_MASK;
1da177e4 1539
aaca0bdc
UD
1540 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1541 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1542
016a266b
AV
1543 /*
1544 * reserve descriptors and make sure we won't fail
1545 * to return them to userland.
1546 */
1547 fd1 = get_unused_fd_flags(flags);
1548 if (unlikely(fd1 < 0))
1549 return fd1;
1550
1551 fd2 = get_unused_fd_flags(flags);
1552 if (unlikely(fd2 < 0)) {
1553 put_unused_fd(fd1);
1554 return fd2;
1555 }
1556
1557 err = put_user(fd1, &usockvec[0]);
1558 if (err)
1559 goto out;
1560
1561 err = put_user(fd2, &usockvec[1]);
1562 if (err)
1563 goto out;
1564
1da177e4
LT
1565 /*
1566 * Obtain the first socket and check if the underlying protocol
1567 * supports the socketpair call.
1568 */
1569
1570 err = sock_create(family, type, protocol, &sock1);
016a266b 1571 if (unlikely(err < 0))
1da177e4
LT
1572 goto out;
1573
1574 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1575 if (unlikely(err < 0)) {
1576 sock_release(sock1);
1577 goto out;
bf3c23d1 1578 }
d73aa286 1579
d47cd945
DH
1580 err = security_socket_socketpair(sock1, sock2);
1581 if (unlikely(err)) {
1582 sock_release(sock2);
1583 sock_release(sock1);
1584 goto out;
1585 }
1586
016a266b
AV
1587 err = sock1->ops->socketpair(sock1, sock2);
1588 if (unlikely(err < 0)) {
1589 sock_release(sock2);
1590 sock_release(sock1);
1591 goto out;
28407630
AV
1592 }
1593
aab174f0 1594 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1595 if (IS_ERR(newfile1)) {
28407630 1596 err = PTR_ERR(newfile1);
016a266b
AV
1597 sock_release(sock2);
1598 goto out;
28407630
AV
1599 }
1600
aab174f0 1601 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1602 if (IS_ERR(newfile2)) {
1603 err = PTR_ERR(newfile2);
016a266b
AV
1604 fput(newfile1);
1605 goto out;
db349509
AV
1606 }
1607
157cf649 1608 audit_fd_pair(fd1, fd2);
d73aa286 1609
db349509
AV
1610 fd_install(fd1, newfile1);
1611 fd_install(fd2, newfile2);
d73aa286 1612 return 0;
1da177e4 1613
016a266b 1614out:
d73aa286 1615 put_unused_fd(fd2);
d73aa286 1616 put_unused_fd(fd1);
1da177e4
LT
1617 return err;
1618}
1619
6debc8d8
DB
1620SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1621 int __user *, usockvec)
1622{
1623 return __sys_socketpair(family, type, protocol, usockvec);
1624}
1625
1da177e4
LT
1626/*
1627 * Bind a name to a socket. Nothing much to do here since it's
1628 * the protocol's responsibility to handle the local address.
1629 *
1630 * We move the socket address to kernel space before we call
1631 * the protocol layer (having also checked the address is ok).
1632 */
1633
a87d35d8 1634int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1635{
1636 struct socket *sock;
230b1839 1637 struct sockaddr_storage address;
6cb153ca 1638 int err, fput_needed;
1da177e4 1639
89bddce5 1640 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1641 if (sock) {
43db362d 1642 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1643 if (!err) {
89bddce5 1644 err = security_socket_bind(sock,
230b1839 1645 (struct sockaddr *)&address,
89bddce5 1646 addrlen);
6cb153ca
BL
1647 if (!err)
1648 err = sock->ops->bind(sock,
89bddce5 1649 (struct sockaddr *)
230b1839 1650 &address, addrlen);
1da177e4 1651 }
6cb153ca 1652 fput_light(sock->file, fput_needed);
89bddce5 1653 }
1da177e4
LT
1654 return err;
1655}
1656
a87d35d8
DB
1657SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1658{
1659 return __sys_bind(fd, umyaddr, addrlen);
1660}
1661
1da177e4
LT
1662/*
1663 * Perform a listen. Basically, we allow the protocol to do anything
1664 * necessary for a listen, and if that works, we mark the socket as
1665 * ready for listening.
1666 */
1667
25e290ee 1668int __sys_listen(int fd, int backlog)
1da177e4
LT
1669{
1670 struct socket *sock;
6cb153ca 1671 int err, fput_needed;
b8e1f9b5 1672 int somaxconn;
89bddce5
SH
1673
1674 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1675 if (sock) {
8efa6e93 1676 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1677 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1678 backlog = somaxconn;
1da177e4
LT
1679
1680 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1681 if (!err)
1682 err = sock->ops->listen(sock, backlog);
1da177e4 1683
6cb153ca 1684 fput_light(sock->file, fput_needed);
1da177e4
LT
1685 }
1686 return err;
1687}
1688
25e290ee
DB
1689SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1690{
1691 return __sys_listen(fd, backlog);
1692}
1693
de2ea4b6
JA
1694int __sys_accept4_file(struct file *file, unsigned file_flags,
1695 struct sockaddr __user *upeer_sockaddr,
1696 int __user *upeer_addrlen, int flags)
1da177e4
LT
1697{
1698 struct socket *sock, *newsock;
39d8c1b6 1699 struct file *newfile;
de2ea4b6 1700 int err, len, newfd;
230b1839 1701 struct sockaddr_storage address;
1da177e4 1702
77d27200 1703 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1704 return -EINVAL;
1705
1706 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1707 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1708
de2ea4b6 1709 sock = sock_from_file(file, &err);
1da177e4
LT
1710 if (!sock)
1711 goto out;
1712
1713 err = -ENFILE;
c6d409cf
ED
1714 newsock = sock_alloc();
1715 if (!newsock)
de2ea4b6 1716 goto out;
1da177e4
LT
1717
1718 newsock->type = sock->type;
1719 newsock->ops = sock->ops;
1720
1da177e4
LT
1721 /*
1722 * We don't need try_module_get here, as the listening socket (sock)
1723 * has the protocol module (sock->ops->owner) held.
1724 */
1725 __module_get(newsock->ops->owner);
1726
28407630 1727 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1728 if (unlikely(newfd < 0)) {
1729 err = newfd;
9a1875e6 1730 sock_release(newsock);
de2ea4b6 1731 goto out;
39d8c1b6 1732 }
aab174f0 1733 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1734 if (IS_ERR(newfile)) {
28407630
AV
1735 err = PTR_ERR(newfile);
1736 put_unused_fd(newfd);
de2ea4b6 1737 goto out;
28407630 1738 }
39d8c1b6 1739
a79af59e
FF
1740 err = security_socket_accept(sock, newsock);
1741 if (err)
39d8c1b6 1742 goto out_fd;
a79af59e 1743
de2ea4b6
JA
1744 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1745 false);
1da177e4 1746 if (err < 0)
39d8c1b6 1747 goto out_fd;
1da177e4
LT
1748
1749 if (upeer_sockaddr) {
9b2c45d4
DV
1750 len = newsock->ops->getname(newsock,
1751 (struct sockaddr *)&address, 2);
1752 if (len < 0) {
1da177e4 1753 err = -ECONNABORTED;
39d8c1b6 1754 goto out_fd;
1da177e4 1755 }
43db362d 1756 err = move_addr_to_user(&address,
230b1839 1757 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1758 if (err < 0)
39d8c1b6 1759 goto out_fd;
1da177e4
LT
1760 }
1761
1762 /* File flags are not inherited via accept() unlike another OSes. */
1763
39d8c1b6
DM
1764 fd_install(newfd, newfile);
1765 err = newfd;
1da177e4
LT
1766out:
1767 return err;
39d8c1b6 1768out_fd:
9606a216 1769 fput(newfile);
39d8c1b6 1770 put_unused_fd(newfd);
de2ea4b6
JA
1771 goto out;
1772
1773}
1774
1775/*
1776 * For accept, we attempt to create a new socket, set up the link
1777 * with the client, wake up the client, then return the new
1778 * connected fd. We collect the address of the connector in kernel
1779 * space and move it to user at the very end. This is unclean because
1780 * we open the socket then return an error.
1781 *
1782 * 1003.1g adds the ability to recvmsg() to query connection pending
1783 * status to recvmsg. We need to add that support in a way thats
1784 * clean when we restructure accept also.
1785 */
1786
1787int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1788 int __user *upeer_addrlen, int flags)
1789{
1790 int ret = -EBADF;
1791 struct fd f;
1792
1793 f = fdget(fd);
1794 if (f.file) {
1795 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
1796 upeer_addrlen, flags);
1797 if (f.flags)
1798 fput(f.file);
1799 }
1800
1801 return ret;
1da177e4
LT
1802}
1803
4541e805
DB
1804SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1805 int __user *, upeer_addrlen, int, flags)
1806{
1807 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1808}
1809
20f37034
HC
1810SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1811 int __user *, upeer_addrlen)
aaca0bdc 1812{
4541e805 1813 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1814}
1815
1da177e4
LT
1816/*
1817 * Attempt to connect to a socket with the server address. The address
1818 * is in user space so we verify it is OK and move it to kernel space.
1819 *
1820 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1821 * break bindings
1822 *
1823 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1824 * other SEQPACKET protocols that take time to connect() as it doesn't
1825 * include the -EINPROGRESS status for such sockets.
1826 */
1827
bd3ded31
JA
1828int __sys_connect_file(struct file *file, struct sockaddr __user *uservaddr,
1829 int addrlen, int file_flags)
1da177e4
LT
1830{
1831 struct socket *sock;
230b1839 1832 struct sockaddr_storage address;
bd3ded31 1833 int err;
1da177e4 1834
bd3ded31 1835 sock = sock_from_file(file, &err);
1da177e4
LT
1836 if (!sock)
1837 goto out;
43db362d 1838 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4 1839 if (err < 0)
bd3ded31 1840 goto out;
1da177e4 1841
89bddce5 1842 err =
230b1839 1843 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4 1844 if (err)
bd3ded31 1845 goto out;
1da177e4 1846
230b1839 1847 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
bd3ded31 1848 sock->file->f_flags | file_flags);
1da177e4
LT
1849out:
1850 return err;
1851}
1852
bd3ded31
JA
1853int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1854{
1855 int ret = -EBADF;
1856 struct fd f;
1857
1858 f = fdget(fd);
1859 if (f.file) {
1860 ret = __sys_connect_file(f.file, uservaddr, addrlen, 0);
1861 if (f.flags)
1862 fput(f.file);
1863 }
1864
1865 return ret;
1866}
1867
1387c2c2
DB
1868SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1869 int, addrlen)
1870{
1871 return __sys_connect(fd, uservaddr, addrlen);
1872}
1873
1da177e4
LT
1874/*
1875 * Get the local address ('name') of a socket object. Move the obtained
1876 * name to user space.
1877 */
1878
8882a107
DB
1879int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1880 int __user *usockaddr_len)
1da177e4
LT
1881{
1882 struct socket *sock;
230b1839 1883 struct sockaddr_storage address;
9b2c45d4 1884 int err, fput_needed;
89bddce5 1885
6cb153ca 1886 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1887 if (!sock)
1888 goto out;
1889
1890 err = security_socket_getsockname(sock);
1891 if (err)
1892 goto out_put;
1893
9b2c45d4
DV
1894 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1895 if (err < 0)
1da177e4 1896 goto out_put;
9b2c45d4
DV
1897 /* "err" is actually length in this case */
1898 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1899
1900out_put:
6cb153ca 1901 fput_light(sock->file, fput_needed);
1da177e4
LT
1902out:
1903 return err;
1904}
1905
8882a107
DB
1906SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1907 int __user *, usockaddr_len)
1908{
1909 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1910}
1911
1da177e4
LT
1912/*
1913 * Get the remote address ('name') of a socket object. Move the obtained
1914 * name to user space.
1915 */
1916
b21c8f83
DB
1917int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1918 int __user *usockaddr_len)
1da177e4
LT
1919{
1920 struct socket *sock;
230b1839 1921 struct sockaddr_storage address;
9b2c45d4 1922 int err, fput_needed;
1da177e4 1923
89bddce5
SH
1924 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1925 if (sock != NULL) {
1da177e4
LT
1926 err = security_socket_getpeername(sock);
1927 if (err) {
6cb153ca 1928 fput_light(sock->file, fput_needed);
1da177e4
LT
1929 return err;
1930 }
1931
9b2c45d4
DV
1932 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1933 if (err >= 0)
1934 /* "err" is actually length in this case */
1935 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1936 usockaddr_len);
6cb153ca 1937 fput_light(sock->file, fput_needed);
1da177e4
LT
1938 }
1939 return err;
1940}
1941
b21c8f83
DB
1942SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1943 int __user *, usockaddr_len)
1944{
1945 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1946}
1947
1da177e4
LT
1948/*
1949 * Send a datagram to a given address. We move the address into kernel
1950 * space and check the user space data area is readable before invoking
1951 * the protocol.
1952 */
211b634b
DB
1953int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1954 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1955{
1956 struct socket *sock;
230b1839 1957 struct sockaddr_storage address;
1da177e4
LT
1958 int err;
1959 struct msghdr msg;
1960 struct iovec iov;
6cb153ca 1961 int fput_needed;
6cb153ca 1962
602bd0e9
AV
1963 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1964 if (unlikely(err))
1965 return err;
de0fa95c
PE
1966 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1967 if (!sock)
4387ff75 1968 goto out;
6cb153ca 1969
89bddce5 1970 msg.msg_name = NULL;
89bddce5
SH
1971 msg.msg_control = NULL;
1972 msg.msg_controllen = 0;
1973 msg.msg_namelen = 0;
6cb153ca 1974 if (addr) {
43db362d 1975 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1976 if (err < 0)
1977 goto out_put;
230b1839 1978 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1979 msg.msg_namelen = addr_len;
1da177e4
LT
1980 }
1981 if (sock->file->f_flags & O_NONBLOCK)
1982 flags |= MSG_DONTWAIT;
1983 msg.msg_flags = flags;
d8725c86 1984 err = sock_sendmsg(sock, &msg);
1da177e4 1985
89bddce5 1986out_put:
de0fa95c 1987 fput_light(sock->file, fput_needed);
4387ff75 1988out:
1da177e4
LT
1989 return err;
1990}
1991
211b634b
DB
/*
 * sendto(2) system call entry point: forwards all six arguments unchanged
 * to __sys_sendto() and returns its result.
 */
1992SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1993 unsigned int, flags, struct sockaddr __user *, addr,
1994 int, addr_len)
1995{
1996 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1997}
1998
1da177e4 1999/*
89bddce5 2000 * Send a datagram down a socket.
1da177e4
LT
2001 */
2002
/*
 * send(2) system call entry point: implemented as __sys_sendto() with no
 * destination address (addr = NULL, addr_len = 0).
 */
3e0fa65f 2003SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2004 unsigned int, flags)
1da177e4 2005{
211b634b 2006 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2007}
2008
2009/*
89bddce5 2010 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2011 * sender. We verify the buffers are writable and if needed move the
2012 * sender address from kernel to user space.
2013 */
7a09e1eb
DB
2014int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2015 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2016{
2017 struct socket *sock;
2018 struct iovec iov;
2019 struct msghdr msg;
230b1839 2020 struct sockaddr_storage address;
89bddce5 2021 int err, err2;
6cb153ca
BL
2022 int fput_needed;
2023
602bd0e9
AV
2024 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2025 if (unlikely(err))
2026 return err;
de0fa95c 2027 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2028 if (!sock)
de0fa95c 2029 goto out;
1da177e4 2030
89bddce5
SH
2031 msg.msg_control = NULL;
2032 msg.msg_controllen = 0;
f3d33426
HFS
2033 /* Save some cycles and don't copy the address if not needed */
2034 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2035 /* We assume all kernel code knows the size of sockaddr_storage */
2036 msg.msg_namelen = 0;
130ed5d1 2037 msg.msg_iocb = NULL;
9f138fa6 2038 msg.msg_flags = 0;
1da177e4
LT
2039 if (sock->file->f_flags & O_NONBLOCK)
2040 flags |= MSG_DONTWAIT;
2da62906 2041 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2042
89bddce5 2043 if (err >= 0 && addr != NULL) {
43db362d 2044 err2 = move_addr_to_user(&address,
230b1839 2045 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2046 if (err2 < 0)
2047 err = err2;
1da177e4 2048 }
de0fa95c
PE
2049
2050 fput_light(sock->file, fput_needed);
4387ff75 2051out:
1da177e4
LT
2052 return err;
2053}
2054
7a09e1eb
DB
/*
 * recvfrom(2) system call entry point: forwards all six arguments
 * unchanged to __sys_recvfrom() and returns its result.
 */
2055SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2056 unsigned int, flags, struct sockaddr __user *, addr,
2057 int __user *, addr_len)
2058{
2059 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2060}
2061
1da177e4 2062/*
89bddce5 2063 * Receive a datagram from a socket.
1da177e4
LT
2064 */
2065
b7c0ddf5
JG
/*
 * recv(2) system call entry point: implemented as __sys_recvfrom() with no
 * sender-address buffer (addr = NULL, addr_len = NULL).
 */
2066SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2067 unsigned int, flags)
1da177e4 2068{
7a09e1eb 2069 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2070}
2071
2072/*
2073 * Set a socket option. Because we don't know the option lengths we have
2074 * to pass the user mode parameter for the protocols to sort out.
2075 */
2076
cc36dca0
DB
2077static int __sys_setsockopt(int fd, int level, int optname,
2078 char __user *optval, int optlen)
1da177e4 2079{
0d01da6a
SF
2080 mm_segment_t oldfs = get_fs();
2081 char *kernel_optval = NULL;
6cb153ca 2082 int err, fput_needed;
1da177e4
LT
2083 struct socket *sock;
2084
2085 if (optlen < 0)
2086 return -EINVAL;
89bddce5
SH
2087
2088 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2089 if (sock != NULL) {
2090 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2091 if (err)
2092 goto out_put;
1da177e4 2093
0d01da6a
SF
2094 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
2095 &optname, optval, &optlen,
2096 &kernel_optval);
2097
2098 if (err < 0) {
2099 goto out_put;
2100 } else if (err > 0) {
2101 err = 0;
2102 goto out_put;
2103 }
2104
2105 if (kernel_optval) {
2106 set_fs(KERNEL_DS);
2107 optval = (char __user __force *)kernel_optval;
2108 }
2109
1da177e4 2110 if (level == SOL_SOCKET)
89bddce5
SH
2111 err =
2112 sock_setsockopt(sock, level, optname, optval,
2113 optlen);
1da177e4 2114 else
89bddce5
SH
2115 err =
2116 sock->ops->setsockopt(sock, level, optname, optval,
2117 optlen);
0d01da6a
SF
2118
2119 if (kernel_optval) {
2120 set_fs(oldfs);
2121 kfree(kernel_optval);
2122 }
6cb153ca
BL
2123out_put:
2124 fput_light(sock->file, fput_needed);
1da177e4
LT
2125 }
2126 return err;
2127}
2128
cc36dca0
DB
/*
 * setsockopt(2) system call entry point: forwards all five arguments
 * unchanged to __sys_setsockopt() and returns its result.
 */
2129SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2130 char __user *, optval, int, optlen)
2131{
2132 return __sys_setsockopt(fd, level, optname, optval, optlen);
2133}
2134
1da177e4
LT
2135/*
2136 * Get a socket option. Because we don't know the option lengths we have
2137 * to pass a user mode parameter for the protocols to sort out.
2138 */
2139
13a2d70e
DB
2140static int __sys_getsockopt(int fd, int level, int optname,
2141 char __user *optval, int __user *optlen)
1da177e4 2142{
6cb153ca 2143 int err, fput_needed;
1da177e4 2144 struct socket *sock;
0d01da6a 2145 int max_optlen;
1da177e4 2146
89bddce5
SH
2147 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2148 if (sock != NULL) {
6cb153ca
BL
2149 err = security_socket_getsockopt(sock, level, optname);
2150 if (err)
2151 goto out_put;
1da177e4 2152
0d01da6a
SF
2153 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
2154
1da177e4 2155 if (level == SOL_SOCKET)
89bddce5
SH
2156 err =
2157 sock_getsockopt(sock, level, optname, optval,
2158 optlen);
1da177e4 2159 else
89bddce5
SH
2160 err =
2161 sock->ops->getsockopt(sock, level, optname, optval,
2162 optlen);
0d01da6a
SF
2163
2164 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2165 optval, optlen,
2166 max_optlen, err);
6cb153ca
BL
2167out_put:
2168 fput_light(sock->file, fput_needed);
1da177e4
LT
2169 }
2170 return err;
2171}
2172
13a2d70e
DB
/*
 * getsockopt(2) system call entry point: forwards all five arguments
 * unchanged to __sys_getsockopt() and returns its result.
 */
2173SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2174 char __user *, optval, int __user *, optlen)
2175{
2176 return __sys_getsockopt(fd, level, optname, optval, optlen);
2177}
2178
1da177e4
LT
2179/*
2180 * Shutdown a socket.
2181 */
2182
005a1aea 2183int __sys_shutdown(int fd, int how)
1da177e4 2184{
6cb153ca 2185 int err, fput_needed;
1da177e4
LT
2186 struct socket *sock;
2187
89bddce5
SH
2188 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2189 if (sock != NULL) {
1da177e4 2190 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2191 if (!err)
2192 err = sock->ops->shutdown(sock, how);
2193 fput_light(sock->file, fput_needed);
1da177e4
LT
2194 }
2195 return err;
2196}
2197
005a1aea
DB
/*
 * shutdown(2) system call entry point: forwards fd and how unchanged to
 * __sys_shutdown() and returns its result.
 */
2198SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2199{
2200 return __sys_shutdown(fd, how);
2201}
2202
89bddce5 2203/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG(msg, member) expands to &msg_compat->member when
 * MSG_CMSG_COMPAT is set in 'flags' and to &msg->member otherwise.  The
 * macros are not self-contained: the expanding function must have a
 * variable literally named 'flags' and one named '<msg>_compat' in scope.
 */
2206#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
2207#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
2208#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
2209
c71d8ebe
TH
/*
 * Destination address remembered across the messages of one sendmmsg()
 * call, so ___sys_sendmsg() can tell when consecutive messages go to the
 * same address.
 */
2210struct used_address {
/* Copy of the last destination address that was sent to successfully. */
2211 struct sockaddr_storage name;
/* Length of 'name'; __sys_sendmmsg() starts it at UINT_MAX so that the
 * first destination never compares equal. */
2212 unsigned int name_len;
2213};
2214
da184284
AV
2215static int copy_msghdr_from_user(struct msghdr *kmsg,
2216 struct user_msghdr __user *umsg,
2217 struct sockaddr __user **save_addr,
2218 struct iovec **iov)
1661bf36 2219{
ffb07550 2220 struct user_msghdr msg;
08adb7da
AV
2221 ssize_t err;
2222
ffb07550 2223 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2224 return -EFAULT;
dbb490b9 2225
864d9664 2226 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2227 kmsg->msg_controllen = msg.msg_controllen;
2228 kmsg->msg_flags = msg.msg_flags;
2229
2230 kmsg->msg_namelen = msg.msg_namelen;
2231 if (!msg.msg_name)
6a2a2b3a
AS
2232 kmsg->msg_namelen = 0;
2233
dbb490b9
ML
2234 if (kmsg->msg_namelen < 0)
2235 return -EINVAL;
2236
1661bf36 2237 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2238 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2239
2240 if (save_addr)
ffb07550 2241 *save_addr = msg.msg_name;
08adb7da 2242
ffb07550 2243 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2244 if (!save_addr) {
864d9664
PA
2245 err = move_addr_to_kernel(msg.msg_name,
2246 kmsg->msg_namelen,
08adb7da
AV
2247 kmsg->msg_name);
2248 if (err < 0)
2249 return err;
2250 }
2251 } else {
2252 kmsg->msg_name = NULL;
2253 kmsg->msg_namelen = 0;
2254 }
2255
ffb07550 2256 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2257 return -EMSGSIZE;
2258
0345f931 2259 kmsg->msg_iocb = NULL;
2260
87e5e6da 2261 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2262 msg.msg_iov, msg.msg_iovlen,
da184284 2263 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2264 return err < 0 ? err : 0;
1661bf36
DC
2265}
2266
666547ff 2267static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2268 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2269 struct used_address *used_address,
2270 unsigned int allowed_msghdr_flags)
1da177e4 2271{
89bddce5
SH
2272 struct compat_msghdr __user *msg_compat =
2273 (struct compat_msghdr __user *)msg;
230b1839 2274 struct sockaddr_storage address;
1da177e4 2275 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2276 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2277 __aligned(sizeof(__kernel_size_t));
89bddce5 2278 /* 20 is size of ipv6_pktinfo */
1da177e4 2279 unsigned char *ctl_buf = ctl;
d8725c86 2280 int ctl_len;
08adb7da 2281 ssize_t err;
89bddce5 2282
08adb7da 2283 msg_sys->msg_name = &address;
1da177e4 2284
08449320 2285 if (MSG_CMSG_COMPAT & flags)
08adb7da 2286 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2287 else
08adb7da 2288 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2289 if (err < 0)
da184284 2290 return err;
1da177e4
LT
2291
2292 err = -ENOBUFS;
2293
228e548e 2294 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2295 goto out_freeiov;
28a94d8f 2296 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2297 ctl_len = msg_sys->msg_controllen;
1da177e4 2298 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2299 err =
228e548e 2300 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2301 sizeof(ctl));
1da177e4
LT
2302 if (err)
2303 goto out_freeiov;
228e548e
AB
2304 ctl_buf = msg_sys->msg_control;
2305 ctl_len = msg_sys->msg_controllen;
1da177e4 2306 } else if (ctl_len) {
ac4340fc
DM
2307 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2308 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2309 if (ctl_len > sizeof(ctl)) {
1da177e4 2310 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2311 if (ctl_buf == NULL)
1da177e4
LT
2312 goto out_freeiov;
2313 }
2314 err = -EFAULT;
2315 /*
228e548e 2316 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2317 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2318 * checking falls down on this.
2319 */
fb8621bb 2320 if (copy_from_user(ctl_buf,
228e548e 2321 (void __user __force *)msg_sys->msg_control,
89bddce5 2322 ctl_len))
1da177e4 2323 goto out_freectl;
228e548e 2324 msg_sys->msg_control = ctl_buf;
1da177e4 2325 }
228e548e 2326 msg_sys->msg_flags = flags;
1da177e4
LT
2327
2328 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2329 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2330 /*
2331 * If this is sendmmsg() and current destination address is same as
2332 * previously succeeded address, omit asking LSM's decision.
2333 * used_address->name_len is initialized to UINT_MAX so that the first
2334 * destination address never matches.
2335 */
bc909d9d
MD
2336 if (used_address && msg_sys->msg_name &&
2337 used_address->name_len == msg_sys->msg_namelen &&
2338 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2339 used_address->name_len)) {
d8725c86 2340 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2341 goto out_freectl;
2342 }
d8725c86 2343 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2344 /*
2345 * If this is sendmmsg() and sending to current destination address was
2346 * successful, remember it.
2347 */
2348 if (used_address && err >= 0) {
2349 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2350 if (msg_sys->msg_name)
2351 memcpy(&used_address->name, msg_sys->msg_name,
2352 used_address->name_len);
c71d8ebe 2353 }
1da177e4
LT
2354
2355out_freectl:
89bddce5 2356 if (ctl_buf != ctl)
1da177e4
LT
2357 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2358out_freeiov:
da184284 2359 kfree(iov);
228e548e
AB
2360 return err;
2361}
2362
2363/*
2364 * BSD sendmsg interface
2365 */
0fa03c62
JA
/*
 * sendmsg for a caller that already holds the struct socket: runs
 * ___sys_sendmsg() on it with a local msghdr, no destination cache
 * (used_address = NULL) and no header flags allowed through
 * (allowed_msghdr_flags = 0).  No fd lookup or fput is done here.
 */
2366long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
2367 unsigned int flags)
2368{
2369 struct msghdr msg_sys;
2370
2371 return ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
2372}
228e548e 2373
e1834a32
DB
2374long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2375 bool forbid_cmsg_compat)
228e548e
AB
2376{
2377 int fput_needed, err;
2378 struct msghdr msg_sys;
1be374a0
AL
2379 struct socket *sock;
2380
e1834a32
DB
2381 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2382 return -EINVAL;
2383
1be374a0 2384 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2385 if (!sock)
2386 goto out;
2387
28a94d8f 2388 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2389
6cb153ca 2390 fput_light(sock->file, fput_needed);
89bddce5 2391out:
1da177e4
LT
2392 return err;
2393}
2394
/*
 * sendmsg(2) system call entry point.  Calls __sys_sendmsg() with
 * forbid_cmsg_compat = true, so MSG_CMSG_COMPAT in flags yields -EINVAL.
 */
666547ff 2395SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2396{
e1834a32 2397 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2398}
2399
228e548e
AB
2400/*
2401 * Linux sendmmsg interface
2402 */
2403
2404int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2405 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2406{
2407 int fput_needed, err, datagrams;
2408 struct socket *sock;
2409 struct mmsghdr __user *entry;
2410 struct compat_mmsghdr __user *compat_entry;
2411 struct msghdr msg_sys;
c71d8ebe 2412 struct used_address used_address;
f092276d 2413 unsigned int oflags = flags;
228e548e 2414
e1834a32
DB
2415 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2416 return -EINVAL;
2417
98382f41
AB
2418 if (vlen > UIO_MAXIOV)
2419 vlen = UIO_MAXIOV;
228e548e
AB
2420
2421 datagrams = 0;
2422
2423 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2424 if (!sock)
2425 return err;
2426
c71d8ebe 2427 used_address.name_len = UINT_MAX;
228e548e
AB
2428 entry = mmsg;
2429 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2430 err = 0;
f092276d 2431 flags |= MSG_BATCH;
228e548e
AB
2432
2433 while (datagrams < vlen) {
f092276d
TH
2434 if (datagrams == vlen - 1)
2435 flags = oflags;
2436
228e548e 2437 if (MSG_CMSG_COMPAT & flags) {
666547ff 2438 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2439 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2440 if (err < 0)
2441 break;
2442 err = __put_user(err, &compat_entry->msg_len);
2443 ++compat_entry;
2444 } else {
a7526eb5 2445 err = ___sys_sendmsg(sock,
666547ff 2446 (struct user_msghdr __user *)entry,
28a94d8f 2447 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2448 if (err < 0)
2449 break;
2450 err = put_user(err, &entry->msg_len);
2451 ++entry;
2452 }
2453
2454 if (err)
2455 break;
2456 ++datagrams;
3023898b
SHY
2457 if (msg_data_left(&msg_sys))
2458 break;
a78cb84c 2459 cond_resched();
228e548e
AB
2460 }
2461
228e548e
AB
2462 fput_light(sock->file, fput_needed);
2463
728ffb86
AB
2464 /* We only return an error if no datagrams were able to be sent */
2465 if (datagrams != 0)
228e548e
AB
2466 return datagrams;
2467
228e548e
AB
2468 return err;
2469}
2470
/*
 * sendmmsg(2) system call entry point.  Calls __sys_sendmmsg() with
 * forbid_cmsg_compat = true, so MSG_CMSG_COMPAT in flags yields -EINVAL.
 */
2471SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2472 unsigned int, vlen, unsigned int, flags)
2473{
e1834a32 2474 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2475}
2476
666547ff 2477static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2478 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2479{
89bddce5
SH
2480 struct compat_msghdr __user *msg_compat =
2481 (struct compat_msghdr __user *)msg;
1da177e4 2482 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2483 struct iovec *iov = iovstack;
1da177e4 2484 unsigned long cmsg_ptr;
2da62906 2485 int len;
08adb7da 2486 ssize_t err;
1da177e4
LT
2487
2488 /* kernel mode address */
230b1839 2489 struct sockaddr_storage addr;
1da177e4
LT
2490
2491 /* user mode address pointers */
2492 struct sockaddr __user *uaddr;
08adb7da 2493 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2494
08adb7da 2495 msg_sys->msg_name = &addr;
1da177e4 2496
f3d33426 2497 if (MSG_CMSG_COMPAT & flags)
08adb7da 2498 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2499 else
08adb7da 2500 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2501 if (err < 0)
da184284 2502 return err;
1da177e4 2503
a2e27255
ACM
2504 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2505 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2506
f3d33426
HFS
2507 /* We assume all kernel code knows the size of sockaddr_storage */
2508 msg_sys->msg_namelen = 0;
2509
1da177e4
LT
2510 if (sock->file->f_flags & O_NONBLOCK)
2511 flags |= MSG_DONTWAIT;
2da62906 2512 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2513 if (err < 0)
2514 goto out_freeiov;
2515 len = err;
2516
2517 if (uaddr != NULL) {
43db362d 2518 err = move_addr_to_user(&addr,
a2e27255 2519 msg_sys->msg_namelen, uaddr,
89bddce5 2520 uaddr_len);
1da177e4
LT
2521 if (err < 0)
2522 goto out_freeiov;
2523 }
a2e27255 2524 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2525 COMPAT_FLAGS(msg));
1da177e4
LT
2526 if (err)
2527 goto out_freeiov;
2528 if (MSG_CMSG_COMPAT & flags)
a2e27255 2529 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2530 &msg_compat->msg_controllen);
2531 else
a2e27255 2532 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2533 &msg->msg_controllen);
2534 if (err)
2535 goto out_freeiov;
2536 err = len;
2537
2538out_freeiov:
da184284 2539 kfree(iov);
a2e27255
ACM
2540 return err;
2541}
2542
2543/*
2544 * BSD recvmsg interface
2545 */
2546
aa1fa28f
JA
/*
 * recvmsg for a caller that already holds the struct socket: runs
 * ___sys_recvmsg() on it with a local msghdr and nosec = 0, i.e. through
 * sock_recvmsg().  No fd lookup or fput is done here.
 */
2547long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
2548 unsigned int flags)
2549{
2550 struct msghdr msg_sys;
2551
2552 return ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2553}
2554
e1834a32
DB
2555long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2556 bool forbid_cmsg_compat)
a2e27255
ACM
2557{
2558 int fput_needed, err;
2559 struct msghdr msg_sys;
1be374a0
AL
2560 struct socket *sock;
2561
e1834a32
DB
2562 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2563 return -EINVAL;
2564
1be374a0 2565 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2566 if (!sock)
2567 goto out;
2568
a7526eb5 2569 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2570
6cb153ca 2571 fput_light(sock->file, fput_needed);
1da177e4
LT
2572out:
2573 return err;
2574}
2575
/*
 * recvmsg(2) system call entry point.  Calls __sys_recvmsg() with
 * forbid_cmsg_compat = true, so MSG_CMSG_COMPAT in flags yields -EINVAL.
 */
666547ff 2576SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2577 unsigned int, flags)
2578{
e1834a32 2579 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2580}
2581
a2e27255
ACM
2582/*
2583 * Linux recvmmsg interface
2584 */
2585
e11d4284
AB
2586static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2587 unsigned int vlen, unsigned int flags,
2588 struct timespec64 *timeout)
a2e27255
ACM
2589{
2590 int fput_needed, err, datagrams;
2591 struct socket *sock;
2592 struct mmsghdr __user *entry;
d7256d0e 2593 struct compat_mmsghdr __user *compat_entry;
a2e27255 2594 struct msghdr msg_sys;
766b9f92
DD
2595 struct timespec64 end_time;
2596 struct timespec64 timeout64;
a2e27255
ACM
2597
2598 if (timeout &&
2599 poll_select_set_timeout(&end_time, timeout->tv_sec,
2600 timeout->tv_nsec))
2601 return -EINVAL;
2602
2603 datagrams = 0;
2604
2605 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2606 if (!sock)
2607 return err;
2608
7797dc41
SHY
2609 if (likely(!(flags & MSG_ERRQUEUE))) {
2610 err = sock_error(sock->sk);
2611 if (err) {
2612 datagrams = err;
2613 goto out_put;
2614 }
e623a9e9 2615 }
a2e27255
ACM
2616
2617 entry = mmsg;
d7256d0e 2618 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2619
2620 while (datagrams < vlen) {
2621 /*
2622 * No need to ask LSM for more than the first datagram.
2623 */
d7256d0e 2624 if (MSG_CMSG_COMPAT & flags) {
666547ff 2625 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2626 &msg_sys, flags & ~MSG_WAITFORONE,
2627 datagrams);
d7256d0e
JMG
2628 if (err < 0)
2629 break;
2630 err = __put_user(err, &compat_entry->msg_len);
2631 ++compat_entry;
2632 } else {
a7526eb5 2633 err = ___sys_recvmsg(sock,
666547ff 2634 (struct user_msghdr __user *)entry,
a7526eb5
AL
2635 &msg_sys, flags & ~MSG_WAITFORONE,
2636 datagrams);
d7256d0e
JMG
2637 if (err < 0)
2638 break;
2639 err = put_user(err, &entry->msg_len);
2640 ++entry;
2641 }
2642
a2e27255
ACM
2643 if (err)
2644 break;
a2e27255
ACM
2645 ++datagrams;
2646
71c5c159
BB
2647 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2648 if (flags & MSG_WAITFORONE)
2649 flags |= MSG_DONTWAIT;
2650
a2e27255 2651 if (timeout) {
766b9f92 2652 ktime_get_ts64(&timeout64);
c2e6c856 2653 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2654 if (timeout->tv_sec < 0) {
2655 timeout->tv_sec = timeout->tv_nsec = 0;
2656 break;
2657 }
2658
2659 /* Timeout, return less than vlen datagrams */
2660 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2661 break;
2662 }
2663
2664 /* Out of band data, return right away */
2665 if (msg_sys.msg_flags & MSG_OOB)
2666 break;
a78cb84c 2667 cond_resched();
a2e27255
ACM
2668 }
2669
a2e27255 2670 if (err == 0)
34b88a68
ACM
2671 goto out_put;
2672
2673 if (datagrams == 0) {
2674 datagrams = err;
2675 goto out_put;
2676 }
a2e27255 2677
34b88a68
ACM
2678 /*
2679 * We may return less entries than requested (vlen) if the
2680 * sock is non block and there aren't enough datagrams...
2681 */
2682 if (err != -EAGAIN) {
a2e27255 2683 /*
34b88a68
ACM
2684 * ... or if recvmsg returns an error after we
2685 * received some datagrams, where we record the
2686 * error to return on the next call or if the
2687 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2688 */
34b88a68 2689 sock->sk->sk_err = -err;
a2e27255 2690 }
34b88a68
ACM
2691out_put:
2692 fput_light(sock->file, fput_needed);
a2e27255 2693
34b88a68 2694 return datagrams;
a2e27255
ACM
2695}
2696
e11d4284
AB
2697int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2698 unsigned int vlen, unsigned int flags,
2699 struct __kernel_timespec __user *timeout,
2700 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2701{
2702 int datagrams;
c2e6c856 2703 struct timespec64 timeout_sys;
a2e27255 2704
e11d4284
AB
2705 if (timeout && get_timespec64(&timeout_sys, timeout))
2706 return -EFAULT;
a2e27255 2707
e11d4284 2708 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2709 return -EFAULT;
2710
e11d4284
AB
2711 if (!timeout && !timeout32)
2712 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2713
2714 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2715
e11d4284
AB
2716 if (datagrams <= 0)
2717 return datagrams;
2718
2719 if (timeout && put_timespec64(&timeout_sys, timeout))
2720 datagrams = -EFAULT;
2721
2722 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2723 datagrams = -EFAULT;
2724
2725 return datagrams;
2726}
2727
1255e269
DB
/*
 * recvmmsg(2) system call entry point taking a __kernel_timespec timeout.
 * Rejects MSG_CMSG_COMPAT with -EINVAL, then calls __sys_recvmmsg() with
 * the timeout in the 64-bit slot and NULL for the 32-bit one.
 */
2728SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2729 unsigned int, vlen, unsigned int, flags,
c2e6c856 2730 struct __kernel_timespec __user *, timeout)
1255e269 2731{
e11d4284
AB
2732 if (flags & MSG_CMSG_COMPAT)
2733 return -EINVAL;
2734
2735 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2736}
2737
/*
 * recvmmsg_time32 system call entry point, built only with
 * CONFIG_COMPAT_32BIT_TIME.  Same as recvmmsg but the timeout is an
 * old_timespec32, so it is passed in __sys_recvmmsg()'s 32-bit slot.
 */
2738#ifdef CONFIG_COMPAT_32BIT_TIME
2739SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
2740 unsigned int, vlen, unsigned int, flags,
2741 struct old_timespec32 __user *, timeout)
2742{
2743 if (flags & MSG_CMSG_COMPAT)
2744 return -EINVAL;
2745
2746 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
1255e269 2747}
e11d4284 2748#endif
1255e269 2749
a2e27255 2750#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
2751/*
 * Argument list sizes for sys_socketcall.
 *
 * nargs[call] is the size IN BYTES of the argument block for socketcall
 * number 'call': AL(n) is n * sizeof(unsigned long).  sys_socketcall uses
 * the value as the copy_from_user() length and divides it by
 * sizeof(unsigned long) to recover the argument count.  Entry 0 is AL(0)
 * because call numbers below 1 are rejected before the table is read.
 */
2752#define AL(x) ((x) * sizeof(unsigned long))
228e548e 2753static const unsigned char nargs[21] = {
c6d409cf
ED
2754 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2755 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2756 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
228e548e 2757 AL(4), AL(5), AL(4)
89bddce5
SH
2758};
2759
1da177e4
LT
/* AL() is only meaningful for building the table above. */
2760#undef AL
2761
2762/*
89bddce5 2763 * System call vectors.
1da177e4
LT
2764 *
2765 * Argument checking cleaned up. Saved 20% in size.
2766 * This function doesn't need to set the kernel lock because
89bddce5 2767 * it is set by the callees.
1da177e4
LT
2768 */
2769
3e0fa65f 2770SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2771{
2950fa9d 2772 unsigned long a[AUDITSC_ARGS];
89bddce5 2773 unsigned long a0, a1;
1da177e4 2774 int err;
47379052 2775 unsigned int len;
1da177e4 2776
228e548e 2777 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2778 return -EINVAL;
c8e8cd57 2779 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2780
47379052
AV
2781 len = nargs[call];
2782 if (len > sizeof(a))
2783 return -EINVAL;
2784
1da177e4 2785 /* copy_from_user should be SMP safe. */
47379052 2786 if (copy_from_user(a, args, len))
1da177e4 2787 return -EFAULT;
3ec3b2fb 2788
2950fa9d
CG
2789 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2790 if (err)
2791 return err;
3ec3b2fb 2792
89bddce5
SH
2793 a0 = a[0];
2794 a1 = a[1];
2795
2796 switch (call) {
2797 case SYS_SOCKET:
9d6a15c3 2798 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2799 break;
2800 case SYS_BIND:
a87d35d8 2801 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2802 break;
2803 case SYS_CONNECT:
1387c2c2 2804 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2805 break;
2806 case SYS_LISTEN:
25e290ee 2807 err = __sys_listen(a0, a1);
89bddce5
SH
2808 break;
2809 case SYS_ACCEPT:
4541e805
DB
2810 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2811 (int __user *)a[2], 0);
89bddce5
SH
2812 break;
2813 case SYS_GETSOCKNAME:
2814 err =
8882a107
DB
2815 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2816 (int __user *)a[2]);
89bddce5
SH
2817 break;
2818 case SYS_GETPEERNAME:
2819 err =
b21c8f83
DB
2820 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2821 (int __user *)a[2]);
89bddce5
SH
2822 break;
2823 case SYS_SOCKETPAIR:
6debc8d8 2824 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2825 break;
2826 case SYS_SEND:
f3bf896b
DB
2827 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2828 NULL, 0);
89bddce5
SH
2829 break;
2830 case SYS_SENDTO:
211b634b
DB
2831 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2832 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2833 break;
2834 case SYS_RECV:
d27e9afc
DB
2835 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2836 NULL, NULL);
89bddce5
SH
2837 break;
2838 case SYS_RECVFROM:
7a09e1eb
DB
2839 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2840 (struct sockaddr __user *)a[4],
2841 (int __user *)a[5]);
89bddce5
SH
2842 break;
2843 case SYS_SHUTDOWN:
005a1aea 2844 err = __sys_shutdown(a0, a1);
89bddce5
SH
2845 break;
2846 case SYS_SETSOCKOPT:
cc36dca0
DB
2847 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2848 a[4]);
89bddce5
SH
2849 break;
2850 case SYS_GETSOCKOPT:
2851 err =
13a2d70e
DB
2852 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2853 (int __user *)a[4]);
89bddce5
SH
2854 break;
2855 case SYS_SENDMSG:
e1834a32
DB
2856 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2857 a[2], true);
89bddce5 2858 break;
228e548e 2859 case SYS_SENDMMSG:
e1834a32
DB
2860 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2861 a[3], true);
228e548e 2862 break;
89bddce5 2863 case SYS_RECVMSG:
e1834a32
DB
2864 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2865 a[2], true);
89bddce5 2866 break;
a2e27255 2867 case SYS_RECVMMSG:
e11d4284
AB
2868 if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
2869 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2870 a[2], a[3],
2871 (struct __kernel_timespec __user *)a[4],
2872 NULL);
2873 else
2874 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2875 a[2], a[3], NULL,
2876 (struct old_timespec32 __user *)a[4]);
a2e27255 2877 break;
de11defe 2878 case SYS_ACCEPT4:
4541e805
DB
2879 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2880 (int __user *)a[2], a[3]);
aaca0bdc 2881 break;
89bddce5
SH
2882 default:
2883 err = -EINVAL;
2884 break;
1da177e4
LT
2885 }
2886 return err;
2887}
2888
89bddce5 2889#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2890
55737fda
SH
2891/**
2892 * sock_register - add a socket protocol handler
2893 * @ops: description of protocol
2894 *
1da177e4
LT
2895 * This function is called by a protocol handler that wants to
2896 * advertise its address family, and have it linked into the
e793c0f7 2897 * socket interface. The value ops->family corresponds to the
55737fda 2898 * socket system call protocol family.
1da177e4 2899 */
f0fd27d4 2900int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2901{
2902 int err;
2903
2904 if (ops->family >= NPROTO) {
3410f22e 2905 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2906 return -ENOBUFS;
2907 }
55737fda
SH
2908
2909 spin_lock(&net_family_lock);
190683a9
ED
2910 if (rcu_dereference_protected(net_families[ops->family],
2911 lockdep_is_held(&net_family_lock)))
55737fda
SH
2912 err = -EEXIST;
2913 else {
cf778b00 2914 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2915 err = 0;
2916 }
55737fda
SH
2917 spin_unlock(&net_family_lock);
2918
3410f22e 2919 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2920 return err;
2921}
c6d409cf 2922EXPORT_SYMBOL(sock_register);
1da177e4 2923
55737fda
SH
2924/**
2925 * sock_unregister - remove a protocol handler
2926 * @family: protocol family to remove
2927 *
1da177e4
LT
2928 * This function is called by a protocol handler that wants to
2929 * remove its address family, and have it unlinked from the
55737fda
SH
2930 * new socket creation.
2931 *
2932 * If protocol handler is a module, then it can use module reference
2933 * counts to protect against new references. If protocol handler is not
2934 * a module then it needs to provide its own protection in
2935 * the ops->create routine.
1da177e4 2936 */
f0fd27d4 2937void sock_unregister(int family)
1da177e4 2938{
f0fd27d4 2939 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2940
55737fda 2941 spin_lock(&net_family_lock);
a9b3cd7f 2942 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2943 spin_unlock(&net_family_lock);
2944
2945 synchronize_rcu();
2946
3410f22e 2947 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2948}
c6d409cf 2949EXPORT_SYMBOL(sock_unregister);
1da177e4 2950
bf2ae2e4
XL
2951bool sock_is_registered(int family)
2952{
66b51b0a 2953 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2954}
2955
77d76ea3 2956static int __init sock_init(void)
1da177e4 2957{
b3e19d92 2958 int err;
2ca794e5
EB
2959 /*
2960 * Initialize the network sysctl infrastructure.
2961 */
2962 err = net_sysctl_init();
2963 if (err)
2964 goto out;
b3e19d92 2965
1da177e4 2966 /*
89bddce5 2967 * Initialize skbuff SLAB cache
1da177e4
LT
2968 */
2969 skb_init();
1da177e4
LT
2970
2971 /*
89bddce5 2972 * Initialize the protocols module.
1da177e4
LT
2973 */
2974
2975 init_inodecache();
b3e19d92
NP
2976
2977 err = register_filesystem(&sock_fs_type);
2978 if (err)
2979 goto out_fs;
1da177e4 2980 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2981 if (IS_ERR(sock_mnt)) {
2982 err = PTR_ERR(sock_mnt);
2983 goto out_mount;
2984 }
77d76ea3
AK
2985
2986 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2987 */
2988
2989#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2990 err = netfilter_init();
2991 if (err)
2992 goto out;
1da177e4 2993#endif
cbeb321a 2994
408eccce 2995 ptp_classifier_init();
c1f19b51 2996
b3e19d92
NP
2997out:
2998 return err;
2999
3000out_mount:
3001 unregister_filesystem(&sock_fs_type);
3002out_fs:
3003 goto out;
1da177e4
LT
3004}
3005
77d76ea3
AK
3006core_initcall(sock_init); /* early initcall */
3007
1da177e4
LT
3008#ifdef CONFIG_PROC_FS
3009void socket_seq_show(struct seq_file *seq)
3010{
648845ab
TZ
3011 seq_printf(seq, "sockets: used %d\n",
3012 sock_inuse_get(seq->private));
1da177e4 3013}
89bddce5 3014#endif /* CONFIG_PROC_FS */
1da177e4 3015
89bbfc95 3016#ifdef CONFIG_COMPAT
36fd633e 3017static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 3018{
6b96018b 3019 struct compat_ifconf ifc32;
7a229387 3020 struct ifconf ifc;
7a229387
AB
3021 int err;
3022
6b96018b 3023 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3024 return -EFAULT;
3025
36fd633e
AV
3026 ifc.ifc_len = ifc32.ifc_len;
3027 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 3028
36fd633e
AV
3029 rtnl_lock();
3030 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
3031 rtnl_unlock();
7a229387
AB
3032 if (err)
3033 return err;
3034
36fd633e 3035 ifc32.ifc_len = ifc.ifc_len;
6b96018b 3036 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3037 return -EFAULT;
3038
3039 return 0;
3040}
3041
6b96018b 3042static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 3043{
3a7da39d
BH
3044 struct compat_ethtool_rxnfc __user *compat_rxnfc;
3045 bool convert_in = false, convert_out = false;
44c02a2c
AV
3046 size_t buf_size = 0;
3047 struct ethtool_rxnfc __user *rxnfc = NULL;
3048 struct ifreq ifr;
3a7da39d
BH
3049 u32 rule_cnt = 0, actual_rule_cnt;
3050 u32 ethcmd;
7a229387 3051 u32 data;
3a7da39d 3052 int ret;
7a229387 3053
3a7da39d
BH
3054 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
3055 return -EFAULT;
7a229387 3056
3a7da39d
BH
3057 compat_rxnfc = compat_ptr(data);
3058
3059 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
3060 return -EFAULT;
3061
3a7da39d
BH
3062 /* Most ethtool structures are defined without padding.
3063 * Unfortunately struct ethtool_rxnfc is an exception.
3064 */
3065 switch (ethcmd) {
3066 default:
3067 break;
3068 case ETHTOOL_GRXCLSRLALL:
3069 /* Buffer size is variable */
3070 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3071 return -EFAULT;
3072 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3073 return -ENOMEM;
3074 buf_size += rule_cnt * sizeof(u32);
3075 /* fall through */
3076 case ETHTOOL_GRXRINGS:
3077 case ETHTOOL_GRXCLSRLCNT:
3078 case ETHTOOL_GRXCLSRULE:
55664f32 3079 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3080 convert_out = true;
3081 /* fall through */
3082 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3083 buf_size += sizeof(struct ethtool_rxnfc);
3084 convert_in = true;
44c02a2c 3085 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3086 break;
3087 }
3088
44c02a2c 3089 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3090 return -EFAULT;
3091
44c02a2c 3092 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3093
3a7da39d 3094 if (convert_in) {
127fe533 3095 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3096 * fs.ring_cookie and at the end of fs, but nowhere else.
3097 */
127fe533
AD
3098 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3099 sizeof(compat_rxnfc->fs.m_ext) !=
3100 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3101 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3102 BUILD_BUG_ON(
3103 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3104 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3105 offsetof(struct ethtool_rxnfc, fs.location) -
3106 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3107
3108 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3109 (void __user *)(&rxnfc->fs.m_ext + 1) -
3110 (void __user *)rxnfc) ||
3a7da39d
BH
3111 copy_in_user(&rxnfc->fs.ring_cookie,
3112 &compat_rxnfc->fs.ring_cookie,
954b1244 3113 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3114 (void __user *)&rxnfc->fs.ring_cookie))
3115 return -EFAULT;
3116 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3117 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3118 return -EFAULT;
3119 } else if (copy_in_user(&rxnfc->rule_cnt,
3120 &compat_rxnfc->rule_cnt,
3121 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3122 return -EFAULT;
3123 }
3124
44c02a2c 3125 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3126 if (ret)
3127 return ret;
3128
3129 if (convert_out) {
3130 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3131 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3132 (const void __user *)rxnfc) ||
3a7da39d
BH
3133 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3134 &rxnfc->fs.ring_cookie,
954b1244
SH
3135 (const void __user *)(&rxnfc->fs.location + 1) -
3136 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3137 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3138 sizeof(rxnfc->rule_cnt)))
3139 return -EFAULT;
3140
3141 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3142 /* As an optimisation, we only copy the actual
3143 * number of rules that the underlying
3144 * function returned. Since Mallory might
3145 * change the rule count in user memory, we
3146 * check that it is less than the rule count
3147 * originally given (as the user buffer size),
3148 * which has been range-checked.
3149 */
3150 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3151 return -EFAULT;
3152 if (actual_rule_cnt < rule_cnt)
3153 rule_cnt = actual_rule_cnt;
3154 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3155 &rxnfc->rule_locs[0],
3156 rule_cnt * sizeof(u32)))
3157 return -EFAULT;
3158 }
3159 }
3160
3161 return 0;
7a229387
AB
3162}
3163
7a50a240
AB
3164static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3165{
7a50a240 3166 compat_uptr_t uptr32;
44c02a2c
AV
3167 struct ifreq ifr;
3168 void __user *saved;
3169 int err;
7a50a240 3170
44c02a2c 3171 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3172 return -EFAULT;
3173
3174 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3175 return -EFAULT;
3176
44c02a2c
AV
3177 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3178 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3179
44c02a2c
AV
3180 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3181 if (!err) {
3182 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3183 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3184 err = -EFAULT;
ccbd6a5a 3185 }
44c02a2c 3186 return err;
7a229387
AB
3187}
3188
590d4693
BH
3189/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3190static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3191 struct compat_ifreq __user *u_ifreq32)
7a229387 3192{
44c02a2c 3193 struct ifreq ifreq;
7a229387
AB
3194 u32 data32;
3195
44c02a2c 3196 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3197 return -EFAULT;
44c02a2c 3198 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3199 return -EFAULT;
44c02a2c 3200 ifreq.ifr_data = compat_ptr(data32);
7a229387 3201
44c02a2c 3202 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3203}
3204
37ac39bd
JB
3205static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3206 unsigned int cmd,
3207 struct compat_ifreq __user *uifr32)
3208{
3209 struct ifreq __user *uifr;
3210 int err;
3211
3212 /* Handle the fact that while struct ifreq has the same *layout* on
3213 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3214 * which are handled elsewhere, it still has different *size* due to
3215 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3216 * resulting in struct ifreq being 32 and 40 bytes respectively).
3217 * As a result, if the struct happens to be at the end of a page and
3218 * the next page isn't readable/writable, we get a fault. To prevent
3219 * that, copy back and forth to the full size.
3220 */
3221
3222 uifr = compat_alloc_user_space(sizeof(*uifr));
3223 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3224 return -EFAULT;
3225
3226 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3227
3228 if (!err) {
3229 switch (cmd) {
3230 case SIOCGIFFLAGS:
3231 case SIOCGIFMETRIC:
3232 case SIOCGIFMTU:
3233 case SIOCGIFMEM:
3234 case SIOCGIFHWADDR:
3235 case SIOCGIFINDEX:
3236 case SIOCGIFADDR:
3237 case SIOCGIFBRDADDR:
3238 case SIOCGIFDSTADDR:
3239 case SIOCGIFNETMASK:
3240 case SIOCGIFPFLAGS:
3241 case SIOCGIFTXQLEN:
3242 case SIOCGMIIPHY:
3243 case SIOCGMIIREG:
c6c9fee3 3244 case SIOCGIFNAME:
37ac39bd
JB
3245 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3246 err = -EFAULT;
3247 break;
3248 }
3249 }
3250 return err;
3251}
3252
a2116ed2
AB
3253static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3254 struct compat_ifreq __user *uifr32)
3255{
3256 struct ifreq ifr;
3257 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3258 int err;
3259
3260 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3261 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3262 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3263 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3264 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3265 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3266 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3267 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3268 if (err)
3269 return -EFAULT;
3270
44c02a2c 3271 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3272
3273 if (cmd == SIOCGIFMAP && !err) {
3274 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3275 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3276 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3277 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3278 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3279 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3280 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3281 if (err)
3282 err = -EFAULT;
3283 }
3284 return err;
3285}
3286
7a229387 3287struct rtentry32 {
c6d409cf 3288 u32 rt_pad1;
7a229387
AB
3289 struct sockaddr rt_dst; /* target address */
3290 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3291 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3292 unsigned short rt_flags;
3293 short rt_pad2;
3294 u32 rt_pad3;
3295 unsigned char rt_tos;
3296 unsigned char rt_class;
3297 short rt_pad4;
3298 short rt_metric; /* +1 for binary compatibility! */
7a229387 3299 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3300 u32 rt_mtu; /* per route MTU/Window */
3301 u32 rt_window; /* Window clamping */
7a229387
AB
3302 unsigned short rt_irtt; /* Initial RTT */
3303};
3304
3305struct in6_rtmsg32 {
3306 struct in6_addr rtmsg_dst;
3307 struct in6_addr rtmsg_src;
3308 struct in6_addr rtmsg_gateway;
3309 u32 rtmsg_type;
3310 u16 rtmsg_dst_len;
3311 u16 rtmsg_src_len;
3312 u32 rtmsg_metric;
3313 u32 rtmsg_info;
3314 u32 rtmsg_flags;
3315 s32 rtmsg_ifindex;
3316};
3317
6b96018b
AB
3318static int routing_ioctl(struct net *net, struct socket *sock,
3319 unsigned int cmd, void __user *argp)
7a229387
AB
3320{
3321 int ret;
3322 void *r = NULL;
3323 struct in6_rtmsg r6;
3324 struct rtentry r4;
3325 char devname[16];
3326 u32 rtdev;
3327 mm_segment_t old_fs = get_fs();
3328
6b96018b
AB
3329 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3330 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3331 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3332 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3333 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3334 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3335 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3336 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3337 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3338 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3339 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3340
3341 r = (void *) &r6;
3342 } else { /* ipv4 */
6b96018b 3343 struct rtentry32 __user *ur4 = argp;
c6d409cf 3344 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3345 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3346 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3347 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3348 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3349 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3350 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3351 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3352 if (rtdev) {
c6d409cf 3353 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3354 r4.rt_dev = (char __user __force *)devname;
3355 devname[15] = 0;
7a229387
AB
3356 } else
3357 r4.rt_dev = NULL;
3358
3359 r = (void *) &r4;
3360 }
3361
3362 if (ret) {
3363 ret = -EFAULT;
3364 goto out;
3365 }
3366
c6d409cf 3367 set_fs(KERNEL_DS);
63ff03ab 3368 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3369 set_fs(old_fs);
7a229387
AB
3370
3371out:
7a229387
AB
3372 return ret;
3373}
3374
3375/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3376 * for some operations; this forces use of the newer bridge-utils that
25985edc 3377 * use compatible ioctls
7a229387 3378 */
6b96018b 3379static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3380{
6b96018b 3381 compat_ulong_t tmp;
7a229387 3382
6b96018b 3383 if (get_user(tmp, argp))
7a229387
AB
3384 return -EFAULT;
3385 if (tmp == BRCTL_GET_VERSION)
3386 return BRCTL_VERSION + 1;
3387 return -EINVAL;
3388}
3389
6b96018b
AB
3390static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3391 unsigned int cmd, unsigned long arg)
3392{
3393 void __user *argp = compat_ptr(arg);
3394 struct sock *sk = sock->sk;
3395 struct net *net = sock_net(sk);
7a229387 3396
6b96018b 3397 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3398 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3399
3400 switch (cmd) {
3401 case SIOCSIFBR:
3402 case SIOCGIFBR:
3403 return old_bridge_ioctl(argp);
6b96018b 3404 case SIOCGIFCONF:
36fd633e 3405 return compat_dev_ifconf(net, argp);
6b96018b
AB
3406 case SIOCETHTOOL:
3407 return ethtool_ioctl(net, argp);
7a50a240
AB
3408 case SIOCWANDEV:
3409 return compat_siocwandev(net, argp);
a2116ed2
AB
3410 case SIOCGIFMAP:
3411 case SIOCSIFMAP:
3412 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3413 case SIOCADDRT:
3414 case SIOCDELRT:
3415 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3416 case SIOCGSTAMP_OLD:
3417 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3418 if (!sock->ops->gettstamp)
3419 return -ENOIOCTLCMD;
0768e170 3420 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3421 !COMPAT_USE_64BIT_TIME);
3422
590d4693
BH
3423 case SIOCBONDSLAVEINFOQUERY:
3424 case SIOCBONDINFOQUERY:
a2116ed2 3425 case SIOCSHWTSTAMP:
fd468c74 3426 case SIOCGHWTSTAMP:
590d4693 3427 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3428
3429 case FIOSETOWN:
3430 case SIOCSPGRP:
3431 case FIOGETOWN:
3432 case SIOCGPGRP:
3433 case SIOCBRADDBR:
3434 case SIOCBRDELBR:
3435 case SIOCGIFVLAN:
3436 case SIOCSIFVLAN:
3437 case SIOCADDDLCI:
3438 case SIOCDELDLCI:
c62cce2c 3439 case SIOCGSKNS:
0768e170
AB
3440 case SIOCGSTAMP_NEW:
3441 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3442 return sock_ioctl(file, cmd, arg);
3443
3444 case SIOCGIFFLAGS:
3445 case SIOCSIFFLAGS:
3446 case SIOCGIFMETRIC:
3447 case SIOCSIFMETRIC:
3448 case SIOCGIFMTU:
3449 case SIOCSIFMTU:
3450 case SIOCGIFMEM:
3451 case SIOCSIFMEM:
3452 case SIOCGIFHWADDR:
3453 case SIOCSIFHWADDR:
3454 case SIOCADDMULTI:
3455 case SIOCDELMULTI:
3456 case SIOCGIFINDEX:
6b96018b
AB
3457 case SIOCGIFADDR:
3458 case SIOCSIFADDR:
3459 case SIOCSIFHWBROADCAST:
6b96018b 3460 case SIOCDIFADDR:
6b96018b
AB
3461 case SIOCGIFBRDADDR:
3462 case SIOCSIFBRDADDR:
3463 case SIOCGIFDSTADDR:
3464 case SIOCSIFDSTADDR:
3465 case SIOCGIFNETMASK:
3466 case SIOCSIFNETMASK:
3467 case SIOCSIFPFLAGS:
3468 case SIOCGIFPFLAGS:
3469 case SIOCGIFTXQLEN:
3470 case SIOCSIFTXQLEN:
3471 case SIOCBRADDIF:
3472 case SIOCBRDELIF:
c6c9fee3 3473 case SIOCGIFNAME:
9177efd3
AB
3474 case SIOCSIFNAME:
3475 case SIOCGMIIPHY:
3476 case SIOCGMIIREG:
3477 case SIOCSMIIREG:
f92d4fc9
AV
3478 case SIOCBONDENSLAVE:
3479 case SIOCBONDRELEASE:
3480 case SIOCBONDSETHWADDR:
3481 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3482 return compat_ifreq_ioctl(net, sock, cmd, argp);
3483
6b96018b
AB
3484 case SIOCSARP:
3485 case SIOCGARP:
3486 case SIOCDARP:
6b96018b 3487 case SIOCATMARK:
63ff03ab 3488 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3489 }
3490
6b96018b
AB
3491 return -ENOIOCTLCMD;
3492}
7a229387 3493
95c96174 3494static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3495 unsigned long arg)
89bbfc95
SP
3496{
3497 struct socket *sock = file->private_data;
3498 int ret = -ENOIOCTLCMD;
87de87d5
DM
3499 struct sock *sk;
3500 struct net *net;
3501
3502 sk = sock->sk;
3503 net = sock_net(sk);
89bbfc95
SP
3504
3505 if (sock->ops->compat_ioctl)
3506 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3507
87de87d5
DM
3508 if (ret == -ENOIOCTLCMD &&
3509 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3510 ret = compat_wext_handle_ioctl(net, cmd, arg);
3511
6b96018b
AB
3512 if (ret == -ENOIOCTLCMD)
3513 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3514
89bbfc95
SP
3515 return ret;
3516}
3517#endif
3518
8a3c245c
PT
3519/**
3520 * kernel_bind - bind an address to a socket (kernel space)
3521 * @sock: socket
3522 * @addr: address
3523 * @addrlen: length of address
3524 *
3525 * Returns 0 or an error.
3526 */
3527
ac5a488e
SS
3528int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3529{
3530 return sock->ops->bind(sock, addr, addrlen);
3531}
c6d409cf 3532EXPORT_SYMBOL(kernel_bind);
ac5a488e 3533
8a3c245c
PT
3534/**
3535 * kernel_listen - move socket to listening state (kernel space)
3536 * @sock: socket
3537 * @backlog: pending connections queue size
3538 *
3539 * Returns 0 or an error.
3540 */
3541
ac5a488e
SS
3542int kernel_listen(struct socket *sock, int backlog)
3543{
3544 return sock->ops->listen(sock, backlog);
3545}
c6d409cf 3546EXPORT_SYMBOL(kernel_listen);
ac5a488e 3547
8a3c245c
PT
3548/**
3549 * kernel_accept - accept a connection (kernel space)
3550 * @sock: listening socket
3551 * @newsock: new connected socket
3552 * @flags: flags
3553 *
3554 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3555 * If it fails, @newsock is guaranteed to be %NULL.
3556 * Returns 0 or an error.
3557 */
3558
ac5a488e
SS
3559int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3560{
3561 struct sock *sk = sock->sk;
3562 int err;
3563
3564 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3565 newsock);
3566 if (err < 0)
3567 goto done;
3568
cdfbabfb 3569 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3570 if (err < 0) {
3571 sock_release(*newsock);
fa8705b0 3572 *newsock = NULL;
ac5a488e
SS
3573 goto done;
3574 }
3575
3576 (*newsock)->ops = sock->ops;
1b08534e 3577 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3578
3579done:
3580 return err;
3581}
c6d409cf 3582EXPORT_SYMBOL(kernel_accept);
ac5a488e 3583
8a3c245c
PT
3584/**
3585 * kernel_connect - connect a socket (kernel space)
3586 * @sock: socket
3587 * @addr: address
3588 * @addrlen: address length
3589 * @flags: flags (O_NONBLOCK, ...)
3590 *
3591 * For datagram sockets, @addr is the addres to which datagrams are sent
3592 * by default, and the only address from which datagrams are received.
3593 * For stream sockets, attempts to connect to @addr.
3594 * Returns 0 or an error code.
3595 */
3596
ac5a488e 3597int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3598 int flags)
ac5a488e
SS
3599{
3600 return sock->ops->connect(sock, addr, addrlen, flags);
3601}
c6d409cf 3602EXPORT_SYMBOL(kernel_connect);
ac5a488e 3603
8a3c245c
PT
3604/**
3605 * kernel_getsockname - get the address which the socket is bound (kernel space)
3606 * @sock: socket
3607 * @addr: address holder
3608 *
3609 * Fills the @addr pointer with the address which the socket is bound.
3610 * Returns 0 or an error code.
3611 */
3612
9b2c45d4 3613int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3614{
9b2c45d4 3615 return sock->ops->getname(sock, addr, 0);
ac5a488e 3616}
c6d409cf 3617EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3618
8a3c245c
PT
3619/**
3620 * kernel_peername - get the address which the socket is connected (kernel space)
3621 * @sock: socket
3622 * @addr: address holder
3623 *
3624 * Fills the @addr pointer with the address which the socket is connected.
3625 * Returns 0 or an error code.
3626 */
3627
9b2c45d4 3628int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3629{
9b2c45d4 3630 return sock->ops->getname(sock, addr, 1);
ac5a488e 3631}
c6d409cf 3632EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3633
8a3c245c
PT
3634/**
3635 * kernel_getsockopt - get a socket option (kernel space)
3636 * @sock: socket
3637 * @level: API level (SOL_SOCKET, ...)
3638 * @optname: option tag
3639 * @optval: option value
3640 * @optlen: option length
3641 *
3642 * Assigns the option length to @optlen.
3643 * Returns 0 or an error.
3644 */
3645
ac5a488e
SS
3646int kernel_getsockopt(struct socket *sock, int level, int optname,
3647 char *optval, int *optlen)
3648{
3649 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3650 char __user *uoptval;
3651 int __user *uoptlen;
ac5a488e
SS
3652 int err;
3653
fb8621bb
NK
3654 uoptval = (char __user __force *) optval;
3655 uoptlen = (int __user __force *) optlen;
3656
ac5a488e
SS
3657 set_fs(KERNEL_DS);
3658 if (level == SOL_SOCKET)
fb8621bb 3659 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3660 else
fb8621bb
NK
3661 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3662 uoptlen);
ac5a488e
SS
3663 set_fs(oldfs);
3664 return err;
3665}
c6d409cf 3666EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3667
8a3c245c
PT
3668/**
3669 * kernel_setsockopt - set a socket option (kernel space)
3670 * @sock: socket
3671 * @level: API level (SOL_SOCKET, ...)
3672 * @optname: option tag
3673 * @optval: option value
3674 * @optlen: option length
3675 *
3676 * Returns 0 or an error.
3677 */
3678
ac5a488e 3679int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3680 char *optval, unsigned int optlen)
ac5a488e
SS
3681{
3682 mm_segment_t oldfs = get_fs();
fb8621bb 3683 char __user *uoptval;
ac5a488e
SS
3684 int err;
3685
fb8621bb
NK
3686 uoptval = (char __user __force *) optval;
3687
ac5a488e
SS
3688 set_fs(KERNEL_DS);
3689 if (level == SOL_SOCKET)
fb8621bb 3690 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3691 else
fb8621bb 3692 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3693 optlen);
3694 set_fs(oldfs);
3695 return err;
3696}
c6d409cf 3697EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3698
8a3c245c
PT
3699/**
3700 * kernel_sendpage - send a &page through a socket (kernel space)
3701 * @sock: socket
3702 * @page: page
3703 * @offset: page offset
3704 * @size: total size in bytes
3705 * @flags: flags (MSG_DONTWAIT, ...)
3706 *
3707 * Returns the total amount sent in bytes or an error.
3708 */
3709
ac5a488e
SS
3710int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3711 size_t size, int flags)
3712{
3713 if (sock->ops->sendpage)
3714 return sock->ops->sendpage(sock, page, offset, size, flags);
3715
3716 return sock_no_sendpage(sock, page, offset, size, flags);
3717}
c6d409cf 3718EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3719
8a3c245c
PT
3720/**
3721 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3722 * @sk: sock
3723 * @page: page
3724 * @offset: page offset
3725 * @size: total size in bytes
3726 * @flags: flags (MSG_DONTWAIT, ...)
3727 *
3728 * Returns the total amount sent in bytes or an error.
3729 * Caller must hold @sk.
3730 */
3731
306b13eb
TH
3732int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3733 size_t size, int flags)
3734{
3735 struct socket *sock = sk->sk_socket;
3736
3737 if (sock->ops->sendpage_locked)
3738 return sock->ops->sendpage_locked(sk, page, offset, size,
3739 flags);
3740
3741 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3742}
3743EXPORT_SYMBOL(kernel_sendpage_locked);
3744
8a3c245c
PT
3745/**
3746 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3747 * @sock: socket
3748 * @how: connection part
3749 *
3750 * Returns 0 or an error.
3751 */
3752
91cf45f0
TM
3753int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3754{
3755 return sock->ops->shutdown(sock, how);
3756}
91cf45f0 3757EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3758
8a3c245c
PT
3759/**
3760 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3761 * @sk: socket
3762 *
3763 * This routine returns the IP overhead imposed by a socket i.e.
3764 * the length of the underlying IP header, depending on whether
3765 * this is an IPv4 or IPv6 socket and the length from IP options turned
3766 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3767 */
8a3c245c 3768
113c3075
P
3769u32 kernel_sock_ip_overhead(struct sock *sk)
3770{
3771 struct inet_sock *inet;
3772 struct ip_options_rcu *opt;
3773 u32 overhead = 0;
113c3075
P
3774#if IS_ENABLED(CONFIG_IPV6)
3775 struct ipv6_pinfo *np;
3776 struct ipv6_txoptions *optv6 = NULL;
3777#endif /* IS_ENABLED(CONFIG_IPV6) */
3778
3779 if (!sk)
3780 return overhead;
3781
113c3075
P
3782 switch (sk->sk_family) {
3783 case AF_INET:
3784 inet = inet_sk(sk);
3785 overhead += sizeof(struct iphdr);
3786 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3787 sock_owned_by_user(sk));
113c3075
P
3788 if (opt)
3789 overhead += opt->opt.optlen;
3790 return overhead;
3791#if IS_ENABLED(CONFIG_IPV6)
3792 case AF_INET6:
3793 np = inet6_sk(sk);
3794 overhead += sizeof(struct ipv6hdr);
3795 if (np)
3796 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3797 sock_owned_by_user(sk));
113c3075
P
3798 if (optv6)
3799 overhead += (optv6->opt_flen + optv6->opt_nflen);
3800 return overhead;
3801#endif /* IS_ENABLED(CONFIG_IPV6) */
3802 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3803 return overhead;
3804 }
3805}
3806EXPORT_SYMBOL(kernel_sock_ip_overhead);